pax_global_header00006660000000000000000000000064136373464660014534gustar00rootroot0000000000000052 comment=5caa8bc28dafb2283fc20c95adfa91ed81b52067 compute-runtime-20.13.16352/000077500000000000000000000000001363734646600153555ustar00rootroot00000000000000compute-runtime-20.13.16352/.clang-format000066400000000000000000000063171363734646600177370ustar00rootroot00000000000000--- Language: Cpp # BasedOnStyle: LLVM AccessModifierOffset: -2 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlinesLeft: false AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: false BinPackArguments: true BinPackParameters: true BraceWrapping: AfterClass: false AfterControlStatement: false AfterEnum: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false BeforeCatch: false BeforeElse: false IndentBraces: false BreakBeforeBinaryOperators: false BreakBeforeBraces: Attach BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false # clang-format > v3.8.0: BreakAfterJavaFieldAnnotations: false # clang-format > v3.8.0: BreakStringLiterals: true ColumnLimit: 0 CommentPragmas: '^ IWYU pragma:' ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] IncludeBlocks: Regroup IncludeCategories: - Regex: '^.(shared)/' Priority: 1 - Regex: 
'^.(documentation|manifests|offline_compiler|opencl|scripts)/' Priority: 2 - Regex: '^"test\.h"$' Priority: 2 - Regex: '^.(level_zero)/' Priority: 3 - Regex: '(d3d9types|d3d10_1)\.h' Priority: 4 - Regex: '(gfxEscape|windows)\.h' Priority: 5 - Regex: '^.(third_party|llvm|llvm-c|clang|clang-c|gtest|isl|json)/' Priority: 5 - Regex: '^.(gmock|cl|gl|d3d)' Priority: 5 - Regex: '^<.*>$' Priority: 10 - Regex: '.*' Priority: 5 IndentCaseLabels: false IndentWidth: 4 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: true MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Right ReflowComments: true SortIncludes: true SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 8 UseTab: Never ... 
compute-runtime-20.13.16352/.clang-tidy000066400000000000000000000034511363734646600174140ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,readability-identifier-naming,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*' # WarningsAsErrors: '.*' HeaderFilterRegex: '/runtime/|/core/|/offline_compiler/' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.MethodCase value: camelBack - key: readability-identifier-naming.ParameterCase value: camelBack - key: readability-identifier-naming.StructMemberCase value: camelBack - key: readability-identifier-naming.ClassMemberCase value: camelBack - key: readability-identifier-naming.ClassMethodCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... 
compute-runtime-20.13.16352/.ctags000066400000000000000000000000461363734646600164570ustar00rootroot00000000000000-R -h .inl.h --langmap=c++:.inl.cpp.h compute-runtime-20.13.16352/.gitattributes000066400000000000000000000000551363734646600202500ustar00rootroot00000000000000manifests/manifest.yml filter=repo_converter compute-runtime-20.13.16352/.gitignore000066400000000000000000000000261363734646600173430ustar00rootroot00000000000000build/* build_linux/* compute-runtime-20.13.16352/.gitreview000066400000000000000000000001221363734646600173560ustar00rootroot00000000000000[gerrit] defaultremote=origin defaultbranch=master track=true defaultrebase=false compute-runtime-20.13.16352/.travis.yml000066400000000000000000000042301363734646600174650ustar00rootroot00000000000000language: c compiler: - gcc sudo: required services: - docker git: depth: false notifications: email: recipients: - compute-runtime@intel.com matrix: include: - os: linux compiler: gcc env: BUILD_OS=ubuntu-16.04 COMPILER=gcc GEN=gen-8 - os: linux compiler: gcc env: BUILD_OS=ubuntu-16.04 COMPILER=gcc GEN=gen-9 - os: linux compiler: gcc env: BUILD_OS=ubuntu-16.04 COMPILER=gcc GEN=gen-11 - os: linux compiler: gcc env: BUILD_OS=ubuntu-16.04 COMPILER=gcc GEN=gen-12 - os: linux compiler: clang env: BUILD_OS=ubuntu-18.04 COMPILER=clang-8 - os: linux compiler: gcc env: BUILD_OS=ubuntu-18.04 COMPILER=gcc GEN=gen-8 - os: linux compiler: gcc env: BUILD_OS=ubuntu-18.04 COMPILER=gcc GEN=gen-9 - os: linux compiler: gcc env: BUILD_OS=ubuntu-18.04 COMPILER=gcc GEN=gen-11 - os: linux compiler: gcc env: BUILD_OS=ubuntu-18.04 COMPILER=gcc GEN=gen-12 - os: linux compiler: clang env: BUILD_OS=ubuntu-19.10 COMPILER=clang - os: linux compiler: gcc env: BUILD_OS=ubuntu-19.10 COMPILER=gcc GEN=gen-8 - os: linux compiler: gcc env: BUILD_OS=ubuntu-19.10 COMPILER=gcc GEN=gen-9 - os: linux compiler: gcc env: BUILD_OS=ubuntu-19.10 COMPILER=gcc GEN=gen-11 - os: linux compiler: gcc env: BUILD_OS=ubuntu-19.10 COMPILER=gcc GEN=gen-12 - 
os: linux compiler: clang env: BUILD_OS=ubuntu-20.04 COMPILER=clang - os: linux compiler: gcc env: BUILD_OS=ubuntu-20.04 COMPILER=gcc GEN=gen-8 - os: linux compiler: gcc env: BUILD_OS=ubuntu-20.04 COMPILER=gcc GEN=gen-9 - os: linux compiler: gcc env: BUILD_OS=ubuntu-20.04 COMPILER=gcc GEN=gen-11 - os: linux compiler: gcc env: BUILD_OS=ubuntu-20.04 COMPILER=gcc GEN=gen-12 script: ./scripts/run-build.sh compute-runtime-20.13.16352/BUILD.md000066400000000000000000000034351363734646600165430ustar00rootroot00000000000000# Building NEO driver Instructions have been tested on Ubuntu* and CentOS*. They assume a clean installation of a stable version. 1. Download & install required packages Example (Ubuntu): ```shell sudo apt-get install cmake g++ git pkg-config ``` Example (CentOS): ```shell sudo dnf install gcc-c++ cmake git make ``` See [LIMITATIONS.md](https://github.com/intel/compute-runtime/blob/master/opencl/doc/LIMITATIONS.md) for other requirements and dependencies, when building and installing NEO. 2. Install required dependencies Neo requires: - [Intel(R) Graphics Compiler for OpenCL(TM)](https://github.com/intel/intel-graphics-compiler) - [Intel(R) Graphics Memory Management](https://github.com/intel/gmmlib) Please visit their repositories for building and instalation instructions or use prebuilt packages: - Ubuntu: [launchpad repository](https://launchpad.net/~intel-opencl/+archive/ubuntu/intel-opencl) - libigc-dev - libigdgmm-dev - CentOS: [copr repository](https://copr.fedorainfracloud.org/coprs/jdanecki/intel-opencl) - intel-igc-opencl-devel - intel-gmmlib-devel Use versions compatible with selected [Neo release](https://github.com/intel/compute-runtime/releases). 3. Create workspace folder and download sources: Example: ```shell mkdir workspace cd workspace git clone https://github.com/intel/compute-runtime neo ``` 4. Create folder for build: Example: ```shell mkdir build ``` 5. 
(Optional) Enabling additional extensions * [cl_intel_va_api_media_sharing](https://github.com/intel/compute-runtime/blob/master/opencl/doc/cl_intel_va_api_media_sharing.md) 6. Build and install Example: ```shell cd build cmake -DCMAKE_BUILD_TYPE=Release -DSKIP_UNIT_TESTS=1 ../neo make -j`nproc` sudo make install ``` ___(*) Other names and brands may be claimed as property of others.___ compute-runtime-20.13.16352/CMakeLists.txt000066400000000000000000001067541363734646600201320ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) cmake_minimum_required(VERSION 3.13.0 FATAL_ERROR) else() cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR) endif() include(ExternalProject) # Include custom configs file(GLOB custom_configs *custom_config.cmake) foreach(config ${custom_configs}) include(${config}) endforeach() # Include needed macros include(common_macros.cmake) if(MSVC) set(CMAKE_GENERATOR_TOOLSET "host=x64") endif() if(NOT PROJECT_NAME) set(PROJECT_NAME igdrcl) endif() project(${PROJECT_NAME}) if(CMAKE_CONFIGURATION_TYPES) # Set custom build types for multi-configuration generators set(CMAKE_CONFIGURATION_TYPES "Release;ReleaseInternal;Debug") if(DEFINED CMAKE_BUILD_TYPE) list(APPEND CMAKE_CONFIGURATION_TYPES ${CMAKE_BUILD_TYPE}) endif() list(REMOVE_DUPLICATES CMAKE_CONFIGURATION_TYPES) endif() include(GNUInstallDirs) # define global property where we will collect component list to package define_property(GLOBAL PROPERTY NEO_OCL_COMPONENTS_LIST BRIEF_DOCS "List of components" FULL_DOCS "List of components to create packages") set(GLOBAL PROPERTY NEO_OCL_COMPONENTS_LIST "") define_property(GLOBAL PROPERTY NEO_L0_COMPONENTS_LIST BRIEF_DOCS "List of components" FULL_DOCS "List of components to create packages") set(GLOBAL PROPERTY NEO_L0_COMPONENTS_LIST "") set(BRANCH_TYPE "") set(BRANCH_DIR_SUFFIX "/${BRANCH_TYPE}") if(TR_DEPRECATED) add_definitions(-D_SILENCE_TR1_NAMESPACE_DEPRECATION_WARNING=1) 
endif(TR_DEPRECATED) if(RELEASE_WITH_REGKEYS) message(STATUS "Enabling RegKey reading in release build") add_definitions(-D_RELEASE_BUILD_WITH_REGKEYS) endif(RELEASE_WITH_REGKEYS) if(DEFINED NEO_EXTRA_DEFINES) string(REPLACE "," ";" NEO_EXTRA_DEFINES "${NEO_EXTRA_DEFINES}") foreach(_tmp ${NEO_EXTRA_DEFINES}) message(STATUS "adding extra define: ${_tmp}") add_definitions(-D${_tmp}) endforeach() endif() if(NOT CMAKE_BUILD_TYPE) message(STATUS "CMAKE_BUILD_TYPE not specified, using Release") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type: [Release, ReleaseInternal, Debug]" FORCE) endif() add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS -DCL_USE_DEPRECATED_OPENCL_1_2_APIS -DCL_USE_DEPRECATED_OPENCL_2_0_APIS) if(WIN32) add_definitions(-D_CRT_SECURE_NO_WARNINGS -DWDDM_VERSION_NUMBER=23 -DNOMINMAX) endif() set(CMAKE_C_FLAGS_RELEASEINTERNAL "${CMAKE_C_FLAGS_RELEASE}") set(CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASE}") set(CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_SHARED_LINKER_FLAGS_RELEASE}") set(CMAKE_EXE_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_EXE_LINKER_FLAGS_RELEASE}") if(MSVC AND ENABLE_VS_FASTLINK) string(APPEND CMAKE_EXE_LINKER_FLAGS_DEBUG " /debug:FASTLINK") string(APPEND CMAKE_SHARED_LINKER_FLAGS_DEBUG " /debug:FASTLINK") string(APPEND CMAKE_STATIC_LINKER_FLAGS_DEBUG " /debug:FASTLINK") string(APPEND CMAKE_MODULE_LINKER_FLAGS_DEBUG " /debug:FASTLINK") endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_TYPE_lower) if("${BUILD_TYPE_lower}" STREQUAL "releaseinternal") add_definitions(-D_RELEASE_INTERNAL) endif("${BUILD_TYPE_lower}" STREQUAL "releaseinternal") message(STATUS "${CMAKE_BUILD_TYPE} build configuration") # Set the runtime source directory if(NOT DEFINED NEO_SOURCE_DIR) set(NEO_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) endif() set(NEO_RUNTIME_SUB_DIR "opencl/source") set(NEO_RUNTIME_SOURCE_DIR "${NEO_SOURCE_DIR}/${NEO_RUNTIME_SUB_DIR}") set(NEO_RUNTIME_TESTS_SUB_DIR "opencl/test/unit_test") set(NEO_RUNTIME_TESTS_DIR 
"${NEO_SOURCE_DIR}/${NEO_RUNTIME_TESTS_SUB_DIR}") # Set the ocloc directory if(NOT DEFINED OCLOC_DIRECTORY) set(OCLOC_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/shared/offline_compiler) endif() # Set the shared source directory if(NOT DEFINED NEO_SHARED_DIRECTORY) set(NEO_SHARED_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/shared/source) endif() # Set the shared test directory if(NOT DEFINED NEO_SHARED_TEST_DIRECTORY) set(NEO_SHARED_TEST_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/shared/test) endif() if(SKIP_UNIT_TESTS) set(SKIP_NEO_UNIT_TESTS TRUE) endif() # Set our build directory if(NOT DEFINED NEO_BUILD_DIR) set(NEO_BUILD_DIR ${CMAKE_BINARY_DIR}) endif() if(NOT NEO_BINARY_DIR) set(NEO_BINARY_DIR ${CMAKE_BINARY_DIR}) endif() # we use c++14 set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) # we force using response files set(CMAKE_NINJA_FORCE_RESPONSE_FILE 1) # set output paths set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${NEO_BINARY_DIR}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${NEO_BINARY_DIR}/bin) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${NEO_BINARY_DIR}/lib) # determine Neo version include(version.cmake) # We want to build with the static, multithreaded runtime libraries (as opposed # to the multithreaded runtime DLLs) if(MSVC) string(REPLACE "/MDd" "/MTd" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL}") endif(MSVC) if(CMAKE_SIZEOF_VOID_P EQUAL 8) set(NEO_BITS "64") set(NEO_ARCH "x64") else() set(NEO_BITS "32") set(NEO_ARCH "x86") endif() if(NOT SKIP_UNIT_TESTS) if(NOT DEFINED GTEST_REPEAT) set(GTEST_REPEAT 1) endif() message(STATUS "GTest repeat count set to ${GTEST_REPEAT}") if(NOT DEFINED GTEST_SHUFFLE) set(GTEST_SHUFFLE --gtest_shuffle --gtest_random_seed=0) endif() message(STATUS "GTest shuffle set to ${GTEST_SHUFFLE}") endif() if(NOT KMDAF_HEADERS_DIR) if (EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/../kmdaf/inc/common/kmDaf.h") get_filename_component(KMDAF_HEADERS_DIR "../kmdaf/inc/common/" ABSOLUTE) message(STATUS "KM-DAF headers dir: ${KMDAF_HEADERS_DIR}") endif() endif(NOT KMDAF_HEADERS_DIR) if(NOT DEFINED SOURCE_LEVEL_DEBUGGER_HEADERS_DIR) get_filename_component(SOURCE_LEVEL_DEBUGGER_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/source_level_debugger/" ABSOLUTE) message(STATUS "Source Level Debugger headers dir: ${SOURCE_LEVEL_DEBUGGER_HEADERS_DIR}") endif() get_filename_component(AUB_STREAM_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party${BRANCH_DIR_SUFFIX}/aub_stream/headers" ABSOLUTE) if(IS_DIRECTORY ${AUB_STREAM_HEADERS_DIR}) message(STATUS "Aub Stream Headers dir: ${AUB_STREAM_HEADERS_DIR}") else() message(FATAL_ERROR "Aub Stream headers not available!") endif() if(NOT DEFINED AUB_STREAM_DIR) get_filename_component(TEMP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../aub_stream/aub_mem_dump" ABSOLUTE) if(IS_DIRECTORY ${TEMP_DIR}) set(AUB_STREAM_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../aub_stream") message(STATUS "Aub Stream dir: ${AUB_STREAM_DIR}") endif() endif() if(DISABLE_AUB_STREAM) unset(AUB_STREAM_DIR) endif() get_filename_component(I915_INCLUDES_DIR "${NEO_SOURCE_DIR}/third_party${BRANCH_DIR_SUFFIX}/uapi" ABSOLUTE) message(STATUS "i915 includes dir: ${I915_INCLUDES_DIR}") if(NOT DEFINED KHRONOS_HEADERS_DIR) get_filename_component(DIR_tmp "${CMAKE_CURRENT_SOURCE_DIR}/third_party/opencl_headers" ABSOLUTE) if(IS_DIRECTORY ${DIR_tmp}) set(KHRONOS_HEADERS_DIR ${DIR_tmp}) add_definitions(-DCL_TARGET_OPENCL_VERSION=220) else() message(FATAL_ERROR "Khronos OpenCL headers not available!") endif() endif() message(STATUS "Khronos OpenCL headers dir: ${KHRONOS_HEADERS_DIR}") set(OCL_HEADERS_DIR ${KHRONOS_HEADERS_DIR}) if(NOT DEFINED KHRONOS_GL_HEADERS_DIR) get_filename_component(GL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/opengl_headers" ABSOLUTE) if(IS_DIRECTORY ${GL_DIR}) set(KHRONOS_GL_HEADERS_DIR ${GL_DIR}) 
add_definitions(-DGL_TARGET_OPENGL_VERSION=210) endif() endif() message(STATUS "Khronos OpenGL headers dir: ${KHRONOS_GL_HEADERS_DIR}") if(NOT THIRD_PARTY_DIR) get_filename_component(THIRD_PARTY_DIR "../third_party/" ABSOLUTE) endif(NOT THIRD_PARTY_DIR) message(STATUS "Third party dir: ${THIRD_PARTY_DIR}") if(WIN32) # get WDK location and version to use if(NOT WDK_DIR) if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../wdk") get_filename_component(WDK_DIR "../wdk" ABSOLUTE) endif() endif(NOT WDK_DIR) if(WDK_DIR) if(IS_DIRECTORY "${WDK_DIR}/Win15") get_filename_component(WDK_DIR "${WDK_DIR}/Win15" ABSOLUTE) endif() endif() message(STATUS "WDK Directory: ${WDK_DIR}") if(NOT WDK_VERSION) # Get WDK version from ${WDK_DIR}/WDKVersion.txt file(READ "${WDK_DIR}/WDKVersion.txt" WindowsTargetPlatformVersion) string(REPLACE " " ";" WindowsTargetPlatformVersion ${WindowsTargetPlatformVersion}) list(LENGTH WindowsTargetPlatformVersion versionListLength) if(NOT versionListLength EQUAL 3) message(ERROR "Error reading content of WDKVersion.txt file") endif(NOT versionListLength EQUAL 3) list(GET WindowsTargetPlatformVersion 2 WindowsTargetPlatformVersion) else() set(WindowsTargetPlatformVersion ${WDK_VERSION}) endif() message(STATUS "WDK Version is ${WindowsTargetPlatformVersion}") if(${WindowsTargetPlatformVersion} VERSION_LESS "10.0.18328.0") set(CONST_FROM_WDK_10_0_18328_0) else() set(CONST_FROM_WDK_10_0_18328_0 "CONST") endif() add_compile_options(-DCONST_FROM_WDK_10_0_18328_0=${CONST_FROM_WDK_10_0_18328_0}) set(WDK_INCLUDE_PATHS "${WDK_DIR}/Include/${WindowsTargetPlatformVersion}/um" "${WDK_DIR}/Include/${WindowsTargetPlatformVersion}/shared" "${WDK_DIR}/Include/${WindowsTargetPlatformVersion}/km" ) message(STATUS "WDK include paths: ${WDK_INCLUDE_PATHS}") endif() # Intel Graphics Compiler detection if(NOT IGC__IGC_TARGETS) # check whether igc is part of workspace if(DEFINED IGC_DIR) get_filename_component(IGC_DIR "${IGC_DIR}" ABSOLUTE) else() get_filename_component(IGC_DIR_tmp 
"${NEO_SOURCE_DIR}/../igc" ABSOLUTE) if(IS_DIRECTORY "${IGC_DIR_tmp}") set(IGC_DIR "${IGC_DIR_tmp}") endif() endif() if(UNIX) # on Unix-like use pkg-config find_package(PkgConfig) if(DEFINED IGC_DIR) if(IS_DIRECTORY "${IGC_DIR}/lib/pkgconfig/") set(__tmp_LIBDIR "lib") elseif(IS_DIRECTORY "${IGC_DIR}/${CMAKE_INSTALL_LIBDIR}/pkgconfig/") set(__tmp_LIBDIR ${CMAKE_INSTALL_LIBDIR}) endif() endif() if(DEFINED __tmp_LIBDIR) set(OLD_PKG_CONFIG_PATH $ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${IGC_DIR}/${__tmp_LIBDIR}/pkgconfig") endif() pkg_check_modules(NEO__IGC igc-opencl) if(DEFINED __tmp_LIBDIR) set(ENV{PKG_CONFIG_PATH} ${OLD_PKG_CONFIG_PATH}) set(NEO__IGC_LIBRARY_PATH "${IGC_DIR}/${__tmp_LIBDIR}/") endif() if(NEO__IGC_FOUND) if(DEFINED IGC_DIR AND IS_DIRECTORY "${IGC_DIR}/${__tmp_LIBDIR}/pkgconfig/") string(REPLACE "${NEO__IGC_INCLUDEDIR}" "${IGC_DIR}/include/igc" NEO__IGC_INCLUDE_DIRS "${NEO__IGC_INCLUDE_DIRS}") endif() set(NEO__IGC_INCLUDE_DIR ${NEO__IGC_INCLUDE_DIRS}) message(STATUS "IGC include dirs: ${NEO__IGC_INCLUDE_DIR}") endif() if(DEFINED __tmp_LIBDIR) unset(__tmp_LIBDIR) endif() endif() if(NEO__IGC_FOUND) # do nothing elseif(EXISTS "${IGC_DIR}/CMakeLists.txt") message(STATUS "IGC source dir is: ${IGC_DIR}") set(IGC_OPTION__OUTPUT_DIR "${NEO_BUILD_DIR}/igc") set(IGC_OPTION__INCLUDE_IGC_COMPILER_TOOLS OFF) add_subdirectory_unique("${IGC_DIR}" "${NEO_BUILD_DIR}/igc" EXCLUDE_FROM_ALL) set(NEO__IGC_TARGETS "${IGC__IGC_TARGETS}") foreach(TARGET_tmp ${NEO__IGC_TARGETS}) list(APPEND NEO__IGC_INCLUDE_DIR $) list(APPEND NEO__IGC_COMPILE_DEFINITIONS $) endforeach(TARGET_tmp) message(STATUS "IGC targets: ${NEO__IGC_TARGETS}") else() message(FATAL_ERROR "Intel Graphics Compiler not found!") endif() else() set(NEO__IGC_TARGETS "${IGC__IGC_TARGETS}") foreach(TARGET_tmp ${NEO__IGC_TARGETS}) list(APPEND NEO__IGC_INCLUDE_DIR $) list(APPEND NEO__IGC_LIBRARY_PATH $) endforeach(TARGET_tmp) string(REPLACE ";" ":" NEO__IGC_LIBRARY_PATH "${NEO__IGC_LIBRARY_PATH}") 
message(STATUS "IGC targets: ${NEO__IGC_TARGETS}") endif() # VISA headers - always relative to IGC if(IS_DIRECTORY "${IGC_DIR}/../visa") get_filename_component(VISA_DIR "${IGC_DIR}/../visa" ABSOLUTE) elseif(IS_DIRECTORY "${IGC_DIR}/visa") set(VISA_DIR "${IGC_DIR}/visa") elseif(IS_DIRECTORY "${IGC_DIR}/include/visa") set(VISA_DIR "${IGC_DIR}/include/visa") elseif(IS_DIRECTORY "${NEO__IGC_INCLUDEDIR}/../visa") get_filename_component(VISA_DIR "${NEO__IGC_INCLUDEDIR}/../visa" ABSOLUTE) elseif(IS_DIRECTORY "${IGC_OCL_ADAPTOR_DIR}/../../visa") get_filename_component(VISA_DIR "${IGC_OCL_ADAPTOR_DIR}/../../visa" ABSOLUTE) endif() message(STATUS "VISA Dir: ${VISA_DIR}") if(IS_DIRECTORY "${VISA_DIR}/include") set(VISA_INCLUDE_DIR "${VISA_DIR}/include") else() set(VISA_INCLUDE_DIR "${VISA_DIR}") endif() # IGA headers - always relative to VISA if(IS_DIRECTORY "${VISA_DIR}/../iga") get_filename_component(IGA_DIR "${VISA_DIR}/../iga" ABSOLUTE) elseif(IS_DIRECTORY "${VISA_DIR}/iga") set(IGA_DIR "${VISA_DIR}/iga") endif() if(IS_DIRECTORY "${IGA_DIR}/IGALibrary/api") set(IGA_INCLUDE_DIR "${IGA_DIR}/IGALibrary/api") else() set(IGA_INCLUDE_DIR "${IGA_DIR}") endif() if(IS_DIRECTORY ${IGA_INCLUDE_DIR}) set(IGA_HEADERS_AVAILABLE TRUE) set(IGA_LIBRARY_NAME "iga${NEO_BITS}") else() set(IGA_HEADERS_AVAILABLE FALSE) endif() message(STATUS "IGA Includes dir: ${IGA_INCLUDE_DIR}") if(WIN32) set(IGC_LIBRARY_NAME "igc${NEO_BITS}") set(FCL_LIBRARY_NAME "igdfcl${NEO_BITS}") endif() # GmmLib detection if(TARGET igfx_gmmumd_dll) set(GMM_TARGET_NAME "igfx_gmmumd_dll") set(GMM_LINK_NAME $) set(NEO__GMM_LIBRARY_PATH $) set(NEO__GMM_INCLUDE_DIR $) else() if(DEFINED GMM_DIR) get_filename_component(GMM_DIR "${GMM_DIR}" ABSOLUTE) else() get_filename_component(GMM_DIR_tmp "${NEO_SOURCE_DIR}/../gmmlib" ABSOLUTE) if(IS_DIRECTORY "${GMM_DIR_tmp}") set(GMM_DIR "${GMM_DIR_tmp}") endif() endif() if(UNIX) if(DEFINED GMM_DIR) if(IS_DIRECTORY "${GMM_DIR}/lib/pkgconfig/") set(__tmp_LIBDIR "lib") elseif(IS_DIRECTORY 
"${GMM_DIR}/${CMAKE_INSTALL_LIBDIR}/pkgconfig/") set(__tmp_LIBDIR ${CMAKE_INSTALL_LIBDIR}) endif() endif() find_package(PkgConfig) if(DEFINED __tmp_LIBDIR) set(OLD_PKG_CONFIG_PATH $ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${GMM_DIR}/${__tmp_LIBDIR}/pkgconfig/") endif() pkg_check_modules(NEO__GMM igdgmm) if(DEFINED __tmp_LIBDIR) set(ENV{PKG_CONFIG_PATH} ${OLD_PKG_CONFIG_PATH}) endif() if(NEO__GMM_FOUND) if(DEFINED __tmp_LIBDIR) string(REPLACE "${NEO__GMM_INCLUDEDIR}" "${GMM_DIR}/include/igdgmm" NEO__GMM_INCLUDE_DIRS "${NEO__GMM_INCLUDE_DIRS}") string(REPLACE "${NEO__GMM_LIBDIR}" "${GMM_DIR}/${__tmp_LIBDIR}" NEO__GMM_LIBDIR "${NEO__GMM_LIBDIR}") set(NEO__GMM_LIBRARY_PATH "${NEO__GMM_LIBDIR}") endif() set(GMM_TARGET_NAME "igfx_gmmumd_dll") set(GMM_LINK_NAME ${NEO__GMM_LIBRARIES}) set(NEO__GMM_INCLUDE_DIR ${NEO__GMM_INCLUDE_DIRS}) message(STATUS "GmmLib include dirs: ${NEO__GMM_INCLUDE_DIR}") else() message(FATAL_ERROR "GmmLib not found!") endif() if(DEFINED __tmp_LIBDIR) unset(__tmp_LIBDIR) endif() else() if(EXISTS "${GMM_DIR}/CMakeLists.txt") message(STATUS "GmmLib source dir is: ${GMM_DIR}") add_subdirectory_unique("${GMM_DIR}" "${NEO_BUILD_DIR}/gmmlib") if(NOT DEFINED GMM_TARGET_NAME) set(GMM_TARGET_NAME "igfx_gmmumd_dll") endif() set(NEO__GMM_INCLUDE_DIR $) set(NEO__GMM_LIBRARY_PATH $) set(GMM_LINK_NAME $) else() message(FATAL_ERROR "GmmLib not found!") endif() endif() endif() macro(copy_gmm_dll_for target) if(NOT UNIX) add_custom_command( TARGET ${target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ $ ) endif() endmacro() link_directories(${NEO__GMM_LIBRARY_PATH}) # Instrumentation detection if(NOT DEFINED INSTRUMENTATION_LIB_NAME) if(DEFINED INSTRUMENTATION_SOURCE_DIR) get_filename_component(INSTRUMENTATION_SOURCE_DIR "${INSTRUMENTATION_SOURCE_DIR}" ABSOLUTE) else() get_filename_component(INSTRUMENTATION_SOURCE_DIR_tmp "${NEO_SOURCE_DIR}/../instrumentation" ABSOLUTE) if(IS_DIRECTORY "${INSTRUMENTATION_SOURCE_DIR_tmp}") 
set(INSTRUMENTATION_SOURCE_DIR "${INSTRUMENTATION_SOURCE_DIR_tmp}") endif() endif() if(IS_DIRECTORY "${INSTRUMENTATION_SOURCE_DIR}") message(STATUS "Instrumentation source dir is: ${INSTRUMENTATION_SOURCE_DIR}") set(INSTRUMENTATION_INCLUDE_PATHS ${INSTRUMENTATION_SOURCE_DIR}/inc) set(HAVE_INSTRUMENTATION TRUE) else() message(STATUS "Instrumentation source dir not found") set(HAVE_INSTRUMENTATION FALSE) endif() else() if(TARGET ${INSTRUMENTATION_LIB_NAME}) set(INSTRUMENTATION_INCLUDE_PATHS $) set(HAVE_INSTRUMENTATION TRUE) else() message(STATUS "Instrumentation target not found. ${INSTRUMENTATION_LIB_NAME} is not a valid target") set(HAVE_INSTRUMENTATION FALSE) endif() endif() if(NOT DISABLE_LIBVA) # LibVA detection if(UNIX) set(NEO__LIBVA_IS_REQUIRED "") if(IGDRCL_FORCE_USE_LIBVA) set(NEO__LIBVA_IS_REQUIRED "REQUIRED") endif() if(DEFINED LIBVA_SOURCE_DIR) get_filename_component(LIBVA_SOURCE_DIR "${LIBVA_SOURCE_DIR}" ABSOLUTE) else() get_filename_component(LIBVA_SOURCE_DIR_tmp "${NEO_SOURCE_DIR}/../libva" ABSOLUTE) if(IS_DIRECTORY "${LIBVA_SOURCE_DIR_tmp}") set(LIBVA_SOURCE_DIR "${LIBVA_SOURCE_DIR_tmp}") endif() endif() find_package(PkgConfig) if(DEFINED LIBVA_SOURCE_DIR AND IS_DIRECTORY "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") set(OLD_PKG_CONFIG_PATH $ENV{PKG_CONFIG_PATH}) set(ENV{PKG_CONFIG_PATH} "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") endif() pkg_check_modules(NEO__LIBVA ${NEO__LIBVA_IS_REQUIRED} libva>=1.0.0) include(CheckLibraryExists) if(DEFINED LIBVA_SOURCE_DIR AND IS_DIRECTORY "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") set(ENV{PKG_CONFIG_PATH} ${OLD_PKG_CONFIG_PATH}) endif() if(NEO__LIBVA_FOUND) CHECK_LIBRARY_EXISTS(va vaGetLibFunc ${NEO__LIBVA_LIBDIR} HAVE_VAGETLIBFUNC) add_definitions(-DLIBVA) message(STATUS "Using libva ") if(DEFINED LIBVA_SOURCE_DIR AND IS_DIRECTORY "${LIBVA_SOURCE_DIR}/lib/pkgconfig/") string(REPLACE "${NEO__LIBVA_INCLUDEDIR}" "${LIBVA_SOURCE_DIR}/include" NEO__LIBVA_INCLUDE_DIRS "${NEO__LIBVA_INCLUDE_DIRS}") string(REPLACE "${NEO__LIBVA_LIBDIR}" 
"${LIBVA_SOURCE_DIR}/lib" NEO__LIBVA_LIBDIR "${NEO__LIBVA_LIBDIR}") set(NEO__LIBVA_LIBRARY_PATH "${NEO__LIBVA_LIBDIR}") endif() list(LENGTH NEO__LIBVA_INCLUDE_DIRS __len) if(__len GREATER 0) set(NEO__LIBVA_INCLUDE_DIR ${NEO__LIBVA_INCLUDE_DIRS}) include_directories("${NEO__LIBVA_INCLUDE_DIR}") message(STATUS "LibVA include dirs: ${NEO__LIBVA_INCLUDE_DIR}") endif() endif() endif() endif() set(SLD_LIBRARY_NAME "igfxdbgxchg") if(NEO_BITS STREQUAL "64") set(SLD_LIBRARY_NAME "${SLD_LIBRARY_NAME}${NEO_BITS}") endif() add_subdirectory_unique(third_party/gtest) message(STATUS "AUB_STREAM_DIR = ${AUB_STREAM_DIR}" ) if(DEFINED AUB_STREAM_DIR) set(AUB_STREAM_PROJECT_NAME "aub_stream") add_subdirectory_unique(${AUB_STREAM_DIR} ${CMAKE_BINARY_DIR}/aub_stream EXCLUDE_FROM_ALL) set(ENGINE_NODE_DIR ${AUB_STREAM_DIR}${BRANCH_DIR_SUFFIX}/headers) else() set(ENGINE_NODE_DIR ${AUB_STREAM_HEADERS_DIR}) endif() message(STATUS "Engine node dir: ${ENGINE_NODE_DIR}") add_definitions(-DGMM_OCL) # We want to organize our IDE targets into folders set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Get available platfroms include(platforms.cmake) # Enable/Disable BuiltIns compilation during build set(COMPILE_BUILT_INS TRUE CACHE BOOL "Enable built-in kernels compilation") # Changing the default executable and library output directories set(IGDRCL_OUTPUT_DIR "${IGDRCL_OPTION__OUTPUT_DIR}") # do not add rpath set(CMAKE_SKIP_RPATH YES CACHE BOOL "" FORCE ) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG") option(APPVERIFIER_ALLOWED "allow use of AppVerifier" OFF) option(CCACHE_ALLOWED "allow use of ccache" TRUE) find_program(CCACHE_EXE_FOUND ccache) if(CCACHE_EXE_FOUND AND CCACHE_ALLOWED) message(STATUS "Found ccache: ${CCACHE_EXE_FOUND}") set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) endif() # Miscs options option(IGDRCL_GCOV "generate gcov report" OFF) option(USE_CL_CACHE "Use OpenCL program binary cache" ON) 
set(CL_CACHE_LOCATION "cl_cache" CACHE STRING "OpenCL program binary cache location") # Put profiling enable flag into define if(KMD_PROFILING) add_definitions(-DKMD_PROFILING=${KMD_PROFILING}) endif() if(MSVC) # Force to treat warnings as errors if(NOT CMAKE_CXX_FLAGS MATCHES "/WX") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /we4189") endif() endif() # Disable generating manifest set(LINKER_FLAGS "/MANIFEST:NO") # Support for WUD set(CMAKE_CXX_STANDARD_LIBRARIES "onecore.lib") foreach(IT kernel32.lib;user32.lib;gdi32.lib;advapi32.lib;ole32.lib;) set(LINKER_FLAGS "${LINKER_FLAGS} /NODEFAULTLIB:${IT}") endforeach() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LINKER_FLAGS}") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${LINKER_FLAGS}") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${LINKER_FLAGS}") add_definitions(-DUNICODE -D_UNICODE) link_directories("${WDK_DIR}/Lib/${WindowsTargetPlatformVersion}/um/${NEO_ARCH}/") else() if(IGDRCL_GCOV) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage --coverage") endif() option(USE_ASAN "Link with address sanitization support" OFF) if(USE_ASAN) if(CMAKE_COMPILER_IS_GNUCC) set(ASAN_FLAGS " -fsanitize=address -fno-omit-frame-pointer -DSANITIZER_BUILD") set(ASAN_LIBS "asan") else() message(STATUS "Address sanitization with clang not yet support") endif() endif() if(USE_TSAN) if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") set(TSAN_FLAGS " -fsanitize=thread -DSANITIZER_BUILD") set(TSAN_LIBS "tsan") else() message(STATUS "Thread sanitization with gcc is not fully supported") endif() endif() endif() # setup variables needed for custom configuration type # generate PDB files even for release build on MSVC if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE 
"${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") set(CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL} /Zi") set(CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL "${CMAKE_SHARED_LINKER_FLAGS_RELEASEINTERNAL} /DEBUG /OPT:REF /OPT:ICF") if(NO_PDB) string(REGEX REPLACE "/Zi" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") string(REGEX REPLACE "/Zi" "" CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL}") string(REGEX REPLACE "/Zi" "/Z7" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") endif() endif() # spectre mitigation include(CheckCXXCompilerFlag) if(MSVC) check_cxx_compiler_flag(/Qspectre COMPILER_SUPPORTS_QSPECTRE) check_cxx_compiler_flag(/d2guardspecload COMPILER_SUPPORTS_D2GUARDSPECLOAD) if(COMPILER_SUPPORTS_QSPECTRE) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qspectre") elseif(COMPILER_SUPPORTS_D2GUARDSPECLOAD) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /d2guardspecload") else() message(WARNING "Spectre mitigation is not supported by the compiler") endif() else() if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE) if(COMPILER_SUPPORTS_RETPOLINE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline") else() message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler") endif() else() check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk") else() message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler") endif() check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk") else() message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler") endif() 
check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register")
else()
  message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler")
endif()
# The next two lines close the compiler-id branch and the MSVC/non-MSVC branch
# of the Spectre mitigation section opened earlier in the file.
endif()
endif(MSVC)

if(NOT MSVC)
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftemplate-depth=1024")
endif(NOT MSVC)

# Compiler warning flags
# Applies to every non-MSVC compiler; -Werror is appended last so all warnings
# enabled here become build errors.
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wempty-body")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wignored-qualifiers")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wtype-limits")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Winvalid-pch")

  # NOTE(review): this branch keys off the *C* compiler id/version while editing
  # the *CXX* flags - presumably both always come from the same toolchain; confirm.
  if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" )
    # clang only
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshorten-64-to-32")
    if(USE_SANITIZE_UB)
      message(STATUS "Enabling undefined behavior sanitizer")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fsanitize-recover=vptr -fno-rtti -DSANITIZER_BUILD")
      # Unless continuation is explicitly allowed, undefined-behavior reports are made non-recoverable.
      if(NOT SANITIZE_UB_ALLOW_CONTINUE)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-sanitize-recover=undefined")
      endif()
    endif(USE_SANITIZE_UB)
    # NOTE(review): -DSANITIZER_BUILD is added here for every clang >= 3.6 build,
    # not only sanitizer builds - confirm this is intended.
    if (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 3.6))
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -DSANITIZER_BUILD")
    endif()
    if (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 4.0))
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-register") # Added for htons()
    endif()
  else()
    # gcc only
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-unused-but-set-variable")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wclobbered")
    # Only for compiler versions 7.0 and newer (written in the same
    # "NOT ... VERSION_LESS" form as the clang checks above).
    if (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0))
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wimplicit-fallthrough=4")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-noexcept-type") # Added for gtest
    endif()
  endif()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()

# Compile code with defenses enabled (settings to be used for production release code)
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  if(MSVC)
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GS")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /sdl")
    set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /NXCompat")
    set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DynamicBase")
    # /SafeSEH is only added for x86 builds.
    if("${NEO_ARCH}" STREQUAL "x86")
      set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /SafeSEH")
    endif()
  else()
    # Quoted so the comparison stays well-formed even when the compiler id is empty,
    # matching the quoted comparisons used elsewhere in this file.
    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -D_FORTIFY_SOURCE=2")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security")
    else()
      # gcc, g++ only
      # Compilers older than 4.9 get plain -fstack-protector instead of the -strong variant.
      if (CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector")
      else()
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
      endif()
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -D_FORTIFY_SOURCE=2")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat -Wformat-security")
      # Linker hardening for shared libraries in this branch: noexecstack, relro, now.
      set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,noexecstack")
      set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,relro")
      set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -Wl,-z,now")
    endif()
  endif()
endif()

# Project-wide include paths
# Please keep alphabetical order
include_directories(${NEO_BUILD_DIR})
include_directories(${NEO_SOURCE_DIR})
include_directories(${NEO_SHARED_DIRECTORY}/built_ins/builtinops${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/command_stream/definitions${BRANCH_DIR_SUFFIX})
# Branch-specific include directories; each entry is registered with its own
# include_directories() call, in the order listed.
foreach(neo_include_dir
    ${NEO_SHARED_DIRECTORY}/compiler_interface/compiler_options${BRANCH_DIR_SUFFIX}
    ${NEO_SHARED_DIRECTORY}/debug_settings/definitions${BRANCH_DIR_SUFFIX}
    ${NEO_SHARED_DIRECTORY}/gen_common/reg_configs${BRANCH_DIR_SUFFIX}
    ${NEO_SHARED_DIRECTORY}/gmm_helper/client_context${BRANCH_DIR_SUFFIX}
    ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory${BRANCH_DIR_SUFFIX}
    ${NEO_SHARED_DIRECTORY}/memory_manager/definitions${BRANCH_DIR_SUFFIX}
    ${NEO_SHARED_DIRECTORY}/memory_properties${BRANCH_DIR_SUFFIX}
    ${NEO_SHARED_DIRECTORY}/sku_info/definitions${BRANCH_DIR_SUFFIX}
    ${NEO_SOURCE_DIR}/opencl/source/command_queue/definitions${BRANCH_DIR_SUFFIX}
    ${NEO_SOURCE_DIR}/opencl/source/command_stream/definitions${BRANCH_DIR_SUFFIX}
    ${NEO_SOURCE_DIR}/opencl/source/mem_obj/definitions${BRANCH_DIR_SUFFIX}
    ${NEO_SOURCE_DIR}/opencl/source/memory_manager/definitions${BRANCH_DIR_SUFFIX})
  include_directories(${neo_include_dir})
endforeach()

# Instrumentation headers: with instrumentation available the branch-specific
# directory is used and the external include paths are added; otherwise the
# suffix falls back to "/".
if(HAVE_INSTRUMENTATION)
  set(NEO__INSTRUMENTATION_DIR_SUFFIX ${BRANCH_DIR_SUFFIX})
  message(STATUS "Instrumentation include dirs: ${INSTRUMENTATION_INCLUDE_PATHS}")
  include_directories(${INSTRUMENTATION_INCLUDE_PATHS})
else()
  set(NEO__INSTRUMENTATION_DIR_SUFFIX "/")
endif()
include_directories(${NEO_SOURCE_DIR}/opencl/source/instrumentation${NEO__INSTRUMENTATION_DIR_SUFFIX})

# Define where to put binaries
# With MSVC and any generator other than Ninja, a per-build-type sub-directory is appended.
set(TargetDir ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
if(MSVC AND NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
  set(TargetDir ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE})
endif()

# Offline compiler (ocloc) is built with MOCKABLE_VIRTUAL defined to nothing.
add_subdirectory_unique(${OCLOC_DIRECTORY} ${NEO_BUILD_DIR}/offline_compiler)
target_compile_definitions(ocloc_lib PRIVATE MOCKABLE_VIRTUAL=)

# Shared-library naming: when virtuals do not matter, the mockable name is an
# alias of the release name.
if(DONT_CARE_OF_VIRTUALS)
  set(NEO_SHARED_RELEASE_LIB_NAME "neo_shared")
  set(NEO_SHARED_MOCKABLE_LIB_NAME ${NEO_SHARED_RELEASE_LIB_NAME})
else()
  # Separate release and mockable shared libraries; the mockable name is only
  # defined when unit tests are not skipped.
  set(NEO_SHARED_RELEASE_LIB_NAME "neo_shared")
  if(NOT SKIP_UNIT_TESTS)
    set(NEO_SHARED_MOCKABLE_LIB_NAME "neo_shared_mockable")
  endif()
endif()

# Names of auxiliary library targets referenced below and (presumably) by the
# subdirectories added afterwards.
set(BIKSIM_LIB_NAME "biksim")
set(BUILTINS_SOURCES_LIB_NAME "builtins_sources")
set(BUILTINS_BINARIES_LIB_NAME "builtins_binaries")
set(BUILTINS_VME_LIB_NAME "builtins_vme")
set(SCHEDULER_BINARY_LIB_NAME "scheduler_binary")

add_subdirectory_unique(shared/source)
add_subdirectory_unique(shared/test/unit_test)

# generate_runtime_lib(LIB_NAME MOCKABLE GENERATE_EXEC)
#
# Adds the runtime subdirectory (NEO_RUNTIME_SUB_DIR) once more under the
# binary directory "${NEO_BUILD_DIR}/${LIB_NAME}" and configures the
# MOCKABLE_VIRTUAL definition on the resulting targets.
#
#   LIB_NAME      - name of the static runtime library target to generate
#   MOCKABLE      - TRUE/FALSE; selects the mockable shared library and
#                   MOCKABLE_VIRTUAL=virtual plus the default test platform defines
#   GENERATE_EXEC - forwarded as GENERATE_EXECUTABLE
#
# This is a macro, so every set() below lands in the caller's scope; the
# variables are presumably read by the CMakeLists.txt in NEO_RUNTIME_SUB_DIR.
macro(generate_runtime_lib LIB_NAME MOCKABLE GENERATE_EXEC)
  set(NEO_STATIC_LIB_NAME ${LIB_NAME})
  set(SHARINGS_ENABLE_LIB_NAME "${LIB_NAME}_sharings_enable")
  set(GENERATE_EXECUTABLE ${GENERATE_EXEC})
  # Macro arguments are string replacements, not variables, hence if(${MOCKABLE}).
  if(${MOCKABLE})
    set(NEO_SHARED_LIB ${NEO_SHARED_MOCKABLE_LIB_NAME})
  else()
    set(NEO_SHARED_LIB ${NEO_SHARED_RELEASE_LIB_NAME})
  endif()
  # With BUILD_WITHOUT_RUNTIME the subdirectory is still configured, but its
  # targets are excluded from the default "all" build.
  if(NOT BUILD_WITHOUT_RUNTIME)
    add_subdirectory(${NEO_RUNTIME_SUB_DIR} "${NEO_BUILD_DIR}/${LIB_NAME}")
  else()
    add_subdirectory(${NEO_RUNTIME_SUB_DIR} "${NEO_BUILD_DIR}/${LIB_NAME}" EXCLUDE_FROM_ALL)
  endif()
  # Builtins and scheduler libraries always get an empty MOCKABLE_VIRTUAL.
  target_compile_definitions(${BUILTINS_SOURCES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
  target_compile_definitions(${BUILTINS_VME_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
  target_compile_definitions(${BUILTINS_BINARIES_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
  target_compile_definitions(${SCHEDULER_BINARY_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)

  if(${MOCKABLE})
    # Mockable build: MOCKABLE_VIRTUAL expands to "virtual" and the default
    # tested platform/family definitions are attached to the library.
    target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual)
    target_compile_definitions(${SHARINGS_ENABLE_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual)
    target_compile_definitions(${LIB_NAME} PUBLIC DEFAULT_TEST_PLATFORM=${DEFAULT_TESTED_PLATFORM} DEFAULT_TEST_FAMILY_NAME=${DEFAULT_TESTED_FAMILY_NAME} ${TESTED_GEN_FLAGS_DEFINITONS})
  else()
    target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
    target_compile_definitions(${SHARINGS_ENABLE_LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=)
  endif()
endmacro(generate_runtime_lib)

set(NEO_DYNAMIC_LIB_NAME "igdrcl_dll") # single NEO dll
set(NEO_DLL_NAME_BASE "igdrcl")

# Selects which runtime library variants are generated (branches follow).
if(DONT_CARE_OF_VIRTUALS)
message(STATUS "All targets will use virtuals") set(NEO_RELEASE_LIB_NAME "igdrcl_lib") set(NEO_MOCKABLE_LIB_NAME ${NEO_RELEASE_LIB_NAME}) generate_runtime_lib(${NEO_RELEASE_LIB_NAME} TRUE TRUE) else() set(NEO_RELEASE_LIB_NAME "igdrcl_lib_release") # Used by dll/so generate_runtime_lib(${NEO_RELEASE_LIB_NAME} FALSE TRUE) if(NOT SKIP_UNIT_TESTS) set(NEO_MOCKABLE_LIB_NAME "igdrcl_lib_mockable") # Used by ULTS generate_runtime_lib(${NEO_MOCKABLE_LIB_NAME} TRUE FALSE) endif() endif() set(NEO_STATICALLY_LINKED_LIBRARIES ${NEO_RELEASE_LIB_NAME} ${NEO_SHARED_RELEASE_LIB_NAME} ${NEO_RELEASE_LIB_NAME} ${NEO_SHARED_RELEASE_LIB_NAME}) set(NEO_STATICALLY_LINKED_LIBRARIES_MOCKABLE ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME} ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME}) if(DEFAULT_TESTED_PLATFORM AND NOT SKIP_NEO_UNIT_TESTS) add_subdirectory_unique(${NEO_RUNTIME_TESTS_SUB_DIR} ${NEO_BUILD_DIR}/${NEO_RUNTIME_TESTS_SUB_DIR}) else() hide_subdir(${NEO_RUNTIME_TESTS_SUB_DIR}) endif() set(DONT_LINK_ELF_STATICALLY TRUE) if(EXISTS ${NEO_SOURCE_DIR}/../internal) add_subdirectory_unique(${NEO_SOURCE_DIR}/../internal ${NEO_BUILD_DIR}/internal) endif(EXISTS ${NEO_SOURCE_DIR}/../internal) add_subdirectory_unique(scripts/lint) if(EXISTS ${NEO_SOURCE_DIR}/scripts/format) add_subdirectory_unique(scripts/format) endif() configure_file(config.h.in ${NEO_BUILD_DIR}/config.h) if(WIN32 AND NOT NEO__IGC_FOUND) configure_file(igc.opencl.h.in ${NEO_BUILD_DIR}/igc.opencl.h) endif() configure_file(driver_version.h.in ${NEO_BUILD_DIR}/driver_version.h) # Put Driver version into define configure_file(lib_names.h.in ${NEO_BUILD_DIR}/lib_names.h) hide_subdir(${NEO_RUNTIME_SUB_DIR}) if(BUILD_WITH_L0) add_subdirectory_unique(level_zero) endif() add_subdirectory_unique(target_unit_tests) add_subdirectories() include(package.cmake) compute-runtime-20.13.16352/CONTRIBUTING.md000066400000000000000000000060611363734646600176110ustar00rootroot00000000000000# Contribution guidelines ## Process 
overview ### 1. Patch creation Start with a patch (we prefer smaller self-contained incremental changes vs. large blobs of code). When adding new code, please also add corresponding unit level tests (ULT). Added ULTs should cover all the decision points introduced by the commit and should fail if executed without the code changes. Make sure it builds and passes _all_ ULTs. For details about what compilers and build configurations we expect, refer to instructions for [building](https://github.com/intel/compute-runtime/blob/master/BUILD.md) the driver. Make sure you adhere to our [coding standard](https://github.com/intel/compute-runtime/blob/master/GUIDELINES.md); this will be verified by clang-format and clang-tidy (tool configuration is already included in the NEO repository). ### 2. Certificate of origin In order to get a clear contribution chain of trust we use the [signed-off-by language](https://01.org/community/signed-process) used by the Linux kernel project. Please make sure your commit message adheres to this guideline. ### 3. Patch submission Create a pull request on github once you are confident that your changes are complete and fulfill the requirements above. Make sure your commit message follows these rules: * each line has an 80-character limit * title (first line) should be self-contained (i.e. make sense without looking at the body) * additional description can be provided in the body * title and body need to be separated by an empty line ### 4. Initial (cursory) review One of the NEO maintainers will do an initial (brief) review of your code. We will let you know if anything major is missing. ### 5. Verification We'll double-check that your code meets all of our minimal quality expectations. Every commit must: * Build under Linux - using clang (8.0) and gcc (7.x ...
9.0) * Build under Windows (this is currently a requirement that cannot be verified externally) * Pass ULTs for all supported platforms * Pass clang-format check with the configuration contained within the repository * Pass clang-tidy check with the configuration contained within the repository * Pass sanity testing (test content recommendation for the external community will be provided in the future) When all the automated checks are confirmed to be passing, we will start the actual code review process. ### 6. Code review We'll make sure that your code fits within the architecture and design of NEO, is readable and maintainable. Please make sure to address our questions and concerns. ### 7. Patch disposition We reserve, upon conclusion of the code review, the right to do one of the following: 1. Merge the patch as submitted 1. Merge the patch (with modifications) 1. Reject the patch If merged, you will be listed as patch author. Your patch may be reverted later in case of a major regression that was not detected prior to commit.
## Intel Employees If you are an Intel Employee *and* you want to contribute to NEO as part of your regular job duties please: * Contact us in advance * Make sure your github account is linked to your intel.com email addresscompute-runtime-20.13.16352/DISTRIBUTIONS.md000066400000000000000000000054321363734646600177450ustar00rootroot00000000000000# NEO in Linux distributions [![Packaging status](https://repology.org/badge/vertical-allrepos/intel-compute-runtime.svg)](https://repology.org/project/intel-compute-runtime/versions) ## Arch Linux* ``` pacman -S intel-compute-runtime ``` ## Centos* 7, 8, Red Hat Enterprise Linux* 7 ``` yum install yum-plugin-copr yum copr enable jdanecki/intel-opencl yum install intel-opencl ``` ## Clear Linux ``` swupd bundle-add computer-vision-basic ``` ## Exherbo Linux* ``` cave resolve --execute intel-compute-runtime ``` ## Fedora* 30, 31, rawhide, Red Hat Enterprise Linux* 8 Beta, Mageia* 7 ``` dnf install dnf-plugins-core dnf copr enable jdanecki/intel-opencl dnf install intel-opencl ``` ## Gentoo*, Funtoo* ``` emerge intel-neo ``` ## NixOS ``` nix-channel --add https://nixos.org/channels/nixpkgs-unstable nix-channel --update nix-env -i intel-compute-runtime ``` ## OpenSUSE Leap 15.1 ``` zypper addrepo -r https://copr.fedorainfracloud.org/coprs/jdanecki/intel-opencl/repo/opensuse-leap-15.1/jdanecki-intel-opencl-opensuse-leap-15.1.repo zypper install intel-opencl ``` ## OpenSUSE tumbleweed ``` zypper addrepo -r https://copr.fedorainfracloud.org/coprs/jdanecki/intel-opencl/repo/opensuse-tumbleweed/jdanecki-intel-opencl-opensuse-tumbleweed.repo zypper install intel-opencl ``` ## PLD Linux* ``` ipoldek install intel-gmmlib intel-graphics-compiler intel-compute-runtime ``` ## Ubuntu* ppa for 16.04, 18.04, 19.04, 19.10, 20.04 ``` add-apt-repository ppa:intel-opencl/intel-opencl apt-get update apt-get install intel-opencl-icd ``` ## Ubuntu* 19.04, 19.10, 20.04 ``` apt-get install intel-opencl-icd ``` ## Packages mirror Starting with 
[release 19.43.14583](https://github.com/intel/compute-runtime/releases/tag/19.43.14583) all packages are mirrored on [SourceForge](https://sourceforge.net/projects/intel-compute-runtime) as older packages are automatically deleted on [launchpad](https://launchpad.net/~intel-opencl/+archive/ubuntu/intel-opencl) and [copr](https://copr.fedorainfracloud.org/coprs/jdanecki/intel-opencl). ## Neo in docker containers Docker images are provided in [intel-opencl](https://hub.docker.com/r/intelopencl/intel-opencl) repository. Example for Fedora* 30 ``` docker run -it --device /dev/dri:/dev/dri --rm docker.io/intelopencl/intel-opencl:fedora-30-copr clinfo ``` ## Building and installation * [Ubuntu*](https://github.com/intel/compute-runtime/blob/master/BUILD.md) * [Centos* 8](https://github.com/intel/compute-runtime/blob/master/BUILD.md) * Scripts to build or download rpm (copr) and deb (github and ppa) packages are available in [neo-specs](https://github.com/JacekDanecki/neo-specs) repository. # NEO in other distributions ## FreeBSD*, DragonFly* ``` pkg install intel-compute-runtime ``` (*) Other names and brands may be claimed as property of others. compute-runtime-20.13.16352/FAQ.md000066400000000000000000000043351363734646600163130ustar00rootroot00000000000000 # Frequently asked questions For OpenCL specific questions, see the [OpenCL FAQ](https://github.com/intel/compute-runtime/blob/master/opencl/doc/FAQ.md). ## OS support ### Which Linux versions does NEO support? NEO should work on any modern Linux distribution (i.e. Ubuntu, Fedora, etc.) with default / stock configuration (no kernel patches), assuming the underlying kernel's drm subsystem is 4.7 or higher. Newer platforms will require a kernel version that provides support for that platform (e.g. Coffee Lake requires kernel 4.14 or higher). Our default (most frequent) validation config is currently Ubuntu 18.04 LTS (as of Q1'20). ### Does NEO support Microsoft Windows? 
Our closed-source driver for Windows is using the same codebase. At this time, we do not support compilation of the stack for Windows. It is our long-term intention to offer that option. ### Why is the feature set different in latest Windows driver vs. latest NEO on github? Our Windows release process takes up to several weeks before drivers are available through intel.com and/or Windows update. Features available in github will be available on Windows later. Note: Older platforms (e.g. Broadwell) are considered to be in maintenance mode for Windows. ## Platform support ### Which Intel platforms are supported by the driver? See [README.md](https://github.com/intel/compute-runtime/blob/master/README.md). ### How can I check that my specific device is supported by the driver? To check support for any device, you can follow these steps: 1. Go to [Ark]( https://ark.intel.com) and find your Device ID 1. Find the corresponding device ID label in [GMM]( https://github.com/intel/gmmlib/blob/master/Source/inc/common/igfxfmid.h) 1. Check if this device ID label is enumerated in the [supported device list](https://github.com/intel/compute-runtime/blob/master/opencl/source/dll/linux/devices/devices_base.inl) ### When will support for platform X be added? We will start adding platform support after platform is disclosed by Intel. It is our intention to offer full support ahead of platform's market availability. ## Who are we? The Compute Runtime team is part of VTT (Visual Technologies Team). Most of our engineers are located in Poland and the United States.compute-runtime-20.13.16352/GUIDELINES.md000066400000000000000000000054411363734646600173330ustar00rootroot00000000000000File to cover guidelines for NEO project. # C++ usage * use c++ style casts instead of c style casts. 
* do not use default parameters * prefer using over typedef * avoid defines for constants, use constexpr * prefer forward declarations in headers * avoid includes in headers unless absolutely necessary * use of exceptions in driver code needs strong justification * prefer static create methods returning std::unique_ptr instead of throwing from constructor # Naming conventions * use snake_case for new files * use PascalCase for class, struct, enum, and namespace names * use camelCase for variable and function names * prefer verbose names for variables and functions ``` bad examples : sld, elws, aws good examples : sourceLevelDebugger, enqueuedLocalWorkGroupSize, actualWorkGroupSize ``` * follow givenWhenThen test naming pattern, indicate what is interesting in the test bad examples : ``` TEST(CsrTests, initialize) TEST(CQTests, simple) TEST(CQTests, basic) TEST(CQTests, works) ``` good examples: ``` TEST(CommandStreamReceiverTests, givenCommandStreamReceiverWhenItIsInitializedThenProperFieldsAreSet) TEST(CommandQueueTests, givenCommandQueueWhenEnqueueIsDoneThenTaskLevelIsModified) TEST(CommandQueueTests, givenCommandQueueWithDefaultParametersWhenEnqueueIsDoneThenTaskCountIncreases) TEST(CommandQueueTests, givenCommandQueueWhenEnqueueWithBlockingFlagIsSetThenDriverWaitsUntilAllCommandsAreCompleted) ``` # Testing mindset * Test behaviors instead of implementations, do not focus on adding a test per every function in the class (avoid tests for setters and getters), focus on the functionality you are adding and how it changes the driver behavior, do not bind tests to implementation. * Make sure that test is fast, our test suite needs to complete in seconds for efficient development pace, as a general rule test shouldn't be longer than 1ms in Debug driver.
# Coding guidelines * Favor the design of a self-explanatory code over the use of comments; if comments are needed, use double slash instead of block comments * HW commands and structures used in NEO must be initialized with constants defines for each Gfx Family: i.e. PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl * Any new HW command or structure must have its own static constant initializer added to any Gfx Family that is going to use it. * One-line branches use braces * Headers are guarded using `#pragma once` * Do not use `TODO`s in the code * Use `UNRECOVERABLE_IF` and `DEBUG_BREAK_IF` instead of `asserts`: * Use `UNRECOVERABLE_IF` when a failure is found and driver cannot proceed with normal execution. `UNRECOVERABLE_IF` is implemented in Release and Debug builds. * Use `DEBUG_BREAK_IF` when a failure can be handled gracefully by the driver and it can continue with normal execution. `DEBUG_BREAK_IF` is only implemented in Debug builds.compute-runtime-20.13.16352/Jenkinsfile000066400000000000000000000001711363734646600175400ustar00rootroot00000000000000#!groovy dependenciesRevision='d3caed990688cb206b5bf215caa71094ce1c9110-1396' strategy='EQUAL' allowedCD=222 allowedF=11 compute-runtime-20.13.16352/LICENSE000066400000000000000000000020621363734646600163620ustar00rootroot00000000000000MIT License Copyright (c) 2018 Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. compute-runtime-20.13.16352/README.md000066400000000000000000000106251363734646600166400ustar00rootroot00000000000000# Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver ## Introduction The Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver is an open source project providing compute API support (Level Zero, OpenCL) for Intel graphics hardware architectures (HD Graphics, Xe). ## What is NEO? NEO is the shorthand name for Compute Runtime contained within this repository. It is also a development mindset that we adopted when we first started the implementation effort for OpenCL. The project evolved beyond a single API and NEO no longer implies a specific API. When talking about a specific API, we will mention it by name (e.g. Level Zero, OpenCL). ## License The Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver is distributed under the MIT License. 
You may obtain a copy of the License at: https://opensource.org/licenses/MIT ## Supported Platforms |Platform|OpenCL|Level Zero| |--------|:----:|:--------:| |Intel Core Processors with Gen8 graphics devices (formerly Broadwell)| 2.1 | - | |Intel Core Processors with Gen9 graphics devices (formerly Skylake, Kaby Lake, Coffee Lake)| 2.1 | Y | |Intel Atom Processors with Gen9 graphics devices (formerly Apollo Lake, Gemini Lake)| 1.2 | - | |Intel Core Processors with Gen11 graphics devices (formerly Ice Lake)| 2.1 | Y | |Intel Core Processors with Gen12 graphics devices (formerly Tiger Lake)| 2.1 | Y | ## Release cadence * Once a week, we run an extended validation cycle on a selected driver. * When the extended validation cycle tests pass, the corresponding commit on github is tagged using the format yy.ww.bbbb (yy - year, ww - work week, bbbb - incremental build number). * Typically for weekly tags we will post a binary release (e.g. deb). * Quality level of the driver (per platform) will be provided in the Release Notes. ## Installation Options To allow NEO to access the GPU device, make sure the user has permissions to the files /dev/dri/renderD*. ### Via system package manager NEO is available for installation on a variety of Linux distributions and can be installed via the distro's package manager. For example on Ubuntu* 19.04, 19.10: ``` apt-get install intel-opencl-icd ``` Procedures for other [distributions](https://github.com/intel/compute-runtime/blob/master/DISTRIBUTIONS.md).
### Manual download .deb packages for Ubuntu are provided along with installation instructions and Release Notes on the [release page](https://github.com/intel/compute-runtime/releases) ## Linking applications Directly linking to the runtime library is not supported: * Level Zero applications should link with [Level Zero loader](https://github.com/oneapi-src/level-zero) * OpenCL applications should link with [ICD loader library (ocl-icd)](https://github.com/OCL-dev/ocl-icd) ## Dependencies * GmmLib - https://github.com/intel/gmmlib * Intel Graphics Compiler - https://github.com/intel/intel-graphics-compiler ## How to provide feedback By default, please submit an issue using native github.com [interface](https://github.com/intel/compute-runtime/issues). ## How to contribute Create a pull request on github.com with your patch. Make sure your change is cleanly building and passing ULTs. A maintainer will contact you if there are questions or concerns. See [contribution guidelines](https://github.com/intel/compute-runtime/blob/master/CONTRIBUTING.md) for more details. 
## See also * [Contribution guidelines](https://github.com/intel/compute-runtime/blob/master/CONTRIBUTING.md) * [Frequently Asked Questions](https://github.com/intel/compute-runtime/blob/master/FAQ.md) ### Level Zero specific * [oneAPI Level Zero specification](https://spec.oneapi.com/versions/latest/elements/l0/source/index.html) * [Intel(R) OneApi Level Zero Specification API C/C++ header files](https://github.com/oneapi-src/level-zero/) * [oneAPI Level Zero tests](https://github.com/oneapi-src/level-zero-tests/) ### OpenCL specific * [OpenCL on Linux guide](https://github.com/bashbaug/OpenCLPapers/blob/markdown/OpenCLOnLinux.md) * [Intel(R) GPU Compute Samples](https://github.com/intel/compute-samples) * [Frequently Asked Questions](https://github.com/intel/compute-runtime/blob/master/opencl/doc/FAQ.md) * [Interoperability with VTune](https://github.com/intel/compute-runtime/blob/master/opencl/doc/VTUNE.md) * [OpenCL Conformance Tests](https://github.com/KhronosGroup/OpenCL-CTS/) ___(*) Other names and brands may be claimed as property of others.___compute-runtime-20.13.16352/cmake/000077500000000000000000000000001363734646600164355ustar00rootroot00000000000000compute-runtime-20.13.16352/cmake/run_ult_target.cmake000066400000000000000000000044041363734646600224770ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # string(REPLACE "/" ";" unit_test_config ${unit_test_config}) list(GET unit_test_config 0 product) list(GET unit_test_config 1 slices) list(GET unit_test_config 2 subslices) list(GET unit_test_config 3 eu_per_ss) add_custom_target(run_${product}_unit_tests ALL DEPENDS unit_tests) set_target_properties(run_${product}_unit_tests PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}") if(NOT SKIP_NEO_UNIT_TESTS) add_custom_command( TARGET run_${product}_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running igdrcl_tests ${target} 
${slices}x${subslices}x${eu_per_ss} in ${TargetDir}/${product} COMMAND $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} --gtest_repeat=${GTEST_REPEAT} ${GTEST_SHUFFLE} ${IGDRCL_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} ) if(WIN32 AND ${CMAKE_BUILD_TYPE} STREQUAL "Debug" AND "${IGDRCL_OPTION__BITS}" STREQUAL "64" AND APPVERIFIER_ALLOWED) add_custom_command( TARGET run_${product}_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running igdrcl_tests with App Verifier COMMAND ${NEO_SOURCE_DIR}/scripts/verify.bat $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} ${IGDRCL_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} COMMAND echo App Verifier returned: %errorLevel% ) endif() endif() if(NOT SKIP_L0_UNIT_TESTS AND BUILD_WITH_L0) add_custom_command( TARGET run_${product}_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running ze_intel_gpu_core_tests ${target} ${slices}x${subslices}x${eu_per_ss} in ${TargetDir}/${product} COMMAND $ --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} ${GTEST_EXCEPTION_OPTIONS} --gtest_repeat=${GTEST_REPEAT} ${GTEST_SHUFFLE} ${IGDRCL_TESTS_LISTENER_OPTION} ${GTEST_FILTER_OPTION} ) endif() add_dependencies(run_unit_tests run_${product}_unit_tests) compute-runtime-20.13.16352/common_macros.cmake000066400000000000000000000061231363734646600212150ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # macro(hide_subdir subdir) file(RELATIVE_PATH subdir_relative ${NEO_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}) set(${subdir_relative}_hidden} TRUE) endmacro() macro(add_subdirectory_unique subdir) file(RELATIVE_PATH subdir_relative ${NEO_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}) if(NOT ${subdir_relative}_hidden}) add_subdirectory(${subdir} ${ARGN}) 
endif()
# Mark the directory as hidden so it is not added a second time.
hide_subdir(${subdir})
endmacro()

# add_subdirectories()
#
# Adds every direct child of the current source directory that contains a
# CMakeLists.txt and has not been marked hidden.
macro(add_subdirectories)
  file(GLOB subdirectories RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/*)
  foreach(subdir ${subdirectories})
    # The "hidden" marker variable is keyed by the path relative to NEO_SOURCE_DIR.
    file(RELATIVE_PATH subdir_relative ${NEO_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/${subdir})
    # NOTE(review): the trailing "}" is part of the marker variable's name
    # ("<relative path>_hidden}"); it must stay identical wherever the marker
    # is set or tested.
    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/CMakeLists.txt AND NOT ${subdir_relative}_hidden})
      add_subdirectory(${subdir})
    endif()
  endforeach()
endmacro()

# create_project_source_tree(target [extra_prefix...])
#
# MSVC only: assigns each source file of `target` to a "Source Files\<dir>"
# source group derived from its lower-cased path with the known prefixes
# (current source dir, extra arguments, NEO_SOURCE_DIR) removed.
macro(create_project_source_tree target)
  if(MSVC)
    set(prefixes ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN} ${NEO_SOURCE_DIR})
    get_target_property(source_list ${target} SOURCES)
    foreach(source_file ${source_list})
      # Entries matching this pattern are skipped (presumably generator
      # expressions such as $<...> - confirm).
      if(NOT ${source_file} MATCHES "\<*\>")
        string(TOLOWER ${source_file} source_file_relative)
        foreach(prefix ${prefixes})
          # Stop stripping once the path has become empty.
          if(source_file_relative)
            string(TOLOWER ${prefix} prefix)
            string(REPLACE "${prefix}" "" source_file_relative ${source_file_relative})
          endif()
        endforeach()
        get_filename_component(source_path_relative ${source_file_relative} PATH)
        if(source_path_relative)
          # source_group() uses backslashes as the nesting separator.
          string(REPLACE "/" "\\" source_path_relative ${source_path_relative})
        endif()
        source_group("Source Files\\${source_path_relative}" FILES ${source_file})
      endif()
    endforeach()
  endif()
endmacro()

# create_project_source_tree_with_exports(target exports_filename [extra_prefix...])
#
# Same as create_project_source_tree, and additionally (MSVC only) places a
# non-empty exports file into an "exports" source group.
macro(create_project_source_tree_with_exports target exports_filename)
  create_project_source_tree(${target} ${ARGN})
  if(MSVC)
    if(NOT "${exports_filename}" STREQUAL "")
      source_group("exports" FILES "${exports_filename}")
    endif()
  endif()
endmacro()

# apply_macro_for_each_gen(type)
#
# Invokes macro_for_each_gen() once per entry of ALL_GEN_TYPES for which
# GEN_CONTAINS_PLATFORMS reports platforms of the given type. The callback is
# not defined here; the caller is expected to provide it. GEN_TYPE,
# GEN_TYPE_LOWER and given_type are set in the caller's scope while it runs.
macro(apply_macro_for_each_gen type)
  set(given_type ${type})
  foreach(GEN_TYPE ${ALL_GEN_TYPES})
    string(TOLOWER ${GEN_TYPE} GEN_TYPE_LOWER)
    GEN_CONTAINS_PLATFORMS(${given_type} ${GEN_TYPE} GENX_HAS_PLATFORMS)
    if(${GENX_HAS_PLATFORMS})
      macro_for_each_gen()
    endif()
  endforeach()
endmacro()

# apply_macro_for_each_platform()
#
# Invokes macro_for_each_platform() for every platform returned by
# GET_PLATFORMS_FOR_GEN. Reads given_type and GEN_TYPE from the caller's scope,
# so it is presumably meant to be called from inside macro_for_each_gen().
macro(apply_macro_for_each_platform)
  GET_PLATFORMS_FOR_GEN(${given_type} ${GEN_TYPE} TESTED_GENX_PLATFORMS)
  foreach(PLATFORM_IT ${TESTED_GENX_PLATFORMS})
    string(TOLOWER ${PLATFORM_IT} PLATFORM_IT_LOWER)
    macro_for_each_platform()
  endforeach()
endmacro() macro(get_family_name_with_type gen_type platform_type) string(REPLACE "GEN" "Gen" gen_type_capitalized ${gen_type}) string(TOLOWER ${platform_type} platform_type_lower) set(family_name_with_type ${gen_type_capitalized}${platform_type_lower}) endmacro() macro(append_sources_from_properties list_name) foreach(name ${ARGN}) get_property(${name} GLOBAL PROPERTY ${name}) list(APPEND ${list_name} ${${name}}) endforeach() endmacro() compute-runtime-20.13.16352/config.h.in000066400000000000000000000006041363734646600174000ustar00rootroot00000000000000/* * Copyright (C) 2017-2018 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef CONFIG_H #define CONFIG_H #cmakedefine USE_CL_CACHE #if defined(USE_CL_CACHE) static const bool clCacheEnabled = true; #else static const bool clCacheEnabled = false; #endif #cmakedefine CL_CACHE_LOCATION "${CL_CACHE_LOCATION}" #cmakedefine NEO_ARCH "${NEO_ARCH}" #endif /* CONFIG_H */ compute-runtime-20.13.16352/driver_version.h.in000066400000000000000000000003531363734646600211740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef DRIVER_VERSION_H #define DRIVER_VERSION_H #cmakedefine NEO_OCL_DRIVER_VERSION ${NEO_OCL_DRIVER_VERSION} #endif /* DRIVER_VERSION_H */ compute-runtime-20.13.16352/igc.opencl.h.in000066400000000000000000000006101363734646600201510ustar00rootroot00000000000000/* * Copyright (C) 2018 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef COMPILER_SETUP_H #define COMPILER_SETUP_H #cmakedefine IGC_LIBRARY_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${IGC_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" #cmakedefine FCL_LIBRARY_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${FCL_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" #endif /* COMPILER_SETUP_H */ 
compute-runtime-20.13.16352/level_zero/000077500000000000000000000000001363734646600175235ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/CMakeLists.txt000066400000000000000000000424331363734646600222710ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # option(BUILD_WITH_L0 "Build level zero by default" ON) if(BUILD_WITH_L0 AND "${NEO_BITS}" STREQUAL "64") set(TARGET_NAME_L0 ze_intel_gpu) # Level Zero third party detection if(DEFINED LEVEL_ZERO_ROOT) get_filename_component(LEVEL_ZERO_ROOT "${LEVEL_ZERO_ROOT}" ABSOLUTE) else() get_filename_component(LEVEL_ZERO_ROOT_tmp "${NEO_SOURCE_DIR}/../level_zero" ABSOLUTE) # Level Zero Headers if read from the git repo are in include/core & include/tools. # To support the installation path of level_zero headers which is include/level_zero/* # the header files are combined into the path include/level_zero/* in the commands below. if(IS_DIRECTORY "${LEVEL_ZERO_ROOT_tmp}") set(CUSTOM_L0_INCLUDE_PATH "${LEVEL_ZERO_ROOT_tmp}/include/level_zero/") file(GLOB LEVEL_ZERO_SOURCE_HEADERS "${LEVEL_ZERO_ROOT_tmp}/include/core/*" "${LEVEL_ZERO_ROOT_tmp}/include/tools/*" ) file(MAKE_DIRECTORY ${CUSTOM_L0_INCLUDE_PATH}) file(COPY ${LEVEL_ZERO_SOURCE_HEADERS} DESTINATION ${CUSTOM_L0_INCLUDE_PATH}) set(LEVEL_ZERO_ROOT "${LEVEL_ZERO_ROOT_tmp}") endif() endif() project(level-zero-gpu VERSION ${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}) message(STATUS "Level Zero driver version: ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}") include(cmake/source_tree.cmake) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") find_package(LevelZero) if(NOT LevelZero_FOUND) message(STATUS "Level zero headers not found") return() endif() add_definitions( -DL0_PROJECT_VERSION_MAJOR="${PROJECT_VERSION_MAJOR}" ) add_definitions( -DL0_PROJECT_VERSION_MINOR="${PROJECT_VERSION_MINOR}" ) add_definitions( 
-DZE_ENABLE_OCL_INTEROP=1) file(WRITE "${CMAKE_BINARY_DIR}/VERSION" "${PROJECT_VERSION}") #Define a path for custom commands to work around MSVC set(CUSTOM_COMMAND_BINARY_DIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) if(MSVC) #MSVC implicitly adds $ to the output path if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja") set(CUSTOM_COMMAND_BINARY_DIR ${CUSTOM_COMMAND_BINARY_DIR}/$) endif() endif() if(UNIX) # Load GNUInstallDirs to determine install targets for Linux packages include(GNUInstallDirs) endif() if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC") endif() set(L0_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") if(NOT DEFINED COMPUTE_RUNTIME_DIR) get_filename_component(COMPUTE_RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}/.. ABSOLUTE) endif() # Option to disable tests option(${PROJECT_NAME}_BUILD_TESTS "Build unit tests." ON) if(SKIP_UNIT_TESTS) set(SKIP_L0_UNIT_TESTS TRUE) endif() if(NOT SKIP_L0_UNIT_TESTS) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/${BRANCH_TYPE}/l0_tests.cmake) endif() # Copy third_party_binaries to output BIN folder add_custom_target(copy_third_party_files) set_target_properties(copy_third_party_files PROPERTIES FOLDER ${TARGET_NAME_L0}) if(DEFINED NEO__IGC_TARGETS) if(WIN32) add_dependencies(copy_third_party_files copy_compiler_files) else() add_dependencies(copy_third_party_files ${NEO__IGC_TARGETS}) foreach(TARGET_tmp ${NEO__IGC_TARGETS}) if(UNIX) add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different $ "${CUSTOM_COMMAND_BINARY_DIR}/" ) endif() add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different $ "${CUSTOM_COMMAND_BINARY_DIR}/" ) endforeach() endif() else() if(DEFINED IGC_DIR) # Only copy igc libs if available message(STATUS "L0::Igc Dir: ${IGC_DIR}") add_custom_command( TARGET 
copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_directory "${IGC_DIR}/lib" "${CUSTOM_COMMAND_BINARY_DIR}/" DEPENDS "${IGC_DIR}/lib" ) endif() endif() if(TARGET ${GMM_TARGET_NAME}) message(STATUS "L0::Gmm Target: ${GMM_TARGET_NAME}") add_dependencies(copy_third_party_files ${GMM_TARGET_NAME}) if(UNIX) add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${CUSTOM_COMMAND_BINARY_DIR}/" ) endif() add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${CUSTOM_COMMAND_BINARY_DIR}/" ) else() if(DEFINED GMM_DIR) # Only copy gmm libs if available message(STATUS "L0::Gmm Dir: ${GMM_DIR}") add_custom_command( TARGET copy_third_party_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CUSTOM_COMMAND_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy_directory "${GMM_DIR}/lib" "${CUSTOM_COMMAND_BINARY_DIR}/" DEPENDS "${GMM_DIR}/lib" ) endif() endif() # Get build type string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_TYPE) if("${BUILD_TYPE}" STREQUAL "debug") add_definitions(-DZE_DEBUG) endif() include_directories(${COMPUTE_RUNTIME_DIR}/third_party/opencl_headers) include_directories(${LevelZero_INCLUDE_DIRS}) include_directories(${NEO_SOURCE_DIR}/level_zero/api/experimental${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SOURCE_DIR}/shared/source/compiler_interface/compiler_options${BRANCH_DIR_SUFFIX}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/core/source/hw_helpers${BRANCH_DIR_SUFFIX}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/core/source/cmdlist/cmdlist_extended${BRANCH_DIR_SUFFIX}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/core/source/cmdqueue/cmdqueue_extended${BRANCH_DIR_SUFFIX}) 
include_directories("${CMAKE_CURRENT_SOURCE_DIR}") include_directories(ddi${BRANCH_DIR_SUFFIX}) include_directories(tools/source) include_directories(experimental${BRANCH_DIR_SUFFIX}/source) # Create our shared library/DLL add_library(${TARGET_NAME_L0} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${NEO_SHARED_DIRECTORY}/dll/options_dll.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/resource_info.cpp ${NEO_SHARED_DIRECTORY}/utilities/cpuintrinsics.cpp ${NEO_SHARED_DIRECTORY}/utilities/debug_settings_reader_creator.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/page_table_mngr.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/resource_info.cpp ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/helpers/debug_helpers.cpp ) target_sources(${TARGET_NAME_L0} PRIVATE ${COMPUTE_RUNTIME_DIR}/opencl/source/aub/aub_stream_interface.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/create_command_stream.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/create_deferred_deleter.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/create_tbx_sockets.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/get_devices.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/source_level_debugger.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/helpers/built_ins_helper.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/instrumentation/instrumentation.cpp ) if(WIN32) target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory_base.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/sys_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_interface.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp ${NEO_SHARED_DIRECTORY}/dll/windows/environment_variables.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/windows/options_windows.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/windows/os_interface.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/windows/create_wddm_memory_manager.cpp ) 
target_link_libraries(${TARGET_NAME_L0} dxgi ws2_32 ) else() target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/allocator_helper.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/create_drm_memory_manager.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/devices${BRANCH_DIR_SUFFIX}/devices.inl ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/devices/devices_base.inl ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/options_linux.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/os_interface.cpp ) endif() if(DEFINED AUB_STREAM_DIR) target_sources(${TARGET_NAME_L0} PRIVATE $ ) endif() target_sources(${TARGET_NAME_L0} PRIVATE $ ) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/source/inc) target_compile_definitions(${TARGET_NAME_L0} PUBLIC ZE_MAKEDLL ) get_property(COMPUTE_RUNTIME_DEFINITIONS TARGET ${NEO_RELEASE_LIB_NAME} PROPERTY COMPILE_DEFINITIONS ) target_compile_definitions(${TARGET_NAME_L0} PRIVATE ${COMPUTE_RUNTIME_DEFINITIONS} ) if(UNIX) target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/linux/gmm_interface_linux.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/drm_neo_create.cpp ) set(OS_SPECIFIC_LIBS dl pthread rt) target_include_directories(${TARGET_NAME_L0} PUBLIC ${L0_ROOT_DIR}/core/source/os_interface/linux ${I915_INCLUDES_DIR} ) target_include_directories(${TARGET_NAME_L0} PRIVATE ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/devices${BRANCH_DIR_SUFFIX} ) else() set(OS_SPECIFIC_LIBS "") target_include_directories(${TARGET_NAME_L0} PUBLIC ${L0_ROOT_DIR}/core/source/os_interface/windows ) if(CMAKE_SIZEOF_VOID_P EQUAL 4) set(L0_BITNESS_SUFIX 32) elseif(CMAKE_SIZEOF_VOID_P EQUAL 8) set(L0_BITNESS_SUFIX 64) endif() set_target_properties(${TARGET_NAME_L0} PROPERTIES DEBUG_OUTPUT_NAME "${TARGET_NAME_L0}${L0_BITNESS_SUFIX}" RELEASE_OUTPUT_NAME "${TARGET_NAME_L0}${L0_BITNESS_SUFIX}" RELEASEINTERNAL_OUTPUT_NAME 
"${TARGET_NAME_L0}${L0_BITNESS_SUFIX}" OUTPUT_NAME "${TARGET_NAME_L0}${L0_BITNESS_SUFIX}" ) add_dependencies(${TARGET_NAME_L0} ${GMM_TARGET_NAME}) target_sources(${TARGET_NAME_L0} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/windows/gmm_interface_win.cpp ) endif() add_subdirectory_unique(api) add_subdirectory_unique(source) if(DONT_CARE_OF_VIRTUALS) set(L0_RELEASE_LIB_NAME "${TARGET_NAME_L0}_lib") set(L0_MOCKABLE_LIB_NAME "${TARGET_NAME_L0}_lib") else() set(L0_RELEASE_LIB_NAME "${TARGET_NAME_L0}_lib") if(NOT SKIP_UNIT_TESTS) set(L0_MOCKABLE_LIB_NAME "${TARGET_NAME_L0}_mockable") endif() endif() function(generate_l0_lib LIB_NAME MOCKABLE) set(L0_STATIC_LIB_NAME ${LIB_NAME}) add_library(${LIB_NAME} OBJECT ${L0_RUNTIME_SOURCES} ) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/core/source "${NEO_BUILD_DIR}/${LIB_NAME}/core/source") add_subdirectory(experimental${BRANCH_DIR_SUFFIX}/source "${NEO_BUILD_DIR}/${LIB_NAME}/experimental${BRANCH_DIR_SUFFIX}/source") add_subdirectory(tools/source "${NEO_BUILD_DIR}/${LIB_NAME}tools/source") append_sources_from_properties(L0_RUNTIME_SOURCES L0_API L0_SOURCES_LINUX L0_SOURCES_WINDOWS L0_SRCS_COMPILER_INTERFACE L0_SRCS_DEBUGGER L0_SRCS_OCLOC_SHARED ) target_sources(${LIB_NAME} PRIVATE ${L0_RUNTIME_SOURCES}) if(${MOCKABLE}) get_property(COMPUTE_RUNTIME_DEFINITIONS TARGET ${NEO_MOCKABLE_LIB_NAME} PROPERTY COMPILE_DEFINITIONS ) target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual PRIVATE ${COMPUTE_RUNTIME_DEFINITIONS} ) else() get_property(COMPUTE_RUNTIME_DEFINITIONS TARGET ${NEO_RELEASE_LIB_NAME} PROPERTY COMPILE_DEFINITIONS ) target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL= PRIVATE ${COMPUTE_RUNTIME_DEFINITIONS} ) endif() set_property(TARGET ${LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) set_target_properties(${LIB_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) target_include_directories(${LIB_NAME} PUBLIC ${ENGINE_NODE_DIR} ${NEO__GMM_INCLUDE_DIR} ${CIF_BASE_DIR} 
${IGC_OCL_ADAPTOR_DIR} ${NEO__IGC_INCLUDE_DIR} ${KHRONOS_HEADERS_DIR} ) if(WIN32) target_include_directories(${LIB_NAME} PUBLIC ${WDK_INCLUDE_PATHS} ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/windows ) else() target_include_directories(${LIB_NAME} PUBLIC ${I915_INCLUDES_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/linux ) endif() create_project_source_tree(${LIB_NAME}) endfunction() if(DONT_CARE_OF_VIRTUALS) generate_l0_lib(${L0_RELEASE_LIB_NAME} TRUE) else() generate_l0_lib(${L0_RELEASE_LIB_NAME} FALSE) if(NOT SKIP_UNIT_TESTS) generate_l0_lib(${L0_MOCKABLE_LIB_NAME} TRUE) endif() endif() append_sources_from_properties(L0_SHARED_LIB_SRCS L0_SRCS_DLL NEO_CORE_SRCS_LINK) target_sources(${TARGET_NAME_L0} PRIVATE $ ${L0_SHARED_LIB_SRCS}) target_link_libraries(${TARGET_NAME_L0} ${NEO_STATICALLY_LINKED_LIBRARIES} ${OS_SPECIFIC_LIBS} ) if(UNIX) target_link_libraries(${TARGET_NAME_L0} ${GMM_LINK_NAME}) set_property(TARGET ${TARGET_NAME_L0} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/core/source/dll/linux/ze.exports" ) endif() if(HAVE_INSTRUMENTATION) target_link_libraries(${TARGET_NAME_L0} ${INSTRUMENTATION_LIB_NAME}) target_include_directories(${TARGET_NAME_L0} BEFORE PRIVATE ${INSTRUMENTATION_INCLUDE_PATH} ) endif() create_source_tree(${TARGET_NAME_L0} ${L0_ROOT_DIR}/..) 
set_property(TARGET ${TARGET_NAME_L0} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) set_target_properties(${TARGET_NAME_L0} PROPERTIES FOLDER ${TARGET_NAME_L0} VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}" SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}" ) if(UNIX) install(TARGETS ${TARGET_NAME_L0} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT ${PROJECT_NAME} NAMELINK_SKIP ) else() install(TARGETS ${TARGET_NAME_L0} RUNTIME DESTINATION Release/lh64 CONFIGURATIONS Release ) install(TARGETS ${TARGET_NAME_L0} RUNTIME DESTINATION Release-Internal/lh64 CONFIGURATIONS ReleaseInternal ) install(TARGETS ${TARGET_NAME_L0} RUNTIME DESTINATION Debug/lh64 CONFIGURATIONS Debug ) endif() if(NOT SKIP_L0_UNIT_TESTS) add_subdirectory_unique(core/test) else() hide_subdir(core/test) endif() add_subdirectories() if(UNIX AND NEO_BUILD_L0_PACKAGE) message(STATUS "Building LevelZero package") set_property(GLOBAL APPEND PROPERTY NEO_L0_COMPONENTS_LIST ${PROJECT_NAME}) set(L0_PACKAGE_VERSION_DEB "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") set(L0_PACKAGE_VERSION_RPM "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${BRANCH_TYPE}/cpack.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/${BRANCH_TYPE}/cpack.cmake) endif() endif() else() set(BUILD_WITH_L0 FALSE PARENT_SCOPE) endif() compute-runtime-20.13.16352/level_zero/api/000077500000000000000000000000001363734646600202745ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/api/CMakeLists.txt000066400000000000000000000005111363734646600230310ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectory(core) add_subdirectory(experimental${BRANCH_DIR_SUFFIX}/) add_subdirectory(tools) set(L0_API "") append_sources_from_properties(L0_API L0_SRCS_API 
L0_EXPERIMENTAL_API L0_TOOLS_API) set_property(GLOBAL PROPERTY L0_API ${L0_API}) compute-runtime-20.13.16352/level_zero/api/core/000077500000000000000000000000001363734646600212245ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/api/core/CMakeLists.txt000066400000000000000000000015451363734646600237710ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_API ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/ze_barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_cl_interop.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_cmdlist.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_cmdqueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_copy.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_core_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_fence.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_module.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_residency.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ze_sampler.cpp ) set_property(GLOBAL PROPERTY L0_SRCS_API ${L0_SRCS_API}) compute-runtime-20.13.16352/level_zero/api/core/ze_barrier.cpp000066400000000000000000000021731363734646600240570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include extern "C" { __zedllexport ze_result_t __zecall zeCommandListAppendBarrier( ze_command_list_handle_t hCommandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryRangesBarrier( ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t 
hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); } __zedllexport ze_result_t __zecall zeDeviceSystemBarrier( ze_device_handle_t hDevice) { return L0::Device::fromHandle(hDevice)->systemBarrier(); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_cl_interop.cpp000066400000000000000000000020031363734646600245570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device.h" #include extern "C" { __zedllexport ze_result_t __zecall zeDeviceRegisterCLMemory( ze_device_handle_t hDevice, cl_context context, cl_mem mem, void **ptr) { return L0::Device::fromHandle(hDevice)->registerCLMemory(context, mem, ptr); } __zedllexport ze_result_t __zecall zeDeviceRegisterCLProgram( ze_device_handle_t hDevice, cl_context context, cl_program program, ze_module_handle_t *phModule) { return L0::Device::fromHandle(hDevice)->registerCLProgram(context, program, phModule); } __zedllexport ze_result_t __zecall zeDeviceRegisterCLCommandQueue( ze_device_handle_t hDevice, cl_context context, cl_command_queue commandQueue, ze_command_queue_handle_t *phCommandQueue) { return L0::Device::fromHandle(hDevice)->registerCLCommandQueue(context, commandQueue, phCommandQueue); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_cmdlist.cpp000066400000000000000000000023501363734646600240650ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include extern "C" { __zedllexport ze_result_t __zecall zeCommandListCreate( ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *phCommandList) { return L0::Device::fromHandle(hDevice)->createCommandList(desc, 
phCommandList); } __zedllexport ze_result_t __zecall zeCommandListCreateImmediate( ze_device_handle_t hDevice, const ze_command_queue_desc_t *altdesc, ze_command_list_handle_t *phCommandList) { return L0::Device::fromHandle(hDevice)->createCommandListImmediate(altdesc, phCommandList); } __zedllexport ze_result_t __zecall zeCommandListDestroy( ze_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->destroy(); } __zedllexport ze_result_t __zecall zeCommandListClose( ze_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->close(); } __zedllexport ze_result_t __zecall zeCommandListReset( ze_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->reset(); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_cmdqueue.cpp000066400000000000000000000022741363734646600242430ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include extern "C" { __zedllexport ze_result_t __zecall zeCommandQueueCreate( ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue) { return L0::Device::fromHandle(hDevice)->createCommandQueue(desc, phCommandQueue); } __zedllexport ze_result_t __zecall zeCommandQueueDestroy( ze_command_queue_handle_t hCommandQueue) { return L0::CommandQueue::fromHandle(hCommandQueue)->destroy(); } __zedllexport ze_result_t __zecall zeCommandQueueExecuteCommandLists( ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence) { return L0::CommandQueue::fromHandle(hCommandQueue)->executeCommandLists(numCommandLists, phCommandLists, hFence, true); } __zedllexport ze_result_t __zecall zeCommandQueueSynchronize( ze_command_queue_handle_t hCommandQueue, uint32_t timeout) { return 
L0::CommandQueue::fromHandle(hCommandQueue)->synchronize(timeout); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_copy.cpp000066400000000000000000000066301363734646600234050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include extern "C" { __zedllexport ze_result_t __zecall zeCommandListAppendMemoryCopy( ze_command_list_handle_t hCommandList, void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopy(dstptr, srcptr, size, hEvent, 0, nullptr); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryFill( ze_command_list_handle_t hCommandList, void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryFill(ptr, pattern, patternSize, size, hEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryCopyRegion( ze_command_list_handle_t hCommandList, void *dstptr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcptr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopyRegion(dstptr, dstRegion, dstPitch, dstSlicePitch, srcptr, srcRegion, srcPitch, srcSlicePitch, hEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendImageCopy( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendImageCopy(hDstImage, hSrcImage, hEvent, 0, nullptr); } __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyRegion( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const 
ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hEvent, 0, nullptr); } __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyToMemory( ze_command_list_handle_t hCommandList, void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendImageCopyToMemory(dstptr, hSrcImage, pSrcRegion, hEvent, 0, nullptr); } __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyFromMemory( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendImageCopyFromMemory(hDstImage, srcptr, pDstRegion, hEvent, 0, nullptr); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryPrefetch( ze_command_list_handle_t hCommandList, const void *ptr, size_t size) { return L0::CommandList::fromHandle(hCommandList)->appendMemoryPrefetch(ptr, size); } __zedllexport ze_result_t __zecall zeCommandListAppendMemAdvise( ze_command_list_handle_t hCommandList, ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) { return L0::CommandList::fromHandle(hCommandList)->appendMemAdvise(hDevice, ptr, size, advice); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_core_loader.cpp000066400000000000000000001610121363734646600247050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/source/inc/ze_intel_gpu.h" #include #include #include #include #include "ze_ddi_tables.h" extern "C" { ze_gpu_driver_dditable_t driver_ddiTable; __zedllexport ze_result_t __zecall zeGetDriverProcAddrTable( ze_api_version_t version, ze_driver_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return 
ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = (ze_pfnDriverGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGet"); pDdiTable->pfnGetApiVersion = (ze_pfnDriverGetApiVersion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetApiVersion"); pDdiTable->pfnGetProperties = (ze_pfnDriverGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetProperties"); pDdiTable->pfnGetIPCProperties = (ze_pfnDriverGetIPCProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetIPCProperties"); pDdiTable->pfnGetExtensionFunctionAddress = (ze_pfnDriverGetExtensionFunctionAddress_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetExtensionFunctionAddress"); pDdiTable->pfnAllocSharedMem = (ze_pfnDriverAllocSharedMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocSharedMem"); pDdiTable->pfnAllocDeviceMem = (ze_pfnDriverAllocDeviceMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocDeviceMem"); pDdiTable->pfnAllocHostMem = (ze_pfnDriverAllocHostMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocHostMem"); pDdiTable->pfnFreeMem = (ze_pfnDriverFreeMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverFreeMem"); pDdiTable->pfnGetMemAllocProperties = (ze_pfnDriverGetMemAllocProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemAllocProperties"); pDdiTable->pfnGetMemAddressRange = (ze_pfnDriverGetMemAddressRange_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemAddressRange"); pDdiTable->pfnGetMemIpcHandle = 
(ze_pfnDriverGetMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemIpcHandle"); pDdiTable->pfnOpenMemIpcHandle = (ze_pfnDriverOpenMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverOpenMemIpcHandle"); pDdiTable->pfnCloseMemIpcHandle = (ze_pfnDriverCloseMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverCloseMemIpcHandle"); driver_ddiTable.core_ddiTable.Driver = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGet = (ze_pfnDriverGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGet_Tracing"); if (nullptr == pDdiTable->pfnGet) { pDdiTable->pfnGet = driver_ddiTable.core_ddiTable.Driver.pfnGet; } pDdiTable->pfnGetApiVersion = (ze_pfnDriverGetApiVersion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetApiVersion_Tracing"); if (nullptr == pDdiTable->pfnGetApiVersion) { pDdiTable->pfnGetApiVersion = driver_ddiTable.core_ddiTable.Driver.pfnGetApiVersion; } pDdiTable->pfnGetProperties = (ze_pfnDriverGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetProperties_Tracing"); if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Driver.pfnGetProperties; } pDdiTable->pfnGetIPCProperties = (ze_pfnDriverGetIPCProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetIPCProperties_Tracing"); if (nullptr == pDdiTable->pfnGetIPCProperties) { pDdiTable->pfnGetIPCProperties = driver_ddiTable.core_ddiTable.Driver.pfnGetIPCProperties; } pDdiTable->pfnGetExtensionFunctionAddress = (ze_pfnDriverGetExtensionFunctionAddress_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetExtensionFunctionAddress_Tracing"); if (nullptr == pDdiTable->pfnGetExtensionFunctionAddress) { pDdiTable->pfnGetExtensionFunctionAddress = driver_ddiTable.core_ddiTable.Driver.pfnGetExtensionFunctionAddress; } pDdiTable->pfnAllocSharedMem = 
(ze_pfnDriverAllocSharedMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocSharedMem_Tracing"); if (nullptr == pDdiTable->pfnAllocSharedMem) { pDdiTable->pfnAllocSharedMem = driver_ddiTable.core_ddiTable.Driver.pfnAllocSharedMem; } pDdiTable->pfnAllocDeviceMem = (ze_pfnDriverAllocDeviceMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocDeviceMem_Tracing"); if (nullptr == pDdiTable->pfnAllocDeviceMem) { pDdiTable->pfnAllocDeviceMem = driver_ddiTable.core_ddiTable.Driver.pfnAllocDeviceMem; } pDdiTable->pfnAllocHostMem = (ze_pfnDriverAllocHostMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocHostMem_Tracing"); if (nullptr == pDdiTable->pfnAllocHostMem) { pDdiTable->pfnAllocHostMem = driver_ddiTable.core_ddiTable.Driver.pfnAllocHostMem; } pDdiTable->pfnFreeMem = (ze_pfnDriverFreeMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverFreeMem_Tracing"); if (nullptr == pDdiTable->pfnFreeMem) { pDdiTable->pfnFreeMem = driver_ddiTable.core_ddiTable.Driver.pfnFreeMem; } pDdiTable->pfnGetMemAllocProperties = (ze_pfnDriverGetMemAllocProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemAllocProperties_Tracing"); if (nullptr == pDdiTable->pfnGetMemAllocProperties) { pDdiTable->pfnGetMemAllocProperties = driver_ddiTable.core_ddiTable.Driver.pfnGetMemAllocProperties; } pDdiTable->pfnGetMemAddressRange = (ze_pfnDriverGetMemAddressRange_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemAddressRange_Tracing"); if (nullptr == pDdiTable->pfnGetMemAddressRange) { pDdiTable->pfnGetMemAddressRange = driver_ddiTable.core_ddiTable.Driver.pfnGetMemAddressRange; } pDdiTable->pfnGetMemIpcHandle = (ze_pfnDriverGetMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemIpcHandle_Tracing"); if (nullptr == pDdiTable->pfnGetMemIpcHandle) { pDdiTable->pfnGetMemIpcHandle = driver_ddiTable.core_ddiTable.Driver.pfnGetMemIpcHandle; } pDdiTable->pfnOpenMemIpcHandle = 
(ze_pfnDriverOpenMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverOpenMemIpcHandle_Tracing"); if (nullptr == pDdiTable->pfnOpenMemIpcHandle) { pDdiTable->pfnOpenMemIpcHandle = driver_ddiTable.core_ddiTable.Driver.pfnOpenMemIpcHandle; } pDdiTable->pfnCloseMemIpcHandle = (ze_pfnDriverCloseMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverCloseMemIpcHandle_Tracing"); if (nullptr == pDdiTable->pfnCloseMemIpcHandle) { pDdiTable->pfnCloseMemIpcHandle = driver_ddiTable.core_ddiTable.Driver.pfnCloseMemIpcHandle; } } return result; } __zedllexport ze_result_t __zecall zeGetGlobalProcAddrTable( ze_api_version_t version, ze_global_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnInit = (ze_pfnInit_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeInit"); driver_ddiTable.core_ddiTable.Global = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnInit = (ze_pfnInit_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeInit_Tracing"); if (nullptr == pDdiTable->pfnInit) { pDdiTable->pfnInit = driver_ddiTable.core_ddiTable.Global.pfnInit; } } return result; } __zedllexport ze_result_t __zecall zeGetDeviceProcAddrTable( ze_api_version_t version, ze_device_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return 
ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = (ze_pfnDeviceGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGet"); pDdiTable->pfnGetSubDevices = (ze_pfnDeviceGetSubDevices_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetSubDevices"); pDdiTable->pfnGetProperties = (ze_pfnDeviceGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetProperties"); pDdiTable->pfnSystemBarrier = (ze_pfnDeviceSystemBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceSystemBarrier"); pDdiTable->pfnRegisterCLMemory = (ze_pfnDeviceRegisterCLMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLMemory"); pDdiTable->pfnRegisterCLProgram = (ze_pfnDeviceRegisterCLProgram_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLProgram"); pDdiTable->pfnRegisterCLCommandQueue = (ze_pfnDeviceRegisterCLCommandQueue_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLCommandQueue"); pDdiTable->pfnGetComputeProperties = (ze_pfnDeviceGetComputeProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetComputeProperties"); pDdiTable->pfnGetKernelProperties = (ze_pfnDeviceGetKernelProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetKernelProperties"); pDdiTable->pfnGetMemoryProperties = (ze_pfnDeviceGetMemoryProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetMemoryProperties"); pDdiTable->pfnGetMemoryAccessProperties = (ze_pfnDeviceGetMemoryAccessProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetMemoryAccessProperties"); pDdiTable->pfnGetCacheProperties = (ze_pfnDeviceGetCacheProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetCacheProperties"); pDdiTable->pfnGetImageProperties = (ze_pfnDeviceGetImageProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, 
"zeDeviceGetImageProperties"); pDdiTable->pfnGetP2PProperties = (ze_pfnDeviceGetP2PProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetP2PProperties"); pDdiTable->pfnCanAccessPeer = (ze_pfnDeviceCanAccessPeer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceCanAccessPeer"); pDdiTable->pfnSetLastLevelCacheConfig = (ze_pfnDeviceSetLastLevelCacheConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceSetLastLevelCacheConfig"); pDdiTable->pfnMakeMemoryResident = (ze_pfnDeviceMakeMemoryResident_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceMakeMemoryResident"); pDdiTable->pfnEvictMemory = (ze_pfnDeviceEvictMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceEvictMemory"); pDdiTable->pfnMakeImageResident = (ze_pfnDeviceMakeImageResident_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceMakeImageResident"); pDdiTable->pfnEvictImage = (ze_pfnDeviceEvictImage_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceEvictImage"); driver_ddiTable.core_ddiTable.Device = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGet = (ze_pfnDeviceGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGet_Tracing"); if (nullptr == pDdiTable->pfnGet) { pDdiTable->pfnGet = driver_ddiTable.core_ddiTable.Device.pfnGet; } pDdiTable->pfnGetSubDevices = (ze_pfnDeviceGetSubDevices_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetSubDevices_Tracing"); if (nullptr == pDdiTable->pfnGetSubDevices) { pDdiTable->pfnGetSubDevices = driver_ddiTable.core_ddiTable.Device.pfnGetSubDevices; } pDdiTable->pfnGetProperties = (ze_pfnDeviceGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetProperties_Tracing"); if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Device.pfnGetProperties; } pDdiTable->pfnSystemBarrier = (ze_pfnDeviceSystemBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, 
"zeDeviceSystemBarrier_Tracing"); if (nullptr == pDdiTable->pfnSystemBarrier) { pDdiTable->pfnSystemBarrier = driver_ddiTable.core_ddiTable.Device.pfnSystemBarrier; } pDdiTable->pfnRegisterCLMemory = (ze_pfnDeviceRegisterCLMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLMemory_Tracing"); if (nullptr == pDdiTable->pfnRegisterCLMemory) { pDdiTable->pfnRegisterCLMemory = driver_ddiTable.core_ddiTable.Device.pfnRegisterCLMemory; } pDdiTable->pfnRegisterCLProgram = (ze_pfnDeviceRegisterCLProgram_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLProgram_Tracing"); if (nullptr == pDdiTable->pfnRegisterCLProgram) { pDdiTable->pfnRegisterCLProgram = driver_ddiTable.core_ddiTable.Device.pfnRegisterCLProgram; } pDdiTable->pfnRegisterCLCommandQueue = (ze_pfnDeviceRegisterCLCommandQueue_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLCommandQueue_Tracing"); if (nullptr == pDdiTable->pfnRegisterCLCommandQueue) { pDdiTable->pfnRegisterCLCommandQueue = driver_ddiTable.core_ddiTable.Device.pfnRegisterCLCommandQueue; } pDdiTable->pfnGetComputeProperties = (ze_pfnDeviceGetComputeProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetComputeProperties_Tracing"); if (nullptr == pDdiTable->pfnGetComputeProperties) { pDdiTable->pfnGetComputeProperties = driver_ddiTable.core_ddiTable.Device.pfnGetComputeProperties; } pDdiTable->pfnGetKernelProperties = (ze_pfnDeviceGetKernelProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetKernelProperties_Tracing"); if (nullptr == pDdiTable->pfnGetKernelProperties) { pDdiTable->pfnGetKernelProperties = driver_ddiTable.core_ddiTable.Device.pfnGetKernelProperties; } pDdiTable->pfnGetMemoryProperties = (ze_pfnDeviceGetMemoryProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetMemoryProperties_Tracing"); if (nullptr == pDdiTable->pfnGetMemoryProperties) { pDdiTable->pfnGetMemoryProperties = 
driver_ddiTable.core_ddiTable.Device.pfnGetMemoryProperties; } pDdiTable->pfnGetMemoryAccessProperties = (ze_pfnDeviceGetMemoryAccessProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetMemoryAccessProperties_Tracing"); if (nullptr == pDdiTable->pfnGetMemoryAccessProperties) { pDdiTable->pfnGetMemoryAccessProperties = driver_ddiTable.core_ddiTable.Device.pfnGetMemoryAccessProperties; } pDdiTable->pfnGetCacheProperties = (ze_pfnDeviceGetCacheProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetCacheProperties_Tracing"); if (nullptr == pDdiTable->pfnGetCacheProperties) { pDdiTable->pfnGetCacheProperties = driver_ddiTable.core_ddiTable.Device.pfnGetCacheProperties; } pDdiTable->pfnGetImageProperties = (ze_pfnDeviceGetImageProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetImageProperties_Tracing"); if (nullptr == pDdiTable->pfnGetImageProperties) { pDdiTable->pfnGetImageProperties = driver_ddiTable.core_ddiTable.Device.pfnGetImageProperties; } pDdiTable->pfnGetP2PProperties = (ze_pfnDeviceGetP2PProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetP2PProperties_Tracing"); if (nullptr == pDdiTable->pfnGetP2PProperties) { pDdiTable->pfnGetP2PProperties = driver_ddiTable.core_ddiTable.Device.pfnGetP2PProperties; } pDdiTable->pfnCanAccessPeer = (ze_pfnDeviceCanAccessPeer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceCanAccessPeer_Tracing"); if (nullptr == pDdiTable->pfnCanAccessPeer) { pDdiTable->pfnCanAccessPeer = driver_ddiTable.core_ddiTable.Device.pfnCanAccessPeer; } pDdiTable->pfnSetLastLevelCacheConfig = (ze_pfnDeviceSetLastLevelCacheConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceSetLastLevelCacheConfig_Tracing"); if (nullptr == pDdiTable->pfnSetLastLevelCacheConfig) { pDdiTable->pfnSetLastLevelCacheConfig = driver_ddiTable.core_ddiTable.Device.pfnSetLastLevelCacheConfig; } pDdiTable->pfnMakeMemoryResident = 
(ze_pfnDeviceMakeMemoryResident_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceMakeMemoryResident_Tracing"); if (nullptr == pDdiTable->pfnMakeMemoryResident) { pDdiTable->pfnMakeMemoryResident = driver_ddiTable.core_ddiTable.Device.pfnMakeMemoryResident; } pDdiTable->pfnEvictMemory = (ze_pfnDeviceEvictMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceEvictMemory_Tracing"); if (nullptr == pDdiTable->pfnEvictMemory) { pDdiTable->pfnEvictMemory = driver_ddiTable.core_ddiTable.Device.pfnEvictMemory; } pDdiTable->pfnMakeImageResident = (ze_pfnDeviceMakeImageResident_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceMakeImageResident_Tracing"); if (nullptr == pDdiTable->pfnMakeImageResident) { pDdiTable->pfnMakeImageResident = driver_ddiTable.core_ddiTable.Device.pfnMakeImageResident; } pDdiTable->pfnEvictImage = (ze_pfnDeviceEvictImage_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceEvictImage_Tracing"); if (nullptr == pDdiTable->pfnEvictImage) { pDdiTable->pfnEvictImage = driver_ddiTable.core_ddiTable.Device.pfnEvictImage; } } return result; } __zedllexport ze_result_t __zecall zeGetCommandQueueProcAddrTable( ze_api_version_t version, ze_command_queue_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (ze_pfnCommandQueueCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueCreate"); pDdiTable->pfnDestroy = (ze_pfnCommandQueueDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueDestroy"); pDdiTable->pfnExecuteCommandLists = 
(ze_pfnCommandQueueExecuteCommandLists_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueExecuteCommandLists"); pDdiTable->pfnSynchronize = (ze_pfnCommandQueueSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueSynchronize"); driver_ddiTable.core_ddiTable.CommandQueue = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnCommandQueueCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.CommandQueue.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnCommandQueueDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.CommandQueue.pfnDestroy; } pDdiTable->pfnExecuteCommandLists = (ze_pfnCommandQueueExecuteCommandLists_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueExecuteCommandLists_Tracing"); if (nullptr == pDdiTable->pfnExecuteCommandLists) { pDdiTable->pfnExecuteCommandLists = driver_ddiTable.core_ddiTable.CommandQueue.pfnExecuteCommandLists; } pDdiTable->pfnSynchronize = (ze_pfnCommandQueueSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueSynchronize_Tracing"); if (nullptr == pDdiTable->pfnSynchronize) { pDdiTable->pfnSynchronize = driver_ddiTable.core_ddiTable.CommandQueue.pfnSynchronize; } } return result; } __zedllexport ze_result_t __zecall zeGetCommandListProcAddrTable( ze_api_version_t version, ze_command_list_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = 
getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnAppendBarrier = (ze_pfnCommandListAppendBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendBarrier"); pDdiTable->pfnAppendMemoryRangesBarrier = (ze_pfnCommandListAppendMemoryRangesBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryRangesBarrier"); pDdiTable->pfnCreate = (ze_pfnCommandListCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListCreate"); pDdiTable->pfnCreateImmediate = (ze_pfnCommandListCreateImmediate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListCreateImmediate"); pDdiTable->pfnDestroy = (ze_pfnCommandListDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListDestroy"); pDdiTable->pfnClose = (ze_pfnCommandListClose_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListClose"); pDdiTable->pfnReset = (ze_pfnCommandListReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListReset"); pDdiTable->pfnAppendMemoryCopy = (ze_pfnCommandListAppendMemoryCopy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryCopy"); pDdiTable->pfnAppendMemoryCopyRegion = (ze_pfnCommandListAppendMemoryCopyRegion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryCopyRegion"); pDdiTable->pfnAppendMemoryFill = (ze_pfnCommandListAppendMemoryFill_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryFill"); pDdiTable->pfnAppendImageCopy = (ze_pfnCommandListAppendImageCopy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopy"); pDdiTable->pfnAppendImageCopyRegion = (ze_pfnCommandListAppendImageCopyRegion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyRegion"); pDdiTable->pfnAppendImageCopyToMemory = (ze_pfnCommandListAppendImageCopyToMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, 
"zeCommandListAppendImageCopyToMemory"); pDdiTable->pfnAppendImageCopyFromMemory = (ze_pfnCommandListAppendImageCopyFromMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyFromMemory"); pDdiTable->pfnAppendMemoryPrefetch = (ze_pfnCommandListAppendMemoryPrefetch_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryPrefetch"); pDdiTable->pfnAppendMemAdvise = (ze_pfnCommandListAppendMemAdvise_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemAdvise"); pDdiTable->pfnAppendSignalEvent = (ze_pfnCommandListAppendSignalEvent_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendSignalEvent"); pDdiTable->pfnAppendWaitOnEvents = (ze_pfnCommandListAppendWaitOnEvents_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendWaitOnEvents"); pDdiTable->pfnAppendEventReset = (ze_pfnCommandListAppendEventReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendEventReset"); pDdiTable->pfnAppendLaunchKernel = (ze_pfnCommandListAppendLaunchKernel_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchKernel"); pDdiTable->pfnAppendLaunchCooperativeKernel = (ze_pfnCommandListAppendLaunchCooperativeKernel_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchCooperativeKernel"); pDdiTable->pfnAppendLaunchKernelIndirect = (ze_pfnCommandListAppendLaunchKernelIndirect_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchKernelIndirect"); pDdiTable->pfnAppendLaunchMultipleKernelsIndirect = (ze_pfnCommandListAppendLaunchMultipleKernelsIndirect_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchMultipleKernelsIndirect"); driver_ddiTable.core_ddiTable.CommandList = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnAppendBarrier = (ze_pfnCommandListAppendBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendBarrier_Tracing"); if (nullptr == 
pDdiTable->pfnAppendBarrier) { pDdiTable->pfnAppendBarrier = driver_ddiTable.core_ddiTable.CommandList.pfnAppendBarrier; } pDdiTable->pfnAppendMemoryRangesBarrier = (ze_pfnCommandListAppendMemoryRangesBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryRangesBarrier_Tracing"); if (nullptr == pDdiTable->pfnAppendMemoryRangesBarrier) { pDdiTable->pfnAppendMemoryRangesBarrier = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier; } pDdiTable->pfnCreate = (ze_pfnCommandListCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.CommandList.pfnCreate; } pDdiTable->pfnCreateImmediate = (ze_pfnCommandListCreateImmediate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListCreateImmediate_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.CommandList.pfnDestroy; } pDdiTable->pfnDestroy = (ze_pfnCommandListDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.CommandList.pfnDestroy; } pDdiTable->pfnClose = (ze_pfnCommandListClose_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListClose_Tracing"); if (nullptr == pDdiTable->pfnClose) { pDdiTable->pfnClose = driver_ddiTable.core_ddiTable.CommandList.pfnClose; } pDdiTable->pfnReset = (ze_pfnCommandListReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListReset_Tracing"); if (nullptr == pDdiTable->pfnReset) { pDdiTable->pfnReset = driver_ddiTable.core_ddiTable.CommandList.pfnReset; } pDdiTable->pfnAppendMemoryCopy = (ze_pfnCommandListAppendMemoryCopy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryCopy_Tracing"); if (nullptr == pDdiTable->pfnAppendMemoryCopy) { pDdiTable->pfnAppendMemoryCopy = 
driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy; } pDdiTable->pfnAppendMemoryCopyRegion = (ze_pfnCommandListAppendMemoryCopyRegion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryCopyRegion_Tracing"); if (nullptr == pDdiTable->pfnAppendMemoryCopyRegion) { pDdiTable->pfnAppendMemoryCopyRegion = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion; } pDdiTable->pfnAppendMemoryFill = (ze_pfnCommandListAppendMemoryFill_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryFill_Tracing"); if (nullptr == pDdiTable->pfnAppendMemoryFill) { pDdiTable->pfnAppendMemoryFill = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill; } pDdiTable->pfnAppendImageCopy = (ze_pfnCommandListAppendImageCopy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopy_Tracing"); if (nullptr == pDdiTable->pfnAppendImageCopy) { pDdiTable->pfnAppendImageCopy = driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy; } pDdiTable->pfnAppendImageCopyRegion = (ze_pfnCommandListAppendImageCopyRegion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyRegion_Tracing"); if (nullptr == pDdiTable->pfnAppendImageCopyRegion) { pDdiTable->pfnAppendImageCopyRegion = driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion; } pDdiTable->pfnAppendImageCopyToMemory = (ze_pfnCommandListAppendImageCopyToMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyToMemory_Tracing"); if (nullptr == pDdiTable->pfnAppendImageCopyToMemory) { pDdiTable->pfnAppendImageCopyToMemory = driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory; } pDdiTable->pfnAppendImageCopyFromMemory = (ze_pfnCommandListAppendImageCopyFromMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyFromMemory_Tracing"); if (nullptr == pDdiTable->pfnAppendImageCopyFromMemory) { pDdiTable->pfnAppendImageCopyFromMemory = 
driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory; } pDdiTable->pfnAppendMemoryPrefetch = (ze_pfnCommandListAppendMemoryPrefetch_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryPrefetch_Tracing"); if (nullptr == pDdiTable->pfnAppendMemoryPrefetch) { pDdiTable->pfnAppendMemoryPrefetch = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch; } pDdiTable->pfnAppendMemAdvise = (ze_pfnCommandListAppendMemAdvise_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemAdvise_Tracing"); if (nullptr == pDdiTable->pfnAppendMemAdvise) { pDdiTable->pfnAppendMemAdvise = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise; } pDdiTable->pfnAppendSignalEvent = (ze_pfnCommandListAppendSignalEvent_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendSignalEvent_Tracing"); if (nullptr == pDdiTable->pfnAppendSignalEvent) { pDdiTable->pfnAppendSignalEvent = driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent; } pDdiTable->pfnAppendWaitOnEvents = (ze_pfnCommandListAppendWaitOnEvents_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendWaitOnEvents_Tracing"); if (nullptr == pDdiTable->pfnAppendWaitOnEvents) { pDdiTable->pfnAppendWaitOnEvents = driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents; } pDdiTable->pfnAppendEventReset = (ze_pfnCommandListAppendEventReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendEventReset_Tracing"); if (nullptr == pDdiTable->pfnAppendEventReset) { pDdiTable->pfnAppendEventReset = driver_ddiTable.core_ddiTable.CommandList.pfnAppendEventReset; } pDdiTable->pfnAppendLaunchKernel = (ze_pfnCommandListAppendLaunchKernel_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchKernel_Tracing"); if (nullptr == pDdiTable->pfnAppendLaunchKernel) { pDdiTable->pfnAppendLaunchKernel = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernel; } 
pDdiTable->pfnAppendLaunchCooperativeKernel = (ze_pfnCommandListAppendLaunchCooperativeKernel_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchCooperativeKernel_Tracing"); if (nullptr == pDdiTable->pfnAppendLaunchCooperativeKernel) { pDdiTable->pfnAppendLaunchCooperativeKernel = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchCooperativeKernel; } pDdiTable->pfnAppendLaunchKernelIndirect = (ze_pfnCommandListAppendLaunchKernelIndirect_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchKernelIndirect_Tracing"); if (nullptr == pDdiTable->pfnAppendLaunchKernelIndirect) { pDdiTable->pfnAppendLaunchKernelIndirect = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernelIndirect; } pDdiTable->pfnAppendLaunchMultipleKernelsIndirect = (ze_pfnCommandListAppendLaunchMultipleKernelsIndirect_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing"); if (nullptr == pDdiTable->pfnAppendLaunchMultipleKernelsIndirect) { pDdiTable->pfnAppendLaunchMultipleKernelsIndirect = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchMultipleKernelsIndirect; } } return result; } __zedllexport ze_result_t __zecall zeGetFenceProcAddrTable( ze_api_version_t version, ze_fence_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (ze_pfnFenceCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceCreate"); pDdiTable->pfnDestroy = (ze_pfnFenceDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceDestroy"); 
pDdiTable->pfnHostSynchronize = (ze_pfnFenceHostSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceHostSynchronize"); pDdiTable->pfnQueryStatus = (ze_pfnFenceQueryStatus_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceQueryStatus"); pDdiTable->pfnReset = (ze_pfnFenceReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceReset"); driver_ddiTable.core_ddiTable.Fence = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnFenceCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Fence.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnFenceDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Fence.pfnDestroy; } pDdiTable->pfnHostSynchronize = (ze_pfnFenceHostSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceHostSynchronize_Tracing"); if (nullptr == pDdiTable->pfnHostSynchronize) { pDdiTable->pfnHostSynchronize = driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize; } pDdiTable->pfnQueryStatus = (ze_pfnFenceQueryStatus_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceQueryStatus_Tracing"); if (nullptr == pDdiTable->pfnQueryStatus) { pDdiTable->pfnQueryStatus = driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus; } pDdiTable->pfnReset = (ze_pfnFenceReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceReset_Tracing"); if (nullptr == pDdiTable->pfnReset) { pDdiTable->pfnReset = driver_ddiTable.core_ddiTable.Fence.pfnReset; } } return result; } __zedllexport ze_result_t __zecall zeGetEventPoolProcAddrTable( ze_api_version_t version, ze_event_pool_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == 
driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (ze_pfnEventPoolCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolCreate"); pDdiTable->pfnDestroy = (ze_pfnEventPoolDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolDestroy"); pDdiTable->pfnGetIpcHandle = (ze_pfnEventPoolGetIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolGetIpcHandle"); pDdiTable->pfnOpenIpcHandle = (ze_pfnEventPoolOpenIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolOpenIpcHandle"); pDdiTable->pfnCloseIpcHandle = (ze_pfnEventPoolCloseIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolCloseIpcHandle"); driver_ddiTable.core_ddiTable.EventPool = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnEventPoolCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.EventPool.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnEventPoolDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.EventPool.pfnDestroy; } pDdiTable->pfnGetIpcHandle = (ze_pfnEventPoolGetIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolGetIpcHandle_Tracing"); if (nullptr == pDdiTable->pfnGetIpcHandle) { pDdiTable->pfnGetIpcHandle = driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle; } pDdiTable->pfnOpenIpcHandle = (ze_pfnEventPoolOpenIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolOpenIpcHandle_Tracing"); if (nullptr == pDdiTable->pfnOpenIpcHandle) { 
pDdiTable->pfnOpenIpcHandle = driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle; } pDdiTable->pfnCloseIpcHandle = (ze_pfnEventPoolCloseIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolCloseIpcHandle_Tracing"); if (nullptr == pDdiTable->pfnCloseIpcHandle) { pDdiTable->pfnCloseIpcHandle = driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle; } } return result; } __zedllexport ze_result_t __zecall zeGetEventProcAddrTable( ze_api_version_t version, ze_event_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (ze_pfnEventCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventCreate"); pDdiTable->pfnDestroy = (ze_pfnEventDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventDestroy"); pDdiTable->pfnHostSignal = (ze_pfnEventHostSignal_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostSignal"); pDdiTable->pfnHostSynchronize = (ze_pfnEventHostSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostSynchronize"); pDdiTable->pfnQueryStatus = (ze_pfnEventQueryStatus_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventQueryStatus"); pDdiTable->pfnHostReset = (ze_pfnEventHostReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostReset"); pDdiTable->pfnGetTimestamp = (ze_pfnEventGetTimestamp_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventGetTimestamp"); driver_ddiTable.core_ddiTable.Event = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnEventCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, 
"zeEventCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Event.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnEventDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Event.pfnDestroy; } pDdiTable->pfnHostSignal = (ze_pfnEventHostSignal_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostSignal_Tracing"); if (nullptr == pDdiTable->pfnHostSignal) { pDdiTable->pfnHostSignal = driver_ddiTable.core_ddiTable.Event.pfnHostSignal; } pDdiTable->pfnHostSynchronize = (ze_pfnEventHostSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostSynchronize_Tracing"); if (nullptr == pDdiTable->pfnHostSynchronize) { pDdiTable->pfnHostSynchronize = driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize; } pDdiTable->pfnQueryStatus = (ze_pfnEventQueryStatus_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventQueryStatus_Tracing"); if (nullptr == pDdiTable->pfnQueryStatus) { pDdiTable->pfnQueryStatus = driver_ddiTable.core_ddiTable.Event.pfnQueryStatus; } pDdiTable->pfnHostReset = (ze_pfnEventHostReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostReset_Tracing"); if (nullptr == pDdiTable->pfnHostReset) { pDdiTable->pfnHostReset = driver_ddiTable.core_ddiTable.Event.pfnHostReset; } pDdiTable->pfnGetTimestamp = (ze_pfnEventGetTimestamp_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventGetTimestamp_Tracing"); if (nullptr == pDdiTable->pfnGetTimestamp) { pDdiTable->pfnGetTimestamp = driver_ddiTable.core_ddiTable.Event.pfnGetTimestamp; } } return result; } __zedllexport ze_result_t __zecall zeGetImageProcAddrTable( ze_api_version_t version, ze_image_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == 
driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (ze_pfnImageGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageGetProperties"); pDdiTable->pfnCreate = (ze_pfnImageCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageCreate"); pDdiTable->pfnDestroy = (ze_pfnImageDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageDestroy"); driver_ddiTable.core_ddiTable.Image = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGetProperties = (ze_pfnImageGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageGetProperties_Tracing"); if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Image.pfnGetProperties; } pDdiTable->pfnCreate = (ze_pfnImageCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Image.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnImageDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Image.pfnDestroy; } } return result; } __zedllexport ze_result_t __zecall zeGetModuleProcAddrTable( ze_api_version_t version, ze_module_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = 
getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (ze_pfnModuleCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleCreate"); pDdiTable->pfnDestroy = (ze_pfnModuleDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleDestroy"); pDdiTable->pfnGetNativeBinary = (ze_pfnModuleGetNativeBinary_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetNativeBinary"); pDdiTable->pfnGetGlobalPointer = (ze_pfnModuleGetGlobalPointer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetGlobalPointer"); pDdiTable->pfnGetFunctionPointer = (ze_pfnModuleGetFunctionPointer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetFunctionPointer"); pDdiTable->pfnGetKernelNames = (ze_pfnModuleGetKernelNames_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetKernelNames"); driver_ddiTable.core_ddiTable.Module = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnModuleCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Module.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnModuleDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Module.pfnDestroy; } pDdiTable->pfnGetNativeBinary = (ze_pfnModuleGetNativeBinary_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetNativeBinary_Tracing"); if (nullptr == pDdiTable->pfnGetNativeBinary) { pDdiTable->pfnGetNativeBinary = driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary; } pDdiTable->pfnGetGlobalPointer = (ze_pfnModuleGetGlobalPointer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetGlobalPointer_Tracing"); if (nullptr == pDdiTable->pfnGetGlobalPointer) { pDdiTable->pfnGetGlobalPointer = 
driver_ddiTable.core_ddiTable.Module.pfnGetGlobalPointer; } pDdiTable->pfnGetFunctionPointer = (ze_pfnModuleGetFunctionPointer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetFunctionPointer_Tracing"); if (nullptr == pDdiTable->pfnGetFunctionPointer) { pDdiTable->pfnGetFunctionPointer = driver_ddiTable.core_ddiTable.Module.pfnGetFunctionPointer; } pDdiTable->pfnGetKernelNames = (ze_pfnModuleGetKernelNames_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetKernelNames_Tracing"); if (nullptr == pDdiTable->pfnGetKernelNames) { pDdiTable->pfnGetKernelNames = driver_ddiTable.core_ddiTable.Module.pfnGetKernelNames; } } return result; } __zedllexport ze_result_t __zecall zeGetModuleBuildLogProcAddrTable( ze_api_version_t version, ze_module_build_log_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnDestroy = (ze_pfnModuleBuildLogDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleBuildLogDestroy"); pDdiTable->pfnGetString = (ze_pfnModuleBuildLogGetString_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleBuildLogGetString"); driver_ddiTable.core_ddiTable.ModuleBuildLog = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnDestroy = (ze_pfnModuleBuildLogDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleBuildLogDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnDestroy; } pDdiTable->pfnGetString = (ze_pfnModuleBuildLogGetString_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, 
"zeModuleBuildLogGetString_Tracing"); if (nullptr == pDdiTable->pfnGetString) { pDdiTable->pfnGetString = driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnGetString; } } return result; } __zedllexport ze_result_t __zecall zeGetKernelProcAddrTable( ze_api_version_t version, ze_kernel_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnSetIntermediateCacheConfig = (ze_pfnKernelSetIntermediateCacheConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetIntermediateCacheConfig"); pDdiTable->pfnCreate = (ze_pfnKernelCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelCreate"); pDdiTable->pfnDestroy = (ze_pfnKernelDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelDestroy"); pDdiTable->pfnSetGroupSize = (ze_pfnKernelSetGroupSize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetGroupSize"); pDdiTable->pfnSuggestGroupSize = (ze_pfnKernelSuggestGroupSize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSuggestGroupSize"); pDdiTable->pfnSuggestMaxCooperativeGroupCount = (ze_pfnKernelSuggestMaxCooperativeGroupCount_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSuggestMaxCooperativeGroupCount"); pDdiTable->pfnSetArgumentValue = (ze_pfnKernelSetArgumentValue_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetArgumentValue"); pDdiTable->pfnSetAttribute = (ze_pfnKernelSetAttribute_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetAttribute"); pDdiTable->pfnGetAttribute = (ze_pfnKernelGetAttribute_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, 
"zeKernelGetAttribute"); pDdiTable->pfnGetProperties = (ze_pfnKernelGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelGetProperties"); driver_ddiTable.core_ddiTable.Kernel = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnSetIntermediateCacheConfig = (ze_pfnKernelSetIntermediateCacheConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetIntermediateCacheConfig_Tracing"); if (nullptr == pDdiTable->pfnSetIntermediateCacheConfig) { pDdiTable->pfnSetIntermediateCacheConfig = driver_ddiTable.core_ddiTable.Kernel.pfnSetIntermediateCacheConfig; } pDdiTable->pfnCreate = (ze_pfnKernelCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Kernel.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnKernelDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Kernel.pfnDestroy; } pDdiTable->pfnSetGroupSize = (ze_pfnKernelSetGroupSize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetGroupSize_Tracing"); if (nullptr == pDdiTable->pfnSetGroupSize) { pDdiTable->pfnSetGroupSize = driver_ddiTable.core_ddiTable.Kernel.pfnSetGroupSize; } pDdiTable->pfnSuggestGroupSize = (ze_pfnKernelSuggestGroupSize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSuggestGroupSize_Tracing"); if (nullptr == pDdiTable->pfnSuggestGroupSize) { pDdiTable->pfnSuggestGroupSize = driver_ddiTable.core_ddiTable.Kernel.pfnSuggestGroupSize; } pDdiTable->pfnSuggestMaxCooperativeGroupCount = (ze_pfnKernelSuggestMaxCooperativeGroupCount_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSuggestMaxCooperativeGroupCount_Tracing"); if (nullptr == pDdiTable->pfnSuggestMaxCooperativeGroupCount) { pDdiTable->pfnSuggestMaxCooperativeGroupCount = 
driver_ddiTable.core_ddiTable.Kernel.pfnSuggestMaxCooperativeGroupCount; } pDdiTable->pfnSetArgumentValue = (ze_pfnKernelSetArgumentValue_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetArgumentValue_Tracing"); if (nullptr == pDdiTable->pfnSetArgumentValue) { pDdiTable->pfnSetArgumentValue = driver_ddiTable.core_ddiTable.Kernel.pfnSetArgumentValue; } pDdiTable->pfnSetAttribute = (ze_pfnKernelSetAttribute_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetAttribute_Tracing"); if (nullptr == pDdiTable->pfnSetAttribute) { pDdiTable->pfnSetAttribute = driver_ddiTable.core_ddiTable.Kernel.pfnSetAttribute; } pDdiTable->pfnGetAttribute = (ze_pfnKernelGetAttribute_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelGetAttribute_Tracing"); if (nullptr == pDdiTable->pfnGetAttribute) { pDdiTable->pfnGetAttribute = driver_ddiTable.core_ddiTable.Kernel.pfnGetAttribute; } pDdiTable->pfnGetProperties = (ze_pfnKernelGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelGetProperties_Tracing"); if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Kernel.pfnGetProperties; } } return result; } __zedllexport ze_result_t __zecall zeGetSamplerProcAddrTable( ze_api_version_t version, ze_sampler_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { driver_ddiTable.driverLibrary = LOAD_INTEL_GPU_LIBRARY(); if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } driver_ddiTable.enableTracing = getenv_tobool("ZE_ENABLE_API_TRACING"); } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (ze_pfnSamplerCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeSamplerCreate"); pDdiTable->pfnDestroy = (ze_pfnSamplerDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeSamplerDestroy"); 
driver_ddiTable.core_ddiTable.Sampler = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnSamplerCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeSamplerCreate_Tracing"); if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Sampler.pfnCreate; } pDdiTable->pfnDestroy = (ze_pfnSamplerDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeSamplerDestroy_Tracing"); if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Sampler.pfnDestroy; } } return result; } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_device.cpp000066400000000000000000000062361363734646600236740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle.h" #include #include extern "C" { __zedllexport ze_result_t __zecall zeDeviceGet( ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices) { return L0::DriverHandle::fromHandle(hDriver)->getDevice(pCount, phDevices); } __zedllexport ze_result_t __zecall zeDeviceGetSubDevices( ze_device_handle_t hDevice, uint32_t *pCount, ze_device_handle_t *phSubdevices) { return L0::Device::fromHandle(hDevice)->getSubDevices(pCount, phSubdevices); } __zedllexport ze_result_t __zecall zeDeviceGetProperties( ze_device_handle_t hDevice, ze_device_properties_t *pDeviceProperties) { return L0::Device::fromHandle(hDevice)->getProperties(pDeviceProperties); } __zedllexport ze_result_t __zecall zeDeviceGetComputeProperties( ze_device_handle_t hDevice, ze_device_compute_properties_t *pComputeProperties) { return L0::Device::fromHandle(hDevice)->getComputeProperties(pComputeProperties); } __zedllexport ze_result_t __zecall zeDeviceGetKernelProperties( ze_device_handle_t hDevice, ze_device_kernel_properties_t 
*pKernelProperties) { return L0::Device::fromHandle(hDevice)->getKernelProperties(pKernelProperties); } __zedllexport ze_result_t __zecall zeDeviceGetMemoryProperties( ze_device_handle_t hDevice, uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) { return L0::Device::fromHandle(hDevice)->getMemoryProperties(pCount, pMemProperties); } __zedllexport ze_result_t __zecall zeDeviceGetMemoryAccessProperties( ze_device_handle_t hDevice, ze_device_memory_access_properties_t *pMemAccessProperties) { return L0::Device::fromHandle(hDevice)->getMemoryAccessProperties(pMemAccessProperties); } __zedllexport ze_result_t __zecall zeDeviceGetCacheProperties( ze_device_handle_t hDevice, ze_device_cache_properties_t *pCacheProperties) { return L0::Device::fromHandle(hDevice)->getCacheProperties(pCacheProperties); } __zedllexport ze_result_t __zecall zeDeviceGetImageProperties( ze_device_handle_t hDevice, ze_device_image_properties_t *pImageProperties) { return L0::Device::fromHandle(hDevice)->getDeviceImageProperties(pImageProperties); } __zedllexport ze_result_t __zecall zeDeviceGetP2PProperties( ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) { return L0::Device::fromHandle(hDevice)->getP2PProperties(hPeerDevice, pP2PProperties); } __zedllexport ze_result_t __zecall zeDeviceCanAccessPeer( ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_bool_t *value) { return L0::Device::fromHandle(hDevice)->canAccessPeer(hPeerDevice, value); } __zedllexport ze_result_t __zecall zeDeviceSetLastLevelCacheConfig( ze_device_handle_t hDevice, ze_cache_config_t cacheConfig) { return L0::Device::fromHandle(hDevice)->setLastLevelCacheConfig(cacheConfig); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_driver.cpp000066400000000000000000000026121363734646600237220ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle.h" #include extern "C" { __zedllexport ze_result_t __zecall zeInit( ze_init_flag_t flags) { return L0::init(flags); } __zedllexport ze_result_t __zecall zeDriverGet( uint32_t *pCount, ze_driver_handle_t *phDrivers) { return L0::driverHandleGet(pCount, phDrivers); } __zedllexport ze_result_t __zecall zeDriverGetProperties( ze_driver_handle_t hDriver, ze_driver_properties_t *pProperties) { return L0::DriverHandle::fromHandle(hDriver)->getProperties(pProperties); } __zedllexport ze_result_t __zecall zeDriverGetApiVersion( ze_driver_handle_t hDriver, ze_api_version_t *version) { return L0::DriverHandle::fromHandle(hDriver)->getApiVersion(version); } __zedllexport ze_result_t __zecall zeDriverGetIPCProperties( ze_driver_handle_t hDriver, ze_driver_ipc_properties_t *pIPCProperties) { return L0::DriverHandle::fromHandle(hDriver)->getIPCProperties(pIPCProperties); } __zedllexport ze_result_t __zecall zeDriverGetExtensionFunctionAddress( ze_driver_handle_t hDriver, const char *pFuncName, void **pfunc) { return L0::DriverHandle::fromHandle(hDriver)->getExtensionFunctionAddress(pFuncName, pfunc); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_event.cpp000066400000000000000000000063421363734646600235540ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/event/event.h" #include extern "C" { __zedllexport ze_result_t __zecall zeEventPoolCreate( ze_driver_handle_t hDriver, const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) { return L0::DriverHandle::fromHandle(hDriver)->createEventPool(desc, numDevices, phDevices, phEventPool); } __zedllexport ze_result_t __zecall zeEventPoolDestroy( ze_event_pool_handle_t hEventPool) { return L0::EventPool::fromHandle(hEventPool)->destroy(); } __zedllexport 
ze_result_t __zecall zeEventCreate( ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent) { return L0::EventPool::fromHandle(hEventPool)->createEvent(desc, phEvent); } __zedllexport ze_result_t __zecall zeEventDestroy( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->destroy(); } __zedllexport ze_result_t __zecall zeEventPoolGetIpcHandle( ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc) { return L0::EventPool::fromHandle(hEventPool)->getIpcHandle(phIpc); } __zedllexport ze_result_t __zecall zeEventPoolOpenIpcHandle( ze_driver_handle_t hDriver, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) { return L0::DriverHandle::fromHandle(hDriver)->openEventPoolIpcHandle(hIpc, phEventPool); } __zedllexport ze_result_t __zecall zeEventPoolCloseIpcHandle( ze_event_pool_handle_t hEventPool) { return L0::EventPool::fromHandle(hEventPool)->closeIpcHandle(); } __zedllexport ze_result_t __zecall zeCommandListAppendSignalEvent( ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { return L0::CommandList::fromHandle(hCommandList)->appendSignalEvent(hEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendWaitOnEvents( ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents) { return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents); } __zedllexport ze_result_t __zecall zeEventHostSignal( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->hostSignal(); } __zedllexport ze_result_t __zecall zeEventHostSynchronize( ze_event_handle_t hEvent, uint32_t timeout) { return L0::Event::fromHandle(hEvent)->hostSynchronize(timeout); } __zedllexport ze_result_t __zecall zeEventQueryStatus( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->queryStatus(); } __zedllexport ze_result_t __zecall zeCommandListAppendEventReset( ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { 
return L0::CommandList::fromHandle(hCommandList)->appendEventReset(hEvent); } __zedllexport ze_result_t __zecall zeEventHostReset( ze_event_handle_t hEvent) { return L0::Event::fromHandle(hEvent)->reset(); } __zedllexport ze_result_t __zecall zeEventGetTimestamp( ze_event_handle_t hEvent, ze_event_timestamp_type_t timestampType, void *dstptr) { return L0::Event::fromHandle(hEvent)->getTimestamp(timestampType, dstptr); } } // extern "C"compute-runtime-20.13.16352/level_zero/api/core/ze_fence.cpp000066400000000000000000000020371363734646600235100ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/fence/fence.h" #include extern "C" { __zedllexport ze_result_t __zecall zeFenceCreate( ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) { return L0::CommandQueue::fromHandle(hCommandQueue)->createFence(desc, phFence); } __zedllexport ze_result_t __zecall zeFenceDestroy( ze_fence_handle_t hFence) { return L0::Fence::fromHandle(hFence)->destroy(); } __zedllexport ze_result_t __zecall zeFenceHostSynchronize( ze_fence_handle_t hFence, uint32_t timeout) { return L0::Fence::fromHandle(hFence)->hostSynchronize(timeout); } __zedllexport ze_result_t __zecall zeFenceQueryStatus( ze_fence_handle_t hFence) { return L0::Fence::fromHandle(hFence)->queryStatus(); } __zedllexport ze_result_t __zecall zeFenceReset( ze_fence_handle_t hFence) { return L0::Fence::fromHandle(hFence)->reset(); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_image.cpp000066400000000000000000000014701363734646600235120ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image.h" #include extern "C" { __zedllexport ze_result_t __zecall zeImageGetProperties( ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) { 
return L0::Device::fromHandle(hDevice)->imageGetProperties(desc, pImageProperties); } __zedllexport ze_result_t __zecall zeImageCreate( ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) { return L0::Device::fromHandle(hDevice)->createImage(desc, phImage); } __zedllexport ze_result_t __zecall zeImageDestroy( ze_image_handle_t hImage) { return L0::Image::fromHandle(hImage)->destroy(); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_memory.cpp000066400000000000000000000053071363734646600237430ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver_handle.h" #include extern "C" { __zedllexport ze_result_t __zecall zeDriverAllocSharedMem( ze_driver_handle_t hDriver, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr) { return L0::DriverHandle::fromHandle(hDriver)->allocSharedMem(hDevice, deviceDesc->flags, hostDesc->flags, size, alignment, pptr); } __zedllexport ze_result_t __zecall zeDriverAllocDeviceMem( ze_driver_handle_t hDriver, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr) { return L0::DriverHandle::fromHandle(hDriver)->allocDeviceMem(hDevice, deviceDesc->flags, size, alignment, pptr); } __zedllexport ze_result_t __zecall zeDriverAllocHostMem( ze_driver_handle_t hDriver, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **pptr) { return L0::DriverHandle::fromHandle(hDriver)->allocHostMem(hostDesc->flags, size, alignment, pptr); } __zedllexport ze_result_t __zecall zeDriverFreeMem( ze_driver_handle_t hDriver, void *ptr) { return L0::DriverHandle::fromHandle(hDriver)->freeMem(ptr); } __zedllexport ze_result_t __zecall zeDriverGetMemAllocProperties( ze_driver_handle_t hDriver, const void *ptr, 
ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice) { return L0::DriverHandle::fromHandle(hDriver)->getMemAllocProperties(ptr, pMemAllocProperties, phDevice); } __zedllexport ze_result_t __zecall zeDriverGetMemAddressRange( ze_driver_handle_t hDriver, const void *ptr, void **pBase, size_t *pSize) { return L0::DriverHandle::fromHandle(hDriver)->getMemAddressRange(ptr, pBase, pSize); } __zedllexport ze_result_t __zecall zeDriverGetMemIpcHandle( ze_driver_handle_t hDriver, const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) { return L0::DriverHandle::fromHandle(hDriver)->getIpcMemHandle(ptr, pIpcHandle); } __zedllexport ze_result_t __zecall zeDriverOpenMemIpcHandle( ze_driver_handle_t hDriver, ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flag_t flags, void **pptr) { return L0::DriverHandle::fromHandle(hDriver)->openIpcMemHandle(hDevice, handle, flags, pptr); } __zedllexport ze_result_t __zecall zeDriverCloseMemIpcHandle( ze_driver_handle_t hDriver, const void *ptr) { return L0::DriverHandle::fromHandle(hDriver)->closeIpcMemHandle(ptr); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_module.cpp000066400000000000000000000144421363734646600237200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module.h" #include extern "C" { __zedllexport ze_result_t __zecall zeModuleCreate( ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) { return L0::Device::fromHandle(hDevice)->createModule(desc, phModule, phBuildLog); } __zedllexport ze_result_t __zecall zeModuleDestroy( ze_module_handle_t hModule) { return L0::Module::fromHandle(hModule)->destroy(); } __zedllexport ze_result_t __zecall zeModuleBuildLogDestroy( ze_module_build_log_handle_t hModuleBuildLog) { return L0::ModuleBuildLog::fromHandle(hModuleBuildLog)->destroy(); 
} __zedllexport ze_result_t __zecall zeModuleBuildLogGetString( ze_module_build_log_handle_t hModuleBuildLog, size_t *pSize, char *pBuildLog) { return L0::ModuleBuildLog::fromHandle(hModuleBuildLog)->getString(pSize, pBuildLog); } __zedllexport ze_result_t __zecall zeModuleGetNativeBinary( ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary) { return L0::Module::fromHandle(hModule)->getNativeBinary(pSize, pModuleNativeBinary); } __zedllexport ze_result_t __zecall zeModuleGetGlobalPointer( ze_module_handle_t hModule, const char *pGlobalName, void **pptr) { return L0::Module::fromHandle(hModule)->getGlobalPointer(pGlobalName, pptr); } __zedllexport ze_result_t __zecall zeModuleGetKernelNames( ze_module_handle_t hModule, uint32_t *pCount, const char **pNames) { return L0::Module::fromHandle(hModule)->getKernelNames(pCount, pNames); } __zedllexport ze_result_t __zecall zeKernelCreate( ze_module_handle_t hModule, const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) { return L0::Module::fromHandle(hModule)->createKernel(desc, phFunction); } __zedllexport ze_result_t __zecall zeKernelDestroy( ze_kernel_handle_t hKernel) { return L0::Kernel::fromHandle(hKernel)->destroy(); } __zedllexport ze_result_t __zecall zeModuleGetFunctionPointer( ze_module_handle_t hModule, const char *pKernelName, void **pfnFunction) { return L0::Module::fromHandle(hModule)->getFunctionPointer(pKernelName, pfnFunction); } __zedllexport ze_result_t __zecall zeKernelSetGroupSize( ze_kernel_handle_t hFunction, uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) { return L0::Kernel::fromHandle(hFunction)->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); } __zedllexport ze_result_t __zecall zeKernelSuggestGroupSize( ze_kernel_handle_t hFunction, uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) { return L0::Kernel::fromHandle(hFunction)->suggestGroupSize(globalSizeX, globalSizeY, 
globalSizeZ, groupSizeX, groupSizeY, groupSizeZ); } __zedllexport ze_result_t __zecall zeKernelSuggestMaxCooperativeGroupCount( ze_kernel_handle_t hKernel, uint32_t *totalGroupCount) { return L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(totalGroupCount); } __zedllexport ze_result_t __zecall zeKernelSetArgumentValue( ze_kernel_handle_t hFunction, uint32_t argIndex, size_t argSize, const void *pArgValue) { return L0::Kernel::fromHandle(hFunction)->setArgumentValue(argIndex, argSize, pArgValue); } __zedllexport ze_result_t __zecall zeKernelSetAttribute( ze_kernel_handle_t hKernel, ze_kernel_attribute_t attr, uint32_t size, const void *pValue) { return L0::Kernel::fromHandle(hKernel)->setAttribute(attr, size, pValue); } __zedllexport ze_result_t __zecall zeKernelGetAttribute( ze_kernel_handle_t hKernel, ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue) { return L0::Kernel::fromHandle(hKernel)->getAttribute(attr, pSize, pValue); } __zedllexport ze_result_t __zecall zeKernelSetIntermediateCacheConfig( ze_kernel_handle_t hKernel, ze_cache_config_t cacheConfig) { return L0::Kernel::fromHandle(hKernel)->setIntermediateCacheConfig(cacheConfig); } __zedllexport ze_result_t __zecall zeKernelGetProperties( ze_kernel_handle_t hKernel, ze_kernel_properties_t *pKernelProperties) { return L0::Kernel::fromHandle(hKernel)->getProperties(pKernelProperties); } __zedllexport ze_result_t __zecall zeCommandListAppendLaunchKernel( ze_command_list_handle_t hCommandList, ze_kernel_handle_t hFunction, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchFunction(hFunction, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } __zedllexport ze_result_t __zecall zeCommandListAppendLaunchCooperativeKernel( ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, 
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } __zedllexport ze_result_t __zecall zeCommandListAppendLaunchKernelIndirect( ze_command_list_handle_t hCommandList, ze_kernel_handle_t hFunction, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchFunctionIndirect(hFunction, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); } __zedllexport ze_result_t __zecall zeCommandListAppendLaunchMultipleKernelsIndirect( ze_command_list_handle_t hCommandList, uint32_t numFunctions, ze_kernel_handle_t *phFunctions, const uint32_t *pCountBuffer, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleFunctionsIndirect(numFunctions, phFunctions, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_residency.cpp000066400000000000000000000017351363734646600244210ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device.h" #include extern "C" { __zedllexport ze_result_t __zecall zeDeviceMakeMemoryResident( ze_device_handle_t hDevice, void *ptr, size_t size) { return L0::Device::fromHandle(hDevice)->makeMemoryResident(ptr, size); } __zedllexport ze_result_t __zecall zeDeviceEvictMemory( ze_device_handle_t hDevice, void *ptr, size_t size) { return L0::Device::fromHandle(hDevice)->evictMemory(ptr, size); } __zedllexport ze_result_t __zecall zeDeviceMakeImageResident( ze_device_handle_t 
hDevice, ze_image_handle_t hImage) { return L0::Device::fromHandle(hDevice)->makeImageResident(hImage); } __zedllexport ze_result_t __zecall zeDeviceEvictImage( ze_device_handle_t hDevice, ze_image_handle_t hImage) { return L0::Device::fromHandle(hDevice)->evictImage(hImage); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/core/ze_sampler.cpp000066400000000000000000000011161363734646600240700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/sampler/sampler.h" #include extern "C" { __zedllexport ze_result_t __zecall zeSamplerCreate( ze_device_handle_t hDevice, const ze_sampler_desc_t *desc, ze_sampler_handle_t *phSampler) { return L0::Device::fromHandle(hDevice)->createSampler(desc, phSampler); } __zedllexport ze_result_t __zecall zeSamplerDestroy( ze_sampler_handle_t hSampler) { return L0::Sampler::fromHandle(hSampler)->destroy(); } } // extern "C" compute-runtime-20.13.16352/level_zero/api/experimental/000077500000000000000000000000001363734646600227715ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/api/experimental/CMakeLists.txt000066400000000000000000000002601363734646600255270ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_EXPERIMENTAL_API ) set_property(GLOBAL PROPERTY L0_EXPERIMENTAL_API ${L0_EXPERIMENTAL_API})compute-runtime-20.13.16352/level_zero/api/tools/000077500000000000000000000000001363734646600214345ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/api/tools/CMakeLists.txt000066400000000000000000000006731363734646600242020ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_TOOLS_API ${CMAKE_CURRENT_SOURCE_DIR}/ze_tools_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_metric.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_sysman.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_tracing.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/zet_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zet_module.cpp ) set_property(GLOBAL PROPERTY L0_TOOLS_API ${L0_TOOLS_API}) compute-runtime-20.13.16352/level_zero/api/tools/ze_tools_loader.cpp000066400000000000000000000763151363734646600253400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/source/inc/ze_intel_gpu.h" #include #include #include #include #include "ze_ddi_tables.h" extern "C" { extern ze_gpu_driver_dditable_t driver_ddiTable; __zedllexport ze_result_t __zecall zetGetGlobalProcAddrTable( ze_api_version_t version, zet_global_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnInit = (zet_pfnInit_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetInit"); return result; } __zedllexport ze_result_t __zecall zetGetDeviceProcAddrTable( ze_api_version_t version, zet_device_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnActivateMetricGroups = (zet_pfnDeviceActivateMetricGroups_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDeviceActivateMetricGroups"); return result; } __zedllexport ze_result_t __zecall zetGetCommandListProcAddrTable( ze_api_version_t version, zet_command_list_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result 
= ZE_RESULT_SUCCESS; pDdiTable->pfnAppendMetricTracerMarker = (zet_pfnCommandListAppendMetricTracerMarker_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetCommandListAppendMetricTracerMarker"); pDdiTable->pfnAppendMetricQueryBegin = (zet_pfnCommandListAppendMetricQueryBegin_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetCommandListAppendMetricQueryBegin"); pDdiTable->pfnAppendMetricQueryEnd = (zet_pfnCommandListAppendMetricQueryEnd_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetCommandListAppendMetricQueryEnd"); pDdiTable->pfnAppendMetricMemoryBarrier = (zet_pfnCommandListAppendMetricMemoryBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetCommandListAppendMetricMemoryBarrier"); return result; } __zedllexport ze_result_t __zecall zetGetModuleProcAddrTable( ze_api_version_t version, zet_module_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetDebugInfo = (zet_pfnModuleGetDebugInfo_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetModuleGetDebugInfo"); return result; } __zedllexport ze_result_t __zecall zetGetKernelProcAddrTable( ze_api_version_t version, zet_kernel_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProfileInfo = (zet_pfnKernelGetProfileInfo_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetKernelGetProfileInfo"); return result; } __zedllexport ze_result_t __zecall zetGetMetricGroupProcAddrTable( ze_api_version_t version, zet_metric_group_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return 
ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = (zet_pfnMetricGroupGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricGroupGet"); pDdiTable->pfnGetProperties = (zet_pfnMetricGroupGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricGroupGetProperties"); pDdiTable->pfnCalculateMetricValues = (zet_pfnMetricGroupCalculateMetricValues_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricGroupCalculateMetricValues"); return result; } __zedllexport ze_result_t __zecall zetGetMetricProcAddrTable( ze_api_version_t version, zet_metric_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = (zet_pfnMetricGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricGet"); pDdiTable->pfnGetProperties = (zet_pfnMetricGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricGetProperties"); return result; } __zedllexport ze_result_t __zecall zetGetMetricTracerProcAddrTable( ze_api_version_t version, zet_metric_tracer_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnOpen = (zet_pfnMetricTracerOpen_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricTracerOpen"); pDdiTable->pfnClose = (zet_pfnMetricTracerClose_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricTracerClose"); pDdiTable->pfnReadData = 
(zet_pfnMetricTracerReadData_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricTracerReadData"); return result; } __zedllexport ze_result_t __zecall zetGetMetricQueryPoolProcAddrTable( ze_api_version_t version, zet_metric_query_pool_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (zet_pfnMetricQueryPoolCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricQueryPoolCreate"); pDdiTable->pfnDestroy = (zet_pfnMetricQueryPoolDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricQueryPoolDestroy"); return result; } __zedllexport ze_result_t __zecall zetGetMetricQueryProcAddrTable( ze_api_version_t version, zet_metric_query_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (zet_pfnMetricQueryCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricQueryCreate"); pDdiTable->pfnDestroy = (zet_pfnMetricQueryDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricQueryDestroy"); pDdiTable->pfnReset = (zet_pfnMetricQueryReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricQueryReset"); pDdiTable->pfnGetData = (zet_pfnMetricQueryGetData_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetMetricQueryGetData"); return result; } __zedllexport ze_result_t __zecall zetGetTracerProcAddrTable( ze_api_version_t version, zet_tracer_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if 
(nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnCreate = (zet_pfnTracerCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetTracerCreate"); pDdiTable->pfnDestroy = (zet_pfnTracerDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetTracerDestroy"); pDdiTable->pfnSetPrologues = (zet_pfnTracerSetPrologues_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetTracerSetPrologues"); pDdiTable->pfnSetEpilogues = (zet_pfnTracerSetEpilogues_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetTracerSetEpilogues"); pDdiTable->pfnSetEnabled = (zet_pfnTracerSetEnabled_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetTracerSetEnabled"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanProcAddrTable( ze_api_version_t version, zet_sysman_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGet = (zet_pfnSysmanGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanGet"); pDdiTable->pfnDeviceGetProperties = (zet_pfnSysmanDeviceGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanDeviceGetProperties"); pDdiTable->pfnSchedulerGetSupportedModes = (zet_pfnSysmanSchedulerGetSupportedModes_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerGetSupportedModes"); pDdiTable->pfnSchedulerGetCurrentMode = (zet_pfnSysmanSchedulerGetCurrentMode_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerGetCurrentMode"); pDdiTable->pfnSchedulerGetTimeoutModeProperties = (zet_pfnSysmanSchedulerGetTimeoutModeProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerGetTimeoutModeProperties"); pDdiTable->pfnSchedulerGetTimesliceModeProperties = 
(zet_pfnSysmanSchedulerGetTimesliceModeProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerGetTimesliceModeProperties"); pDdiTable->pfnSchedulerSetTimeoutMode = (zet_pfnSysmanSchedulerSetTimeoutMode_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerSetTimeoutMode"); pDdiTable->pfnSchedulerSetTimesliceMode = (zet_pfnSysmanSchedulerSetTimesliceMode_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerSetTimesliceMode"); pDdiTable->pfnSchedulerSetExclusiveMode = (zet_pfnSysmanSchedulerSetExclusiveMode_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerSetExclusiveMode"); pDdiTable->pfnSchedulerSetComputeUnitDebugMode = (zet_pfnSysmanSchedulerSetComputeUnitDebugMode_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanSchedulerSetComputeUnitDebugMode"); pDdiTable->pfnPerformanceProfileGetSupported = (zet_pfnSysmanPerformanceProfileGetSupported_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPerformanceProfileGetSupported"); pDdiTable->pfnPerformanceProfileGet = (zet_pfnSysmanPerformanceProfileGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPerformanceProfileGet"); pDdiTable->pfnPerformanceProfileSet = (zet_pfnSysmanPerformanceProfileSet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPerformanceProfileSet"); pDdiTable->pfnProcessesGetState = (zet_pfnSysmanProcessesGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanProcessesGetState"); pDdiTable->pfnDeviceReset = (zet_pfnSysmanDeviceReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanDeviceReset"); pDdiTable->pfnDeviceGetRepairStatus = (zet_pfnSysmanDeviceGetRepairStatus_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanDeviceGetRepairStatus"); pDdiTable->pfnPciGetProperties = (zet_pfnSysmanPciGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPciGetProperties"); pDdiTable->pfnPciGetState = 
(zet_pfnSysmanPciGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPciGetState"); pDdiTable->pfnPciGetBars = (zet_pfnSysmanPciGetBars_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPciGetBars"); pDdiTable->pfnPciGetStats = (zet_pfnSysmanPciGetStats_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPciGetStats"); pDdiTable->pfnPowerGet = (zet_pfnSysmanPowerGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPowerGet"); pDdiTable->pfnFrequencyGet = (zet_pfnSysmanFrequencyGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyGet"); pDdiTable->pfnEngineGet = (zet_pfnSysmanEngineGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEngineGet"); pDdiTable->pfnStandbyGet = (zet_pfnSysmanStandbyGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanStandbyGet"); pDdiTable->pfnFirmwareGet = (zet_pfnSysmanFirmwareGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFirmwareGet"); pDdiTable->pfnMemoryGet = (zet_pfnSysmanMemoryGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanMemoryGet"); pDdiTable->pfnFabricPortGet = (zet_pfnSysmanFabricPortGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFabricPortGet"); pDdiTable->pfnTemperatureGet = (zet_pfnSysmanTemperatureGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanTemperatureGet"); pDdiTable->pfnPsuGet = (zet_pfnSysmanPsuGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPsuGet"); pDdiTable->pfnFanGet = (zet_pfnSysmanFanGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFanGet"); pDdiTable->pfnLedGet = (zet_pfnSysmanLedGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanLedGet"); pDdiTable->pfnRasGet = (zet_pfnSysmanRasGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanRasGet"); pDdiTable->pfnEventGet = (zet_pfnSysmanEventGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEventGet"); pDdiTable->pfnDiagnosticsGet = 
(zet_pfnSysmanDiagnosticsGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanDiagnosticsGet"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanPowerProcAddrTable( ze_api_version_t version, zet_sysman_power_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanPowerGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPowerGetProperties"); pDdiTable->pfnGetEnergyCounter = (zet_pfnSysmanPowerGetEnergyCounter_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPowerGetEnergyCounter"); pDdiTable->pfnGetLimits = (zet_pfnSysmanPowerGetLimits_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPowerGetLimits"); pDdiTable->pfnSetLimits = (zet_pfnSysmanPowerSetLimits_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPowerSetLimits"); pDdiTable->pfnGetEnergyThreshold = (zet_pfnSysmanPowerGetEnergyThreshold_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPowerGetEnergyThreshold"); pDdiTable->pfnSetEnergyThreshold = (zet_pfnSysmanPowerSetEnergyThreshold_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPowerSetEnergyThreshold"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanFrequencyProcAddrTable( ze_api_version_t version, zet_sysman_frequency_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanFrequencyGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyGetProperties"); 
pDdiTable->pfnGetAvailableClocks = (zet_pfnSysmanFrequencyGetAvailableClocks_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyGetAvailableClocks"); pDdiTable->pfnGetRange = (zet_pfnSysmanFrequencyGetRange_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyGetRange"); pDdiTable->pfnSetRange = (zet_pfnSysmanFrequencySetRange_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencySetRange"); pDdiTable->pfnGetState = (zet_pfnSysmanFrequencyGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyGetState"); pDdiTable->pfnGetThrottleTime = (zet_pfnSysmanFrequencyGetThrottleTime_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyGetThrottleTime"); pDdiTable->pfnOcGetCapabilities = (zet_pfnSysmanFrequencyOcGetCapabilities_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyOcGetCapabilities"); pDdiTable->pfnOcGetConfig = (zet_pfnSysmanFrequencyOcGetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyOcGetConfig"); pDdiTable->pfnOcSetConfig = (zet_pfnSysmanFrequencyOcSetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyOcSetConfig"); pDdiTable->pfnOcGetIccMax = (zet_pfnSysmanFrequencyOcGetIccMax_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyOcGetIccMax"); pDdiTable->pfnOcSetIccMax = (zet_pfnSysmanFrequencyOcSetIccMax_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyOcSetIccMax"); pDdiTable->pfnOcGetTjMax = (zet_pfnSysmanFrequencyOcGetTjMax_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyOcGetTjMax"); pDdiTable->pfnOcSetTjMax = (zet_pfnSysmanFrequencyOcSetTjMax_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFrequencyOcSetTjMax"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanEngineProcAddrTable( ze_api_version_t version, zet_sysman_engine_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return 
ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanEngineGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEngineGetProperties"); pDdiTable->pfnGetActivity = (zet_pfnSysmanEngineGetActivity_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEngineGetActivity"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanStandbyProcAddrTable( ze_api_version_t version, zet_sysman_standby_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanStandbyGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanStandbyGetProperties"); pDdiTable->pfnGetMode = (zet_pfnSysmanStandbyGetMode_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanStandbyGetMode"); pDdiTable->pfnSetMode = (zet_pfnSysmanStandbySetMode_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanStandbySetMode"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanFirmwareProcAddrTable( ze_api_version_t version, zet_sysman_firmware_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanFirmwareGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFirmwareGetProperties"); pDdiTable->pfnGetChecksum = 
(zet_pfnSysmanFirmwareGetChecksum_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFirmwareGetChecksum"); pDdiTable->pfnFlash = (zet_pfnSysmanFirmwareFlash_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFirmwareFlash"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanMemoryProcAddrTable( ze_api_version_t version, zet_sysman_memory_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanMemoryGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanMemoryGetProperties"); pDdiTable->pfnGetState = (zet_pfnSysmanMemoryGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanMemoryGetState"); pDdiTable->pfnGetBandwidth = (zet_pfnSysmanMemoryGetBandwidth_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanMemoryGetBandwidth"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanFabricPortProcAddrTable( ze_api_version_t version, zet_sysman_fabric_port_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanFabricPortGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFabricPortGetProperties"); pDdiTable->pfnGetLinkType = (zet_pfnSysmanFabricPortGetLinkType_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFabricPortGetLinkType"); pDdiTable->pfnGetConfig = (zet_pfnSysmanFabricPortGetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFabricPortGetConfig"); pDdiTable->pfnSetConfig = 
(zet_pfnSysmanFabricPortSetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFabricPortSetConfig"); pDdiTable->pfnGetState = (zet_pfnSysmanFabricPortGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFabricPortGetState"); pDdiTable->pfnGetThroughput = (zet_pfnSysmanFabricPortGetThroughput_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFabricPortGetThroughput"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanTemperatureProcAddrTable( ze_api_version_t version, zet_sysman_temperature_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanTemperatureGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanTemperatureGetProperties"); pDdiTable->pfnGetConfig = (zet_pfnSysmanTemperatureGetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanTemperatureGetConfig"); pDdiTable->pfnSetConfig = (zet_pfnSysmanTemperatureSetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanTemperatureSetConfig"); pDdiTable->pfnGetState = (zet_pfnSysmanTemperatureGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanTemperatureGetState"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanPsuProcAddrTable( ze_api_version_t version, zet_sysman_psu_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanPsuGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPsuGetProperties"); 
pDdiTable->pfnGetState = (zet_pfnSysmanPsuGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanPsuGetState"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanFanProcAddrTable( ze_api_version_t version, zet_sysman_fan_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanFanGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFanGetProperties"); pDdiTable->pfnGetConfig = (zet_pfnSysmanFanGetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFanGetConfig"); pDdiTable->pfnSetConfig = (zet_pfnSysmanFanSetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFanSetConfig"); pDdiTable->pfnGetState = (zet_pfnSysmanFanGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanFanGetState"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanLedProcAddrTable( ze_api_version_t version, zet_sysman_led_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanLedGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanLedGetProperties"); pDdiTable->pfnGetState = (zet_pfnSysmanLedGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanLedGetState"); pDdiTable->pfnSetState = (zet_pfnSysmanLedSetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanLedSetState"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanRasProcAddrTable( ze_api_version_t version, 
zet_sysman_ras_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanRasGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanRasGetProperties"); pDdiTable->pfnGetConfig = (zet_pfnSysmanRasGetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanRasGetConfig"); pDdiTable->pfnSetConfig = (zet_pfnSysmanRasSetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanRasSetConfig"); pDdiTable->pfnGetState = (zet_pfnSysmanRasGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanRasGetState"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanDiagnosticsProcAddrTable( ze_api_version_t version, zet_sysman_diagnostics_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetProperties = (zet_pfnSysmanDiagnosticsGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanDiagnosticsGetProperties"); pDdiTable->pfnGetTests = (zet_pfnSysmanDiagnosticsGetTests_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanDiagnosticsGetTests"); pDdiTable->pfnRunTests = (zet_pfnSysmanDiagnosticsRunTests_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanDiagnosticsRunTests"); return result; } __zedllexport ze_result_t __zecall zetGetSysmanEventProcAddrTable( ze_api_version_t version, zet_sysman_event_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if 
(nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnGetConfig = (zet_pfnSysmanEventGetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEventGetConfig"); pDdiTable->pfnSetConfig = (zet_pfnSysmanEventSetConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEventSetConfig"); pDdiTable->pfnGetState = (zet_pfnSysmanEventGetState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEventGetState"); pDdiTable->pfnListen = (zet_pfnSysmanEventListen_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetSysmanEventListen"); return result; } __zedllexport ze_result_t __zecall zetGetDebugProcAddrTable( ze_api_version_t version, zet_debug_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; if (driver_ddiTable.version < version) return ZE_RESULT_ERROR_UNKNOWN; if (nullptr == driver_ddiTable.driverLibrary) { return ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t result = ZE_RESULT_SUCCESS; pDdiTable->pfnAttach = (zet_pfnDebugAttach_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugAttach"); pDdiTable->pfnDetach = (zet_pfnDebugDetach_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugDetach"); pDdiTable->pfnGetNumThreads = (zet_pfnDebugGetNumThreads_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugGetNumThreads"); pDdiTable->pfnReadEvent = (zet_pfnDebugReadEvent_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugReadEvent"); pDdiTable->pfnInterrupt = (zet_pfnDebugInterrupt_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugInterrupt"); pDdiTable->pfnResume = (zet_pfnDebugResume_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugResume"); pDdiTable->pfnReadMemory = (zet_pfnDebugReadMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugReadMemory"); pDdiTable->pfnWriteMemory = (zet_pfnDebugWriteMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, 
"zetDebugWriteMemory"); pDdiTable->pfnReadState = (zet_pfnDebugReadState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugReadState"); pDdiTable->pfnWriteState = (zet_pfnDebugWriteState_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zetDebugWriteState"); return result; } } // extern C compute-runtime-20.13.16352/level_zero/api/tools/zet_driver.cpp000066400000000000000000000005331363734646600243160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tools_init.h" #include #include extern "C" { __zedllexport ze_result_t __zecall zetInit( ze_init_flag_t flags) { return L0::ToolsInit::get()->initTools(flags); } } // extern C compute-runtime-20.13.16352/level_zero/api/tools/zet_metric.cpp000066400000000000000000000117721363734646600243150ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/metrics/metric.h" #include extern "C" { __zedllexport ze_result_t __zecall zetMetricGroupGet( zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { return L0::metricGroupGet(hDevice, pCount, phMetricGroups); } __zedllexport ze_result_t __zecall zetMetricGroupGetProperties( zet_metric_group_handle_t hMetricGroup, zet_metric_group_properties_t *pProperties) { return L0::MetricGroup::fromHandle(hMetricGroup)->getProperties(pProperties); } __zedllexport ze_result_t __zecall zetMetricGet( zet_metric_group_handle_t hMetricGroup, uint32_t *pCount, zet_metric_handle_t *phMetrics) { return L0::metricGet(hMetricGroup, pCount, phMetrics); } __zedllexport ze_result_t __zecall zetMetricGetProperties( zet_metric_handle_t hMetric, zet_metric_properties_t *pProperties) { return L0::Metric::fromHandle(hMetric)->getProperties(pProperties); } __zedllexport 
ze_result_t __zecall zetMetricGroupCalculateMetricValues( zet_metric_group_handle_t hMetricGroup, size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) { return L0::MetricGroup::fromHandle(hMetricGroup)->calculateMetricValues(rawDataSize, pRawData, pMetricValueCount, pMetricValues); } __zedllexport ze_result_t __zecall zetDeviceActivateMetricGroups( zet_device_handle_t hDevice, uint32_t count, zet_metric_group_handle_t *phMetricGroups) { return L0::Device::fromHandle(hDevice)->activateMetricGroups(count, phMetricGroups); } __zedllexport ze_result_t __zecall zetMetricTracerOpen( zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_tracer_desc_t *pDesc, ze_event_handle_t hNotificationEvent, zet_metric_tracer_handle_t *phMetricTracer) { return L0::metricTracerOpen(hDevice, hMetricGroup, pDesc, hNotificationEvent, phMetricTracer); } __zedllexport ze_result_t __zecall zetCommandListAppendMetricTracerMarker( ze_command_list_handle_t hCommandList, zet_metric_tracer_handle_t hMetricTracer, uint32_t value) { return L0::CommandList::fromHandle(hCommandList)->appendMetricTracerMarker(hMetricTracer, value); } __zedllexport ze_result_t __zecall zetMetricTracerClose( zet_metric_tracer_handle_t hMetricTracer) { return L0::MetricTracer::fromHandle(hMetricTracer)->close(); } __zedllexport ze_result_t __zecall zetMetricTracerReadData( zet_metric_tracer_handle_t hMetricTracer, uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) { return L0::MetricTracer::fromHandle(hMetricTracer)->readData(maxReportCount, pRawDataSize, pRawData); } __zedllexport ze_result_t __zecall zetMetricQueryPoolCreate( zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *desc, zet_metric_query_pool_handle_t *phMetricQueryPool) { return L0::metricQueryPoolCreate(hDevice, hMetricGroup, desc, phMetricQueryPool); } __zedllexport ze_result_t __zecall 
zetMetricQueryPoolDestroy( zet_metric_query_pool_handle_t hMetricQueryPool) { return L0::metricQueryPoolDestroy(hMetricQueryPool); } __zedllexport ze_result_t __zecall zetMetricQueryCreate( zet_metric_query_pool_handle_t hMetricQueryPool, uint32_t index, zet_metric_query_handle_t *phMetricQuery) { return L0::MetricQueryPool::fromHandle(hMetricQueryPool)->createMetricQuery(index, phMetricQuery); } __zedllexport ze_result_t __zecall zetMetricQueryDestroy( zet_metric_query_handle_t hMetricQuery) { return L0::MetricQuery::fromHandle(hMetricQuery)->destroy(); } __zedllexport ze_result_t __zecall zetMetricQueryReset( zet_metric_query_handle_t hMetricQuery) { return L0::MetricQuery::fromHandle(hMetricQuery)->reset(); } __zedllexport ze_result_t __zecall zetCommandListAppendMetricQueryBegin( zet_command_list_handle_t hCommandList, zet_metric_query_handle_t hMetricQuery) { return L0::CommandList::fromHandle(hCommandList)->appendMetricQueryBegin(hMetricQuery); } __zedllexport ze_result_t __zecall zetCommandListAppendMetricQueryEnd( zet_command_list_handle_t hCommandList, zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hCompletionEvent) { return L0::CommandList::fromHandle(hCommandList)->appendMetricQueryEnd(hMetricQuery, hCompletionEvent); } __zedllexport ze_result_t __zecall zetCommandListAppendMetricMemoryBarrier( zet_command_list_handle_t hCommandList) { return L0::CommandList::fromHandle(hCommandList)->appendMetricMemoryBarrier(); } __zedllexport ze_result_t __zecall zetMetricQueryGetData( zet_metric_query_handle_t hMetricQuery, size_t *pRawDataSize, uint8_t *pRawData) { return L0::MetricQuery::fromHandle(hMetricQuery)->getData(pRawDataSize, pRawData); } } // extern C compute-runtime-20.13.16352/level_zero/api/tools/zet_module.cpp000066400000000000000000000007101363734646600243050ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module.h" #include extern "C" { 
__zedllexport ze_result_t __zecall zetModuleGetDebugInfo( zet_module_handle_t hModule, zet_module_debug_info_format_t format, size_t *pSize, uint8_t *pDebugInfo) { return L0::Module::fromHandle(hModule)->getDebugInfo(pSize, pDebugInfo); } } // extern Ccompute-runtime-20.13.16352/level_zero/api/tools/zet_sysman.cpp000066400000000000000000000454061363734646600243450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include "sysman/sysman.h" extern "C" { __zedllexport ze_result_t __zecall zetSysmanGet( zet_device_handle_t hDevice, zet_sysman_version_t version, zet_sysman_handle_t *phSysman) { return L0::SysmanHandleContext::sysmanGet(hDevice, phSysman); } __zedllexport ze_result_t __zecall zetSysmanDeviceGetProperties( zet_sysman_handle_t hSysman, zet_sysman_properties_t *pProperties) { return L0::Sysman::fromHandle(hSysman)->deviceGetProperties(pProperties); } __zedllexport ze_result_t __zecall zetSysmanSchedulerGetCurrentMode( zet_sysman_handle_t hSysman, zet_sched_mode_t *pMode) { return L0::Sysman::fromHandle(hSysman)->schedulerGetCurrentMode(pMode); } __zedllexport ze_result_t __zecall zetSysmanSchedulerGetTimeoutModeProperties( zet_sysman_handle_t hSysman, ze_bool_t getDefaults, zet_sched_timeout_properties_t *pConfig) { return L0::Sysman::fromHandle(hSysman)->schedulerGetTimeoutModeProperties(getDefaults, pConfig); } __zedllexport ze_result_t __zecall zetSysmanSchedulerGetTimesliceModeProperties( zet_sysman_handle_t hSysman, ze_bool_t getDefaults, zet_sched_timeslice_properties_t *pConfig) { return L0::Sysman::fromHandle(hSysman)->schedulerGetTimesliceModeProperties(getDefaults, pConfig); } __zedllexport ze_result_t __zecall zetSysmanSchedulerSetTimeoutMode( zet_sysman_handle_t hSysman, zet_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReboot) { return L0::Sysman::fromHandle(hSysman)->schedulerSetTimeoutMode(pProperties, pNeedReboot); } __zedllexport ze_result_t __zecall 
zetSysmanSchedulerSetTimesliceMode( zet_sysman_handle_t hSysman, zet_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReboot) { return L0::Sysman::fromHandle(hSysman)->schedulerSetTimesliceMode(pProperties, pNeedReboot); } __zedllexport ze_result_t __zecall zetSysmanSchedulerSetExclusiveMode( zet_sysman_handle_t hSysman, ze_bool_t *pNeedReboot) { return L0::Sysman::fromHandle(hSysman)->schedulerSetExclusiveMode(pNeedReboot); } __zedllexport ze_result_t __zecall zetSysmanSchedulerSetComputeUnitDebugMode( zet_sysman_handle_t hSysman, ze_bool_t *pNeedReboot) { return L0::Sysman::fromHandle(hSysman)->schedulerSetComputeUnitDebugMode(pNeedReboot); } __zedllexport ze_result_t __zecall zetSysmanProcessesGetState( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_process_state_t *pProcesses) { return L0::Sysman::fromHandle(hSysman)->processesGetState(pCount, pProcesses); } __zedllexport ze_result_t __zecall zetSysmanDeviceReset( zet_sysman_handle_t hSysman) { return L0::Sysman::fromHandle(hSysman)->deviceReset(); } __zedllexport ze_result_t __zecall zetSysmanDeviceGetRepairStatus( zet_sysman_handle_t hSysman, zet_repair_status_t *pRepairStatus) { return L0::Sysman::fromHandle(hSysman)->deviceGetRepairStatus(pRepairStatus); } __zedllexport ze_result_t __zecall zetSysmanPciGetProperties( zet_sysman_handle_t hSysman, zet_pci_properties_t *pProperties) { return L0::Sysman::fromHandle(hSysman)->pciGetProperties(pProperties); } __zedllexport ze_result_t __zecall zetSysmanPciGetState( zet_sysman_handle_t hSysman, zet_pci_state_t *pState) { return L0::Sysman::fromHandle(hSysman)->pciGetState(pState); } __zedllexport ze_result_t __zecall zetSysmanPciGetBars( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_pci_bar_properties_t *pProperties) { return L0::Sysman::fromHandle(hSysman)->pciGetBars(pCount, pProperties); } __zedllexport ze_result_t __zecall zetSysmanPciGetStats( zet_sysman_handle_t hSysman, zet_pci_stats_t *pStats) { return 
L0::Sysman::fromHandle(hSysman)->pciGetStats(pStats); } __zedllexport ze_result_t __zecall zetSysmanPowerGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_pwr_handle_t *phPower) { return L0::Sysman::fromHandle(hSysman)->powerGet(pCount, phPower); } __zedllexport ze_result_t __zecall zetSysmanPowerGetProperties( zet_sysman_pwr_handle_t hPower, zet_power_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanPowerGetEnergyCounter( zet_sysman_pwr_handle_t hPower, zet_power_energy_counter_t *pEnergy) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanPowerGetLimits( zet_sysman_pwr_handle_t hPower, zet_power_sustained_limit_t *pSustained, zet_power_burst_limit_t *pBurst, zet_power_peak_limit_t *pPeak) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanPowerSetLimits( zet_sysman_pwr_handle_t hPower, const zet_power_sustained_limit_t *pSustained, const zet_power_burst_limit_t *pBurst, const zet_power_peak_limit_t *pPeak) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanPowerGetEnergyThreshold( zet_sysman_pwr_handle_t hPower, zet_energy_threshold_t *pThreshold) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanPowerSetEnergyThreshold( zet_sysman_pwr_handle_t hPower, double threshold) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_freq_handle_t *phFrequency) { return L0::Sysman::fromHandle(hSysman)->frequencyGet(pCount, phFrequency); } __zedllexport ze_result_t __zecall zetSysmanFrequencyGetProperties( zet_sysman_freq_handle_t hFrequency, zet_freq_properties_t *pProperties) { return L0::Frequency::fromHandle(hFrequency)->frequencyGetProperties(pProperties); } __zedllexport ze_result_t __zecall 
zetSysmanFrequencyGetAvailableClocks( zet_sysman_freq_handle_t hFrequency, uint32_t *pCount, double *phFrequency) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyGetRange( zet_sysman_freq_handle_t hFrequency, zet_freq_range_t *pLimits) { return L0::Frequency::fromHandle(hFrequency)->frequencyGetRange(pLimits); } __zedllexport ze_result_t __zecall zetSysmanFrequencySetRange( zet_sysman_freq_handle_t hFrequency, const zet_freq_range_t *pLimits) { return L0::Frequency::fromHandle(hFrequency)->frequencySetRange(pLimits); } __zedllexport ze_result_t __zecall zetSysmanFrequencyGetState( zet_sysman_freq_handle_t hFrequency, zet_freq_state_t *pState) { return L0::Frequency::fromHandle(hFrequency)->frequencyGetState(pState); } __zedllexport ze_result_t __zecall zetSysmanFrequencyGetThrottleTime( zet_sysman_freq_handle_t hFrequency, zet_freq_throttle_time_t *pThrottleTime) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyOcGetCapabilities( zet_sysman_freq_handle_t hFrequency, zet_oc_capabilities_t *pOcCapabilities) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyOcGetConfig( zet_sysman_freq_handle_t hFrequency, zet_oc_config_t *pOcConfiguration) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyOcSetConfig( zet_sysman_freq_handle_t hFrequency, zet_oc_config_t *pOcConfiguration, ze_bool_t *pDeviceRestart) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyOcGetIccMax( zet_sysman_freq_handle_t hFrequency, double *pOcIccMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyOcSetIccMax( zet_sysman_freq_handle_t hFrequency, double ocIccMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyOcGetTjMax( 
zet_sysman_freq_handle_t hFrequency, double *pOcTjMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFrequencyOcSetTjMax( zet_sysman_freq_handle_t hFrequency, double ocTjMax) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanEngineGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_engine_handle_t *phEngine) { return L0::Sysman::fromHandle(hSysman)->engineGet(pCount, phEngine); } __zedllexport ze_result_t __zecall zetSysmanEngineGetProperties( zet_sysman_engine_handle_t hEngine, zet_engine_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanEngineGetActivity( zet_sysman_engine_handle_t hEngine, zet_engine_stats_t *pStats) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanStandbyGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_standby_handle_t *phStandby) { return L0::Sysman::fromHandle(hSysman)->standbyGet(pCount, phStandby); } __zedllexport ze_result_t __zecall zetSysmanStandbyGetProperties( zet_sysman_standby_handle_t hStandby, zet_standby_properties_t *pProperties) { return L0::Standby::fromHandle(hStandby)->standbyGetProperties(pProperties); } __zedllexport ze_result_t __zecall zetSysmanStandbyGetMode( zet_sysman_standby_handle_t hStandby, zet_standby_promo_mode_t *pMode) { return L0::Standby::fromHandle(hStandby)->standbyGetMode(pMode); } __zedllexport ze_result_t __zecall zetSysmanStandbySetMode( zet_sysman_standby_handle_t hStandby, zet_standby_promo_mode_t mode) { return L0::Standby::fromHandle(hStandby)->standbySetMode(mode); } __zedllexport ze_result_t __zecall zetSysmanFirmwareGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_firmware_handle_t *phFirmware) { return L0::Sysman::fromHandle(hSysman)->firmwareGet(pCount, phFirmware); } __zedllexport ze_result_t __zecall zetSysmanFirmwareGetProperties( 
zet_sysman_firmware_handle_t hFirmware, zet_firmware_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFirmwareGetChecksum( zet_sysman_firmware_handle_t hFirmware, uint32_t *pChecksum) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFirmwareFlash( zet_sysman_firmware_handle_t hFirmware, void *pImage, uint32_t size) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanMemoryGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_mem_handle_t *phMemory) { return L0::Sysman::fromHandle(hSysman)->memoryGet(pCount, phMemory); } __zedllexport ze_result_t __zecall zetSysmanMemoryGetProperties( zet_sysman_mem_handle_t hMemory, zet_mem_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanMemoryGetState( zet_sysman_mem_handle_t hMemory, zet_mem_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanMemoryGetBandwidth( zet_sysman_mem_handle_t hMemory, zet_mem_bandwidth_t *pBandwidth) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFabricPortGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_fabric_port_handle_t *phPort) { return L0::Sysman::fromHandle(hSysman)->fabricPortGet(pCount, phPort); } __zedllexport ze_result_t __zecall zetSysmanFabricPortGetProperties( zet_sysman_fabric_port_handle_t hPort, zet_fabric_port_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFabricPortGetLinkType( zet_sysman_fabric_port_handle_t hPort, ze_bool_t verbose, zet_fabric_link_type_t *pLinkType) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFabricPortGetConfig( zet_sysman_fabric_port_handle_t hPort, zet_fabric_port_config_t *pConfig) { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFabricPortSetConfig( zet_sysman_fabric_port_handle_t hPort, const zet_fabric_port_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFabricPortGetState( zet_sysman_fabric_port_handle_t hPort, zet_fabric_port_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFabricPortGetThroughput( zet_sysman_fabric_port_handle_t hPort, zet_fabric_port_throughput_t *pThroughput) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanTemperatureGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_temp_handle_t *phTemperature) { return L0::Sysman::fromHandle(hSysman)->temperatureGet(pCount, phTemperature); } __zedllexport ze_result_t __zecall zetSysmanTemperatureGetProperties( zet_sysman_temp_handle_t hTemperature, zet_temp_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanTemperatureGetConfig( zet_sysman_temp_handle_t hTemperature, zet_temp_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanTemperatureSetConfig( zet_sysman_temp_handle_t hTemperature, const zet_temp_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanTemperatureGetState( zet_sysman_temp_handle_t hTemperature, double *pTemperature) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanPsuGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_psu_handle_t *phPsu) { return L0::Sysman::fromHandle(hSysman)->psuGet(pCount, phPsu); } __zedllexport ze_result_t __zecall zetSysmanPsuGetProperties( zet_sysman_psu_handle_t hPsu, zet_psu_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall 
zetSysmanPsuGetState( zet_sysman_psu_handle_t hPsu, zet_psu_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFanGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_fan_handle_t *phFan) { return L0::Sysman::fromHandle(hSysman)->fanGet(pCount, phFan); } __zedllexport ze_result_t __zecall zetSysmanFanGetProperties( zet_sysman_fan_handle_t hFan, zet_fan_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFanGetConfig( zet_sysman_fan_handle_t hFan, zet_fan_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFanSetConfig( zet_sysman_fan_handle_t hFan, const zet_fan_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanFanGetState( zet_sysman_fan_handle_t hFan, zet_fan_speed_units_t units, uint32_t *pSpeed) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanLedGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_led_handle_t *phLed) { return L0::Sysman::fromHandle(hSysman)->ledGet(pCount, phLed); } __zedllexport ze_result_t __zecall zetSysmanLedGetProperties( zet_sysman_led_handle_t hLed, zet_led_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanLedGetState( zet_sysman_led_handle_t hLed, zet_led_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanLedSetState( zet_sysman_led_handle_t hLed, const zet_led_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanRasGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_ras_handle_t *phRas) { return L0::Sysman::fromHandle(hSysman)->rasGet(pCount, phRas); } __zedllexport ze_result_t __zecall zetSysmanRasGetProperties( zet_sysman_ras_handle_t 
hRas, zet_ras_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanRasGetConfig( zet_sysman_ras_handle_t hRas, zet_ras_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanRasSetConfig( zet_sysman_ras_handle_t hRas, const zet_ras_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanRasGetState( zet_sysman_ras_handle_t hRas, ze_bool_t clear, uint64_t *pTotalErrors, zet_ras_details_t *pDetails) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanEventGet( zet_sysman_handle_t hSysman, zet_sysman_event_handle_t *phEvent) { return L0::Sysman::fromHandle(hSysman)->eventGet(phEvent); } __zedllexport ze_result_t __zecall zetSysmanEventGetConfig( zet_sysman_event_handle_t hEvent, zet_event_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanEventSetConfig( zet_sysman_event_handle_t hEvent, const zet_event_config_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanEventGetState( zet_sysman_event_handle_t hEvent, ze_bool_t clear, uint32_t *pEvents) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanEventListen( ze_driver_handle_t hDriver, uint32_t timeout, uint32_t count, zet_sysman_event_handle_t *phEvents, uint32_t *pEvents) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanDiagnosticsGet( zet_sysman_handle_t hSysman, uint32_t *pCount, zet_sysman_diag_handle_t *phDiagnostics) { return L0::Sysman::fromHandle(hSysman)->diagnosticsGet(pCount, phDiagnostics); } __zedllexport ze_result_t __zecall zetSysmanDiagnosticsGetProperties( zet_sysman_diag_handle_t hDiagnostics, zet_diag_properties_t *pProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport 
ze_result_t __zecall zetSysmanDiagnosticsGetTests( zet_sysman_diag_handle_t hDiagnostics, uint32_t *pCount, zet_diag_test_t *pTests) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } __zedllexport ze_result_t __zecall zetSysmanDiagnosticsRunTests( zet_sysman_diag_handle_t hDiagnostics, uint32_t start, uint32_t end, zet_diag_result_t *pResult) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // extern C compute-runtime-20.13.16352/level_zero/api/tools/zet_tracing.cpp000066400000000000000000000022271363734646600244540ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing.h" #include extern "C" { __zedllexport ze_result_t __zecall zetTracerCreate( zet_driver_handle_t hDriver, const zet_tracer_desc_t *desc, zet_tracer_handle_t *phTracer) { return L0::createAPITracer(hDriver, desc, phTracer); } __zedllexport ze_result_t __zecall zetTracerDestroy( zet_tracer_handle_t hTracer) { return L0::APITracer::fromHandle(hTracer)->destroyTracer(hTracer); } __zedllexport ze_result_t __zecall zetTracerSetPrologues( zet_tracer_handle_t hTracer, zet_core_callbacks_t *pCoreCbs) { return L0::APITracer::fromHandle(hTracer)->setPrologues(pCoreCbs); } __zedllexport ze_result_t __zecall zetTracerSetEpilogues( zet_tracer_handle_t hTracer, zet_core_callbacks_t *pCoreCbs) { return L0::APITracer::fromHandle(hTracer)->setEpilogues(pCoreCbs); } __zedllexport ze_result_t __zecall zetTracerSetEnabled( zet_tracer_handle_t hTracer, ze_bool_t enable) { return L0::APITracer::fromHandle(hTracer)->enableTracer(enable); } } // extern C compute-runtime-20.13.16352/level_zero/cmake/000077500000000000000000000000001363734646600206035ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/cmake/FindLevelZero.cmake000066400000000000000000000013431363734646600243160ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
include(FindPackageHandleStandardArgs) find_path(LevelZero_INCLUDE_DIR NAMES level_zero/ze_api.h PATHS ${LEVEL_ZERO_ROOT} PATH_SUFFIXES "include" ) find_package_handle_standard_args(LevelZero REQUIRED_VARS LevelZero_INCLUDE_DIR ) if(LevelZero_FOUND) list(APPEND LevelZero_INCLUDE_DIRS ${LevelZero_INCLUDE_DIR}) endif() if(LevelZero_FOUND AND NOT TARGET LevelZero::LevelZero) add_library(LevelZero::LevelZero INTERFACE IMPORTED) set_target_properties(LevelZero::LevelZero PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LevelZero_INCLUDE_DIRS}" ) endif() MESSAGE(STATUS "LevelZero_INCLUDE_DIRS: " ${LevelZero_INCLUDE_DIRS}) compute-runtime-20.13.16352/level_zero/cmake/l0_tests.cmake000066400000000000000000000134321363734646600233450ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # ## ## L0 tests settings ## if(NOT SKIP_L0_UNIT_TESTS) # These need to be added to a project to enable platform support in ULTs if(TESTS_GEN8) set(COMPUTE_RUNTIME_ULT_GEN8 ${NEO_SHARED_TEST_DIRECTORY}/unit_test/libult/gen8.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/gen8/cmd_parse_gen8.cpp ) endif() if(TESTS_GEN9) set(COMPUTE_RUNTIME_ULT_GEN9 ${NEO_SHARED_TEST_DIRECTORY}/unit_test/libult/gen9.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/gen9/cmd_parse_gen9.cpp ) endif() if(TESTS_GEN11) set(COMPUTE_RUNTIME_ULT_GEN11 ${NEO_SHARED_TEST_DIRECTORY}/unit_test/libult/gen11.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/gen11/cmd_parse_gen11.cpp ) endif() if(TESTS_GEN12LP) set(COMPUTE_RUNTIME_ULT_GEN12LP ${NEO_SHARED_TEST_DIRECTORY}/unit_test/libult/gen12lp.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/gen12lp/cmd_parse_gen12lp.cpp ) include_directories(${NEO_SHARED_TEST_DIRECTORY}/unit_test/gen12lp/cmd_parse${BRANCH_DIR_SUFFIX}/) endif() ## ULT related settings #Extract compute runtime COMPILE_DEFINITIONS if(NOT SKIP_L0_UNIT_TESTS) get_property(COMPUTE_RUNTIME_MOCKABLE_DEFINITIONS TARGET ${NEO_MOCKABLE_LIB_NAME} PROPERTY COMPILE_DEFINITIONS ) endif() #Append 
additional definitions set(COMPUTE_RUNTIME_MOCKABLE_DEFINITIONS ${COMPUTE_RUNTIME_MOCKABLE_DEFINITIONS} CL_TARGET_OPENCL_VERSION=220 CL_USE_DEPRECATED_OPENCL_1_1_APIS CL_USE_DEPRECATED_OPENCL_1_2_APIS CL_USE_DEPRECATED_OPENCL_2_0_APIS ) if(WIN32) set(COMPUTE_RUNTIME_MOCKABLE_DEFINITIONS ${COMPUTE_RUNTIME_MOCKABLE_DEFINITIONS} WDDM_VERSION_NUMBER=23 CONST_FROM_WDK_10_0_18328_0= ) endif() #Extract compute runtime INCLUDE_DIRECTORIES get_property(COMPUTE_RUNTIME_MOCKABLE_INCLUDES TARGET ${NEO_MOCKABLE_LIB_NAME} PROPERTY INCLUDE_DIRECTORIES ) # Create a library that has the missing ingredients to link add_library(compute_runtime_mockable_extra STATIC EXCLUDE_FROM_ALL ${CMAKE_CURRENT_LIST_DIR}/l0_tests.cmake ${NEO_SHARED_TEST_DIRECTORY}/unit_test/utilities/cpuintrinsics.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/aub/aub_stream_interface.cpp ${COMPUTE_RUNTIME_DIR}/shared/source/debug_settings/debug_settings_manager.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/abort.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/helpers/built_ins_helper.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/helpers/debug_helpers.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/helpers/test_files.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/libult/os_interface.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/libult/source_level_debugger.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/libult/source_level_debugger_library.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_cif.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_csr.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_compilers.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_device.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks${BRANCH_SUFIX_DIR}/mock_gmm_client_context.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_gmm_resource_info.cpp 
${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_gmm_client_context_base.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_memory_manager.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_program.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_sip.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/utilities/debug_settings_reader_creator.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/libult/create_tbx_sockets.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_deferred_deleter.cpp ) set_property(TARGET compute_runtime_mockable_extra APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) set_target_properties(compute_runtime_mockable_extra PROPERTIES FOLDER ${TARGET_NAME_L0}) # These need to be added to a project to enable platform support in ULTs #Additional includes for ULT builds target_include_directories(compute_runtime_mockable_extra PUBLIC ${COMPUTE_RUNTIME_MOCKABLE_INCLUDES} ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/gmm_memory ${SOURCE_LEVEL_DEBUGGER_HEADERS_DIR} ) #Additional compile definitions for ULT builds target_compile_definitions(compute_runtime_mockable_extra PUBLIC ${COMPUTE_RUNTIME_MOCKABLE_DEFINITIONS} ) target_link_libraries(compute_runtime_mockable_extra gmock-gtest ) if(WIN32) target_sources(compute_runtime_mockable_extra PRIVATE ${COMPUTE_RUNTIME_DIR}/shared/source/dll/windows/environment_variables.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_gmm_memory_base.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_wddm.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mocks/mock_compilers_windows.cpp ) target_link_libraries(compute_runtime_mockable_extra ws2_32 ) endif() if(UNIX) target_sources(compute_runtime_mockable_extra PRIVATE ${COMPUTE_RUNTIME_DIR}/opencl/source/dll/linux/allocator_helper.cpp ${COMPUTE_RUNTIME_DIR}/opencl/source/tbx/tbx_sockets_imp.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/linux/drm_mock.cpp ) 
target_link_libraries(compute_runtime_mockable_extra dl ) endif() set_target_properties(compute_runtime_mockable_extra PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() compute-runtime-20.13.16352/level_zero/cmake/source_tree.cmake000066400000000000000000000014631363734646600241300ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # function(create_source_tree target directory) if(WIN32) get_filename_component(directory ${directory} ABSOLUTE) get_target_property(source_list ${target} SOURCES) source_group(TREE ${directory} FILES ${source_list}) endif() endfunction() macro(add_subdirectoriesL0 curdir dirmask) file(GLOB children RELATIVE ${curdir} ${curdir}/${dirmask}) set(dirlist "") foreach(child ${children}) if(IS_DIRECTORY ${curdir}/${child}) list(APPEND dirlist ${child}) endif() endforeach() foreach(subdir ${dirlist}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/CMakeLists.txt) add_subdirectory(${subdir}) endif() endforeach() endmacro() compute-runtime-20.13.16352/level_zero/core/000077500000000000000000000000001363734646600204535ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/000077500000000000000000000000001363734646600217535ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/CMakeLists.txt000066400000000000000000000111431363734646600245130ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_RUNTIME_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/additional_kernel_properties.cpp ${CMAKE_CURRENT_SOURCE_DIR}/builtin/builtin_functions_lib.h ${CMAKE_CURRENT_SOURCE_DIR}/builtin/builtin_functions_lib_impl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/builtin/builtin_functions_lib_impl.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw.h 
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_immediate.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_immediate.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_extended${BRANCH_DIR_SUFFIX}/cmdlist_extended.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_extended${BRANCH_DIR_SUFFIX}/cmdqueue_extended.inl ${CMAKE_CURRENT_SOURCE_DIR}/debugger/debug_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debugger/debugger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device/device.h ${CMAKE_CURRENT_SOURCE_DIR}/device/device_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device/device_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_handle.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_handle_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_handle_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver.h ${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/event/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event/event.h ${CMAKE_CURRENT_SOURCE_DIR}/fence/fence.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fence/fence.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_helpers${BRANCH_DIR_SUFFIX}/hw_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel/kernel_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/image/image_hw.inl 
${CMAKE_CURRENT_SOURCE_DIR}/image/image_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image/image_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/memory/memory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/memory_operations_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/memory/cpu_page_fault_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module.h ${CMAKE_CURRENT_SOURCE_DIR}/module/module_build_log.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module_build_log.h ${CMAKE_CURRENT_SOURCE_DIR}/module${BRANCH_DIR_SUFFIX}/module_extra_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/module/module_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler/printf_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler/printf_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_imp.h ) target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) target_include_directories(${TARGET_NAME_L0} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) add_subdirectories() include_directories(${CMAKE_CURRENT_SOURCE_DIR}) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_RUNTIME_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(${TARGET_NAME_L0} PRIVATE ${L0_SRCS_DLL} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) set_source_files_properties(${L0_RUNTIME_SOURCES} PROPERTIES COMPILE_FLAGS -Wall) endif() if(NOT DEFINED L0_DRIVER_VERSION) set(L0_DRIVER_VERSION 1) endif() configure_file(driver/driver_version.h.in ${CMAKE_BINARY_DIR}/driver_version_l0.h) # Put Driver version into define # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_RUNTIME_SOURCES ${L0_RUNTIME_SOURCES}) compute-runtime-20.13.16352/level_zero/core/source/additional_kernel_properties.cpp000066400000000000000000000004651363734646600304100ustar00rootroot00000000000000/* * 
Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device_imp.h" namespace L0 { void DeviceImp::processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_kernel_properties_t *pKernelProperties) { } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/builtin/000077500000000000000000000000001363734646600234215ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/builtin/builtin_functions_lib.h000066400000000000000000000023671363734646600301660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { class BuiltIns; } // namespace NEO namespace L0 { struct Device; struct Kernel; enum class Builtin : uint32_t { CopyBufferBytes = 0u, CopyBufferRectBytes2d, CopyBufferRectBytes3d, CopyBufferToBufferMiddle, CopyBufferToBufferSide, CopyBufferToImage3d16Bytes, CopyBufferToImage3d2Bytes, CopyBufferToImage3d4Bytes, CopyBufferToImage3d8Bytes, CopyBufferToImage3dBytes, CopyImage3dToBuffer16Bytes, CopyImage3dToBuffer2Bytes, CopyImage3dToBuffer4Bytes, CopyImage3dToBuffer8Bytes, CopyImage3dToBufferBytes, CopyImageRegion, FillBufferImmediate, FillBufferSSHOffset, COUNT }; struct BuiltinFunctionsLib { virtual ~BuiltinFunctionsLib() = default; static std::unique_ptr create(Device *device, NEO::BuiltIns *builtins); virtual Kernel *getFunction(Builtin func) = 0; virtual void initFunctions() = 0; virtual Kernel *getPageFaultFunction() = 0; virtual void initPageFaultFunction() = 0; protected: BuiltinFunctionsLib() = default; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp000066400000000000000000000140051363734646600315320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include 
"shared/source/built_ins/built_ins.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/module/module.h" namespace L0 { std::unique_ptr BuiltinFunctionsLib::create(Device *device, NEO::BuiltIns *builtins) { return std::unique_ptr(new BuiltinFunctionsLibImpl(device, builtins)); } struct BuiltinFunctionsLibImpl::BuiltinData { ~BuiltinData() { func.reset(); module.reset(); } std::unique_ptr module; std::unique_ptr func; }; void BuiltinFunctionsLibImpl::initFunctions() { for (uint32_t builtId = 0; builtId < static_cast(Builtin::COUNT); builtId++) { const char *builtinName = nullptr; NEO::EBuiltInOps::Type builtin; switch (static_cast(builtId)) { case Builtin::CopyBufferBytes: builtinName = "copyBufferToBufferBytesSingle"; builtin = NEO::EBuiltInOps::CopyBufferToBuffer; break; case Builtin::CopyBufferRectBytes2d: builtinName = "CopyBufferRectBytes2d"; builtin = NEO::EBuiltInOps::CopyBufferRect; break; case Builtin::CopyBufferRectBytes3d: builtinName = "CopyBufferRectBytes3d"; builtin = NEO::EBuiltInOps::CopyBufferRect; break; case Builtin::CopyBufferToBufferMiddle: builtinName = "CopyBufferToBufferMiddleRegion"; builtin = NEO::EBuiltInOps::CopyBufferToBuffer; break; case Builtin::CopyBufferToBufferSide: builtinName = "CopyBufferToBufferSideRegion"; builtin = NEO::EBuiltInOps::CopyBufferToBuffer; break; case Builtin::CopyBufferToImage3d16Bytes: builtinName = "CopyBufferToImage3d16Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case Builtin::CopyBufferToImage3d2Bytes: builtinName = "CopyBufferToImage3d2Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case Builtin::CopyBufferToImage3d4Bytes: builtinName = "CopyBufferToImage3d4Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case Builtin::CopyBufferToImage3d8Bytes: builtinName = "CopyBufferToImage3d8Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case Builtin::CopyBufferToImage3dBytes: builtinName = "CopyBufferToImage3dBytes"; 
builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case Builtin::CopyImage3dToBuffer16Bytes: builtinName = "CopyImage3dToBuffer16Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case Builtin::CopyImage3dToBuffer2Bytes: builtinName = "CopyImage3dToBuffer2Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case Builtin::CopyImage3dToBuffer4Bytes: builtinName = "CopyImage3dToBuffer4Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case Builtin::CopyImage3dToBuffer8Bytes: builtinName = "CopyImage3dToBuffer8Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case Builtin::CopyImage3dToBufferBytes: builtinName = "CopyImage3dToBufferBytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case Builtin::CopyImageRegion: builtinName = "CopyImageToImage3d"; builtin = NEO::EBuiltInOps::CopyImageToImage3d; break; case Builtin::FillBufferImmediate: builtinName = "FillBufferImmediate"; builtin = NEO::EBuiltInOps::FillBuffer; break; case Builtin::FillBufferSSHOffset: builtinName = "FillBufferSSHOffset"; builtin = NEO::EBuiltInOps::FillBuffer; break; default: continue; }; builtins[builtId] = loadBuiltIn(builtin, builtinName); } } Kernel *BuiltinFunctionsLibImpl::getFunction(Builtin func) { auto builtId = static_cast(func); return builtins[builtId]->func.get(); } void BuiltinFunctionsLibImpl::initPageFaultFunction() { pageFaultBuiltin = loadBuiltIn(NEO::EBuiltInOps::CopyBufferToBuffer, "CopyBufferToBufferSideRegion"); } Kernel *BuiltinFunctionsLibImpl::getPageFaultFunction() { return pageFaultBuiltin->func.get(); } std::unique_ptr BuiltinFunctionsLibImpl::loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) { auto builtInCode = builtInsLib->getBuiltinsLib().getBuiltinCode(builtin, NEO::BuiltinCode::ECodeType::Binary, *device->getNEODevice()); ze_result_t res; std::unique_ptr module; ze_module_handle_t moduleHandle; ze_module_desc_t moduleDesc = {ZE_MODULE_DESC_VERSION_CURRENT}; moduleDesc.format = 
ZE_MODULE_FORMAT_NATIVE; moduleDesc.pInputModule = reinterpret_cast(&builtInCode.resource[0]); moduleDesc.inputSize = builtInCode.resource.size(); res = device->createModule(&moduleDesc, &moduleHandle, nullptr); UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS); module.reset(Module::fromHandle(moduleHandle)); std::unique_ptr function; ze_kernel_handle_t functionHandle; ze_kernel_desc_t functionDesc = {ZE_KERNEL_DESC_VERSION_CURRENT}; functionDesc.pKernelName = builtInName; res = module->createKernel(&functionDesc, &functionHandle); DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS); UNUSED_VARIABLE(res); function.reset(Kernel::fromHandle(functionHandle)); return std::unique_ptr(new BuiltinData{std::move(module), std::move(function)}); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/builtin/builtin_functions_lib_impl.h000066400000000000000000000021531363734646600312000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/builtin/builtin_functions_lib.h" namespace NEO { namespace EBuiltInOps { using Type = uint32_t; } class BuiltIns; } // namespace NEO namespace L0 { struct BuiltinFunctionsLibImpl : BuiltinFunctionsLib { struct BuiltinData; BuiltinFunctionsLibImpl(Device *device, NEO::BuiltIns *builtInsLib) : device(device), builtInsLib(builtInsLib) { } ~BuiltinFunctionsLibImpl() override { builtins->reset(); pageFaultBuiltin.release(); } Kernel *getFunction(Builtin func) override; Kernel *getPageFaultFunction() override; void initFunctions() override; void initPageFaultFunction() override; std::unique_ptr loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName); protected: std::unique_ptr builtins[static_cast(Builtin::COUNT)]; std::unique_ptr pageFaultBuiltin; Device *device; NEO::BuiltIns *builtInsLib; }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/cmdlist/000077500000000000000000000000001363734646600234125ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist.cpp000066400000000000000000000074031363734646600255610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device_info.h" #include "shared/source/memory_manager/memory_manager.h" namespace L0 { CommandList::~CommandList() { if (cmdQImmediate) { cmdQImmediate->destroy(); } removeDeallocationContainerData(); removeHostPtrAllocations(); printfFunctionContainer.clear(); } void CommandList::storePrintfFunction(Kernel *function) { auto it = std::find(this->printfFunctionContainer.begin(), this->printfFunctionContainer.end(), function); if (it == this->printfFunctionContainer.end()) { this->printfFunctionContainer.push_back(function); } } void CommandList::removeHostPtrAllocations() { auto memoryManager = device ? device->getNEODevice()->getMemoryManager() : nullptr; for (auto &allocation : hostPtrMap) { UNRECOVERABLE_IF(memoryManager == nullptr); memoryManager->freeGraphicsMemory(allocation.second); } hostPtrMap.clear(); } void CommandList::removeDeallocationContainerData() { auto memoryManager = device ? 
device->getNEODevice()->getMemoryManager() : nullptr; auto container = commandContainer.getDeallocationContainer(); for (auto deallocation : container) { DEBUG_BREAK_IF(deallocation == nullptr); UNRECOVERABLE_IF(memoryManager == nullptr); NEO::SvmAllocationData *allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(reinterpret_cast(deallocation->getGpuAddress())); if (allocData) { device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->remove(*allocData); } if (!((deallocation->getAllocationType() == NEO::GraphicsAllocation::AllocationType::INTERNAL_HEAP) || (deallocation->getAllocationType() == NEO::GraphicsAllocation::AllocationType::LINEAR_STREAM))) { memoryManager->freeGraphicsMemory(deallocation); eraseDeallocationContainerEntry(deallocation); } } } void CommandList::eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation) { std::vector::iterator allocErase; auto container = &commandContainer.getDeallocationContainer(); allocErase = std::find(container->begin(), container->end(), allocation); if (allocErase != container->end()) { container->erase(allocErase); } } void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation) { std::vector::iterator allocErase; auto container = &commandContainer.getResidencyContainer(); allocErase = std::find(container->begin(), container->end(), allocation); if (allocErase != container->end()) { container->erase(allocErase); } } NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *function) { auto functionAttributes = function->getImmutableData()->getDescriptor().kernelAttributes; NEO::PreemptionFlags flags = {}; flags.flags.disabledMidThreadPreemptionKernel = functionAttributes.flags.requiresDisabledMidThreadPreemption; flags.flags.usesFencesForReadWriteImages = functionAttributes.flags.usesFencesForReadWriteImages; flags.flags.deviceSupportsVmePreemption = device->getDeviceInfo().vmeAvcSupportsPreemption; 
flags.flags.disablePerCtxtPreemptionGranularityControl = device->getHwInfo().workaroundTable.waDisablePerCtxtPreemptionGranularityControl; flags.flags.disableLSQCROPERFforOCL = device->getHwInfo().workaroundTable.waDisableLSQCROPERFforOCL; return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist.h000066400000000000000000000241041363734646600252230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/preemption_mode.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/kernel/kernel.h" #include #include #include struct _ze_command_list_handle_t {}; namespace L0 { struct EventPool; struct Event; struct Kernel; struct CommandList : _ze_command_list_handle_t { static constexpr uint32_t maxNumInterfaceDescriptorsPerMediaInterfaceDescriptorLoad = 62u; static constexpr uint32_t defaultNumIddsPerBlock = maxNumInterfaceDescriptorsPerMediaInterfaceDescriptorLoad; static constexpr uint32_t commandListimmediateIddsPerBlock = 1u; CommandList() {} CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {} template struct Allocator { static CommandList *allocate(uint32_t numIddsPerBlock) { return new Type(numIddsPerBlock); } }; virtual ze_result_t close() = 0; virtual ze_result_t destroy() = 0; virtual ze_result_t appendEventReset(ze_event_handle_t hEvent) = 0; virtual ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; 
virtual ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchFunction(ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendLaunchMultipleFunctionsIndirect(uint32_t numFunctions, const ze_kernel_handle_t *phFunctions, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) = 0; virtual ze_result_t appendMemoryCopy(void *dstptr, const void 
*srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) = 0; virtual ze_result_t appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent) = 0; virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hEvent) = 0; virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0; virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0; virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) = 0; virtual ze_result_t reserveSpace(size_t size, void **ptr) = 0; virtual ze_result_t reset() = 0; virtual ze_result_t appendMetricMemoryBarrier() = 0; virtual ze_result_t appendMetricTracerMarker(zet_metric_tracer_handle_t hMetricTracer, uint32_t value) = 0; virtual ze_result_t appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) = 0; virtual ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hCompletionEvent) = 0; virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0; virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0; virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0; virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0; virtual ze_result_t appendMIMath(void *aluArray, size_t aluCount) = 0; virtual ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) = 0; virtual ze_result_t appendMIBBEnd() = 0; virtual ze_result_t appendMINoop() = 0; static CommandList *create(uint32_t productFamily, Device *device); static 
CommandList *createImmediate(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc, bool internalUsage); static CommandList *fromHandle(ze_command_list_handle_t handle) { return static_cast(handle); } inline ze_command_list_handle_t toHandle() { return this; } uint32_t getCommandListPerThreadScratchSize() const { return commandListPerThreadScratchSize; } NEO::PreemptionMode getCommandListPreemptionMode() const { return commandListPreemptionMode; } NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *function); std::vector &getPrintfFunctionContainer() { return this->printfFunctionContainer; } void storePrintfFunction(Kernel *function); void removeDeallocationContainerData(); void removeHostPtrAllocations(); void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation); void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation); enum CommandListType : uint32_t { TYPE_REGULAR = 0u, TYPE_IMMEDIATE = 1u }; CommandQueue *cmdQImmediate = nullptr; uint32_t cmdListType = CommandListType::TYPE_REGULAR; const ze_command_queue_desc_t *cmdQImmediateDesc = nullptr; Device *device = nullptr; std::vector printfFunctionContainer; virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0; virtual bool initialize(Device *device) = 0; virtual ~CommandList(); NEO::CommandContainer commandContainer; protected: std::map hostPtrMap; uint32_t commandListPerThreadScratchSize = 0u; NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); extern CommandListAllocatorFn commandListFactory[]; extern CommandListAllocatorFn commandListFactoryImmediate[]; template struct CommandListPopulateFactory { CommandListPopulateFactory() { commandListFactory[productFamily] = CommandList::Allocator::allocate; } }; template struct CommandListImmediatePopulateFactory { CommandListImmediatePopulateFactory() { commandListFactoryImmediate[productFamily] = 
CommandList::Allocator::allocate; } }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist_extended/000077500000000000000000000000001363734646600267315ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist_extended/cmdlist_extended.inl000066400000000000000000000035271363734646600327630ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { template ze_result_t CommandListCoreFamily::appendMILoadRegImm(uint32_t reg, uint32_t value) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMILoadRegReg(uint32_t reg1, uint32_t reg2) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMILoadRegMem(uint32_t reg1, uint64_t address) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIStoreRegMem(uint32_t reg1, uint64_t address) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIMath(void *aluArray, size_t aluCount) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMIBBEnd() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendMINoop() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist_hw.h000066400000000000000000000236221363734646600257250ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include "igfxfmid.h" namespace NEO { enum class ImageType; } namespace L0 { struct AlignedAllocationData { uintptr_t alignedAllocationPtr = 0u; size_t offset = 0u; NEO::GraphicsAllocation *alloc = nullptr; bool needsFlush = false; }; struct EventPool; struct Event; template struct CommandListCoreFamily : CommandListImp { using BaseClass = CommandListImp; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using CommandListImp::CommandListImp; CommandListCoreFamily() {} CommandListCoreFamily(uint32_t numIddsPerBlock) : CommandListImp(numIddsPerBlock) {} bool initialize(Device *device) override; virtual void programL3(bool isSLMused); ze_result_t close() override; ze_result_t appendEventReset(ze_event_handle_t hEvent) override; ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t 
appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchFunction(ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchMultipleFunctionsIndirect(uint32_t numFunctions, const ze_kernel_handle_t *phFunctions, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) override; ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) override; ze_result_t appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent) override; ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) override; ze_result_t appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hEvent) override; 
ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) override; ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override; ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) override; ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) override; ze_result_t appendMIMath(void *aluArray, size_t aluCount) override; ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) override; ze_result_t appendMIBBEnd() override; ze_result_t appendMINoop() override; ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override; ze_result_t reserveSpace(size_t size, void **ptr) override; ze_result_t reset() override; ze_result_t executeCommandListImmediate(bool performMigration) override; protected: ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint32_t size, uint32_t elementSize, Builtin builtin); ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ze_result_t appendLaunchFunctionWithParams(ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t 
*phWaitEvents, bool isIndirect, bool isPredicate); ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions); void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges); ze_result_t setGroupSizeIndirect(uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]); void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker); void appendSignalEventPostWalker(ze_event_handle_t hEvent); uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region); AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize); ze_result_t addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); }; template struct CommandListProductFamily; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist_hw.inl000066400000000000000000001563471363734646600262730ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device.h" #include "shared/source/helpers/heap_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/event/event.h" #include 
"level_zero/core/source/image/image.h" #include "level_zero/core/source/module/module.h" #include namespace L0 { template struct EncodeStateBaseAddress; template bool CommandListCoreFamily::initialize(Device *device) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; if (!commandContainer.initialize(static_cast(device)->neoDevice)) { return false; } NEO::EncodeStateBaseAddress::encode(commandContainer); commandContainer.setDirtyStateForAllHeaps(false); this->device = device; this->commandListPreemptionMode = device->getDevicePreemptionMode(); return true; } template ze_result_t CommandListCoreFamily::executeCommandListImmediate(bool performMigration) { this->close(); ze_command_list_handle_t immediateHandle = this->toHandle(); this->cmdQImmediate->executeCommandLists(1, &immediateHandle, nullptr, performMigration); this->cmdQImmediate->synchronize(std::numeric_limits::max()); this->reset(); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::close() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; commandContainer.removeDuplicatesFromResidencyContainer(); NEO::EncodeBatchBufferStartOrEnd::programBatchBufferEnd(commandContainer); return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::programL3(bool isSLMused) {} template ze_result_t CommandListCoreFamily::appendLaunchFunction(ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (addEventsToCmdList(hEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_result_t ret = appendLaunchFunctionWithParams(hFunction, pThreadGroupDimensions, hEvent, numWaitEvents, phWaitEvents, false, false); if (ret != ZE_RESULT_SUCCESS) { return ret; } return ret; } template ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, 
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template ze_result_t CommandListCoreFamily::appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (addEventsToCmdList(hEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_result_t ret = appendLaunchFunctionWithParams(hFunction, pDispatchArgumentsBuffer, nullptr, 0, nullptr, true, false); if (hEvent) { appendSignalEventPostWalker(hEvent); } return ret; } template ze_result_t CommandListCoreFamily::appendLaunchMultipleFunctionsIndirect(uint32_t numFunctions, const ze_kernel_handle_t *phFunctions, const uint32_t *pNumLaunchArguments, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (addEventsToCmdList(hEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; for (uint32_t i = 0; i < numFunctions; i++) { NEO::EncodeMathMMIO::encodeGreaterThanPredicate(commandContainer, reinterpret_cast(pNumLaunchArguments), i); auto ret = appendLaunchFunctionWithParams(phFunctions[i], &pLaunchArgumentsBuffer[i], nullptr, 0, nullptr, true, true); if (ret != ZE_RESULT_SUCCESS) { return ret; } } if (hEvent) { appendSignalEventPostWalker(hEvent); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendEventReset(ze_event_handle_t hEvent) { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; auto event = Event::fromHandle(hEvent); commandContainer.addToResidencyContainer(&event->getAllocation()); NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( 
// Fragment of a whitespace-collapsed file: starts with the argument list of appendEventReset's
// pipe-control call and ends inside appendImageCopyFromMemory's default-region initializer.
//  - appendEventReset (tail): the pipe control writes Event::STATE_CLEARED to the event's GPU address;
//    always returns ZE_RESULT_SUCCESS.
//  - appendBarrier(...): runs addEventsToCmdList, adds a pipe control (second argument false) and
//    signals hSignalEvent when one is given.
//  - appendMemoryRangesBarrier(...): runs addEventsToCmdList, calls applyMemoryRangesBarrier for the
//    given ranges, signals hSignalEvent when given and, for TYPE_IMMEDIATE lists, executes the list
//    right away with performMigration = true.
//  - appendImageCopyFromMemory (head): bytesPerPixel is NumChannels * PerChannelSizeInBytes of the
//    destination image's surface format; a null pDstRegion is replaced by a region at origin 0,0,0
//    spanning the image's width/height/depth.
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, event->getGpuAddress(), Event::STATE_CLEARED, true, commandContainer.getDevice()->getHardwareInfo()); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (addEventsToCmdList(hSignalEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), false); if (hSignalEvent) { this->appendSignalEventPostWalker(hSignalEvent); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (addEventsToCmdList(hSignalEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); if (hSignalEvent) { this->appendSignalEventPostWalker(hSignalEvent); } if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { executeCommandListImmediate(true); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto image = Image::fromHandle(hDstImage); auto bytesPerPixel = image->getImageInfo().surfaceFormat->NumChannels * image->getImageInfo().surfaceFormat->PerChannelSizeInBytes; ze_image_region_t tmpRegion; if (pDstRegion == nullptr) { tmpRegion = {0, 0, 0, static_cast(image->getImageInfo().imgDesc.imageWidth), static_cast(image->getImageInfo().imgDesc.imageHeight),
// Fragment of a whitespace-collapsed file: middle of appendImageCopyFromMemory (buffer -> image copy).
// Steps on this line:
//  - finishes the default region and computes the source buffer size with getInputBufferSize;
//  - resolves the source pointer through getAlignedAllocation;
//  - selects the CopyBufferToImage3d* builtin by bytesPerPixel (1, 2, 4, 8 or 16); any other value hits
//    UNRECOVERABLE_IF(true) in the `default` label, which is placed first and textually falls into case 1u;
//  - sets kernel arguments 0 (source buffer + allocation), 1 (redescribed destination image),
//    2 (offset inside the aligned allocation) and 3 (origin x, y, z, 0);
//  - starts the pitch computation: row pitch = region width * bytesPerPixel; the slice pitch expression
//    is completed on the next line.
static_cast(image->getImageInfo().imgDesc.imageDepth)}; pDstRegion = &tmpRegion; } uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, bytesPerPixel, pDstRegion); auto allocationStruct = getAlignedAllocation(this->device, srcPtr, bufferSize); Kernel *builtinKernel = nullptr; switch (bytesPerPixel) { default: UNRECOVERABLE_IF(true); case 1u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyBufferToImage3dBytes); break; case 2u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyBufferToImage3d2Bytes); break; case 4u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyBufferToImage3d4Bytes); break; case 8u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyBufferToImage3d8Bytes); break; case 16u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyBufferToImage3d16Bytes); break; } builtinKernel->setArgBufferWithAlloc(0u, reinterpret_cast(&allocationStruct.alignedAllocationPtr), allocationStruct.alloc); builtinKernel->setArgRedescribedImage(1u, hDstImage); builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset); uint32_t origin[] = { static_cast(pDstRegion->originX), static_cast(pDstRegion->originY), static_cast(pDstRegion->originZ), 0}; builtinKernel->setArgumentValue(3u, sizeof(origin), &origin); auto srcRowPitch = pDstRegion->width * bytesPerPixel; auto srcSlicePitch = (image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ?
// Fragment of a whitespace-collapsed file: tail of appendImageCopyFromMemory followed by the head of
// appendImageCopyToMemory.
//  - appendImageCopyFromMemory (tail): slice pitch is one row for Image1DArray, otherwise
//    height * row pitch; both pitches become argument 4. The work-group size is suggested from the
//    region extents and applied; the copy is rejected with ZE_RESULT_ERROR_UNKNOWN when suggestion or
//    setGroupSize fails or when an extent is not a multiple of the chosen group size. The kernel is
//    then launched with extent / groupSize groups per dimension through appendLaunchFunction.
//  - appendImageCopyToMemory (head): mirrors the setup above for an image -> buffer copy: computes
//    bytesPerPixel from the source image, defaults a null pSrcRegion to the whole image, sizes the
//    destination buffer via getInputBufferSize and resolves dstPtr through getAlignedAllocation.
//    The builtin selection switch continues on the next line.
1 : pDstRegion->height) * srcRowPitch; uint32_t pitch[] = { srcRowPitch, srcSlicePitch}; builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch); uint32_t groupSizeX = pDstRegion->width; uint32_t groupSizeY = pDstRegion->height; uint32_t groupSizeZ = pDstRegion->depth; if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (pDstRegion->width % groupSizeX || pDstRegion->height % groupSizeY || pDstRegion->depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t functionArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY, pDstRegion->depth / groupSizeZ}; return this->appendLaunchFunction(builtinKernel->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void *dstPtr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto image = Image::fromHandle(hSrcImage); auto bytesPerPixel = image->getImageInfo().surfaceFormat->NumChannels * image->getImageInfo().surfaceFormat->PerChannelSizeInBytes; ze_image_region_t tmpRegion; if (pSrcRegion == nullptr) { tmpRegion = {0, 0, 0, static_cast(image->getImageInfo().imgDesc.imageWidth), static_cast(image->getImageInfo().imgDesc.imageHeight), static_cast(image->getImageInfo().imgDesc.imageDepth)}; pSrcRegion = &tmpRegion; } uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, bytesPerPixel, pSrcRegion); auto allocationStruct = getAlignedAllocation(this->device, dstPtr, bufferSize); Kernel *builtinKernel = nullptr; switch (bytesPerPixel) { default: UNRECOVERABLE_IF(true); case 1u:
// Fragment of a whitespace-collapsed file: middle of appendImageCopyToMemory (image -> buffer copy).
// Steps on this line:
//  - remaining cases of the bytesPerPixel switch, selecting the CopyImage3dToBuffer* builtin for
//    1, 2, 4, 8 or 16 bytes per pixel;
//  - kernel arguments: 0 = redescribed source image, 1 = destination buffer + allocation,
//    2 = origin (x, y, z, 0), 3 = offset inside the aligned allocation;
//  - row pitch = region width * bytesPerPixel; the slice pitch expression is completed on the next line.
// The variables are still named srcRowPitch/srcSlicePitch although they describe the destination buffer.
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyImage3dToBufferBytes); break; case 2u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyImage3dToBuffer2Bytes); break; case 4u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyImage3dToBuffer4Bytes); break; case 8u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyImage3dToBuffer8Bytes); break; case 16u: builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyImage3dToBuffer16Bytes); break; } builtinKernel->setArgRedescribedImage(0u, hSrcImage); builtinKernel->setArgBufferWithAlloc(1u, reinterpret_cast(&allocationStruct.alignedAllocationPtr), allocationStruct.alloc); uint32_t origin[] = { static_cast(pSrcRegion->originX), static_cast(pSrcRegion->originY), static_cast(pSrcRegion->originZ), 0}; builtinKernel->setArgumentValue(2u, sizeof(origin), &origin); builtinKernel->setArgumentValue(3u, sizeof(size_t), &allocationStruct.offset); auto srcRowPitch = pSrcRegion->width * bytesPerPixel; auto srcSlicePitch = (image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ?
// Fragment of a whitespace-collapsed file: tail of appendImageCopyToMemory followed by the head of
// appendImageCopyRegion.
//  - appendImageCopyToMemory (tail): slice pitch is one row for Image1DArray, otherwise
//    height * row pitch; both pitches become argument 4. Group size is suggested from the region
//    extents and applied; ZE_RESULT_ERROR_UNKNOWN is returned when that fails or an extent is not a
//    multiple of the group size. After the launch, a pipe control (second argument true) is added when
//    the destination allocation reports needsFlush.
//  - appendImageCopyRegion (head): fetches the CopyImageRegion builtin and both images, copies
//    *pSrcRegion or, when it is null, builds a region at origin 0,0,0 from the source image
//    descriptor, and starts filling srcOffset from the region origin.
1 : pSrcRegion->height) * srcRowPitch; uint32_t pitch[] = { srcRowPitch, srcSlicePitch}; builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch); uint32_t groupSizeX = pSrcRegion->width; uint32_t groupSizeY = pSrcRegion->height; uint32_t groupSizeZ = pSrcRegion->depth; if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (pSrcRegion->width % groupSizeX || pSrcRegion->height % groupSizeY || pSrcRegion->depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY, pSrcRegion->depth / groupSizeZ}; auto ret = CommandListCoreFamily::appendLaunchFunction(builtinKernel->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); if (allocationStruct.needsFlush) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } return ret; } template ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto function = device->getBuiltinFunctionsLib()->getFunction(Builtin::CopyImageRegion); auto dstImage = L0::Image::fromHandle(hDstImage); auto srcImage = L0::Image::fromHandle(hSrcImage); cl_int4 srcOffset, dstOffset; ze_image_region_t srcRegion, dstRegion; if (pSrcRegion != nullptr) { srcRegion = *pSrcRegion; } else { ze_image_desc_t srcDesc = srcImage->getImageDesc(); srcRegion = {0, 0, 0, static_cast(srcDesc.width), srcDesc.height, srcDesc.depth}; } srcOffset.x = static_cast(srcRegion.originX); srcOffset.y =
// Fragment of a whitespace-collapsed file: tail of appendImageCopyRegion followed by the head of
// appendImageCopy.
//  - appendImageCopyRegion (tail): finishes srcOffset, resolves the destination region the same way as
//    the source one (explicit region or whole image from the descriptor) and fills dstOffset. Source and
//    destination extents must match, otherwise ZE_RESULT_ERROR_INVALID_ARGUMENT is returned. The group
//    size is suggested from the source extents; failures or non-divisible extents return
//    ZE_RESULT_ERROR_UNKNOWN. Arguments: 0 = source image, 1 = destination image, 2 = srcOffset,
//    3 = dstOffset.
//    NOTE(review): appendEventForProfiling(hEvent, true) is called here and appendLaunchFunction calls
//    it again through addEventsToCmdList - confirm the duplicate start timestamp is intended.
//  - appendImageCopy(...): forwards to appendImageCopyRegion with both regions null (whole images).
static_cast(srcRegion.originY); srcOffset.z = static_cast(srcRegion.originZ); srcOffset.w = 0; if (pDstRegion != nullptr) { dstRegion = *pDstRegion; } else { ze_image_desc_t dstDesc = dstImage->getImageDesc(); dstRegion = {0, 0, 0, static_cast(dstDesc.width), dstDesc.height, dstDesc.depth}; } dstOffset.x = static_cast(dstRegion.originX); dstOffset.y = static_cast(dstRegion.originY); dstOffset.z = static_cast(dstRegion.originZ); dstOffset.w = 0; if (srcRegion.width != dstRegion.width || srcRegion.height != dstRegion.height || srcRegion.depth != dstRegion.depth) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } uint32_t groupSizeX = srcRegion.width; uint32_t groupSizeY = srcRegion.height; uint32_t groupSizeZ = srcRegion.depth; if (function->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (function->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion.width % groupSizeX || srcRegion.height % groupSizeY || srcRegion.depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t functionArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY, srcRegion.depth / groupSizeZ}; function->setArgRedescribedImage(0, hSrcImage); function->setArgRedescribedImage(1, hDstImage); function->setArgumentValue(2, sizeof(srcOffset), &srcOffset); function->setArgumentValue(3, sizeof(dstOffset), &dstOffset); appendEventForProfiling(hEvent, true); return this->CommandListCoreFamily::appendLaunchFunction(function->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { return this->appendImageCopyRegion(hDstImage, hSrcImage, nullptr,
// Fragment of a whitespace-collapsed file: tail of appendImageCopy, then appendMemAdvise,
// appendMemoryCopyKernelWithGA and the first tokens of appendPageFaultCopy.
//  - appendMemAdvise(hDevice, ptr, size, advice): looks ptr up in the SVM allocation table and returns
//    ZE_RESULT_SUCCESS only for SHARED_UNIFIED_MEMORY; any other or unknown pointer returns
//    ZE_RESULT_ERROR_UNKNOWN. No commands are appended and hDevice/size/advice are not read.
//  - appendMemoryCopyKernelWithGA(...): launches `builtin` as a 1-D copy. Group size X is the kernel's
//    simdSize. Arguments: 0 = dst pointer + allocation, 1 = src pointer + allocation,
//    2 = element count (size / elementSize), 3 = dstOffset, 4 = srcOffset. The group count is
//    size / (groupSizeX * elementSize) rounded up. The launch carries no events.
//    A non-zero result of setGroupSize is treated as failure (ZE_RESULT_ERROR_UNKNOWN).
nullptr, hEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) { auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); if (allocData) { if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { return ZE_RESULT_SUCCESS; } else { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } template ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint32_t size, uint32_t elementSize, Builtin builtin) { auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); uint32_t groupSizeX = builtinFunction->getImmutableData() ->getDescriptor() .kernelAttributes.simdSize; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } builtinFunction->setArgBufferWithAlloc(0u, dstPtr, dstPtrAlloc); builtinFunction->setArgBufferWithAlloc(1u, srcPtr, srcPtrAlloc); uint32_t elems = size / elementSize; builtinFunction->setArgumentValue(2, sizeof(elems), &elems); builtinFunction->setArgumentValue(3, sizeof(dstOffset), &dstOffset); builtinFunction->setArgumentValue(4, sizeof(srcOffset), &srcOffset); uint32_t groups = (size + ((groupSizeX * elementSize) - 1)) / (groupSizeX * elementSize); ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; return CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, nullptr, 0, nullptr); } template ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) { auto builtinFunction =
// Fragment of a whitespace-collapsed file: body of appendPageFaultCopy followed by the head of
// appendMemoryCopy.
//  - appendPageFaultCopy(dstptr, srcptr, size, flushHost): uses the page-fault builtin with group size
//    X = simdSize. Arguments 0/1 are the GPU addresses of the destination/source allocations,
//    argument 2 is size. The group count is size / groupSizeX rounded up. The kernel is launched
//    through appendLaunchFunctionWithParams without events; on success a pipe control (second argument
//    true) is added when flushHost is set.
//  - appendMemoryCopy (head): splits the copy into left / middle / right parts relative to the
//    destination address. middleAlignment is MemoryConstants::cacheLineSize and middleElSize is
//    4 * sizeof(uint32_t). leftSize starts as the distance from dstptr to the next aligned address
//    (the expression finishes on the next line).
device->getBuiltinFunctionsLib()->getPageFaultFunction(); uint32_t groupSizeX = builtinFunction->getImmutableData() ->getDescriptor() .kernelAttributes.simdSize; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } auto dstValPtr = static_cast(dstptr->getGpuAddress()); auto srcValPtr = static_cast(srcptr->getGpuAddress()); builtinFunction->setArgBufferWithAlloc(0, reinterpret_cast(&dstValPtr), dstptr); builtinFunction->setArgBufferWithAlloc(1, reinterpret_cast(&srcValPtr), srcptr); builtinFunction->setArgumentValue(2, sizeof(size), &size); uint32_t groups = (static_cast(size) + ((groupSizeX)-1)) / (groupSizeX); ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; ze_result_t ret = appendLaunchFunctionWithParams(builtinFunction->toHandle(), &dispatchFuncArgs, nullptr, 0, nullptr, false, false); if (ret != ZE_RESULT_SUCCESS) { return ret; } if (flushHost) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } return ret; } template ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; uintptr_t start = reinterpret_cast(dstptr); size_t middleAlignment = MemoryConstants::cacheLineSize; size_t middleElSize = sizeof(uint32_t) * 4; uintptr_t leftSize = start % middleAlignment; leftSize = (leftSize > 0) ?
// Fragment of a whitespace-collapsed file: middle of appendMemoryCopy.
// Steps on this line:
//  - leftSize: bytes up to the next middleAlignment boundary of the destination, capped at size;
//  - rightSize: (start + size) % middleAlignment, capped at what is left after leftSize;
//  - middleSizeBytes: the remainder. When srcptr + leftSize is not 4-byte aligned the middle part is
//    folded into leftSize, so the whole range goes through the byte-wise "Side" kernel;
//  - both pointers are resolved with getAlignedAllocation and a profiling start is recorded for
//    hSignalEvent;
//  - up to three launches through appendMemoryCopyKernelWithGA, each skipped once ret is a failure or
//    its part is empty: left (element size 1, CopyBufferToBufferSide), middle (element size
//    middleElSize, CopyBufferToBufferMiddle) and right (element size 1, CopyBufferToBufferSide), with
//    offsets advanced by the bytes already covered;
//  - hSignalEvent is signalled when given.
//    NOTE(review): numWaitEvents/phWaitEvents are not used on this line; the event is signalled even
//    when ret holds an error.
(middleAlignment - leftSize) : 0; leftSize = std::min(leftSize, size); uintptr_t rightSize = (start + size) % middleAlignment; rightSize = std::min(rightSize, size - leftSize); uintptr_t middleSizeBytes = size - leftSize - rightSize; if (!isAligned<4>(reinterpret_cast(srcptr) + leftSize)) { leftSize += middleSizeBytes; middleSizeBytes = 0; } DEBUG_BREAK_IF(size != leftSize + middleSizeBytes + rightSize); auto dstAllocationStruct = getAlignedAllocation(this->device, dstptr, size); auto srcAllocationStruct = getAlignedAllocation(this->device, srcptr, size); ze_result_t ret = ZE_RESULT_SUCCESS; appendEventForProfiling(hSignalEvent, true); if (ret == ZE_RESULT_SUCCESS && leftSize) { ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast(leftSize), 1, Builtin::CopyBufferToBufferSide); } if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast(middleSizeBytes), static_cast(middleElSize), Builtin::CopyBufferToBufferMiddle); } if (ret == ZE_RESULT_SUCCESS && rightSize) { ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast(rightSize), 1u, Builtin::CopyBufferToBufferSide); } if (hSignalEvent) { this->appendSignalEventPostWalker(hSignalEvent); } if (dstAllocationStruct.needsFlush) {
// Fragment of a whitespace-collapsed file: tail of appendMemoryCopy followed by the head of
// appendMemoryCopyRegion.
//  - appendMemoryCopy (tail): adds a pipe control (second argument true) when the destination
//    allocation reports needsFlush, then returns ret.
//  - appendMemoryCopyRegion (head): aligns both pointers with getSshAlignedPointer (which also yields
//    dstOffset/srcOffset), then computes the byte range to look up for each side. For depth > 1 the
//    host offset is originX + originY * pitch + originZ * slicePitch and the extent term is
//    width * height * depth; otherwise the Z terms are dropped.
//    NOTE(review): the extent term does not involve the row/slice pitches - confirm it covers the whole
//    addressed range when pitch > width.
//    The destination range is then looked up with findAllocationDataForRange; when it is unknown the
//    host pointer is imported with allocateManagedMemoryFromHostPtr (handling continues on the next line).
NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } return ret; } template ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent) { uintptr_t destinationPtr = reinterpret_cast(dstPtr); size_t dstOffset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(destinationPtr, dstOffset); void *alignedDstPtr = reinterpret_cast(destinationPtr); uintptr_t sourcePtr = reinterpret_cast(const_cast(srcPtr)); size_t srcOffset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(sourcePtr, srcOffset); void *alignedSrcPtr = reinterpret_cast(sourcePtr); size_t dstSize = 0; size_t srcSize = 0; if (srcRegion->depth > 1) { uint hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch) + ((dstRegion->originZ) * dstSlicePitch); uint hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch) + ((srcRegion->originZ) * srcSlicePitch); dstSize = (dstRegion->width * dstRegion->height * dstRegion->depth) + dstOffset + hostPtrDstOffset; srcSize = (srcRegion->width * srcRegion->height * srcRegion->depth) + srcOffset + hostPtrSrcOffset; } else { uint hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch); uint hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch); dstSize = (dstRegion->width * dstRegion->height) + dstOffset + hostPtrDstOffset; srcSize = (srcRegion->width * srcRegion->height) + srcOffset + hostPtrSrcOffset; } NEO::SvmAllocationData *allocData = nullptr; bool hostPointerNeedsFlush = false; bool dstAllocFound = device->getDriverHandle()->findAllocationDataForRange(alignedDstPtr, dstSize, &allocData); if (dstAllocFound == false) { auto dstAlloc = device->getDriverHandle()->allocateManagedMemoryFromHostPtr(device, alignedDstPtr, dstSize, this);
commandContainer.getDeallocationContainer().push_back(dstAlloc); hostPointerNeedsFlush = true; } else { if (allocData->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY || allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { hostPointerNeedsFlush = true; } } bool srcAllocFound = device->getDriverHandle()->findAllocationDataForRange(alignedSrcPtr, srcSize, nullptr); if (srcAllocFound == false) { auto srcAlloc = device->getDriverHandle()->allocateManagedMemoryFromHostPtr(device, alignedSrcPtr, dstSize, this); commandContainer.getDeallocationContainer().push_back(srcAlloc); } appendEventForProfiling(hSignalEvent, true); ze_result_t result = ZE_RESULT_SUCCESS; if (srcRegion->depth > 1) { result = this->appendMemoryCopyKernel3d(alignedDstPtr, alignedSrcPtr, Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, 0, nullptr); } else { result = this->appendMemoryCopyKernel2d(alignedDstPtr, alignedSrcPtr, Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, 0, nullptr); } if (result) { return result; } if (hostPointerNeedsFlush) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); uint32_t groupSizeX = srcRegion->width; uint32_t groupSizeY = srcRegion->height; uint32_t groupSizeZ = srcRegion->depth; if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, 
groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion->width % groupSizeX || srcRegion->height % groupSizeY || srcRegion->depth % groupSizeZ) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, srcRegion->depth / groupSizeZ}; uint srcOrigin[3] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY), (srcRegion->originZ)}; uint dstOrigin[3] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY), (srcRegion->originZ)}; uint srcPitches[2] = {(srcPitch), (srcSlicePitch)}; uint dstPitches[2] = {(dstPitch), (dstSlicePitch)}; builtinFunction->setArgumentValue(0, sizeof(dstptr), &srcptr); builtinFunction->setArgumentValue(1, sizeof(srcptr), &dstptr); builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin); builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin); builtinFunction->setArgumentValue(4, sizeof(srcPitches), &srcPitches); builtinFunction->setArgumentValue(5, sizeof(dstPitches), &dstPitches); return CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr, Builtin builtin, const ze_copy_region_t *dstRegion, uint32_t dstPitch, size_t dstOffset, const ze_copy_region_t *srcRegion, uint32_t srcPitch, size_t srcOffset, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); uint32_t groupSizeX = srcRegion->width; uint32_t groupSizeY = srcRegion->height; uint32_t groupSizeZ = 1u; 
// Fragment of a whitespace-collapsed file: tail of appendMemoryCopyKernel2d, then appendMemoryPrefetch
// and the head of appendMemoryFill.
//  - appendMemoryCopyKernel2d (tail): suggests and applies the work-group size; returns
//    ZE_RESULT_ERROR_UNKNOWN when that fails or when width/height are not multiples of the group size.
//    Dispatches (width / groupSizeX, height / groupSizeY, 1) groups. Arguments: 0 = source pointer,
//    1 = destination pointer, 2 = source origin (x + srcOffset, y), 3 = destination origin
//    (x + dstOffset, y), 4 = srcPitch, 5 = dstPitch.
//    NOTE(review): arguments 0 and 1 pair sizeof(dstptr) with &srcptr and sizeof(srcptr) with &dstptr;
//    both are `const void *` so the sizes match, but the operands are crossed.
//  - appendMemoryPrefetch(ptr, count): looks ptr up in the SVM allocation table and returns
//    ZE_RESULT_SUCCESS only for SHARED_UNIFIED_MEMORY, otherwise ZE_RESULT_ERROR_UNKNOWN. No commands
//    are appended and count is not read.
//  - appendMemoryFill (head): declarations only; the body continues on the next line.
if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion->width % groupSizeX || srcRegion->height % groupSizeY) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u}; uint srcOrigin[2] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY)}; uint dstOrigin[2] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY)}; builtinFunction->setArgumentValue(0, sizeof(dstptr), &srcptr); builtinFunction->setArgumentValue(1, sizeof(srcptr), &dstptr); builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin); builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin); builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch); builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch); return CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); } template ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const void *ptr, size_t count) { auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); if (allocData) { if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { return ZE_RESULT_SUCCESS; } else { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } template ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; bool hostPointerNeedsFlush = false;
// Fragment of a whitespace-collapsed file: middle of appendMemoryFill.
// Steps on this line:
//  - the destination range [ptr, ptr + size) must be known to the driver, otherwise
//    ZE_RESULT_ERROR_INVALID_ARGUMENT is returned; host or shared unified memory requests a trailing
//    pipe control via hostPointerNeedsFlush;
//  - destination and pattern pointers are aligned with getSshAlignedPointer, producing
//    dstOffset/srcOffset;
//  - patternSize == 1: FillBufferImmediate with group size X = simdSize; the pattern is dereferenced
//    into a uint32_t and passed as argument 2 (arguments 0/1 = aligned dst pointer and offset).
//    NOTE(review): the pointee type of the cast was lost in this dump - confirm the read does not go
//    past a one-byte pattern;
//  - otherwise: FillBufferSSHOffset; the pattern host memory is imported with
//    allocateManagedMemoryFromHostPtr (null result -> ZE_RESULT_ERROR_UNKNOWN) and queued for
//    deallocation, and group size X is patternSize.
NEO::SvmAllocationData *allocData = nullptr; bool dstAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr, size, &allocData); if (dstAllocFound == false) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } else { if (allocData->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY || allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { hostPointerNeedsFlush = true; } } uintptr_t dstPtr = reinterpret_cast(ptr); size_t dstOffset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(dstPtr, dstOffset); uintptr_t srcPtr = reinterpret_cast(const_cast(pattern)); size_t srcOffset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(srcPtr, srcOffset); Kernel *builtinFunction = nullptr; uint32_t groupSizeX = 1u; if (patternSize == 1) { builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize; if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } uint32_t value = *(reinterpret_cast(const_cast(pattern))); builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr); builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset); builtinFunction->setArgumentValue(2, sizeof(value), &value); } else { builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferSSHOffset); auto patternAlloc = device->getDriverHandle()->allocateManagedMemoryFromHostPtr(device, reinterpret_cast(srcPtr), srcOffset + patternSize, this); if (patternAlloc == nullptr) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } commandContainer.getDeallocationContainer().push_back(patternAlloc); groupSizeX = static_cast(patternSize); if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr); builtinFunction->setArgumentValue(1, sizeof(dstOffset),
// Fragment of a whitespace-collapsed file: tail of appendMemoryFill, then
// appendSignalEventPostWalker and the head of getInputBufferSize.
//  - appendMemoryFill (tail): for the pattern kernel, arguments 2/3 are the aligned pattern pointer and
//    its offset. After recording a profiling start, size / groupSizeX groups are launched without
//    events. A remainder (size % groupSizeX) gets a second single-group launch whose group size is
//    the remainder and whose destination is re-aligned at ptr + size - remainder. Finally hEvent is
//    signalled when given and a pipe control is added when hostPointerNeedsFlush is set.
//    NOTE(review): when size < groupSizeX the first launch is appended with a group count of 0.
//  - appendSignalEventPostWalker(hEvent): timestamp events record the end timestamps through
//    appendEventForProfiling(hEvent, false); all other events go through appendSignalEvent.
//  - getInputBufferSize(imageType, bytesPerPixel, region) (head): 1D and 1D-array images need
//    bytesPerPixel * width bytes, 2D and 2D-array images bytesPerPixel * width * height; an unknown
//    type hits UNRECOVERABLE_IF(true) in the leading `default` label.
&dstOffset); builtinFunction->setArgumentValue(2, sizeof(srcPtr), &srcPtr); builtinFunction->setArgumentValue(3, sizeof(srcOffset), &srcOffset); } appendEventForProfiling(hEvent, true); uint32_t groups = static_cast(size) / groupSizeX; ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; ze_result_t res = CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, nullptr, 0, nullptr); if (res) { return res; } uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; if (groupRemainderSizeX) { if (builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u)) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t dispatchFuncArgs{1u, 1u, 1u}; dstPtr = dstPtr + (size - groupRemainderSizeX); dstOffset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(dstPtr, dstOffset); builtinFunction->setArgumentValue(0, sizeof(dstPtr), &dstPtr); builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset); res = CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, nullptr, 0, nullptr); } if (hEvent) { this->appendSignalEventPostWalker(hEvent); } if (hostPointerNeedsFlush) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } return res; } template void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_handle_t hEvent) { auto event = Event::fromHandle(hEvent); if (event->isTimestampEvent) { appendEventForProfiling(hEvent, false); } else { CommandListCoreFamily::appendSignalEvent(hEvent); } } template inline uint64_t CommandListCoreFamily::getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region) { switch (imageType) { default: UNRECOVERABLE_IF(true); case NEO::ImageType::Image1D: case NEO::ImageType::Image1DArray: return bytesPerPixel * region->width; case NEO::ImageType::Image2D: case NEO::ImageType::Image2DArray: return bytesPerPixel * region->width * region->height; case
// Fragment of a whitespace-collapsed file: tail of getInputBufferSize, then getAlignedAllocation,
// addEventsToCmdList and the head of appendSignalEvent.
//  - getInputBufferSize (tail): 3D images need bytesPerPixel * width * height * depth bytes.
//  - getAlignedAllocation(device, buffer, bufferSize): looks the range up with
//    findAllocationDataForRange and derives `offset` via getSshAlignedPointer. Unknown pointers are
//    imported with allocateMemoryFromHostPtr, remembered in hostPtrMap, and addressed as
//    (allocation GPU address - offset). Known pointers use allocData->gpuAllocation and
//    (buffer - offset); host or shared unified memory sets the needs-flush flag.
//    Returns {alignedPtr, offset, alloc, hostPointerNeedsFlush}.
//    NOTE(review): the result of allocateMemoryFromHostPtr is dereferenced without a null check.
//  - addEventsToCmdList(hEvent, numWaitEvents, phWaitEvents): a non-zero wait count with a null list
//    returns ZE_RESULT_ERROR_INVALID_ARGUMENT; otherwise the waits are appended (if any) and a
//    profiling start is recorded for hEvent.
//  - appendSignalEvent (head): adds the event allocation to the residency container; dcFlushEnable is
//    derived from event->signalScope (expression finishes on the next line).
NEO::ImageType::Image3D: return bytesPerPixel * region->width * region->height * region->depth; } } template inline AlignedAllocationData CommandListCoreFamily::getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize) { NEO::SvmAllocationData *allocData = nullptr; bool srcAllocFound = device->getDriverHandle()->findAllocationDataForRange(const_cast(buffer), bufferSize, &allocData); NEO::GraphicsAllocation *alloc = nullptr; uintptr_t sourcePtr = reinterpret_cast(const_cast(buffer)); size_t offset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(sourcePtr, offset); uintptr_t alignedPtr = 0u; bool hostPointerNeedsFlush = false; if (srcAllocFound == false) { alloc = device->getDriverHandle()->allocateMemoryFromHostPtr(device, buffer, bufferSize); hostPtrMap.insert(std::make_pair(buffer, alloc)); alignedPtr = static_cast(alloc->getGpuAddress() - offset); } else { alloc = allocData->gpuAllocation; alignedPtr = reinterpret_cast(buffer) - offset; if (allocData->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY || allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { hostPointerNeedsFlush = true; } } return {alignedPtr, offset, alloc, hostPointerNeedsFlush}; } template inline ze_result_t CommandListCoreFamily::addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (numWaitEvents > 0) { if (phWaitEvents) { CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents); } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } appendEventForProfiling(hEvent, true); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_handle_t hEvent) { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto event = Event::fromHandle(hEvent); commandContainer.addToResidencyContainer(&event->getAllocation()); bool dcFlushEnable = (event->signalScope ==
// Fragment of a whitespace-collapsed file: tail of appendSignalEvent followed by most of
// appendWaitOnEvents.
//  - appendSignalEvent (tail): dcFlushEnable is false only for ZE_EVENT_SCOPE_FLAG_NONE. A pipe
//    control with a write-immediate-data post-sync operation writes Event::STATE_SIGNALED to the
//    event's GPU address. Always returns ZE_RESULT_SUCCESS.
//  - appendWaitOnEvents(numEvents, phEvent): for every event, adds its allocation to the residency
//    container and programs an MI_SEMAPHORE_WAIT on the event's GPU address with compare operation
//    SAD_NOT_EQUAL_SDD against eventStateClear (the value -1 converted to uint32_t). For timestamp
//    events the waited address is offset to the CONTEXT_END timestamp register. The per-event
//    dcFlushEnable flag is derived from event->waitScope (expression finishes on the next line).
ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo()); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; uint64_t gpuAddr = 0; constexpr uint32_t eventStateClear = static_cast(-1); for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); commandContainer.addToResidencyContainer(&event->getAllocation()); gpuAddr = event->getGpuAddress(); if (event->isTimestampEvent) { gpuAddr += event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); } NEO::HardwareCommandsHelper::programMiSemaphoreWait(*(commandContainer.getCommandStream()), gpuAddr, eventStateClear, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ?
// Fragment of a whitespace-collapsed file: tail of appendWaitOnEvents, then reserveSpace, reset,
// prepareIndirectParams, setGroupSizeIndirect and the closing brace of namespace L0.
//  - appendWaitOnEvents (tail): after each semaphore wait a pipe control (second argument true) is
//    added unless the event's waitScope is ZE_EVENT_SCOPE_FLAG_NONE. Always returns ZE_RESULT_SUCCESS.
//  - reserveSpace(size, ptr): stores nullptr in *ptr when the command stream has less than `size`
//    bytes available, otherwise the reserved space. Returns ZE_RESULT_SUCCESS in both cases, so callers
//    have to check *ptr.
//  - reset(): clears the printf function container, the deallocation container data and the host
//    pointer allocations, resets commandContainer, re-encodes the state base address and clears the
//    dirty state of all heaps.
//  - prepareIndirectParams(pThreadGroupDimensions): when the pointer belongs to an SVM allocation, the
//    allocation is made resident and the GPUGPU_DISPATCHDIMX/Y/Z registers are loaded from memory at
//    the allocation's GPU address plus the offset of groupCountX/Y/Z. Pointers unknown to the SVM
//    manager are silently ignored; always returns ZE_RESULT_SUCCESS.
//    NOTE(review): the address is based on the allocation start, not on pThreadGroupDimensions -
//    confirm the struct is always at offset 0 of its allocation.
//  - setGroupSizeIndirect(offsets, crossThreadAddress, lws): forwards to
//    NEO::EncodeIndirectParams::setGroupSizeIndirect.
false : true; if (dcFlushEnable) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::reserveSpace(size_t size, void **ptr) { auto availableSpace = commandContainer.getCommandStream()->getAvailableSpace(); if (availableSpace < size) { *ptr = nullptr; } else { *ptr = commandContainer.getCommandStream()->getSpace(size); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::reset() { printfFunctionContainer.clear(); removeDeallocationContainerData(); removeHostPtrAllocations(); commandContainer.reset(); NEO::EncodeStateBaseAddress::encode(commandContainer); commandContainer.setDirtyStateForAllHeaps(false); return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(pThreadGroupDimensions); if (allocData) { auto alloc = allocData->gpuAllocation; commandContainer.addToResidencyContainer(alloc); NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMX, ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountX))); NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMY, ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountY))); NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMZ, ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountZ))); } return ZE_RESULT_SUCCESS; } template ze_result_t CommandListCoreFamily::setGroupSizeIndirect(uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; NEO::EncodeIndirectParams::setGroupSizeIndirect(commandContainer, offsets, crossThreadAddress, lws); return ZE_RESULT_SUCCESS; } } // namespace L0
// NOTE(review): `<...>` spans (template parameter lists, cast target types) appear to have
// been stripped from this text; the code tokens are kept exactly as found.

// Encodes a kernel dispatch into this command list.
//   hFunction               kernel handle; must resolve to a non-null Kernel (UNRECOVERABLE_IF otherwise)
//   pThreadGroupDimensions  group counts; dereferenced unconditionally when !isIndirect
//   hEvent                  optional event signalled after the walker (skipped when null)
//   numWaitEvents, phWaitEvents
//                           not referenced anywhere in this body
//   isIndirect, isPredicate forwarded to EncodeDispatchKernel::encode
// Always returns ZE_RESULT_SUCCESS.
template ze_result_t CommandListCoreFamily::appendLaunchFunctionWithParams(ze_kernel_handle_t hFunction,
                                                                          const ze_group_count_t *pThreadGroupDimensions,
                                                                          ze_event_handle_t hEvent,
                                                                          uint32_t numWaitEvents,
                                                                          ze_event_handle_t *phWaitEvents,
                                                                          bool isIndirect, bool isPredicate) {
    const auto function = Kernel::fromHandle(hFunction);
    UNRECOVERABLE_IF(function == nullptr);
    const auto functionImmutableData = function->getImmutableData();

    // The list tracks the largest per-thread scratch size and the lowest preemption mode
    // among all kernels appended so far.
    commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, function->getPerThreadScratchSize());

    auto functionPreemptionMode = obtainFunctionPreemptionMode(function);
    commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode);

    if (!isIndirect) {
        // Direct dispatch: group counts are known now and stored on the kernel.
        function->setGroupCount(pThreadGroupDimensions->groupCountX,
                                pThreadGroupDimensions->groupCountY,
                                pThreadGroupDimensions->groupCountZ);
    }

    if (isIndirect && pThreadGroupDimensions) {
        // Indirect dispatch: group counts are loaded from memory by the GPU.
        prepareIndirectParams(pThreadGroupDimensions);
    }

    if (function->hasIndirectAllocationsAllowed()) {
        // Add the internal SVM allocations selected by the kernel's unified-memory mask.
        UnifiedMemoryControls unifiedMemoryControls = function->getUnifiedMemoryControls();
        auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
        auto &residencyContainer = commandContainer.getResidencyContainer();

        svmAllocsManager->addInternalAllocationsToResidencyContainer(residencyContainer, unifiedMemoryControls.generateMask());
    }

    NEO::EncodeDispatchKernel::encode(commandContainer,
                                      reinterpret_cast(pThreadGroupDimensions), isIndirect,
                                      isPredicate, function, 0, device->getNEODevice(), commandListPreemptionMode);

    if (hEvent) {
        // Signal is appended after the dispatch has been encoded.
        appendSignalEventPostWalker(hEvent);
    }

    // Kernel ISA and every resource the kernel itself tracks must be resident at execution.
    commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
    auto &residencyContainer = function->getResidencyContainer();
    for (auto resource : residencyContainer) {
        commandContainer.addToResidencyContainer(resource);
    }

    if (functionImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
        storePrintfFunction(function);
    }

    return ZE_RESULT_SUCCESS;
}
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END); bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( *(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, dcFlushEnable, device->getHwInfo()); } } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h000066400000000000000000000076401363734646600277450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { struct EventPool; struct Event; template struct CommandListCoreFamilyImmediate : public CommandListCoreFamily { using BaseClass = CommandListCoreFamily; using BaseClass::executeCommandListImmediate; using BaseClass::BaseClass; CommandListCoreFamilyImmediate() {} CommandListCoreFamilyImmediate(uint32_t numIddsPerBlock) : CommandListCoreFamily(numIddsPerBlock) {} ze_result_t appendLaunchFunction(ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyRegion(void *dstPtr, const ze_copy_region_t 
// NOTE(review): `<...>` spans (template parameter lists and the base-class template
// arguments) appear to have been stripped from this text; tokens are kept exactly as found.
//
// Every definition below follows one shape: call the CommandListCoreFamily implementation
// of the same name, and if it returned ZE_RESULT_SUCCESS call executeCommandListImmediate()
// before returning that result. Any other result is returned without executing.
// All wrappers pass `true` to executeCommandListImmediate except appendPageFaultCopy,
// which passes `false`.

// Immediate variant of appendLaunchFunction.
template ze_result_t CommandListCoreFamilyImmediate::appendLaunchFunction(
    ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions,
    ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    auto ret = CommandListCoreFamily::appendLaunchFunction(hFunction, pThreadGroupDimensions,
                                                           hEvent, numWaitEvents, phWaitEvents);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendLaunchFunctionIndirect.
template ze_result_t CommandListCoreFamilyImmediate::appendLaunchFunctionIndirect(
    ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer,
    ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    auto ret = CommandListCoreFamily::appendLaunchFunctionIndirect(hFunction, pDispatchArgumentsBuffer,
                                                                   hEvent, numWaitEvents, phWaitEvents);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendBarrier.
template ze_result_t CommandListCoreFamilyImmediate::appendBarrier(
    ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    auto ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendMemoryCopy.
template ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy(
    void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent,
    uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    auto ret = CommandListCoreFamily::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
                                                       numWaitEvents, phWaitEvents);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendMemoryCopyRegion.
template ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegion(
    void *dstPtr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch,
    const void *srcPtr, const ze_copy_region_t *srcRegion, uint32_t srcPitch,
    uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent) {
    auto ret = CommandListCoreFamily::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch,
                                                             srcPtr, srcRegion, srcPitch, srcSlicePitch,
                                                             hSignalEvent);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendMemoryFill.
template ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void *ptr, const void *pattern,
                                                                     size_t patternSize, size_t size,
                                                                     ze_event_handle_t hEvent) {
    auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hEvent);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendSignalEvent.
template ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_event_handle_t hEvent) {
    auto ret = CommandListCoreFamily::appendSignalEvent(hEvent);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendEventReset.
template ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_event_handle_t hEvent) {
    auto ret = CommandListCoreFamily::appendEventReset(hEvent);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendPageFaultCopy.
// This is the only wrapper that calls executeCommandListImmediate(false).
template ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr,
                                                                        NEO::GraphicsAllocation *srcptr,
                                                                        size_t size, bool flushHost) {
    auto ret = CommandListCoreFamily::appendPageFaultCopy(dstptr, srcptr, size, flushHost);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(false);
    }
    return ret;
}

// Immediate variant of appendWaitOnEvents.
template ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) {
    auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phEvent);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}

// Immediate variant of appendImageCopyFromMemory.
template ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMemory(
    ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion,
    ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hEvent,
                                                                numWaitEvents, phWaitEvents);
    if (ret == ZE_RESULT_SUCCESS) {
        executeCommandListImmediate(true);
    }
    return ret;
}
phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(true); } return ret; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist_imp.cpp000066400000000000000000000066531363734646600264340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/tools/source/metrics/metric.h" #include "igfxfmid.h" #include namespace L0 { CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {}; CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {}; ze_result_t CommandListImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } ze_result_t CommandListImp::appendMetricMemoryBarrier() { return MetricQuery::appendMemoryBarrier(*this); } ze_result_t CommandListImp::appendMetricTracerMarker(zet_metric_tracer_handle_t hMetricTracer, uint32_t value) { return MetricQuery::appendTracerMarker(*this, hMetricTracer, value); } ze_result_t CommandListImp::appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) { return MetricQuery::fromHandle(hMetricQuery)->appendBegin(*this); } ze_result_t CommandListImp::appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hCompletionEvent) { return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hCompletionEvent); } CommandList *CommandList::create(uint32_t productFamily, Device *device) { CommandListAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandListFactory[productFamily]; } CommandListImp *commandList = nullptr; if (allocator) { commandList = 
static_cast((*allocator)(CommandList::defaultNumIddsPerBlock)); commandList->initialize(device); } return commandList; } CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc, bool internalUsage) { auto deviceImp = static_cast(device); NEO::CommandStreamReceiver *csr = nullptr; if (internalUsage) { csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver; } else { csr = deviceImp->neoDevice->getDefaultEngine().commandStreamReceiver; } auto commandQueue = CommandQueue::create(productFamily, device, csr, desc); if (!commandQueue) { return nullptr; } CommandListAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandListFactoryImmediate[productFamily]; } CommandListImp *commandList = nullptr; if (allocator) { commandList = static_cast((*allocator)(CommandList::commandListimmediateIddsPerBlock)); commandList->initialize(device); } if (!commandList) { commandQueue->destroy(); return nullptr; } commandList->cmdQImmediate = commandQueue; commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; commandList->cmdQImmediateDesc = desc; commandList->commandListPreemptionMode = device->getDevicePreemptionMode(); return commandList; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdlist/cmdlist_imp.h000066400000000000000000000016451363734646600260750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" namespace L0 { struct CommandListImp : CommandList { CommandListImp() {} CommandListImp(uint32_t numIddsPerBlock) : CommandList(numIddsPerBlock) {} ze_result_t destroy() override; ze_result_t appendMetricMemoryBarrier() override; ze_result_t appendMetricTracerMarker(zet_metric_tracer_handle_t hMetricTracer, uint32_t value) override; ze_result_t 
appendMetricQueryBegin(zet_metric_query_handle_t hMetricQuery) override; ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hCompletionEvent) override; protected: ~CommandListImp() override = default; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdqueue/000077500000000000000000000000001363734646600235635ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/cmdqueue/cmdqueue.cpp000066400000000000000000000136371363734646600261110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "hw_helpers.h" #include "igfxfmid.h" namespace L0 { CommandQueueAllocatorFn commandQueueFactory[IGFX_MAX_PRODUCT] = {}; ze_result_t CommandQueueImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } void CommandQueueImp::initialize() { buffers.initialize(device, totalCmdBufferSize); NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation(); commandStream = new NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize); UNRECOVERABLE_IF(commandStream == nullptr); commandStream->replaceGraphicsAllocation(bufferAllocation); } void CommandQueueImp::reserveLinearStreamSize(size_t size) { UNRECOVERABLE_IF(commandStream == nullptr); if (commandStream->getAvailableSpace() < size) { buffers.switchBuffers(csr); NEO::GraphicsAllocation *nextBufferAllocation = 
buffers.getCurrentBufferAllocation(); commandStream->replaceBuffer(nextBufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize); commandStream->replaceGraphicsAllocation(nextBufferAllocation); } } void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr) { UNRECOVERABLE_IF(csr == nullptr); NEO::BatchBuffer batchBuffer(commandStream->getGraphicsAllocation(), offset, 0u, nullptr, false, false, NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount, commandStream->getUsed(), commandStream, endingCmdPtr); csr->submitBatchBuffer(batchBuffer, residencyContainer); buffers.setCurrentFlushStamp(csr->obtainCurrentFlushStamp()); } ze_result_t CommandQueueImp::synchronize(uint32_t timeout) { return synchronizeByPollingForTaskCount(timeout); } ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint32_t timeout) { UNRECOVERABLE_IF(csr == nullptr); auto taskCountToWait = this->taskCount; waitForTaskCountWithKmdNotifyFallbackHelper(csr, this->taskCount, 0, false, false); bool enableTimeout = (timeout != std::numeric_limits::max()); csr->waitForCompletionWithTimeout(enableTimeout, timeout, this->taskCount); if (*csr->getTagAddress() < taskCountToWait) { return ZE_RESULT_NOT_READY; } printFunctionsPrintfOutput(); return ZE_RESULT_SUCCESS; } void CommandQueueImp::printFunctionsPrintfOutput() { size_t size = this->printfFunctionContainer.size(); for (size_t i = 0; i < size; i++) { this->printfFunctionContainer[i]->printPrintfOutput(); } this->printfFunctionContainer.clear(); } CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) { CommandQueueAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandQueueFactory[productFamily]; } CommandQueueImp *commandQueue = nullptr; if (allocator) { commandQueue = static_cast((*allocator)(device, csr, desc)); 
commandQueue->initialize(); } return commandQueue; } ze_command_queue_mode_t CommandQueueImp::getSynchronousMode() { return desc.mode; } void CommandQueueImp::CommandBufferManager::initialize(Device *device, size_t sizeRequested) { size_t alignedSize = alignUp(sizeRequested, MemoryConstants::pageSize64k); NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, alignedSize, NEO::GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->isMultiDeviceCapable(), false, NEO::SubDevice::unspecifiedSubDeviceIndex}; buffers[BUFFER_ALLOCATION::FIRST] = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(nullptr == buffers[BUFFER_ALLOCATION::FIRST]); memset(buffers[BUFFER_ALLOCATION::FIRST]->getUnderlyingBuffer(), 0, buffers[BUFFER_ALLOCATION::FIRST]->getUnderlyingBufferSize()); buffers[BUFFER_ALLOCATION::SECOND] = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(nullptr == buffers[BUFFER_ALLOCATION::SECOND]); memset(buffers[BUFFER_ALLOCATION::SECOND]->getUnderlyingBuffer(), 0, buffers[BUFFER_ALLOCATION::SECOND]->getUnderlyingBufferSize()); flushId[BUFFER_ALLOCATION::FIRST] = 0u; flushId[BUFFER_ALLOCATION::SECOND] = 0u; } void CommandQueueImp::CommandBufferManager::destroy(NEO::MemoryManager *memoryManager) { memoryManager->freeGraphicsMemory(buffers[BUFFER_ALLOCATION::FIRST]); memoryManager->freeGraphicsMemory(buffers[BUFFER_ALLOCATION::SECOND]); } void CommandQueueImp::CommandBufferManager::switchBuffers(NEO::CommandStreamReceiver *csr) { if (bufferUse == BUFFER_ALLOCATION::FIRST) { bufferUse = BUFFER_ALLOCATION::SECOND; } else { bufferUse = BUFFER_ALLOCATION::FIRST; } NEO::FlushStamp completionId = flushId[bufferUse]; if (completionId != 0u) { UNRECOVERABLE_IF(csr == nullptr); csr->waitForFlushStamp(completionId); } } } // namespace L0 
// NOTE(review): `<...>` spans appear to have been stripped from this text
// (`template struct Allocator`, `static_cast(handle)`, `std::atomic`);
// the code tokens are kept exactly as found.

// Abstract command queue. It derives from the _ze_command_queue_handle_t tag struct,
// so a CommandQueue pointer doubles as the API handle (see fromHandle / toHandle).
struct CommandQueue : _ze_command_queue_handle_t {
    // Factory helper: allocate() news up a `Type` from (device, csr, desc).
    // Its address is what CommandQueuePopulateFactory stores in commandQueueFactory.
    template struct Allocator {
        static CommandQueue *allocate(Device *device, NEO::CommandStreamReceiver *csr,
                                      const ze_command_queue_desc_t *desc) {
            return new Type(device, csr, desc);
        }
    };

    virtual ~CommandQueue() = default;

    // Pure-virtual queue operations implemented by derived classes.
    virtual ze_result_t createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) = 0;
    virtual ze_result_t destroy() = 0;
    virtual ze_result_t executeCommandLists(uint32_t numCommandLists,
                                            ze_command_list_handle_t *phCommandLists,
                                            ze_fence_handle_t hFence, bool performMigration) = 0;
    virtual ze_result_t executeCommands(uint32_t numCommands, void *phCommands,
                                        ze_fence_handle_t hFence) = 0;
    virtual ze_result_t synchronize(uint32_t timeout) = 0;

    // Looks up the allocator registered for productFamily and builds a queue with it.
    static CommandQueue *create(uint32_t productFamily, Device *device,
                                NEO::CommandStreamReceiver *csr,
                                const ze_command_queue_desc_t *desc);

    // Handle <-> object conversions; no validation is performed.
    static CommandQueue *fromHandle(ze_command_queue_handle_t handle) {
        return static_cast(handle);
    }

    ze_command_queue_handle_t toHandle() { return this; }

    // Overwrites the preemption mode tracked for this queue.
    void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) {
        commandQueuePreemptionMode = newPreemptionMode;
    }

  protected:
    // NOTE(review): no initializer is given here; whether a constructor elsewhere sets
    // a starting value is not visible in this struct — confirm.
    std::atomic commandQueuePerThreadScratchSize;
    NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
    bool commandQueueDebugCmdsProgrammed = false;
};
// NOTE(review): the template parameter list appears to have been stripped from this
// text; the code tokens are kept exactly as found.

// Stub implementation: all three arguments are ignored and the call always reports
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
template ze_result_t CommandQueueHw::executeCommands(uint32_t numCommandGraphs,
                                                    void *phCommands,
                                                    ze_fence_handle_t hFence) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
ze_fence_handle_t hFence) override; void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) override; void programGeneralStateBaseAddress(uint64_t gsba, NEO::LinearStream &commandStream); size_t estimateStateBaseAddressCmdSize(); void programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream); size_t estimateFrontEndCmdSize(); size_t estimatePipelineSelect(); void programPipelineSelect(NEO::LinearStream &commandStream); void handleScratchSpace(NEO::ResidencyContainer &residency, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState); }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdqueue/cmdqueue_hw.inl000066400000000000000000000343671363734646600266120ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/interlocked_max.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/tools/source/metrics/metric.h" #include #include namespace L0 { template ze_result_t CommandQueueHw::createFence(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) 
{ *phFence = Fence::create(this, desc); return ZE_RESULT_SUCCESS; } template ze_result_t CommandQueueHw::destroy() { delete commandStream; buffers.destroy(this->getDevice()->getDriverHandle()->getMemoryManager()); delete this; return ZE_RESULT_SUCCESS; } template ze_result_t CommandQueueHw::executeCommandLists( uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; size_t spaceForResidency = 0; size_t preemptionSize = 0u; size_t debuggerCmdsSize = 0; constexpr size_t residencyContainerSpaceForPreemption = 2; constexpr size_t residencyContainerSpaceForFence = 1; constexpr size_t residencyContainerSpaceForTagWrite = 1; NEO::Device *neoDevice = device->getNEODevice(); NEO::PreemptionMode statePreemption = commandQueuePreemptionMode; auto devicePreemption = device->getDevicePreemptionMode(); if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) { preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(commandQueuePreemptionMode, devicePreemption) + NEO::PreemptionHelper::getRequiredPreambleSize(*neoDevice) + NEO::PreemptionHelper::getRequiredStateSipCmdSize(*neoDevice); statePreemption = devicePreemption; } if (!commandQueueDebugCmdsProgrammed) { debuggerCmdsSize += NEO::PreambleHelper::getKernelDebuggingCommandsSize(neoDevice->isDebuggerActive()); } if (devicePreemption == NEO::PreemptionMode::MidThread) { spaceForResidency += residencyContainerSpaceForPreemption; } bool directSubmissionEnabled = csr->isDirectSubmissionEnabled(); NEO::ResidencyContainer residencyContainer; L0::Fence *fence = nullptr; device->activateMetricGroups(); size_t totalCmdBuffers = 0; for 
(auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(phCommandLists[i]); totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size(); spaceForResidency += commandList->commandContainer.getResidencyContainer().size(); auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { preemptionSize += sizeof(PIPE_CONTROL); preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(commandListPreemption, statePreemption); statePreemption = commandListPreemption; } interlockedMax(commandQueuePerThreadScratchSize, commandList->getCommandListPerThreadScratchSize()); } size_t linearStreamSizeEstimate = totalCmdBuffers * sizeof(MI_BATCH_BUFFER_START); if (directSubmissionEnabled) { linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_START); } else { linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_END); } if (hFence) { fence = Fence::fromHandle(hFence); spaceForResidency += residencyContainerSpaceForFence; linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); } spaceForResidency += residencyContainerSpaceForTagWrite; residencyContainer.reserve(spaceForResidency); auto scratchSpaceController = csr->getScratchSpaceController(); bool gsbaStateDirty = false; bool frontEndStateDirty = false; handleScratchSpace(residencyContainer, scratchSpaceController, gsbaStateDirty, frontEndStateDirty); gsbaStateDirty |= !gsbaInit; frontEndStateDirty |= !frontEndInit; if (!gpgpuEnabled) { linearStreamSizeEstimate += estimatePipelineSelect(); } if (frontEndStateDirty) { linearStreamSizeEstimate += estimateFrontEndCmdSize(); } if (gsbaStateDirty) { linearStreamSizeEstimate += estimateStateBaseAddressCmdSize(); } linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize; 
size_t alignedSize = alignUp(linearStreamSizeEstimate, minCmdBufferPtrAlign); size_t padding = alignedSize - linearStreamSizeEstimate; reserveLinearStreamSize(alignedSize); NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize); if (!gpgpuEnabled) { programPipelineSelect(child); } if (!commandQueueDebugCmdsProgrammed && neoDevice->isDebuggerActive()) { NEO::PreambleHelper::programKernelDebugging(&child); commandQueueDebugCmdsProgrammed = true; } if (frontEndStateDirty) { programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child); } if (gsbaStateDirty) { programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child); } if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) { NEO::PreemptionHelper::programCsrBaseAddress(child, *neoDevice, csr->getPreemptionAllocation()); NEO::PreemptionHelper::programStateSip(child, *neoDevice); NEO::PreemptionHelper::programCmdStream(child, devicePreemption, commandQueuePreemptionMode, csr->getPreemptionAllocation()); commandQueuePreemptionMode = devicePreemption; statePreemption = commandQueuePreemptionMode; } const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread || neoDevice->isDebuggerActive(); if (devicePreemption == NEO::PreemptionMode::MidThread) { residencyContainer.push_back(csr->getPreemptionAllocation()); } if (sipKernelUsed) { auto sipIsa = NEO::SipKernel::getSipKernelAllocation(*neoDevice); residencyContainer.push_back(sipIsa); } if (neoDevice->isDebuggerActive()) { residencyContainer.push_back(device->getDebugSurface()); } for (auto i = 0u; i < numCommandLists; ++i) { auto commandList = CommandList::fromHandle(phCommandLists[i]); auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations(); auto cmdBufferCount = cmdBufferAllocations.size(); auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { 
NEO::MemorySynchronizationCommands::addPipeControl(child, false); NEO::PreemptionHelper::programCmdStream(child, commandListPreemption, statePreemption, csr->getPreemptionAllocation()); statePreemption = commandListPreemption; } for (size_t iter = 0; iter < cmdBufferCount; iter++) { auto allocation = cmdBufferAllocations[iter]; NEO::EncodeBatchBufferStartOrEnd::programBatchBufferStart(&child, allocation->getGpuAddress(), true); } printfFunctionContainer.insert(printfFunctionContainer.end(), commandList->getPrintfFunctionContainer().begin(), commandList->getPrintfFunctionContainer().end()); NEO::PageFaultManager *pageFaultManager = nullptr; if (performMigration) { pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager(); if (pageFaultManager == nullptr) { performMigration = false; } } for (auto alloc : commandList->commandContainer.getResidencyContainer()) { if (residencyContainer.end() == std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) { residencyContainer.push_back(alloc); if (performMigration) { if (alloc && (alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_GPU || alloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::SVM_CPU)) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(alloc->getGpuAddress())); } } } } } commandQueuePreemptionMode = statePreemption; if (hFence) { residencyContainer.push_back(&fence->getAllocation()); NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo()); } dispatchTaskCountWrite(child, true); residencyContainer.push_back(csr->getTagAllocation()); void *endingCmd = nullptr; if (directSubmissionEnabled) { endingCmd = child.getSpace(0); NEO::EncodeBatchBufferStartOrEnd::programBatchBufferStart(&child, 0ull, false); } else { MI_BATCH_BUFFER_END cmd = 
GfxFamily::cmdInitBatchBufferEnd; auto buffer = child.getSpaceForCmd(); *(MI_BATCH_BUFFER_END *)buffer = cmd; } if (padding) { void *paddingPtr = child.getSpace(padding); memset(paddingPtr, 0, padding); } submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), residencyContainer, endingCmd); this->taskCount = csr->peekTaskCount(); csr->makeSurfacePackNonResident(residencyContainer); if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) { this->synchronize(std::numeric_limits::max()); } return ZE_RESULT_SUCCESS; } template void CommandQueueHw::programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; UNRECOVERABLE_IF(csr == nullptr); NEO::PreambleHelper::programVFEState(&commandStream, device->getHwInfo(), commandQueuePerThreadScratchSize, scratchAddress, device->getMaxNumHwThreads(), csr->getOsContext().getEngineType()); frontEndInit = true; } template size_t CommandQueueHw::estimateFrontEndCmdSize() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; return NEO::PreambleHelper::getVFECommandsSize(); } template size_t CommandQueueHw::estimatePipelineSelect() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; return NEO::PreambleHelper::getCmdSizeForPipelineSelect(device->getHwInfo()); } template void CommandQueueHw::programPipelineSelect(NEO::LinearStream &commandStream) { NEO::PipelineSelectArgs args = {0, 0}; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; NEO::PreambleHelper::programPipelineSelect(&commandStream, args, device->getHwInfo()); gpgpuEnabled = true; } template void CommandQueueHw::dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; UNRECOVERABLE_IF(csr == nullptr); auto taskCountToWrite = 
csr->peekTaskCount() + 1; auto gpuAddress = static_cast(csr->getTagAllocation()->getGpuAddress()); NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, gpuAddress, taskCountToWrite, true, device->getHwInfo()); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl000066400000000000000000000070511363734646600275720ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/interlocked_max.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/os_interface/os_context.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/tools/source/metrics/metric.h" #include namespace L0 { template void CommandQueueHw::programGeneralStateBaseAddress(uint64_t gsba, NEO::LinearStream &commandStream) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; PIPE_CONTROL *pcCmd = commandStream.getSpaceForCmd(); *pcCmd = GfxFamily::cmdInitPipeControl; pcCmd->setTextureCacheInvalidationEnable(true); pcCmd->setDcFlushEnable(true); pcCmd->setCommandStreamerStallEnable(true); auto gmmHelper = device->getNEODevice()->getGmmHelper(); NEO::StateBaseAddressHelper::programStateBaseAddress(commandStream, nullptr, nullptr, nullptr, gsba, true, (device->getMOCS(true, false) >> 1), 
device->getDriverHandle()->getMemoryManager()->getInternalHeapBaseAddress(0), true, gmmHelper, false); gsbaInit = true; } template size_t CommandQueueHw::estimateStateBaseAddressCmdSize() { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; constexpr size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); return size; } template void CommandQueueHw::handleScratchSpace(NEO::ResidencyContainer &residency, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState) { if (commandQueuePerThreadScratchSize > 0) { scratchController->setRequiredScratchSpace(nullptr, commandQueuePerThreadScratchSize, 0u, csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState); auto scratchAllocation = scratchController->getScratchSpaceAllocation(); residency.push_back(scratchAllocation); } } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/cmdqueue/cmdqueue_imp.h000066400000000000000000000056551363734646600264240ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/memory_manager/memory_constants.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include namespace NEO { class LinearStream; class GraphicsAllocation; class MemoryManager; } // namespace NEO namespace L0 { struct CommandList; struct Kernel; struct CommandQueueImp : public CommandQueue { class CommandBufferManager { public: enum BUFFER_ALLOCATION : uint32_t { FIRST = 0, SECOND, COUNT }; void initialize(Device *device, size_t sizeRequested); void destroy(NEO::MemoryManager *memoryManager); void switchBuffers(NEO::CommandStreamReceiver *csr); NEO::GraphicsAllocation *getCurrentBufferAllocation() { return 
buffers[bufferUse]; } void setCurrentFlushStamp(NEO::FlushStamp flushStamp) { flushId[bufferUse] = flushStamp; } private: NEO::GraphicsAllocation *buffers[BUFFER_ALLOCATION::COUNT]; NEO::FlushStamp flushId[BUFFER_ALLOCATION::COUNT]; BUFFER_ALLOCATION bufferUse = BUFFER_ALLOCATION::FIRST; }; static constexpr size_t defaultQueueCmdBufferSize = 128 * MemoryConstants::kiloByte; static constexpr size_t minCmdBufferPtrAlign = 8; static constexpr size_t totalCmdBufferSize = defaultQueueCmdBufferSize + MemoryConstants::cacheLineSize + NEO::CSRequirements::csOverfetchSize; CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : device(device), csr(csr), desc(*desc) { std::atomic_init(&commandQueuePerThreadScratchSize, 0u); } ze_result_t destroy() override; ze_result_t synchronize(uint32_t timeout) override; void initialize(); Device *getDevice() { return device; } uint32_t getTaskCount() { return taskCount; } NEO::CommandStreamReceiver *getCsr() { return csr; } void reserveLinearStreamSize(size_t size); ze_command_queue_mode_t getSynchronousMode(); virtual void dispatchTaskCountWrite(NEO::LinearStream &commandStream, bool flushDataCache) = 0; protected: void submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr); ze_result_t synchronizeByPollingForTaskCount(uint32_t timeout); void printFunctionsPrintfOutput(); Device *device = nullptr; NEO::CommandStreamReceiver *csr = nullptr; const ze_command_queue_desc_t desc; NEO::LinearStream *commandStream = nullptr; uint32_t taskCount = 0; std::vector printfFunctionContainer; bool gsbaInit = false; bool frontEndInit = false; bool gpgpuEnabled = false; CommandBufferManager buffers; }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/compiler_interface/000077500000000000000000000000001363734646600256055ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/compiler_interface/CMakeLists.txt000066400000000000000000000007101363734646600303430ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_COMPILER_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/default_cache_config.cpp ${CMAKE_CURRENT_SOURCE_DIR}/default_l0_cache_config.cpp ${CMAKE_CURRENT_SOURCE_DIR}/default_l0_cache_config.h ${CMAKE_CURRENT_SOURCE_DIR}/l0_reg_path.h ) set_property(GLOBAL PROPERTY L0_SRCS_COMPILER_INTERFACE ${L0_SRCS_COMPILER_INTERFACE}) compute-runtime-20.13.16352/level_zero/core/source/compiler_interface/default_cache_config.cpp000066400000000000000000000005701363734646600324070ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/default_cache_config.h" #include "level_zero/core/source/compiler_interface/default_l0_cache_config.h" namespace NEO { CompilerCacheConfig getDefaultCompilerCacheConfig() { return L0::getDefaultL0CompilerCacheConfig(); } } // namespace NEO compute-runtime-20.13.16352/level_zero/core/source/compiler_interface/default_l0_cache_config.cpp000066400000000000000000000015441363734646600330040ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #define L0_CACHE_LOCATION "l0_c_cache" #include "level_zero/core/source/compiler_interface/default_l0_cache_config.h" #include "shared/source/utilities/debug_settings_reader.h" #include "level_zero/core/source/compiler_interface/l0_reg_path.h" #include namespace L0 { NEO::CompilerCacheConfig getDefaultL0CompilerCacheConfig() { NEO::CompilerCacheConfig ret; std::string keyName = registryPath; keyName += "l0_c_cache_dir"; std::unique_ptr 
settingsReader(NEO::SettingsReader::createOsReader(false, keyName)); ret.cacheDir = settingsReader->getSetting(settingsReader->appSpecificLocation(keyName), static_cast(L0_CACHE_LOCATION)); ret.cacheFileExtension = ".l0_c_cache"; return ret; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/compiler_interface/default_l0_cache_config.h000066400000000000000000000003601363734646600324440ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_cache.h" namespace L0 { NEO::CompilerCacheConfig getDefaultL0CompilerCacheConfig(); } compute-runtime-20.13.16352/level_zero/core/source/compiler_interface/l0_reg_path.h000066400000000000000000000002451363734646600301430ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace L0 { extern const char *registryPath; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/debugger/000077500000000000000000000000001363734646600235375ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/debugger/CMakeLists.txt000066400000000000000000000004571363734646600263050ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DEBUGGER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.h ) set_property(GLOBAL PROPERTY L0_SRCS_DEBUGGER ${L0_SRCS_DEBUGGER}) compute-runtime-20.13.16352/level_zero/core/source/debugger/debug_manager.cpp000066400000000000000000000005071363734646600270250ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "level_zero/core/source/compiler_interface/l0_reg_path.h" namespace NEO { DebugSettingsManager 
DebugManager(L0::registryPath); } compute-runtime-20.13.16352/level_zero/core/source/debugger/debugger.cpp000066400000000000000000000016641363734646600260360ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debugger/debugger.h" #include "shared/source/built_ins/sip.h" #include "shared/source/built_ins/sip_kernel_type.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/source_level_debugger/source_level_debugger.h" std::unique_ptr NEO::Debugger::create(HardwareInfo *hwInfo) { std::unique_ptr sourceLevelDebugger; if (hwInfo->capabilityTable.debuggerSupported) { sourceLevelDebugger.reset(SourceLevelDebugger::create()); } if (sourceLevelDebugger) { bool localMemorySipAvailable = (SipKernelType::DbgCsrLocal == SipKernel::getSipKernelType(hwInfo->platform.eRenderCoreFamily, true)); sourceLevelDebugger->initialize(localMemorySipAvailable); } return sourceLevelDebugger; }compute-runtime-20.13.16352/level_zero/core/source/debugger/debugger_l0.cpp000066400000000000000000000003521363734646600264220ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/debugger/debugger_l0.h" namespace L0 { bool DebuggerL0::isDebuggerActive() { return false; } } // namespace L0compute-runtime-20.13.16352/level_zero/core/source/debugger/debugger_l0.h000066400000000000000000000004651363734646600260740ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debugger/debugger.h" namespace L0 { class DebuggerL0 : public NEO::Debugger { public: bool isDebuggerActive() override; ~DebuggerL0() override = default; }; } // namespace 
L0compute-runtime-20.13.16352/level_zero/core/source/device/000077500000000000000000000000001363734646600232125ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/device/device.h000066400000000000000000000133071363734646600246260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_interface.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle.h" #include #include struct _ze_device_handle_t {}; namespace NEO { class Device; class MemoryManager; class SourceLevelDebugger; struct DeviceInfo; } // namespace NEO namespace L0 { struct DriverHandle; struct BuiltinFunctionsLib; struct ExecutionEnvironment; struct MetricContext; struct Device : _ze_device_handle_t { virtual uint32_t getRootDeviceIndex() = 0; virtual ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) = 0; virtual ze_result_t copyCommandList(ze_command_list_handle_t hCommandList, ze_command_list_handle_t *phCommandList) = 0; virtual ze_result_t createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) = 0; virtual ze_result_t createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList) = 0; virtual ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) = 0; virtual ze_result_t createEventPool(const ze_event_pool_desc_t *desc, ze_event_pool_handle_t *phEventPool) = 0; virtual ze_result_t createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) = 0; virtual ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t 
*buildLog) = 0; virtual ze_result_t createSampler(const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) = 0; virtual ze_result_t evictImage(ze_image_handle_t hImage) = 0; virtual ze_result_t evictMemory(void *ptr, size_t size) = 0; virtual ze_result_t getComputeProperties(ze_device_compute_properties_t *pComputeProperties) = 0; virtual ze_result_t getP2PProperties(ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) = 0; virtual ze_result_t getKernelProperties(ze_device_kernel_properties_t *pKernelProperties) = 0; virtual ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) = 0; virtual ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) = 0; virtual ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) = 0; virtual ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) = 0; virtual ze_result_t makeImageResident(ze_image_handle_t hImage) = 0; virtual ze_result_t makeMemoryResident(void *ptr, size_t size) = 0; virtual ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) = 0; virtual ze_result_t setLastLevelCacheConfig(ze_cache_config_t cacheConfig) = 0; virtual ze_result_t getCacheProperties(ze_device_cache_properties_t *pCacheProperties) = 0; virtual ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) = 0; virtual ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) = 0; virtual ze_result_t systemBarrier() = 0; virtual ze_result_t registerCLMemory(cl_context context, cl_mem mem, void **ptr) = 0; virtual ze_result_t registerCLProgram(cl_context context, cl_program program, ze_module_handle_t *phModule) = 0; virtual ze_result_t registerCLCommandQueue(cl_context context, cl_command_queue commandQueue, ze_command_queue_handle_t *phCommandQueue) = 0; virtual ~Device() = default; virtual void 
*getExecEnvironment() = 0; virtual BuiltinFunctionsLib *getBuiltinFunctionsLib() = 0; virtual uint32_t getMOCS(bool l3enabled, bool l1enabled) = 0; virtual uint32_t getMaxNumHwThreads() const = 0; virtual NEO::HwHelper &getHwHelper() = 0; virtual bool isMultiDeviceCapable() const = 0; virtual const NEO::HardwareInfo &getHwInfo() const = 0; virtual NEO::OSInterface &getOsInterface() = 0; virtual uint32_t getPlatformInfo() const = 0; virtual MetricContext &getMetricContext() = 0; virtual ze_result_t activateMetricGroups(uint32_t count, zet_metric_group_handle_t *phMetricGroups) = 0; virtual void activateMetricGroups() = 0; virtual DriverHandle *getDriverHandle() = 0; virtual void setDriverHandle(DriverHandle *driverHandle) = 0; static Device *fromHandle(ze_device_handle_t handle) { return static_cast(handle); } inline ze_device_handle_t toHandle() { return this; } static Device *create(DriverHandle *driverHandle, NEO::Device *neoDevice); virtual NEO::PreemptionMode getDevicePreemptionMode() const = 0; virtual const NEO::DeviceInfo &getDeviceInfo() const = 0; virtual NEO::Device *getNEODevice() = 0; NEO::SourceLevelDebugger *getSourceLevelDebugger() { return getNEODevice() ? 
reinterpret_cast(getNEODevice()->getDebugger()) : nullptr; } virtual NEO::GraphicsAllocation *getDebugSurface() const = 0; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/device/device_imp.cpp000066400000000000000000000626641363734646600260400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device_imp.h" #include "shared/source/built_ins/sip.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device_info.h" #include "shared/source/device/sub_device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/program/program.h" #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/memory/memory_operations_helper.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/printf_handler/printf_handler.h" #include "level_zero/core/source/sampler/sampler.h" #include 
"level_zero/tools/source/metrics/metric.h" #include "hw_helpers.h" namespace L0 { uint32_t DeviceImp::getRootDeviceIndex() { return neoDevice->getRootDeviceIndex(); } DriverHandle *DeviceImp::getDriverHandle() { return this->driverHandle; } void DeviceImp::setDriverHandle(DriverHandle *driverHandle) { this->driverHandle = driverHandle; } ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) { *value = false; if (NEO::DebugManager.flags.CreateMultipleRootDevices.get() > 0) { *value = true; } if (NEO::DebugManager.flags.CreateMultipleSubDevices.get() > 0) { *value = true; } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::copyCommandList(ze_command_list_handle_t hCommandList, ze_command_list_handle_t *phCommandList) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) { auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; *commandList = CommandList::create(productFamily, this); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *phCommandList) { auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; *phCommandList = CommandList::createImmediate(productFamily, this, desc, false); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) { auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; *commandQueue = CommandQueue::create(productFamily, this, csr, desc); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::createEventPool(const ze_event_pool_desc_t *desc, ze_event_pool_handle_t *eventPool) { *eventPool = EventPool::create(this, desc); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::createImage(const ze_image_desc_t 
*desc, ze_image_handle_t *phImage) { if (desc->format.layout >= ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_Y8) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; *phImage = Image::create(productFamily, this, desc); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::createSampler(const ze_sampler_desc_t *desc, ze_sampler_handle_t *sampler) { auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; *sampler = Sampler::create(productFamily, this, desc); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog) { ModuleBuildLog *moduleBuildLog = nullptr; if (buildLog) { moduleBuildLog = ModuleBuildLog::create(); *buildLog = moduleBuildLog->toHandle(); } auto modulePtr = Module::create(this, desc, neoDevice, moduleBuildLog); if (modulePtr == nullptr) { return ZE_RESULT_ERROR_MODULE_BUILD_FAILURE; } *module = modulePtr; return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::evictImage(ze_image_handle_t hImage) { auto alloc = Image::fromHandle(hImage)->getAllocation(); NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->evict(*alloc); return changeMemoryOperationStatusToL0ResultType(success); } ze_result_t DeviceImp::evictMemory(void *ptr, size_t size) { auto alloc = getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); if (alloc == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } NEO::MemoryOperationsHandler *memoryOperationsIface = neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto success = memoryOperationsIface->evict(*alloc->gpuAllocation); return changeMemoryOperationStatusToL0ResultType(success); } ze_result_t DeviceImp::getComputeProperties(ze_device_compute_properties_t *pComputeProperties) { const auto &deviceInfo = 
this->neoDevice->getDeviceInfo(); pComputeProperties->maxTotalGroupSize = static_cast(deviceInfo.maxWorkGroupSize); pComputeProperties->maxGroupSizeX = static_cast(deviceInfo.maxWorkItemSizes[0]); pComputeProperties->maxGroupSizeY = static_cast(deviceInfo.maxWorkItemSizes[1]); pComputeProperties->maxGroupSizeZ = static_cast(deviceInfo.maxWorkItemSizes[2]); pComputeProperties->maxGroupCountX = UINT32_MAX; pComputeProperties->maxGroupCountY = UINT32_MAX; pComputeProperties->maxGroupCountZ = UINT32_MAX; pComputeProperties->maxSharedLocalMemory = static_cast(deviceInfo.localMemSize); pComputeProperties->numSubGroupSizes = static_cast(deviceInfo.maxSubGroups.size()); for (uint32_t i = 0; i < pComputeProperties->numSubGroupSizes; ++i) { pComputeProperties->subGroupSizes[i] = static_cast(deviceInfo.maxSubGroups[i]); } return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getP2PProperties(ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) { pP2PProperties->accessSupported = true; pP2PProperties->atomicsSupported = false; return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) { if (*pCount == 0) { *pCount = 1; return ZE_RESULT_SUCCESS; } if (*pCount > 1) { *pCount = 1; } if (nullptr == pMemProperties) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } const auto &deviceInfo = this->neoDevice->getDeviceInfo(); pMemProperties->maxClockRate = deviceInfo.maxClockFrequency; pMemProperties->maxBusWidth = deviceInfo.addressBits; pMemProperties->totalSize = deviceInfo.globalMemSize; return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) { pMemAccessProperties->hostAllocCapabilities = static_cast(ZE_MEMORY_ACCESS | ZE_MEMORY_ATOMIC_ACCESS); pMemAccessProperties->deviceAllocCapabilities = static_cast(ZE_MEMORY_ACCESS | ZE_MEMORY_ATOMIC_ACCESS); pMemAccessProperties->sharedSingleDeviceAllocCapabilities = 
static_cast(ZE_MEMORY_ACCESS | ZE_MEMORY_ATOMIC_ACCESS); pMemAccessProperties->sharedCrossDeviceAllocCapabilities = ze_memory_access_capabilities_t{}; pMemAccessProperties->sharedSystemAllocCapabilities = ze_memory_access_capabilities_t{}; return ZE_RESULT_SUCCESS; } static constexpr ze_fp_capabilities_t defaultFpFlags = static_cast(ZE_FP_CAPS_ROUND_TO_NEAREST | ZE_FP_CAPS_ROUND_TO_ZERO | ZE_FP_CAPS_ROUND_TO_INF | ZE_FP_CAPS_INF_NAN | ZE_FP_CAPS_DENORM | ZE_FP_CAPS_FMA); ze_result_t DeviceImp::getKernelProperties(ze_device_kernel_properties_t *pKernelProperties) { const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); const auto &deviceInfo = this->neoDevice->getDeviceInfo(); auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); std::string ilVersion = deviceInfo.ilVersion; size_t majorVersionPos = ilVersion.find('_'); size_t minorVersionPos = ilVersion.find('.'); if (majorVersionPos != std::string::npos && minorVersionPos != std::string::npos) { uint32_t majorSpirvVersion = static_cast(std::stoul(ilVersion.substr(majorVersionPos + 1, minorVersionPos))); uint32_t minorSpirvVersion = static_cast(std::stoul(ilVersion.substr(minorVersionPos + 1))); pKernelProperties->spirvVersionSupported = ZE_MAKE_VERSION(majorSpirvVersion, minorSpirvVersion); } else { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } pKernelProperties->fp16Supported = true; pKernelProperties->int64AtomicsSupported = hardwareInfo.capabilityTable.ftrSupportsInteger64BitAtomics; pKernelProperties->fp64Supported = hardwareInfo.capabilityTable.ftrSupportsFP64; pKernelProperties->halfFpCapabilities = defaultFpFlags; pKernelProperties->singleFpCapabilities = hardwareInfo.capabilityTable.ftrSupports64BitMath ? ZE_FP_CAPS_ROUNDED_DIVIDE_SQRT : ZE_FP_CAPS_NONE; pKernelProperties->doubleFpCapabilities = hardwareInfo.capabilityTable.ftrSupportsFP64 ? 
defaultFpFlags : ZE_FP_CAPS_NONE; pKernelProperties->nativeKernelSupported.id[0] = 0; processAdditionalKernelProperties(hwHelper, pKernelProperties); pKernelProperties->maxArgumentsSize = static_cast(this->neoDevice->getDeviceInfo().maxParameterSize); pKernelProperties->printfBufferSize = static_cast(this->neoDevice->getDeviceInfo().printfBufferSize); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) { const auto &deviceInfo = this->neoDevice->getDeviceInfo(); const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); pDeviceProperties->type = ZE_DEVICE_TYPE_GPU; pDeviceProperties->vendorId = deviceInfo.vendorId; pDeviceProperties->deviceId = hardwareInfo.platform.usDeviceID; uint32_t rootDeviceIndex = this->neoDevice->getRootDeviceIndex(); memcpy_s(pDeviceProperties->uuid.id, sizeof(uint32_t), &pDeviceProperties->vendorId, sizeof(pDeviceProperties->vendorId)); memcpy_s(pDeviceProperties->uuid.id + sizeof(uint32_t), sizeof(uint32_t), &pDeviceProperties->deviceId, sizeof(pDeviceProperties->deviceId)); memcpy_s(pDeviceProperties->uuid.id + (2 * sizeof(uint32_t)), sizeof(uint32_t), &rootDeviceIndex, sizeof(rootDeviceIndex)); pDeviceProperties->isSubdevice = isSubdevice; pDeviceProperties->subdeviceId = isSubdevice ? 
static_cast(neoDevice)->getSubDeviceIndex() : 0; pDeviceProperties->coreClockRate = deviceInfo.maxClockFrequency; pDeviceProperties->unifiedMemorySupported = true; pDeviceProperties->eccMemorySupported = this->neoDevice->getDeviceInfo().errorCorrectionSupport; pDeviceProperties->onDemandPageFaultsSupported = true; pDeviceProperties->maxCommandQueues = deviceInfo.maxOnDeviceQueues; pDeviceProperties->numAsyncComputeEngines = static_cast(hwHelper.getGpgpuEngineInstances(hardwareInfo).size()); pDeviceProperties->numAsyncCopyEngines = 1; pDeviceProperties->maxCommandQueuePriority = 0; pDeviceProperties->numThreadsPerEU = deviceInfo.numThreadsPerEU; pDeviceProperties->physicalEUSimdWidth = hwHelper.getMinimalSIMDSize(); pDeviceProperties->numEUsPerSubslice = hardwareInfo.gtSystemInfo.MaxEuPerSubSlice; pDeviceProperties->numSubslicesPerSlice = hardwareInfo.gtSystemInfo.SubSliceCount / hardwareInfo.gtSystemInfo.SliceCount; pDeviceProperties->numSlices = hardwareInfo.gtSystemInfo.SliceCount * ((this->numSubDevices > 0) ? 
this->numSubDevices : 1);
    pDeviceProperties->timerResolution = this->neoDevice->getDeviceInfo().outProfilingTimerResolution;

    std::string name = "Intel(R) ";
    name += NEO::familyName[hardwareInfo.platform.eRenderCoreFamily];
    name += '\0';
    // NOTE(review): the second argument (destination capacity) is name.length(), the same
    // value as the copy length, so the copy is not bounded by the size of
    // pDeviceProperties->name - confirm the destination is always large enough.
    memcpy_s(pDeviceProperties->name, name.length(), name.c_str(), name.length());

    return ZE_RESULT_SUCCESS;
}

// Sub-device enumeration.
// - *pCount == 0: writes the number of sub-devices to *pCount and returns.
// - otherwise: clamps *pCount to the number of sub-devices and copies that many
//   handles into phSubdevices (which must not be null).
ze_result_t DeviceImp::getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) {
    if (*pCount == 0) {
        *pCount = this->numSubDevices;
        return ZE_RESULT_SUCCESS;
    }

    if (phSubdevices == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // Never hand out more handles than exist; the caller learns the real count.
    if (*pCount > this->numSubDevices) {
        *pCount = this->numSubDevices;
    }

    for (uint32_t i = 0; i < *pCount; i++) {
        phSubdevices[i] = this->subDevices[i];
    }

    return ZE_RESULT_SUCCESS;
}

// Asks the root device environment's memory-operations interface to make the
// allocation behind hImage resident, and maps the outcome to a ze_result_t.
ze_result_t DeviceImp::makeImageResident(ze_image_handle_t hImage) {
    auto alloc = Image::fromHandle(hImage)->getAllocation();
    NEO::MemoryOperationsHandler *memoryOperationsIface =
        neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();
    auto success = memoryOperationsIface->makeResident(ArrayRef(&alloc, 1));
    return changeMemoryOperationStatusToL0ResultType(success);
}

// Looks ptr up in the driver's SVM allocation table and makes its GPU allocation
// resident. Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when ptr is not found there.
// The size argument is not read by this implementation.
ze_result_t DeviceImp::makeMemoryResident(void *ptr, size_t size) {
    auto alloc = getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr);
    if (alloc == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    NEO::MemoryOperationsHandler *memoryOperationsIface =
        neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get();
    auto success = memoryOperationsIface->makeResident(ArrayRef(&alloc->gpuAllocation, 1));
    return changeMemoryOperationStatusToL0ResultType(success);
}

// Not implemented: always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t DeviceImp::setIntermediateCacheConfig(ze_cache_config_t cacheConfig) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Not implemented: always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t DeviceImp::setLastLevelCacheConfig(ze_cache_config_t cacheConfig) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Fills pCacheProperties from the hardware info and the NEO device info.
ze_result_t DeviceImp::getCacheProperties(ze_device_cache_properties_t *pCacheProperties) {
    const auto &hardwareInfo = this->getHwInfo();
    auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    pCacheProperties->intermediateCacheControlSupported = false;
    pCacheProperties->intermediateCacheSize = getIntermediateCacheSize(hardwareInfo);
    pCacheProperties->intermediateCachelineSize = 0;
    pCacheProperties->lastLevelCacheSizeControlSupported = hwHelper.isL3Configurable(hardwareInfo);
    // L3CacheSizeInKb is scaled by KB before being stored.
    pCacheProperties->lastLevelCacheSize = static_cast(hardwareInfo.gtSystemInfo.L3CacheSizeInKb * KB);
    pCacheProperties->lastLevelCachelineSize = this->neoDevice->getDeviceInfo().globalMemCachelineSize;
    return ZE_RESULT_SUCCESS;
}

// Sets only samplerFilterFlags: LINEAR when the device reports image support,
// NONE otherwise. The desc argument is not read by this implementation.
ze_result_t DeviceImp::imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) {
    const auto &deviceInfo = this->neoDevice->getDeviceInfo();

    if (deviceInfo.imageSupport) {
        pImageProperties->samplerFilterFlags = ZE_IMAGE_SAMPLER_FILTER_FLAGS_LINEAR;
    } else {
        pImageProperties->samplerFilterFlags = ZE_IMAGE_SAMPLER_FILTER_FLAGS_NONE;
    }

    return ZE_RESULT_SUCCESS;
}

// Copies the device-wide image limits from the NEO device info.
ze_result_t DeviceImp::getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) {
    const auto &deviceInfo = this->neoDevice->getDeviceInfo();

    pDeviceImageProperties->supported = deviceInfo.imageSupport;
    // NOTE(review): the 1D limit is taken from image2DMaxWidth - confirm that is intended.
    pDeviceImageProperties->maxImageDims1D = static_cast(deviceInfo.image2DMaxWidth);
    pDeviceImageProperties->maxImageDims2D = static_cast(deviceInfo.image2DMaxHeight);
    pDeviceImageProperties->maxImageDims3D = static_cast(deviceInfo.image3DMaxDepth);
    pDeviceImageProperties->maxImageBufferSize = this->neoDevice->getDeviceInfo().imageMaxBufferSize;
    pDeviceImageProperties->maxImageArraySlices = static_cast(deviceInfo.imageMaxArraySize);
    pDeviceImageProperties->maxSamplers = this->neoDevice->getDeviceInfo().maxSamplers;
    pDeviceImageProperties->maxReadImageArgs = this->neoDevice->getDeviceInfo().maxReadImageArgs;
    pDeviceImageProperties->maxWriteImageArgs = this->neoDevice->getDeviceInfo().maxWriteImageArgs;

    return ZE_RESULT_SUCCESS;
}
ze_result_t DeviceImp::systemBarrier() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t DeviceImp::activateMetricGroups(uint32_t count, zet_metric_group_handle_t *phMetricGroups) { return metricContext->activateMetricGroupsDeferred(count, phMetricGroups); } void *DeviceImp::getExecEnvironment() { return execEnvironment; } BuiltinFunctionsLib *DeviceImp::getBuiltinFunctionsLib() { return builtins.get(); } uint32_t DeviceImp::getMOCS(bool l3enabled, bool l1enabled) { return getHwHelper().getMocsIndex(*getNEODevice()->getGmmHelper(), l3enabled, l1enabled) << 1; } NEO::HwHelper &DeviceImp::getHwHelper() { const auto &hardwareInfo = neoDevice->getHardwareInfo(); return NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); } NEO::OSInterface &DeviceImp::getOsInterface() { return *neoDevice->getOSTime()->getOSInterface(); } uint32_t DeviceImp::getPlatformInfo() const { const auto &hardwareInfo = neoDevice->getHardwareInfo(); return hardwareInfo.platform.eRenderCoreFamily; } MetricContext &DeviceImp::getMetricContext() { return *metricContext; } void DeviceImp::activateMetricGroups() { if (metricContext != nullptr) { metricContext->activateMetricGroups(); } } uint32_t DeviceImp::getMaxNumHwThreads() const { return maxNumHwThreads; } ze_result_t DeviceImp::registerCLMemory(cl_context context, cl_mem mem, void **ptr) { NEO::MemObj *memObj = static_cast(mem); NEO::GraphicsAllocation *graphicsAllocation = memObj->getGraphicsAllocation(); DEBUG_BREAK_IF(graphicsAllocation == nullptr); auto allocation = getDriverHandle()->allocateManagedMemoryFromHostPtr( this, graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation->getUnderlyingBufferSize(), nullptr); *ptr = allocation->getUnderlyingBuffer(); return ZE_RESULT_SUCCESS; } ze_result_t DeviceImp::registerCLProgram(cl_context context, cl_program program, ze_module_handle_t *phModule) { NEO::Program *neoProgram = static_cast(program); if (neoProgram->getIsSpirV()) { size_t deviceBinarySize = 0; if (0 != 
neoProgram->getInfo(CL_PROGRAM_BINARY_SIZES, sizeof(deviceBinarySize), &deviceBinarySize, nullptr)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } std::vector deviceBinary; deviceBinary.resize(deviceBinarySize); auto deviceBinaryPtr = deviceBinary.data(); if (0 != neoProgram->getInfo(CL_PROGRAM_BINARIES, sizeof(void *), &deviceBinaryPtr, nullptr)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_module_desc_t module_desc; module_desc.version = ZE_MODULE_DESC_VERSION_CURRENT; module_desc.format = ZE_MODULE_FORMAT_NATIVE; module_desc.inputSize = deviceBinarySize; module_desc.pInputModule = deviceBinary.data(); module_desc.pBuildFlags = nullptr; return createModule(&module_desc, phModule, nullptr); } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } ze_result_t DeviceImp::registerCLCommandQueue(cl_context context, cl_command_queue commandQueue, ze_command_queue_handle_t *phCommandQueue) { ze_command_queue_desc_t desc; desc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT; desc.flags = ZE_COMMAND_QUEUE_FLAG_NONE; desc.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; *phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc); return ZE_RESULT_SUCCESS; } const NEO::HardwareInfo &DeviceImp::getHwInfo() const { return neoDevice->getHardwareInfo(); } bool DeviceImp::isMultiDeviceCapable() const { return neoDevice->getNumAvailableDevices() > 1u; } Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice) { auto device = new DeviceImp; UNRECOVERABLE_IF(device == nullptr); device->setDriverHandle(driverHandle); neoDevice->setSpecializedDevice(device); device->neoDevice = neoDevice; neoDevice->incRefInternal(); device->execEnvironment = (void *)neoDevice->getExecutionEnvironment(); device->metricContext = MetricContext::create(*device); device->builtins = 
BuiltinFunctionsLib::create( device, neoDevice->getBuiltIns()); device->maxNumHwThreads = NEO::HwHelper::getMaxThreadsForVfe(neoDevice->getHardwareInfo()); if (device->neoDevice->getNumAvailableDevices() == 1) { device->numSubDevices = 0; } else { device->numSubDevices = device->neoDevice->getNumAvailableDevices(); for (uint32_t i = 0; i < device->numSubDevices; i++) { ze_device_handle_t subDevice = Device::create(driverHandle, device->neoDevice->getDeviceById(i)); if (subDevice == nullptr) { return nullptr; } reinterpret_cast(subDevice)->isSubdevice = true; device->subDevices.push_back(static_cast(subDevice)); } } if (neoDevice->getCompilerInterface()) { device->getBuiltinFunctionsLib()->initFunctions(); device->getBuiltinFunctionsLib()->initPageFaultFunction(); } auto supportDualStorageSharedMemory = device->getDriverHandle()->getMemoryManager()->isLocalMemorySupported(device->neoDevice->getRootDeviceIndex()); if (NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) { supportDualStorageSharedMemory = NEO::DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get(); } if (supportDualStorageSharedMemory) { ze_command_queue_desc_t cmdQueueDesc; cmdQueueDesc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT; cmdQueueDesc.ordinal = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; device->pageFaultCommandList = CommandList::createImmediate( device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true); } if (neoDevice->getDeviceInfo().debuggerActive) { auto osInterface = neoDevice->getRootDeviceEnvironment().osInterface.get(); auto debugSurface = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), NEO::SipKernel::maxDbgSurfaceSize, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY}); device->setDebugSurface(debugSurface); device->getSourceLevelDebugger() ->notifyNewDevice(osInterface ? 
osInterface->getDeviceHandle() : 0); } return device; } DeviceImp::~DeviceImp() { for (uint32_t i = 0; i < this->numSubDevices; i++) { delete this->subDevices[i]; } if (this->pageFaultCommandList) { this->pageFaultCommandList->destroy(); this->pageFaultCommandList = nullptr; } metricContext.reset(); builtins.reset(); if (neoDevice->getDeviceInfo().debuggerActive) { getSourceLevelDebugger()->notifyDeviceDestruction(); this->driverHandle->getMemoryManager()->freeGraphicsMemory(this->debugSurface); this->debugSurface = nullptr; } if (neoDevice) { neoDevice->decRefInternal(); } } NEO::PreemptionMode DeviceImp::getDevicePreemptionMode() const { return neoDevice->getPreemptionMode(); } const NEO::DeviceInfo &DeviceImp::getDeviceInfo() const { return neoDevice->getDeviceInfo(); } NEO::Device *DeviceImp::getNEODevice() { return neoDevice; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/device/device_imp.h000066400000000000000000000126701363734646600254750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/tracing/tracing.h" namespace L0 { struct DeviceImp : public Device { uint32_t getRootDeviceIndex() override; ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) override; ze_result_t copyCommandList(ze_command_list_handle_t hCommandList, ze_command_list_handle_t *phCommandList) override; ze_result_t createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) override; ze_result_t createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *phCommandList) override; ze_result_t 
createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) override; ze_result_t createEventPool(const ze_event_pool_desc_t *desc, ze_event_pool_handle_t *eventPool) override; ze_result_t createImage(const ze_image_desc_t *desc, ze_image_handle_t *phImage) override; ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog) override; ze_result_t createSampler(const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) override; ze_result_t evictImage(ze_image_handle_t hImage) override; ze_result_t evictMemory(void *ptr, size_t size) override; ze_result_t getComputeProperties(ze_device_compute_properties_t *pComputeProperties) override; ze_result_t getP2PProperties(ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) override; ze_result_t getKernelProperties(ze_device_kernel_properties_t *pKernelProperties) override; ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) override; ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) override; ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) override; ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) override; ze_result_t makeImageResident(ze_image_handle_t hImage) override; ze_result_t makeMemoryResident(void *ptr, size_t size) override; ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) override; ze_result_t setLastLevelCacheConfig(ze_cache_config_t cacheConfig) override; ze_result_t getCacheProperties(ze_device_cache_properties_t *pCacheProperties) override; ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) override; ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) override; ze_result_t systemBarrier() override; void *getExecEnvironment() 
override; BuiltinFunctionsLib *getBuiltinFunctionsLib() override; uint32_t getMOCS(bool l3enabled, bool l1enabled) override; NEO::HwHelper &getHwHelper() override; bool isMultiDeviceCapable() const override; const NEO::HardwareInfo &getHwInfo() const override; NEO::OSInterface &getOsInterface() override; uint32_t getPlatformInfo() const override; MetricContext &getMetricContext() override; uint32_t getMaxNumHwThreads() const override; ze_result_t registerCLMemory(cl_context context, cl_mem mem, void **ptr) override; ze_result_t registerCLProgram(cl_context context, cl_program program, ze_module_handle_t *phModule) override; ze_result_t registerCLCommandQueue(cl_context context, cl_command_queue commandQueue, ze_command_queue_handle_t *phCommandQueue) override; ze_result_t activateMetricGroups(uint32_t count, zet_metric_group_handle_t *phMetricGroups) override; DriverHandle *getDriverHandle() override; void setDriverHandle(DriverHandle *driverHandle) override; NEO::PreemptionMode getDevicePreemptionMode() const override; const NEO::DeviceInfo &getDeviceInfo() const override; NEO::Device *getNEODevice() override; void activateMetricGroups() override; void processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_kernel_properties_t *pKernelProperties); NEO::GraphicsAllocation *getDebugSurface() const override { return debugSurface; } void setDebugSurface(NEO::GraphicsAllocation *debugSurface) { this->debugSurface = debugSurface; }; ~DeviceImp() override; NEO::Device *neoDevice = nullptr; bool isSubdevice = false; void *execEnvironment = nullptr; std::unique_ptr builtins = nullptr; std::unique_ptr metricContext = nullptr; uint32_t maxNumHwThreads = 0; uint32_t numSubDevices = 0; std::vector subDevices; DriverHandle *driverHandle = nullptr; CommandList *pageFaultCommandList = nullptr; protected: NEO::GraphicsAllocation *debugSurface = nullptr; }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/dll/000077500000000000000000000000001363734646600225265ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/dll/CMakeLists.txt000066400000000000000000000004041363734646600252640ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_DLL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/disallow_deferred_deleter.cpp ) set_property(GLOBAL PROPERTY L0_SRCS_DLL ${L0_SRCS_DLL})compute-runtime-20.13.16352/level_zero/core/source/dll/disallow_deferred_deleter.cpp000066400000000000000000000003431363734646600304140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/deferred_deleter_helper.h" /* Definition of NEO::isDeferredDeleterEnabled for this library: unconditionally returns false. */ bool NEO::isDeferredDeleterEnabled() { return false; } // namespace NEO compute-runtime-20.13.16352/level_zero/core/source/dll/linux/000077500000000000000000000000001363734646600236655ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/dll/linux/ze.exports000066400000000000000000000001571363734646600257340ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ { local: cl*; }; compute-runtime-20.13.16352/level_zero/core/source/driver/000077500000000000000000000000001363734646600232465ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/driver/driver.cpp000066400000000000000000000040631363734646600252500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/driver/driver.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/device_factory.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle.h" 
#include "level_zero/core/source/driver/driver_imp.h"

#include
#include

namespace L0 {

std::unique_ptr<_ze_driver_handle_t> GlobalDriver;
uint32_t driverCount = 1;

// Creates the NEO devices and, when at least one exists, the global driver handle.
// *result is true only if the driver handle was created.
void DriverImp::initialize(bool *result) {
    *result = false;

    auto executionEnvironment = new NEO::ExecutionEnvironment();
    UNRECOVERABLE_IF(nullptr == executionEnvironment);

    // Hold a reference across device creation so the environment stays alive
    // for the duration of the createDevices call.
    executionEnvironment->incRefInternal();
    auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment);
    executionEnvironment->decRefInternal();
    if (!devices.empty()) {
        GlobalDriver.reset(DriverHandle::create(std::move(devices)));
        if (GlobalDriver != nullptr) {
            *result = true;
        }
    }
}

bool DriverImp::initStatus(false);

// Runs initialize() once (std::call_once) and reports the recorded outcome on every call.
// The flag argument is not read by this implementation.
ze_result_t DriverImp::driverInit(ze_init_flag_t flag) {
    std::call_once(initDriverOnce, [this]() {
        bool result;
        this->initialize(&result);
        initStatus = result;
    });
    return ((initStatus) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNINITIALIZED);
}

// Driver enumeration: *pCount == 0 queries the count; otherwise *pCount is clamped
// to driverCount and that many entries of phDriverHandles receive the global handle.
ze_result_t driverHandleGet(uint32_t *pCount, ze_driver_handle_t *phDriverHandles) {
    if (*pCount == 0) {
        *pCount = driverCount;
        return ZE_RESULT_SUCCESS;
    }

    if (*pCount > driverCount) {
        *pCount = driverCount;
    }

    if (phDriverHandles == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    for (uint32_t i = 0; i < *pCount; i++) {
        phDriverHandles[i] = GlobalDriver.get();
    }

    return ZE_RESULT_SUCCESS;
}

static DriverImp driverImp;
Driver *Driver::driver = &driverImp;

ze_result_t init(ze_init_flag_t flag) { return Driver::get()->driverInit(flag); }
} // namespace L0
compute-runtime-20.13.16352/level_zero/core/source/driver/driver.h000066400000000000000000000011271363734646600247130ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include
#include

namespace L0 {
// Process-wide driver entry point; the active implementation is reached through get().
struct Driver {
    virtual ze_result_t driverInit(_ze_init_flag_t) = 0;
    virtual void initialize(bool *result) = 0;
    static Driver *get() { return driver; }

  protected:
    static Driver *driver;
};

ze_result_t init(_ze_init_flag_t);
ze_result_t driverHandleGet(uint32_t *pCount, ze_driver_handle_t *phDrivers);

extern uint32_t driverCount;
extern std::unique_ptr<_ze_driver_handle_t> GlobalDriver;
} // namespace L0
compute-runtime-20.13.16352/level_zero/core/source/driver/driver_handle.h000066400000000000000000000077451363734646600262420ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/memory_manager/unified_memory_manager.h"

#include "level_zero/core/source/device/device.h"

#include

struct _ze_driver_handle_t {
    virtual ~_ze_driver_handle_t() = default;
};

namespace L0 {
struct Device;

// Abstract driver handle: device enumeration, memory allocation, IPC and event-pool entry points.
struct DriverHandle : _ze_driver_handle_t {
    virtual ze_result_t getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) = 0;
    virtual ze_result_t getProperties(ze_driver_properties_t *properties) = 0;
    virtual ze_result_t getApiVersion(ze_api_version_t *version) = 0;
    virtual ze_result_t getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) = 0;
    virtual ze_result_t getExtensionFunctionAddress(const char *pFuncName, void **pfunc) = 0;
    virtual ze_result_t getMemAllocProperties(const void *ptr,
                                              ze_memory_allocation_properties_t *pMemAllocProperties,
                                              ze_device_handle_t *phDevice) = 0;

    virtual ze_result_t allocHostMem(ze_host_mem_alloc_flag_t flags, size_t size, size_t alignment, void **ptr) = 0;

    virtual ze_result_t allocDeviceMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t flags, size_t size,
                                       size_t alignment, void **ptr) = 0;

    virtual ze_result_t allocSharedMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t deviceFlags,
                                       ze_host_mem_alloc_flag_t hostFlags, size_t size, size_t alignment,
                                       void **ptr) = 0;
    virtual ze_result_t freeMem(const void *ptr) = 0;
    virtual NEO::MemoryManager *getMemoryManager() = 0;
    virtual void setMemoryManager(NEO::MemoryManager *memoryManager) = 0;
    virtual ze_result_t getMemAddressRange(const void *ptr, void **pBase, size_t *pSize) = 0;
    virtual ze_result_t closeIpcMemHandle(const void *ptr) = 0;
    virtual ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) = 0;
    virtual ze_result_t openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle,
                                         ze_ipc_memory_flag_t flags, void **ptr) = 0;
    virtual ze_result_t createEventPool(const ze_event_pool_desc_t *desc,
                                        uint32_t numDevices,
                                        ze_device_handle_t *phDevices,
                                        ze_event_pool_handle_t *phEventPool) = 0;
    virtual ze_result_t openEventPoolIpcHandle(ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) = 0;
    virtual ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) = 0;
    virtual NEO::GraphicsAllocation *allocateManagedMemoryFromHostPtr(Device *device, void *buffer,
                                                                      size_t size, struct CommandList *commandList) = 0;
    virtual NEO::GraphicsAllocation *allocateMemoryFromHostPtr(Device *device, const void *buffer, size_t size) = 0;
    virtual bool findAllocationDataForRange(const void *buffer,
                                            size_t size,
                                            NEO::SvmAllocationData **allocData) = 0;
    virtual std::vector findAllocationsWithinRange(const void *buffer,
                                                   size_t size,
                                                   bool *allocationRangeCovered) = 0;

    virtual NEO::SVMAllocsManager *getSvmAllocsManager() = 0;
    static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast(handle); }
    inline ze_driver_handle_t toHandle() { return this; }

    DriverHandle &operator=(const DriverHandle &) = delete;
    DriverHandle &operator=(DriverHandle &&) = delete;

    static DriverHandle *create(std::vector> devices);
};

} // namespace L0
compute-runtime-20.13.16352/level_zero/core/source/driver/driver_handle_imp.cpp000066400000000000000000000254371363734646600274400ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "level_zero/core/source/driver/driver_handle_imp.h"

#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_library.h"

#include "level_zero/core/source/device/device_imp.h"

#include "driver_version_l0.h"

#include
#include

namespace L0 {

NEO::MemoryManager *DriverHandleImp::getMemoryManager() {
    return this->memoryManager;
}

void DriverHandleImp::setMemoryManager(NEO::MemoryManager *memoryManager) {
    this->memoryManager = memoryManager;
}

NEO::SVMAllocsManager *DriverHandleImp::getSvmAllocsManager() {
    return this->svmAllocsManager;
}

ze_result_t DriverHandleImp::getApiVersion(ze_api_version_t *version) {
    *version = ZE_API_VERSION_1_0;
    return ZE_RESULT_SUCCESS;
}

// Builds driverVersion from the L0_PROJECT_VERSION_MAJOR / _MINOR strings, parsed as base-16.
ze_result_t DriverHandleImp::getProperties(ze_driver_properties_t *properties) {
    uint32_t versionMajor = (uint32_t)strtoul(L0_PROJECT_VERSION_MAJOR, NULL, 16);
    uint32_t versionMinor = (uint32_t)strtoul(L0_PROJECT_VERSION_MINOR, NULL, 16);

    properties->driverVersion = ZE_MAKE_VERSION(versionMajor, versionMinor);

    return ZE_RESULT_SUCCESS;
}

ze_result_t DriverHandleImp::getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) {
    pIPCProperties->eventsSupported = false;
    pIPCProperties->memsSupported = true;
    return ZE_RESULT_SUCCESS;
}

// Maps the internal unified-memory type onto the public ze_memory_type_t.
inline ze_memory_type_t parseUSMType(InternalMemoryType memoryType) {
    switch (memoryType) {
    case InternalMemoryType::SHARED_UNIFIED_MEMORY:
        return ZE_MEMORY_TYPE_SHARED;
    case InternalMemoryType::DEVICE_UNIFIED_MEMORY:
        return ZE_MEMORY_TYPE_DEVICE;
    case InternalMemoryType::HOST_UNIFIED_MEMORY:
        return ZE_MEMORY_TYPE_HOST;
    default:
        return ZE_MEMORY_TYPE_UNKNOWN;
    }

    return ZE_MEMORY_TYPE_UNKNOWN;
}

// Resolves pFuncName through the OS library loaded in initialize().
ze_result_t DriverHandleImp::getExtensionFunctionAddress(const char *pFuncName, void **pfunc) {
    *pfunc = this->osLibrary->getProcAddress(std::string(pFuncName));
    if (*pfunc == nullptr) {
        DEBUG_BREAK_IF(true);
        return ZE_RESULT_ERROR_UNKNOWN;
    }
    return ZE_RESULT_SUCCESS;
}

// Reports the memory type and id (GPU address) of the SVM allocation containing ptr.
// phDevice is not written by this implementation.
ze_result_t DriverHandleImp::getMemAllocProperties(const void *ptr,
                                                   ze_memory_allocation_properties_t *pMemAllocProperties,
                                                   ze_device_handle_t *phDevice) {
    auto alloc = svmAllocsManager->getSVMAllocs()->get(ptr);
    if (alloc) {
        pMemAllocProperties->type = parseUSMType(alloc->memoryType);
        pMemAllocProperties->id = alloc->gpuAllocation->getGpuAddress();

        return ZE_RESULT_SUCCESS;
    }

    return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}

DriverHandleImp::~DriverHandleImp() {
    for (auto &device : this->devices) {
        delete device;
    }
    if (this->svmAllocsManager) {
        delete this->svmAllocsManager;
        this->svmAllocsManager = nullptr;
    }
    delete this->osLibrary;
}

// Takes ownership of the NEO devices, wraps each one in a Level Zero device and
// loads the OS library used for extension lookups.
// Returns ZE_RESULT_ERROR_UNINITIALIZED when there are no devices, no memory
// manager, a device cannot be wrapped, or the OS library is unavailable.
ze_result_t DriverHandleImp::initialize(std::vector> devices) {
    // devices[0] is read below; an empty list must not reach it.
    if (devices.empty()) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }

    this->memoryManager = devices[0]->getMemoryManager();
    if (this->memoryManager == nullptr) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }

    this->svmAllocsManager = new NEO::SVMAllocsManager(memoryManager);
    if (this->svmAllocsManager == nullptr) {
        return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
    }

    this->numDevices = static_cast(devices.size());

    for (auto &neoDevice : devices) {
        auto device = Device::create(this, neoDevice.release());
        // Device::create can return nullptr; never store a null entry that
        // getDevice would later hand out as a handle.
        if (device == nullptr) {
            return ZE_RESULT_ERROR_UNINITIALIZED;
        }
        this->devices.push_back(device);
    }

    this->osLibrary = NEO::OsLibrary::load("");
    // Guard the pointer itself before asking whether the library is loaded.
    if (this->osLibrary == nullptr || this->osLibrary->isLoaded() == false) {
        return ZE_RESULT_ERROR_UNINITIALIZED;
    }

    return ZE_RESULT_SUCCESS;
}

// Factory: returns a fully initialized driver handle, or nullptr when initialization fails.
DriverHandle *DriverHandle::create(std::vector> devices) {
    DriverHandleImp *driverHandle = new DriverHandleImp;
    UNRECOVERABLE_IF(nullptr == driverHandle);

    ze_result_t res = driverHandle->initialize(std::move(devices));
    if (res != ZE_RESULT_SUCCESS) {
        delete driverHandle;
        return nullptr;
    }

    driverHandle->memoryManager->setForceNonSvmForExternalHostPtr(true);

    return driverHandle;
}

// Device enumeration: *pCount == 0 queries the count; otherwise up to *pCount
// handles are written to phDevices and *pCount is clamped to the number available.
ze_result_t DriverHandleImp::getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) {
    if (*pCount == 0) {
        *pCount = this->numDevices;
        return ZE_RESULT_SUCCESS;
    }

    if (phDevices == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // Clamp the request; without this a larger *pCount read past the end of `devices`.
    if (*pCount > this->numDevices) {
        *pCount = this->numDevices;
    }

    for (uint32_t i = 0; i < *pCount; i++) {
        phDevices[i] = this->devices[i];
    }

    return ZE_RESULT_SUCCESS;
}

// Looks up the SVM allocations containing the first and last byte of [buffer, buffer + size).
// *allocData (when requested) receives the allocation at the start of the range, or
// failing that the one at the end. Returns true only when both ends fall in the same
// GPU allocation.
bool DriverHandleImp::findAllocationDataForRange(const void *buffer,
                                                 size_t size,
                                                 NEO::SvmAllocationData **allocData) {
    // Make sure the host buffer does not overlap any existing allocation
    const char *baseAddress = reinterpret_cast(buffer);
    NEO::SvmAllocationData *beginAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress);
    NEO::SvmAllocationData *endAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress + size - 1);

    if (allocData) {
        if (beginAllocData) {
            *allocData = beginAllocData;
        } else {
            *allocData = endAllocData;
        }
    }

    // Return true if the whole range requested is covered by the same allocation
    if (beginAllocData && endAllocData &&
        (beginAllocData->gpuAllocation == endAllocData->gpuAllocation)) {
        return true;
    }
    return false;
}

// Collects the SVM allocations touching either end of [buffer, buffer + size).
// *allocationRangeCovered is true only when both ends fall in the same GPU allocation.
std::vector DriverHandleImp::findAllocationsWithinRange(const void *buffer,
                                                        size_t size,
                                                        bool *allocationRangeCovered) {
    std::vector allocDataArray;
    const char *baseAddress = reinterpret_cast(buffer);
    // Check if the host buffer overlaps any existing allocation
    NEO::SvmAllocationData *beginAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress);
    NEO::SvmAllocationData *endAllocData = svmAllocsManager->getSVMAllocs()->get(baseAddress + size - 1);

    // Add the allocation that matches the beginning address
    if (beginAllocData) {
        allocDataArray.push_back(beginAllocData);
    }
    // Add the allocation that matches the end address range if there was no beginning allocation
    // or the beginning allocation does not match the ending allocation
    if (endAllocData) {
        if ((beginAllocData && (beginAllocData->gpuAllocation != endAllocData->gpuAllocation)) ||
            !beginAllocData) {
            allocDataArray.push_back(endAllocData);
        }
    }

    // Return true if the whole range requested is covered by the same allocation
    if (beginAllocData && endAllocData &&
        (beginAllocData->gpuAllocation == endAllocData->gpuAllocation)) {
        *allocationRangeCovered = true;
    } else {
        *allocationRangeCovered = false;
    }
    return allocDataArray;
}

// Returns a GPU allocation backing the host range [buffer, buffer + size).
// - If one existing allocation already covers the whole range, it is reused.
// - If existing allocations only partly overlap, they are removed and freed
//   (commandList must then be non-null) and the range is widened to include them.
// - A new BUFFER_HOST_MEMORY allocation is then created and recorded in the SVM
//   table; nullptr is returned if that allocation fails.
NEO::GraphicsAllocation *DriverHandleImp::allocateManagedMemoryFromHostPtr(Device *device, void *buffer,
                                                                           size_t size, struct CommandList *commandList) {
    char *baseAddress = reinterpret_cast(buffer);
    NEO::GraphicsAllocation *allocation = nullptr;
    bool allocFound = false;
    std::vector allocDataArray = findAllocationsWithinRange(buffer, size, &allocFound);
    if (allocFound) {
        return allocDataArray[0]->gpuAllocation;
    }

    if (!allocDataArray.empty()) {
        UNRECOVERABLE_IF(commandList == nullptr);
        for (auto allocData : allocDataArray) {
            allocation = allocData->gpuAllocation;
            char *allocAddress = reinterpret_cast(allocation->getGpuAddress());
            size_t allocSize = allocData->size;

            device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->remove(*allocData);
            memoryManager->freeGraphicsMemory(allocation);
            commandList->eraseDeallocationContainerEntry(allocation);
            commandList->eraseResidencyContainerEntry(allocation);

            if (allocAddress < baseAddress) {
                // The old allocation starts earlier: extend the range downwards to its start.
                buffer = reinterpret_cast(allocAddress);
                baseAddress += size;
                size = ptrDiff(baseAddress, allocAddress);
                baseAddress = reinterpret_cast(buffer);
            } else {
                // The old allocation starts inside the range: extend upwards if it ends later.
                allocAddress += allocSize;
                baseAddress += size;
                if (allocAddress > baseAddress) {
                    baseAddress = reinterpret_cast(buffer);
                    size = ptrDiff(allocAddress, baseAddress);
                } else {
                    baseAddress = reinterpret_cast(buffer);
                }
            }
        }
    }

    allocation = memoryManager->allocateGraphicsMemoryWithProperties(
        {0u, false, size, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false},
        buffer);

    if (allocation == nullptr) {
        return allocation;
    }

    NEO::SvmAllocationData allocData;
    allocData.gpuAllocation = allocation;
    allocData.cpuAllocation = nullptr;
    allocData.size = size;
    allocData.memoryType = InternalMemoryType::NOT_SPECIFIED;
    allocData.device = nullptr;

    svmAllocsManager->getSVMAllocs()->insert(allocData);

    return allocation;
}

// Wraps an external host pointer in an EXTERNAL_HOST_PTR allocation with L3 flushes
// required for both reads and writes. Failure is treated as unrecoverable.
NEO::GraphicsAllocation *DriverHandleImp::allocateMemoryFromHostPtr(Device *device, const void *buffer, size_t size) {
    NEO::AllocationProperties properties = {0u, false, size, NEO::GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, buffer);
    UNRECOVERABLE_IF(allocation == nullptr);
    return allocation;
}

// Creates the event pool on the first device of phDevices. When the caller passes
// no device list (numDevices == 0 or phDevices == nullptr) the driver's first device
// is used instead; previously that case dereferenced phDevices[0] unconditionally.
ze_result_t DriverHandleImp::createEventPool(const ze_event_pool_desc_t *desc,
                                             uint32_t numDevices,
                                             ze_device_handle_t *phDevices,
                                             ze_event_pool_handle_t *phEventPool) {
    Device *device = nullptr;
    if (numDevices > 0 && phDevices != nullptr) {
        device = Device::fromHandle(phDevices[0]);
    } else if (!this->devices.empty()) {
        device = this->devices[0];
    }

    if (device == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    return device->createEventPool(desc, phEventPool);
}

// Not implemented: always reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t DriverHandleImp::openEventPoolIpcHandle(ze_ipc_event_pool_handle_t hIpc,
                                                    ze_event_pool_handle_t *phEventPool) {
    return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

} // namespace L0
compute-runtime-20.13.16352/level_zero/core/source/driver/driver_handle_imp.h000066400000000000000000000072611363734646600271000ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/os_interface/os_library.h"

#include "level_zero/core/source/driver/driver_handle.h"
#include "level_zero/tools/source/tracing/tracing.h"

namespace L0 {

// Concrete driver handle; owns the Level Zero devices, the SVM allocation manager and the OS library.
struct DriverHandleImp : public DriverHandle {
    ~DriverHandleImp() override;
    ze_result_t getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) override;
    ze_result_t getProperties(ze_driver_properties_t *properties) override;
    ze_result_t getApiVersion(ze_api_version_t *version) override;
    ze_result_t getIPCProperties(ze_driver_ipc_properties_t *pIPCProperties) override;
    ze_result_t getExtensionFunctionAddress(const char *pFuncName, void **pfunc) override;
    ze_result_t getMemAllocProperties(const void *ptr,
                                      ze_memory_allocation_properties_t *pMemAllocProperties,
                                      ze_device_handle_t *phDevice) override;

    ze_result_t allocHostMem(ze_host_mem_alloc_flag_t flags, size_t size, size_t alignment, void **ptr) override;

    ze_result_t allocDeviceMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t flags, size_t size,
                               size_t alignment, void **ptr) override;

    ze_result_t allocSharedMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t deviceFlags,
                               ze_host_mem_alloc_flag_t hostFlags, size_t size, size_t alignment,
                               void **ptr) override;

    ze_result_t getMemAddressRange(const void *ptr, void **pBase, size_t *pSize) override;
    ze_result_t freeMem(const void *ptr) override;
    NEO::MemoryManager *getMemoryManager() override;
    void setMemoryManager(NEO::MemoryManager *memoryManager) override;
    ze_result_t closeIpcMemHandle(const void *ptr) override;
    ze_result_t getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) override;
    ze_result_t openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle,
                                 ze_ipc_memory_flag_t flags, void **ptr) override;
    ze_result_t createEventPool(const ze_event_pool_desc_t *desc,
                                uint32_t numDevices,
                                ze_device_handle_t *phDevices,
                                ze_event_pool_handle_t *phEventPool) override;
    ze_result_t openEventPoolIpcHandle(ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) override;
    ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) override;
    NEO::SVMAllocsManager *getSvmAllocsManager() override;
    ze_result_t initialize(std::vector> devices);
    NEO::GraphicsAllocation *allocateManagedMemoryFromHostPtr(Device *device, void *buffer,
                                                              size_t size, struct CommandList *commandList) override;
    NEO::GraphicsAllocation *allocateMemoryFromHostPtr(Device *device, const void *buffer, size_t size) override;
    bool findAllocationDataForRange(const void *buffer,
                                    size_t size,
                                    NEO::SvmAllocationData **allocData) override;
    std::vector findAllocationsWithinRange(const void *buffer,
                                           size_t size,
                                           bool *allocationRangeCovered) override;

    uint32_t numDevices = 0;
    std::vector devices; // deleted by the destructor
    NEO::MemoryManager *memoryManager = nullptr;
    NEO::SVMAllocsManager *svmAllocsManager = nullptr; // deleted by the destructor
    NEO::OsLibrary *osLibrary = nullptr;               // deleted by the destructor
};

} // namespace L0
compute-runtime-20.13.16352/level_zero/core/source/driver/driver_imp.h000066400000000000000000000006571363734646600255670ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "level_zero/core/source/driver/driver.h"

#include

namespace L0 {

// Driver implementation: initialize() runs at most once, guarded by initDriverOnce.
class DriverImp : public Driver {
  public:
    ze_result_t driverInit(_ze_init_flag_t) override;

    void initialize(bool *result) override;

  protected:
    std::once_flag initDriverOnce;
    static bool initStatus;
};

} // namespace L0
compute-runtime-20.13.16352/level_zero/core/source/driver/driver_version.h.in000066400000000000000000000003441363734646600270650ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#ifndef L0_DRIVER_VERSION_H
#define L0_DRIVER_VERSION_H

#define L0_DRIVER_VERSION @L0_DRIVER_VERSION@

#endif /* L0_DRIVER_VERSION_H */
compute-runtime-20.13.16352/level_zero/core/source/event/000077500000000000000000000000001363734646600230745ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/event/event.cpp000066400000000000000000000315221363734646600247240ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "level_zero/core/source/event/event.h"

#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/memory_constants.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/utilities/cpuintrinsics.h"

#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/tools/source/metrics/metric.h"

#include
#include

namespace L0 {

// Event implementation bound to one pool and one device.
struct EventImp : public Event {
    // The index argument is accepted but not stored by this constructor.
    EventImp(EventPool *eventPool, int index, Device *device)
        : device(device), eventPool(eventPool) {}

    ~EventImp() override {}

    ze_result_t hostSignal() override;

    ze_result_t hostSynchronize(uint32_t timeout) override;

    // Returns ZE_RESULT_NOT_READY while the event's status word equals
    // Event::STATE_CLEARED, ZE_RESULT_SUCCESS otherwise. For timestamp events the
    // CONTEXT_END timestamp slot is the word that is inspected.
    ze_result_t queryStatus() override {
        uint64_t *hostAddr = static_cast(hostAddress);
        auto alloc = &(this->eventPool->getAllocation());
        auto csr = static_cast(this->device)->neoDevice->getDefaultEngine().commandStreamReceiver;

        if (metricTracer != nullptr) {
            *hostAddr = metricTracer->getNotificationState();
        }

        csr->downloadAllocation(*alloc);

        if (isTimestampEvent) {
            auto baseAddr = reinterpret_cast(hostAddress);

            auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
            hostAddr = reinterpret_cast(timeStampAddress);
        }

        return *hostAddr == Event::STATE_CLEARED ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
    }

    ze_result_t reset() override;

    ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) override;

    Device *device;
    EventPool *eventPool;

  protected:
    ze_result_t hostEventSetValue(uint64_t eventValue);
    ze_result_t hostEventSetValueTimestamps(uint64_t eventVal);
    void makeAllocationResident();
};

// Event pool backed by a single host-memory allocation sized for `count` events.
struct EventPoolImp : public EventPool {
    EventPoolImp(Device *device, uint32_t count, ze_event_pool_flag_t flags) : device(device), count(count) {
        pool = std::vector(this->count);
        eventPoolUsedCount = 0;
        for (uint32_t i = 0; i < count; i++) {
            pool[i] = EventPool::EVENT_STATE_INITIAL;
        }

        auto timestampMultiplier = 1;
        // Bit test rather than equality, so the timestamp flag is still honoured
        // when it is combined with other pool flags.
        if ((flags & ZE_EVENT_POOL_FLAG_TIMESTAMP) != 0) {
            isEventPoolUsedForTimestamp = true;
            timestampMultiplier = numEventTimestampsToRead;
        }

        NEO::AllocationProperties properties(
            device->getRootDeviceIndex(), count * eventSize * timestampMultiplier,
            NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
        properties.alignment = MemoryConstants::cacheLineSize;
        eventPoolAllocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);

        UNRECOVERABLE_IF(eventPoolAllocation == nullptr);
    }

    ~EventPoolImp() override {
        device->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(eventPoolAllocation);
        eventPoolAllocation = nullptr;

        eventTracker.clear();
    }

    ze_result_t destroy() override;

    size_t getPoolSize() override { return this->pool.size(); }
    uint32_t getPoolUsedCount() override { return eventPoolUsedCount; }

    ze_result_t getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) override;

    ze_result_t closeIpcHandle() override;

    // Creates an event at desc->index. Fails with ZE_RESULT_ERROR_INVALID_ARGUMENT
    // when the index is outside the pool or the pool is already full.
    ze_result_t createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) override {
        // `index >= size` instead of `index > size - 1`: identical for a non-empty
        // pool, and it does not wrap around when the pool size is zero.
        if (desc->index >= this->getPoolSize()) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }

        if ((this->getPoolUsedCount() + 1) > this->getPoolSize()) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }

        *phEvent = Event::create(this, desc, this->getDevice());

        return ZE_RESULT_SUCCESS;
    }

    ze_result_t reserveEventFromPool(int index, Event *event) override;

    ze_result_t releaseEventToPool(Event *event) override;

    uint32_t getEventSize() override { return eventSize; }

    uint32_t getNumEventTimestampsToRead() override { return numEventTimestampsToRead; }

    // Clears the slot table; refused while any event is still reserved.
    ze_result_t destroyPool() {
        if (eventPoolUsedCount != 0) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }

        pool.clear();
        return ZE_RESULT_SUCCESS;
    }

    Device *getDevice() override { return device; }

    Device *device;
    uint32_t count;
    uint32_t eventPoolUsedCount;
    std::vector pool;
    std::unordered_map eventTracker;

    std::queue lastEventPoolOffsetUsed;

  protected:
    const uint32_t eventSize = 16u;
    const uint32_t eventAlignment = MemoryConstants::cacheLineSize;
    const int32_t numEventTimestampsToRead = 5u;
};

// Allocates an event, reserves its slot in the pool, copies the signal/wait scopes
// from desc and resets it before returning.
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
    auto event = new EventImp(eventPool, desc->index, device);
    UNRECOVERABLE_IF(event == nullptr);
    eventPool->reserveEventFromPool(desc->index, static_cast(event));

    if (eventPool->isEventPoolUsedForTimestamp) {
        event->isTimestampEvent = true;
    }

    event->signalScope = desc->signal;
    event->waitScope = desc->wait;

    event->reset();

    return event;
}
NEO::GraphicsAllocation &Event::getAllocation() { auto eventImp = static_cast(this); return eventImp->eventPool->getAllocation(); } uint64_t Event::getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg) { auto eventImp = static_cast(this); auto eventSize = eventImp->eventPool->getEventSize(); return (eventTimestampReg * eventSize); } ze_result_t Event::destroy() { auto eventImp = static_cast(this); if (eventImp->eventPool->releaseEventToPool(this)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } delete this; return ZE_RESULT_SUCCESS; } void EventImp::makeAllocationResident() { auto deviceImp = static_cast(this->device); NEO::MemoryOperationsHandler *memoryOperationsIface = deviceImp->neoDevice->getRootDeviceEnvironment().memoryOperationsInterface.get(); if (memoryOperationsIface) { auto alloc = &(this->eventPool->getAllocation()); memoryOperationsIface->makeResident(ArrayRef(&alloc, 1)); } } ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) { for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampsToRead(); i++) { auto baseAddr = reinterpret_cast(hostAddress); auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(i); auto tsptr = reinterpret_cast(timeStampAddress); *(tsptr) = eventVal; if (this->signalScope != ZE_EVENT_SCOPE_FLAG_NONE) { NEO::CpuIntrinsics::clFlush(tsptr); } } makeAllocationResident(); return ZE_RESULT_SUCCESS; } ze_result_t EventImp::hostEventSetValue(uint64_t eventVal) { if (isTimestampEvent) { hostEventSetValueTimestamps(eventVal); } auto hostAddr = static_cast(hostAddress); UNRECOVERABLE_IF(hostAddr == nullptr); *(hostAddr) = eventVal; makeAllocationResident(); if (this->signalScope != ZE_EVENT_SCOPE_FLAG_NONE) { NEO::CpuIntrinsics::clFlush(hostAddr); } return ZE_RESULT_SUCCESS; } ze_result_t EventImp::hostSignal() { return hostEventSetValue(Event::STATE_SIGNALED); } ze_result_t EventImp::hostSynchronize(uint32_t timeout) { std::chrono::high_resolution_clock::time_point time1, time2; int64_t timeDiff 
= 0; ze_result_t ret = ZE_RESULT_NOT_READY; auto csr = static_cast(this->device)->neoDevice->getDefaultEngine().commandStreamReceiver; if (csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) { return ZE_RESULT_SUCCESS; } if (timeout == 0) { return queryStatus(); } time1 = std::chrono::high_resolution_clock::now(); while (true) { ret = queryStatus(); if (ret == ZE_RESULT_SUCCESS) { return ZE_RESULT_SUCCESS; } std::this_thread::yield(); NEO::CpuIntrinsics::pause(); if (timeout == std::numeric_limits::max()) { continue; } time2 = std::chrono::high_resolution_clock::now(); timeDiff = std::chrono::duration_cast(time2 - time1).count(); if (timeDiff >= timeout) { break; } } return ret; } ze_result_t EventImp::reset() { return hostEventSetValue(Event::STATE_INITIAL); } ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) { auto baseAddr = reinterpret_cast(hostAddress); uint64_t *tsptr = nullptr; uint64_t tsData = Event::STATE_INITIAL; constexpr uint64_t tsMask = (1ull << 32) - 1; if (!this->isTimestampEvent) return ZE_RESULT_ERROR_INVALID_ARGUMENT; // Ensure timestamps have been written if (queryStatus() != ZE_RESULT_SUCCESS) { memcpy_s(dstptr, sizeof(uint64_t), static_cast(&tsData), sizeof(uint64_t)); return ZE_RESULT_SUCCESS; } if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_START) { tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW)); auto tsptrUpper = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH)); tsData = ((*tsptrUpper & tsMask) << 32) | (*tsptr & tsMask); memcpy_s(dstptr, sizeof(uint64_t), static_cast(&tsData), sizeof(uint64_t)); return ZE_RESULT_SUCCESS; } if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) { tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_END)); } else if (timestampType == ZE_EVENT_TIMESTAMP_CONTEXT_START) { tsptr = reinterpret_cast(baseAddr + 
getOffsetOfEventTimestampRegister(Event::CONTEXT_START)); } else { tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END)); } tsData = (*tsptr & tsMask); memcpy_s(dstptr, sizeof(uint64_t), static_cast(&tsData), sizeof(uint64_t)); return ZE_RESULT_SUCCESS; } EventPool *EventPool::create(Device *device, const ze_event_pool_desc_t *desc) { auto eventPool = new EventPoolImp(device, desc->count, desc->flags); UNRECOVERABLE_IF(eventPool == nullptr); return eventPool; } ze_result_t EventPoolImp::reserveEventFromPool(int index, Event *event) { if (pool[index] == EventPool::EVENT_STATE_CREATED) { return ZE_RESULT_SUCCESS; } pool[index] = EventPool::EVENT_STATE_CREATED; eventTracker.insert(std::pair(event, index)); if (lastEventPoolOffsetUsed.empty()) { event->offsetUsed = index; } else { event->offsetUsed = lastEventPoolOffsetUsed.front(); lastEventPoolOffsetUsed.pop(); } auto timestampMultiplier = 1; if (static_cast(this)->isEventPoolUsedForTimestamp) { timestampMultiplier = numEventTimestampsToRead; } uint64_t baseHostAddr = reinterpret_cast(eventPoolAllocation->getUnderlyingBuffer()); event->hostAddress = reinterpret_cast(baseHostAddr + (event->offsetUsed * eventSize * timestampMultiplier)); event->gpuAddress = eventPoolAllocation->getGpuAddress() + (event->offsetUsed * eventSize * timestampMultiplier); eventPoolUsedCount++; return ZE_RESULT_SUCCESS; } ze_result_t EventPoolImp::releaseEventToPool(Event *event) { UNRECOVERABLE_IF(event == nullptr); if (eventTracker.find(event) == eventTracker.end()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } int index = eventTracker[event]; pool[index] = EventPool::EVENT_STATE_DESTROYED; eventTracker.erase(event); event->hostAddress = nullptr; event->gpuAddress = 0; lastEventPoolOffsetUsed.push(event->offsetUsed); event->offsetUsed = -1; eventPoolUsedCount--; return ZE_RESULT_SUCCESS; } ze_result_t EventPoolImp::getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t EventPoolImp::closeIpcHandle() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t EventPoolImp::destroy() { if (this->destroyPool()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } delete this; return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/event/event.h000066400000000000000000000064061363734646600243740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle.h" #include struct _ze_event_handle_t {}; struct _ze_event_pool_handle_t {}; namespace L0 { typedef uint64_t FlushStamp; struct EventPool; struct MetricTracer; struct Event : _ze_event_handle_t { virtual ~Event() = default; virtual ze_result_t destroy(); virtual ze_result_t hostSignal() = 0; virtual ze_result_t hostSynchronize(uint32_t timeout) = 0; virtual ze_result_t queryStatus() = 0; virtual ze_result_t reset() = 0; virtual ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) = 0; enum State : uint64_t { STATE_SIGNALED = 0u, STATE_CLEARED = static_cast(-1), STATE_INITIAL = STATE_CLEARED }; enum EventTimestampRegister : uint32_t { GLOBAL_START_LOW = 0u, GLOBAL_START_HIGH, GLOBAL_END, CONTEXT_START, CONTEXT_END }; static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device); static Event *fromHandle(ze_event_handle_t handle) { return static_cast(handle); } inline ze_event_handle_t toHandle() { return this; } NEO::GraphicsAllocation &getAllocation(); uint64_t getGpuAddress() { return gpuAddress; } uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg); void *hostAddress = nullptr; uint64_t gpuAddress; int offsetUsed = -1; ze_event_scope_flag_t signalScope; // Saving scope for use later ze_event_scope_flag_t 
waitScope; bool isTimestampEvent = false; // Metric tracer instance associated with the event. MetricTracer *metricTracer = nullptr; protected: NEO::GraphicsAllocation *allocation = nullptr; }; struct EventPool : _ze_event_pool_handle_t { static EventPool *create(Device *device, const ze_event_pool_desc_t *desc); virtual ~EventPool() = default; virtual ze_result_t destroy() = 0; virtual size_t getPoolSize() = 0; virtual uint32_t getPoolUsedCount() = 0; virtual ze_result_t getIpcHandle(ze_ipc_event_pool_handle_t *pIpcHandle) = 0; virtual ze_result_t closeIpcHandle() = 0; virtual ze_result_t createEvent(const ze_event_desc_t *desc, ze_event_handle_t *phEvent) = 0; virtual ze_result_t reserveEventFromPool(int index, Event *event) = 0; virtual ze_result_t releaseEventToPool(Event *event) = 0; virtual Device *getDevice() = 0; enum EventCreationState : int { EVENT_STATE_INITIAL = 0, EVENT_STATE_DESTROYED = EVENT_STATE_INITIAL, EVENT_STATE_CREATED = 1 }; static EventPool *fromHandle(ze_event_pool_handle_t handle) { return static_cast(handle); } inline ze_event_pool_handle_t toHandle() { return this; } NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; } virtual uint32_t getEventSize() = 0; virtual uint32_t getNumEventTimestampsToRead() = 0; bool isEventPoolUsedForTimestamp = false; protected: NEO::GraphicsAllocation *eventPoolAllocation = nullptr; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/fence/000077500000000000000000000000001363734646600230335ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/fence/fence.cpp000066400000000000000000000065721363734646600246310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/fence/fence.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/memory_manager/memory_constants.h" #include 
"shared/source/memory_manager/memory_manager.h" #include "shared/source/utilities/cpuintrinsics.h" #include "hw_helpers.h" namespace L0 { struct FenceImp : public Fence { FenceImp(CommandQueueImp *cmdQueueImp) : cmdQueue(cmdQueueImp) {} ~FenceImp() override { cmdQueue->getDevice()->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(allocation); allocation = nullptr; } ze_result_t destroy() override { delete this; return ZE_RESULT_SUCCESS; } ze_result_t hostSynchronize(uint32_t timeout) override; ze_result_t queryStatus() override { auto csr = cmdQueue->getCsr(); if (csr) { csr->downloadAllocation(*allocation); } auto hostAddr = static_cast(allocation->getUnderlyingBuffer()); return *hostAddr == Fence::STATE_CLEARED ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS; } ze_result_t reset() override; static Fence *fromHandle(ze_fence_handle_t handle) { return static_cast(handle); } inline ze_fence_handle_t toHandle() { return this; } bool initialize(); protected: CommandQueueImp *cmdQueue; }; Fence *Fence::create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc) { auto fence = new FenceImp(cmdQueue); UNRECOVERABLE_IF(fence == nullptr); fence->initialize(); return fence; } bool FenceImp::initialize() { NEO::AllocationProperties properties( cmdQueue->getDevice()->getRootDeviceIndex(), MemoryConstants::cacheLineSize, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); properties.alignment = MemoryConstants::cacheLineSize; allocation = cmdQueue->getDevice()->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(allocation == nullptr); reset(); return true; } ze_result_t FenceImp::reset() { auto hostAddress = static_cast(allocation->getUnderlyingBuffer()); *(hostAddress) = Fence::STATE_CLEARED; NEO::CpuIntrinsics::clFlush(hostAddress); return ZE_RESULT_SUCCESS; } ze_result_t FenceImp::hostSynchronize(uint32_t timeout) { std::chrono::high_resolution_clock::time_point time1, time2; int64_t timeDiff = 0; 
ze_result_t ret = ZE_RESULT_NOT_READY; if (cmdQueue->getCsr()->getType() == NEO::CommandStreamReceiverType::CSR_AUB) { return ZE_RESULT_SUCCESS; } waitForTaskCountWithKmdNotifyFallbackHelper(cmdQueue->getCsr(), cmdQueue->getTaskCount(), 0, false, false); if (timeout == 0) { return queryStatus(); } time1 = std::chrono::high_resolution_clock::now(); while (timeDiff < timeout) { ret = queryStatus(); if (ret == ZE_RESULT_SUCCESS) { return ZE_RESULT_SUCCESS; } std::this_thread::yield(); NEO::CpuIntrinsics::pause(); if (timeout == std::numeric_limits::max()) { continue; } time2 = std::chrono::high_resolution_clock::now(); timeDiff = std::chrono::duration_cast(time2 - time1).count(); } return ret; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/fence/fence.h000066400000000000000000000026241363734646600242700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include #include struct _ze_fence_handle_t {}; namespace L0 { struct Fence : _ze_fence_handle_t { static Fence *create(CommandQueueImp *cmdQueue, const ze_fence_desc_t *desc); virtual ~Fence() = default; virtual ze_result_t destroy() = 0; virtual ze_result_t hostSynchronize(uint32_t timeout) = 0; virtual ze_result_t queryStatus() = 0; virtual ze_result_t reset() = 0; static Fence *fromHandle(ze_fence_handle_t handle) { return static_cast(handle); } inline ze_fence_handle_t toHandle() { return this; } enum State : uint32_t { STATE_SIGNALED = 0u, STATE_CLEARED = std::numeric_limits::max(), STATE_INITIAL = STATE_CLEARED }; enum EnqueueState : uint32_t { ENQUEUE_NOT_READY = 0u, ENQUEUE_READY }; NEO::GraphicsAllocation &getAllocation() { return *allocation; } uint64_t getGpuAddress() { UNRECOVERABLE_IF(allocation == nullptr); return 
allocation->getGpuAddress(); } protected: NEO::GraphicsAllocation *allocation = nullptr; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen11/000077500000000000000000000000001363734646600226665ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen11/CMakeLists.txt000066400000000000000000000007421363734646600254310ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN11) set(HW_SOURCES_GEN11 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen11.inl ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen11.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN11}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN11 ${HW_SOURCES_GEN11}) endif() compute-runtime-20.13.16352/level_zero/core/source/gen11/cmdlist_gen11.inl000066400000000000000000000011541363734646600260250ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp000066400000000000000000000013541363734646600305740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { typedef ICLFamily Family; struct EnableL0Gen11 { EnableL0Gen11() { populateFactoryTable>(); populateFactoryTable>(); 
populateFactoryTable>(); populateFactoryTable>(); } }; static EnableL0Gen11 enable; } // namespace NEO compute-runtime-20.13.16352/level_zero/core/source/gen11/icllp/000077500000000000000000000000001363734646600237715ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen11/icllp/CMakeLists.txt000066400000000000000000000007731363734646600265400ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_ICLLP) set(HW_SOURCES_GEN11 ${HW_SOURCES_GEN11} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_icllp.cpp PARENT_SCOPE ) endif() compute-runtime-20.13.16352/level_zero/core/source/gen11/icllp/cmdlist_icllp.cpp000066400000000000000000000023571363734646600273260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/gen11/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/gen11/cmdlist_gen11.inl" #include "cmdlist_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; template <> struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; }; static CommandListPopulateFactory> populateICLLP; template <> struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; static CommandListImmediatePopulateFactory> populateICLLPImmediate; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/gen11/icllp/cmdqueue_icllp.cpp000066400000000000000000000007511363734646600274730ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/gen11/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandQueuePopulateFactory> populateICLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen11/icllp/image_icllp.cpp000066400000000000000000000012321363734646600267400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/gen11/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; bool initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateICLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen11/icllp/kernel_icllp.cpp000066400000000000000000000004141363734646600271370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateICLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen11/icllp/sampler_icllp.cpp000066400000000000000000000010131363734646600273160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/gen11/hw_info.h" #include 
"level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateICLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen12lp/000077500000000000000000000000001363734646600232235ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen12lp/CMakeLists.txt000066400000000000000000000013141363734646600257620ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN12LP) set(HW_SOURCES_GEN12LP ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen12lp.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/cache_flush_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen12lp.cpp ) add_subdirectories() target_include_directories(${L0_STATIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN12LP}) set_property(GLOBAL APPEND PROPERTY L0_HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP}) endif() compute-runtime-20.13.16352/level_zero/core/source/gen12lp/cmdlist_gen12lp.h000066400000000000000000000012321363734646600263610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" namespace L0 { template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/gen12lp/definitions/000077500000000000000000000000001363734646600255365ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl000066400000000000000000000012371363734646600320410ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen12lp/enable_family_full_l0_gen12lp.cpp000066400000000000000000000013641363734646600314670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { typedef TGLLPFamily Family; struct EnableL0Gen12LP { EnableL0Gen12LP() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableL0Gen12LP enable; } // namespace NEO compute-runtime-20.13.16352/level_zero/core/source/gen12lp/tgllp/000077500000000000000000000000001363734646600243455ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen12lp/tgllp/CMakeLists.txt000066400000000000000000000007771363734646600271200ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_TGLLP) set(HW_SOURCES_GEN12LP ${HW_SOURCES_GEN12LP} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tgllp.cpp PARENT_SCOPE ) endif() compute-runtime-20.13.16352/level_zero/core/source/gen12lp/tgllp/cmdlist_tgllp.cpp000066400000000000000000000015451363734646600277170ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/gen12lp/cmdlist_gen12lp.h" #include "cache_flush_gen12lp.inl" #include "cmdlist_extended.inl" #include "igfxfmid.h" namespace L0 { template struct CommandListCoreFamily; static CommandListPopulateFactory> populateTGLLP; static CommandListImmediatePopulateFactory> populateTGLLPImmediate; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen12lp/tgllp/cmdqueue_tgllp.cpp000066400000000000000000000007651363734646600300730ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandQueuePopulateFactory> populateTGLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen12lp/tgllp/image_tgllp.cpp000066400000000000000000000012501363734646600273330ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" 
#include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; bool initialize(Device *device, const ze_image_desc_t *desc) override { return ImageCoreFamily::initialize(device, desc); }; }; static ImagePopulateFactory> populateTGLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen12lp/tgllp/kernel_tgllp.cpp000066400000000000000000000004201363734646600275270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateTGLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen12lp/tgllp/sampler_tgllp.cpp000066400000000000000000000010271363734646600277160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/gen12lp/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateTGLLP; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen8/000077500000000000000000000000001363734646600226145ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen8/CMakeLists.txt000066400000000000000000000006151363734646600253560ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN8) set(HW_SOURCES_GEN8 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen8.cpp ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN8}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN8 
${HW_SOURCES_GEN8}) endif() compute-runtime-20.13.16352/level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp000066400000000000000000000013511363734646600304450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { typedef BDWFamily Family; struct EnableL0Gen8 { EnableL0Gen8() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableL0Gen8 enable; } // namespace NEO compute-runtime-20.13.16352/level_zero/core/source/gen9/000077500000000000000000000000001363734646600226155ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen9/CMakeLists.txt000066400000000000000000000007341363734646600253610ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN9) set(HW_SOURCES_GEN9 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_gen9.inl ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_gen9.cpp ) add_subdirectories() target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_GEN9}) set_property(GLOBAL PROPERTY L0_HW_SOURCES_GEN9 ${HW_SOURCES_GEN9}) endif() compute-runtime-20.13.16352/level_zero/core/source/gen9/cfl/000077500000000000000000000000001363734646600233615ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen9/cfl/CMakeLists.txt000066400000000000000000000007551363734646600261300ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_CFL) set(HW_SOURCES_GEN9 ${HW_SOURCES_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_cfl.cpp PARENT_SCOPE ) endif() compute-runtime-20.13.16352/level_zero/core/source/gen9/cfl/cmdlist_cfl.cpp000066400000000000000000000014501363734646600263500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/gen9/cmdlist_gen9.h" #include "level_zero/core/source/gen9/cmdlist_gen9.inl" #include "cmdlist_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandListPopulateFactory> populateCFL; static CommandListImmediatePopulateFactory> populateCFLImmediate; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/cfl/cmdqueue_cfl.cpp000066400000000000000000000007441363734646600265260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandQueuePopulateFactory> populateCFL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/cfl/image_cfl.cpp000066400000000000000000000007661363734646600260040ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include 
"level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; }; static ImagePopulateFactory> populateCFL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/cfl/kernel_cfl.cpp000066400000000000000000000004111363734646600261650ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateCFL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/cfl/sampler_cfl.cpp000066400000000000000000000010061363734646600263510ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateCFL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/cmdlist_gen9.h000066400000000000000000000022121363734646600253440ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "igfxfmid.h" namespace L0 { template struct CommandListCoreFamily; template struct CommandListCoreFamilyImmediate; template struct CommandListProductFamily : public CommandListCoreFamily { using CommandListCoreFamily::CommandListCoreFamily; void programL3(bool isSLMused) override { using GfxFamily = typename 
NEO::GfxFamilyMapper::GfxFamily; NEO::EncodeL3State::encode(commandContainer, isSLMused); } }; template struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/cmdlist_gen9.inl000066400000000000000000000014021363734646600256770ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp000066400000000000000000000013511363734646600304470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { typedef SKLFamily Family; struct EnableL0Gen9 { EnableL0Gen9() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableL0Gen9 enable; } // namespace NEO compute-runtime-20.13.16352/level_zero/core/source/gen9/glk/000077500000000000000000000000001363734646600233725ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen9/glk/CMakeLists.txt000066400000000000000000000007551363734646600261410ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel 
Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GLK) set(HW_SOURCES_GEN9 ${HW_SOURCES_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_glk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_glk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_glk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_glk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_glk.cpp PARENT_SCOPE ) endif() compute-runtime-20.13.16352/level_zero/core/source/gen9/glk/cmdlist_glk.cpp000066400000000000000000000015461363734646600264000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/gen9/cmdlist_gen9.h" #include "level_zero/core/source/gen9/cmdlist_gen9.inl" #include "cmdlist_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandListPopulateFactory> populateGLK; static CommandListImmediatePopulateFactory> populateGLKImmediate; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/glk/cmdqueue_glk.cpp000066400000000000000000000007441363734646600265500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandQueuePopulateFactory> populateGLK; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/gen9/glk/image_glk.cpp000066400000000000000000000007661363734646600260260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; }; static ImagePopulateFactory> populateGLK; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/glk/kernel_glk.cpp000066400000000000000000000004111363734646600262070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateGLK; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/glk/sampler_glk.cpp000066400000000000000000000010061363734646600263730ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateGLK; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/kbl/000077500000000000000000000000001363734646600233655ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen9/kbl/CMakeLists.txt000066400000000000000000000007551363734646600261340ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_KBL) set(HW_SOURCES_GEN9 ${HW_SOURCES_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_kbl.cpp PARENT_SCOPE ) endif() compute-runtime-20.13.16352/level_zero/core/source/gen9/kbl/cmdlist_kbl.cpp000066400000000000000000000014371363734646600263650ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/gen9/cmdlist_gen9.h" #include "level_zero/core/source/gen9/cmdlist_gen9.inl" #include "cmdlist_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandListPopulateFactory> populateKBL; static CommandListImmediatePopulateFactory> populateKBLImmediate; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/kbl/cmdqueue_kbl.cpp000066400000000000000000000007421363734646600265340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandQueuePopulateFactory> populateKBL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/kbl/image_kbl.cpp000066400000000000000000000007601363734646600260060ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include 
"level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; }; static ImagePopulateFactory> populateKBL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/kbl/kernel_kbl.cpp000066400000000000000000000004071363734646600262020ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateKBL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/kbl/sampler_kbl.cpp000066400000000000000000000010001363734646600263530ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateKBL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/skl/000077500000000000000000000000001363734646600234065ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/gen9/skl/CMakeLists.txt000066400000000000000000000007551363734646600261550ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_SKL) set(HW_SOURCES_GEN9 ${HW_SOURCES_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_skl.cpp PARENT_SCOPE ) endif() 
compute-runtime-20.13.16352/level_zero/core/source/gen9/skl/cmdlist_skl.cpp000066400000000000000000000014311363734646600264210ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_base.inl" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" #include "level_zero/core/source/gen9/cmdlist_gen9.h" #include "level_zero/core/source/gen9/cmdlist_gen9.inl" #include "cmdlist_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandListPopulateFactory> populateSKL; static CommandListImmediatePopulateFactory> populateSKLImmediate; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/skl/cmdqueue_skl.cpp000066400000000000000000000007411363734646600265750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" #include "level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl" #include "cmdqueue_extended.inl" #include "igfxfmid.h" namespace L0 { static CommandQueuePopulateFactory> populateSKL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/skl/image_skl.cpp000066400000000000000000000007551363734646600260540ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/image/image_hw.inl" namespace L0 { template <> struct ImageProductFamily : public ImageCoreFamily { using ImageCoreFamily::ImageCoreFamily; }; static ImagePopulateFactory> populateSKL; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/gen9/skl/kernel_skl.cpp000066400000000000000000000004061363734646600262430ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_hw.h" namespace L0 { static KernelPopulateFactory> populateSKL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/gen9/skl/sampler_skl.cpp000066400000000000000000000007751363734646600264370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/gen9/hw_info.h" #include "level_zero/core/source/sampler/sampler_hw.inl" namespace L0 { template <> struct SamplerProductFamily : public SamplerCoreFamily { using SamplerCoreFamily::SamplerCoreFamily; }; static SamplerPopulateFactory> populateSKL; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/hw_helpers/000077500000000000000000000000001363734646600241135ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/hw_helpers/hw_helpers.h000066400000000000000000000014611363734646600264260ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_info.h" #include namespace L0 { inline uint64_t getIntermediateCacheSize(const NEO::HardwareInfo &hwInfo) { return 0u; } inline void waitForTaskCountWithKmdNotifyFallbackHelper(NEO::CommandStreamReceiver *csr, uint32_t taskCountToWait, NEO::FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { } } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/image/000077500000000000000000000000001363734646600230355ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/image/image.h000066400000000000000000000031351363734646600242720ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/cmdcontainer.h" #include "level_zero/core/source/device/device.h" #include struct _ze_image_handle_t {}; namespace NEO { struct ImageInfo; } namespace L0 { struct Image : _ze_image_handle_t { template struct Allocator { static Image *allocate() { return new Type(); } }; virtual ~Image() = default; virtual ze_result_t destroy() = 0; static Image *create(uint32_t productFamily, Device *device, const ze_image_desc_t *desc); virtual NEO::GraphicsAllocation *getAllocation() = 0; virtual void decoupleAllocation(NEO::CommandContainer &commandContainer) = 0; virtual void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0; virtual void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0; virtual size_t getSizeInBytes() = 0; virtual NEO::ImageInfo getImageInfo() = 0; virtual ze_image_desc_t getImageDesc() = 0; static Image *fromHandle(ze_image_handle_t handle) { return static_cast(handle); } inline ze_image_handle_t toHandle() { return this; } }; using ImageAllocatorFn = Image *(*)(); extern ImageAllocatorFn imageFactory[]; template struct ImagePopulateFactory { ImagePopulateFactory() { imageFactory[productFamily] = Image::Allocator::allocate; } }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/image/image_formats_append.h000066400000000000000000000022421363734646600273520ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/image/image_imp.h" #define 
GEN10PLUS_IMAGE_FORMATS() \ { \ mediaSurfaceFormatTable \ [ZE_IMAGE_FORMAT_LAYOUT_P016 - ZE_IMAGE_FORMAT_MEDIA_LAYOUT_OFFSET] = \ RSS::SURFACE_FORMAT_PLANAR_420_16; \ } #define GEN11PLUS_IMAGE_FORMATS() \ { \ GEN10PLUS_IMAGE_FORMATS() \ } #define GEN12PLUS_IMAGE_FORMATS() \ { \ GEN11PLUS_IMAGE_FORMATS() \ surfaceFormatTable \ [ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2][ZE_IMAGE_FORMAT_TYPE_FLOAT] = \ RSS::SURFACE_FORMAT_R10G10B10_FLOAT_A2_UNORM; \ } compute-runtime-20.13.16352/level_zero/core/source/image/image_hw.h000066400000000000000000000366701363734646600250020ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "level_zero/core/source/image/image_formats_append.h" #include "level_zero/core/source/image/image_imp.h" namespace L0 { template struct ImageCoreFamily : public ImageImp { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using RSS = typename GfxFamily::RENDER_SURFACE_STATE; using RENDER_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; using SHADER_CHANNEL_SELECT = typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT; using ImageImp::ImageImp; static const RENDER_FORMAT surfaceFormatUndefined = static_cast(-1); static const GMM_RESOURCE_FORMAT gmmResourceFormatUndefined = static_cast(-1); NEO::SurfaceFormatInfo surfaceFormatsForRedescribe[5] = { {GMM_FORMAT_R8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_UINT), 0, 1, 1, 1}, {GMM_FORMAT_R16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_UINT), 0, 1, 2, 2}, {GMM_FORMAT_R32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_UINT), 0, 1, 4, 4}, {GMM_FORMAT_R32G32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_UINT), 0, 2, 4, 8}, 
{GMM_FORMAT_R32G32B32A32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_UINT), 0, 4, 4, 16}}; NEO::SurfaceFormatInfo surfaceFormatTable[ZE_IMAGE_FORMAT_RENDER_LAYOUT_MAX + 1] [ZE_IMAGE_FORMAT_TYPE_MAX + 1] = { // ZE_IMAGE_FORMAT_LAYOUT_8 {{GMM_FORMAT_R8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_UINT), 0, 1, 1, 1}, {GMM_FORMAT_R8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_SINT), 0, 1, 1, 1}, {GMM_FORMAT_R8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_UNORM), 0, 1, 1, 1}, {GMM_FORMAT_R8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_SNORM), 0, 1, 1, 1}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 1, 1, 1}}, // ZE_IMAGE_FORMAT_LAYOUT_16 {{GMM_FORMAT_R16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_UINT), 0, 1, 2, 2}, {GMM_FORMAT_R16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_SINT), 0, 1, 2, 2}, {GMM_FORMAT_R16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_UNORM), 0, 1, 2, 2}, {GMM_FORMAT_R16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_SNORM), 0, 1, 2, 2}, {GMM_FORMAT_R16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_FLOAT), 0, 1, 2, 2}}, // ZE_IMAGE_FORMAT_LAYOUT_32 {{GMM_FORMAT_R32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_UINT), 0, 1, 4, 4}, {GMM_FORMAT_R32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_SINT), 0, 1, 4, 4}, {GMM_FORMAT_R32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_UNORM), 0, 1, 4, 4}, {GMM_FORMAT_R32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_SNORM), 0, 1, 4, 4}, {GMM_FORMAT_R32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_FLOAT), 0, 1, 4, 4}}, // ZE_IMAGE_FORMAT_LAYOUT_8_8 {{GMM_FORMAT_R8G8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_UINT), 0, 2, 1, 2}, {GMM_FORMAT_R8G8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_SINT), 0, 2, 1, 2}, {GMM_FORMAT_R8G8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_UNORM), 0, 2, 1, 2}, {GMM_FORMAT_R8G8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_SNORM), 0, 2, 1, 2}, {gmmResourceFormatUndefined, 
static_cast(surfaceFormatUndefined), 0, 2, 1, 2}}, // ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 {{GMM_FORMAT_R8G8B8A8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_UINT), 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_SINT), 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_UNORM), 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_SNORM), 0, 4, 1, 4}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 4, 1, 4}}, // ZE_IMAGE_FORMAT_LAYOUT_16_16 {{GMM_FORMAT_R16G16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_UINT), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_SINT), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_UNORM), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_SNORM), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_FLOAT), 0, 2, 2, 4}}, // ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 {{GMM_FORMAT_R16G16B16A16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_UINT), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_SINT), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_UNORM), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_SNORM), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_FLOAT), 0, 4, 2, 8}}, // ZE_IMAGE_FORMAT_LAYOUT_32_32 {{GMM_FORMAT_R32G32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_UINT), 0, 2, 4, 8}, {GMM_FORMAT_R32G32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_SINT), 0, 2, 4, 8}, {GMM_FORMAT_R32G32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_UNORM), 0, 2, 4, 8}, {GMM_FORMAT_R32G32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_SNORM), 0, 2, 4, 8}, 
{GMM_FORMAT_R32G32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_FLOAT), 0, 2, 4, 8}}, // ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 {{GMM_FORMAT_R32G32B32A32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_UINT), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_SINT), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_UNORM), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_SNORM), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_FLOAT), 0, 4, 4, 16}}, // ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2 {{GMM_FORMAT_R10G10B10A2_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_UINT), 0, 1, 1, 1}, {GMM_FORMAT_R10G10B10A2_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_SINT), 0, 1, 1, 1}, {GMM_FORMAT_R10G10B10A2_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_UNORM), 0, 1, 1, 1}, {GMM_FORMAT_R10G10B10A2_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_SNORM), 0, 1, 1, 1}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 1, 1, 1}}, // ZE_IMAGE_FORMAT_LAYOUT_11_11_10 {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, // ZE_IMAGE_FORMAT_LAYOUT_5_6_5 {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, 
static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, // ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1 {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, // ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4 {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}}; const SHADER_CHANNEL_SELECT shaderChannelSelect[ZE_IMAGE_FORMAT_SWIZZLE_MAX + 1] = { RSS::SHADER_CHANNEL_SELECT_RED, RSS::SHADER_CHANNEL_SELECT_GREEN, RSS::SHADER_CHANNEL_SELECT_BLUE, RSS::SHADER_CHANNEL_SELECT_ALPHA, RSS::SHADER_CHANNEL_SELECT_ZERO, RSS::SHADER_CHANNEL_SELECT_ONE, RSS::SHADER_CHANNEL_SELECT_ZERO, }; bool initialize(Device *device, const ze_image_desc_t *desc) override; void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override; void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override; protected: RENDER_SURFACE_STATE surfaceState; RENDER_SURFACE_STATE redescribedSurfaceState; }; template struct ImageProductFamily; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/image/image_hw.inl000066400000000000000000000236711363734646600253320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" 
#include "shared/source/helpers/string.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/image/image_surface_state.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/utilities/compiler_support.h" #include "level_zero/core/source/image/image_hw.h" inline NEO::ImageType convertType(const ze_image_type_t type) { switch (type) { case ZE_IMAGE_TYPE_2D: return NEO::ImageType::Image2D; case ZE_IMAGE_TYPE_3D: return NEO::ImageType::Image3D; case ZE_IMAGE_TYPE_2DARRAY: return NEO::ImageType::Image2DArray; case ZE_IMAGE_TYPE_1D: return NEO::ImageType::Image1D; case ZE_IMAGE_TYPE_1DARRAY: return NEO::ImageType::Image1DArray; case ZE_IMAGE_TYPE_BUFFER: return NEO::ImageType::Image1DBuffer; default: break; } return NEO::ImageType::Invalid; } inline NEO::ImageDescriptor convertDescriptor(const ze_image_desc_t &imageDesc) { NEO::ImageDescriptor desc = {}; desc.fromParent = false; desc.imageArraySize = imageDesc.arraylevels; desc.imageDepth = imageDesc.depth; desc.imageHeight = imageDesc.height; desc.imageRowPitch = 0u; desc.imageSlicePitch = 0u; desc.imageType = convertType(imageDesc.type); desc.imageWidth = imageDesc.width; desc.numMipLevels = imageDesc.miplevels; desc.numSamples = 0u; return desc; } namespace L0 { template bool ImageCoreFamily::initialize(Device *device, const ze_image_desc_t *desc) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; if (desc == nullptr) { return false; } if (desc->format.layout > ZE_IMAGE_FORMAT_LAYOUT_MAX) { return false; } if (desc->format.type > ZE_IMAGE_FORMAT_TYPE_MAX) { return false; } if (desc->format.x > ZE_IMAGE_FORMAT_SWIZZLE_MAX || desc->format.y > ZE_IMAGE_FORMAT_SWIZZLE_MAX || desc->format.z > ZE_IMAGE_FORMAT_SWIZZLE_MAX || desc->format.w > ZE_IMAGE_FORMAT_SWIZZLE_MAX) { return false; } if (desc->format.type > ZE_IMAGE_FORMAT_TYPE_MAX) { return false; } auto imageDescriptor = 
convertDescriptor(*desc); imgInfo.imgDesc = imageDescriptor; imgInfo.surfaceFormat = &surfaceFormatTable[desc->format.layout][desc->format.type]; imageFormatDesc = *const_cast(desc); UNRECOVERABLE_IF(device == nullptr); this->device = device; if (desc == nullptr) { return false; } typename RENDER_SURFACE_STATE::SURFACE_TYPE surfaceType; switch (desc->type) { case ZE_IMAGE_TYPE_1D: case ZE_IMAGE_TYPE_1DARRAY: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; break; case ZE_IMAGE_TYPE_2D: case ZE_IMAGE_TYPE_2DARRAY: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D; break; case ZE_IMAGE_TYPE_3D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D; break; default: return false; } imgInfo.linearStorage = surfaceType == RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; imgInfo.plane = GMM_NO_PLANE; imgInfo.useLocalMemory = false; imgInfo.preferRenderCompression = false; NEO::AllocationProperties properties(device->getRootDeviceIndex(), true, imgInfo, NEO::GraphicsAllocation::AllocationType::IMAGE); allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(allocation == nullptr); auto gmm = this->allocation->getDefaultGmm(); NEO::SurfaceOffsets surfaceOffsets = {imgInfo.offset, imgInfo.xOffset, imgInfo.yOffset, imgInfo.yOffsetForUVPlane}; auto gmmHelper = static_cast(device->getNEODevice()->getRootDeviceEnvironment()).getGmmHelper(); if (gmm != nullptr) { gmm->updateImgInfoAndDesc(imgInfo, 0u); } { surfaceState = GfxFamily::cmdInitRenderSurfaceState; NEO::setImageSurfaceState(&surfaceState, imgInfo, gmm, *gmmHelper, __GMM_NO_CUBE_MAP, this->allocation->getGpuAddress(), surfaceOffsets, desc->format.layout == ZE_IMAGE_FORMAT_LAYOUT_NV12); NEO::setImageSurfaceStateDimensions(&surfaceState, imgInfo, __GMM_NO_CUBE_MAP, surfaceType); surfaceState.setSurfaceMinLod(0u); surfaceState.setMipCountLod(0u); NEO::setMipTailStartLod(&surfaceState, gmm); 
surfaceState.setShaderChannelSelectRed( static_cast( shaderChannelSelect[desc->format.x])); surfaceState.setShaderChannelSelectGreen( static_cast( shaderChannelSelect[desc->format.y])); surfaceState.setShaderChannelSelectBlue( static_cast( shaderChannelSelect[desc->format.z])); surfaceState.setShaderChannelSelectAlpha( static_cast( shaderChannelSelect[desc->format.w])); surfaceState.setNumberOfMultisamples(RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES::NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1); if (gmm && gmm->isRenderCompressed) { NEO::setAuxParamsForCCS(&surfaceState, gmm); } } { const uint32_t exponent = Math::log2(imgInfo.surfaceFormat->NumChannels * imgInfo.surfaceFormat->PerChannelSizeInBytes); DEBUG_BREAK_IF(exponent >= 5u); NEO::ImageInfo imgInfoRedescirebed; imgInfoRedescirebed.surfaceFormat = &surfaceFormatsForRedescribe[exponent % 5]; imgInfoRedescirebed.imgDesc = imgInfo.imgDesc; imgInfoRedescirebed.qPitch = imgInfo.qPitch; redescribedSurfaceState = GfxFamily::cmdInitRenderSurfaceState; NEO::setImageSurfaceState(&redescribedSurfaceState, imgInfoRedescirebed, gmm, *gmmHelper, __GMM_NO_CUBE_MAP, this->allocation->getGpuAddress(), surfaceOffsets, desc->format.layout == ZE_IMAGE_FORMAT_LAYOUT_NV12); NEO::setImageSurfaceStateDimensions(&redescribedSurfaceState, imgInfoRedescirebed, __GMM_NO_CUBE_MAP, surfaceType); redescribedSurfaceState.setSurfaceMinLod(0u); redescribedSurfaceState.setMipCountLod(0u); NEO::setMipTailStartLod(&redescribedSurfaceState, gmm); if (imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R8_UINT_TYPE || imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R16_UINT_TYPE || imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R32_UINT_TYPE) { redescribedSurfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); redescribedSurfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); 
redescribedSurfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); } else if (imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R32G32_UINT_TYPE) { redescribedSurfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); redescribedSurfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN); redescribedSurfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); } else { redescribedSurfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); redescribedSurfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN); redescribedSurfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE); } redescribedSurfaceState.setNumberOfMultisamples(RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES::NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1); if (gmm && gmm->isRenderCompressed) { NEO::setAuxParamsForCCS(&redescribedSurfaceState, gmm); } } return true; } template void ImageCoreFamily::copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; // Copy the image's surface state into position in the provided surface state heap auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset); memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE), &surfaceState, sizeof(RENDER_SURFACE_STATE)); } template void ImageCoreFamily::copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; // Copy the image's surface state into position in the provided surface state heap auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset); 
memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE), &redescribedSurfaceState, sizeof(RENDER_SURFACE_STATE)); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/image/image_imp.cpp000066400000000000000000000017471363734646600255010ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/image/image_imp.h" #include "shared/source/memory_manager/memory_manager.h" #include "igfxfmid.h" namespace L0 { ImageAllocatorFn imageFactory[IGFX_MAX_PRODUCT] = {}; ze_result_t ImageImp::destroy() { this->device->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(this->allocation); delete this; return ZE_RESULT_SUCCESS; } Image *Image::create(uint32_t productFamily, Device *device, const ze_image_desc_t *desc) { ImageAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = imageFactory[productFamily]; } ImageImp *image = nullptr; if (allocator) { image = static_cast((*allocator)()); image->initialize(device, desc); } return image; } bool ImageImp::initialize(Device *device, const ze_image_desc_t *desc) { return true; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/image/image_imp.h000066400000000000000000000075241363734646600251450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/surface_format_info.h" #include "level_zero/core/source/image/image.h" namespace L0 { const int ZE_IMAGE_FORMAT_RENDER_LAYOUT_MAX = ZE_IMAGE_FORMAT_LAYOUT_P416; const int ZE_IMAGE_FORMAT_MEDIA_LAYOUT_OFFSET = ZE_IMAGE_FORMAT_LAYOUT_Y8; const int ZE_IMAGE_FORMAT_MEDIA_LAYOUT_MAX = ZE_IMAGE_FORMAT_LAYOUT_P416; const int ZE_IMAGE_FORMAT_LAYOUT_MAX = ZE_IMAGE_FORMAT_MEDIA_LAYOUT_MAX; const int ZE_IMAGE_FORMAT_TYPE_MAX = ZE_IMAGE_FORMAT_TYPE_FLOAT; const int ZE_IMAGE_FORMAT_SWIZZLE_MAX = ZE_IMAGE_FORMAT_SWIZZLE_X; struct 
ImageFormatDescriptor { enum formatType { RENDER, MEDIA }; uint32_t channelBytes; uint32_t bitsPerPixel; enum formatType type; ImageFormatDescriptor(enum formatType type, uint32_t cBytes, uint32_t bpp = 0) : channelBytes(cBytes), type(type) { bitsPerPixel = bpp > 0 ? bpp : cBytes * 8; } }; struct ImageImp : public Image { using IFD = ImageFormatDescriptor; ze_result_t destroy() override; virtual bool initialize(Device *device, const ze_image_desc_t *desc); const ImageFormatDescriptor formats[ZE_IMAGE_FORMAT_LAYOUT_MAX + 1] = { IFD(IFD::RENDER, 1), // ZE_IMAGE_FORMAT_LAYOUT_8 IFD(IFD::RENDER, 2), // ZE_IMAGE_FORMAT_LAYOUT_16 IFD(IFD::RENDER, 4), // ZE_IMAGE_FORMAT_LAYOUT_32 IFD(IFD::RENDER, 2), // ZE_IMAGE_FORMAT_LAYOUT_8_8 IFD(IFD::RENDER, 4), // ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 IFD(IFD::RENDER, 4), // ZE_IMAGE_FORMAT_LAYOUT_16_16 IFD(IFD::RENDER, 8), // ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 IFD(IFD::RENDER, 8), // ZE_IMAGE_FORMAT_LAYOUT_32_32 IFD(IFD::RENDER, 16), // ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 IFD(IFD::RENDER, 4), // ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2 IFD(IFD::RENDER, 4), // ZE_IMAGE_FORMAT_LAYOUT_11_11_10 IFD(IFD::RENDER, 2), // ZE_IMAGE_FORMAT_LAYOUT_5_6_5 IFD(IFD::RENDER, 2), // ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1 IFD(IFD::RENDER, 2), // ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4 IFD(IFD::MEDIA, 1), // ZE_IMAGE_FORMAT_LAYOUT_Y8 IFD(IFD::MEDIA, 1, 12), // ZE_IMAGE_FORMAT_LAYOUT_NV12 IFD(IFD::MEDIA, 2), // ZE_IMAGE_FORMAT_LAYOUT_YUYV IFD(IFD::MEDIA, 2), // ZE_IMAGE_FORMAT_LAYOUT_VYUY IFD(IFD::MEDIA, 2), // ZE_IMAGE_FORMAT_LAYOUT_YVYU IFD(IFD::MEDIA, 2), // ZE_IMAGE_FORMAT_LAYOUT_UYVY IFD(IFD::MEDIA, 4), // ZE_IMAGE_FORMAT_LAYOUT_AYUV IFD(IFD::MEDIA, 0), // ZE_IMAGE_FORMAT_LAYOUT_YUAV IFD(IFD::MEDIA, 2, 24), // ZE_IMAGE_FORMAT_LAYOUT_P010 IFD(IFD::MEDIA, 4, 32), // ZE_IMAGE_FORMAT_LAYOUT_Y410 IFD(IFD::MEDIA, 2, 24), // ZE_IMAGE_FORMAT_LAYOUT_P012 IFD(IFD::MEDIA, 2, 16), // ZE_IMAGE_FORMAT_LAYOUT_Y16 IFD(IFD::MEDIA, 2, 24), // ZE_IMAGE_FORMAT_LAYOUT_P016 IFD(IFD::MEDIA, 2, 32), 
// ZE_IMAGE_FORMAT_LAYOUT_Y216 IFD(IFD::MEDIA, 2, 32), // ZE_IMAGE_FORMAT_LAYOUT_P216 IFD(IFD::MEDIA, 2, 64) // ZE_IMAGE_FORMAT_LAYOUT_P416 }; ImageImp() {} ~ImageImp() override = default; NEO::GraphicsAllocation *getAllocation() override { return allocation; } void decoupleAllocation(NEO::CommandContainer &commandContainer) override { commandContainer.getDeallocationContainer().push_back(allocation); allocation = NULL; } size_t getSizeInBytes() override { return imgInfo.size; } NEO::ImageInfo getImageInfo() override { return imgInfo; } ze_image_desc_t getImageDesc() override { return imageFormatDesc; } protected: Device *device = nullptr; NEO::ImageInfo imgInfo = {}; NEO::GraphicsAllocation *allocation = nullptr; ze_image_desc_t imageFormatDesc = {}; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/kernel/000077500000000000000000000000001363734646600232335ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/kernel/kernel.cpp000066400000000000000000000004001363734646600252110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel.h" #include "igfxfmid.h" namespace L0 { KernelAllocatorFn kernelFactory[IGFX_MAX_PRODUCT] = {}; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/kernel/kernel.h000066400000000000000000000144501363734646600246700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/unified_memory/unified_memory.h" #include #include #include struct _ze_kernel_handle_t {}; namespace NEO { class Device; struct KernelInfo; class MemoryManager; } // namespace NEO namespace L0 { struct Device; struct Module; struct 
KernelImmutableData { KernelImmutableData(L0::Device *l0device = nullptr); virtual ~KernelImmutableData(); void initialize(NEO::KernelInfo *kernelInfo, NEO::MemoryManager &memoryManager, const NEO::Device *device, uint32_t computeUnitsUsedForSratch, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer); const std::vector &getResidencyContainer() const { return residencyContainer; } uint32_t getIsaSize() const; NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); } uint64_t getPrivateMemorySize() const; NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() const { return privateMemoryGraphicsAllocation.get(); } const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); } uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; } const uint8_t *getSurfaceStateHeapTemplate() const { return surfaceStateHeapTemplate.get(); } uint32_t getDynamicStateHeapDataSize() const { return dynamicStateHeapSize; } const uint8_t *getDynamicStateHeapTemplate() const { return dynamicStateHeapTemplate.get(); } const NEO::KernelDescriptor &getDescriptor() const { return *kernelDescriptor; } Device *getDevice() { return this->device; } protected: Device *device = nullptr; NEO::KernelDescriptor *kernelDescriptor = nullptr; std::unique_ptr isaGraphicsAllocation = nullptr; std::unique_ptr privateMemoryGraphicsAllocation = nullptr; uint32_t crossThreadDataSize = 0; std::unique_ptr crossThreadDataTemplate = nullptr; uint32_t surfaceStateHeapSize = 0; std::unique_ptr surfaceStateHeapTemplate = nullptr; uint32_t dynamicStateHeapSize = 0; std::unique_ptr dynamicStateHeapTemplate = nullptr; std::vector residencyContainer; }; struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI { template struct Allocator { static Kernel *allocate(Module *module) { return new Type(module); } }; static Kernel *create(uint32_t productFamily, Module *module, const 
ze_kernel_desc_t *desc, ze_result_t *ret); ~Kernel() override = default; virtual ze_result_t destroy() = 0; virtual ze_result_t setAttribute(ze_kernel_attribute_t attr, uint32_t size, const void *pValue) = 0; virtual ze_result_t getAttribute(ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue) = 0; virtual ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) = 0; virtual ze_result_t getProperties(ze_kernel_properties_t *pKernelProperties) = 0; virtual ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) = 0; virtual void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0; virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, const void *argVal, NEO::GraphicsAllocation *allocation) = 0; virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) = 0; virtual bool getGroupCountOffsets(uint32_t *locations) = 0; virtual bool getGroupSizeOffsets(uint32_t *locations) = 0; virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) = 0; virtual ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) = 0; virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) = 0; virtual const KernelImmutableData *getImmutableData() const = 0; virtual std::unique_ptr clone() const = 0; virtual const std::vector &getResidencyContainer() const = 0; virtual void getGroupSize(uint32_t &outGroupSizeX, uint32_t &outGroupSizeY, uint32_t &outGroupSizeZ) const = 0; virtual uint32_t getThreadsPerThreadGroup() const = 0; virtual uint32_t getThreadExecutionMask() const = 0; virtual const uint8_t *getCrossThreadData() const = 0; virtual uint32_t getCrossThreadDataSize() const = 0; virtual const uint8_t *getPerThreadData() const = 0; virtual uint32_t getPerThreadDataSizeForWholeThreadGroup() const = 0; virtual 
uint32_t getPerThreadDataSize() const = 0; virtual const uint8_t *getSurfaceStateHeapData() const = 0; virtual uint32_t getSurfaceStateHeapDataSize() const = 0; virtual const uint8_t *getDynamicStateHeapData() const = 0; virtual size_t getDynamicStateHeapDataSize() const = 0; virtual UnifiedMemoryControls getUnifiedMemoryControls() const = 0; virtual bool hasIndirectAllocationsAllowed() const = 0; virtual NEO::GraphicsAllocation *getPrintfBufferAllocation() = 0; virtual void printPrintfOutput() = 0; Kernel() = default; Kernel(const Kernel &) = delete; Kernel(Kernel &&) = delete; Kernel &operator=(const Kernel &) = delete; Kernel &operator=(Kernel &&) = delete; static Kernel *fromHandle(ze_kernel_handle_t handle) { return static_cast(handle); } inline ze_kernel_handle_t toHandle() { return this; } }; using KernelAllocatorFn = Kernel *(*)(Module *module); extern KernelAllocatorFn kernelFactory[]; template struct KernelPopulateFactory { KernelPopulateFactory() { kernelFactory[productFamily] = KernelType::template Allocator::allocate; } }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/kernel/kernel_hw.h000066400000000000000000000136321363734646600253670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/helpers/string.h" #include "level_zero/core/source/kernel/kernel_imp.h" #include "level_zero/core/source/module/module.h" #include "igfxfmid.h" #include namespace L0 { template struct KernelHw : public KernelImp { using KernelImp::KernelImp; using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override { uintptr_t baseAddress = static_cast(alloc->getGpuAddress()); auto sshAlignmentMask = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignmentMask(); // Remove misalligned bytes, accounted for 
in in bufferOffset patch token baseAddress &= sshAlignmentMask; auto offset = ptrDiff(address, reinterpret_cast(baseAddress)); size_t sizeTillEndOfSurface = alloc->getUnderlyingBufferSize() - offset; auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); bool offsetWasPatched = NEO::patchNonPointer(ArrayRef(this->crossThreadData.get(), this->crossThreadDataSize), argInfo.bufferOffset, static_cast(offset)); if (false == offsetWasPatched) { // fallback to handling offset in surface state baseAddress = reinterpret_cast(address); DEBUG_BREAK_IF(baseAddress != (baseAddress & sshAlignmentMask)); offset = 0; } auto surfaceStateAddress = ptrOffset(surfaceStateHeapData.get(), argInfo.bindful); void *bufferAddressForSsh = reinterpret_cast(baseAddress); auto alignment = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment(); size_t bufferSizeForSsh = ptrDiff(reinterpret_cast(alloc->getGpuAddress()), bufferAddressForSsh); bufferSizeForSsh += sizeTillEndOfSurface; // take address alignment offset into account bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment); auto mocs = this->module->getDevice()->getMOCS(true, false); bool requiresCoherency; NEO::EncodeSurfaceState::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs, requiresCoherency = false); } std::unique_ptr clone() const override { std::unique_ptr ret{new KernelHw}; auto cloned = static_cast *>(ret.get()); cloned->kernelImmData = kernelImmData; cloned->module = module; cloned->kernelArgHandlers.assign(this->kernelArgHandlers.begin(), this->kernelArgHandlers.end()); cloned->residencyContainer.assign(this->residencyContainer.begin(), this->residencyContainer.end()); if (printfBuffer != nullptr) { const auto &it = std::find(cloned->residencyContainer.rbegin(), cloned->residencyContainer.rend(), this->printfBuffer); if (it == cloned->residencyContainer.rbegin()) { cloned->residencyContainer.resize(cloned->residencyContainer.size() - 1); } else { 
std::iter_swap(it, cloned->residencyContainer.rbegin()); } cloned->createPrintfBuffer(); } std::copy(this->groupSize, this->groupSize + 3, cloned->groupSize); cloned->threadsPerThreadGroup = this->threadsPerThreadGroup; cloned->threadExecutionMask = this->threadExecutionMask; if (this->surfaceStateHeapDataSize > 0) { cloned->surfaceStateHeapData.reset(new uint8_t[this->surfaceStateHeapDataSize]); memcpy_s(cloned->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize, this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize); cloned->surfaceStateHeapDataSize = this->surfaceStateHeapDataSize; } if (this->crossThreadDataSize != 0) { cloned->crossThreadData.reset(new uint8_t[this->crossThreadDataSize]); memcpy_s(cloned->crossThreadData.get(), this->crossThreadDataSize, this->crossThreadData.get(), this->crossThreadDataSize); cloned->crossThreadDataSize = this->crossThreadDataSize; } if (this->dynamicStateHeapDataSize != 0) { cloned->dynamicStateHeapData.reset(new uint8_t[this->dynamicStateHeapDataSize]); memcpy_s(cloned->dynamicStateHeapData.get(), this->dynamicStateHeapDataSize, this->dynamicStateHeapData.get(), this->dynamicStateHeapDataSize); cloned->dynamicStateHeapDataSize = this->dynamicStateHeapDataSize; } if (this->perThreadDataForWholeThreadGroup != nullptr) { alignedFree(cloned->perThreadDataForWholeThreadGroup); cloned->perThreadDataForWholeThreadGroup = reinterpret_cast(alignedMalloc(perThreadDataSizeForWholeThreadGroupAllocated, 32)); memcpy_s(cloned->perThreadDataForWholeThreadGroup, this->perThreadDataSizeForWholeThreadGroupAllocated, this->perThreadDataForWholeThreadGroup, this->perThreadDataSizeForWholeThreadGroupAllocated); cloned->perThreadDataSizeForWholeThreadGroupAllocated = this->perThreadDataSizeForWholeThreadGroupAllocated; cloned->perThreadDataSizeForWholeThreadGroup = this->perThreadDataSizeForWholeThreadGroup; cloned->perThreadDataSize = this->perThreadDataSize; } return ret; } }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/kernel/kernel_imp.cpp000066400000000000000000001033701363734646600260700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/kernel/kernel_imp.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/string.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/utilities/arrayref.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/program/kernel_info.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/module/module.h" #include "level_zero/core/source/module/module_imp.h" #include "level_zero/core/source/printf_handler/printf_handler.h" #include "level_zero/core/source/sampler/sampler.h" #include namespace L0 { KernelImmutableData::KernelImmutableData(L0::Device *l0device) : device(l0device) {} KernelImmutableData::~KernelImmutableData() { if (nullptr != isaGraphicsAllocation) { this->getDevice()->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(&*isaGraphicsAllocation); isaGraphicsAllocation.release(); } crossThreadDataTemplate.reset(); if (nullptr != privateMemoryGraphicsAllocation) { this->getDevice()->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(&*privateMemoryGraphicsAllocation); crossThreadDataTemplate.release(); } surfaceStateHeapTemplate.reset(); dynamicStateHeapTemplate.reset(); } inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef surfaceStateHeap, uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation, const NEO::ArgDescPointer &ptr, const NEO::Device &device) { if (false == 
crossThreadData.empty()) { NEO::patchPointer(crossThreadData, ptr, ptrToPatchInCrossThreadData); } if ((false == surfaceStateHeap.empty()) && (NEO::isValidOffset(ptr.bindful))) { auto surfaceState = surfaceStateHeap.begin() + ptr.bindful; void *addressToPatch = reinterpret_cast(allocation.getUnderlyingBuffer()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); NEO::Buffer::setSurfaceState(&device, surfaceState, sizeToPatch, addressToPatch, 0, &allocation, 0, 0); } } void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, NEO::MemoryManager &memoryManager, const NEO::Device *device, uint32_t computeUnitsUsedForSratch, NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer) { UNRECOVERABLE_IF(kernelInfo == nullptr); this->kernelDescriptor = &kernelInfo->kernelDescriptor; auto kernelIsaSize = kernelInfo->heapInfo.pKernelHeader->KernelHeapSize; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties( {device->getRootDeviceIndex(), kernelIsaSize, NEO::GraphicsAllocation::AllocationType::KERNEL_ISA}); UNRECOVERABLE_IF(allocation == nullptr); if (kernelInfo->heapInfo.pKernelHeap != nullptr) { memoryManager.copyMemoryToAllocation(allocation, kernelInfo->heapInfo.pKernelHeap, kernelIsaSize); } isaGraphicsAllocation.reset(allocation); this->crossThreadDataSize = this->kernelDescriptor->kernelAttributes.crossThreadDataSize; ArrayRef crossThredDataArrayRef; if (crossThreadDataSize != 0) { crossThreadDataTemplate.reset(new uint8_t[crossThreadDataSize]); if (kernelInfo->crossThreadData) { memcpy_s(crossThreadDataTemplate.get(), crossThreadDataSize, kernelInfo->crossThreadData, crossThreadDataSize); } else { memset(crossThreadDataTemplate.get(), 0x00, crossThreadDataSize); } crossThredDataArrayRef = ArrayRef(this->crossThreadDataTemplate.get(), this->crossThreadDataSize); NEO::patchNonPointer(crossThredDataArrayRef, kernelDescriptor->payloadMappings.implicitArgs.simdSize, kernelDescriptor->kernelAttributes.simdSize); 
} if (kernelInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize != 0) { this->surfaceStateHeapSize = kernelInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize; surfaceStateHeapTemplate.reset(new uint8_t[surfaceStateHeapSize]); memcpy_s(surfaceStateHeapTemplate.get(), surfaceStateHeapSize, kernelInfo->heapInfo.pSsh, surfaceStateHeapSize); } if (kernelInfo->heapInfo.pKernelHeader->DynamicStateHeapSize != 0) { this->dynamicStateHeapSize = kernelInfo->heapInfo.pKernelHeader->DynamicStateHeapSize; dynamicStateHeapTemplate.reset(new uint8_t[dynamicStateHeapSize]); memcpy_s(dynamicStateHeapTemplate.get(), dynamicStateHeapSize, kernelInfo->heapInfo.pDsh, dynamicStateHeapSize); } ArrayRef surfaceStateHeapArrayRef = ArrayRef(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize()); uint32_t privateSurfaceSize = kernelDescriptor->kernelAttributes.perThreadPrivateMemorySize; if (privateSurfaceSize != 0) { privateSurfaceSize *= computeUnitsUsedForSratch * kernelDescriptor->kernelAttributes.simdSize; UNRECOVERABLE_IF(privateSurfaceSize == 0); this->privateMemoryGraphicsAllocation.reset(memoryManager.allocateGraphicsMemoryWithProperties( {0, privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE})); UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr); patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, static_cast(privateMemoryGraphicsAllocation->getGpuAddressToPatch()), *privateMemoryGraphicsAllocation, kernelDescriptor->payloadMappings.implicitArgs.privateMemoryAddress, *device); this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation.get()); } if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { UNRECOVERABLE_IF(nullptr == globalConstBuffer); patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, static_cast(globalConstBuffer->getGpuAddressToPatch()), *globalConstBuffer, 
kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress, *device); this->residencyContainer.push_back(globalConstBuffer); } if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) { UNRECOVERABLE_IF(globalVarBuffer == nullptr); patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, static_cast(globalVarBuffer->getGpuAddressToPatch()), *globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress, *device); this->residencyContainer.push_back(globalVarBuffer); } } uint32_t KernelImmutableData::getIsaSize() const { return static_cast(isaGraphicsAllocation->getUnderlyingBufferSize()); } uint64_t KernelImmutableData::getPrivateMemorySize() const { uint64_t size = 0; if (privateMemoryGraphicsAllocation != nullptr) { size = privateMemoryGraphicsAllocation->getUnderlyingBufferSize(); } return size; } KernelImp::KernelImp(Module *module) : module(module) {} KernelImp::~KernelImp() { if (perThreadDataForWholeThreadGroup != nullptr) { alignedFree(perThreadDataForWholeThreadGroup); } if (printfBuffer != nullptr) { module->getDevice()->getDriverHandle()->getMemoryManager()->freeGraphicsMemory(printfBuffer); } slmArgSizes.clear(); crossThreadData.reset(); surfaceStateHeapData.reset(); dynamicStateHeapData.reset(); } ze_result_t KernelImp::setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) { if (argIndex >= kernelArgHandlers.size()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } return (this->*kernelArgHandlers[argIndex])(argIndex, argSize, pArgValue); } void KernelImp::setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) { uint32_t groupSizeX; uint32_t groupSizeY; uint32_t groupSizeZ; getGroupSize(groupSizeX, groupSizeY, groupSizeZ); const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor(); uint32_t globalWorkSize[3] = {groupCountX * groupSizeX, groupCountY * groupSizeY, groupCountZ * 
groupSizeZ}; auto dst = ArrayRef(crossThreadData.get(), crossThreadDataSize); NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.globalWorkSize, globalWorkSize); uint32_t groupCount[3] = {groupCountX, groupCountY, groupCountZ}; NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.numWorkGroups, groupCount); } bool KernelImp::getGroupCountOffsets(uint32_t *locations) { const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor(); for (int i = 0; i < 3; i++) { if (NEO::isValidOffset(desc.payloadMappings.dispatchTraits.numWorkGroups[i])) { locations[i] = desc.payloadMappings.dispatchTraits.numWorkGroups[i]; } else { return false; } } return true; } bool KernelImp::getGroupSizeOffsets(uint32_t *locations) { const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor(); for (int i = 0; i < 3; i++) { if (NEO::isValidOffset(desc.payloadMappings.dispatchTraits.globalWorkSize[i])) { locations[i] = desc.payloadMappings.dispatchTraits.globalWorkSize[i]; } else { return false; } } return true; } ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) { if ((0 == groupSizeX) || (0 == groupSizeY) || (0 == groupSizeZ)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto numChannels = kernelImmData->getDescriptor().kernelAttributes.numLocalIdChannels; Vec3 groupSize{groupSizeX, groupSizeY, groupSizeZ}; auto itemsInGroup = Math::computeTotalElementsCount(groupSize); if (itemsInGroup > module->getMaxGroupSize()) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION; } auto grfSize = kernelImmData->getDescriptor().kernelAttributes.grfSize; uint32_t perThreadDataSizeForWholeThreadGroupNeeded = static_cast(NEO::PerThreadDataHelper::getPerThreadDataSizeTotal( kernelImmData->getDescriptor().kernelAttributes.simdSize, grfSize, numChannels, itemsInGroup)); if (perThreadDataSizeForWholeThreadGroupNeeded > perThreadDataSizeForWholeThreadGroupAllocated) { 
alignedFree(perThreadDataForWholeThreadGroup); perThreadDataForWholeThreadGroup = static_cast(alignedMalloc(perThreadDataSizeForWholeThreadGroupNeeded, 32)); perThreadDataSizeForWholeThreadGroupAllocated = perThreadDataSizeForWholeThreadGroupNeeded; } perThreadDataSizeForWholeThreadGroup = perThreadDataSizeForWholeThreadGroupNeeded; if (numChannels > 0) { UNRECOVERABLE_IF(3 != numChannels); NEO::generateLocalIDs( perThreadDataForWholeThreadGroup, static_cast(kernelImmData->getDescriptor().kernelAttributes.simdSize), std::array{{static_cast(groupSizeX), static_cast(groupSizeY), static_cast(groupSizeZ)}}, std::array{{0, 1, 2}}, false, grfSize); } this->groupSize[0] = groupSizeX; this->groupSize[1] = groupSizeY; this->groupSize[2] = groupSizeZ; auto simdSize = kernelImmData->getDescriptor().kernelAttributes.simdSize; this->threadsPerThreadGroup = static_cast((itemsInGroup + simdSize - 1u) / simdSize); this->perThreadDataSize = perThreadDataSizeForWholeThreadGroup / threadsPerThreadGroup; patchWorkgroupSizeInCrossThreadData(groupSizeX, groupSizeY, groupSizeZ); auto remainderSimdLanes = itemsInGroup & (simdSize - 1u); threadExecutionMask = static_cast(maxNBitValue(remainderSimdLanes)); if (!threadExecutionMask) { threadExecutionMask = static_cast(maxNBitValue((simdSize == 1) ? 32 : simdSize)); } return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) { size_t retGroupSize[3] = {}; auto maxWorkGroupSize = module->getMaxGroupSize(); auto simd = kernelImmData->getDescriptor().kernelAttributes.simdSize; size_t workItems[3] = {globalSizeX, globalSizeY, globalSizeZ}; uint32_t dim = (globalSizeY > 1U) ? 2 : 1U; dim = (globalSizeZ > 1U) ? 
3 : dim; if (NEO::DebugManager.flags.EnableComputeWorkSizeND.get()) { auto usesImages = getImmutableData()->getDescriptor().kernelAttributes.flags.usesImages; auto coreFamily = module->getDevice()->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily; const auto &deviceInfo = module->getDevice()->getNEODevice()->getDeviceInfo(); uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU; uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize; NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, this->hasBarriers(), simd, this->getSlmTotalSize(), coreFamily, numThreadsPerSubSlice, localMemSize, usesImages, false); NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim); } else { if (1U == dim) { NEO::computeWorkgroupSize1D(maxWorkGroupSize, retGroupSize, workItems, simd); } else if (NEO::DebugManager.flags.EnableComputeWorkSizeSquared.get() && (2U == dim)) { NEO::computeWorkgroupSizeSquared(maxWorkGroupSize, retGroupSize, workItems, simd, dim); } else { NEO::computeWorkgroupSize2D(maxWorkGroupSize, retGroupSize, workItems, simd); } } *groupSizeX = static_cast(retGroupSize[0]); *groupSizeY = static_cast(retGroupSize[1]); *groupSizeZ = static_cast(retGroupSize[2]); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) { UNRECOVERABLE_IF(0 == groupSize[0]); UNRECOVERABLE_IF(0 == groupSize[1]); UNRECOVERABLE_IF(0 == groupSize[2]); auto &hardwareInfo = module->getDevice()->getHwInfo(); auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount; if (dssCount == 0) { dssCount = hardwareInfo.gtSystemInfo.SubSliceCount; } auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto &descriptor = kernelImmData->getDescriptor(); auto availableThreadCount = hwHelper.calculateAvailableThreadCount( hardwareInfo.platform.eProductFamily, descriptor.kernelAttributes.numGrfRequired, hardwareInfo.gtSystemInfo.EUCount, 
hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); auto usesBarriers = descriptor.kernelAttributes.flags.usesBarriers; const uint32_t workDim = 3; const size_t localWorkSize[] = {groupSize[0], groupSize[1], groupSize[2]}; *totalGroupCount = NEO::KernelHelper::getMaxWorkGroupCount(descriptor.kernelAttributes.simdSize, availableThreadCount, dssCount, dssCount * KB * hardwareInfo.capabilityTable.slmSize, hwHelper.alignSlmSize(slmArgsTotalSize + descriptor.kernelAttributes.slmInlineSize), static_cast(hwHelper.getMaxBarrierRegisterPerSlice()), hwHelper.getBarriersCountFromHasBarriers(usesBarriers), workDim, localWorkSize); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setAttribute(ze_kernel_attribute_t attr, uint32_t size, const void *pValue) { if (size != sizeof(bool)) { return ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE; } if (attr == ZE_KERNEL_ATTR_INDIRECT_DEVICE_ACCESS) { this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = *(static_cast(pValue)); } else if (attr == ZE_KERNEL_ATTR_INDIRECT_HOST_ACCESS) { this->unifiedMemoryControls.indirectHostAllocationsAllowed = *(static_cast(pValue)); } else if (attr == ZE_KERNEL_ATTR_INDIRECT_SHARED_ACCESS) { this->unifiedMemoryControls.indirectSharedAllocationsAllowed = *(static_cast(pValue)); } else { return ZE_RESULT_ERROR_INVALID_ENUMERATION; } return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::getAttribute(ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue) { if (attr == ZE_KERNEL_ATTR_INDIRECT_DEVICE_ACCESS) { memcpy_s(pValue, sizeof(bool), &this->unifiedMemoryControls.indirectDeviceAllocationsAllowed, sizeof(bool)); return ZE_RESULT_SUCCESS; } if (attr == ZE_KERNEL_ATTR_INDIRECT_HOST_ACCESS) { memcpy_s(pValue, sizeof(bool), &this->unifiedMemoryControls.indirectHostAllocationsAllowed, sizeof(bool)); return ZE_RESULT_SUCCESS; } if (attr == ZE_KERNEL_ATTR_INDIRECT_SHARED_ACCESS) { memcpy_s(pValue, sizeof(bool), &this->unifiedMemoryControls.indirectSharedAllocationsAllowed, 
sizeof(bool)); return ZE_RESULT_SUCCESS; } if (attr == ZE_KERNEL_ATTR_SOURCE_ATTRIBUTE) { auto &desc = kernelImmData->getDescriptor(); if (pValue == nullptr) { *pSize = (uint32_t)desc.kernelMetadata.kernelLanguageAttributes.length() + 1; } else { strncpy_s((char *)pValue, desc.kernelMetadata.kernelLanguageAttributes.length() + 1, desc.kernelMetadata.kernelLanguageAttributes.c_str(), desc.kernelMetadata.kernelLanguageAttributes.length() + 1); } return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INVALID_ENUMERATION; } ze_result_t KernelImp::setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal) { if (kernelImmData->getDescriptor().payloadMappings.explicitArgs.size() <= argIndex) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex]; for (const auto &element : arg.as().elements) { if (element.sourceOffset < argSize) { size_t maxBytesToCopy = argSize - element.sourceOffset; size_t bytesToCopy = std::min(static_cast(element.size), maxBytesToCopy); auto pDst = ptrOffset(crossThreadData.get(), element.offset); if (argVal) { auto pSrc = ptrOffset(argVal, element.sourceOffset); memcpy_s(pDst, element.size, pSrc, bytesToCopy); } else { uint64_t val = 0; memcpy_s(pDst, element.size, reinterpret_cast(&val), bytesToCopy); } } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } } return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); if (argVal == nullptr) { residencyContainer[argIndex] = nullptr; return ZE_RESULT_SUCCESS; } const auto image = Image::fromHandle(argVal); image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful); residencyContainer[argIndex] = image->getAllocation(); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, const void *argVal, 
NEO::GraphicsAllocation *allocation) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); const auto val = *reinterpret_cast(argVal); NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg, val); if (NEO::isValidOffset(arg.bindful)) { setBufferSurfaceState(argIndex, reinterpret_cast(val), allocation); } residencyContainer[argIndex] = allocation; return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal) { const auto &allArgs = kernelImmData->getDescriptor().payloadMappings.explicitArgs; const auto &currArg = allArgs[argIndex]; if (currArg.getTraits().getAddressQualifier() == NEO::KernelArgMetadata::AddrLocal) { slmArgSizes[argIndex] = static_cast(argSize); UNRECOVERABLE_IF(NEO::isUndefinedOffset(currArg.as().slmOffset)); auto slmOffset = *reinterpret_cast(crossThreadData.get() + currArg.as().slmOffset); slmOffset += static_cast(argSize); ++argIndex; while (argIndex < kernelImmData->getDescriptor().payloadMappings.explicitArgs.size()) { if (allArgs[argIndex].getTraits().getAddressQualifier() != NEO::KernelArgMetadata::AddrLocal) { ++argIndex; continue; } const auto &nextArg = allArgs[argIndex].as(); UNRECOVERABLE_IF(0 == nextArg.requiredSlmAlignment); slmOffset = alignUp(slmOffset, nextArg.requiredSlmAlignment); NEO::patchNonPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), nextArg.slmOffset, slmOffset); slmOffset += static_cast(slmArgSizes[argIndex]); ++argIndex; } slmArgsTotalSize = static_cast(alignUp(slmOffset, KB)); return ZE_RESULT_SUCCESS; } if (nullptr == argVal) { residencyContainer[argIndex] = nullptr; return ZE_RESULT_SUCCESS; } auto requestedAddress = *reinterpret_cast(argVal); auto svmAllocsManager = module->getDevice()->getDriverHandle()->getSvmAllocsManager(); NEO::GraphicsAllocation *alloc = svmAllocsManager->getSVMAllocs()->get(requestedAddress)->gpuAllocation; return setArgBufferWithAlloc(argIndex, argVal, alloc); 
} ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); if (argVal == nullptr) { residencyContainer[argIndex] = nullptr; return ZE_RESULT_SUCCESS; } const auto image = Image::fromHandle(*static_cast(argVal)); image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful); residencyContainer[argIndex] = image->getAllocation(); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); const auto sampler = Sampler::fromHandle(*static_cast(argVal)); sampler->copySamplerStateToDSH(dynamicStateHeapData.get(), dynamicStateHeapDataSize, arg.bindful); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) { size_t kernel_name_size = std::min(this->kernelImmData->getDescriptor().kernelMetadata.kernelName.size(), static_cast(ZE_MAX_KERNEL_NAME)); strncpy_s(pKernelProperties->name, ZE_MAX_KERNEL_NAME, this->kernelImmData->getDescriptor().kernelMetadata.kernelName.c_str(), kernel_name_size); pKernelProperties->requiredGroupSizeX = this->groupSize[0]; pKernelProperties->requiredGroupSizeY = this->groupSize[1]; pKernelProperties->requiredGroupSizeZ = this->groupSize[2]; pKernelProperties->numKernelArgs = static_cast(this->kernelImmData->getDescriptor().payloadMappings.explicitArgs.size()); return ZE_RESULT_SUCCESS; } ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { if (desc->version != ZE_KERNEL_DESC_VERSION_CURRENT) { return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; } this->kernelImmData = module->getKernelImmutableData(desc->pKernelName); if (this->kernelImmData == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } for (const auto &argT : kernelImmData->getDescriptor().payloadMappings.explicitArgs) { switch (argT.type) 
{ default: UNRECOVERABLE_IF(true); break; case NEO::ArgDescriptor::ArgTPointer: this->kernelArgHandlers.push_back(&KernelImp::setArgBuffer); break; case NEO::ArgDescriptor::ArgTImage: this->kernelArgHandlers.push_back(&KernelImp::setArgImage); break; case NEO::ArgDescriptor::ArgTSampler: this->kernelArgHandlers.push_back(&KernelImp::setArgSampler); break; case NEO::ArgDescriptor::ArgTValue: this->kernelArgHandlers.push_back(&KernelImp::setArgImmediate); break; } } slmArgSizes.resize(this->kernelArgHandlers.size(), 0); if (kernelImmData->getSurfaceStateHeapSize() > 0) { this->surfaceStateHeapData.reset(new uint8_t[kernelImmData->getSurfaceStateHeapSize()]); memcpy_s(this->surfaceStateHeapData.get(), kernelImmData->getSurfaceStateHeapSize(), kernelImmData->getSurfaceStateHeapTemplate(), kernelImmData->getSurfaceStateHeapSize()); this->surfaceStateHeapDataSize = kernelImmData->getSurfaceStateHeapSize(); } if (kernelImmData->getDescriptor().kernelAttributes.crossThreadDataSize != 0) { this->crossThreadData.reset(new uint8_t[kernelImmData->getDescriptor().kernelAttributes.crossThreadDataSize]); memcpy_s(this->crossThreadData.get(), kernelImmData->getDescriptor().kernelAttributes.crossThreadDataSize, kernelImmData->getCrossThreadDataTemplate(), kernelImmData->getDescriptor().kernelAttributes.crossThreadDataSize); this->crossThreadDataSize = kernelImmData->getDescriptor().kernelAttributes.crossThreadDataSize; } if (kernelImmData->getDynamicStateHeapDataSize() != 0) { this->dynamicStateHeapData.reset(new uint8_t[kernelImmData->getDynamicStateHeapDataSize()]); memcpy_s(this->dynamicStateHeapData.get(), kernelImmData->getDynamicStateHeapDataSize(), kernelImmData->getDynamicStateHeapTemplate(), kernelImmData->getDynamicStateHeapDataSize()); this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize(); } if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) { auto *reqdSize = 
kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize; UNRECOVERABLE_IF(reqdSize[1] == 0); UNRECOVERABLE_IF(reqdSize[2] == 0); auto result = setGroupSize(reqdSize[0], reqdSize[1], reqdSize[2]); if (result != ZE_RESULT_SUCCESS) { return result; } } else { auto result = setGroupSize(kernelImmData->getDescriptor().kernelAttributes.simdSize, 1, 1); if (result != ZE_RESULT_SUCCESS) { return result; } } residencyContainer.resize(this->kernelArgHandlers.size(), nullptr); this->createPrintfBuffer(); this->setDebugSurface(); for (auto &alloc : kernelImmData->getResidencyContainer()) { residencyContainer.push_back(alloc); } return ZE_RESULT_SUCCESS; } void KernelImp::createPrintfBuffer() { if (this->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf) { this->printfBuffer = PrintfHandler::createPrintfBuffer(this->module->getDevice()); this->residencyContainer.push_back(printfBuffer); NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.printfSurfaceAddress, static_cast(this->printfBuffer->getGpuAddressToPatch())); } } void KernelImp::printPrintfOutput() { PrintfHandler::printOutput(kernelImmData, this->printfBuffer, module->getDevice()); } void KernelImp::setDebugSurface() { auto device = module->getDevice(); if (module->isDebugEnabled() && device->getNEODevice()->isDebuggerActive()) { auto surfaceStateHeapRef = ArrayRef(surfaceStateHeapData.get(), surfaceStateHeapDataSize); patchWithImplicitSurface(ArrayRef(), surfaceStateHeapRef, 0, *device->getDebugSurface(), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.systemThreadSurfaceAddress, *device->getNEODevice()); } } void KernelImp::patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z) { const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor(); auto dst = ArrayRef(crossThreadData.get(), crossThreadDataSize); uint32_t workgroupSize[3] = {x, y, z}; 
NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.localWorkSize, workgroupSize); NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.localWorkSize2, workgroupSize); NEO::patchVecNonPointer(dst, desc.payloadMappings.dispatchTraits.enqueuedLocalWorkSize, workgroupSize); } Kernel *Kernel::create(uint32_t productFamily, Module *module, const ze_kernel_desc_t *desc, ze_result_t *res) { UNRECOVERABLE_IF(productFamily >= IGFX_MAX_PRODUCT); KernelAllocatorFn allocator = kernelFactory[productFamily]; auto function = static_cast(allocator(module)); *res = function->initialize(desc); if (*res) { function->destroy(); return nullptr; } return function; } bool KernelImp::hasIndirectAllocationsAllowed() const { return (unifiedMemoryControls.indirectDeviceAllocationsAllowed || unifiedMemoryControls.indirectHostAllocationsAllowed || unifiedMemoryControls.indirectSharedAllocationsAllowed); } bool KernelImp::hasBarriers() { return getImmutableData()->getDescriptor().kernelAttributes.flags.usesBarriers; } uint32_t KernelImp::getSlmTotalSize() { return slmArgsTotalSize + getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize; } uint32_t KernelImp::getBindingTableOffset() { return getImmutableData()->getDescriptor().payloadMappings.bindingTable.tableOffset; } uint32_t KernelImp::getBorderColor() { return getImmutableData()->getDescriptor().payloadMappings.samplerTable.borderColor; } uint32_t KernelImp::getSamplerTableOffset() { return getImmutableData()->getDescriptor().payloadMappings.samplerTable.tableOffset; } uint32_t KernelImp::getNumSurfaceStates() { return getImmutableData()->getDescriptor().payloadMappings.bindingTable.numEntries; } uint32_t KernelImp::getNumSamplers() { return getImmutableData()->getDescriptor().payloadMappings.samplerTable.numSamplers; } uint32_t KernelImp::getSimdSize() { return getImmutableData()->getDescriptor().kernelAttributes.simdSize; } uint32_t KernelImp::getSizeCrossThreadData() { return 
getCrossThreadDataSize(); } uint32_t KernelImp::getPerThreadScratchSize() { return getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]; } uint32_t KernelImp::getThreadsPerThreadGroupCount() { return getThreadsPerThreadGroup(); } uint32_t KernelImp::getSizePerThreadData() { return getPerThreadDataSize(); } uint32_t KernelImp::getSizePerThreadDataForWholeGroup() { return getPerThreadDataSizeForWholeThreadGroup(); } uint32_t KernelImp::getSizeSurfaceStateHeapData() { return getSurfaceStateHeapDataSize(); } uint32_t KernelImp::getPerThreadExecutionMask() { return getThreadExecutionMask(); } uint32_t *KernelImp::getCountOffsets() { return groupCountOffsets; } uint32_t *KernelImp::getSizeOffsets() { return groupSizeOffsets; } uint32_t *KernelImp::getLocalWorkSize() { if (hasGroupSize()) { getGroupSize(localWorkSize[0], localWorkSize[1], localWorkSize[2]); } return localWorkSize; } uint32_t KernelImp::getNumGrfRequired() { return getImmutableData()->getDescriptor().kernelAttributes.numGrfRequired; } NEO::GraphicsAllocation *KernelImp::getIsaAllocation() { return getImmutableData()->getIsaGraphicsAllocation(); } bool KernelImp::hasGroupCounts() { return getGroupCountOffsets(groupCountOffsets); } bool KernelImp::hasGroupSize() { return getGroupSizeOffsets(groupSizeOffsets); } const void *KernelImp::getSurfaceStateHeap() { return getSurfaceStateHeapData(); } const void *KernelImp::getDynamicStateHeap() { return getDynamicStateHeapData(); } const void *KernelImp::getCrossThread() { return getCrossThreadData(); } const void *KernelImp::getPerThread() { return getPerThreadData(); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/kernel/kernel_imp.h000066400000000000000000000152701363734646600255360ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include 
"shared/source/unified_memory/unified_memory.h" #include "level_zero/core/source/kernel/kernel.h" #include namespace L0 { struct GraphicsAllocation; struct KernelImp : Kernel { KernelImp(Module *module); ~KernelImp() override; ze_result_t destroy() override { delete this; return ZE_RESULT_SUCCESS; } ze_result_t setAttribute(ze_kernel_attribute_t attr, uint32_t size, const void *pValue) override; ze_result_t getAttribute(ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue) override; ze_result_t getProperties(ze_kernel_properties_t *pKernelProperties) override; ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override; void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override; bool getGroupCountOffsets(uint32_t *locations) override; bool getGroupSizeOffsets(uint32_t *locations) override; ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override; ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) override; ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) override; const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); } uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; } const std::vector &getResidencyContainer() const override { return residencyContainer; } void getGroupSize(uint32_t &outGroupSizeX, uint32_t &outGroupSizeY, uint32_t &outGroupSizeZ) const override { outGroupSizeX = this->groupSize[0]; outGroupSizeY = this->groupSize[1]; outGroupSizeZ = this->groupSize[2]; } ze_result_t setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal); 
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override; ze_result_t setArgBufferWithAlloc(uint32_t argIndex, const void *argVal, NEO::GraphicsAllocation *allocation) override; ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal); virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0; ze_result_t initialize(const ze_kernel_desc_t *desc); const uint8_t *getPerThreadData() const override { return perThreadDataForWholeThreadGroup; } uint32_t getPerThreadDataSizeForWholeThreadGroup() const override { return perThreadDataSizeForWholeThreadGroup; } uint32_t getPerThreadDataSize() const override { return perThreadDataSize; } uint32_t getThreadsPerThreadGroup() const override { return threadsPerThreadGroup; } uint32_t getThreadExecutionMask() const override { return threadExecutionMask; } NEO::GraphicsAllocation *getPrintfBufferAllocation() override { return this->printfBuffer; } void printPrintfOutput() override; const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); } uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; } const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); } size_t getDynamicStateHeapDataSize() const override { return dynamicStateHeapDataSize; } const KernelImmutableData *getImmutableData() const override { return kernelImmData; } UnifiedMemoryControls getUnifiedMemoryControls() const override { return unifiedMemoryControls; } bool hasIndirectAllocationsAllowed() const override; bool hasBarriers() override; uint32_t getSlmTotalSize() override; uint32_t getBindingTableOffset() override; uint32_t getBorderColor() override; uint32_t getSamplerTableOffset() override; uint32_t getNumSurfaceStates() override; uint32_t getNumSamplers() override; uint32_t 
getSimdSize() override; uint32_t getSizeCrossThreadData() override; uint32_t getPerThreadScratchSize() override; uint32_t getThreadsPerThreadGroupCount() override; uint32_t getSizePerThreadData() override; uint32_t getSizePerThreadDataForWholeGroup() override; uint32_t getSizeSurfaceStateHeapData() override; uint32_t getPerThreadExecutionMask() override; uint32_t *getCountOffsets() override; uint32_t *getSizeOffsets() override; uint32_t *getLocalWorkSize() override; uint32_t getNumGrfRequired() override; NEO::GraphicsAllocation *getIsaAllocation() override; bool hasGroupCounts() override; bool hasGroupSize() override; const void *getSurfaceStateHeap() override; const void *getDynamicStateHeap() override; const void *getCrossThread() override; const void *getPerThread() override; protected: KernelImp() = default; void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z); void createPrintfBuffer(); void setDebugSurface(); const KernelImmutableData *kernelImmData = nullptr; Module *module = nullptr; typedef ze_result_t (KernelImp::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); std::vector kernelArgHandlers; std::vector residencyContainer; NEO::GraphicsAllocation *printfBuffer = nullptr; uint32_t groupSize[3] = {0u, 0u, 0u}; uint32_t threadsPerThreadGroup = 0u; uint32_t threadExecutionMask = 0u; std::unique_ptr crossThreadData = 0; uint32_t crossThreadDataSize = 0; std::unique_ptr surfaceStateHeapData = nullptr; uint32_t surfaceStateHeapDataSize = 0; std::unique_ptr dynamicStateHeapData = nullptr; uint32_t dynamicStateHeapDataSize = 0; uint8_t *perThreadDataForWholeThreadGroup = nullptr; uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0; uint32_t perThreadDataSizeForWholeThreadGroup = 0u; uint32_t perThreadDataSize = 0u; UnifiedMemoryControls unifiedMemoryControls; std::vector slmArgSizes; uint32_t slmArgsTotalSize = 0U; }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/linux/000077500000000000000000000000001363734646600231125ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/linux/CMakeLists.txt000066400000000000000000000004621363734646600256540ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) set(L0_SOURCES_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/registry_path.cpp ) set_property(GLOBAL PROPERTY L0_SOURCES_LINUX ${L0_SOURCES_LINUX}) endif() compute-runtime-20.13.16352/level_zero/core/source/linux/registry_path.cpp000066400000000000000000000002131363734646600264760ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace L0 { const char *registryPath = ""; } compute-runtime-20.13.16352/level_zero/core/source/memory/000077500000000000000000000000001363734646600232635ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp000066400000000000000000000031161363734646600320300ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h" namespace NEO { void PageFaultManager::transferToCpu(void *ptr, size_t size, void *device) { L0::DeviceImp *deviceImp = static_cast(device); NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); UNRECOVERABLE_IF(allocData == nullptr); auto ret = deviceImp->pageFaultCommandList->appendPageFaultCopy(allocData->cpuAllocation, allocData->gpuAllocation, allocData->size, true); UNRECOVERABLE_IF(ret); } void 
PageFaultManager::transferToGpu(void *ptr, void *device) { L0::DeviceImp *deviceImp = static_cast(device); NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); UNRECOVERABLE_IF(allocData == nullptr); auto ret = deviceImp->pageFaultCommandList->appendPageFaultCopy(allocData->gpuAllocation, allocData->cpuAllocation, allocData->size, false); UNRECOVERABLE_IF(ret); } } // namespace NEO compute-runtime-20.13.16352/level_zero/core/source/memory/memory.cpp000066400000000000000000000153711363734646600253060ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle_imp.h" namespace L0 { ze_result_t DriverHandleImp::getIpcMemHandle(const void *ptr, ze_ipc_mem_handle_t *pIpcHandle) { NEO::SvmAllocationData *allocData = svmAllocsManager->getSVMAllocs()->get(ptr); if (allocData) { uint64_t handle = allocData->gpuAllocation->peekInternalHandle(this->getMemoryManager()); memcpy_s(reinterpret_cast(pIpcHandle->data), sizeof(ze_ipc_mem_handle_t), &handle, sizeof(handle)); return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_result_t DriverHandleImp::openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc_mem_handle_t pIpcHandle, ze_ipc_memory_flag_t flags, void **ptr) { uint64_t handle = *(pIpcHandle.data); NEO::osHandle osHandle = static_cast(handle); NEO::AllocationProperties unifiedMemoryProperties{Device::fromHandle(hDevice)->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::GraphicsAllocation::AllocationType::BUFFER}; NEO::GraphicsAllocation *alloc = this->getMemoryManager()->createGraphicsAllocationFromSharedHandle(osHandle, unifiedMemoryProperties, false); if (alloc == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } NEO::SvmAllocationData 
allocData; allocData.gpuAllocation = alloc; allocData.cpuAllocation = nullptr; allocData.size = alloc->getUnderlyingBufferSize(); allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; allocData.device = Device::fromHandle(hDevice)->getNEODevice(); this->getSvmAllocsManager()->getSVMAllocs()->insert(allocData); *ptr = reinterpret_cast(alloc->getGpuAddress()); return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::closeIpcMemHandle(const void *ptr) { return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::checkMemoryAccessFromDevice(Device *device, const void *ptr) { auto allocation = svmAllocsManager->getSVMAllocs()->get(ptr); if (allocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } if (allocation->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY || allocation->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) return ZE_RESULT_SUCCESS; if (allocation->gpuAllocation->getRootDeviceIndex() == device->getRootDeviceIndex()) return ZE_RESULT_SUCCESS; ze_bool_t p2pCapable = true; device->canAccessPeer(devices[allocation->gpuAllocation->getRootDeviceIndex()], &p2pCapable); return p2pCapable ? 
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ARGUMENT; } ze_result_t DriverHandleImp::getMemAddressRange(const void *ptr, void **pBase, size_t *pSize) { NEO::SvmAllocationData *allocData = svmAllocsManager->getSVMAllocs()->get(ptr); if (allocData) { NEO::GraphicsAllocation *alloc; alloc = allocData->gpuAllocation; if (pBase) { uint64_t *allocBase = reinterpret_cast(pBase); *allocBase = alloc->getGpuAddress(); } if (pSize) { *pSize = alloc->getUnderlyingBufferSize(); } return ZE_RESULT_SUCCESS; } DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t DriverHandleImp::allocHostMem(ze_host_mem_alloc_flag_t flags, size_t size, size_t alignment, void **ptr) { if (size > this->devices[0]->getDeviceInfo().maxMemAllocSize) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY); auto usmPtr = svmAllocsManager->createUnifiedMemoryAllocation(0u, size, unifiedMemoryProperties); if (usmPtr == nullptr) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } *ptr = usmPtr; return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::allocDeviceMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t flags, size_t size, size_t alignment, void **ptr) { if (size > this->devices[0]->getDeviceInfo().maxMemAllocSize) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY); unifiedMemoryProperties.allocationFlags.flags.shareable = 1u; void *usmPtr = svmAllocsManager->createUnifiedMemoryAllocation(Device::fromHandle(hDevice)->getRootDeviceIndex(), size, unifiedMemoryProperties); if (usmPtr == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } *ptr = usmPtr; return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::allocSharedMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flag_t deviceFlags, ze_host_mem_alloc_flag_t hostFlags, size_t size, 
size_t alignment, void **ptr) { if (size > this->devices[0]->getDeviceInfo().maxMemAllocSize) { *ptr = nullptr; return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY); auto usmPtr = svmAllocsManager->createSharedUnifiedMemoryAllocation(Device::fromHandle(hDevice)->getRootDeviceIndex(), size, unifiedMemoryProperties, static_cast(L0::Device::fromHandle(hDevice))); if (usmPtr == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } *ptr = usmPtr; return ZE_RESULT_SUCCESS; } ze_result_t DriverHandleImp::freeMem(const void *ptr) { auto allocation = svmAllocsManager->getSVMAllocs()->get(ptr); if (allocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } svmAllocsManager->freeSVMAlloc(const_cast(ptr)); if (svmAllocsManager->getSvmMapOperation(ptr)) { svmAllocsManager->removeSvmMapOperation(ptr); } return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/memory/memory_operations_helper.h000066400000000000000000000017031363734646600305470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_operations_status.h" #include static ze_result_t changeMemoryOperationStatusToL0ResultType(NEO::MemoryOperationsStatus status) { switch (status) { case NEO::MemoryOperationsStatus::SUCCESS: return ZE_RESULT_SUCCESS; case NEO::MemoryOperationsStatus::MEMORY_NOT_FOUND: return ZE_RESULT_ERROR_INVALID_ARGUMENT; case NEO::MemoryOperationsStatus::OUT_OF_MEMORY: case NEO::MemoryOperationsStatus::FAILED: return ZE_RESULT_ERROR_DEVICE_LOST; case NEO::MemoryOperationsStatus::DEVICE_UNINITIALIZED: return ZE_RESULT_ERROR_UNINITIALIZED; case NEO::MemoryOperationsStatus::UNSUPPORTED: return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; default: DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } 
compute-runtime-20.13.16352/level_zero/core/source/module/000077500000000000000000000000001363734646600232405ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/module/module.h000066400000000000000000000036261363734646600247050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/kernel/kernel.h" #include "level_zero/core/source/module/module_build_log.h" #include #include struct _ze_module_handle_t {}; namespace L0 { struct Device; struct Module : _ze_module_handle_t { static Module *create(Device *device, const ze_module_desc_t *desc, NEO::Device *neoDevice, ModuleBuildLog *moduleBuildLog); virtual ~Module() = default; virtual Device *getDevice() const = 0; virtual ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) = 0; virtual ze_result_t destroy() = 0; virtual ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) = 0; virtual ze_result_t getFunctionPointer(const char *pKernelName, void **pfnFunction) = 0; virtual ze_result_t getGlobalPointer(const char *pGlobalName, void **pPtr) = 0; virtual ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) = 0; virtual ze_result_t getKernelNames(uint32_t *pCount, const char **pNames) = 0; virtual const KernelImmutableData *getKernelImmutableData(const char *functionName) const = 0; virtual const std::vector> &getKernelImmutableDataVector() const = 0; virtual uint32_t getMaxGroupSize() const = 0; virtual bool isDebugEnabled() const = 0; Module() = default; Module(const Module &) = delete; Module(Module &&) = delete; Module &operator=(const Module &) = delete; Module &operator=(Module &&) = delete; static Module *fromHandle(ze_module_handle_t handle) { return static_cast(handle); } inline ze_module_handle_t toHandle() { return this; } }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/module/module_build_log.cpp000066400000000000000000000030361363734646600272530ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/string.h" #include "level_zero/core/source/module/module.h" #include namespace L0 { struct ModuleBuildLogImp : public ModuleBuildLog { ModuleBuildLogImp() {} ~ModuleBuildLogImp() override {} ze_result_t destroy() override { delete this; return ZE_RESULT_SUCCESS; } ze_result_t getString(size_t *pSize, char *pBuildLog) override { const char *buildLog = this->buildLog.c_str(); if (buildLog != nullptr) { auto szLog = this->buildLog.size(); if (pBuildLog) { memcpy_s(pBuildLog, szLog, buildLog, szLog); pBuildLog[szLog] = '\0'; } *pSize = szLog + 1; } else { *pSize = 0; } return ZE_RESULT_SUCCESS; } void appendString(const char *pBuildLog, size_t size) override { if ((pBuildLog == nullptr) || (size == 0) || (pBuildLog[0] == '\0')) return; if (pBuildLog[size - 1] == '\0') --size; if (this->buildLog.length() != 0) this->buildLog.append("\n"); this->buildLog.append(pBuildLog, size); } protected: std::string buildLog; }; ModuleBuildLog *ModuleBuildLog::create() { auto moduleBuildLog = new ModuleBuildLogImp(); UNRECOVERABLE_IF(moduleBuildLog == nullptr); return moduleBuildLog; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/module/module_build_log.h000066400000000000000000000017641363734646600267260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include struct _ze_module_build_log_handle_t {}; namespace L0 { struct Module; struct ModuleBuildLog : _ze_module_build_log_handle_t { static ModuleBuildLog *create(); virtual ~ModuleBuildLog() = default; virtual ze_result_t destroy() = 0; virtual ze_result_t getString(size_t *pSize, char *pBuildLog) = 0; virtual 
void appendString(const char *pBuildLog, size_t size) = 0; ModuleBuildLog() = default; ModuleBuildLog(const ModuleBuildLog &) = delete; ModuleBuildLog(ModuleBuildLog &&) = delete; ModuleBuildLog &operator=(const ModuleBuildLog &) = delete; ModuleBuildLog &operator=(ModuleBuildLog &&) = delete; static ModuleBuildLog *fromHandle(ze_module_build_log_handle_t handle) { return static_cast(handle); } inline ze_module_build_log_handle_t toHandle() { return this; } }; } // namespace L0compute-runtime-20.13.16352/level_zero/core/source/module/module_extra_options.cpp000066400000000000000000000004331363734646600302070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module_imp.h" namespace L0 { void ModuleImp::createBuildExtraOptions(std::string &apiOptions, std::string &nternalBuildOptions) { } } // namespace L0compute-runtime-20.13.16352/level_zero/core/source/module/module_imp.cpp000066400000000000000000000632571363734646600261130ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/module/module_imp.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/program/program_info.h" #include "shared/source/program/program_initialization.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/program/kernel_info.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/kernel/kernel.h" #include 
"level_zero/core/source/module/module_build_log.h" #include "compiler_options.h" #include "program_debug_data.h" #include namespace L0 { namespace BuildOptions { ConstStringRef optDisable = "-ze-opt-disable"; ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required"; } // namespace BuildOptions struct ModuleTranslationUnit { ModuleTranslationUnit(L0::Device *device) : device(device) { } ~ModuleTranslationUnit() { if (globalConstBuffer) { auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); if (svmAllocsManager->getSVMAlloc(reinterpret_cast(globalConstBuffer->getGpuAddress()))) { svmAllocsManager->freeSVMAlloc(reinterpret_cast(globalConstBuffer->getGpuAddress())); } else { this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalConstBuffer); } } if (globalVarBuffer) { auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); if (svmAllocsManager->getSVMAlloc(reinterpret_cast(globalVarBuffer->getGpuAddress()))) { svmAllocsManager->freeSVMAlloc(reinterpret_cast(globalVarBuffer->getGpuAddress())); } else { this->device->getNEODevice()->getExecutionEnvironment()->memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalVarBuffer); } } } bool buildFromSpirV(const char *input, uint32_t inputSize, const char *buildOptions, const char *internalBuildOptions) { auto compilerInterface = device->getNEODevice()->getCompilerInterface(); UNRECOVERABLE_IF(nullptr == compilerInterface); UNRECOVERABLE_IF((nullptr == device) || (nullptr == device->getNEODevice())); std::string options = buildOptions; std::string internalOptions = NEO::CompilerOptions::concatenate(internalBuildOptions, NEO::CompilerOptions::hasBufferOffsetArg); if (device->getNEODevice()->getDeviceInfo().debuggerActive) { options = NEO::CompilerOptions::concatenate(options, NEO::CompilerOptions::generateDebugInfo); internalOptions = NEO::CompilerOptions::concatenate(internalOptions, 
NEO::CompilerOptions::debugKernelEnable); } NEO::TranslationInput inputArgs = {IGC::CodeType::spirV, IGC::CodeType::oclGenBin}; inputArgs.src = ArrayRef(input, inputSize); inputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); inputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); NEO::TranslationOutput compilerOuput = {}; auto compilerErr = compilerInterface->build(*device->getNEODevice(), inputArgs, compilerOuput); this->updateBuildLog(compilerOuput.frontendCompilerLog); this->updateBuildLog(compilerOuput.backendCompilerLog); if (NEO::TranslationOutput::ErrorCode::Success != compilerErr) { return false; } this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem); this->irBinarySize = compilerOuput.intermediateRepresentation.size; this->unpackedDeviceBinary = std::move(compilerOuput.deviceBinary.mem); this->unpackedDeviceBinarySize = compilerOuput.deviceBinary.size; this->debugData = std::move(compilerOuput.debugData.mem); this->debugDataSize = compilerOuput.debugData.size; return processUnpackedBinary(); } bool createFromNativeBinary(const char *input, size_t inputSize) { UNRECOVERABLE_IF((nullptr == device) || (nullptr == device->getNEODevice())); auto productAbbreviation = NEO::hardwarePrefix[device->getNEODevice()->getHardwareInfo().platform.eProductFamily]; NEO::TargetDevice targetDevice = {}; targetDevice.coreFamily = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily; targetDevice.stepping = device->getNEODevice()->getHardwareInfo().platform.usRevId; targetDevice.maxPointerSizeInBytes = sizeof(uintptr_t); std::string decodeErrors; std::string decodeWarnings; ArrayRef archive(reinterpret_cast(input), inputSize); auto singleDeviceBinary = unpackSingleDeviceBinary(archive, ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice, decodeErrors, decodeWarnings); if (decodeWarnings.empty() == false) { 
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if (singleDeviceBinary.intermediateRepresentation.empty() && singleDeviceBinary.deviceBinary.empty()) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); return false; } else { this->irBinary = makeCopy(reinterpret_cast(singleDeviceBinary.intermediateRepresentation.begin()), singleDeviceBinary.intermediateRepresentation.size()); this->irBinarySize = singleDeviceBinary.intermediateRepresentation.size(); this->options = singleDeviceBinary.buildOptions.str(); if ((false == singleDeviceBinary.deviceBinary.empty()) && (false == NEO::DebugManager.flags.RebuildPrecompiledKernels.get())) { this->unpackedDeviceBinary = makeCopy(reinterpret_cast(singleDeviceBinary.deviceBinary.begin()), singleDeviceBinary.deviceBinary.size()); this->unpackedDeviceBinarySize = singleDeviceBinary.deviceBinary.size(); this->packedDeviceBinary = makeCopy(reinterpret_cast(archive.begin()), archive.size()); this->packedDeviceBinarySize = archive.size(); } } if (nullptr == this->unpackedDeviceBinary) { return buildFromSpirV(this->irBinary.get(), static_cast(this->irBinarySize), this->options.c_str(), ""); } else { return processUnpackedBinary(); } } bool processUnpackedBinary() { if (0 == unpackedDeviceBinarySize) { return false; } auto blob = ArrayRef(reinterpret_cast(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize); NEO::SingleDeviceBinary binary = {}; binary.deviceBinary = blob; std::string decodeErrors; std::string decodeWarnings; NEO::DecodeError decodeError; NEO::DeviceBinaryFormat singleDeviceBinaryFormat; std::tie(decodeError, singleDeviceBinaryFormat) = NEO::decodeSingleDeviceBinary(programInfo, binary, decodeErrors, decodeWarnings); if (decodeWarnings.empty() == false) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if 
(NEO::DecodeError::Success != decodeError) { NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); return false; } processDebugData(); size_t slmNeeded = NEO::getMaxInlineSlmNeeded(programInfo); size_t slmAvailable = 0U; NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; slmAvailable = static_cast(device->getDeviceInfo().localMemSize); deviceInfoConstants.maxWorkGroupSize = static_cast(device->getDeviceInfo().maxWorkGroupSize); deviceInfoConstants.computeUnitsUsedForScratch = static_cast(device->getDeviceInfo().computeUnitsUsedForScratch); deviceInfoConstants.slmWindowSize = static_cast(device->getDeviceInfo().localMemSize); if (NEO::requiresLocalMemoryWindowVA(programInfo)) { deviceInfoConstants.slmWindow = device->getNEODevice()->getExecutionEnvironment()->memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment); } if (slmNeeded > slmAvailable) { return false; } auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); if (programInfo.globalConstants.size != 0) { this->globalConstBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalConstants.size, true, programInfo.linkerInput.get(), programInfo.globalConstants.initData); } if (programInfo.globalVariables.size != 0) { this->globalVarBuffer = NEO::allocateGlobalsSurface(svmAllocsManager, *device->getNEODevice(), programInfo.globalVariables.size, false, programInfo.linkerInput.get(), programInfo.globalVariables.initData); } for (auto &kernelInfo : this->programInfo.kernelInfos) { kernelInfo->apply(deviceInfoConstants); } auto gfxCore = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily; auto stepping = device->getNEODevice()->getHardwareInfo().platform.usRevId; if (this->packedDeviceBinary != nullptr) { return true; } NEO::SingleDeviceBinary singleDeviceBinary; singleDeviceBinary.buildOptions = this->options; 
singleDeviceBinary.targetDevice.coreFamily = gfxCore; singleDeviceBinary.targetDevice.stepping = stepping; singleDeviceBinary.deviceBinary = ArrayRef(reinterpret_cast(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize); singleDeviceBinary.intermediateRepresentation = ArrayRef(reinterpret_cast(this->irBinary.get()), this->irBinarySize); singleDeviceBinary.debugData = ArrayRef(reinterpret_cast(this->debugData.get()), this->debugDataSize); std::string packWarnings; std::string packErrors; auto packedDeviceBinary = NEO::packDeviceBinary(singleDeviceBinary, packErrors, packWarnings); if (packedDeviceBinary.empty()) { DEBUG_BREAK_IF(true); return false; } this->packedDeviceBinary = makeCopy(packedDeviceBinary.data(), packedDeviceBinary.size()); this->packedDeviceBinarySize = packedDeviceBinary.size(); return true; } void updateBuildLog(const std::string &newLogEntry) { if (newLogEntry.empty() || ('\0' == newLogEntry[0])) { return; } buildLog += newLogEntry.c_str(); if ('\n' != *buildLog.rbegin()) { buildLog.append("\n"); } } void processDebugData() { if (this->debugData != nullptr) { iOpenCL::SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast(debugData.get()); DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != programInfo.kernelInfos.size()); const iOpenCL::SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast( ptrOffset(programDebugHeader, sizeof(iOpenCL::SProgramDebugDataHeaderIGC))); const char *kernelName = nullptr; const char *kernelDebugData = nullptr; for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) { kernelName = reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(iOpenCL::SKernelDebugDataHeaderIGC))); auto kernelInfo = programInfo.kernelInfos[i]; UNRECOVERABLE_IF(kernelInfo->name.compare(0, kernelInfo->name.size(), kernelName) != 0); kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize); kernelInfo->kernelDescriptor.external.debugData = std::make_unique(); 
kernelInfo->kernelDescriptor.external.debugData->vIsa = kernelDebugData; kernelInfo->kernelDescriptor.external.debugData->genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes); kernelInfo->kernelDescriptor.external.debugData->vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes; kernelInfo->kernelDescriptor.external.debugData->genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes; kernelDebugData = ptrOffset(kernelDebugData, static_cast(kernelDebugHeader->SizeVisaDbgInBytes) + kernelDebugHeader->SizeGenIsaDbgInBytes); kernelDebugHeader = reinterpret_cast(kernelDebugData); } } } L0::Device *device = nullptr; NEO::GraphicsAllocation *globalConstBuffer = nullptr; NEO::GraphicsAllocation *globalVarBuffer = nullptr; NEO::ProgramInfo programInfo; std::string options; std::string buildLog; std::unique_ptr irBinary; size_t irBinarySize = 0U; std::unique_ptr unpackedDeviceBinary; size_t unpackedDeviceBinarySize = 0U; std::unique_ptr packedDeviceBinary; size_t packedDeviceBinarySize = 0U; std::unique_ptr debugData; size_t debugDataSize = 0U; }; ModuleImp::ModuleImp(Device *device, NEO::Device *neoDevice, ModuleBuildLog *moduleBuildLog) : device(device), translationUnit(new ModuleTranslationUnit(device)), moduleBuildLog(moduleBuildLog) { productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; } ModuleImp::~ModuleImp() { kernelImmDatas.clear(); delete translationUnit; } bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) { bool success = true; NEO::useKernelDescriptor = true; std::string buildOptions; std::string internalBuildOptions; this->createBuildOptions(desc->pBuildFlags, buildOptions, internalBuildOptions); if (desc->format == ZE_MODULE_FORMAT_NATIVE) { success = this->translationUnit->createFromNativeBinary( reinterpret_cast(desc->pInputModule), desc->inputSize); } else if (desc->format == ZE_MODULE_FORMAT_IL_SPIRV) { success = this->translationUnit->buildFromSpirV(reinterpret_cast(desc->pInputModule), 
static_cast(desc->inputSize), buildOptions.c_str(), internalBuildOptions.c_str()); } else { return false; } debugEnabled = this->translationUnit->debugDataSize > 0; this->updateBuildLog(neoDevice); if (debugEnabled && device->getNEODevice()->isDebuggerActive()) { for (auto kernelInfo : this->translationUnit->programInfo.kernelInfos) { device->getSourceLevelDebugger()->notifyKernelDebugData(kernelInfo->kernelDescriptor.external.debugData.get(), kernelInfo->kernelDescriptor.kernelMetadata.kernelName, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.pKernelHeader->KernelHeapSize); } } if (false == success) { return false; } kernelImmDatas.reserve(this->translationUnit->programInfo.kernelInfos.size()); for (auto &ki : this->translationUnit->programInfo.kernelInfos) { std::unique_ptr kernelImmData{new KernelImmutableData(this->device)}; kernelImmData->initialize(ki, *(getDevice()->getDriverHandle()->getMemoryManager()), device->getNEODevice(), device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer); kernelImmDatas.push_back(std::move(kernelImmData)); } this->maxGroupSize = static_cast(this->translationUnit->device->getNEODevice()->getDeviceInfo().maxWorkGroupSize); return this->linkBinary(); } const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const { for (auto &kernelImmData : kernelImmDatas) { if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) { return kernelImmData.get(); } } return nullptr; } void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOptions, std::string &internalBuildOptions) { if (pBuildFlags != nullptr) { std::string buildFlags(pBuildFlags); apiOptions = pBuildFlags; moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optDisable, BuildOptions::optDisable); moveBuildOption(internalBuildOptions, apiOptions, 
NEO::CompilerOptions::greaterThan4gbBuffersRequired, BuildOptions::greaterThan4GbRequired); createBuildExtraOptions(apiOptions, internalBuildOptions); } } void ModuleImp::updateBuildLog(NEO::Device *neoDevice) { if (this->moduleBuildLog) { moduleBuildLog->appendString(this->translationUnit->buildLog.c_str(), this->translationUnit->buildLog.size()); } } ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) { ze_result_t res = ZE_RESULT_SUCCESS; *phFunction = Kernel::create(productFamily, this, desc, &res)->toHandle(); return ZE_RESULT_SUCCESS; } ze_result_t ModuleImp::getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) { auto genBinary = this->translationUnit->packedDeviceBinary.get(); *pSize = this->translationUnit->packedDeviceBinarySize; if (pModuleNativeBinary != nullptr) { memcpy_s(pModuleNativeBinary, this->translationUnit->packedDeviceBinarySize, genBinary, this->translationUnit->packedDeviceBinarySize); } return ZE_RESULT_SUCCESS; } ze_result_t ModuleImp::getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) { if (translationUnit == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } if (pDebugData == nullptr) { *pDebugDataSize = translationUnit->debugDataSize; return ZE_RESULT_SUCCESS; } memcpy_s(pDebugData, *pDebugDataSize, translationUnit->debugData.get(), translationUnit->debugDataSize); return ZE_RESULT_SUCCESS; } bool ModuleImp::linkBinary() { using namespace NEO; if (this->translationUnit->programInfo.linkerInput == nullptr) { return true; } Linker linker(*this->translationUnit->programInfo.linkerInput); Linker::SegmentInfo globals; Linker::SegmentInfo constants; Linker::SegmentInfo exportedFunctions; Linker::PatchableSegment globalsForPatching; Linker::PatchableSegment constantsForPatching; if (translationUnit->globalVarBuffer != nullptr) { globals.gpuAddress = static_cast(translationUnit->globalVarBuffer->getGpuAddress()); globals.segmentSize = 
translationUnit->globalVarBuffer->getUnderlyingBufferSize(); globalsForPatching.hostPointer = translationUnit->globalVarBuffer->getUnderlyingBuffer(); globalsForPatching.segmentSize = translationUnit->globalVarBuffer->getUnderlyingBufferSize(); } if (translationUnit->globalConstBuffer != nullptr) { constants.gpuAddress = static_cast(translationUnit->globalConstBuffer->getGpuAddress()); constants.segmentSize = translationUnit->globalConstBuffer->getUnderlyingBufferSize(); constantsForPatching.hostPointer = translationUnit->globalConstBuffer->getUnderlyingBuffer(); constantsForPatching.segmentSize = translationUnit->globalConstBuffer->getUnderlyingBufferSize(); } if (this->translationUnit->programInfo.linkerInput->getExportedFunctionsSegmentId() >= 0) { auto exportedFunctionHeapId = this->translationUnit->programInfo.linkerInput->getExportedFunctionsSegmentId(); this->exportedFunctionsSurface = this->kernelImmDatas[exportedFunctionHeapId]->getIsaGraphicsAllocation(); exportedFunctions.gpuAddress = static_cast(exportedFunctionsSurface->getGpuAddressToPatch()); exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize(); } Linker::PatchableSegments isaSegmentsForPatching; std::vector> patchedIsaTempStorage; if (this->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) { patchedIsaTempStorage.reserve(this->kernelImmDatas.size()); for (const auto &kernelInfo : this->translationUnit->programInfo.kernelInfos) { auto &kernHeapInfo = kernelInfo->heapInfo; const char *originalIsa = reinterpret_cast(kernHeapInfo.pKernelHeap); patchedIsaTempStorage.push_back(std::vector(originalIsa, originalIsa + kernHeapInfo.pKernelHeader->KernelHeapSize)); isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.pKernelHeader->KernelHeapSize}); } } Linker::UnresolvedExternals unresolvedExternalsInfo; bool linkSuccess = linker.link(globals, constants, exportedFunctions, 
globalsForPatching, constantsForPatching, isaSegmentsForPatching, unresolvedExternalsInfo); this->symbols = linker.extractRelocatedSymbols(); if (false == linkSuccess) { std::vector kernelNames; for (const auto &kernelInfo : this->translationUnit->programInfo.kernelInfos) { kernelNames.push_back("kernel : " + kernelInfo->name); } auto error = constructLinkerErrorMessage(unresolvedExternalsInfo, kernelNames); moduleBuildLog->appendString(error.c_str(), error.size()); return false; } else if (this->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) { for (const auto &kernelImmData : this->kernelImmDatas) { if (nullptr == kernelImmData->getIsaGraphicsAllocation()) { continue; } auto segmentId = &kernelImmData - &this->kernelImmDatas[0]; this->device->getDriverHandle()->getMemoryManager()->copyMemoryToAllocation(kernelImmData->getIsaGraphicsAllocation(), isaSegmentsForPatching[segmentId].hostPointer, isaSegmentsForPatching[segmentId].segmentSize); } } return true; } ze_result_t ModuleImp::getFunctionPointer(const char *pFunctionName, void **pfnFunction) { auto symbolIt = symbols.find(pFunctionName); if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } *pfnFunction = reinterpret_cast(symbolIt->second.gpuAddress); return ZE_RESULT_SUCCESS; } ze_result_t ModuleImp::getGlobalPointer(const char *pGlobalName, void **pPtr) { auto symbolIt = symbols.find(pGlobalName); if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment == NEO::SegmentType::Instructions)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } *pPtr = reinterpret_cast(symbolIt->second.gpuAddress); return ZE_RESULT_SUCCESS; } Module *Module::create(Device *device, const ze_module_desc_t *desc, NEO::Device *neoDevice, ModuleBuildLog *moduleBuildLog) { auto module = new ModuleImp(device, neoDevice, moduleBuildLog); bool success = module->initialize(desc, neoDevice); if 
(success == false) { module->destroy(); return nullptr; } return module; } ze_result_t ModuleImp::getKernelNames(uint32_t *pCount, const char **pNames) { auto &kernelImmDatas = this->getKernelImmutableDataVector(); if (*pCount == 0) { *pCount = static_cast(kernelImmDatas.size()); return ZE_RESULT_SUCCESS; } if (*pCount > static_cast(kernelImmDatas.size())) { *pCount = static_cast(kernelImmDatas.size()); } uint32_t outCount = 0; for (auto &kernelImmData : kernelImmDatas) { *(pNames + outCount) = kernelImmData->getDescriptor().kernelMetadata.kernelName.c_str(); outCount++; if (outCount == *pCount) { break; } } return ZE_RESULT_SUCCESS; } bool ModuleImp::isDebugEnabled() const { return this->translationUnit->debugDataSize > 0; } bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, ConstStringRef dstOptionName, ConstStringRef srcOptionName) { auto optInSrcPos = srcOptionSet.find(srcOptionName.begin()); if (std::string::npos == optInSrcPos) { return false; } srcOptionSet.erase(optInSrcPos, srcOptionName.length()); NEO::CompilerOptions::concatenateAppend(dstOptionsSet, dstOptionName); return true; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/module/module_imp.h000066400000000000000000000050611363734646600255450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/linker.h" #include "shared/source/utilities/const_stringref.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/module/module.h" #include "igfxfmid.h" #include #include namespace L0 { struct ModuleTranslationUnit; struct ModuleImp : public Module { ModuleImp(Device *device, NEO::Device *neoDevice, ModuleBuildLog *moduleBuildLog); ~ModuleImp() override; ze_result_t destroy() override { delete this; return ZE_RESULT_SUCCESS; } ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) 
override; ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) override; ze_result_t getFunctionPointer(const char *pFunctionName, void **pfnFunction) override; ze_result_t getGlobalPointer(const char *pGlobalName, void **pPtr) override; ze_result_t getKernelNames(uint32_t *pCount, const char **pNames) override; ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) override; const KernelImmutableData *getKernelImmutableData(const char *functionName) const override; const std::vector> &getKernelImmutableDataVector() const override { return kernelImmDatas; } uint32_t getMaxGroupSize() const override { return maxGroupSize; } void createBuildOptions(const char *pBuildFlags, std::string &buildOptions, std::string &internalBuildOptions); void createBuildExtraOptions(std::string &buildOptions, std::string &internalBuildOptions); void updateBuildLog(NEO::Device *neoDevice); Device *getDevice() const override { return device; } bool linkBinary(); bool initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice); bool isDebugEnabled() const override; protected: ModuleImp() = default; Device *device = nullptr; PRODUCT_FAMILY productFamily{}; ModuleTranslationUnit *translationUnit = nullptr; ModuleBuildLog *moduleBuildLog = nullptr; NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr; uint32_t maxGroupSize = 0U; std::vector> kernelImmDatas; NEO::Linker::RelocatedSymbolsMap symbols; bool debugEnabled = false; }; bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, ConstStringRef dstOptionName, ConstStringRef srcOptionName); } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/printf_handler/000077500000000000000000000000001363734646600247525ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/printf_handler/printf_handler.cpp000066400000000000000000000035451363734646600304640ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/printf_handler/printf_handler.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/program/print_formatter.h" #include "level_zero/core/source/device/device_imp.h" namespace L0 { /* Allocates a BUFFER_HOST_MEMORY allocation of printfBufferSize bytes (alignment set to MemoryConstants::pageSize64k) through the driver handle's memory manager and stores printfSurfaceInitialDataSize at the start of its underlying buffer. NOTE(review): the allocation is dereferenced without a null check - confirm the memory manager cannot return nullptr here. */ NEO::GraphicsAllocation *PrintfHandler::createPrintfBuffer(Device *device) { NEO::AllocationProperties properties( device->getRootDeviceIndex(), PrintfHandler::printfBufferSize, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); properties.alignment = MemoryConstants::pageSize64k; auto allocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); *reinterpret_cast(allocation->getUnderlyingBuffer()) = PrintfHandler::printfSurfaceInitialDataSize; return allocation; } /* Runs NEO::PrintFormatter over printfBuffer using the kernel descriptor's printf string map, then writes printfSurfaceInitialDataSize back to the start of the buffer (presumably to reset it for reuse - TODO confirm). The device parameter is not used in this body. */ void PrintfHandler::printOutput(const KernelImmutableData *function, NEO::GraphicsAllocation *printfBuffer, Device *device) { bool using32BitGpuPointers = function->getDescriptor().kernelAttributes.gpuPointerSize == 4u; NEO::PrintFormatter printfFormatter{static_cast(printfBuffer->getUnderlyingBuffer()), static_cast(printfBuffer->getUnderlyingBufferSize()), using32BitGpuPointers, function->getDescriptor().kernelMetadata.printfStringsMap}; printfFormatter.printKernelOutput(); *reinterpret_cast(printfBuffer->getUnderlyingBuffer()) = PrintfHandler::printfSurfaceInitialDataSize; } /* Returns the compile-time printf buffer size (printfBufferSize). */ size_t PrintfHandler::getPrintBufferSize() { return PrintfHandler::printfBufferSize; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/printf_handler/printf_handler.h000066400000000000000000000017041363734646600301240ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_constants.h" #include "level_zero/core/source/kernel/kernel.h" #include namespace NEO { class Kernel; class GraphicsAllocation; 
} // namespace NEO namespace L0 { struct Device; /* Static-only helper (every constructor and copy-assignment is deleted) that creates the kernel printf buffer and prints its contents. */ struct PrintfHandler { static NEO::GraphicsAllocation *createPrintfBuffer(Device *device); static void printOutput(const KernelImmutableData *function, NEO::GraphicsAllocation *printfBuffer, Device *device); static size_t getPrintBufferSize(); protected: PrintfHandler(const PrintfHandler &) = delete; PrintfHandler &operator=(PrintfHandler const &) = delete; PrintfHandler() = delete; constexpr static size_t printfBufferSize = 4 * MemoryConstants::megaByte; constexpr static uint32_t printfSurfaceInitialDataSize = sizeof(uint32_t); }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/sampler/000077500000000000000000000000001363734646600234165ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/sampler/sampler.h000066400000000000000000000024461363734646600252400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/device/device.h" #include struct _ze_sampler_handle_t {}; namespace L0 { /* Abstract sampler interface behind ze_sampler_handle_t; instances are obtained through create(), which is declared here and takes a product family id. */ struct Sampler : _ze_sampler_handle_t { template struct Allocator { static Sampler *allocate() { return new Type(); } }; virtual ~Sampler() = default; virtual ze_result_t destroy() = 0; static Sampler *create(uint32_t productFamily, Device *device, const ze_sampler_desc_t *desc); virtual void copySamplerStateToDSH(void *dynamicStateHeap, const uint32_t dynamicStateHeapSize, const uint32_t heapOffset) = 0; static Sampler *fromHandle(ze_sampler_handle_t handle) { return static_cast(handle); } inline ze_sampler_handle_t toHandle() { return this; } }; using SamplerAllocatorFn = Sampler *(*)(); extern SamplerAllocatorFn samplerFactory[]; /* Constructing an instance stores an allocator function in samplerFactory[productFamily]. */ template struct SamplerPopulateFactory { SamplerPopulateFactory() { samplerFactory[productFamily] = Sampler::Allocator::allocate; } }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/source/sampler/sampler_hw.h000066400000000000000000000016731363734646600257370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/sampler/sampler_imp.h" namespace L0 { /* Per-core-family sampler: holds a hardware SAMPLER_STATE that initialize() fills from the Level Zero sampler descriptor and copySamplerStateToDSH() copies into a dynamic state heap. */ template struct SamplerCoreFamily : public SamplerImp { public: using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; using BaseClass = SamplerImp; ze_result_t initialize(Device *device, const ze_sampler_desc_t *desc) override; void copySamplerStateToDSH(void *dynamicStateHeap, const uint32_t dynamicStateHeapSize, const uint32_t offset) override; static constexpr float getGenSamplerMaxLod() { return 14.0f; } protected: SAMPLER_STATE samplerState; float lodMin = 1.0f; float lodMax = 1.0f; }; template struct SamplerProductFamily; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/sampler/sampler_hw.inl000066400000000000000000000112351363734646600262650ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/string.h" #include "shared/source/utilities/numeric.h" #include "level_zero/core/source/sampler/sampler_hw.h" namespace L0 { /* Runs the base-class initialize, then translates desc->addressMode and desc->filterMode into SAMPLER_STATE fields and programs the min/max LOD clamped to getGenSamplerMaxLod(). Returns ZE_RESULT_ERROR_INVALID_ARGUMENT for an address or filter mode that is not handled by the switches. */ template ze_result_t SamplerCoreFamily::initialize(Device *device, const ze_sampler_desc_t *desc) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; ze_result_t ret; ret = BaseClass::initialize(device, desc); if (ret != ZE_RESULT_SUCCESS) return ret; auto addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; auto addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; auto addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; switch (desc->addressMode) { case ZE_SAMPLER_ADDRESS_MODE_NONE: case ZE_SAMPLER_ADDRESS_MODE_CLAMP: break; case ZE_SAMPLER_ADDRESS_MODE_MIRROR: 
addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; break; case ZE_SAMPLER_ADDRESS_MODE_REPEAT: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; break; default: return ZE_RESULT_ERROR_INVALID_ARGUMENT; } auto minMode = SAMPLER_STATE::MIN_MODE_FILTER_NEAREST; auto magMode = SAMPLER_STATE::MAG_MODE_FILTER_NEAREST; auto mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; auto rAddrMinFilterRounding = false; auto rAddrMagFilterRounding = false; auto vAddrMinFilterRounding = false; auto vAddrMagFilterRounding = false; auto uAddrMinFilterRounding = false; auto uAddrMagFilterRounding = false; switch (desc->filterMode) { case ZE_SAMPLER_FILTER_MODE_NEAREST: minMode = SAMPLER_STATE::MIN_MODE_FILTER_NEAREST; magMode = SAMPLER_STATE::MAG_MODE_FILTER_NEAREST; mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; break; case ZE_SAMPLER_FILTER_MODE_LINEAR: minMode = SAMPLER_STATE::MIN_MODE_FILTER_LINEAR; magMode = SAMPLER_STATE::MAG_MODE_FILTER_LINEAR; mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; rAddrMinFilterRounding = true; rAddrMagFilterRounding = true; vAddrMinFilterRounding = true; vAddrMagFilterRounding = true; uAddrMinFilterRounding = true; uAddrMagFilterRounding = true; break; default: return ZE_RESULT_ERROR_INVALID_ARGUMENT; } samplerState.setMinModeFilter(minMode); samplerState.setMagModeFilter(magMode); samplerState.setMipModeFilter(mipMode); samplerState.setRAddressMinFilterRoundingEnable(rAddrMinFilterRounding); samplerState.setRAddressMagFilterRoundingEnable(rAddrMagFilterRounding); samplerState.setVAddressMinFilterRoundingEnable(vAddrMinFilterRounding); samplerState.setVAddressMagFilterRoundingEnable(vAddrMagFilterRounding); 
samplerState.setUAddressMinFilterRoundingEnable(uAddrMinFilterRounding); samplerState.setUAddressMagFilterRoundingEnable(uAddrMagFilterRounding); samplerState.setTcxAddressControlMode(addressControlModeX); samplerState.setTcyAddressControlMode(addressControlModeY); samplerState.setTczAddressControlMode(addressControlModeZ); NEO::FixedU4D8 minLodValue = NEO::FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMin)); NEO::FixedU4D8 maxLodValue = NEO::FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMax)); samplerState.setMinLod(minLodValue.getRawAccess()); samplerState.setMaxLod(maxLodValue.getRawAccess()); return ZE_RESULT_SUCCESS; } /* Copies this sampler's SAMPLER_STATE into dynamicStateHeap at byte offset samplerOffset. dynamicStateHeapSize is the total heap size in bytes; the copy is bounded by the bytes remaining from samplerOffset to the end of the heap. */ template void SamplerCoreFamily::copySamplerStateToDSH(void *dynamicStateHeap, const uint32_t dynamicStateHeapSize, const uint32_t samplerOffset) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; auto destSamplerState = ptrOffset(dynamicStateHeap, samplerOffset); /* The destination capacity is everything from samplerOffset to the end of the heap. Subtracting sizeof(SAMPLER_STATE) as well (as before) made memcpy_s reject a state that exactly fills the heap tail, and wrapped to a huge unsigned value when the offset was out of range; clamp to 0 so memcpy_s rejects such a copy. */ const size_t freeSpace = (samplerOffset < dynamicStateHeapSize) ? static_cast<size_t>(dynamicStateHeapSize - samplerOffset) : 0u; memcpy_s(destSamplerState, freeSpace, &samplerState, sizeof(SAMPLER_STATE)); } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/sampler/sampler_imp.cpp000066400000000000000000000020441363734646600264320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/sampler/sampler_imp.h" #include "level_zero/core/source/device/device.h" namespace L0 { SamplerAllocatorFn samplerFactory[IGFX_MAX_PRODUCT] = {}; ze_result_t SamplerImp::destroy() { delete this; return ZE_RESULT_SUCCESS; } ze_result_t SamplerImp::initialize(Device *device, const ze_sampler_desc_t *desc) { samplerDesc = *desc; return ZE_RESULT_SUCCESS; } /* Looks up the allocator registered for productFamily, instantiates the sampler and initializes it. Returns nullptr when productFamily is out of range, no allocator is registered, or initialize() reports a non-zero result (the sampler is deleted in that case). */ Sampler *Sampler::create(uint32_t productFamily, Device *device, const ze_sampler_desc_t *desc) { SamplerAllocatorFn allocator = nullptr; if 
(productFamily < IGFX_MAX_PRODUCT) { allocator = samplerFactory[productFamily]; } SamplerImp *sampler = nullptr; if (allocator) { sampler = static_cast((*allocator)()); if (sampler->initialize(device, desc)) { delete sampler; DEBUG_BREAK_IF(true); return nullptr; } } return sampler; } } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/sampler/sampler_imp.h000066400000000000000000000007001363734646600260740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/sampler/sampler.h" namespace L0 { class SamplerImp : public Sampler { public: ze_result_t destroy() override; virtual ze_result_t initialize(Device *device, const ze_sampler_desc_t *desc); ~SamplerImp() override = default; protected: ze_sampler_desc_t samplerDesc; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/core/source/windows/000077500000000000000000000000001363734646600234455ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/source/windows/CMakeLists.txt000066400000000000000000000004711363734646600262070ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(L0_SOURCES_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/registry_path.cpp ) set_property(GLOBAL PROPERTY L0_SOURCES_WINDOWS ${L0_SOURCES_WINDOWS}) endif() compute-runtime-20.13.16352/level_zero/core/source/windows/registry_path.cpp000066400000000000000000000003521363734646600270350ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/compiler_interface/l0_reg_path.h" namespace L0 { const char *registryPath = "Software\\Intel\\IGFX\\L0\\"; } 
compute-runtime-20.13.16352/level_zero/core/test/000077500000000000000000000000001363734646600214325ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/CMakeLists.txt000066400000000000000000000002111363734646600241640ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT SKIP_L0_UNIT_TESTS) add_subdirectories() endif() compute-runtime-20.13.16352/level_zero/core/test/black_box_tests/000077500000000000000000000000001363734646600246005ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/black_box_tests/zello_world.cpp000066400000000000000000000113051363734646600276400ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include #include /* Black-box smoke test: initializes the driver, takes one driver and one device, copies a 4096-byte shared buffer to a second one through a command list, and compares both buffers on the host. Any failing API call ends the process via std::terminate(); the exit code is 0 when the buffers match and 1 when validation fails. */ int main(int argc, char *argv[]) { // Initialize driver ze_result_t res = zeInit(ZE_INIT_FLAG_NONE); if (res) { std::terminate(); } // Retrieve driver uint32_t driverCount = 0; res = zeDriverGet(&driverCount, nullptr); if (res || driverCount == 0) { std::terminate(); } ze_driver_handle_t driverHandle; /* Only a single handle slot is provided, so request exactly one driver (same pattern as the device query below); passing the full count could write past driverHandle. */ driverCount = 1; res = zeDriverGet(&driverCount, &driverHandle); if (res) { std::terminate(); } // Retrieve device uint32_t deviceCount = 0; res = zeDeviceGet(driverHandle, &deviceCount, nullptr); if (res || deviceCount == 0) { std::terminate(); } ze_device_handle_t device; deviceCount = 1; res = zeDeviceGet(driverHandle, &deviceCount, &device); if (res) { std::terminate(); } // Print some properties ze_device_properties_t deviceProperties = {ZE_DEVICE_PROPERTIES_VERSION_CURRENT}; res = zeDeviceGetProperties(device, &deviceProperties); if (res) { std::terminate(); } std::cout << "Device : \n" << " * name : " << deviceProperties.name << "\n" << " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? 
"GPU" : "FPGA") << "\n" << " * vendorId : " << deviceProperties.vendorId << "\n"; // Create command queue ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT}; cmdQueueDesc.ordinal = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; res = zeCommandQueueCreate(device, &cmdQueueDesc, &cmdQueue); if (res) { std::terminate(); } // Create command list ze_command_list_handle_t cmdList; ze_command_list_desc_t cmdListDesc = {ZE_COMMAND_LIST_DESC_VERSION_CURRENT}; res = zeCommandListCreate(device, &cmdListDesc, &cmdList); if (res) { std::terminate(); } // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc; deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_DEFAULT; deviceDesc.ordinal = 0; deviceDesc.version = ZE_DEVICE_MEM_ALLOC_DESC_VERSION_CURRENT; ze_host_mem_alloc_desc_t hostDesc; hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_DEFAULT; hostDesc.version = ZE_HOST_MEM_ALLOC_DESC_VERSION_CURRENT; void *srcBuffer = nullptr; res = zeDriverAllocSharedMem(driverHandle, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer); if (res) { std::terminate(); } void *dstBuffer = nullptr; res = zeDriverAllocSharedMem(driverHandle, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer); if (res) { std::terminate(); } // Initialize memory constexpr uint8_t val = 55; memset(srcBuffer, val, allocSize); memset(dstBuffer, 0, allocSize); // Perform a GPU copy res = zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr); if (res) { std::terminate(); } // Close list and submit for execution res = zeCommandListClose(cmdList); if (res) { std::terminate(); } res = zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr); if (res) { std::terminate(); } // Wait for completion res = zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max()); if (res) { std::terminate(); } // Validate bool outputValidationSuccessful = true; if (memcmp(dstBuffer, 
srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); uint8_t *dstCharBuffer = static_cast(dstBuffer); for (size_t i = 0; i < allocSize; i++) { if (srcCharBuffer[i] != dstCharBuffer[i]) { std::cout << "srcBuffer[" << i << "] = " << static_cast(srcCharBuffer[i]) << " not equal to " << "dstBuffer[" << i << "] = " << static_cast(dstCharBuffer[i]) << "\n"; break; } } } // Cleanup res = zeDriverFreeMem(driverHandle, dstBuffer); if (res) { std::terminate(); } res = zeDriverFreeMem(driverHandle, srcBuffer); if (res) { std::terminate(); } res = zeCommandListDestroy(cmdList); if (res) { std::terminate(); } res = zeCommandQueueDestroy(cmdQueue); if (res) { std::terminate(); } std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; /* Report a validation failure through the exit code so whatever runs this test can detect it. */ return outputValidationSuccessful ? 0 : 1; } compute-runtime-20.13.16352/level_zero/core/test/unit_tests/000077500000000000000000000000001363734646600236335ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/CMakeLists.txt000066400000000000000000000125131363734646600263750ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # link_libraries(${ASAN_LIBS} ${TSAN_LIBS}) set(TARGET_NAME ${TARGET_NAME_L0}_core_tests) append_sources_from_properties(L0_CORE_ENABLERS NEO_CORE_SRCS_LINK) function(ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION) set(L0_TESTED_PRODUCT_FAMILIES ${ALL_TESTED_PRODUCT_FAMILY}) string(REPLACE ";" "," L0_TESTED_PRODUCT_FAMILIES "${L0_TESTED_PRODUCT_FAMILIES}") add_definitions(-DSUPPORTED_TEST_PRODUCT_FAMILIES=${L0_TESTED_PRODUCT_FAMILIES}) endfunction() ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION() add_executable(${TARGET_NAME} ${NEO_SOURCE_DIR}/level_zero/core/source/dll/disallow_deferred_deleter.cpp ${NEO_SOURCE_DIR}/shared/test/unit_test/helpers/memory_management.h ${NEO_SOURCE_DIR}/shared/test/unit_test/helpers/memory_management.cpp 
${NEO_SOURCE_DIR}/shared/test/unit_test/helpers/memory_leak_listener.h ${NEO_SOURCE_DIR}/shared/test/unit_test/helpers/memory_leak_listener.cpp ${L0_CORE_ENABLERS} ) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock.h ${CMAKE_CURRENT_SOURCE_DIR}/test_mode.h ${CMAKE_CURRENT_SOURCE_DIR}/ult_configuration.cpp ${CMAKE_CURRENT_SOURCE_DIR}/white_box.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/tests_configuration.h ) target_sources(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/libult/create_command_stream.cpp $ ) if (UNIX) target_sources(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/linux/create_drm_memory_manager.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/linux/drm_neo_create.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/linux/options.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/linux/sys_calls_linux_ult.cpp ) else() target_sources(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/windows/create_wddm_memory_manager.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/windows/options.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/windows/sys_calls.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/windows/wddm_calls.cpp ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/os_interface/windows/wddm_create.cpp ) endif() set_property(TARGET ${TARGET_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) add_subdirectoriesL0(${CMAKE_CURRENT_SOURCE_DIR} "*") target_compile_definitions(${TARGET_NAME} PRIVATE $) target_include_directories(${TARGET_NAME} BEFORE PRIVATE $/core $/tools $/../../../../instrumentation/inc/common/instrumentation/api/ 
$/core/os_interface ${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros${BRANCH_DIR_SUFFIX} ) if (UNIX) target_include_directories(${TARGET_NAME} BEFORE PRIVATE $/core/os_interface/linux $/tools/linux ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/linux ) else() target_include_directories(${TARGET_NAME} BEFORE PRIVATE $/core/os_interface/windows $/tools/windows ${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/windows ) endif() if (UNIX) target_link_libraries(${TARGET_NAME} pthread rt) else() target_link_libraries(${TARGET_NAME} dbghelp) endif() target_link_libraries(${TARGET_NAME} ${NEO_STATICALLY_LINKED_LIBRARIES_MOCKABLE} compute_runtime_mockable_extra ${HW_LIBS_ULT} gmock-gtest ) if(SKIP_NEO_UNIT_TESTS) add_subdirectory(${COMPUTE_RUNTIME_DIR}/opencl/test/unit_test/mock_gmm ${CMAKE_BINARY_DIR}/mock_gmm) endif() target_sources(${TARGET_NAME} PRIVATE $ $ ) option(L0_ULT_VERBOSE "Use the default/verbose test output" OFF) if(NOT L0_ULT_VERBOSE) set(L0_TESTS_LISTENER_OPTION "--disable_default_listener") else() set(L0_TESTS_LISTENER_OPTION "--enable_default_listener") endif() if(L0_ULT_FILTER) set(L0_TESTS_FILTER_OPTION "--gtest_filter=*${L0_ULT_FILTER}*") else() set(L0_TESTS_FILTER_OPTION "--gtest_filter=*") endif() if(MSVC) set_target_properties(${TARGET_NAME} PROPERTIES VS_DEBUGGER_COMMAND_ARGUMENTS "${L0_TESTS_FILTER_OPTION} --gtest_catch_exceptions=0 ${L0_TESTS_LISTENER_OPTION}" VS_DEBUGGER_WORKING_DIRECTORY "$(OutDir)" ) set(RUN_ULT_CMD $) else() set(RUN_ULT_CMD LD_LIBRARY_PATH=$ $) endif() create_source_tree(${TARGET_NAME} ${L0_ROOT_DIR}/..) 
compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen11/000077500000000000000000000000001363734646600245465ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen11/CMakeLists.txt000066400000000000000000000003401363734646600273030ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN11) target_sources(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_ULT_GEN11} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) endif()compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen12lp/000077500000000000000000000000001363734646600251035ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen12lp/CMakeLists.txt000066400000000000000000000005611363734646600276450ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) target_sources(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_ULT_GEN12LP} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_include_directories(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_DIR}/level_zero/core/source/gen12lp/definitions${BRANCH_DIR_SUFFIX}/) endif()compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen8/000077500000000000000000000000001363734646600244745ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen8/CMakeLists.txt000066400000000000000000000003361363734646600272360ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN8) target_sources(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_ULT_GEN8} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) endif()compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen9/000077500000000000000000000000001363734646600244755ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/gen9/CMakeLists.txt000066400000000000000000000003371363734646600272400ustar00rootroot00000000000000# 
# Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) target_sources(${TARGET_NAME} PRIVATE ${COMPUTE_RUNTIME_ULT_GEN9} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) endif() compute-runtime-20.13.16352/level_zero/core/test/unit_tests/main.cpp000066400000000000000000000153011363734646600252630ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/unit_test/helpers/default_hw_info.inl" #include "shared/test/unit_test/helpers/memory_leak_listener.h" #include "shared/test/unit_test/helpers/ult_hw_config.inl" #include "opencl/source/program/kernel_info.h" #include "opencl/source/utilities/logger.h" #include "opencl/test/unit_test/custom_event_listener.h" #include "opencl/test/unit_test/mocks/mock_gmm_client_context.h" #include "opencl/test/unit_test/mocks/mock_sip.h" #include "level_zero/core/source/cmdlist/cmdlist.h" #include "gmock/gmock.h" #include "igfxfmid.h" #include <cstdlib> #include #include #include #include #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif #ifdef WIN32 const char *fSeparator = "\\"; #else const char *fSeparator = "/"; #endif TEST(Should, pass) { EXPECT_TRUE(true); } namespace L0 { namespace ult { ::testing::Environment *environment = nullptr; } } // namespace L0 using namespace L0::ult; PRODUCT_FAMILY productFamily = NEO::DEFAULT_TEST_PLATFORM::hwInfo.platform.eProductFamily; GFXCORE_FAMILY renderCoreFamily = NEO::DEFAULT_TEST_PLATFORM::hwInfo.platform.eRenderCoreFamily; namespace NEO { extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT]; namespace MockSipData { extern std::unique_ptr mockSipKernel; } } // namespace NEO /* Returns the directory part of argv0 (text before the last fSeparator). When argv0 has no separator, or the directory is ".", the current working directory is returned instead; if that cannot be obtained the value computed from argv0 is kept. */ std::string getRunPath(char *argv0) { std::string res(argv0); auto pos = res.rfind(fSeparator); if (pos 
!= std::string::npos) res = res.substr(0, pos); if (res == "." || pos == std::string::npos) { /* getcwd(nullptr, 0) hands back a heap buffer owned by the caller and may return nullptr: copy it into res only when valid, then release it (previously the buffer leaked and a nullptr was passed to std::string). */ #if defined(__linux__) char *cwd = getcwd(nullptr, 0); #else char *cwd = _getcwd(nullptr, 0); #endif if (cwd != nullptr) { res = cwd; free(cwd); } } return res; } std::thread::id tempThreadID; /* Exercises several library facilities once (file stream, mutex, string stream, gmock, a thread, the file logger) before the tests run; per the inline notes below, the thread and FileLogger steps are there to prevent false memory-leak reports. */ void applyWorkarounds() { NEO::platformsImpl.reserve(1); { std::ofstream f; const std::string fileName("_tmp_"); f.open(fileName, std::ofstream::binary); f.close(); } { std::mutex mtx; std::unique_lock stateLock(mtx); } { std::stringstream ss("1"); int val; ss >> val; } { class BaseClass { public: int method(int param) { return 1; } }; class MockClass : public BaseClass { public: MOCK_METHOD1(method, int(int param)); }; ::testing::NiceMock mockObj; EXPECT_CALL(mockObj, method(::testing::_)) .Times(1); mockObj.method(2); } //initialize rand srand(static_cast(time(nullptr))); //Create at least one thread to prevent false memory leaks in tests using threads std::thread t([&]() { }); tempThreadID = t.get_id(); t.join(); //Create FileLogger to prevent false memory leaks { NEO::FileLoggerInstance(); } } /* Test runner entry point. Parses --product <id|name>, --disable_default_listener and --enable_default_listener, returns -1 for an unknown product, returns 0 without running tests when the product has no command list factory, and otherwise configures the hardware info, listeners and mock SIP kernel before returning the result of RUN_ALL_TESTS(). */ int main(int argc, char **argv) { bool useDefaultListener = false; applyWorkarounds(); testing::InitGoogleMock(&argc, argv); NEO::HardwareInfo hwInfoForTests = NEO::DEFAULT_TEST_PLATFORM::hwInfo; for (int i = 1; i < argc; ++i) { if (!strcmp("--product", argv[i])) { ++i; if (i < argc) { if (::isdigit(argv[i][0])) { int productValue = atoi(argv[i]); if (productValue > 0 && productValue < IGFX_MAX_PRODUCT && NEO::hardwarePrefix[productValue] != nullptr) { productFamily = static_cast(productValue); } else { productFamily = IGFX_UNKNOWN; } } else { productFamily = IGFX_UNKNOWN; for (int j = 0; j < IGFX_MAX_PRODUCT; j++) { if (NEO::hardwarePrefix[j] == nullptr) continue; if (strcmp(NEO::hardwarePrefix[j], argv[i]) == 0) { productFamily = static_cast(j); break; } } } if (productFamily == IGFX_UNKNOWN) { std::cout << "unknown or unsupported product family has been set: " << argv[i] << std::endl; return -1; } else { std::cout << "product family: " << 
NEO::hardwarePrefix[productFamily] << " (" << productFamily << ")" << std::endl; } hwInfoForTests = *NEO::hardwareInfoTable[productFamily]; } } /* else-if: after "--product" the index may equal argc, so argv[i] must not be handed to strcmp on that path. */ else if (!strcmp("--disable_default_listener", argv[i])) { useDefaultListener = false; } else if (!strcmp("--enable_default_listener", argv[i])) { useDefaultListener = true; } } // Platforms with uninitialized factory are not supported if (L0::commandListFactory[productFamily] == nullptr) { std::cout << "unsupported product family has been set: " << NEO::hardwarePrefix[::productFamily] << std::endl; std::cout << "skipping tests" << std::endl; return 0; } auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); if (useDefaultListener == false) { auto defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener, NEO::hardwarePrefix[productFamily]); listeners.Release(defaultListener); listeners.Append(customEventListener); } listeners.Append(new NEO::MemoryLeakListener); NEO::GmmHelper::createGmmContextWrapperFunc = NEO::GmmClientContextBase::create; if (environment) { ::testing::AddGlobalTestEnvironment(environment); } uint64_t hwInfoConfig = NEO::defaultHardwareInfoConfigTable[productFamily]; NEO::setHwInfoValuesFromConfig(hwInfoConfig, hwInfoForTests); // set Gt and FeatureTable to initial state NEO::hardwareInfoSetup[productFamily](&hwInfoForTests, false, hwInfoConfig); productFamily = hwInfoForTests.platform.eProductFamily; renderCoreFamily = hwInfoForTests.platform.eRenderCoreFamily; NEO::defaultHwInfo = std::make_unique(); *NEO::defaultHwInfo = hwInfoForTests; NEO::useKernelDescriptor = true; NEO::MockSipData::mockSipKernel.reset(new NEO::MockSipKernel()); auto retVal = RUN_ALL_TESTS(); return retVal; } #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mock.h000066400000000000000000000003771363734646600247440ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "gmock/gmock.h" namespace L0 { namespace ult { /* Primary Mock template: simply derives from Type. */ template struct Mock : public Type {}; } // namespace ult } // namespace L0 compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/000077500000000000000000000000001363734646600247475ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/CMakeLists.txt000066400000000000000000000024541363734646600275140ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(TARGET_NAME ${TARGET_NAME_L0}_mocks) set(L0_MOCKS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_built_ins.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_built_ins.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_cmdqueue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_cmdqueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_device.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp ) add_library(${TARGET_NAME} OBJECT ${L0_MOCKS_SOURCES}) target_include_directories(${TARGET_NAME} PRIVATE $ $ $ ${NEO_SOURCE_DIR}/level_zero/core/test/unit_test ) target_compile_definitions(${TARGET_NAME} PRIVATE $) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${TARGET_NAME_L0}) create_source_tree(${TARGET_NAME} ${L0_ROOT_DIR}) compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_built_ins.cpp000066400000000000000000000006511363734646600304560ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "opencl/source/helpers/built_ins_helper.h" namespace L0 { namespace ult { /* Forwards the request to NEO::initSipKernel for the given SIP kernel type and device. */ const NEO::SipKernel &MockBuiltins::getSipKernel(NEO::SipKernelType type, NEO::Device &device) { return NEO::initSipKernel(type, device); 
} } // namespace ult } // namespace L0 compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_built_ins.h000066400000000000000000000005651363734646600301270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" namespace L0 { namespace ult { /* NEO::BuiltIns subclass used by the tests; it overrides only getSipKernel. */ class MockBuiltins : public NEO::BuiltIns { public: const NEO::SipKernel &getSipKernel(NEO::SipKernelType type, NEO::Device &device) override; }; } // namespace ult } // namespace L0 compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.cpp000066400000000000000000000012621363734646600302750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_cmdqueue.h" #include "shared/source/device/device.h" namespace L0 { namespace ult { /* Forwards all three arguments to the CommandQueueImp constructor. */ WhiteBox<::L0::CommandQueue>::WhiteBox(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : ::L0::CommandQueueImp(device, csr, desc) {} WhiteBox<::L0::CommandQueue>::~WhiteBox() {} /* Constructs the WhiteBox base and then assigns the device member again from the argument. */ Mock::Mock(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : WhiteBox<::L0::CommandQueue>(device, csr, desc) { this->device = device; } Mock::~Mock() { } } // namespace ult } // namespace L0 compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h000066400000000000000000000050551363734646600277460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace L0 { namespace ult { 
/* WhiteBox specialization for the command queue: derives from CommandQueueImp and re-exports the listed base members through using-declarations. */ template <> struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp { using BaseClass = ::L0::CommandQueueImp; using BaseClass::buffers; using BaseClass::commandStream; using BaseClass::csr; using BaseClass::device; using BaseClass::printfFunctionContainer; using BaseClass::synchronizeByPollingForTaskCount; WhiteBox(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc); ~WhiteBox() override; }; using CommandQueue = WhiteBox<::L0::CommandQueue>; static ze_command_queue_desc_t default_cmd_queue_desc = {}; /* gmock-based command queue: every constructor argument is defaulted (null device, null csr, default_cmd_queue_desc) and the queue entry points are declared with MOCK_METHODn. */ template <> struct Mock : public CommandQueue { Mock(L0::Device *device = nullptr, NEO::CommandStreamReceiver *csr = nullptr, const ze_command_queue_desc_t *desc = &default_cmd_queue_desc); ~Mock() override; MOCK_METHOD2(createFence, ze_result_t(const ze_fence_desc_t *desc, ze_fence_handle_t *phFence)); MOCK_METHOD0(destroy, ze_result_t()); MOCK_METHOD4(executeCommandLists, ze_result_t(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration)); MOCK_METHOD3(executeCommands, ze_result_t(uint32_t numCommands, void *phCommands, ze_fence_handle_t hFence)); MOCK_METHOD1(synchronize, ze_result_t(uint32_t timeout)); MOCK_METHOD2(dispatchTaskCountWrite, void(NEO::LinearStream &commandStream, bool flushDataCache)); }; /* CommandQueueHw variant whose synchronize() only increments synchronizedCalled and returns ZE_RESULT_SUCCESS. */ template struct MockCommandQueueHw : public L0::CommandQueueHw { MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw(device, csr, desc) { } ze_result_t synchronize(uint32_t timeout) override { synchronizedCalled++; return ZE_RESULT_SUCCESS; } uint32_t synchronizedCalled = 0; }; } // namespace ult } // namespace L0 #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_device.cpp000066400000000000000000000012731363734646600277260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "mock_device.h" #include "shared/source/device/device.h" #include "gmock/gmock.h" namespace L0 { namespace ult { using ::testing::AnyNumber; using ::testing::Return; Mock::Mock() { ON_CALL(*this, getMOCS(false, false)).WillByDefault(Return(0)); EXPECT_CALL(*this, getMOCS(false, false)).Times(AnyNumber()); ON_CALL(*this, getMOCS(true, false)).WillByDefault(Return(2)); EXPECT_CALL(*this, getMOCS(true, false)).Times(AnyNumber()); EXPECT_CALL(*this, getMaxNumHwThreads).WillRepeatedly(Return(16)); } Mock::~Mock() { } } // namespace ult } // namespace L0 compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_device.h000066400000000000000000000140321363734646600273700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/device/device.h" #include "shared/source/device/device_info.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/driver_handle.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include "level_zero/tools/source/metrics/metric.h" #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace L0 { namespace ult { template <> struct WhiteBox<::L0::Device> : public ::L0::Device {}; using Device = WhiteBox<::L0::Device>; template <> struct Mock : public Device { Mock(); ~Mock() override; MOCK_METHOD0(getRootDeviceIndex, uint32_t()); MOCK_METHOD2(canAccessPeer, ze_result_t(ze_device_handle_t hPeerDevice, ze_bool_t *value)); MOCK_METHOD2(copyCommandList, ze_result_t(ze_command_list_handle_t hCommandList, ze_command_list_handle_t *phCommandList)); MOCK_METHOD2(createCommandList, ze_result_t(const ze_command_list_desc_t *desc, 
ze_command_list_handle_t *commandList)); MOCK_METHOD2(createCommandListImmediate, ze_result_t(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList)); MOCK_METHOD2(createCommandQueue, ze_result_t(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue)); MOCK_METHOD2(createEventPool, ze_result_t(const ze_event_pool_desc_t *desc, ze_event_pool_handle_t *eventPool)); MOCK_METHOD2(createImage, ze_result_t(const ze_image_desc_t *desc, ze_image_handle_t *phImage)); MOCK_METHOD3(createModule, ze_result_t(const ze_module_desc_t *desc, ze_module_handle_t *module, ze_module_build_log_handle_t *buildLog)); MOCK_METHOD2(createSampler, ze_result_t(const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler)); MOCK_METHOD1(evictImage, ze_result_t(ze_image_handle_t hImage)); MOCK_METHOD2(evictMemory, ze_result_t(void *ptr, size_t size)); MOCK_METHOD1(getComputeProperties, ze_result_t(ze_device_compute_properties_t *pComputeProperties)); MOCK_METHOD2(getP2PProperties, ze_result_t(ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties)); MOCK_METHOD1(getKernelProperties, ze_result_t(ze_device_kernel_properties_t *pKernelProperties)); MOCK_METHOD2(getMemoryProperties, ze_result_t(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties)); MOCK_METHOD1(getMemoryAccessProperties, ze_result_t(ze_device_memory_access_properties_t *pMemAccessProperties)); MOCK_METHOD1(getProperties, ze_result_t(ze_device_properties_t *pDeviceProperties)); MOCK_METHOD2(getSubDevice, ze_result_t(uint32_t ordinal, ze_device_handle_t *phSubDevice)); MOCK_METHOD2(getSubDevices, ze_result_t(uint32_t *pCount, ze_device_handle_t *phSubdevices)); MOCK_METHOD1(makeImageResident, ze_result_t(ze_image_handle_t hImage)); MOCK_METHOD2(makeMemoryResident, ze_result_t(void *ptr, size_t size)); MOCK_METHOD1(setIntermediateCacheConfig, ze_result_t(ze_cache_config_t CacheConfig)); MOCK_METHOD1(setLastLevelCacheConfig, ze_result_t(ze_cache_config_t 
CacheConfig)); MOCK_METHOD1(getCacheProperties, ze_result_t(ze_device_cache_properties_t *pCacheProperties)); MOCK_METHOD2(imageGetProperties, ze_result_t(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties)); MOCK_METHOD1(getDeviceImageProperties, ze_result_t(ze_device_image_properties_t *pDeviceImageProperties)); MOCK_METHOD0(systemBarrier, ze_result_t()); MOCK_METHOD3(registerCLMemory, ze_result_t(cl_context context, cl_mem mem, void **ptr)); MOCK_METHOD3(registerCLProgram, ze_result_t(cl_context context, cl_program program, ze_module_handle_t *phModule)); MOCK_METHOD3(registerCLCommandQueue, ze_result_t(cl_context context, cl_command_queue command_queue, ze_command_queue_handle_t *phCommandQueue)); // Runtime internal methods MOCK_METHOD0(getMemoryManager, NEO::MemoryManager *()); MOCK_METHOD0(getExecEnvironment, void *()); MOCK_METHOD0(getHwHelper, NEO::HwHelper &()); MOCK_CONST_METHOD0(isMultiDeviceCapable, bool()); MOCK_METHOD0(getBuiltinFunctionsLib, BuiltinFunctionsLib *()); MOCK_METHOD2(getMOCS, uint32_t(bool l3enabled, bool l1enabled)); MOCK_CONST_METHOD0(getMaxNumHwThreads, uint32_t()); MOCK_METHOD2(activateMetricGroups, ze_result_t(uint32_t count, zet_metric_group_handle_t *phMetricGroups)); MOCK_METHOD0(getOsInterface, NEO::OSInterface &()); MOCK_CONST_METHOD0(getPlatformInfo, uint32_t()); MOCK_METHOD0(getMetricContext, MetricContext &()); MOCK_CONST_METHOD0(getHwInfo, const NEO::HardwareInfo &()); MOCK_METHOD0(getDriverHandle, L0::DriverHandle *()); MOCK_METHOD1(setDriverHandle, void(L0::DriverHandle *)); MOCK_CONST_METHOD0(getDevicePreemptionMode, NEO::PreemptionMode()); MOCK_CONST_METHOD0(getDeviceInfo, const NEO::DeviceInfo &()); MOCK_METHOD0(getNEODevice, NEO::Device *()); MOCK_METHOD0(activateMetricGroups, void()); MOCK_CONST_METHOD0(getDebugSurface, NEO::GraphicsAllocation *()); }; template <> struct Mock : public L0::DeviceImp { using Base = L0::DeviceImp; explicit Mock(NEO::Device *device, NEO::ExecutionEnvironment *execEnv) 
{ device->incRefInternal(); Base::execEnvironment = execEnv; Base::neoDevice = device; } }; } // namespace ult } // namespace L0 #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_driver.cpp000066400000000000000000000010131363734646600277520ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_driver.h" namespace L0 { namespace ult { using MockDriver = Mock; using ::testing::Invoke; Mock::Mock() { previousDriver = driver; driver = this; EXPECT_CALL(*this, initialize) .WillRepeatedly(Invoke(this, &MockDriver::mockInitialize)); } Mock::~Mock() { if (driver == this) { driver = previousDriver; } } } // namespace ult } // namespace L0 compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_driver.h000066400000000000000000000020411363734646600274210ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/driver/driver_imp.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace L0 { namespace ult { template <> struct WhiteBox<::L0::DriverImp> : public ::L0::DriverImp { }; using Driver = WhiteBox<::L0::DriverImp>; template <> struct Mock : public DriverImp { Mock(); virtual ~Mock(); MOCK_METHOD1(driverInit, ze_result_t(ze_init_flag_t)); MOCK_METHOD1(initialize, void(bool *result)); ze_result_t mockInit(ze_init_flag_t) { return this->DriverImp::driverInit(ZE_INIT_FLAG_NONE); } void mockInitialize(bool *result) { *result = true; } Driver *previousDriver = nullptr; }; } // namespace ult } // namespace L0 #if defined(__clang__) #pragma clang diagnostic pop #endif 
compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_memory_manager.cpp000066400000000000000000000036031363734646600314700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_memory_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/string.h" #include namespace L0 { namespace ult { using namespace testing; using ::testing::Return; Mock::Mock(NEO::ExecutionEnvironment &executionEnvironment) : MemoryManagerMock(executionEnvironment) { EXPECT_CALL(*this, allocateGraphicsMemoryInPreferredPool) .WillRepeatedly(Invoke(this, &Mock::doAllocateGraphicsMemoryInPreferredPool)); EXPECT_CALL(*this, freeGraphicsMemory) .WillRepeatedly(Invoke(this, &Mock::doFreeGraphicsMemory)); } inline NEO::GraphicsAllocation *Mock::doAllocateGraphicsMemoryInPreferredPool(const NEO::AllocationProperties &properties, const void *hostPtr) { void *buffer; if (hostPtr != nullptr) { buffer = const_cast(hostPtr); } else { buffer = alignedMalloc(properties.size, MemoryConstants::pageSize); } uint64_t baseAddress = 0; if (properties.allocationType == NEO::GraphicsAllocation::AllocationType::INTERNAL_HEAP) { baseAddress = castToUint64(alignDown(buffer, MemoryConstants::pageSize64k)); } auto allocation = new NEO::GraphicsAllocation(properties.rootDeviceIndex, properties.allocationType, buffer, reinterpret_cast(buffer), baseAddress, properties.size, MemoryPool::System4KBPages); return allocation; } inline void Mock::doFreeGraphicsMemory(NEO::GraphicsAllocation *allocation) { if (allocation == nullptr) { return; } alignedFree(allocation->getUnderlyingBuffer()); delete allocation; } } // namespace ult } // namespace L0 compute-runtime-20.13.16352/level_zero/core/test/unit_tests/mocks/mock_memory_manager.h000066400000000000000000000030231363734646600311310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma 
once #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "level_zero/core/test/unit_tests/mock.h" #include "level_zero/core/test/unit_tests/white_box.h" #include #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace L0 { namespace ult { template <> struct WhiteBox<::NEO::OsAgnosticMemoryManager> : public ::NEO::OsAgnosticMemoryManager { using BaseClass = ::NEO::OsAgnosticMemoryManager; WhiteBox(NEO::ExecutionEnvironment &executionEnvironment) : NEO::OsAgnosticMemoryManager(executionEnvironment) {} }; using MemoryManagerMock = WhiteBox<::NEO::OsAgnosticMemoryManager>; template <> struct Mock : public MemoryManagerMock { Mock(NEO::ExecutionEnvironment &executionEnvironment); MOCK_METHOD2(allocateGraphicsMemoryInPreferredPool, NEO::GraphicsAllocation *(const NEO::AllocationProperties &properties, const void *hostPtr)); MOCK_METHOD1(freeGraphicsMemory, void(NEO::GraphicsAllocation *)); NEO::GraphicsAllocation *doAllocateGraphicsMemoryInPreferredPool(const NEO::AllocationProperties &properties, const void *hostPtr); void doFreeGraphicsMemory(NEO::GraphicsAllocation *allocation); }; } // namespace ult } // namespace L0 #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/000077500000000000000000000000001363734646600253165ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/CMakeLists.txt000066400000000000000000000003641363734646600300610ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_test.cpp ) add_subdirectories() 
compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/cmdlist/000077500000000000000000000000001363734646600267555ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/cmdlist/CMakeLists.txt000066400000000000000000000003241363734646600315140ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist.cpp )compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp000066400000000000000000000045171363734646600321660ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" namespace L0 { namespace ult { struct L0DeviceFixture { void SetUp() { neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get()); device = std::make_unique>(neoDevice, neoDevice->getExecutionEnvironment()); } void TearDown() { } NEO::MockDevice *neoDevice = nullptr; std::unique_ptr> device = nullptr; }; using CommandListCreate = Test; TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) { std::unique_ptr commandList(CommandList::create(productFamily, device.get())); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device.get(), commandList->device); ASSERT_GT(commandList->commandContainer.getCmdBufferAllocations().size(), 0u); auto numAllocations = 0u; auto allocation = whitebox_cast(commandList->commandContainer.getCmdBufferAllocations()[0]); ASSERT_NE(allocation, nullptr); ++numAllocations; ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream()); for (uint32_t i = 0; i < NEO::HeapType::NUM_TYPES; i++) { 
ASSERT_NE(commandList->commandContainer.getIndirectHeap(static_cast(i)), nullptr); ++numAllocations; ASSERT_NE(commandList->commandContainer.getIndirectHeapAllocation(static_cast(i)), nullptr); } EXPECT_LT(0u, commandList->commandContainer.getCommandStream()->getAvailableSpace()); ASSERT_EQ(commandList->commandContainer.getResidencyContainer().size(), numAllocations); EXPECT_EQ(commandList->commandContainer.getResidencyContainer().front(), allocation); } TEST_F(CommandListCreate, givenRegularCommandListThenDefaultNumIddPerBlockIsUsed) { std::unique_ptr commandList(CommandList::create(productFamily, device.get())); ASSERT_NE(nullptr, commandList); const uint32_t defaultNumIdds = CommandList::defaultNumIddsPerBlock; EXPECT_EQ(defaultNumIdds, commandList->commandContainer.getNumIddPerBlock()); } } // namespace ult } // namespace L0compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/debugger/000077500000000000000000000000001363734646600271025ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/debugger/CMakeLists.txt000066400000000000000000000003251363734646600316420ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_debugger.cpp )compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/debugger/test_debugger.cpp000066400000000000000000000115521363734646600324350ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/gen_common/reg_configs/reg_configs_common.h" #include "shared/source/helpers/preamble.h" #include "shared/test/unit_test/mocks/mock_os_library.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include 
"opencl/test/unit_test/mocks/mock_source_level_debugger.h" #include "test.h" #include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_driver.h" #include "level_zero/core/test/unit_tests/mocks/mock_memory_manager.h" #include #include "gtest/gtest.h" namespace L0 { namespace ult { struct ActiveDebuggerFixture { void SetUp() { auto executionEnvironment = new NEO::ExecutionEnvironment(); auto mockBuiltIns = new MockBuiltins(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltIns); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); debugger = new MockActiveSourceLevelDebugger(new MockOsLibrary); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(debugger); executionEnvironment->initializeMemoryManager(); device = NEO::MockDevice::create(executionEnvironment, 0u); device->setDebuggerActive(true); std::vector> devices; devices.push_back(std::unique_ptr(device)); auto driverHandleUlt = whitebox_cast(DriverHandle::create(std::move(devices))); driverHandle.reset(driverHandleUlt); ASSERT_NE(nullptr, driverHandle); ze_device_handle_t hDevice; uint32_t count = 1; ze_result_t result = driverHandle->getDevice(&count, &hDevice); EXPECT_EQ(ZE_RESULT_SUCCESS, result); deviceL0 = L0::Device::fromHandle(hDevice); ASSERT_NE(nullptr, deviceL0); } void TearDown() { } std::unique_ptr> driverHandle; NEO::MockDevice *device = nullptr; L0::Device *deviceL0; MockActiveSourceLevelDebugger *debugger = nullptr; }; using CommandQueueDebugCommandsTest = Test; HWTEST_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedThenKernelDebugCommandsAreAdded) { 
ze_command_queue_desc_t queueDesc = {}; auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, deviceL0)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); ASSERT_LE(2u, miLoadImm.size()); MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[0]); ASSERT_NE(nullptr, miLoad); EXPECT_EQ(DebugModeRegisterOffset::registerOffset, miLoad->getRegisterOffset()); EXPECT_EQ(DebugModeRegisterOffset::debugEnabledValue, miLoad->getDataDword()); miLoad = genCmdCast(*miLoadImm[1]); ASSERT_NE(nullptr, miLoad); EXPECT_EQ(TdDebugControlRegisterOffset::registerOffset, miLoad->getRegisterOffset()); EXPECT_EQ(TdDebugControlRegisterOffset::debugEnabledValue, miLoad->getDataDword()); for (auto i = 0u; i < numCommandLists; i++) { auto commandList = CommandList::fromHandle(commandLists[i]); commandList->destroy(); } commandQueue->destroy(); } using DeviceWithDebuggerEnabledTest = Test; TEST_F(DeviceWithDebuggerEnabledTest, givenDebuggingEnabledWhenDeviceIsCreatedThenItHasDebugSurfaceCreated) { EXPECT_NE(nullptr, deviceL0->getDebugSurface()); } } // namespace ult } // namespace L0 
compute-runtime-20.13.16352/level_zero/core/test/unit_tests/sources/deferred_deleter_test.cpp000066400000000000000000000005401363734646600323440ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/deferred_deleter_helper.h" #include "gtest/gtest.h" using namespace NEO; TEST(DeferredDeleterHelper, GivenDefferedDeleterHelperWhenCheckIFDeferrDeleterIsEnabledThenFalseIsReturned) { EXPECT_FALSE(isDeferredDeleterEnabled()); } compute-runtime-20.13.16352/level_zero/core/test/unit_tests/test_mode.h000066400000000000000000000003711363734646600257700ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/tests_configuration.h" namespace NEO { constexpr TestMode defaultTestMode = TestMode::UnitTests; } // namespace NEO compute-runtime-20.13.16352/level_zero/core/test/unit_tests/ult_configuration.cpp000066400000000000000000000002661363734646600300760ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_mode.h" namespace NEO { TestMode testMode = defaultTestMode; } // namespace NEO compute-runtime-20.13.16352/level_zero/core/test/unit_tests/white_box.h000066400000000000000000000011021363734646600257660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace L0 { namespace ult { template struct WhiteBox : public Type {}; template WhiteBox *whitebox_cast(Type *obj) { return static_cast *>(obj); } template WhiteBox &whitebox_cast(Type &obj) { return static_cast &>(obj); } template Type *blackbox_cast(WhiteBox *obj) { return static_cast(obj); } } // namespace ult } // namespace L0 
compute-runtime-20.13.16352/level_zero/ddi/000077500000000000000000000000001363734646600202635ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/ddi/ze_ddi_tables.h000066400000000000000000000011151363734646600232220ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include extern "C" { typedef struct _ze_gpu_driver_dditable_t { ze_dditable_t ddiTable; ze_dditable_t core_ddiTable; ze_dditable_t tracing_ddiTable; zet_dditable_t tools_ddiTable; ze_api_version_t version = ZE_API_VERSION_1_0; bool enableTracing; void *driverLibrary; } ze_gpu_driver_dditable_t; extern ze_gpu_driver_dditable_t driver_ddiTable; } // extern "C" compute-runtime-20.13.16352/level_zero/doc/000077500000000000000000000000001363734646600202705ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/doc/BUILD.md000066400000000000000000000030441363734646600214520ustar00rootroot00000000000000# Building Level Zero These instructions have been tested on Ubuntu* and complement those existing for NEO in the top-level BUILD.md file. 1. Install Level Zero loader and Level Zero headers Build Level Zero loader, as indicated in [https://github.com/oneapi-src/level-zero](https://github.com/oneapi-src/level-zero). Build will generate ze_loader library and symlinks, as well as those for ze_validation_layer. Optionally, two packages can be built: binary and devel. 2. Build Level Zero driver Follow instructions in top-level BUILD.md file to build NEO. Level Zero is built by default. When built, ze_intel_gpu library and symlinks are generated. Optionally, you may install Level Zero loader and driver packages. 3. 
Build your application Compilation needs to include the Level Zero headers and to link against the loader library: ```shell g++ zello_world.cpp -o zello_world -lze_loader ``` If libraries not installed in system paths, include Level Zero headers and path to Level Zero loader: ```shell g++ -I zello_world.cpp -o zello_world -L -lze_loader ``` 4. Execute your application If Level Zero loader packages have been built and installed in the system, then they will be present in system paths: ```shell ./zello_world ``` If libraries not installed in system paths, add paths to ze_loader and ze_intel_gpu libraries: ```shell LD_LIBRARY_PATH=: ./zello_world ``` ___(*) Other names and brands may be claimed as property of others.___compute-runtime-20.13.16352/level_zero/experimental/000077500000000000000000000000001363734646600222205ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/experimental/source/000077500000000000000000000000001363734646600235205ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/experimental/source/CMakeLists.txt000066400000000000000000000010031363734646600262520ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_EXPERIMENTAL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/experimental.cpp ) target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_EXPERIMENTAL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_EXPERIMENTAL_SOURCES ${L0_EXPERIMENTAL_SOURCES}) compute-runtime-20.13.16352/level_zero/experimental/source/experimental.cpp000066400000000000000000000001741363734646600267230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace L0 { } // namespace L0 
compute-runtime-20.13.16352/level_zero/os_release_info.cmake000066400000000000000000000135061363734646600236660ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DEFINED _os_release_info) set(_os_release_info TRUE) # os_release_info.cmake - Function to dump OS name and version # This file has no dependencies on other files (e.g., functions or definitions) # of the local cmake environment. # Set cmake policies for at least this level: cmake_minimum_required(VERSION 3.2.0) # Function get_os_release_info - Determine and return OS name and version # # Args: # 1. the name of a variable to receive os_name # 2. the name of a variable to receive os_version # # Return values: (Quotation marks are always stripped). # Upon failure, return values are null strings. # # Examples: # os_name os_version # -------------- ------- # clear-linux-os 21180 (Changes twice daily) # ubuntu 12.04 16.04 17.10 18.04 # fedora 27 # centos 6.9 7.4.1708 # # Potential sources are tried (in order of preference) until a # suitable one is found. # Implementation documentation: # # The potential sources, in order, are as follows. # - /etc/centos-release # Centos 7 also has /etc/os-release. File /etc/os-release is less # precise about the Centos version (e.g., "7" instead of "7.4.1708"). # For that reason, this file is checked first. # Examples: # CentOS release 6.9 (Final) # CentOS Linux release 7.4.1708 (Core) # - /usr/lib/os-release # Present for Clear Linux, modern Fedora, and Ubuntu since some time # between 14.04 and 16.04. The ID and VERSION_ID values are used. 
# Examples: # ID=clear-linux-os VERSION_ID=21180 # ID=fedora VERSION_ID=27 # ID=ubuntu VERSION_ID="14.04" # ID=ubuntu VERSION_ID="16.04" # ID="ubuntu" VERSION_ID="17.10" # - /etc/os-release - Same form as (sometimes a link to) /usr/lib/os-release # ID="Ubuntu" VERSION_ID="12.04" # ID="Ubuntu" VERSION_ID="14.04" # with a symbolic link: /etc/os-release -> ../usr/lib/os-release # ID="CentOS Linux" VERSION_ID="7" Also: ID_LIKE="rhel fedora" # - /etc/lsb-release # For Centos, not too meaningful. # Other "OS"s are more reasonable: # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=12.04 # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=14.04 # DISTRIB_ID=Ubuntu DISTRIB_RELEASE=17.10 function(get_os_release_info _vn_id _vn_version_id _vn_codename) set(_var_id "") set(_var_version_id "") set(_var_codename "") if("${_var_id}" STREQUAL "") set(file_path "/etc/centos-release") if(EXISTS "${file_path}") # Example: CentOS release 6.9 (Final) file(STRINGS "${file_path}" file_list LIMIT_COUNT 1) list(GET file_list 0 file_line) # Remove all parenthesized items. string(REGEX REPLACE "\\([^)]+\\)" "" file_line "${file_line}") # Extract start and end, discard optional "version" or "release" string(REGEX MATCH "^([A-Za-z0-9_]+)( +(version|release))? +(.*)$" _dummy "${file_line}") # 1 2 3 4 set(_var_id "${CMAKE_MATCH_1}") set(_var_version_id "${CMAKE_MATCH_4}") endif() endif() if("${_var_id}" STREQUAL "") if(EXISTS "/usr/lib/os-release") set(file_path "/usr/lib/os-release") elseif(EXISTS "/etc/os-release") set(file_path "/etc/os-release") else() set(file_path "") endif() if(NOT "${file_path}" STREQUAL "") file(STRINGS "${file_path}" data_list REGEX "^(ID|VERSION_ID|VERSION_CODENAME)=") # Look for lines like "ID="..." and VERSION_ID="..." 
foreach(_var ${data_list}) if("${_var}" MATCHES "^(ID)=(.*)$") set(_var_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(VERSION_ID)=(.*)$") set(_var_version_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(VERSION_CODENAME)=(.*)$") set(_var_codename "${CMAKE_MATCH_2}") endif() endforeach() endif() endif() if("${_var_id}" STREQUAL "") set(file_path "/etc/lsb-release") if(EXISTS "${file_path}") file(STRINGS "${file_path}" data_list REGEX "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=") # Look for lines like "DISTRIB_ID="..." and DISTRIB_RELEASE="..." foreach(_var ${data_list}) if("${_var}" MATCHES "^(DISTRIB_ID)=(.*)$") set(_var_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(DISTRIB_RELEASE)=(.*)$") set(_var_version_id "${CMAKE_MATCH_2}") elseif("${_var}" MATCHES "^(DISTRIB_CODENAME)=(.*)$") set(_var_codename "${CMAKE_MATCH_2}") endif() endforeach() endif() endif() string(TOLOWER "${_var_id}" "_var_id") string(STRIP "${_var_id}" _var_id) string(STRIP "${_var_version_id}" _var_version_id) string(STRIP "${_var_codename}" _var_codename) # Remove any enclosing quotation marks string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_id "${_var_id}") string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_version_id "${_var_version_id}") string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_codename "${_var_codename}") if(NOT "${_vn_id}" STREQUAL "") set(${_vn_id} "${_var_id}" PARENT_SCOPE) endif() if(NOT "${_vn_version_id}" STREQUAL "") set(${_vn_version_id} "${_var_version_id}" PARENT_SCOPE) endif() if(NOT "${_vn_codename}" STREQUAL "") set(${_vn_codename} "${_var_codename}" PARENT_SCOPE) endif() endfunction() endif(NOT DEFINED _os_release_info) compute-runtime-20.13.16352/level_zero/source/000077500000000000000000000000001363734646600210235ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/source/CMakeLists.txt000066400000000000000000000001501363734646600235570ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
add_subdirectories() compute-runtime-20.13.16352/level_zero/source/inc/000077500000000000000000000000001363734646600215745ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/source/inc/ze_intel_gpu.h000066400000000000000000000032211363734646600244270ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include /////////////////////////////////////////////////////////////////////////////// #if defined(__linux__) #include #define HMODULE void * #define MAKE_VERSION() L0_PROJECT_VERSION_MAJOR "." L0_PROJECT_VERSION_MINOR #define MAKE_LIBRARY_NAME(NAME, VERSION) "lib" NAME ".so." VERSION #define LOAD_DRIVER_LIBRARY(NAME) dlopen(NAME, RTLD_LAZY | RTLD_LOCAL) #define FREE_DRIVER_LIBRARY(LIB) \ if (LIB) \ dlclose(LIB) #define GET_FUNCTION_PTR(LIB, FUNC_NAME) dlsym(LIB, FUNC_NAME) #elif defined(_WIN32) #include #define MAKE_LIBRARY_NAME(NAME, VERSION) NAME VERSION ".dll" #define LOAD_DRIVER_LIBRARY(NAME) LoadLibraryA(NAME) #define FREE_DRIVER_LIBRARY(LIB) \ if (LIB) \ FreeLibrary(LIB) #define GET_FUNCTION_PTR(LIB, FUNC_NAME) GetProcAddress((HMODULE)LIB, FUNC_NAME) #else #error "Unsupported OS" #endif /////////////////////////////////////////////////////////////////////////////// inline bool getenv_tobool(const char *name) { const char *env = getenv(name); if ((nullptr == env) || (0 == strcmp("0", env))) return false; return (0 == strcmp("1", env)); } #if defined(__linux__) #define LOAD_INTEL_GPU_LIBRARY() LOAD_DRIVER_LIBRARY(MAKE_LIBRARY_NAME("ze_intel_gpu", MAKE_VERSION())) #elif defined(_WIN32) #if _WIN64 #define LOAD_INTEL_GPU_LIBRARY() LOAD_DRIVER_LIBRARY(MAKE_LIBRARY_NAME("ze_intel_gpu", "64")) #else #define LOAD_INTEL_GPU_LIBRARY() LOAD_DRIVER_LIBRARY(MAKE_LIBRARY_NAME("ze_intel_gpu", "32")) #endif 
#endif
compute-runtime-20.13.16352/level_zero/tools/000077500000000000000000000000001363734646600206635ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/000077500000000000000000000000001363734646600221635ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/CMakeLists.txt000066400000000000000000000011761363734646600247300ustar00rootroot00000000000000
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

add_subdirectory(tracing)
add_subdirectory(metrics)
add_subdirectory(sysman)
add_subdirectory(pin)

# Sources that live directly in this directory.
set(L0_TOOLS_SOURCES
    ${CMAKE_CURRENT_SOURCE_DIR}/tools_init.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/tools_init.h
    ${CMAKE_CURRENT_SOURCE_DIR}/tools_init_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/tools_init_imp.h
)

target_sources(${L0_STATIC_LIB_NAME}
    PRIVATE
        ${L0_TOOLS_SOURCES}
        ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)

# Make our source files visible to parent
set_property(GLOBAL PROPERTY L0_TOOLS_SOURCES ${L0_TOOLS_SOURCES})
compute-runtime-20.13.16352/level_zero/tools/source/metrics/000077500000000000000000000000001363734646600236315ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/metrics/CMakeLists.txt000066400000000000000000000015161363734646600263740ustar00rootroot00000000000000
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

set(L0_SRCS_TOOLS_METRICS
)

# With instrumentation support the real implementation is built; otherwise
# only the stub translation unit is compiled.
if(HAVE_INSTRUMENTATION)
    list(APPEND L0_SRCS_TOOLS_METRICS
         ${CMAKE_CURRENT_SOURCE_DIR}/metric.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/metric_enumeration_imp.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/metric_tracer_imp.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/metric_query_imp.cpp)
else()
    list(APPEND L0_SRCS_TOOLS_METRICS
         ${CMAKE_CURRENT_SOURCE_DIR}/metric_stubs.cpp)
endif()

# OS specific sources.
if(UNIX)
    add_subdirectory(linux)
else()
    add_subdirectory(windows)
endif()

target_sources(${L0_STATIC_LIB_NAME}
    PRIVATE
        ${L0_SRCS_TOOLS_METRICS}
        ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
)

# Make our source files visible to parent
set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_METRICS ${L0_SRCS_TOOLS_METRICS}) compute-runtime-20.13.16352/level_zero/tools/source/metrics/linux/000077500000000000000000000000001363734646600247705ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/metrics/linux/CMakeLists.txt000066400000000000000000000010711363734646600275270ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(HAVE_INSTRUMENTATION) set(L0_SRCS_TOOLS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_query_imp_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_enumeration_imp_linux.cpp) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_LINUX ${L0_SRCS_TOOLS_LINUX}) endif() os_metric_enumeration_imp_linux.cpp000066400000000000000000000004271363734646600340760ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/metrics/linux/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_enumeration_imp.h" namespace L0 { const char *MetricEnumeration::getMetricsDiscoveryFilename() { return "libmd.so"; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/linux/os_metric_query_imp_linux.cpp000066400000000000000000000026551363734646600330010ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/metrics/metric_query_imp.h" using namespace MetricsLibraryApi; namespace L0 { const char *MetricsLibrary::getFilename() { return "libigdml64.so"; } bool MetricsLibrary::getContextData(Device &device, ContextCreateData_1_0 &contextData) { return true; } bool MetricsLibrary::activateConfiguration(const 
ConfigurationHandle_1_0 configurationHandle) { ConfigurationActivateData_1_0 activateData = {}; activateData.Type = GpuConfigurationActivationType::Tbs; const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationActivate(configurationHandle, &activateData) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::deactivateConfiguration(const ConfigurationHandle_1_0 configurationHandle) { const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationDeactivate(configurationHandle) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric.cpp000066400000000000000000000310061363734646600256200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric.h" #include "shared/source/os_interface/os_library.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/source/inc/ze_intel_gpu.h" #include "level_zero/tools/source/metrics/metric_enumeration_imp.h" #include "level_zero/tools/source/metrics/metric_query_imp.h" #include "instrumentation.h" #include #include namespace L0 { struct MetricGroupDomains { public: MetricGroupDomains(MetricContext &metricContext); ze_result_t activateDeferred(const uint32_t count, zet_metric_group_handle_t *phMetricGroups); ze_result_t activate(); ze_result_t deactivate(); bool isActivated(const zet_metric_group_handle_t hMetricGroup); protected: bool activateMetricGroupDeffered(const zet_metric_group_handle_t hMetricGroup); 
bool activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup); protected: MetricsLibrary &metricsLibrary; // Map holds activated domains and associated metric groups. // Content: std::map> domains; }; struct MetricContextImp : public MetricContext { public: MetricContextImp(Device &device); ~MetricContextImp() override; bool loadDependencies() override; bool isInitialized() override; void setInitializationState(const ze_result_t state) override; Device &getDevice() override; MetricsLibrary &getMetricsLibrary() override; MetricEnumeration &getMetricEnumeration() override; MetricTracer *getMetricTracer() override; void setMetricTracer(MetricTracer *pMetricTracer) override; void setMetricsLibrary(MetricsLibrary &metricsLibrary) override; void setMetricEnumeration(MetricEnumeration &metricEnumeration) override; ze_result_t activateMetricGroups() override; ze_result_t activateMetricGroupsDeferred(const uint32_t count, zet_metric_group_handle_t *phMetricGroups) override; bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) override; void setUseCompute(const bool useCompute) override; bool isComputeUsed() override; protected: ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED; struct Device &device; std::unique_ptr metricEnumeration = nullptr; std::unique_ptr metricsLibrary = nullptr; MetricGroupDomains metricGroupDomains; MetricTracer *pMetricTracer = nullptr; bool useCompute = false; }; MetricContextImp::MetricContextImp(Device &deviceInput) : device(deviceInput), metricEnumeration(std::unique_ptr(new (std::nothrow) MetricEnumeration(*this))), metricsLibrary(std::unique_ptr(new (std::nothrow) MetricsLibrary(*this))), metricGroupDomains(*this) { } MetricContextImp::~MetricContextImp() { metricsLibrary.reset(); metricEnumeration.reset(); } bool MetricContextImp::loadDependencies() { bool result = true; if (metricEnumeration->loadMetricsDiscovery() != ZE_RESULT_SUCCESS) { result = false; DEBUG_BREAK_IF(!result); } if (result 
&& !metricsLibrary->load()) { result = false; DEBUG_BREAK_IF(!result); } if (result) { setInitializationState(ZE_RESULT_SUCCESS); } return result; } bool MetricContextImp::isInitialized() { return initializationState == ZE_RESULT_SUCCESS; } void MetricContextImp::setInitializationState(const ze_result_t state) { initializationState = state; } Device &MetricContextImp::getDevice() { return device; } MetricsLibrary &MetricContextImp::getMetricsLibrary() { return *metricsLibrary; } MetricEnumeration &MetricContextImp::getMetricEnumeration() { return *metricEnumeration; } MetricTracer *MetricContextImp::getMetricTracer() { return pMetricTracer; } void MetricContextImp::setMetricTracer(MetricTracer *pMetricTracer) { this->pMetricTracer = pMetricTracer; } void MetricContextImp::setMetricsLibrary(MetricsLibrary &metricsLibrary) { this->metricsLibrary.release(); this->metricsLibrary.reset(&metricsLibrary); } void MetricContextImp::setMetricEnumeration(MetricEnumeration &metricEnumeration) { this->metricEnumeration.release(); this->metricEnumeration.reset(&metricEnumeration); } void MetricContextImp::setUseCompute(const bool useCompute) { this->useCompute = useCompute; } bool MetricContextImp::isComputeUsed() { return useCompute; } ze_result_t MetricContextImp::activateMetricGroupsDeferred(const uint32_t count, zet_metric_group_handle_t *phMetricGroups) { // Activation: postpone until zetMetricTracerOpen or zeCommandQueueExecuteCommandLists // Deactivation: execute immediately. return phMetricGroups ? 
metricGroupDomains.activateDeferred(count, phMetricGroups) : metricGroupDomains.deactivate(); } bool MetricContextImp::isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) { return metricGroupDomains.isActivated(hMetricGroup); } ze_result_t MetricContextImp::activateMetricGroups() { return metricGroupDomains.activate(); } void MetricContext::enableMetricApi(ze_result_t &result) { if (!getenv_tobool("ZE_ENABLE_METRICS")) { result = ZE_RESULT_SUCCESS; return; } if (!isMetricApiAvailable()) { result = ZE_RESULT_ERROR_UNKNOWN; return; } DriverHandle *driverHandle = L0::DriverHandle::fromHandle(GlobalDriver.get()); uint32_t count = 0; result = driverHandle->getDevice(&count, nullptr); if (result != ZE_RESULT_SUCCESS) { result = ZE_RESULT_ERROR_UNKNOWN; return; } std::vector devices(count); result = driverHandle->getDevice(&count, devices.data()); if (result != ZE_RESULT_SUCCESS) { result = ZE_RESULT_ERROR_UNKNOWN; return; } for (auto deviceHandle : devices) { Device *device = L0::Device::fromHandle(deviceHandle); if (!device->getMetricContext().loadDependencies()) { result = ZE_RESULT_ERROR_UNKNOWN; return; } } } std::unique_ptr MetricContext::create(Device &device) { auto metricContextImp = new (std::nothrow) MetricContextImp(device); std::unique_ptr metricContext{metricContextImp}; return metricContext; } bool MetricContext::isMetricApiAvailable() { std::unique_ptr library = nullptr; // Check Metrics Discovery availability. library.reset(NEO::OsLibrary::load(MetricEnumeration::getMetricsDiscoveryFilename())); if (library == nullptr) { return false; } // Check Metrics Library availability. 
library.reset(NEO::OsLibrary::load(MetricsLibrary::getFilename())); if (library == nullptr) { return false; } return true; } MetricGroupDomains::MetricGroupDomains(MetricContext &metricContext) : metricsLibrary(metricContext.getMetricsLibrary()) {} ze_result_t MetricGroupDomains::activateDeferred(const uint32_t count, zet_metric_group_handle_t *phMetricGroups) { // For each metric group: for (uint32_t i = 0; i < count; ++i) { DEBUG_BREAK_IF(!phMetricGroups[i]); // Try to associate it with a domain (oa, ...). if (!activateMetricGroupDeffered(phMetricGroups[i])) { return ZE_RESULT_ERROR_UNKNOWN; } } return ZE_RESULT_SUCCESS; } bool MetricGroupDomains::activateMetricGroupDeffered(const zet_metric_group_handle_t hMetricGroup) { const auto properites = MetricGroup::getProperties(hMetricGroup); const auto domain = properites.domain; const bool isDomainFree = domains[domain].first == nullptr; const bool isSameGroup = domains[domain].first == hMetricGroup; // The same metric group has been already associated. if (isSameGroup) { return true; } // Domain has been already associated with a different metric group. if (!isDomainFree) { return false; } // Associate metric group with domain and mark it as not active. // Activation will be performed during zeCommandQueueExecuteCommandLists (query) // or zetMetricTracerOpen (time based sampling). domains[domain].first = hMetricGroup; domains[domain].second = false; return true; } ze_result_t MetricGroupDomains::activate() { // For each domain. for (auto &domain : domains) { auto hMetricGroup = domain.second.first; bool &metricGroupActive = domain.second.second; bool metricGroupEventBased = hMetricGroup && MetricGroup::getProperties(hMetricGroup).samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_EVENT_BASED; // Activate only event based metric groups. // Time based metric group will be activated during zetMetricTracerOpen. 
if (metricGroupEventBased && !metricGroupActive) { metricGroupActive = activateEventMetricGroup(hMetricGroup); if (metricGroupActive == false) { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } } return ZE_RESULT_SUCCESS; } bool MetricGroupDomains::activateEventMetricGroup(const zet_metric_group_handle_t hMetricGroup) { // Obtain metric group configuration handle from metrics library. auto hConfiguration = metricsLibrary.getConfiguration(hMetricGroup); // Validate metrics library handle. if (!hConfiguration.IsValid()) { DEBUG_BREAK_IF(true); return false; } // Write metric group configuration to gpu. const bool result = metricsLibrary.activateConfiguration(hConfiguration); DEBUG_BREAK_IF(!result); return result; } ze_result_t MetricGroupDomains::deactivate() { // Deactivate metric group for each domain. for (auto &domain : domains) { auto hMetricGroup = domain.second.first; auto metricGroup = MetricGroup::fromHandle(hMetricGroup); bool metricGroupActivated = domain.second.second; auto metricGroupEventBased = (metricGroup != nullptr) ? MetricGroup::getProperties(hMetricGroup).samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_EVENT_BASED : false; auto hConfigurationEmpty = ConfigurationHandle_1_0{}; auto hConfiguration = metricGroupEventBased ? metricsLibrary.getConfiguration(hMetricGroup) : hConfigurationEmpty; // Deactivate metric group configuration using metrics library. if (hConfiguration.IsValid() && metricGroupActivated) { metricsLibrary.deactivateConfiguration(hConfiguration); } // Mark domain as free. domain.second = {}; } return ZE_RESULT_SUCCESS; } bool MetricGroupDomains::isActivated(const zet_metric_group_handle_t hMetricGroup) { auto metricGroupProperties = MetricGroup::getProperties(hMetricGroup); // 1. Check whether domain is activated. const auto domain = domains.find(metricGroupProperties.domain); if (domain == domains.end()) { return false; } // 2. Check whether the specific MetricGroup is activated. 
return domain->second.first == hMetricGroup; } ze_result_t metricGroupGet(zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { auto device = Device::fromHandle(hDevice); return device->getMetricContext().getMetricEnumeration().metricGroupGet(*pCount, phMetricGroups); } ze_result_t metricGet(zet_metric_group_handle_t hMetricGroup, uint32_t *pCount, zet_metric_handle_t *phMetrics) { auto metricGroup = MetricGroup::fromHandle(hMetricGroup); return metricGroup->getMetric(pCount, phMetrics); } ze_result_t metricTracerOpen(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_tracer_desc_t *pDesc, ze_event_handle_t hNotificationEvent, zet_metric_tracer_handle_t *phMetricTracer) { *phMetricTracer = MetricTracer::open(hDevice, hMetricGroup, *pDesc, hNotificationEvent); return (*phMetricTracer != nullptr) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric.h000066400000000000000000000151411363734646600252670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/core/source/event/event.h" #include #include struct _zet_metric_group_handle_t {}; struct _zet_metric_handle_t {}; struct _zet_metric_tracer_handle_t {}; struct _zet_metric_query_pool_handle_t {}; struct _zet_metric_query_handle_t {}; namespace MetricsDiscovery { class IMetricSet_1_5; class IConcurrentGroup_1_5; } // namespace MetricsDiscovery namespace L0 { struct MetricsLibrary; struct CommandList; struct MetricEnumeration; struct MetricTracer; struct MetricContext { virtual ~MetricContext() = default; static std::unique_ptr create(struct Device &device); static bool isMetricApiAvailable(); virtual bool loadDependencies() = 0; virtual bool isInitialized() = 0; virtual void setInitializationState(const ze_result_t state) = 0; virtual Device &getDevice() = 0; virtual MetricsLibrary 
&getMetricsLibrary() = 0; virtual MetricEnumeration &getMetricEnumeration() = 0; virtual MetricTracer *getMetricTracer() = 0; virtual void setMetricTracer(MetricTracer *pMetricTracer) = 0; virtual void setMetricsLibrary(MetricsLibrary &metricsLibrary) = 0; virtual void setMetricEnumeration(MetricEnumeration &metricEnumeration) = 0; // Called by zetInit. static void enableMetricApi(ze_result_t &result); // Metric groups activation. virtual ze_result_t activateMetricGroups() = 0; virtual ze_result_t activateMetricGroupsDeferred(const uint32_t count, zet_metric_group_handle_t *phMetricGroups) = 0; virtual bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) = 0; virtual void setUseCompute(const bool useCompute) = 0; virtual bool isComputeUsed() = 0; }; struct Metric : _zet_metric_handle_t { virtual ~Metric() = default; virtual ze_result_t getProperties(zet_metric_properties_t *pProperties) = 0; static Metric *create(zet_metric_properties_t &properties); static Metric *fromHandle(zet_metric_handle_t handle) { return static_cast(handle); } inline zet_metric_handle_t toHandle() { return this; } }; struct MetricGroup : _zet_metric_group_handle_t { virtual ~MetricGroup() = default; virtual ze_result_t getProperties(zet_metric_group_properties_t *pProperties) = 0; virtual ze_result_t getMetric(uint32_t *pCount, zet_metric_handle_t *phMetrics) = 0; virtual ze_result_t calculateMetricValues(size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) = 0; static MetricGroup *create(zet_metric_group_properties_t &properties, MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const std::vector &metrics); static MetricGroup *fromHandle(zet_metric_group_handle_t handle) { return static_cast(handle); } static zet_metric_group_properties_t getProperties(const zet_metric_group_handle_t handle); zet_metric_group_handle_t toHandle() { return this; } virtual uint32_t 
getRawReportSize() = 0; virtual bool activate() = 0; virtual bool deactivate() = 0; virtual ze_result_t openIoStream(uint32_t &timerPeriodNs, uint32_t &oaBufferSize) = 0; virtual ze_result_t waitForReports(const uint32_t timeoutMs) = 0; virtual ze_result_t readIoStream(uint32_t &reportCount, uint8_t &reportData) = 0; virtual ze_result_t closeIoStream() = 0; }; struct MetricTracer : _zet_metric_tracer_handle_t { virtual ~MetricTracer() = default; virtual ze_result_t readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) = 0; virtual ze_result_t close() = 0; static MetricTracer *open(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_tracer_desc_t &desc, ze_event_handle_t hNotificationEvent); static MetricTracer *fromHandle(zet_metric_tracer_handle_t handle) { return static_cast(handle); } virtual Event::State getNotificationState() = 0; inline zet_metric_tracer_handle_t toHandle() { return this; } }; struct MetricQueryPool : _zet_metric_query_pool_handle_t { virtual ~MetricQueryPool() = default; virtual bool destroy() = 0; virtual ze_result_t createMetricQuery(uint32_t index, zet_metric_query_handle_t *phMetricQuery) = 0; static MetricQueryPool *create(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t &desc); static MetricQueryPool *fromHandle(zet_metric_query_pool_handle_t handle); zet_metric_query_pool_handle_t toHandle(); }; struct MetricQuery : _zet_metric_query_handle_t { virtual ~MetricQuery() = default; virtual ze_result_t appendBegin(CommandList &commandList) = 0; virtual ze_result_t appendEnd(CommandList &commandList, ze_event_handle_t hCompletionEvent) = 0; static ze_result_t appendMemoryBarrier(CommandList &commandList); static ze_result_t appendTracerMarker(CommandList &commandList, zet_metric_tracer_handle_t hMetricTracer, uint32_t value); virtual ze_result_t getData(size_t *pRawDataSize, uint8_t *pRawData) = 0; virtual ze_result_t reset() = 0; virtual 
ze_result_t destroy() = 0; static MetricQuery *fromHandle(zet_metric_query_handle_t handle); zet_metric_query_handle_t toHandle(); }; // MetricGroup. ze_result_t metricGroupGet(zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups); // Metric. ze_result_t metricGet(zet_metric_group_handle_t hMetricGroup, uint32_t *pCount, zet_metric_handle_t *phMetrics); // MetricTracer. ze_result_t metricTracerOpen(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_tracer_desc_t *pDesc, ze_event_handle_t hNotificationEvent, zet_metric_tracer_handle_t *phMetricTracer); // MetricQueryPool. ze_result_t metricQueryPoolCreate(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool); ze_result_t metricQueryPoolDestroy(zet_metric_query_pool_handle_t hMetricQueryPool); } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric_enumeration_imp.cpp000066400000000000000000000610021363734646600310720ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_enumeration_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/os_library.h" #include namespace L0 { const char *MetricEnumeration::oaConcurrentGroupName = "OA"; MetricEnumeration::MetricEnumeration(MetricContext &metricContextInput) : metricContext(metricContextInput) {} MetricEnumeration::~MetricEnumeration() { cleanupMetricsDiscovery(); initializationState = ZE_RESULT_ERROR_UNINITIALIZED; } ze_result_t MetricEnumeration::metricGroupGet(uint32_t &count, zet_metric_group_handle_t *phMetricGroups) { if (initialize() != ZE_RESULT_SUCCESS) { return ZE_RESULT_ERROR_UNKNOWN; } if (count == 0) { count = static_cast(metricGroups.size()); return ZE_RESULT_SUCCESS; } 
else if (count > metricGroups.size()) { count = static_cast(metricGroups.size()); } // User is expected to allocate space. if (phMetricGroups == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } for (uint32_t i = 0; i < count; i++) { phMetricGroups[i] = metricGroups[i]->toHandle(); } return ZE_RESULT_SUCCESS; } bool MetricEnumeration::isInitialized() { return initializationState == ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::initialize() { if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) { if (metricContext.isInitialized() && openMetricsDiscovery() == ZE_RESULT_SUCCESS && cacheMetricInformation() == ZE_RESULT_SUCCESS) { initializationState = ZE_RESULT_SUCCESS; } else { initializationState = ZE_RESULT_ERROR_UNKNOWN; cleanupMetricsDiscovery(); } } return initializationState; } ze_result_t MetricEnumeration::loadMetricsDiscovery() { // Load library. hMetricsDiscovery.reset(NEO::OsLibrary::load(getMetricsDiscoveryFilename())); // Load exported functions. if (hMetricsDiscovery) { openMetricsDevice = reinterpret_cast( hMetricsDiscovery->getProcAddress("OpenMetricsDevice")); closeMetricsDevice = reinterpret_cast( hMetricsDiscovery->getProcAddress("CloseMetricsDevice")); openMetricsDeviceFromFile = reinterpret_cast( hMetricsDiscovery->getProcAddress("OpenMetricsDeviceFromFile")); } if (openMetricsDevice == nullptr || closeMetricsDevice == nullptr || openMetricsDeviceFromFile == nullptr) { cleanupMetricsDiscovery(); return ZE_RESULT_ERROR_UNKNOWN; } // Return success if exported functions have been loaded. 
return ZE_RESULT_SUCCESS;
}

// Opens the metrics device through the previously resolved entry point.
// Both entry points must already be loaded (enforced by UNRECOVERABLE_IF).
ze_result_t MetricEnumeration::openMetricsDiscovery() {
    UNRECOVERABLE_IF(openMetricsDevice == nullptr);
    UNRECOVERABLE_IF(closeMetricsDevice == nullptr);

    auto openResult = openMetricsDevice(&pMetricsDevice);
    if (openResult != MetricsDiscovery::CC_OK) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    return ZE_RESULT_SUCCESS;
}

// Deletes the cached metric groups, closes the metrics device and unloads the
// Metrics Discovery library. Always returns ZE_RESULT_SUCCESS.
ze_result_t MetricEnumeration::cleanupMetricsDiscovery() {
    for (uint32_t i = 0; i < metricGroups.size(); ++i) {
        delete metricGroups[i];
    }
    metricGroups.clear();

    // The device must be closed while the library is still loaded.
    if (pMetricsDevice) {
        closeMetricsDevice(pMetricsDevice);
        pMetricsDevice = nullptr;
    }

    if (hMetricsDiscovery != nullptr) {
        // Entry points become invalid once the library is released.
        openMetricsDevice = nullptr;
        closeMetricsDevice = nullptr;
        openMetricsDeviceFromFile = nullptr;
        hMetricsDiscovery.reset();
    }

    return ZE_RESULT_SUCCESS;
}

ze_result_t MetricEnumeration::cacheMetricInformation() {
    DEBUG_BREAK_IF(pMetricsDevice == nullptr);

    MetricsDiscovery::TMetricsDeviceParams_1_2 *pMetricsDeviceParams = pMetricsDevice->GetParams();
    DEBUG_BREAK_IF(pMetricsDeviceParams == nullptr);

    // Check required Metrics Discovery API version - should be at least 1.5.
    const bool unsupportedMajorVersion =
        pMetricsDeviceParams->Version.MajorNumber < requiredMetricsDiscoveryMajorVersion;
    const bool unsupportedMinorVersion =
        (pMetricsDeviceParams->Version.MajorNumber == requiredMetricsDiscoveryMajorVersion) &&
        (pMetricsDeviceParams->Version.MinorNumber < requiredMetricsDiscoveryMinorVersion);

    if (unsupportedMajorVersion || unsupportedMinorVersion) {
        // Metrics Discovery API version too low
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    // 1. Iterate over concurrent groups.
MetricsDiscovery::IConcurrentGroup_1_5 *pConcurrentGroup = nullptr; for (uint32_t i = 0; i < pMetricsDeviceParams->ConcurrentGroupsCount; ++i) { pConcurrentGroup = pMetricsDevice->GetConcurrentGroup(i); DEBUG_BREAK_IF(pConcurrentGroup == nullptr); MetricsDiscovery::TConcurrentGroupParams_1_0 *pConcurrentGroupParams = pConcurrentGroup->GetParams(); DEBUG_BREAK_IF(pConcurrentGroupParams == nullptr); // 2. Find "OA" concurrent group. if (strcmp(pConcurrentGroupParams->SymbolName, oaConcurrentGroupName) == 0) { // Reserve memory for metric groups metricGroups.reserve(pConcurrentGroupParams->MetricSetsCount); // 3. Iterate over metric sets. for (uint32_t j = 0; j < pConcurrentGroupParams->MetricSetsCount; ++j) { MetricsDiscovery::IMetricSet_1_5 *pMetricSet = pConcurrentGroup->GetMetricSet(j); DEBUG_BREAK_IF(pMetricSet == nullptr); cacheMetricGroup(*pMetricSet, *pConcurrentGroup, i, ZET_METRIC_GROUP_SAMPLING_TYPE_TIME_BASED); cacheMetricGroup(*pMetricSet, *pConcurrentGroup, i, ZET_METRIC_GROUP_SAMPLING_TYPE_EVENT_BASED); } } } return ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::cacheMetricGroup(MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const uint32_t domain, const zet_metric_group_sampling_type_t samplingType) { MetricsDiscovery::TMetricSetParams_1_4 *pMetricSetParams = metricSet.GetParams(); DEBUG_BREAK_IF(pMetricSetParams == nullptr); const uint32_t sourceApiMask = MetricGroupImp::getApiMask(samplingType); // Map metric groups to level zero format and cache them. 
if (pMetricSetParams->ApiMask & sourceApiMask) { metricSet.SetApiFiltering(sourceApiMask); // Obtain params once again - updated after SetApiFiltering pMetricSetParams = metricSet.GetParams(); zet_metric_group_properties_t properties = {}; properties.version = ZET_METRIC_GROUP_PROPERTIES_VERSION_CURRENT; snprintf(properties.name, sizeof(properties.name), "%s", pMetricSetParams->SymbolName); // To always have null-terminated string snprintf(properties.description, sizeof(properties.description), "%s", pMetricSetParams->ShortName); properties.samplingType = samplingType; properties.domain = domain; // Concurrent group number properties.metricCount = pMetricSetParams->MetricsCount + pMetricSetParams->InformationCount; std::vector metrics; createMetrics(metricSet, metrics); auto pMetricGroup = MetricGroup::create(properties, metricSet, concurrentGroup, metrics); DEBUG_BREAK_IF(pMetricGroup == nullptr); metricGroups.push_back(pMetricGroup); // Disable api filtering metricSet.SetApiFiltering(MetricsDiscovery::API_TYPE_ALL); } return ZE_RESULT_SUCCESS; } ze_result_t MetricEnumeration::createMetrics(MetricsDiscovery::IMetricSet_1_5 &metricSet, std::vector &metrics) { MetricsDiscovery::TMetricSetParams_1_4 *pMetricSetParams = metricSet.GetParams(); DEBUG_BREAK_IF(pMetricSetParams == nullptr); metrics.reserve(pMetricSetParams->MetricsCount + pMetricSetParams->InformationCount); // Map metrics to level zero format and add them to 'metrics' vector. 
for (uint32_t i = 0; i < pMetricSetParams->MetricsCount; ++i) {
        MetricsDiscovery::IMetric_1_0 *pSourceMetric = metricSet.GetMetric(i);
        DEBUG_BREAK_IF(pSourceMetric == nullptr);

        MetricsDiscovery::TMetricParams_1_0 *pSourceMetricParams = pSourceMetric->GetParams();
        DEBUG_BREAK_IF(pSourceMetricParams == nullptr);

        zet_metric_properties_t properties = {};
        properties.version = ZET_METRIC_PROPERTIES_VERSION_CURRENT;
        snprintf(properties.name, sizeof(properties.name), "%s",
                 pSourceMetricParams->SymbolName); // To always have a null-terminated string
        snprintf(properties.description, sizeof(properties.description), "%s",
                 pSourceMetricParams->LongName);
        snprintf(properties.component, sizeof(properties.component), "%s",
                 pSourceMetricParams->GroupName);
        snprintf(properties.resultUnits, sizeof(properties.resultUnits), "%s",
                 pSourceMetricParams->MetricResultUnits);
        properties.tierNumber = getMetricTierNumber(pSourceMetricParams->UsageFlagsMask);
        properties.metricType = getMetricType(pSourceMetricParams->MetricType);
        properties.resultType = getMetricResultType(pSourceMetricParams->ResultType);

        // A failed allocation is treated as unrecoverable.
        auto pMetric = Metric::create(properties);
        UNRECOVERABLE_IF(pMetric == nullptr);

        metrics.push_back(pMetric);
    }

    // Map information to level zero format and add them to 'metrics' vector (as metrics).
for (uint32_t i = 0; i < pMetricSetParams->InformationCount; ++i) { MetricsDiscovery::IInformation_1_0 *pSourceInformation = metricSet.GetInformation(i); DEBUG_BREAK_IF(pSourceInformation == nullptr); MetricsDiscovery::TInformationParams_1_0 *pSourceInformationParams = pSourceInformation->GetParams(); DEBUG_BREAK_IF(pSourceInformationParams == nullptr); zet_metric_properties_t properties; properties.version = ZET_METRIC_PROPERTIES_VERSION_CURRENT; snprintf(properties.name, sizeof(properties.name), "%s", pSourceInformationParams->SymbolName); // To always have a null-terminated string snprintf(properties.description, sizeof(properties.description), "%s", pSourceInformationParams->LongName); snprintf(properties.component, sizeof(properties.component), "%s", pSourceInformationParams->GroupName); snprintf(properties.resultUnits, sizeof(properties.resultUnits), "%s", pSourceInformationParams->InfoUnits); properties.tierNumber = 1; properties.metricType = getMetricType(pSourceInformationParams->InfoType); // MetricsDiscovery information are always UINT64 properties.resultType = ZET_VALUE_TYPE_UINT64; auto pMetric = Metric::create(properties); UNRECOVERABLE_IF(pMetric == nullptr); metrics.push_back(pMetric); } return ZE_RESULT_SUCCESS; } uint32_t MetricEnumeration::getMetricTierNumber(const uint32_t sourceUsageFlagsMask) const { uint32_t tierNumber = 0; if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_1) { tierNumber = 1; } else if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_2) { tierNumber = 2; } else if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_3) { tierNumber = 3; } else if (sourceUsageFlagsMask & MetricsDiscovery::USAGE_FLAG_TIER_4) { tierNumber = 4; } else { // No tier - some metrics may have this undefined tierNumber = 0; } return tierNumber; } zet_metric_type_t MetricEnumeration::getMetricType(const MetricsDiscovery::TMetricType sourceMetricType) const { switch (sourceMetricType) { case 
        // Case labels of getMetricType(TMetricType); the switch opens on the preceding line.
        MetricsDiscovery::METRIC_TYPE_DURATION:
        return ZET_METRIC_TYPE_DURATION;
    case MetricsDiscovery::METRIC_TYPE_EVENT:
        return ZET_METRIC_TYPE_EVENT;
    case MetricsDiscovery::METRIC_TYPE_EVENT_WITH_RANGE:
        return ZET_METRIC_TYPE_EVENT_WITH_RANGE;
    case MetricsDiscovery::METRIC_TYPE_THROUGHPUT:
        return ZET_METRIC_TYPE_THROUGHPUT;
    case MetricsDiscovery::METRIC_TYPE_TIMESTAMP:
        return ZET_METRIC_TYPE_TIMESTAMP;
    case MetricsDiscovery::METRIC_TYPE_FLAG:
        return ZET_METRIC_TYPE_FLAG;
    case MetricsDiscovery::METRIC_TYPE_RATIO:
        return ZET_METRIC_TYPE_RATIO;
    case MetricsDiscovery::METRIC_TYPE_RAW:
        return ZET_METRIC_TYPE_RAW;
    default:
        // Unknown source type: `!false` is always true, so this is an
        // unconditional DEBUG_BREAK_IF; RAW is returned as the fallback.
        DEBUG_BREAK_IF(!false);
        return ZET_METRIC_TYPE_RAW;
    }
}

// Maps a Metrics Discovery information type to a zet_metric_type_t.
// REPORT_REASON maps to EVENT; VALUE, CONTEXT_ID_TAG, SAMPLE_PHASE and GPU_NODE
// map to RAW; FLAG and TIMESTAMP map to their namesakes. Any other value hits
// the DEBUG_BREAK_IF and falls back to RAW.
zet_metric_type_t MetricEnumeration::getMetricType(const MetricsDiscovery::TInformationType sourceInformationType) const {
    switch (sourceInformationType) {
    case MetricsDiscovery::INFORMATION_TYPE_REPORT_REASON:
        return ZET_METRIC_TYPE_EVENT;
    case MetricsDiscovery::INFORMATION_TYPE_VALUE:
    case MetricsDiscovery::INFORMATION_TYPE_CONTEXT_ID_TAG:
    case MetricsDiscovery::INFORMATION_TYPE_SAMPLE_PHASE:
    case MetricsDiscovery::INFORMATION_TYPE_GPU_NODE:
        return ZET_METRIC_TYPE_RAW;
    case MetricsDiscovery::INFORMATION_TYPE_FLAG:
        return ZET_METRIC_TYPE_FLAG;
    case MetricsDiscovery::INFORMATION_TYPE_TIMESTAMP:
        return ZET_METRIC_TYPE_TIMESTAMP;
    default:
        DEBUG_BREAK_IF(!false);
        return ZET_METRIC_TYPE_RAW;
    }
}

// Maps a Metrics Discovery result type to a zet_value_type_t.
// Unrecognized values hit the DEBUG_BREAK_IF and fall back to UINT64.
zet_value_type_t MetricEnumeration::getMetricResultType(const MetricsDiscovery::TMetricResultType sourceMetricResultType) const {
    switch (sourceMetricResultType) {
    case MetricsDiscovery::RESULT_UINT32:
        return ZET_VALUE_TYPE_UINT32;
    case MetricsDiscovery::RESULT_UINT64:
        return ZET_VALUE_TYPE_UINT64;
    case MetricsDiscovery::RESULT_BOOL:
        return ZET_VALUE_TYPE_BOOL8;
    case MetricsDiscovery::RESULT_FLOAT:
        return ZET_VALUE_TYPE_FLOAT32;
    default:
        DEBUG_BREAK_IF(!false);
        return ZET_VALUE_TYPE_UINT64;
    }
}

// Deletes every cached metric object; the vector itself is cleared on the following line.
MetricGroupImp ::~MetricGroupImp() {
    for (uint32_t i = 0; i < metrics.size(); ++i) {
        delete metrics[i];
    }
    // Tail of ~MetricGroupImp(): the metric objects were deleted by the loop on the preceding line.
    metrics.clear();
};

// Copies the cached group properties into *pProperties and returns ZE_RESULT_SUCCESS.
// pProperties is dereferenced without a null check.
ze_result_t MetricGroupImp::getProperties(zet_metric_group_properties_t *pProperties) {
    DEBUG_BREAK_IF(pProperties->version > ZET_METRIC_GROUP_PROPERTIES_VERSION_CURRENT);
    copyProperties(properties, *pProperties);
    return ZE_RESULT_SUCCESS;
}

// Convenience overload: resolves the handle to a MetricGroup and returns its
// properties by value. An unresolvable handle trips UNRECOVERABLE_IF.
zet_metric_group_properties_t MetricGroup::getProperties(const zet_metric_group_handle_t handle) {
    auto metricGroup = MetricGroup::fromHandle(handle);
    UNRECOVERABLE_IF(!metricGroup);

    zet_metric_group_properties_t properties = {ZET_METRIC_GROUP_PROPERTIES_VERSION_CURRENT};
    metricGroup->getProperties(&properties);
    return properties;
}

// Two-call enumeration of the group's metrics.
//   *pCount == 0 : writes the number of cached metrics to *pCount and returns success.
//   *pCount != 0 : clamps *pCount to the number of cached metrics, then writes that
//                  many handles to phMetrics.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT if handles are requested but phMetrics is null.
// NOTE(review): the target type of the two static_casts is missing in this view of
// the file - presumably uint32_t, the pointee type of pCount; confirm.
ze_result_t MetricGroupImp::getMetric(uint32_t *pCount, zet_metric_handle_t *phMetrics) {
    if (*pCount == 0) {
        *pCount = static_cast(metrics.size());
        return ZE_RESULT_SUCCESS;
    } else if (*pCount > metrics.size()) {
        *pCount = static_cast(metrics.size());
    }

    // User is expected to allocate space.
    if (phMetrics == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    for (uint32_t i = 0; i < *pCount; i++) {
        phMetrics[i] = metrics[i]->toHandle();
    }
    return ZE_RESULT_SUCCESS;
}

// Activates the referenced metric set; true when Activate() returns CC_OK.
bool MetricGroupImp::activate() {
    DEBUG_BREAK_IF(pReferenceMetricSet == nullptr);
    const bool result = pReferenceMetricSet->Activate() == MetricsDiscovery::CC_OK;
    DEBUG_BREAK_IF(!result);
    return result;
}

// Deactivates the referenced metric set; true when Deactivate() returns CC_OK.
// Unlike activate(), a failure here does not trigger DEBUG_BREAK_IF.
bool MetricGroupImp::deactivate() {
    DEBUG_BREAK_IF(pReferenceMetricSet == nullptr);
    const bool result = pReferenceMetricSet->Deactivate() == MetricsDiscovery::CC_OK;
    return result;
}

// Returns the Metrics Discovery API mask for a sampling type:
// IOSTREAM for time-based, OCL | OGL4_X for event-based, 0 for anything else.
uint32_t MetricGroupImp::getApiMask(const zet_metric_group_sampling_type_t samplingType) {
    switch (samplingType) {
    case ZET_METRIC_GROUP_SAMPLING_TYPE_TIME_BASED:
        return MetricsDiscovery::API_TYPE_IOSTREAM;
    case ZET_METRIC_GROUP_SAMPLING_TYPE_EVENT_BASED:
        return MetricsDiscovery::API_TYPE_OCL | MetricsDiscovery::API_TYPE_OGL4_X;
    default:
        DEBUG_BREAK_IF(true);
        return 0;
    }
}

// Opens the IO stream on the referenced concurrent group for the referenced metric set.
// timerPeriodNs and oaBufferSize are passed by address to OpenIoStream.
ze_result_t MetricGroupImp::openIoStream(uint32_t &timerPeriodNs, uint32_t &oaBufferSize) {
    const auto openResult =
        // Tail of openIoStream(): the second argument is passed as a literal 0.
        pReferenceConcurrentGroup->OpenIoStream(pReferenceMetricSet, 0, &timerPeriodNs, &oaBufferSize);
    return (openResult == MetricsDiscovery::CC_OK) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
}

// Waits on the concurrent group for reports with the given timeout.
// Any completion code other than CC_OK is reported as ZE_RESULT_NOT_READY.
ze_result_t MetricGroupImp::waitForReports(const uint32_t timeoutMs) {
    return (pReferenceConcurrentGroup->WaitForReports(timeoutMs) == MetricsDiscovery::TCompletionCode::CC_OK)
               ? ZE_RESULT_SUCCESS
               : ZE_RESULT_NOT_READY;
}

// Reads reports from the IO stream into the buffer that starts at reportData;
// reportCount is passed by address to ReadIoStream.
// Both CC_OK and CC_READ_PENDING are treated as success.
// NOTE(review): the reinterpret_cast target type is missing in this view -
// presumably char *, the type of castedReportData; confirm.
ze_result_t MetricGroupImp::readIoStream(uint32_t &reportCount, uint8_t &reportData) {
    char *castedReportData = reinterpret_cast(&reportData);

    const auto readResult = pReferenceConcurrentGroup->ReadIoStream(&reportCount, castedReportData, 0);

    switch (readResult) {
    case MetricsDiscovery::CC_OK:
    case MetricsDiscovery::CC_READ_PENDING:
        return ZE_RESULT_SUCCESS;
    default:
        return ZE_RESULT_ERROR_UNKNOWN;
    }
}

// Closes the IO stream on the referenced concurrent group.
ze_result_t MetricGroupImp::closeIoStream() {
    const auto closeResult = pReferenceConcurrentGroup->CloseIoStream();
    return (closeResult == MetricsDiscovery::CC_OK) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
}

// Two-call calculation entry point.
//   *pMetricValueCount == 0 : only computes how many values the raw data would produce.
//   otherwise               : calculates values into pMetricValues.
// pMetricValueCount is dereferenced without a null check.
ze_result_t MetricGroupImp::calculateMetricValues(size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) {
    const bool calculateCountOnly = *pMetricValueCount == 0;
    const bool result = calculateCountOnly
                            ? getCalculatedMetricCount(rawDataSize, *pMetricValueCount)
                            : getCalculatedMetricValues(rawDataSize, pRawData, *pMetricValueCount, pMetricValues);
    return result ?
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } bool MetricGroupImp::getCalculatedMetricCount(const size_t rawDataSize, uint32_t &metricValueCount) { uint32_t rawReportSize = getRawReportSize(); if (rawReportSize == 0) { return false; } if ((rawDataSize % rawReportSize) != 0) { return false; } const uint32_t rawReportCount = static_cast(rawDataSize) / rawReportSize; metricValueCount = rawReportCount * properties.metricCount; return true; } bool MetricGroupImp::getCalculatedMetricValues(const size_t rawDataSize, const uint8_t *pRawData, uint32_t &metricValueCount, zet_typed_value_t *pCalculatedData) { uint32_t calculatedReportCount = 0; uint32_t expectedMetricValueCount = 0; if (pCalculatedData == nullptr) { return false; } if (getCalculatedMetricCount(rawDataSize, expectedMetricValueCount) == false) { return false; } // Calculated metrics container. std::vector calculatedMetrics(expectedMetricValueCount); // Set filtering type. pReferenceMetricSet->SetApiFiltering(MetricGroupImp::getApiMask(properties.samplingType)); // Calculate metrics. const bool result = pReferenceMetricSet->CalculateMetrics( reinterpret_cast(const_cast(pRawData)), static_cast(rawDataSize), calculatedMetrics.data(), static_cast(calculatedMetrics.size()) * sizeof(MetricsDiscovery::TTypedValue_1_0), &calculatedReportCount, nullptr, static_cast(0)) == MetricsDiscovery::CC_OK; if (result) { // Adjust copied reports to buffer provided by the user. metricValueCount = std::min(metricValueCount, calculatedReportCount * properties.metricCount); // Translate metrics from metrics discovery to oneAPI format. 
        // Tail of getCalculatedMetricValues(): converts the first metricValueCount calculated values.
        for (size_t i = 0; i < metricValueCount; ++i) {
            copyValue(calculatedMetrics[i], pCalculatedData[i]);
        }
    }

    return result;
}

// Stores the group's properties, the addresses of the metric set and concurrent
// group it refers to, and a copy of the metric pointer list.
// The destructor deletes every pointer in `metrics`.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t MetricGroupImp::initialize(const zet_metric_group_properties_t &sourceProperties, MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const std::vector &groupMetrics) {
    copyProperties(sourceProperties, properties);
    pReferenceMetricSet = &metricSet;
    pReferenceConcurrentGroup = &concurrentGroup;
    metrics = groupMetrics;
    return ZE_RESULT_SUCCESS;
}

// Returns the per-report size from the metric set parameters:
// RawReportSize for time-based groups, QueryReportSize otherwise.
uint32_t MetricGroupImp::getRawReportSize() {
    auto pMetricSetParams = pReferenceMetricSet->GetParams();

    return (properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_TIME_BASED)
               ? pMetricSetParams->RawReportSize
               : pMetricSetParams->QueryReportSize;
}

// Copies `source` into `destination`: a whole-struct assignment followed by
// explicit copies of the name and description arrays (full array size each).
void MetricGroupImp::copyProperties(const zet_metric_group_properties_t &source, zet_metric_group_properties_t &destination) {
    DEBUG_BREAK_IF(source.version < destination.version);
    destination = source;
    memcpy_s(destination.name, sizeof(destination.name),
             source.name, sizeof(destination.name));
    memcpy_s(destination.description, sizeof(destination.description),
             source.description, sizeof(destination.description));
}

// Converts one Metrics Discovery typed value to a zet_typed_value_t.
// `destination` is zeroed first; UINT32, UINT64, FLOAT and BOOL are copied into
// the matching union member. Any other source type yields a UINT64 value of 0
// and trips DEBUG_BREAK_IF (on the following line).
void MetricGroupImp::copyValue(const MetricsDiscovery::TTypedValue_1_0 &source, zet_typed_value_t &destination) const {
    destination = {};

    switch (source.ValueType) {
    case MetricsDiscovery::VALUE_TYPE_UINT32:
        destination.type = ZET_VALUE_TYPE_UINT32;
        destination.value.ui32 = source.ValueUInt32;
        break;
    case MetricsDiscovery::VALUE_TYPE_UINT64:
        destination.type = ZET_VALUE_TYPE_UINT64;
        destination.value.ui64 = source.ValueUInt64;
        break;
    case MetricsDiscovery::VALUE_TYPE_FLOAT:
        destination.type = ZET_VALUE_TYPE_FLOAT32;
        destination.value.fp32 = source.ValueFloat;
        break;
    case MetricsDiscovery::VALUE_TYPE_BOOL:
        destination.type = ZET_VALUE_TYPE_BOOL8;
        destination.value.b8 = source.ValueBool;
        break;
    default:
        destination.type = ZET_VALUE_TYPE_UINT64;
        destination.value.ui64 = 0;
        // Tail of MetricGroupImp::copyValue(): end of the `default` case.
        DEBUG_BREAK_IF(true);
        break;
    }
}

// Copies the cached metric properties into *pProperties and returns ZE_RESULT_SUCCESS.
// pProperties is dereferenced without a null check.
ze_result_t MetricImp::getProperties(zet_metric_properties_t *pProperties) {
    DEBUG_BREAK_IF(pProperties->version > ZET_METRIC_PROPERTIES_VERSION_CURRENT);
    copyProperties(properties, *pProperties);
    return ZE_RESULT_SUCCESS;
}

// Caches a copy of sourceProperties; always returns ZE_RESULT_SUCCESS.
ze_result_t MetricImp::initialize(const zet_metric_properties_t &sourceProperties) {
    copyProperties(sourceProperties, properties);
    return ZE_RESULT_SUCCESS;
}

// Copies `source` into `destination`: a whole-struct assignment followed by
// explicit full-size copies of the name, description, component and
// resultUnits arrays.
void MetricImp::copyProperties(const zet_metric_properties_t &source, zet_metric_properties_t &destination) {
    DEBUG_BREAK_IF(source.version < destination.version);
    destination = source;
    memcpy_s(destination.name, sizeof(destination.name),
             source.name, sizeof(destination.name));
    memcpy_s(destination.description, sizeof(destination.description),
             source.description, sizeof(destination.description));
    memcpy_s(destination.component, sizeof(destination.component),
             source.component, sizeof(destination.component));
    memcpy_s(destination.resultUnits, sizeof(destination.resultUnits),
             source.resultUnits, sizeof(destination.resultUnits));
}

// Factory: heap-allocates a MetricGroupImp, initializes it with the given
// properties, metric set, concurrent group and metric list, and returns it as a
// raw pointer. The result of initialize() is not inspected.
MetricGroup *MetricGroup::create(zet_metric_group_properties_t &properties, MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const std::vector &metrics) {
    auto pMetricGroup = new MetricGroupImp();
    UNRECOVERABLE_IF(pMetricGroup == nullptr);
    pMetricGroup->initialize(properties, metricSet, concurrentGroup, metrics);
    return pMetricGroup;
}

// Factory: heap-allocates a MetricImp initialized with the given properties and
// returns it as a raw pointer. The result of initialize() is not inspected.
Metric *Metric::create(zet_metric_properties_t &properties) {
    auto pMetric = new MetricImp();
    UNRECOVERABLE_IF(pMetric == nullptr);
    pMetric->initialize(properties);
    return pMetric;
}

} // namespace L0
compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric_enumeration_imp.h000066400000000000000000000130271363734646600305430ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once
#include "level_zero/tools/source/metrics/metric.h"
// Ignore function-overload warning in
// clang
// The push/ignore/pop pairs below suppress -Woverloaded-virtual only while the
// Metrics Discovery API header is being included.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Woverloaded-virtual"
#elif defined(__linux__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverloaded-virtual"
#endif
#include "common/instrumentation/api/metrics_discovery_api.h"
#if defined(__clang__)
#pragma clang diagnostic pop
#elif defined(__linux__)
#pragma GCC diagnostic pop
#endif
#include "shared/source/os_interface/os_library.h"
// NOTE(review): the header name of the next #include is missing in this view of
// the file - presumably <vector>, since std::vector is used below; confirm.
#include
namespace L0 {

// Enumerates metric groups and metrics through the Metrics Discovery library
// and caches them as Level Zero objects.
// NOTE(review): template arguments of std::vector / std::unique_ptr members and
// parameters are missing in this view and are reproduced as found.
struct MetricEnumeration {
    MetricEnumeration(MetricContext &metricContext);
    virtual ~MetricEnumeration();

    ze_result_t metricGroupGet(uint32_t &count, zet_metric_group_handle_t *phMetricGroups);
    virtual bool isInitialized();

    virtual ze_result_t loadMetricsDiscovery();
    static const char *getMetricsDiscoveryFilename();

  protected:
    ze_result_t initialize();

    virtual ze_result_t openMetricsDiscovery();
    virtual ze_result_t cleanupMetricsDiscovery();

    ze_result_t cacheMetricInformation();
    ze_result_t cacheMetricGroup(MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &pConcurrentGroup, const uint32_t domain, const zet_metric_group_sampling_type_t samplingType);
    ze_result_t createMetrics(MetricsDiscovery::IMetricSet_1_5 &metricSet, std::vector &metrics);

    // Metrics Discovery types mapping.
    uint32_t getMetricTierNumber(const uint32_t sourceUsageFlagsMask) const;
    zet_metric_type_t getMetricType(const MetricsDiscovery::TMetricType sourceMetricType) const;
    zet_metric_type_t getMetricType(const MetricsDiscovery::TInformationType sourceInformationType) const;
    zet_value_type_t getMetricResultType(const MetricsDiscovery::TMetricResultType sourceMetricResultType) const;

  protected:
    MetricContext &metricContext;
    std::vector metricGroups; // Cached metric groups
    // Starts as UNINITIALIZED.
    ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED;

    // Metrics Discovery API.
    std::unique_ptr hMetricsDiscovery = nullptr;
    MetricsDiscovery::OpenMetricsDevice_fn openMetricsDevice = nullptr;
    MetricsDiscovery::CloseMetricsDevice_fn closeMetricsDevice = nullptr;
    MetricsDiscovery::OpenMetricsDeviceFromFile_fn openMetricsDeviceFromFile = nullptr;
    MetricsDiscovery::IMetricsDevice_1_5 *pMetricsDevice = nullptr;

  public:
    // Metrics Discovery version should be at least 1.5.
    static const uint32_t requiredMetricsDiscoveryMajorVersion = 1;
    static const uint32_t requiredMetricsDiscoveryMinorVersion = 5;
    static const char *oaConcurrentGroupName;
};

// Metric group implementation backed by a Metrics Discovery metric set and the
// concurrent group it belongs to. Holds raw pointers to both (set in
// initialize()) and a list of metric objects that its destructor deletes.
struct MetricGroupImp : MetricGroup {
    ~MetricGroupImp() override;

    ze_result_t getProperties(zet_metric_group_properties_t *pProperties) override;
    ze_result_t getMetric(uint32_t *pCount, zet_metric_handle_t *phMetrics) override;
    ze_result_t calculateMetricValues(size_t rawDataSize, const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pCalculatedData) override;

    ze_result_t initialize(const zet_metric_group_properties_t &sourceProperties, MetricsDiscovery::IMetricSet_1_5 &metricSet, MetricsDiscovery::IConcurrentGroup_1_5 &concurrentGroup, const std::vector &groupMetrics);

    uint32_t getRawReportSize() override;

    bool activate() override;
    bool deactivate() override;

    static uint32_t getApiMask(const zet_metric_group_sampling_type_t samplingType);

    // Time based measurements.
    ze_result_t openIoStream(uint32_t &timerPeriodNs, uint32_t &oaBufferSize) override;
    ze_result_t waitForReports(const uint32_t timeoutMs) override;
    ze_result_t readIoStream(uint32_t &reportCount, uint8_t &reportData) override;
    ze_result_t closeIoStream() override;

  protected:
    void copyProperties(const zet_metric_group_properties_t &source, zet_metric_group_properties_t &destination);
    void copyValue(const MetricsDiscovery::TTypedValue_1_0 &source, zet_typed_value_t &destination) const;

    bool getCalculatedMetricCount(const size_t rawDataSize, uint32_t &metricValueCount);
    bool getCalculatedMetricValues(const size_t rawDataSize, const uint8_t *pRawData, uint32_t &metricValueCount, zet_typed_value_t *pCalculatedData);

    // Cached metrics.
    std::vector metrics;
    zet_metric_group_properties_t properties{
        ZET_METRIC_GROUP_PROPERTIES_VERSION_CURRENT,
    };
    MetricsDiscovery::IMetricSet_1_5 *pReferenceMetricSet = nullptr;
    MetricsDiscovery::IConcurrentGroup_1_5 *pReferenceConcurrentGroup = nullptr;
};

// Metric implementation: holds a cached copy of the metric's properties.
struct MetricImp : Metric {
    ~MetricImp() override{};

    ze_result_t getProperties(zet_metric_properties_t *pProperties) override;

    ze_result_t initialize(const zet_metric_properties_t &sourceProperties);

  protected:
    void copyProperties(const zet_metric_properties_t &source, zet_metric_properties_t &destination);

    zet_metric_properties_t properties{
        ZET_METRIC_PROPERTIES_VERSION_CURRENT,
    };
};

} // namespace L0
compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric_query_imp.cpp000066400000000000000000000473001363734646600277160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "level_zero/tools/source/metrics/metric_query_imp.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/os_library.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/tools/source/metrics/metric_enumeration_imp.h"

using namespace MetricsLibraryApi;

namespace L0 {

// Binds the library wrapper to its owning metric context; nothing is loaded here.
MetricsLibrary::MetricsLibrary(MetricContext &metricContextInput)
    : metricContext(metricContextInput) {}

// Deletes every valid cached configuration, then the library context if one
// was created and the delete entry point is available.
MetricsLibrary::~MetricsLibrary() {
    // Delete all metric group configurations.
    for (auto &configuration : configurations) {
        if (configuration.second.IsValid()) {
            api.ConfigurationDelete(configuration.second);
        }
    }
    configurations.clear();

    // Destroy context.
    if (context.IsValid() && contextDeleteFunction) {
        contextDeleteFunction(context);
        context = {};
    }
}

// Returns true when the library is initialized. The first call (while the state
// is still UNINITIALIZED) runs initialize(); later calls only read the state.
bool MetricsLibrary::isInitialized() {
    // Try to initialize metrics library only once.
    if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) {
        initialize();
    }
    return initializationState == ZE_RESULT_SUCCESS;
}

// Creates a hardware-counters query with slotsCount slots and allocates the
// memory its reports are written to (returned through pAllocation).
// Returns false if the library is not initialized or the reported gpu report
// size is 0; the remaining steps follow on the next line.
bool MetricsLibrary::createMetricQuery(const uint32_t slotsCount, QueryHandle_1_0 &query, NEO::GraphicsAllocation *&pAllocation) {
    // Validate metrics library state.
    if (!isInitialized()) {
        DEBUG_BREAK_IF(true);
        return false;
    }

    TypedValue_1_0 gpuReportSize = {};
    QueryCreateData_1_0 queryData = {};
    queryData.HandleContext = context;
    queryData.Type = ObjectType::QueryHwCounters;
    queryData.Slots = slotsCount;

    // Obtain gpu report size.
    // The status returned by GetParameter is not checked; gpuReportSize was
    // zero-initialized above and a zero size is rejected below.
    api.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &gpuReportSize.Type, &gpuReportSize);

    // Validate gpu report size.
    if (!gpuReportSize.ValueUInt32) {
        DEBUG_BREAK_IF(true);
        return false;
    }

    // Allocate gpu memory.
NEO::AllocationProperties properties( metricContext.getDevice().getRootDeviceIndex(), gpuReportSize.ValueUInt32 * slotsCount, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); properties.alignment = 64u; pAllocation = metricContext.getDevice().getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); // Validate gpu report size. if (!pAllocation) { DEBUG_BREAK_IF(true); return false; } // Mark allocation as shared and clear it. memset(pAllocation->getUnderlyingBuffer(), 0, gpuReportSize.ValueUInt32 * slotsCount); // Create query pool within metrics library. if (api.QueryCreate(&queryData, &query) != StatusCode::Success) { DEBUG_BREAK_IF(true); metricContext.getDevice().getDriverHandle()->getMemoryManager()->freeGraphicsMemory(pAllocation); return false; } return true; } bool MetricsLibrary::destroyMetricQuery(QueryHandle_1_0 &query) { DEBUG_BREAK_IF(!query.IsValid()); const bool result = isInitialized() && (api.QueryDelete(query) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::getMetricQueryReportSize(size_t &rawDataSize) { ValueType valueType = ValueType::Last; TypedValue_1_0 value = {}; const bool result = isInitialized() && (api.GetParameter(ParameterType::QueryHwCountersReportApiSize, &valueType, &value) == StatusCode::Success); rawDataSize = static_cast(value.ValueUInt32); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::getMetricQueryReport(QueryHandle_1_0 &query, const size_t rawDataSize, uint8_t *pData) { GetReportData_1_0 report = {}; report.Type = ObjectType::QueryHwCounters; report.Query.Handle = query; report.Query.Slot = 0; report.Query.SlotsCount = 1; report.Query.Data = pData; report.Query.DataSize = static_cast(rawDataSize); const bool result = isInitialized() && (api.GetData(&report) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } void MetricsLibrary::initialize() { auto &metricsEnumeration = metricContext.getMetricEnumeration(); // 
    // Function should be called only once.
    // (Tail of MetricsLibrary::initialize(), which begins on the preceding line.)
    DEBUG_BREAK_IF(initializationState != ZE_RESULT_ERROR_UNINITIALIZED);

    // Metrics Enumeration needs to be initialized before Metrics Library
    const bool validMetricsEnumeration = metricsEnumeration.isInitialized();
    // Short-circuit order matters: the context is only created when both the
    // enumeration and the metric context report initialized.
    const bool validMetricsLibrary = validMetricsEnumeration && metricContext.isInitialized() && createContext();

    // Load metrics library and exported functions.
    initializationState = validMetricsLibrary ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
    DEBUG_BREAK_IF(initializationState != ZE_RESULT_SUCCESS);
}

// Loads the metrics library named by getFilename() and resolves the
// context-create and context-delete entry points.
// Returns true only when both entry points are non-null.
// NOTE(review): the reinterpret_cast target types are missing in this view of
// the file and are reproduced as found.
bool MetricsLibrary::load() {
    // Load library.
    handle = NEO::OsLibrary::load(getFilename());

    // Load exported functions.
    if (handle) {
        contextCreateFunction = reinterpret_cast(
            handle->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0));
        contextDeleteFunction = reinterpret_cast(
            handle->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0));
    }

    // Return success if exported functions have been loaded.
    const bool result = contextCreateFunction && contextDeleteFunction;
    DEBUG_BREAK_IF(!result);
    return result;
}

// Creates the metrics library context for the device owned by metricContext.
bool MetricsLibrary::createContext() {
    auto &device = metricContext.getDevice();
    const auto &hwHelper = device.getHwHelper();
    const auto &asyncComputeEngines = hwHelper.getGpgpuEngineInstances(device.getHwInfo());
    ContextCreateData_1_0 createData = {};
    ClientOptionsData_1_0 clientOptions[1] = {};
    ClientData_1_0 clientData = {};
    ClientType_1_0 clientType = {};

    // Check if compute command streamer is used.
    // Tail of MetricsLibrary::createContext().
    // Looks for a CCS entry among the gpgpu engine instances; the result drives
    // the Compute.Asynchronous client option below.
    auto asyncComputeEngine = std::find_if(asyncComputeEngines.begin(), asyncComputeEngines.end(), [&](const auto &engine) {
        return engine == aub_stream::ENGINE_CCS;
    });

    // NOTE(review): the static_cast target type is missing in this view of the
    // file and is reproduced as found.
    const auto &deviceImp = *static_cast(&device);
    const auto &commandStreamReceiver = *deviceImp.neoDevice->getDefaultEngine().commandStreamReceiver;
    const auto engineType = commandStreamReceiver.getOsContext().getEngineType();
    // Whether the default engine is a CCS engine is recorded in the metric
    // context; it is read back through isComputeUsed() elsewhere in this file.
    const bool isComputeUsed = NEO::EngineHelpers::isCcs(engineType);

    metricContext.setUseCompute(isComputeUsed);

    // Create metrics library context.
    DEBUG_BREAK_IF(!contextCreateFunction);
    clientType.Api = ClientApi::OpenCL;
    clientType.Gen = getGenType(device.getPlatformInfo());

    clientOptions[0].Type = ClientOptionsType::Compute;
    clientOptions[0].Compute.Asynchronous = asyncComputeEngine != asyncComputeEngines.end();

    clientData.ClientOptions = clientOptions;
    clientData.ClientOptionsCount = sizeof(clientOptions) / sizeof(ClientOptionsData_1_0);

    createData.Api = &api;
    createData.ClientCallbacks = &callbacks;
    createData.ClientData = &clientData;

    // getContextData() runs first; the context is only created when it succeeds.
    const bool result =
        getContextData(device, createData) &&
        contextCreateFunction(clientType, &createData, &context) == StatusCode::Success;

    DEBUG_BREAK_IF(!result);
    return result;
}

// Translates a platform gen identifier to the metrics library's ClientGen via
// the hardware helper.
// NOTE(review): both static_cast target types are missing in this view of the
// file and are reproduced as found.
ClientGen MetricsLibrary::getGenType(const uint32_t gen) const {
    auto &hwHelper = NEO::HwHelper::get(static_cast(gen));
    return static_cast(hwHelper.getMetricsLibraryGenId());
}

// Returns the gpu memory size required for the commands described by
// commandBuffer, or 0 if the library is not initialized or the query fails.
// Side effect: writes the library context into commandBuffer.HandleContext.
uint32_t MetricsLibrary::getGpuCommandsSize(CommandBufferData_1_0 &commandBuffer) {
    CommandBufferSize_1_0 commandBufferSize = {};

    bool result = isInitialized();

    // Validate metrics library initialization state.
    if (result) {
        commandBuffer.HandleContext = context;
        result = api.CommandBufferGetSize(&commandBuffer, &commandBufferSize) == StatusCode::Success;
    }

    DEBUG_BREAK_IF(!result);
    return result ? commandBufferSize.GpuMemorySize : 0;
}

// Writes the gpu commands described by commandBuffer into the command list's
// command stream.
bool MetricsLibrary::getGpuCommands(CommandList &commandList, CommandBufferData_1_0 &commandBuffer) {
    // Obtain required command buffer size.
    // Tail of MetricsLibrary::getGpuCommands().
    commandBuffer.Size = getGpuCommandsSize(commandBuffer);

    // Validate gpu commands size.
    if (!commandBuffer.Size) {
        DEBUG_BREAK_IF(true);
        return false;
    }

    // Allocate command buffer.
    auto stream = commandList.commandContainer.getCommandStream();
    auto buffer = stream->getSpace(commandBuffer.Size);

    // Validate command buffer space.
    if (!buffer) {
        DEBUG_BREAK_IF(true);
        return false;
    }

    // Fill attached command buffer with gpu commands.
    commandBuffer.Data = buffer;

    // Obtain gpu commands from metrics library.
    const bool result = isInitialized() && (api.CommandBufferGet(&commandBuffer) == StatusCode::Success);
    DEBUG_BREAK_IF(!result);
    return result;
}

// Creates a metrics library OA configuration for a metric group.
// The group is activated through Metrics Discovery, the configuration is
// created, and the group is deactivated again. Returns a default-constructed
// handle when the library is not initialized, the sampling type is neither
// event- nor time-based, or activation fails.
// The status of ConfigurationCreate is not checked; on that path `handle` is
// returned as the call left it.
// metricGroup is only guarded by DEBUG_BREAK_IF before it is dereferenced.
ConfigurationHandle_1_0 MetricsLibrary::createConfiguration(const zet_metric_group_handle_t metricGroupHandle, const zet_metric_group_properties_t properties) {
    // Metric group internal data.
    auto metricGroup = MetricGroup::fromHandle(metricGroupHandle);
    auto metricGroupDummy = ConfigurationHandle_1_0{};
    DEBUG_BREAK_IF(!metricGroup);

    // Metrics library configuration creation data.
    ConfigurationHandle_1_0 handle = {};
    ConfigurationCreateData_1_0 handleData = {};
    handleData.HandleContext = context;
    handleData.Type = ObjectType::ConfigurationHwCountersOa;

    // Check supported sampling types.
    const bool validSampling =
        properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_EVENT_BASED ||
        properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_TIME_BASED;

    // Activate metric group through metrics discovery to send metric group
    // configuration to kernel driver.
    const bool validActivate = isInitialized() && validSampling && metricGroup->activate();

    if (validActivate) {
        // Use metrics library to create configuration for the activated metric group.
        api.ConfigurationCreate(&handleData, &handle);

        // Use metrics discovery to deactivate metric group.
        metricGroup->deactivate();
    }

    return validActivate ?
                         // Tail of createConfiguration(): the dummy handle is returned when activation was not possible.
                         handle
                         : metricGroupDummy;
}

// Returns the cached configuration for a metric group handle, creating and
// caching it through addConfiguration() on a cache miss.
ConfigurationHandle_1_0 MetricsLibrary::getConfiguration(zet_metric_group_handle_t handle) {
    auto iter = configurations.find(handle);
    auto configuration = (iter != end(configurations)) ? iter->second : addConfiguration(handle);

    DEBUG_BREAK_IF(!configuration.IsValid());
    return configuration;
}

// Creates a configuration for the metric group and stores it in `configurations`
// keyed by the group handle. Only valid configurations are cached; otherwise a
// default-constructed handle is returned.
ConfigurationHandle_1_0 MetricsLibrary::addConfiguration(zet_metric_group_handle_t handle) {
    ConfigurationHandle_1_0 libraryHandle = {};
    DEBUG_BREAK_IF(!handle);

    // Create metrics library configuration.
    auto metricGroup = MetricGroup::fromHandle(handle);
    auto properties = MetricGroup::getProperties(handle);
    auto configuration = createConfiguration(metricGroup, properties);

    // Cache configuration if valid.
    if (configuration.IsValid()) {
        libraryHandle = configuration;
        configurations[handle] = libraryHandle;
    }

    DEBUG_BREAK_IF(!libraryHandle.IsValid());
    return libraryHandle;
}

// Creates a metric query pool for the device and metric group and returns its
// handle through phMetricQueryPool (nullptr on failure).
// pDesc and phMetricQueryPool are dereferenced without null checks.
ze_result_t metricQueryPoolCreate(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *pDesc,
                                  zet_metric_query_pool_handle_t *phMetricQueryPool) {
    // Create metric query pool
    *phMetricQueryPool = MetricQueryPool::create(hDevice, hMetricGroup, *pDesc);

    // Return result status.
    return (*phMetricQueryPool != nullptr) ?
                                                  // Tail of metricQueryPoolCreate(): a null pool is reported as INVALID_ARGUMENT.
                                                  ZE_RESULT_SUCCESS
                                                  : ZE_RESULT_ERROR_INVALID_ARGUMENT;
}

// Factory: allocates a MetricQueryPoolImp bound to the device's metric context
// and runs create() on it. Returns nullptr (after deleting the object) when
// create() fails.
MetricQueryPool *MetricQueryPool::create(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t &desc) {
    auto device = Device::fromHandle(hDevice);
    auto metricPoolImp = new MetricQueryPoolImp(device->getMetricContext(), hMetricGroup, desc);

    if (!metricPoolImp->create()) {
        delete metricPoolImp;
        metricPoolImp = nullptr;
    }

    return metricPoolImp;
}

// Stores the metric context, its metrics library, a copy of the pool
// description and the metric group handle. No resources are created here.
MetricQueryPoolImp::MetricQueryPoolImp(MetricContext &metricContextInput, zet_metric_group_handle_t hEventMetricGroupInput, const zet_metric_query_pool_desc_t &poolDescription)
    : metricContext(metricContextInput), metricsLibrary(metricContext.getMetricsLibrary()), description(poolDescription),
      hMetricGroup(hEventMetricGroupInput) {}

// Creates the pool's resources. Only ZET_METRIC_QUERY_POOL_FLAG_PERFORMANCE is
// handled; any other flag value returns false.
bool MetricQueryPoolImp::create() {
    switch (description.flags) {
    case ZET_METRIC_QUERY_POOL_FLAG_PERFORMANCE:
        return createMetricQueryPool();
    default:
        DEBUG_BREAK_IF(true);
        return false;
    }
}

// Releases the pool. For performance pools: frees the report allocation,
// destroys the library query and then deletes this object - the pool must not
// be touched after the call. For any other flag value nothing is released and
// false is returned.
bool MetricQueryPoolImp::destroy() {
    bool result = false;

    switch (description.flags) {
    case ZET_METRIC_QUERY_POOL_FLAG_PERFORMANCE:
        DEBUG_BREAK_IF(!(pAllocation && query.IsValid()));
        metricContext.getDevice().getDriverHandle()->getMemoryManager()->freeGraphicsMemory(pAllocation);
        result = metricsLibrary.destroyMetricQuery(query);
        // `result` is a local, so returning it after `delete this` does not
        // touch the freed object.
        delete this;
        break;
    default:
        DEBUG_BREAK_IF(true);
        break;
    }

    return result;
}

// Builds the pool of query objects (one per slot) and the backing library query.
// Returns false for metric groups that are not event based.
bool MetricQueryPoolImp::createMetricQueryPool() {
    // Validate metric group query - only event based is supported.
    auto metricGroupProperites = MetricGroup::getProperties(hMetricGroup);
    const bool validMetricGroup = metricGroupProperites.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_EVENT_BASED;

    if (!validMetricGroup) {
        return false;
    }

    // Pool initialization.
    pool.reserve(description.count);
    for (uint32_t i = 0; i < description.count; ++i) {
        pool.push_back({metricContext, *this, i});
    }

    // Metrics library query object initialization.
    // Tail of MetricQueryPoolImp::createMetricQueryPool().
    return metricsLibrary.createMetricQuery(description.count, query, pAllocation);
}

// Destroys the pool behind the handle.
// The boolean result of destroy() is ignored; ZE_RESULT_SUCCESS is always returned.
ze_result_t metricQueryPoolDestroy(zet_metric_query_pool_handle_t hMetricQueryPool) {
    MetricQueryPool::fromHandle(hMetricQueryPool)->destroy();
    return ZE_RESULT_SUCCESS;
}

// Converts an API handle back to the pool object.
// NOTE(review): the static_cast target type is missing in this view of the
// file - presumably MetricQueryPool *, the return type; confirm.
MetricQueryPool *MetricQueryPool::fromHandle(zet_metric_query_pool_handle_t handle) {
    return static_cast(handle);
}

// The pool object itself serves as its API handle.
zet_metric_query_pool_handle_t MetricQueryPool::toHandle() { return this; }

// Hands out the pre-built query object at `index`.
// Writes nullptr and returns ZE_RESULT_ERROR_INVALID_ARGUMENT when index is not
// below description.count. No new object is created by this call.
ze_result_t MetricQueryPoolImp::createMetricQuery(uint32_t index, zet_metric_query_handle_t *phMetricQuery) {
    *phMetricQuery = (index < description.count) ? &(pool[index]) : nullptr;

    return (*phMetricQuery != nullptr) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_INVALID_ARGUMENT;
}

// Binds a query object to its metric context, metrics library, owning pool and
// slot index.
MetricQueryImp::MetricQueryImp(MetricContext &metricContextInput, MetricQueryPoolImp &poolInput, const uint32_t slotInput)
    : metricContext(metricContextInput), metricsLibrary(metricContext.getMetricsLibrary()), pool(poolInput), slot(slotInput) {}

// Appends the "query begin" commands to the command list. No completion event
// is used on begin. Only performance pools are supported.
ze_result_t MetricQueryImp::appendBegin(CommandList &commandList) {
    switch (pool.description.flags) {
    case ZET_METRIC_QUERY_POOL_FLAG_PERFORMANCE:
        return writeMetricQuery(commandList, nullptr, true);
    default:
        DEBUG_BREAK_IF(true);
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
}

// Appends the "query end" commands to the command list, followed by a signal
// of hCompletionEvent when one is given. Only performance pools are supported.
ze_result_t MetricQueryImp::appendEnd(CommandList &commandList, ze_event_handle_t hCompletionEvent) {
    switch (pool.description.flags) {
    case ZET_METRIC_QUERY_POOL_FLAG_PERFORMANCE:
        return writeMetricQuery(commandList, hCompletionEvent, false);
    default:
        DEBUG_BREAK_IF(true);
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }
}

// Two-call data retrieval.
//   *pRawDataSize == 0 : only writes the required report size to *pRawDataSize.
//   otherwise          : reads the report into pRawData.
// pRawDataSize is dereferenced without a null check.
ze_result_t MetricQueryImp::getData(size_t *pRawDataSize, uint8_t *pRawData) {
    const bool calculateSizeOnly = *pRawDataSize == 0;
    const bool result = calculateSizeOnly
                            ? metricsLibrary.getMetricQueryReportSize(*pRawDataSize)
                            : metricsLibrary.getMetricQueryReport(pool.query, *pRawDataSize, pRawData);
    return result ?
                  // Tail of MetricQueryImp::getData(): maps the boolean outcome to a ze_result_t.
                  ZE_RESULT_SUCCESS
                  : ZE_RESULT_ERROR_UNKNOWN;
}

// No-op: always returns ZE_RESULT_SUCCESS.
ze_result_t MetricQueryImp::reset() {
    return ZE_RESULT_SUCCESS;
}

// No-op: always returns ZE_RESULT_SUCCESS. The query object is not deleted here.
ze_result_t MetricQueryImp::destroy() {
    return ZE_RESULT_SUCCESS;
}

// Appends the begin or end commands of this query's slot to the command list.
//   commandList      : receives the gpu commands; the pool allocation is added
//                      to its residency container.
//   hCompletionEvent : optional; signaled after the commands, but only for end
//                      (begin == false).
//   begin            : selects begin (true) or end (false) commands.
// Returns ZE_RESULT_ERROR_UNKNOWN if the commands cannot be obtained or the
// event signal cannot be appended.
ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_handle_t hCompletionEvent,
                                             const bool begin) {
    // Make gpu allocation visible.
    commandList.commandContainer.addToResidencyContainer(pool.pAllocation);

    // Obtain gpu commands.
    CommandBufferData_1_0 commandBuffer = {};
    commandBuffer.CommandsType = ObjectType::QueryHwCounters;
    commandBuffer.QueryHwCounters.Handle = pool.query;
    commandBuffer.QueryHwCounters.Begin = begin;
    commandBuffer.QueryHwCounters.Slot = slot;
    commandBuffer.Allocation.GpuAddress = pool.pAllocation->getGpuAddress();
    commandBuffer.Allocation.CpuAddress = pool.pAllocation->getUnderlyingBuffer();
    commandBuffer.Type = metricContext.isComputeUsed()
                             ? GpuCommandBufferType::Compute
                             : GpuCommandBufferType::Render;

    bool writeCompletionEvent = hCompletionEvent && !begin;
    bool result = metricsLibrary.getGpuCommands(commandList, commandBuffer);

    // Write completion event.
    if (result && writeCompletionEvent) {
        result = zeCommandListAppendSignalEvent(commandList.toHandle(), hCompletionEvent) ==
                 ZE_RESULT_SUCCESS;
    }

    return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
}

// Appends OverrideFlushCaches commands (with Override.Enable set) to the
// command list, using the metrics library of the command list's device.
ze_result_t MetricQuery::appendMemoryBarrier(CommandList &commandList) {
    auto &metricContext = commandList.device->getMetricContext();
    auto &metricsLibrary = metricContext.getMetricsLibrary();

    // Obtain gpu commands.
    CommandBufferData_1_0 commandBuffer = {};
    commandBuffer.CommandsType = ObjectType::OverrideFlushCaches;
    commandBuffer.Override.Enable = true;
    commandBuffer.Type = metricContext.isComputeUsed()
                             ? GpuCommandBufferType::Compute
                             : GpuCommandBufferType::Render;

    return metricsLibrary.getGpuCommands(commandList, commandBuffer) ?
ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } ze_result_t MetricQuery::appendTracerMarker(CommandList &commandList, zet_metric_tracer_handle_t hMetricTracer, uint32_t value) { auto &metricContext = commandList.device->getMetricContext(); auto &metricsLibrary = metricContext.getMetricsLibrary(); // Obtain gpu commands. CommandBufferData_1_0 commandBuffer = {}; commandBuffer.CommandsType = ObjectType::MarkerStreamUser; commandBuffer.MarkerStreamUser.Value = value; commandBuffer.Type = metricContext.isComputeUsed() ? GpuCommandBufferType::Compute : GpuCommandBufferType::Render; return metricsLibrary.getGpuCommands(commandList, commandBuffer) ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } MetricQuery *MetricQuery::fromHandle(zet_metric_query_handle_t handle) { return static_cast(handle); } zet_metric_query_handle_t MetricQuery::toHandle() { return this; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric_query_imp.h000066400000000000000000000106071363734646600273630ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/metrics/metric.h" #include "instrumentation.h" #include #include using MetricsLibraryApi::ClientCallbacks_1_0; using MetricsLibraryApi::ClientGen; using MetricsLibraryApi::CommandBufferData_1_0; using MetricsLibraryApi::ConfigurationHandle_1_0; using MetricsLibraryApi::ContextCreateData_1_0; using MetricsLibraryApi::ContextCreateFunction_1_0; using MetricsLibraryApi::ContextDeleteFunction_1_0; using MetricsLibraryApi::ContextHandle_1_0; using MetricsLibraryApi::Interface_1_0; using MetricsLibraryApi::QueryHandle_1_0; namespace L0 { struct Device; struct CommandList; struct MetricGroup; } // namespace L0 namespace NEO { class OsLibrary; class GraphicsAllocation; } // namespace NEO namespace L0 { struct MetricsLibrary { public: MetricsLibrary(MetricContext &metricContext); virtual ~MetricsLibrary(); // 
Initialization. virtual bool load(); bool isInitialized(); static const char *getFilename(); // Metric query. bool createMetricQuery(const uint32_t slotsCount, QueryHandle_1_0 &query, NEO::GraphicsAllocation *&pAllocation); bool getMetricQueryReport(QueryHandle_1_0 &query, const size_t rawDataSize, uint8_t *pData); virtual bool getMetricQueryReportSize(size_t &rawDataSize); bool destroyMetricQuery(QueryHandle_1_0 &query); // Command buffer. bool getGpuCommands(CommandList &commandList, CommandBufferData_1_0 &commandBuffer); uint32_t getGpuCommandsSize(CommandBufferData_1_0 &commandBuffer); // Metric group configuration. ConfigurationHandle_1_0 getConfiguration(const zet_metric_group_handle_t metricGroup); bool activateConfiguration(const ConfigurationHandle_1_0 configurationHandle); bool deactivateConfiguration(const ConfigurationHandle_1_0 configurationHandle); protected: void initialize(); bool createContext(); virtual bool getContextData(Device &device, ContextCreateData_1_0 &contextData); ConfigurationHandle_1_0 createConfiguration(const zet_metric_group_handle_t metricGroup, const zet_metric_group_properties_t properties); ConfigurationHandle_1_0 addConfiguration(const zet_metric_group_handle_t metricGroup); ClientGen getGenType(const uint32_t gen) const; protected: NEO::OsLibrary *handle = nullptr; MetricContext &metricContext; ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED; // Metrics library types. 
Interface_1_0 api = {}; ClientCallbacks_1_0 callbacks = {}; ContextHandle_1_0 context = {}; ContextCreateFunction_1_0 contextCreateFunction = nullptr; ContextDeleteFunction_1_0 contextDeleteFunction = nullptr; // MetricGroup configurations std::map configurations; }; struct MetricQueryImp : MetricQuery { public: MetricQueryImp(MetricContext &metricContext, struct MetricQueryPoolImp &pool, const uint32_t slot); ze_result_t appendBegin(CommandList &commandList) override; ze_result_t appendEnd(CommandList &commandList, ze_event_handle_t hCompletionEvent) override; ze_result_t getData(size_t *pRawDataSize, uint8_t *pRawData) override; ze_result_t reset() override; ze_result_t destroy() override; protected: ze_result_t writeMetricQuery(CommandList &commandList, ze_event_handle_t hCompletionEvent, const bool begin); protected: MetricContext &metricContext; MetricsLibrary &metricsLibrary; MetricQueryPoolImp &pool; uint32_t slot; }; struct MetricQueryPoolImp : MetricQueryPool { public: MetricQueryPoolImp(MetricContext &metricContext, zet_metric_group_handle_t hEventMetricGroup, const zet_metric_query_pool_desc_t &poolDescription); bool create(); bool destroy() override; ze_result_t createMetricQuery(uint32_t index, zet_metric_query_handle_t *phMetricQuery) override; protected: bool createMetricQueryPool(); public: MetricContext &metricContext; MetricsLibrary &metricsLibrary; std::vector pool; NEO::GraphicsAllocation *pAllocation = nullptr; zet_metric_query_pool_desc_t description = {}; zet_metric_group_handle_t hMetricGroup = nullptr; QueryHandle_1_0 query = {}; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric_stubs.cpp000066400000000000000000000051221363734646600270400ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver.h" #include 
"level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/source/inc/ze_intel_gpu.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/metrics/metric_query_imp.h" #include "instrumentation.h" #include #include namespace L0 { MetricQueryPool *MetricQueryPool::create(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t &desc) { return nullptr; } MetricQueryPool *MetricQueryPool::fromHandle(zet_metric_query_pool_handle_t handle) { return nullptr; } ze_result_t metricQueryPoolCreate(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t metricQueryPoolDestroy(zet_metric_query_pool_handle_t hMetricQueryPool) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t MetricQuery::appendMemoryBarrier(CommandList &commandList) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t MetricQuery::appendTracerMarker(CommandList &commandList, zet_metric_tracer_handle_t hMetricTracer, uint32_t value) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } MetricQuery *MetricQuery::fromHandle(zet_metric_query_handle_t handle) { return nullptr; } void MetricContext::enableMetricApi(ze_result_t &result) { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; return; } std::unique_ptr MetricContext::create(Device &device) { return nullptr; } bool MetricContext::isMetricApiAvailable() { return false; } ze_result_t metricGroupGet(zet_device_handle_t hDevice, uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t metricGet(zet_metric_group_handle_t hMetricGroup, uint32_t *pCount, zet_metric_handle_t *phMetrics) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t metricTracerOpen(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, 
zet_metric_tracer_desc_t *pDesc, ze_event_handle_t hNotificationEvent, zet_metric_tracer_handle_t *phMetricTracer) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric_tracer_imp.cpp000066400000000000000000000133231363734646600300270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_tracer_imp.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/core/source/device/device.h" namespace L0 { ze_result_t MetricTracerImp::readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) { DEBUG_BREAK_IF(rawReportSize == 0); auto metricGroup = MetricGroup::fromHandle(hMetricGroup); // Return required size if requested. if (*pRawDataSize == 0) { *pRawDataSize = getRequiredBufferSize(maxReportCount); return ZE_RESULT_SUCCESS; } // User is expected to allocate space. if (pRawData == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } // Retrieve the number of reports that fit into the buffer. uint32_t reportCount = static_cast(*pRawDataSize / rawReportSize); // Read tracer data. const ze_result_t result = metricGroup->readIoStream(reportCount, *pRawData); if (result == ZE_RESULT_SUCCESS) { *pRawDataSize = reportCount * rawReportSize; } return result; } ze_result_t MetricTracerImp::close() { const auto result = stopMeasurements(); if (result == ZE_RESULT_SUCCESS) { // Clear metric tracer reference in context. auto device = Device::fromHandle(hDevice); device->getMetricContext().setMetricTracer(nullptr); // Release notification event. if (pNotificationEvent != nullptr) { pNotificationEvent->metricTracer = nullptr; } // Delete metric tracer. 
delete this; } return result; } ze_result_t MetricTracerImp::initialize(ze_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup) { this->hDevice = hDevice; this->hMetricGroup = hMetricGroup; auto metricGroup = MetricGroup::fromHandle(this->hMetricGroup); rawReportSize = metricGroup->getRawReportSize(); return ZE_RESULT_SUCCESS; } ze_result_t MetricTracerImp::startMeasurements(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs, ze_event_handle_t hNotificationEvent) { auto metricGroup = MetricGroup::fromHandle(hMetricGroup); uint32_t requestedOaBufferSize = getOaBufferSize(notifyEveryNReports); const ze_result_t result = metricGroup->openIoStream(samplingPeriodNs, requestedOaBufferSize); // Return oa buffer size and notification event aligned to gpu capabilities. if (result == ZE_RESULT_SUCCESS) { oaBufferSize = requestedOaBufferSize; notifyEveryNReports = getNotifyEveryNReports(requestedOaBufferSize); } // Associate notification event with metric tracer. pNotificationEvent = Event::fromHandle(hNotificationEvent); if (pNotificationEvent != nullptr) { pNotificationEvent->metricTracer = this; } return result; } ze_result_t MetricTracerImp::stopMeasurements() { auto metricGroup = MetricGroup::fromHandle(hMetricGroup); const ze_result_t result = metricGroup->closeIoStream(); if (result == ZE_RESULT_SUCCESS) { oaBufferSize = 0; } return result; } uint32_t MetricTracerImp::getOaBufferSize(const uint32_t notifyEveryNReports) const { // Notification is on half full buffer, hence multiplication by 2. return notifyEveryNReports * rawReportSize * 2; } uint32_t MetricTracerImp::getNotifyEveryNReports(const uint32_t oaBufferSize) const { // Notification is on half full buffer, hence division by 2. return rawReportSize ? oaBufferSize / (rawReportSize * 2) : 0; } Event::State MetricTracerImp::getNotificationState() { auto metricGroup = MetricGroup::fromHandle(hMetricGroup); bool reportsReady = metricGroup->waitForReports(0) == ZE_RESULT_SUCCESS; return reportsReady ? 
Event::State::STATE_SIGNALED : Event::State::STATE_INITIAL; } uint32_t MetricTracerImp::getRequiredBufferSize(const uint32_t maxReportCount) const { DEBUG_BREAK_IF(rawReportSize == 0); uint32_t maxOaBufferReportCount = oaBufferSize / rawReportSize; // Trim to OA buffer size if needed. return maxReportCount > maxOaBufferReportCount ? oaBufferSize : maxReportCount * rawReportSize; } MetricTracer *MetricTracer::open(zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup, zet_metric_tracer_desc_t &desc, ze_event_handle_t hNotificationEvent) { auto pDevice = Device::fromHandle(hDevice); auto &metricContext = pDevice->getMetricContext(); // Check whether metric tracer is already open. if (metricContext.getMetricTracer() != nullptr) { return nullptr; } // Check metric group sampling type. auto metricGroupProperties = MetricGroup::getProperties(hMetricGroup); if (metricGroupProperties.samplingType != ZET_METRIC_GROUP_SAMPLING_TYPE_TIME_BASED) { return nullptr; } // Check whether metric group is activated. 
if (!metricContext.isMetricGroupActivated(hMetricGroup)) { return nullptr; } auto pMetricTracer = new MetricTracerImp(); UNRECOVERABLE_IF(pMetricTracer == nullptr); pMetricTracer->initialize(hDevice, hMetricGroup); const ze_result_t result = pMetricTracer->startMeasurements( desc.notifyEveryNReports, desc.samplingPeriod, hNotificationEvent); if (result == ZE_RESULT_SUCCESS) { metricContext.setMetricTracer(pMetricTracer); } else { delete pMetricTracer; pMetricTracer = nullptr; } return pMetricTracer; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/metric_tracer_imp.h000066400000000000000000000022201363734646600274660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/metrics/metric.h" struct Event; namespace L0 { struct MetricTracerImp : MetricTracer { ~MetricTracerImp() override{}; ze_result_t readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) override; ze_result_t close() override; ze_result_t initialize(ze_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup); ze_result_t startMeasurements(uint32_t ¬ifyEveryNReports, uint32_t &samplingPeriodNs, ze_event_handle_t hNotificationEvent); Event::State getNotificationState() override; protected: ze_result_t stopMeasurements(); uint32_t getOaBufferSize(const uint32_t notifyEveryNReports) const; uint32_t getNotifyEveryNReports(const uint32_t oaBufferSize) const; uint32_t getRequiredBufferSize(const uint32_t maxReportCount) const; ze_device_handle_t hDevice = nullptr; zet_metric_group_handle_t hMetricGroup = nullptr; Event *pNotificationEvent = nullptr; uint32_t rawReportSize = 0; uint32_t oaBufferSize = 0; }; } // namespace 
L0compute-runtime-20.13.16352/level_zero/tools/source/metrics/windows/000077500000000000000000000000001363734646600253235ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/metrics/windows/CMakeLists.txt000066400000000000000000000011061363734646600300610ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(HAVE_INSTRUMENTATION) set(L0_SRCS_TOOLS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_query_imp_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metric_enumeration_imp_windows.cpp) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_WINDOWS ${L0_SRCS_TOOLS_WINDOWS}) endif() os_metric_enumeration_imp_windows.cpp000066400000000000000000000007151363734646600347640ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/metrics/windows/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/metrics/metric_enumeration_imp.h" #if defined(_WIN64) #define METRICS_DISCOVERY_NAME "igdmd64.dll" #elif defined(_WIN32) #define METRICS_DISCOVERY_NAME "igdmd32.dll" #else #error "Unsupported OS" #endif namespace L0 { const char *MetricEnumeration::getMetricsDiscoveryFilename() { return METRICS_DISCOVERY_NAME; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/metrics/windows/os_metric_query_imp_windows.cpp000066400000000000000000000042551363734646600336650ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/metrics/metric_query_imp.h" #if defined(_WIN64) #define METRICS_LIBRARY_NAME "igdml64.dll" #elif defined(_WIN32) #define 
METRICS_LIBRARY_NAME "igdml32.dll" #else #error "Unsupported OS" #endif using namespace MetricsLibraryApi; namespace L0 { const char *MetricsLibrary::getFilename() { return METRICS_LIBRARY_NAME; } bool MetricsLibrary::getContextData(Device &device, ContextCreateData_1_0 &contextData) { auto osInterface = device.getOsInterface().get(); auto &osData = contextData.ClientData->Windows; // Copy escape data (adapter/device/escape function). osData.KmdInstrumentationEnabled = true; osData.Device = reinterpret_cast(static_cast(osInterface->getDeviceHandle())); osData.Escape = osInterface->getEscapeHandle(); osData.Adapter = reinterpret_cast(static_cast(osInterface->getAdapterHandle())); return osData.Device && osData.Escape && osData.Adapter; } bool MetricsLibrary::activateConfiguration(const ConfigurationHandle_1_0 configurationHandle) { ConfigurationActivateData_1_0 activateData = {}; activateData.Type = GpuConfigurationActivationType::EscapeCode; const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationActivate(configurationHandle, &activateData) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } bool MetricsLibrary::deactivateConfiguration(const ConfigurationHandle_1_0 configurationHandle) { const bool validMetricsLibrary = isInitialized(); const bool validConfiguration = configurationHandle.IsValid(); const bool result = validMetricsLibrary && validConfiguration && (api.ConfigurationDeactivate(configurationHandle) == StatusCode::Success); DEBUG_BREAK_IF(!result); return result; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/pin/000077500000000000000000000000001363734646600227515ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/pin/CMakeLists.txt000066400000000000000000000006401363734646600255110ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel 
Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_PIN ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/pin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pin.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_PIN} ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_PIN ${L0_SRCS_TOOLS_PIN}) compute-runtime-20.13.16352/level_zero/tools/source/pin/pin.cpp000066400000000000000000000010071363734646600242410ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "pin.h" #include "level_zero/core/source/module/module.h" #include "level_zero/source/inc/ze_intel_gpu.h" namespace L0 { static PinContext *PinContextInstance = nullptr; void PinContext::init(ze_init_flag_t flag) { if (!getenv_tobool("ZE_ENABLE_PROGRAM_INSTRUMENTATION")) { return; } if (PinContextInstance == nullptr) { PinContextInstance = new PinContext(); } } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/pin/pin.h000066400000000000000000000004261363734646600237120ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace L0 { class PinContext { public: static void init(ze_init_flag_t flag); }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/000077500000000000000000000000001363734646600234755ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/CMakeLists.txt000066400000000000000000000013321363734646600262340ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN ${CMAKE_CURRENT_SOURCE_DIR}/sysman.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sysman.h ${CMAKE_CURRENT_SOURCE_DIR}/sysman_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sysman_imp.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN} 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_subdirectory(sysman_device) add_subdirectory(pci) add_subdirectory(frequency) add_subdirectory(standby) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN ${L0_SRCS_TOOLS_SYSMAN}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/000077500000000000000000000000001363734646600254765ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/CMakeLists.txt000066400000000000000000000013071363734646600302370ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FREQUENCY ${CMAKE_CURRENT_SOURCE_DIR}/frequency.cpp ${CMAKE_CURRENT_SOURCE_DIR}/frequency.h ${CMAKE_CURRENT_SOURCE_DIR}/frequency_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/frequency_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FREQUENCY ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY})compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/frequency.cpp000066400000000000000000000017741363734646600302140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/frequency.h" #include "level_zero/tools/source/sysman/frequency/frequency_imp.h" namespace L0 { FrequencyHandleContext::~FrequencyHandleContext() { for (Frequency *pFrequency : handle_list) { delete pFrequency; } } ze_result_t FrequencyHandleContext::init() { Frequency *pFrequency = new FrequencyImp(pOsSysman); handle_list.push_back(pFrequency); return ZE_RESULT_SUCCESS; } 
ze_result_t FrequencyHandleContext::frequencyGet(uint32_t *pCount, zet_sysman_freq_handle_t *phFrequency) { if (nullptr == phFrequency) { *pCount = static_cast(handle_list.size()); return ZE_RESULT_SUCCESS; } uint32_t i = 0; for (Frequency *freq : handle_list) { if (i >= *pCount) break; phFrequency[i++] = freq->toHandle(); } *pCount = i; return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/frequency.h000066400000000000000000000022071363734646600276510ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include struct _zet_sysman_freq_handle_t {}; namespace L0 { struct OsSysman; class Frequency : _zet_sysman_freq_handle_t { public: virtual ~Frequency() {} virtual ze_result_t frequencyGetProperties(zet_freq_properties_t *pProperties) = 0; virtual ze_result_t frequencyGetRange(zet_freq_range_t *pLimits) = 0; virtual ze_result_t frequencySetRange(const zet_freq_range_t *pLimits) = 0; virtual ze_result_t frequencyGetState(zet_freq_state_t *pState) = 0; static Frequency *fromHandle(zet_sysman_freq_handle_t handle) { return static_cast(handle); } inline zet_sysman_freq_handle_t toHandle() { return this; } }; struct FrequencyHandleContext { FrequencyHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~FrequencyHandleContext(); ze_result_t init(); ze_result_t frequencyGet(uint32_t *pCount, zet_sysman_freq_handle_t *phFrequency); OsSysman *pOsSysman; std::vector handle_list; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/frequency_imp.cpp000066400000000000000000000100151363734646600310450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/frequency_imp.h" #include "shared/source/helpers/debug_helpers.h" #include namespace L0 { const double FrequencyImp::step = 50.0 / 3; 
// Step of 16.6666667 Mhz (GEN9 Hardcode) const bool FrequencyImp::canControl = true; // canControl is true on i915 (GEN9 Hardcode) ze_result_t FrequencyImp::frequencyGetProperties(zet_freq_properties_t *pProperties) { *pProperties = frequencyProperties; return ZE_RESULT_SUCCESS; } ze_result_t FrequencyImp::frequencyGetRange(zet_freq_range_t *pLimits) { ze_result_t result = pOsFrequency->getMax(pLimits->max); if (ZE_RESULT_SUCCESS != result) { return result; } return pOsFrequency->getMin(pLimits->min); } ze_result_t FrequencyImp::frequencySetRange(const zet_freq_range_t *pLimits) { double newMin = round(pLimits->min); double newMax = round(pLimits->max); bool newMinValid = false, newMaxValid = false; for (unsigned int i = 0; i < numClocks; i++) { if (newMin == pClocks[i]) { newMinValid = true; } if (newMax == pClocks[i]) { newMaxValid = true; } } if (newMin > newMax || !newMinValid || !newMaxValid) { return ZE_RESULT_ERROR_UNKNOWN; } double currentMax; pOsFrequency->getMax(currentMax); if (newMin > currentMax) { // set the max first ze_result_t result = pOsFrequency->setMax(newMax); if (ZE_RESULT_SUCCESS != result) { return result; } return pOsFrequency->setMin(newMin); } // set the min first ze_result_t result = pOsFrequency->setMin(newMin); if (ZE_RESULT_SUCCESS != result) { return result; } return pOsFrequency->setMax(newMax); } ze_result_t FrequencyImp::frequencyGetState(zet_freq_state_t *pState) { ze_result_t result; result = pOsFrequency->getRequest(pState->request); if (ZE_RESULT_SUCCESS != result) { return result; } result = pOsFrequency->getTdp(pState->tdp); if (ZE_RESULT_SUCCESS != result) { return result; } result = pOsFrequency->getEfficient(pState->efficient); if (ZE_RESULT_SUCCESS != result) { return result; } result = pOsFrequency->getActual(pState->actual); if (ZE_RESULT_SUCCESS != result) { return result; } result = pOsFrequency->getThrottleReasons(pState->throttleReasons); if (ZE_RESULT_ERROR_UNKNOWN == result) { // Throttle Reason is optional, 
set to none for now. pState->throttleReasons = ZET_FREQ_THROTTLE_REASONS_NONE; result = ZE_RESULT_SUCCESS; } return result; } void FrequencyImp::init() { frequencyProperties.type = ZET_FREQ_DOMAIN_GPU; frequencyProperties.onSubdevice = false; frequencyProperties.subdeviceId = 0; frequencyProperties.canControl = canControl; ze_result_t result1 = pOsFrequency->getMinVal(frequencyProperties.min); ze_result_t result2 = pOsFrequency->getMaxVal(frequencyProperties.max); // If can't figure out the valid range, then can't control it. if (ZE_RESULT_SUCCESS != result1 || ZE_RESULT_SUCCESS != result2) { frequencyProperties.canControl = false; frequencyProperties.min = 0.0; frequencyProperties.max = 0.0; } frequencyProperties.step = step; double freqRange = frequencyProperties.max - frequencyProperties.min; numClocks = static_cast(round(freqRange / frequencyProperties.step)) + 1; pClocks = new double[numClocks]; for (unsigned int i = 0; i < numClocks; i++) { pClocks[i] = round(frequencyProperties.min + (frequencyProperties.step * i)); } frequencyProperties.isThrottleEventSupported = false; } FrequencyImp::FrequencyImp(OsSysman *pOsSysman) { pOsFrequency = OsFrequency::create(pOsSysman); UNRECOVERABLE_IF(nullptr == pOsFrequency); init(); } FrequencyImp::~FrequencyImp() { delete pOsFrequency; delete[] pClocks; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/frequency_imp.h000066400000000000000000000022631363734646600305200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/frequency/frequency.h" #include "level_zero/tools/source/sysman/frequency/os_frequency.h" #include namespace L0 { class FrequencyImp : public Frequency { public: ze_result_t frequencyGetProperties(zet_freq_properties_t *pProperties) override; ze_result_t frequencyGetRange(zet_freq_range_t *pLimits) override; ze_result_t frequencySetRange(const 
zet_freq_range_t *pLimits) override; ze_result_t frequencyGetState(zet_freq_state_t *pState) override; FrequencyImp(OsSysman *pOsSysman); ~FrequencyImp() override; FrequencyImp(OsFrequency *pOsFrequency) : pOsFrequency(pOsFrequency) { init(); }; // Don't allow copies of the FrequencyImp object FrequencyImp(const FrequencyImp &obj) = delete; FrequencyImp &operator=(const FrequencyImp &obj) = delete; private: static const double step; static const bool canControl; OsFrequency *pOsFrequency; zet_freq_properties_t frequencyProperties; double *pClocks; uint32_t numClocks; void init(); }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/linux/000077500000000000000000000000001363734646600266355ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/linux/CMakeLists.txt000066400000000000000000000007501363734646600313770ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp.cpp ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_LINUX}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/linux/os_frequency_imp.cpp000066400000000000000000000125131363734646600327120ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" #include "level_zero/core/source/device/device.h" #include "sysman/frequency/frequency_imp.h" #include "sysman/frequency/os_frequency.h" #include "sysman/linux/fs_access.h" #include "sysman/linux/os_sysman_imp.h" namespace L0 
{ class LinuxFrequencyImp : public OsFrequency { public: ze_result_t getMin(double &min) override; ze_result_t setMin(double min) override; ze_result_t getMax(double &max) override; ze_result_t setMax(double max) override; ze_result_t getRequest(double &request) override; ze_result_t getTdp(double &tdp) override; ze_result_t getActual(double &actual) override; ze_result_t getEfficient(double &efficient) override; ze_result_t getMaxVal(double &maxVal) override; ze_result_t getMinVal(double &minVal) override; ze_result_t getThrottleReasons(uint32_t &throttleReasons) override; LinuxFrequencyImp(OsSysman *pOsSysman); ~LinuxFrequencyImp() override = default; // Don't allow copies of the LinuxFrequencyImp object LinuxFrequencyImp(const LinuxFrequencyImp &obj) = delete; LinuxFrequencyImp &operator=(const LinuxFrequencyImp &obj) = delete; private: SysfsAccess *pSysfsAccess; static const std::string minFreqFile; static const std::string maxFreqFile; static const std::string requestFreqFile; static const std::string tdpFreqFile; static const std::string actualFreqFile; static const std::string efficientFreqFile; static const std::string maxValFreqFile; static const std::string minValFreqFile; }; const std::string LinuxFrequencyImp::minFreqFile("gt_min_freq_mhz"); const std::string LinuxFrequencyImp::maxFreqFile("gt_max_freq_mhz"); const std::string LinuxFrequencyImp::requestFreqFile("gt_cur_freq_mhz"); const std::string LinuxFrequencyImp::tdpFreqFile("gt_boost_freq_mhz"); const std::string LinuxFrequencyImp::actualFreqFile("gt_act_freq_mhz"); const std::string LinuxFrequencyImp::efficientFreqFile("gt_RP1_freq_mhz"); const std::string LinuxFrequencyImp::maxValFreqFile("gt_RP0_freq_mhz"); const std::string LinuxFrequencyImp::minValFreqFile("gt_RPn_freq_mhz"); ze_result_t LinuxFrequencyImp::getMin(double &min) { int intval; ze_result_t result = pSysfsAccess->read(minFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } min = intval; return ZE_RESULT_SUCCESS; } 
ze_result_t LinuxFrequencyImp::setMin(double min) { ze_result_t result = pSysfsAccess->write(minFreqFile, min); if (ZE_RESULT_SUCCESS != result) { return result; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMax(double &max) { int intval; ze_result_t result = pSysfsAccess->read(maxFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } max = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::setMax(double max) { ze_result_t result = pSysfsAccess->write(maxFreqFile, max); if (ZE_RESULT_SUCCESS != result) { return result; } return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getRequest(double &request) { int intval; ze_result_t result = pSysfsAccess->read(requestFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } request = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getTdp(double &tdp) { int intval; ze_result_t result = pSysfsAccess->read(tdpFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } tdp = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getActual(double &actual) { int intval; ze_result_t result = pSysfsAccess->read(actualFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } actual = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getEfficient(double &efficient) { int intval; ze_result_t result = pSysfsAccess->read(efficientFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } efficient = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMaxVal(double &maxVal) { int intval; ze_result_t result = pSysfsAccess->read(maxValFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } maxVal = intval; return ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getMinVal(double &minVal) { int intval; ze_result_t result = pSysfsAccess->read(minValFreqFile, intval); if (ZE_RESULT_SUCCESS != result) { return result; } minVal = intval; return 
ZE_RESULT_SUCCESS; } ze_result_t LinuxFrequencyImp::getThrottleReasons(uint32_t &throttleReasons) { throttleReasons = ZET_FREQ_THROTTLE_REASONS_NONE; return ZE_RESULT_SUCCESS; } LinuxFrequencyImp::LinuxFrequencyImp(OsSysman *pOsSysman) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); } OsFrequency *OsFrequency::create(OsSysman *pOsSysman) { LinuxFrequencyImp *pLinuxFrequencyImp = new LinuxFrequencyImp(pOsSysman); return static_cast(pLinuxFrequencyImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/os_frequency.h000066400000000000000000000016611363734646600303550ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include namespace L0 { class OsFrequency { public: virtual ze_result_t getMin(double &min) = 0; virtual ze_result_t setMin(double min) = 0; virtual ze_result_t getMax(double &max) = 0; virtual ze_result_t setMax(double max) = 0; virtual ze_result_t getRequest(double &request) = 0; virtual ze_result_t getTdp(double &tdp) = 0; virtual ze_result_t getActual(double &actual) = 0; virtual ze_result_t getEfficient(double &efficient) = 0; virtual ze_result_t getMaxVal(double &maxVal) = 0; virtual ze_result_t getMinVal(double &minVal) = 0; virtual ze_result_t getThrottleReasons(uint32_t &throttleReasons) = 0; static OsFrequency *create(OsSysman *pOsSysman); virtual ~OsFrequency() {} }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/windows/000077500000000000000000000000001363734646600271705ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/windows/CMakeLists.txt000066400000000000000000000007611363734646600317340ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
set(L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_frequency_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_FREQUENCY_WINDOWS}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/frequency/windows/os_frequency_imp.cpp000066400000000000000000000042271363734646600332500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/frequency/os_frequency.h" namespace L0 { class WddmFrequencyImp : public OsFrequency { public: ze_result_t getMin(double &min) override; ze_result_t setMin(double min) override; ze_result_t getMax(double &max) override; ze_result_t setMax(double max) override; ze_result_t getRequest(double &request) override; ze_result_t getTdp(double &tdp) override; ze_result_t getActual(double &actual) override; ze_result_t getEfficient(double &efficient) override; ze_result_t getMaxVal(double &maxVal) override; ze_result_t getMinVal(double &minVal) override; ze_result_t getThrottleReasons(uint32_t &throttleReasons) override; }; ze_result_t WddmFrequencyImp::getMin(double &min) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::setMin(double min) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getMax(double &max) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::setMax(double max) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getRequest(double &request) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getTdp(double &tdp) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getActual(double &actual) { return 
ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getEfficient(double &efficient) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getMaxVal(double &maxVal) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getMinVal(double &minVal) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmFrequencyImp::getThrottleReasons(uint32_t &throttleReasons) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } OsFrequency *OsFrequency::create(OsSysman *pOsSysman) { WddmFrequencyImp *pWddmFrequencyImp = new WddmFrequencyImp(); return static_cast(pWddmFrequencyImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/linux/000077500000000000000000000000001363734646600246345ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/linux/CMakeLists.txt000066400000000000000000000010361363734646600273740ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fs_access.cpp) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_LINUX ${L0_SRCS_TOOLS_SYSMAN_LINUX}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/linux/fs_access.cpp000066400000000000000000000265341363734646600273030ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/fs_access.h" #include #include #include #include #include #include #include namespace L0 { static ze_result_t getResult(int err) { if ((EPERM == err) || (EACCES == err)) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } else if (ENOENT == 
err) { return ZE_RESULT_ERROR_UNKNOWN; } else { return ZE_RESULT_ERROR_UNKNOWN; } } // Generic Filesystem Access FsAccess::FsAccess() { } FsAccess *FsAccess::create() { return new FsAccess(); } ze_result_t FsAccess::read(const std::string file, std::string &val) { // Read a single line from text file without trailing newline std::ifstream fs; val.clear(); fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } fs >> val; if (fs.fail()) { fs.close(); return getResult(errno); } fs.close(); // Strip trailing newline if (val.back() == '\n') { val.pop_back(); } return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::read(const std::string file, std::vector &val) { // Read a entire text file, one line per vector entry std::string line; std::ifstream fs; val.clear(); fs.open(file.c_str()); if (fs.fail()) { return getResult(errno); } while (std::getline(fs, line)) { if (fs.fail()) { fs.close(); return getResult(errno); } if (line.back() == '\n') { line.pop_back(); } val.push_back(line); } fs.close(); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::write(const std::string file, const std::string val) { std::ofstream sysfs; sysfs.open(file.c_str()); if (sysfs.fail()) { return getResult(errno); } sysfs << val << std::endl; if (sysfs.fail()) { sysfs.close(); return getResult(errno); } sysfs.close(); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::canRead(const std::string file) { if (access(file.c_str(), F_OK)) { return ZE_RESULT_ERROR_UNKNOWN; } if (access(file.c_str(), R_OK)) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::canWrite(const std::string file) { if (access(file.c_str(), F_OK)) { return ZE_RESULT_ERROR_UNKNOWN; } if (access(file.c_str(), W_OK)) { return ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS; } return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::readSymLink(const std::string path, std::string &val) { // returns the value of symlink at path char buf[PATH_MAX]; ssize_t len = ::readlink(path.c_str(), 
buf, PATH_MAX - 1); if (len < 0) { return getResult(errno); } buf[len] = '\0'; val = std::string(buf); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::getRealPath(const std::string path, std::string &val) { // returns the real file path after resolving all symlinks in path char buf[PATH_MAX]; char *realPath = ::realpath(path.c_str(), buf); if (!realPath) { return getResult(errno); } val = std::string(buf); return ZE_RESULT_SUCCESS; } ze_result_t FsAccess::listDirectory(const std::string path, std::vector &list) { list.clear(); ::DIR *procDir = ::opendir(path.c_str()); if (!procDir) { return ZE_RESULT_ERROR_UNKNOWN; } struct ::dirent *ent; while (NULL != (ent = ::readdir(procDir))) { // Ignore . and .. std::string name = std::string(ent->d_name); if (!name.compare(".") || !name.compare("..")) { continue; } list.push_back(std::string(ent->d_name)); } ::closedir(procDir); return ZE_RESULT_SUCCESS; } std::string FsAccess::getBaseName(const std::string path) { size_t pos = path.rfind("/"); if (std::string::npos == pos) { return path; } return path.substr(pos + 1, std::string::npos); } std::string FsAccess::getDirName(const std::string path) { size_t pos = path.rfind("/"); if (std::string::npos == pos) { return std::string(""); } // Include trailing slash return path.substr(0, pos); } ze_bool_t FsAccess::fileExists(const std::string file) { struct stat sb; if (stat(file.c_str(), &sb) == 0) { return true; } return false; } // Procfs Access const std::string ProcfsAccess::procDir = "/proc/"; const std::string ProcfsAccess::fdDir = "/fd/"; std::string ProcfsAccess::fullPath(const ::pid_t pid) { // Returns the full path for proc entry for process pid return std::string(procDir + std::to_string(pid)); } std::string ProcfsAccess::fdDirPath(const ::pid_t pid) { // Returns the full path to file descritpor directory // for process pid return std::string(fullPath(pid) + fdDir); } std::string ProcfsAccess::fullFdPath(const ::pid_t pid, const int fd) { // Returns the full path for 
filedescriptor fd // for process pid return std::string(fdDirPath(pid) + std::to_string(fd)); } ProcfsAccess *ProcfsAccess::create() { return new ProcfsAccess(); } ze_result_t ProcfsAccess::listProcesses(std::vector<::pid_t> &list) { // Returns a vector with all the active process ids in the system list.clear(); std::vector dir; ze_result_t result = FsAccess::listDirectory(procDir, dir); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&file : dir) { ::pid_t pid; std::istringstream stream(file); stream >> pid; if (stream.fail()) { // Non numeric filename, not a process, skip continue; } list.push_back(pid); } return ZE_RESULT_SUCCESS; } ze_result_t ProcfsAccess::getFileDescriptors(const ::pid_t pid, std::vector &list) { // Returns a vector with all the filedescriptor numbers opened by a pid list.clear(); std::vector dir; ze_result_t result = FsAccess::listDirectory(fdDirPath(pid), dir); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&file : dir) { int fd; std::istringstream stream(file); stream >> fd; if (stream.fail()) { // Non numeric filename, not a file descriptor continue; } list.push_back(fd); } return ZE_RESULT_SUCCESS; } ze_result_t ProcfsAccess::getFileName(const ::pid_t pid, const int fd, std::string &val) { // Given a process id and a file descriptor number // return full name of the open file. 
// NOTE: For sockets, the name will be of the format "socket:[nnnnnnn]" return FsAccess::readSymLink(fullFdPath(pid, fd), val); } ::pid_t ProcfsAccess::myProcessId() { return ::getpid(); } // Sysfs Access const std::string SysfsAccess::drmPath = "/sys/class/drm/"; const std::string SysfsAccess::devicesPath = "device/drm/"; const std::string SysfsAccess::primaryDevName = "card"; const std::string SysfsAccess::drmDriverDevNodeDir = "/dev/dri/"; const std::string SysfsAccess::intelGpuBindEntry = "/sys/bus/pci/drivers/i915/bind"; const std::string SysfsAccess::intelGpuUnbindEntry = "/sys/bus/pci/drivers/i915/unbind"; std::string SysfsAccess::fullPath(const std::string file) { // Prepend sysfs directory path for this device return std::string(dirname + file); } SysfsAccess::SysfsAccess(const std::string dev) { // dev could be either /dev/dri/cardX or /dev/dri/renderDX std::string fileName = FsAccess::getBaseName(dev); std::string devicesDir = drmPath + fileName + std::string("/") + devicesPath; FsAccess::listDirectory(devicesDir, deviceNames); for (auto &&next : deviceNames) { if (!next.compare(0, primaryDevName.length(), primaryDevName)) { dirname = drmPath + next + std::string("/"); break; } } } SysfsAccess *SysfsAccess::create(const std::string dev) { return new SysfsAccess(dev); } ze_result_t SysfsAccess::canRead(const std::string file) { // Prepend sysfs directory path and call the base canRead return FsAccess::canRead(fullPath(file)); } ze_result_t SysfsAccess::canWrite(const std::string file) { // Prepend sysfs directory path and call the base canWrite return FsAccess::canWrite(fullPath(file)); } ze_result_t SysfsAccess::read(const std::string file, std::string &val) { // Prepend sysfs directory path and call the base read return FsAccess::read(fullPath(file).c_str(), val); } ze_result_t SysfsAccess::read(const std::string file, int &val) { std::string str; ze_result_t result; result = FsAccess::read(fullPath(file), str); if (ZE_RESULT_SUCCESS != result) { return 
result; } std::istringstream stream(str); stream >> val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t SysfsAccess::read(const std::string file, double &val) { std::string str; ze_result_t result; result = FsAccess::read(fullPath(file), str); if (ZE_RESULT_SUCCESS != result) { return result; } std::istringstream stream(str); stream >> val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; } ze_result_t SysfsAccess::read(const std::string file, std::vector &val) { // Prepend sysfs directory path and call the base read return FsAccess::read(fullPath(file), val); } ze_result_t SysfsAccess::write(const std::string file, const std::string val) { // Prepend sysfs directory path and call the base write return FsAccess::write(fullPath(file).c_str(), val); } ze_result_t SysfsAccess::write(const std::string file, const int val) { std::ostringstream stream; stream << val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return FsAccess::write(fullPath(file), stream.str()); } ze_result_t SysfsAccess::write(const std::string file, const double val) { std::ostringstream stream; stream << val; if (stream.fail()) { return ZE_RESULT_ERROR_UNKNOWN; } return FsAccess::write(fullPath(file), stream.str()); } ze_result_t SysfsAccess::readSymLink(const std::string path, std::string &val) { // Prepend sysfs directory path and call the base readSymLink return FsAccess::readSymLink(fullPath(path).c_str(), val); } ze_result_t SysfsAccess::getRealPath(const std::string path, std::string &val) { // Prepend sysfs directory path and call the base getRealPath return FsAccess::getRealPath(fullPath(path).c_str(), val); } ze_result_t SysfsAccess::bindDevice(std::string device) { return FsAccess::write(intelGpuBindEntry, device); } ze_result_t SysfsAccess::unbindDevice(std::string device) { return FsAccess::write(intelGpuUnbindEntry, device); } ze_bool_t SysfsAccess::fileExists(const std::string file) { // Prepend 
// Returns true when `dev` (a full pathname) lies in the DRM device node
// directory and its base name is one of the node names recorded for this
// device in deviceNames.
ze_bool_t SysfsAccess::isMyDeviceFile(const std::string dev) {
    // getDirName() returns the directory without its final '/', whereas
    // drmDriverDevNodeDir ends with one; normalise before comparing so the
    // check works whichever form getDirName() produces.
    std::string dirName = getDirName(dev);
    if (dirName.empty() || dirName.back() != '/') {
        dirName += '/';
    }
    if (dirName != drmDriverDevNodeDir) {
        return false;
    }

    const std::string baseName = getBaseName(dev);
    for (const auto &next : deviceNames) {
        if (baseName == next) {
            return true;
        }
    }
    return false;
}
pid, const int fd); static const std::string procDir; static const std::string fdDir; }; class SysfsAccess : private FsAccess { public: static SysfsAccess *create(const std::string file); ~SysfsAccess() = default; ze_result_t canRead(const std::string file); ze_result_t canWrite(const std::string file); ze_result_t read(const std::string file, std::string &val); ze_result_t read(const std::string file, int &val); ze_result_t read(const std::string file, double &val); ze_result_t read(const std::string file, std::vector &val); ze_result_t write(const std::string file, const std::string val); ze_result_t write(const std::string file, const int val); ze_result_t write(const std::string file, const double val); ze_result_t readSymLink(const std::string path, std::string &buf); ze_result_t getRealPath(const std::string path, std::string &buf); ze_result_t bindDevice(const std::string device); ze_result_t unbindDevice(const std::string device); ze_bool_t fileExists(const std::string file); ze_bool_t isMyDeviceFile(const std::string dev); private: SysfsAccess() = delete; SysfsAccess(const std::string file); std::string fullPath(const std::string file); std::vector deviceNames; std::string dirname; static const std::string drmPath; static const std::string devicesPath; static const std::string primaryDevName; static const std::string drmDriverDevNodeDir; static const std::string intelGpuBindEntry; static const std::string intelGpuUnbindEntry; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp000066400000000000000000000036361363734646600302300ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" namespace L0 { ze_result_t LinuxSysmanImp::init() { pFsAccess = FsAccess::create(); UNRECOVERABLE_IF(nullptr == pFsAccess); pProcfsAccess = 
ProcfsAccess::create(); UNRECOVERABLE_IF(nullptr == pProcfsAccess); Device *pDevice = Device::fromHandle(pParentSysmanImp->hCoreDevice); NEO::OSInterface &OsInterface = pDevice->getOsInterface(); NEO::Drm *pDrm = OsInterface.get()->getDrm(); int myDeviceFd = pDrm->getFileDescriptor(); std::string myDeviceName; ze_result_t result = pProcfsAccess->getFileName(pProcfsAccess->myProcessId(), myDeviceFd, myDeviceName); if (ZE_RESULT_SUCCESS != result) { return result; } pSysfsAccess = SysfsAccess::create(myDeviceName); UNRECOVERABLE_IF(nullptr == pSysfsAccess); return ZE_RESULT_SUCCESS; } FsAccess &LinuxSysmanImp::getFsAccess() { UNRECOVERABLE_IF(nullptr == pFsAccess); return *pFsAccess; } ProcfsAccess &LinuxSysmanImp::getProcfsAccess() { UNRECOVERABLE_IF(nullptr == pProcfsAccess); return *pProcfsAccess; } SysfsAccess &LinuxSysmanImp::getSysfsAccess() { UNRECOVERABLE_IF(nullptr == pSysfsAccess); return *pSysfsAccess; } LinuxSysmanImp::LinuxSysmanImp(SysmanImp *pParentSysmanImp) { this->pParentSysmanImp = pParentSysmanImp; } LinuxSysmanImp::~LinuxSysmanImp() { if (nullptr != pSysfsAccess) { delete pSysfsAccess; } if (nullptr != pProcfsAccess) { delete pProcfsAccess; } if (nullptr != pFsAccess) { delete pFsAccess; } } OsSysman *OsSysman::create(SysmanImp *pParentSysmanImp) { LinuxSysmanImp *pLinuxSysmanImp = new LinuxSysmanImp(pParentSysmanImp); return static_cast(pLinuxSysmanImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/linux/os_sysman_imp.h000066400000000000000000000020451363734646600276660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include "level_zero/tools/source/sysman/sysman_imp.h" namespace L0 { class LinuxSysmanImp : public OsSysman { 
public: LinuxSysmanImp(SysmanImp *pParentSysmanImp); ~LinuxSysmanImp() override; // Don't allow copies of the LinuxSysmanImp object LinuxSysmanImp(const LinuxSysmanImp &obj) = delete; LinuxSysmanImp &operator=(const LinuxSysmanImp &obj) = delete; ze_result_t init() override; FsAccess &getFsAccess(); ProcfsAccess &getProcfsAccess(); SysfsAccess &getSysfsAccess(); private: LinuxSysmanImp() = delete; SysmanImp *pParentSysmanImp = nullptr; FsAccess *pFsAccess = nullptr; ProcfsAccess *pProcfsAccess = nullptr; SysfsAccess *pSysfsAccess = nullptr; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/os_sysman.h000066400000000000000000000005101363734646600256550ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { struct SysmanImp; struct OsSysman { virtual ~OsSysman(){}; virtual ze_result_t init() = 0; static OsSysman *create(SysmanImp *pSysmanImp); }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/000077500000000000000000000000001363734646600242505ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/CMakeLists.txt000066400000000000000000000011561363734646600270130ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_PCI ${CMAKE_CURRENT_SOURCE_DIR}/pci.h ${CMAKE_CURRENT_SOURCE_DIR}/pci_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pci_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_pci.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_PCI} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PCI ${L0_SRCS_TOOLS_SYSMAN_PCI}) 
compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/linux/000077500000000000000000000000001363734646600254075ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/linux/CMakeLists.txt000066400000000000000000000007121363734646600301470ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_PCI_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_pci_imp.cpp ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_PCI_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PCI_LINUX ${L0_SRCS_TOOLS_SYSMAN_PCI_LINUX}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/linux/os_pci_imp.cpp000066400000000000000000000117251363734646600302420ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" #include "level_zero/core/source/device/device.h" #include "sysman/linux/os_sysman_imp.h" #include "sysman/pci/os_pci.h" #include "sysman/pci/pci_imp.h" #include namespace L0 { constexpr uint8_t maxPciBars = 6; class LinuxPciImp : public OsPci { public: ze_result_t getPciBdf(std::string &bdf) override; ze_result_t getMaxLinkSpeed(double &maxLinkSpeed) override; ze_result_t getMaxLinkWidth(uint32_t &maxLinkwidth) override; ze_result_t getLinkGen(uint32_t &linkGen) override; ze_result_t initializeBarProperties(std::vector &pBarProperties) override; LinuxPciImp(OsSysman *pOsSysman); ~LinuxPciImp() override = default; // Don't allow copies of the LinuxPciImp object LinuxPciImp(const LinuxPciImp &obj) = delete; LinuxPciImp &operator=(const LinuxPciImp &obj) = delete; private: SysfsAccess *pSysfsAccess; static const std::string deviceDir; static const std::string resourceFile; 
// Parses one line of the PCI "resource" file. The line is expected to hold
// three hexadecimal fields: start address, end address and flags.
//   baseAddr <- start
//   barSize  <- end - start + 1
//   barFlags <- low four bits of flags
// When fewer than three fields can be parsed, all three outputs are set to 0
// (previously the locals were read uninitialised in that case).
void getBarBaseAndSize(std::string readBytes, uint64_t &baseAddr, uint64_t &barSize, uint64_t &barFlags) {
    unsigned long long start = 0;
    unsigned long long end = 0;
    unsigned long long flags = 0;

    std::istringstream fields(readBytes);
    // std::hex stays in effect for all three extractions.
    fields >> std::hex >> start >> end >> flags;
    if (fields.fail()) {
        baseAddr = 0;
        barSize = 0;
        barFlags = 0;
        return;
    }

    barFlags = flags & 0xf;
    baseAddr = start;
    barSize = end - start + 1;
}
{ std::vector ReadBytes; ze_result_t result = pSysfsAccess->read(resourceFile, ReadBytes); if (result != ZE_RESULT_SUCCESS) { return result; } for (uint32_t i = 0; i <= maxPciBars; i++) { uint64_t baseAddr, barSize, barFlags; getBarBaseAndSize(ReadBytes[i], baseAddr, barSize, barFlags); if (baseAddr) { zet_pci_bar_properties_t *pBarProp = new zet_pci_bar_properties_t; pBarProp->index = i; pBarProp->base = baseAddr; pBarProp->size = barSize; // Bar Flags Desc. // Bit-0 - Value 0x0 -> MMIO type BAR // Bit-0 - Value 0x1 -> I/O Type BAR // Bit-1 - Reserved // Bit-2 - Valid only for MMIO type BAR // Value 0x1 -> 64bit BAR*/ pBarProp->type = (barFlags & 0x1) ? ZET_PCI_BAR_TYPE_VGA_IO : ZET_PCI_BAR_TYPE_MMIO; if (i == 6) { // the 7th entry of resource file is expected to be ROM BAR pBarProp->type = ZET_PCI_BAR_TYPE_ROM; } pBarProperties.push_back(pBarProp); } } if (pBarProperties.size() == 0) { result = ZE_RESULT_ERROR_UNKNOWN; } return result; } LinuxPciImp::LinuxPciImp(OsSysman *pOsSysman) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); } OsPci *OsPci::create(OsSysman *pOsSysman) { LinuxPciImp *pLinuxPciImp = new LinuxPciImp(pOsSysman); return static_cast(pLinuxPciImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/os_pci.h000066400000000000000000000013241363734646600256750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include #include #include namespace L0 { class OsPci { public: virtual ze_result_t getPciBdf(std::string &bdf) = 0; virtual ze_result_t getMaxLinkSpeed(double &maxLinkSpeed) = 0; virtual ze_result_t getMaxLinkWidth(uint32_t &maxLinkWidth) = 0; virtual ze_result_t getLinkGen(uint32_t &linkGen) = 0; virtual ze_result_t initializeBarProperties(std::vector &pBarProperties) = 0; static OsPci *create(OsSysman *pOsSysman); virtual 
~OsPci() {} }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/pci.h000066400000000000000000000006711363734646600252000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { class Pci { public: virtual ~Pci(){}; virtual ze_result_t pciStaticProperties(zet_pci_properties_t *pProperties) = 0; virtual ze_result_t pciGetInitializedBars(uint32_t *pCount, zet_pci_bar_properties_t *pProperties) = 0; virtual void init() = 0; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/pci_imp.cpp000066400000000000000000000062651363734646600264050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "pci_imp.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { // // While computing the PCIe bandwidth, also consider that due to 8b/10b encoding // in PCIe gen1 and gen2 real bandwidth will be reduced by 20%, // And in case of gen3 and above due to 128b/130b encoding real bandwidth is // reduced by approx 1.54% as compared to theoretical bandwidth. // In below method, get real PCIe speed in pcieSpeedWithEnc in Mega bits per second // pcieSpeedWithEnc = maxLinkSpeedInGt * (Gigabit to Megabit) * Encoding = // maxLinkSpeedInGt * 1000 * Encoding // uint64_t convertPcieSpeedFromGTsToBs(double maxLinkSpeedInGt) { double pcieSpeedWithEnc; if ((maxLinkSpeedInGt == 16) || (maxLinkSpeedInGt == 8)) { pcieSpeedWithEnc = maxLinkSpeedInGt * 1000 * 128 / 130; } else if ((maxLinkSpeedInGt == 5) || (maxLinkSpeedInGt == 2.5)) { pcieSpeedWithEnc = maxLinkSpeedInGt * 1000 * 8 / 10; } else { pcieSpeedWithEnc = 0; } // // PCIE speed we got above is in Mega bits per second // Convert that speed in bytes/second. 
// Now, because 1Mb/s = (1000*1000)/8 bytes/second = 125000 bytes/second // pcieSpeedWithEnc = pcieSpeedWithEnc * 125000; return static_cast(pcieSpeedWithEnc); } ze_result_t PciImp::pciStaticProperties(zet_pci_properties_t *pProperties) { *pProperties = pciProperties; return ZE_RESULT_SUCCESS; } ze_result_t PciImp::pciGetInitializedBars(uint32_t *pCount, zet_pci_bar_properties_t *pProperties) { if (pProperties == nullptr) { *pCount = static_cast(pciBarProperties.size()); return ZE_RESULT_SUCCESS; } else { *pCount = std::min(*pCount, static_cast(pciBarProperties.size())); for (uint32_t i = 0; i < *pCount; i++) { pProperties[i] = *pciBarProperties[i]; } } return ZE_RESULT_SUCCESS; } void PciImp::init() { if (pOsPci == nullptr) { pOsPci = OsPci::create(pOsSysman); } UNRECOVERABLE_IF(nullptr == pOsPci); std::string bdf; pOsPci->getPciBdf(bdf); if (bdf.empty()) { pciProperties.address.domain = 0; pciProperties.address.bus = 0; pciProperties.address.device = 0; pciProperties.address.function = 0; } else { sscanf(bdf.c_str(), "%04x:%02x:%02x.%d", &pciProperties.address.domain, &pciProperties.address.bus, &pciProperties.address.device, &pciProperties.address.function); } uint32_t maxLinkWidth, gen; uint64_t maxBandWidth; double maxLinkSpeed; pOsPci->getMaxLinkSpeed(maxLinkSpeed); pOsPci->getMaxLinkWidth(maxLinkWidth); maxBandWidth = maxLinkWidth * convertPcieSpeedFromGTsToBs(maxLinkSpeed); pciProperties.maxSpeed.maxBandwidth = maxBandWidth; pciProperties.maxSpeed.width = maxLinkWidth; pOsPci->getLinkGen(gen); pciProperties.maxSpeed.gen = gen; pOsPci->initializeBarProperties(pciBarProperties); } PciImp::~PciImp() { if (nullptr != pOsPci) { delete pOsPci; } } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/pci_imp.h000066400000000000000000000016531363734646600260460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include "os_pci.h" #include "pci.h" 
#include namespace L0 { class PciImp : public Pci { public: void init() override; ze_result_t pciStaticProperties(zet_pci_properties_t *pProperties) override; ze_result_t pciGetInitializedBars(uint32_t *pCount, zet_pci_bar_properties_t *pProperties) override; PciImp(OsSysman *pOsSysman) : pOsSysman(pOsSysman) { pOsPci = nullptr; }; ~PciImp() override; PciImp(OsPci *pOsPci) : pOsPci(pOsPci) { init(); }; // Don't allow copies of the PciImp object PciImp(const PciImp &obj) = delete; PciImp &operator=(const PciImp &obj) = delete; private: OsSysman *pOsSysman; OsPci *pOsPci; zet_pci_properties_t pciProperties; std::vector pciBarProperties; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/windows/000077500000000000000000000000001363734646600257425ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/windows/CMakeLists.txt000066400000000000000000000007231363734646600305040ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_pci_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_PCI_WINDOWS}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/pci/windows/os_pci_imp.cpp000066400000000000000000000025511363734646600305720ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/pci/os_pci.h" #include "level_zero/tools/source/sysman/windows/os_sysman_imp.h" namespace L0 { class WddmPciImp : public OsPci { public: ze_result_t getPciBdf(std::string &bdf) override; ze_result_t getMaxLinkSpeed(double &maxLinkSpeed) override; ze_result_t 
getMaxLinkWidth(uint32_t &maxLinkwidth) override; ze_result_t getLinkGen(uint32_t &linkGen) override; ze_result_t initializeBarProperties(std::vector &pBarProperties) override; }; ze_result_t WddmPciImp::getPciBdf(std::string &bdf) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmPciImp::getMaxLinkSpeed(double &maxLinkSpeed) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmPciImp::getMaxLinkWidth(uint32_t &maxLinkwidth) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmPciImp::getLinkGen(uint32_t &linkGen) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmPciImp::initializeBarProperties(std::vector &pBarProperties) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } OsPci *OsPci::create(OsSysman *pOsSysman) { WddmPciImp *pWddmPciImp = new WddmPciImp(); return static_cast(pWddmPciImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/000077500000000000000000000000001363734646600251415ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/CMakeLists.txt000066400000000000000000000012711363734646600277020ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_STANDBY ${CMAKE_CURRENT_SOURCE_DIR}/standby.cpp ${CMAKE_CURRENT_SOURCE_DIR}/standby.h ${CMAKE_CURRENT_SOURCE_DIR}/standby_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/standby_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_standby.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_STANDBY} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_STANDBY 
${L0_SRCS_TOOLS_SYSMAN_STANDBY})compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/linux/000077500000000000000000000000001363734646600263005ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/linux/CMakeLists.txt000066400000000000000000000007361363734646600310460ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_standby_imp.cpp ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX ${L0_SRCS_TOOLS_SYSMAN_STANDBY_LINUX}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/linux/os_standby_imp.cpp000066400000000000000000000043731363734646600320250ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" #include "level_zero/core/source/device/device.h" #include "sysman/linux/os_sysman_imp.h" #include "sysman/standby/os_standby.h" #include "sysman/standby/standby_imp.h" namespace L0 { class LinuxStandbyImp : public OsStandby { public: ze_result_t getMode(zet_standby_promo_mode_t &mode) override; ze_result_t setMode(zet_standby_promo_mode_t mode) override; LinuxStandbyImp(OsSysman *pOsSysman); ~LinuxStandbyImp() override = default; // Don't allow copies of the LinuxStandbyImp object LinuxStandbyImp(const LinuxStandbyImp &obj) = delete; LinuxStandbyImp &operator=(const LinuxStandbyImp &obj) = delete; private: SysfsAccess *pSysfsAccess; static const std::string standbyModeFile; static const int standbyModeDefault = 1; static const int standbyModeNever = 0; }; const std::string 
LinuxStandbyImp::standbyModeFile("power/rc6_enable"); ze_result_t LinuxStandbyImp::getMode(zet_standby_promo_mode_t &mode) { int currentMode; ze_result_t result = pSysfsAccess->read(standbyModeFile, currentMode); if (ZE_RESULT_SUCCESS != result) { return result; } if (standbyModeDefault == currentMode) { mode = ZET_STANDBY_PROMO_MODE_DEFAULT; return ZE_RESULT_SUCCESS; } if (standbyModeNever == currentMode) { mode = ZET_STANDBY_PROMO_MODE_NEVER; return ZE_RESULT_SUCCESS; } return ZE_RESULT_ERROR_UNKNOWN; } ze_result_t LinuxStandbyImp::setMode(zet_standby_promo_mode_t mode) { if (ZET_STANDBY_PROMO_MODE_DEFAULT == mode) { return pSysfsAccess->write(standbyModeFile, standbyModeDefault); } return pSysfsAccess->write(standbyModeFile, standbyModeNever); } LinuxStandbyImp::LinuxStandbyImp(OsSysman *pOsSysman) { LinuxSysmanImp *pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); } OsStandby *OsStandby::create(OsSysman *pOsSysman) { LinuxStandbyImp *pLinuxStandbyImp = new LinuxStandbyImp(pOsSysman); return static_cast(pLinuxStandbyImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/os_standby.h000066400000000000000000000007331363734646600274620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include namespace L0 { class OsStandby { public: virtual ze_result_t getMode(zet_standby_promo_mode_t &mode) = 0; virtual ze_result_t setMode(zet_standby_promo_mode_t mode) = 0; static OsStandby *create(OsSysman *pOsSysman); virtual ~OsStandby() {} }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/standby.cpp000066400000000000000000000016171363734646600273160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "standby.h" #include "standby_imp.h" namespace L0 { 
StandbyHandleContext::~StandbyHandleContext() { for (Standby *pStandby : handle_list) { delete pStandby; } } ze_result_t StandbyHandleContext::init() { Standby *pStandby = new StandbyImp(pOsSysman); handle_list.push_back(pStandby); return ZE_RESULT_SUCCESS; } ze_result_t StandbyHandleContext::standbyGet(uint32_t *pCount, zet_sysman_standby_handle_t *phStandby) { if (nullptr == phStandby) { *pCount = static_cast(handle_list.size()); return ZE_RESULT_SUCCESS; } uint32_t i = 0; for (Standby *standby : handle_list) { if (i >= *pCount) break; phStandby[i++] = standby->toHandle(); } *pCount = i; return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/standby.h000066400000000000000000000020761363734646600267630ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include struct _zet_sysman_standby_handle_t {}; namespace L0 { struct OsSysman; class Standby : _zet_sysman_standby_handle_t { public: virtual ~Standby() {} virtual ze_result_t standbyGetProperties(zet_standby_properties_t *pProperties) = 0; virtual ze_result_t standbyGetMode(zet_standby_promo_mode_t *pMode) = 0; virtual ze_result_t standbySetMode(const zet_standby_promo_mode_t mode) = 0; static Standby *fromHandle(zet_sysman_standby_handle_t handle) { return static_cast(handle); } inline zet_sysman_standby_handle_t toHandle() { return this; } }; struct StandbyHandleContext { StandbyHandleContext(OsSysman *pOsSysman) : pOsSysman(pOsSysman){}; ~StandbyHandleContext(); ze_result_t init(); ze_result_t standbyGet(uint32_t *pCount, zet_sysman_standby_handle_t *phStandby); OsSysman *pOsSysman; std::vector handle_list; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/standby_imp.cpp000066400000000000000000000017741363734646600301670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "standby_imp.h" #include "shared/source/helpers/debug_helpers.h" #include namespace L0 { ze_result_t StandbyImp::standbyGetProperties(zet_standby_properties_t *pProperties) { *pProperties = standbyProperties; return ZE_RESULT_SUCCESS; } ze_result_t StandbyImp::standbyGetMode(zet_standby_promo_mode_t *pMode) { return pOsStandby->getMode(*pMode); } ze_result_t StandbyImp::standbySetMode(const zet_standby_promo_mode_t mode) { return pOsStandby->setMode(mode); } void StandbyImp::init() { standbyProperties.type = ZET_STANDBY_TYPE_GLOBAL; // Currently the only defined type standbyProperties.onSubdevice = false; standbyProperties.subdeviceId = 0; } StandbyImp::StandbyImp(OsSysman *pOsSysman) { pOsStandby = OsStandby::create(pOsSysman); UNRECOVERABLE_IF(nullptr == pOsStandby); init(); } StandbyImp::~StandbyImp() { delete pOsStandby; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/standby_imp.h000066400000000000000000000016201363734646600276220ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include "os_standby.h" #include "standby.h" namespace L0 { class StandbyImp : public Standby { public: ze_result_t standbyGetProperties(zet_standby_properties_t *pProperties) override; ze_result_t standbyGetMode(zet_standby_promo_mode_t *pMode) override; ze_result_t standbySetMode(const zet_standby_promo_mode_t mode) override; StandbyImp(OsSysman *pOsSysman); ~StandbyImp() override; StandbyImp(OsStandby *pOsStandby) : pOsStandby(pOsStandby) { init(); }; // Don't allow copies of the StandbyImp object StandbyImp(const StandbyImp &obj) = delete; StandbyImp &operator=(const StandbyImp &obj) = delete; private: OsStandby *pOsStandby; zet_standby_properties_t standbyProperties; void init(); }; } // namespace L0 
compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/windows/000077500000000000000000000000001363734646600266335ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/windows/CMakeLists.txt000066400000000000000000000007471363734646600314030ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_standby_imp.cpp ) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_STANDBY_WINDOWS}) compute-runtime-20.13.16352/level_zero/tools/source/sysman/standby/windows/os_standby_imp.cpp000066400000000000000000000014221363734646600323500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/standby/os_standby.h" namespace L0 { class WddmStandbyImp : public OsStandby { public: ze_result_t getMode(zet_standby_promo_mode_t &mode) override; ze_result_t setMode(zet_standby_promo_mode_t mode) override; }; ze_result_t WddmStandbyImp::getMode(zet_standby_promo_mode_t &mode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t WddmStandbyImp::setMode(zet_standby_promo_mode_t mode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } OsStandby *OsStandby::create(OsSysman *pOsSysman) { WddmStandbyImp *pWddmStandbyImp = new WddmStandbyImp(); return static_cast(pWddmStandbyImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman.cpp000066400000000000000000000027741363734646600255250ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/sysman.h" #include 
"level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/tools/source/sysman/sysman_imp.h" #include namespace L0 { static SysmanHandleContext *SysmanHandleContextInstance = nullptr; void SysmanHandleContext::init(ze_init_flag_t flag) { if (SysmanHandleContextInstance == nullptr) { SysmanHandleContextInstance = new SysmanHandleContext(); } } SysmanHandleContext::SysmanHandleContext() { DriverHandle *dH = L0::DriverHandle::fromHandle(GlobalDriver.get()); uint32_t count = 0; dH->getDevice(&count, nullptr); std::vector devices(count); dH->getDevice(&count, devices.data()); for (auto device : devices) { SysmanImp *sysman = new SysmanImp(device); UNRECOVERABLE_IF(!sysman); sysman->init(); handle_map[device] = sysman; } } ze_result_t SysmanHandleContext::sysmanGet(zet_device_handle_t hDevice, zet_sysman_handle_t *phSysman) { if (SysmanHandleContextInstance == nullptr) { return ZE_RESULT_ERROR_UNINITIALIZED; } auto got = SysmanHandleContextInstance->handle_map.find(hDevice); if (got == SysmanHandleContextInstance->handle_map.end()) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } *phSysman = got->second; return ZE_RESULT_SUCCESS; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman.h000066400000000000000000000072111363734646600251610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/frequency/frequency.h" #include "level_zero/tools/source/sysman/pci/pci.h" #include "level_zero/tools/source/sysman/standby/standby.h" #include "level_zero/tools/source/sysman/sysman_device/sysman_device.h" #include #include #include struct _zet_sysman_handle_t {}; namespace L0 { struct Sysman : _zet_sysman_handle_t { static Sysman *fromHandle(zet_sysman_handle_t handle) { return static_cast(handle); } inline zet_sysman_handle_t toHandle() { return this; } virtual ze_result_t 
deviceGetProperties(zet_sysman_properties_t *pProperties) = 0; virtual ze_result_t schedulerGetCurrentMode(zet_sched_mode_t *pMode) = 0; virtual ze_result_t schedulerGetTimeoutModeProperties(ze_bool_t getDefaults, zet_sched_timeout_properties_t *pConfig) = 0; virtual ze_result_t schedulerGetTimesliceModeProperties(ze_bool_t getDefaults, zet_sched_timeslice_properties_t *pConfig) = 0; virtual ze_result_t schedulerSetTimeoutMode(zet_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReboot) = 0; virtual ze_result_t schedulerSetTimesliceMode(zet_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReboot) = 0; virtual ze_result_t schedulerSetExclusiveMode(ze_bool_t *pNeedReboot) = 0; virtual ze_result_t schedulerSetComputeUnitDebugMode(ze_bool_t *pNeedReboot) = 0; virtual ze_result_t processesGetState(uint32_t *pCount, zet_process_state_t *pProcesses) = 0; virtual ze_result_t deviceReset() = 0; virtual ze_result_t deviceGetRepairStatus(zet_repair_status_t *pRepairStatus) = 0; virtual ze_result_t pciGetProperties(zet_pci_properties_t *pProperties) = 0; virtual ze_result_t pciGetState(zet_pci_state_t *pState) = 0; virtual ze_result_t pciGetBars(uint32_t *pCount, zet_pci_bar_properties_t *pProperties) = 0; virtual ze_result_t pciGetStats(zet_pci_stats_t *pStats) = 0; virtual ze_result_t powerGet(uint32_t *pCount, zet_sysman_pwr_handle_t *phPower) = 0; virtual ze_result_t frequencyGet(uint32_t *pCount, zet_sysman_freq_handle_t *phFrequency) = 0; virtual ze_result_t engineGet(uint32_t *pCount, zet_sysman_engine_handle_t *phEngine) = 0; virtual ze_result_t standbyGet(uint32_t *pCount, zet_sysman_standby_handle_t *phStandby) = 0; virtual ze_result_t firmwareGet(uint32_t *pCount, zet_sysman_firmware_handle_t *phFirmware) = 0; virtual ze_result_t memoryGet(uint32_t *pCount, zet_sysman_mem_handle_t *phMemory) = 0; virtual ze_result_t fabricPortGet(uint32_t *pCount, zet_sysman_fabric_port_handle_t *phPort) = 0; virtual ze_result_t temperatureGet(uint32_t *pCount, 
zet_sysman_temp_handle_t *phTemperature) = 0; virtual ze_result_t psuGet(uint32_t *pCount, zet_sysman_psu_handle_t *phPsu) = 0; virtual ze_result_t fanGet(uint32_t *pCount, zet_sysman_fan_handle_t *phFan) = 0; virtual ze_result_t ledGet(uint32_t *pCount, zet_sysman_led_handle_t *phLed) = 0; virtual ze_result_t rasGet(uint32_t *pCount, zet_sysman_ras_handle_t *phRas) = 0; virtual ze_result_t eventGet(zet_sysman_event_handle_t *phEvent) = 0; virtual ze_result_t diagnosticsGet(uint32_t *pCount, zet_sysman_diag_handle_t *phDiagnostics) = 0; virtual ~Sysman() = default; }; class SysmanHandleContext { public: SysmanHandleContext(); static void init(ze_init_flag_t flag); static ze_result_t sysmanGet(zet_device_handle_t hDevice, zet_sysman_handle_t *phSysman); private: std::unordered_map handle_map; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/000077500000000000000000000000001363734646600263265ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/CMakeLists.txt000066400000000000000000000013751363734646600310740ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_DEVICE ${CMAKE_CURRENT_SOURCE_DIR}/sysman_device.h ${CMAKE_CURRENT_SOURCE_DIR}/sysman_device_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sysman_device_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_device.h ) target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_DEVICE} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) add_subdirectory(linux) else() add_subdirectory(windows) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_DEVICE ${L0_SRCS_TOOLS_SYSMAN_DEVICE}) 
compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/linux/000077500000000000000000000000001363734646600274655ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/linux/CMakeLists.txt000066400000000000000000000007331363734646600322300ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_DEVICE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_device_imp.cpp ) if(UNIX) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_DEVICE_LINUX} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_DEVICE_LINUX ${L0_SRCS_TOOLS_SYSMAN_DEVICE_LINUX}) os_sysman_device_imp.cpp000066400000000000000000000213141363734646600343120ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/linux/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" #include "level_zero/core/source/device/device.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" #include "level_zero/tools/source/sysman/linux/os_sysman_imp.h" #include "level_zero/tools/source/sysman/sysman_device/os_sysman_device.h" #include "level_zero/tools/source/sysman/sysman_device/sysman_device_imp.h" #include namespace L0 { class LinuxSysmanDeviceImp : public OsSysmanDevice { public: void getSerialNumber(int8_t (&serialNumber)[ZET_STRING_PROPERTY_SIZE]) override; void getBoardNumber(int8_t (&boardNumber)[ZET_STRING_PROPERTY_SIZE]) override; void getBrandName(int8_t (&brandName)[ZET_STRING_PROPERTY_SIZE]) override; void getModelName(int8_t (&modelName)[ZET_STRING_PROPERTY_SIZE]) override; void getVendorName(int8_t (&vendorName)[ZET_STRING_PROPERTY_SIZE]) override; void getDriverVersion(int8_t 
(&driverVersion)[ZET_STRING_PROPERTY_SIZE]) override; ze_result_t reset() override; LinuxSysmanDeviceImp(OsSysman *pOsSysman); ~LinuxSysmanDeviceImp() override = default; // Don't allow copies of the LinuxSysmanDeviceImp object LinuxSysmanDeviceImp(const LinuxSysmanDeviceImp &obj) = delete; LinuxSysmanDeviceImp &operator=(const LinuxSysmanDeviceImp &obj) = delete; private: SysfsAccess *pSysfsAccess; LinuxSysmanImp *pLinuxSysmanImp; static const std::string deviceDir; static const std::string vendorFile; static const std::string deviceFile; static const std::string subsystemVendorFile; static const std::string driverFile; static const std::string functionLevelReset; }; const std::string vendorIntel("Intel(R) Corporation"); const std::string unknown("Unknown"); const std::string intelPciId("0x8086"); const std::string LinuxSysmanDeviceImp::deviceDir("device"); const std::string LinuxSysmanDeviceImp::vendorFile("device/vendor"); const std::string LinuxSysmanDeviceImp::deviceFile("device/device"); const std::string LinuxSysmanDeviceImp::subsystemVendorFile("device/subsystem_vendor"); const std::string LinuxSysmanDeviceImp::driverFile("device/driver"); const std::string LinuxSysmanDeviceImp::functionLevelReset("device/reset"); void LinuxSysmanDeviceImp::getSerialNumber(int8_t (&serialNumber)[ZET_STRING_PROPERTY_SIZE]) { std::copy(unknown.begin(), unknown.end(), serialNumber); serialNumber[unknown.size()] = '\0'; } void LinuxSysmanDeviceImp::getBoardNumber(int8_t (&boardNumber)[ZET_STRING_PROPERTY_SIZE]) { std::copy(unknown.begin(), unknown.end(), boardNumber); boardNumber[unknown.size()] = '\0'; } void LinuxSysmanDeviceImp::getBrandName(int8_t (&brandName)[ZET_STRING_PROPERTY_SIZE]) { std::string strVal; ze_result_t result = pSysfsAccess->read(subsystemVendorFile, strVal); if (ZE_RESULT_SUCCESS != result) { std::copy(unknown.begin(), unknown.end(), brandName); brandName[unknown.size()] = '\0'; return; } if (strVal.compare(intelPciId) == 0) { 
std::copy(vendorIntel.begin(), vendorIntel.end(), brandName); brandName[vendorIntel.size()] = '\0'; return; } std::copy(unknown.begin(), unknown.end(), brandName); brandName[unknown.size()] = '\0'; } void LinuxSysmanDeviceImp::getModelName(int8_t (&modelName)[ZET_STRING_PROPERTY_SIZE]) { std::string strVal; ze_result_t result = pSysfsAccess->read(deviceFile, strVal); if (ZE_RESULT_SUCCESS != result) { std::copy(unknown.begin(), unknown.end(), modelName); modelName[unknown.size()] = '\0'; return; } std::copy(strVal.begin(), strVal.end(), modelName); modelName[strVal.size()] = '\0'; } void LinuxSysmanDeviceImp::getVendorName(int8_t (&vendorName)[ZET_STRING_PROPERTY_SIZE]) { std::string strVal; ze_result_t result = pSysfsAccess->read(vendorFile, strVal); if (ZE_RESULT_SUCCESS != result) { std::copy(unknown.begin(), unknown.end(), vendorName); vendorName[unknown.size()] = '\0'; return; } if (strVal.compare(intelPciId) == 0) { std::copy(vendorIntel.begin(), vendorIntel.end(), vendorName); vendorName[vendorIntel.size()] = '\0'; return; } std::copy(unknown.begin(), unknown.end(), vendorName); vendorName[unknown.size()] = '\0'; } void LinuxSysmanDeviceImp::getDriverVersion(int8_t (&driverVersion)[ZET_STRING_PROPERTY_SIZE]) { std::copy(unknown.begin(), unknown.end(), driverVersion); driverVersion[unknown.size()] = '\0'; } static void getPidFdsForOpenDevice(ProcfsAccess *pProcfsAccess, SysfsAccess *pSysfsAccess, const ::pid_t pid, std::vector &deviceFds) { // Return a list of all the file descriptors of this process that point to this device std::vector fds; deviceFds.clear(); if (ZE_RESULT_SUCCESS != pProcfsAccess->getFileDescriptors(pid, fds)) { // Process exited. Not an error. Just ignore. return; } for (auto &&fd : fds) { std::string file; if (pProcfsAccess->getFileName(pid, fd, file) != ZE_RESULT_SUCCESS) { // Process closed this file. Not an error. Just ignore. 
continue; } if (pSysfsAccess->isMyDeviceFile(file)) { deviceFds.push_back(fd); } } } ze_result_t LinuxSysmanDeviceImp::reset() { FsAccess *pFsAccess = &pLinuxSysmanImp->getFsAccess(); ProcfsAccess *pProcfsAccess = &pLinuxSysmanImp->getProcfsAccess(); SysfsAccess *pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); std::string resetPath; std::string resetName; ze_result_t result = ZE_RESULT_SUCCESS; pSysfsAccess->getRealPath(functionLevelReset, resetPath); // Must run as root. Verify permission to perform reset. result = pFsAccess->canWrite(resetPath); if (ZE_RESULT_SUCCESS != result) { return result; } pSysfsAccess->getRealPath(deviceDir, resetName); resetName = pFsAccess->getBaseName(resetName); ::pid_t myPid = pProcfsAccess->myProcessId(); std::vector myPidFds; std::vector<::pid_t> processes; // For all processes in the system, see if the process // has this device open. result = pProcfsAccess->listProcesses(processes); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&pid : processes) { std::vector fds; getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds); if (pid == myPid) { // L0 is expected to have this file open. // Keep list of fds. Close before unbind. myPidFds = fds; } else if (!fds.empty()) { // Device is in use by another process. // Don't reset while in use. return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } } for (auto &&fd : myPidFds) { // Close open filedescriptors to the device // before unbinding device. // From this point forward, there is // graceful way to fail the reset call. // All future ze calls by this process for this // device will fail. ::close(fd); } // Unbind the device from the kernel driver. result = pSysfsAccess->unbindDevice(resetName); if (ZE_RESULT_SUCCESS != result) { return result; } // Verify no other process has the device open. // This addresses the window between checking // file handles above, and unbinding the device. 
result = pProcfsAccess->listProcesses(processes); if (ZE_RESULT_SUCCESS != result) { return result; } for (auto &&pid : processes) { std::vector fds; getPidFdsForOpenDevice(pProcfsAccess, pSysfsAccess, pid, fds); if (!fds.empty()) { // Process is using this device after we unbound it. // Send a sigkill to the process to force it to close // the device. Otherwise, the device cannot be rebound. ::kill(pid, SIGKILL); } } // Reset the device. result = pFsAccess->write(resetPath, "1"); if (ZE_RESULT_SUCCESS != result) { return result; } // Rebind the device to the kernel driver. result = pSysfsAccess->bindDevice(resetName); if (ZE_RESULT_SUCCESS != result) { return result; } return ZE_RESULT_SUCCESS; } LinuxSysmanDeviceImp::LinuxSysmanDeviceImp(OsSysman *pOsSysman) { pLinuxSysmanImp = static_cast(pOsSysman); pSysfsAccess = &pLinuxSysmanImp->getSysfsAccess(); } OsSysmanDevice *OsSysmanDevice::create(OsSysman *pOsSysman) { LinuxSysmanDeviceImp *pLinuxSysmanDeviceImp = new LinuxSysmanDeviceImp(pOsSysman); return static_cast(pLinuxSysmanDeviceImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/os_sysman_device.h000066400000000000000000000016231363734646600320330ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/sysman/os_sysman.h" #include #include namespace L0 { class OsSysmanDevice { public: virtual void getSerialNumber(int8_t (&serialNumber)[ZET_STRING_PROPERTY_SIZE]) = 0; virtual void getBoardNumber(int8_t (&boardNumber)[ZET_STRING_PROPERTY_SIZE]) = 0; virtual void getBrandName(int8_t (&brandName)[ZET_STRING_PROPERTY_SIZE]) = 0; virtual void getModelName(int8_t (&modelName)[ZET_STRING_PROPERTY_SIZE]) = 0; virtual void getVendorName(int8_t (&vendorName)[ZET_STRING_PROPERTY_SIZE]) = 0; virtual void getDriverVersion(int8_t (&driverVersion)[ZET_STRING_PROPERTY_SIZE]) = 0; virtual ze_result_t reset() = 0; static 
OsSysmanDevice *create(OsSysman *pOsSysman); virtual ~OsSysmanDevice() {} }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/sysman_device.h000066400000000000000000000006021363734646600313260ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { class SysmanDevice { public: virtual ~SysmanDevice(){}; virtual ze_result_t deviceGetProperties(zet_sysman_properties_t *pProperties) = 0; virtual ze_result_t reset() = 0; virtual void init() = 0; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/sysman_device_imp.cpp000066400000000000000000000030151363734646600325270ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sysman_device_imp.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "level_zero/core/source/device/device.h" namespace L0 { ze_result_t SysmanDeviceImp::deviceGetProperties(zet_sysman_properties_t *pProperties) { Device *device = L0::Device::fromHandle(hCoreDevice); ze_device_properties_t deviceProperties; device->getProperties(&deviceProperties); sysmanProperties.core = deviceProperties; uint32_t count = 0; device->getSubDevices(&count, nullptr); sysmanProperties.numSubdevices = count; *pProperties = sysmanProperties; return ZE_RESULT_SUCCESS; } ze_result_t SysmanDeviceImp::reset() { return pOsSysmanDevice->reset(); } void SysmanDeviceImp::init() { if (pOsSysmanDevice == nullptr) { pOsSysmanDevice = OsSysmanDevice::create(pOsSysman); } UNRECOVERABLE_IF(nullptr == pOsSysmanDevice); pOsSysmanDevice->getVendorName(sysmanProperties.vendorName); pOsSysmanDevice->getDriverVersion(sysmanProperties.driverVersion); pOsSysmanDevice->getModelName(sysmanProperties.modelName); pOsSysmanDevice->getBrandName(sysmanProperties.brandName); 
pOsSysmanDevice->getBoardNumber(sysmanProperties.boardNumber); pOsSysmanDevice->getSerialNumber(sysmanProperties.serialNumber); } SysmanDeviceImp::~SysmanDeviceImp() { if (nullptr != pOsSysmanDevice) { delete pOsSysmanDevice; } } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/sysman_device_imp.h000066400000000000000000000024211363734646600321740ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include "os_sysman_device.h" #include "sysman_device.h" #include namespace L0 { class SysmanDeviceImp : public SysmanDevice { public: void init() override; ze_result_t deviceGetProperties(zet_sysman_properties_t *pProperties) override; ze_result_t reset() override; SysmanDeviceImp(OsSysman *pOsSysman, ze_device_handle_t hCoreDevice) : pOsSysman(pOsSysman), hCoreDevice(hCoreDevice) { pOsSysmanDevice = nullptr; }; ~SysmanDeviceImp() override; SysmanDeviceImp(OsSysmanDevice *pOsSysmanDevice, ze_device_handle_t hCoreDevice) : pOsSysmanDevice(pOsSysmanDevice), hCoreDevice(hCoreDevice) { init(); }; // Don't allow copies of the SysmanDeviceImp object SysmanDeviceImp(const SysmanDeviceImp &obj) = delete; SysmanDeviceImp &operator=(const SysmanDeviceImp &obj) = delete; private: OsSysman *pOsSysman; OsSysmanDevice *pOsSysmanDevice; zet_sysman_properties_t sysmanProperties; ze_device_handle_t hCoreDevice; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/windows/000077500000000000000000000000001363734646600300205ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/windows/CMakeLists.txt000066400000000000000000000007451363734646600325660ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_DEVICE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_device_imp.cpp 
) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_DEVICE_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_DEVICE_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_DEVICE_WINDOWS}) os_sysman_device_imp.cpp000066400000000000000000000040451363734646600346470ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_device/windows/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/sysman_device/os_sysman_device.h" #include "level_zero/tools/source/sysman/windows/os_sysman_imp.h" namespace L0 { class WddmSysmanDeviceImp : public OsSysmanDevice { public: void getSerialNumber(int8_t (&serialNumber)[ZET_STRING_PROPERTY_SIZE]) override; void getBoardNumber(int8_t (&boardNumber)[ZET_STRING_PROPERTY_SIZE]) override; void getBrandName(int8_t (&brandName)[ZET_STRING_PROPERTY_SIZE]) override; void getModelName(int8_t (&modelName)[ZET_STRING_PROPERTY_SIZE]) override; void getVendorName(int8_t (&vendorName)[ZET_STRING_PROPERTY_SIZE]) override; void getDriverVersion(int8_t (&driverVersion)[ZET_STRING_PROPERTY_SIZE]) override; ze_result_t reset() override; WddmSysmanDeviceImp(OsSysman *pOsSysman); ~WddmSysmanDeviceImp() = default; // Don't allow copies of the WddmSysmanDeviceImp object WddmSysmanDeviceImp(const WddmSysmanDeviceImp &obj) = delete; WddmSysmanDeviceImp &operator=(const WddmSysmanDeviceImp &obj) = delete; }; void WddmSysmanDeviceImp::getSerialNumber(int8_t (&serialNumber)[ZET_STRING_PROPERTY_SIZE]) { } void WddmSysmanDeviceImp::getBoardNumber(int8_t (&boardNumber)[ZET_STRING_PROPERTY_SIZE]) { } void WddmSysmanDeviceImp::getBrandName(int8_t (&brandName)[ZET_STRING_PROPERTY_SIZE]) { } void WddmSysmanDeviceImp::getModelName(int8_t (&modelName)[ZET_STRING_PROPERTY_SIZE]) { } void WddmSysmanDeviceImp::getVendorName(int8_t (&vendorName)[ZET_STRING_PROPERTY_SIZE]) { } void 
WddmSysmanDeviceImp::getDriverVersion(int8_t (&driverVersion)[ZET_STRING_PROPERTY_SIZE]) { } ze_result_t WddmSysmanDeviceImp::reset() { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } WddmSysmanDeviceImp::WddmSysmanDeviceImp(OsSysman *pOsSysman) { } OsSysmanDevice *OsSysmanDevice::create(OsSysman *pOsSysman) { WddmSysmanDeviceImp *pWddmSysmanDeviceImp = new WddmSysmanDeviceImp(pOsSysman); return static_cast(pWddmSysmanDeviceImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_imp.cpp000066400000000000000000000122321363734646600263600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/sysman_imp.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/tools/source/sysman/pci/pci_imp.h" #include "level_zero/tools/source/sysman/sysman.h" #include "level_zero/tools/source/sysman/sysman_device/sysman_device_imp.h" #include namespace L0 { SysmanImp::SysmanImp(ze_device_handle_t hDevice) { hCoreDevice = hDevice; pOsSysman = OsSysman::create(this); pPci = new PciImp(pOsSysman); pSysmanDevice = new SysmanDeviceImp(pOsSysman, hCoreDevice); pFrequencyHandleContext = new FrequencyHandleContext(pOsSysman); pStandbyHandleContext = new StandbyHandleContext(pOsSysman); } SysmanImp::~SysmanImp() { delete pStandbyHandleContext; delete pFrequencyHandleContext; delete pSysmanDevice; delete pPci; delete pOsSysman; } void SysmanImp::init() { pOsSysman->init(); pFrequencyHandleContext->init(); pStandbyHandleContext->init(); pPci->init(); pSysmanDevice->init(); } ze_result_t SysmanImp::deviceGetProperties(zet_sysman_properties_t *pProperties) { return pSysmanDevice->deviceGetProperties(pProperties); } ze_result_t SysmanImp::schedulerGetCurrentMode(zet_sched_mode_t *pMode) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t 
SysmanImp::schedulerGetTimeoutModeProperties(ze_bool_t getDefaults, zet_sched_timeout_properties_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::schedulerGetTimesliceModeProperties(ze_bool_t getDefaults, zet_sched_timeslice_properties_t *pConfig) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::schedulerSetTimeoutMode(zet_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReboot) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::schedulerSetTimesliceMode(zet_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReboot) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::schedulerSetExclusiveMode(ze_bool_t *pNeedReboot) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::schedulerSetComputeUnitDebugMode(ze_bool_t *pNeedReboot) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::processesGetState(uint32_t *pCount, zet_process_state_t *pProcesses) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::deviceReset() { return pSysmanDevice->reset(); } ze_result_t SysmanImp::deviceGetRepairStatus(zet_repair_status_t *pRepairStatus) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::pciGetProperties(zet_pci_properties_t *pProperties) { return pPci->pciStaticProperties(pProperties); } ze_result_t SysmanImp::pciGetState(zet_pci_state_t *pState) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::pciGetBars(uint32_t *pCount, zet_pci_bar_properties_t *pProperties) { return pPci->pciGetInitializedBars(pCount, pProperties); } ze_result_t SysmanImp::pciGetStats(zet_pci_stats_t *pStats) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::powerGet(uint32_t *pCount, zet_sysman_pwr_handle_t *phPower) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::frequencyGet(uint32_t *pCount, zet_sysman_freq_handle_t *phFrequency) { return 
pFrequencyHandleContext->frequencyGet(pCount, phFrequency); } ze_result_t SysmanImp::engineGet(uint32_t *pCount, zet_sysman_engine_handle_t *phEngine) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::standbyGet(uint32_t *pCount, zet_sysman_standby_handle_t *phStandby) { return pStandbyHandleContext->standbyGet(pCount, phStandby); } ze_result_t SysmanImp::firmwareGet(uint32_t *pCount, zet_sysman_firmware_handle_t *phFirmware) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::memoryGet(uint32_t *pCount, zet_sysman_mem_handle_t *phMemory) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::fabricPortGet(uint32_t *pCount, zet_sysman_fabric_port_handle_t *phPort) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::temperatureGet(uint32_t *pCount, zet_sysman_temp_handle_t *phTemperature) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::psuGet(uint32_t *pCount, zet_sysman_psu_handle_t *phPsu) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::fanGet(uint32_t *pCount, zet_sysman_fan_handle_t *phFan) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::ledGet(uint32_t *pCount, zet_sysman_led_handle_t *phLed) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::rasGet(uint32_t *pCount, zet_sysman_ras_handle_t *phRas) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::eventGet(zet_sysman_event_handle_t *phEvent) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t SysmanImp::diagnosticsGet(uint32_t *pCount, zet_sysman_diag_handle_t *phDiagnostics) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/sysman_imp.h000066400000000000000000000064761363734646600260420ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"level_zero/tools/source/sysman/os_sysman.h" #include "level_zero/tools/source/sysman/sysman.h" #include #include #include namespace L0 { struct SysmanImp : Sysman { SysmanImp(ze_device_handle_t hDevice); ~SysmanImp() override; SysmanImp() = delete; SysmanImp(const SysmanImp &obj) = delete; SysmanImp &operator=(const SysmanImp &obj) = delete; void init(); ze_device_handle_t hCoreDevice; OsSysman *pOsSysman; Pci *pPci; SysmanDevice *pSysmanDevice; FrequencyHandleContext *pFrequencyHandleContext; StandbyHandleContext *pStandbyHandleContext; ze_result_t deviceGetProperties(zet_sysman_properties_t *pProperties) override; ze_result_t schedulerGetCurrentMode(zet_sched_mode_t *pMode) override; ze_result_t schedulerGetTimeoutModeProperties(ze_bool_t getDefaults, zet_sched_timeout_properties_t *pConfig) override; ze_result_t schedulerGetTimesliceModeProperties(ze_bool_t getDefaults, zet_sched_timeslice_properties_t *pConfig) override; ze_result_t schedulerSetTimeoutMode(zet_sched_timeout_properties_t *pProperties, ze_bool_t *pNeedReboot) override; ze_result_t schedulerSetTimesliceMode(zet_sched_timeslice_properties_t *pProperties, ze_bool_t *pNeedReboot) override; ze_result_t schedulerSetExclusiveMode(ze_bool_t *pNeedReboot) override; ze_result_t schedulerSetComputeUnitDebugMode(ze_bool_t *pNeedReboot) override; ze_result_t processesGetState(uint32_t *pCount, zet_process_state_t *pProcesses) override; ze_result_t deviceReset() override; ze_result_t deviceGetRepairStatus(zet_repair_status_t *pRepairStatus) override; ze_result_t pciGetProperties(zet_pci_properties_t *pProperties) override; ze_result_t pciGetState(zet_pci_state_t *pState) override; ze_result_t pciGetBars(uint32_t *pCount, zet_pci_bar_properties_t *pProperties) override; ze_result_t pciGetStats(zet_pci_stats_t *pStats) override; ze_result_t powerGet(uint32_t *pCount, zet_sysman_pwr_handle_t *phPower) override; ze_result_t frequencyGet(uint32_t *pCount, zet_sysman_freq_handle_t *phFrequency) override; 
ze_result_t engineGet(uint32_t *pCount, zet_sysman_engine_handle_t *phEngine) override; ze_result_t standbyGet(uint32_t *pCount, zet_sysman_standby_handle_t *phStandby) override; ze_result_t firmwareGet(uint32_t *pCount, zet_sysman_firmware_handle_t *phFirmware) override; ze_result_t memoryGet(uint32_t *pCount, zet_sysman_mem_handle_t *phMemory) override; ze_result_t fabricPortGet(uint32_t *pCount, zet_sysman_fabric_port_handle_t *phPort) override; ze_result_t temperatureGet(uint32_t *pCount, zet_sysman_temp_handle_t *phTemperature) override; ze_result_t psuGet(uint32_t *pCount, zet_sysman_psu_handle_t *phPsu) override; ze_result_t fanGet(uint32_t *pCount, zet_sysman_fan_handle_t *phFan) override; ze_result_t ledGet(uint32_t *pCount, zet_sysman_led_handle_t *phLed) override; ze_result_t rasGet(uint32_t *pCount, zet_sysman_ras_handle_t *phRas) override; ze_result_t eventGet(zet_sysman_event_handle_t *phEvent) override; ze_result_t diagnosticsGet(uint32_t *pCount, zet_sysman_diag_handle_t *phDiagnostics) override; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/windows/000077500000000000000000000000001363734646600251675ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/sysman/windows/CMakeLists.txt000066400000000000000000000007651363734646600277370ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(L0_SRCS_TOOLS_SYSMAN_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/os_sysman_imp.cpp) if(WIN32) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_SYSMAN_WINDOWS} ) endif() # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_SYSMAN_WINDOWS ${L0_SRCS_TOOLS_SYSMAN_WINDOWS}) 
compute-runtime-20.13.16352/level_zero/tools/source/sysman/windows/os_sysman_imp.cpp000066400000000000000000000006771363734646600305650ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/windows/os_sysman_imp.h" namespace L0 { ze_result_t WddmSysmanImp::init() { return ZE_RESULT_SUCCESS; } OsSysman *OsSysman::create(SysmanImp *pParentSysmanImp) { WddmSysmanImp *pWddmSysmanImp = new WddmSysmanImp(pParentSysmanImp); return static_cast(pWddmSysmanImp); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/sysman/windows/os_sysman_imp.h000066400000000000000000000012061363734646600302170ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/sysman/sysman_imp.h" namespace L0 { class WddmSysmanImp : public OsSysman { public: WddmSysmanImp(SysmanImp *pParentSysmanImp) : pParentSysmanImp(pParentSysmanImp){}; ~WddmSysmanImp() override = default; // Don't allow copies of the WddmSysmanImp object WddmSysmanImp(const WddmSysmanImp &obj) = delete; WddmSysmanImp &operator=(const WddmSysmanImp &obj) = delete; ze_result_t init() override; private: WddmSysmanImp() = delete; SysmanImp *pParentSysmanImp; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/tools_init.cpp000066400000000000000000000004651363734646600250570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tools_init.h" #include "level_zero/tools/source/tools_init_imp.h" namespace L0 { static ToolsInitImp toolsInitImp; ToolsInit *ToolsInit::toolsInit = &toolsInitImp; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/tools_init.h000066400000000000000000000006171363734646600245230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include namespace L0 { class ToolsInit { public: virtual ze_result_t initTools(ze_init_flag_t flag) = 0; virtual bool areToolsEnabled() = 0; static ToolsInit *get() { return toolsInit; } protected: static ToolsInit *toolsInit; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/tools_init_imp.cpp000066400000000000000000000022331363734646600257170ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tools_init_imp.h" #include "level_zero/tools/source/metrics/metric.h" #include "level_zero/tools/source/pin/pin.h" #include "level_zero/tools/source/sysman/sysman.h" #include "level_zero/tools/source/tools_init.h" #include "level_zero/tools/source/tracing/tracing.h" #include "level_zero/tools/source/tracing/tracing_imp.h" #include #include namespace L0 { static void enableTools(ze_result_t &result, ze_init_flag_t flag, bool *ptoolsAreEnabled) { MetricContext::enableMetricApi(std::ref(result)); if (result != ZE_RESULT_SUCCESS) { return; } SysmanHandleContext::init(flag); PinContext::init(flag); APITracerContextImp::apiTracingEnable(flag); *ptoolsAreEnabled = true; } ze_result_t ToolsInitImp::initTools(ze_init_flag_t flag) { ze_result_t result = ZE_RESULT_SUCCESS; std::call_once(this->initToolsOnce, enableTools, std::ref(result), flag, &this->toolsAreEnabled); return result; } bool ToolsInitImp::areToolsEnabled() { return this->toolsAreEnabled; } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/tools_init_imp.h000066400000000000000000000006511363734646600253660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/tools_init.h" #include namespace L0 { class ToolsInitImp : public ToolsInit { public: ze_result_t initTools(ze_init_flag_t flag) override; bool areToolsEnabled() override; 
private: std::once_flag initToolsOnce; bool toolsAreEnabled; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/tracing/000077500000000000000000000000001363734646600236125ustar00rootroot00000000000000compute-runtime-20.13.16352/level_zero/tools/source/tracing/CMakeLists.txt000066400000000000000000000042371363734646600263600ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # # DO NOT EDIT: Generated from "scripts/templates/CMakeLists.cpp.mako" set(L0_SRCS_TOOLS_TRACING ${CMAKE_CURRENT_SOURCE_DIR}/tracing.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_barrier_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_barrier_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdlist_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdlist_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdqueue_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cmdqueue_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cl_interop_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_cl_interop_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_copy_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_copy_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_device_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_device_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_driver_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_driver_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_event_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_event_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_fence_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_fence_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_global_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_global_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_sampler_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_sampler_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_residency_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_residency_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_image_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_image_imp.h 
${CMAKE_CURRENT_SOURCE_DIR}/tracing_memory_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_memory_imp.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_module_imp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_module_imp.h ) target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${L0_SRCS_TOOLS_TRACING} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) # Make our source files visible to parent set_property(GLOBAL PROPERTY L0_SRCS_TOOLS_TRACING ${L0_SRCS_TOOLS_TRACING}) compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing.h000066400000000000000000000021011363734646600254040ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include struct _zet_tracer_handle_t {}; namespace L0 { struct APITracer : _zet_tracer_handle_t { static APITracer *create(); virtual ~APITracer() = default; static APITracer *fromHandle(zet_tracer_handle_t handle) { return static_cast(handle); } inline zet_tracer_handle_t toHandle() { return this; } virtual ze_result_t destroyTracer(zet_tracer_handle_t phTracer) = 0; virtual ze_result_t setPrologues(zet_core_callbacks_t *pCoreCbs) = 0; virtual ze_result_t setEpilogues(zet_core_callbacks_t *pCoreCbs) = 0; virtual ze_result_t enableTracer(ze_bool_t enable) = 0; }; ze_result_t createAPITracer(zet_driver_handle_t hDriver, const zet_tracer_desc_t *desc, zet_tracer_handle_t *phTracer); struct APITracerContext { virtual ~APITracerContext() = default; virtual void *getActiveTracersList() = 0; virtual void releaseActivetracersList() = 0; }; } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_barrier_imp.cpp000066400000000000000000000120001363734646600303110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeCommandListAppendBarrier_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t 
hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendBarrier, hCommandList, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_barrier_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendBarrierCb_t, CommandList, pfnAppendBarrierCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendBarrier, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryRangesBarrier_Tracing(ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier, hCommandList, numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); ze_command_list_append_memory_ranges_barrier_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pnumRanges = &numRanges; tracerParams.ppRangeSizes = &pRangeSizes; tracerParams.ppRanges = &pRanges; tracerParams.phSignalEvent = &hSignalEvent; tracerParams.pnumWaitEvents = &numWaitEvents; tracerParams.pphWaitEvents = &phWaitEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendMemoryRangesBarrierCb_t, CommandList, pfnAppendMemoryRangesBarrierCb); return 
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pnumRanges, *tracerParams.ppRangeSizes, *tracerParams.ppRanges, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents); } __zedllexport ze_result_t __zecall zeDeviceSystemBarrier_Tracing(ze_device_handle_t hDevice) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnSystemBarrier, hDevice); ze_device_system_barrier_params_t tracerParams; tracerParams.phDevice = &hDevice; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceSystemBarrierCb_t, Device, pfnSystemBarrierCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnSystemBarrier, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_barrier_imp.h000066400000000000000000000020641363734646600277670ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeCommandListAppendBarrier_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); __zedllexport ze_result_t __zecall zeCommandListAppendMemoryRangesBarrier_Tracing(ze_command_list_handle_t hCommandList, uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); __zedllexport ze_result_t __zecall zeDeviceSystemBarrier_Tracing(ze_device_handle_t hDevice); } 
compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_cl_interop_imp.cpp000066400000000000000000000112521363734646600310310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeDeviceRegisterCLMemory_Tracing(ze_device_handle_t hDevice, cl_context context, cl_mem mem, void **ptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnRegisterCLMemory, hDevice, context, mem, ptr); ze_device_register_cl_memory_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pcontext = &context; tracerParams.pmem = &mem; tracerParams.pptr = &ptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceRegisterCLMemoryCb_t, Device, pfnRegisterCLMemoryCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnRegisterCLMemory, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pcontext, *tracerParams.pmem, *tracerParams.pptr); } __zedllexport ze_result_t __zecall zeDeviceRegisterCLProgram_Tracing(ze_device_handle_t hDevice, cl_context context, cl_program program, ze_module_handle_t *phModule) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnRegisterCLProgram, hDevice, context, program, phModule); ze_device_register_cl_program_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pcontext = &context; tracerParams.pprogram = &program; tracerParams.pphModule = &phModule; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceRegisterCLProgramCb_t, Device, pfnRegisterCLProgramCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnRegisterCLProgram, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, 
apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pcontext, *tracerParams.pprogram, *tracerParams.pphModule); } __zedllexport ze_result_t __zecall zeDeviceRegisterCLCommandQueue_Tracing(ze_device_handle_t hDevice, cl_context context, cl_command_queue commandQueue, ze_command_queue_handle_t *phCommandQueue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnRegisterCLCommandQueue, hDevice, context, commandQueue, phCommandQueue); ze_device_register_cl_command_queue_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pcontext = &context; tracerParams.pcommand_queue = &commandQueue; tracerParams.pphCommandQueue = &phCommandQueue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceRegisterCLCommandQueueCb_t, Device, pfnRegisterCLCommandQueueCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnRegisterCLCommandQueue, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pcontext, *tracerParams.pcommand_queue, *tracerParams.pphCommandQueue); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_cl_interop_imp.h000066400000000000000000000016631363734646600305030ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeDeviceRegisterCLMemory_Tracing(ze_device_handle_t hDevice, cl_context context, cl_mem mem, void **ptr); __zedllexport ze_result_t __zecall zeDeviceRegisterCLProgram_Tracing(ze_device_handle_t hDevice, cl_context context, cl_program program, ze_module_handle_t *phModule); __zedllexport ze_result_t __zecall zeDeviceRegisterCLCommandQueue_Tracing(ze_device_handle_t hDevice, cl_context context, cl_command_queue commandQueue, ze_command_queue_handle_t *phCommandQueue); } 
compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_cmdlist_imp.cpp000066400000000000000000000126341363734646600303370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeCommandListCreate_Tracing(ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *phCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnCreate, hDevice, desc, phCommandList); ze_command_list_create_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphCommandList = &phCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListCreateCb_t, CommandList, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphCommandList); } __zedllexport ze_result_t __zecall zeCommandListCreateImmediate_Tracing( ze_device_handle_t hDevice, const ze_command_queue_desc_t *altdesc, ze_command_list_handle_t *phCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnCreateImmediate, hDevice, altdesc, phCommandList); ze_command_list_create_immediate_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.paltdesc = &altdesc; tracerParams.pphCommandList = &phCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListCreateImmediateCb_t, CommandList, pfnCreateImmediateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnCreateImmediate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, 
apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.paltdesc, *tracerParams.pphCommandList); } __zedllexport ze_result_t __zecall zeCommandListDestroy_Tracing(ze_command_list_handle_t hCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnDestroy, hCommandList); ze_command_list_destroy_params_t tracerParams; tracerParams.phCommandList = &hCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListDestroyCb_t, CommandList, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList); } __zedllexport ze_result_t __zecall zeCommandListClose_Tracing(ze_command_list_handle_t hCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnClose, hCommandList); ze_command_list_close_params_t tracerParams; tracerParams.phCommandList = &hCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListCloseCb_t, CommandList, pfnCloseCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnClose, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList); } __zedllexport ze_result_t __zecall zeCommandListReset_Tracing(ze_command_list_handle_t hCommandList) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnReset, hCommandList); ze_command_list_reset_params_t tracerParams; tracerParams.phCommandList = &hCommandList; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListResetCb_t, CommandList, pfnResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnReset, &tracerParams, apiCallbackData.apiOrdinal, 
apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_cmdlist_imp.h000066400000000000000000000017761363734646600300110ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeCommandListCreate_Tracing(ze_device_handle_t hDevice, const ze_command_list_desc_t *desc, ze_command_list_handle_t *phCommandList); __zedllexport ze_result_t __zecall zeCommandListCreateImmediate_Tracing(ze_device_handle_t hDevice, const ze_command_queue_desc_t *altdesc, ze_command_list_handle_t *phCommandList); __zedllexport ze_result_t __zecall zeCommandListDestroy_Tracing(ze_command_list_handle_t hCommandList); __zedllexport ze_result_t __zecall zeCommandListClose_Tracing(ze_command_list_handle_t hCommandList); __zedllexport ze_result_t __zecall zeCommandListReset_Tracing(ze_command_list_handle_t hCommandList); __zedllexport ze_result_t __zecall zeCommandListResetParameters_Tracing(ze_command_list_handle_t hCommandList); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_cmdqueue_imp.cpp000066400000000000000000000122751363734646600305110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeCommandQueueCreate_Tracing(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnCreate, hDevice, desc, phCommandQueue); ze_command_queue_create_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphCommandQueue = &phCommandQueue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, 
ze_pfnCommandQueueCreateCb_t, CommandQueue, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphCommandQueue); } __zedllexport ze_result_t __zecall zeCommandQueueDestroy_Tracing(ze_command_queue_handle_t hCommandQueue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnDestroy, hCommandQueue); ze_command_queue_destroy_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandQueueDestroyCb_t, CommandQueue, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandQueue); } __zedllexport ze_result_t __zecall zeCommandQueueExecuteCommandLists_Tracing(ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnExecuteCommandLists, hCommandQueue, numCommandLists, phCommandLists, hFence); ze_command_queue_execute_command_lists_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; tracerParams.pnumCommandLists = &numCommandLists; tracerParams.pphCommandLists = &phCommandLists; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandQueueExecuteCommandListsCb_t, CommandQueue, pfnExecuteCommandListsCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnExecuteCommandLists, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, 
*tracerParams.phCommandQueue, *tracerParams.pnumCommandLists, *tracerParams.pphCommandLists, *tracerParams.phFence); } __zedllexport ze_result_t __zecall zeCommandQueueSynchronize_Tracing(ze_command_queue_handle_t hCommandQueue, uint32_t timeout) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandQueue.pfnSynchronize, hCommandQueue, timeout); ze_command_queue_synchronize_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; tracerParams.ptimeout = &timeout; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandQueueSynchronizeCb_t, CommandQueue, pfnSynchronizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandQueue.pfnSynchronize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandQueue, *tracerParams.ptimeout); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_cmdqueue_imp.h000066400000000000000000000016771363734646600301620ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeCommandQueueCreate_Tracing(ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *phCommandQueue); __zedllexport ze_result_t __zecall zeCommandQueueDestroy_Tracing(ze_command_queue_handle_t hCommandQueue); __zedllexport ze_result_t __zecall zeCommandQueueExecuteCommandLists_Tracing(ze_command_queue_handle_t hCommandQueue, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence); __zedllexport ze_result_t __zecall zeCommandQueueSynchronize_Tracing(ze_command_queue_handle_t hCommandQueue, uint32_t timeout); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_copy_imp.cpp000066400000000000000000000417001363734646600276460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeCommandListAppendMemoryCopy_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy, hCommandList, dstptr, srcptr, size, hEvent); ze_command_list_append_memory_copy_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pdstptr = &dstptr; tracerParams.psrcptr = &srcptr; tracerParams.psize = &size; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendMemoryCopyCb_t, CommandList, pfnAppendMemoryCopyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pdstptr, *tracerParams.psrcptr, *tracerParams.psize, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryFill_Tracing(ze_command_list_handle_t hCommandList, void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill, hCommandList, ptr, pattern, patternSize, size, hEvent); ze_command_list_append_memory_fill_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pptr = &ptr; tracerParams.ppattern = &pattern; tracerParams.ppattern_size = &patternSize; tracerParams.psize = &size; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendMemoryFillCb_t, CommandList, pfnAppendMemoryFillCb); return 
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pptr, *tracerParams.ppattern, *tracerParams.ppattern_size, *tracerParams.psize, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryCopyRegion_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcptr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion, hCommandList, dstptr, dstRegion, dstPitch, dstSlicePitch, srcptr, srcRegion, srcPitch, srcSlicePitch, hEvent); ze_command_list_append_memory_copy_region_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pdstptr = &dstptr; tracerParams.pdstRegion = &dstRegion; tracerParams.pdstPitch = &dstPitch; tracerParams.pdstSlicePitch = &dstSlicePitch; tracerParams.psrcptr = &srcptr; tracerParams.psrcRegion = &srcRegion; tracerParams.psrcPitch = &srcPitch; tracerParams.psrcSlicePitch = &srcSlicePitch; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendMemoryCopyRegionCb_t, CommandList, pfnAppendMemoryCopyRegionCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pdstptr, *tracerParams.pdstRegion, *tracerParams.pdstPitch, *tracerParams.pdstSlicePitch, *tracerParams.psrcptr, *tracerParams.psrcRegion, *tracerParams.psrcPitch, *tracerParams.psrcSlicePitch, *tracerParams.phEvent); } 
__zedllexport ze_result_t __zecall zeCommandListAppendImageCopy_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy, hCommandList, hDstImage, hSrcImage, hEvent); ze_command_list_append_image_copy_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phDstImage = &hDstImage; tracerParams.phSrcImage = &hSrcImage; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendImageCopyCb_t, CommandList, pfnAppendImageCopyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phDstImage, *tracerParams.phSrcImage, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyRegion_Tracing( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion, hCommandList, hDstImage, hSrcImage, pDstRegion, pSrcRegion, hEvent); ze_command_list_append_image_copy_region_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phDstImage = &hDstImage; tracerParams.phSrcImage = &hSrcImage; tracerParams.ppDstRegion = &pDstRegion; tracerParams.ppSrcRegion = &pSrcRegion; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendImageCopyRegionCb_t, CommandList, pfnAppendImageCopyRegionCb); return 
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phDstImage, *tracerParams.phSrcImage, *tracerParams.ppDstRegion, *tracerParams.ppSrcRegion, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyToMemory_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory, hCommandList, dstptr, hSrcImage, pSrcRegion, hEvent); ze_command_list_append_image_copy_to_memory_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pdstptr = &dstptr; tracerParams.phSrcImage = &hSrcImage; tracerParams.ppSrcRegion = &pSrcRegion; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendImageCopyToMemoryCb_t, CommandList, pfnAppendImageCopyToMemoryCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pdstptr, *tracerParams.phSrcImage, *tracerParams.ppSrcRegion, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyFromMemory_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory, hCommandList, hDstImage, srcptr, pDstRegion, hEvent); ze_command_list_append_image_copy_from_memory_params_t tracerParams; 
tracerParams.phCommandList = &hCommandList; tracerParams.phDstImage = &hDstImage; tracerParams.psrcptr = &srcptr; tracerParams.ppDstRegion = &pDstRegion; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendImageCopyFromMemoryCb_t, CommandList, pfnAppendImageCopyFromMemoryCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phDstImage, *tracerParams.psrcptr, *tracerParams.ppDstRegion, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendMemoryPrefetch_Tracing(ze_command_list_handle_t hCommandList, const void *ptr, size_t size) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch, hCommandList, ptr, size); ze_command_list_append_memory_prefetch_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pptr = &ptr; tracerParams.psize = &size; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendMemoryPrefetchCb_t, CommandList, pfnAppendMemoryPrefetchCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pptr, *tracerParams.psize); } __zedllexport ze_result_t __zecall zeCommandListAppendMemAdvise_Tracing(ze_command_list_handle_t hCommandList, ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise, hCommandList, hDevice, ptr, size, advice); ze_command_list_append_mem_advise_params_t tracerParams; 
tracerParams.phCommandList = &hCommandList; tracerParams.phDevice = &hDevice; tracerParams.pptr = &ptr; tracerParams.psize = &size; tracerParams.padvice = &advice; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnCommandListAppendMemAdviseCb_t, CommandList, pfnAppendMemAdviseCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phDevice, *tracerParams.pptr, *tracerParams.psize, *tracerParams.padvice); }compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_copy_imp.h000066400000000000000000000073411363734646600273160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeCommandListAppendMemoryCopy_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendMemoryFill_Tracing(ze_command_list_handle_t hCommandList, void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendMemoryCopyRegion_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, const void *srcptr, const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendImageCopy_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyRegion_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, 
ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyToMemory_Tracing(ze_command_list_handle_t hCommandList, void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendImageCopyFromMemory_Tracing(ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendMemoryPrefetch_Tracing(ze_command_list_handle_t hCommandList, const void *ptr, size_t size); __zedllexport ze_result_t __zecall zeCommandListAppendMemAdvise_Tracing(ze_command_list_handle_t hCommandList, ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice); } // extern "C" compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_device_imp.cpp000066400000000000000000000403671363734646600301430ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeDeviceGet_Tracing(ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGet, hDriver, pCount, phDevices); ze_device_get_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppCount = &pCount; tracerParams.pphDevices = &phDevices; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetCb_t, Device, pfnGetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGet, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppCount, 
*tracerParams.pphDevices); } __zedllexport ze_result_t __zecall zeDeviceGetProperties_Tracing(ze_device_handle_t hDevice, ze_device_properties_t *pDeviceProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetProperties, hDevice, pDeviceProperties); ze_device_get_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppDeviceProperties = &pDeviceProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetPropertiesCb_t, Device, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppDeviceProperties); } __zedllexport ze_result_t __zecall zeDeviceGetComputeProperties_Tracing(ze_device_handle_t hDevice, ze_device_compute_properties_t *pComputeProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetComputeProperties, hDevice, pComputeProperties); ze_device_get_compute_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppComputeProperties = &pComputeProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetComputePropertiesCb_t, Device, pfnGetComputePropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetComputeProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppComputeProperties); } __zedllexport ze_result_t __zecall zeDeviceGetMemoryProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryProperties, hDevice, pCount, pMemProperties); 
ze_device_get_memory_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppCount = &pCount; tracerParams.ppMemProperties = &pMemProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetMemoryPropertiesCb_t, Device, pfnGetMemoryPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppCount, *tracerParams.ppMemProperties); } __zedllexport ze_result_t __zecall zeDeviceGetCacheProperties_Tracing(ze_device_handle_t hDevice, ze_device_cache_properties_t *pCacheProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetCacheProperties, hDevice, pCacheProperties); ze_device_get_cache_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppCacheProperties = &pCacheProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetCachePropertiesCb_t, Device, pfnGetCachePropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetCacheProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppCacheProperties); } __zedllexport ze_result_t __zecall zeDeviceGetImageProperties_Tracing(ze_device_handle_t hDevice, ze_device_image_properties_t *pImageProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetImageProperties, hDevice, pImageProperties); ze_device_get_image_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppImageProperties = &pImageProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetImagePropertiesCb_t, Device, 
pfnGetImagePropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetImageProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppImageProperties); } __zedllexport ze_result_t __zecall zeDeviceGetSubDevices_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_handle_t *phSubdevices) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetSubDevices, hDevice, pCount, phSubdevices); ze_device_get_sub_devices_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppCount = &pCount; tracerParams.pphSubdevices = &phSubdevices; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetSubDevicesCb_t, Device, pfnGetSubDevicesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetSubDevices, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppCount, *tracerParams.pphSubdevices); } __zedllexport ze_result_t __zecall zeDeviceGetP2PProperties_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetP2PProperties, hDevice, hPeerDevice, pP2PProperties); ze_device_get_p2_p_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.phPeerDevice = &hPeerDevice; tracerParams.ppP2PProperties = &pP2PProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetP2PPropertiesCb_t, Device, pfnGetP2PPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetP2PProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, 
*tracerParams.phDevice, *tracerParams.phPeerDevice, *tracerParams.ppP2PProperties); } __zedllexport ze_result_t __zecall zeDeviceCanAccessPeer_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_bool_t *value) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnCanAccessPeer, hDevice, hPeerDevice, value); ze_device_can_access_peer_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.phPeerDevice = &hPeerDevice; tracerParams.pvalue = &value; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceCanAccessPeerCb_t, Device, pfnCanAccessPeerCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnCanAccessPeer, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.phPeerDevice, *tracerParams.pvalue); } __zedllexport ze_result_t __zecall zeKernelSetIntermediateCacheConfig_Tracing(ze_kernel_handle_t hKernel, ze_cache_config_t cacheConfig) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetIntermediateCacheConfig, hKernel, cacheConfig); ze_kernel_set_intermediate_cache_config_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pCacheConfig = &cacheConfig; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnKernelSetIntermediateCacheConfigCb_t, Kernel, pfnSetIntermediateCacheConfigCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetIntermediateCacheConfig, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pCacheConfig); } __zedllexport ze_result_t __zecall zeDeviceSetLastLevelCacheConfig_Tracing(ze_device_handle_t hDevice, ze_cache_config_t cacheConfig) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnSetLastLevelCacheConfig, 
hDevice, cacheConfig); ze_device_set_last_level_cache_config_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pCacheConfig = &cacheConfig; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceSetLastLevelCacheConfigCb_t, Device, pfnSetLastLevelCacheConfigCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnSetLastLevelCacheConfig, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pCacheConfig); } __zedllexport ze_result_t __zecall zeDeviceGetKernelProperties_Tracing(ze_device_handle_t hDevice, ze_device_kernel_properties_t *pKernelProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetKernelProperties, hDevice, pKernelProperties); ze_device_get_kernel_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppKernelProperties = &pKernelProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, ze_pfnDeviceGetKernelPropertiesCb_t, Device, pfnGetKernelPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetKernelProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppKernelProperties); } __zedllexport ze_result_t __zecall zeDeviceGetMemoryAccessProperties_Tracing(ze_device_handle_t hDevice, ze_device_memory_access_properties_t *pMemAccessProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryAccessProperties, hDevice, pMemAccessProperties); ze_device_get_memory_access_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.ppMemAccessProperties = &pMemAccessProperties; L0::APITracerCallbackDataImp api_callbackData; ZE_GEN_PER_API_CALLBACK_STATE(api_callbackData, 
ze_pfnDeviceGetMemoryAccessPropertiesCb_t, Device, pfnGetMemoryAccessPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnGetMemoryAccessProperties, &tracerParams, api_callbackData.apiOrdinal, api_callbackData.prologCallbacks, api_callbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.ppMemAccessProperties); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_device_imp.h000066400000000000000000000051471363734646600276050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeDeviceGet_Tracing(ze_driver_handle_t hDriver, uint32_t *pCount, ze_device_handle_t *phDevices); __zedllexport ze_result_t __zecall zeDeviceGetProperties_Tracing(ze_device_handle_t hDevice, ze_device_properties_t *pDeviceProperties); __zedllexport ze_result_t __zecall zeDeviceGetComputeProperties_Tracing(ze_device_handle_t hDevice, ze_device_compute_properties_t *pComputeProperties); __zedllexport ze_result_t __zecall zeDeviceGetMemoryProperties_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_memory_properties_t *pMemProperties); __zedllexport ze_result_t __zecall zeDeviceGetCacheProperties_Tracing(ze_device_handle_t hDevice, ze_device_cache_properties_t *pCacheProperties); __zedllexport ze_result_t __zecall zeDeviceGetImageProperties_Tracing(ze_device_handle_t hDevice, ze_device_image_properties_t *pImageProperties); __zedllexport ze_result_t __zecall zeDeviceGetSubDevices_Tracing(ze_device_handle_t hDevice, uint32_t *pCount, ze_device_handle_t *phSubdevices); __zedllexport ze_result_t __zecall zeDeviceGetP2PProperties_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties); __zedllexport ze_result_t __zecall zeDeviceCanAccessPeer_Tracing(ze_device_handle_t hDevice, ze_device_handle_t hPeerDevice, ze_bool_t *value); __zedllexport ze_result_t 
__zecall zeKernelSetIntermediateCacheConfig_Tracing(ze_kernel_handle_t hKernel, ze_cache_config_t cacheConfig); __zedllexport ze_result_t __zecall zeDeviceSetLastLevelCacheConfig_Tracing(ze_device_handle_t hDevice, ze_cache_config_t cacheConfig); __zedllexport ze_result_t __zecall zeDeviceGetKernelProperties_Tracing(ze_device_handle_t hDevice, ze_device_kernel_properties_t *pKernelProperties); __zedllexport ze_result_t __zecall zeDeviceGetMemoryAccessProperties_Tracing(ze_device_handle_t hDevice, ze_device_memory_access_properties_t *pMemAccessProperties); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_driver_imp.cpp000066400000000000000000000134651363734646600301760ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeDriverGet_Tracing(uint32_t *pCount, ze_driver_handle_t *phDrivers) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGet, pCount, phDrivers); ze_driver_get_params_t tracerParams; tracerParams.ppCount = &pCount; tracerParams.pphDrivers = &phDrivers; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetCb_t, Driver, pfnGetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGet, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.ppCount, *tracerParams.pphDrivers); } __zedllexport ze_result_t __zecall zeDriverGetProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_properties_t *properties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetProperties, hDriver, properties); ze_driver_get_properties_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppDriverProperties = &properties; L0::APITracerCallbackDataImp apiCallbackData; 
ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetPropertiesCb_t, Driver, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppDriverProperties); } __zedllexport ze_result_t __zecall zeDriverGetApiVersion_Tracing(ze_driver_handle_t hDrivers, ze_api_version_t *version) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetApiVersion, hDrivers, version); ze_driver_get_api_version_params_t tracerParams; tracerParams.phDriver = &hDrivers; tracerParams.pversion = &version; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetApiVersionCb_t, Driver, pfnGetApiVersionCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetApiVersion, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pversion); } __zedllexport ze_result_t __zecall zeDriverGetIPCProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_ipc_properties_t *pIPCProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetIPCProperties, hDriver, pIPCProperties); ze_driver_get_ipc_properties_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppIPCProperties = &pIPCProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetIPCPropertiesCb_t, Driver, pfnGetIPCPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetIPCProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppIPCProperties); } __zedllexport ze_result_t __zecall 
zeDriverGetExtensionFunctionAddress_Tracing(ze_driver_handle_t hDriver, const char *pFuncName, void **pfunc) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetExtensionFunctionAddress, hDriver, pFuncName, pfunc); ze_driver_get_extension_function_address_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.ppFuncName = &pFuncName; tracerParams.ppfunc = &pfunc; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetExtensionFunctionAddressCb_t, Driver, pfnGetExtensionFunctionAddressCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetExtensionFunctionAddress, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.ppFuncName, *tracerParams.ppfunc); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_driver_imp.h000066400000000000000000000017111363734646600276320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeDriverGet_Tracing(uint32_t *pCount, ze_driver_handle_t *phDrivers); __zedllexport ze_result_t __zecall zeDriverGetProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_properties_t *properties); __zedllexport ze_result_t __zecall zeDriverGetApiVersion_Tracing(ze_driver_handle_t hDrivers, ze_api_version_t *version); __zedllexport ze_result_t __zecall zeDriverGetIPCProperties_Tracing(ze_driver_handle_t hDriver, ze_driver_ipc_properties_t *pIPCProperties); __zedllexport ze_result_t __zecall zeDriverGetExtensionFunctionAddress_Tracing(ze_driver_handle_t hDriver, const char *pFuncName, void **pfunc); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_event_imp.cpp000066400000000000000000000414231363734646600300170ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeEventPoolCreate_Tracing(ze_driver_handle_t hDriver, const ze_event_pool_desc_t *desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnCreate, hDriver, desc, numDevices, phDevices, phEventPool); ze_event_pool_create_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.pdesc = &desc; tracerParams.pnumDevices = &numDevices; tracerParams.pphDevices = &phDevices; tracerParams.pphEventPool = &phEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolCreateCb_t, EventPool, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pdesc, *tracerParams.pnumDevices, *tracerParams.pphDevices, *tracerParams.pphEventPool); } __zedllexport ze_result_t __zecall zeEventPoolDestroy_Tracing(ze_event_pool_handle_t hEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnDestroy, hEventPool); ze_event_pool_destroy_params_t tracerParams; tracerParams.phEventPool = &hEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolDestroyCb_t, EventPool, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool); } __zedllexport ze_result_t __zecall zeEventCreate_Tracing(ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnCreate, 
hEventPool, desc, phEvent); ze_event_create_params_t tracerParams; tracerParams.phEventPool = &hEventPool; tracerParams.pdesc = &desc; tracerParams.pphEvent = &phEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventCreateCb_t, Event, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool, *tracerParams.pdesc, *tracerParams.pphEvent); } __zedllexport ze_result_t __zecall zeEventDestroy_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnDestroy, hEvent); ze_event_destroy_params_t tracerParams; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventDestroyCb_t, Event, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeEventPoolGetIpcHandle_Tracing(ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle, hEventPool, phIpc); ze_event_pool_get_ipc_handle_params_t tracerParams; tracerParams.phEventPool = &hEventPool; tracerParams.pphIpc = &phIpc; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolGetIpcHandleCb_t, EventPool, pfnGetIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool, *tracerParams.pphIpc); } __zedllexport ze_result_t __zecall 
zeEventPoolOpenIpcHandle_Tracing(ze_driver_handle_t hDriver, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle, hDriver, hIpc, phEventPool); ze_event_pool_open_ipc_handle_params_t tracerParams; tracerParams.phDriver = &hDriver; tracerParams.phIpc = &hIpc; tracerParams.pphEventPool = &phEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolOpenIpcHandleCb_t, EventPool, pfnOpenIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.phIpc, *tracerParams.pphEventPool); } __zedllexport ze_result_t __zecall zeEventPoolCloseIpcHandle_Tracing(ze_event_pool_handle_t hEventPool) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle, hEventPool); ze_event_pool_close_ipc_handle_params_t tracerParams; tracerParams.phEventPool = &hEventPool; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventPoolCloseIpcHandleCb_t, EventPool, pfnCloseIpcHandleCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEventPool); } __zedllexport ze_result_t __zecall zeCommandListAppendSignalEvent_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent, hCommandList, hEvent); ze_command_list_append_signal_event_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; 
ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendSignalEventCb_t, CommandList, pfnAppendSignalEventCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeCommandListAppendWaitOnEvents_Tracing(ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents, hCommandList, numEvents, phEvents); ze_command_list_append_wait_on_events_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.pnumEvents = &numEvents; tracerParams.pphEvents = &phEvents; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendWaitOnEventsCb_t, CommandList, pfnAppendWaitOnEventsCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pnumEvents, *tracerParams.pphEvents); } __zedllexport ze_result_t __zecall zeEventHostSignal_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnHostSignal, hEvent); ze_event_host_signal_params_t tracerParams; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventHostSignalCb_t, Event, pfnHostSignalCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnHostSignal, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall 
zeEventHostSynchronize_Tracing(ze_event_handle_t hEvent, uint32_t timeout) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize, hEvent, timeout); ze_event_host_synchronize_params_t tracerParams; tracerParams.phEvent = &hEvent; tracerParams.ptimeout = &timeout; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventHostSynchronizeCb_t, Event, pfnHostSynchronizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent, *tracerParams.ptimeout); } __zedllexport ze_result_t __zecall zeEventQueryStatus_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnQueryStatus, hEvent); ze_event_query_status_params_t tracerParams; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventQueryStatusCb_t, Event, pfnQueryStatusCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnQueryStatus, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeEventHostReset_Tracing(ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnHostReset, hEvent); ze_event_host_reset_params_t tracerParams; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventHostResetCb_t, Event, pfnHostResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnHostReset, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall 
zeCommandListAppendEventReset_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendEventReset, hCommandList, hEvent); ze_command_list_append_event_reset_params_t tracerParams; tracerParams.phCommandList = &hCommandList; tracerParams.phEvent = &hEvent; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendEventResetCb_t, CommandList, pfnAppendEventResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendEventReset, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phEvent); } __zedllexport ze_result_t __zecall zeEventGetTimestamp_Tracing(ze_event_handle_t hEvent, ze_event_timestamp_type_t timestampType, void *dstptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Event.pfnGetTimestamp, hEvent, timestampType, dstptr); ze_event_get_timestamp_params_t tracerParams; tracerParams.phEvent = &hEvent; tracerParams.ptimestampType = ×tampType; tracerParams.pdstptr = &dstptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnEventGetTimestampCb_t, Event, pfnGetTimestampCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Event.pfnGetTimestamp, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phEvent, *tracerParams.ptimestampType, *tracerParams.pdstptr); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_event_imp.h000066400000000000000000000047601363734646600274670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeEventPoolCreate_Tracing(ze_driver_handle_t hDriver, const ze_event_pool_desc_t 
*desc, uint32_t numDevices, ze_device_handle_t *phDevices, ze_event_pool_handle_t *phEventPool); __zedllexport ze_result_t __zecall zeEventPoolDestroy_Tracing(ze_event_pool_handle_t hEventPool); __zedllexport ze_result_t __zecall zeEventCreate_Tracing(ze_event_pool_handle_t hEventPool, const ze_event_desc_t *desc, ze_event_handle_t *phEvent); __zedllexport ze_result_t __zecall zeEventDestroy_Tracing(ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeEventPoolGetIpcHandle_Tracing(ze_event_pool_handle_t hEventPool, ze_ipc_event_pool_handle_t *phIpc); __zedllexport ze_result_t __zecall zeEventPoolOpenIpcHandle_Tracing(ze_driver_handle_t hDriver, ze_ipc_event_pool_handle_t hIpc, ze_event_pool_handle_t *phEventPool); __zedllexport ze_result_t __zecall zeEventPoolCloseIpcHandle_Tracing(ze_event_pool_handle_t hEventPool); __zedllexport ze_result_t __zecall zeCommandListAppendSignalEvent_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendWaitOnEvents_Tracing(ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents); __zedllexport ze_result_t __zecall zeEventHostSignal_Tracing(ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeEventHostSynchronize_Tracing(ze_event_handle_t hEvent, uint32_t timeout); __zedllexport ze_result_t __zecall zeEventQueryStatus_Tracing(ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeEventHostReset_Tracing(ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeCommandListAppendEventReset_Tracing(ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent); __zedllexport ze_result_t __zecall zeEventGetTimestamp_Tracing(ze_event_handle_t hEvent, ze_event_timestamp_type_t timestampType, void *dstptr); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_fence_imp.cpp000066400000000000000000000117271363734646600277620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeFenceCreate_Tracing(ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnCreate, hCommandQueue, desc, phFence); ze_fence_create_params_t tracerParams; tracerParams.phCommandQueue = &hCommandQueue; tracerParams.pdesc = &desc; tracerParams.pphFence = &phFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceCreateCb_t, Fence, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandQueue, *tracerParams.pdesc, *tracerParams.pphFence); } __zedllexport ze_result_t __zecall zeFenceDestroy_Tracing(ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnDestroy, hFence); ze_fence_destroy_params_t tracerParams; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceDestroyCb_t, Fence, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence); } __zedllexport ze_result_t __zecall zeFenceHostSynchronize_Tracing(ze_fence_handle_t hFence, uint32_t timeout) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize, hFence, timeout); ze_fence_host_synchronize_params_t tracerParams; tracerParams.phFence = &hFence; tracerParams.ptimeout = &timeout; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceHostSynchronizeCb_t, Fence, 
pfnHostSynchronizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence, *tracerParams.ptimeout); } __zedllexport ze_result_t __zecall zeFenceQueryStatus_Tracing(ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus, hFence); ze_fence_query_status_params_t tracerParams; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceQueryStatusCb_t, Fence, pfnQueryStatusCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence); } __zedllexport ze_result_t __zecall zeFenceReset_Tracing(ze_fence_handle_t hFence) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Fence.pfnReset, hFence); ze_fence_reset_params_t tracerParams; tracerParams.phFence = &hFence; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnFenceResetCb_t, Fence, pfnResetCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Fence.pfnReset, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phFence); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_fence_imp.h000066400000000000000000000013171363734646600274210ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeFenceCreate_Tracing(ze_command_queue_handle_t hCommandQueue, const ze_fence_desc_t *desc, ze_fence_handle_t *phFence); __zedllexport ze_result_t __zecall zeFenceDestroy_Tracing(ze_fence_handle_t hFence); __zedllexport 
ze_result_t __zecall zeFenceHostSynchronize_Tracing(ze_fence_handle_t hFence, uint32_t timeout); __zedllexport ze_result_t __zecall zeFenceQueryStatus_Tracing(ze_fence_handle_t hFence); __zedllexport ze_result_t __zecall zeFenceReset_Tracing(ze_fence_handle_t hFence); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_global_imp.cpp000066400000000000000000000016661363734646600301430ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeInit_Tracing(ze_init_flag_t flags) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Global.pfnInit, flags); ze_init_params_t tracerParams; tracerParams.pflags = &flags; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnInitCb_t, Global, pfnInitCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Global.pfnInit, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.pflags); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_global_imp.h000066400000000000000000000002751363734646600276030ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeInit_Tracing(ze_init_flag_t flags); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_image_imp.cpp000066400000000000000000000067361363734646600277700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeImageGetProperties_Tracing(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) { 
ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Image.pfnGetProperties, hDevice, desc, pImageProperties); ze_image_get_properties_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.ppImageProperties = &pImageProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnImageGetPropertiesCb_t, Image, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Image.pfnGetProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.ppImageProperties); } __zedllexport ze_result_t __zecall zeImageCreate_Tracing(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Image.pfnCreate, hDevice, desc, phImage); ze_image_create_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphImage = &phImage; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnImageCreateCb_t, Image, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Image.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphImage); } __zedllexport ze_result_t __zecall zeImageDestroy_Tracing(ze_image_handle_t hImage) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Image.pfnDestroy, hImage); ze_image_destroy_params_t tracerParams; tracerParams.phImage = &hImage; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnImageDestroyCb_t, Image, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Image.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, 
apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phImage); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_image_imp.h000066400000000000000000000011431363734646600274200ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeImageGetProperties_Tracing(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties); __zedllexport ze_result_t __zecall zeImageCreate_Tracing(ze_device_handle_t hDevice, const ze_image_desc_t *desc, ze_image_handle_t *phImage); __zedllexport ze_result_t __zecall zeImageDestroy_Tracing(ze_image_handle_t hImage); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_imp.cpp000066400000000000000000000273551363734646600266260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" #include "shared/source/helpers/debug_helpers.h" namespace L0 { thread_local ze_bool_t tracingInProgress = 0; bool tracingIsEnabled = false; struct APITracerContextImp GLOBAL_APITracerContextImp; struct APITracerContextImp *PGLOBAL_APITracerContextImp = &GLOBAL_APITracerContextImp; APITracer *APITracer::create() { APITracerImp *tracer = new APITracerImp; tracer->tracingState = disabledState; tracer->tracerFunctions = {}; UNRECOVERABLE_IF(tracer == nullptr); return tracer; } ze_result_t createAPITracer(zet_driver_handle_t hDriver, const zet_tracer_desc_t *desc, zet_tracer_handle_t *phTracer) { if (!PGLOBAL_APITracerContextImp->isTracingEnabled()) { return ZE_RESULT_ERROR_UNINITIALIZED; } APITracerImp *tracer = static_cast(APITracer::create()); tracer->tracerFunctions.pUserData = desc->pUserData; *phTracer = tracer->toHandle(); return ZE_RESULT_SUCCESS; } ze_result_t APITracerImp::destroyTracer(zet_tracer_handle_t phTracer) { APITracerImp 
*tracer = static_cast(phTracer); ze_result_t result = PGLOBAL_APITracerContextImp->finalizeDisableImpTracingWait(tracer); if (result == ZE_RESULT_SUCCESS) { delete L0::APITracer::fromHandle(phTracer); } return result; } ze_result_t APITracerImp::setPrologues(zet_core_callbacks_t *pCoreCbs) { if (this->tracingState != disabledState) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } this->tracerFunctions.corePrologues = *pCoreCbs; return ZE_RESULT_SUCCESS; } ze_result_t APITracerImp::setEpilogues(zet_core_callbacks_t *pCoreCbs) { if (this->tracingState != disabledState) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } this->tracerFunctions.coreEpilogues = *pCoreCbs; return ZE_RESULT_SUCCESS; } ze_result_t APITracerImp::enableTracer(ze_bool_t enable) { return PGLOBAL_APITracerContextImp->enableTracingImp(this, enable); } static std::mutex perThreadTracerDataMutex; static std::list perThreadTracerDataList; void ThreadPrivateTracerData::allocatePerThreadPublicTracerData() { if (myThreadPublicTracerData == nullptr) { myThreadPublicTracerData = new per_thread_tracer_data_t; myThreadPublicTracerData->tracerArrayPointer.store(NULL, std::memory_order_relaxed); myThreadPublicTracerData->thread_id = std::this_thread::get_id(); std::lock_guard lock(perThreadTracerDataMutex); perThreadTracerDataList.push_back(myThreadPublicTracerData); } } void ThreadPrivateTracerData::freePerThreadPublicTracerData() { // // There is no need to hold a mutex when testing // my_thread_tracer_data is a thread_local object. // my_threadd_tracer_data for nullptr since it can only be done // within the current thread's context. // So there can be no other racing threads. 
// if (myThreadPublicTracerData != nullptr) { std::lock_guard lock(perThreadTracerDataMutex); perThreadTracerDataList.remove(myThreadPublicTracerData); delete myThreadPublicTracerData; myThreadPublicTracerData = nullptr; } } ThreadPrivateTracerData::ThreadPrivateTracerData() { myThreadPublicTracerData = nullptr; } ThreadPrivateTracerData::~ThreadPrivateTracerData() { freePerThreadPublicTracerData(); } thread_local ThreadPrivateTracerData myThreadPrivateTracerData; // // This thread_local allows for an optimisation of the test_for_tracer_array_references() // function. The optimization adds a test and branch, but it allows the common code path // to avoid TWO out of line function calls. // // One function call is to call the constructor for the thread_private_tracer_data class. // Note that this call is probably pretty heavy-weight, because it needs to be thread safe. // It MUST include a mutex. // // The second function call we avoid is the call to the thread_private_tracer_data class's // allocate memory member. It appears that at least with the Linux g++ compiler, // the "inline" annotation on a member function is accepted at compile time, but does not // change the code that is generated. // static thread_local bool myThreadPrivateTracerDataIsInitialized = false; void APITracerContextImp::apiTracingEnable(ze_init_flag_t flag) { if (driver_ddiTable.enableTracing) { tracingIsEnabled = true; } } void APITracerContextImp::enableTracing() { tracingIsEnabled = true; } void APITracerContextImp::disableTracing() { tracingIsEnabled = false; } bool APITracerContextImp::isTracingEnabled() { return tracingIsEnabled; } // // Walk the list of per-thread private data structures, testing // whether any of them reference this array. // // Return 1 if a reference is found. Otherwise return 0. 
// ze_bool_t APITracerContextImp::testForTracerArrayReferences(tracer_array_t *tracerArray) { std::list::iterator itr; for (itr = perThreadTracerDataList.begin(); itr != perThreadTracerDataList.end(); itr++) { if ((*itr)->tracerArrayPointer.load(std::memory_order_relaxed) == tracerArray) return 1; } return 0; } // // Walk the retiring_tracer_array_list, checking each member of the list for // references by per thread tracer array pointer. Delete and free // each tracer array that has no per-thread references. // // Return the number of entries on the retiring tracer array list. // size_t APITracerContextImp::testAndFreeRetiredTracers() { std::list::iterator itr = this->retiringTracerArrayList.begin(); while (itr != this->retiringTracerArrayList.end()) { tracer_array_t *retiringTracerArray = *itr; itr++; if (testForTracerArrayReferences(retiringTracerArray)) continue; this->retiringTracerArrayList.remove(retiringTracerArray); delete[] retiringTracerArray->tracerArrayEntries; delete retiringTracerArray; } return this->retiringTracerArrayList.size(); } int APITracerContextImp::updateTracerArrays() { tracer_array_t *newTracerArray; size_t newTracerArrayCount = this->enabledTracerImpList.size(); if (newTracerArrayCount != 0) { newTracerArray = new tracer_array_t; newTracerArray->tracerArrayCount = newTracerArrayCount; newTracerArray->tracerArrayEntries = new tracer_array_entry_t[newTracerArrayCount]; // // iterate over the list of enabled tracers, copying their entries into the // new tracer array // size_t i = 0; std::list::iterator itr; for (itr = enabledTracerImpList.begin(); itr != enabledTracerImpList.end(); itr++) { newTracerArray->tracerArrayEntries[i] = (*itr)->tracerFunctions; i++; } } else { newTracerArray = &emptyTracerArray; } // // active_tracer_array.load can use memory_order_relaxed here because // there is logically no transfer of other memory context between // threads in this case. 
// tracer_array_t *active_tracer_array_shadow = activeTracerArray.load(std::memory_order_relaxed); if (active_tracer_array_shadow != &emptyTracerArray) { retiringTracerArrayList.push_back(active_tracer_array_shadow); } // // This active_tracer_array.store must use memory_order_release. // This store DOES signal a logical transfer of tracer state information // from this thread to the tracing threads. // activeTracerArray.store(newTracerArray, std::memory_order_release); testAndFreeRetiredTracers(); return 0; } ze_result_t APITracerContextImp::enableTracingImp(struct APITracerImp *tracerImp, ze_bool_t enable) { std::lock_guard lock(traceTableMutex); ze_result_t result; switch (tracerImp->tracingState) { case disabledState: if (enable) { enabledTracerImpList.push_back(tracerImp); tracerImp->tracingState = enabledState; updateTracerArrays(); } result = ZE_RESULT_SUCCESS; break; case enabledState: if (!enable) { enabledTracerImpList.remove(tracerImp); tracerImp->tracingState = disabledWaitingState; updateTracerArrays(); } result = ZE_RESULT_SUCCESS; break; case disabledWaitingState: result = ZE_RESULT_ERROR_UNINITIALIZED; break; default: result = ZE_RESULT_ERROR_UNINITIALIZED; UNRECOVERABLE_IF(true); break; } return result; } // This is called by the destroy tracer method. // // This routine will return ZE_RESULT_SUCCESS // state if either it has never been enabled, // or if it has been enabled and then disabled. // // On ZE_RESULT_SUCESS, the destroy tracer method // can free the tracer's memory. // // ZE_RESULT_ERROR_UNINITIALIZED is returned // if the tracer has been enabled but not // disabled. The destroy tracer method // should NOT free this tracer's memory. 
// ze_result_t APITracerContextImp::finalizeDisableImpTracingWait(struct APITracerImp *tracerImp) { std::lock_guard lock(traceTableMutex); ze_result_t result; switch (tracerImp->tracingState) { case disabledState: result = ZE_RESULT_SUCCESS; break; case enabledState: result = ZE_RESULT_ERROR_UNINITIALIZED; break; case disabledWaitingState: while (testAndFreeRetiredTracers() != 0) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } tracerImp->tracingState = disabledState; result = ZE_RESULT_SUCCESS; break; default: result = ZE_RESULT_ERROR_UNINITIALIZED; UNRECOVERABLE_IF(true); break; } return result; } // // For an explanation of this function and the reason for its while loop, // see the comments at the top of this file. // void *APITracerContextImp::getActiveTracersList() { tracer_array_t *stableTracerArray = NULL; // // This test and branch allows us to avoid TWO function calls. One call is for the // constructor for my_thread_private_tracer_data. The other is to avoid the function // call to allocate_per_thread_tracer_data(). // // Since my_thread_private_tracer_data_is_initialized and my_thread_private_tracer_data are // thread_local, there is no thread safety issue here. Each thread will find // my_thread_private_tracer_data_is_initialized to be "false" at most once. // if (!myThreadPrivateTracerDataIsInitialized) { myThreadPrivateTracerData.allocatePerThreadPublicTracerData(); myThreadPrivateTracerDataIsInitialized = true; } do { // // This read of active_tracer_array DOES logically signal a transfer // of tracer structure information from the threader enable/disable/destroy // thread to this tracing thread. 
So it must use memory_order_acquire // stableTracerArray = PGLOBAL_APITracerContextImp->activeTracerArray.load(std::memory_order_acquire); myThreadPrivateTracerData.myThreadPublicTracerData->tracerArrayPointer.store(stableTracerArray, std::memory_order_relaxed); // // This read of active_tracer_array does NOT transfer any information // that was not already transferred by the previous read within this loop. // So it can use memory_order_relaxed. // } while (stableTracerArray != PGLOBAL_APITracerContextImp->activeTracerArray.load(std::memory_order_relaxed)); return (void *)stableTracerArray; } void APITracerContextImp::releaseActivetracersList() { myThreadPrivateTracerData.myThreadPublicTracerData->tracerArrayPointer.store(NULL, std::memory_order_relaxed); } } // namespace L0 compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_imp.h000066400000000000000000000224641363734646600262670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "level_zero/tools/source/tracing/tracing.h" #include "level_zero/tools/source/tracing/tracing_barrier_imp.h" #include "level_zero/tools/source/tracing/tracing_cl_interop_imp.h" #include "level_zero/tools/source/tracing/tracing_cmdlist_imp.h" #include "level_zero/tools/source/tracing/tracing_cmdqueue_imp.h" #include "level_zero/tools/source/tracing/tracing_copy_imp.h" #include "level_zero/tools/source/tracing/tracing_device_imp.h" #include "level_zero/tools/source/tracing/tracing_driver_imp.h" #include "level_zero/tools/source/tracing/tracing_event_imp.h" #include "level_zero/tools/source/tracing/tracing_fence_imp.h" #include "level_zero/tools/source/tracing/tracing_global_imp.h" #include "level_zero/tools/source/tracing/tracing_image_imp.h" #include "level_zero/tools/source/tracing/tracing_memory_imp.h" #include "level_zero/tools/source/tracing/tracing_module_imp.h" #include "level_zero/tools/source/tracing/tracing_residency_imp.h" 
#include "level_zero/tools/source/tracing/tracing_sampler_imp.h"
// NOTE(review): the <...> header names below were lost in extraction. The
// standard headers are reconstructed from the std:: names this layer uses;
// the API header is presumed to be the Level Zero tools API header.
// Confirm all seven against the upstream file.
#include <level_zero/zet_api.h>

#include "ze_ddi_tables.h"

#include <atomic>
#include <chrono>
#include <list>
#include <mutex>
#include <thread>
#include <vector>

extern ze_gpu_driver_dditable_t driver_ddiTable;

namespace L0 {

// Per-thread flag set by ZE_HANDLE_TRACER_RECURSION and cleared by
// APITracerWrapperImp; while set, traced entry points call straight through.
extern thread_local ze_bool_t tracingInProgress;

extern struct APITracerContextImp *PGLOBAL_APITracerContextImp;

// One tracer's callbacks and user data, as copied into a tracer array.
typedef struct tracer_array_entry {
    zet_core_callbacks_t corePrologues;
    zet_core_callbacks_t coreEpilogues;
    zet_device_handle_t hDevice;
    void *pUserData;
} tracer_array_entry_t;

// A snapshot of all enabled tracers: `tracerArrayCount` entries.
typedef struct tracerArray {
    size_t tracerArrayCount;
    tracer_array_entry_t *tracerArrayEntries;
} tracer_array_t;

// Per-thread slot holding the tracer array that thread is currently using
// (null when it is using none).
typedef struct per_thread_public_tracer_data {
    std::atomic<tracer_array_t *> tracerArrayPointer;
    std::thread::id thread_id;
} per_thread_tracer_data_t;

// Owner of one thread's public tracer data; non-copyable.
class ThreadPrivateTracerData {
  public:
    per_thread_tracer_data_t *myThreadPublicTracerData;

    void allocatePerThreadPublicTracerData();
    void freePerThreadPublicTracerData();
    ThreadPrivateTracerData();
    ~ThreadPrivateTracerData();

  private:
    // Declared but not defined: copying is not supported.
    ThreadPrivateTracerData(const ThreadPrivateTracerData &);
    ThreadPrivateTracerData &operator=(const ThreadPrivateTracerData &);
};

extern thread_local ThreadPrivateTracerData myThreadPrivateTracerData;

typedef enum tracingState {
    disabledState,        // tracing has never been enabled
    enabledState,         // tracing is enabled.
    disabledWaitingState, // tracing has been disabled, but not waited for
} tracingState_t;

struct APITracerImp : APITracer {
    ze_result_t destroyTracer(zet_tracer_handle_t phTracer) override;
    ze_result_t setPrologues(zet_core_callbacks_t *pCoreCbs) override;
    ze_result_t setEpilogues(zet_core_callbacks_t *pCoreCbs) override;
    ze_result_t enableTracer(ze_bool_t enable) override;

    // Both members get defined initial values so that a freshly constructed
    // tracer is in disabledState with no callbacks; enableTracingImp()
    // switches on tracingState.
    tracer_array_entry_t tracerFunctions = {};
    tracingState_t tracingState = disabledState;

  private:
};

struct APITracerContextImp : APITracerContext {
  public:
    APITracerContextImp() {
        // Start with the shared empty array as the active one.
        activeTracerArray.store(&emptyTracerArray, std::memory_order_relaxed);
    };

    ~APITracerContextImp() override {}

    static void apiTracingEnable(ze_init_flag_t flag);

    void *getActiveTracersList() override;
    void releaseActivetracersList() override;

    ze_result_t enableTracingImp(struct APITracerImp *newTracer, ze_bool_t enable);
    ze_result_t finalizeDisableImpTracingWait(struct APITracerImp *oldTracer);

    void enableTracing();
    void disableTracing();
    bool isTracingEnabled();

  private:
    std::mutex traceTableMutex;
    tracer_array_t emptyTracerArray = {0, NULL};
    std::atomic<tracer_array_t *> activeTracerArray;

    //
    // a list of tracer arrays that were once active, but
    // have been replaced by a new active array. These
    // once-active tracer arrays may continue for some time
    // to have references to them among the per-thread
    // tracer array pointers.
    //
    std::list<tracer_array_t *> retiringTracerArrayList;

    std::list<struct APITracerImp *> enabledTracerImpList;

    ze_bool_t testForTracerArrayReferences(tracer_array_t *tracerArray);
    size_t testAndFreeRetiredTracers();
    int updateTracerArrays();
};

// One callback of type T together with the user data of the tracer it came from.
template <class T>
class APITracerCallbackStateImp {
  public:
    T current_api_callback;
    void *pUserData;
};

// Prologue and epilogue callbacks gathered for a single traced API call.
template <class T>
class APITracerCallbackDataImp {
  public:
    T apiOrdinal = {};
    std::vector<L0::APITracerCallbackStateImp<T>> prologCallbacks;
    std::vector<L0::APITracerCallbackStateImp<T>> epilogCallbacks;
};

// If this thread is already inside a traced call, return the result of the
// untraced API directly; otherwise mark the thread as tracing.
#define ZE_HANDLE_TRACER_RECURSION(ze_api_ptr, ...) \
    do { \
        if (L0::tracingInProgress) { \
            return ze_api_ptr(__VA_ARGS__); \
        } \
        L0::tracingInProgress = 1; \
    } while (0)

// Fetch one callback pointer out of entry `tracerArrayIndex` of `tracerArray`.
#define ZE_GEN_TRACER_ARRAY_ENTRY(callbackPtr, tracerArray, tracerArrayIndex, callbackType, callbackCategory, callbackFunction) \
    do { \
        callbackPtr = tracerArray->tracerArrayEntries[tracerArrayIndex].callbackType.callbackCategory.callbackFunction; \
    } while (0)

// Acquire the active tracer array and append, for every entry, its prologue
// and epilogue callback for the given API to `perApiCallbackData`.
// Declares `currentTracerArray` in the enclosing scope.
#define ZE_GEN_PER_API_CALLBACK_STATE(perApiCallbackData, tracerType, callbackCategory, callbackFunctionType) \
    L0::tracer_array_t *currentTracerArray; \
    currentTracerArray = (L0::tracer_array_t *)L0::PGLOBAL_APITracerContextImp->getActiveTracersList(); \
    for (size_t i = 0; i < currentTracerArray->tracerArrayCount; i++) { \
        tracerType prologueCallbackPtr; \
        tracerType epilogue_callback_ptr; \
        ZE_GEN_TRACER_ARRAY_ENTRY(prologueCallbackPtr, currentTracerArray, i, corePrologues, callbackCategory, callbackFunctionType); \
        ZE_GEN_TRACER_ARRAY_ENTRY(epilogue_callback_ptr, currentTracerArray, i, coreEpilogues, callbackCategory, callbackFunctionType); \
        \
        L0::APITracerCallbackStateImp<tracerType> prologCallback; \
        prologCallback.current_api_callback = prologueCallbackPtr; \
        prologCallback.pUserData = currentTracerArray->tracerArrayEntries[i].pUserData; \
        perApiCallbackData.prologCallbacks.push_back(prologCallback); \
        \
        L0::APITracerCallbackStateImp<tracerType> epilogCallback; \
        epilogCallback.current_api_callback = epilogue_callback_ptr; \
        epilogCallback.pUserData = currentTracerArray->tracerArrayEntries[i].pUserData; \
        perApiCallbackData.epilogCallbacks.push_back(epilogCallback); \
    }

//
// Run every non-null prologue callback, call the real API, run every non-null
// epilogue callback, then clear the per-thread recursion flag and release the
// active tracer list. Returns the result of the real API call.
//
// Slot i of ppTracerInstanceUserData is shared between prologue i and
// epilogue i, letting a tracer hand per-call data from one to the other.
//
template <typename TFunction_pointer, typename TParams, typename TTracer, typename TTracerPrologCallbacks, typename TTracerEpilogCallbacks, typename... Args>
ze_result_t APITracerWrapperImp(TFunction_pointer zeApiPtr, TParams paramsStruct, TTracer apiOrdinal, TTracerPrologCallbacks prologCallbacks, TTracerEpilogCallbacks epilogCallbacks, Args &&... args) {
    ze_result_t ret = ZE_RESULT_SUCCESS;
    std::vector<APITracerCallbackStateImp<TTracer>> *callbacks_prologs = &prologCallbacks;

    std::vector<void *> ppTracerInstanceUserData;
    ppTracerInstanceUserData.resize(callbacks_prologs->size());

    for (size_t i = 0; i < callbacks_prologs->size(); i++) {
        if (callbacks_prologs->at(i).current_api_callback != nullptr)
            callbacks_prologs->at(i).current_api_callback(paramsStruct, ret, callbacks_prologs->at(i).pUserData, &ppTracerInstanceUserData[i]);
    }

    ret = zeApiPtr(args...);

    std::vector<APITracerCallbackStateImp<TTracer>> *callbacksEpilogs = &epilogCallbacks;
    for (size_t i = 0; i < callbacksEpilogs->size(); i++) {
        if (callbacksEpilogs->at(i).current_api_callback != nullptr)
            callbacksEpilogs->at(i).current_api_callback(paramsStruct, ret, callbacksEpilogs->at(i).pUserData, &ppTracerInstanceUserData[i]);
    }

    L0::tracingInProgress = 0;
    L0::PGLOBAL_APITracerContextImp->releaseActivetracersList();

    return ret;
}

} // namespace L0
compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_memory_imp.cpp000066400000000000000000000330331363734646600302040ustar00rootroot00000000000000/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "level_zero/tools/source/tracing/tracing_imp.h"

//
// Every wrapper below follows the same four steps:
//   1. ZE_HANDLE_TRACER_RECURSION: call straight through if this thread is
//      already tracing, otherwise mark it as tracing.
//   2. Fill the API's params struct with the addresses of the arguments.
//   3. ZE_GEN_PER_API_CALLBACK_STATE: gather the prologue/epilogue callbacks
//      registered for this API.
//   4. APITracerWrapperImp: run prologues, the real API, then epilogues.
// The arguments are passed as `*tracerParams.pX`, i.e. as lvalues naming the
// wrapper's own parameters.
//

// Traced entry point for Driver.pfnAllocSharedMem.
__zedllexport ze_result_t __zecall
zeDriverAllocSharedMem_Tracing(ze_driver_handle_t hDriver,
                               const ze_device_mem_alloc_desc_t *deviceDesc,
                               const ze_host_mem_alloc_desc_t *hostDesc,
                               size_t size,
                               size_t alignment,
                               ze_device_handle_t hDevice,
                               void **pptr) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnAllocSharedMem, hDriver, deviceDesc, hostDesc, size, alignment, hDevice, pptr);

    ze_driver_alloc_shared_mem_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.pdevice_desc = &deviceDesc;
    tracerParams.phost_desc = &hostDesc;
    tracerParams.psize = &size;
    tracerParams.palignment = &alignment;
    tracerParams.phDevice = &hDevice;
    tracerParams.ppptr = &pptr;

    L0::APITracerCallbackDataImp<ze_pfnDriverAllocSharedMemCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverAllocSharedMemCb_t, Driver, pfnAllocSharedMemCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnAllocSharedMem, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pdevice_desc, *tracerParams.phost_desc, *tracerParams.psize, *tracerParams.palignment, *tracerParams.phDevice, *tracerParams.ppptr);
}

// Traced entry point for Driver.pfnAllocDeviceMem.
__zedllexport ze_result_t __zecall
zeDriverAllocDeviceMem_Tracing(ze_driver_handle_t hDriver,
                               const ze_device_mem_alloc_desc_t *deviceDesc,
                               size_t size,
                               size_t alignment,
                               ze_device_handle_t hDevice,
                               void **pptr) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnAllocDeviceMem, hDriver, deviceDesc, size, alignment, hDevice, pptr);

    ze_driver_alloc_device_mem_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.pdevice_desc = &deviceDesc;
    tracerParams.psize = &size;
    tracerParams.palignment = &alignment;
    tracerParams.phDevice = &hDevice;
    tracerParams.ppptr = &pptr;

    L0::APITracerCallbackDataImp<ze_pfnDriverAllocDeviceMemCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverAllocDeviceMemCb_t, Driver, pfnAllocDeviceMemCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnAllocDeviceMem, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pdevice_desc, *tracerParams.psize, *tracerParams.palignment, *tracerParams.phDevice, *tracerParams.ppptr);
}

// Traced entry point for Driver.pfnAllocHostMem.
__zedllexport ze_result_t __zecall
zeDriverAllocHostMem_Tracing(ze_driver_handle_t hDriver,
                             const ze_host_mem_alloc_desc_t *hostDesc,
                             size_t size,
                             size_t alignment,
                             void **pptr) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnAllocHostMem, hDriver, hostDesc, size, alignment, pptr);

    ze_driver_alloc_host_mem_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.phost_desc = &hostDesc;
    tracerParams.psize = &size;
    tracerParams.palignment = &alignment;
    tracerParams.ppptr = &pptr;

    L0::APITracerCallbackDataImp<ze_pfnDriverAllocHostMemCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverAllocHostMemCb_t, Driver, pfnAllocHostMemCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnAllocHostMem, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.phost_desc, *tracerParams.psize, *tracerParams.palignment, *tracerParams.ppptr);
}

// Traced entry point for Driver.pfnFreeMem.
__zedllexport ze_result_t __zecall
zeDriverFreeMem_Tracing(ze_driver_handle_t hDriver,
                        void *ptr) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnFreeMem, hDriver, ptr);

    ze_driver_free_mem_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.pptr = &ptr;

    L0::APITracerCallbackDataImp<ze_pfnDriverFreeMemCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverFreeMemCb_t, Driver, pfnFreeMemCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnFreeMem, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pptr);
}

// Traced entry point for Driver.pfnGetMemAllocProperties.
__zedllexport ze_result_t __zecall
zeDriverGetMemAllocProperties_Tracing(ze_driver_handle_t hDriver,
                                      const void *ptr,
                                      ze_memory_allocation_properties_t *pMemAllocProperties,
                                      ze_device_handle_t *phDevice) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetMemAllocProperties, hDriver, ptr, pMemAllocProperties, phDevice);

    ze_driver_get_mem_alloc_properties_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.pptr = &ptr;
    tracerParams.ppMemAllocProperties = &pMemAllocProperties;
    tracerParams.pphDevice = &phDevice;

    L0::APITracerCallbackDataImp<ze_pfnDriverGetMemAllocPropertiesCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetMemAllocPropertiesCb_t, Driver, pfnGetMemAllocPropertiesCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetMemAllocProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pptr, *tracerParams.ppMemAllocProperties, *tracerParams.pphDevice);
}

// Traced entry point for Driver.pfnGetMemAddressRange.
__zedllexport ze_result_t __zecall
zeDriverGetMemAddressRange_Tracing(ze_driver_handle_t hDriver,
                                   const void *ptr,
                                   void **pBase,
                                   size_t *pSize) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetMemAddressRange, hDriver, ptr, pBase, pSize);

    ze_driver_get_mem_address_range_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.pptr = &ptr;
    tracerParams.ppBase = &pBase;
    tracerParams.ppSize = &pSize;

    L0::APITracerCallbackDataImp<ze_pfnDriverGetMemAddressRangeCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetMemAddressRangeCb_t, Driver, pfnGetMemAddressRangeCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetMemAddressRange, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pptr, *tracerParams.ppBase, *tracerParams.ppSize);
}

// Traced entry point for Driver.pfnGetMemIpcHandle.
__zedllexport ze_result_t __zecall
zeDriverGetMemIpcHandle_Tracing(ze_driver_handle_t hDriver,
                                const void *ptr,
                                ze_ipc_mem_handle_t *pIpcHandle) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnGetMemIpcHandle, hDriver, ptr, pIpcHandle);

    ze_driver_get_mem_ipc_handle_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.pptr = &ptr;
    tracerParams.ppIpcHandle = &pIpcHandle;

    L0::APITracerCallbackDataImp<ze_pfnDriverGetMemIpcHandleCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverGetMemIpcHandleCb_t, Driver, pfnGetMemIpcHandleCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnGetMemIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pptr, *tracerParams.ppIpcHandle);
}

// Traced entry point for Driver.pfnOpenMemIpcHandle.
__zedllexport ze_result_t __zecall
zeDriverOpenMemIpcHandle_Tracing(ze_driver_handle_t hDriver,
                                 ze_device_handle_t hDevice,
                                 ze_ipc_mem_handle_t handle,
                                 ze_ipc_memory_flag_t flags,
                                 void **pptr) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnOpenMemIpcHandle, hDriver, hDevice, handle, flags, pptr);

    ze_driver_open_mem_ipc_handle_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.phDevice = &hDevice;
    tracerParams.phandle = &handle;
    tracerParams.pflags = &flags;
    tracerParams.ppptr = &pptr;

    L0::APITracerCallbackDataImp<ze_pfnDriverOpenMemIpcHandleCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverOpenMemIpcHandleCb_t, Driver, pfnOpenMemIpcHandleCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnOpenMemIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.phDevice, *tracerParams.phandle, *tracerParams.pflags, *tracerParams.ppptr);
}

// Traced entry point for Driver.pfnCloseMemIpcHandle.
__zedllexport ze_result_t __zecall
zeDriverCloseMemIpcHandle_Tracing(ze_driver_handle_t hDriver,
                                  const void *ptr) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Driver.pfnCloseMemIpcHandle, hDriver, ptr);

    ze_driver_close_mem_ipc_handle_params_t tracerParams;
    tracerParams.phDriver = &hDriver;
    tracerParams.pptr = &ptr;

    L0::APITracerCallbackDataImp<ze_pfnDriverCloseMemIpcHandleCb_t> apiCallbackData;

    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDriverCloseMemIpcHandleCb_t, Driver, pfnCloseMemIpcHandleCb);

    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Driver.pfnCloseMemIpcHandle, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDriver, *tracerParams.pptr);
}
compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_memory_imp.h000066400000000000000000000050621363734646600276520ustar00rootroot00000000000000/*
 * Copyright (C) 2019-2020 Intel Corporation
* * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeDriverAllocSharedMem_Tracing(ze_driver_handle_t hDriver, const ze_device_mem_alloc_desc_t *deviceDesc, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr); __zedllexport ze_result_t __zecall zeDriverAllocDeviceMem_Tracing(ze_driver_handle_t hDriver, const ze_device_mem_alloc_desc_t *deviceDesc, size_t size, size_t alignment, ze_device_handle_t hDevice, void **pptr); __zedllexport ze_result_t __zecall zeDriverAllocHostMem_Tracing(ze_driver_handle_t hDriver, const ze_host_mem_alloc_desc_t *hostDesc, size_t size, size_t alignment, void **pptr); __zedllexport ze_result_t __zecall zeDriverFreeMem_Tracing(ze_driver_handle_t hDriver, void *ptr); __zedllexport ze_result_t __zecall zeDriverGetMemAllocProperties_Tracing(ze_driver_handle_t hDriver, const void *ptr, ze_memory_allocation_properties_t *pMemAllocProperties, ze_device_handle_t *phDevice); __zedllexport ze_result_t __zecall zeDriverGetMemAddressRange_Tracing(ze_driver_handle_t hDriver, const void *ptr, void **pBase, size_t *pSize); __zedllexport ze_result_t __zecall zeDriverGetMemIpcHandle_Tracing(ze_driver_handle_t hDriver, const void *ptr, ze_ipc_mem_handle_t *pIpcHandle); __zedllexport ze_result_t __zecall zeDriverOpenMemIpcHandle_Tracing(ze_driver_handle_t hDriver, ze_device_handle_t hDevice, ze_ipc_mem_handle_t handle, ze_ipc_memory_flag_t flags, void **pptr); __zedllexport ze_result_t __zecall zeDriverCloseMemIpcHandle_Tracing(ze_driver_handle_t hDriver, const void *ptr); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_module_imp.cpp000066400000000000000000000775321363734646600301750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeModuleCreate_Tracing(ze_device_handle_t 
hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnCreate, hDevice, desc, phModule, phBuildLog); ze_module_create_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &desc; tracerParams.pphModule = &phModule; tracerParams.pphBuildLog = &phBuildLog; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleCreateCb_t, Module, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphModule, *tracerParams.pphBuildLog); } __zedllexport ze_result_t __zecall zeModuleDestroy_Tracing(ze_module_handle_t hModule) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnDestroy, hModule); ze_module_destroy_params_t tracerParams; tracerParams.phModule = &hModule; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleDestroyCb_t, Module, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule); } __zedllexport ze_result_t __zecall zeModuleBuildLogDestroy_Tracing(ze_module_build_log_handle_t hModuleBuildLog) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnDestroy, hModuleBuildLog); ze_module_build_log_destroy_params_t tracerParams; tracerParams.phModuleBuildLog = &hModuleBuildLog; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleBuildLogDestroyCb_t, ModuleBuildLog, pfnDestroyCb); return 
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModuleBuildLog); } __zedllexport ze_result_t __zecall zeModuleBuildLogGetString_Tracing(ze_module_build_log_handle_t hModuleBuildLog, size_t *pSize, char *pBuildLog) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnGetString, hModuleBuildLog, pSize, pBuildLog); ze_module_build_log_get_string_params_t tracerParams; tracerParams.phModuleBuildLog = &hModuleBuildLog; tracerParams.ppSize = &pSize; tracerParams.ppBuildLog = &pBuildLog; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleBuildLogGetStringCb_t, ModuleBuildLog, pfnGetStringCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnGetString, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModuleBuildLog, *tracerParams.ppSize, *tracerParams.ppBuildLog); } __zedllexport ze_result_t __zecall zeModuleGetNativeBinary_Tracing(ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary, hModule, pSize, pModuleNativeBinary); ze_module_get_native_binary_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.ppSize = &pSize; tracerParams.ppModuleNativeBinary = &pModuleNativeBinary; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetNativeBinaryCb_t, Module, pfnGetNativeBinaryCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppSize, *tracerParams.ppModuleNativeBinary); } 
__zedllexport ze_result_t __zecall zeModuleGetGlobalPointer_Tracing(ze_module_handle_t hModule, const char *pGlobalName, void **pptr) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetGlobalPointer, hModule, pGlobalName, pptr); ze_module_get_global_pointer_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.ppGlobalName = &pGlobalName; tracerParams.ppptr = &pptr; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetGlobalPointerCb_t, Module, pfnGetGlobalPointerCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetGlobalPointer, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppGlobalName, *tracerParams.ppptr); } __zedllexport ze_result_t __zecall zeKernelCreate_Tracing(ze_module_handle_t hModule, const ze_kernel_desc_t *desc, ze_kernel_handle_t *phKernel) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnCreate, hModule, desc, phKernel); ze_kernel_create_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.pdesc = &desc; tracerParams.pphKernel = &phKernel; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelCreateCb_t, Kernel, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.pdesc, *tracerParams.pphKernel); } __zedllexport ze_result_t __zecall zeKernelDestroy_Tracing(ze_kernel_handle_t hKernel) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnDestroy, hKernel); ze_kernel_destroy_params_t tracerParams; tracerParams.phKernel = &hKernel; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelDestroyCb_t, 
Kernel, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel); } __zedllexport ze_result_t __zecall zeModuleGetFunctionPointer_Tracing(ze_module_handle_t hModule, const char *pKernelName, void **pfnFunction) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetFunctionPointer, hModule, pKernelName, pfnFunction); ze_module_get_function_pointer_params_t tracerParams; tracerParams.phModule = &hModule; tracerParams.ppFunctionName = &pKernelName; tracerParams.ppfnFunction = &pfnFunction; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetFunctionPointerCb_t, Module, pfnGetFunctionPointerCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetFunctionPointer, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppFunctionName, *tracerParams.ppfnFunction); } __zedllexport ze_result_t __zecall zeKernelSetGroupSize_Tracing(ze_kernel_handle_t hKernel, uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetGroupSize, hKernel, groupSizeX, groupSizeY, groupSizeZ); ze_kernel_set_group_size_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pgroupSizeX = &groupSizeX; tracerParams.pgroupSizeY = &groupSizeY; tracerParams.pgroupSizeZ = &groupSizeZ; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSetGroupSizeCb_t, Kernel, pfnSetGroupSizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetGroupSize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, 
*tracerParams.pgroupSizeX, *tracerParams.pgroupSizeY, *tracerParams.pgroupSizeZ); } __zedllexport ze_result_t __zecall zeKernelSuggestGroupSize_Tracing(ze_kernel_handle_t hKernel, uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestGroupSize, hKernel, globalSizeX, globalSizeY, globalSizeZ, groupSizeX, groupSizeY, groupSizeZ); ze_kernel_suggest_group_size_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pglobalSizeX = &globalSizeX; tracerParams.pglobalSizeY = &globalSizeY; tracerParams.pglobalSizeZ = &globalSizeZ; tracerParams.pgroupSizeX = &groupSizeX; tracerParams.pgroupSizeY = &groupSizeY; tracerParams.pgroupSizeZ = &groupSizeZ; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSuggestGroupSizeCb_t, Kernel, pfnSuggestGroupSizeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestGroupSize, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pglobalSizeX, *tracerParams.pglobalSizeY, *tracerParams.pglobalSizeZ, *tracerParams.pgroupSizeX, *tracerParams.pgroupSizeY, *tracerParams.pgroupSizeZ); } __zedllexport ze_result_t __zecall zeKernelSetArgumentValue_Tracing(ze_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const void *pArgValue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetArgumentValue, hKernel, argIndex, argSize, pArgValue); ze_kernel_set_argument_value_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pargIndex = &argIndex; tracerParams.pargSize = &argSize; tracerParams.ppArgValue = &pArgValue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSetArgumentValueCb_t, Kernel, pfnSetArgumentValueCb); 
return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetArgumentValue, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pargIndex, *tracerParams.pargSize, *tracerParams.ppArgValue); } __zedllexport ze_result_t __zecall zeKernelSetAttribute_Tracing(ze_kernel_handle_t hKernel, ze_kernel_attribute_t attr, uint32_t size, const void *pValue) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSetAttribute, hKernel, attr, size, pValue); ze_kernel_set_attribute_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.pattr = &attr; tracerParams.psize = &size; tracerParams.ppValue = &pValue; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSetAttributeCb_t, Kernel, pfnSetAttributeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSetAttribute, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pattr, *tracerParams.psize, *tracerParams.ppValue); } __zedllexport ze_result_t __zecall zeKernelGetProperties_Tracing(ze_kernel_handle_t hKernel, ze_kernel_properties_t *pKernelProperties) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnGetProperties, hKernel, pKernelProperties); ze_kernel_get_properties_params_t tracerParams; tracerParams.phKernel = &hKernel; tracerParams.ppKernelProperties = &pKernelProperties; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelGetPropertiesCb_t, Kernel, pfnGetPropertiesCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnGetProperties, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.ppKernelProperties); } __zedllexport ze_result_t 
__zecall zeCommandListAppendLaunchKernel_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    // Tracing wrapper for the CommandList.pfnAppendLaunchKernel driver DDI entry.
    // NOTE(review): ZE_HANDLE_TRACER_RECURSION is a macro defined outside this
    // chunk; it presumably forwards straight to the driver function and returns
    // when the call originates from inside a tracer callback - confirm.
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernel, hCommandList, hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents);
    // tracerParams holds pointers to the by-value parameters of this function.
    ze_command_list_append_launch_kernel_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.phKernel = &hKernel;
    tracerParams.ppLaunchFuncArgs = &pLaunchFuncArgs;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;
    // ZE_GEN_PER_API_CALLBACK_STATE (defined elsewhere) fills apiCallbackData,
    // whose apiOrdinal, prologCallbacks and epilogCallbacks members are read below.
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchKernelCb_t, CommandList, pfnAppendLaunchKernelCb);
    // The API arguments are passed on by dereferencing tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernel, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phKernel, *tracerParams.ppLaunchFuncArgs, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents);
}

// Tracing wrapper for the CommandList.pfnAppendLaunchKernelIndirect driver DDI
// entry. Same sequence as the wrapper above; the remaining parameter block
// assignments and the return statement follow this line range.
__zedllexport ze_result_t __zecall zeCommandListAppendLaunchKernelIndirect_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernelIndirect, hCommandList, hKernel, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
    ze_command_list_append_launch_kernel_indirect_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.phKernel = &hKernel;
    tracerParams.ppLaunchArgumentsBuffer = &pLaunchArgumentsBuffer;
tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;
    // ZE_GEN_PER_API_CALLBACK_STATE (defined elsewhere) fills apiCallbackData,
    // whose apiOrdinal, prologCallbacks and epilogCallbacks members are read below.
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchKernelIndirectCb_t, CommandList, pfnAppendLaunchKernelIndirectCb);
    // The API arguments are passed on by dereferencing tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernelIndirect, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phKernel, *tracerParams.ppLaunchArgumentsBuffer, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents);
}

// Tracing wrapper for the CommandList.pfnAppendLaunchMultipleKernelsIndirect
// driver DDI entry.
// Stores the address of every argument in tracerParams, fills apiCallbackData
// through ZE_GEN_PER_API_CALLBACK_STATE, and returns the result of
// L0::APITracerWrapperImp (the call expression follows this line range).
// NOTE(review): ZE_HANDLE_TRACER_RECURSION and ZE_GEN_PER_API_CALLBACK_STATE are
// macros defined outside this chunk; the first presumably forwards straight to
// the driver function and returns when the call originates from inside a
// tracer callback - confirm against the macro definition.
__zedllexport ze_result_t __zecall zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing(ze_command_list_handle_t hCommandList, uint32_t numKernels, ze_kernel_handle_t *phKernels, const uint32_t *pCountBuffer, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchMultipleKernelsIndirect, hCommandList, numKernels, phKernels, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
    // tracerParams holds pointers to the by-value parameters of this function.
    ze_command_list_append_launch_multiple_kernels_indirect_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.pnumKernels = &numKernels;
    tracerParams.pphKernels = &phKernels;
    tracerParams.ppCountBuffer = &pCountBuffer;
    tracerParams.ppLaunchArgumentsBuffer = &pLaunchArgumentsBuffer;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchMultipleKernelsIndirectCb_t, CommandList, pfnAppendLaunchMultipleKernelsIndirectCb);
    return
L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchMultipleKernelsIndirect, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.pnumKernels, *tracerParams.pphKernels, *tracerParams.ppCountBuffer, *tracerParams.ppLaunchArgumentsBuffer, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents);
}

// Tracing wrapper for the CommandList.pfnAppendLaunchCooperativeKernel driver
// DDI entry.
// Stores the address of every argument in tracerParams, fills apiCallbackData
// through ZE_GEN_PER_API_CALLBACK_STATE, and returns the result of
// L0::APITracerWrapperImp, which receives the driver function pointer, the
// parameter block, the API ordinal and the prolog/epilog callback lists.
// NOTE(review): ZE_HANDLE_TRACER_RECURSION and ZE_GEN_PER_API_CALLBACK_STATE are
// macros defined outside this chunk; the first presumably forwards straight to
// the driver function and returns when the call originates from inside a
// tracer callback - confirm against the macro definition.
__zedllexport ze_result_t __zecall zeCommandListAppendLaunchCooperativeKernel_Tracing(ze_command_list_handle_t hCommandList, ze_kernel_handle_t hKernel, const ze_group_count_t *pLaunchFuncArgs, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchCooperativeKernel, hCommandList, hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents);
    // tracerParams holds pointers to the by-value parameters of this function.
    ze_command_list_append_launch_cooperative_kernel_params_t tracerParams;
    tracerParams.phCommandList = &hCommandList;
    tracerParams.phKernel = &hKernel;
    tracerParams.ppLaunchFuncArgs = &pLaunchFuncArgs;
    tracerParams.phSignalEvent = &hSignalEvent;
    tracerParams.pnumWaitEvents = &numWaitEvents;
    tracerParams.pphWaitEvents = &phWaitEvents;
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnCommandListAppendLaunchCooperativeKernelCb_t, CommandList, pfnAppendLaunchCooperativeKernelCb);
    // The API arguments are passed on by dereferencing tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchCooperativeKernel, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phCommandList, *tracerParams.phKernel, *tracerParams.ppLaunchFuncArgs, *tracerParams.phSignalEvent, *tracerParams.pnumWaitEvents, *tracerParams.pphWaitEvents);
}

// Tracing wrapper for the Module.pfnGetKernelNames driver DDI entry; the body
// follows this line range.
__zedllexport ze_result_t __zecall zeModuleGetKernelNames_Tracing(ze_module_handle_t hModule, uint32_t *pCount, const char **pNames) {
ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Module.pfnGetKernelNames, hModule, pCount, pNames);
    // tracerParams holds pointers to the by-value parameters of the enclosing function.
    ze_module_get_kernel_names_params_t tracerParams;
    tracerParams.phModule = &hModule;
    tracerParams.ppCount = &pCount;
    tracerParams.ppNames = &pNames;
    // ZE_GEN_PER_API_CALLBACK_STATE (defined elsewhere) fills apiCallbackData,
    // whose apiOrdinal, prologCallbacks and epilogCallbacks members are read below.
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnModuleGetKernelNamesCb_t, Module, pfnGetKernelNamesCb);
    // The API arguments are passed on by dereferencing tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Module.pfnGetKernelNames, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phModule, *tracerParams.ppCount, *tracerParams.ppNames);
}

// Tracing wrapper for the Kernel.pfnSuggestMaxCooperativeGroupCount driver DDI
// entry.
// Stores the address of every argument in tracerParams, fills apiCallbackData
// through ZE_GEN_PER_API_CALLBACK_STATE, and returns the result of
// L0::APITracerWrapperImp, which receives the driver function pointer, the
// parameter block, the API ordinal and the prolog/epilog callback lists.
// NOTE(review): ZE_HANDLE_TRACER_RECURSION and ZE_GEN_PER_API_CALLBACK_STATE are
// macros defined outside this chunk; the first presumably forwards straight to
// the driver function and returns when the call originates from inside a
// tracer callback - confirm against the macro definition.
__zedllexport ze_result_t __zecall zeKernelSuggestMaxCooperativeGroupCount_Tracing(ze_kernel_handle_t hKernel, uint32_t *totalGroupCount) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestMaxCooperativeGroupCount, hKernel, totalGroupCount);
    ze_kernel_suggest_max_cooperative_group_count_params_t tracerParams;
    tracerParams.phKernel = &hKernel;
    tracerParams.ptotalGroupCount = &totalGroupCount;
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelSuggestMaxCooperativeGroupCountCb_t, Kernel, pfnSuggestMaxCooperativeGroupCountCb);
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnSuggestMaxCooperativeGroupCount, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.ptotalGroupCount);
}

// Tracing wrapper for the Kernel.pfnGetAttribute driver DDI entry. Same
// sequence as the wrapper above; the callback state setup and the return
// statement follow this line range.
__zedllexport ze_result_t __zecall zeKernelGetAttribute_Tracing(ze_kernel_handle_t hKernel, ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Kernel.pfnGetAttribute, hKernel, attr, pSize, pValue);
    ze_kernel_get_attribute_params_t tracerParams;
    tracerParams.phKernel = &hKernel;
    tracerParams.pattr = &attr;
    tracerParams.ppSize = &pSize;
    tracerParams.ppValue = &pValue;
L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnKernelGetAttributeCb_t, Kernel, pfnGetAttributeCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Kernel.pfnGetAttribute, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phKernel, *tracerParams.pattr, *tracerParams.ppSize, *tracerParams.ppValue); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_module_imp.h000066400000000000000000000135361363734646600276340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeModuleCreate_Tracing(ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog); __zedllexport ze_result_t __zecall zeModuleDestroy_Tracing(ze_module_handle_t hModule); __zedllexport ze_result_t __zecall zeModuleBuildLogDestroy_Tracing(ze_module_build_log_handle_t hModuleBuildLog); __zedllexport ze_result_t __zecall zeModuleBuildLogGetString_Tracing(ze_module_build_log_handle_t hModuleBuildLog, size_t *pSize, char *pBuildLog); __zedllexport ze_result_t __zecall zeModuleGetNativeBinary_Tracing(ze_module_handle_t hModule, size_t *pSize, uint8_t *pModuleNativeBinary); __zedllexport ze_result_t __zecall zeModuleGetGlobalPointer_Tracing(ze_module_handle_t hModule, const char *pGlobalName, void **pptr); __zedllexport ze_result_t __zecall zeKernelCreate_Tracing(ze_module_handle_t hModule, const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction); __zedllexport ze_result_t __zecall zeKernelDestroy_Tracing(ze_kernel_handle_t hFunction); __zedllexport ze_result_t __zecall zeModuleGetFunctionPointer_Tracing(ze_module_handle_t hModule, const char *pKernelName, void **pfnFunction); __zedllexport ze_result_t __zecall zeKernelSetGroupSize_Tracing(ze_kernel_handle_t 
hFunction, uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ); // tail of the zeKernelSetGroupSize_Tracing declaration; its definition is not visible from here, so hFunction is left unchanged

// Declarations of the kernel and command-list tracing wrappers.
// Parameter names below follow the definitions of these functions (hKernel,
// numKernels, phKernels) instead of the older hFunction / numFunctions /
// phFunctions spelling, so declaration and definition read the same.
// Types, order and linkage are unchanged.

// Wrapper around the Kernel.pfnSuggestGroupSize driver entry.
__zedllexport ze_result_t __zecall
zeKernelSuggestGroupSize_Tracing(ze_kernel_handle_t hKernel,
                                 uint32_t globalSizeX,
                                 uint32_t globalSizeY,
                                 uint32_t globalSizeZ,
                                 uint32_t *groupSizeX,
                                 uint32_t *groupSizeY,
                                 uint32_t *groupSizeZ);

// Wrapper around the Kernel.pfnSetArgumentValue driver entry.
__zedllexport ze_result_t __zecall
zeKernelSetArgumentValue_Tracing(ze_kernel_handle_t hKernel,
                                 uint32_t argIndex,
                                 size_t argSize,
                                 const void *pArgValue);

// Wrapper around the Kernel.pfnSetAttribute driver entry.
__zedllexport ze_result_t __zecall
zeKernelSetAttribute_Tracing(ze_kernel_handle_t hKernel,
                             ze_kernel_attribute_t attr,
                             uint32_t size,
                             const void *pValue);

// Wrapper around the Kernel.pfnGetProperties driver entry.
__zedllexport ze_result_t __zecall
zeKernelGetProperties_Tracing(ze_kernel_handle_t hKernel,
                              ze_kernel_properties_t *pKernelProperties);

// Wrapper around the CommandList.pfnAppendLaunchKernel driver entry.
__zedllexport ze_result_t __zecall
zeCommandListAppendLaunchKernel_Tracing(ze_command_list_handle_t hCommandList,
                                        ze_kernel_handle_t hKernel,
                                        const ze_group_count_t *pLaunchFuncArgs,
                                        ze_event_handle_t hSignalEvent,
                                        uint32_t numWaitEvents,
                                        ze_event_handle_t *phWaitEvents);

// Wrapper around the CommandList.pfnAppendLaunchKernelIndirect driver entry.
__zedllexport ze_result_t __zecall
zeCommandListAppendLaunchKernelIndirect_Tracing(ze_command_list_handle_t hCommandList,
                                                ze_kernel_handle_t hKernel,
                                                const ze_group_count_t *pLaunchArgumentsBuffer,
                                                ze_event_handle_t hSignalEvent,
                                                uint32_t numWaitEvents,
                                                ze_event_handle_t *phWaitEvents);

// Wrapper around the CommandList.pfnAppendLaunchMultipleKernelsIndirect driver entry.
__zedllexport ze_result_t __zecall
zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing(ze_command_list_handle_t hCommandList,
                                                         uint32_t numKernels,
                                                         ze_kernel_handle_t *phKernels,
                                                         const uint32_t *pCountBuffer,
                                                         const ze_group_count_t *pLaunchArgumentsBuffer,
                                                         ze_event_handle_t hSignalEvent,
                                                         uint32_t numWaitEvents,
                                                         ze_event_handle_t *phWaitEvents);

// Wrapper around the CommandList.pfnAppendLaunchCooperativeKernel driver entry.
__zedllexport ze_result_t __zecall
zeCommandListAppendLaunchCooperativeKernel_Tracing(ze_command_list_handle_t hCommandList,
                                                   ze_kernel_handle_t hKernel,
                                                   const ze_group_count_t *pLaunchFuncArgs,
                                                   ze_event_handle_t hSignalEvent,
                                                   uint32_t numWaitEvents,
                                                   ze_event_handle_t *phWaitEvents);

// Start of the next declaration; its name and parameter list follow directly
// after this line range.
__zedllexport ze_result_t __zecall
zeModuleGetKernelNames_Tracing(ze_module_handle_t hModule, uint32_t *pCount, const char **pNames);
__zedllexport ze_result_t __zecall zeKernelSuggestMaxCooperativeGroupCount_Tracing(ze_kernel_handle_t hKernel, uint32_t *totalGroupCount);
__zedllexport ze_result_t __zecall zeKernelGetAttribute_Tracing(ze_kernel_handle_t hKernel, ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue);
}
compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_residency_imp.cpp000066400000000000000000000113051363734646600306570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "level_zero/tools/source/tracing/tracing_imp.h"

// Tracing wrapper for the Device.pfnMakeMemoryResident driver DDI entry.
// Stores the address of every argument in tracerParams, fills apiCallbackData
// through ZE_GEN_PER_API_CALLBACK_STATE, and returns the result of
// L0::APITracerWrapperImp, which receives the driver function pointer, the
// parameter block, the API ordinal and the prolog/epilog callback lists.
// NOTE(review): ZE_HANDLE_TRACER_RECURSION and ZE_GEN_PER_API_CALLBACK_STATE are
// macros defined outside this chunk; the first presumably forwards straight to
// the driver function and returns when the call originates from inside a
// tracer callback - confirm against the macro definition.
__zedllexport ze_result_t __zecall zeDeviceMakeMemoryResident_Tracing(ze_device_handle_t hDevice, void *ptr, size_t size) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnMakeMemoryResident, hDevice, ptr, size);
    // tracerParams holds pointers to the by-value parameters of this function.
    ze_device_make_memory_resident_params_t tracerParams;
    tracerParams.phDevice = &hDevice;
    tracerParams.pptr = &ptr;
    tracerParams.psize = &size;
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceMakeMemoryResidentCb_t, Device, pfnMakeMemoryResidentCb);
    // The API arguments are passed on by dereferencing tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnMakeMemoryResident, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pptr, *tracerParams.psize);
}

// Tracing wrapper for the Device.pfnEvictMemory driver DDI entry. Same sequence
// as the wrapper above; the rest of the callback state macro call and the
// return statement follow this line range.
__zedllexport ze_result_t __zecall zeDeviceEvictMemory_Tracing(ze_device_handle_t hDevice, void *ptr, size_t size) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnEvictMemory, hDevice, ptr, size);
    ze_device_evict_memory_params_t tracerParams;
    tracerParams.phDevice = &hDevice;
    tracerParams.pptr = &ptr;
    tracerParams.psize = &size;
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceEvictMemoryCb_t, Device,
pfnEvictMemoryCb);
    // The API arguments are passed on by dereferencing tracerParams.
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnEvictMemory, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pptr, *tracerParams.psize);
}

// Tracing wrapper for the Device.pfnMakeImageResident driver DDI entry.
// Stores the address of every argument in tracerParams, fills apiCallbackData
// through ZE_GEN_PER_API_CALLBACK_STATE, and returns the result of
// L0::APITracerWrapperImp, which receives the driver function pointer, the
// parameter block, the API ordinal and the prolog/epilog callback lists.
// NOTE(review): ZE_HANDLE_TRACER_RECURSION and ZE_GEN_PER_API_CALLBACK_STATE are
// macros defined outside this chunk; the first presumably forwards straight to
// the driver function and returns when the call originates from inside a
// tracer callback - confirm against the macro definition.
__zedllexport ze_result_t __zecall zeDeviceMakeImageResident_Tracing(ze_device_handle_t hDevice, ze_image_handle_t hImage) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnMakeImageResident, hDevice, hImage);
    // tracerParams holds pointers to the by-value parameters of this function.
    ze_device_make_image_resident_params_t tracerParams;
    tracerParams.phDevice = &hDevice;
    tracerParams.phImage = &hImage;
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceMakeImageResidentCb_t, Device, pfnMakeImageResidentCb);
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnMakeImageResident, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.phImage);
}

// Tracing wrapper for the Device.pfnEvictImage driver DDI entry. Same sequence
// as the wrapper above.
__zedllexport ze_result_t __zecall zeDeviceEvictImage_Tracing(ze_device_handle_t hDevice, ze_image_handle_t hImage) {
    ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Device.pfnEvictImage, hDevice, hImage);
    ze_device_evict_image_params_t tracerParams;
    tracerParams.phDevice = &hDevice;
    tracerParams.phImage = &hImage;
    L0::APITracerCallbackDataImp apiCallbackData;
    ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnDeviceEvictImageCb_t, Device, pfnEvictImageCb);
    return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Device.pfnEvictImage, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.phImage);
}
compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_residency_imp.h000066400000000000000000000014411363734646600303240ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#pragma once extern "C" { __zedllexport ze_result_t __zecall zeDeviceMakeMemoryResident_Tracing(ze_device_handle_t hDevice, void *ptr, size_t size); __zedllexport ze_result_t __zecall zeDeviceEvictMemory_Tracing(ze_device_handle_t hDevice, void *ptr, size_t size); __zedllexport ze_result_t __zecall zeDeviceMakeImageResident_Tracing(ze_device_handle_t hDevice, ze_image_handle_t hImage); __zedllexport ze_result_t __zecall zeDeviceEvictImage_Tracing(ze_device_handle_t hDevice, ze_image_handle_t hImage); } // extern "C" compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_sampler_imp.cpp000066400000000000000000000043511363734646600303400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "level_zero/tools/source/tracing/tracing_imp.h" __zedllexport ze_result_t __zecall zeSamplerCreate_Tracing(ze_device_handle_t hDevice, const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Sampler.pfnCreate, hDevice, pDesc, phSampler); ze_sampler_create_params_t tracerParams; tracerParams.phDevice = &hDevice; tracerParams.pdesc = &pDesc; tracerParams.pphSampler = &phSampler; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, ze_pfnSamplerCreateCb_t, Sampler, pfnCreateCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Sampler.pfnCreate, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phDevice, *tracerParams.pdesc, *tracerParams.pphSampler); } __zedllexport ze_result_t __zecall zeSamplerDestroy_Tracing(ze_sampler_handle_t hSampler) { ZE_HANDLE_TRACER_RECURSION(driver_ddiTable.core_ddiTable.Sampler.pfnDestroy, hSampler); ze_sampler_destroy_params_t tracerParams; tracerParams.phSampler = &hSampler; L0::APITracerCallbackDataImp apiCallbackData; ZE_GEN_PER_API_CALLBACK_STATE(apiCallbackData, 
ze_pfnSamplerDestroyCb_t, Sampler, pfnDestroyCb); return L0::APITracerWrapperImp(driver_ddiTable.core_ddiTable.Sampler.pfnDestroy, &tracerParams, apiCallbackData.apiOrdinal, apiCallbackData.prologCallbacks, apiCallbackData.epilogCallbacks, *tracerParams.phSampler); } compute-runtime-20.13.16352/level_zero/tools/source/tracing/tracing_sampler_imp.h000066400000000000000000000006271363734646600300070ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern "C" { __zedllexport ze_result_t __zecall zeSamplerCreate_Tracing(ze_device_handle_t hDevice, const ze_sampler_desc_t *desc, ze_sampler_handle_t *phSampler); __zedllexport ze_result_t __zecall zeSamplerDestroy_Tracing(ze_sampler_handle_t hSampler); } compute-runtime-20.13.16352/lib_names.h.in000066400000000000000000000003241363734646600200630ustar00rootroot00000000000000/* * Copyright (C) 2018 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #cmakedefine SLD_LIBRARY_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${SLD_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" compute-runtime-20.13.16352/manifests/000077500000000000000000000000001363734646600173465ustar00rootroot00000000000000compute-runtime-20.13.16352/manifests/manifest.yml000066400000000000000000000021371363734646600217020ustar00rootroot00000000000000components: gmmlib: dest_dir: gmmlib type: git branch: gmmlib revision: intel-gmmlib-20.1.1 repository: https://github.com/intel/gmmlib.git igc: dest_dir: igc type: git branch: igc revision: f1e279a30faaa77be51085d69360da8182ca57e3 repository: https://github.com/intel/intelgraphicscompiler infra: branch: infra dest_dir: infra revision: 64e23e03de6adb8a8c3a718d67d35ef8a5318806 type: git internal: branch: master dest_dir: internal revision: 9569d817efd31ae89115e6f8d57cf23231e439a6 type: git kmdaf: branch: kmdaf dest_dir: kmdaf revision: 57e0a77511edfc4ca0e42b939075d1ab23c48a09 type: git level_zero: branch: master dest_dir: level_zero 
repository: https://github.com/oneapi-src/level-zero revision: v0.91 type: git libva: dest_dir: libva type: git branch: libva revision: c9bb65b repository: https://github.com/intel/libva.git wdk: dest_dir: wdk type: git branch: wdk revision: a8173734bcadfb652dee6fef242e7634de9220d8-1390 converter: M-1396 version: '1' compute-runtime-20.13.16352/opencl/000077500000000000000000000000001363734646600166355ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/doc/000077500000000000000000000000001363734646600174025ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/doc/FAQ.md000066400000000000000000000147551363734646600203470ustar00rootroot00000000000000 # Frequently asked questions (OpenCL) For general questions, see the [main FAQ](https://github.com/intel/compute-runtime/blob/master/FAQ.md). ## OpenCL version ### Which version of OpenCL is supported? See [README.md](https://github.com/intel/compute-runtime/blob/master/README.md). ### Which platforms will receive OpenCL 2.2 support? Any platforms supporting OpenCL 2.1 are eligible for move to OpenCL 2.2. ### How can I enable OpenCL 2.1 on the Apollo Lake / Broxton platform? You can enable a higher version of OpenCL using the ForceOCLVersion debug flag. ## Known Issues and Limitations OpenCL compliance of a driver built from open-source components should not be assumed by default. Intel will clearly designate / tag specific builds to indicate production quality including formal compliance. Other builds should be considered experimental. ### What is the functional delta to the "Beignet" driver? Intel's former open-source [Beignet driver](https://01.org/beignet) provided sharing capabilities with MESA OpenGL driver. Our intention is to provide these capabilities in NEO in the future. NEO supports platforms starting with Gen8 graphics (formerly Broadwell). For earlier platforms, please use Beignet driver. 
## Feature: cl_intel_va_api_media_sharing extension ### Where can I learn more about this extension? See the enabling [guide](cl_intel_va_api_media_sharing.md). ## Feature: cl_cache ### What is cl_cache? This is a mechanism to cache binary representations of OpenCL kernels provided in text form by the application. By storing the binary representations, compiling is required only the first time, which improves performance. ### How can cl_cache be enabled? In the working directory, manually create a *cl_cache* directory. The driver will use this directory to store the binary representations of the compiled kernels. Note: This will work on all supported OSes. ### Configuring cl_cache location Cached kernels can be stored in a different directory than the default one. This is useful when the application is installed into a directory for which the user doesn't have permissions. #### Linux configuration Set the environment variable named `cl_cache_dir` to the new location of the cl_cache directory. #### Example: If the application's directory is `/home/user/Document`, by default cl_cache will be stored in `/home/user/Document/cl_cache`. If the new path should be `/home/user/Desktop/cl_cache_place`, set the environment variable `cl_cache_dir` to `/home/user/Desktop/cl_cache_place`. ```bash export cl_cache_dir=/home/user/Desktop/cl_cache_place ``` Subsequent application runs with passed source code and the `cl_cache_dir` environment variable set will reuse previously cached kernel binaries instead of compiling kernels from source. #### Windows configuration To set the new location of the cl_cache directory - in the registry `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL`: 1. add the key `cl_cache_dir` 1. add a string value, named with the full path of the application executable, to the `cl_cache_dir` key 1. set the data of the added value to the desired location of cl_cache #### Example: If the application is located in `C:\Program Files\application\app.exe`, by default cl_cache will be stored in `C:\Program Files\application\cl_cache`.
If the new path should be `C:\Users\USER\Documents\application\cl_cache`, to subkey `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL\cl_cache_dir` add a string value named `C:\Program Files\application\app.exe` with data `C:\Users\USER\Documents\application\cl_cache`. e.g. string value : `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL\cl_cache_dir\C:\Program Files\application\app.exe` data : `C:\Users\USER\Documents\application\cl_cache` Neo will look for the string value (REG_SZ) `C:\Program Files\application\app.exe` in key `HKEY_LOCAL_MACHINE\SOFTWARE\Intel\IGFX\OCL\cl_cache_dir`. The data of this string value will be used as the new cl_cache dump directory for this specific application. ### What are the known limitations of cl_cache? 1. Not thread safe. (Workaround: Make sure your clBuildProgram calls are executed in a thread-safe fashion.) 1. Binary representation may not be compatible between various versions of NEO and IGC drivers. (Workaround: Manually empty *cl_cache* directory prior to update) 1. Cache is not automatically cleaned. (Workaround: Manually empty *cl_cache* directory) 1. Cache may exhaust disk space and cause further failures. (Workaround: Monitor and manually empty *cl_cache* directory) 1. Cache is not process safe. ## Feature: Out of order queues ### Implementation details of out of order queues implementation The current implementation of out of order queues allows multiple kernels to be run concurrently. This allows for better device utilization in scenarios where a single kernel doesn't fill the whole device. More details can be found here: * [Sample applications](https://github.com/intel/compute-samples/tree/master/compute_samples/applications/commands_aggregation) * [IWOCL(*) presentation](https://www.iwocl.org/wp-content/uploads/iwocl-2019-michal-mrozek-intel-breaking-the-last-line-of-performance-border.pdf) ### Known issues and limitations 1. Turning on profiling on an out of order command queue serializes kernel execution. 1.
Blocking command queue with user events blocks all further submissions until event is unblocked. 1. Commands blocked by user events, when unblocked are serialized as well. ## Feature: Double-precision emulation (FP64) By default NEO driver enables double precision operations only on platforms with supporting hardware. This is signified by exposing the "cl_khr_fp64" extension in the extension string. For other platforms, this support can be emulated by the compiler (IGC). ### How do I enable emulation? FP64 emulation can only be enabled on Linux. There are two settings that have to be set. #### Runtime setting: There are two ways you can enable this feature in NEO: * Set an environment variable **OverrideDefaultFP64Settings** to **1**: `OverrideDefaultFP64Settings=1` * In **igdrcl.config** configuration file in the same directory as application binary (you may have to create this file) add a line as such: `OverrideDefaultFP64Settings = 1` #### Compiler setting: IGC reads flags only from environment, so set **IGC_EnableDPEmulation** to **1** as such: `IGC_EnableDPEmulation=1` After both settings have been set you can run the application normally. ### Known issues and limitations Intel does not claim full specification conformance when using emulated mode. We reserve the right to not fix issues that appear only in emulation mode. Performance degradation is to be expected and has not been measured by Intel.compute-runtime-20.13.16352/opencl/doc/VTUNE.md000066400000000000000000000032521363734646600206270ustar00rootroot00000000000000# Using NEO runtime with VTune Amplifier You can use the Intel VTune Amplifier to identify GPU "hotspots". It will show GPGPU queue, GPU usage, memory throughputs, etc. Using this tool, you can compare how the application behaves under different configurations (LWS, GWS, driver versions, etc.) and identify bottlenecks. 
## Requirements * [Intel(R) VTune(tm) Amplifier](https://software.intel.com/en-us/intel-vtune-amplifier-xe) * [Intel(R) SDK for OpenCL(tm) Applications](https://software.intel.com/en-us/intel-opencl/download) * [Intel(R) Metrics Discovery Application Programming Interface](https://github.com/intel/metrics-discovery) * Current Intel(R) OpenCL(tm) GPU driver ## Installation Note: This is an example. Actual filenames may differ 1. Install OpenCL SDK & VTune ``` cd tar xvf intel_sdk_for_opencl_2017_7.0.0.2568_x64.gz tar xvf vtune_amplifier_2018_update2.tar.gz sudo dpkg -i intel-opencl_18.26.10987_amd64.deb cd ~/intel_sdk_for_opencl_2017_7.0.0.2568_x64/; sudo ./install_GUI.sh cd ~/vtune_amplifier_2018_update2/; sudo ./install_GUI.sh #use offline activation with file ``` To verify that VTune was installed properly run: ``` lsmod | grep sep4 ``` This should return 2 lines. Otherwise follow sepdk installation in VTune documentation. 2. Compile and install MD API - see MD API [README](https://github.com/intel/metrics-discovery/blob/master/README.md) for instructions. ## Running VTune ``` /opt/intel/vtune_amplifier_2018/bin64/amplxe-gui ``` Note: If you built Metrics Discovery with libstdc++ > 3.4.20, please use the following workaround: ``` sudo sh -c 'LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6 /opt/intel/vtune_amplifier_2018/bin64/amplxe-gui' ```compute-runtime-20.13.16352/opencl/doc/cl_intel_va_api_media_sharing.md000066400000000000000000000023431363734646600257100ustar00rootroot00000000000000# Intel(R) Graphics Compute Runtime for OpenCL(TM) ## Enabling [cl_intel_va_api_media_sharing](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_va_api_media_sharing.txt) extension To enable cl_intel_va_api_media_sharing extension Neo needs to be compiled on system with libva 2.x installed. This extension is supported by [iHD media driver](https://github.com/intel/media-driver). Before compilation additional packages have to be installed. 1. 
Download sources: * libdrm https://anongit.freedesktop.org/git/mesa/drm.git * libva https://github.com/intel/libva.git Example: ```shell git clone https://anongit.freedesktop.org/git/mesa/drm.git libdrm git clone https://github.com/intel/libva.git libva ``` 2. Compile and install libdrm Example: ```shell cd libdrm ./autogen.sh make -j `nproc` sudo make install ``` 3. Compile and install libva Example: ```shell cd libva export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ./autogen.sh make -j `nproc` sudo make install ``` 4. During Neo compilation verify libva was discovered ```shell -- Checking for module 'libva>=1.0.0' -- Found libva, version 1.1.0 -- Looking for vaGetLibFunc in va -- Looking for vaGetLibFunc in va - found -- Using libva ``` compute-runtime-20.13.16352/opencl/extensions/000077500000000000000000000000001363734646600210345ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/extensions/public/000077500000000000000000000000001363734646600223125ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/extensions/public/cl_ext_private.h000066400000000000000000000115241363734646600254760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" /********************************** * Internal only queue properties * **********************************/ // Intel evaluation now. 
Remove it after approval for public release #define CL_DEVICE_DRIVER_VERSION_INTEL 0x10010 #define CL_DEVICE_DRIVER_VERSION_INTEL_NEO1 0x454E4831 // Driver version is ENH1 /********************************************* * Internal only kernel exec info properties * *********************************************/ #define CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL 0x1000C #define CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL 0x1000D #define CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL 0x1000E /********************************* * cl_intel_debug_info extension * *********************************/ #define cl_intel_debug_info 1 // New queries for clGetProgramInfo: #define CL_PROGRAM_DEBUG_INFO_INTEL 0x4100 #define CL_PROGRAM_DEBUG_INFO_SIZES_INTEL 0x4101 // New queries for clGetKernelInfo: #define CL_KERNEL_BINARY_PROGRAM_INTEL 0x407D #define CL_KERNEL_BINARIES_INTEL 0x4102 #define CL_KERNEL_BINARY_SIZES_INTEL 0x4103 #define CL_KERNEL_BINARY_GPU_ADDRESS_INTEL 0x10010 /******************************************** * event properties for performance counter * ********************************************/ /* performance counter */ #define CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL 0x407F /************************** * Internal only cl types * **************************/ using cl_execution_info_kernel_type_intel = cl_uint; using cl_mem_alloc_flags_intel = cl_bitfield; using cl_mem_properties_intel = cl_bitfield; using cl_mem_flags_intel = cl_mem_flags; using cl_mem_info_intel = cl_uint; using cl_mem_advice_intel = cl_uint; using cl_unified_shared_memory_type_intel = cl_uint; using cl_unified_shared_memory_capabilities_intel = cl_bitfield; /****************************** * Internal only cl_mem_flags * ******************************/ #define CL_MEM_FLAGS_INTEL 0x10001 #define CL_MEM_LOCALLY_UNCACHED_RESOURCE (1 << 18) #define CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE (1 << 25) #define CL_MEM_48BIT_RESOURCE_INTEL (1 << 26) // Used with clEnqueueVerifyMemory #define CL_MEM_COMPARE_EQUAL 0u 
#define CL_MEM_COMPARE_NOT_EQUAL 1u #define CL_MEM_FORCE_LINEAR_STORAGE_INTEL (1 << 19) #define CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL (1 << 20) #define CL_MEM_ALLOCATION_HANDLE_INTEL 0x10050 //Used with createBuffer #define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL (1 << 23) /****************************** * UNIFIED MEMORY * *******************************/ /* cl_device_info */ #define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 #define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 #define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 #define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 #define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 /* cl_unified_shared_memory_capabilities_intel - bitfield */ #define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) #define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) #define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) #define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) /* cl_mem_properties_intel */ #define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 /* cl_mem_alloc_flags_intel - bitfield */ #define CL_MEM_ALLOC_DEFAULT_INTEL 0 #define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) /* cl_mem_alloc_info_intel */ #define CL_MEM_ALLOC_TYPE_INTEL 0x419A #define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B #define CL_MEM_ALLOC_SIZE_INTEL 0x419C #define CL_MEM_ALLOC_DEVICE_INTEL 0x419D /* cl_unified_shared_memory_type_intel */ #define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 #define CL_MEM_TYPE_HOST_INTEL 0x4197 #define CL_MEM_TYPE_DEVICE_INTEL 0x4198 #define CL_MEM_TYPE_SHARED_INTEL 0x4199 /* cl_kernel_exec_info */ #define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 #define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 #define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 #define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL 0x10022 #define 
CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL 0x10023 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL 0x10024 #define CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL 0x10025 /* cl_command_type */ #define CL_COMMAND_MEMSET_INTEL 0x4204 #define CL_COMMAND_MEMFILL_INTEL 0x4204 #define CL_COMMAND_MEMCPY_INTEL 0x4205 #define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 #define CL_COMMAND_MEMADVISE_INTEL 0x4207 /****************************** * SLICE COUNT SELECTING * *******************************/ /* cl_device_info */ #define CL_DEVICE_SLICE_COUNT_INTEL 0x10020 /* cl_queue_properties */ #define CL_QUEUE_SLICE_COUNT_INTEL 0x10021 compute-runtime-20.13.16352/opencl/extensions/public/cl_gl_private_intel.h000066400000000000000000000076671363734646600265100ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef OPENCL_SHARED_RESOURCE #define OPENCL_SHARED_RESOURCE #include "GL/gl.h" #include "GmmLib.h" #include "third_party/opencl_headers/CL/cl_gl.h" // Used for creating CL resources from GL resources typedef struct _tagCLGLResourceInfo { GLuint name; GLenum target; unsigned int globalShareHandle; GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL) GLenum glFormat; GLint glInternalFormat; GLuint glHWFormat; GLboolean isEmulatedTarget; GLuint borderWidth; GLint textureBufferWidth; GLint textureBufferSize; GLint textureBufferOffset; GLboolean oglSynchronized; GMM_STATUS status; unsigned int globalShareHandleMCS; GMM_RESOURCE_INFO *pGmmResInfoMCS; GLint numberOfSamples; // Number of samples as specified by API GLvoid *pReleaseData; } CL_GL_RESOURCE_INFO, *PCL_GL_RESOURCE_INFO; // Used for creating GL resources from CL resources typedef struct _tagGLCLResourceInfo { unsigned int globalShareHandle; unsigned int clChannelOrder; unsigned int clChannelDataType; size_t imageWidth; size_t imageHeight; size_t rowPitch; 
size_t slicePitch; unsigned int mipCount; bool isCreatedFromBuffer; unsigned int arraySize; unsigned int depth; } GL_CL_RESOURCE_INFO, *PGL_CL_RESOURCE_INFO; typedef struct _tagCLGLBufferInfo { GLenum bufferName; unsigned int globalShareHandle; GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL) GLvoid *pSysMem; GLint bufferSize; GLint bufferOffset; GLboolean oglSynchronized; GMM_STATUS status; GLvoid *pReleaseData; } CL_GL_BUFFER_INFO, *PCL_GL_BUFFER_INFO; #ifdef _WIN32 // Used for creating GL sync objects from CL events typedef struct _tagCLGLSyncInfo { _tagCLGLSyncInfo() : eventName(NULL), event((HANDLE)0), submissionEventName(NULL), submissionEvent((HANDLE)0), clientSynchronizationObject((D3DKMT_HANDLE)0), serverSynchronizationObject((D3DKMT_HANDLE)0), submissionSynchronizationObject((D3DKMT_HANDLE)0), hContextToBlock((D3DKMT_HANDLE)0), waitCalled(false) { } char *eventName; HANDLE event; char *submissionEventName; HANDLE submissionEvent; D3DKMT_HANDLE clientSynchronizationObject; D3DKMT_HANDLE serverSynchronizationObject; D3DKMT_HANDLE submissionSynchronizationObject; D3DKMT_HANDLE hContextToBlock; bool waitCalled; } CL_GL_SYNC_INFO, *PCL_GL_SYNC_INFO; // Used for creating CL events from GL sync objects typedef struct _tagGLCLSyncInfo { __GLsync *syncName; GLvoid *pSync; } GL_CL_SYNC_INFO, *PGL_CL_SYNC_INFO; #endif typedef int(__stdcall *pfn_clRetainEvent)(struct _cl_event *event); typedef int(__stdcall *pfn_clReleaseEvent)(struct _cl_event *event); typedef int(__stdcall *INTELpfn_clGetCLObjectInfoINTEL)(struct _cl_mem *pMemObj, void *pResourceInfo); typedef int(__stdcall *INTELpfn_clEnqueueMarkerWithSyncObjectINTEL)( struct _cl_command_queue *pCommandQueue, struct _cl_event **pOclEvent, struct _cl_context **pOclContext); typedef struct _tagCLGLDispatch { pfn_clRetainEvent clRetainEvent; pfn_clReleaseEvent clReleaseEvent; INTELpfn_clGetCLObjectInfoINTEL clGetCLObjectInfoINTEL; 
INTELpfn_clEnqueueMarkerWithSyncObjectINTEL clEnqueueMarkerWithSyncObjectINTEL; } CL_GL_DISPATCH, *PCL_GL_DISPATCH; #ifdef _WIN32 typedef struct _tagCLGLContextInfo { D3DKMT_HANDLE DeviceHandle; D3DKMT_HANDLE ContextHandle; } CL_GL_CONTEXT_INFO, *PCL_GL_CONTEXT_INFO; typedef struct _tagCLGLEvent { struct { void *dispatch1; void *dispatch2; } dispatch; void *pObj; void *CLCmdQ; struct _cl_context *CLCtx; unsigned int IsUserEvent; PCL_GL_SYNC_INFO pSyncInfo; } CL_GL_EVENT, *PCL_GL_EVENT; #endif //_WIN32 #endif compute-runtime-20.13.16352/opencl/source/000077500000000000000000000000001363734646600201355ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/CMakeLists.txt000066400000000000000000000156371363734646600227110ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) endif() if(POLICY CMP0063) cmake_policy(SET CMP0063 NEW) endif() project(neo) set(MSVC_DEF_ADDITIONAL_EXPORTS "") if(NOT TARGET ${BIKSIM_LIB_NAME}) add_subdirectory(builtin_kernels_simulation) endif() hide_subdir(builtin_kernels_simulation) hide_subdir(dll) hide_subdir(instrumentation) add_library(${NEO_STATIC_LIB_NAME} STATIC EXCLUDE_FROM_ALL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enable_gens.cmake $ ) add_subdirectories() add_subdirectory(instrumentation${NEO__INSTRUMENTATION_DIR_SUFFIX}) include(enable_gens.cmake) # Enable SSE4/AVX2 options for files that need them if(MSVC) set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/command_queue/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS /arch:AVX2) else() set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/command_queue/local_id_gen_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/command_queue/local_id_gen_sse4.cpp PROPERTIES COMPILE_FLAGS -msse4.2) endif() if(WIN32) if("${IGDRCL_OPTION__BITS}" STREQUAL "32" ) set(CMAKE_SHARED_LINKER_FLAGS 
"${CMAKE_SHARED_LINKER_FLAGS} /SAFESEH:NO") endif() endif() if(DEFINED AUB_STREAM_DIR) target_link_libraries(${NEO_STATIC_LIB_NAME} ${AUB_STREAM_PROJECT_NAME}) endif() target_include_directories(${NEO_STATIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${KMDAF_HEADERS_DIR} ) target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${CIF_BASE_DIR} ${IGC_OCL_ADAPTOR_DIR} ${VISA_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ${NEO__GMM_INCLUDE_DIR} ) target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC "" ${NEO__IGC_COMPILE_DEFINITIONS} ) if(WIN32) set(IGDRCL_EXTRA_LIBS Ws2_32) target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC ${WDK_INCLUDE_PATHS} ${NEO_SHARED_DIRECTORY}/os_interface/windows ${NEO_SOURCE_DIR}/opencl/source/os_interface/windows ) target_compile_definitions(${NEO_STATIC_LIB_NAME} PRIVATE OGL=1) else() set(IGDRCL_EXTRA_LIBS dl pthread) target_include_directories(${NEO_STATIC_LIB_NAME} PUBLIC ${NEO_SHARED_DIRECTORY}/os_interface/linux ${NEO_SOURCE_DIR}/opencl/source/os_interface/linux ${I915_INCLUDES_DIR} ) endif() target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC GMM_LIB_DLL DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM}) list(APPEND LIB_FLAGS_DEFINITIONS -DCIF_HEADERS_ONLY_BUILD ${SUPPORTED_GEN_FLAGS_DEFINITONS}) target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC ${LIB_FLAGS_DEFINITIONS}) set_target_properties(${NEO_STATIC_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_property(TARGET ${NEO_STATIC_LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) set_target_properties(${NEO_STATIC_LIB_NAME} PROPERTIES FOLDER "opencl runtime") set(EXPORTS_FILENAME "") if(WIN32) set(EXPORTS_FILENAME "${CMAKE_CURRENT_BINARY_DIR}/OpenCLExports${IGDRCL_OPTION__BITS}.def") set(MSVC_DEF_LIB_NAME "igdrcl${IGDRCL_OPTION__BITS}") set(MSVC_DEF_HEADER "This file was generated during CMake project configuration - please don't edit") 
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dll/windows/OpenCLExports.def.in" "${EXPORTS_FILENAME}") endif(WIN32) if(${GENERATE_EXECUTABLE}) list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS $ $ $ $ $ ) if(DEFINED AUB_STREAM_DIR) list(APPEND NEO_DYNAMIC_LIB__TARGET_OBJECTS $) endif() add_library(${NEO_DYNAMIC_LIB_NAME} SHARED ${NEO_DYNAMIC_LIB__TARGET_OBJECTS} ${NEO_SOURCE_DIR}/opencl/source/aub/aub_stream_interface.cpp ) if(UNIX) install(FILES $ DESTINATION ${CMAKE_INSTALL_LIBDIR}/intel-opencl COMPONENT opencl ) set_property(GLOBAL APPEND PROPERTY NEO_OCL_COMPONENTS_LIST "opencl") endif() if(NOT DISABLED_GTPIN_SUPPORT) macro(macro_for_each_gen) target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_TYPE_LOWER}/gtpin_setup_${GEN_TYPE_LOWER}.cpp) endmacro() apply_macro_for_each_gen("SUPPORTED") endif() add_subdirectory(dll) target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${NEO_STATIC_LIB_NAME} ${NEO_SHARED_LIB} ${NEO_STATIC_LIB_NAME} ${NEO_SHARED_LIB} ${IGDRCL_EXTRA_LIBS}) target_include_directories(${NEO_DYNAMIC_LIB_NAME} BEFORE PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${AUB_STREAM_DIR}/.. 
) if(WIN32) target_link_libraries(${NEO_DYNAMIC_LIB_NAME} dxgi) add_dependencies(${NEO_DYNAMIC_LIB_NAME} ${GMM_TARGET_NAME}) target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/windows/gmm_interface_win.cpp ) else() target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${GMM_LINK_NAME}) target_include_directories(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/dll/linux/devices${BRANCH_DIR_SUFFIX} ) set_property(TARGET ${NEO_DYNAMIC_LIB_NAME} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/dll/linux/ocl.exports" ) set_property(TARGET ${NEO_DYNAMIC_LIB_NAME} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-Bsymbolic" ) set_property(TARGET ${NEO_DYNAMIC_LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/linux/gmm_interface_linux.cpp ) endif() set_target_properties(${NEO_DYNAMIC_LIB_NAME} PROPERTIES DEBUG_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" RELEASE_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" RELEASEINTERNAL_OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" OUTPUT_NAME "${NEO_DLL_NAME_BASE}${IGDRCL_NAME_POSTFIX}${IGDRCL_OPTION__BITS}" ) set_target_properties(${NEO_DYNAMIC_LIB_NAME} PROPERTIES FOLDER "opencl runtime") create_project_source_tree_with_exports(${NEO_DYNAMIC_LIB_NAME} "${EXPORTS_FILENAME}") endif(${GENERATE_EXECUTABLE}) create_project_source_tree(${NEO_STATIC_LIB_NAME}) if(UNIX AND NOT (TARGET clang-tidy)) add_custom_target(clang-tidy DEPENDS scheduler ) add_custom_command( TARGET clang-tidy POST_BUILD COMMAND echo clang-tidy... 
COMMAND find ${CMAKE_CURRENT_SOURCE_DIR} -name *.cpp | xargs --verbose -I{} -P`nproc` clang-tidy-8 -p ${NEO_BINARY_DIR} {} | tee ${NEO_BINARY_DIR}/clang-tidy.log WORKING_DIRECTORY ${NEO_SOURCE_DIR} ) endif() set(IGDRCL_EXTRA_LIBS ${IGDRCL_EXTRA_LIBS} PARENT_SCOPE) compute-runtime-20.13.16352/opencl/source/accelerators/000077500000000000000000000000001363734646600226045ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/accelerators/CMakeLists.txt000066400000000000000000000010311363734646600253370ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_ACCELERATORS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/intel_accelerator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/intel_accelerator.h ${CMAKE_CURRENT_SOURCE_DIR}/intel_motion_estimation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/intel_motion_estimation.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_ACCELERATORS}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_ACCELERATORS ${RUNTIME_SRCS_ACCELERATORS}) compute-runtime-20.13.16352/opencl/source/accelerators/intel_accelerator.cpp000066400000000000000000000034031363734646600267670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/string.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" namespace NEO { cl_int IntelAccelerator::getInfo(cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int result = CL_SUCCESS; size_t ret = 0; switch (paramName) { case CL_ACCELERATOR_DESCRIPTOR_INTEL: { ret = getDescriptorSize(); result = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, getDescriptor(), ret)); } break; case 
CL_ACCELERATOR_REFERENCE_COUNT_INTEL: { auto v = getReference(); ret = sizeof(cl_uint); result = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, &v, ret)); } break; case CL_ACCELERATOR_CONTEXT_INTEL: { ret = sizeof(cl_context); cl_context ctx = static_cast(pContext); result = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, &ctx, ret)); } break; case CL_ACCELERATOR_TYPE_INTEL: { auto v = getTypeId(); ret = sizeof(cl_accelerator_type_intel); result = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, &v, ret)); } break; default: result = CL_INVALID_VALUE; break; } if (paramValueSizeRet) { *paramValueSizeRet = ret; } return result; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/accelerators/intel_accelerator.h000066400000000000000000000035431363734646600264410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" //------------------------------------------------------------------------------ // cl_intel_accelerator Class Stuff //------------------------------------------------------------------------------ namespace NEO { class Context; typedef struct TagAcceleratorObjParams { cl_uint AcceleratorType; cl_uint AcceleratorFlags; } OCLRT_ACCELERATOR_OBJECT_PARAMS, *POCLRT_ACCELERATOR_OBJECT_PARAMS; template <> struct OpenCLObjectMapper<_cl_accelerator_intel> { typedef class IntelAccelerator DerivedType; }; class IntelAccelerator : public BaseObject<_cl_accelerator_intel> { public: IntelAccelerator(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor) : pContext(context), typeId(typeId), descriptorSize(descriptorSize), pDescriptor(descriptor) {} IntelAccelerator() {} static const cl_ulong objectMagic = 0xC6D72FA2E81EA569ULL; cl_accelerator_type_intel getTypeId() const { return typeId; } 
size_t getDescriptorSize() const { return descriptorSize; } const void *getDescriptor() const { return pDescriptor; } cl_int getInfo(cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; protected: Context *pContext = nullptr; const cl_accelerator_type_intel typeId = -1; const size_t descriptorSize = 0; const void *pDescriptor = nullptr; private: }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/accelerators/intel_motion_estimation.cpp000066400000000000000000000034531363734646600302510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_motion_estimation.h" namespace NEO { cl_int VmeAccelerator::validateVmeArgs(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor) { const cl_motion_estimation_desc_intel *descObj = (const cl_motion_estimation_desc_intel *)descriptor; DEBUG_BREAK_IF(!context); DEBUG_BREAK_IF(typeId != CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL); if ((descriptorSize != sizeof(cl_motion_estimation_desc_intel)) || (descriptor == NULL)) { return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->mb_block_type) { case CL_ME_MB_TYPE_16x16_INTEL: case CL_ME_MB_TYPE_8x8_INTEL: case CL_ME_MB_TYPE_4x4_INTEL: break; default: return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->subpixel_mode) { case CL_ME_SUBPIXEL_MODE_INTEGER_INTEL: case CL_ME_SUBPIXEL_MODE_HPEL_INTEL: case CL_ME_SUBPIXEL_MODE_QPEL_INTEL: break; default: return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->sad_adjust_mode) { case CL_ME_SAD_ADJUST_MODE_NONE_INTEL: case CL_ME_SAD_ADJUST_MODE_HAAR_INTEL: break; default: return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } switch (descObj->search_path_type) { case CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL: case CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL: case CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL: break; default: 
return CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL; } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/accelerators/intel_motion_estimation.h000066400000000000000000000034011363734646600277070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/accelerators/intel_accelerator.h" //------------------------------------------------------------------------------ // VmeAccelerator Class Stuff //------------------------------------------------------------------------------ namespace NEO { class Context; class VmeAccelerator : public IntelAccelerator { public: static VmeAccelerator *create(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor, cl_int &result) { result = validateVmeArgs(context, typeId, descriptorSize, descriptor); VmeAccelerator *acc = nullptr; if (result == CL_SUCCESS) { acc = new VmeAccelerator( context, typeId, descriptorSize, descriptor); } return acc; } protected: private: VmeAccelerator(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor) : IntelAccelerator(context, typeId, descriptorSize, descriptor) { } static cl_int validateVmeArgs(Context *context, cl_accelerator_type_intel typeId, size_t descriptorSize, const void *descriptor); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/api/000077500000000000000000000000001363734646600207065ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/api/CMakeLists.txt000066400000000000000000000012441363734646600234470ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_API ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/additional_extensions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/additional_extensions.h ${CMAKE_CURRENT_SOURCE_DIR}/api.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/api.h ${CMAKE_CURRENT_SOURCE_DIR}/api_enter.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_types.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_API}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_API ${RUNTIME_SRCS_API}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/api/additional_extensions.cpp000066400000000000000000000004331363734646600260010ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/api/additional_extensions.h" namespace NEO { void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName) { return nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/api/additional_extensions.h000066400000000000000000000003631363734646600254500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "config.h" #include namespace NEO { void *CL_API_CALL getAdditionalExtensionFunctionAddress(const char *funcName); } compute-runtime-20.13.16352/opencl/source/api/api.cpp000066400000000000000000006553431363734646600222030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "api.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/kernel_helpers.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_context.h" #include 
"shared/source/utilities/api_intercept.h" #include "shared/source/utilities/stackvec.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/api/additional_extensions.h" #include "opencl/source/aub/aub_center.h" #include "opencl/source/built_ins/vme_builtin.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/context/driver_diagnostics.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/mem_properties_parser_helper.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/kernel/kernel_info_cl.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include "CL/cl.h" #include "config.h" #include #include using namespace NEO; cl_int CL_API_CALL clGetPlatformIDs(cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms) { TRACING_ENTER(clGetPlatformIDs, &numEntries, &platforms, &numPlatforms); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("numEntries", numEntries, "platforms", platforms, "numPlatforms", numPlatforms); do { // if platforms is nullptr, we must return the number of valid platforms we // support in the num_platforms variable (if it is 
non-nullptr) if ((platforms == nullptr) && (numPlatforms == nullptr)) { retVal = CL_INVALID_VALUE; break; } // platform != nullptr and num_entries == 0 is defined by spec as invalid if (numEntries == 0 && platforms != nullptr) { retVal = CL_INVALID_VALUE; break; } static std::mutex mutex; std::unique_lock lock(mutex); if (platformsImpl.empty()) { auto executionEnvironment = new ExecutionEnvironment(); executionEnvironment->incRefInternal(); auto allDevices = DeviceFactory::createDevices(*executionEnvironment); executionEnvironment->decRefInternal(); if (allDevices.empty()) { retVal = CL_OUT_OF_HOST_MEMORY; break; } auto groupedDevices = Platform::groupDevices(std::move(allDevices)); for (auto &deviceVector : groupedDevices) { auto pPlatform = Platform::createFunc(*executionEnvironment); if (!pPlatform || !pPlatform->initialize(std::move(deviceVector))) { retVal = CL_OUT_OF_HOST_MEMORY; break; } platformsImpl.push_back(std::move(pPlatform)); } if (retVal != CL_SUCCESS) { break; } } cl_uint numPlatformsToExpose = std::min(numEntries, static_cast(platformsImpl.size())); if (numEntries == 0) { numPlatformsToExpose = static_cast(platformsImpl.size()); } if (platforms) { for (auto i = 0u; i < numPlatformsToExpose; i++) { platforms[i] = platformsImpl[i].get(); } } if (numPlatforms) { *numPlatforms = numPlatformsToExpose; } } while (false); TRACING_EXIT(clGetPlatformIDs, &retVal); return retVal; } CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("numEntries", numEntries, "platforms", platforms, "numPlatforms", numPlatforms); retVal = clGetPlatformIDs(numEntries, platforms, numPlatforms); return retVal; } cl_int CL_API_CALL clGetPlatformInfo(cl_platform_id platform, cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetPlatformInfo, &platform, ¶mName, ¶mValueSize, ¶mValue, 
¶mValueSizeRet); cl_int retVal = CL_INVALID_PLATFORM; API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pPlatform = castToObject(platform); if (pPlatform) { retVal = pPlatform->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetPlatformInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetDeviceIDs(cl_platform_id platform, cl_device_type deviceType, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { TRACING_ENTER(clGetDeviceIDs, &platform, &deviceType, &numEntries, &devices, &numDevices); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "deviceType", deviceType, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); const cl_device_type validType = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT | CL_DEVICE_TYPE_CUSTOM; Platform *pPlatform = nullptr; do { /* Check parameter consistency */ if (devices == nullptr && numDevices == nullptr) { retVal = CL_INVALID_VALUE; break; } if (devices && numEntries == 0) { retVal = CL_INVALID_VALUE; break; } if ((deviceType & validType) == 0) { retVal = CL_INVALID_DEVICE_TYPE; break; } if (platform != nullptr) { pPlatform = castToObject(platform); if (pPlatform == nullptr) { retVal = CL_INVALID_PLATFORM; break; } } else { cl_uint numPlatforms = 0u; retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); if (numPlatforms == 0u) { retVal = CL_DEVICE_NOT_FOUND; break; } pPlatform = platformsImpl[0].get(); } DEBUG_BREAK_IF(pPlatform->isInitialized() != true); cl_uint numDev = static_cast(pPlatform->getNumDevices()); if (numDev == 0) { retVal = CL_DEVICE_NOT_FOUND; break; } if (DebugManager.flags.LimitAmountOfReturnedDevices.get()) { numDev = 
std::min(static_cast(DebugManager.flags.LimitAmountOfReturnedDevices.get()), numDev); } if (deviceType == CL_DEVICE_TYPE_ALL) { /* According to Spec, set it to all except TYPE_CUSTOM. */ deviceType = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT; } else if (deviceType == CL_DEVICE_TYPE_DEFAULT) { /* We just set it to GPU now. */ deviceType = CL_DEVICE_TYPE_GPU; } cl_uint retNum = 0; for (auto platformDeviceIndex = 0u; platformDeviceIndex < numDev; platformDeviceIndex++) { ClDevice *device = pPlatform->getClDevice(platformDeviceIndex); UNRECOVERABLE_IF(device == nullptr); if (deviceType & device->getDeviceInfo().deviceType) { if (devices) { if (retNum >= numEntries) { break; } devices[retNum] = device; } retNum++; } } if (numDevices) { *numDevices = retNum; } /* If no suitable device, set a error. */ if (retNum == 0) retVal = CL_DEVICE_NOT_FOUND; } while (false); TRACING_EXIT(clGetDeviceIDs, &retVal); return retVal; } cl_int CL_API_CALL clGetDeviceInfo(cl_device_id device, cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetDeviceInfo, &device, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_INVALID_DEVICE; API_ENTER(&retVal); DBG_LOG_INPUTS("clDevice", device, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); ClDevice *pDevice = castToObject(device); if (pDevice != nullptr) { retVal = pDevice->getDeviceInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetDeviceInfo, &retVal); return retVal; } cl_int CL_API_CALL clCreateSubDevices(cl_device_id inDevice, const cl_device_partition_property *properties, cl_uint numDevices, cl_device_id *outDevices, cl_uint *numDevicesRet) { ClDevice *pInDevice = castToObject(inDevice); if (pInDevice == nullptr) { return CL_INVALID_DEVICE; } 
auto subDevicesCount = pInDevice->getNumAvailableDevices(); if (subDevicesCount <= 1) { return CL_DEVICE_PARTITION_FAILED; } if ((properties == nullptr) || (properties[0] != CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) || ((properties[1] != CL_DEVICE_AFFINITY_DOMAIN_NUMA) && (properties[1] != CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE)) || (properties[2] != 0)) { return CL_INVALID_VALUE; } if (numDevicesRet != nullptr) { *numDevicesRet = subDevicesCount; } if (outDevices == nullptr) { return CL_SUCCESS; } if (numDevices < subDevicesCount) { return CL_INVALID_VALUE; } for (uint32_t i = 0; i < subDevicesCount; i++) { auto pClDevice = pInDevice->getDeviceById(i); pClDevice->retainApi(); outDevices[i] = pClDevice; } return CL_SUCCESS; } cl_int CL_API_CALL clRetainDevice(cl_device_id device) { TRACING_ENTER(clRetainDevice, &device); cl_int retVal = CL_INVALID_DEVICE; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device); auto pDevice = castToObject(device); if (pDevice) { pDevice->retainApi(); retVal = CL_SUCCESS; } TRACING_EXIT(clRetainDevice, &retVal); return retVal; } cl_int CL_API_CALL clReleaseDevice(cl_device_id device) { TRACING_ENTER(clReleaseDevice, &device); cl_int retVal = CL_INVALID_DEVICE; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device); auto pDevice = castToObject(device); if (pDevice) { pDevice->releaseApi(); retVal = CL_SUCCESS; } TRACING_EXIT(clReleaseDevice, &retVal); return retVal; } cl_context CL_API_CALL clCreateContext(const cl_context_properties *properties, cl_uint numDevices, const cl_device_id *devices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet) { TRACING_ENTER(clCreateContext, &properties, &numDevices, &devices, &funcNotify, &userData, &errcodeRet); cl_int retVal = CL_SUCCESS; cl_context context = nullptr; API_ENTER(&retVal); DBG_LOG_INPUTS("properties", properties, "numDevices", numDevices, "cl_device_id", devices, "funcNotify", funcNotify, "userData", userData); do { if 
(devices == nullptr) { /* Must have device. */ retVal = CL_INVALID_VALUE; break; } /* validateObjects make sure numDevices != 0. */ retVal = validateObjects(DeviceList(numDevices, devices)); if (retVal != CL_SUCCESS) break; if (funcNotify == nullptr && userData != nullptr) { retVal = CL_INVALID_VALUE; break; } ClDeviceVector allDevs(devices, numDevices); context = Context::create(properties, allDevs, funcNotify, userData, retVal); if (context != nullptr) { gtpinNotifyContextCreate(context); } } while (false); if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateContext, &context); return context; } cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties *properties, cl_device_type deviceType, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet) { TRACING_ENTER(clCreateContextFromType, &properties, &deviceType, &funcNotify, &userData, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("properties", properties, "deviceType", deviceType, "funcNotify", funcNotify, "userData", userData); Context *pContext = nullptr; do { if (funcNotify == nullptr && userData != nullptr) { retVal = CL_INVALID_VALUE; break; } cl_uint numDevices = 0; /* Query the number of device first. 
*/ retVal = clGetDeviceIDs(nullptr, deviceType, 0, nullptr, &numDevices); if (retVal != CL_SUCCESS) { break; } DEBUG_BREAK_IF(numDevices <= 0); StackVec supportedDevs; supportedDevs.resize(numDevices); retVal = clGetDeviceIDs(nullptr, deviceType, numDevices, supportedDevs.begin(), nullptr); DEBUG_BREAK_IF(retVal != CL_SUCCESS); ClDeviceVector allDevs(supportedDevs.begin(), numDevices); pContext = Context::create(properties, allDevs, funcNotify, userData, retVal); if (pContext != nullptr) { gtpinNotifyContextCreate((cl_context)pContext); } } while (false); if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateContextFromType, (cl_context *)&pContext); return pContext; } cl_int CL_API_CALL clRetainContext(cl_context context) { TRACING_ENTER(clRetainContext, &context); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context); Context *pContext = castToObject(context); if (pContext) { pContext->retain(); TRACING_EXIT(clRetainContext, &retVal); return retVal; } retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clRetainContext, &retVal); return retVal; } cl_int CL_API_CALL clReleaseContext(cl_context context) { TRACING_ENTER(clReleaseContext, &context); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context); Context *pContext = castToObject(context); if (pContext) { pContext->release(); TRACING_EXIT(clReleaseContext, &retVal); return retVal; } retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clReleaseContext, &retVal); return retVal; } cl_int CL_API_CALL clGetContextInfo(cl_context context, cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetContextInfo, &context, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); auto retVal = CL_INVALID_CONTEXT; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), 
"paramValueSizeRet", paramValueSizeRet); auto pContext = castToObject(context); if (pContext) { retVal = pContext->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetContextInfo, &retVal); return retVal; } cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context, cl_device_id device, const cl_command_queue_properties properties, cl_int *errcodeRet) { TRACING_ENTER(clCreateCommandQueue, &context, &device, (cl_command_queue_properties *)&properties, &errcodeRet); cl_command_queue commandQueue = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "device", device, "properties", properties); do { if (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)) { retVal = CL_INVALID_VALUE; break; } Context *pContext = nullptr; ClDevice *pDevice = nullptr; retVal = validateObjects( WithCastToInternal(context, &pContext), WithCastToInternal(device, &pDevice)); if (retVal != CL_SUCCESS) { break; } if (!pContext->isDeviceAssociated(*pDevice)) { retVal = CL_INVALID_DEVICE; break; } cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, properties, 0}; commandQueue = CommandQueue::create(pContext, pDevice, props, false, retVal); if (pContext->isProvidingPerformanceHints()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, DRIVER_CALLS_INTERNAL_CL_FLUSH); if (castToObjectOrAbort(commandQueue)->isProfilingEnabled()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED); if (pDevice->getDeviceInfo().preemptionSupported && pDevice->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION); } } } } while (false); err.set(retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); TRACING_EXIT(clCreateCommandQueue, &commandQueue); return 
commandQueue; } cl_int CL_API_CALL clRetainCommandQueue(cl_command_queue commandQueue) { TRACING_ENTER(clRetainCommandQueue, &commandQueue); cl_int retVal = CL_INVALID_COMMAND_QUEUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); retainQueue(commandQueue, retVal); if (retVal == CL_SUCCESS) { TRACING_EXIT(clRetainCommandQueue, &retVal); return retVal; } // if host queue not found - try to query device queue retainQueue(commandQueue, retVal); TRACING_EXIT(clRetainCommandQueue, &retVal); return retVal; } cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue commandQueue) { TRACING_ENTER(clReleaseCommandQueue, &commandQueue); cl_int retVal = CL_INVALID_COMMAND_QUEUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); releaseQueue(commandQueue, retVal); if (retVal == CL_SUCCESS) { TRACING_EXIT(clReleaseCommandQueue, &retVal); return retVal; } // if host queue not found - try to query device queue releaseQueue(commandQueue, retVal); TRACING_EXIT(clReleaseCommandQueue, &retVal); return retVal; } cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetCommandQueueInfo, &commandQueue, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_INVALID_COMMAND_QUEUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); getQueueInfo(commandQueue, paramName, paramValueSize, paramValue, paramValueSizeRet, retVal); // if host queue not found - try to query device queue if (retVal == CL_SUCCESS) { TRACING_EXIT(clGetCommandQueueInfo, &retVal); return retVal; } getQueueInfo(commandQueue, paramName, paramValueSize, paramValue, paramValueSizeRet, retVal); TRACING_EXIT(clGetCommandQueueInfo, &retVal); return 
retVal; } // deprecated OpenCL 1.0 cl_int CL_API_CALL clSetCommandQueueProperty(cl_command_queue commandQueue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *oldProperties) { TRACING_ENTER(clSetCommandQueueProperty, &commandQueue, &properties, &enable, &oldProperties); cl_int retVal = CL_INVALID_VALUE; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "properties", properties, "enable", enable, "oldProperties", oldProperties); TRACING_EXIT(clSetCommandQueueProperty, &retVal); return retVal; } cl_mem CL_API_CALL clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet) { TRACING_ENTER(clCreateBuffer, &context, &flags, &size, &hostPtr, &errcodeRet); DBG_LOG_INPUTS("cl_context", context, "cl_mem_flags", flags, "size", size, "hostPtr", NEO::FileLoggerInstance().infoPointerToString(hostPtr, size)); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); cl_mem buffer = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); if (isFieldValid(flags, MemObjHelper::validFlagsForBuffer)) { Buffer::validateInputAndCreateBuffer(context, memoryProperties, flags, 0, size, hostPtr, retVal, buffer); } else { retVal = CL_INVALID_VALUE; } err.set(retVal); DBG_LOG_INPUTS("buffer", buffer); TRACING_EXIT(clCreateBuffer, &buffer); return buffer; } cl_mem CL_API_CALL clCreateBufferWithPropertiesINTEL(cl_context context, const cl_mem_properties_intel *properties, size_t size, void *hostPtr, cl_int *errcodeRet) { DBG_LOG_INPUTS("cl_context", context, "cl_mem_properties_intel", properties, "size", size, "hostPtr", NEO::FileLoggerInstance().infoPointerToString(hostPtr, size)); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); cl_mem buffer = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; 
cl_mem_alloc_flags_intel allocflags = 0; if (MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::BUFFER)) { Buffer::validateInputAndCreateBuffer(context, memoryProperties, flags, flagsIntel, size, hostPtr, retVal, buffer); } else { retVal = CL_INVALID_VALUE; } err.set(retVal); DBG_LOG_INPUTS("buffer", buffer); return buffer; } cl_mem CL_API_CALL clCreateSubBuffer(cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bufferCreateType, const void *bufferCreateInfo, cl_int *errcodeRet) { TRACING_ENTER(clCreateSubBuffer, &buffer, &flags, &bufferCreateType, &bufferCreateInfo, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("buffer", buffer, "flags", flags, "bufferCreateType", bufferCreateType, "bufferCreateInfo", bufferCreateInfo); cl_mem subBuffer = nullptr; Buffer *parentBuffer = castToObject(buffer); do { if (parentBuffer == nullptr) { retVal = CL_INVALID_MEM_OBJECT; break; } /* Are there some invalid flag bits? */ if (!MemObjHelper::checkMemFlagsForSubBuffer(flags)) { retVal = CL_INVALID_VALUE; break; } cl_mem_flags parentFlags = parentBuffer->getMemoryPropertiesFlags(); cl_mem_flags_intel parentFlagsIntel = parentBuffer->getMemoryPropertiesFlagsIntel(); if (parentBuffer->isSubBuffer() == true) { retVal = CL_INVALID_MEM_OBJECT; break; } /* Check whether flag is valid. */ if (((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS)) || ((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_WRITE_ONLY)) || ((flags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS))) { retVal = CL_INVALID_VALUE; break; } /* Check whether flag is valid and compatible with parent. 
*/ if (flags && (((parentFlags & CL_MEM_WRITE_ONLY) && (flags & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY))) || ((parentFlags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) || ((parentFlags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_HOST_READ_ONLY)) || ((parentFlags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_WRITE_ONLY)) || ((parentFlags & CL_MEM_HOST_NO_ACCESS) && (flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY))))) { retVal = CL_INVALID_VALUE; break; } /* Inherit some flags if we do not set. */ if ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_READ_WRITE)) == 0) { flags |= parentFlags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_READ_WRITE); } if ((flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0) { flags |= parentFlags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS); } flags |= parentFlags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR); if (bufferCreateType != CL_BUFFER_CREATE_TYPE_REGION) { retVal = CL_INVALID_VALUE; break; } if (bufferCreateInfo == nullptr) { retVal = CL_INVALID_VALUE; break; } /* Must have non-zero size. */ const cl_buffer_region *region = reinterpret_cast(bufferCreateInfo); if (region->size == 0) { retVal = CL_INVALID_BUFFER_SIZE; break; } /* Out of range. 
*/ if (region->origin > parentBuffer->getSize() || region->origin + region->size > parentBuffer->getSize()) { retVal = CL_INVALID_VALUE; break; } if (!parentBuffer->isValidSubBufferOffset(region->origin)) { retVal = CL_MISALIGNED_SUB_BUFFER_OFFSET; break; } subBuffer = parentBuffer->createSubBuffer(flags, parentFlagsIntel, region, retVal); } while (false); if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateSubBuffer, &subBuffer); return subBuffer; } cl_mem CL_API_CALL clCreateImage(cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet) { TRACING_ENTER(clCreateImage, &context, &flags, &imageFormat, &imageDesc, &hostPtr, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_context", context, "cl_mem_flags", flags, "cl_image_format.channel_data_type", imageFormat->image_channel_data_type, "cl_image_format.channel_order", imageFormat->image_channel_order, "cl_image_desc.width", imageDesc->image_width, "cl_image_desc.heigth", imageDesc->image_height, "cl_image_desc.depth", imageDesc->image_depth, "cl_image_desc.type", imageDesc->image_type, "cl_image_desc.array_size", imageDesc->image_array_size, "hostPtr", hostPtr); cl_mem image = nullptr; Context *pContext = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext)); if (retVal == CL_SUCCESS) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); if (isFieldValid(flags, MemObjHelper::validFlagsForImage)) { image = Image::validateAndCreateImage(pContext, memoryProperties, flags, 0, imageFormat, imageDesc, hostPtr, retVal); } else { retVal = CL_INVALID_VALUE; } } ErrorCodeHelper err(errcodeRet, retVal); DBG_LOG_INPUTS("image", image); TRACING_EXIT(clCreateImage, &image); return image; } cl_mem CL_API_CALL clCreateImageWithPropertiesINTEL(cl_context context, cl_mem_properties_intel *properties, const cl_image_format 
*imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_context", context, "cl_mem_properties_intel", properties, "cl_image_format.channel_data_type", imageFormat->image_channel_data_type, "cl_image_format.channel_order", imageFormat->image_channel_order, "cl_image_desc.width", imageDesc->image_width, "cl_image_desc.heigth", imageDesc->image_height, "cl_image_desc.depth", imageDesc->image_depth, "cl_image_desc.type", imageDesc->image_type, "cl_image_desc.array_size", imageDesc->image_array_size, "hostPtr", hostPtr); cl_mem image = nullptr; Context *pContext = nullptr; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; retVal = validateObjects(WithCastToInternal(context, &pContext)); if (retVal == CL_SUCCESS) { if (MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::IMAGE)) { image = Image::validateAndCreateImage(pContext, memoryProperties, flags, flagsIntel, imageFormat, imageDesc, hostPtr, retVal); } else { retVal = CL_INVALID_VALUE; } } ErrorCodeHelper err(errcodeRet, retVal); DBG_LOG_INPUTS("image", image); return image; } // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateImage2D(cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageRowPitch, void *hostPtr, cl_int *errcodeRet) { TRACING_ENTER(clCreateImage2D, &context, &flags, &imageFormat, &imageWidth, &imageHeight, &imageRowPitch, &hostPtr, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "flags", flags, "imageFormat", imageFormat, "imageWidth", imageWidth, "imageHeight", imageHeight, "imageRowPitch", imageRowPitch, "hostPtr", hostPtr); cl_mem image2D = nullptr; cl_image_desc imageDesc; 
memset(&imageDesc, 0, sizeof(cl_image_desc)); imageDesc.image_height = imageHeight; imageDesc.image_width = imageWidth; imageDesc.image_row_pitch = imageRowPitch; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; Context *pContext = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext)); if (retVal == CL_SUCCESS) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); image2D = Image::validateAndCreateImage(pContext, memoryProperties, flags, 0, imageFormat, &imageDesc, hostPtr, retVal); } ErrorCodeHelper err(errcodeRet, retVal); DBG_LOG_INPUTS("image 2D", image2D); TRACING_EXIT(clCreateImage2D, &image2D); return image2D; } // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageDepth, size_t imageRowPitch, size_t imageSlicePitch, void *hostPtr, cl_int *errcodeRet) { TRACING_ENTER(clCreateImage3D, &context, &flags, &imageFormat, &imageWidth, &imageHeight, &imageDepth, &imageRowPitch, &imageSlicePitch, &hostPtr, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "flags", flags, "imageFormat", imageFormat, "imageWidth", imageWidth, "imageHeight", imageHeight, "imageDepth", imageDepth, "imageRowPitch", imageRowPitch, "imageSlicePitch", imageSlicePitch, "hostPtr", hostPtr); cl_mem image3D = nullptr; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(cl_image_desc)); imageDesc.image_depth = imageDepth; imageDesc.image_height = imageHeight; imageDesc.image_width = imageWidth; imageDesc.image_row_pitch = imageRowPitch; imageDesc.image_slice_pitch = imageSlicePitch; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; Context *pContext = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext)); if (retVal == CL_SUCCESS) { MemoryPropertiesFlags memoryProperties = 
MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); image3D = Image::validateAndCreateImage(pContext, memoryProperties, flags, 0, imageFormat, &imageDesc, hostPtr, retVal); } ErrorCodeHelper err(errcodeRet, retVal); DBG_LOG_INPUTS("image 3D", image3D); TRACING_EXIT(clCreateImage3D, &image3D); return image3D; } cl_int CL_API_CALL clRetainMemObject(cl_mem memobj) { TRACING_ENTER(clRetainMemObject, &memobj); cl_int retVal = CL_INVALID_MEM_OBJECT; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj); auto pMemObj = castToObject(memobj); if (pMemObj) { pMemObj->retain(); retVal = CL_SUCCESS; TRACING_EXIT(clRetainMemObject, &retVal); return retVal; } TRACING_EXIT(clRetainMemObject, &retVal); return retVal; } cl_int CL_API_CALL clReleaseMemObject(cl_mem memobj) { TRACING_ENTER(clReleaseMemObject, &memobj); cl_int retVal = CL_INVALID_MEM_OBJECT; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj); auto pMemObj = castToObject(memobj); if (pMemObj) { pMemObj->release(); retVal = CL_SUCCESS; TRACING_EXIT(clReleaseMemObject, &retVal); return retVal; } TRACING_EXIT(clReleaseMemObject, &retVal); return retVal; } cl_int CL_API_CALL clGetSupportedImageFormats(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats) { TRACING_ENTER(clGetSupportedImageFormats, &context, &flags, &imageType, &numEntries, &imageFormats, &numImageFormats); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "flags", flags, "imageType", imageType, "numEntries", numEntries, "imageFormats", imageFormats, "numImageFormats", numImageFormats); auto pContext = castToObject(context); if (pContext) { auto pClDevice = pContext->getDevice(0); if (pClDevice->getHardwareInfo().capabilityTable.supportsImages) { retVal = pContext->getSupportedImageFormats(&pClDevice->getDevice(), flags, imageType, numEntries, imageFormats, numImageFormats); } else { if (numImageFormats) { 
*numImageFormats = 0u; } retVal = CL_SUCCESS; } } else { retVal = CL_INVALID_CONTEXT; } TRACING_EXIT(clGetSupportedImageFormats, &retVal); return retVal; } cl_int CL_API_CALL clGetMemObjectInfo(cl_mem memobj, cl_mem_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetMemObjectInfo, &memobj, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); MemObj *pMemObj = nullptr; retVal = validateObjects(WithCastToInternal(memobj, &pMemObj)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetMemObjectInfo, &retVal); return retVal; } retVal = pMemObj->getMemObjectInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetMemObjectInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetImageInfo(cl_mem image, cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetImageInfo, &image, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("image", image, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); retVal = validateObjects(image); if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetImageInfo, &retVal); return retVal; } auto pImgObj = castToObject(image); if (pImgObj == nullptr) { retVal = CL_INVALID_MEM_OBJECT; TRACING_EXIT(clGetImageInfo, &retVal); return retVal; } retVal = pImgObj->getImageInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetImageInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetImageParamsINTEL(cl_context context, const cl_image_format *imageFormat, 
const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "imageFormat", imageFormat, "imageDesc", imageDesc, "imageRowPitch", imageRowPitch, "imageSlicePitch", imageSlicePitch); ClSurfaceFormatInfo *surfaceFormat = nullptr; cl_mem_flags memFlags = CL_MEM_READ_ONLY; retVal = validateObjects(context); auto pContext = castToObject(context); if (CL_SUCCESS == retVal) { if ((imageFormat == nullptr) || (imageDesc == nullptr) || (imageRowPitch == nullptr) || (imageSlicePitch == nullptr)) { retVal = CL_INVALID_VALUE; } } if (CL_SUCCESS == retVal) { retVal = Image::validateImageFormat(imageFormat); } if (CL_SUCCESS == retVal) { surfaceFormat = (ClSurfaceFormatInfo *)Image::getSurfaceFormatFromTable(memFlags, imageFormat, pContext->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); retVal = Image::validate(pContext, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memFlags, 0, 0), surfaceFormat, imageDesc, nullptr); } if (CL_SUCCESS == retVal) { retVal = Image::getImageParams(pContext, memFlags, surfaceFormat, imageDesc, imageRowPitch, imageSlicePitch); } return retVal; } cl_int CL_API_CALL clSetMemObjectDestructorCallback(cl_mem memobj, void(CL_CALLBACK *funcNotify)(cl_mem, void *), void *userData) { TRACING_ENTER(clSetMemObjectDestructorCallback, &memobj, &funcNotify, &userData); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("memobj", memobj, "funcNotify", funcNotify, "userData", userData); retVal = validateObjects(memobj, (void *)funcNotify); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetMemObjectDestructorCallback, &retVal); return retVal; } auto pMemObj = castToObject(memobj); retVal = pMemObj->setDestructorCallback(funcNotify, userData); TRACING_EXIT(clSetMemObjectDestructorCallback, &retVal); return retVal; } cl_sampler CL_API_CALL clCreateSampler(cl_context context, cl_bool normalizedCoords, cl_addressing_mode 
addressingMode, cl_filter_mode filterMode, cl_int *errcodeRet) { TRACING_ENTER(clCreateSampler, &context, &normalizedCoords, &addressingMode, &filterMode, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "normalizedCoords", normalizedCoords, "addressingMode", addressingMode, "filterMode", filterMode); retVal = validateObjects(context); cl_sampler sampler = nullptr; if (retVal == CL_SUCCESS) { auto pContext = castToObject(context); sampler = Sampler::create( pContext, normalizedCoords, addressingMode, filterMode, CL_FILTER_NEAREST, 0.0f, std::numeric_limits::max(), retVal); } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateSampler, &sampler); return sampler; } cl_int CL_API_CALL clRetainSampler(cl_sampler sampler) { TRACING_ENTER(clRetainSampler, &sampler); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("sampler", sampler); auto pSampler = castToObject(sampler); if (pSampler) { pSampler->retain(); TRACING_EXIT(clRetainSampler, &retVal); return retVal; } retVal = CL_INVALID_SAMPLER; TRACING_EXIT(clRetainSampler, &retVal); return retVal; } cl_int CL_API_CALL clReleaseSampler(cl_sampler sampler) { TRACING_ENTER(clReleaseSampler, &sampler); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("sampler", sampler); auto pSampler = castToObject(sampler); if (pSampler) { pSampler->release(); TRACING_EXIT(clReleaseSampler, &retVal); return retVal; } retVal = CL_INVALID_SAMPLER; TRACING_EXIT(clReleaseSampler, &retVal); return retVal; } cl_int CL_API_CALL clGetSamplerInfo(cl_sampler sampler, cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetSamplerInfo, &sampler, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_INVALID_SAMPLER; API_ENTER(&retVal); DBG_LOG_INPUTS("sampler", sampler, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", 
NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pSampler = castToObject(sampler); if (pSampler) { retVal = pSampler->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetSamplerInfo, &retVal); return retVal; } cl_program CL_API_CALL clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcodeRet) { TRACING_ENTER(clCreateProgramWithSource, &context, &count, &strings, &lengths, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "count", count, "strings", strings, "lengths", lengths); retVal = validateObjects(context, count, strings); cl_program program = nullptr; if (CL_SUCCESS == retVal) { program = Program::create( context, count, strings, lengths, retVal); } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateProgramWithSource, &program); return program; } cl_program CL_API_CALL clCreateProgramWithBinary(cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int *errcodeRet) { TRACING_ENTER(clCreateProgramWithBinary, &context, &numDevices, &deviceList, &lengths, &binaries, &binaryStatus, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "numDevices", numDevices, "deviceList", deviceList, "lengths", lengths, "binaries", binaries, "binaryStatus", binaryStatus); retVal = validateObjects(context, deviceList, *deviceList, binaries, *binaries, lengths, *lengths); cl_program program = nullptr; NEO::FileLoggerInstance().dumpBinaryProgram(numDevices, lengths, binaries); if (CL_SUCCESS == retVal) { program = Program::create( context, numDevices, deviceList, lengths, binaries, binaryStatus, retVal); } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateProgramWithBinary, &program); return program; } 
// Creates a program object from an intermediate-language blob.
// context and il are checked with validateObjects(); on success the work is handed
// to Program::createFromIL(). The final status is stored in *errcodeRet when non-null.
cl_program CL_API_CALL clCreateProgramWithIL(cl_context context,
                                             const void *il,
                                             size_t length,
                                             cl_int *errcodeRet) {
    TRACING_ENTER(clCreateProgramWithIL, &context, &il, &length, &errcodeRet);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("context", context,
                   "il", il,
                   "length", length);

    cl_program program = nullptr;
    retVal = validateObjects(context, il);
    if (retVal == CL_SUCCESS) {
        program = Program::createFromIL(
            castToObjectOrAbort<Context>(context),
            il,
            length,
            retVal);
    }

    if (errcodeRet != nullptr) {
        *errcodeRet = retVal;
    }

    TRACING_EXIT(clCreateProgramWithIL, &program);
    return program;
}

// Creates a program object for the built-in kernels named in kernelNames.
// Devices are tried in list order and the first one that yields a program with
// CL_SUCCESS wins. numDevices == 0 is rejected with CL_INVALID_VALUE; a handle in
// deviceList that is not a device object is rejected with CL_INVALID_DEVICE.
// Note: errcodeRet is part of the validateObjects() call, as in the original code.
cl_program CL_API_CALL clCreateProgramWithBuiltInKernels(cl_context context,
                                                         cl_uint numDevices,
                                                         const cl_device_id *deviceList,
                                                         const char *kernelNames,
                                                         cl_int *errcodeRet) {
    TRACING_ENTER(clCreateProgramWithBuiltInKernels, &context, &numDevices, &deviceList, &kernelNames, &errcodeRet);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("context", context,
                   "numDevices", numDevices,
                   "deviceList", deviceList,
                   "kernelNames", kernelNames);
    cl_program program = nullptr;

    retVal = validateObjects(
        context, deviceList, kernelNames, errcodeRet);

    if (numDevices == 0) {
        retVal = CL_INVALID_VALUE;
    }

    if (retVal == CL_SUCCESS) {
        auto pContext = castToObject<Context>(context);

        for (cl_uint i = 0; i < numDevices; i++) {
            // Use the i-th device; the loop previously retried deviceList[0] every time.
            auto pDevice = castToObject<ClDevice>(deviceList[i]);
            if (pDevice == nullptr) {
                retVal = CL_INVALID_DEVICE;
                break;
            }

            program = Vme::createBuiltInProgram(
                *pContext,
                pDevice->getDevice(),
                kernelNames,
                retVal);
            if (program && retVal == CL_SUCCESS) {
                break;
            }
        }
    }

    if (errcodeRet) {
        *errcodeRet = retVal;
    }

    TRACING_EXIT(clCreateProgramWithBuiltInKernels, &program);
    return program;
}

// Takes an additional reference on the program.
// Returns CL_INVALID_PROGRAM when the handle does not cast to a Program.
cl_int CL_API_CALL clRetainProgram(cl_program program) {
    TRACING_ENTER(clRetainProgram, &program);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("program", program);

    auto pProgram = castToObject<Program>(program);
    if (pProgram) {
        pProgram->retain();
        TRACING_EXIT(clRetainProgram, &retVal);
        return retVal;
    }

    retVal = CL_INVALID_PROGRAM;
    TRACING_EXIT(clRetainProgram, &retVal);
    return retVal;
}

// Drops one reference from the program.
// Returns CL_INVALID_PROGRAM when the handle does not cast to a Program.
cl_int CL_API_CALL clReleaseProgram(cl_program program) {
    TRACING_ENTER(clReleaseProgram, &program);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("program", program);

    auto pProgram = castToObject<Program>(program);
    if (pProgram) {
        pProgram->release();
        TRACING_EXIT(clReleaseProgram, &retVal);
        return retVal;
    }

    retVal = CL_INVALID_PROGRAM;
    TRACING_EXIT(clReleaseProgram, &retVal);
    return retVal;
}

// Builds the program by forwarding every argument, plus clCacheEnabled, to Program::build().
// Returns CL_INVALID_PROGRAM when the handle does not cast to a Program.
cl_int CL_API_CALL clBuildProgram(cl_program program,
                                  cl_uint numDevices,
                                  const cl_device_id *deviceList,
                                  const char *options,
                                  void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
                                  void *userData) {
    TRACING_ENTER(clBuildProgram, &program, &numDevices, &deviceList, &options, &funcNotify, &userData);
    cl_int retVal = CL_INVALID_PROGRAM;
    API_ENTER(&retVal);
    // A null options string is logged as "" so the logger never sees a null C string.
    DBG_LOG_INPUTS("clProgram", program,
                   "numDevices", numDevices,
                   "cl_device_id", deviceList,
                   "options", (options != nullptr) ? options : "",
                   "funcNotify", funcNotify,
                   "userData", userData);
    auto pProgram = castToObject<Program>(program);

    if (pProgram) {
        retVal = pProgram->build(numDevices, deviceList, options, funcNotify, userData, clCacheEnabled);
    }

    TRACING_EXIT(clBuildProgram, &retVal);
    return retVal;
}

cl_int CL_API_CALL clCompileProgram(cl_program program,
                                    cl_uint numDevices,
                                    const cl_device_id *deviceList,
                                    const char *options,
                                    cl_uint numInputHeaders,
                                    const cl_program *inputHeaders,
                                    const char **headerIncludeNames,
                                    void(CL_CALLBACK *funcNotify)(cl_program program, void *userData),
                                    void *userData) {
    TRACING_ENTER(clCompileProgram, &program, &numDevices, &deviceList, &options, &numInputHeaders, &inputHeaders, &headerIncludeNames, &funcNotify, &userData);
    cl_int retVal = CL_INVALID_PROGRAM;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("clProgram", program,
                   "numDevices", numDevices,
                   "cl_device_id", deviceList,
                   "options", (options != nullptr) ?
options : "", "numInputHeaders", numInputHeaders); auto pProgram = castToObject(program); if (pProgram != nullptr) { retVal = pProgram->compile(numDevices, deviceList, options, numInputHeaders, inputHeaders, headerIncludeNames, funcNotify, userData); } TRACING_EXIT(clCompileProgram, &retVal); return retVal; } cl_program CL_API_CALL clLinkProgram(cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData, cl_int *errcodeRet) { TRACING_ENTER(clLinkProgram, &context, &numDevices, &deviceList, &options, &numInputPrograms, &inputPrograms, &funcNotify, &userData, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_context", context, "numDevices", numDevices, "cl_device_id", deviceList, "options", (options != nullptr) ? options : "", "numInputPrograms", numInputPrograms); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *pContext = nullptr; Program *program = nullptr; retVal = validateObject(context); if (CL_SUCCESS == retVal) { pContext = castToObject(context); } if (pContext != nullptr) { program = new Program(*pContext->getDevice(0)->getExecutionEnvironment(), pContext, false, &pContext->getDevice(0)->getDevice()); retVal = program->link(numDevices, deviceList, options, numInputPrograms, inputPrograms, funcNotify, userData); } err.set(retVal); TRACING_EXIT(clLinkProgram, (cl_program *)&program); return program; } cl_int CL_API_CALL clUnloadPlatformCompiler(cl_platform_id platform) { TRACING_ENTER(clUnloadPlatformCompiler, &platform); cl_int retVal = CL_OUT_OF_HOST_MEMORY; API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform); TRACING_EXIT(clUnloadPlatformCompiler, &retVal); return retVal; } cl_int CL_API_CALL clGetProgramInfo(cl_program program, cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { 
TRACING_ENTER(clGetProgramInfo, &program, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", program, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); retVal = validateObjects(program); if (CL_SUCCESS == retVal) { Program *pProgram = (Program *)(program); retVal = pProgram->getInfo( paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetProgramInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetProgramBuildInfo, &program, &device, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", program, "cl_device_id", device, "paramName", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSize", paramValueSize, "paramValue", paramValue, "paramValueSizeRet", paramValueSizeRet); retVal = validateObjects(program); if (CL_SUCCESS == retVal) { Program *pProgram = (Program *)(program); retVal = pProgram->getBuildInfo( device, paramName, paramValueSize, paramValue, paramValueSizeRet); } TRACING_EXIT(clGetProgramBuildInfo, &retVal); return retVal; } cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram, const char *kernelName, cl_int *errcodeRet) { TRACING_ENTER(clCreateKernel, &clProgram, &kernelName, &errcodeRet); API_ENTER(errcodeRet); Program *pProgram = nullptr; cl_kernel kernel = nullptr; cl_int retVal = CL_SUCCESS; DBG_LOG_INPUTS("clProgram", clProgram, "kernelName", kernelName); do { if (!isValidObject(clProgram) || !(pProgram = castToObject(clProgram))) { retVal = CL_INVALID_PROGRAM; break; } if (kernelName == nullptr) { retVal = CL_INVALID_VALUE; break; } 
if (pProgram->getBuildStatus() != CL_SUCCESS) { retVal = CL_INVALID_PROGRAM_EXECUTABLE; break; } const KernelInfo *pKernelInfo = pProgram->getKernelInfo(kernelName); if (!pKernelInfo) { retVal = CL_INVALID_KERNEL_NAME; break; } kernel = Kernel::create( pProgram, *pKernelInfo, &retVal); DBG_LOG_INPUTS("kernel", kernel); } while (false); if (errcodeRet) { *errcodeRet = retVal; } gtpinNotifyKernelCreate(kernel); TRACING_EXIT(clCreateKernel, &kernel); return kernel; } cl_int CL_API_CALL clCreateKernelsInProgram(cl_program clProgram, cl_uint numKernels, cl_kernel *kernels, cl_uint *numKernelsRet) { TRACING_ENTER(clCreateKernelsInProgram, &clProgram, &numKernels, &kernels, &numKernelsRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("clProgram", clProgram, "numKernels", numKernels, "kernels", kernels, "numKernelsRet", numKernelsRet); auto program = castToObject(clProgram); if (program) { auto numKernelsInProgram = program->getNumKernels(); if (kernels) { if (numKernels < numKernelsInProgram) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clCreateKernelsInProgram, &retVal); return retVal; } for (unsigned int i = 0; i < numKernelsInProgram; ++i) { const auto kernelInfo = program->getKernelInfo(i); DEBUG_BREAK_IF(kernelInfo == nullptr); kernels[i] = Kernel::create( program, *kernelInfo, nullptr); gtpinNotifyKernelCreate(kernels[i]); } } if (numKernelsRet) { *numKernelsRet = static_cast(numKernelsInProgram); } TRACING_EXIT(clCreateKernelsInProgram, &retVal); return retVal; } retVal = CL_INVALID_PROGRAM; TRACING_EXIT(clCreateKernelsInProgram, &retVal); return retVal; } cl_int CL_API_CALL clRetainKernel(cl_kernel kernel) { TRACING_ENTER(clRetainKernel, &kernel); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel); auto pKernel = castToObject(kernel); if (pKernel) { pKernel->retain(); TRACING_EXIT(clRetainKernel, &retVal); return retVal; } retVal = CL_INVALID_KERNEL; TRACING_EXIT(clRetainKernel, &retVal); return retVal; } cl_int 
CL_API_CALL clReleaseKernel(cl_kernel kernel) { TRACING_ENTER(clReleaseKernel, &kernel); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel); auto pKernel = castToObject(kernel); if (pKernel) { pKernel->release(); TRACING_EXIT(clReleaseKernel, &retVal); return retVal; } retVal = CL_INVALID_KERNEL; TRACING_EXIT(clReleaseKernel, &retVal); return retVal; } cl_int CL_API_CALL clSetKernelArg(cl_kernel kernel, cl_uint argIndex, size_t argSize, const void *argValue) { TRACING_ENTER(clSetKernelArg, &kernel, &argIndex, &argSize, &argValue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); auto pKernel = castToObject(kernel); DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argSize", argSize, "argValue", NEO::FileLoggerInstance().infoPointerToString(argValue, argSize)); do { if (!pKernel) { retVal = CL_INVALID_KERNEL; break; } if (pKernel->getKernelInfo().kernelArgInfo.size() <= argIndex) { retVal = CL_INVALID_ARG_INDEX; break; } retVal = pKernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); if (retVal != CL_SUCCESS) { pKernel->unsetArg(argIndex); break; } retVal = pKernel->setArg( argIndex, argSize, argValue); break; } while (false); TRACING_EXIT(clSetKernelArg, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelInfo(cl_kernel kernel, cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelInfo, &kernel, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pKernel = castToObject(kernel); retVal = pKernel ? 
pKernel->getInfo( paramName, paramValueSize, paramValue, paramValueSizeRet) : CL_INVALID_KERNEL; TRACING_EXIT(clGetKernelInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelArgInfo(cl_kernel kernel, cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelArgInfo, &kernel, &argIndx, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "argIndx", argIndx, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pKernel = castToObject(kernel); retVal = pKernel ? pKernel->getArgInfo( argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet) : CL_INVALID_KERNEL; TRACING_EXIT(clGetKernelArgInfo, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelWorkGroupInfo, &kernel, &device, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "device", device, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto pKernel = castToObject(kernel); retVal = pKernel ? 
pKernel->getWorkGroupInfo( device, paramName, paramValueSize, paramValue, paramValueSizeRet) : CL_INVALID_KERNEL; TRACING_EXIT(clGetKernelWorkGroupInfo, &retVal); return retVal; } cl_int CL_API_CALL clWaitForEvents(cl_uint numEvents, const cl_event *eventList) { TRACING_ENTER(clWaitForEvents, &numEvents, &eventList); auto retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("eventList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventList), numEvents)); for (unsigned int i = 0; i < numEvents && retVal == CL_SUCCESS; i++) retVal = validateObjects(eventList[i]); if (retVal != CL_SUCCESS) { TRACING_EXIT(clWaitForEvents, &retVal); return retVal; } retVal = Event::waitForEvents(numEvents, eventList); TRACING_EXIT(clWaitForEvents, &retVal); return retVal; } cl_int CL_API_CALL clGetEventInfo(cl_event event, cl_event_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetEventInfo, &event, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); auto retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("event", event, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); Event *neoEvent = castToObject(event); if (neoEvent == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); switch (paramName) { default: { retVal = CL_INVALID_VALUE; TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } // From OCL spec : // "Return the command-queue associated with event. For user event objects," // a nullptr value is returned." 
case CL_EVENT_COMMAND_QUEUE: { if (neoEvent->isUserEvent()) { retVal = changeGetInfoStatusToCLResultType(info.set(nullptr)); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getCommandQueue())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } case CL_EVENT_CONTEXT: retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getContext())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; case CL_EVENT_COMMAND_TYPE: retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getCommandType())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; case CL_EVENT_COMMAND_EXECUTION_STATUS: neoEvent->tryFlushEvent(); if (neoEvent->isUserEvent()) { auto executionStatus = neoEvent->peekExecutionStatus(); //Spec requires initial state to be queued //our current design relies heavily on SUBMITTED status which directly corresponds //to command being able to be submitted, to overcome this we set initial status to queued //and we override the value stored with the value required by the spec. 
if (executionStatus == CL_QUEUED) { executionStatus = CL_SUBMITTED; } retVal = changeGetInfoStatusToCLResultType(info.set(executionStatus)); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->updateEventAndReturnCurrentStatus())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; case CL_EVENT_REFERENCE_COUNT: retVal = changeGetInfoStatusToCLResultType(info.set(neoEvent->getReference())); TRACING_EXIT(clGetEventInfo, &retVal); return retVal; } } cl_event CL_API_CALL clCreateUserEvent(cl_context context, cl_int *errcodeRet) { TRACING_ENTER(clCreateUserEvent, &context, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = castToObject(context); if (ctx == nullptr) { err.set(CL_INVALID_CONTEXT); cl_event retVal = nullptr; TRACING_EXIT(clCreateUserEvent, &retVal); return retVal; } Event *userEvent = new UserEvent(ctx); cl_event userClEvent = userEvent; DBG_LOG_INPUTS("cl_event", userClEvent, "UserEvent", userEvent); TRACING_EXIT(clCreateUserEvent, &userClEvent); return userClEvent; } cl_int CL_API_CALL clRetainEvent(cl_event event) { TRACING_ENTER(clRetainEvent, &event); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto pEvent = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "Event", pEvent); if (pEvent) { pEvent->retain(); TRACING_EXIT(clRetainEvent, &retVal); return retVal; } retVal = CL_INVALID_EVENT; TRACING_EXIT(clRetainEvent, &retVal); return retVal; } cl_int CL_API_CALL clReleaseEvent(cl_event event) { TRACING_ENTER(clReleaseEvent, &event); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto pEvent = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "Event", pEvent); if (pEvent) { pEvent->release(); TRACING_EXIT(clReleaseEvent, &retVal); return retVal; } retVal = CL_INVALID_EVENT; TRACING_EXIT(clReleaseEvent, &retVal); return retVal; } cl_int CL_API_CALL clSetUserEventStatus(cl_event event, cl_int 
executionStatus) { TRACING_ENTER(clSetUserEventStatus, &event, &executionStatus); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto userEvent = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "executionStatus", executionStatus, "UserEvent", userEvent); if (userEvent == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } if (executionStatus > CL_COMPLETE) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } if (!userEvent->isInitialEventStatus()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } userEvent->setStatus(executionStatus); TRACING_EXIT(clSetUserEventStatus, &retVal); return retVal; } cl_int CL_API_CALL clSetEventCallback(cl_event event, cl_int commandExecCallbackType, void(CL_CALLBACK *funcNotify)(cl_event, cl_int, void *), void *userData) { TRACING_ENTER(clSetEventCallback, &event, &commandExecCallbackType, &funcNotify, &userData); auto retVal = CL_SUCCESS; API_ENTER(&retVal); auto eventObject = castToObject(event); DBG_LOG_INPUTS("cl_event", event, "commandExecCallbackType", commandExecCallbackType, "Event", eventObject); if (eventObject == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } switch (commandExecCallbackType) { case CL_COMPLETE: case CL_SUBMITTED: case CL_RUNNING: break; default: { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } } if (funcNotify == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } eventObject->tryFlushEvent(); eventObject->addCallback(funcNotify, commandExecCallbackType, userData); TRACING_EXIT(clSetEventCallback, &retVal); return retVal; } cl_int CL_API_CALL clGetEventProfilingInfo(cl_event event, cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetEventProfilingInfo, &event, ¶mName, 
¶mValueSize, ¶mValue, ¶mValueSizeRet); auto retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("event", event, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); auto eventObject = castToObject(event); if (eventObject == nullptr) { retVal = CL_INVALID_EVENT; TRACING_EXIT(clGetEventProfilingInfo, &retVal); return retVal; } retVal = eventObject->getEventProfilingInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetEventProfilingInfo, &retVal); return retVal; } cl_int CL_API_CALL clFlush(cl_command_queue commandQueue) { TRACING_ENTER(clFlush, &commandQueue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); auto pCommandQueue = castToObject(commandQueue); retVal = pCommandQueue ? pCommandQueue->flush() : CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clFlush, &retVal); return retVal; } cl_int CL_API_CALL clFinish(cl_command_queue commandQueue) { TRACING_ENTER(clFinish, &commandQueue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue); auto pCommandQueue = castToObject(commandQueue); retVal = pCommandQueue ? 
pCommandQueue->finish() : CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clFinish, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, size_t offset, size_t cb, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReadBuffer, &commandQueue, &buffer, &blockingRead, &offset, &cb, &ptr, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; auto retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingRead", blockingRead, "offset", offset, "cb", cb, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS == retVal) { if (pBuffer->readMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadBuffer, &retVal); return retVal; } retVal = pCommandQueue->enqueueReadBuffer( pBuffer, blockingRead, offset, cb, ptr, nullptr, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueReadBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReadBufferRect, &commandQueue, &buffer, &blockingRead, &bufferOrigin, &hostOrigin, ®ion, &bufferRowPitch, 
&bufferSlicePitch, &hostRowPitch, &hostSlicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingRead", blockingRead, "bufferOrigin[0]", NEO::FileLoggerInstance().getInput(bufferOrigin, 0), "bufferOrigin[1]", NEO::FileLoggerInstance().getInput(bufferOrigin, 1), "bufferOrigin[2]", NEO::FileLoggerInstance().getInput(bufferOrigin, 2), "hostOrigin[0]", NEO::FileLoggerInstance().getInput(hostOrigin, 0), "hostOrigin[1]", NEO::FileLoggerInstance().getInput(hostOrigin, 1), "hostOrigin[2]", NEO::FileLoggerInstance().getInput(hostOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "bufferRowPitch", bufferRowPitch, "bufferSlicePitch", bufferSlicePitch, "hostRowPitch", hostRowPitch, "hostSlicePitch", hostSlicePitch, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } if (pBuffer->readMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } if (pBuffer->bufferRectPitchSet(bufferOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch) == false) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } retVal = pCommandQueue->enqueueReadBufferRect( pBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, 
hostRowPitch, hostSlicePitch, ptr, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueReadBufferRect, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueWriteBuffer, &commandQueue, &buffer, &blockingWrite, &offset, &cb, &ptr, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingWrite", blockingWrite, "offset", offset, "cb", cb, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); if (CL_SUCCESS == retVal) { if (pBuffer->writeMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteBuffer, &retVal); return retVal; } retVal = pCommandQueue->enqueueWriteBuffer( pBuffer, blockingWrite, offset, cb, ptr, nullptr, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueWriteBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueWriteBufferRect, 
&commandQueue, &buffer, &blockingWrite, &bufferOrigin, &hostOrigin, ®ion, &bufferRowPitch, &bufferSlicePitch, &hostRowPitch, &hostSlicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingWrite", blockingWrite, "bufferOrigin[0]", NEO::FileLoggerInstance().getInput(bufferOrigin, 0), "bufferOrigin[1]", NEO::FileLoggerInstance().getInput(bufferOrigin, 1), "bufferOrigin[2]", NEO::FileLoggerInstance().getInput(bufferOrigin, 2), "hostOrigin[0]", NEO::FileLoggerInstance().getInput(hostOrigin, 0), "hostOrigin[1]", NEO::FileLoggerInstance().getInput(hostOrigin, 1), "hostOrigin[2]", NEO::FileLoggerInstance().getInput(hostOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "bufferRowPitch", bufferRowPitch, "bufferSlicePitch", bufferSlicePitch, "hostRowPitch", hostRowPitch, "hostSlicePitch", hostSlicePitch, "ptr", ptr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), ptr); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } if (pBuffer->writeMemObjFlagsInvalid()) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } if (pBuffer->bufferRectPitchSet(bufferOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch) == false) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } retVal = 
pCommandQueue->enqueueWriteBufferRect( pBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, ptr, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueWriteBufferRect, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueFillBuffer(cl_command_queue commandQueue, cl_mem buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueFillBuffer, &commandQueue, &buffer, &pattern, &patternSize, &offset, &size, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "pattern", NEO::FileLoggerInstance().infoPointerToString(pattern, patternSize), "patternSize", patternSize, "offset", offset, "size", size, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(buffer, &pBuffer), pattern, (PatternSize)patternSize, EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS == retVal) { retVal = pCommandQueue->enqueueFillBuffer( pBuffer, pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, event); } TRACING_EXIT(clEnqueueFillBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueCopyBuffer(cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, size_t srcOffset, size_t dstOffset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueCopyBuffer, &commandQueue, &srcBuffer, &dstBuffer, &srcOffset, &dstOffset, &cb, &numEventsInWaitList, &eventWaitList, 
&event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "srcBuffer", srcBuffer, "dstBuffer", dstBuffer, "srcOffset", srcOffset, "dstOffset", dstOffset, "cb", cb, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pSrcBuffer = nullptr; Buffer *pDstBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(srcBuffer, &pSrcBuffer), WithCastToInternal(dstBuffer, &pDstBuffer)); if (CL_SUCCESS == retVal) { size_t srcSize = pSrcBuffer->getSize(); size_t dstSize = pDstBuffer->getSize(); if (srcOffset + cb > srcSize || dstOffset + cb > dstSize) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueCopyBuffer, &retVal); return retVal; } retVal = pCommandQueue->enqueueCopyBuffer( pSrcBuffer, pDstBuffer, srcOffset, dstOffset, cb, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueCopyBuffer, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueCopyBufferRect(cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueCopyBufferRect, &commandQueue, &srcBuffer, &dstBuffer, &srcOrigin, &dstOrigin, ®ion, &srcRowPitch, &srcSlicePitch, &dstRowPitch, &dstSlicePitch, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "srcBuffer", srcBuffer, "dstBuffer", dstBuffer, "srcOrigin[0]", 
NEO::FileLoggerInstance().getInput(srcOrigin, 0), "srcOrigin[1]", NEO::FileLoggerInstance().getInput(srcOrigin, 1), "srcOrigin[2]", NEO::FileLoggerInstance().getInput(srcOrigin, 2), "dstOrigin[0]", NEO::FileLoggerInstance().getInput(dstOrigin, 0), "dstOrigin[1]", NEO::FileLoggerInstance().getInput(dstOrigin, 1), "dstOrigin[2]", NEO::FileLoggerInstance().getInput(dstOrigin, 2), "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2), "srcRowPitch", srcRowPitch, "srcSlicePitch", srcSlicePitch, "dstRowPitch", dstRowPitch, "dstSlicePitch", dstSlicePitch, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Buffer *pSrcBuffer = nullptr; Buffer *pDstBuffer = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(srcBuffer, &pSrcBuffer), WithCastToInternal(dstBuffer, &pDstBuffer)); if (CL_SUCCESS == retVal) { retVal = pCommandQueue->enqueueCopyBufferRect( pSrcBuffer, pDstBuffer, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, eventWaitList, event); } DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u)); TRACING_EXIT(clEnqueueCopyBufferRect, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue commandQueue, cl_mem image, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReadImage, &commandQueue, &image, &blockingRead, &origin, ®ion, &rowPitch, &slicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event); 
// Body of clEnqueueReadImage; its signature and TRACING_ENTER call are on the preceding line of the file.
// NOTE(review): in this block `®ion` and the argument-less `reinterpret_cast(...)` look like extraction
// damage (probably `&region` and a stripped `<...>` argument) - confirm against the pristine file.
    CommandQueue *pCommandQueue = nullptr;
    Image *pImage = nullptr;

    // Handle validation runs before API_ENTER here, so retVal is initialised with its result.
    auto retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(image, &pImage));

    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image, "blockingRead", blockingRead,
                   "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2),
                   "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2),
                   "rowPitch", rowPitch, "slicePitch", slicePitch, "ptr", ptr,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    if (CL_SUCCESS == retVal) {
        // CL_INVALID_OPERATION when the image reports its flags as invalid for a read.
        if (pImage->readMemObjFlagsInvalid()) {
            retVal = CL_INVALID_OPERATION;
            TRACING_EXIT(clEnqueueReadImage, &retVal);
            return retVal;
        }
        // Packed-YUV formats get an additional origin/region check.
        if (IsPackedYuvImage(&pImage->getImageFormat())) {
            retVal = validateYuvOperation(origin, region);
            if (retVal != CL_SUCCESS) {
                TRACING_EXIT(clEnqueueReadImage, &retVal);
                return retVal;
            }
        }
        retVal = Image::validateRegionAndOrigin(origin, region, pImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            TRACING_EXIT(clEnqueueReadImage, &retVal);
            return retVal;
        }

        // The nullptr after `ptr` fills a parameter whose meaning is not visible from this file section.
        retVal = pCommandQueue->enqueueReadImage(
            pImage,
            blockingRead,
            origin,
            region,
            rowPitch,
            slicePitch,
            ptr,
            nullptr,
            numEventsInWaitList,
            eventWaitList,
            event);
    }
    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueReadImage, &retVal);
    return retVal;
}

// OpenCL entry point for writing host memory at `ptr` into an image region.
// Mirrors clEnqueueReadImage: validate handles, reject images whose writeMemObjFlagsInvalid() is true
// (CL_INVALID_OPERATION), run the packed-YUV and region/origin checks, then forward to
// CommandQueue::enqueueWriteImage. The final TRACING_EXIT/return are on the following line of the file.
cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue commandQueue,
                                       cl_mem image,
                                       cl_bool blockingWrite,
                                       const size_t *origin,
                                       const size_t *region,
                                       size_t inputRowPitch,
                                       size_t inputSlicePitch,
                                       const void *ptr,
                                       cl_uint numEventsInWaitList,
                                       const cl_event *eventWaitList,
                                       cl_event *event) {
    TRACING_ENTER(clEnqueueWriteImage, &commandQueue, &image, &blockingWrite, &origin, ®ion, &inputRowPitch, &inputSlicePitch, &ptr, &numEventsInWaitList, &eventWaitList, &event);

    CommandQueue *pCommandQueue = nullptr;
    Image *pImage = nullptr;

    auto retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(image, &pImage));

    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image, "blockingWrite", blockingWrite,
                   "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2),
                   "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2),
                   "inputRowPitch", inputRowPitch, "inputSlicePitch", inputSlicePitch, "ptr", ptr,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    if (CL_SUCCESS == retVal) {
        if (pImage->writeMemObjFlagsInvalid()) {
            retVal = CL_INVALID_OPERATION;
            TRACING_EXIT(clEnqueueWriteImage, &retVal);
            return retVal;
        }
        if (IsPackedYuvImage(&pImage->getImageFormat())) {
            retVal = validateYuvOperation(origin, region);
            if (retVal != CL_SUCCESS) {
                TRACING_EXIT(clEnqueueWriteImage, &retVal);
                return retVal;
            }
        }
        retVal = Image::validateRegionAndOrigin(origin, region, pImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            TRACING_EXIT(clEnqueueWriteImage, &retVal);
            return retVal;
        }

        retVal = pCommandQueue->enqueueWriteImage(
            pImage,
            blockingWrite,
            origin,
            region,
            inputRowPitch,
            inputSlicePitch,
            ptr,
            nullptr,
            numEventsInWaitList,
            eventWaitList,
            event);
    }
    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
// Final statements of clEnqueueWriteImage; the rest of that function is on the preceding lines of the file.
// NOTE(review): in this block `®ion` and the argument-less `reinterpret_cast(...)` look like extraction
// damage (probably `&region` and a stripped `<...>` argument) - confirm against the pristine file.
    TRACING_EXIT(clEnqueueWriteImage, &retVal);
    return retVal;
}

// OpenCL entry point for filling an image region with `fillColor`.
// validateObjects receives the queue, the image, the fillColor pointer and the event wait list; on
// success the region/origin pair is checked against the image descriptor and the call is forwarded to
// CommandQueue::enqueueFillImage.
cl_int CL_API_CALL clEnqueueFillImage(cl_command_queue commandQueue,
                                      cl_mem image,
                                      const void *fillColor,
                                      const size_t *origin,
                                      const size_t *region,
                                      cl_uint numEventsInWaitList,
                                      const cl_event *eventWaitList,
                                      cl_event *event) {
    TRACING_ENTER(clEnqueueFillImage, &commandQueue, &image, &fillColor, &origin, ®ion, &numEventsInWaitList, &eventWaitList, &event);

    CommandQueue *pCommandQueue = nullptr;
    Image *dstImage = nullptr;

    auto retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(image, &dstImage),
        fillColor,
        EventWaitList(numEventsInWaitList, eventWaitList));

    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image, "fillColor", fillColor,
                   "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2),
                   "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2),
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    if (CL_SUCCESS == retVal) {
        retVal = Image::validateRegionAndOrigin(origin, region, dstImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            TRACING_EXIT(clEnqueueFillImage, &retVal);
            return retVal;
        }

        retVal = pCommandQueue->enqueueFillImage(
            dstImage,
            fillColor,
            origin,
            region,
            numEventsInWaitList,
            eventWaitList,
            event);
    }
    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueFillImage, &retVal);
    return retVal;
}

// OpenCL entry point for image-to-image copies.
// After handle validation the function checks, in order:
//   1. both images have byte-identical cl_image_format (else CL_IMAGE_FORMAT_MISMATCH),
//   2. packed-YUV constraints on the source, then on the destination,
//   3. for a packed-YUV 2D destination, dstOrigin[2] == 0 (else CL_INVALID_VALUE),
//   4. region/origin validity against the source and then the destination descriptor.
// Only then is CommandQueue::enqueueCopyImage called.
cl_int CL_API_CALL clEnqueueCopyImage(cl_command_queue commandQueue,
                                      cl_mem srcImage,
                                      cl_mem dstImage,
                                      const size_t *srcOrigin,
                                      const size_t *dstOrigin,
                                      const size_t *region,
                                      cl_uint numEventsInWaitList,
                                      const cl_event *eventWaitList,
                                      cl_event *event) {
    TRACING_ENTER(clEnqueueCopyImage, &commandQueue, &srcImage, &dstImage, &srcOrigin, &dstOrigin, ®ion, &numEventsInWaitList, &eventWaitList, &event);

    CommandQueue *pCommandQueue = nullptr;
    Image *pSrcImage = nullptr;
    Image *pDstImage = nullptr;

    auto retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue),
                                  WithCastToInternal(srcImage, &pSrcImage),
                                  WithCastToInternal(dstImage, &pDstImage));

    API_ENTER(&retVal);
    // Unlike the neighbouring entry points, region[] is logged with an inline null check rather than getInput().
    DBG_LOG_INPUTS("commandQueue", commandQueue, "srcImage", srcImage, "dstImage", dstImage,
                   "srcOrigin[0]", NEO::FileLoggerInstance().getInput(srcOrigin, 0), "srcOrigin[1]", NEO::FileLoggerInstance().getInput(srcOrigin, 1), "srcOrigin[2]", NEO::FileLoggerInstance().getInput(srcOrigin, 2),
                   "dstOrigin[0]", NEO::FileLoggerInstance().getInput(dstOrigin, 0), "dstOrigin[1]", NEO::FileLoggerInstance().getInput(dstOrigin, 1), "dstOrigin[2]", NEO::FileLoggerInstance().getInput(dstOrigin, 2),
                   "region[0]", region ? region[0] : 0,
                   "region[1]", region ? region[1] : 0,
                   "region[2]", region ? region[2] : 0,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    if (CL_SUCCESS == retVal) {
        // memcmp returns non-zero when the two format structs differ.
        if (memcmp(&pSrcImage->getImageFormat(), &pDstImage->getImageFormat(), sizeof(cl_image_format))) {
            retVal = CL_IMAGE_FORMAT_MISMATCH;
            TRACING_EXIT(clEnqueueCopyImage, &retVal);
            return retVal;
        }
        if (IsPackedYuvImage(&pSrcImage->getImageFormat())) {
            retVal = validateYuvOperation(srcOrigin, region);
            if (retVal != CL_SUCCESS) {
                TRACING_EXIT(clEnqueueCopyImage, &retVal);
                return retVal;
            }
        }
        if (IsPackedYuvImage(&pDstImage->getImageFormat())) {
            retVal = validateYuvOperation(dstOrigin, region);
            if (retVal != CL_SUCCESS) {
                TRACING_EXIT(clEnqueueCopyImage, &retVal);
                return retVal;
            }
            // NOTE(review): dstOrigin is dereferenced here; presumably validateYuvOperation has already
            // rejected a null origin - verify.
            if (pDstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D && dstOrigin[2] != 0) {
                retVal = CL_INVALID_VALUE;
                TRACING_EXIT(clEnqueueCopyImage, &retVal);
                return retVal;
            }
        }
        retVal = Image::validateRegionAndOrigin(srcOrigin, region, pSrcImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            TRACING_EXIT(clEnqueueCopyImage, &retVal);
            return retVal;
        }
        retVal = Image::validateRegionAndOrigin(dstOrigin, region, pDstImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            TRACING_EXIT(clEnqueueCopyImage, &retVal);
            return retVal;
        }

        retVal = pCommandQueue->enqueueCopyImage(
            pSrcImage,
            pDstImage,
            srcOrigin,
            dstOrigin,
            region,
            numEventsInWaitList,
            eventWaitList,
            event);
    }
    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueCopyImage, &retVal);
    return retVal;
}

// OpenCL entry point for copying an image region into a buffer at byte offset `dstOffset`.
// Only the signature and the start of the TRACING_ENTER argument list are in this block; the
// remainder follows on the next line of the file.
cl_int CL_API_CALL clEnqueueCopyImageToBuffer(cl_command_queue commandQueue,
                                              cl_mem srcImage,
                                              cl_mem dstBuffer,
                                              const size_t *srcOrigin,
                                              const size_t *region,
                                              const size_t dstOffset,
                                              cl_uint numEventsInWaitList,
                                              const cl_event *eventWaitList,
                                              cl_event *event) {
    TRACING_ENTER(clEnqueueCopyImageToBuffer, &commandQueue, &srcImage,
&dstBuffer, &srcOrigin, ®ion, (size_t *)&dstOffset, &numEventsInWaitList, &eventWaitList, &event);
    // The line above finishes the TRACING_ENTER call of clEnqueueCopyImageToBuffer, whose signature is on
    // the preceding line of the file. The C-style cast drops the const of the by-value dstOffset parameter.
    // NOTE(review): in this block `®ion` and the argument-less `reinterpret_cast(...)` look like extraction
    // damage (probably `&region` and a stripped `<...>` argument) - confirm against the pristine file.
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    DBG_LOG_INPUTS("commandQueue", commandQueue, "srcImage", srcImage, "dstBuffer", dstBuffer,
                   "srcOrigin[0]", NEO::FileLoggerInstance().getInput(srcOrigin, 0), "srcOrigin[1]", NEO::FileLoggerInstance().getInput(srcOrigin, 1), "srcOrigin[2]", NEO::FileLoggerInstance().getInput(srcOrigin, 2),
                   "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2),
                   "dstOffset", dstOffset,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    CommandQueue *pCommandQueue = nullptr;
    Image *pSrcImage = nullptr;
    Buffer *pDstBuffer = nullptr;

    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(srcImage, &pSrcImage),
        WithCastToInternal(dstBuffer, &pDstBuffer));

    if (CL_SUCCESS == retVal) {
        // Only the image side is checked here (packed-YUV rules, then region/origin); no bounds check
        // on dstOffset against the buffer size is made in this function.
        if (IsPackedYuvImage(&pSrcImage->getImageFormat())) {
            retVal = validateYuvOperation(srcOrigin, region);
            if (retVal != CL_SUCCESS) {
                TRACING_EXIT(clEnqueueCopyImageToBuffer, &retVal);
                return retVal;
            }
        }
        retVal = Image::validateRegionAndOrigin(srcOrigin, region, pSrcImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            TRACING_EXIT(clEnqueueCopyImageToBuffer, &retVal);
            return retVal;
        }

        retVal = pCommandQueue->enqueueCopyImageToBuffer(
            pSrcImage,
            pDstBuffer,
            srcOrigin,
            region,
            dstOffset,
            numEventsInWaitList,
            eventWaitList,
            event);
    }

    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueCopyImageToBuffer, &retVal);
    return retVal;
}

// OpenCL entry point for copying from a buffer (starting at byte `srcOffset`) into an image region.
// Validates the queue, buffer and image handles, applies the packed-YUV and region/origin checks to
// the destination image, then forwards to CommandQueue::enqueueCopyBufferToImage.
// The closing TRACING_EXIT argument and return are on the following line of the file.
cl_int CL_API_CALL clEnqueueCopyBufferToImage(cl_command_queue commandQueue,
                                              cl_mem srcBuffer,
                                              cl_mem dstImage,
                                              size_t srcOffset,
                                              const size_t *dstOrigin,
                                              const size_t *region,
                                              cl_uint numEventsInWaitList,
                                              const cl_event *eventWaitList,
                                              cl_event *event) {
    TRACING_ENTER(clEnqueueCopyBufferToImage, &commandQueue, &srcBuffer, &dstImage, &srcOffset, &dstOrigin, ®ion, &numEventsInWaitList, &eventWaitList, &event);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);

    DBG_LOG_INPUTS("commandQueue", commandQueue, "srcBuffer", srcBuffer, "dstImage", dstImage, "srcOffset", srcOffset,
                   "dstOrigin[0]", NEO::FileLoggerInstance().getInput(dstOrigin, 0), "dstOrigin[1]", NEO::FileLoggerInstance().getInput(dstOrigin, 1), "dstOrigin[2]", NEO::FileLoggerInstance().getInput(dstOrigin, 2),
                   "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2),
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    CommandQueue *pCommandQueue = nullptr;
    Buffer *pSrcBuffer = nullptr;
    Image *pDstImage = nullptr;

    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(srcBuffer, &pSrcBuffer),
        WithCastToInternal(dstImage, &pDstImage));

    if (CL_SUCCESS == retVal) {
        if (IsPackedYuvImage(&pDstImage->getImageFormat())) {
            retVal = validateYuvOperation(dstOrigin, region);
            if (retVal != CL_SUCCESS) {
                TRACING_EXIT(clEnqueueCopyBufferToImage, &retVal);
                return retVal;
            }
        }
        retVal = Image::validateRegionAndOrigin(dstOrigin, region, pDstImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            TRACING_EXIT(clEnqueueCopyBufferToImage, &retVal);
            return retVal;
        }

        retVal = pCommandQueue->enqueueCopyBufferToImage(
            pSrcBuffer,
            pDstImage,
            srcOffset,
            dstOrigin,
            region,
            numEventsInWaitList,
            eventWaitList,
            event);
    }

    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueCopyBufferToImage,
&retVal);
    // The line above completes a TRACING_EXIT call begun on the preceding line of the file.
    return retVal;
}

// OpenCL entry point for mapping a buffer range into host address space.
// Returns the mapped pointer (nullptr on failure); the status is delivered through `errcodeRet`
// via ErrorCodeHelper. The do/while(false) block is used as a single-exit scope so every failure
// path `break`s to the common err.set / logging / tracing tail.
// NOTE(review): in this block `®ion`, the argument-less `reinterpret_cast(...)` and `castToObject(...)`
// look like extraction damage (a stripped `<...>` argument) - confirm against the pristine file.
void *CL_API_CALL clEnqueueMapBuffer(cl_command_queue commandQueue,
                                     cl_mem buffer,
                                     cl_bool blockingMap,
                                     cl_map_flags mapFlags,
                                     size_t offset,
                                     size_t cb,
                                     cl_uint numEventsInWaitList,
                                     const cl_event *eventWaitList,
                                     cl_event *event,
                                     cl_int *errcodeRet) {
    TRACING_ENTER(clEnqueueMapBuffer, &commandQueue, &buffer, &blockingMap, &mapFlags, &offset, &cb, &numEventsInWaitList, &eventWaitList, &event, &errcodeRet);
    void *retPtr = nullptr;
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    // retVal is deliberately left unassigned here; every path through the do-block below writes it
    // before err.set(retVal) reads it.
    cl_int retVal;
    API_ENTER(&retVal);

    DBG_LOG_INPUTS("commandQueue", commandQueue, "buffer", buffer, "blockingMap", blockingMap,
                   "mapFlags", mapFlags, "offset", offset, "cb", cb,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    do {
        auto pCommandQueue = castToObject(commandQueue);
        if (!pCommandQueue) {
            retVal = CL_INVALID_COMMAND_QUEUE;
            break;
        }

        auto pBuffer = castToObject(buffer);
        if (!pBuffer) {
            retVal = CL_INVALID_MEM_OBJECT;
            break;
        }

        if (pBuffer->mapMemObjFlagsInvalid(mapFlags)) {
            retVal = CL_INVALID_OPERATION;
            break;
        }

        // retVal is passed as the last argument and receives the enqueue status.
        retPtr = pCommandQueue->enqueueMapBuffer(
            pBuffer,
            blockingMap,
            mapFlags,
            offset,
            cb,
            numEventsInWaitList,
            eventWaitList,
            event,
            retVal);

    } while (false);

    err.set(retVal);
    DBG_LOG_INPUTS("retPtr", retPtr, "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));

    // Tracing reports the returned pointer here, not the status code.
    TRACING_EXIT(clEnqueueMapBuffer, &retPtr);
    return retPtr;
}

// OpenCL entry point for mapping an image region into host address space.
// Same single-exit structure as clEnqueueMapBuffer: validate handles, reject invalid map flags
// (CL_INVALID_OPERATION), apply packed-YUV and region/origin checks, then call
// CommandQueue::enqueueMapImage, which also receives the row/slice pitch output pointers.
// Returns the mapped pointer (nullptr on failure); status goes to `errcodeRet`.
void *CL_API_CALL clEnqueueMapImage(cl_command_queue commandQueue,
                                    cl_mem image,
                                    cl_bool blockingMap,
                                    cl_map_flags mapFlags,
                                    const size_t *origin,
                                    const size_t *region,
                                    size_t *imageRowPitch,
                                    size_t *imageSlicePitch,
                                    cl_uint numEventsInWaitList,
                                    const cl_event *eventWaitList,
                                    cl_event *event,
                                    cl_int *errcodeRet) {
    TRACING_ENTER(clEnqueueMapImage, &commandQueue, &image, &blockingMap, &mapFlags, &origin, ®ion, &imageRowPitch, &imageSlicePitch, &numEventsInWaitList, &eventWaitList, &event, &errcodeRet);

    void *retPtr = nullptr;
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);
    // Assigned by validateObjects as the first statement of the do-block.
    cl_int retVal;
    API_ENTER(&retVal);

    DBG_LOG_INPUTS("commandQueue", commandQueue, "image", image,
                   "blockingMap", blockingMap, "mapFlags", mapFlags,
                   "origin[0]", NEO::FileLoggerInstance().getInput(origin, 0), "origin[1]", NEO::FileLoggerInstance().getInput(origin, 1), "origin[2]", NEO::FileLoggerInstance().getInput(origin, 2),
                   "region[0]", NEO::FileLoggerInstance().getInput(region, 0), "region[1]", NEO::FileLoggerInstance().getInput(region, 1), "region[2]", NEO::FileLoggerInstance().getInput(region, 2),
                   "imageRowPitch", NEO::FileLoggerInstance().getInput(imageRowPitch, 0),
                   "imageSlicePitch", NEO::FileLoggerInstance().getInput(imageSlicePitch, 0),
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    do {
        Image *pImage = nullptr;
        CommandQueue *pCommandQueue = nullptr;
        retVal = validateObjects(
            WithCastToInternal(commandQueue, &pCommandQueue),
            WithCastToInternal(image, &pImage));

        if (retVal != CL_SUCCESS) {
            break;
        }

        if (pImage->mapMemObjFlagsInvalid(mapFlags)) {
            retVal = CL_INVALID_OPERATION;
            break;
        }
        if (IsPackedYuvImage(&pImage->getImageFormat())) {
            retVal = validateYuvOperation(origin, region);
            if (retVal != CL_SUCCESS) {
                break;
            }
        }

        retVal = Image::validateRegionAndOrigin(origin, region, pImage->getImageDesc());
        if (retVal != CL_SUCCESS) {
            break;
        }

        retPtr = pCommandQueue->enqueueMapImage(
            pImage,
            blockingMap,
            mapFlags,
            origin,
            region,
            imageRowPitch,
            imageSlicePitch,
            numEventsInWaitList,
            eventWaitList,
            event,
            retVal);

    } while (false);

    err.set(retVal);
    DBG_LOG_INPUTS("retPtr", retPtr, "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));

    TRACING_EXIT(clEnqueueMapImage, &retPtr);
    return retPtr;
}

// Return type and calling convention of the definition whose name starts the next line of the file.
cl_int CL_API_CALL
clEnqueueUnmapMemObject(cl_command_queue commandQueue,
                        cl_mem memObj,
                        void *mappedPtr,
                        cl_uint numEventsInWaitList,
                        const cl_event *eventWaitList,
                        cl_event *event) {
    // OpenCL entry point for unmapping a previously mapped memory object (the `cl_int CL_API_CALL`
    // return type sits on the preceding line of the file). Pipe objects are rejected with
    // CL_INVALID_MEM_OBJECT; otherwise the call is forwarded to CommandQueue::enqueueUnmapMemObject.
    // NOTE(review): in this block the argument-less `reinterpret_cast(...)` and `TakeOwnershipWrapper`
    // look like extraction damage (a stripped `<...>` argument) - confirm against the pristine file.
    TRACING_ENTER(clEnqueueUnmapMemObject, &commandQueue, &memObj, &mappedPtr, &numEventsInWaitList, &eventWaitList, &event);

    CommandQueue *pCommandQueue = nullptr;
    MemObj *pMemObj = nullptr;

    cl_int retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(memObj, &pMemObj));

    API_ENTER(&retVal);

    DBG_LOG_INPUTS("commandQueue", commandQueue,
                   "memObj", memObj,
                   "mappedPtr", mappedPtr,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    if (retVal == CL_SUCCESS) {
        if (pMemObj->peekClMemObjType() == CL_MEM_OBJECT_PIPE) {
            retVal = CL_INVALID_MEM_OBJECT;
            TRACING_EXIT(clEnqueueUnmapMemObject, &retVal);
            return retVal;
        }

        retVal = pCommandQueue->enqueueUnmapMemObject(pMemObj, mappedPtr, numEventsInWaitList, eventWaitList, event);
    }

    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueUnmapMemObject, &retVal);
    return retVal;
}

// OpenCL entry point for migrating memory objects.
// Returns CL_INVALID_VALUE when numMemObjects is 0, memObjects is null, or `flags` contains any bit
// other than CL_MIGRATE_MEM_OBJECT_HOST / CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED. The individual
// cl_mem handles are not validated in this function; they are handed to
// CommandQueue::enqueueMigrateMemObjects as-is.
cl_int CL_API_CALL clEnqueueMigrateMemObjects(cl_command_queue commandQueue,
                                              cl_uint numMemObjects,
                                              const cl_mem *memObjects,
                                              cl_mem_migration_flags flags,
                                              cl_uint numEventsInWaitList,
                                              const cl_event *eventWaitList,
                                              cl_event *event) {
    TRACING_ENTER(clEnqueueMigrateMemObjects, &commandQueue, &numMemObjects, &memObjects, &flags, &numEventsInWaitList, &eventWaitList, &event);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue,
                   "numMemObjects", numMemObjects,
                   "memObjects", memObjects,
                   "flags", flags,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    CommandQueue *pCommandQueue = nullptr;
    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        EventWaitList(numEventsInWaitList, eventWaitList));

    if (CL_SUCCESS != retVal) {
        TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal);
        return retVal;
    }

    if (numMemObjects == 0 || memObjects == nullptr) {
        retVal = CL_INVALID_VALUE;
        TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal);
        return retVal;
    }

    // Any bit outside this mask makes the request invalid.
    const cl_mem_migration_flags allValidFlags = CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED | CL_MIGRATE_MEM_OBJECT_HOST;

    if ((flags & (~allValidFlags)) != 0) {
        retVal = CL_INVALID_VALUE;
        TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal);
        return retVal;
    }

    retVal = pCommandQueue->enqueueMigrateMemObjects(numMemObjects,
                                                     memObjects,
                                                     flags,
                                                     numEventsInWaitList,
                                                     eventWaitList,
                                                     event);
    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueMigrateMemObjects, &retVal);
    return retVal;
}

// OpenCL entry point for launching a kernel over an N-dimensional range.
// Validates the queue, kernel and wait list; rejects kernels whose execution type is not Default or
// that use a sync buffer (CL_INVALID_KERNEL); then, holding a TakeOwnershipWrapper on the kernel,
// optionally notifies GT-Pin and forwards to CommandQueue::enqueueKernel.
cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
                                          cl_kernel kernel,
                                          cl_uint workDim,
                                          const size_t *globalWorkOffset,
                                          const size_t *globalWorkSize,
                                          const size_t *localWorkSize,
                                          cl_uint numEventsInWaitList,
                                          const cl_event *eventWaitList,
                                          cl_event *event) {
    TRACING_ENTER(clEnqueueNDRangeKernel, &commandQueue, &kernel, &workDim, &globalWorkOffset, &globalWorkSize, &localWorkSize, &numEventsInWaitList, &eventWaitList, &event);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel,
                   "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0),
                   "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1),
                   "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2),
                   "globalWorkSize", NEO::FileLoggerInstance().getSizes(globalWorkSize, workDim, false),
                   "localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true),
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    CommandQueue *pCommandQueue = nullptr;
    Kernel *pKernel = nullptr;

    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        WithCastToInternal(kernel, &pKernel),
        EventWaitList(numEventsInWaitList, eventWaitList));

    if (CL_SUCCESS != retVal) {
        TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
        return retVal;
    }

    if ((pKernel->getExecutionType() != KernelExecutionType::Default) ||
        pKernel->isUsingSyncBuffer()) {
        retVal = CL_INVALID_KERNEL;
        TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
        return retVal;
    }

    // The wrapper's second argument is the GT-Pin-initialised flag; presumably ownership is only
    // actually taken when it is true - verify against TakeOwnershipWrapper.
    TakeOwnershipWrapper kernelOwnership(*pKernel, gtpinIsGTPinInitialized());
    if (gtpinIsGTPinInitialized()) {
        gtpinNotifyKernelSubmit(kernel, pCommandQueue);
    }

    // Note that the API handle `kernel`, not pKernel, is what gets passed to enqueueKernel.
    retVal = pCommandQueue->enqueueKernel(
        kernel,
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event);

    DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u));
    TRACING_EXIT(clEnqueueNDRangeKernel, &retVal);
    return retVal;
}

// OpenCL entry point for running a kernel as a single work-item.
// Implemented by delegating to clEnqueueNDRangeKernel with workDim = 3, no global offset and
// global/local sizes of {1, 1, 1}. The rest of the argument list and the function tail are on the
// following line of the file.
cl_int CL_API_CALL clEnqueueTask(cl_command_queue commandQueue,
                                 cl_kernel kernel,
                                 cl_uint numEventsInWaitList,
                                 const cl_event *eventWaitList,
                                 cl_event *event) {
    TRACING_ENTER(clEnqueueTask, &commandQueue, &kernel, &numEventsInWaitList, &eventWaitList, &event);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "kernel", kernel,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));
    cl_uint workDim = 3;
    size_t *globalWorkOffset = nullptr;
    size_t globalWorkSize[3] = {1, 1, 1};
    size_t localWorkSize[3] = {1, 1, 1};
    retVal = (clEnqueueNDRangeKernel(
        commandQueue,
        kernel,
        workDim,
        globalWorkOffset,
        globalWorkSize,
        localWorkSize,
        numEventsInWaitList,
        eventWaitList,
        event));
    // The lines above finish the clEnqueueNDRangeKernel call that clEnqueueTask starts on the
    // preceding line of the file.
    TRACING_EXIT(clEnqueueTask, &retVal);
    return retVal;
}

// OpenCL entry point for native (host-function) kernels.
// No work is enqueued: the function logs its inputs and unconditionally returns
// CL_OUT_OF_HOST_MEMORY, the value retVal is initialised with.
// NOTE(review): the argument-less `reinterpret_cast(...)` / `castToObject(...)` in this block look like
// extraction damage (a stripped `<...>` argument) - confirm against the pristine file.
cl_int CL_API_CALL clEnqueueNativeKernel(cl_command_queue commandQueue,
                                         void(CL_CALLBACK *userFunc)(void *),
                                         void *args,
                                         size_t cbArgs,
                                         cl_uint numMemObjects,
                                         const cl_mem *memList,
                                         const void **argsMemLoc,
                                         cl_uint numEventsInWaitList,
                                         const cl_event *eventWaitList,
                                         cl_event *event) {
    TRACING_ENTER(clEnqueueNativeKernel, &commandQueue, &userFunc, &args, &cbArgs, &numMemObjects, &memList, &argsMemLoc, &numEventsInWaitList, &eventWaitList, &event);
    cl_int retVal = CL_OUT_OF_HOST_MEMORY;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "userFunc", userFunc, "args", args,
                   "cbArgs", cbArgs, "numMemObjects", numMemObjects, "memList", memList, "argsMemLoc", argsMemLoc,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    TRACING_EXIT(clEnqueueNativeKernel, &retVal);
    return retVal;
}

// deprecated OpenCL 1.1
// Enqueues a marker by calling CommandQueue::enqueueMarkerWithWaitList with an empty wait list.
// Returns CL_INVALID_COMMAND_QUEUE when the handle does not resolve to a queue object.
cl_int CL_API_CALL clEnqueueMarker(cl_command_queue commandQueue,
                                   cl_event *event) {
    TRACING_ENTER(clEnqueueMarker, &commandQueue, &event);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_event", event);

    auto pCommandQueue = castToObject(commandQueue);
    if (pCommandQueue) {
        retVal = pCommandQueue->enqueueMarkerWithWaitList(
            0,
            nullptr,
            event);
        TRACING_EXIT(clEnqueueMarker, &retVal);
        return retVal;
    }
    retVal = CL_INVALID_COMMAND_QUEUE;
    TRACING_EXIT(clEnqueueMarker, &retVal);
    return retVal;
}

// deprecated OpenCL 1.1
// Waits for the events in `eventList`. Only the signature and entry hooks are in this block; the
// body continues on the following line of the file.
cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue commandQueue,
                                          cl_uint numEvents,
                                          const cl_event *eventList) {
    TRACING_ENTER(clEnqueueWaitForEvents, &commandQueue, &numEvents, &eventList);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
// Body of clEnqueueWaitForEvents; its signature and entry hooks are on the preceding line of the file.
// NOTE(review): the argument-less `reinterpret_cast(...)` / `castToObject(...)` in this block look like
// extraction damage (a stripped `<...>` argument) - confirm against the pristine file.
    DBG_LOG_INPUTS("commandQueue", commandQueue, "eventList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventList), numEvents));

    auto pCommandQueue = castToObject(commandQueue);
    if (!pCommandQueue) {
        retVal = CL_INVALID_COMMAND_QUEUE;
        TRACING_EXIT(clEnqueueWaitForEvents, &retVal);
        return retVal;
    }
    // Validate each event in turn, stopping at the first failure.
    // NOTE(review): eventList is indexed without a null check when numEvents > 0.
    for (unsigned int i = 0; i < numEvents && retVal == CL_SUCCESS; i++) {
        retVal = validateObjects(eventList[i]);
    }

    if (retVal != CL_SUCCESS) {
        TRACING_EXIT(clEnqueueWaitForEvents, &retVal);
        return retVal;
    }

    // The wait is performed through the static Event::waitForEvents; the queue object itself is
    // only used for the validity check above.
    retVal = Event::waitForEvents(numEvents, eventList);

    TRACING_EXIT(clEnqueueWaitForEvents, &retVal);
    return retVal;
}

// deprecated OpenCL 1.1
// Enqueues a barrier by calling CommandQueue::enqueueBarrierWithWaitList with an empty wait list and
// no output event. Returns CL_INVALID_COMMAND_QUEUE when the handle does not resolve to a queue.
cl_int CL_API_CALL clEnqueueBarrier(cl_command_queue commandQueue) {
    TRACING_ENTER(clEnqueueBarrier, &commandQueue);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("commandQueue", commandQueue);
    auto pCommandQueue = castToObject(commandQueue);
    if (pCommandQueue) {
        retVal = pCommandQueue->enqueueBarrierWithWaitList(
            0,
            nullptr,
            nullptr);
        TRACING_EXIT(clEnqueueBarrier, &retVal);
        return retVal;
    }
    retVal = CL_INVALID_COMMAND_QUEUE;
    TRACING_EXIT(clEnqueueBarrier, &retVal);
    return retVal;
}

// OpenCL entry point for enqueuing a marker that depends on `eventWaitList`.
// Validates the queue and wait list, then forwards to CommandQueue::enqueueMarkerWithWaitList.
cl_int CL_API_CALL clEnqueueMarkerWithWaitList(cl_command_queue commandQueue,
                                               cl_uint numEventsInWaitList,
                                               const cl_event *eventWaitList,
                                               cl_event *event) {
    TRACING_ENTER(clEnqueueMarkerWithWaitList, &commandQueue, &numEventsInWaitList, &eventWaitList, &event);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("cl_command_queue", commandQueue,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    CommandQueue *pCommandQueue = nullptr;
    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        EventWaitList(numEventsInWaitList, eventWaitList));

    if (CL_SUCCESS != retVal) {
        TRACING_EXIT(clEnqueueMarkerWithWaitList, &retVal);
        return retVal;
    }

    retVal = pCommandQueue->enqueueMarkerWithWaitList(
        numEventsInWaitList,
        eventWaitList,
        event);
    TRACING_EXIT(clEnqueueMarkerWithWaitList, &retVal);
    return retVal;
}

// OpenCL entry point for enqueuing a barrier that depends on `eventWaitList`.
// Validates the queue and wait list, then forwards to CommandQueue::enqueueBarrierWithWaitList.
cl_int CL_API_CALL clEnqueueBarrierWithWaitList(cl_command_queue commandQueue,
                                                cl_uint numEventsInWaitList,
                                                const cl_event *eventWaitList,
                                                cl_event *event) {
    TRACING_ENTER(clEnqueueBarrierWithWaitList, &commandQueue, &numEventsInWaitList, &eventWaitList, &event);
    cl_int retVal = CL_SUCCESS;
    API_ENTER(&retVal);
    DBG_LOG_INPUTS("cl_command_queue", commandQueue,
                   "numEventsInWaitList", numEventsInWaitList,
                   "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList),
                   "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1));

    CommandQueue *pCommandQueue = nullptr;

    retVal = validateObjects(
        WithCastToInternal(commandQueue, &pCommandQueue),
        EventWaitList(numEventsInWaitList, eventWaitList));

    if (CL_SUCCESS != retVal) {
        TRACING_EXIT(clEnqueueBarrierWithWaitList, &retVal);
        return retVal;
    }
    retVal = pCommandQueue->enqueueBarrierWithWaitList(
        numEventsInWaitList,
        eventWaitList,
        event);
    TRACING_EXIT(clEnqueueBarrierWithWaitList, &retVal);
    return retVal;
}

// Intel extension: creates a command queue with performance counters enabled.
// Returns the new queue, or nullptr with the reason written through `errcodeRet`:
//   CL_INVALID_DEVICE            - device handle invalid, or instrumentation not enabled for it
//   CL_INVALID_QUEUE_PROPERTIES  - CL_QUEUE_PROFILING_ENABLE missing (further property checks follow)
// The remaining checks and the queue creation are on the following line of the file.
CL_API_ENTRY cl_command_queue CL_API_CALL
clCreatePerfCountersCommandQueueINTEL(
    cl_context context,
    cl_device_id device,
    cl_command_queue_properties properties,
    cl_uint configuration,
    cl_int *errcodeRet) {
    API_ENTER(nullptr);

    DBG_LOG_INPUTS("context", context,
                   "device", device,
                   "properties", properties,
                   "configuration", configuration);

    cl_command_queue commandQueue = nullptr;
    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);

    ClDevice *pDevice = nullptr;
    WithCastToInternal(device, &pDevice);
    if (pDevice == nullptr) {
        err.set(CL_INVALID_DEVICE);
        return commandQueue;
    }

    if (!pDevice->getHardwareInfo().capabilityTable.instrumentationEnabled) {
        err.set(CL_INVALID_DEVICE);
        return commandQueue;
    }

    if ((properties & CL_QUEUE_PROFILING_ENABLE) == 0) {
        err.set(CL_INVALID_QUEUE_PROPERTIES);
        return commandQueue;
    }
    // Remainder of clCreatePerfCountersCommandQueueINTEL; the function starts on the preceding line
    // of the file. On-device queues are not accepted for performance-counter queues.
    if ((properties & CL_QUEUE_ON_DEVICE) != 0) {
        err.set(CL_INVALID_QUEUE_PROPERTIES);
        return commandQueue;
    }

    if ((properties & CL_QUEUE_ON_DEVICE_DEFAULT) != 0) {
        err.set(CL_INVALID_QUEUE_PROPERTIES);
        return commandQueue;
    }

    // Only configuration 0 is accepted.
    if (configuration != 0) {
        err.set(CL_INVALID_OPERATION);
        return commandQueue;
    }

    // clCreateCommandQueue writes its own status through errcodeRet.
    commandQueue = clCreateCommandQueue(context, device, properties, errcodeRet);
    if (commandQueue != nullptr) {
        // NOTE(review): `castToObjectOrAbort(...)` appears to have lost a `<...>` template argument
        // during extraction - confirm against the pristine file.
        auto commandQueueObject = castToObjectOrAbort(commandQueue);

        // If counters cannot be enabled the freshly created queue is released again and the caller
        // gets nullptr with CL_OUT_OF_RESOURCES.
        if (!commandQueueObject->setPerfCountersEnabled()) {
            clReleaseCommandQueue(commandQueue);
            commandQueue = nullptr;
            err.set(CL_OUT_OF_RESOURCES);
        }
    }
    return commandQueue;
}

// Intel extension stub: always returns CL_INVALID_OPERATION; none of the parameters are read.
CL_API_ENTRY cl_int CL_API_CALL
clSetPerformanceConfigurationINTEL(
    cl_device_id device,
    cl_uint count,
    cl_uint *offsets,
    cl_uint *values) {
    // Not supported, covered by Metric Library DLL.
    return CL_INVALID_OPERATION;
}

// Intel unified-memory extension: allocates host unified memory in `context`.
// Returns the allocation pointer, or nullptr with the reason written through `errcodeRet`:
//   validateObjects error     - invalid context
//   CL_INVALID_VALUE          - `properties` could not be parsed
//   CL_INVALID_BUFFER_SIZE    - size exceeds device 0's maxMemAllocSize and the parsed flags do not
//                               set allowUnrestrictedSize
// `alignment` is not read anywhere in the visible part of this function. The tail of the final
// return statement is on the following line of the file.
void *clHostMemAllocINTEL(
    cl_context context,
    cl_mem_properties_intel *properties,
    size_t size,
    cl_uint alignment,
    cl_int *errcodeRet) {

    Context *neoContext = nullptr;

    ErrorCodeHelper err(errcodeRet, CL_SUCCESS);

    auto retVal = validateObjects(WithCastToInternal(context, &neoContext));

    if (retVal != CL_SUCCESS) {
        err.set(retVal);
        return nullptr;
    }

    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY);
    // flags / flagsIntel / allocflags only serve as out-parameters for the parser; they are not read afterwards.
    cl_mem_flags flags = 0;
    cl_mem_flags_intel flagsIntel = 0;
    cl_mem_alloc_flags_intel allocflags = 0;
    if (!MemoryPropertiesParser::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel,
                                                       allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::UNKNOWN)) {
        err.set(CL_INVALID_VALUE);
        return nullptr;
    }

    if (size > neoContext->getDevice(0u)->getSharedDeviceInfo().maxMemAllocSize && !unifiedMemoryProperties.allocationFlags.flags.allowUnrestrictedSize) {
        err.set(CL_INVALID_BUFFER_SIZE);
        return nullptr;
    }

    return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(neoContext->getDevice(0)->getRootDeviceIndex(),
size, unifiedMemoryProperties); } void *clDeviceMemAllocINTEL( cl_context context, cl_device_id device, cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet) { Context *neoContext = nullptr; ClDevice *neoDevice = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); auto retVal = validateObjects(WithCastToInternal(context, &neoContext), WithCastToInternal(device, &neoDevice)); if (retVal != CL_SUCCESS) { err.set(retVal); return nullptr; } SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY); cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; if (!MemoryPropertiesParser::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::UNKNOWN)) { err.set(CL_INVALID_VALUE); return nullptr; } if (size > neoContext->getDevice(0u)->getSharedDeviceInfo().maxMemAllocSize && !unifiedMemoryProperties.allocationFlags.flags.allowUnrestrictedSize) { err.set(CL_INVALID_BUFFER_SIZE); return nullptr; } unifiedMemoryProperties.device = device; unifiedMemoryProperties.subdeviceBitfield = neoDevice->getDefaultEngine().osContext->getDeviceBitfield(); return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(neoDevice->getRootDeviceIndex(), size, unifiedMemoryProperties); } void *clSharedMemAllocINTEL( cl_context context, cl_device_id device, cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet) { Context *neoContext = nullptr; ClDevice *neoDevice = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); auto retVal = validateObjects(WithCastToInternal(context, &neoContext)); if (retVal != CL_SUCCESS) { err.set(retVal); return nullptr; } SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY); cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; 
cl_mem_alloc_flags_intel allocflags = 0; if (!MemoryPropertiesParser::parseMemoryProperties(properties, unifiedMemoryProperties.allocationFlags, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::UNKNOWN)) { err.set(CL_INVALID_VALUE); return nullptr; } if (size > neoContext->getDevice(0u)->getSharedDeviceInfo().maxMemAllocSize && !unifiedMemoryProperties.allocationFlags.flags.allowUnrestrictedSize) { err.set(CL_INVALID_BUFFER_SIZE); return nullptr; } unifiedMemoryProperties.device = device; if (!device) { return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(neoContext->getDevice(0)->getRootDeviceIndex(), size, unifiedMemoryProperties); } validateObjects(WithCastToInternal(device, &neoDevice)); unifiedMemoryProperties.subdeviceBitfield = neoDevice->getDefaultEngine().osContext->getDeviceBitfield(); return neoContext->getSVMAllocsManager()->createSharedUnifiedMemoryAllocation(neoContext->getDevice(0)->getRootDeviceIndex(), size, unifiedMemoryProperties, neoContext->getSpecialQueue()); } cl_int clMemFreeCommon(cl_context context, const void *ptr, bool blocking) { Context *neoContext = nullptr; auto retVal = validateObjects(WithCastToInternal(context, &neoContext)); if (retVal != CL_SUCCESS) { return retVal; } if (ptr && !neoContext->getSVMAllocsManager()->freeSVMAlloc(const_cast(ptr), blocking)) { return CL_INVALID_VALUE; } if (neoContext->getSVMAllocsManager()->getSvmMapOperation(ptr)) { neoContext->getSVMAllocsManager()->removeSvmMapOperation(ptr); } return CL_SUCCESS; } cl_int clMemFreeINTEL( cl_context context, const void *ptr) { return clMemFreeCommon(context, ptr, false); } cl_int clMemBlockingFreeINTEL( cl_context context, void *ptr) { return clMemFreeCommon(context, ptr, true); } cl_int clGetMemAllocInfoINTEL( cl_context context, const void *ptr, cl_mem_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { Context *pContext = nullptr; cl_int retVal = CL_SUCCESS; 
retVal = validateObject(WithCastToInternal(context, &pContext)); if (!pContext) { return retVal; } auto allocationsManager = pContext->getSVMAllocsManager(); if (!allocationsManager) { return CL_INVALID_VALUE; } GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); auto unifiedMemoryAllocation = allocationsManager->getSVMAlloc(ptr); switch (paramName) { case CL_MEM_ALLOC_TYPE_INTEL: { if (!unifiedMemoryAllocation) { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_UNKNOWN_INTEL)); return retVal; } else if (unifiedMemoryAllocation->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_HOST_INTEL)); return retVal; } else if (unifiedMemoryAllocation->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_DEVICE_INTEL)); return retVal; } else { retVal = changeGetInfoStatusToCLResultType(info.set(CL_MEM_TYPE_SHARED_INTEL)); return retVal; } break; } case CL_MEM_ALLOC_BASE_PTR_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(nullptr)); } return changeGetInfoStatusToCLResultType(info.set(unifiedMemoryAllocation->gpuAllocation->getGpuAddress())); } case CL_MEM_ALLOC_SIZE_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(0u)); } return changeGetInfoStatusToCLResultType(info.set(unifiedMemoryAllocation->size)); } case CL_MEM_ALLOC_FLAGS_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(0u)); } return changeGetInfoStatusToCLResultType(info.set(unifiedMemoryAllocation->allocationFlagsProperty.allAllocFlags)); } case CL_MEM_ALLOC_DEVICE_INTEL: { if (!unifiedMemoryAllocation) { return changeGetInfoStatusToCLResultType(info.set(static_cast(nullptr))); } return changeGetInfoStatusToCLResultType(info.set(static_cast(unifiedMemoryAllocation->device))); } default: { } } return CL_INVALID_VALUE; } 
cl_int clSetKernelArgMemPointerINTEL( cl_kernel kernel, cl_uint argIndex, const void *argValue) { return clSetKernelArgSVMPointer(kernel, argIndex, argValue); } cl_int clEnqueueMemsetINTEL( cl_command_queue commandQueue, void *dstPtr, cl_int value, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto retVal = clEnqueueSVMMemFill(commandQueue, dstPtr, &value, 1u, size, numEventsInWaitList, eventWaitList, event); if (retVal == CL_SUCCESS && event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMSET_INTEL); } return retVal; } cl_int clEnqueueMemFillINTEL( cl_command_queue commandQueue, void *dstPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto retVal = clEnqueueSVMMemFill(commandQueue, dstPtr, pattern, patternSize, size, numEventsInWaitList, eventWaitList, event); if (retVal == CL_SUCCESS && event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMFILL_INTEL); } return retVal; } cl_int clEnqueueMemcpyINTEL( cl_command_queue commandQueue, cl_bool blocking, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto retVal = clEnqueueSVMMemcpy(commandQueue, blocking, dstPtr, srcPtr, size, numEventsInWaitList, eventWaitList, event); if (retVal == CL_SUCCESS && event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMCPY_INTEL); } return retVal; } cl_int clEnqueueMigrateMemINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), ptr, EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { 
pCommandQueue->enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MIGRATEMEM_INTEL); } } return retVal; } cl_int clEnqueueMemAdviseINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_advice_intel advice, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), ptr, EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { pCommandQueue->enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_MEMADVISE_INTEL); } } return retVal; } cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR(cl_context context, cl_device_id device, const cl_queue_properties_khr *properties, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "device", device, "properties", properties); return clCreateCommandQueueWithProperties(context, device, properties, errcodeRet); } cl_accelerator_intel CL_API_CALL clCreateAcceleratorINTEL( cl_context context, cl_accelerator_type_intel acceleratorType, size_t descriptorSize, const void *descriptor, cl_int *errcodeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "acceleratorType", acceleratorType, "descriptorSize", descriptorSize, "descriptor", NEO::FileLoggerInstance().infoPointerToString(descriptor, descriptorSize)); cl_accelerator_intel accelerator = nullptr; do { retVal = validateObjects(context); if (retVal != CL_SUCCESS) { retVal = CL_INVALID_CONTEXT; break; } Context *pContext = castToObject(context); DEBUG_BREAK_IF(!pContext); switch (acceleratorType) { case CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL: accelerator = VmeAccelerator::create( pContext, 
acceleratorType, descriptorSize, descriptor, retVal); break; default: retVal = CL_INVALID_ACCELERATOR_TYPE_INTEL; } } while (false); if (errcodeRet) { *errcodeRet = retVal; } return accelerator; } cl_int CL_API_CALL clRetainAcceleratorINTEL( cl_accelerator_intel accelerator) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("accelerator", accelerator); IntelAccelerator *pAccelerator = nullptr; do { pAccelerator = castToObject(accelerator); if (!pAccelerator) { retVal = CL_INVALID_ACCELERATOR_INTEL; break; } pAccelerator->retain(); } while (false); return retVal; } cl_int CL_API_CALL clGetAcceleratorInfoINTEL( cl_accelerator_intel accelerator, cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("accelerator", accelerator, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); IntelAccelerator *pAccelerator = nullptr; do { pAccelerator = castToObject(accelerator); if (!pAccelerator) { retVal = CL_INVALID_ACCELERATOR_INTEL; break; } retVal = pAccelerator->getInfo( paramName, paramValueSize, paramValue, paramValueSizeRet); } while (false); return retVal; } cl_int CL_API_CALL clReleaseAcceleratorINTEL( cl_accelerator_intel accelerator) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("accelerator", accelerator); IntelAccelerator *pAccelerator = nullptr; do { pAccelerator = castToObject(accelerator); if (!pAccelerator) { retVal = CL_INVALID_ACCELERATOR_INTEL; break; } pAccelerator->release(); } while (false); return retVal; } cl_program CL_API_CALL clCreateProgramWithILKHR(cl_context context, const void *il, size_t length, cl_int *errcodeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "il", NEO::FileLoggerInstance().infoPointerToString(il, length), 
"length", length); cl_program program = nullptr; retVal = validateObjects(context, il); if (retVal == CL_SUCCESS) { program = Program::createFromIL( castToObjectOrAbort(context), il, length, retVal); } if (errcodeRet != nullptr) { *errcodeRet = retVal; } return program; } #define RETURN_FUNC_PTR_IF_EXIST(name) \ { \ if (!strcmp(funcName, #name)) { \ TRACING_EXIT(clGetExtensionFunctionAddress, (void **)&name); \ return ((void *)(name)); \ } \ } void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) { TRACING_ENTER(clGetExtensionFunctionAddress, &funcName); DBG_LOG_INPUTS("funcName", funcName); // Support an internal call by the ICD RETURN_FUNC_PTR_IF_EXIST(clIcdGetPlatformIDsKHR); //perf counters RETURN_FUNC_PTR_IF_EXIST(clCreatePerfCountersCommandQueueINTEL); RETURN_FUNC_PTR_IF_EXIST(clSetPerformanceConfigurationINTEL); // Support device extensions RETURN_FUNC_PTR_IF_EXIST(clCreateAcceleratorINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetAcceleratorInfoINTEL); RETURN_FUNC_PTR_IF_EXIST(clRetainAcceleratorINTEL); RETURN_FUNC_PTR_IF_EXIST(clReleaseAcceleratorINTEL); RETURN_FUNC_PTR_IF_EXIST(clCreateBufferWithPropertiesINTEL); RETURN_FUNC_PTR_IF_EXIST(clCreateImageWithPropertiesINTEL); RETURN_FUNC_PTR_IF_EXIST(clAddCommentINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueVerifyMemoryINTEL); RETURN_FUNC_PTR_IF_EXIST(clCreateTracingHandleINTEL); RETURN_FUNC_PTR_IF_EXIST(clSetTracingPointINTEL); RETURN_FUNC_PTR_IF_EXIST(clDestroyTracingHandleINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnableTracingINTEL); RETURN_FUNC_PTR_IF_EXIST(clDisableTracingINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetTracingStateINTEL); RETURN_FUNC_PTR_IF_EXIST(clHostMemAllocINTEL); RETURN_FUNC_PTR_IF_EXIST(clDeviceMemAllocINTEL); RETURN_FUNC_PTR_IF_EXIST(clSharedMemAllocINTEL); RETURN_FUNC_PTR_IF_EXIST(clMemFreeINTEL); RETURN_FUNC_PTR_IF_EXIST(clMemBlockingFreeINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetMemAllocInfoINTEL); RETURN_FUNC_PTR_IF_EXIST(clSetKernelArgMemPointerINTEL); 
RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemsetINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemFillINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemcpyINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMigrateMemINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueMemAdviseINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceFunctionPointerINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceGlobalVariablePointerINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetKernelMaxConcurrentWorkGroupCountINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetKernelSuggestedLocalWorkSizeINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueNDCountKernelINTEL); void *ret = sharingFactory.getExtensionFunctionAddress(funcName); if (ret != nullptr) { TRACING_EXIT(clGetExtensionFunctionAddress, &ret); return ret; } // SPIR-V support through the cl_khr_il_program extension RETURN_FUNC_PTR_IF_EXIST(clCreateProgramWithILKHR); RETURN_FUNC_PTR_IF_EXIST(clCreateCommandQueueWithPropertiesKHR); RETURN_FUNC_PTR_IF_EXIST(clSetProgramSpecializationConstant); ret = getAdditionalExtensionFunctionAddress(funcName); TRACING_EXIT(clGetExtensionFunctionAddress, &ret); return ret; } // OpenCL 1.2 void *CL_API_CALL clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, const char *funcName) { TRACING_ENTER(clGetExtensionFunctionAddressForPlatform, &platform, &funcName); DBG_LOG_INPUTS("platform", platform, "funcName", funcName); auto pPlatform = castToObject(platform); if (pPlatform == nullptr) { void *ret = nullptr; TRACING_EXIT(clGetExtensionFunctionAddressForPlatform, &ret); return ret; } void *ret = clGetExtensionFunctionAddress(funcName); TRACING_EXIT(clGetExtensionFunctionAddressForPlatform, &ret); return ret; } void *CL_API_CALL clSVMAlloc(cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment) { TRACING_ENTER(clSVMAlloc, &context, &flags, &size, &alignment); DBG_LOG_INPUTS("context", context, "flags", flags, "size", size, "alignment", alignment); void *pAlloc = nullptr; Context *pContext = nullptr; if (validateObjects(WithCastToInternal(context, 
&pContext)) != CL_SUCCESS) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } if (flags == 0) { flags = CL_MEM_READ_WRITE; } if (!((flags == CL_MEM_READ_WRITE) || (flags == CL_MEM_WRITE_ONLY) || (flags == CL_MEM_READ_ONLY) || (flags == CL_MEM_SVM_FINE_GRAIN_BUFFER) || (flags == (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) || (flags == (CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER)) || (flags == (CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) || (flags == (CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER)) || (flags == (CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) || (flags == (CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER)) || (flags == (CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)))) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } auto pDevice = pContext->getDevice(0); if ((size == 0) || (size > pDevice->getSharedDeviceInfo().maxMemAllocSize)) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } if ((alignment && (alignment & (alignment - 1))) || (alignment > sizeof(cl_ulong16))) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } if (!hwInfo.capabilityTable.ftrSupportsCoherency && (flags & (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS))) { TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } pAlloc = pContext->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), size, MemObjHelper::getSvmAllocationProperties(flags)); if (pContext->isProvidingPerformanceHints()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS, pAlloc, size); } TRACING_EXIT(clSVMAlloc, &pAlloc); return pAlloc; } void CL_API_CALL clSVMFree(cl_context context, void *svmPointer) { TRACING_ENTER(clSVMFree, &context, &svmPointer); DBG_LOG_INPUTS("context", context, 
"svmPointer", svmPointer); Context *pContext = nullptr; cl_int retVal = validateObjects( WithCastToInternal(context, &pContext)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clSVMFree, nullptr); return; } auto pClDevice = pContext->getDevice(0); if (!pClDevice->getHardwareInfo().capabilityTable.ftrSvm) { TRACING_EXIT(clSVMFree, nullptr); return; } pContext->getSVMAllocsManager()->freeSVMAlloc(svmPointer); TRACING_EXIT(clSVMFree, nullptr); } cl_int CL_API_CALL clEnqueueSVMFree(cl_command_queue commandQueue, cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMFree, &commandQueue, &numSvmPointers, &svmPointers, &pfnFreeFunc, &userData, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numSvmPointers", numSvmPointers, "svmPointers", svmPointers, "pfnFreeFunc", pfnFreeFunc, "userData", userData, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } if (((svmPointers != nullptr) && (numSvmPointers == 0)) || ((svmPointers == nullptr) && (numSvmPointers != 0))) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } retVal = 
pCommandQueue->enqueueSVMFree( numSvmPointers, svmPointers, pfnFreeFunc, userData, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMFree, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMemcpy(cl_command_queue commandQueue, cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMemcpy, &commandQueue, &blockingCopy, &dstPtr, &srcPtr, &size, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "blockingCopy", blockingCopy, "dstPtr", dstPtr, "srcPtr", srcPtr, "size", size, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } if ((dstPtr == nullptr) || (srcPtr == nullptr)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMMemcpy( blockingCopy, dstPtr, srcPtr, size, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMMemcpy, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMemFill(cl_command_queue commandQueue, void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMemFill, &commandQueue, &svmPtr, &pattern, &patternSize, &size, 
&numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "svmPtr", NEO::FileLoggerInstance().infoPointerToString(svmPtr, size), "pattern", NEO::FileLoggerInstance().infoPointerToString(pattern, patternSize), "patternSize", patternSize, "size", size, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } if ((svmPtr == nullptr) || (size == 0)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMMemFill( svmPtr, pattern, patternSize, size, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMMemFill, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMap(cl_command_queue commandQueue, cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMap, &commandQueue, &blockingMap, &mapFlags, &svmPtr, &size, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "blockingMap", blockingMap, "mapFlags", mapFlags, "svmPtr", 
NEO::FileLoggerInstance().infoPointerToString(svmPtr, size), "size", size, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } if ((svmPtr == nullptr) || (size == 0)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMMap( blockingMap, mapFlags, svmPtr, size, numEventsInWaitList, eventWaitList, event, true); TRACING_EXIT(clEnqueueSVMMap, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMUnmap(cl_command_queue commandQueue, void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMUnmap, &commandQueue, &svmPtr, &numEventsInWaitList, &eventWaitList, &event); CommandQueue *pCommandQueue = nullptr; cl_int retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList), svmPtr); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "svmPtr", svmPtr, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { TRACING_EXIT(clEnqueueSVMUnmap, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMUnmap, &retVal); return retVal; } retVal = pCommandQueue->enqueueSVMUnmap( svmPtr, numEventsInWaitList, 
eventWaitList, event, true); TRACING_EXIT(clEnqueueSVMUnmap, &retVal); return retVal; } cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint argIndex, const void *argValue) { TRACING_ENTER(clSetKernelArgSVMPointer, &kernel, &argIndex, &argValue); Kernel *pKernel = nullptr; auto retVal = validateObjects(WithCastToInternal(kernel, &pKernel)); API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } const HardwareInfo &hwInfo = pKernel->getDevice().getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } if (argIndex >= pKernel->getKernelArgsNumber()) { retVal = CL_INVALID_ARG_INDEX; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo().kernelArgInfo[argIndex].metadata.getAddressQualifier()); if ((kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_GLOBAL) && (kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_CONSTANT)) { retVal = CL_INVALID_ARG_VALUE; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } GraphicsAllocation *pSvmAlloc = nullptr; if (argValue != nullptr) { auto svmManager = pKernel->getContext().getSVMAllocsManager(); auto svmData = svmManager->getSVMAlloc(argValue); if (svmData == nullptr) { if (!pKernel->getDevice().areSharedSystemAllocationsAllowed()) { retVal = CL_INVALID_ARG_VALUE; TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } } else { pSvmAlloc = svmData->gpuAllocation; } } retVal = pKernel->setArgSvmAlloc(argIndex, const_cast(argValue), pSvmAlloc); TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } cl_int CL_API_CALL clSetKernelExecInfo(cl_kernel kernel, cl_kernel_exec_info paramName, size_t paramValueSize, const void *paramValue) { 
TRACING_ENTER(clSetKernelExecInfo, &kernel, ¶mName, ¶mValueSize, ¶mValue); Kernel *pKernel = nullptr; auto retVal = validateObjects(WithCastToInternal(kernel, &pKernel)); API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } const HardwareInfo &hwInfo = pKernel->getDevice().getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } switch (paramName) { case CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL: case CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL: case CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL: { auto propertyValue = *reinterpret_cast(paramValue); pKernel->setUnifiedMemoryProperty(paramName, propertyValue); } break; case CL_KERNEL_EXEC_INFO_SVM_PTRS: case CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL: { if ((paramValueSize == 0) || (paramValueSize % sizeof(void *)) || (paramValue == nullptr)) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } size_t numPointers = paramValueSize / sizeof(void *); size_t *pSvmPtrList = (size_t *)paramValue; if (paramName == CL_KERNEL_EXEC_INFO_SVM_PTRS) { pKernel->clearSvmKernelExecInfo(); } else { pKernel->clearUnifiedMemoryExecInfo(); } for (uint32_t i = 0; i < numPointers; i++) { auto svmData = pKernel->getContext().getSVMAllocsManager()->getSVMAlloc((const void *)pSvmPtrList[i]); if (svmData == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } GraphicsAllocation *svmAlloc = svmData->gpuAllocation; if (paramName == CL_KERNEL_EXEC_INFO_SVM_PTRS) { pKernel->setSvmKernelExecInfo(svmAlloc); } else { pKernel->setUnifiedMemoryExecInfo(svmAlloc); } } break; } case CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL: { auto 
propertyValue = *static_cast(paramValue); retVal = pKernel->setKernelThreadArbitrationPolicy(propertyValue); return retVal; } case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } case CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL: { if (paramValueSize != sizeof(cl_execution_info_kernel_type_intel) || paramValue == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } auto kernelType = *static_cast(paramValue); retVal = pKernel->setKernelExecutionType(kernelType); TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } default: { retVal = CL_INVALID_VALUE; TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; } } TRACING_EXIT(clSetKernelExecInfo, &retVal); return retVal; }; cl_mem CL_API_CALL clCreatePipe(cl_context context, cl_mem_flags flags, cl_uint pipePacketSize, cl_uint pipeMaxPackets, const cl_pipe_properties *properties, cl_int *errcodeRet) { TRACING_ENTER(clCreatePipe, &context, &flags, &pipePacketSize, &pipeMaxPackets, &properties, &errcodeRet); cl_mem pipe = nullptr; cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_context", context, "cl_mem_flags", flags, "cl_uint", pipePacketSize, "cl_uint", pipeMaxPackets, "const cl_pipe_properties", properties, "cl_int", errcodeRet); Context *pContext = nullptr; const cl_mem_flags allValidFlags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; do { if ((pipePacketSize == 0) || (pipeMaxPackets == 0)) { retVal = CL_INVALID_PIPE_SIZE; break; } /* Are there some invalid flag bits? 
*/ if ((flags & (~allValidFlags)) != 0) { retVal = CL_INVALID_VALUE; break; } if (properties != nullptr) { retVal = CL_INVALID_VALUE; break; } retVal = validateObjects(WithCastToInternal(context, &pContext)); if (retVal != CL_SUCCESS) { break; } auto pDevice = pContext->getDevice(0); if (pipePacketSize > pDevice->getDeviceInfo().pipeMaxPacketSize) { retVal = CL_INVALID_PIPE_SIZE; break; } // create the pipe pipe = Pipe::create(pContext, flags, pipePacketSize, pipeMaxPackets, properties, retVal); } while (false); if (errcodeRet) { *errcodeRet = retVal; } DBG_LOG_INPUTS("pipe", pipe); TRACING_EXIT(clCreatePipe, &pipe); return pipe; } cl_int CL_API_CALL clGetPipeInfo(cl_mem pipe, cl_pipe_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetPipeInfo, &pipe, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("cl_mem", pipe, "cl_pipe_info", paramName, "size_t", paramValueSize, "void *", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "size_t*", paramValueSizeRet); retVal = validateObjects(pipe); if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetPipeInfo, &retVal); return retVal; } auto pPipeObj = castToObject(pipe); if (pPipeObj == nullptr) { retVal = CL_INVALID_MEM_OBJECT; TRACING_EXIT(clGetPipeInfo, &retVal); return retVal; } retVal = pPipeObj->getPipeInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetPipeInfo, &retVal); return retVal; } cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties(cl_context context, cl_device_id device, const cl_queue_properties *properties, cl_int *errcodeRet) { TRACING_ENTER(clCreateCommandQueueWithProperties, &context, &device, &properties, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "device", device, "properties", properties); cl_command_queue commandQueue = nullptr; ErrorCodeHelper err(errcodeRet, CL_SUCCESS); 
Context *pContext = nullptr; ClDevice *pDevice = nullptr; retVal = validateObjects( WithCastToInternal(context, &pContext), WithCastToInternal(device, &pDevice)); if (CL_SUCCESS != retVal) { err.set(retVal); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } if (!pContext->isDeviceAssociated(*pDevice)) { err.set(CL_INVALID_DEVICE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } auto minimumCreateDeviceQueueFlags = static_cast(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); auto tokenValue = properties ? *properties : 0; auto propertiesAddress = properties; while (tokenValue != 0) { if (tokenValue != CL_QUEUE_PROPERTIES && tokenValue != CL_QUEUE_SIZE && tokenValue != CL_QUEUE_PRIORITY_KHR && tokenValue != CL_QUEUE_THROTTLE_KHR && tokenValue != CL_QUEUE_SLICE_COUNT_INTEL && !isExtraToken(propertiesAddress)) { err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } propertiesAddress += 2; tokenValue = *propertiesAddress; } if (!verifyExtraTokens(pDevice, *pContext, properties)) { err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } auto commandQueueProperties = getCmdQueueProperties(properties); uint32_t maxOnDeviceQueueSize = pDevice->getDeviceInfo().queueOnDeviceMaxSize; uint32_t maxOnDeviceQueues = pDevice->getSharedDeviceInfo().maxOnDeviceQueues; if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { if (!(commandQueueProperties & static_cast(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))) { err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } if (!pDevice->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } if (commandQueueProperties & 
static_cast(CL_QUEUE_ON_DEVICE_DEFAULT)) { if (!(commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE))) { err.set(CL_INVALID_VALUE); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } else if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { if ((maxOnDeviceQueues == 0) || ((maxOnDeviceQueues == 1) && pContext->getDefaultDeviceQueue())) { err.set(CL_OUT_OF_RESOURCES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } if (getCmdQueueProperties(properties, CL_QUEUE_SIZE) > maxOnDeviceQueueSize) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { if (getCmdQueueProperties(properties, CL_QUEUE_PRIORITY_KHR)) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } if (commandQueueProperties & static_cast(CL_QUEUE_ON_DEVICE)) { if (getCmdQueueProperties(properties, CL_QUEUE_THROTTLE_KHR)) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } } if (getCmdQueueProperties(properties, CL_QUEUE_SLICE_COUNT_INTEL) > pDevice->getDeviceInfo().maxSliceCount) { err.set(CL_INVALID_QUEUE_PROPERTIES); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } auto maskedFlags = commandQueueProperties & minimumCreateDeviceQueueFlags; if (maskedFlags == minimumCreateDeviceQueueFlags) { commandQueue = DeviceQueue::create( pContext, pDevice, *properties, retVal); } else { commandQueue = CommandQueue::create( pContext, pDevice, properties, false, retVal); if (pContext->isProvidingPerformanceHints()) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, DRIVER_CALLS_INTERNAL_CL_FLUSH); if (castToObjectOrAbort(commandQueue)->isProfilingEnabled()) { 
pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED); if (pDevice->getDeviceInfo().preemptionSupported && pDevice->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE) { pContext->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION); } } } } if (!commandQueue) retVal = CL_OUT_OF_HOST_MEMORY; DBG_LOG_INPUTS("commandQueue", commandQueue, "properties", static_cast(getCmdQueueProperties(properties))); err.set(retVal); TRACING_EXIT(clCreateCommandQueueWithProperties, &commandQueue); return commandQueue; } cl_sampler CL_API_CALL clCreateSamplerWithProperties(cl_context context, const cl_sampler_properties *samplerProperties, cl_int *errcodeRet) { TRACING_ENTER(clCreateSamplerWithProperties, &context, &samplerProperties, &errcodeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "samplerProperties", samplerProperties); cl_sampler sampler = nullptr; retVal = validateObjects(context); if (CL_SUCCESS == retVal) { sampler = Sampler::create( castToObject(context), samplerProperties, retVal); } if (errcodeRet) { *errcodeRet = retVal; } TRACING_EXIT(clCreateSamplerWithProperties, &sampler); return sampler; } cl_int CL_API_CALL clUnloadCompiler() { TRACING_ENTER(clUnloadCompiler); cl_int retVal = CL_OUT_OF_HOST_MEMORY; API_ENTER(&retVal); TRACING_EXIT(clUnloadCompiler, &retVal); return retVal; } cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "device", device, "paramName", paramName, "inputValueSize", inputValueSize, "inputValue", NEO::FileLoggerInstance().infoPointerToString(inputValue, inputValueSize), "paramValueSize", paramValueSize, "paramValue", 
NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); Kernel *pKernel = nullptr; retVal = validateObjects(device, WithCastToInternal(kernel, &pKernel)); if (CL_SUCCESS != retVal) { return retVal; } switch (paramName) { case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE: case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE: case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: return pKernel->getSubGroupInfo(paramName, inputValueSize, inputValue, paramValueSize, paramValue, paramValueSizeRet); default: { retVal = CL_INVALID_VALUE; return retVal; } } } cl_int CL_API_CALL clGetDeviceAndHostTimer(cl_device_id device, cl_ulong *deviceTimestamp, cl_ulong *hostTimestamp) { TRACING_ENTER(clGetDeviceAndHostTimer, &device, &deviceTimestamp, &hostTimestamp); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "deviceTimestamp", deviceTimestamp, "hostTimestamp", hostTimestamp); do { ClDevice *pDevice = castToObject(device); if (pDevice == nullptr) { retVal = CL_INVALID_DEVICE; break; } if (deviceTimestamp == nullptr || hostTimestamp == nullptr) { retVal = CL_INVALID_VALUE; break; } if (!pDevice->getDeviceAndHostTimer(static_cast(deviceTimestamp), static_cast(hostTimestamp))) { retVal = CL_OUT_OF_RESOURCES; break; } } while (false); TRACING_EXIT(clGetDeviceAndHostTimer, &retVal); return retVal; } cl_int CL_API_CALL clGetHostTimer(cl_device_id device, cl_ulong *hostTimestamp) { TRACING_ENTER(clGetHostTimer, &device, &hostTimestamp); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "hostTimestamp", hostTimestamp); do { ClDevice *pDevice = castToObject(device); if (pDevice == nullptr) { retVal = CL_INVALID_DEVICE; break; } if (hostTimestamp == nullptr) { retVal = CL_INVALID_VALUE; break; } if (!pDevice->getHostTimer(static_cast(hostTimestamp))) { retVal = CL_OUT_OF_RESOURCES; break; } } while (false); TRACING_EXIT(clGetHostTimer, &retVal); return retVal; } cl_int CL_API_CALL 
clGetKernelSubGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetKernelSubGroupInfo, &kernel, &device, ¶mName, &inputValueSize, &inputValue, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("kernel", kernel, "device", device, "paramName", paramName, "inputValueSize", inputValueSize, "inputValue", NEO::FileLoggerInstance().infoPointerToString(inputValue, inputValueSize), "paramValueSize", paramValueSize, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); Kernel *pKernel = nullptr; retVal = validateObjects(device, WithCastToInternal(kernel, &pKernel)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clGetKernelSubGroupInfo, &retVal); return retVal; } retVal = pKernel->getSubGroupInfo(paramName, inputValueSize, inputValue, paramValueSize, paramValue, paramValueSizeRet); TRACING_EXIT(clGetKernelSubGroupInfo, &retVal); return retVal; } cl_int CL_API_CALL clSetDefaultDeviceCommandQueue(cl_context context, cl_device_id device, cl_command_queue commandQueue) { TRACING_ENTER(clSetDefaultDeviceCommandQueue, &context, &device, &commandQueue); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("context", context, "device", device, "commandQueue", commandQueue); Context *pContext = nullptr; retVal = validateObjects(WithCastToInternal(context, &pContext), device); if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); return retVal; } auto pDeviceQueue = castToObject(static_cast<_device_queue *>(commandQueue)); if (!pDeviceQueue) { retVal = CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); return retVal; } if (&pDeviceQueue->getContext() != pContext) { retVal = CL_INVALID_COMMAND_QUEUE; 
TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); return retVal; } pContext->setDefaultDeviceQueue(pDeviceQueue); retVal = CL_SUCCESS; TRACING_EXIT(clSetDefaultDeviceCommandQueue, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueSVMMigrateMem(cl_command_queue commandQueue, cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueSVMMigrateMem, &commandQueue, &numSvmPointers, &svmPointers, &sizes, &flags, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numSvmPointers", numSvmPointers, "svmPointers", NEO::FileLoggerInstance().infoPointerToString(svmPointers ? svmPointers[0] : 0, NEO::FileLoggerInstance().getInput(sizes, 0)), "sizes", NEO::FileLoggerInstance().getInput(sizes, 0), "flags", flags, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS != retVal) { TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } auto &device = pCommandQueue->getDevice(); if (!device.getHardwareInfo().capabilityTable.ftrSvm) { retVal = CL_INVALID_OPERATION; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } if (numSvmPointers == 0 || svmPointers == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } const cl_mem_migration_flags allValidFlags = CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED; if ((flags & (~allValidFlags)) != 0) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, 
&retVal); return retVal; } auto pSvmAllocMgr = pCommandQueue->getContext().getSVMAllocsManager(); UNRECOVERABLE_IF(pSvmAllocMgr == nullptr); for (uint32_t i = 0; i < numSvmPointers; i++) { auto svmData = pSvmAllocMgr->getSVMAlloc(svmPointers[i]); if (svmData == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } if (sizes != nullptr && sizes[i] != 0) { svmData = pSvmAllocMgr->getSVMAlloc(reinterpret_cast((size_t)svmPointers[i] + sizes[i] - 1)); if (svmData == nullptr) { retVal = CL_INVALID_VALUE; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } } } for (uint32_t i = 0; i < numEventsInWaitList; i++) { auto pEvent = castToObject(eventWaitList[i]); if (pEvent->getContext() != &pCommandQueue->getContext()) { retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } } retVal = pCommandQueue->enqueueSVMMigrateMem(numSvmPointers, svmPointers, sizes, flags, numEventsInWaitList, eventWaitList, event); TRACING_EXIT(clEnqueueSVMMigrateMem, &retVal); return retVal; } cl_kernel CL_API_CALL clCloneKernel(cl_kernel sourceKernel, cl_int *errcodeRet) { TRACING_ENTER(clCloneKernel, &sourceKernel, &errcodeRet); Kernel *pSourceKernel = nullptr; Kernel *pClonedKernel = nullptr; auto retVal = validateObjects(WithCastToInternal(sourceKernel, &pSourceKernel)); API_ENTER(&retVal); DBG_LOG_INPUTS("sourceKernel", sourceKernel); if (CL_SUCCESS == retVal) { pClonedKernel = Kernel::create(pSourceKernel->getProgram(), pSourceKernel->getKernelInfo(), &retVal); UNRECOVERABLE_IF((pClonedKernel == nullptr) || (retVal != CL_SUCCESS)); retVal = pClonedKernel->cloneKernel(pSourceKernel); } if (errcodeRet) { *errcodeRet = retVal; } if (pClonedKernel != nullptr) { gtpinNotifyKernelCreate(pClonedKernel); } TRACING_EXIT(clCloneKernel, (cl_kernel *)&pClonedKernel); return pClonedKernel; } CL_API_ENTRY cl_int CL_API_CALL clEnqueueVerifyMemoryINTEL(cl_command_queue commandQueue, const void *allocationPtr, const 
void *expectedData, size_t sizeOfComparison, cl_uint comparisonMode) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "allocationPtr", allocationPtr, "expectedData", expectedData, "sizeOfComparison", sizeOfComparison, "comparisonMode", comparisonMode); if (sizeOfComparison == 0 || expectedData == nullptr || allocationPtr == nullptr) { retVal = CL_INVALID_VALUE; return retVal; } CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue)); if (retVal != CL_SUCCESS) { return retVal; } auto &csr = pCommandQueue->getGpgpuCommandStreamReceiver(); auto status = csr.expectMemory(allocationPtr, expectedData, sizeOfComparison, comparisonMode); return status ? CL_SUCCESS : CL_INVALID_VALUE; } cl_int CL_API_CALL clAddCommentINTEL(cl_device_id device, const char *comment) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "comment", comment); ClDevice *pDevice = nullptr; retVal = validateObjects(WithCastToInternal(device, &pDevice)); if (retVal != CL_SUCCESS) { return retVal; } auto aubCenter = pDevice->getRootDeviceEnvironment().aubCenter.get(); if (!comment || (aubCenter && !aubCenter->getAubManager())) { retVal = CL_INVALID_VALUE; } if (retVal == CL_SUCCESS && aubCenter) { aubCenter->getAubManager()->addComment(comment); } return retVal; } cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL( cl_device_id device, cl_program program, const char *globalVariableName, size_t *globalVariableSizeRet, void **globalVariablePointerRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "program", program, "globalVariableName", globalVariableName, "globalVariablePointerRet", globalVariablePointerRet); retVal = validateObjects(device, program); if (globalVariablePointerRet == nullptr) { retVal = CL_INVALID_ARG_VALUE; } if (CL_SUCCESS == retVal) { Program *pProgram = (Program *)(program); const auto &symbols = 
pProgram->getSymbols(); auto symbolIt = symbols.find(globalVariableName); if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment == NEO::SegmentType::Instructions)) { retVal = CL_INVALID_ARG_VALUE; } else { if (globalVariableSizeRet != nullptr) { *globalVariableSizeRet = symbolIt->second.symbol.size; } *globalVariablePointerRet = reinterpret_cast(symbolIt->second.gpuAddress); } } return retVal; } cl_int CL_API_CALL clGetDeviceFunctionPointerINTEL( cl_device_id device, cl_program program, const char *functionName, cl_ulong *functionPointerRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("device", device, "program", program, "functionName", functionName, "functionPointerRet", functionPointerRet); retVal = validateObjects(device, program); if ((CL_SUCCESS == retVal) && (functionPointerRet == nullptr)) { retVal = CL_INVALID_ARG_VALUE; } if (CL_SUCCESS == retVal) { Program *pProgram = (Program *)(program); const auto &symbols = pProgram->getSymbols(); auto symbolIt = symbols.find(functionName); if ((symbolIt == symbols.end()) || (symbolIt->second.symbol.segment != NEO::SegmentType::Instructions)) { retVal = CL_INVALID_ARG_VALUE; } else { *functionPointerRet = static_cast(symbolIt->second.gpuAddress); } } return retVal; } cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, cl_uint specId, size_t specSize, const void *specValue) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("program", program, "specId", specId, "specSize", specSize, "specValue", specValue); Program *pProgram = nullptr; retVal = validateObjects(WithCastToInternal(program, &pProgram), specValue); if (retVal == CL_SUCCESS) { retVal = pProgram->setProgramSpecializationConstant(specId, specSize, specValue); } return retVal; } cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, size_t 
*suggestedLocalWorkSize) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel, "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0), "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1), "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2), "globalWorkSize", NEO::FileLoggerInstance().getSizes(globalWorkSize, workDim, true), "suggestedLocalWorkSize", suggestedLocalWorkSize); retVal = validateObjects(commandQueue, kernel); if (CL_SUCCESS != retVal) { return retVal; } if ((workDim == 0) || (workDim > 3)) { retVal = CL_INVALID_WORK_DIMENSION; return retVal; } if (globalWorkOffset == nullptr) { retVal = CL_INVALID_GLOBAL_OFFSET; return retVal; } if (globalWorkSize == nullptr) { retVal = CL_INVALID_GLOBAL_WORK_SIZE; return retVal; } auto pKernel = castToObjectOrAbort(kernel); if (!pKernel->isPatched()) { retVal = CL_INVALID_KERNEL; return retVal; } if (suggestedLocalWorkSize == nullptr) { retVal = CL_INVALID_VALUE; return retVal; } pKernel->getSuggestedLocalWorkSize(workDim, globalWorkSize, globalWorkOffset, suggestedLocalWorkSize); return retVal; } cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL(cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *localWorkSize, size_t *suggestedWorkGroupCount) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel, "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0), "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1), "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2), "localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true), "suggestedWorkGroupCount", suggestedWorkGroupCount); retVal = validateObjects(commandQueue, kernel); if (CL_SUCCESS != retVal) { return 
retVal; } if ((workDim == 0) || (workDim > 3)) { retVal = CL_INVALID_WORK_DIMENSION; return retVal; } if (globalWorkOffset == nullptr) { retVal = CL_INVALID_GLOBAL_OFFSET; return retVal; } if (localWorkSize == nullptr) { retVal = CL_INVALID_WORK_GROUP_SIZE; return retVal; } auto pKernel = castToObjectOrAbort(kernel); if (!pKernel->isPatched()) { retVal = CL_INVALID_KERNEL; return retVal; } if (suggestedWorkGroupCount == nullptr) { retVal = CL_INVALID_VALUE; return retVal; } *suggestedWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, localWorkSize); return retVal; } cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *workgroupCount, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "cl_kernel", kernel, "globalWorkOffset[0]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 0), "globalWorkOffset[1]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 1), "globalWorkOffset[2]", NEO::FileLoggerInstance().getInput(globalWorkOffset, 2), "workgroupCount", NEO::FileLoggerInstance().getSizes(workgroupCount, workDim, false), "localWorkSize", NEO::FileLoggerInstance().getSizes(localWorkSize, workDim, true), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", NEO::FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; Kernel *pKernel = nullptr; retVal = validateObjects( WithCastToInternal(commandQueue, &pCommandQueue), WithCastToInternal(kernel, &pKernel), EventWaitList(numEventsInWaitList, eventWaitList)); if (CL_SUCCESS != retVal) { return retVal; } size_t globalWorkSize[3]; for (size_t i = 0; i < workDim; i++) { globalWorkSize[i] = workgroupCount[i] * 
localWorkSize[i]; } if (pKernel->getExecutionType() == KernelExecutionType::Concurrent) { size_t requestedNumberOfWorkgroups = 1; for (size_t i = 0; i < workDim; i++) { requestedNumberOfWorkgroups *= workgroupCount[i]; } size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount(workDim, localWorkSize); if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) { retVal = CL_INVALID_VALUE; return retVal; } } if (pKernel->isUsingSyncBuffer()) { if (pKernel->getExecutionType() != KernelExecutionType::Concurrent) { retVal = CL_INVALID_KERNEL; return retVal; } pCommandQueue->getDevice().getSpecializedDevice()->allocateSyncBufferHandler(); } TakeOwnershipWrapper kernelOwnership(*pKernel, gtpinIsGTPinInitialized()); if (gtpinIsGTPinInitialized()) { gtpinNotifyKernelSubmit(kernel, pCommandQueue); } retVal = pCommandQueue->enqueueKernel( kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); DBG_LOG_INPUTS("event", NEO::FileLoggerInstance().getEvents(reinterpret_cast(event), 1u)); return retVal; } compute-runtime-20.13.16352/opencl/source/api/api.h000066400000000000000000000672531363734646600216450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/api_enter.h" #include "opencl/source/api/dispatch.h" #include "CL/cl.h" #include "CL/cl_gl.h" #ifdef __cplusplus extern "C" { #endif cl_int CL_API_CALL clGetPlatformIDs( cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms); cl_int CL_API_CALL clGetPlatformInfo( cl_platform_id platform, cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetDeviceIDs( cl_platform_id platform, cl_device_type deviceType, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices); cl_int CL_API_CALL clGetDeviceInfo( cl_device_id device, 
cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clCreateSubDevices( cl_device_id inDevice, const cl_device_partition_property *properties, cl_uint numDevices, cl_device_id *outDevices, cl_uint *numDevicesRet); cl_int CL_API_CALL clRetainDevice( cl_device_id device); cl_int CL_API_CALL clReleaseDevice( cl_device_id device); cl_context CL_API_CALL clCreateContext( const cl_context_properties *properties, cl_uint numDevices, const cl_device_id *devices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet); cl_context CL_API_CALL clCreateContextFromType( const cl_context_properties *properties, cl_device_type deviceType, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *userData, cl_int *errcodeRet); cl_int CL_API_CALL clRetainContext( cl_context context); cl_int CL_API_CALL clReleaseContext( cl_context context); cl_int CL_API_CALL clGetContextInfo( cl_context context, cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetGLContextInfoKHR( const cl_context_properties *properties, cl_gl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_command_queue CL_API_CALL clCreateCommandQueue( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int *errcodeRet); cl_int CL_API_CALL clRetainCommandQueue( cl_command_queue commandQueue); cl_int CL_API_CALL clReleaseCommandQueue( cl_command_queue commandQueue); cl_int CL_API_CALL clGetCommandQueueInfo( cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); // deprecated OpenCL 1.0 cl_int CL_API_CALL clSetCommandQueueProperty( cl_command_queue commandQueue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *oldProperties); cl_mem 
CL_API_CALL clCreateBuffer( cl_context context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateBufferWithPropertiesINTEL( cl_context context, const cl_mem_properties_intel *properties, size_t size, void *hostPtr, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateSubBuffer( cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bufferCreateType, const void *bufferCreateInfo, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateImage( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateImageWithPropertiesINTEL( cl_context context, cl_mem_properties_intel *properties, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateImage2D( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageRowPitch, void *hostPtr, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateImage3D( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageDepth, size_t imageRowPitch, size_t imageSlicePitch, void *hostPtr, cl_int *errcodeRet); cl_int CL_API_CALL clRetainMemObject( cl_mem memobj); cl_int CL_API_CALL clReleaseMemObject( cl_mem memobj); cl_int CL_API_CALL clGetSupportedImageFormats( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats); cl_int CL_API_CALL clGetMemObjectInfo( cl_mem memobj, cl_mem_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetImageInfo( cl_mem image, cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetImageParamsINTEL( cl_context 
context, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch); cl_int CL_API_CALL clSetMemObjectDestructorCallback( cl_mem memobj, void(CL_CALLBACK *funcNotify)(cl_mem, void *), void *userData); cl_sampler CL_API_CALL clCreateSampler( cl_context context, cl_bool normalizedCoords, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_int *errcodeRet); cl_int CL_API_CALL clRetainSampler( cl_sampler sampler); cl_int CL_API_CALL clReleaseSampler( cl_sampler sampler); cl_int CL_API_CALL clGetSamplerInfo( cl_sampler sampler, cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_program CL_API_CALL clCreateProgramWithSource( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcodeRet); cl_program CL_API_CALL clCreateProgramWithBinary( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int *errcodeRet); cl_program CL_API_CALL clCreateProgramWithIL(cl_context context, const void *il, size_t length, cl_int *errcodeRet); cl_program CL_API_CALL clCreateProgramWithBuiltInKernels( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *kernelNames, cl_int *errcodeRet); cl_int CL_API_CALL clRetainProgram( cl_program program); cl_int CL_API_CALL clReleaseProgram( cl_program program); cl_int CL_API_CALL clBuildProgram( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData); cl_int CL_API_CALL clCompileProgram( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData); 
cl_program CL_API_CALL clLinkProgram( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData, cl_int *errcodeRet); cl_int CL_API_CALL clUnloadPlatformCompiler( cl_platform_id platform); // deprecated OpenCL 1.1 cl_int CL_API_CALL clUnloadCompiler(void); cl_int CL_API_CALL clGetProgramInfo( cl_program program, cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetProgramBuildInfo( cl_program program, cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_kernel CL_API_CALL clCreateKernel( cl_program program, const char *kernelName, cl_int *errcodeRet); cl_int CL_API_CALL clCreateKernelsInProgram( cl_program program, cl_uint numKernels, cl_kernel *kernels, cl_uint *numKernelsRet); cl_int CL_API_CALL clRetainKernel( cl_kernel kernel); cl_int CL_API_CALL clReleaseKernel( cl_kernel kernel); cl_int CL_API_CALL clSetKernelArg( cl_kernel kernel, cl_uint argIndex, size_t argSize, const void *argValue); cl_int CL_API_CALL clGetKernelInfo( cl_kernel kernel, cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetKernelArgInfo( cl_kernel kernel, cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clGetKernelWorkGroupInfo( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clWaitForEvents( cl_uint numEvents, const cl_event *eventList); cl_int CL_API_CALL clGetEventInfo( cl_event event, cl_event_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_event CL_API_CALL 
clCreateUserEvent( cl_context context, cl_int *errcodeRet); cl_int CL_API_CALL clRetainEvent( cl_event event); cl_int CL_API_CALL clReleaseEvent( cl_event event); cl_int CL_API_CALL clSetUserEventStatus( cl_event event, cl_int executionStatus); cl_int CL_API_CALL clSetEventCallback( cl_event event, cl_int commandExecCallbackType, void(CL_CALLBACK *funcNotify)(cl_event, cl_int, void *), void *userData); cl_int CL_API_CALL clGetEventProfilingInfo( cl_event event, cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clFlush( cl_command_queue commandQueue); cl_int CL_API_CALL clFinish( cl_command_queue commandQueue); cl_int CL_API_CALL clEnqueueReadBuffer( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, size_t offset, size_t cb, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueReadBufferRect( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueWriteBuffer( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueWriteBufferRect( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueFillBuffer( cl_command_queue commandQueue, cl_mem buffer, const void *pattern, size_t 
patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyBuffer( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, size_t srcOffset, size_t dstOffset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyBufferRect( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueReadImage( cl_command_queue commandQueue, cl_mem image, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueWriteImage( cl_command_queue commandQueue, cl_mem image, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueFillImage( cl_command_queue commandQueue, cl_mem image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyImage( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueCopyImageToBuffer( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event 
*event); cl_int CL_API_CALL clEnqueueCopyBufferToImage( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); void *CL_API_CALL clEnqueueMapBuffer( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet); void *CL_API_CALL clEnqueueMapImage( cl_command_queue commandQueue, cl_mem image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet); cl_int CL_API_CALL clEnqueueUnmapMemObject( cl_command_queue commandQueue, cl_mem memobj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueMigrateMemObjects( cl_command_queue commandQueue, cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueNDRangeKernel( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueTask( cl_command_queue commandQueue, cl_kernel kernel, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueNativeKernel( cl_command_queue commandQueue, void(CL_CALLBACK *userFunc)(void *), void *args, size_t cbArgs, cl_uint numMemObjects, const cl_mem *memList, const void **argsMemLoc, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); // deprecated OpenCL 
1.1 cl_int CL_API_CALL clEnqueueMarker( cl_command_queue commandQueue, cl_event *event); // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueWaitForEvents( cl_command_queue commandQueue, cl_uint numEvents, const cl_event *eventList); // deprecated OpenCL 1.1 cl_int CL_API_CALL clEnqueueBarrier( cl_command_queue commandQueue); cl_int CL_API_CALL clEnqueueMarkerWithWaitList( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueBarrierWithWaitList( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); // deprecated OpenCL 1.1 void *CL_API_CALL clGetExtensionFunctionAddress( const char *funcName); void *CL_API_CALL clGetExtensionFunctionAddressForPlatform( cl_platform_id platform, const char *funcName); // CL-GL Sharing cl_mem CL_API_CALL clCreateFromGLBuffer( cl_context context, cl_mem_flags flags, cl_GLuint bufobj, int *errcodeRet); // OpenCL 1.2 cl_mem CL_API_CALL clCreateFromGLTexture( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateFromGLTexture2D( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateFromGLTexture3D( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); cl_mem CL_API_CALL clCreateFromGLRenderbuffer( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcodeRet); cl_int CL_API_CALL clGetGLObjectInfo( cl_mem memobj, cl_gl_object_type *glObjectType, cl_GLuint *glObjectName); cl_int CL_API_CALL clGetGLTextureInfo( cl_mem memobj, cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clEnqueueAcquireGLObjects( cl_command_queue 
commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueReleaseGLObjects( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); // OpenCL 2.0 void *CL_API_CALL clSVMAlloc( cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment); void CL_API_CALL clSVMFree( cl_context context, void *svmPointer); cl_int CL_API_CALL clEnqueueSVMFree( cl_command_queue commandQueue, cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)( cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMMemcpy( cl_command_queue commandQueue, cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMMemFill( cl_command_queue commandQueue, void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMMap( cl_command_queue commandQueue, cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clEnqueueSVMUnmap( cl_command_queue commandQueue, void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int CL_API_CALL clSetKernelArgSVMPointer( cl_kernel kernel, cl_uint argIndex, const void *argValue); cl_int CL_API_CALL clSetKernelExecInfo( cl_kernel kernel, cl_kernel_exec_info paramName, size_t paramValueSize, const void *paramValue); cl_mem CL_API_CALL clCreatePipe( cl_context context, cl_mem_flags flags, cl_uint 
pipePacketSize, cl_uint pipeMaxPackets, const cl_pipe_properties *properties, cl_int *errcodeRet); cl_int CL_API_CALL clGetPipeInfo( cl_mem pipe, cl_pipe_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_command_queue CL_API_CALL clCreateCommandQueueWithProperties( cl_context context, cl_device_id device, const cl_queue_properties *properties, cl_int *errcodeRet); cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR( cl_context context, cl_device_id device, const cl_queue_properties_khr *properties, cl_int *errcodeRet); cl_sampler CL_API_CALL clCreateSamplerWithProperties( cl_context context, const cl_sampler_properties *samplerProperties, cl_int *errcodeRet); cl_int CL_API_CALL clEnqueueVerifyMemoryINTEL( cl_command_queue commandQueue, const void *allocationPtr, const void *expectedData, size_t sizeOfComparison, cl_uint comparisonMode); cl_int CL_API_CALL clAddCommentINTEL(cl_device_id device, const char *comment); // OpenCL 2.1 cl_int CL_API_CALL clGetDeviceAndHostTimer(cl_device_id device, cl_ulong *deviceTimestamp, cl_ulong *hostTimestamp); cl_int CL_API_CALL clGetHostTimer(cl_device_id device, cl_ulong *hostTimestamp); cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int CL_API_CALL clSetDefaultDeviceCommandQueue(cl_context context, cl_device_id device, cl_command_queue commandQueue); cl_int CL_API_CALL clEnqueueSVMMigrateMem(cl_command_queue commandQueue, cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_kernel CL_API_CALL clCloneKernel(cl_kernel sourceKernel, cl_int *errcodeRet); extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreatePerfCountersCommandQueueINTEL( cl_context context, 
cl_device_id device, cl_command_queue_properties properties, cl_uint configuration, cl_int *errcodeRet); extern CL_API_ENTRY cl_int CL_API_CALL clSetPerformanceConfigurationINTEL( cl_device_id device, cl_uint count, cl_uint *offsets, cl_uint *values); extern CL_API_ENTRY cl_event CL_API_CALL clCreateEventFromGLsyncKHR( cl_context context, cl_GLsync sync, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithILKHR( cl_context context, const void *il, size_t length, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; void *clHostMemAllocINTEL( cl_context context, cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet); void *clDeviceMemAllocINTEL( cl_context context, cl_device_id device, cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet); void *clSharedMemAllocINTEL( cl_context context, cl_device_id device, cl_mem_properties_intel *properties, size_t size, cl_uint alignment, cl_int *errcodeRet); cl_int clMemFreeINTEL( cl_context context, const void *ptr); cl_int clMemBlockingFreeINTEL( cl_context context, void *ptr); cl_int clGetMemAllocInfoINTEL( cl_context context, const void *ptr, cl_mem_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int clSetKernelArgMemPointerINTEL( cl_kernel kernel, cl_uint argIndex, const void *argValue); cl_int clEnqueueMemsetINTEL( cl_command_queue commandQueue, void *dstPtr, cl_int value, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int clEnqueueMemFillINTEL( cl_command_queue commandQueue, void *dstPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int clEnqueueMemcpyINTEL( cl_command_queue commandQueue, cl_bool blocking, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event 
*event); cl_int clEnqueueMigrateMemINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int clEnqueueMemAdviseINTEL( cl_command_queue commandQueue, const void *ptr, size_t size, cl_mem_advice_intel advice, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); } cl_int CL_API_CALL clGetDeviceFunctionPointerINTEL( cl_device_id device, cl_program program, const char *functionName, cl_ulong *functionPointerRet); cl_int CL_API_CALL clGetDeviceGlobalVariablePointerINTEL( cl_device_id device, cl_program program, const char *globalVariableName, size_t *globalVariableSizeRet, void **globalVariablePointerRet); cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, size_t *suggestedLocalWorkSize); cl_int CL_API_CALL clGetKernelMaxConcurrentWorkGroupCountINTEL( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *localWorkSize, size_t *suggestedWorkGroupCount); cl_int CL_API_CALL clEnqueueNDCountKernelINTEL( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *workgroupCount, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); // OpenCL 2.2 cl_int CL_API_CALL clSetProgramSpecializationConstant( cl_program program, cl_uint specId, size_t specSize, const void *specValue); compute-runtime-20.13.16352/opencl/source/api/api_enter.h000066400000000000000000000010141363734646600230210ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/perf_profiler.h" #include "opencl/source/utilities/logger.h" #define API_ENTER(retValPointer) \ 
LoggerApiEnterWrapper::enabled()> ApiWrapperForSingleCall(__FUNCTION__, retValPointer) #if KMD_PROFILING == 1 #undef API_ENTER #define API_ENTER(x) \ PerfProfilerApiWrapper globalPerfProfilersWrapperInstanceForSingleApiFunction(__FUNCTION__) #endif compute-runtime-20.13.16352/opencl/source/api/cl_types.h000066400000000000000000000020401363734646600226750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/dispatch.h" #include struct ClDispatch { SEntryPointsTable dispatch; ClDispatch() : dispatch(globalDispatchTable) { } }; struct _cl_accelerator_intel : public ClDispatch { }; struct _cl_command_queue : public ClDispatch { }; // device_queue is a type used internally struct _device_queue : public _cl_command_queue { }; typedef _device_queue *device_queue; struct _cl_context : public ClDispatch { bool isSharedContext = false; }; struct _cl_device_id : public ClDispatch { }; struct _cl_event : public ClDispatch { }; struct _cl_kernel : public ClDispatch { }; struct _cl_mem : public ClDispatch { }; struct _cl_platform_id : public ClDispatch { }; struct _cl_program : public ClDispatch { }; struct _cl_sampler : public ClDispatch { }; template inline bool isValidObject(Type object) { return object && object->dispatch.icdDispatch == &icdGlobalDispatchTable; } compute-runtime-20.13.16352/opencl/source/api/dispatch.cpp000066400000000000000000000134261363734646600232170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "dispatch.h" #include "api.h" SDispatchTable icdGlobalDispatchTable = { clGetPlatformIDs, clGetPlatformInfo, clGetDeviceIDs, clGetDeviceInfo, clCreateContext, clCreateContextFromType, clRetainContext, clReleaseContext, clGetContextInfo, clCreateCommandQueue, clRetainCommandQueue, clReleaseCommandQueue, clGetCommandQueueInfo, clSetCommandQueueProperty, clCreateBuffer, clCreateImage2D, 
clCreateImage3D, clRetainMemObject, clReleaseMemObject, clGetSupportedImageFormats, clGetMemObjectInfo, clGetImageInfo, clCreateSampler, clRetainSampler, clReleaseSampler, clGetSamplerInfo, clCreateProgramWithSource, clCreateProgramWithBinary, clRetainProgram, clReleaseProgram, clBuildProgram, clUnloadCompiler, clGetProgramInfo, clGetProgramBuildInfo, clCreateKernel, clCreateKernelsInProgram, clRetainKernel, clReleaseKernel, clSetKernelArg, clGetKernelInfo, clGetKernelWorkGroupInfo, clWaitForEvents, clGetEventInfo, clRetainEvent, clReleaseEvent, clGetEventProfilingInfo, clFlush, clFinish, clEnqueueReadBuffer, clEnqueueWriteBuffer, clEnqueueCopyBuffer, clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapBuffer, clEnqueueMapImage, clEnqueueUnmapMemObject, clEnqueueNDRangeKernel, clEnqueueTask, clEnqueueNativeKernel, clEnqueueMarker, clEnqueueWaitForEvents, clEnqueueBarrier, clGetExtensionFunctionAddress, /* cl_khr_gl_sharing */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, /* cl_khr_d3d10_sharing */ nullptr, // clGetDeviceIDsFromD3D10KHR, nullptr, // clCreateFromD3D10BufferKHR, nullptr, // clCreateFromD3D10Texture2DKHR, nullptr, // clCreateFromD3D10Texture3DKHR, nullptr, // clEnqueueAcquireD3D10ObjectsKHR, nullptr, // clEnqueueReleaseD3D10ObjectsKHR, /* OpenCL 1.1 */ clSetEventCallback, clCreateSubBuffer, clSetMemObjectDestructorCallback, clCreateUserEvent, clSetUserEventStatus, clEnqueueReadBufferRect, clEnqueueWriteBufferRect, clEnqueueCopyBufferRect, /* cl_ext_device_fission */ nullptr, //clCreateSubDevicesEXT, nullptr, //clRetainDeviceEXT, nullptr, //clReleaseDeviceEXT, /* cl_khr_gl_event */ nullptr, /* OpenCL 1.2 */ clCreateSubDevices, clRetainDevice, clReleaseDevice, clCreateImage, clCreateProgramWithBuiltInKernels, clCompileProgram, clLinkProgram, clUnloadPlatformCompiler, clGetKernelArgInfo, clEnqueueFillBuffer, clEnqueueFillImage, 
clEnqueueMigrateMemObjects, clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, clGetExtensionFunctionAddressForPlatform, nullptr, /* cl_khr_d3d11_sharing */ nullptr, // clGetDeviceIDsFromD3D11KHR, nullptr, // clCreateFromD3D11BufferKHR, nullptr, // clCreateFromD3D11Texture2DKHR, nullptr, // clCreateFromD3D11Texture3DKHR, nullptr, // clCreateFromDX9MediaSurfaceKHR, nullptr, // clEnqueueAcquireD3D11ObjectsKHR, nullptr, // clEnqueueReleaseD3D11ObjectsKHR, /* cl_khr_dx9_media_sharing */ nullptr, // clGetDeviceIDsFromDX9MediaAdapterKHR, nullptr, // clEnqueueAcquireDX9MediaSurfacesKHR, nullptr, // clEnqueueReleaseDX9MediaSurfacesKHR, /* cl_khr_egl_image */ nullptr, //clCreateFromEGLImageKHR, nullptr, //clEnqueueAcquireEGLObjectsKHR, nullptr, //clEnqueueReleaseEGLObjectsKHR, /* cl_khr_egl_event */ nullptr, //clCreateEventFromEGLSyncKHR, /* OpenCL 2.0 */ clCreateCommandQueueWithProperties, clCreatePipe, clGetPipeInfo, clSVMAlloc, clSVMFree, clEnqueueSVMFree, clEnqueueSVMMemcpy, clEnqueueSVMMemFill, clEnqueueSVMMap, clEnqueueSVMUnmap, clCreateSamplerWithProperties, clSetKernelArgSVMPointer, clSetKernelExecInfo, clGetKernelSubGroupInfoKHR, /* OpenCL 2.1 */ clCloneKernel, clCreateProgramWithIL, clEnqueueSVMMigrateMem, clGetDeviceAndHostTimer, clGetHostTimer, clGetKernelSubGroupInfo, clSetDefaultDeviceCommandQueue, /* OpenCL 2.2 */ nullptr, // clSetProgramReleaseCallback clSetProgramSpecializationConstant}; SCRTDispatchTable crtGlobalDispatchTable = { clGetKernelArgInfo, nullptr, // clGetDeviceIDsFromDX9INTEL, nullptr, // clCreateFromDX9MediaSurfaceINTEL, nullptr, // clEnqueueAcquireDX9ObjectsINTEL, nullptr, // clEnqueueReleaseDX9ObjectsINTEL, clGetImageParamsINTEL, clCreatePerfCountersCommandQueueINTEL, clCreateAcceleratorINTEL, clGetAcceleratorInfoINTEL, clRetainAcceleratorINTEL, clReleaseAcceleratorINTEL, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, clSetPerformanceConfigurationINTEL}; SEntryPointsTable 
globalDispatchTable = {&icdGlobalDispatchTable, &crtGlobalDispatchTable}; compute-runtime-20.13.16352/opencl/source/api/dispatch.h000066400000000000000000001341631363734646600226660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include "CL/cl_ext.h" #include "CL/cl_ext_intel.h" #include "CL/cl_gl.h" #include "CL/cl_gl_ext.h" #if defined(_WIN32) #include #include "CL/cl_d3d10.h" #include "CL/cl_d3d11.h" #include "CL/cl_dx9_media_sharing.h" #define CL_DX9_MEDIA_SHARING_INTEL_EXT #include "shared/source/os_interface/windows/windows_wrapper.h" #include "CL/cl_dx9_media_sharing_intel.h" #else #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 #define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C #define CL_MEM_D3D10_RESOURCE_KHR 0x4015 typedef cl_uint cl_dx9_device_source_intel; typedef cl_uint cl_dx9_device_set_intel; typedef cl_uint cl_dx9_media_adapter_type_khr; typedef cl_uint cl_dx9_media_adapter_set_khr; typedef cl_uint cl_d3d10_device_source_khr; typedef cl_uint cl_d3d10_device_set_khr; typedef void *IDirect3DSurface9; typedef void *ID3D10Buffer; typedef void *ID3D10Texture2D; typedef void *ID3D10Texture3D; typedef unsigned int UINT; typedef cl_uint cl_d3d11_device_source_khr; typedef cl_uint cl_d3d11_device_set_khr; typedef void *ID3D11Buffer; typedef void *ID3D11Texture2D; typedef void *ID3D11Texture3D; typedef void *HANDLE; #endif typedef cl_bitfield cl_queue_properties_khr; typedef void(CL_CALLBACK *ctxt_logging_fn)(const char *, const void *, size_t, void *); typedef void(CL_CALLBACK *prog_logging_fn)(cl_program, void *); typedef void(CL_CALLBACK *evnt_logging_fn)(cl_event, cl_int, void *); typedef void(CL_CALLBACK *memobj_logging_fn)(cl_mem, void *); typedef void(CL_CALLBACK *svmfree_logging_fn)(cl_command_queue, cl_uint, void *[], void *); /* * * function pointer typedefs * */ // Platform APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetPlatformIDs)( 
cl_uint numEntries, cl_platform_id *platforms, cl_uint *numPlatforms) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetPlatformInfo)( cl_platform_id platform, cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Device APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetDeviceIDs)( cl_platform_id platform, cl_device_type deviceType, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetDeviceInfo)( cl_device_id device, cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Context APIs typedef CL_API_ENTRY cl_context(CL_API_CALL *KHRpfn_clCreateContext)( const cl_context_properties *properties, cl_uint numDevices, const cl_device_id *devices, ctxt_logging_fn funcNotify, void *userData, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_context(CL_API_CALL *KHRpfn_clCreateContextFromType)( const cl_context_properties *properties, cl_device_type deviceType, ctxt_logging_fn funcNotify, void *userData, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetContextInfo)( cl_context context, cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Command Queue APIs typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *KHRpfn_clCreateCommandQueue)( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainCommandQueue)( 
cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseCommandQueue)( cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetCommandQueueInfo)( cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetCommandQueueProperty)( cl_command_queue commandQueue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *oldProperties) CL_API_SUFFIX__VERSION_1_0; // Memory Object APIs typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateBuffer)( cl_context context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateImage2D)( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageRowPitch, void *hostPtr, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateImage3D)( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, size_t imageWidth, size_t imageHeight, size_t imageDepth, size_t imageRowPitch, size_t imageSlicePitch, void *hostPtr, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetSupportedImageFormats)( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats) 
CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetMemObjectInfo)( cl_mem memobj, cl_mem_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetImageInfo)( cl_mem image, cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Sampler APIs typedef CL_API_ENTRY cl_sampler(CL_API_CALL *KHRpfn_clCreateSampler)( cl_context context, cl_bool normalizedCoords, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_sampler(CL_API_CALL *KHRpfn_clCreateSamplerWithProperties)( cl_context context, const cl_sampler_properties *samplerProperties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreatePipe)( cl_context context, cl_mem_flags flags, cl_uint pipePacketSize, cl_uint pipeMaxPackets, const cl_pipe_properties *properties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetPipeInfo)( cl_mem image, cl_pipe_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetSamplerInfo)( cl_sampler sampler, cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Program Object APIs typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithSource)( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY 
cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithBinary)( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clBuildProgram)( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, prog_logging_fn funcNotify, void *userData) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int(CL_API_CALL *KHRpfn_clUnloadCompiler)() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetProgramInfo)( cl_program program, cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetProgramBuildInfo)( cl_program program, cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Kernel Object APIs typedef CL_API_ENTRY cl_kernel(CL_API_CALL *KHRpfn_clCreateKernel)( cl_program program, const char *kernelName, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCreateKernelsInProgram)( cl_program program, cl_uint numKernels, cl_kernel *kernels, cl_uint *numKernelsRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetKernelArg)( cl_kernel kernel, cl_uint 
argIndex, size_t argSize, const void *argValue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelInfo)( cl_kernel kernel, cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelWorkGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Event Object APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clWaitForEvents)( cl_uint numEvents, const cl_event *eventList) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetEventInfo)( cl_event event, cl_event_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainEvent)( cl_event event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseEvent)( cl_event event) CL_API_SUFFIX__VERSION_1_0; // Profiling APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetEventProfilingInfo)( cl_event event, cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; // Flush and Finish APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clFlush)( cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clFinish)( cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_1_0; // Enqueued Commands APIs typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReadBuffer)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, size_t offset, size_t cb, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueWriteBuffer)( cl_command_queue 
commandQueue, cl_mem buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyBuffer)( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, size_t srcOffset, size_t dstOffset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReadImage)( cl_command_queue commandQueue, cl_mem image, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueWriteImage)( cl_command_queue commandQueue, cl_mem image, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyImage)( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstImage, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyImageToBuffer)( cl_command_queue commandQueue, cl_mem srcImage, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyBufferToImage)( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t 
*region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clEnqueueMapBuffer)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet)CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clEnqueueMapImage)( cl_command_queue commandQueue, cl_mem image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int *errcodeRet)CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueUnmapMemObject)( cl_command_queue commandQueue, cl_mem memobj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueNDRangeKernel)( cl_command_queue commandQueue, cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueTask)( cl_command_queue commandQueue, cl_kernel kernel, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueNativeKernel)( cl_command_queue commandQueue, void(CL_CALLBACK *userFunc)(void *), void *args, size_t cbArgs, cl_uint numMemObjects, const cl_mem *memList, const void **argsMemLoc, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED 
cl_int(CL_API_CALL *KHRpfn_clEnqueueMarker)( cl_command_queue commandQueue, cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int(CL_API_CALL *KHRpfn_clEnqueueWaitForEvents)( cl_command_queue commandQueue, cl_uint numEvents, const cl_event *eventList) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int(CL_API_CALL *KHRpfn_clEnqueueBarrier)( cl_command_queue commandQueue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void *(CL_API_CALL *KHRpfn_clGetExtensionFunctionAddress)( const char *functionName)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLBuffer)( cl_context context, cl_mem_flags flags, cl_GLuint bufobj, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLTexture)( cl_context context, cl_mem_flags flags, cl_GLenum textureTarget, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLTexture2D)( cl_context context, cl_mem_flags flags, cl_GLenum textureTarget, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLTexture3D)( cl_context context, cl_mem_flags flags, cl_GLenum textureTarget, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateFromGLRenderbuffer)( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetGLObjectInfo)( cl_mem memobj, cl_gl_object_type *glObjectType, cl_GLuint *glObjectName) 
CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetGLTextureInfo)( cl_mem memobj, cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueAcquireGLObjects)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReleaseGLObjects)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_gl_sharing */ typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetGLContextInfoKHR)( const cl_context_properties *properties, cl_gl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_0; #ifndef _WIN32 typedef void *clGetDeviceIDsFromDX9INTEL_fn; typedef void *clCreateFromDX9MediaSurfaceINTEL_fn; typedef void *clEnqueueAcquireDX9ObjectsINTEL_fn; typedef void *clEnqueueReleaseDX9ObjectsINTEL_fn; typedef void *clGetDeviceIDsFromDX9MediaAdapterKHR_fn; typedef void *clCreateFromDX9MediaSurfaceKHR_fn; typedef void *clEnqueueAcquireDX9MediaSurfacesKHR_fn; typedef void *clEnqueueReleaseDX9MediaSurfacesKHR_fn; typedef void *clGetDeviceIDsFromD3D10KHR_fn; typedef void *clCreateFromD3D10BufferKHR_fn; typedef void *clCreateFromD3D10Texture2DKHR_fn; typedef void *clCreateFromD3D10Texture3DKHR_fn; typedef void *clEnqueueAcquireD3D10ObjectsKHR_fn; typedef void *clEnqueueReleaseD3D10ObjectsKHR_fn; typedef void *clGetDeviceIDsFromD3D11KHR_fn; typedef void *clCreateFromD3D11BufferKHR_fn; typedef void *clCreateFromD3D11Texture2DKHR_fn; typedef void *clCreateFromD3D11Texture3DKHR_fn; typedef void *clEnqueueAcquireD3D11ObjectsKHR_fn; typedef void 
*clEnqueueReleaseD3D11ObjectsKHR_fn; #endif /* OpenCL 1.1 */ /* cl_khr_gl_event */ typedef CL_API_ENTRY cl_event(CL_API_CALL *KHRpfn_clCreateEventFromGLsyncKHR)( cl_context context, cl_GLsync sync, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetEventCallback)( cl_event event, cl_int commandExecCallbackType, evnt_logging_fn pfnNotify, void *userData) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateSubBuffer)( cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bufferCreateType, const void *bufferCreateInfo, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetMemObjectDestructorCallback)( cl_mem memobj, memobj_logging_fn pfnNotify, void *userData) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_event(CL_API_CALL *KHRpfn_clCreateUserEvent)( cl_context context, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetUserEventStatus)( cl_event event, cl_int executionStatus) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueReadBufferRect)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOffset, const size_t *hostOffset, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueWriteBufferRect)( cl_command_queue commandQueue, cl_mem buffer, cl_bool blockingRead, const size_t *bufferOffset, const size_t *hostOffset, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_1; typedef CL_API_ENTRY
cl_int(CL_API_CALL *KHRpfn_clEnqueueCopyBufferRect)( cl_command_queue commandQueue, cl_mem srcBuffer, cl_mem dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_1; /*OpenCL 1.2*/ typedef CL_API_ENTRY cl_mem(CL_API_CALL *KHRpfn_clCreateImage)( cl_context context, cl_mem_flags flags, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, void *hostPtr, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelArgInfo)( cl_kernel kernel, cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueFillBuffer)( cl_command_queue commandQueue, cl_mem buffer, const void *pattern, size_t patternSize, size_t offset, size_t cb, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueFillImage)( cl_command_queue commandQueue, cl_mem image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueMigrateMemObjects)( cl_command_queue commandQueue, cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueMarkerWithWaitList)( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL 
*KHRpfn_clEnqueueBarrierWithWaitList)( cl_command_queue commandQueue, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCreateSubDevices)( cl_device_id inDevice, const cl_device_partition_property *properties, cl_uint numEntries, cl_device_id *outDevices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithBuiltInKernels)( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *kernelNames, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCompileProgram)( cl_program program, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames, void(CL_CALLBACK *pfnNotify)(cl_program program, void *userData), void *userData) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clLinkProgram)( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const char *options, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *pfnNotify)(cl_program program, void *userData), void *userData, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clUnloadPlatformCompiler)( cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clGetExtensionFunctionAddressForPlatform)( cl_platform_id platform, const char *funcName)CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clCreateSubDevicesEXT)( cl_device_id inDevice, const 
cl_device_partition_property_ext *partitionProperties, cl_uint numEntries, cl_device_id *outDevices, cl_uint *numDevices); typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clRetainDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clReleaseDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; /*cl_khr_egl_image*/ typedef void *KHRpfn_clCreateFromEGLImageKHR; typedef void *KHRpfn_clEnqueueAcquireEGLObjectsKHR; typedef void *KHRpfn_clEnqueueReleaseEGLObjectsKHR; /*cl_khr_egl_event*/ typedef void *KHRpfn_clCreateEventFromEGLSyncKHR; /*OpenCL2.0*/ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *KHRpfn_clCreateCommandQueueWithProperties)( cl_context context, cl_device_id device, const cl_queue_properties *properties, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY void *(CL_API_CALL *KHRpfn_clSVMAlloc)( cl_context context, cl_svm_mem_flags flags, size_t size, cl_uint alignment)CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY void(CL_API_CALL *KHRpfn_clSVMFree)( cl_context context, void *svmPointer) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMFree)( cl_command_queue commandQueue, cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMemcpy)( cl_command_queue commandQueue, cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMemFill)( cl_command_queue commandQueue, void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event 
*eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMap)( cl_command_queue commandQueue, cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMUnmap)( cl_command_queue commandQueue, void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetKernelArgSVMPointer)( cl_kernel kernel, cl_uint argIndex, const void *argValue) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetKernelExecInfo)( cl_kernel kernel, cl_kernel_exec_info paramName, size_t paramValueSize, const void *paramValue) CL_API_SUFFIX__VERSION_2_0; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelSubGroupInfoKHR)( cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_2_0; /*OpenCL2.1*/ typedef CL_API_ENTRY cl_kernel(CL_API_CALL *KHRpfn_clCloneKernel)( cl_kernel sourceKernel, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_program(CL_API_CALL *KHRpfn_clCreateProgramWithIL)( cl_context context, const void *il, size_t length, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clEnqueueSVMMigrateMem)( cl_command_queue commandQueue, cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetDeviceAndHostTimer)( cl_device_id device, cl_ulong *deviceTimestamp, cl_ulong *hostTimestamp) 
CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetHostTimer)( cl_device_id device, cl_ulong *hostTimestamp) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clGetKernelSubGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_2_1; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetDefaultDeviceCommandQueue)( cl_context context, cl_device_id device, cl_command_queue commandQueue) CL_API_SUFFIX__VERSION_2_1; /*OpenCL2.2*/ typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetProgramReleaseCallback)( cl_program program, void(CL_CALLBACK *pfnNotify)(cl_program program, void *userData), void *userData) CL_API_SUFFIX__VERSION_2_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *KHRpfn_clSetProgramSpecializationConstant)( cl_program program, cl_uint specIdd, size_t specSize, const void *specValue) CL_API_SUFFIX__VERSION_2_2; /* clCreateImage */ typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clGetImageParamsINTEL)( cl_context context, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch) CL_API_SUFFIX__VERSION_1_1; /* Performance Counter APIs */ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *INTELpfn_clCreatePerfCountersCommandQueueINTEL)( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_uint configuration, cl_int *errcodeRet); typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clSetPerformanceConfigurationINTEL)( cl_device_id device, cl_uint count, cl_uint *offsets, cl_uint *values); /* cl_intel_accelerator */ typedef CL_API_ENTRY cl_accelerator_intel(CL_API_CALL *INTELpfn_clCreateAcceleratorINTEL)( cl_context context, cl_accelerator_type_intel acceleratorType, size_t descriptorSize, const void *descriptor, cl_int *errcodeRet) 
CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clGetAcceleratorInfoINTEL)( cl_accelerator_intel accelerator, cl_accelerator_info_intel paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clRetainAcceleratorINTEL)( cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clReleaseAcceleratorINTEL)( cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; /* cl_intel_va_api_media_sharing */ #ifdef LIBVA #include "CL/cl_va_api_media_sharing_intel.h" typedef CL_API_ENTRY cl_mem(CL_API_CALL *INTELpfn_clCreateFromVA_APIMediaSurfaceINTEL)( cl_context context, cl_mem_flags flags, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clGetDeviceIDsFromVA_APIMediaAdapterINTEL)( cl_platform_id platform, cl_va_api_device_source_intel mediaAdapterType, void *mediaAdapter, cl_va_api_device_set_intel mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clEnqueueAcquireVA_APIMediaSurfacesINTEL)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; typedef CL_API_ENTRY cl_int(CL_API_CALL *INTELpfn_clEnqueueReleaseVA_APIMediaSurfacesINTEL)( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; #else typedef void (*INTELpfn_clCreateFromVA_APIMediaSurfaceINTEL)(); typedef void (*INTELpfn_clGetDeviceIDsFromVA_APIMediaAdapterINTEL)(); typedef void (*INTELpfn_clEnqueueAcquireVA_APIMediaSurfacesINTEL)(); typedef void 
(*INTELpfn_clEnqueueReleaseVA_APIMediaSurfacesINTEL)(); #endif /* * * vendor dispatch table structure * * note that the types in the structure KHRicdVendorDispatch mirror the function * names listed in the string table khrIcdVendorDispatchFunctionNames * */ struct SDispatchTable { KHRpfn_clGetPlatformIDs clGetPlatformIDs; KHRpfn_clGetPlatformInfo clGetPlatformInfo; KHRpfn_clGetDeviceIDs clGetDeviceIDs; KHRpfn_clGetDeviceInfo clGetDeviceInfo; KHRpfn_clCreateContext clCreateContext; KHRpfn_clCreateContextFromType clCreateContextFromType; KHRpfn_clRetainContext clRetainContext; KHRpfn_clReleaseContext clReleaseContext; KHRpfn_clGetContextInfo clGetContextInfo; KHRpfn_clCreateCommandQueue clCreateCommandQueue; KHRpfn_clRetainCommandQueue clRetainCommandQueue; KHRpfn_clReleaseCommandQueue clReleaseCommandQueue; KHRpfn_clGetCommandQueueInfo clGetCommandQueueInfo; KHRpfn_clSetCommandQueueProperty clSetCommandQueueProperty; KHRpfn_clCreateBuffer clCreateBuffer; KHRpfn_clCreateImage2D clCreateImage2D; KHRpfn_clCreateImage3D clCreateImage3D; KHRpfn_clRetainMemObject clRetainMemObject; KHRpfn_clReleaseMemObject clReleaseMemObject; KHRpfn_clGetSupportedImageFormats clGetSupportedImageFormats; KHRpfn_clGetMemObjectInfo clGetMemObjectInfo; KHRpfn_clGetImageInfo clGetImageInfo; KHRpfn_clCreateSampler clCreateSampler; KHRpfn_clRetainSampler clRetainSampler; KHRpfn_clReleaseSampler clReleaseSampler; KHRpfn_clGetSamplerInfo clGetSamplerInfo; KHRpfn_clCreateProgramWithSource clCreateProgramWithSource; KHRpfn_clCreateProgramWithBinary clCreateProgramWithBinary; KHRpfn_clRetainProgram clRetainProgram; KHRpfn_clReleaseProgram clReleaseProgram; KHRpfn_clBuildProgram clBuildProgram; KHRpfn_clUnloadCompiler clUnloadCompiler; KHRpfn_clGetProgramInfo clGetProgramInfo; KHRpfn_clGetProgramBuildInfo clGetProgramBuildInfo; KHRpfn_clCreateKernel clCreateKernel; KHRpfn_clCreateKernelsInProgram clCreateKernelsInProgram; KHRpfn_clRetainKernel clRetainKernel; KHRpfn_clReleaseKernel clReleaseKernel; 
KHRpfn_clSetKernelArg clSetKernelArg; KHRpfn_clGetKernelInfo clGetKernelInfo; KHRpfn_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; KHRpfn_clWaitForEvents clWaitForEvents; KHRpfn_clGetEventInfo clGetEventInfo; KHRpfn_clRetainEvent clRetainEvent; KHRpfn_clReleaseEvent clReleaseEvent; KHRpfn_clGetEventProfilingInfo clGetEventProfilingInfo; KHRpfn_clFlush clFlush; KHRpfn_clFinish clFinish; KHRpfn_clEnqueueReadBuffer clEnqueueReadBuffer; KHRpfn_clEnqueueWriteBuffer clEnqueueWriteBuffer; KHRpfn_clEnqueueCopyBuffer clEnqueueCopyBuffer; KHRpfn_clEnqueueReadImage clEnqueueReadImage; KHRpfn_clEnqueueWriteImage clEnqueueWriteImage; KHRpfn_clEnqueueCopyImage clEnqueueCopyImage; KHRpfn_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; KHRpfn_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; KHRpfn_clEnqueueMapBuffer clEnqueueMapBuffer; KHRpfn_clEnqueueMapImage clEnqueueMapImage; KHRpfn_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; KHRpfn_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; KHRpfn_clEnqueueTask clEnqueueTask; KHRpfn_clEnqueueNativeKernel clEnqueueNativeKernel; KHRpfn_clEnqueueMarker clEnqueueMarker; KHRpfn_clEnqueueWaitForEvents clEnqueueWaitForEvents; KHRpfn_clEnqueueBarrier clEnqueueBarrier; KHRpfn_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; KHRpfn_clCreateFromGLBuffer clCreateFromGLBuffer; KHRpfn_clCreateFromGLTexture2D clCreateFromGLTexture2D; KHRpfn_clCreateFromGLTexture3D clCreateFromGLTexture3D; KHRpfn_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; KHRpfn_clGetGLObjectInfo clGetGLObjectInfo; KHRpfn_clGetGLTextureInfo clGetGLTextureInfo; KHRpfn_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; KHRpfn_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; KHRpfn_clGetGLContextInfoKHR clGetGLContextInfoKHR; /* cl_khr_d3d10_sharing */ clGetDeviceIDsFromD3D10KHR_fn clGetDeviceIDsFromD3D10KHR; clCreateFromD3D10BufferKHR_fn clCreateFromD3D10BufferKHR; clCreateFromD3D10Texture2DKHR_fn clCreateFromD3D10Texture2DKHR; 
clCreateFromD3D10Texture3DKHR_fn clCreateFromD3D10Texture3DKHR; clEnqueueAcquireD3D10ObjectsKHR_fn clEnqueueAcquireD3D10ObjectsKHR; clEnqueueReleaseD3D10ObjectsKHR_fn clEnqueueReleaseD3D10ObjectsKHR; /* OpenCL 1.1 */ KHRpfn_clSetEventCallback clSetEventCallback; KHRpfn_clCreateSubBuffer clCreateSubBuffer; KHRpfn_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; KHRpfn_clCreateUserEvent clCreateUserEvent; KHRpfn_clSetUserEventStatus clSetUserEventStatus; KHRpfn_clEnqueueReadBufferRect clEnqueueReadBufferRect; KHRpfn_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; KHRpfn_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; /* cl_ext_device_fission */ KHRpfn_clCreateSubDevicesEXT clCreateSubDevicesEXT; KHRpfn_clRetainDeviceEXT clRetainDeviceEXT; KHRpfn_clReleaseDeviceEXT clReleaseDeviceEXT; /* cl_khr_gl_event */ KHRpfn_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; /* OpenCL 1.2 */ KHRpfn_clCreateSubDevices clCreateSubDevices; KHRpfn_clRetainDevice clRetainDevice; KHRpfn_clReleaseDevice clReleaseDevice; KHRpfn_clCreateImage clCreateImage; KHRpfn_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; KHRpfn_clCompileProgram clCompileProgram; KHRpfn_clLinkProgram clLinkProgram; KHRpfn_clUnloadPlatformCompiler clUnloadPlatformCompiler; KHRpfn_clGetKernelArgInfo clGetKernelArgInfo; KHRpfn_clEnqueueFillBuffer clEnqueueFillBuffer; KHRpfn_clEnqueueFillImage clEnqueueFillImage; KHRpfn_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; KHRpfn_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; KHRpfn_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; KHRpfn_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform; KHRpfn_clCreateFromGLTexture clCreateFromGLTexture; /* cl_khr_d3d11_sharing */ clGetDeviceIDsFromD3D11KHR_fn clGetDeviceIDsFromD3D11KHR; clCreateFromD3D11BufferKHR_fn clCreateFromD3D11BufferKHR; clCreateFromD3D11Texture2DKHR_fn clCreateFromD3D11Texture2DKHR; 
clCreateFromD3D11Texture3DKHR_fn clCreateFromD3D11Texture3DKHR; clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR; clEnqueueAcquireD3D11ObjectsKHR_fn clEnqueueAcquireD3D11ObjectsKHR; clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR; /* cl_khr_dx9_media_sharing */ clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR; clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR; clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR; /* cl_khr_egl_image */ KHRpfn_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; KHRpfn_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; KHRpfn_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; /* cl_khr_egl_event */ KHRpfn_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; /* OpenCL 2.0 */ KHRpfn_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; KHRpfn_clCreatePipe clCreatePipe; KHRpfn_clGetPipeInfo clGetPipeInfo; KHRpfn_clSVMAlloc clSVMAlloc; KHRpfn_clSVMFree clSVMFree; KHRpfn_clEnqueueSVMFree clEnqueueSVMFree; KHRpfn_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; KHRpfn_clEnqueueSVMMemFill clEnqueueSVMMemFill; KHRpfn_clEnqueueSVMMap clEnqueueSVMMap; KHRpfn_clEnqueueSVMUnmap clEnqueueSVMUnmap; KHRpfn_clCreateSamplerWithProperties clCreateSamplerWithProperties; KHRpfn_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; KHRpfn_clSetKernelExecInfo clSetKernelExecInfo; KHRpfn_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; /* OpenCL 2.1 */ KHRpfn_clCloneKernel clCloneKernel; KHRpfn_clCreateProgramWithIL clCreateProgramWithIL; KHRpfn_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; KHRpfn_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; KHRpfn_clGetHostTimer clGetHostTimer; KHRpfn_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; KHRpfn_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; /* OpenCL 2.2 */ KHRpfn_clSetProgramReleaseCallback clSetProgramReleaseCallback; 
KHRpfn_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; }; struct SCRTDispatchTable { // Support CRT entry point KHRpfn_clGetKernelArgInfo clGetKernelArgInfo; clGetDeviceIDsFromDX9INTEL_fn clGetDeviceIDsFromDX9INTEL; clCreateFromDX9MediaSurfaceINTEL_fn clCreateFromDX9MediaSurfaceINTEL; clEnqueueAcquireDX9ObjectsINTEL_fn clEnqueueAcquireDX9ObjectsINTEL; clEnqueueReleaseDX9ObjectsINTEL_fn clEnqueueReleaseDX9ObjectsINTEL; INTELpfn_clGetImageParamsINTEL clGetImageParamsINTEL; // API to expose the Performance Counters to applications INTELpfn_clCreatePerfCountersCommandQueueINTEL clCreatePerfCountersCommandQueueINTEL; // Video Analytics Accelerator INTELpfn_clCreateAcceleratorINTEL clCreateAcceleratorINTEL; INTELpfn_clGetAcceleratorInfoINTEL clGetAcceleratorInfoINTEL; INTELpfn_clRetainAcceleratorINTEL clRetainAcceleratorINTEL; INTELpfn_clReleaseAcceleratorINTEL clReleaseAcceleratorINTEL; void *placeholder12; void *placeholder13; // VAMedia sharing extension #ifdef LIBVA INTELpfn_clCreateFromVA_APIMediaSurfaceINTEL clCreateFromVA_APIMediaSurfaceINTEL; INTELpfn_clGetDeviceIDsFromVA_APIMediaAdapterINTEL clGetDeviceIDsFromVA_APIMediaAdapterINTEL; INTELpfn_clEnqueueReleaseVA_APIMediaSurfacesINTEL clEnqueueReleaseVA_APIMediaSurfacesINTEL; INTELpfn_clEnqueueAcquireVA_APIMediaSurfacesINTEL clEnqueueAcquireVA_APIMediaSurfacesINTEL; #else void *placeholder14; void *placeholder15; void *placeholder16; void *placeholder17; #endif void *placeholder18; void *placeholder19; void *placeholder20; void *placeholder21; // OCL Performance Counters configuration INTELpfn_clSetPerformanceConfigurationINTEL clSetPerformanceConfigurationINTEL; }; extern SDispatchTable icdGlobalDispatchTable; extern SCRTDispatchTable crtGlobalDispatchTable; struct SEntryPointsTable { SDispatchTable *icdDispatch; SCRTDispatchTable *crtDispatch; }; struct SEntryPointsTableData { SDispatchTable icdDispatch; SCRTDispatchTable crtDispatch; }; extern SEntryPointsTable globalDispatchTable; 
compute-runtime-20.13.16352/opencl/source/aub/000077500000000000000000000000001363734646600207045ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/aub/CMakeLists.txt000066400000000000000000000012231363734646600234420ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_AUB ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_center.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_center.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/aub_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_add_mmio.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_AUB}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_AUB ${RUNTIME_SRCS_AUB}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/aub/aub_center.cpp000066400000000000000000000076441363734646600235320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub/aub_center.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/aub/aub_helper.h" #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/aubstream.h" namespace NEO { extern aub_stream::AubManager *createAubManager(uint32_t productFamily, uint32_t devicesCount, uint64_t memoryBankSize, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace); AubCenter::AubCenter(const HardwareInfo *pHwInfo, bool localMemoryEnabled, const std::string &aubFileName, CommandStreamReceiverType csrType) { if (DebugManager.flags.UseAubStream.get()) { auto devicesCount = HwHelper::getSubDevicesCount(pHwInfo); auto memoryBankSize = AubHelper::getMemBankSize(pHwInfo); 
CommandStreamReceiverType type = csrType; if (DebugManager.flags.SetCommandStreamReceiver.get() >= CommandStreamReceiverType::CSR_HW) { type = static_cast(DebugManager.flags.SetCommandStreamReceiver.get()); } aubStreamMode = getAubStreamMode(aubFileName, type); AubHelper::setAdditionalMmioList(); if (DebugManager.flags.AubDumpAddMmioRegistersList.get() != "unk") { aub_stream::injectMMIOList(AubHelper::getAdditionalMmioList()); } aub_stream::setTbxServerIp(DebugManager.flags.TbxServer.get()); aub_stream::setTbxServerPort(DebugManager.flags.TbxPort.get()); aubManager.reset(createAubManager(pHwInfo->platform.eProductFamily, devicesCount, memoryBankSize, localMemoryEnabled, aubStreamMode, pHwInfo->capabilityTable.gpuAddressSpace)); } addressMapper = std::make_unique(); streamProvider = std::make_unique(); subCaptureCommon = std::make_unique(); if (DebugManager.flags.AUBDumpSubCaptureMode.get()) { this->subCaptureCommon->subCaptureMode = static_cast(DebugManager.flags.AUBDumpSubCaptureMode.get()); this->subCaptureCommon->subCaptureFilter.dumpKernelStartIdx = static_cast(DebugManager.flags.AUBDumpFilterKernelStartIdx.get()); this->subCaptureCommon->subCaptureFilter.dumpKernelEndIdx = static_cast(DebugManager.flags.AUBDumpFilterKernelEndIdx.get()); this->subCaptureCommon->subCaptureFilter.dumpNamedKernelStartIdx = static_cast(DebugManager.flags.AUBDumpFilterNamedKernelStartIdx.get()); this->subCaptureCommon->subCaptureFilter.dumpNamedKernelEndIdx = static_cast(DebugManager.flags.AUBDumpFilterNamedKernelEndIdx.get()); if (DebugManager.flags.AUBDumpFilterKernelName.get() != "unk") { this->subCaptureCommon->subCaptureFilter.dumpKernelName = DebugManager.flags.AUBDumpFilterKernelName.get(); } } } AubCenter::AubCenter() { addressMapper = std::make_unique(); streamProvider = std::make_unique(); subCaptureCommon = std::make_unique(); } AubCenter::~AubCenter() { if (DebugManager.flags.UseAubStream.get()) { aub_stream::injectMMIOList(MMIOList{}); } } uint32_t 
AubCenter::getAubStreamMode(const std::string &aubFileName, uint32_t csrType) { uint32_t mode = aub_stream::mode::aubFile; switch (csrType) { case CommandStreamReceiverType::CSR_HW_WITH_AUB: case CommandStreamReceiverType::CSR_AUB: mode = aub_stream::mode::aubFile; break; case CommandStreamReceiverType::CSR_TBX: mode = aub_stream::mode::tbx; break; case CommandStreamReceiverType::CSR_TBX_WITH_AUB: mode = aub_stream::mode::aubFileAndTbx; break; default: break; } return mode; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/aub/aub_center.h000066400000000000000000000034731363734646600231730ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/options.h" #include "opencl/source/command_stream/aub_stream_provider.h" #include "opencl/source/command_stream/aub_subcapture.h" #include "opencl/source/memory_manager/address_mapper.h" #include "opencl/source/memory_manager/physical_address_allocator.h" #include "third_party/aub_stream/headers/aub_manager.h" namespace NEO { struct HardwareInfo; class AubCenter { public: AubCenter(const HardwareInfo *pHwInfo, bool localMemoryEnabled, const std::string &aubFileName, CommandStreamReceiverType csrType); AubCenter(); virtual ~AubCenter(); void initPhysicalAddressAllocator(PhysicalAddressAllocator *pPhysicalAddressAllocator) { physicalAddressAllocator = std::unique_ptr(pPhysicalAddressAllocator); } PhysicalAddressAllocator *getPhysicalAddressAllocator() const { return physicalAddressAllocator.get(); } AddressMapper *getAddressMapper() const { return addressMapper.get(); } AubStreamProvider *getStreamProvider() const { return streamProvider.get(); } AubSubCaptureCommon *getSubCaptureCommon() const { return subCaptureCommon.get(); } aub_stream::AubManager *getAubManager() const { return aubManager.get(); } static uint32_t getAubStreamMode(const std::string &aubFileName, uint32_t csrType); protected: std::unique_ptr 
physicalAddressAllocator; std::unique_ptr addressMapper; std::unique_ptr streamProvider; std::unique_ptr subCaptureCommon; std::unique_ptr aubManager; uint32_t aubStreamMode = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/aub/aub_helper.cpp000066400000000000000000000014651363734646600235240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub/aub_helper.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/basic_math.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" namespace NEO { uint64_t AubHelper::getTotalMemBankSize() { return 2 * GB; } int AubHelper::getMemTrace(uint64_t pdEntryBits) { return AubMemDump::AddressSpaceValues::TraceNonlocal; } uint64_t AubHelper::getPTEntryBits(uint64_t pdEntryBits) { return pdEntryBits; } uint32_t AubHelper::getMemType(uint32_t addressSpace) { return 0; } uint64_t AubHelper::getMemBankSize(const HardwareInfo *pHwInfo) { return getTotalMemBankSize(); } void AubHelper::setAdditionalMmioList() { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/aub/aub_helper.h000066400000000000000000000057051363734646600231720ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/gen_common/aub_mapper_base.h" namespace NEO { class AubHelper : public NonCopyableOrMovableClass { public: static bool isOneTimeAubWritableAllocationType(const GraphicsAllocation::AllocationType &type) { switch (type) { case GraphicsAllocation::AllocationType::PIPE: case GraphicsAllocation::AllocationType::CONSTANT_SURFACE: case GraphicsAllocation::AllocationType::GLOBAL_SURFACE: case GraphicsAllocation::AllocationType::KERNEL_ISA: case 
GraphicsAllocation::AllocationType::PRIVATE_SURFACE: case GraphicsAllocation::AllocationType::SCRATCH_SURFACE: case GraphicsAllocation::AllocationType::BUFFER: case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY: case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED: case GraphicsAllocation::AllocationType::IMAGE: case GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER: case GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR: case GraphicsAllocation::AllocationType::MAP_ALLOCATION: case GraphicsAllocation::AllocationType::SVM_GPU: return true; default: return false; } } static uint64_t getTotalMemBankSize(); static int getMemTrace(uint64_t pdEntryBits); static uint64_t getPTEntryBits(uint64_t pdEntryBits); static uint32_t getMemType(uint32_t addressSpace); static uint64_t getMemBankSize(const HardwareInfo *pHwInfo); static MMIOList getAdditionalMmioList(); static void setAdditionalMmioList(); virtual int getDataHintForPml4Entry() const = 0; virtual int getDataHintForPdpEntry() const = 0; virtual int getDataHintForPdEntry() const = 0; virtual int getDataHintForPtEntry() const = 0; virtual int getMemTraceForPml4Entry() const = 0; virtual int getMemTraceForPdpEntry() const = 0; virtual int getMemTraceForPdEntry() const = 0; virtual int getMemTraceForPtEntry() const = 0; protected: static MMIOList splitMMIORegisters(const std::string ®isters, char delimiter); }; template class AubHelperHw : public AubHelper { public: AubHelperHw(bool localMemoryEnabled) : localMemoryEnabled(localMemoryEnabled){}; int getDataHintForPml4Entry() const override; int getDataHintForPdpEntry() const override; int getDataHintForPdEntry() const override; int getDataHintForPtEntry() const override; int getMemTraceForPml4Entry() const override; int getMemTraceForPdpEntry() const override; int getMemTraceForPdEntry() const override; int getMemTraceForPtEntry() const override; protected: bool localMemoryEnabled; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/aub/aub_helper_add_mmio.cpp000066400000000000000000000031541363734646600253520ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/aub/aub_helper.h" #include "third_party/aub_stream/headers/aubstream.h" namespace NEO { MMIOList AubHelper::getAdditionalMmioList() { return splitMMIORegisters(DebugManager.flags.AubDumpAddMmioRegistersList.get(), ';'); } MMIOList AubHelper::splitMMIORegisters(const std::string ®isters, char delimiter) { MMIOList result; bool firstElementInPair = false; std::string token; uint32_t registerOffset = 0; uint32_t registerValue = 0; std::istringstream stream(""); for (std::string::const_iterator i = registers.begin();; i++) { if (i == registers.end() || *i == delimiter) { if (token.size() > 0) { stream.str(token); stream.clear(); firstElementInPair = !firstElementInPair; stream >> std::hex >> (firstElementInPair ? 
registerOffset : registerValue); if (stream.fail()) { result.clear(); break; } token.clear(); if (!firstElementInPair) { result.push_back(std::pair(registerOffset, registerValue)); registerValue = 0; registerOffset = 0; } } if (i == registers.end()) { break; } } else { token.push_back(*i); } } return result; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/aub/aub_helper_base.inl000066400000000000000000000022611363734646600245110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub/aub_helper.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" namespace NEO { template int AubHelperHw::getMemTraceForPml4Entry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePml4Entry; } template int AubHelperHw::getMemTraceForPdpEntry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePhysicalPdpEntry; } template int AubHelperHw::getMemTraceForPdEntry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePpgttPdEntry; } template int AubHelperHw::getMemTraceForPtEntry() const { if (localMemoryEnabled) { return AubMemDump::AddressSpaceValues::TraceLocal; } return AubMemDump::AddressSpaceValues::TracePpgttEntry; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/aub/aub_helper_bdw_plus.inl000066400000000000000000000014101363734646600254110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub/aub_helper_base.inl" namespace NEO { template int AubHelperHw::getDataHintForPml4Entry() const { return AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPdpEntry() const { return 
AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPdEntry() const { return AubMemDump::DataTypeHintValues::TraceNotype; } template int AubHelperHw::getDataHintForPtEntry() const { return AubMemDump::DataTypeHintValues::TraceNotype; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/aub/aub_stream_interface.cpp000066400000000000000000000007561363734646600255620ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub/aub_center.h" using namespace aub_stream; namespace NEO { AubManager *createAubManager(uint32_t gfxFamily, uint32_t devicesCount, uint64_t memoryBankSize, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace) { return AubManager::create(gfxFamily, devicesCount, memoryBankSize, localMemorySupported, streamMode, gpuAddressSpace); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/aub_mem_dump/000077500000000000000000000000001363734646600225675ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/aub_mem_dump/CMakeLists.txt000066400000000000000000000017051363734646600253320ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_AUB_MEM_DUMP ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_alloc_dump.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_alloc_dump.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_data.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_header.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_services.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/context_flags.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table_entry_bits.h ) if(NOT DEFINED AUB_STREAM_DIR) list(APPEND RUNTIME_SRCS_AUB_MEM_DUMP ${CMAKE_CURRENT_SOURCE_DIR}/aub_stream_stubs.cpp ) endif() target_sources(${NEO_STATIC_LIB_NAME} 
PRIVATE ${RUNTIME_SRCS_AUB_MEM_DUMP}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_AUB_MEM_DUMP ${RUNTIME_SRCS_AUB_MEM_DUMP}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_alloc_dump.h000066400000000000000000000071641363734646600257160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" using namespace NEO; namespace aub_stream { struct SurfaceInfo; } namespace AubAllocDump { enum DumpFormat { NONE, BUFFER_BIN, BUFFER_TRE, IMAGE_BMP, IMAGE_TRE, }; inline bool isWritableBuffer(GraphicsAllocation &gfxAllocation) { return (gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::BUFFER || gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED || gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY || gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR || gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::MAP_ALLOCATION) && gfxAllocation.isMemObjectsAllocationWithWritableFlags(); } inline bool isWritableImage(GraphicsAllocation &gfxAllocation) { return (gfxAllocation.getAllocationType() == GraphicsAllocation::AllocationType::IMAGE) && gfxAllocation.isMemObjectsAllocationWithWritableFlags(); } inline DumpFormat getDumpFormat(GraphicsAllocation &gfxAllocation) { auto dumpBufferFormat = DebugManager.flags.AUBDumpBufferFormat.get(); auto dumpImageFormat = DebugManager.flags.AUBDumpImageFormat.get(); auto isDumpableBuffer = isWritableBuffer(gfxAllocation); auto isDumpableImage = isWritableImage(gfxAllocation); auto dumpFormat = DumpFormat::NONE; if (isDumpableBuffer) { if (0 == 
dumpBufferFormat.compare("BIN")) { dumpFormat = DumpFormat::BUFFER_BIN; } else if (0 == dumpBufferFormat.compare("TRE")) { dumpFormat = DumpFormat::BUFFER_TRE; } } else if (isDumpableImage) { if (0 == dumpImageFormat.compare("BMP")) { dumpFormat = DumpFormat::IMAGE_BMP; } else if (0 == dumpImageFormat.compare("TRE")) { dumpFormat = DumpFormat::IMAGE_TRE; } } return dumpFormat; } inline bool isBufferDumpFormat(DumpFormat dumpFormat) { return (AubAllocDump::DumpFormat::BUFFER_BIN == dumpFormat) || (dumpFormat == AubAllocDump::DumpFormat::BUFFER_TRE); } inline bool isImageDumpFormat(DumpFormat dumpFormat) { return (AubAllocDump::DumpFormat::IMAGE_BMP == dumpFormat) || (dumpFormat == AubAllocDump::DumpFormat::IMAGE_TRE); } template aub_stream::SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); } // namespace AubAllocDump compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_alloc_dump.inl000066400000000000000000000222531363734646600262450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.h" #include 
"third_party/aub_stream/headers/aubstream.h" using namespace NEO; using namespace aub_stream; namespace AubAllocDump { template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; auto surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL; switch (gmmResourceType) { case GMM_RESOURCE_TYPE::RESOURCE_1D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; break; case GMM_RESOURCE_TYPE::RESOURCE_2D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D; break; case GMM_RESOURCE_TYPE::RESOURCE_3D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D; break; default: DEBUG_BREAK_IF(true); break; } return surfaceType; } template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat) { SurfaceInfo *surfaceInfo = nullptr; if (isBufferDumpFormat(dumpFormat)) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; surfaceInfo = new SurfaceInfo(); surfaceInfo->address = GmmHelper::decanonize(gfxAllocation.getGpuAddress()); surfaceInfo->width = static_cast(gfxAllocation.getUnderlyingBufferSize()); surfaceInfo->height = 1; surfaceInfo->pitch = static_cast(gfxAllocation.getUnderlyingBufferSize()); surfaceInfo->format = SURFACE_FORMAT::SURFACE_FORMAT_RAW; surfaceInfo->tilingType = RENDER_SURFACE_STATE::TILE_MODE_LINEAR; surfaceInfo->surftype = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER; surfaceInfo->compressed = GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == gfxAllocation.getAllocationType(); surfaceInfo->dumpType = (AubAllocDump::DumpFormat::BUFFER_TRE == dumpFormat) ? 
dumpType::tre : dumpType::bin; } else if (isImageDumpFormat(dumpFormat)) { auto gmm = gfxAllocation.getDefaultGmm(); if (gmm->gmmResourceInfo->getNumSamples() > 1) { return nullptr; } surfaceInfo = new SurfaceInfo(); surfaceInfo->address = GmmHelper::decanonize(gfxAllocation.getGpuAddress()); surfaceInfo->width = static_cast(gmm->gmmResourceInfo->getBaseWidth()); surfaceInfo->height = static_cast(gmm->gmmResourceInfo->getBaseHeight()); surfaceInfo->pitch = static_cast(gmm->gmmResourceInfo->getRenderPitch()); surfaceInfo->format = gmm->gmmResourceInfo->getResourceFormatSurfaceState(); surfaceInfo->tilingType = gmm->gmmResourceInfo->getTileModeSurfaceState(); surfaceInfo->surftype = getImageSurfaceTypeFromGmmResourceType(gmm->gmmResourceInfo->getResourceType()); surfaceInfo->compressed = gmm->isRenderCompressed; surfaceInfo->dumpType = (AubAllocDump::DumpFormat::IMAGE_TRE == dumpFormat) ? dumpType::tre : dumpType::bmp; } return surfaceInfo; } template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { AubMemDump::AubCaptureBinaryDumpHD cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x15; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; cmd.setHeight(1); cmd.setWidth(gfxAllocation.getUnderlyingBufferSize()); cmd.setBaseAddr(gfxAllocation.getGpuAddress()); cmd.setPitch(gfxAllocation.getUnderlyingBufferSize()); cmd.GttType = 1; cmd.DirectoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); } template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { auto gmm = gfxAllocation.getDefaultGmm(); AubMemDump::AubCmdDumpBmpHd cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x44; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; cmd.Xmin = 0; cmd.Ymin 
= 0; auto pitch = gmm->gmmResourceInfo->getRenderPitch(); auto bitsPerPixel = gmm->gmmResourceInfo->getBitsPerPixel(); auto pitchInPixels = static_cast(8 * pitch / bitsPerPixel); cmd.BufferPitch = pitchInPixels; cmd.BitsPerPixel = bitsPerPixel; cmd.Format = gmm->gmmResourceInfo->getResourceFormatSurfaceState(); cmd.Xsize = static_cast(gmm->gmmResourceInfo->getBaseWidth()); cmd.Ysize = static_cast(gmm->gmmResourceInfo->getBaseHeight()); cmd.setBaseAddr(gfxAllocation.getGpuAddress()); cmd.Secure = 0; cmd.UseFence = 0; auto flagInfo = gmm->gmmResourceInfo->getResourceFlags()->Info; cmd.TileOn = flagInfo.TiledW || flagInfo.TiledX || flagInfo.TiledY || flagInfo.TiledYf || flagInfo.TiledYs; cmd.WalkY = flagInfo.TiledY; cmd.UsePPGTT = 1; cmd.Use32BitDump = 1; // Dump out in 32bpp vs 24bpp cmd.UseFullFormat = 1; cmd.DirectoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); } template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; AubMemDump::CmdServicesMemTraceDumpCompress cmd; memset(&cmd, 0, sizeof(AubMemDump::CmdServicesMemTraceDumpCompress)); cmd.dwordCount = (sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4; cmd.instructionSubOpcode = 0x10; cmd.instructionOpcode = 0x2e; cmd.instructionType = 0x7; cmd.setSurfaceAddress(gfxAllocation.getGpuAddress()); cmd.surfaceWidth = static_cast(gfxAllocation.getUnderlyingBufferSize()); cmd.surfaceHeight = 1; cmd.surfacePitch = static_cast(gfxAllocation.getUnderlyingBufferSize()); cmd.surfaceFormat = SURFACE_FORMAT::SURFACE_FORMAT_RAW; cmd.dumpType = AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre; cmd.surfaceTilingType = RENDER_SURFACE_STATE::TILE_MODE_LINEAR; cmd.surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER; cmd.algorithm = 
AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed; cmd.gttType = 1; cmd.directoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); } template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; auto gmm = gfxAllocation.getDefaultGmm(); if ((gmm->gmmResourceInfo->getNumSamples() > 1) || (gmm->isRenderCompressed)) { DEBUG_BREAK_IF(true); //unsupported return; } auto surfaceType = getImageSurfaceTypeFromGmmResourceType(gmm->gmmResourceInfo->getResourceType()); AubMemDump::CmdServicesMemTraceDumpCompress cmd; memset(&cmd, 0, sizeof(AubMemDump::CmdServicesMemTraceDumpCompress)); cmd.dwordCount = (sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4; cmd.instructionSubOpcode = 0x10; cmd.instructionOpcode = 0x2e; cmd.instructionType = 0x7; cmd.setSurfaceAddress(gfxAllocation.getGpuAddress()); cmd.surfaceWidth = static_cast(gmm->gmmResourceInfo->getBaseWidth()); cmd.surfaceHeight = static_cast(gmm->gmmResourceInfo->getBaseHeight()); cmd.surfacePitch = static_cast(gmm->gmmResourceInfo->getRenderPitch()); cmd.surfaceFormat = gmm->gmmResourceInfo->getResourceFormatSurfaceState(); cmd.dumpType = AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre; cmd.surfaceTilingType = gmm->gmmResourceInfo->getTileModeSurfaceState(); cmd.surfaceType = surfaceType; cmd.algorithm = AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed; cmd.gttType = 1; cmd.directoryHandle = context; stream->write(reinterpret_cast(&cmd), sizeof(cmd)); } template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context) { switch (dumpFormat) { case DumpFormat::BUFFER_BIN: dumpBufferInBinFormat(gfxAllocation, stream, context); break; case DumpFormat::BUFFER_TRE: dumpBufferInTreFormat(gfxAllocation, stream, context); break; 
case DumpFormat::IMAGE_BMP: dumpImageInBmpFormat(gfxAllocation, stream, context); break; case DumpFormat::IMAGE_TRE: dumpImageInTreFormat(gfxAllocation, stream, context); break; default: break; } } } // namespace AubAllocDump compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_data.h000066400000000000000000000002741363734646600245030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include struct AubGTTData { bool present; bool localMemory; }; compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_header.h000066400000000000000000000061631363734646600250250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #ifndef WIN32 #pragma pack(4) #else #pragma pack(push, 4) #endif inline void setMisalignedUint64(uint64_t *address, const uint64_t value) { uint32_t *addressBits = reinterpret_cast(address); addressBits[0] = static_cast(value); addressBits[1] = static_cast(value >> 32); } inline uint64_t getMisalignedUint64(const uint64_t *address) { const uint32_t *addressBits = reinterpret_cast(address); return static_cast(static_cast(addressBits[1]) << 32) | addressBits[0]; } struct AubCmdHdr { uint32_t DwordLength : 16, SubOp : 7, Opcode : 6, Type : 3; }; static_assert(4 == sizeof(AubCmdHdr), "Invalid size for AubCmdHdr"); struct AubCmdDumpBmpHd { AubCmdHdr Header; uint32_t Xmin; uint32_t Ymin; uint32_t BufferPitch; uint32_t BitsPerPixel : 8, Format : 8, Reserved_0 : 16; uint32_t Xsize; uint32_t Ysize; uint64_t BaseAddr; uint32_t Secure : 1, UseFence : 1, TileOn : 1, WalkY : 1, UsePPGTT : 1, Use32BitDump : 1, UseFullFormat : 1, Reserved_1 : 25; uint32_t DirectoryHandle; uint64_t getBaseAddr() const { return getMisalignedUint64(&this->BaseAddr); } void setBaseAddr(const uint64_t baseAddr) { setMisalignedUint64(&this->BaseAddr, baseAddr); } }; static_assert(44 == sizeof(AubCmdDumpBmpHd), "Invalid 
size for AubCmdDumpBmpHd"); struct AubPpgttContextCreate { AubCmdHdr Header; uint32_t Handle; uint32_t AdvancedContext : 1, SixtyFourBit : 1, Reserved_31_2 : 30; uint64_t PageDirPointer[4]; }; static_assert(44 == sizeof(AubPpgttContextCreate), "Invalid size for AubPpgttContextCreate"); struct AubCaptureBinaryDumpHD { AubCmdHdr Header; uint64_t BaseAddr; uint64_t Width; uint64_t Height; uint64_t Pitch; uint32_t SurfaceType : 4, GttType : 2, Reserved_31_6 : 26; uint32_t DirectoryHandle; uint32_t ReservedDW1; uint32_t ReservedDW2; char OutputFile[4]; uint64_t getBaseAddr() const { return getMisalignedUint64(&this->BaseAddr); } void setBaseAddr(const uint64_t baseAddr) { setMisalignedUint64(&this->BaseAddr, baseAddr); } uint64_t getWidth() const { return getMisalignedUint64(&this->Width); } void setWidth(const uint64_t width) { setMisalignedUint64(&this->Width, width); } uint64_t getHeight() const { return getMisalignedUint64(&this->Height); } void setHeight(const uint64_t height) { setMisalignedUint64(&this->Height, height); } uint64_t getPitch() const { return getMisalignedUint64(&this->Pitch); } void setPitch(const uint64_t pitch) { setMisalignedUint64(&this->Pitch, pitch); } }; static_assert(56 == sizeof(AubCaptureBinaryDumpHD), "Invalid size for AubCaptureBinaryDumpHD"); #ifndef WIN32 #pragma pack() #else #pragma pack(pop) #endif compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_mem_dump.cpp000066400000000000000000000206661363734646600257370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_mem_dump.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/aub/aub_helper.h" namespace AubMemDump { const uint64_t g_pageMask = ~(4096ull - 1); const size_t g_dwordCountMax = 65536; // Some page table constants used in virtualizing the page tables. 
// clang-format off // 32 bit page table traits const uint64_t PageTableTraits<32>::physicalMemory = 0; // 1ull <::numPTEntries = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS); const uint64_t PageTableTraits<32>::sizePT = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<32>::ptBaseAddress = BIT(38); const uint64_t PageTableTraits<32>::numPDEntries = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS); const uint64_t PageTableTraits<32>::sizePD = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<32>::pdBaseAddress = BIT(37); const uint64_t PageTableTraits<32>::numPDPEntries = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS - PageTableTraits<32>::NUM_PDE_BITS); const uint64_t PageTableTraits<32>::sizePDP = BIT(PageTableTraits<32>::addressingBits - PageTableTraits<32>::NUM_OFFSET_BITS - PageTableTraits<32>::NUM_PTE_BITS - PageTableTraits<32>::NUM_PDE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<32>::pdpBaseAddress = BIT(36); // 48 bit page table traits const uint64_t PageTableTraits<48>::physicalMemory = 0; // 1ull <::numPTEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS); const uint64_t PageTableTraits<48>::sizePT = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::ptBaseAddress = BIT(32); const uint64_t PageTableTraits<48>::numPDEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS); const uint64_t PageTableTraits<48>::sizePD = BIT(PageTableTraits<48>::addressingBits - 
PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::pdBaseAddress = BIT(31); const uint64_t PageTableTraits<48>::numPDPEntries = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS - PageTableTraits<48>::NUM_PDE_BITS); const uint64_t PageTableTraits<48>::sizePDP = BIT(PageTableTraits<48>::addressingBits - PageTableTraits<48>::NUM_OFFSET_BITS - PageTableTraits<48>::NUM_PTE_BITS - PageTableTraits<48>::NUM_PDE_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::pdpBaseAddress = BIT(30); const uint64_t PageTableTraits<48>::numPML4Entries = BIT(NUM_PML4_BITS); const uint64_t PageTableTraits<48>::sizePML4 = BIT(NUM_PML4_BITS) * sizeof(uint64_t); const uint64_t PageTableTraits<48>::pml4BaseAddress = BIT(29); // clang-format on void LrcaHelper::setRingTail(void *pLRCIn, uint32_t ringTail) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingTail); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2030); *pLRCA++ = ringTail; } void LrcaHelper::setRingHead(void *pLRCIn, uint32_t ringHead) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingHead); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2034); *pLRCA++ = ringHead; } void LrcaHelper::setRingBase(void *pLRCIn, uint32_t ringBase) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingBase); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2038); *pLRCA++ = ringBase; } void LrcaHelper::setRingCtrl(void *pLRCIn, uint32_t ringCtrl) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetRingRegisters + offsetRingCtrl); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x203c); *pLRCA++ = ringCtrl; } void LrcaHelper::setPDP0(void *pLRCIn, uint64_t address) const { auto 
pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP0); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2274); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2270); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPDP1(void *pLRCIn, uint64_t address) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP1); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x227c); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2278); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPDP2(void *pLRCIn, uint64_t address) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP2); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2284); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2280); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPDP3(void *pLRCIn, uint64_t address) const { auto pLRCA = ptrOffset(reinterpret_cast(pLRCIn), offsetContext + offsetPageTableRegisters + offsetPDP3); *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x228c); *pLRCA++ = address >> 32; *pLRCA++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2288); *pLRCA++ = address & 0xffffffff; } void LrcaHelper::setPML4(void *pLRCIn, uint64_t address) const { setPDP0(pLRCIn, address); } void LrcaHelper::initialize(void *pLRCIn) const { auto pLRCABase = reinterpret_cast(pLRCIn); // Initialize to known but benign garbage for (size_t i = 0; i < sizeLRCA / sizeof(uint32_t); i++) { pLRCABase[i] = 0x1; } auto pLRCA = ptrOffset(pLRCABase, offsetContext); // Initialize the ring context of the LRCA auto pLRI = ptrOffset(pLRCA, offsetLRI0); auto numRegs = numRegsLRI0; *pLRI++ = 0x11001000 | (2 * numRegs - 1); uint32_t ctxSrCtlValue = 0x00010001; // Inhibit context-restore setContextSaveRestoreFlags(ctxSrCtlValue); while 
(numRegs-- > 0) { *pLRI++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2244); // CTXT_SR_CTL *pLRI++ = ctxSrCtlValue; } // Initialize the other LRI DEBUG_BREAK_IF(offsetLRI1 != 0x21 * sizeof(uint32_t)); pLRI = ptrOffset(pLRCA, offsetLRI1); numRegs = numRegsLRI1; *pLRI++ = 0x11001000 | (2 * numRegs - 1); while (numRegs-- > 0) { *pLRI++ = AubMemDump::computeRegisterOffset(mmioBase, 0x20d8); // DEBUG *pLRI++ = 0x00200020; } DEBUG_BREAK_IF(offsetLRI2 != 0x41 * sizeof(uint32_t)); pLRI = ptrOffset(pLRCA, offsetLRI2); numRegs = numRegsLRI2; *pLRI++ = 0x11000000 | (2 * numRegs - 1); while (numRegs-- > 0) { *pLRI++ = AubMemDump::computeRegisterOffset(mmioBase, 0x2094); // NOP ID *pLRI++ = 0x00000000; } setRingHead(pLRCIn, 0); setRingTail(pLRCIn, 0); setRingBase(pLRCIn, 0); setRingCtrl(pLRCIn, 0); setPDP0(pLRCIn, 0); setPDP1(pLRCIn, 0); setPDP2(pLRCIn, 0); setPDP3(pLRCIn, 0); } void AubStream::writeMMIO(uint32_t offset, uint32_t value) { auto dbgOffset = NEO::DebugManager.flags.AubDumpOverrideMmioRegister.get(); if (dbgOffset > 0) { if (offset == static_cast(dbgOffset)) { offset = static_cast(dbgOffset); value = static_cast(NEO::DebugManager.flags.AubDumpOverrideMmioRegisterValue.get()); } } writeMMIOImpl(offset, value); } } // namespace AubMemDump compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_mem_dump.h000066400000000000000000000370211363734646600253750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include #include #include #ifndef BIT #define BIT(x) (((uint64_t)1) << (x)) #endif #include "opencl/source/aub_mem_dump/aub_data.h" namespace NEO { class AubHelper; } namespace AubMemDump { #include "aub_services.h" constexpr uint32_t rcsRegisterBase = 0x2000; inline uint32_t computeRegisterOffset(uint32_t mmioBase, uint32_t rcsRegisterOffset) { return mmioBase + rcsRegisterOffset - rcsRegisterBase; } template inline void setAddress(Cmd &cmd, uint64_t address) { 
cmd.address = address; } template <> inline void setAddress(CmdServicesMemTraceMemoryCompare &cmd, uint64_t address) { cmd.address = static_cast(address); cmd.addressHigh = static_cast(address >> 32); } union IAPageTableEntry { struct { uint64_t Present : 1; //[0] uint64_t Writable : 1; //[1] uint64_t UserSupervisor : 1; //[2] uint64_t PWT : 1; //[3] uint64_t PCD : 1; //[4] uint64_t Accessed : 1; //[5] uint64_t Dirty : 1; //[6] uint64_t PAT : 1; //[7] uint64_t Global : 1; //[8] uint64_t Reserved_9 : 1; //[9] uint64_t Reserved_10 : 1; //[10] uint64_t Reserved_11 : 1; //[11] uint64_t PhysicalAddress : 27; //[38:12] uint64_t Reserved_51_39 : 13; //[51:39] uint64_t Ignored : 11; //[62:52] uint64_t ExecuteDisable : 1; //[63] } pageConfig; uint32_t dwordData[2]; uint64_t uiData; }; union MiGttEntry { struct { uint64_t Present : 1; //[0] uint64_t LocalMemory : 1; //[1] uint64_t FunctionNumber : 10; //[11:2] uint64_t PhysicalAddress : 35; //[46:12] uint64_t Ignored : 17; //[63:47] } pageConfig; uint32_t dwordData[2]; uint64_t uiData; }; // Use the latest DeviceValues enumerations available typedef CmdServicesMemTraceVersion::DeviceValues DeviceValues; typedef CmdServicesMemTraceVersion::SteppingValues SteppingValues; typedef CmdServicesMemTraceMemoryWrite::AddressSpaceValues AddressSpaceValues; typedef CmdServicesMemTraceMemoryWrite::DataTypeHintValues DataTypeHintValues; typedef CmdServicesMemTraceMemoryDump::TilingValues TilingValues; typedef CmdServicesMemTraceMemoryWrite::RepeatMemoryValues RepeatMemoryValues; typedef CmdServicesMemTraceRegisterWrite::MessageSourceIdValues MessageSourceIdValues; typedef CmdServicesMemTraceRegisterWrite::RegisterSizeValues RegisterSizeValues; typedef CmdServicesMemTraceRegisterWrite::RegisterSpaceValues RegisterSpaceValues; typedef CmdServicesMemTraceMemoryPoll::DataSizeValues DataSizeValues; template struct Traits { typedef struct AubStream Stream; enum { addressingBits = addressingBitsIn, device = deviceIn }; }; struct AubStream { 
virtual void open(const char *filePath) = 0; virtual void close() = 0; virtual bool init(uint32_t stepping, uint32_t device) = 0; virtual void createContext(const AubPpgttContextCreate &cmd) {} virtual void writeMemory(uint64_t physAddress, const void *memory, size_t sizeToDumpThisIteration, uint32_t addressSpace, uint32_t hint) = 0; virtual void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) = 0; virtual void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace) { writeMemoryWriteHeader(physAddress, size, addressSpace, CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceNotype); } virtual void writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) = 0; virtual void writeGTT(uint32_t offset, uint64_t entry) = 0; void writeMMIO(uint32_t offset, uint32_t value); virtual void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) = 0; virtual ~AubStream() = default; protected: virtual void writeMMIOImpl(uint32_t offset, uint32_t value) = 0; }; struct AubFileStream : public AubStream { void open(const char *filePath) override; void close() override; bool init(uint32_t stepping, uint32_t device) override; void createContext(const AubPpgttContextCreate &cmd) override; void writeMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) override; void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) override; void writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) override; void writeGTT(uint32_t offset, uint64_t entry) override; void writeMMIOImpl(uint32_t offset, uint32_t value) override; void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override; MOCKABLE_VIRTUAL bool isOpen() const { return fileHandle.is_open(); } MOCKABLE_VIRTUAL const 
std::string &getFileName() const { return fileName; } MOCKABLE_VIRTUAL void write(const char *data, size_t size); MOCKABLE_VIRTUAL void flush(); MOCKABLE_VIRTUAL void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue); MOCKABLE_VIRTUAL void expectMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t compareOperation); MOCKABLE_VIRTUAL bool addComment(const char *message); MOCKABLE_VIRTUAL std::unique_lock lockStream(); std::ofstream fileHandle; std::string fileName; std::mutex mutex; }; template struct PageTableTraits { }; template <> struct PageTableTraits<32> { // clang-format off enum { addressingBits = 32, NUM_OFFSET_BITS = 12, NUM_PTE_BITS = 9, NUM_PDE_BITS = 9, NUM_PDP_BITS = addressingBits - NUM_PDE_BITS - NUM_PTE_BITS - NUM_OFFSET_BITS, }; static const uint64_t physicalMemory; static const uint64_t numPTEntries; static const uint64_t sizePT; static const uint64_t ptBaseAddress; static const uint64_t numPDEntries; static const uint64_t sizePD; static const uint64_t pdBaseAddress; static const uint64_t numPDPEntries; static const uint64_t sizePDP; static const uint64_t pdpBaseAddress; // clang-format on }; template <> struct PageTableTraits<48> { // clang-format off enum { addressingBits = 48, NUM_OFFSET_BITS = PageTableTraits<32>::NUM_OFFSET_BITS, NUM_PTE_BITS = PageTableTraits<32>::NUM_PTE_BITS, NUM_PDE_BITS = PageTableTraits<32>::NUM_PDE_BITS, NUM_PDP_BITS = PageTableTraits<32>::NUM_PDP_BITS, NUM_PML4_BITS = addressingBits - NUM_PDP_BITS - NUM_PDE_BITS - NUM_PTE_BITS - NUM_OFFSET_BITS }; static const uint64_t physicalMemory; static const uint64_t numPTEntries; static const uint64_t sizePT; static const uint64_t ptBaseAddress; static const uint64_t numPDEntries; static const uint64_t sizePD; static const uint64_t pdBaseAddress; static const uint64_t numPDPEntries; static const uint64_t sizePDP; static const uint64_t pdpBaseAddress; static const uint64_t numPML4Entries; static const uint64_t sizePML4; static 
const uint64_t pml4BaseAddress; // clang-format on }; template struct AubPageTableHelper { typedef AubMemDump::PageTableTraits PageTableTraits; enum { addressingBits = Traits::addressingBits }; static inline uint32_t ptrToGGTT(const void *memory) { return static_cast(reinterpret_cast(memory)); } static inline uintptr_t ptrToPPGTT(const void *memory) { return reinterpret_cast(memory); } static inline uint64_t getPTEAddress(uint64_t ptIndex) { return PageTableTraits::ptBaseAddress + ptIndex * sizeof(uint64_t); } static inline uint64_t getPDEAddress(uint64_t pdIndex) { return PageTableTraits::pdBaseAddress + pdIndex * sizeof(uint64_t); } static inline uint64_t getPDPAddress(uint64_t pdpIndex) { return PageTableTraits::pdpBaseAddress + pdpIndex * sizeof(uint64_t); } }; template struct AubPageTableHelper32 : public AubPageTableHelper, PageTableTraits<32> { typedef AubPageTableHelper BaseClass; static void createContext(typename Traits::Stream &stream, uint32_t context); static uint64_t reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper); static void fixupLRC(uint8_t *pLrc); }; template struct AubPageTableHelper64 : public AubPageTableHelper, PageTableTraits<48> { typedef AubPageTableHelper BaseClass; static inline uint64_t getPML4Address(uint64_t pml4Index) { return pml4BaseAddress + pml4Index * sizeof(uint64_t); } static void createContext(typename Traits::Stream &stream, uint32_t context); static uint64_t reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper); static void fixupLRC(uint8_t *pLrc); }; template struct AubDump : public std::conditional, AubPageTableHelper64>::type { using Traits = TraitsIn; using AddressType = typename std::conditional::type; using BaseHelper = typename std::conditional, AubPageTableHelper64>::type; 
using Stream = typename Traits::Stream; typedef union _MiContextDescriptorReg_ { struct { uint64_t Valid : 1; //[0] uint64_t ForcePageDirRestore : 1; //[1] uint64_t ForceRestore : 1; //[2] uint64_t Legacy : 1; //[3] uint64_t ADor64bitSupport : 1; //[4] Selects 64-bit PPGTT in Legacy mode uint64_t LlcCoherencySupport : 1; //[5] uint64_t FaultSupport : 2; //[7:6] uint64_t PrivilegeAccessOrPPGTT : 1; //[8] Selects PPGTT in Legacy mode uint64_t FunctionType : 3; //[11:9] uint64_t LogicalRingCtxAddress : 20; //[31:12] uint64_t ContextID : 32; //[63:32] } sData; uint32_t ulData[2]; uint64_t qwordData[2 / 2]; } MiContextDescriptorReg, *pMiContextDescriptorReg; // Write a block of memory to a given address space using an optional hint static void addMemoryWrite(Stream &stream, uint64_t addr, const void *memory, size_t blockSize, int addressSpace, int hint = DataTypeHintValues::TraceNotype); static uint64_t reserveAddressGGTT(Stream &stream, uint32_t addr, size_t size, uint64_t physStart, AubGTTData data); static uint64_t reserveAddressGGTT(Stream &stream, const void *memory, size_t size, uint64_t physStart, AubGTTData data); static void reserveAddressGGTTAndWriteMmeory(Stream &stream, uintptr_t gfxAddress, const void *memory, uint64_t physAddress, size_t size, size_t offset, uint64_t additionalBits, const NEO::AubHelper &aubHelper); static void setGttEntry(MiGttEntry &entry, uint64_t address, AubGTTData data); private: static uint64_t reserveAddress(Stream &stream, uint32_t addr, size_t size, unsigned int addressSpace /* = AddressSpaceValues::TraceGttEntry*/, uint64_t physStart, AubGTTData data); }; struct LrcaHelper { LrcaHelper(uint32_t base) : mmioBase(base) { } int aubHintLRCA = DataTypeHintValues::TraceNotype; int aubHintCommandBuffer = DataTypeHintValues::TraceCommandBuffer; int aubHintBatchBuffer = DataTypeHintValues::TraceBatchBuffer; const char *name = "XCS"; uint32_t mmioBase = 0; size_t sizeLRCA = 0x2000; uint32_t alignLRCA = 0x1000; uint32_t offsetContext = 
0x1000; uint32_t offsetLRI0 = 0x01 * sizeof(uint32_t); uint32_t numRegsLRI0 = 14; uint32_t numNoops0 = 3; uint32_t offsetLRI1 = offsetLRI0 + (1 + numRegsLRI0 * 2 + numNoops0) * sizeof(uint32_t); //offsetLRI == 0x21 * sizeof(uint32_t); uint32_t numRegsLRI1 = 9; uint32_t numNoops1 = 13; uint32_t offsetLRI2 = offsetLRI1 + (1 + numRegsLRI1 * 2 + numNoops1) * sizeof(uint32_t); //offsetLR2 == 0x41 * sizeof(uint32_t); uint32_t numRegsLRI2 = 1; uint32_t offsetRingRegisters = offsetLRI0 + (3 * sizeof(uint32_t)); uint32_t offsetRingHead = 0x0 * sizeof(uint32_t); uint32_t offsetRingTail = 0x2 * sizeof(uint32_t); uint32_t offsetRingBase = 0x4 * sizeof(uint32_t); uint32_t offsetRingCtrl = 0x6 * sizeof(uint32_t); uint32_t offsetPageTableRegisters = offsetLRI1 + (3 * sizeof(uint32_t)); uint32_t offsetPDP0 = 0xc * sizeof(uint32_t); uint32_t offsetPDP1 = 0x8 * sizeof(uint32_t); uint32_t offsetPDP2 = 0x4 * sizeof(uint32_t); uint32_t offsetPDP3 = 0x0 * sizeof(uint32_t); void initialize(void *pLRCIn) const; void setRingHead(void *pLRCIn, uint32_t ringHead) const; void setRingTail(void *pLRCIn, uint32_t ringTail) const; void setRingBase(void *pLRCIn, uint32_t ringBase) const; void setRingCtrl(void *pLRCIn, uint32_t ringCtrl) const; void setPDP0(void *pLRCIn, uint64_t address) const; void setPDP1(void *pLRCIn, uint64_t address) const; void setPDP2(void *pLRCIn, uint64_t address) const; void setPDP3(void *pLRCIn, uint64_t address) const; void setPML4(void *pLRCIn, uint64_t address) const; MOCKABLE_VIRTUAL void setContextSaveRestoreFlags(uint32_t &value) const; }; struct LrcaHelperRcs : public LrcaHelper { LrcaHelperRcs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextRcs; aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferPrimary; aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferPrimary; sizeLRCA = 0x11000; name = "RCS"; } }; struct LrcaHelperBcs : public LrcaHelper { LrcaHelperBcs(uint32_t base) : LrcaHelper(base) { 
aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextBcs; aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferBlt; aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferBlt; name = "BCS"; } }; struct LrcaHelperVcs : public LrcaHelper { LrcaHelperVcs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextVcs; aubHintCommandBuffer = DataTypeHintValues::TraceCommandBufferMfx; aubHintBatchBuffer = DataTypeHintValues::TraceBatchBufferMfx; name = "VCS"; } }; struct LrcaHelperVecs : public LrcaHelper { LrcaHelperVecs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextVecs; name = "VECS"; } }; struct LrcaHelperCcs : public LrcaHelper { LrcaHelperCcs(uint32_t base) : LrcaHelper(base) { aubHintLRCA = DataTypeHintValues::TraceLogicalRingContextCcs; name = "CCS"; } }; extern const uint64_t g_pageMask; extern const size_t g_dwordCountMax; } // namespace AubMemDump compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_mem_dump.inl000066400000000000000000000330331363734646600257270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub/aub_helper.h" #include "aub_mem_dump.h" #include #include namespace AubMemDump { template void AubPageTableHelper32::fixupLRC(uint8_t *pLRC) { uint32_t pdAddress; pdAddress = BaseClass::getPDEAddress(0x600) >> 32; *(uint32_t *)(pLRC + 0x1094) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x600) & 0xffffffff; *(uint32_t *)(pLRC + 0x109c) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x400) >> 32; *(uint32_t *)(pLRC + 0x10a4) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x400) & 0xffffffff; *(uint32_t *)(pLRC + 0x10ac) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x200) >> 32; *(uint32_t *)(pLRC + 
0x10b4) = pdAddress; pdAddress = BaseClass::getPDEAddress(0x200) & 0xffffffff; *(uint32_t *)(pLRC + 0x10bc) = pdAddress; pdAddress = BaseClass::getPDEAddress(0) >> 32; *(uint32_t *)(pLRC + 0x10c4) = pdAddress; pdAddress = BaseClass::getPDEAddress(0) & 0xffffffff; *(uint32_t *)(pLRC + 0x10cc) = pdAddress; } template void AubPageTableHelper64::fixupLRC(uint8_t *pLRC) { uint32_t pml4Address = getPML4Address(0) >> 32; *(uint32_t *)(pLRC + 0x10c4) = pml4Address; pml4Address = getPML4Address(0) & 0xffffffff; *(uint32_t *)(pLRC + 0x10cc) = pml4Address; } // Write a block of memory to a given address space using an optional hint template void AubDump::addMemoryWrite(typename Traits::Stream &stream, uint64_t addr, const void *memory, size_t sizeRemaining, int addressSpace, int hint) { // We can only dump a relatively small amount per CmdServicesMemTraceMemoryWrite auto sizeMemoryWriteHeader = sizeof(CmdServicesMemTraceMemoryWrite) - sizeof(CmdServicesMemTraceMemoryWrite::data); auto blockSizeMax = g_dwordCountMax * sizeof(uint32_t) - sizeMemoryWriteHeader; if (hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextRcs || hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextBcs || hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextVcs || hint == CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceLogicalRingContextVecs) { DEBUG_BREAK_IF(sizeRemaining <= 0x10cc); uint8_t *pLRC = reinterpret_cast(const_cast(memory)); BaseHelper::fixupLRC(pLRC); } // loop to dump all of the blocks while (sizeRemaining > 0) { auto sizeThisIteration = std::min(blockSizeMax, sizeRemaining); stream.writeMemory(addr, memory, sizeThisIteration, addressSpace, hint); sizeRemaining -= sizeThisIteration; memory = (uint8_t *)memory + sizeThisIteration; addr += sizeThisIteration; } } // Reserve memory in the GGTT. 
template uint64_t AubDump::reserveAddress(typename Traits::Stream &stream, uint32_t addr, size_t size, unsigned int addressSpace, uint64_t physStart, AubGTTData data) { auto startPage = addr & g_pageMask; auto endPage = (addr + size - 1) & g_pageMask; auto numPages = (uint32_t)(((endPage - startPage) / 4096) + 1); // Can only handle 16 bits of dwordCount. DEBUG_BREAK_IF(!(numPages > 0 && (numPages + 4) < 65536)); auto gttTableOffset = static_cast((((uint32_t)startPage) / 4096) * sizeof(MiGttEntry)); // Write header { typedef AubMemDump::CmdServicesMemTraceMemoryWrite CmdServicesMemTraceMemoryWrite; stream.writeMemoryWriteHeader(gttTableOffset, numPages * sizeof(AubMemDump::MiGttEntry), addressSpace, CmdServicesMemTraceMemoryWrite::DataTypeHintValues::TraceNotype); } uint64_t physAddress = physStart; while (startPage <= endPage) { MiGttEntry entry; setGttEntry(entry, physAddress, data); stream.writeGTT(gttTableOffset, entry.uiData); gttTableOffset += sizeof(entry); physAddress += 4096; startPage += 4096; } return physStart; } template uint64_t AubDump::reserveAddressGGTT(typename Traits::Stream &stream, uint32_t addr, size_t size, uint64_t physStart, AubGTTData data) { return AubDump::reserveAddress(stream, addr, size, AddressSpaceValues::TraceGttEntry, physStart, data); } template uint64_t AubDump::reserveAddressGGTT(typename Traits::Stream &stream, const void *memory, size_t size, uint64_t physStart, AubGTTData data) { auto gfxAddress = BaseHelper::ptrToGGTT(memory); return AubDump::reserveAddress(stream, gfxAddress, size, AddressSpaceValues::TraceGttEntry, physStart, data); } template void AubDump::reserveAddressGGTTAndWriteMmeory(typename Traits::Stream &stream, uintptr_t gfxAddress, const void *memory, uint64_t physAddress, size_t size, size_t offset, uint64_t additionalBits, const NEO::AubHelper &aubHelper) { auto vmAddr = (gfxAddress + offset) & ~(MemoryConstants::pageSize - 1); auto pAddr = physAddress & ~(MemoryConstants::pageSize - 1); 
AubDump::reserveAddressPPGTT(stream, vmAddr, MemoryConstants::pageSize, pAddr, additionalBits, aubHelper); int hint = NEO::AubHelper::getMemTrace(additionalBits); AubDump::addMemoryWrite(stream, physAddress, reinterpret_cast(reinterpret_cast(memory) + offset), size, hint); } template void AubDump::setGttEntry(MiGttEntry &entry, uint64_t address, AubGTTData data) { entry.uiData = 0; entry.pageConfig.PhysicalAddress = address / 4096; entry.pageConfig.Present = data.present; entry.pageConfig.LocalMemory = data.localMemory; } template uint64_t AubPageTableHelper32::reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper) { auto startAddress = gfxAddress; auto endAddress = gfxAddress + blockSize - 1; auto startPTE = startAddress >> 12; auto endPTE = endAddress >> 12; auto numPTEs = endPTE - startPTE + 1; auto startPDE = startPTE >> 9; auto endPDE = endPTE >> 9; auto numPDEs = endPDE - startPDE + 1; // Process the PD entries bool writePDE = true; if (writePDE) { auto startAddress = BaseClass::getPDEAddress(startPDE); auto addressSpace = aubHelper.getMemTraceForPdEntry(); auto hint = aubHelper.getDataHintForPdEntry(); stream.writeMemoryWriteHeader(startAddress, numPDEs * sizeof(uint64_t), addressSpace, hint); auto currPDE = startPDE; auto physPage = BaseClass::getPTEAddress(startPTE) & g_pageMask; while (currPDE <= endPDE) { auto pde = physPage | NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pde, addressSpace); startAddress += sizeof(pde); physPage += 4096; currPDE++; } } // Process the PT entries bool writePTE = true; if (writePTE) { auto startAddress = BaseClass::getPTEAddress(startPTE); auto addressSpace = aubHelper.getMemTraceForPtEntry(); auto hint = aubHelper.getDataHintForPtEntry(); stream.writeMemoryWriteHeader(startAddress, numPTEs * sizeof(uint64_t), addressSpace, hint); auto currPTE = startPTE; auto physPage = 
physAddress & g_pageMask; while (currPTE <= endPTE) { auto pte = physPage | additionalBits; stream.writePTE(startAddress, pte, addressSpace); startAddress += sizeof(pte); physPage += 4096; currPTE++; } } return physAddress; } template uint64_t AubPageTableHelper64::reserveAddressPPGTT(typename Traits::Stream &stream, uintptr_t gfxAddress, size_t blockSize, uint64_t physAddress, uint64_t additionalBits, const NEO::AubHelper &aubHelper) { auto startAddress = gfxAddress; auto endAddress = gfxAddress + blockSize - 1; auto startPTE = startAddress >> 12; auto endPTE = endAddress >> 12; auto numPTEs = endPTE - startPTE + 1; auto startPDE = startPTE >> 9; auto endPDE = endPTE >> 9; auto numPDEs = endPDE - startPDE + 1; auto startPDP = startPDE >> 9; auto endPDP = endPDE >> 9; auto numPDPs = endPDP - startPDP + 1; auto startPML4 = startPDP >> 9; auto endPML4 = endPDP >> 9; auto numPML4s = endPML4 - startPML4 + 1; // Process the PML4 entries bool writePML4 = true; if (writePML4) { auto startAddress = getPML4Address(startPML4); auto addressSpace = aubHelper.getMemTraceForPml4Entry(); auto hint = aubHelper.getDataHintForPml4Entry(); stream.writeMemoryWriteHeader(startAddress, numPML4s * sizeof(uint64_t), addressSpace, hint); auto currPML4 = startPML4; auto physPage = BaseClass::getPDPAddress(startPDP) & g_pageMask; while (currPML4 <= endPML4) { auto pml4 = physPage | NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pml4, addressSpace); startAddress += sizeof(pml4); physPage += 4096; currPML4++; } } // Process the PDP entries bool writePDPE = true; if (writePDPE) { auto startAddress = BaseClass::getPDPAddress(startPDP); auto addressSpace = aubHelper.getMemTraceForPdpEntry(); auto hint = aubHelper.getDataHintForPdpEntry(); stream.writeMemoryWriteHeader(startAddress, numPDPs * sizeof(uint64_t), addressSpace, hint); auto currPDP = startPDP; auto physPage = BaseClass::getPDEAddress(startPDE) & g_pageMask; while (currPDP <= endPDP) { auto pdp = physPage 
| NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pdp, addressSpace); startAddress += sizeof(pdp); physPage += 4096; currPDP++; } } // Process the PD entries bool writePDE = true; if (writePDE) { auto startAddress = BaseClass::getPDEAddress(startPDE); auto addressSpace = aubHelper.getMemTraceForPdEntry(); auto hint = aubHelper.getDataHintForPdEntry(); stream.writeMemoryWriteHeader(startAddress, numPDEs * sizeof(uint64_t), addressSpace, hint); auto currPDE = startPDE; auto physPage = BaseClass::getPTEAddress(startPTE) & g_pageMask; while (currPDE <= endPDE) { auto pde = physPage | NEO::AubHelper::getPTEntryBits(additionalBits); stream.writePTE(startAddress, pde, addressSpace); startAddress += sizeof(pde); physPage += 4096; currPDE++; } } // Process the PT entries bool writePTE = true; if (writePTE) { auto startAddress = BaseClass::getPTEAddress(startPTE); auto addressSpace = aubHelper.getMemTraceForPtEntry(); auto hint = aubHelper.getDataHintForPtEntry(); stream.writeMemoryWriteHeader(startAddress, numPTEs * sizeof(uint64_t), addressSpace, hint); auto currPTE = startPTE; auto physPage = physAddress & g_pageMask; while (currPTE <= endPTE) { auto pte = physPage | additionalBits; stream.writePTE(startAddress, pte, addressSpace); startAddress += sizeof(pte); physPage += 4096; currPTE++; } } return physAddress; } template void AubPageTableHelper32::createContext(typename Traits::Stream &stream, uint32_t context) { AubPpgttContextCreate cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x14; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; cmd.Handle = context; cmd.AdvancedContext = false; cmd.SixtyFourBit = 0; cmd.PageDirPointer[0] = BaseClass::getPDEAddress(0x000); cmd.PageDirPointer[1] = BaseClass::getPDEAddress(0x200); cmd.PageDirPointer[2] = BaseClass::getPDEAddress(0x400); cmd.PageDirPointer[3] = BaseClass::getPDEAddress(0x600); 
stream.createContext(cmd); } template void AubPageTableHelper64::createContext(typename Traits::Stream &stream, uint32_t context) { AubPpgttContextCreate cmd; memset(&cmd, 0, sizeof(cmd)); cmd.Header.Type = 0x7; cmd.Header.Opcode = 0x1; cmd.Header.SubOp = 0x14; cmd.Header.DwordLength = ((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1; cmd.Handle = context; cmd.AdvancedContext = false; cmd.SixtyFourBit = 1; cmd.PageDirPointer[0] = getPML4Address(0); stream.createContext(cmd); } } // namespace AubMemDump compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_services.h000066400000000000000000001156401363734646600254210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "aub_header.h" #include #ifndef WIN32 #pragma pack(4) #else #pragma pack(push, 4) #endif struct CmdServicesMemTraceVersion { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t memtraceFileVersion; struct { uint32_t metal : 3; uint32_t stepping : 5; uint32_t device : 8; uint32_t csxSwizzling : 2; uint32_t recordingMethod : 2; uint32_t pch : 8; uint32_t captureTool : 4; }; uint32_t primaryVersion; uint32_t secondaryVersion; char commandLine[4]; int32_t getCommandLineLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xe) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xe; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xe; } struct CaptureToolValues { enum { 
GenKmdCapture = 1, Aubload = 0, Amber = 3, Ghal3DUlt = 2, AubDump = 4 }; }; struct DeviceValues { enum { Blc = 2, Il = 5, Glk = 17, Skl = 12, Hsw = 9, Bxt = 14, Sbr = 6, Cnl = 15, Ivb = 7, Chv = 13, El = 4, Ctg = 3, Lrb2 = 8, Bwr = 0, Vlv = 10, Cln = 1, Kbl = 16, Bdw = 11, Icllp = 19, Cfl = 24, Lkf = 25, Ehl = 28, Tgllp = 22 }; }; struct RecordingMethodValues { enum { Phy = 1, Gfx = 0 }; }; struct CsxSwizzlingValues { enum { Disabled = 0, Enabled = 1 }; }; struct PchValues { enum { LynxPoint = 4, CougarPoint = 2, PantherPoint = 3, Default = 0, IbexPeak = 1 }; }; struct SteppingValues { enum { N = 13, O = 14, L = 11, M = 12, B = 1, C = 2, A = 0, F = 5, G = 6, D = 3, E = 4, Z = 25, X = 23, Y = 24, R = 17, S = 18, P = 15, Q = 16, V = 21, W = 22, T = 19, U = 20, J = 9, K = 10, H = 7, I = 8 }; }; }; struct CmdServicesMemTraceRegisterCompare { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t registerOffset; struct { uint32_t noReadExpect : 1; uint32_t : 15; uint32_t registerSize : 4; uint32_t : 8; uint32_t registerSpace : 4; }; uint32_t readMaskLow; uint32_t readMaskHigh; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x1) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x1; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x1; } struct RegisterSpaceValues { enum { MchBar = 1, Mmio = 0, VtdBar = 5, PciConfig = 2, IO = 4, AzaliaBar = 3 }; }; struct RegisterSizeValues { enum { Qword = 3, 
Dword = 2, Word = 1, Byte = 0 }; }; struct NoReadExpectValues { enum { ReadExpect = 0, ReadWithoutExpect = 1 }; }; }; struct CmdServicesMemTraceRegisterPoll { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t registerOffset; struct { uint32_t : 1; uint32_t timeoutAction : 1; uint32_t pollNotEqual : 1; uint32_t : 1; uint32_t operationType : 4; uint32_t : 8; uint32_t registerSize : 4; uint32_t : 8; uint32_t registerSpace : 4; }; uint32_t pollMaskLow; uint32_t pollMaskHigh; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x2) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x2; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x2; } struct OperationTypeValues { enum { Normal = 0, InterlacedCrc = 1 }; }; struct RegisterSpaceValues { enum { MchBar = 1, Mmio = 0, VtdBar = 5, PciConfig = 2, IO = 4, AzaliaBar = 3 }; }; struct TimeoutActionValues { enum { Abort = 0, Ignore = 1 }; }; struct RegisterSizeValues { enum { Qword = 3, Dword = 2, Word = 1, Byte = 0 }; }; }; struct CmdServicesMemTraceRegisterWrite { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t registerOffset; struct { uint32_t : 4; uint32_t messageSourceId : 4; uint32_t : 8; uint32_t registerSize : 4; uint32_t : 8; uint32_t registerSpace : 4; }; uint32_t writeMaskLow; uint32_t writeMaskHigh; uint32_t data[1]; 
int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x3) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x3; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x3; } struct MessageSourceIdValues { enum { Workaround = 4, Gt = 2, Ia = 0, Me = 1, Pch = 3 }; }; struct RegisterSpaceValues { enum { MchBar = 1, Mmio = 0, VtdBar = 5, PciConfig = 2, IO = 4, AzaliaBar = 3 }; }; struct RegisterSizeValues { enum { Qword = 3, Dword = 2, Word = 1, Byte = 0 }; }; }; struct CmdServicesMemTraceMemoryCompare { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t address; uint32_t addressHigh; struct { uint32_t noReadExpect : 1; uint32_t repeatMemory : 1; uint32_t tiling : 2; uint32_t : 2; uint32_t crcCompare : 1; uint32_t compareOperation : 1; uint32_t : 12; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; uint32_t dataSizeInBytes; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x4) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x4; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t 
subOpcode() { return 0x4; } struct RepeatMemoryValues { enum { NoRepeat = 0, Repeat = 1 }; }; struct DataTypeHintValues { enum { TraceInterfaceDescriptor = 29, TraceCommandBufferPrimary = 39, TraceRemap = 37, TraceVertexShaderState = 16, TraceSfViewport = 23, TraceMediaObjectIndirectData = 36, Trace1DMap = 10, TraceVolumeMap = 9, TraceVldState = 30, TraceBatchBufferPrimary = 42, TraceSamplerDefaultColor = 28, TraceClipViewport = 22, TraceStripsFansState = 19, TraceNotype = 0, TraceAudioLinkTable = 46, TraceGeometryShaderState = 17, TraceConstantBuffer = 11, TraceBatchBufferBlt = 43, TraceBinBuffer = 2, TraceIndexBuffer = 13, Trace2DMap = 6, TraceCubeMap = 7, TraceVfeState = 31, TraceDepthStencilState = 33, TraceBatchBufferMfx = 44, TraceRenderSurfaceState = 35, TraceWindowerIzState = 20, TraceCommandBufferMfx = 41, TraceBatchBuffer = 1, TraceCcViewport = 24, TraceColorCalcState = 21, TraceCommandBuffer = 38, TraceAudioData = 47, TraceSlowStateBuffer = 4, TraceAudioCommandBuffer = 45, TraceCommandBufferBlt = 40, TraceKernelInstructions = 26, TraceConstantUrbEntry = 12, TraceBlendState = 32, TraceIndirectStateBuffer = 8, TraceClipperState = 18, TraceSamplerState = 25, TraceBindingTableState = 34, TraceBinPointerList = 3, TraceVertexBufferState = 5, TraceScratchSpace = 27 }; }; struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct CrcCompareValues { enum { Crc = 1, NoCrc = 0 }; }; struct NoReadExpectValues { enum { ReadExpect = 0, ReadWithoutExpect = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePhysicalPdpEntry = 8 }; }; struct CompareOperationValues { enum { CompareNotEqual = 1, CompareEqual = 0 }; }; }; struct CmdServicesMemTraceMemoryPoll { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t 
instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t address; uint32_t addressHigh; struct { uint32_t pollNotEqual : 1; uint32_t : 1; uint32_t tiling : 2; uint32_t dataSize : 2; uint32_t : 2; uint32_t timeoutAction : 1; uint32_t : 11; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; uint32_t pollMaskLow; uint32_t pollMaskHigh; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (6); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 5; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x5) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x5; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x5; } struct DataTypeHintValues { enum { TraceInterfaceDescriptor = 29, TraceCommandBufferPrimary = 39, TraceRemap = 37, TraceVertexShaderState = 16, TraceSfViewport = 23, TraceMediaObjectIndirectData = 36, Trace1DMap = 10, TraceVolumeMap = 9, TraceVldState = 30, TraceBatchBufferPrimary = 42, TraceSamplerDefaultColor = 28, TraceClipViewport = 22, TraceStripsFansState = 19, TraceNotype = 0, TraceAudioLinkTable = 46, TraceGeometryShaderState = 17, TraceConstantBuffer = 11, TraceBatchBufferBlt = 43, TraceBinBuffer = 2, TraceIndexBuffer = 13, Trace2DMap = 6, TraceCubeMap = 7, TraceVfeState = 31, TraceDepthStencilState = 33, TraceBatchBufferMfx = 44, TraceRenderSurfaceState = 35, TraceWindowerIzState = 20, TraceCommandBufferMfx = 41, TraceBatchBuffer = 1, TraceCcViewport = 24, TraceColorCalcState = 21, TraceCommandBuffer = 38, TraceAudioData = 47, TraceSlowStateBuffer = 4, TraceAudioCommandBuffer = 45, TraceCommandBufferBlt = 40, TraceKernelInstructions = 26, 
TraceConstantUrbEntry = 12, TraceBlendState = 32, TraceIndirectStateBuffer = 8, TraceClipperState = 18, TraceSamplerState = 25, TraceBindingTableState = 34, TraceBinPointerList = 3, TraceVertexBufferState = 5, TraceScratchSpace = 27 }; }; struct DataSizeValues { enum { Qword = 3, Dword = 2, Word = 1, Byte = 0 }; }; struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct TimeoutActionValues { enum { Abort = 0, Ignore = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePhysicalPdpEntry = 8 }; }; }; struct CmdServicesMemTraceMemoryWrite { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint64_t address; int32_t getAddressLength() const { return 2 - (1) + 1; } struct { uint32_t frontDoorAccess : 1; uint32_t repeatMemory : 1; uint32_t tiling : 2; uint32_t : 16; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; uint32_t dataSizeInBytes; uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (5); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x6) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x6; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x6; } struct RepeatMemoryValues { enum { NoRepeat = 0, Repeat = 1 }; }; struct DataTypeHintValues { enum { TraceVertexBufferState = 5, TraceCommandBufferPrimary = 39, 
TraceVertexShaderState = 16, TraceExtendedRootTableEntry = 52, TraceClipViewport = 22, Trace1DMap = 10, TraceBatchBufferPrimary = 42, TraceClipperState = 18, TraceLogicalRingContextVecs = 51, TraceRingContextVcs = 57, TraceLri = 59, TraceBlendState = 32, TraceBinBuffer = 2, TraceSlowStateBuffer = 4, TraceRemap = 37, TraceDepthStencilState = 33, TraceAudioData = 47, TraceDummyGgttEntry = 62, TraceWindowerIzState = 20, Trace2DMap = 6, TraceBindingTableState = 34, TraceGucProcessDescriptor = 60, TraceIndirectStateBuffer = 8, TraceConstantBuffer = 11, TraceMediaObjectIndirectData = 36, TraceStripsFansState = 19, TraceBatchBuffer = 1, TraceLogicalRingContextVcs = 50, TraceSfViewport = 23, TraceCommandBufferBlt = 40, TraceRingContextBcs = 56, TraceCcViewport = 24, TraceLogicalRingContextCcs = 64, TraceIndexBuffer = 13, TraceScratchSpace = 27, TraceGucContextDescriptor = 61, TraceBatchBufferMfx = 44, TraceCommandBufferMfx = 41, TraceBatchBufferBlt = 43, TraceSamplerState = 25, TraceRingContextRcs = 55, TraceAudioLinkTable = 46, TraceRenderSurfaceState = 35, TraceSamplerDefaultColor = 28, TraceVldState = 30, TraceVfeState = 31, TraceExtendedContextTableEntry = 53, TraceLogicalRingContextRcs = 48, TraceInterfaceDescriptor = 29, TraceConstantUrbEntry = 12, TraceCommandBuffer = 38, TracePasidTableEntry = 54, TraceBinPointerList = 3, TraceRingContextVecs = 58, TraceNotype = 0, TraceGeometryShaderState = 17, TraceAudioCommandBuffer = 45, TraceColorCalcState = 21, TraceKernelInstructions = 26, TraceVolumeMap = 9, TraceCubeMap = 7, TraceLogicalRingContextBcs = 49 }; }; struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePowerContext = 11, TracePhysicalPdpEntry = 8 }; }; }; struct 
CmdServicesMemTraceMemoryWriteDiscontiguous { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; struct { uint32_t frontDoorAccess : 1; uint32_t repeatMemory : 1; uint32_t tiling : 2; uint32_t numberOfAddressDataPairs : 16; uint32_t dataTypeHint : 8; uint32_t addressSpace : 4; }; struct { uint64_t address; uint32_t dataSizeInBytes; } Dword_2_To_190[63]; int32_t getDword2To190Length() const { return 190 - (2) + 1; } uint32_t data[1]; int32_t getDataLength() const { return getPacketSize() - (191); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 190; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xb) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xb; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xb; } struct RepeatMemoryValues { enum { NoRepeat = 0, Repeat = 1 }; }; struct DataTypeHintValues { enum { TraceVertexBufferState = 5, TraceCommandBufferPrimary = 39, TraceRingContextBcs = 56, TraceExtendedRootTableEntry = 52, TraceClipViewport = 22, Trace1DMap = 10, TraceBatchBufferPrimary = 42, TraceClipperState = 18, TraceRingContextVcs = 57, TraceVolumeMap = 9, TraceBlendState = 32, TraceSlowStateBuffer = 4, TraceRemap = 37, TraceDepthStencilState = 33, TraceAudioData = 47, TraceColorCalcState = 21, TraceWindowerIzState = 20, Trace2DMap = 6, TraceBindingTableState = 34, TraceIndirectStateBuffer = 8, TraceConstantBuffer = 11, TraceMediaObjectIndirectData = 36, TraceStripsFansState = 19, TraceBatchBuffer = 1, TraceSfViewport = 23, TraceCommandBufferBlt = 40, TraceBinBuffer = 2, TraceCcViewport = 24, TraceIndexBuffer = 13, 
TraceScratchSpace = 27, TraceLogicalRingContextVecs = 51, TraceBatchBufferMfx = 44, TraceCommandBufferMfx = 41, TraceBatchBufferBlt = 43, TraceSamplerState = 25, TraceRingContextRcs = 55, TraceAudioLinkTable = 46, TraceRenderSurfaceState = 35, TraceSamplerDefaultColor = 28, TraceVldState = 30, TraceVfeState = 31, TraceExtendedContextTableEntry = 53, TraceLogicalRingContextRcs = 48, TraceInterfaceDescriptor = 29, TraceConstantUrbEntry = 12, TraceCommandBuffer = 38, TraceVertexShaderState = 16, TraceBinPointerList = 3, TraceRingContextVecs = 58, TraceNotype = 0, TraceGeometryShaderState = 17, TraceAudioCommandBuffer = 45, TraceLogicalRingContextVcs = 50, TraceKernelInstructions = 26, TracePasidTableEntry = 54, TraceCubeMap = 7, TraceLogicalRingContextBcs = 49 }; }; struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct AddressSpaceValues { enum { TraceGttEntry = 4, TraceNonapetureGttGfx = 7, TraceLocal = 1, TracePml4Entry = 10, TraceGttGfx = 0, TraceNonlocal = 2, TraceGttPdEntry = 3, TracePpgttEntry = 6, TracePpgttGfx = 5, TracePpgttPdEntry = 9, TracePowerContext = 11, TracePhysicalPdpEntry = 8 }; }; }; struct CmdServicesMemTraceFrameBegin { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; struct { uint32_t frameNumber : 16; uint32_t : 16; }; int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x7) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x7; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x7; } }; struct CmdServicesMemTraceComment 
{ union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; struct { uint32_t syncOnComment : 1; uint32_t syncOnSimulatorDisplay : 1; uint32_t : 30; }; char comment[4]; int32_t getCommentLength() const { return getPacketSize() - (2); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x8) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x8; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x8; } }; struct CmdServicesMemTraceDelay { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t time; int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x9) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x9; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0x9; } }; struct CmdServicesMemTraceMemoryDump { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint32_t physicalAddressDwordLow; uint32_t physicalAddressDwordHigh; uint32_t stride; uint32_t width; uint32_t height; struct { uint32_t 
addressSpace : 2; uint32_t : 2; uint32_t tiling : 2; uint32_t : 26; }; char filename[4]; int32_t getFilenameLength() const { return getPacketSize() - (7); } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 5; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xa) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xa; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xa; } struct TilingValues { enum { NoTiling = 0, WTiling = 3, YTiling = 2, XTiling = 1 }; }; struct AddressSpaceValues { enum { TraceGttGfx = 0, TraceLocal = 1 }; }; }; struct CmdServicesMemTraceTestPhaseMarker { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; struct { uint32_t toolSpecificSubPhase : 12; uint32_t beginTestPhase : 4; uint32_t : 16; }; int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 1; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xc) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xc; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xc; } struct BeginTestPhaseValues { enum { PollForTestCompletion = 8, SetupPhase = 2, DispatchPhase = 3, VerificationPhase = 10, MemoryInitializationPhase = 0, ExecutePhase = 4 }; }; }; struct CmdServicesMemTraceMemoryContinuousRegion { union { AubCmdHdr Header; struct { uint32_t dwordCount 
: 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint64_t address; int32_t getAddressLength() const { return 2 - (1) + 1; } uint64_t regionSize; int32_t getRegionSizeLength() const { return 4 - (3) + 1; } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xd) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xd; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xd; } }; struct CmdServicesMemTracePredicate { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode : 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; struct { uint32_t predicateState : 1; uint32_t target : 4; uint32_t : 27; }; int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 4; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0xf) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0xf; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 0xf; } struct PredicateStateValues { enum { Disabled = 0, Enabled = 1 }; }; struct TargetValues { enum { Fpgarunlist = 8, Simulator = 0, Pipe = 1, Silicon = 4, Uncore = 6, Emulator = 3, Pipe2D = 7, Fpgamedia = 5, Pipegt = 2 }; }; }; struct CmdServicesMemTraceDumpCompress { union { AubCmdHdr Header; struct { uint32_t dwordCount : 16; uint32_t instructionSubOpcode 
: 7; uint32_t instructionOpcode : 6; uint32_t instructionType : 3; }; }; uint64_t surfaceAddress; uint64_t getSurfaceAddress() const { return getMisalignedUint64(&this->surfaceAddress); } void setSurfaceAddress(const uint64_t surfaceAddress) { setMisalignedUint64(&this->surfaceAddress, surfaceAddress); } int getSurfaceAddressLength() const { return 2 - (1) + 1; } uint32_t surfaceWidth; uint32_t surfaceHeight; uint32_t surfacePitch; struct { uint32_t surfaceFormat : 12; uint32_t dumpType : 3; uint32_t : 1; uint32_t surfaceTilingType : 3; uint32_t : 3; uint32_t surfaceType : 3; uint32_t : 3; uint32_t tiledResourceMode : 2; uint32_t : 1; uint32_t useClearValue : 1; }; uint64_t auxSurfaceAddress; int getAuxSurfaceAddressLength() const { return 8 - (7) + 1; } uint32_t auxSurfaceWidth; uint32_t auxSurfaceHeight; uint32_t auxSurfacePitch; struct { uint32_t auxSurfaceQPitch : 17; uint32_t : 4; uint32_t auxSurfaceTilingType : 3; uint32_t : 8; }; struct { uint32_t blockWidth : 8; uint32_t blockHeight : 8; uint32_t blockDepth : 8; uint32_t mode : 1; uint32_t algorithm : 3; uint32_t : 4; }; uint32_t tileWidth; uint32_t tileHeight; uint32_t tileDepth; uint32_t clearColorRed; uint32_t clearColorGreen; uint32_t clearColorBlue; uint32_t clearColorAlpha; struct { uint32_t gttType : 2; uint32_t clearColorType : 1; uint32_t : 29; }; uint32_t directoryHandle; uint64_t clearColorAddress; int getClearColorAddressLength() const { return 24 - (23) + 1; } int32_t getPacketSize() const { return dwordCount + 1; } int32_t getLengthBias() const { return 1; } uint32_t getBaseLength() const { return 19; } bool matchesHeader() const { if (instructionType != 0x7) return false; if (instructionOpcode != 0x2e) return false; if (instructionSubOpcode != 0x10) return false; return true; } void setHeader() { instructionType = 0x7; instructionOpcode = 0x2e; instructionSubOpcode = 0x10; } static uint32_t type() { return 0x7; } static uint32_t opcode() { return 0x2e; } static uint32_t subOpcode() { return 
0x10; } struct GttTypeValues { enum { Ppgtt = 1, Ggtt = 0 }; }; struct SurfaceTilingTypeValues { enum { YmajorS = 4, Xmajor = 2, YmajorF = 5, Linear = 0, Wmajor = 1, Ymajor = 3 }; }; struct ModeValues { enum { Horizontal = 1, Vertical = 0 }; }; struct ClearColorTypeValues { enum { Immediate = 0, Address = 1 }; }; struct SurfaceTypeValues { enum { SurftypeCube = 3, SurftypeStrbuf = 5, SurftypeBuffer = 4, Surftype3D = 2, Surftype2D = 1, Surftype1D = 0, SurftypeNull = 6 }; }; struct AlgorithmValues { enum { Uncompressed = 4, Astc = 1, Lossless = 2, Media = 0, Msaa = 3 }; }; struct AuxSurfaceTilingTypeValues { enum { YmajorS = 4, Xmajor = 2, YmajorF = 5, Linear = 0, Wmajor = 1, Ymajor = 3 }; }; struct DumpTypeValues { enum { Bin = 1, Png = 4, Bmp = 0, Bmp32 = 2, Tre = 3 }; }; struct TiledResourceModeValues { enum { TrmodeNone = 0, TrmodeYf = 1, TrmodeYs = 2 }; }; }; #ifndef WIN32 #pragma pack() #else #pragma pack(pop) #endif compute-runtime-20.13.16352/opencl/source/aub_mem_dump/aub_stream_stubs.cpp000066400000000000000000000020411363734646600266320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/aubstream.h" namespace aub_stream_stubs { uint16_t tbxServerPort = 4321; std::string tbxServerIp = "127.0.0.1"; } // namespace aub_stream_stubs namespace aub_stream { AubManager *AubManager::create(uint32_t productFamily, uint32_t devicesCount, uint64_t memoryBankSizeInGB, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace) { return nullptr; } extern "C" { void injectMMIOList(MMIOList mmioList){}; void setTbxServerPort(uint16_t port) { aub_stream_stubs::tbxServerPort = port; }; void setTbxServerIp(std::string server) { // better to avoid reassigning global variables which assume memory allocations since // we could step into false-positive memory leak detection with embedded leak check helper if 
(aub_stream_stubs::tbxServerIp != server) aub_stream_stubs::tbxServerIp = server; }; } } // namespace aub_stream compute-runtime-20.13.16352/opencl/source/aub_mem_dump/context_flags.cpp000066400000000000000000000004211363734646600261300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub_mem_dump/aub_mem_dump.h" namespace AubMemDump { void LrcaHelper::setContextSaveRestoreFlags(uint32_t &ctxSrCtlValue) const { } } // namespace AubMemDump compute-runtime-20.13.16352/opencl/source/aub_mem_dump/page_table_entry_bits.h000066400000000000000000000005611363734646600272670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace PageTableEntry { const uint32_t presentBit = 0; const uint32_t writableBit = 1; const uint32_t userSupervisorBit = 2; const uint64_t nonValidBits = std::numeric_limits::max(); } // namespace PageTableEntry compute-runtime-20.13.16352/opencl/source/built_ins/000077500000000000000000000000001363734646600221255ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/CMakeLists.txt000066400000000000000000000025141363734646600246670ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_BUILT_INS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aux_translation_builtin.h ${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/builtins_dispatch_builder.h ${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_vme.h ${CMAKE_CURRENT_SOURCE_DIR}/built_ins.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/unknown_built_in_name.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vme_builtin.h ${CMAKE_CURRENT_SOURCE_DIR}/vme_dispatch_builder.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_INS}) 
set_property(GLOBAL PROPERTY RUNTIME_SRCS_BUILT_INS ${RUNTIME_SRCS_BUILT_INS}) set(RUNTIME_SRCS_BUILT_IN_KERNELS ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_motion_estimate_intel.builtin_kernel ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_IN_KERNELS}) if(NOT (TARGET ${BUILTINS_VME_LIB_NAME})) add_subdirectory(registry) if(COMPILE_BUILT_INS) add_subdirectory(kernels) endif() endif() compute-runtime-20.13.16352/opencl/source/built_ins/aux_translation_builtin.h000066400000000000000000000074231363734646600272450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include namespace NEO { template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device); template bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { size_t kernelInstanceNumber = 0; size_t numMemObjectsToTranslate = multiDispatchInfo.getMemObjsForAuxTranslation()->size(); resizeKernelInstances(numMemObjectsToTranslate); multiDispatchInfo.setBuiltinOpParams(operationParams); for (auto &memObj : *multiDispatchInfo.getMemObjsForAuxTranslation()) { DispatchInfoBuilder builder; size_t allocationSize = alignUp(memObj->getSize(), 512); UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1); if (kernelInstanceNumber == 0) { // Before Kernel registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchInitCommands, true); } if (kernelInstanceNumber == 
numMemObjectsToTranslate - 1) { // After Kernel registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchEpilogueCommands, false); } if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) { builder.setKernel(convertToNonAuxKernel[kernelInstanceNumber++].get()); } else { UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection); builder.setKernel(convertToAuxKernel[kernelInstanceNumber++].get()); } builder.setArg(0, memObj); builder.setArg(1, memObj); size_t xGws = allocationSize / 16; builder.setDispatchGeometry(Vec3{xGws, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); builder.bake(multiDispatchInfo); } return true; } protected: using RegisteredMethodDispatcherT = RegisteredMethodDispatcher; template static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &) { MemorySynchronizationCommands::addPipeControl(linearStream, dcFlush); } template static size_t getSizeForSinglePipeControl(size_t, const HardwareInfo &, bool) { return MemorySynchronizationCommands::getSizeForSinglePipeControl(); } template void registerPipeControlProgramming(RegisteredMethodDispatcherT &dispatcher, bool dcFlush) const { if (dcFlush) { dispatcher.registerMethod(this->dispatchPipeControl); } else { dispatcher.registerMethod(this->dispatchPipeControl); } dispatcher.registerCommandsSizeEstimationMethod(this->getSizeForSinglePipeControl); } void resizeKernelInstances(size_t size) const; Kernel *baseKernel = nullptr; mutable std::vector> convertToNonAuxKernel; mutable std::vector> convertToAuxKernel; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/built_ins/built_in_ops_vme.h000066400000000000000000000007231363734646600256350ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "built_in_ops.h" namespace NEO { namespace EBuiltInOps { using Type = uint32_t; constexpr Type 
VmeBlockMotionEstimateIntel{MaxCoreValue + 1}; constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{MaxCoreValue + 2}; constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{MaxCoreValue + 3}; } // namespace EBuiltInOps } // namespace NEO compute-runtime-20.13.16352/opencl/source/built_ins/built_ins.inl000066400000000000000000000027771363734646600246360ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/built_ins/populate_built_ins.inl" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { BuiltInOp::BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib) { BuiltinDispatchInfoBuilder::populate(device, EBuiltInOps::AuxTranslation, "", "fullCopy", baseKernel); resizeKernelInstances(5); } void BuiltInOp::resizeKernelInstances(size_t size) const { convertToNonAuxKernel.reserve(size); convertToAuxKernel.reserve(size); for (size_t i = convertToNonAuxKernel.size(); i < size; i++) { auto clonedNonAuxToAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr); clonedNonAuxToAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::NonAuxToAux); auto clonedAuxToNonAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr); clonedAuxToNonAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::AuxToNonAux); clonedNonAuxToAuxKernel->cloneKernel(baseKernel); clonedAuxToNonAuxKernel->cloneKernel(baseKernel); convertToAuxKernel.emplace_back(clonedNonAuxToAuxKernel); convertToNonAuxKernel.emplace_back(clonedAuxToNonAuxKernel); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/built_ins/builtins_dispatch_builder.cpp000066400000000000000000001133131363734646600300510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/built_ins/built_ins.inl" #include "opencl/source/built_ins/vme_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/built_ins_helper.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/program/program.h" #include "compiler_options.h" #include #include namespace NEO { template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib) { populate(device, EBuiltInOps::CopyBufferToBuffer, "", "CopyBufferToBufferLeftLeftover", kernLeftLeftover, "CopyBufferToBufferMiddle", kernMiddle, "CopyBufferToBufferRightLeftover", kernRightLeftover); } template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { DispatchInfoBuilder kernelSplit1DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); uintptr_t start = reinterpret_cast(operationParams.dstPtr) + operationParams.dstOffset.x; size_t middleAlignment = MemoryConstants::cacheLineSize; size_t middleElSize = sizeof(uint32_t) * 4; uintptr_t leftSize = start % middleAlignment; leftSize = (leftSize > 0) ? 
(middleAlignment - leftSize) : 0; // calc left leftover size leftSize = std::min(leftSize, operationParams.size.x); // clamp left leftover size to requested size uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size rightSize = std::min(rightSize, operationParams.size.x - leftSize); // clamp uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size if (!isAligned<4>(reinterpret_cast(operationParams.srcPtr) + operationParams.srcOffset.x + leftSize)) { //corner case - src relative to dst does not have DWORD alignment leftSize += middleSizeBytes; middleSizeBytes = 0; } auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker // Set-up ISA kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover); kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle); kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover); // Set-up common kernel args if (operationParams.srcSvmAlloc) { kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.srcPtr, operationParams.srcSvmAlloc); } else if (operationParams.srcMemObj) { kernelSplit1DBuilder.setArg(0, operationParams.srcMemObj); } else { kernelSplit1DBuilder.setArgSvm(0, operationParams.size.x + operationParams.srcOffset.x, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY); } if (operationParams.dstSvmAlloc) { kernelSplit1DBuilder.setArgSvmAlloc(1, operationParams.dstPtr, operationParams.dstSvmAlloc); } else if (operationParams.dstMemObj) { kernelSplit1DBuilder.setArg(1, operationParams.dstMemObj); } else { kernelSplit1DBuilder.setArgSvm(1, operationParams.size.x + operationParams.dstOffset.x, operationParams.dstPtr, nullptr, 0u); } kernelSplit1DBuilder.setUnifiedMemorySyncRequirement(operationParams.unifiedMemoryArgsRequireMemSync); // Set-up srcOffset kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 2, 
static_cast(operationParams.srcOffset.x)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 2, static_cast(operationParams.srcOffset.x + leftSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 2, static_cast(operationParams.srcOffset.x + leftSize + middleSizeBytes)); // Set-up dstOffset kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, static_cast(operationParams.dstOffset.x)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast(operationParams.dstOffset.x + leftSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast(operationParams.dstOffset.x + leftSize + middleSizeBytes)); // Set-up work sizes // Note for split walker, it would be just builder.SetDipatchGeometry(GWS, ELWS, OFFSET) kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3{leftSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3{middleSizeEls, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3{rightSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.bake(multiDispatchInfo); return true; } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } protected: Kernel *kernLeftLeftover = nullptr; Kernel *kernMiddle = nullptr; Kernel *kernRightLeftover = nullptr; BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) { } }; template <> class BuiltInOp : public BuiltInOp { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltInOp(kernelsLib) { populate(device, EBuiltInOps::CopyBufferToBufferStateless, CompilerOptions::greaterThan4gbBuffersRequired, "CopyBufferToBufferLeftLeftover", kernLeftLeftover, "CopyBufferToBufferMiddle", kernMiddle, 
"CopyBufferToBufferRightLeftover", kernRightLeftover); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } }; template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} { populate(device, EBuiltInOps::CopyBufferRect, "", "CopyBufferRectBytes2d", kernelBytes[0], "CopyBufferRectBytes2d", kernelBytes[1], "CopyBufferRectBytes3d", kernelBytes[2]); } template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { DispatchInfoBuilder kernelNoSplit3DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); size_t hostPtrSize = 0; bool is3D = false; if (operationParams.srcMemObj && operationParams.dstMemObj) { DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr))); is3D = (operationParams.size.z > 1) || (operationParams.srcOffset.z > 0) || (operationParams.dstOffset.z > 0); } else { if (operationParams.srcPtr) { size_t origin[] = {operationParams.srcOffset.x, operationParams.srcOffset.y, operationParams.srcOffset.z}; size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z}; hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.srcRowPitch, operationParams.srcSlicePitch); is3D = (operationParams.size.z > 1) || (operationParams.dstOffset.z > 0); } else if (operationParams.dstPtr) { size_t origin[] = {operationParams.dstOffset.x, operationParams.dstOffset.y, operationParams.dstOffset.z}; size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z}; hostPtrSize = Buffer::calculateHostPtrSize(origin, region, operationParams.dstRowPitch, operationParams.dstSlicePitch); is3D = (operationParams.size.z > 1) || 
(operationParams.srcOffset.z > 0); } else { DEBUG_BREAK_IF(!false); } } // Set-up ISA int dimensions = is3D ? 3 : 2; kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]); size_t srcOffsetFromAlignedPtr = 0; size_t dstOffsetFromAlignedPtr = 0; // arg0 = src if (operationParams.srcMemObj) { kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj); } else { void *srcPtrToSet = operationParams.srcPtr; if (!is3D) { auto srcPtr = ptrOffset(operationParams.srcPtr, operationParams.srcOffset.z * operationParams.srcSlicePitch); srcPtrToSet = alignDown(srcPtr, 4); srcOffsetFromAlignedPtr = ptrDiff(srcPtr, srcPtrToSet); } kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, srcPtrToSet, nullptr, CL_MEM_READ_ONLY); } // arg1 = dst if (operationParams.dstMemObj) { kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj); } else { void *dstPtrToSet = operationParams.dstPtr; if (!is3D) { auto dstPtr = ptrOffset(operationParams.dstPtr, operationParams.dstOffset.z * operationParams.dstSlicePitch); dstPtrToSet = alignDown(dstPtr, 4); dstOffsetFromAlignedPtr = ptrDiff(dstPtr, dstPtrToSet); } kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, dstPtrToSet, nullptr, 0u); } // arg2 = srcOrigin OffsetType kSrcOrigin[4] = {static_cast(operationParams.srcOffset.x + srcOffsetFromAlignedPtr), static_cast(operationParams.srcOffset.y), static_cast(operationParams.srcOffset.z), 0}; kernelNoSplit3DBuilder.setArg(2, sizeof(OffsetType) * 4, kSrcOrigin); // arg3 = dstOrigin OffsetType kDstOrigin[4] = {static_cast(operationParams.dstOffset.x + dstOffsetFromAlignedPtr), static_cast(operationParams.dstOffset.y), static_cast(operationParams.dstOffset.z), 0}; kernelNoSplit3DBuilder.setArg(3, sizeof(OffsetType) * 4, kDstOrigin); // arg4 = srcPitch OffsetType kSrcPitch[2] = {static_cast(operationParams.srcRowPitch), static_cast(operationParams.srcSlicePitch)}; kernelNoSplit3DBuilder.setArg(4, sizeof(OffsetType) * 2, kSrcPitch); // arg5 = dstPitch OffsetType kDstPitch[2] = 
{static_cast(operationParams.dstRowPitch), static_cast(operationParams.dstSlicePitch)}; kernelNoSplit3DBuilder.setArg(5, sizeof(OffsetType) * 2, kDstPitch); // Set-up work sizes kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelNoSplit3DBuilder.bake(multiDispatchInfo); return true; } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } protected: Kernel *kernelBytes[3]; BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib), kernelBytes{nullptr} {}; }; template <> class BuiltInOp : public BuiltInOp { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltInOp(kernelsLib) { populate(device, EBuiltInOps::CopyBufferRectStateless, CompilerOptions::greaterThan4gbBuffersRequired, "CopyBufferRectBytes2d", kernelBytes[0], "CopyBufferRectBytes2d", kernelBytes[1], "CopyBufferRectBytes3d", kernelBytes[2]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } }; template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib) { populate(device, EBuiltInOps::FillBuffer, "", "FillBufferLeftLeftover", kernLeftLeftover, "FillBufferMiddle", kernMiddle, "FillBufferRightLeftover", kernRightLeftover); } template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { DispatchInfoBuilder kernelSplit1DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); uintptr_t start = reinterpret_cast(operationParams.dstPtr) + operationParams.dstOffset.x; size_t middleAlignment = MemoryConstants::cacheLineSize; size_t middleElSize = sizeof(uint32_t); uintptr_t leftSize = start % 
middleAlignment; leftSize = (leftSize > 0) ? (middleAlignment - leftSize) : 0; // calc left leftover size leftSize = std::min(leftSize, operationParams.size.x); // clamp left leftover size to requested size uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size rightSize = std::min(rightSize, operationParams.size.x - leftSize); // clamp uintptr_t middleSizeBytes = operationParams.size.x - leftSize - rightSize; // calc middle size auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker // Set-up ISA kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover); kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle); kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover); DEBUG_BREAK_IF((operationParams.srcMemObj == nullptr) || (operationParams.srcOffset != 0)); DEBUG_BREAK_IF((operationParams.dstMemObj == nullptr) && (operationParams.dstSvmAlloc == nullptr)); // Set-up dstMemObj with buffer if (operationParams.dstSvmAlloc) { kernelSplit1DBuilder.setArgSvmAlloc(0, operationParams.dstPtr, operationParams.dstSvmAlloc); } else { kernelSplit1DBuilder.setArg(0, operationParams.dstMemObj); } // Set-up dstOffset kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 1, static_cast(operationParams.dstOffset.x)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 1, static_cast(operationParams.dstOffset.x + leftSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 1, static_cast(operationParams.dstOffset.x + leftSize + middleSizeBytes)); // Set-up srcMemObj with pattern kernelSplit1DBuilder.setArgSvm(2, operationParams.srcMemObj->getSize(), operationParams.srcMemObj->getGraphicsAllocation()->getUnderlyingBuffer(), operationParams.srcMemObj->getGraphicsAllocation(), CL_MEM_READ_ONLY); // Set-up patternSizeInEls kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Left, 3, 
static_cast(operationParams.srcMemObj->getSize())); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Middle, 3, static_cast(operationParams.srcMemObj->getSize() / middleElSize)); kernelSplit1DBuilder.setArg(SplitDispatch::RegionCoordX::Right, 3, static_cast(operationParams.srcMemObj->getSize())); // Set-up work sizes // Note for split walker, it would be just builder.SetDipatchGeomtry(GWS, ELWS, OFFSET) kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, Vec3{leftSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Middle, Vec3{middleSizeEls, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.setDispatchGeometry(SplitDispatch::RegionCoordX::Right, Vec3{rightSize, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelSplit1DBuilder.bake(multiDispatchInfo); return true; } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } protected: Kernel *kernLeftLeftover = nullptr; Kernel *kernMiddle = nullptr; Kernel *kernRightLeftover = nullptr; BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) {} }; template <> class BuiltInOp : public BuiltInOp { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltInOp(kernelsLib) { populate(device, EBuiltInOps::FillBufferStateless, CompilerOptions::greaterThan4gbBuffersRequired, "FillBufferLeftLeftover", kernLeftLeftover, "FillBufferMiddle", kernMiddle, "FillBufferRightLeftover", kernRightLeftover); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } }; template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib) { populate(device, 
EBuiltInOps::CopyBufferToImage3d, "", "CopyBufferToImage3dBytes", kernelBytes[0], "CopyBufferToImage3d2Bytes", kernelBytes[1], "CopyBufferToImage3d4Bytes", kernelBytes[2], "CopyBufferToImage3d8Bytes", kernelBytes[3], "CopyBufferToImage3d16Bytes", kernelBytes[4]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } protected: Kernel *kernelBytes[5] = {nullptr}; BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib){}; template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { DispatchInfoBuilder kernelNoSplit3DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); DEBUG_BREAK_IF(!(((operationParams.srcPtr != nullptr) || (operationParams.srcMemObj != nullptr)) && (operationParams.dstPtr == nullptr))); auto dstImage = castToObjectOrAbort(operationParams.dstMemObj); // Redescribe image to be byte-copy auto dstImageRedescribed = dstImage->redescribe(); multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(dstImageRedescribed)); // life range same as mdi's // Calculate srcRowPitch and srcSlicePitch auto bytesPerPixel = dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z}; auto srcRowPitch = operationParams.dstRowPitch ? operationParams.dstRowPitch : region[0] * bytesPerPixel; auto srcSlicePitch = operationParams.dstSlicePitch ? operationParams.dstSlicePitch : ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * srcRowPitch); // Determine size of host ptr surface for residency purposes size_t hostPtrSize = operationParams.srcPtr ? 
Image::calculateHostPtrSize(region, srcRowPitch, srcSlicePitch, bytesPerPixel, dstImage->getImageDesc().image_type) : 0; hostPtrSize += operationParams.srcOffset.x; // Set-up kernel auto bytesExponent = Math::log2(bytesPerPixel); DEBUG_BREAK_IF(bytesExponent >= 5); kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]); // Set-up source host ptr / buffer if (operationParams.srcPtr) { kernelNoSplit3DBuilder.setArgSvm(0, hostPtrSize, operationParams.srcPtr, nullptr, CL_MEM_READ_ONLY); } else { kernelNoSplit3DBuilder.setArg(0, operationParams.srcMemObj); } // Set-up destination image kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed, operationParams.dstMipLevel); // Set-up srcOffset kernelNoSplit3DBuilder.setArg(2, static_cast(operationParams.srcOffset.x)); // Set-up dstOrigin { uint32_t origin[] = { static_cast(operationParams.dstOffset.x), static_cast(operationParams.dstOffset.y), static_cast(operationParams.dstOffset.z), 0}; kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin); } // Set-up srcRowPitch { OffsetType pitch[] = { static_cast(srcRowPitch), static_cast(srcSlicePitch)}; kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch); } // Set-up work sizes kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelNoSplit3DBuilder.bake(multiDispatchInfo); return true; } }; template <> class BuiltInOp : public BuiltInOp { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltInOp(kernelsLib) { populate(device, EBuiltInOps::CopyBufferToImage3dStateless, CompilerOptions::greaterThan4gbBuffersRequired, "CopyBufferToImage3dBytes", kernelBytes[0], "CopyBufferToImage3d2Bytes", kernelBytes[1], "CopyBufferToImage3d4Bytes", kernelBytes[2], "CopyBufferToImage3d8Bytes", kernelBytes[3], "CopyBufferToImage3d16Bytes", kernelBytes[4]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, 
operationParams); } }; template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib) { populate(device, EBuiltInOps::CopyImage3dToBuffer, "", "CopyImage3dToBufferBytes", kernelBytes[0], "CopyImage3dToBuffer2Bytes", kernelBytes[1], "CopyImage3dToBuffer4Bytes", kernelBytes[2], "CopyImage3dToBuffer8Bytes", kernelBytes[3], "CopyImage3dToBuffer16Bytes", kernelBytes[4]); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } protected: Kernel *kernelBytes[5] = {nullptr}; BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) {} template bool buildDispatchInfosTyped(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { DispatchInfoBuilder kernelNoSplit3DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && ((operationParams.dstPtr != nullptr) || (operationParams.dstMemObj != nullptr)))); auto srcImage = castToObjectOrAbort(operationParams.srcMemObj); // Redescribe image to be byte-copy auto srcImageRedescribed = srcImage->redescribe(); multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(srcImageRedescribed)); // life range same as mdi's // Calculate dstRowPitch and dstSlicePitch auto bytesPerPixel = srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; size_t region[] = {operationParams.size.x, operationParams.size.y, operationParams.size.z}; auto dstRowPitch = operationParams.srcRowPitch ? operationParams.srcRowPitch : region[0] * bytesPerPixel; auto dstSlicePitch = operationParams.srcSlicePitch ? operationParams.srcSlicePitch : ((srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 
1 : region[1]) * dstRowPitch); // Determine size of host ptr surface for residency purposes size_t hostPtrSize = operationParams.dstPtr ? Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, srcImage->getImageDesc().image_type) : 0; hostPtrSize += operationParams.dstOffset.x; // Set-up ISA auto bytesExponent = Math::log2(bytesPerPixel); DEBUG_BREAK_IF(bytesExponent >= 5); kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]); // Set-up source image kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel); // Set-up destination host ptr / buffer if (operationParams.dstPtr) { kernelNoSplit3DBuilder.setArgSvm(1, hostPtrSize, operationParams.dstPtr, nullptr, 0u); } else { kernelNoSplit3DBuilder.setArg(1, operationParams.dstMemObj); } // Set-up srcOrigin { uint32_t origin[] = { static_cast(operationParams.srcOffset.x), static_cast(operationParams.srcOffset.y), static_cast(operationParams.srcOffset.z), 0}; kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin); } // Set-up dstOffset kernelNoSplit3DBuilder.setArg(3, static_cast(operationParams.dstOffset.x)); // Set-up dstRowPitch { OffsetType pitch[] = { static_cast(dstRowPitch), static_cast(dstSlicePitch)}; kernelNoSplit3DBuilder.setArg(4, sizeof(pitch), pitch); } // Set-up work sizes kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelNoSplit3DBuilder.bake(multiDispatchInfo); return true; } }; template <> class BuiltInOp : public BuiltInOp { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltInOp(kernelsLib) { populate(device, EBuiltInOps::CopyImage3dToBufferStateless, CompilerOptions::greaterThan4gbBuffersRequired, "CopyImage3dToBufferBytes", kernelBytes[0], "CopyImage3dToBuffer2Bytes", kernelBytes[1], "CopyImage3dToBuffer4Bytes", kernelBytes[2], "CopyImage3dToBuffer8Bytes", kernelBytes[3], "CopyImage3dToBuffer16Bytes", kernelBytes[4]); } bool buildDispatchInfos(MultiDispatchInfo 
&multiDispatchInfo, const BuiltinOpParams &operationParams) const override { return buildDispatchInfosTyped(multiDispatchInfo, operationParams); } }; template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib), kernel(nullptr) { populate(device, EBuiltInOps::CopyImageToImage3d, "", "CopyImageToImage3d", kernel); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { DispatchInfoBuilder kernelNoSplit3DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); DEBUG_BREAK_IF(!((operationParams.srcPtr == nullptr) && (operationParams.dstPtr == nullptr))); auto srcImage = castToObjectOrAbort(operationParams.srcMemObj); auto dstImage = castToObjectOrAbort(operationParams.dstMemObj); // Redescribe images to be byte-copies auto srcImageRedescribed = srcImage->redescribe(); auto dstImageRedescribed = dstImage->redescribe(); multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(srcImageRedescribed)); // life range same as mdi's multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(dstImageRedescribed)); // life range same as mdi's // Set-up kernel kernelNoSplit3DBuilder.setKernel(kernel); // Set-up source image kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel); // Set-up destination image kernelNoSplit3DBuilder.setArg(1, dstImageRedescribed, operationParams.dstMipLevel); // Set-up srcOrigin { uint32_t origin[] = { static_cast(operationParams.srcOffset.x), static_cast(operationParams.srcOffset.y), static_cast(operationParams.srcOffset.z), 0}; kernelNoSplit3DBuilder.setArg(2, sizeof(origin), origin); } // Set-up dstOrigin { uint32_t origin[] = { static_cast(operationParams.dstOffset.x), static_cast(operationParams.dstOffset.y), static_cast(operationParams.dstOffset.z), 0}; kernelNoSplit3DBuilder.setArg(3, sizeof(origin), origin); } // Set-up work sizes 
kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelNoSplit3DBuilder.bake(multiDispatchInfo); return true; } protected: Kernel *kernel; }; template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : BuiltinDispatchInfoBuilder(kernelsLib), kernel(nullptr) { populate(device, EBuiltInOps::FillImage3d, "", "FillImage3d", kernel); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override { DispatchInfoBuilder kernelNoSplit3DBuilder; multiDispatchInfo.setBuiltinOpParams(operationParams); DEBUG_BREAK_IF(!((operationParams.srcMemObj == nullptr) && (operationParams.srcPtr != nullptr) && (operationParams.dstPtr == nullptr))); auto image = castToObjectOrAbort(operationParams.dstMemObj); // Redescribe image to be byte-copy auto imageRedescribed = image->redescribeFillImage(); multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(imageRedescribed)); // Set-up kernel kernelNoSplit3DBuilder.setKernel(kernel); // Set-up destination image kernelNoSplit3DBuilder.setArg(0, imageRedescribed); // Set-up fill color int iFillColor[4] = {0}; const void *fillColor = operationParams.srcPtr; convertFillColor(fillColor, iFillColor, image->getSurfaceFormatInfo().OCLImageFormat, imageRedescribed->getSurfaceFormatInfo().OCLImageFormat); kernelNoSplit3DBuilder.setArg(1, 4 * sizeof(int32_t), iFillColor); // Set-up dstOffset { uint32_t offset[] = { static_cast(operationParams.dstOffset.x), static_cast(operationParams.dstOffset.y), static_cast(operationParams.dstOffset.z), 0}; kernelNoSplit3DBuilder.setArg(2, sizeof(offset), offset); } // Set-up work sizes kernelNoSplit3DBuilder.setDispatchGeometry(operationParams.size, Vec3{0, 0, 0}, Vec3{0, 0, 0}); kernelNoSplit3DBuilder.bake(multiDispatchInfo); return true; } protected: Kernel *kernel; }; BuiltinDispatchInfoBuilder 
&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, Device &device) { uint32_t operationId = static_cast(operation); auto kernelsLib = device.getBuiltIns(); auto &operationBuilder = kernelsLib->BuiltinOpsBuilders[operationId]; switch (operation) { case EBuiltInOps::CopyBufferToBuffer: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyBufferToBufferStateless: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyBufferRect: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyBufferRectStateless: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::FillBuffer: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::FillBufferStateless: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyBufferToImage3d: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyBufferToImage3dStateless: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyImage3dToBuffer: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyImage3dToBufferStateless: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::CopyImageToImage3d: std::call_once(operationBuilder.second, [&] { 
operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::FillImage3d: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; case EBuiltInOps::AuxTranslation: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(*kernelsLib, device); }); break; default: return getUnknownDispatchInfoBuilder(operation, device); } return *operationBuilder.first; } BuiltInOwnershipWrapper::BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) { takeOwnership(inputBuilder, context); } BuiltInOwnershipWrapper::~BuiltInOwnershipWrapper() { if (builder) { for (auto &kernel : builder->peekUsedKernels()) { kernel->setContext(nullptr); kernel->releaseOwnership(); } } } void BuiltInOwnershipWrapper::takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context) { UNRECOVERABLE_IF(builder); builder = &inputBuilder; for (auto &kernel : builder->peekUsedKernels()) { kernel->takeOwnership(); kernel->setContext(context); } } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/built_ins/builtins_dispatch_builder.h000066400000000000000000000102461363734646600275170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/vec.h" #include "opencl/source/kernel/kernel.h" #include "CL/cl.h" #include "built_in_ops.h" #include #include #include #include #include #include #include #include namespace NEO { typedef std::vector BuiltinResourceT; class Context; class Device; class MemObj; struct MultiDispatchInfo; class Program; struct BuiltinOpParams { void *srcPtr = nullptr; void *dstPtr = nullptr; MemObj *srcMemObj = nullptr; MemObj *dstMemObj = nullptr; GraphicsAllocation *srcSvmAlloc = nullptr; GraphicsAllocation *dstSvmAlloc = nullptr; GraphicsAllocation *transferAllocation 
= nullptr; //mapAllocation or hostPtrAllocation AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; bool unifiedMemoryArgsRequireMemSync = true; Vec3 srcOffset = {0, 0, 0}; Vec3 dstOffset = {0, 0, 0}; Vec3 size = {0, 0, 0}; size_t srcRowPitch = 0; size_t dstRowPitch = 0; size_t srcSlicePitch = 0; size_t dstSlicePitch = 0; uint32_t srcMipLevel = 0; uint32_t dstMipLevel = 0; }; class BuiltinDispatchInfoBuilder { public: BuiltinDispatchInfoBuilder(BuiltIns &kernelLib) : kernelsLib(kernelLib) {} virtual ~BuiltinDispatchInfoBuilder() = default; template void populate(Device &device, EBuiltInOps::Type operation, const char *options, KernelsDescArgsT &&... desc); virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { return false; } virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, Kernel *kernel, const uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const { return false; } virtual cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const { return CL_SUCCESS; } // returns true if argument should be updated in kernel exposed to user code virtual bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const { err = 0; return true; } std::vector> &peekUsedKernels() { return usedKernels; } protected: template void grabKernels(KernelNameT &&kernelName, Kernel *&kernelDst, KernelsDescArgsT &&... 
kernelsDesc) { const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName); UNRECOVERABLE_IF(nullptr == kernelInfo); cl_int err = 0; kernelDst = Kernel::create(prog.get(), *kernelInfo, &err); kernelDst->isBuiltIn = true; usedKernels.push_back(std::unique_ptr(kernelDst)); grabKernels(std::forward(kernelsDesc)...); } cl_int grabKernels() { return CL_SUCCESS; } std::unique_ptr prog; std::vector> usedKernels; BuiltIns &kernelsLib; }; class BuiltInDispatchBuilderOp { public: static BuiltinDispatchInfoBuilder &getBuiltinDispatchInfoBuilder(EBuiltInOps::Type op, Device &device); static BuiltinDispatchInfoBuilder &getUnknownDispatchInfoBuilder(EBuiltInOps::Type op, Device &device); std::unique_ptr setBuiltinDispatchInfoBuilder(EBuiltInOps::Type op, Device &device, std::unique_ptr newBuilder); }; class BuiltInOwnershipWrapper : public NonCopyableOrMovableClass { public: BuiltInOwnershipWrapper() = default; BuiltInOwnershipWrapper(BuiltinDispatchInfoBuilder &inputBuilder, Context *context); ~BuiltInOwnershipWrapper(); void takeOwnership(BuiltinDispatchInfoBuilder &inputBuilder, Context *context); protected: BuiltinDispatchInfoBuilder *builder = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/built_ins/kernels/000077500000000000000000000000001363734646600235705ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/kernels/CMakeLists.txt000066400000000000000000000104011363734646600263240ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_custom_target(builtins_vme_sources) set_target_properties(builtins_vme_sources PROPERTIES FOLDER "built_ins") set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}") add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins_vme_sources) add_subdirectories() set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE) set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE) if("${NEO_ARCH}" STREQUAL 
"x32") set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required") else() set(BUILTIN_OPTIONS "") endif() set(BUILTIN_OPTIONS_STATELESS "-cl-intel-greater-than-4GB-buffer-required" ) if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" ) list(APPEND __cloc__options__ "-D DEBUG") endif() set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE) set(BUILTIN_CPP "") function(get_bits_for_stateless gen_type platform_type) # Force 32bits compiling on gen9lp for stateless builtins if( (${GEN_TYPE} STREQUAL "GEN9" ) AND ( ${PLATFORM_TYPE} STREQUAL "LP")) set(BITS "32" PARENT_SCOPE) else() set(BITS ${NEO_BITS} PARENT_SCOPE) endif() endfunction() # Define function for compiling built-ins (with ocloc) function(compile_builtin gen_type platform_type builtin bits builtin_options) string(TOLOWER ${gen_type} gen_type_lower) get_family_name_with_type(${gen_type} ${platform_type}) set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${gen_type_lower}") # get filename set(FILENAME ${builtin}) # get name of the file w/o extension get_filename_component(BASENAME ${builtin} NAME_WE) set(OUTPUTPATH_BASE "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}") set(OUTPUT_FILES ${OUTPUTPATH_BASE}.spv ${OUTPUTPATH_BASE}.bin ${OUTPUTPATH_BASE}.cpp ${OUTPUTPATH_BASE}.gen ) # function returns builtin cpp filename unset(BUILTIN_CPP) # set variable outside function set(BUILTIN_CPP built_ins/${NEO_ARCH}/${gen_type_lower}/${BASENAME}_${family_name_with_type}.cpp PARENT_SCOPE) if(NOT DEFINED cloc_cmd_prefix) if(WIN32) set(cloc_cmd_prefix ocloc) else() if(DEFINED NEO__IGC_LIBRARY_PATH) set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH}:$ $) else() set(cloc_cmd_prefix LD_LIBRARY_PATH=$ $) endif() endif() endif() list(APPEND __cloc__options__ "-cl-kernel-arg-info") add_custom_command( OUTPUT ${OUTPUT_FILES} COMMAND ${cloc_cmd_prefix} -q -file ${FILENAME} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} ${builtin_options} -${bits} -out_dir ${OUTPUTDIR} -cpp_file -options "$" WORKING_DIRECTORY 
${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${builtin} ocloc copy_compiler_files ) endfunction() macro(macro_for_each_gen) foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) if(${GEN_TYPE}_HAS_${PLATFORM_TYPE}) get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE}) string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER) unset(BUILTINS_COMMANDS) foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS}) compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}") list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP}) set(RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE) endforeach() get_bits_for_stateless(${GEN_TYPE} ${PLATFORM_TYPE}) foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS}) compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_STATELESS}.builtin_kernel ${BITS} "${BUILTIN_OPTIONS_STATELESS}") list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP}) set(RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE) endforeach() set(target_name builtins_${family_name_with_type}_vme) add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS}) add_dependencies(builtins ${target_name}) set_target_properties(${target_name} PROPERTIES FOLDER "opencl/source/built_ins/${family_name_with_type}") endif() endforeach() endmacro() apply_macro_for_each_gen("SUPPORTED") vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel000066400000000000000000000361721363734646600427400ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_bidirectional_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __read_only image2d_t src_check_image, __read_only image2d_t 
ref0_check_image, __read_only image2d_t ref1_check_image, uint flags, uint search_cost_penalty, uint search_cost_precision, short2 count_global, uchar bidir_weight, __global short2 *count_motion_vector_buffer, __global short2 *prediction_motion_vector_buffer, __global char *skip_input_mode_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *search_motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *search_residuals, __global ushort *skip_residuals, __global ushort *intra_residuals, __read_only image2d_t intraSrcImg, int height, int width, int stride) { __local uint dstSearch[64]; // 8 GRFs __local uint dstSkipIntra[32 + 24]; // 7 GRFs (4 for inter, 3 for intra) // distortion in the 6th GRF __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5]; // Initialize the MV cost table: // MV Cost in U4U4 format: // No cost : 0, 0, 0, 0, 0, 0, 0, 0 // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15 // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57 // High Cost : 29, 61, 72, 78, 88, 89, 91, 92 uint2 MVCostTable; if (search_cost_penalty == 1) { MVCostTable.s0 = 0x09050401; MVCostTable.s1 = 0x0F0E0C0A; } else if (search_cost_penalty == 2) { MVCostTable.s0 = 0x2B1D1A05; MVCostTable.s1 = 0x39392F2D; } else if (search_cost_penalty == 3) { MVCostTable.s0 = 0x4E483D1D; MVCostTable.s1 = 0x5C5B5958; } else { MVCostTable.s0 = 0; MVCostTable.s1 = 0; } uint MVCostPrecision = ((uint)search_cost_precision) << 16; // Frame is divided into rows * columns of MBs. // One h/w thread per WG. // One WG processes "row" MBs - one row per iteration and one MB per row. // Number of WGs (or h/w threads) is number of columns MBs.Each iteration // processes the MB in a row - gid_0 is the MB id in a row and gid_1 is the // row offset. 
int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord; srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar) srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar) uint curMB = gid_0 + gid_1 * width; // current MB id short2 count; // If either the search or skip vector counts are per-MB, then we need to // read in // the count motion vector buffer. if ((count_global.s0 == -1) | (count_global.s1 == -1)) { count = count_motion_vector_buffer[curMB]; } // If either the search or skip vector counts are per-frame, we need to use // those. if (count_global.s0 >= 0) { count.s0 = count_global.s0; } if (count_global.s1 >= 0) { count.s1 = count_global.s1; } int countPredMVs = count.x; if (countPredMVs != 0) { uint offset = curMB * 4; // 4 predictors per MB offset += get_local_id(0) % 4; // 16 work-items access 4 MVs for MB // one predictor for MB per SIMD channel // Reduce predictors from Q-pixel to integer precision. int2 predMV = 0; if (get_local_id(0) < countPredMVs) { // one MV per work-item if(prediction_motion_vector_buffer != NULL) { predMV = convert_int2(prediction_motion_vector_buffer[offset]); } // Predictors are input in QP resolution. Convert that to integer // resolution. predMV.x /= 4; predMV.y /= 4; predMV.y &= 0xFFFFFFFE; } // Do up to 4 IMEs, get the best MVs and their distortions, and optionally // a FBR of // the best MVs. Finally the results are written out to SLM. 
intel_work_group_vme_mb_multi_query_4( dstSearch, // best search MV and its distortions into SLM countPredMVs, // count of predictor MVs (globally scalar - value range // 1 to 4) MVCostPrecision, // MV cost precision MVCostTable, // MV cost table srcCoord, // MB 2-D offset (globally scalar) predMV, // predictor MVs (up to 4 distinct MVs for SIMD16 thread) srcImg, // source refImg, // reference accelerator); // vme object } int doIntra = ((flags & 0x2) != 0); int intraEdges = 0; if (doIntra) { // Enable all edges by default. intraEdges = 0x3C; // If this is a left-edge MB, then disable left edges. if ((gid_0 == 0) & (get_global_offset(0) == 0)) { intraEdges &= 0x18; } // If this is a right edge MB then disable right edges. if (gid_0 == width - 1) { intraEdges &= 0x34; } // If this is a top-edge MB, then disable top edges. if ((gid_1 == 0) & (get_global_offset(1) == 0)) { intraEdges &= 0x20; } // Set bit6=bit5. intraEdges |= ((intraEdges & 0x20) << 1); intraEdges <<= 8; } int skip_block_type_8x8 = flags & 0x4; int countSkipMVs = count.y; if (countSkipMVs != 0 || doIntra == true) { // one set of skip MV per SIMD channel // Do up to 4 skip checks and get the distortions for each of them. // Finally the results are written out to SLM. 
if ((skip_block_type_8x8 == 0) | ((doIntra) & (countSkipMVs == 0))) { // 16x16: uint offset = curMB * 4 * 2; // 4 sets of skip check MVs per MB int skipMV = 0; if (get_local_id(0) < countSkipMVs * 2) // need 2 values per MV { offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB if(skip_motion_vector_buffer != NULL){ __global int *skip1_motion_vector_buffer = (__global int *)skip_motion_vector_buffer; skipMV = skip1_motion_vector_buffer[offset]; // one MV per work-item } } uchar skipMode = 0; if (get_local_id(0) < countSkipMVs) { if(skip_input_mode_buffer != NULL) skipMode = skip_input_mode_buffer[curMB]; if (skipMode == 0) { skipMode = 1; } if (skipMode > 3) { skipMode = 3; } } intel_work_group_vme_mb_multi_bidir_check_16x16( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs (globally scalar - value // range 1 to 4) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) bidir_weight, // bidirectional weight skipMode, // skip modes skipMV, // skip check MVs (up to 4 distinct sets of skip check MVs // for SIMD16 thread) src_check_image, // source ref0_check_image, // reference fwd ref1_check_image, // reference bwd intraSrcImg, // intra source accelerator); // vme object } else { // 8x8: uint offset = curMB * 4 * 8; // 4 sets of skip check MVs, 16 shorts (8 ints) each per MB int2 skipMVs = 0; if (get_local_id(0) < countSkipMVs * 8) // need 8 values per MV { offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB if(skip_motion_vector_buffer != NULL){ __global int *skip1_motion_vector_buffer = (__global int *)(skip_motion_vector_buffer); skipMVs.x = skip1_motion_vector_buffer[offset]; // four component MVs // per work-item skipMVs.y = skip1_motion_vector_buffer[offset + 16];} } uchar skipModes = 0; if (get_local_id(0) < countSkipMVs) { if(skip_input_mode_buffer != NULL) skipModes = skip_input_mode_buffer[curMB]; } 
intel_work_group_vme_mb_multi_bidir_check_8x8( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs per MB (globally scalar - // value range 1 to 4) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) bidir_weight, // bidirectional weight skipModes, // skip modes skipMVs, // skip check MVs (up to 4 distinct sets of skip check MVs // for SIMD16 thread) src_check_image, // source ref0_check_image, // reference fwd ref1_check_image, // reference bwd intraSrcImg, // intra source accelerator); // vme object } } barrier(CLK_LOCAL_MEM_FENCE); // Write Out motion estimation result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks MVs in MBs, and row-major of 4 sets of // MVs/distortion in blocks if (countPredMVs != 0) { // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width); // 1. 16 work-items enabled. // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30 // (interleaved // fwd/bdw MVs) with constant offset 8 (control data size) from SLM // into contiguous // short2 locations 0, 1, .., 15 of global buffer // search_motion_vector_buffer with // offset index. // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals with // offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]); if(search_motion_vector_buffer != NULL) search_motion_vector_buffer[index] = val; if (search_residuals != NULL) { search_residuals[index] = distSearch[get_local_id(0)]; } } // 8x8 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { // Only 1st 4 work-item are needed. if (get_local_id(0) < 4) { int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width); // 1. 4 work-items enabled. // 2. 
Work-items gather fw MVs in strided dword locations 0, 8, 16, 24 // (interleaved // fwd/bdw MVs) with constant offset 8 from SLM into contiguous // short2 locations // 0, 1, .., 15 of global buffer search_motion_vector_buffer with // offset index. // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals // with offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]); if(search_motion_vector_buffer != NULL) search_motion_vector_buffer[index] = val; if (search_residuals != NULL) { search_residuals[index] = distSearch[get_local_id(0) * 4]; } } } // 16x16 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { // One 1st work is needed. if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; // 1. 1 work-item enabled. // 2. Work-item gathers fwd MV in dword location 0 with constant // offset 8 from // SLM into short2 locations 0 of global buffer // search_motion_vector_buffer. // 3. Work-item gathers ushort location 0 from distSearch into ushort // location 0 of search_residuals with offset index. short2 val = as_short2(dstSearch[8]); if(search_motion_vector_buffer != NULL) search_motion_vector_buffer[index] = val; if (search_residuals != NULL) { search_residuals[index] = distSearch[0]; } } } } // Write out motion skip check result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks in MBs, and row-major of 8 sets of // distortions in blocks if (countSkipMVs != 0) { if (skip_block_type_8x8 == false) { // Copy out 4 (1 component) sets of distortion values. int index = (gid_0 * 4) + (get_local_id(0)) + (gid_1 * 4 * width); if (get_local_id(0) < countSkipMVs) { // 1. Up to 4 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 7. 
__local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; if(skip_residuals != NULL) skip_residuals[index] = distSkip[get_local_id(0) * 16]; } } else { // Copy out 4 (4 component) sets of distortion values. int index = (gid_0 * 4 * 4) + (get_local_id(0)) + (gid_1 * 4 * 4 * width); if (get_local_id(0) < countSkipMVs * 4) { // 1. Up to 16 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*15 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 15. __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; if(skip_residuals != NULL) skip_residuals[index] = distSkip[get_local_id(0) * 4]; } } } // Write out intra search result: if (doIntra) { // Write out the 4x4 intra modes if (get_local_id(0) < 8) { __local char *dstIntra_4x4 = (__local char *)(&dstSkipIntra[32 + 16 + 4]); char value = dstIntra_4x4[get_local_id(0)]; char value_low = (value)&0xf; char value_high = (value >> 4) & 0xf; int index_low = (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width); int index_high = (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width); if(intra_search_predictor_modes != NULL) { intra_search_predictor_modes[index_low + 5] = value_low; intra_search_predictor_modes[index_high + 5] = value_high; } } // Write out the 8x8 intra modes if (get_local_id(0) < 4) { __local char *dstIntra_8x8 = (__local char *)(&dstSkipIntra[32 + 8 + 4]); char value = dstIntra_8x8[get_local_id(0) * 2]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width); if(intra_search_predictor_modes != NULL) intra_search_predictor_modes[index + 1] = value_low; } // Write out the 16x16 intra modes if (get_local_id(0) < 1) { __local char *dstIntra_16x16 = (__local char *)(&dstSkipIntra[32 + 0 + 4]); char value = dstIntra_16x16[0]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (gid_1 * 22 * width); if(intra_search_predictor_modes != NULL) intra_search_predictor_modes[index] = value_low; } // 
Get the intra residuals. if (intra_residuals != NULL) { int index = (gid_0 * 4) + (gid_1 * 4 * width); if (get_local_id(0) < 1) { __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[32 + 16 + 3]); __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[32 + 8 + 3]); __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[32 + 0 + 3]); intra_residuals[index + 2] = distIntra_4x4[0]; intra_residuals[index + 1] = distIntra_8x8[0]; intra_residuals[index + 0] = distIntra_16x16[0]; } } } } } )===" vme_block_advanced_motion_estimate_bidirectional_check_intel_frontend.builtin_kernel000066400000000000000000000017011363734646600446250ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_bidirectional_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __read_only image2d_t src_check_image, __read_only image2d_t ref0_check_image, __read_only image2d_t ref1_check_image, uint flags, uint search_cost_penalty, uint search_cost_precision, short2 count_global, uchar bidir_weight, __global short2 *count_motion_vector_buffer, __global short2 *prediction_motion_vector_buffer, __global char *skip_input_mode_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *search_motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *search_residuals, __global ushort *skip_residuals, __global ushort *intra_residuals) { } )===" vme_block_advanced_motion_estimate_check_intel.builtin_kernel000066400000000000000000000341621363734646600400450ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 
1))) void block_advanced_motion_estimate_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, uint flags, uint skip_block_type, uint search_cost_penalty, uint search_cost_precision, __global short2 *count_motion_vector_buffer, __global short2 *predictors_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *residuals, __global ushort *skip_residuals, __global ushort *intra_residuals, __read_only image2d_t intraSrcImg, int height, int width, int stride) { __local uint dstSearch[64]; // 8 GRFs __local uint dstSkipIntra[64 + 24]; // 11 GRFs (8 for inter, 3 for intra) __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5]; // distortion in the 6th GRF // Initialize the MV cost table: // MV Cost in U4U4 format: // No cost : 0, 0, 0, 0, 0, 0, 0, 0 // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15 // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57 // High Cost : 29, 61, 72, 78, 88, 89, 91, 92 uint2 MVCostTable; if (search_cost_penalty == 1) { MVCostTable.s0 = 0x09050401; MVCostTable.s1 = 0x0F0E0C0A; } else if (search_cost_penalty == 2) { MVCostTable.s0 = 0x2B1D1A05; MVCostTable.s1 = 0x39392F2D; } else if (search_cost_penalty == 3) { MVCostTable.s0 = 0x4E483D1D; MVCostTable.s1 = 0x5C5B5958; } else { MVCostTable.s0 = 0; MVCostTable.s1 = 0; } uint MVCostPrecision = ((uint)search_cost_precision) << 16; // Frame is divided into rows * columns of MBs. // One h/w thread per WG. // One WG processes 'row' MBs - one row per iteration and one MB per row. // Number of WGs (or h/w threads) is number of columns MBs // Each iteration processes the MB in a row - gid_0 is the MB id in a row and // gid_1 is the row offset. 
int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord; srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar) srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar) uint curMB = gid_0 + gid_1 * width; // current MB id short2 count = 0; if(count_motion_vector_buffer != NULL) count = count_motion_vector_buffer[curMB]; int countPredMVs = count.x; if (countPredMVs != 0) { uint offset = curMB * 8; // 8 predictors per MB offset += get_local_id(0) % 8; // 16 work-items access 8 MVs for MB // one predictor for MB per SIMD channel // Reduce predictors from Q-pixel to integer precision. int2 predMV = 0; if (get_local_id(0) < countPredMVs) { if(predictors_buffer != NULL){ predMV = convert_int2(predictors_buffer[offset]); // one MV per work-item predMV.x /= 4; predMV.y /= 4; predMV.y &= 0xFFFE;} } // Do up to 8 IMEs, get the best MVs and their distortions, and optionally // a FBR of the best MVs. // Finally the results are written out to SLM. intel_work_group_vme_mb_multi_query_8( dstSearch, // best search MV and its distortions into SLM countPredMVs, // count of predictor MVs (globally scalar - value range // 1 to 8) MVCostPrecision, // MV cost precision MVCostTable, // MV cost table srcCoord, // MB 2-D offset (globally scalar) predMV, // predictor MVs (up to 8 distinct MVs for SIMD16 thread) srcImg, // source refImg, // reference accelerator); // vme object } int doIntra = (flags & 0x2) != 0; int intraEdges = 0; if (doIntra) { // Enable all edges by default. intraEdges = 0x3C; // If this is a left-edge MB, then disable left edges. if ((gid_0 == 0) & (get_global_offset(0) == 0)) { intraEdges &= 0x18; } // If this is a right edge MB then disable right edges. 
if (gid_0 == width - 1) { intraEdges &= 0x34; } // If this is a top-edge MB, then disable top edges. if ((gid_1 == 0) & (get_global_offset(1) == 0)) { intraEdges &= 0x20; } // Set bit6=bit5. intraEdges |= ((intraEdges & 0x20) << 1); intraEdges <<= 8; } int countSkipMVs = count.y; if (countSkipMVs != 0 || doIntra == true) { uint offset = curMB * 8; // 8 sets of skip check MVs per MB offset += (get_local_id(0) % 8); // 16 work-items access 8 sets of MVs for MB // one set of skip MV per SIMD channel // Do up to 8 skip checks and get the distortions for each of them. // Finally the results are written out to SLM. if ((skip_block_type == 0x0) | ((doIntra) & (countSkipMVs == 0))) { int skipMVs = 0; if (get_local_id(0) < countSkipMVs) { if(skip_motion_vector_buffer != NULL ) { __global int *skip1_motion_vector_buffer = (__global int *)skip_motion_vector_buffer; skipMVs = skip1_motion_vector_buffer[offset]; } // one packed MV for one // work-item } intel_work_group_vme_mb_multi_check_16x16( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs (value range 0 to 8) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) skipMVs, // skip check MVs (up to 8 sets of skip check MVs for // SIMD16 thread) srcImg, // source refImg, // reference intraSrcImg, // intra source accelerator); } if ((skip_block_type == 0x1) & (countSkipMVs > 0)) { int4 skipMVs = 0; if (get_local_id(0) < countSkipMVs) { if(skip_motion_vector_buffer != NULL){ __global int4 *skip4_motion_vector_buffer = (__global int4 *)(skip_motion_vector_buffer); skipMVs = skip4_motion_vector_buffer[offset]; } // four component MVs // per work-item } intel_work_group_vme_mb_multi_check_8x8( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs per MB (value range 0 to 8) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) skipMVs, // skip check MVs (up to 8 ets 
of skip check MVs for SIMD16 // thread) srcImg, // source refImg, // reference intraSrcImg, // intra source accelerator); } } barrier(CLK_LOCAL_MEM_FENCE); // Write Out motion estimation result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks MVs in MBs, and row-major of 8 sets of // MVs/distortion in blocks if (countPredMVs != 0) { // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width); // 1. 16 work-items enabled. // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30 // (interleaved // fwd/bdw MVs) with constant offset 8 (control data size) from SLM // into contiguous // short2 locations 0, 1, .., 15 of global buffer // search_motion_vector_buffer with // offset index. // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals with // offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]); if(motion_vector_buffer != NULL) motion_vector_buffer[index] = val; if (residuals != NULL) { residuals[index] = distSearch[get_local_id(0)]; } } // 8x8 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { // Only 1st 4 work-item are needed. if (get_local_id(0) < 4) { int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width); // 1. 4 work-items enabled. // 2. Work-items gather fw MVs in strided dword locations 0, 8, 16, 24 // (interleaved // fwd/bdw MVs) with constant offset 8 from SLM into contiguous // short2 locations // 0, 1, .., 15 of global buffer search_motion_vector_buffer with // offset index. // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals // with offset index. 
short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]); if(motion_vector_buffer != NULL) motion_vector_buffer[index] = val; if (residuals != NULL) { residuals[index] = distSearch[get_local_id(0) * 4]; } } } // 16x16 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { // One 1st work is needed. if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; // 1. 1 work-item enabled. // 2. Work-item gathers fwd MV in dword location 0 with constant // offset 8 from // SLM into short2 locations 0 of global buffer // search_motion_vector_buffer. // 3. Work-item gathers ushort location 0 from distSearch into ushort // location 0 of search_residuals with offset index. short2 val = as_short2(dstSearch[8]); if(motion_vector_buffer != NULL) motion_vector_buffer[index] = val; if (residuals != NULL) { residuals[index] = distSearch[0]; } } } } // Write out motion skip check result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks in MBs, and row-major of 8 sets of // distortions in blocks if (countSkipMVs != 0) { if (skip_block_type == 0x0) { // Copy out 8 (1 component) sets of distortion values. int index = (gid_0 * 8) + (get_local_id(0)) + (gid_1 * 8 * width); if (get_local_id(0) < countSkipMVs) { __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; // 1. Up to 8 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 7. if(skip_residuals != NULL) skip_residuals[index] = distSkip[get_local_id(0) * 16]; } } else { // Copy out 8 (4 component) sets of distortion values. int index = (gid_0 * 8 * 4) + (get_local_id(0)) + (gid_1 * 8 * 4 * width); __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; if (get_local_id(0) < countSkipMVs * 4) { // 1. Up to 16 work-items are enabled. // 2. 
The work-item gathers distSkip locations 0, 4*1, .., 4*31 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 31. if(skip_residuals != NULL){ skip_residuals[index] = distSkip[get_local_id(0) * 4]; skip_residuals[index + 16] = distSkip[(get_local_id(0) + 16) * 4];} } } } // Write out intra search result: if (doIntra) { int index_low = (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width); int index_high = (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width); // Write out the 4x4 intra modes if (get_local_id(0) < 8) { __local char *dstIntra_4x4 = (__local char *)(&dstSkipIntra[64 + 16 + 4]); char value = dstIntra_4x4[get_local_id(0)]; char value_low = (value)&0xf; char value_high = (value >> 4) & 0xf; if(intra_search_predictor_modes != NULL){ intra_search_predictor_modes[index_low + 5] = value_low; intra_search_predictor_modes[index_high + 5] = value_high;} } // Write out the 8x8 intra modes if (get_local_id(0) < 4) { __local char *dstIntra_8x8 = (__local char *)(&dstSkipIntra[64 + 8 + 4]); char value = dstIntra_8x8[get_local_id(0) * 2]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width); if(intra_search_predictor_modes != NULL) intra_search_predictor_modes[index + 1] = value_low; } // Write out the 16x16 intra modes if (get_local_id(0) < 1) { __local char *dstIntra_16x16 = (__local char *)(&dstSkipIntra[64 + 0 + 4]); char value = dstIntra_16x16[get_local_id(0)]; char value_low = (value)&0xf; if(intra_search_predictor_modes != NULL) intra_search_predictor_modes[index_low] = value_low; } // Get the intra residuals. 
if (intra_residuals != NULL) { int index = (gid_0 * 4) + (gid_1 * 4 * width); if (get_local_id(0) < 1) { __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[64 + 16 + 3]); __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[64 + 8 + 3]); __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[64 + 0 + 3]); intra_residuals[index + 2] = distIntra_4x4[0]; intra_residuals[index + 1] = distIntra_8x8[0]; intra_residuals[index + 0] = distIntra_16x16[0]; } } } } } )===" vme_block_advanced_motion_estimate_check_intel_frontend.builtin_kernel000066400000000000000000000013251363734646600417370ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_advanced_motion_estimate_check_intel( sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, uint flags, uint skip_block_type, uint search_cost_penalty, uint search_cost_precision, __global short2 *count_motion_vector_buffer, __global short2 *predictors_buffer, __global short2 *skip_motion_vector_buffer, __global short2 *motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *residuals, __global ushort *skip_residuals, __global ushort *intra_residuals) { } )===" vme_block_motion_estimate_intel.builtin_kernel000066400000000000000000000054461363734646600350660ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals, int height, int width, int stride) { 
__local uint dst[64]; __local ushort *dist = (__local ushort *)&dst[8 * 5]; int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord = 0; int2 refCoord = 0; srcCoord.x = gid_0 * 16 + get_global_offset(0); srcCoord.y = gid_1 * 16 + get_global_offset(1); short2 predMV = 0; #ifndef HW_NULL_CHECK if (prediction_motion_vector_buffer != NULL) #endif { predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * width]; refCoord.x = predMV.x / 4; refCoord.y = predMV.y / 4; refCoord.y = refCoord.y & 0xFFFE; } { intel_work_group_vme_mb_query(dst, srcCoord, refCoord, srcImg, refImg, accelerator); } barrier(CLK_LOCAL_MEM_FENCE); // Write Out Result // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int x = get_local_id(0) % 4; int y = get_local_id(0) / 4; int index = (gid_0 * 4 + x) + (gid_1 * 4 + y) * width * 4; short2 val = as_short2(dst[8 + (y * 4 + x) * 2]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[y * 4 + x]; } } // 8x8 if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { if (get_local_id(0) < 4) { int x = get_local_id(0) % 2; int y = get_local_id(0) / 2; int index = (gid_0 * 2 + x) + (gid_1 * 2 + y) * width * 2; short2 val = as_short2(dst[8 + (y * 2 + x) * 8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[(y * 2 + x) * 4]; } } } // 16x16 if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; short2 val = as_short2(dst[8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = dist[0]; } } } } } )===" 
vme_block_motion_estimate_intel_frontend.builtin_kernel000066400000000000000000000007761363734646600367660ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals) { } )===" compute-runtime-20.13.16352/opencl/source/built_ins/populate_built_ins.inl000066400000000000000000000011551363734646600265340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { template void BuiltinDispatchInfoBuilder::populate(Device &device, EBuiltInOps::Type op, const char *options, KernelsDescArgsT &&... desc) { auto src = kernelsLib.getBuiltinsLib().getBuiltinCode(op, BuiltinCode::ECodeType::Any, device); prog.reset(BuiltinsLib::createProgramFromCode(src, device).release()); prog->build(0, nullptr, options, nullptr, nullptr, kernelsLib.isCacheingEnabled()); grabKernels(std::forward(desc)...); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/built_ins/registry/000077500000000000000000000000001363734646600237755ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/built_ins/registry/CMakeLists.txt000066400000000000000000000006761363734646600265460ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_library(${BUILTINS_VME_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt register_ext_vme_source.cpp ) set_target_properties(${BUILTINS_VME_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${BUILTINS_VME_LIB_NAME} PROPERTIES FOLDER "built_ins") target_include_directories(${BUILTINS_VME_LIB_NAME} PRIVATE 
${KHRONOS_HEADERS_DIR} ) compute-runtime-20.13.16352/opencl/source/built_ins/registry/register_ext_vme_source.cpp000066400000000000000000000025451363734646600314420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/registry/built_ins_registry.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include namespace NEO { static RegisterEmbeddedResource registerVmeSrc( createBuiltinResourceName( EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "opencl/source/built_ins/kernels/vme_block_motion_estimate_intel.builtin_kernel" )); static RegisterEmbeddedResource registerVmeAdvancedSrc( createBuiltinResourceName( EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "opencl/source/built_ins/kernels/vme_block_advanced_motion_estimate_check_intel.builtin_kernel" )); static RegisterEmbeddedResource registerVmeAdvancedBidirectionalSrc( createBuiltinResourceName( EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "opencl/source/built_ins/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.builtin_kernel" )); } // namespace NEO compute-runtime-20.13.16352/opencl/source/built_ins/unknown_built_in_name.cpp000066400000000000000000000010261363734646600272140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" namespace NEO { const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin) { return "unknown"; } BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getUnknownDispatchInfoBuilder(EBuiltInOps::Type operation, 
Device &device) { throw std::runtime_error("getBuiltinDispatchInfoBuilder failed"); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/built_ins/vme_builtin.cpp000066400000000000000000000122151363734646600251470ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/vme_builtin.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/device/device.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/built_ins/populate_built_ins.inl" #include "opencl/source/built_ins/vme_dispatch_builder.h" #include "opencl/source/program/program.h" namespace NEO { static const char *blockMotionEstimateIntelSrc = { #include "kernels/vme_block_motion_estimate_intel_frontend.builtin_kernel" }; static const char *blockAdvancedMotionEstimateCheckIntelSrc = { #include "kernels/vme_block_advanced_motion_estimate_check_intel_frontend.builtin_kernel" }; static const char *blockAdvancedMotionEstimateBidirectionalCheckIntelSrc = { #include "kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel_frontend.builtin_kernel" }; static const std::tuple mediaBuiltIns[] = { std::make_tuple("block_motion_estimate_intel", blockMotionEstimateIntelSrc), std::make_tuple("block_advanced_motion_estimate_check_intel", blockAdvancedMotionEstimateCheckIntelSrc), std::make_tuple("block_advanced_motion_estimate_bidirectional_check_intel", blockAdvancedMotionEstimateBidirectionalCheckIntelSrc), }; // Unlike other built-ins media kernels are not stored in BuiltIns object. // Pointer to program with built in kernels is returned to the user through API // call and user is responsible for releasing it by calling clReleaseProgram. 
Program *Vme::createBuiltInProgram( Context &context, Device &device, const char *kernelNames, int &errcodeRet) { std::string programSourceStr = ""; std::istringstream ss(kernelNames); std::string currentKernelName; while (std::getline(ss, currentKernelName, ';')) { bool found = false; for (auto &builtInTuple : mediaBuiltIns) { if (currentKernelName == std::get<0>(builtInTuple)) { programSourceStr += std::get<1>(builtInTuple); found = true; break; } } if (!found) { errcodeRet = CL_INVALID_VALUE; return nullptr; } } if (programSourceStr.empty() == true) { errcodeRet = CL_INVALID_VALUE; return nullptr; } Program *pBuiltInProgram = nullptr; pBuiltInProgram = Program::create(programSourceStr.c_str(), &context, device, true, nullptr); if (pBuiltInProgram) { std::unordered_map builtinsBuilders; builtinsBuilders["block_motion_estimate_intel"] = &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, device); builtinsBuilders["block_advanced_motion_estimate_check_intel"] = &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, device); builtinsBuilders["block_advanced_motion_estimate_bidirectional_check_intel"] = &Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, device); errcodeRet = pBuiltInProgram->build(&device, mediaKernelsBuildOptions, true, builtinsBuilders); } else { errcodeRet = CL_INVALID_VALUE; } return pBuiltInProgram; } const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) { switch (builtin) { default: return nullptr; case EBuiltInOps::VmeBlockMotionEstimateIntel: return "vme_block_motion_estimate_intel.builtin_kernel"; case EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel: return "vme_block_advanced_motion_estimate_check_intel.builtin_kernel"; case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel: return "vme_block_advanced_motion_estimate_bidirectional_check_intel"; } } BuiltinDispatchInfoBuilder 
&Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, Device &device) { auto &builtins = *device.getBuiltIns(); uint32_t operationId = static_cast(operation); auto &operationBuilder = builtins.BuiltinOpsBuilders[operationId]; switch (operation) { default: return BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(operation, device); case EBuiltInOps::VmeBlockMotionEstimateIntel: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); }); break; case EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); }); break; case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel: std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique>(builtins, device); }); break; } return *operationBuilder.first; } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/built_ins/vme_builtin.h000066400000000000000000000010441363734646600246120ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/built_ins/built_in_ops_vme.h" namespace NEO { class Program; class Device; class Context; class BuiltIns; class BuiltinDispatchInfoBuilder; namespace Vme { Program *createBuiltInProgram( Context &context, Device &device, const char *kernelNames, int &errcodeRet); BuiltinDispatchInfoBuilder &getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, Device &device); } // namespace Vme } // namespace NEOcompute-runtime-20.13.16352/opencl/source/built_ins/vme_dispatch_builder.h000066400000000000000000000520671363734646600264640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "opencl/source/accelerators/intel_accelerator.h" #include 
"opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" namespace NEO { class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { public: VmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, Device &device, EBuiltInOps::Type builtinOp, const char *kernelName) : BuiltinDispatchInfoBuilder(kernelsLib) { populate(device, builtinOp, mediaKernelsBuildOptions, kernelName, vmeKernel); widthArgNum = vmeKernel->getKernelInfo().getArgNumByName("width"); heightArgNum = vmeKernel->getKernelInfo().getArgNumByName("height"); strideArgNum = vmeKernel->getKernelInfo().getArgNumByName("stride"); acceleratorArgNum = vmeKernel->getKernelInfo().getArgNumByName("accelerator"); srcImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("srcImg"); refImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("refImg"); motionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("motion_vector_buffer"); predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("prediction_motion_vector_buffer"); residualsArgNum = vmeKernel->getKernelInfo().getArgNumByName("residuals"); } void getBlkTraits(const Vec3 &inGws, size_t &gwWidthInBlk, size_t &gwHeightInBlk) const { const size_t vmeMacroBlockWidth = 16; const size_t vmeMacroBlockHeight = 16; gwWidthInBlk = Math::divideAndRoundUp(inGws.x, vmeMacroBlockWidth); gwHeightInBlk = Math::divideAndRoundUp(inGws.y, vmeMacroBlockHeight); } bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, Kernel *kern, const uint32_t inDim, const Vec3 &inGws, const Vec3 &inLws, const Vec3 &inOffset) const override { if (kern == nullptr) { return false; } size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk); cl_int height = 
(cl_int)gwHeightInBlk; cl_int width = (cl_int)gwWidthInBlk; cl_int stride = height; size_t numThreadsX = gwWidthInBlk; const size_t simdWidth = vmeKernel->getKernelInfo().getMaxSimdSize(); stride = static_cast(Math::divideAndRoundUp(height * width, numThreadsX)); // update implicit args vmeKernel->setArg(heightArgNum, sizeof(height), &height); vmeKernel->setArg(widthArgNum, sizeof(width), &width); vmeKernel->setArg(strideArgNum, sizeof(stride), &stride); // Update global work size to force macro-block to HW thread execution model Vec3 gws = {numThreadsX * simdWidth, 1, 1}; Vec3 lws = {vmeKernel->getKernelInfo().reqdWorkGroupSize[0], 1, 1}; DispatchInfoBuilder builder; builder.setDispatchGeometry(gws, lws, inOffset, gws, lws); builder.setKernel(vmeKernel); builder.bake(multiDispatchInfo); return true; } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { DEBUG_BREAK_IF(!((argIndex != widthArgNum) && (argIndex != heightArgNum) && (argIndex != strideArgNum))); if ((argIndex == acceleratorArgNum) && (argVal == nullptr)) { err = CL_INVALID_ACCELERATOR_INTEL; return false; } err = vmeKernel->setArg(argIndex, argSize, argVal); return false; } cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3 &inGws, const Vec3 &inLws, const Vec3 &inOffset) const override { if (inworkDim != 2) { return CL_INVALID_WORK_DIMENSION; } size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk); size_t BlkNum = gwWidthInBlk * gwHeightInBlk; size_t BlkMul = 1; IntelAccelerator *accelerator = castToObject((cl_accelerator_intel)vmeKernel->getKernelArg(acceleratorArgNum)); if (accelerator == nullptr) { return CL_INVALID_KERNEL_ARGS; // accelerator was not set } DEBUG_BREAK_IF(accelerator->getDescriptorSize() != sizeof(cl_motion_estimation_desc_intel)); const cl_motion_estimation_desc_intel *acceleratorDesc = reinterpret_cast(accelerator->getDescriptor()); switch 
(acceleratorDesc->mb_block_type) { case CL_ME_MB_TYPE_8x8_INTEL: BlkMul = 4; break; case CL_ME_MB_TYPE_4x4_INTEL: BlkMul = 16; break; default: break; } return validateVmeDispatch(inGws, inOffset, BlkNum, BlkMul); } // notes on corner cases : // * if arg not available in kernels - returns true // * if arg set to nullptr - returns true bool validateBufferSize(int32_t bufferArgNum, size_t minimumSizeExpected) const { if (bufferArgNum == -1) { return true; } auto buff = castToObject((cl_mem)vmeKernel->getKernelArg(bufferArgNum)); if (buff == nullptr) { return true; } size_t bufferSize = buff->getSize(); if (bufferSize < minimumSizeExpected) { return false; } return true; } template bool validateEnumVal(EnumBaseType val) const { return false; } template bool validateEnumVal(EnumBaseType val, ExpectedValType expectedVal, ExpectedValsTypes... expVals) const { return (val == static_cast(expectedVal)) || validateEnumVal(val, expVals...); } // notes on corner cases : // * if arg not available in kernels - returns true template bool validateEnumArg(int32_t argNum, ExpectedValsTypes... 
expVals) const { if (argNum == -1) { return true; } EnumBaseType val = this->getKernelArgByValValue(static_cast(argNum)); return validateEnumVal(val, expVals...); } template RetType getKernelArgByValValue(uint32_t argNum) const { auto &kai = vmeKernel->getKernelInfo().kernelArgInfo[argNum]; DEBUG_BREAK_IF(kai.kernelArgPatchInfoVector.size() != 1); const KernelArgPatchInfo &patchInfo = kai.kernelArgPatchInfoVector[0]; DEBUG_BREAK_IF(sizeof(RetType) > patchInfo.size); return *(RetType *)(vmeKernel->getCrossThreadData() + patchInfo.crossthreadOffset); } cl_int validateImages(Vec3 inputRegion, Vec3 offset) const { Image *srcImg = castToObject((cl_mem)vmeKernel->getKernelArg(srcImgArgNum)); Image *refImg = castToObject((cl_mem)vmeKernel->getKernelArg(refImgArgNum)); if ((srcImg == nullptr) || (refImg == nullptr)) { return CL_INVALID_KERNEL_ARGS; } for (Image *img : {srcImg, refImg}) { const cl_image_format &imgFormat = img->getImageFormat(); if ((imgFormat.image_channel_order != CL_R) || (imgFormat.image_channel_data_type != CL_UNORM_INT8)) { return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } if (false == img->isTiledAllocation()) { //VME only works with tiled images. 
return CL_OUT_OF_RESOURCES; } } { const cl_image_desc &srcImgDesc = srcImg->getImageDesc(); size_t srcImageWidth = srcImgDesc.image_width; size_t srcImageHeight = srcImgDesc.image_height; if (((inputRegion.x + offset.x) > srcImageWidth) || ((inputRegion.y + offset.y) > srcImageHeight)) { return CL_INVALID_IMAGE_SIZE; } } return CL_SUCCESS; } virtual cl_int validateVmeDispatch(Vec3 inputRegion, Vec3 offset, size_t blkNum, size_t blkMul) const { { cl_int imageValidationStatus = validateImages(inputRegion, offset); if (imageValidationStatus != CL_SUCCESS) { return imageValidationStatus; } } size_t numPredictors = 1; std::pair bufferRequirements[] = { std::make_pair(motionVectorBufferArgNum, (blkNum * blkMul * 2 * sizeof(cl_short))), std::make_pair(predictionMotionVectorBufferArgNum, (blkNum * numPredictors * 2 * sizeof(cl_short))), std::make_pair(residualsArgNum, (blkNum * blkMul * sizeof(cl_ushort)))}; for (const auto &req : bufferRequirements) { if (false == validateBufferSize(req.first, req.second)) { return CL_INVALID_BUFFER_SIZE; } } return CL_SUCCESS; } protected: uint32_t heightArgNum; uint32_t widthArgNum; uint32_t strideArgNum; uint32_t acceleratorArgNum; uint32_t srcImgArgNum; uint32_t refImgArgNum; int32_t motionVectorBufferArgNum; int32_t predictionMotionVectorBufferArgNum; int32_t residualsArgNum; Kernel *vmeKernel; }; template <> class BuiltInOp : public VmeBuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : VmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::VmeBlockMotionEstimateIntel, "block_motion_estimate_intel") { } }; class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuilder { public: AdvancedVmeBuiltinDispatchInfoBuilder(BuiltIns &kernelsLib, Device &device, EBuiltInOps::Type builtinOp, const char *kernelName) : VmeBuiltinDispatchInfoBuilder(kernelsLib, device, builtinOp, kernelName) { flagsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("flags"); intraSrcImgArgNum = 
this->vmeKernel->getKernelInfo().getArgNumByName("intraSrcImg"); skipBlockTypeArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_block_type"); searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_penalty"); searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_precision"); bidirWeightArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("bidir_weight"); predictorsBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("predictors_buffer"); countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("count_motion_vector_buffer"); skipMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_motion_vector_buffer"); intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_search_predictor_modes"); skipResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_residuals"); intraResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_residuals"); } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { DEBUG_BREAK_IF(argIndex == intraSrcImgArgNum); if (argIndex == this->srcImgArgNum) { // rebind also as media block image this->vmeKernel->setArg(intraSrcImgArgNum, argSize, argVal); } return VmeBuiltinDispatchInfoBuilder::setExplicitArg(argIndex, argSize, argVal, err); } virtual bool isBidirKernel() const { return false; } bool validateFlags(uint32_t &outSkipBlockType) const { uint32_t flagsVal = VmeBuiltinDispatchInfoBuilder::template getKernelArgByValValue(flagsArgNum); if ((flagsVal & CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) == CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL) { return false; } if (flagsVal == CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL) { outSkipBlockType = CL_ME_MB_TYPE_16x16_INTEL; } else if ((flagsVal & CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL) == CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL) { outSkipBlockType = 
CL_ME_MB_TYPE_8x8_INTEL; } return true; } bool validateSkipBlockTypeArg(uint32_t &outSkipBlockType) const { if (skipBlockTypeArgNum == -1) { return true; } outSkipBlockType = VmeBuiltinDispatchInfoBuilder::template getKernelArgByValValue(static_cast(skipBlockTypeArgNum)); switch (outSkipBlockType) { case CL_ME_MB_TYPE_16x16_INTEL: break; case CL_ME_MB_TYPE_8x8_INTEL: break; default: return false; ; } return true; } size_t getIntraSearchPredictorModesBuffExpSize(size_t blkNum) const { // vector size is 22 - 1 (16x16 luma block) + 4 (8x8 luma block) + 16 (4x4 luma block) + 1 (8x8 chroma block) int VectorSize = 22; size_t intraSearchPredictorModesBuffExpSize = blkNum * VectorSize; return intraSearchPredictorModesBuffExpSize; } size_t getSkipMotionVectorBufferExpSize(uint32_t skipBlockType, size_t blkNum) const { // vector size is either 1 (16x16 block) or 4 (8x8 block) // 0 to 8 skip MVs per MB // may be null if all MBs in frame have 0 skip check MVs in which case VME skip checks are not performed // layout assumes 4 (for bidir) or 8 (otherwise) skip check MVs per MB // row-major block layout; all MVs for a block are contiguous // buffer size depends on the block and frame size . int vectorSize = (skipBlockType == CL_ME_MB_TYPE_16x16_INTEL) ? 1 : 4; int numChecks = (isBidirKernel() ? 
4 : 8); size_t skipMotionVectorBufferExpSize = blkNum * numChecks * vectorSize * 2 * sizeof(cl_short); return skipMotionVectorBufferExpSize; } size_t getSkipResidualsBuffExpSize(uint32_t skipBlockType, size_t blkNum) const { /* output buffer of vectors of unsigned short SAD adjusted values corresponding to the input skip check MVs may be null if skip_motion_vector_buffer is null vector size is either 1 (16x16 block) or 4 (8x8 block) 0 to 8 skip check residuals per MB layout always assumes 8 skip check residuals per MB row major block layout; all MVs for a block are contiguous buffer size depends on the block and frame size */ int vectorSize = 1; switch (skipBlockType) { case CL_ME_MB_TYPE_16x16_INTEL: vectorSize = 1; break; case CL_ME_MB_TYPE_8x8_INTEL: vectorSize = 4; break; default: break; }; int numChecks = (isBidirKernel() ? 4 : 8); size_t skipResidualsBuffExpSize = blkNum * vectorSize * numChecks * sizeof(cl_ushort); return skipResidualsBuffExpSize; } size_t getIntraResidualsBuffExpSize(size_t blkNum) const { /* output buffer of vectors of unsigned short SAD adjusted values may be null in which case the intra residuals corresponding not returned vector size is 4 - 1 (16x16 luma block) + 1 (8x8 luma block) + 1 (4x4 luma block) + 1 (8x8 chroma block) 1 vector per MB buffer size depends on the frame size */ int vectorSize = 4; size_t intraResidualsBuffExpSize = (blkNum * sizeof(cl_ushort) * vectorSize); return intraResidualsBuffExpSize; } size_t getPredictorsBufferExpSize(size_t blkNum) const { size_t numPredictors = 8; size_t predictorsBufferExpSize = (blkNum * numPredictors * 2 * sizeof(cl_short)); return predictorsBufferExpSize; } cl_int validateVmeDispatch(Vec3 inputRegion, Vec3 offset, size_t blkNum, size_t blkMul) const override { cl_int basicVmeValidationStatus = VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, blkNum, blkMul); if (basicVmeValidationStatus != CL_SUCCESS) { return basicVmeValidationStatus; } uint32_t skipBlockType = 
CL_ME_MB_TYPE_16x16_INTEL; if (false == validateFlags(skipBlockType)) { return CL_INVALID_KERNEL_ARGS; } if (false == validateSkipBlockTypeArg(skipBlockType)) { return CL_OUT_OF_RESOURCES; } if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg(searchCostPenaltyArgNum, CL_ME_COST_PENALTY_NONE_INTEL, CL_ME_COST_PENALTY_LOW_INTEL, CL_ME_COST_PENALTY_NORMAL_INTEL, CL_ME_COST_PENALTY_HIGH_INTEL)) { return CL_OUT_OF_RESOURCES; } if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg(searchCostPrecisionArgNum, CL_ME_COST_PRECISION_QPEL_INTEL, CL_ME_COST_PRECISION_HPEL_INTEL, CL_ME_COST_PRECISION_PEL_INTEL, CL_ME_COST_PRECISION_DPEL_INTEL)) { return CL_OUT_OF_RESOURCES; } if (false == VmeBuiltinDispatchInfoBuilder::template validateEnumArg(bidirWeightArgNum, 0, CL_ME_BIDIR_WEIGHT_QUARTER_INTEL, CL_ME_BIDIR_WEIGHT_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_HALF_INTEL, CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL, CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL)) { return CL_INVALID_KERNEL_ARGS; } std::pair bufferRequirements[] = { std::make_pair(countMotionVectorBufferArgNum, (blkNum * 2 * sizeof(cl_short))), std::make_pair(skipMotionVectorBufferArgNum, getSkipMotionVectorBufferExpSize(skipBlockType, blkNum)), std::make_pair(intraSearchPredictorModesArgNum, getIntraSearchPredictorModesBuffExpSize(blkNum)), std::make_pair(skipResidualsArgNum, getSkipResidualsBuffExpSize(skipBlockType, blkNum)), std::make_pair(intraResidualsArgNum, getIntraResidualsBuffExpSize(blkNum)), std::make_pair(predictorsBufferArgNum, getPredictorsBufferExpSize(blkNum))}; for (const auto &req : bufferRequirements) { if (false == this->validateBufferSize(req.first, req.second)) { return CL_INVALID_BUFFER_SIZE; } } return CL_SUCCESS; } protected: uint32_t flagsArgNum; int32_t skipBlockTypeArgNum; uint32_t searchCostPenaltyArgNum; uint32_t searchCostPrecisionArgNum; int32_t bidirWeightArgNum; int32_t predictorsBufferArgNum; uint32_t countMotionVectorBufferArgNum; uint32_t skipMotionVectorBufferArgNum; 
uint32_t intraSearchPredictorModesArgNum; uint32_t skipResidualsArgNum; uint32_t intraResidualsArgNum; uint32_t intraSrcImgArgNum; }; template <> class BuiltInOp : public AdvancedVmeBuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : AdvancedVmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, "block_advanced_motion_estimate_check_intel") { } cl_int validateVmeDispatch(Vec3 inputRegion, Vec3 offset, size_t gwWidthInBlk, size_t gwHeightInBlk) const override { cl_int basicAdvVmeValidationStatus = AdvancedVmeBuiltinDispatchInfoBuilder::validateVmeDispatch(inputRegion, offset, gwWidthInBlk, gwHeightInBlk); if (basicAdvVmeValidationStatus != CL_SUCCESS) { return basicAdvVmeValidationStatus; } auto countMotionVectorBuff = castToObject((cl_mem)this->vmeKernel->getKernelArg(this->countMotionVectorBufferArgNum)); if (countMotionVectorBuff == nullptr) { return CL_INVALID_BUFFER_SIZE; } return CL_SUCCESS; } }; template <> class BuiltInOp : public AdvancedVmeBuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Device &device) : AdvancedVmeBuiltinDispatchInfoBuilder(kernelsLib, device, EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, "block_advanced_motion_estimate_bidirectional_check_intel") { } bool isBidirKernel() const override { return true; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/000077500000000000000000000000001363734646600255725ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/CMakeLists.txt000066400000000000000000000040671363734646600303410ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(BUILTIN_KERNELS_SIMULATION_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h 
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "/WX" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REGEX REPLACE "-Werror[^ \t\n]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-Wsometimes-uninitialized" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-Wsign-compare" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-Wunused-variable" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-everything") endif() if(COMPILER_SUPPORTS_CXX11) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") elseif(COMPILER_SUPPORTS_CXX0X) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") endif() if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive -fPIC") endif() macro(macro_for_each_gen) list(APPEND DEFAULT_GEN_PLATFORMS_DEFITIONS DEFAULT_${GEN_TYPE}_PLATFORM=${DEFAULT_SUPPORTED_${GEN_TYPE}_PLATFORM}) if(${SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE}}) list(APPEND BUILTIN_KERNELS_SIMULATION_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_TYPE_LOWER}/scheduler_simulation_${GEN_TYPE_LOWER}.cpp) endif() endmacro() apply_macro_for_each_gen("SUPPORTED") add_library(${BIKSIM_LIB_NAME} OBJECT EXCLUDE_FROM_ALL ${BUILTIN_KERNELS_SIMULATION_SRCS}) target_include_directories(${BIKSIM_LIB_NAME} BEFORE PRIVATE ${ENGINE_NODE_DIR} ${NEO__GMM_INCLUDE_DIR} ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) set_target_properties(${BIKSIM_LIB_NAME} PROPERTIES FOLDER "built_ins") target_compile_definitions(${BIKSIM_LIB_NAME} PUBLIC ${SUPPORTED_GEN_FLAGS_DEFINITONS} ${DEFAULT_GEN_PLATFORMS_DEFITIONS} MOCKABLE_VIRTUAL=) 
compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen11/000077500000000000000000000000001363734646600265055ustar00rootroot00000000000000scheduler_simulation_gen11.cpp000066400000000000000000000106171363734646600343540ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen11/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.inl" #include "opencl/source/execution_model/device_enqueue.h" #include "CL/cl.h" using namespace NEO; using namespace BuiltinKernelsSimulation; namespace Gen11SchedulerSimulation { #define SCHEDULER_EMULATION uint GetNextPowerof2(uint number); float __intel__getProfilingTimerResolution() { return static_cast(DEFAULT_GEN11_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution); } #include "opencl/source/gen11/device_enqueue.h" #include "opencl/source/gen11/scheduler_builtin_kernel.inl" #include "opencl/source/scheduler/scheduler.cl" } // namespace Gen11SchedulerSimulation namespace BuiltinKernelsSimulation { template <> void SchedulerSimulation::startScheduler(uint32_t index, GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation *queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue) { threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index)); while (!conditionReady) { } Gen11SchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(), (uint *)commandsStack->getUnderlyingBuffer(), (IGIL_EventPool 
*)eventsPool->getUnderlyingBuffer(), (uint *)secondaryBatchBuffer->getUnderlyingBuffer(), (char *)dsh->getUnderlyingBuffer(), (IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(), (uint *)queueStorageBuffer->getUnderlyingBuffer(), (char *)ssh->getUnderlyingBuffer(), debugQueue != nullptr ? (DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr); } template <> void SchedulerSimulation::patchGpGpuWalker(uint secondLevelBatchOffset, __global uint *secondaryBatchBuffer, uint interfaceDescriptorOffset, uint simdSize, uint totalLocalWorkSize, uint3 dimSize, uint3 startPoint, uint numberOfHwThreadsPerWg, uint indirectPayloadSize, uint ioHoffset) { Gen11SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset, secondaryBatchBuffer, interfaceDescriptorOffset, simdSize, totalLocalWorkSize, dimSize, startPoint, numberOfHwThreadsPerWg, indirectPayloadSize, ioHoffset); } template class SchedulerSimulation; } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen12lp/000077500000000000000000000000001363734646600270425ustar00rootroot00000000000000scheduler_simulation_gen12lp.cpp000066400000000000000000000110031363734646600352340ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen12lp/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.inl" #include "opencl/source/execution_model/device_enqueue.h" #include "CL/cl.h" #include using namespace NEO; using namespace BuiltinKernelsSimulation; namespace Gen12LPSchedulerSimulation { #define SCHEDULER_EMULATION uint GetNextPowerof2(uint number); float 
__intel__getProfilingTimerResolution() { return static_cast(DEFAULT_GEN12LP_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution); } #include "opencl/source/gen12lp/device_enqueue.h" #include "opencl/source/gen12lp/scheduler_builtin_kernel.inl" #include "opencl/source/scheduler/scheduler.cl" } // namespace Gen12LPSchedulerSimulation namespace BuiltinKernelsSimulation { template <> void SchedulerSimulation::startScheduler(uint32_t index, GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation *queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue) { threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index)); while (!conditionReady) { } Gen12LPSchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(), (uint *)commandsStack->getUnderlyingBuffer(), (IGIL_EventPool *)eventsPool->getUnderlyingBuffer(), (uint *)secondaryBatchBuffer->getUnderlyingBuffer(), (char *)dsh->getUnderlyingBuffer(), (IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(), (uint *)queueStorageBuffer->getUnderlyingBuffer(), (char *)ssh->getUnderlyingBuffer(), debugQueue != nullptr ? 
(DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr); } template <> void SchedulerSimulation::patchGpGpuWalker(uint secondLevelBatchOffset, __global uint *secondaryBatchBuffer, uint interfaceDescriptorOffset, uint simdSize, uint totalLocalWorkSize, uint3 dimSize, uint3 startPoint, uint numberOfHwThreadsPerWg, uint indirectPayloadSize, uint ioHoffset) { Gen12LPSchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset, secondaryBatchBuffer, interfaceDescriptorOffset, simdSize, totalLocalWorkSize, dimSize, startPoint, numberOfHwThreadsPerWg, indirectPayloadSize, ioHoffset); } template class SchedulerSimulation; } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen8/000077500000000000000000000000001363734646600264335ustar00rootroot00000000000000scheduler_simulation_gen8.cpp000066400000000000000000000105661363734646600342330ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen8/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.inl" #include "opencl/source/execution_model/device_enqueue.h" #include "CL/cl.h" using namespace NEO; using namespace BuiltinKernelsSimulation; namespace Gen8SchedulerSimulation { #define SCHEDULER_EMULATION uint GetNextPowerof2(uint number); float __intel__getProfilingTimerResolution() { return static_cast(DEFAULT_GEN8_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution); } #include "opencl/source/gen8/device_enqueue.h" #include "opencl/source/gen8/scheduler_builtin_kernel.inl" #include "opencl/source/scheduler/scheduler.cl" } // namespace Gen8SchedulerSimulation namespace 
BuiltinKernelsSimulation { template <> void SchedulerSimulation::startScheduler(uint32_t index, GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation *queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue) { threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index)); while (!conditionReady) { } Gen8SchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(), (uint *)commandsStack->getUnderlyingBuffer(), (IGIL_EventPool *)eventsPool->getUnderlyingBuffer(), (uint *)secondaryBatchBuffer->getUnderlyingBuffer(), (char *)dsh->getUnderlyingBuffer(), (IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(), (uint *)queueStorageBuffer->getUnderlyingBuffer(), (char *)ssh->getUnderlyingBuffer(), debugQueue != nullptr ? (DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr); } template <> void SchedulerSimulation::patchGpGpuWalker(uint secondLevelBatchOffset, __global uint *secondaryBatchBuffer, uint interfaceDescriptorOffset, uint simdSize, uint totalLocalWorkSize, uint3 dimSize, uint3 startPoint, uint numberOfHwThreadsPerWg, uint indirectPayloadSize, uint ioHoffset) { Gen8SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset, secondaryBatchBuffer, interfaceDescriptorOffset, simdSize, totalLocalWorkSize, dimSize, startPoint, numberOfHwThreadsPerWg, indirectPayloadSize, ioHoffset); } template class SchedulerSimulation; } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen9/000077500000000000000000000000001363734646600264345ustar00rootroot00000000000000scheduler_simulation_gen9.cpp000066400000000000000000000105641363734646600342330ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/gen9/* * Copyright (C) 2017-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.inl" #include "opencl/source/execution_model/device_enqueue.h" #include "CL/cl.h" using namespace NEO; using namespace BuiltinKernelsSimulation; namespace NEO { struct SKLFamily; } namespace Gen9SchedulerSimulation { #define SCHEDULER_EMULATION float __intel__getProfilingTimerResolution() { return static_cast(DEFAULT_GEN9_PLATFORM::hwInfo.capabilityTable.defaultProfilingTimerResolution); } #include "opencl/source/gen9/device_enqueue.h" #include "opencl/source/gen9/scheduler_builtin_kernel.inl" #include "opencl/source/scheduler/scheduler.cl" } // namespace Gen9SchedulerSimulation namespace BuiltinKernelsSimulation { template <> void SchedulerSimulation::startScheduler(uint32_t index, GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation *queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue) { threadIDToLocalIDmap.insert(std::make_pair(std::this_thread::get_id(), index)); while (!conditionReady) { } Gen9SchedulerSimulation::SchedulerParallel20((IGIL_CommandQueue *)queue->getUnderlyingBuffer(), (uint *)commandsStack->getUnderlyingBuffer(), (IGIL_EventPool *)eventsPool->getUnderlyingBuffer(), (uint *)secondaryBatchBuffer->getUnderlyingBuffer(), (char *)dsh->getUnderlyingBuffer(), (IGIL_KernelDataHeader *)reflectionSurface->getUnderlyingBuffer(), (uint *)queueStorageBuffer->getUnderlyingBuffer(), (char *)ssh->getUnderlyingBuffer(), debugQueue != nullptr ? 
(DebugDataBuffer *)debugQueue->getUnderlyingBuffer() : nullptr); } template <> void SchedulerSimulation::patchGpGpuWalker(uint secondLevelBatchOffset, __global uint *secondaryBatchBuffer, uint interfaceDescriptorOffset, uint simdSize, uint totalLocalWorkSize, uint3 dimSize, uint3 startPoint, uint numberOfHwThreadsPerWg, uint indirectPayloadSize, uint ioHoffset) { Gen9SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset, secondaryBatchBuffer, interfaceDescriptorOffset, simdSize, totalLocalWorkSize, dimSize, startPoint, numberOfHwThreadsPerWg, indirectPayloadSize, ioHoffset); } template class SchedulerSimulation; } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/opencl_c.cpp000066400000000000000000000057021363734646600300640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl_c.h" #include "shared/source/helpers/string.h" namespace BuiltinKernelsSimulation { #define SCHEDULER_EMULATION 1 // globals std::mutex gMutex; unsigned int globalID[3]; unsigned int localID[3]; unsigned int localSize[3]; std::map threadIDToLocalIDmap; SynchronizationBarrier *pGlobalBarrier = nullptr; uint4 operator+(uint4 const &a, uint4 const &b) { uint4 c(0, 0, 0, 0); c.x = a.x + b.x; c.y = a.y + b.y; c.z = a.z + b.z; c.w = a.w + b.w; return c; } int4 operator+(int4 const &a, int4 const &b) { int4 c(0, 0, 0, 0); c.x = a.x + b.x; c.y = a.y + b.y; c.z = a.z + b.z; c.w = a.w + b.w; return c; } uint get_local_id(int dim) { uint LID = 0; // use thread id if (threadIDToLocalIDmap.size() > 0) { std::thread::id id = std::this_thread::get_id(); LID = threadIDToLocalIDmap[id] % 24; } // use id from loop iteration else { LID = localID[dim]; } return LID; } uint get_global_id(int dim) { uint GID = 0; // use thread id if (threadIDToLocalIDmap.size() > 0) { std::thread::id id = std::this_thread::get_id(); GID = threadIDToLocalIDmap[id]; } // use id from loop 
iteration else { GID = globalID[dim]; } return GID; } uint get_local_size(int dim) { return localSize[dim]; } uint get_num_groups(int dim) { return NUM_OF_THREADS / 24; } uint get_group_id(int dim) { return get_global_id(dim) / 24; } void barrier(int x) { pGlobalBarrier->enter(); // int LID = get_local_id(0); volatile int BreakPointHere = 0; // PUT BREAKPOINT HERE to stop after each barrier BreakPointHere++; } uint4 read_imageui(image *im, int4 coord) { uint4 color = {0, 0, 0, 1}; uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels; char *temp = &im->ptr[offset]; char *colorDst = (char *)&color; for (uint i = 0; i < im->channels; i++) { memcpy_s(colorDst, sizeof(uint4), temp, im->bytesPerChannel); temp += im->bytesPerChannel; colorDst += 4; } return color; } uint4 write_imageui(image *im, uint4 coord, uint4 color) { uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels; char *temp = &im->ptr[offset]; char *colorSrc = (char *)&color; size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels; for (uint i = 0; i < im->channels; i++) { memcpy_s(temp, size - offset, colorSrc, im->bytesPerChannel); temp += im->bytesPerChannel; colorSrc += 4; } return *(uint4 *)temp; // NOLINT } uchar convert_uchar_sat(uint c) { return (uchar)c; } ushort convert_ushort_sat(uint c) { return (ushort)c; } } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/opencl_c.h000066400000000000000000000132421363734646600275270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include #include #include #include #include #include // OpenCL Types typedef uint32_t uint; typedef uint8_t uchar; typedef uint16_t ushort; typedef uint64_t ulong; namespace BuiltinKernelsSimulation { // number of threads in wkg #define 
NUM_OF_THREADS 24 #define CLK_GLOBAL_MEM_FENCE 1 #define CLK_LOCAL_MEM_FENCE 2 class SynchronizationBarrier { public: SynchronizationBarrier(int count) : m_InitialCount(count) { m_Count = count; m_BarrierCounter = 0; } ~SynchronizationBarrier() { } void enter() { std::unique_lock lck(m_Mutex); m_Count--; unsigned int BarrierCount = m_BarrierCounter; if (m_Count > 0) { while (BarrierCount == m_BarrierCounter) { m_AllHitBarrierCondition.wait(lck); } } else { m_Count = m_InitialCount; m_BarrierCounter++; m_AllHitBarrierCondition.notify_all(); } } private: std::mutex m_Mutex; std::condition_variable m_AllHitBarrierCondition; int m_Count; const int m_InitialCount; unsigned int m_BarrierCounter; }; // globals extern std::mutex gMutex; extern unsigned int globalID[3]; extern unsigned int localID[3]; extern unsigned int localSize[3]; extern std::map threadIDToLocalIDmap; extern SynchronizationBarrier *pGlobalBarrier; typedef struct taguint2 { taguint2(uint x, uint y) { this->x = x; this->y = y; } taguint2() { this->x = 0; this->y = 0; } uint x; uint y; } uint2; typedef struct taguint3 { taguint3(uint x, uint y, uint z) { this->x = x; this->y = y; this->z = z; } taguint3() { this->x = 0; this->y = 0; this->z = 0; } uint x; uint y; uint z; } uint3; typedef struct taguint4 { taguint4(uint x, uint y, uint z, uint w) { this->x = x; this->y = y; this->z = z; this->w = w; } uint x; uint y; uint z; uint w; } uint4; typedef struct tagint2 { tagint2(int x, int y) { this->x = x; this->y = y; } int x; int y; } int2; typedef struct tagint3 { tagint3(int x, int y, int z) { this->x = x; this->y = y; this->z = z; } int x; int y; int z; } int3; typedef struct tagint4 { tagint4(int x, int y, int z, int w) { this->x = x; this->y = y; this->z = z; this->w = w; } int x; int y; int z; int w; } int4; typedef struct tagushort2 { tagushort2(ushort x, ushort y) { this->x = x; this->y = y; } unsigned short x; unsigned short y; } ushort2; typedef struct tagushort8 { unsigned short xxx[8]; } ushort8; 
typedef struct tagushort16 { unsigned short xxx[16]; } ushort16; uint4 operator+(uint4 const &a, uint4 const &b); int4 operator+(int4 const &a, int4 const &b); typedef struct tagimage { char *ptr; uint width; uint height; uint depth; uint bytesPerChannel; uint channels; } image; // images as pointer typedef image *image1d_t; typedef image *image2d_t; typedef image *image3d_t; // OpenCL keywords #define __global #define __local #define __private #define __kernel #define __attribute__(...) #define __read_only #define __write_only #define queue_t void * struct clk_event_t { clk_event_t() { value = 0; } clk_event_t(void *v) { value = static_cast(reinterpret_cast(v)); } explicit operator void *() const { return reinterpret_cast(static_cast(value)); } operator uint() { return (uint)value; } void operator=(uint input) { value = input; } uint value; }; // OpenCL builtins #define __builtin_astype(var, type) \ ( \ (type)var) #define select(a, b, c) (c ? b : a) uint get_local_id(int dim); uint get_global_id(int dim); uint get_local_size(int dim); uint get_num_groups(int dim); uint get_group_id(int dim); void barrier(int x); uint4 read_imageui(image *im, int4 coord); uint4 write_imageui(image *im, uint4 coord, uint4 color); uchar convert_uchar_sat(uint c); ushort convert_ushort_sat(uint c); #define EMULATION_ENTER_FUNCTION() \ uint __LOCAL_ID__ = 0; \ __LOCAL_ID__ = get_local_id(0); template void atomic_xchg(TYPE *dest, TYPE2 val) { gMutex.lock(); dest[0] = (TYPE)val; gMutex.unlock(); } template TYPE atomic_add(TYPE *first, TYPE2 second) { gMutex.lock(); TYPE temp = first[0]; first[0] = (TYPE)(temp + (TYPE)second); gMutex.unlock(); return temp; } template TYPE atomic_sub(TYPE *first, TYPE2 second) { gMutex.lock(); TYPE temp = first[0]; first[0] = temp - second; gMutex.unlock(); return temp; } template TYPE atomic_inc(TYPE *first) { gMutex.lock(); TYPE temp = first[0]; first[0] = temp + 1; gMutex.unlock(); return temp; } template TYPE atomic_dec(TYPE *first) { gMutex.lock(); 
TYPE temp = first[0]; first[0] = temp - 1; gMutex.unlock(); return temp; } template TYPE atomic_min(TYPE *first, TYPE2 second) { gMutex.lock(); TYPE temp = first[0]; first[0] = (TYPE)((TYPE)second < temp ? (TYPE)second : temp); gMutex.unlock(); return temp; } } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/scheduler_simulation.cpp000066400000000000000000000006661363734646600325300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include using namespace std; using namespace NEO; namespace BuiltinKernelsSimulation { bool conditionReady = false; std::thread threads[NUM_OF_THREADS]; } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/scheduler_simulation.h000066400000000000000000000063421363734646600321720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include #include namespace NEO { class GraphicsAllocation; } namespace BuiltinKernelsSimulation { extern bool conditionReady; extern std::thread threads[]; template class SchedulerSimulation { public: void runSchedulerSimulation(NEO::GraphicsAllocation *queue, NEO::GraphicsAllocation *commandsStack, NEO::GraphicsAllocation *eventsPool, NEO::GraphicsAllocation *secondaryBatchBuffer, NEO::GraphicsAllocation *dsh, NEO::GraphicsAllocation *reflectionSurface, NEO::GraphicsAllocation *queueStorageBuffer, NEO::GraphicsAllocation *ssh, NEO::GraphicsAllocation *debugQueue); void cleanSchedulerSimulation(); static void startScheduler(uint32_t index, NEO::GraphicsAllocation *queue, NEO::GraphicsAllocation *commandsStack, NEO::GraphicsAllocation *eventsPool, 
NEO::GraphicsAllocation *secondaryBatchBuffer, NEO::GraphicsAllocation *dsh, NEO::GraphicsAllocation *reflectionSurface, NEO::GraphicsAllocation *queueStorageBuffer, NEO::GraphicsAllocation *ssh, NEO::GraphicsAllocation *debugQueue); void initializeSchedulerSimulation(NEO::GraphicsAllocation *queue, NEO::GraphicsAllocation *commandsStack, NEO::GraphicsAllocation *eventsPool, NEO::GraphicsAllocation *secondaryBatchBuffer, NEO::GraphicsAllocation *dsh, NEO::GraphicsAllocation *reflectionSurface, NEO::GraphicsAllocation *queueStorageBuffer, NEO::GraphicsAllocation *ssh, NEO::GraphicsAllocation *debugQueue); static void patchGpGpuWalker(uint secondLevelBatchOffset, __global uint *secondaryBatchBuffer, uint interfaceDescriptorOffset, uint simdSize, uint totalLocalWorkSize, uint3 dimSize, uint3 startPoint, uint numberOfHwThreadsPerWg, uint indirectPayloadSize, uint ioHoffset); static bool enabled; static bool simulationRun; }; template bool SchedulerSimulation::enabled = true; template bool SchedulerSimulation::simulationRun = false; } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/builtin_kernels_simulation/scheduler_simulation.inl000066400000000000000000000076661363734646600325370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include #include #include using namespace std; using namespace NEO; namespace BuiltinKernelsSimulation { template void SchedulerSimulation::cleanSchedulerSimulation() { threadIDToLocalIDmap.clear(); delete pGlobalBarrier; } template void SchedulerSimulation::initializeSchedulerSimulation(GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation 
*queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue) { localSize[0] = NUM_OF_THREADS; localSize[1] = 1; localSize[2] = 1; threadIDToLocalIDmap.clear(); pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS); // Spawn Thread ID == 0 on main thread for (uint32_t i = 1; i < NUM_OF_THREADS; i++) { threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue); } conditionReady = true; } template void SchedulerSimulation::runSchedulerSimulation(GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation *queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue) { simulationRun = true; if (enabled) { initializeSchedulerSimulation(queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue); // start main thread with LID == 0 startScheduler(0, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue); // Wait for all threads on main thread if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) { for (uint32_t i = 1; i < NUM_OF_THREADS; i++) threads[i].join(); cleanSchedulerSimulation(); } } }; } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/source/cl_device/000077500000000000000000000000001363734646600220525ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/cl_device/CMakeLists.txt000066400000000000000000000013111363734646600246060ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_CL_DEVICE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device.h 
${CMAKE_CURRENT_SOURCE_DIR}/cl_device_caps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_get_cap.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_info.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_info_map.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_vector.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CL_DEVICE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_CL_DEVICE ${RUNTIME_SRCS_CL_DEVICE}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/cl_device/cl_device.cpp000066400000000000000000000156141363734646600245020ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" #include "shared/source/device/device.h" #include "shared/source/device/sub_device.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/program/sync_buffer_handler.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/platform/extensions.h" #include "opencl/source/platform/platform.h" namespace NEO { ClDevice::ClDevice(Device &device, Platform *platform) : device(device), platformId(platform) { device.incRefInternal(); device.setSpecializedDevice(this); deviceExtensions.reserve(1000); name.reserve(100); auto osInterface = getRootDeviceEnvironment().osInterface.get(); driverInfo.reset(DriverInfo::create(osInterface)); initializeCaps(); compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(deviceInfo.deviceExtensions); auto numAvailableDevices = device.getNumAvailableDevices(); if (numAvailableDevices > 1) { for (uint32_t i = 0; i < numAvailableDevices; i++) { auto &coreSubDevice = static_cast(*device.getDeviceById(i)); auto pClSubDevice = std::make_unique(coreSubDevice, platform); pClSubDevice->incRefInternal(); pClSubDevice->decRefApi(); 
auto &deviceInfo = pClSubDevice->deviceInfo; deviceInfo.parentDevice = this; deviceInfo.partitionMaxSubDevices = 0; deviceInfo.partitionProperties[0] = 0; deviceInfo.partitionAffinityDomain = 0; deviceInfo.partitionType[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; deviceInfo.partitionType[1] = CL_DEVICE_AFFINITY_DOMAIN_NUMA; deviceInfo.partitionType[2] = 0; subDevices.push_back(std::move(pClSubDevice)); } } if (getSharedDeviceInfo().debuggerActive) { auto osInterface = device.getRootDeviceEnvironment().osInterface.get(); getSourceLevelDebugger()->notifyNewDevice(osInterface ? osInterface->getDeviceHandle() : 0); } } ClDevice::~ClDevice() { if (getSharedDeviceInfo().debuggerActive) { getSourceLevelDebugger()->notifyDeviceDestruction(); } syncBufferHandler.reset(); for (auto &subDevice : subDevices) { subDevice.reset(); } device.decRefInternal(); } void ClDevice::allocateSyncBufferHandler() { TakeOwnershipWrapper lock(*this); if (syncBufferHandler.get() == nullptr) { syncBufferHandler = std::make_unique(this->getDevice()); UNRECOVERABLE_IF(syncBufferHandler.get() == nullptr); } } unsigned int ClDevice::getSupportedClVersion() const { return device.getHardwareInfo().capabilityTable.clVersionSupport; } void ClDevice::retainApi() { auto parentDeviceId = deviceInfo.parentDevice; if (parentDeviceId) { auto pParentClDevice = static_cast(parentDeviceId); pParentClDevice->incRefInternal(); this->incRefApi(); } }; unique_ptr_if_unused ClDevice::releaseApi() { auto parentDeviceId = deviceInfo.parentDevice; if (!parentDeviceId) { return unique_ptr_if_unused(this, false); } auto pParentClDevice = static_cast(parentDeviceId); pParentClDevice->decRefInternal(); return this->decRefApi(); } const DeviceInfo &ClDevice::getSharedDeviceInfo() const { return device.getDeviceInfo(); } ClDevice *ClDevice::getDeviceById(uint32_t deviceId) { UNRECOVERABLE_IF(deviceId >= getNumAvailableDevices()); if (subDevices.empty()) { return this; } return subDevices[deviceId].get(); } bool 
ClDevice::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const { return device.getDeviceAndHostTimer(deviceTimestamp, hostTimestamp); } bool ClDevice::getHostTimer(uint64_t *hostTimestamp) const { return device.getHostTimer(hostTimestamp); } const HardwareInfo &ClDevice::getHardwareInfo() const { return device.getHardwareInfo(); } EngineControl &ClDevice::getEngine(aub_stream::EngineType engineType, bool lowPriority) { return device.getEngine(engineType, lowPriority); } EngineControl &ClDevice::getDefaultEngine() { return device.getDefaultEngine(); } EngineControl &ClDevice::getInternalEngine() { return device.getInternalEngine(); } std::atomic &ClDevice::getSelectorCopyEngine() { return device.getSelectorCopyEngine(); } MemoryManager *ClDevice::getMemoryManager() const { return device.getMemoryManager(); } GmmHelper *ClDevice::getGmmHelper() const { return device.getGmmHelper(); } GmmClientContext *ClDevice::getGmmClientContext() const { return device.getGmmClientContext(); } double ClDevice::getProfilingTimerResolution() { return device.getProfilingTimerResolution(); } double ClDevice::getPlatformHostTimerResolution() const { return device.getPlatformHostTimerResolution(); } bool ClDevice::isSimulation() const { return device.isSimulation(); } GFXCORE_FAMILY ClDevice::getRenderCoreFamily() const { return device.getRenderCoreFamily(); } PerformanceCounters *ClDevice::getPerformanceCounters() { return device.getPerformanceCounters(); } PreemptionMode ClDevice::getPreemptionMode() const { return device.getPreemptionMode(); } bool ClDevice::isDebuggerActive() const { return device.isDebuggerActive(); } Debugger *ClDevice::getDebugger() { return device.getDebugger(); } SourceLevelDebugger *ClDevice::getSourceLevelDebugger() { return reinterpret_cast(device.getDebugger()); } ExecutionEnvironment *ClDevice::getExecutionEnvironment() const { return device.getExecutionEnvironment(); } const RootDeviceEnvironment 
&ClDevice::getRootDeviceEnvironment() const { return device.getRootDeviceEnvironment(); } const HardwareCapabilities &ClDevice::getHardwareCapabilities() const { return device.getHardwareCapabilities(); } bool ClDevice::isFullRangeSvm() const { return device.isFullRangeSvm(); } bool ClDevice::areSharedSystemAllocationsAllowed() const { return device.areSharedSystemAllocationsAllowed(); } uint32_t ClDevice::getRootDeviceIndex() const { return device.getRootDeviceIndex(); } uint32_t ClDevice::getNumAvailableDevices() const { return device.getNumAvailableDevices(); } ClDeviceVector::ClDeviceVector(const cl_device_id *devices, cl_uint numDevices) { for (cl_uint i = 0; i < numDevices; i++) { auto pClDevice = castToObject(devices[i]); this->push_back(pClDevice); } } void ClDeviceVector::toDeviceIDs(std::vector &devIDs) { int i = 0; devIDs.resize(this->size()); for (auto &it : *this) { devIDs[i] = it; i++; } } const std::string &ClDevice::peekCompilerExtensions() const { return compilerExtensions; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/cl_device.h000066400000000000000000000104161363734646600241420ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/utilities/reference_tracked_object.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/cl_device/cl_device_info.h" #include "opencl/source/helpers/base_object.h" #include "engine_node.h" #include "igfxfmid.h" #include namespace NEO { class Debugger; class Device; class DriverInfo; class ExecutionEnvironment; class GmmHelper; class GmmClientContext; class MemoryManager; class PerformanceCounters; class Platform; class SourceLevelDebugger; class SyncBufferHandler; struct DeviceInfo; struct EngineControl; struct HardwareCapabilities; struct HardwareInfo; struct RootDeviceEnvironment; template <> struct 
OpenCLObjectMapper<_cl_device_id> { typedef class ClDevice DerivedType; }; class ClDevice : public BaseObject<_cl_device_id> { public: static const cl_ulong objectMagic = 0x8055832341AC8D08LL; ClDevice &operator=(const ClDevice &) = delete; ClDevice(const ClDevice &) = delete; explicit ClDevice(Device &device, Platform *platformId); ~ClDevice() override; unsigned int getEnabledClVersion() const { return enabledClVersion; }; unsigned int getSupportedClVersion() const; void retainApi(); unique_ptr_if_unused releaseApi(); bool getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const; bool getHostTimer(uint64_t *hostTimestamp) const; const HardwareInfo &getHardwareInfo() const; EngineControl &getEngine(aub_stream::EngineType engineType, bool lowPriority); EngineControl &getDefaultEngine(); EngineControl &getInternalEngine(); std::atomic &getSelectorCopyEngine(); MemoryManager *getMemoryManager() const; GmmHelper *getGmmHelper() const; GmmClientContext *getGmmClientContext() const; double getProfilingTimerResolution(); double getPlatformHostTimerResolution() const; bool isSimulation() const; GFXCORE_FAMILY getRenderCoreFamily() const; void allocateSyncBufferHandler(); PerformanceCounters *getPerformanceCounters(); PreemptionMode getPreemptionMode() const; bool isDebuggerActive() const; Debugger *getDebugger(); SourceLevelDebugger *getSourceLevelDebugger(); ExecutionEnvironment *getExecutionEnvironment() const; const RootDeviceEnvironment &getRootDeviceEnvironment() const; const HardwareCapabilities &getHardwareCapabilities() const; bool isFullRangeSvm() const; bool areSharedSystemAllocationsAllowed() const; uint32_t getRootDeviceIndex() const; uint32_t getNumAvailableDevices() const; // API entry points cl_int getDeviceInfo(cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); bool getDeviceInfoForImage(cl_device_info paramName, const void *&src, size_t &srcSize, size_t &retSize); // This helper template is 
meant to simplify getDeviceInfo template void getCap(const void *&src, size_t &size, size_t &retSize); template void getStr(const void *&src, size_t &size, size_t &retSize); Device &getDevice() const noexcept { return device; } const ClDeviceInfo &getDeviceInfo() const { return deviceInfo; } const DeviceInfo &getSharedDeviceInfo() const; ClDevice *getDeviceById(uint32_t deviceId); const std::string &peekCompilerExtensions() const; std::unique_ptr syncBufferHandler; protected: void initializeCaps(); void initializeExtraCaps(); void setupFp64Flags(); Device &device; std::vector> subDevices; cl_platform_id platformId; std::string name; std::unique_ptr driverInfo; unsigned int enabledClVersion = 0u; std::string deviceExtensions; std::string exposedBuiltinKernels = ""; ClDeviceInfo deviceInfo = {}; std::vector simultaneousInterops = {0}; std::string compilerExtensions; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/cl_device_caps.cpp000066400000000000000000000335541363734646600255130ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device_info.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/platform/extensions.h" #include "opencl/source/sharings/sharing_factory.h" #include "driver_version.h" #include namespace NEO { extern const char *familyName[]; static std::string vendor = "Intel(R) Corporation"; static std::string profile = "FULL_PROFILE"; static std::string spirVersions = "1.2 "; #define QTR(a) #a #define TOSTR(b) QTR(b) static std::string driverVersion = TOSTR(NEO_OCL_DRIVER_VERSION); const char *builtInKernels = ""; // the "always available" (extension-independent) builtin kernels static constexpr cl_device_fp_config 
defaultFpFlags = static_cast(CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM | CL_FP_FMA); bool releaseFP64Override(); void ClDevice::setupFp64Flags() { auto &hwInfo = getHardwareInfo(); if (releaseFP64Override() || DebugManager.flags.OverrideDefaultFP64Settings.get() == 1) { deviceExtensions += "cl_khr_fp64 "; deviceInfo.singleFpConfig = static_cast(CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); deviceInfo.doubleFpConfig = defaultFpFlags; } else if (DebugManager.flags.OverrideDefaultFP64Settings.get() == -1) { if (hwInfo.capabilityTable.ftrSupportsFP64) { deviceExtensions += "cl_khr_fp64 "; } deviceInfo.singleFpConfig = static_cast( hwInfo.capabilityTable.ftrSupports64BitMath ? CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT : 0); deviceInfo.doubleFpConfig = hwInfo.capabilityTable.ftrSupportsFP64 ? defaultFpFlags : 0; } } void ClDevice::initializeCaps() { auto &hwInfo = getHardwareInfo(); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); auto &sharedDeviceInfo = getSharedDeviceInfo(); deviceExtensions.clear(); deviceExtensions.append(deviceExtensionsList); driverVersion = TOSTR(NEO_OCL_DRIVER_VERSION); // Add our graphics family name to the device name name += "Intel(R) "; name += familyName[hwInfo.platform.eRenderCoreFamily]; name += " HD Graphics NEO"; if (driverInfo) { name.assign(driverInfo.get()->getDeviceName(name).c_str()); driverVersion.assign(driverInfo.get()->getVersion(driverVersion).c_str()); sharingFactory.verifyExtensionSupport(driverInfo.get()); } auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); deviceInfo.name = name.c_str(); deviceInfo.driverVersion = driverVersion.c_str(); setupFp64Flags(); deviceInfo.vendor = vendor.c_str(); deviceInfo.profile = profile.c_str(); enabledClVersion = hwInfo.capabilityTable.clVersionSupport; if (DebugManager.flags.ForceOCLVersion.get() != 0) { enabledClVersion = DebugManager.flags.ForceOCLVersion.get(); } switch (enabledClVersion) { case 21: 
deviceInfo.clVersion = "OpenCL 2.1 NEO "; deviceInfo.clCVersion = "OpenCL C 2.0 "; break; case 20: deviceInfo.clVersion = "OpenCL 2.0 NEO "; deviceInfo.clCVersion = "OpenCL C 2.0 "; break; case 12: default: deviceInfo.clVersion = "OpenCL 1.2 NEO "; deviceInfo.clCVersion = "OpenCL C 1.2 "; break; } deviceInfo.platformLP = (hwInfo.capabilityTable.clVersionSupport == 12) ? true : false; deviceInfo.spirVersions = spirVersions.c_str(); auto supportsVme = hwInfo.capabilityTable.supportsVme; auto supportsAdvancedVme = hwInfo.capabilityTable.supportsVme; if (enabledClVersion >= 21) { deviceInfo.independentForwardProgress = true; deviceExtensions += "cl_khr_subgroups "; deviceExtensions += "cl_khr_il_program "; if (supportsVme) { deviceExtensions += "cl_intel_spirv_device_side_avc_motion_estimation "; } if (hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_intel_spirv_media_block_io "; } deviceExtensions += "cl_intel_spirv_subgroups "; deviceExtensions += "cl_khr_spirv_no_integer_wrap_decoration "; } else { deviceInfo.independentForwardProgress = false; } if (enabledClVersion >= 20) { deviceExtensions += "cl_intel_unified_shared_memory_preview "; if (hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_khr_mipmap_image cl_khr_mipmap_image_writes "; } } if (DebugManager.flags.EnableNV12.get() && hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_intel_planar_yuv "; deviceInfo.nv12Extension = true; } if (DebugManager.flags.EnablePackedYuv.get() && hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_intel_packed_yuv "; deviceInfo.packedYuvExtension = true; } if (DebugManager.flags.EnableIntelVme.get() != -1) { supportsVme = !!DebugManager.flags.EnableIntelVme.get(); } if (supportsVme) { deviceExtensions += "cl_intel_motion_estimation cl_intel_device_side_avc_motion_estimation "; deviceInfo.vmeExtension = true; } if (DebugManager.flags.EnableIntelAdvancedVme.get() != -1) { supportsAdvancedVme = 
!!DebugManager.flags.EnableIntelAdvancedVme.get(); } if (supportsAdvancedVme) { deviceExtensions += "cl_intel_advanced_motion_estimation "; } if (hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics) { deviceExtensions += "cl_khr_int64_base_atomics "; deviceExtensions += "cl_khr_int64_extended_atomics "; } if (hwInfo.capabilityTable.supportsImages) { deviceExtensions += "cl_khr_image2d_from_buffer "; deviceExtensions += "cl_khr_depth_images "; deviceExtensions += "cl_intel_media_block_io "; deviceExtensions += "cl_khr_3d_image_writes "; } auto sharingAllowed = (HwHelper::getSubDevicesCount(&hwInfo) == 1u); if (sharingAllowed) { deviceExtensions += sharingFactory.getExtensions(); } deviceExtensions += hwHelper.getExtensions(); deviceInfo.deviceExtensions = deviceExtensions.c_str(); exposedBuiltinKernels = builtInKernels; if (supportsVme) { exposedBuiltinKernels.append("block_motion_estimate_intel;"); } if (supportsAdvancedVme) { auto advVmeKernels = "block_advanced_motion_estimate_check_intel;block_advanced_motion_estimate_bidirectional_check_intel;"; exposedBuiltinKernels.append(advVmeKernels); } deviceInfo.builtInKernels = exposedBuiltinKernels.c_str(); deviceInfo.deviceType = CL_DEVICE_TYPE_GPU; deviceInfo.endianLittle = 1; deviceInfo.hostUnifiedMemory = (false == hwHelper.isLocalMemoryEnabled(hwInfo)); deviceInfo.deviceAvailable = CL_TRUE; deviceInfo.compilerAvailable = CL_TRUE; deviceInfo.parentDevice = nullptr; deviceInfo.partitionMaxSubDevices = HwHelper::getSubDevicesCount(&hwInfo); if (deviceInfo.partitionMaxSubDevices > 1) { deviceInfo.partitionProperties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; deviceInfo.partitionProperties[1] = 0; deviceInfo.partitionAffinityDomain = CL_DEVICE_AFFINITY_DOMAIN_NUMA | CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; } else { deviceInfo.partitionMaxSubDevices = 0; deviceInfo.partitionProperties[0] = 0; deviceInfo.partitionAffinityDomain = 0; } deviceInfo.partitionType[0] = 0; deviceInfo.preferredVectorWidthChar = 16; 
deviceInfo.preferredVectorWidthShort = 8; deviceInfo.preferredVectorWidthInt = 4; deviceInfo.preferredVectorWidthLong = 1; deviceInfo.preferredVectorWidthFloat = 1; deviceInfo.preferredVectorWidthDouble = 1; deviceInfo.preferredVectorWidthHalf = 8; deviceInfo.nativeVectorWidthChar = 16; deviceInfo.nativeVectorWidthShort = 8; deviceInfo.nativeVectorWidthInt = 4; deviceInfo.nativeVectorWidthLong = 1; deviceInfo.nativeVectorWidthFloat = 1; deviceInfo.nativeVectorWidthDouble = 1; deviceInfo.nativeVectorWidthHalf = 8; deviceInfo.maxReadWriteImageArgs = 128; deviceInfo.executionCapabilities = CL_EXEC_KERNEL; //copy system info to prevent misaligned reads const auto systemInfo = hwInfo.gtSystemInfo; deviceInfo.globalMemCacheSize = systemInfo.L3BankCount * 128 * KB; deviceInfo.grfSize = hwInfo.capabilityTable.grfSize; deviceInfo.globalMemCacheType = CL_READ_WRITE_CACHE; deviceInfo.memBaseAddressAlign = 1024; deviceInfo.minDataTypeAlignSize = 128; deviceInfo.maxOnDeviceEvents = 1024; deviceInfo.queueOnDeviceMaxSize = 64 * MB; deviceInfo.queueOnDevicePreferredSize = 128 * KB; deviceInfo.queueOnDeviceProperties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; deviceInfo.preferredInteropUserSync = 1u; // OpenCL 1.2 requires 128MB minimum deviceInfo.maxConstantBufferSize = sharedDeviceInfo.maxMemAllocSize; deviceInfo.maxWorkItemDimensions = 3; deviceInfo.maxComputUnits = systemInfo.EUCount; deviceInfo.maxConstantArgs = 8; deviceInfo.maxSliceCount = systemInfo.SliceCount; auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 
32u : hwHelper.getMinimalSIMDSize(); // calculate a maximum number of subgroups in a workgroup (for the required SIMD size) deviceInfo.maxNumOfSubGroups = static_cast(sharedDeviceInfo.maxWorkGroupSize / simdSizeUsed); deviceInfo.singleFpConfig |= defaultFpFlags; deviceInfo.halfFpConfig = defaultFpFlags; printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "computeUnitsUsedForScratch: %d\n", sharedDeviceInfo.computeUnitsUsedForScratch); printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "hwInfo: {%d, %d}: (%d, %d, %d)\n", systemInfo.EUCount, systemInfo.ThreadCount, systemInfo.MaxEuPerSubSlice, systemInfo.MaxSlicesSupported, systemInfo.MaxSubSlicesSupported); deviceInfo.localMemType = CL_LOCAL; deviceInfo.image3DMaxWidth = this->getHardwareCapabilities().image3DMaxWidth; deviceInfo.image3DMaxHeight = this->getHardwareCapabilities().image3DMaxHeight; // cl_khr_image2d_from_buffer deviceInfo.imagePitchAlignment = hwHelper.getPitchAlignmentForImage(&hwInfo); deviceInfo.imageBaseAddressAlignment = 4; deviceInfo.maxPipeArgs = 16; deviceInfo.pipeMaxPacketSize = 1024; deviceInfo.pipeMaxActiveReservations = 1; deviceInfo.queueOnHostProperties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; deviceInfo.linkerAvailable = true; deviceInfo.svmCapabilities = hwInfo.capabilityTable.ftrSvm * CL_DEVICE_SVM_COARSE_GRAIN_BUFFER; if (hwInfo.capabilityTable.ftrSvm) { auto reportFineGrained = hwInfo.capabilityTable.ftrSvm * hwInfo.capabilityTable.ftrSupportsCoherency; if (DebugManager.flags.ForceFineGrainedSVMSupport.get() != -1) { reportFineGrained = !!DebugManager.flags.ForceFineGrainedSVMSupport.get(); } if (reportFineGrained) { deviceInfo.svmCapabilities |= static_cast(CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); } } deviceInfo.preemptionSupported = false; deviceInfo.maxGlobalVariableSize = 64 * KB; deviceInfo.globalVariablePreferredTotalSize = static_cast(sharedDeviceInfo.maxMemAllocSize); 
deviceInfo.planarYuvMaxWidth = 16384; deviceInfo.planarYuvMaxHeight = 16352; deviceInfo.vmeAvcSupportsTextureSampler = hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler; if (hwInfo.capabilityTable.supportsVme) { deviceInfo.vmeAvcVersion = CL_AVC_ME_VERSION_1_INTEL; deviceInfo.vmeVersion = CL_ME_VERSION_ADVANCED_VER_2_INTEL; } deviceInfo.platformHostTimerResolution = getPlatformHostTimerResolution(); deviceInfo.internalDriverVersion = CL_DEVICE_DRIVER_VERSION_INTEL_NEO1; deviceInfo.preferredGlobalAtomicAlignment = MemoryConstants::cacheLineSize; deviceInfo.preferredLocalAtomicAlignment = MemoryConstants::cacheLineSize; deviceInfo.preferredPlatformAtomicAlignment = MemoryConstants::cacheLineSize; deviceInfo.hostMemCapabilities = hwInfoConfig->getHostMemCapabilities(); deviceInfo.deviceMemCapabilities = hwInfoConfig->getDeviceMemCapabilities(); deviceInfo.singleDeviceSharedMemCapabilities = hwInfoConfig->getSingleDeviceSharedMemCapabilities(); deviceInfo.crossDeviceSharedMemCapabilities = hwInfoConfig->getCrossDeviceSharedMemCapabilities(); deviceInfo.sharedSystemMemCapabilities = hwInfoConfig->getSharedSystemMemCapabilities(); if (DebugManager.flags.EnableSharedSystemUsmSupport.get() != -1) { if (DebugManager.flags.EnableSharedSystemUsmSupport.get() == 0) { deviceInfo.sharedSystemMemCapabilities = 0u; } else { deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; } } initializeExtraCaps(); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/cl_device_get_cap.inl000066400000000000000000000010041363734646600261500ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_info_map.h" namespace NEO { template inline void 
ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = &ClDeviceInfoTable::Map::getValue(*this); retSize = size = ClDeviceInfoTable::Map::size; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/cl_device_info.cpp000066400000000000000000000507121363734646600255130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device_info.h" #include "shared/source/device/device.h" #include "shared/source/device/device_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/os_interface/os_time.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_get_cap.inl" #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/helpers/cl_device_helpers.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/platform/platform.h" namespace NEO { using ClDeviceInfoTable::Map; template inline void ClDevice::getStr(const void *&src, size_t &size, size_t &retSize) { src = Map::getValue(*this); retSize = size = strlen(Map::getValue(*this)) + 1; } template <> inline void ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = getSharedDeviceInfo().maxWorkItemSizes; retSize = size = sizeof(getSharedDeviceInfo().maxWorkItemSizes); } template <> inline void ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = &platformId; retSize = size = sizeof(cl_platform_id); } template <> inline void ClDevice::getCap(const void *&src, size_t &size, size_t &retSize) { src = getSharedDeviceInfo().maxSubGroups.begin(); retSize = size = (getSharedDeviceInfo().maxSubGroups.size() * sizeof(size_t)); } cl_int ClDevice::getDeviceInfo(cl_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_INVALID_VALUE; size_t srcSize = 0; 
size_t retSize = 0; size_t value = 0u; cl_uint param; const void *src = nullptr; // clang-format off // please keep alphabetical order switch (paramName) { case CL_DEVICE_ADDRESS_BITS: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVAILABLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_AVC_ME_VERSION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_BUILT_IN_KERNELS: getStr(src, srcSize, retSize); break; case CL_DEVICE_COMPILER_AVAILABLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_DOUBLE_FP_CONFIG: getCap(src, srcSize, retSize); break; case CL_DEVICE_DRIVER_VERSION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_ENDIAN_LITTLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_ERROR_CORRECTION_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_EXECUTION_CAPABILITIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_EXTENSIONS: getStr(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_MEM_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_HALF_FP_CONFIG: getCap(src, srcSize, retSize); break; case CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_HOST_UNIFIED_MEMORY: getCap(src, srcSize, retSize); break; case CL_DEVICE_IL_VERSION: getStr(src, srcSize, retSize); break; case 
CL_DEVICE_IMAGE_SUPPORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_LINKER_AVAILABLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_LOCAL_MEM_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_LOCAL_MEM_TYPE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_CLOCK_FREQUENCY: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_COMPUTE_UNITS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_CONSTANT_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_MEM_ALLOC_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_NUM_SUB_GROUPS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_ON_DEVICE_EVENTS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_ON_DEVICE_QUEUES: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_PARAMETER_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_PIPE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_SAMPLERS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WORK_GROUP_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WORK_ITEM_SIZES: getCap(src, srcSize, retSize); break; case CL_DEVICE_MEM_BASE_ADDR_ALIGN: getCap(src, srcSize, retSize); break; case CL_DEVICE_ME_VERSION_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_NAME: getStr(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: getCap(src, srcSize, retSize); break; case 
CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: getCap(src, srcSize, retSize); break; case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_OPENCL_C_VERSION: getStr(src, srcSize, retSize); break; case CL_DEVICE_PARENT_DEVICE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: getCap(src, srcSize, retSize); break; case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: getCap(src, srcSize, retSize); break; case CL_DEVICE_PARTITION_PROPERTIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_PARTITION_TYPE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: getCap(src, srcSize, retSize); break; case CL_DEVICE_PIPE_MAX_PACKET_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PLATFORM: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: getCap(src, srcSize, retSize); break; case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PRINTF_BUFFER_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_PROFILE: getStr(src, srcSize, retSize); break; case CL_DEVICE_PROFILING_TIMER_RESOLUTION: 
getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SINGLE_FP_CONFIG: getCap(src, srcSize, retSize); break; case CL_DEVICE_SLICE_COUNT_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SPIR_VERSIONS: getStr(src, srcSize, retSize); break; case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: getCap(src, srcSize, retSize); break; case CL_DEVICE_SUB_GROUP_SIZES_INTEL: getCap(src, srcSize, retSize); break; case CL_DEVICE_SVM_CAPABILITIES: getCap(src, srcSize, retSize); break; case CL_DEVICE_TYPE: getCap(src, srcSize, retSize); break; case CL_DEVICE_VENDOR: getStr(src, srcSize, retSize); break; case CL_DEVICE_VENDOR_ID: getCap(src, srcSize, retSize); break; case CL_DEVICE_VERSION: getStr(src, srcSize, retSize); break; case CL_DRIVER_VERSION: getStr(src, srcSize, retSize); break; // clang-format on case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: if (simultaneousInterops.size() > 1u) { srcSize = retSize = sizeof(cl_uint); param = 1u; src = ¶m; } break; case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: if (simultaneousInterops.size() > 1u) { srcSize = retSize = sizeof(cl_uint) * simultaneousInterops.size(); src = &simultaneousInterops[0]; } break; case CL_DEVICE_REFERENCE_COUNT: { cl_int ref = this->getReference(); DEBUG_BREAK_IF(ref != 1); param = static_cast(ref); src = ¶m; retSize = srcSize = sizeof(param); break; } default: if (getDeviceInfoForImage(paramName, src, srcSize, retSize) && !getSharedDeviceInfo().imageSupport) { src = &value; break; } 
ClDeviceHelper::getExtraDeviceInfo(*this, paramName, param, src, srcSize, retSize); } retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, src, srcSize)); if (paramValueSizeRet) { *paramValueSizeRet = retSize; } return retVal; } bool ClDevice::getDeviceInfoForImage(cl_device_info paramName, const void *&src, size_t &srcSize, size_t &retSize) { bool retVal = true; switch (paramName) { case CL_DEVICE_MAX_READ_IMAGE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE2D_MAX_HEIGHT: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE2D_MAX_WIDTH: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE3D_MAX_DEPTH: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE3D_MAX_HEIGHT: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE3D_MAX_WIDTH: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: getCap(src, srcSize, retSize); break; case CL_DEVICE_IMAGE_PITCH_ALIGNMENT: getCap(src, srcSize, retSize); break; case CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL: if (deviceInfo.nv12Extension) { getCap(src, srcSize, retSize); break; } retVal = false; break; case CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL: if (deviceInfo.nv12Extension) { getCap(src, srcSize, retSize); break; } retVal = false; break; default: retVal = false; } return retVal; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/cl_device_info.h000066400000000000000000000123301363734646600251520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/extensions/public/cl_ext_private.h" namespace NEO { // clang-format off struct 
ClDeviceInfo { cl_device_type deviceType; size_t maxSliceCount; size_t image3DMaxWidth; size_t image3DMaxHeight; size_t maxBufferSize; size_t maxArraySize; cl_device_fp_config singleFpConfig; cl_device_fp_config halfFpConfig; cl_device_fp_config doubleFpConfig; cl_ulong globalMemCacheSize; cl_ulong maxConstantBufferSize; size_t maxGlobalVariableSize; size_t globalVariablePreferredTotalSize; cl_device_exec_capabilities executionCapabilities; cl_command_queue_properties queueOnHostProperties; cl_command_queue_properties queueOnDeviceProperties; const char *builtInKernels; cl_platform_id platform; const char *name; const char *vendor; const char *driverVersion; const char *profile; const char *clVersion; const char *clCVersion; const char *spirVersions; const char *deviceExtensions; cl_device_id parentDevice; cl_device_affinity_domain partitionAffinityDomain; cl_uint partitionMaxSubDevices; cl_device_partition_property partitionProperties[2]; cl_device_partition_property partitionType[3]; cl_device_svm_capabilities svmCapabilities; double platformHostTimerResolution; size_t planarYuvMaxWidth; size_t planarYuvMaxHeight; cl_uint maxComputUnits; cl_uint maxWorkItemDimensions; cl_uint maxNumOfSubGroups; cl_bool independentForwardProgress; cl_uint preferredVectorWidthChar; cl_uint preferredVectorWidthShort; cl_uint preferredVectorWidthInt; cl_uint preferredVectorWidthLong; cl_uint preferredVectorWidthFloat; cl_uint preferredVectorWidthDouble; cl_uint preferredVectorWidthHalf; cl_uint nativeVectorWidthChar; cl_uint nativeVectorWidthShort; cl_uint nativeVectorWidthInt; cl_uint nativeVectorWidthLong; cl_uint nativeVectorWidthFloat; cl_uint nativeVectorWidthDouble; cl_uint nativeVectorWidthHalf; cl_uint maxReadWriteImageArgs; cl_uint imagePitchAlignment; cl_uint imageBaseAddressAlignment; cl_uint maxPipeArgs; cl_uint pipeMaxActiveReservations; cl_uint pipeMaxPacketSize; cl_uint memBaseAddressAlign; cl_uint minDataTypeAlignSize; cl_device_mem_cache_type globalMemCacheType; 
cl_uint maxConstantArgs; cl_device_local_mem_type localMemType; cl_bool endianLittle; cl_bool deviceAvailable; cl_bool compilerAvailable; cl_bool linkerAvailable; cl_uint queueOnDevicePreferredSize; cl_uint queueOnDeviceMaxSize; cl_uint maxOnDeviceEvents; cl_bool preferredInteropUserSync; cl_uint referenceCount; cl_uint preferredPlatformAtomicAlignment; cl_uint preferredGlobalAtomicAlignment; cl_uint preferredLocalAtomicAlignment; cl_bool hostUnifiedMemory; cl_bool vmeAvcSupportsTextureSampler; cl_uint vmeAvcVersion; cl_uint vmeVersion; cl_uint internalDriverVersion; cl_uint grfSize; bool preemptionSupported; /* Extensions supported */ bool nv12Extension; bool vmeExtension; bool platformLP; bool packedYuvExtension; /*Unified Shared Memory Capabilites*/ cl_unified_shared_memory_capabilities_intel hostMemCapabilities; cl_unified_shared_memory_capabilities_intel deviceMemCapabilities; cl_unified_shared_memory_capabilities_intel singleDeviceSharedMemCapabilities; cl_unified_shared_memory_capabilities_intel crossDeviceSharedMemCapabilities; cl_unified_shared_memory_capabilities_intel sharedSystemMemCapabilities; }; // clang-format on } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/cl_device_info_map.h000066400000000000000000000566701363734646600260260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device_info.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_info.h" #include "CL/cl_ext_intel.h" #include #include namespace NEO { namespace ClDeviceInfoTable { template struct ClMapBase { enum { param = Param }; typedef _Type Type; enum { size = sizeof(Type) }; static const Type &getValue(const NEO::ClDevice &clDevice) { return clDevice.getDeviceInfo().*val; } }; template struct MapBase { enum { param = Param }; typedef _Type Type; enum { size = 
sizeof(Type) }; static const Type &getValue(const NEO::ClDevice &clDevice) { return clDevice.getSharedDeviceInfo().*val; } }; template struct Map {}; ////////////////////////////////////////////////////// // DeviceInfo mapping table // Map::param - i.e. CL_DEVICE_ADDRESS_BITS // Map::Type - i.e. cl_uint // Map::size - ie. sizeof( cl_uint ) // Map::getValue - ie. return deviceInfo.AddressBits ////////////////////////////////////////////////////// // clang-format off // please keep alphabetical order template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public MapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> 
struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public 
ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; template<> struct Map : public ClMapBase {}; // clang-format on } // namespace ClDeviceInfoTable } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/cl_device_vector.h000066400000000000000000000011061363734646600255200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include namespace NEO { class ClDevice; class ClDeviceVector : public std::vector { public: ClDeviceVector() = default; ClDeviceVector(const ClDeviceVector &) = default; ClDeviceVector &operator=(const ClDeviceVector &) = default; ClDeviceVector(const cl_device_id *devices, cl_uint numDevices); void toDeviceIDs(std::vector &devIDs); }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/cl_device/linux/000077500000000000000000000000001363734646600232115ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/cl_device/linux/CMakeLists.txt000066400000000000000000000006121363734646600257500ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_CL_DEVICE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/fp64_override.cpp ) if(UNIX) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CL_DEVICE_LINUX}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_CL_DEVICE_LINUX ${RUNTIME_SRCS_CL_DEVICE_LINUX})compute-runtime-20.13.16352/opencl/source/cl_device/linux/fp64_override.cpp000066400000000000000000000006501363734646600263740ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/debug_settings_reader_creator.h" #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { bool releaseFP64Override() { auto settingsReader = SettingsReaderCreator::create(oclRegPath); return settingsReader->getSetting("OverrideDefaultFP64Settings", -1) == 1; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/cl_device/windows/000077500000000000000000000000001363734646600235445ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/cl_device/windows/CMakeLists.txt000066400000000000000000000006231363734646600263050ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_CL_DEVICE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/fp64_override.cpp ) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CL_DEVICE_WINDOWS}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_CL_DEVICE_WINDOWS 
${RUNTIME_SRCS_CL_DEVICE_WINDOWS})compute-runtime-20.13.16352/opencl/source/cl_device/windows/fp64_override.cpp000066400000000000000000000002541363734646600267270ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { bool releaseFP64Override() { return false; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/000077500000000000000000000000001363734646600227575ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/command_queue/CMakeLists.txt000066400000000000000000000047101363734646600255210ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/command_queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_common.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_to_image.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_to_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_marker.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_migrate_mem_objects.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/enqueue_resource_barrier.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm.h 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image.h ${CMAKE_CURRENT_SOURCE_DIR}/finish.h ${CMAKE_CURRENT_SOURCE_DIR}/flush.h ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.h ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.inl ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_avx2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_sse4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/resource_barrier.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_QUEUE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_QUEUE ${RUNTIME_SRCS_COMMAND_QUEUE}) add_subdirectories()compute-runtime-20.13.16352/opencl/source/command_queue/command_queue.cpp000066400000000000000000000677511363734646600263250ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/api_intercept.h" #include "shared/source/utilities/tag_allocator.h" #include 
"opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "CL/cl_ext.h" #include namespace NEO { // Global table of create functions CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE] = {}; CommandQueue *CommandQueue::create(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage, cl_int &retVal) { retVal = CL_SUCCESS; auto funcCreate = commandQueueFactory[device->getRenderCoreFamily()]; DEBUG_BREAK_IF(nullptr == funcCreate); return funcCreate(context, device, properties, internalUsage); } CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_properties *properties) : context(context), device(device) { if (context) { context->incRefInternal(); } commandQueueProperties = getCmdQueueProperties(properties); flushStamp.reset(new FlushStampTracker(true)); if (device) { gpgpuEngine = &device->getDefaultEngine(); if (gpgpuEngine->commandStreamReceiver->peekTimestampPacketWriteEnabled()) { timestampPacketContainer = std::make_unique(); } auto hwInfo = device->getHardwareInfo(); if (hwInfo.capabilityTable.blitterOperationsSupported) { auto &selectorCopyEngine = device->getDeviceById(0)->getSelectorCopyEngine(); bcsEngine = &device->getDeviceById(0)->getEngine(EngineHelpers::getBcsEngineType(hwInfo, selectorCopyEngine), false); } } processProperties(properties); } CommandQueue::~CommandQueue() { if (virtualEvent) { 
UNRECOVERABLE_IF(this->virtualEvent->getCommandQueue() != this && this->virtualEvent->getCommandQueue() != nullptr); virtualEvent->decRefInternal(); } if (device) { auto storageForAllocation = gpgpuEngine->commandStreamReceiver->getInternalAllocationStorage(); if (commandStream) { storageForAllocation->storeAllocation(std::unique_ptr(commandStream->getGraphicsAllocation()), REUSABLE_ALLOCATION); } delete commandStream; if (this->perfCountersEnabled) { device->getPerformanceCounters()->shutdown(); } } timestampPacketContainer.reset(); //for normal queue, decrement ref count on context //special queue is owned by context so ref count doesn't have to be decremented if (context && !isSpecialCommandQueue) { context->decRefInternal(); } } CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const { return *gpgpuEngine->commandStreamReceiver; } CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver() const { if (bcsEngine) { return bcsEngine->commandStreamReceiver; } return nullptr; } Device &CommandQueue::getDevice() const noexcept { return device->getDevice(); } uint32_t CommandQueue::getHwTag() const { uint32_t tag = *getHwTagAddress(); return tag; } volatile uint32_t *CommandQueue::getHwTagAddress() const { return getGpgpuCommandStreamReceiver().getTagAddress(); } bool CommandQueue::isCompleted(uint32_t taskCount) const { uint32_t tag = getHwTag(); DEBUG_BREAK_IF(tag == CompletionStamp::levelNotReady); return tag >= taskCount; } void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { WAIT_ENTER() DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag()); bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW; getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); 
DEBUG_BREAK_IF(getHwTag() < taskCountToWait); if (gtpinIsGTPinInitialized()) { gtpinNotifyTaskCompletion(taskCountToWait); } if (auto bcsCsr = getBcsCommandStreamReceiver()) { bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCount, 0, false, false); bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCount); } getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(taskCountToWait); WAIT_LEAVE() } bool CommandQueue::isQueueBlocked() { TakeOwnershipWrapper takeOwnershipWrapper(*this); //check if we have user event and if so, if it is in blocked state. if (this->virtualEvent) { auto executionStatus = this->virtualEvent->peekExecutionStatus(); if (executionStatus <= CL_SUBMITTED) { UNRECOVERABLE_IF(this->virtualEvent == nullptr); if (this->virtualEvent->isStatusCompletedByTermination(executionStatus) == false) { taskCount = this->virtualEvent->peekTaskCount(); flushStamp->setStamp(this->virtualEvent->flushStamp->peekStamp()); taskLevel = this->virtualEvent->taskLevel; // If this isn't an OOQ, update the taskLevel for the queue if (!isOOQEnabled()) { taskLevel++; } } else { //at this point we may reset queue TaskCount, since all command previous to this were aborted taskCount = 0; flushStamp->setStamp(0); taskLevel = getGpgpuCommandStreamReceiver().peekTaskLevel(); } FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "isQueueBlocked taskLevel change from", taskLevel, "to new from virtualEvent", this->virtualEvent, "new tasklevel", this->virtualEvent->taskLevel.load()); //close the access to virtual event, driver added only 1 ref count. 
this->virtualEvent->decRefInternal(); this->virtualEvent = nullptr; return false; } return true; } return false; } cl_int CommandQueue::getCommandQueueInfo(cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { return getQueueInfo(this, paramName, paramValueSize, paramValue, paramValueSizeRet); } uint32_t CommandQueue::getTaskLevelFromWaitList(uint32_t taskLevel, cl_uint numEventsInWaitList, const cl_event *eventWaitList) { for (auto iEvent = 0u; iEvent < numEventsInWaitList; ++iEvent) { auto pEvent = (Event *)(eventWaitList[iEvent]); uint32_t eventTaskLevel = pEvent->taskLevel; taskLevel = std::max(taskLevel, eventTaskLevel); } return taskLevel; } LinearStream &CommandQueue::getCS(size_t minRequiredSize) { DEBUG_BREAK_IF(nullptr == device); if (!commandStream) { commandStream = new LinearStream(nullptr); } minRequiredSize += CSRequirements::minCommandQueueCommandStreamSize; constexpr static auto additionalAllocationSize = CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize; getGpgpuCommandStreamReceiver().ensureCommandBufferAllocation(*commandStream, minRequiredSize, additionalAllocationSize); return *commandStream; } cl_int CommandQueue::enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) { if ((memObjects == nullptr && numObjects != 0) || (memObjects != nullptr && numObjects == 0)) { return CL_INVALID_VALUE; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject == nullptr || memObject->peekSharingHandler() == nullptr) { return CL_INVALID_MEM_OBJECT; } int result = memObject->peekSharingHandler()->acquire(memObject); if (result != CL_SUCCESS) { return result; } memObject->acquireCount++; } auto status = enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, oclEvent); if (oclEvent) { 
castToObjectOrAbort(*oclEvent)->setCmdType(cmdType); } return status; } cl_int CommandQueue::enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType) { if ((memObjects == nullptr && numObjects != 0) || (memObjects != nullptr && numObjects == 0)) { return CL_INVALID_VALUE; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject == nullptr || memObject->peekSharingHandler() == nullptr) { return CL_INVALID_MEM_OBJECT; } memObject->peekSharingHandler()->release(memObject); DEBUG_BREAK_IF(memObject->acquireCount <= 0); memObject->acquireCount--; } auto status = enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, oclEvent); if (oclEvent) { castToObjectOrAbort(*oclEvent)->setCmdType(cmdType); } return status; } void CommandQueue::updateFromCompletionStamp(const CompletionStamp &completionStamp) { DEBUG_BREAK_IF(this->taskLevel > completionStamp.taskLevel); DEBUG_BREAK_IF(this->taskCount > completionStamp.taskCount); if (completionStamp.taskCount != CompletionStamp::levelNotReady) { taskCount = completionStamp.taskCount; } flushStamp->setStamp(completionStamp.flushStamp); this->taskLevel = completionStamp.taskLevel; } bool CommandQueue::setPerfCountersEnabled() { DEBUG_BREAK_IF(device == nullptr); auto perfCounters = device->getPerformanceCounters(); bool isCcsEngine = EngineHelpers::isCcs(getGpgpuEngine().osContext->getEngineType()); perfCountersEnabled = perfCounters->enable(isCcsEngine); if (!perfCountersEnabled) { perfCounters->shutdown(); } return perfCountersEnabled; } PerformanceCounters *CommandQueue::getPerfCounters() { return device->getPerformanceCounters(); } cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest) { cl_int retVal = CL_SUCCESS; MapInfo unmapInfo; if (!memObj->findMappedPtr(mappedPtr, unmapInfo)) 
{ return CL_INVALID_VALUE; } if (!unmapInfo.readOnly) { memObj->getMapAllocation()->setAubWritable(true, GraphicsAllocation::defaultBank); memObj->getMapAllocation()->setTbxWritable(true, GraphicsAllocation::defaultBank); if (memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) { auto buffer = castToObject(memObj); retVal = enqueueWriteBuffer(buffer, CL_FALSE, unmapInfo.offset[0], unmapInfo.size[0], mappedPtr, memObj->getMapAllocation(), eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } else { auto image = castToObjectOrAbort(memObj); size_t writeOrigin[4] = {unmapInfo.offset[0], unmapInfo.offset[1], unmapInfo.offset[2], 0}; auto mipIdx = getMipLevelOriginIdx(image->peekClMemObjType()); UNRECOVERABLE_IF(mipIdx >= 4); writeOrigin[mipIdx] = unmapInfo.mipLevel; retVal = enqueueWriteImage(image, CL_FALSE, writeOrigin, &unmapInfo.size[0], image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(), mappedPtr, memObj->getMapAllocation(), eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } } else { retVal = enqueueMarkerWithWaitList(eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } if (retVal == CL_SUCCESS) { memObj->removeMappedPtr(mappedPtr); if (eventsRequest.outEvent) { auto event = castToObject(*eventsRequest.outEvent); event->setCmdType(CL_COMMAND_UNMAP_MEM_OBJECT); } } return retVal; } void *CommandQueue::enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) { void *basePtr = transferProperties.memObj->getBasePtrForMap(getDevice().getRootDeviceIndex()); size_t mapPtrOffset = transferProperties.memObj->calculateOffsetForMapping(transferProperties.offset) + transferProperties.mipPtrOffset; if (transferProperties.memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) { mapPtrOffset += transferProperties.memObj->getOffset(); } void *returnPtr = ptrOffset(basePtr, mapPtrOffset); if 
(!transferProperties.memObj->addMappedPtr(returnPtr, transferProperties.memObj->calculateMappedPtrLength(transferProperties.size), transferProperties.mapFlags, transferProperties.size, transferProperties.offset, transferProperties.mipLevel)) { errcodeRet = CL_INVALID_OPERATION; return nullptr; } if (transferProperties.memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) { auto buffer = castToObject(transferProperties.memObj); errcodeRet = enqueueReadBuffer(buffer, transferProperties.blocking, transferProperties.offset[0], transferProperties.size[0], returnPtr, transferProperties.memObj->getMapAllocation(), eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } else { auto image = castToObjectOrAbort(transferProperties.memObj); size_t readOrigin[4] = {transferProperties.offset[0], transferProperties.offset[1], transferProperties.offset[2], 0}; auto mipIdx = getMipLevelOriginIdx(image->peekClMemObjType()); UNRECOVERABLE_IF(mipIdx >= 4); readOrigin[mipIdx] = transferProperties.mipLevel; errcodeRet = enqueueReadImage(image, transferProperties.blocking, readOrigin, &transferProperties.size[0], image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(), returnPtr, transferProperties.memObj->getMapAllocation(), eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, eventsRequest.outEvent); } if (errcodeRet != CL_SUCCESS) { transferProperties.memObj->removeMappedPtr(returnPtr); return nullptr; } if (eventsRequest.outEvent) { auto event = castToObject(*eventsRequest.outEvent); event->setCmdType(transferProperties.cmdType); } return returnPtr; } void *CommandQueue::enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet) { if (transferProperties.memObj->mappingOnCpuAllowed()) { return cpuDataTransferHandler(transferProperties, eventsRequest, errcodeRet); } else { return enqueueReadMemObjForMap(transferProperties, eventsRequest, errcodeRet); } } cl_int 
CommandQueue::enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest) { cl_int retVal = CL_SUCCESS; if (transferProperties.memObj->mappingOnCpuAllowed()) { cpuDataTransferHandler(transferProperties, eventsRequest, retVal); } else { retVal = enqueueWriteMemObjForUnmap(transferProperties.memObj, transferProperties.ptr, eventsRequest); } return retVal; } void *CommandQueue::enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) { TransferProperties transferProperties(buffer, CL_COMMAND_MAP_BUFFER, mapFlags, blockingMap != CL_FALSE, &offset, &size, nullptr, false); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); return enqueueMapMemObject(transferProperties, eventsRequest, errcodeRet); } void *CommandQueue::enqueueMapImage(Image *image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet) { TransferProperties transferProperties(image, CL_COMMAND_MAP_IMAGE, mapFlags, blockingMap != CL_FALSE, const_cast(origin), const_cast(region), nullptr, false); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); if (image->isMemObjZeroCopy() && image->mappingOnCpuAllowed()) { GetInfoHelper::set(imageSlicePitch, image->getImageDesc().image_slice_pitch); if (image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { // There are differences in qPitch programming between Gen8 vs Gen9+ devices. // For Gen8 qPitch is distance in rows while Gen9+ it is in pixels. // Minimum value of qPitch is 4 and this causes slicePitch = 4*rowPitch on Gen8. // To allow zero-copy we have to tell what is correct value rowPitch which should equal to slicePitch. 
GetInfoHelper::set(imageRowPitch, image->getImageDesc().image_slice_pitch); } else { GetInfoHelper::set(imageRowPitch, image->getImageDesc().image_row_pitch); } } else { GetInfoHelper::set(imageSlicePitch, image->getHostPtrSlicePitch()); GetInfoHelper::set(imageRowPitch, image->getHostPtrRowPitch()); } if (Image::hasSlices(image->peekClMemObjType()) == false) { GetInfoHelper::set(imageSlicePitch, static_cast(0)); } return enqueueMapMemObject(transferProperties, eventsRequest, errcodeRet); } cl_int CommandQueue::enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TransferProperties transferProperties(memObj, CL_COMMAND_UNMAP_MEM_OBJECT, 0, false, nullptr, nullptr, mappedPtr, false); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); return enqueueUnmapMemObject(transferProperties, eventsRequest); } void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList, size_t numEventsInWaitlist, MapOperationType opType, MemObj *memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, EventBuilder &externalEventBuilder) { EventBuilder internalEventBuilder; EventBuilder *eventBuilder; // check if event will be exposed externally if (externalEventBuilder.getEvent()) { externalEventBuilder.getEvent()->incRefInternal(); eventBuilder = &externalEventBuilder; } else { // it will be an internal event internalEventBuilder.create(this, context); eventBuilder = &internalEventBuilder; } //store task data in event auto cmd = std::unique_ptr(new CommandMapUnmap(opType, *memObj, copySize, copyOffset, readOnly, *this)); eventBuilder->getEvent()->setCommand(std::move(cmd)); //bind output event with input events eventBuilder->addParentEvents(ArrayRef(eventWaitList, numEventsInWaitlist)); eventBuilder->addParentEvent(this->virtualEvent); eventBuilder->finalize(); if (this->virtualEvent) { this->virtualEvent->decRefInternal(); } this->virtualEvent = 
eventBuilder->getEvent(); } bool CommandQueue::setupDebugSurface(Kernel *kernel) { auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); if (!debugSurface) { debugSurface = getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize); } DEBUG_BREAK_IF(!kernel->requiresSshForBuffers()); auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), kernel->getKernelInfo().patchInfo.pAllocateSystemThreadSurface->Offset); void *addressToPatch = reinterpret_cast(debugSurface->getGpuAddress()); size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device->getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0); return true; } IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) { return getGpgpuCommandStreamReceiver().getIndirectHeap(heapType, minRequiredSize); } void CommandQueue::allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap) { getGpgpuCommandStreamReceiver().allocateHeapMemory(heapType, minRequiredSize, indirectHeap); } void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) { getGpgpuCommandStreamReceiver().releaseIndirectHeap(heapType); } void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) { auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); previousNodes.swapNodes(*timestampPacketContainer); previousNodes.resolveDependencies(clearAllDependencies); DEBUG_BREAK_IF(timestampPacketContainer->peekNodes().size() > 0); for (size_t i = 0; i < numberOfNodes; i++) { timestampPacketContainer->add(allocator->getTag()); } } size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const { size_t nodesCount = dispatchInfo.size(); auto mainKernel = dispatchInfo.peekMainKernel(); if 
(obtainTimestampPacketForCacheFlush(mainKernel->requiresCacheFlushCommand(*this))) { nodesCount++; } return nodesCount; } bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList) { auto debugVariableSet = false; // Requested by debug variable or allowed by Buffer if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) { if (DebugManager.flags.DoCpuCopyOnReadBuffer.get() == 0) { return false; } debugVariableSet = true; } if (CL_COMMAND_WRITE_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnWriteBuffer.get() != -1) { if (DebugManager.flags.DoCpuCopyOnWriteBuffer.get() == 0) { return false; } debugVariableSet = true; } //if we are blocked by user events, we can't service the call on CPU if (Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList)) { return false; } //check if buffer is compatible if (!buffer->isReadWriteOnCpuAllowed()) { return false; } if (buffer->getMemoryManager() && buffer->getMemoryManager()->isCpuCopyRequired(ptr)) { return true; } if (debugVariableSet) { return true; } //non blocking transfers are not expected to be serviced by CPU //we do not want to artifically stall the pipeline to allow CPU access if (blocking == CL_FALSE) { return false; } //check if it is beneficial to do transfer on CPU if (!buffer->isReadWriteOnCpuPreffered(ptr, size)) { return false; } //make sure that event wait list is empty if (numEventsInWaitList == 0) { return true; } return false; } bool CommandQueue::queueDependenciesClearRequired() const { return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get(); } bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const { bool blitAllowed = device->getHardwareInfo().capabilityTable.blitterOperationsSupported; if (DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get() != -1) { blitAllowed 
&= !!DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get(); } bool commandAllowed = (CL_COMMAND_READ_BUFFER == cmdType) || (CL_COMMAND_WRITE_BUFFER == cmdType) || (CL_COMMAND_COPY_BUFFER == cmdType) || (CL_COMMAND_READ_BUFFER_RECT == cmdType); return commandAllowed && blitAllowed; } bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue) const { if (!blockedQueue) { return false; } if (isCacheFlushCommand(commandType) || !isCommandWithoutKernel(commandType)) { return true; } if ((CL_COMMAND_BARRIER == commandType || CL_COMMAND_MARKER == commandType) && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) { auto waitlistEvent = castToObjectOrAbort(eventsRequest.eventWaitList[i]); if (waitlistEvent->getTimestampPacketNodes()) { return true; } } } return false; } void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo) { if (DebugManager.flags.AUBDumpSubCaptureMode.get()) { auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo); if (!status.isActive) { // make each enqueue blocking when subcapture is not active to split batch buffer blocking = true; } else if (!status.wasActiveInPreviousEnqueue) { // omit timestamp packet dependencies dependencies upon subcapture activation clearAllDependencies = true; } } if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) { for (auto &dispatchInfo : multiDispatchInfo) { auto kernelName = dispatchInfo.getKernel()->getKernelInfo().name; getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str()); } } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/command_queue.h000066400000000000000000000422371363734646600257620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/engine_control.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/task_information.h" #include #include namespace NEO { class BarrierCommand; class Buffer; class LinearStream; class ClDevice; class Context; class Device; class Event; class EventBuilder; class FlushStampTracker; class Image; class IndirectHeap; class Kernel; class MemObj; class PerformanceCounters; struct CompletionStamp; struct DispatchGlobalsArgs; struct MultiDispatchInfo; enum class QueuePriority { LOW, MEDIUM, HIGH }; inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) { return (commandType == CL_COMMAND_READ_BUFFER || commandType == CL_COMMAND_READ_BUFFER_RECT || commandType == CL_COMMAND_READ_IMAGE || commandType == CL_COMMAND_SVM_MAP || printfHandler); } template <> struct OpenCLObjectMapper<_cl_command_queue> { typedef class CommandQueue DerivedType; }; class CommandQueue : public BaseObject<_cl_command_queue> { public: static const cl_ulong objectMagic = 0x1234567890987654LL; static CommandQueue *create(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage, cl_int &errcodeRet); CommandQueue() = delete; CommandQueue(Context *context, ClDevice *device, const cl_queue_properties *properties); CommandQueue &operator=(const CommandQueue &) = delete; CommandQueue(const CommandQueue &) = delete; ~CommandQueue() override; // API entry points virtual cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t srcOrigin[3], const size_t dstOrigin[3], const size_t region[3], cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueFillImage(Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, 
cl_event *event) = 0; virtual cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueKernel(cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; MOCKABLE_VIRTUAL void *enqueueMapBuffer(Buffer *buffer, cl_bool blockingMap, cl_map_flags mapFlags, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet); MOCKABLE_VIRTUAL void *enqueueMapImage(Image *image, cl_bool blockingMap, cl_map_flags mapFlags, const size_t *origin, const size_t *region, size_t *imageRowPitch, size_t *imageSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, cl_int &errcodeRet); MOCKABLE_VIRTUAL cl_int enqueueUnmapMemObject(MemObj *memObj, void *mappedPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); virtual cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) = 0; virtual cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) = 0; virtual cl_int enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, 
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, 
const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueWriteBufferRect(Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueReadBufferRect(Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueCopyBufferToImage(Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int enqueueCopyImageToBuffer(Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; cl_int enqueueAcquireSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType); cl_int enqueueReleaseSharedObjects(cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *oclEvent, cl_uint cmdType); MOCKABLE_VIRTUAL void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal); virtual cl_int 
enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int finish() = 0; virtual cl_int enqueueInitDispatchGlobals(DispatchGlobalsArgs *dispatchGlobalsArgs, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; virtual cl_int flush() = 0; MOCKABLE_VIRTUAL void updateFromCompletionStamp(const CompletionStamp &completionStamp); virtual bool isCacheFlushCommand(uint32_t commandType) const { return false; } cl_int getCommandQueueInfo(cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); uint32_t getHwTag() const; volatile uint32_t *getHwTagAddress() const; bool isCompleted(uint32_t taskCount) const; MOCKABLE_VIRTUAL bool isQueueBlocked(); MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep); static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel, cl_uint numEventsInWaitList, const cl_event *eventWaitList); MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; CommandStreamReceiver *getBcsCommandStreamReceiver() const; Device &getDevice() const noexcept; Context &getContext() const { return *context; } Context *getContextPtr() const { return context; } EngineControl &getGpgpuEngine() const { return *gpgpuEngine; } MOCKABLE_VIRTUAL LinearStream &getCS(size_t minRequiredSize); IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize); void allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap); MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType); void releaseVirtualEvent() { if (this->virtualEvent != nullptr) { this->virtualEvent->decRefInternal(); this->virtualEvent = nullptr; } } cl_command_queue_properties getCommandQueueProperties() const { return commandQueueProperties; } bool isProfilingEnabled() const { return 
!!(this->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE); } bool isOOQEnabled() const { return !!(this->getCommandQueueProperties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); } bool isPerfCountersEnabled() const { return perfCountersEnabled; } PerformanceCounters *getPerfCounters(); bool setPerfCountersEnabled(); void setIsSpecialCommandQueue(bool newValue) { this->isSpecialCommandQueue = newValue; } QueuePriority getPriority() const { return priority; } QueueThrottle getThrottle() const { return throttle; } void enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList, size_t numEventsInWaitlist, MapOperationType opType, MemObj *memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, EventBuilder &externalEventBuilder); MOCKABLE_VIRTUAL bool setupDebugSurface(Kernel *kernel); bool getRequiresCacheFlushAfterWalker() const { return requiresCacheFlushAfterWalker; } void updateBcsTaskCount(uint32_t newBcsTaskCount) { this->bcsTaskCount = newBcsTaskCount; } // taskCount of last task uint32_t taskCount = 0; // current taskLevel. Used for determining if a PIPE_CONTROL is needed. 
uint32_t taskLevel = 0; std::unique_ptr flushStamp; // virtual event that holds last Enqueue information Event *virtualEvent = nullptr; size_t estimateTimestampPacketNodesCount(const MultiDispatchInfo &dispatchInfo) const; uint64_t getSliceCount() const { return sliceCount; } uint64_t dispatchHints = 0; protected: void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest); void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest); virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue) const; MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies); void processProperties(const cl_queue_properties *properties); bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList); void providePerformanceHint(TransferProperties &transferProperties); bool queueDependenciesClearRequired() const; bool blitEnqueueAllowed(cl_command_type cmdType) const; void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; Context *context = nullptr; ClDevice *device = nullptr; EngineControl *gpgpuEngine = nullptr; EngineControl *bcsEngine = nullptr; cl_command_queue_properties commandQueueProperties = 0; 
QueuePriority priority = QueuePriority::MEDIUM; QueueThrottle throttle = QueueThrottle::MEDIUM; uint64_t sliceCount = QueueSliceCount::defaultSliceCount; uint32_t bcsTaskCount = 0; bool perfCountersEnabled = false; LinearStream *commandStream = nullptr; bool isSpecialCommandQueue = false; bool requiresCacheFlushAfterWalker = false; std::unique_ptr timestampPacketContainer; }; using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage); } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/command_queue_hw.h000066400000000000000000000612521363734646600264560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/engine_control.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/program/printf_handler.h" #include namespace NEO { class EventBuilder; struct EnqueueProperties; template class CommandQueueHw : public CommandQueue { using BaseClass = CommandQueue; public: CommandQueueHw(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage) : BaseClass(context, device, properties) { auto clPriority = getCmdQueueProperties(properties, CL_QUEUE_PRIORITY_KHR); if (clPriority & static_cast(CL_QUEUE_PRIORITY_LOW_KHR)) { priority = QueuePriority::LOW; this->gpgpuEngine = 
&device->getDeviceById(0)->getEngine(HwHelperHw::lowPriorityEngineType, true); } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_MED_KHR)) { priority = QueuePriority::MEDIUM; } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_HIGH_KHR)) { priority = QueuePriority::HIGH; } auto clThrottle = getCmdQueueProperties(properties, CL_QUEUE_THROTTLE_KHR); if (clThrottle & static_cast(CL_QUEUE_THROTTLE_LOW_KHR)) { throttle = QueueThrottle::LOW; } else if (clThrottle & static_cast(CL_QUEUE_THROTTLE_MED_KHR)) { throttle = QueueThrottle::MEDIUM; } else if (clThrottle & static_cast(CL_QUEUE_THROTTLE_HIGH_KHR)) { throttle = QueueThrottle::HIGH; } if (internalUsage) { this->gpgpuEngine = &device->getInternalEngine(); } if (getCmdQueueProperties(properties, CL_QUEUE_PROPERTIES) & static_cast(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); if (DebugManager.flags.CsrDispatchMode.get() != 0) { getGpgpuCommandStreamReceiver().overrideDispatchPolicy(static_cast(DebugManager.flags.CsrDispatchMode.get())); } getGpgpuCommandStreamReceiver().enableNTo1SubmissionModel(); } uint64_t requestedSliceCount = getCmdQueueProperties(properties, CL_QUEUE_SLICE_COUNT_INTEL); if (requestedSliceCount > 0) { sliceCount = requestedSliceCount; } } static CommandQueue *create(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage) { return new CommandQueueHw(context, device, properties, internalUsage); } MOCKABLE_VIRTUAL void notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead); MOCKABLE_VIRTUAL void notifyEnqueueReadImage(Image *image, bool blockingRead); cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; 
cl_int enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t srcOrigin[3], const size_t dstOrigin[3], const size_t region[3], cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueFillImage(Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueKernel(cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override; cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override; cl_int enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMemFill(void 
*svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueReadBufferRect(Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueWriteBufferRect(Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const 
void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyBufferToImage(Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueCopyImageToBuffer(Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int finish() override; cl_int enqueueInitDispatchGlobals(DispatchGlobalsArgs *dispatchGlobalsArgs, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override; cl_int flush() override; template void enqueueHandler(Surface **surfacesForResidency, size_t numSurfaceForResidency, bool blocking, const MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); template void enqueueHandler(Surface *(&surfacesForResidency)[size], bool blocking, const MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { enqueueHandler(surfacesForResidency, size, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event); } template void enqueueHandler(Surface *(&surfacesForResidency)[size], bool blocking, Kernel *kernel, cl_uint workDim, const size_t globalOffsets[3], const size_t workItems[3], const size_t *localWorkSizesIn, const size_t *enqueuedWorkSizes, cl_uint 
numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); template CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency, size_t surfaceCount, LinearStream &commandStream, size_t commandStreamStart, bool &blocking, const MultiDispatchInfo &multiDispatchInfo, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, PrintfHandler *printfHandler); void enqueueBlocked(uint32_t commandType, Surface **surfacesForResidency, size_t surfacesCount, const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies ×tampPacketDependencies, std::unique_ptr &blockedCommandsData, const EnqueueProperties &enqueueProperties, EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr printfHandler); CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces, size_t surfaceCount, LinearStream &commandStream, size_t commandStreamStart, bool &blocking, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel); void processDispatchForCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, CsrDependencies &csrDeps); BlitProperties processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, LinearStream &commandStream, uint32_t commandType, bool queueBlocked); void submitCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, uint64_t postSyncAddress); bool isCacheFlushCommand(uint32_t commandType) const override; MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const; protected: MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){}; size_t calculateHostPtrSizeForImage(const size_t *region, 
size_t rowPitch, size_t slicePitch, Image *image); cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); MOCKABLE_VIRTUAL void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection); void setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo); MOCKABLE_VIRTUAL bool forceStateless(size_t size); template LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool blitEnqueue, bool blockedQueue, const MultiDispatchInfo &multiDispatchInfo, const EventsRequest &eventsRequest, std::unique_ptr &blockedCommandsData, Surface **surfaces, size_t numSurfaces) { LinearStream *commandStream = nullptr; bool profilingRequired = (this->isProfilingEnabled() && eventsRequest.outEvent); bool perfCountersRequired = (this->isPerfCountersEnabled() && eventsRequest.outEvent); if (isBlockedCommandStreamRequired(commandType, eventsRequest, blockedQueue)) { constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize; constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize; commandStream = new LinearStream(); auto &gpgpuCsr = getGpgpuCommandStreamReceiver(); gpgpuCsr.ensureCommandBufferAllocation(*commandStream, allocationSize, additionalAllocationSize); blockedCommandsData = std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } else { commandStream = 
&getCommandStream(*this, csrDependencies, profilingRequired, perfCountersRequired, blitEnqueue, multiDispatchInfo, surfaces, numSurfaces); } return commandStream; } void processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo, BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, bool queueBlocked); bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override; bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override; void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo); void runSchedulerSimulation(DeviceQueueHw &devQueueHw, Kernel &parentKernel); static void computeOffsetsValueForRectCommands(size_t *bufferOffset, size_t *hostOffset, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch); void processDeviceEnqueue(DeviceQueueHw *devQueueHw, const MultiDispatchInfo &multiDispatchInfo, TagNode *hwTimeStamps, bool &blocking); template void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo, std::unique_ptr &printfHandler, Event *event, TagNode *&hwTimeStamps, bool blockQueue, DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, TimestampPacketDependencies ×tampPacketDependencies); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/command_queue_hw_base.inl000066400000000000000000000154241363734646600300030ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/helpers/blit_commands_helper.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/command_queue/enqueue_barrier.h" #include "opencl/source/command_queue/enqueue_copy_buffer.h" #include "opencl/source/command_queue/enqueue_copy_buffer_rect.h" #include "opencl/source/command_queue/enqueue_copy_buffer_to_image.h" #include "opencl/source/command_queue/enqueue_copy_image.h" #include "opencl/source/command_queue/enqueue_copy_image_to_buffer.h" #include "opencl/source/command_queue/enqueue_fill_buffer.h" #include "opencl/source/command_queue/enqueue_fill_image.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/enqueue_marker.h" #include "opencl/source/command_queue/enqueue_migrate_mem_objects.h" #include "opencl/source/command_queue/enqueue_read_buffer.h" #include "opencl/source/command_queue/enqueue_read_buffer_rect.h" #include "opencl/source/command_queue/enqueue_read_image.h" #include "opencl/source/command_queue/enqueue_svm.h" #include "opencl/source/command_queue/enqueue_write_buffer.h" #include "opencl/source/command_queue/enqueue_write_buffer_rect.h" #include "opencl/source/command_queue/enqueue_write_image.h" #include "opencl/source/command_queue/finish.h" #include "opencl/source/command_queue/flush.h" #include "opencl/source/command_queue/gpgpu_walker.h" namespace NEO { template void CommandQueueHw::notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead) { if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) { buffer->getGraphicsAllocation()->setAllocDumpable(blockingRead); buffer->forceDisallowCPUCopy = blockingRead; } } template void CommandQueueHw::notifyEnqueueReadImage(Image *image, bool blockingRead) { if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) { image->getGraphicsAllocation()->setAllocDumpable(blockingRead); } } template cl_int CommandQueueHw::enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer, 
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); TransferProperties transferProperties(buffer, commandType, 0, true, &offset, &size, ptr, true); cpuDataTransferHandler(transferProperties, eventsRequest, retVal); return retVal; } template cl_int CommandQueueHw::enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer, size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = CL_SUCCESS; EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false); cpuDataTransferHandler(transferProperties, eventsRequest, retVal); if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(commandType); } if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast(buffer), ptr); } return retVal; } template cl_int CommandQueueHw::enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { MultiDispatchInfo multiDispatchInfo; NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler( surfaces, blocking == CL_TRUE, multiDispatchInfo, numEventsInWaitList, eventWaitList, event); if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(commandType); } if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast(memObj), ptr); } return CL_SUCCESS; } template void CommandQueueHw::dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) { if 
(HwHelperHw::getAuxTranslationMode() != AuxTranslationMode::Builtin) { return; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getDevice()); auto &auxTranslationBuilder = static_cast &>(builder); BuiltinOpParams dispatchParams; dispatchParams.auxTranslationDirection = auxTranslationDirection; auxTranslationBuilder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, dispatchParams); } template bool CommandQueueHw::forceStateless(size_t size) { return size >= 4ull * MemoryConstants::gigaByte; } template bool CommandQueueHw::isCacheFlushForBcsRequired() const { return true; } template void CommandQueueHw::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) { multiDispatchInfo.begin()->dispatchInitCommands.registerMethod( TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation); multiDispatchInfo.begin()->dispatchInitCommands.registerCommandsSizeEstimationMethod( TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency); multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerMethod( TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation); multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerCommandsSizeEstimationMethod( TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency); } template bool CommandQueueHw::obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const { return isCacheFlushRequired; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/command_queue_hw_bdw_plus.inl000066400000000000000000000030161363734646600307020ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw_base.inl" namespace NEO { template void CommandQueueHw::runSchedulerSimulation(DeviceQueueHw &devQueueHw, Kernel &parentKernel) { BuiltinKernelsSimulation::SchedulerSimulation 
simulation; simulation.runSchedulerSimulation(devQueueHw.getQueueBuffer(), devQueueHw.getStackBuffer(), devQueueHw.getEventPoolBuffer(), devQueueHw.getSlbBuffer(), devQueueHw.getDshBuffer(), parentKernel.getKernelReflectionSurface(), devQueueHw.getQueueStorageBuffer(), this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(), devQueueHw.getDebugQueue()); } template void CommandQueueHw::submitCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, uint64_t postSyncAddress) { } template bool CommandQueueHw::isCacheFlushCommand(uint32_t commandType) const { return false; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/cpu_data_transfer_handler.cpp000066400000000000000000000207321363734646600306500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" namespace NEO { void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) { MapInfo unmapInfo; Event *outEventObj = nullptr; void *returnPtr = nullptr; EventBuilder eventBuilder; bool eventCompleted = false; bool mapOperation = transferProperties.cmdType == CL_COMMAND_MAP_BUFFER || transferProperties.cmdType == CL_COMMAND_MAP_IMAGE; ErrorCodeHelper err(&retVal, CL_SUCCESS); if (mapOperation) { returnPtr = ptrOffset(transferProperties.memObj->getCpuAddressForMapping(), transferProperties.memObj->calculateOffsetForMapping(transferProperties.offset) + 
transferProperties.mipPtrOffset); if (!transferProperties.memObj->addMappedPtr(returnPtr, transferProperties.memObj->calculateMappedPtrLength(transferProperties.size), transferProperties.mapFlags, transferProperties.size, transferProperties.offset, transferProperties.mipLevel)) { err.set(CL_INVALID_OPERATION); return nullptr; } } else if (transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT) { if (!transferProperties.memObj->findMappedPtr(transferProperties.ptr, unmapInfo)) { err.set(CL_INVALID_VALUE); return nullptr; } transferProperties.memObj->removeMappedPtr(unmapInfo.ptr); } if (eventsRequest.outEvent) { eventBuilder.create(this, transferProperties.cmdType, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); outEventObj = eventBuilder.getEvent(); outEventObj->setQueueTimeStamp(); outEventObj->setCPUProfilingPath(true); *eventsRequest.outEvent = outEventObj; } auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); TakeOwnershipWrapper queueOwnership(*this); auto blockQueue = false; auto taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, transferProperties.cmdType); DBG_LOG(LogTaskCounts, __FUNCTION__, "taskLevel", taskLevel); if (outEventObj) { outEventObj->taskLevel = taskLevel; } if (blockQueue && (transferProperties.cmdType == CL_COMMAND_MAP_BUFFER || transferProperties.cmdType == CL_COMMAND_MAP_IMAGE || transferProperties.cmdType == CL_COMMAND_UNMAP_MEM_OBJECT)) { // Pass size and offset only. Unblocked command will call transferData(size, offset) method enqueueBlockedMapUnmapOperation(eventsRequest.eventWaitList, static_cast(eventsRequest.numEventsInWaitList), mapOperation ? MAP : UNMAP, transferProperties.memObj, mapOperation ? transferProperties.size : unmapInfo.size, mapOperation ? transferProperties.offset : unmapInfo.offset, mapOperation ? 
transferProperties.mapFlags == CL_MAP_READ : unmapInfo.readOnly, eventBuilder); } queueOwnership.unlock(); commandStreamReceieverOwnership.unlock(); // read/write buffers are always blocking if (!blockQueue || transferProperties.blocking) { err.set(Event::waitForEvents(eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList)); bool modifySimulationFlags = false; if (outEventObj) { outEventObj->setSubmitTimeStamp(); } //wait for the completness of previous commands if (transferProperties.cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) { if (!transferProperties.memObj->isMemObjZeroCopy() || transferProperties.blocking) { finish(); eventCompleted = true; } } if (outEventObj) { outEventObj->setStartTimeStamp(); } UNRECOVERABLE_IF((transferProperties.memObj->isMemObjZeroCopy() == false) && isMipMapped(transferProperties.memObj)); switch (transferProperties.cmdType) { case CL_COMMAND_MAP_BUFFER: if (!transferProperties.memObj->isMemObjZeroCopy()) { transferProperties.memObj->transferDataToHostPtr(transferProperties.size, transferProperties.offset); eventCompleted = true; } break; case CL_COMMAND_MAP_IMAGE: if (!transferProperties.memObj->isMemObjZeroCopy()) { transferProperties.memObj->transferDataToHostPtr(transferProperties.size, transferProperties.offset); eventCompleted = true; } break; case CL_COMMAND_UNMAP_MEM_OBJECT: if (!transferProperties.memObj->isMemObjZeroCopy()) { if (!unmapInfo.readOnly) { transferProperties.memObj->transferDataFromHostPtr(unmapInfo.size, unmapInfo.offset); } eventCompleted = true; } if (!unmapInfo.readOnly) { modifySimulationFlags = true; } break; case CL_COMMAND_READ_BUFFER: memcpy_s(transferProperties.ptr, transferProperties.size[0], transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0]); eventCompleted = true; break; case CL_COMMAND_WRITE_BUFFER: memcpy_s(transferProperties.getCpuPtrForReadWrite(), transferProperties.size[0], transferProperties.ptr, transferProperties.size[0]); eventCompleted = true; 
modifySimulationFlags = true; break; case CL_COMMAND_MARKER: break; default: err.set(CL_INVALID_OPERATION); } if (outEventObj) { outEventObj->setEndTimeStamp(); outEventObj->updateTaskCount(this->taskCount); outEventObj->flushStamp->replaceStampObject(this->flushStamp->getStampReference()); if (eventCompleted) { outEventObj->setStatus(CL_COMPLETE); } else { outEventObj->updateExecutionStatus(); } } if (modifySimulationFlags) { auto graphicsAllocation = transferProperties.memObj->getGraphicsAllocation(); graphicsAllocation->setAubWritable(true, GraphicsAllocation::defaultBank); graphicsAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank); } } if (context->isProvidingPerformanceHints()) { providePerformanceHint(transferProperties); } return returnPtr; // only map returns pointer } void CommandQueue::providePerformanceHint(TransferProperties &transferProperties) { switch (transferProperties.cmdType) { case CL_COMMAND_MAP_BUFFER: case CL_COMMAND_MAP_IMAGE: context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, !transferProperties.memObj->isMemObjZeroCopy(), static_cast(transferProperties.memObj)); break; case CL_COMMAND_UNMAP_MEM_OBJECT: if (!transferProperties.memObj->isMemObjZeroCopy()) { context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true, transferProperties.ptr, static_cast(transferProperties.memObj)); break; } context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, false, transferProperties.ptr); break; case CL_COMMAND_READ_BUFFER: case CL_COMMAND_WRITE_BUFFER: context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true, static_cast(transferProperties.memObj), transferProperties.ptr); break; } } } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/command_queue/definitions/000077500000000000000000000000001363734646600252725ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/command_queue/definitions/enqueue_init_dispatch_globals.h000066400000000000000000000012221363734646600335140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue_hw.h" namespace NEO { struct DispatchGlobalsArgs { }; template cl_int CommandQueueHw::enqueueInitDispatchGlobals(DispatchGlobalsArgs *dispatchGlobalsArgs, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return CL_INVALID_VALUE; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_barrier.h000066400000000000000000000017331363734646600263110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/memory_manager/surface.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueBarrierWithWaitList( cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_common.h000066400000000000000000001431611363734646600261550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include 
"shared/source/helpers/array_count.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/program/sync_buffer_handler.h" #include "shared/source/utilities/range.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/cl_blit_properties.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/helpers/enqueue_properties.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/program/printf_handler.h" #include #include namespace NEO { template template void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount], bool blocking, Kernel *kernel, cl_uint workDim, const size_t globalOffsets[3], const size_t workItems[3], const size_t *localWorkSizesIn, const size_t *enqueuedWorkSizes, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { BuiltInOwnershipWrapper builtInLock; MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo(kernel); if (DebugManager.flags.ForceDispatchScheduler.get()) { forceDispatchScheduler(multiDispatchInfo); } else { if 
(kernel->isAuxTranslationRequired()) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getDevice()); builtInLock.takeOwnership(builder, this->context); kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation); multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); if (!memObjsForAuxTranslation.empty()) { dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); } } if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) { DispatchInfoBuilder builder; builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3{0, 0, 0}, localWorkSizesIn); builder.setKernel(kernel); builder.bake(multiDispatchInfo); } else { auto builder = kernel->getKernelInfo().builtinDispatchBuilder; builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets); if (multiDispatchInfo.size() == 0) { return; } } if (kernel->isAuxTranslationRequired()) { if (!memObjsForAuxTranslation.empty()) { UNRECOVERABLE_IF(kernel->isParentKernel); dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); } } } if (HwHelperHw::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo)) { setupBlitAuxTranslation(multiDispatchInfo); } enqueueHandler(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event); } template void CommandQueueHw::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) { SchedulerKernel &scheduler = getContext().getSchedulerKernel(); DispatchInfo dispatchInfo(&scheduler, 1, Vec3(scheduler.getGws(), 1, 1), Vec3(scheduler.getLws(), 1, 1), Vec3(0, 0, 0)); auto devQueue = this->getContext().getDefaultDeviceQueue(); DeviceQueueHw *devQueueHw = castToObjectOrAbort>(devQueue); scheduler.createReflectionSurface(); GraphicsAllocation *reflectionSurface = scheduler.getKernelReflectionSurface(); devQueueHw->resetDeviceQueue(); 
scheduler.setArgs(devQueueHw->getQueueBuffer(), devQueueHw->getStackBuffer(), devQueueHw->getEventPoolBuffer(), devQueueHw->getSlbBuffer(), devQueueHw->getDshBuffer(), reflectionSurface, devQueueHw->getQueueStorageBuffer(), this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation()); multiDispatchInfo.push(dispatchInfo); } template template void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, size_t numSurfaceForResidency, bool blocking, const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { if (multiDispatchInfo.empty() && !isCommandWithoutKernel(commandType)) { enqueueHandler(surfacesForResidency, numSurfaceForResidency, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event); if (event) { castToObjectOrAbort(*event)->setCmdType(commandType); } return; } Kernel *parentKernel = multiDispatchInfo.peekParentKernel(); auto devQueue = this->getContext().getDefaultDeviceQueue(); DeviceQueueHw *devQueueHw = castToObject>(devQueue); auto clearAllDependencies = queueDependenciesClearRequired(); TagNode *hwTimeStamps = nullptr; auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); TimeStampData queueTimeStamp; if (isProfilingEnabled() && event) { this->getDevice().getOSTime()->getCpuGpuTime(&queueTimeStamp); } EventBuilder eventBuilder; if (event) { eventBuilder.create(this, commandType, CompletionStamp::levelNotReady, 0); *event = eventBuilder.getEvent(); if (eventBuilder.getEvent()->isProfilingEnabled()) { eventBuilder.getEvent()->setQueueTimeStamp(&queueTimeStamp); if (isCommandWithoutKernel(commandType)) { eventBuilder.getEvent()->setCPUProfilingPath(true); eventBuilder.getEvent()->setQueueTimeStamp(); } } DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", commandType, "output Event", eventBuilder.getEvent()); } std::unique_ptr blockedCommandsData; std::unique_ptr printfHandler; TakeOwnershipWrapper> 
queueOwnership(*this); auto blockQueue = false; auto taskLevel = 0u; obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType); bool blitEnqueue = blitEnqueueAllowed(commandType); DBG_LOG(EventsDebugEnable, "blockQueue", blockQueue, "virtualEvent", virtualEvent, "taskLevel", taskLevel); if (parentKernel && !blockQueue) { while (!devQueueHw->isEMCriticalSectionFree()) ; } enqueueHandlerHook(commandType, multiDispatchInfo); aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo); if (DebugManager.flags.MakeEachEnqueueBlocking.get()) { blocking = true; } TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); CsrDependencies csrDeps; BlitPropertiesContainer blitPropertiesContainer; bool enqueueWithBlitAuxTranslation = HwHelperHw::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo); if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependencies(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); auto allocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); size_t nodesCount = 0u; if (blitEnqueue || isCacheFlushCommand(commandType)) { nodesCount = 1; } else if (!multiDispatchInfo.empty()) { nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo); } if (isCacheFlushForBcsRequired() && (blitEnqueue || enqueueWithBlitAuxTranslation)) { timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag()); } if (blitEnqueue && !blockQueue && getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired()) { timestampPacketDependencies.barrierNodes.add(allocator->getTag()); } if (nodesCount > 0) { obtainNewTimestampPacketNodes(nodesCount, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies); csrDeps.push_back(×tampPacketDependencies.previousEnqueueNodes); } } auto &commandStream = 
*obtainCommandStream(csrDeps, blitEnqueue, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, surfacesForResidency, numSurfaceForResidency); auto commandStreamStart = commandStream.getUsed(); if (HwHelperHw::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo)) { processDispatchForBlitAuxTranslation(multiDispatchInfo, blitPropertiesContainer, timestampPacketDependencies, eventsRequest, blockQueue); } if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes); } bool flushDependenciesForNonKernelCommand = false; if (blitEnqueue) { blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, eventsRequest, commandStream, commandType, blockQueue)); } else if (multiDispatchInfo.empty() == false) { processDispatchForKernels(multiDispatchInfo, printfHandler, eventBuilder.getEvent(), hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(), timestampPacketDependencies); } else if (isCacheFlushCommand(commandType)) { processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps); } else if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { if (CL_COMMAND_BARRIER == commandType) { getGpgpuCommandStreamReceiver().requestStallingPipeControlOnNextFlush(); } for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) { auto waitlistEvent = castToObjectOrAbort(eventsRequest.eventWaitList[i]); if (waitlistEvent->getTimestampPacketNodes()) { flushDependenciesForNonKernelCommand = true; if (eventBuilder.getEvent()) { eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes()); } } } if (flushDependenciesForNonKernelCommand) { 
TimestampPacketHelper::programCsrDependencies(commandStream, csrDeps); } } CompletionStamp completionStamp = {CompletionStamp::levelNotReady, taskLevel, 0}; const EnqueueProperties enqueueProperties(blitEnqueue, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType), flushDependenciesForNonKernelCommand, &blitPropertiesContainer); if (!blockQueue) { csrDeps.makeResident(getGpgpuCommandStreamReceiver()); if (parentKernel) { processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking); } if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) { completionStamp = enqueueNonBlocked( surfacesForResidency, numSurfaceForResidency, commandStream, commandStreamStart, blocking, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel, printfHandler.get()); if (parentKernel) { getGpgpuCommandStreamReceiver().setMediaVFEStateDirty(true); if (devQueueHw->getSchedulerReturnInstance() > 0) { waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false); this->runSchedulerSimulation(*devQueueHw, *parentKernel); } } } else if (enqueueProperties.isFlushWithoutKernelRequired()) { completionStamp = enqueueCommandWithoutKernel( surfacesForResidency, numSurfaceForResidency, commandStream, commandStreamStart, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, taskLevel); } else { UNRECOVERABLE_IF(enqueueProperties.operation != EnqueueProperties::Operation::EnqueueWithoutSubmission); auto maxTaskCount = this->taskCount; for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) { auto event = castToObject(eventWaitList[eventId]); if (!event->isUserEvent() && !event->isExternallySynchronized()) { maxTaskCount = std::max(maxTaskCount, event->peekTaskCount()); } } //inherit data from event_wait_list and previous packets completionStamp.flushStamp = this->flushStamp->peekStamp(); completionStamp.taskCount = maxTaskCount; 
completionStamp.taskLevel = taskLevel; if (eventBuilder.getEvent() && isProfilingEnabled()) { TimeStampData submitTimeStamp; this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp); eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp); eventBuilder.getEvent()->setSubmitTimeStamp(); eventBuilder.getEvent()->setStartTimeStamp(); } } if (eventBuilder.getEvent()) { eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference()); } } updateFromCompletionStamp(completionStamp); if (eventBuilder.getEvent()) { eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, completionStamp.taskLevel, completionStamp.flushStamp); FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", eventBuilder.getEvent(), "taskLevel", eventBuilder.getEvent()->taskLevel.load()); } if (blockQueue) { if (parentKernel) { size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM; } enqueueBlocked(commandType, surfacesForResidency, numSurfaceForResidency, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, std::move(printfHandler)); } queueOwnership.unlock(); commandStreamRecieverOwnership.unlock(); if (blocking) { if (blockQueue) { while (isQueueBlocked()) { } waitUntilComplete(taskCount, flushStamp->peekStamp(), false); } else { waitUntilComplete(taskCount, flushStamp->peekStamp(), false); if (printfHandler) { printfHandler->printEnqueueOutput(); } } } } template template void CommandQueueHw::processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo, std::unique_ptr &printfHandler, Event *event, TagNode *&hwTimeStamps, bool blockQueue, DeviceQueueHw *devQueueHw, CsrDependencies &csrDeps, KernelOperation *blockedCommandsData, TimestampPacketDependencies ×tampPacketDependencies) { TagNode *hwPerfCounter = nullptr; 
FileLoggerInstance().dumpKernelArgs(&multiDispatchInfo); printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device)); if (printfHandler) { printfHandler->prepareDispatch(multiDispatchInfo); } if (commandType == CL_COMMAND_NDRANGE_KERNEL) { if (multiDispatchInfo.peekMainKernel()->getProgram()->isKernelDebugEnabled()) { setupDebugSurface(multiDispatchInfo.peekMainKernel()); } } if (event && this->isProfilingEnabled()) { // Get allocation for timestamps hwTimeStamps = event->getHwTimeStampNode(); } if (auto parentKernel = multiDispatchInfo.peekParentKernel()) { parentKernel->createReflectionSurface(); parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue()); parentKernel->patchEventPool(context->getDefaultDeviceQueue()); parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get()); if (!blockQueue) { devQueueHw->resetDeviceQueue(); devQueueHw->acquireEMCriticalSection(); } } if (event && this->isPerfCountersEnabled()) { hwPerfCounter = event->getHwPerfCounterNode(); } HardwareInterface::dispatchWalker( *this, multiDispatchInfo, csrDeps, blockedCommandsData, hwTimeStamps, hwPerfCounter, ×tampPacketDependencies, timestampPacketContainer.get(), commandType); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { for (auto &dispatchInfo : multiDispatchInfo) { for (auto &patchInfoData : dispatchInfo.getKernel()->getPatchInfoDataList()) { getGpgpuCommandStreamReceiver().getFlatBatchBufferHelper().setPatchInfoData(patchInfoData); } } } getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(), multiDispatchInfo.getRequiredPrivateScratchSize()); } template BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, LinearStream &commandStream, uint32_t commandType, bool queueBlocked) { auto blitDirection = 
ClBlitProperties::obtainBlitDirection(commandType); auto blitCommandStreamReceiver = getBcsCommandStreamReceiver(); auto blitProperties = ClBlitProperties::constructProperties(blitDirection, *blitCommandStreamReceiver, multiDispatchInfo.peekBuiltinOpParams()); if (!queueBlocked) { eventsRequest.fillCsrDependencies(blitProperties.csrDependencies, *blitCommandStreamReceiver, CsrDependencies::DependenciesType::All); blitProperties.csrDependencies.push_back(×tampPacketDependencies.cacheFlushNodes); blitProperties.csrDependencies.push_back(×tampPacketDependencies.previousEnqueueNodes); blitProperties.csrDependencies.push_back(×tampPacketDependencies.barrierNodes); } auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0); blitProperties.outputTimestampPacket = currentTimestampPacketNode; if (isCacheFlushForBcsRequired()) { auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, cacheFlushTimestampPacketGpuAddress, 0, true, device->getHardwareInfo()); } TimestampPacketHelper::programSemaphoreWithImplicitDependency(commandStream, *currentTimestampPacketNode); return blitProperties; } template void CommandQueueHw::processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo, BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies ×tampPacketDependencies, const EventsRequest &eventsRequest, bool queueBlocked) { auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(); auto numBuffers = multiDispatchInfo.getMemObjsForAuxTranslation()->size(); blitPropertiesContainer.resize(numBuffers * 2); auto bufferIndex = 0; for (auto &buffer : *multiDispatchInfo.getMemObjsForAuxTranslation()) { { // Aux 
to NonAux blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux, buffer->getGraphicsAllocation()); auto auxToNonAuxNode = nodesAllocator->getTag(); timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode); } { // NonAux to Aux blitPropertiesContainer[bufferIndex + numBuffers] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux, buffer->getGraphicsAllocation()); auto nonAuxToAuxNode = nodesAllocator->getTag(); timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode); } bufferIndex++; } if (!queueBlocked) { CsrDependencies csrDeps; eventsRequest.fillCsrDependencies(csrDeps, *getBcsCommandStreamReceiver(), CsrDependencies::DependenciesType::All); BlitProperties::setupDependenciesForAuxTranslation(blitPropertiesContainer, timestampPacketDependencies, *this->timestampPacketContainer, csrDeps, getGpgpuCommandStreamReceiver(), *getBcsCommandStreamReceiver()); } } template void CommandQueueHw::processDispatchForCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, CsrDependencies &csrDeps) { TimestampPacketHelper::programCsrDependencies(*commandStream, csrDeps); uint64_t postSyncAddress = 0; if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNodeForPostSync = timestampPacketContainer->peekNodes().at(0); postSyncAddress = timestampPacketNodeForPostSync->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); } submitCacheFlush(surfaces, numSurfaces, commandStream, postSyncAddress); } template void CommandQueueHw::processDeviceEnqueue(DeviceQueueHw *devQueueHw, const MultiDispatchInfo &multiDispatchInfo, TagNode *hwTimeStamps, bool &blocking) { auto parentKernel = multiDispatchInfo.peekParentKernel(); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); bool isCcsUsed = 
EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType()); uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1; devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM), *devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), parentKernel, (uint32_t)multiDispatchInfo.size(), getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(), taskCount, hwTimeStamps, isCcsUsed); SchedulerKernel &scheduler = getContext().getSchedulerKernel(); scheduler.setArgs(devQueueHw->getQueueBuffer(), devQueueHw->getStackBuffer(), devQueueHw->getEventPoolBuffer(), devQueueHw->getSlbBuffer(), devQueueHw->getDshBuffer(), parentKernel->getKernelReflectionSurface(), devQueueHw->getQueueStorageBuffer(), this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(), devQueueHw->getDebugQueue()); auto preemptionMode = PreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo); GpgpuWalkerHelper::dispatchScheduler( *this->commandStream, *devQueueHw, preemptionMode, scheduler, &getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), isCcsUsed); scheduler.makeResident(getGpgpuCommandStreamReceiver()); parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getGpgpuCommandStreamReceiver()); if (parentKernel->isAuxTranslationRequired()) { blocking = true; } } template void CommandQueueHw::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) { auto isQueueBlockedStatus = isQueueBlocked(); taskLevel = getTaskLevelFromWaitList(this->taskLevel, numEventsInWaitList, eventWaitList); blockQueueStatus = (taskLevel == CompletionStamp::levelNotReady) || isQueueBlockedStatus; auto taskLevelUpdateRequired = isTaskLevelUpdateRequired(taskLevel, eventWaitList, numEventsInWaitList, commandType); if 
(taskLevelUpdateRequired) { taskLevel++; this->taskLevel = taskLevel; } } template bool CommandQueueHw::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) { bool updateTaskLevel = true; //if we are blocked by user event then no update if (taskLevel == CompletionStamp::levelNotReady) { updateTaskLevel = false; } //if we are executing command without kernel then it will inherit state from //previous commands, barrier is exception if (isCommandWithoutKernel(commandType) && commandType != CL_COMMAND_BARRIER) { updateTaskLevel = false; } //ooq special cases starts here if (this->isOOQEnabled()) { //if no wait list and barrier , do not update task level if (eventWaitList == nullptr && commandType != CL_COMMAND_BARRIER) { updateTaskLevel = false; } //if we have waitlist then deduce task level from waitlist and check if it is higher then current task level of queue if (eventWaitList != nullptr) { auto taskLevelFromEvents = getTaskLevelFromWaitList(0, numEventsInWaitList, eventWaitList); taskLevelFromEvents++; if (taskLevelFromEvents <= this->taskLevel) { updateTaskLevel = false; } } } return updateTaskLevel; } template template CompletionStamp CommandQueueHw::enqueueNonBlocked( Surface **surfaces, size_t surfaceCount, LinearStream &commandStream, size_t commandStreamStart, bool &blocking, const MultiDispatchInfo &multiDispatchInfo, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel, PrintfHandler *printfHandler) { UNRECOVERABLE_IF(multiDispatchInfo.empty()); auto implicitFlush = false; if (printfHandler) { blocking = true; printfHandler->makeResident(getGpgpuCommandStreamReceiver()); } if (multiDispatchInfo.peekMainKernel()->usesSyncBuffer()) { auto &gws = multiDispatchInfo.begin()->getGWS(); auto &lws = multiDispatchInfo.begin()->getLocalWorkgroupSize(); 
size_t workGroupsCount = (gws.x * gws.y * gws.z) / (lws.x * lws.y * lws.z); device->syncBufferHandler->prepareForEnqueue(workGroupsCount, *multiDispatchInfo.peekMainKernel(), getGpgpuCommandStreamReceiver()); } if (timestampPacketContainer) { timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.cacheFlushNodes.makeResident(getGpgpuCommandStreamReceiver()); } bool anyUncacheableArgs = false; auto requiresCoherency = false; for (auto surface : CreateRange(surfaces, surfaceCount)) { surface->makeResident(getGpgpuCommandStreamReceiver()); requiresCoherency |= surface->IsCoherent; if (!surface->allowsL3Caching()) { anyUncacheableArgs = true; } } auto mediaSamplerRequired = false; uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber; auto specialPipelineSelectMode = false; Kernel *kernel = nullptr; bool usePerDssBackedBuffer = false; for (auto &dispatchInfo : multiDispatchInfo) { if (kernel != dispatchInfo.getKernel()) { kernel = dispatchInfo.getKernel(); } else { continue; } kernel->makeResident(getGpgpuCommandStreamReceiver()); requiresCoherency |= kernel->requiresCoherency(); mediaSamplerRequired |= kernel->isVmeKernel(); auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired; numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel); specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode(); if (kernel->hasUncacheableStatelessArgs()) { anyUncacheableArgs = true; } if (kernel->requiresPerDssBackedBuffer()) { usePerDssBackedBuffer = true; } } if (mediaSamplerRequired) { DEBUG_BREAK_IF(device->getDeviceInfo().preemptionSupported != false); } TimeStampData submitTimeStamp; if (isProfilingEnabled() && eventBuilder.getEvent()) { this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp); eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp); 
getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getBaseGraphicsAllocation()); if (isPerfCountersEnabled()) { getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterNode()->getBaseGraphicsAllocation()); } } IndirectHeap *dsh = nullptr; IndirectHeap *ioh = nullptr; if (multiDispatchInfo.peekParentKernel()) { DeviceQueueHw *pDevQueue = castToObject>(this->getContext().getDefaultDeviceQueue()); DEBUG_BREAK_IF(pDevQueue == nullptr); dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); // In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks. ioh = dsh; implicitFlush = true; } else { dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); } auto allocNeedsFlushDC = false; if (!device->isFullRangeSvm()) { if (std::any_of(getGpgpuCommandStreamReceiver().getResidencyAllocations().begin(), getGpgpuCommandStreamReceiver().getResidencyAllocations().end(), [](const auto allocation) { return allocation->isFlushL3Required(); })) { allocNeedsFlushDC = true; } } DispatchFlags dispatchFlags( {}, //csrDependencies ×tampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes {}, //pipelineSelectArgs this->flushStamp->getStampReference(), //flushStampReference getThrottle(), //throttle PreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), //preemptionMode numGrfRequired, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy getSliceCount(), //sliceCount blocking, //blocking shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, //dcFlush multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(), //useSLM true, //guardCommandBufferWithPipeControl commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired requiresCoherency, //requiresCoherency (QueuePriority::LOW 
== priority), //lowPriority implicitFlush, //implicitFlush !eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired usePerDssBackedBuffer //usePerDssBackedBuffer ); dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode; if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr); dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } DEBUG_BREAK_IF(taskLevel >= CompletionStamp::levelNotReady); if (anyUncacheableArgs) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff; } else if (!kernel->areStatelessWritesUsed()) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On; } if (this->dispatchHints != 0) { dispatchFlags.engineHints = this->dispatchHints; dispatchFlags.epilogueRequired = true; } if (gtpinIsGTPinInitialized()) { gtpinNotifyPreFlushTask(this); } if (enqueueProperties.blitPropertiesContainer->size() > 0) { this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false); } printDebugString(DebugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast(dispatchFlags.preemptionMode)); CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask( commandStream, commandStreamStart, *dsh, *ioh, getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), taskLevel, dispatchFlags, getDevice()); if (gtpinIsGTPinInitialized()) { gtpinNotifyFlushTask(completionStamp.taskCount); } return completionStamp; } template void CommandQueueHw::enqueueBlocked( uint32_t commandType, Surface **surfaces, size_t surfaceCount, const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies 
×tampPacketDependencies, std::unique_ptr &blockedCommandsData, const EnqueueProperties &enqueueProperties, EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr printfHandler) { TakeOwnershipWrapper> queueOwnership(*this); //store previous virtual event as it will add dependecies to new virtual event if (this->virtualEvent) { DBG_LOG(EventsDebugEnable, "enqueueBlocked", "previousVirtualEvent", this->virtualEvent); } EventBuilder internalEventBuilder; EventBuilder *eventBuilder; // check if event will be exposed externally if (externalEventBuilder.getEvent()) { externalEventBuilder.getEvent()->incRefInternal(); eventBuilder = &externalEventBuilder; DBG_LOG(EventsDebugEnable, "enqueueBlocked", "output event as virtualEvent", virtualEvent); } else { // it will be an internal event internalEventBuilder.create(this, context); eventBuilder = &internalEventBuilder; DBG_LOG(EventsDebugEnable, "enqueueBlocked", "new virtualEvent", eventBuilder->getEvent()); } auto outEvent = eventBuilder->getEvent(); //update queue taskCount taskCount = outEvent->getCompletionStamp(); std::unique_ptr command; bool storeTimestampPackets = false; if (blockedCommandsData) { if (enqueueProperties.blitPropertiesContainer) { blockedCommandsData->blitPropertiesContainer = *enqueueProperties.blitPropertiesContainer; blockedCommandsData->blitEnqueue = true; } storeTimestampPackets = (timestampPacketContainer != nullptr); } if (enqueueProperties.operation != EnqueueProperties::Operation::GpuKernel) { command = std::make_unique(*this, blockedCommandsData); } else { //store task data in event std::vector allSurfaces; Kernel *kernel = nullptr; for (auto &dispatchInfo : multiDispatchInfo) { if (kernel != dispatchInfo.getKernel()) { kernel = dispatchInfo.getKernel(); } else { continue; } kernel->getResidency(allSurfaces); } for (auto &surface : CreateRange(surfaces, surfaceCount)) { allSurfaces.push_back(surface->duplicate()); } PreemptionMode preemptionMode = 
PreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo); bool slmUsed = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(); command = std::make_unique(*this, blockedCommandsData, allSurfaces, shouldFlushDC(commandType, printfHandler.get()), slmUsed, commandType == CL_COMMAND_NDRANGE_KERNEL, std::move(printfHandler), preemptionMode, multiDispatchInfo.peekMainKernel(), (uint32_t)multiDispatchInfo.size()); } if (storeTimestampPackets) { for (cl_uint i = 0; i < eventsRequest.numEventsInWaitList; i++) { auto event = castToObjectOrAbort(eventsRequest.eventWaitList[i]); event->incRefInternal(); } command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies)); command->setEventsRequest(eventsRequest); } outEvent->setCommand(std::move(command)); eventBuilder->addParentEvents(ArrayRef(eventsRequest.eventWaitList, eventsRequest.numEventsInWaitList)); eventBuilder->addParentEvent(this->virtualEvent); eventBuilder->finalize(); if (this->virtualEvent) { this->virtualEvent->decRefInternal(); } this->virtualEvent = outEvent; } template CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( Surface **surfaces, size_t surfaceCount, LinearStream &commandStream, size_t commandStreamStart, bool &blocking, const EnqueueProperties &enqueueProperties, TimestampPacketDependencies ×tampPacketDependencies, EventsRequest &eventsRequest, EventBuilder &eventBuilder, uint32_t taskLevel) { if (timestampPacketContainer) { timestampPacketContainer->makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.previousEnqueueNodes.makeResident(getGpgpuCommandStreamReceiver()); timestampPacketDependencies.cacheFlushNodes.makeResident(getGpgpuCommandStreamReceiver()); } for (auto surface : CreateRange(surfaces, surfaceCount)) { surface->makeResident(getGpgpuCommandStreamReceiver()); } if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) { UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer); 
this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false); } DispatchFlags dispatchFlags( {}, //csrDependencies ×tampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes {}, //pipelineSelectArgs flushStamp->getStampReference(), //flushStampReference getThrottle(), //throttle device->getPreemptionMode(), //preemptionMode GrfConfig::DefaultGrfNumber, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy getSliceCount(), //sliceCount blocking, //blocking false, //dcFlush false, //useSLM true, //guardCommandBufferWithPipeControl false, //GSBA32BitRequired false, //requiresCoherency false, //lowPriority (enqueueProperties.operation == EnqueueProperties::Operation::Blit), //implicitFlush getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false //usePerDssBackedBuffer ); if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr); dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask( commandStream, commandStreamStart, getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u), getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u), getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), taskLevel, dispatchFlags, getDevice()); return completionStamp; } template void CommandQueueHw::computeOffsetsValueForRectCommands(size_t *bufferOffset, size_t *hostOffset, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch) { size_t computedBufferRowPitch = bufferRowPitch ? 
bufferRowPitch : region[0]; size_t computedBufferSlicePitch = bufferSlicePitch ? bufferSlicePitch : region[1] * computedBufferRowPitch; size_t computedHostRowPitch = hostRowPitch ? hostRowPitch : region[0]; size_t computedHostSlicePitch = hostSlicePitch ? hostSlicePitch : region[1] * computedHostRowPitch; *bufferOffset = bufferOrigin[2] * computedBufferSlicePitch + bufferOrigin[1] * computedBufferRowPitch + bufferOrigin[0]; *hostOffset = hostOrigin[2] * computedHostSlicePitch + hostOrigin[1] * computedHostRowPitch + hostOrigin[0]; } template size_t CommandQueueHw::calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image) { auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; auto dstRowPitch = rowPitch ? rowPitch : region[0] * bytesPerPixel; auto dstSlicePitch = slicePitch ? slicePitch : ((image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1 : region[1]) * dstRowPitch); return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_copy_buffer.h000066400000000000000000000035201363734646600271620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyBuffer( Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event 
*event) { MultiDispatchInfo dispatchInfo; auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer; if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {srcOffset, 0, 0}; dc.dstOffset = {dstOffset, 0, 0}; dc.size = {size, 0, 0}; builder.buildDispatchInfos(dispatchInfo, dc); MemObjSurface s1(srcBuffer); MemObjSurface s2(dstBuffer); Surface *surfaces[] = {&s1, &s2}; enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_copy_buffer_rect.h000066400000000000000000000040551363734646600302030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyBufferRect( Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto eBuiltInOps = EBuiltInOps::CopyBufferRect; if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; } auto &builder = 
BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface srcBufferSurf(srcBuffer); MemObjSurface dstBufferSurf(dstBuffer); Surface *surfaces[] = {&srcBufferSurf, &dstBufferSurf}; BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = srcOrigin; dc.dstOffset = dstOrigin; dc.size = region; dc.srcRowPitch = srcRowPitch; dc.srcSlicePitch = srcSlicePitch; dc.dstRowPitch = dstRowPitch; dc.dstSlicePitch = dstSlicePitch; MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_copy_buffer_to_image.h000066400000000000000000000040241363734646600310260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyBufferToImage( Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto eBuiltInOpsType = EBuiltInOps::CopyBufferToImage3d; if (forceStateless(srcBuffer->getSize())) { eBuiltInOpsType = EBuiltInOps::CopyBufferToImage3dStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType, this->getDevice()); 
BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface srcBufferSurf(srcBuffer); MemObjSurface dstImgSurf(dstImage); Surface *surfaces[] = {&srcBufferSurf, &dstImgSurf}; BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstImage; dc.srcOffset = {srcOffset, 0, 0}; dc.dstOffset = dstOrigin; dc.size = region; if (dstImage->getImageDesc().num_mip_levels > 0) { dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin); } MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_copy_image.h000066400000000000000000000037431363734646600270020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyImage( Image *srcImage, Image *dstImage, const size_t srcOrigin[3], const size_t dstOrigin[3], const size_t region[3], cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { MultiDispatchInfo di; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface srcImgSurf(srcImage); MemObjSurface dstImgSurf(dstImage); Surface *surfaces[] = {&srcImgSurf, &dstImgSurf}; BuiltinOpParams dc; dc.srcMemObj = 
srcImage; dc.dstMemObj = dstImage; dc.srcOffset = srcOrigin; dc.dstOffset = dstOrigin; dc.size = region; if (srcImage->getImageDesc().num_mip_levels > 0) { dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, srcOrigin); } if (dstImage->getImageDesc().num_mip_levels > 0) { dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, dstOrigin); } builder.buildDispatchInfos(di, dc); enqueueHandler( surfaces, false, di, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_copy_image_to_buffer.h000066400000000000000000000040231363734646600310250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueCopyImageToBuffer( Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto eBuiltInOpsType = EBuiltInOps::CopyImage3dToBuffer; if (forceStateless(dstBuffer->getSize())) { eBuiltInOpsType = EBuiltInOps::CopyImage3dToBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOpsType, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface srcImgSurf(srcImage); MemObjSurface dstBufferSurf(dstBuffer); Surface *surfaces[] = {&srcImgSurf, &dstBufferSurf}; BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstMemObj = 
dstBuffer; dc.srcOffset = srcOrigin; dc.dstOffset = {dstOffset, 0, 0}; dc.size = region; if (srcImage->getImageDesc().num_mip_levels > 0) { dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, srcOrigin); } MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_fill_buffer.h000066400000000000000000000062651363734646600271470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueFillBuffer( Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto memoryManager = getDevice().getMemoryManager(); DEBUG_BREAK_IF(nullptr == memoryManager); auto patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), alignUp(patternSize, MemoryConstants::cacheLineSize), GraphicsAllocation::AllocationType::FILL_PATTERN}); if (patternSize == 1) { int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else if (patternSize == 2) { int patternInt = 
(uint32_t)((*(uint16_t *)pattern << 16) | *(uint16_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else { memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize); } auto eBuiltInOps = EBuiltInOps::FillBuffer; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::FillBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); BuiltinOpParams dc; MemObj patternMemObj(this->context, 0, {}, 0, 0, alignUp(patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); MemObjSurface s1(buffer); GeneralSurface s2(patternAllocation); Surface *surfaces[] = {&s1, &s2}; enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); auto storageForAllocation = getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); storageForAllocation->storeAllocationWithTaskCount(std::unique_ptr(patternAllocation), TEMPORARY_ALLOCATION, taskCount); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_fill_image.h000066400000000000000000000030731363734646600267520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/image.h" #include 
"opencl/source/memory_manager/mem_obj_surface.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueFillImage( Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { MultiDispatchInfo di; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillImage3d, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MemObjSurface dstImgSurf(image); Surface *surfaces[] = {&dstImgSurf}; BuiltinOpParams dc; dc.srcPtr = const_cast(fillColor); dc.dstMemObj = image; dc.srcOffset = {0, 0, 0}; dc.dstOffset = origin; dc.size = region; builder.buildDispatchInfos(di, dc); enqueueHandler( surfaces, false, di, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_kernel.h000066400000000000000000000115531363734646600261440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueKernel( cl_kernel clKernel, cl_uint workDim, const size_t *globalWorkOffsetIn, const size_t *globalWorkSizeIn, const size_t *localWorkSizeIn, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { size_t region[3] = {1, 1, 1}; size_t globalWorkOffset[3] = {0, 0, 0}; size_t workGroupSize[3] = {1, 1, 1}; size_t enqueuedLocalWorkSize[3] = {0, 
0, 0}; auto &kernel = *castToObjectOrAbort(clKernel); const auto &kernelInfo = kernel.getKernelInfo(); if (kernel.isParentKernel && !this->context->getDefaultDeviceQueue()) { return CL_INVALID_OPERATION; } if (!kernel.isPatched()) { if (event) { *event = nullptr; } return CL_INVALID_KERNEL_ARGS; } if (kernel.isUsingSharedObjArgs()) { kernel.resetSharedObjectsPatchAddresses(); } bool haveRequiredWorkGroupSize = false; if (kernelInfo.reqdWorkGroupSize[0] != WorkloadInfo::undefinedOffset) { haveRequiredWorkGroupSize = true; } size_t remainder = 0; size_t totalWorkItems = 1u; const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr; for (auto i = 0u; i < workDim; i++) { region[i] = globalWorkSizeIn ? globalWorkSizeIn[i] : 0; if (region[i] == 0 && (kernel.getDevice().getEnabledClVersion() < 21)) { return CL_INVALID_GLOBAL_WORK_SIZE; } globalWorkOffset[i] = globalWorkOffsetIn ? globalWorkOffsetIn[i] : 0; if (localWorkSizeIn) { if (haveRequiredWorkGroupSize) { if (kernelInfo.reqdWorkGroupSize[i] != localWorkSizeIn[i]) { return CL_INVALID_WORK_GROUP_SIZE; } } if (localWorkSizeIn[i] == 0) { return CL_INVALID_WORK_GROUP_SIZE; } if (kernel.getAllowNonUniform()) { workGroupSize[i] = std::min(localWorkSizeIn[i], std::max(static_cast(1), globalWorkSizeIn[i])); } else { workGroupSize[i] = localWorkSizeIn[i]; } enqueuedLocalWorkSize[i] = localWorkSizeIn[i]; totalWorkItems *= localWorkSizeIn[i]; } remainder += region[i] % workGroupSize[i]; } if (remainder != 0 && !kernel.getAllowNonUniform()) { return CL_INVALID_WORK_GROUP_SIZE; } if (haveRequiredWorkGroupSize) { localWkgSizeToPass = kernelInfo.reqdWorkGroupSize; } NullSurface s; Surface *surfaces[] = {&s}; if (context->isProvidingPerformanceHints()) { if (kernel.hasPrintfOutput()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernel.getKernelInfo().name.c_str()); } if (kernel.requiresCoherency()) { 
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernel.getKernelInfo().name.c_str()); } } if (kernel.getKernelInfo().builtinDispatchBuilder != nullptr) { cl_int err = kernel.getKernelInfo().builtinDispatchBuilder->validateDispatch(&kernel, workDim, Vec3(region), Vec3(workGroupSize), Vec3(globalWorkOffset)); if (err != CL_SUCCESS) return err; } DBG_LOG(PrintDispatchParameters, "Kernel: ", kernel.getKernelInfo().name, ",LWS:, ", localWorkSizeIn ? localWorkSizeIn[0] : 0, ",", localWorkSizeIn ? localWorkSizeIn[1] : 0, ",", localWorkSizeIn ? localWorkSizeIn[2] : 0, ",GWS:,", globalWorkSizeIn[0], ",", globalWorkSizeIn[1], ",", globalWorkSizeIn[2], ",SIMD:, ", kernel.getKernelInfo().getMaxSimdSize()); if (totalWorkItems > kernel.maxKernelWorkGroupSize) { return CL_INVALID_WORK_GROUP_SIZE; } enqueueHandler( surfaces, false, &kernel, workDim, globalWorkOffset, region, localWkgSizeToPass, enqueuedLocalWorkSize, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_marker.h000066400000000000000000000020231363734646600261350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueMarkerWithWaitList( cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_migrate_mem_objects.h000066400000000000000000000026621363734646600306640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/memory_manager/surface.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" namespace NEO { template cl_int CommandQueueHw::enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_read_buffer.h000066400000000000000000000125521363734646600271300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueReadBuffer( Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event 
*event) { if (nullptr == mapAllocation) { notifyEnqueueReadBuffer(buffer, !!blockingRead); } const cl_command_type cmdType = CL_COMMAND_READ_BUFFER; bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr, numEventsInWaitList, eventWaitList); //check if we are dealing with SVM pointer here for which we already have an allocation if (!mapAllocation && this->getContext().getSVMAllocsManager()) { auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr); if (svmEntry) { if ((svmEntry->gpuAllocation->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) { return CL_INVALID_OPERATION; } mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocation; if (isCpuCopyAllowed) { if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { isCpuCopyAllowed = false; } } } } if (isCpuCopyAllowed) { if (isMemTransferNeeded) { return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr, numEventsInWaitList, eventWaitList, event); } else { return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cmdType, buffer, offset, size, ptr, numEventsInWaitList, eventWaitList, event); } } else if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, ptr, cmdType, blockingRead, numEventsInWaitList, eventWaitList, event); } auto eBuiltInOps = EBuiltInOps::CopyBufferToBuffer; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferToBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); void *dstPtr = ptr; MemObjSurface bufferSurf(buffer); HostPtrSurface hostPtrSurf(dstPtr, size); GeneralSurface mapSurface; Surface *surfaces[] = {&bufferSurf, nullptr}; if (mapAllocation) { surfaces[1] = 
&mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr size_t dstOffset = ptrDiff(dstPtr, mapAllocation->getUnderlyingBuffer()); dstPtr = reinterpret_cast(mapAllocation->getGpuAddress() + dstOffset); } else { surfaces[1] = &hostPtrSurf; if (size != 0) { auto &csr = blitEnqueueAllowed(cmdType) ? *getBcsCommandStreamReceiver() : getGpgpuCommandStreamReceiver(); bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedDstPtr = alignDown(dstPtr, 4); size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); BuiltinOpParams dc; dc.dstPtr = alignedDstPtr; dc.dstOffset = {dstPtrOffset, 0, 0}; dc.srcMemObj = buffer; dc.srcOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation(); MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER, true, static_cast(buffer), ptr); if (!isL3Capable(ptr, size)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize); } } enqueueHandler( surfaces, blockingRead == CL_TRUE, dispatchInfo, numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_read_buffer_rect.h000066400000000000000000000100031363734646600301320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include 
"opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueReadBufferRect( Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT; auto isMemTransferNeeded = true; if (buffer->isMemObjZeroCopy()) { size_t bufferOffset; size_t hostOffset; computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch); isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, cmdType); } if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, ptr, cmdType, blockingRead, numEventsInWaitList, eventWaitList, event); } auto eBuiltInOps = EBuiltInOps::CopyBufferRect; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch); void *dstPtr = ptr; MemObjSurface bufferSurf(buffer); HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize); Surface *surfaces[] = {&bufferSurf, &hostPtrSurf}; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { auto &csr = blitEnqueueAllowed(cmdType) ? 
*getBcsCommandStreamReceiver() : getGpgpuCommandStreamReceiver(); bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } void *alignedDstPtr = alignDown(dstPtr, 4); size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); BuiltinOpParams dc; dc.srcMemObj = buffer; dc.dstPtr = alignedDstPtr; dc.srcOffset = bufferOrigin; dc.dstOffset = hostOrigin; dc.transferAllocation = hostPtrSurf.getAllocation(); dc.dstOffset.x += dstPtrOffset; dc.size = region; dc.srcRowPitch = bufferRowPitch; dc.srcSlicePitch = bufferSlicePitch; dc.dstRowPitch = hostRowPitch; dc.dstSlicePitch = hostSlicePitch; MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, blockingRead == CL_TRUE, dispatchInfo, numEventsInWaitList, eventWaitList, event); if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast(buffer), ptr); if (!isL3Capable(ptr, hostPtrSize)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, hostPtrSize, MemoryConstants::pageSize, MemoryConstants::pageSize); } } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_read_image.h000066400000000000000000000106241363734646600267370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/context/context.h" #include 
"opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueReadImage( Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { if (nullptr == mapAllocation) { notifyEnqueueReadImage(srcImage, !!blockingRead); } MultiDispatchInfo di; auto isMemTransferNeeded = true; if (srcImage->isMemObjZeroCopy()) { size_t hostOffset; Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, srcImage->getImageDesc().image_type, srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); isMemTransferNeeded = srcImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_READ_IMAGE); } if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(srcImage, ptr, CL_COMMAND_READ_IMAGE, blockingRead, numEventsInWaitList, eventWaitList, event); } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); size_t hostPtrSize = calculateHostPtrSizeForImage(region, inputRowPitch, inputSlicePitch, srcImage); void *dstPtr = ptr; MemObjSurface srcImgSurf(srcImage); HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize); GeneralSurface mapSurface; Surface *surfaces[] = {&srcImgSurf, nullptr}; if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr size_t dstOffset = ptrDiff(dstPtr, mapAllocation->getUnderlyingBuffer()); dstPtr = reinterpret_cast(mapAllocation->getGpuAddress() 
+ dstOffset); } else { surfaces[1] = &hostPtrSurf; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedDstPtr = alignDown(dstPtr, 4); size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstPtr = alignedDstPtr; dc.dstOffset.x = dstPtrOffset; dc.srcOffset = origin; dc.size = region; dc.srcRowPitch = (srcImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? inputSlicePitch : inputRowPitch; dc.srcSlicePitch = inputSlicePitch; if (srcImage->getImageDesc().num_mip_levels > 0) { dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, origin); } builder.buildDispatchInfos(di, dc); enqueueHandler( surfaces, blockingRead == CL_TRUE, di, numEventsInWaitList, eventWaitList, event); if (context->isProvidingPerformanceHints()) { if (!isL3Capable(ptr, hostPtrSize)) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, hostPtrSize, MemoryConstants::pageSize, MemoryConstants::pageSize); } } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_resource_barrier.h000066400000000000000000000011211363734646600302070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue_hw.h" namespace NEO { template cl_int CommandQueueHw::enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return CL_SUCCESS; } } // namespace 
NEOcompute-runtime-20.13.16352/opencl/source/command_queue/enqueue_svm.h000066400000000000000000000555661363734646600255050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/event/event.h" #include namespace NEO { using SvmFreeClbT = void(CL_CALLBACK *)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData); struct SvmFreeUserData { cl_uint numSvmPointers; void **svmPointers; SvmFreeClbT clb; void *userData; bool ownsEventDeletion; SvmFreeUserData(cl_uint numSvmPointers, void **svmPointers, SvmFreeClbT clb, void *userData, bool ownsEventDeletion) : numSvmPointers(numSvmPointers), svmPointers(svmPointers), clb(clb), userData(userData), ownsEventDeletion(ownsEventDeletion){}; }; inline void CL_CALLBACK freeSvmEventClb(cl_event event, cl_int commandExecCallbackType, void *usrData) { auto freeDt = reinterpret_cast(usrData); auto eventObject = castToObjectOrAbort(event); if (freeDt->clb == nullptr) { auto ctx = eventObject->getContext(); for (cl_uint i = 0; i < freeDt->numSvmPointers; i++) { castToObjectOrAbort(ctx)->getSVMAllocsManager()->freeSVMAlloc(freeDt->svmPointers[i]); } } else { freeDt->clb(eventObject->getCommandQueue(), freeDt->numSvmPointers, freeDt->svmPointers, freeDt->userData); } if (freeDt->ownsEventDeletion) { castToObjectOrAbort(event)->release(); } delete freeDt; } template cl_int CommandQueueHw::enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); if 
(svmData == nullptr) { return CL_INVALID_VALUE; } bool blocking = blockingMap == CL_TRUE; if (svmData->gpuAllocation->getAllocationType() == GraphicsAllocation::AllocationType::SVM_ZERO_COPY) { NullSurface s; Surface *surfaces[] = {&s}; if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA, svmPtr); } enqueueHandler(surfaces, blocking, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } else { auto svmOperation = context->getSVMAllocsManager()->getSvmMapOperation(svmPtr); if (svmOperation) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, blocking, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } MultiDispatchInfo dispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); GeneralSurface dstSurface(svmData->cpuAllocation); GeneralSurface srcSurface(svmData->gpuAllocation); Surface *surfaces[] = {&dstSurface, &srcSurface}; void *svmBasePtr = svmData->cpuAllocation->getUnderlyingBuffer(); size_t svmOffset = ptrDiff(svmPtr, svmBasePtr); BuiltinOpParams dc; dc.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddressToPatch()); dc.dstSvmAlloc = svmData->cpuAllocation; dc.dstOffset = {svmOffset, 0, 0}; dc.srcPtr = reinterpret_cast(svmData->gpuAllocation->getGpuAddressToPatch()); dc.srcSvmAlloc = svmData->gpuAllocation; dc.srcOffset = {svmOffset, 0, 0}; dc.size = {size, 0, 0}; dc.unifiedMemoryArgsRequireMemSync = externalAppCall; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event); if (event) { castToObjectOrAbort(*event)->setCmdType(CL_COMMAND_SVM_MAP); } bool readOnlyMap = isValueSet(mapFlags, CL_MAP_READ); 
context->getSVMAllocsManager()->insertSvmMapOperation(svmPtr, size, svmBasePtr, svmOffset, readOnlyMap); dispatchInfo.backupUnifiedMemorySyncRequirement(); return CL_SUCCESS; } } template cl_int CommandQueueHw::enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); if (svmData == nullptr) { return CL_INVALID_VALUE; } if (svmData->gpuAllocation->getAllocationType() == GraphicsAllocation::AllocationType::SVM_ZERO_COPY) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } else { auto svmOperation = context->getSVMAllocsManager()->getSvmMapOperation(svmPtr); if (!svmOperation) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } if (svmOperation->readOnlyMap) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); context->getSVMAllocsManager()->removeSvmMapOperation(svmPtr); return CL_SUCCESS; } svmData->gpuAllocation->setAubWritable(true, GraphicsAllocation::defaultBank); svmData->gpuAllocation->setTbxWritable(true, GraphicsAllocation::defaultBank); MultiDispatchInfo dispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); GeneralSurface dstSurface(svmData->gpuAllocation); GeneralSurface srcSurface(svmData->cpuAllocation); Surface *surfaces[] = {&dstSurface, &srcSurface}; BuiltinOpParams dc; dc.dstPtr = reinterpret_cast(svmData->gpuAllocation->getGpuAddressToPatch()); dc.dstSvmAlloc = svmData->gpuAllocation; dc.dstOffset = {svmOperation->offset, 0, 0}; dc.srcPtr = 
reinterpret_cast(svmData->cpuAllocation->getGpuAddressToPatch()); dc.srcSvmAlloc = svmData->cpuAllocation; dc.srcOffset = {svmOperation->offset, 0, 0}; dc.size = {svmOperation->regionSize, 0, 0}; dc.unifiedMemoryArgsRequireMemSync = externalAppCall; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, event); if (event) { castToObjectOrAbort(*event)->setCmdType(CL_COMMAND_SVM_UNMAP); } context->getSVMAllocsManager()->removeSvmMapOperation(svmPtr); dispatchInfo.backupUnifiedMemorySyncRequirement(); return CL_SUCCESS; } } template cl_int CommandQueueHw::enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], SvmFreeClbT clb, void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *retEvent) { cl_event event = nullptr; bool ownsEventDeletion = false; if (retEvent == nullptr) { ownsEventDeletion = true; retEvent = &event; } SvmFreeUserData *pFreeData = new SvmFreeUserData(numSvmPointers, svmPointers, clb, userData, ownsEventDeletion); NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, retEvent); auto eventObject = castToObjectOrAbort(*retEvent); eventObject->addCallback(freeSvmEventClb, CL_COMPLETE, pFreeData); return CL_SUCCESS; } inline void setOperationParams(BuiltinOpParams &operationParams, size_t size, const void *srcPtr, GraphicsAllocation *srcSvmAlloc, void *dstPtr, GraphicsAllocation *dstSvmAlloc) { operationParams.size = {size, 0, 0}; operationParams.srcPtr = const_cast(alignDown(srcPtr, 4)); operationParams.srcSvmAlloc = srcSvmAlloc; operationParams.srcOffset = {ptrDiff(srcPtr, operationParams.srcPtr), 0, 0}; operationParams.dstPtr = alignDown(dstPtr, 4); operationParams.dstSvmAlloc = dstSvmAlloc; operationParams.dstOffset = {ptrDiff(dstPtr, operationParams.dstPtr), 0, 0}; } template cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void 
*srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { if ((dstPtr == nullptr) || (srcPtr == nullptr)) { return CL_INVALID_VALUE; } auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstPtr); auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcPtr); enum CopyType { HostToHost, SvmToHost, HostToSvm, SvmToSvm }; CopyType copyType = HostToHost; if ((srcSvmData != nullptr) && (dstSvmData != nullptr)) { copyType = SvmToSvm; } else if ((srcSvmData == nullptr) && (dstSvmData != nullptr)) { copyType = HostToSvm; } else if (srcSvmData != nullptr) { copyType = SvmToHost; } auto pageFaultManager = context->getMemoryManager()->getPageFaultManager(); if (dstSvmData && pageFaultManager) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(dstSvmData->gpuAllocation->getGpuAddress())); } if (srcSvmData && pageFaultManager) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(srcSvmData->gpuAllocation->getGpuAddress())); } auto isStatelessRequired = false; if (srcSvmData != nullptr) { isStatelessRequired = forceStateless(srcSvmData->size); } if (dstSvmData != nullptr) { isStatelessRequired |= forceStateless(dstSvmData->size); } auto builtInType = EBuiltInOps::CopyBufferToBuffer; if (isStatelessRequired) { builtInType = EBuiltInOps::CopyBufferToBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); MultiDispatchInfo dispatchInfo; BuiltinOpParams operationParams; Surface *surfaces[2]; if (copyType == SvmToHost) { GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation); HostPtrSurface dstHostPtrSurf(dstPtr, size); if (size != 0) { bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(dstHostPtrSurf.getAllocation()->getGpuAddress()); } 
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, dstPtr, dstHostPtrSurf.getAllocation()); surfaces[0] = &srcSvmSurf; surfaces[1] = &dstHostPtrSurf; builder.buildDispatchInfos(dispatchInfo, operationParams); enqueueHandler( surfaces, blockingCopy == CL_TRUE, dispatchInfo, numEventsInWaitList, eventWaitList, event); } else if (copyType == HostToSvm) { HostPtrSurface srcHostPtrSurf(const_cast(srcPtr), size); GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation); if (size != 0) { bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(srcHostPtrSurf.getAllocation()->getGpuAddress()); } setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(), dstPtr, dstSvmData->gpuAllocation); surfaces[0] = &dstSvmSurf; surfaces[1] = &srcHostPtrSurf; builder.buildDispatchInfos(dispatchInfo, operationParams); enqueueHandler( surfaces, blockingCopy == CL_TRUE, dispatchInfo, numEventsInWaitList, eventWaitList, event); } else if (copyType == SvmToSvm) { GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation); GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation); setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, dstPtr, dstSvmData->gpuAllocation); surfaces[0] = &srcSvmSurf; surfaces[1] = &dstSvmSurf; builder.buildDispatchInfos(dispatchInfo, operationParams); enqueueHandler( surfaces, blockingCopy ? 
true : false, dispatchInfo, numEventsInWaitList, eventWaitList, event); } else { HostPtrSurface srcHostPtrSurf(const_cast(srcPtr), size); HostPtrSurface dstHostPtrSurf(dstPtr, size); if (size != 0) { bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false); status &= getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(srcHostPtrSurf.getAllocation()->getGpuAddress()); dstPtr = reinterpret_cast(dstHostPtrSurf.getAllocation()->getGpuAddress()); } setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(), dstPtr, dstHostPtrSurf.getAllocation()); surfaces[0] = &srcHostPtrSurf; surfaces[1] = &dstHostPtrSurf; builder.buildDispatchInfos(dispatchInfo, operationParams); enqueueHandler( surfaces, blockingCopy ? true : false, dispatchInfo, numEventsInWaitList, eventWaitList, event); } if (event) { auto pEvent = castToObjectOrAbort(*event); pEvent->setCmdType(CL_COMMAND_SVM_MEMCPY); } return CL_SUCCESS; } template cl_int CommandQueueHw::enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); if (svmData == nullptr) { return CL_INVALID_VALUE; } auto memoryManager = context->getMemoryManager(); DEBUG_BREAK_IF(nullptr == memoryManager); auto pageFaultManager = memoryManager->getPageFaultManager(); if (pageFaultManager) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(svmData->gpuAllocation->getGpuAddress())); } auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); auto storageWithAllocations = getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); auto allocationType = GraphicsAllocation::AllocationType::FILL_PATTERN; auto patternAllocation = 
storageWithAllocations->obtainReusableAllocation(patternSize, allocationType).release(); commandStreamReceieverOwnership.unlock(); if (!patternAllocation) { patternAllocation = memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), patternSize, allocationType}); } if (patternSize == 1) { int patternInt = (uint32_t)((*(uint8_t *)pattern << 24) | (*(uint8_t *)pattern << 16) | (*(uint8_t *)pattern << 8) | *(uint8_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else if (patternSize == 2) { int patternInt = (uint32_t)((*(uint16_t *)pattern << 16) | *(uint16_t *)pattern); memcpy_s(patternAllocation->getUnderlyingBuffer(), sizeof(int), &patternInt, sizeof(int)); } else { memcpy_s(patternAllocation->getUnderlyingBuffer(), patternSize, pattern, patternSize); } auto builtInType = EBuiltInOps::FillBuffer; if (forceStateless(svmData->size)) { builtInType = EBuiltInOps::FillBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInType, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); BuiltinOpParams operationParams; MemObj patternMemObj(this->context, 0, {}, 0, 0, alignUp(patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); void *alignedDstPtr = alignDown(svmPtr, 4); size_t dstPtrOffset = ptrDiff(svmPtr, alignedDstPtr); operationParams.srcMemObj = &patternMemObj; operationParams.dstPtr = alignedDstPtr; operationParams.dstSvmAlloc = svmData->gpuAllocation; operationParams.dstOffset = {dstPtrOffset, 0, 0}; operationParams.size = {size, 0, 0}; MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, operationParams); GeneralSurface s1(svmData->gpuAllocation); GeneralSurface s2(patternAllocation); Surface *surfaces[] = {&s1, &s2}; enqueueHandler( surfaces, false, dispatchInfo, numEventsInWaitList, eventWaitList, 
event); storageWithAllocations->storeAllocationWithTaskCount(std::unique_ptr(patternAllocation), REUSABLE_ALLOCATION, taskCount); return CL_SUCCESS; } template cl_int CommandQueueHw::enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { NullSurface s; Surface *surfaces[] = {&s}; enqueueHandler(surfaces, false, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_write_buffer.h000066400000000000000000000121451363734646600273450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueWriteBuffer( Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t size, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER; auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? 
buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast(ptr), numEventsInWaitList, eventWaitList); //check if we are dealing with SVM pointer here for which we already have an allocation if (!mapAllocation && this->getContext().getSVMAllocsManager()) { auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr); if (svmEntry) { if ((svmEntry->gpuAllocation->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) { return CL_INVALID_OPERATION; } if (isCpuCopyAllowed) { if (svmEntry->memoryType == DEVICE_UNIFIED_MEMORY) { isCpuCopyAllowed = false; } } mapAllocation = svmEntry->cpuAllocation ? svmEntry->cpuAllocation : svmEntry->gpuAllocation; } } if (isCpuCopyAllowed) { if (isMemTransferNeeded) { return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, const_cast(ptr), numEventsInWaitList, eventWaitList, event); } else { return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cmdType, buffer, offset, size, const_cast(ptr), numEventsInWaitList, eventWaitList, event); } } else if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), cmdType, blockingWrite, numEventsInWaitList, eventWaitList, event); } auto eBuiltInOps = EBuiltInOps::CopyBufferToBuffer; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferToBufferStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); void *srcPtr = const_cast(ptr); HostPtrSurface hostPtrSurf(srcPtr, size, true); MemObjSurface bufferSurf(buffer); GeneralSurface mapSurface; Surface *surfaces[] = {&bufferSurf, nullptr}; if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr size_t srcOffset = 
ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer()); srcPtr = reinterpret_cast(mapAllocation->getGpuAddress() + srcOffset); } else { surfaces[1] = &hostPtrSurf; if (size != 0) { auto &csr = blitEnqueueAllowed(cmdType) ? *getBcsCommandStreamReceiver() : getGpgpuCommandStreamReceiver(); bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedSrcPtr = alignDown(srcPtr, 4); size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); BuiltinOpParams dc; dc.srcPtr = alignedSrcPtr; dc.srcOffset = {srcPtrOffset, 0, 0}; dc.dstMemObj = buffer; dc.dstOffset = {offset, 0, 0}; dc.size = {size, 0, 0}; dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation(); MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, blockingWrite == CL_TRUE, dispatchInfo, numEventsInWaitList, eventWaitList, event); if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast(buffer)); } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_write_buffer_rect.h000066400000000000000000000071621363734646600303650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include namespace NEO { template cl_int CommandQueueHw::enqueueWriteBufferRect( Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t 
*hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { auto isMemTransferNeeded = true; if (buffer->isMemObjZeroCopy()) { size_t bufferOffset; size_t hostOffset; computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch); isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_WRITE_BUFFER_RECT); } if (!isMemTransferNeeded) { return enqueueMarkerForReadWriteOperation(buffer, const_cast(ptr), CL_COMMAND_WRITE_BUFFER_RECT, blockingWrite, numEventsInWaitList, eventWaitList, event); } auto eBuiltInOps = EBuiltInOps::CopyBufferRect; if (forceStateless(buffer->getSize())) { eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(eBuiltInOps, this->getDevice()); BuiltInOwnershipWrapper builtInLock(builder, this->context); size_t hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, hostRowPitch, hostSlicePitch); void *srcPtr = const_cast(ptr); MemObjSurface dstBufferSurf(buffer); HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf}; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } void *alignedSrcPtr = alignDown(srcPtr, 4); size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); BuiltinOpParams dc; dc.srcPtr = alignedSrcPtr; dc.dstMemObj = buffer; dc.srcOffset = hostOrigin; dc.srcOffset.x += srcPtrOffset; dc.dstOffset = bufferOrigin; dc.size = region; dc.srcRowPitch = hostRowPitch; dc.srcSlicePitch = 
hostSlicePitch; dc.dstRowPitch = bufferRowPitch; dc.dstSlicePitch = bufferSlicePitch; MultiDispatchInfo dispatchInfo; builder.buildDispatchInfos(dispatchInfo, dc); enqueueHandler( surfaces, blockingWrite == CL_TRUE, dispatchInfo, numEventsInWaitList, eventWaitList, event); if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast(buffer)); } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/enqueue_write_image.h000066400000000000000000000101371363734646600271550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include #include namespace NEO { template cl_int CommandQueueHw::enqueueWriteImage( Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { MultiDispatchInfo di; auto isMemTransferNeeded = true; if (dstImage->isMemObjZeroCopy()) { size_t hostOffset; Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, dstImage->getImageDesc().image_type, dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); isMemTransferNeeded = dstImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_WRITE_IMAGE); } if (!isMemTransferNeeded) { return 
enqueueMarkerForReadWriteOperation(dstImage, const_cast(ptr), CL_COMMAND_WRITE_IMAGE, blockingWrite, numEventsInWaitList, eventWaitList, event); } auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, this->getDevice()); BuiltInOwnershipWrapper lock(builder, this->context); size_t hostPtrSize = calculateHostPtrSizeForImage(region, inputRowPitch, inputSlicePitch, dstImage); void *srcPtr = const_cast(ptr); MemObjSurface dstImgSurf(dstImage); HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); GeneralSurface mapSurface; Surface *surfaces[] = {&dstImgSurf, nullptr}; if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); //get offset between base cpu ptr of map allocation and dst ptr size_t srcOffset = ptrDiff(srcPtr, mapAllocation->getUnderlyingBuffer()); srcPtr = reinterpret_cast(mapAllocation->getGpuAddress() + srcOffset); } else { surfaces[1] = &hostPtrSurf; if (region[0] != 0 && region[1] != 0 && region[2] != 0) { bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } srcPtr = reinterpret_cast(hostPtrSurf.getAllocation()->getGpuAddress()); } } void *alignedSrcPtr = alignDown(srcPtr, 4); size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); BuiltinOpParams dc; dc.srcPtr = alignedSrcPtr; dc.srcOffset.x = srcPtrOffset; dc.dstMemObj = dstImage; dc.dstOffset = origin; dc.size = region; dc.dstRowPitch = ((dstImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (inputSlicePitch > inputRowPitch)) ? 
inputSlicePitch : inputRowPitch; dc.dstSlicePitch = inputSlicePitch; if (dstImage->getImageDesc().num_mip_levels > 0) { dc.dstMipLevel = findMipLevel(dstImage->getImageDesc().image_type, origin); } builder.buildDispatchInfos(di, dc); enqueueHandler( surfaces, blockingWrite == CL_TRUE, di, numEventsInWaitList, eventWaitList, event); if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast(dstImage)); } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/finish.h000066400000000000000000000014661363734646600244170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" namespace NEO { template cl_int CommandQueueHw::finish() { auto result = getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); if (!result) { return CL_OUT_OF_RESOURCES; } //as long as queue is blocked we need to stall. while (isQueueBlocked()) ; auto taskCountToWaitFor = this->taskCount; auto flushStampToWaitFor = this->flushStamp->peekStamp(); // Stall until HW reaches CQ taskCount waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor, false); return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/flush.h000066400000000000000000000005011363734646600242450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { template cl_int CommandQueueHw::flush() { return getGpgpuCommandStreamReceiver().flushBatchedSubmissions() ? 
CL_SUCCESS : CL_OUT_OF_RESOURCES; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/gpgpu_walker.h000066400000000000000000000177131363734646600256300ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/helpers/vec.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/event/hw_timestamps.h" #include "opencl/source/event/perf_counter.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/kernel_info.h" namespace NEO { struct RootDeviceEnvironment; template using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; template using MI_STORE_REG_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM_CMD; void computeWorkgroupSize1D( uint32_t maxWorkGroupSize, size_t workGroupSize[3], const size_t workItems[3], size_t simdSize); void computeWorkgroupSizeND( WorkSizeInfo wsInfo, size_t workGroupSize[3], const size_t workItems[3], const uint32_t workDim); void computeWorkgroupSize2D( uint32_t maxWorkGroupSize, size_t workGroupSize[3], const size_t workItems[3], size_t simdSize); void computeWorkgroupSizeSquared( uint32_t maxWorkGroupSize, size_t workGroupSize[3], const size_t workItems[3], size_t simdSize, const uint32_t workDim); Vec3 computeWorkgroupSize( const DispatchInfo &dispatchInfo); Vec3 
generateWorkgroupSize( const DispatchInfo &dispatchInfo); Vec3 computeWorkgroupsNumber( const Vec3 gws, const Vec3 lws); Vec3 generateWorkgroupsNumber( const Vec3 gws, const Vec3 lws); Vec3 generateWorkgroupsNumber( const DispatchInfo &dispatchInfo); inline uint32_t calculateDispatchDim(Vec3 dispatchSize, Vec3 dispatchOffset) { return std::max(1U, std::max(dispatchSize.getSimplifiedDim(), dispatchOffset.getSimplifiedDim())); } Vec3 canonizeWorkgroup( Vec3 workgroup); void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo); inline cl_uint computeDimensions(const size_t workItems[3]) { return (workItems[2] > 1) ? 3 : (workItems[1] > 1) ? 2 : 1; } template class GpgpuWalkerHelper { public: static void applyWADisableLSQCROPERFforOCL(LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode); static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel); static size_t setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, const size_t globalOffsets[3], const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t requiredWorkgroupOrder); static void dispatchProfilingCommandsStart( TagNode &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo); static void dispatchProfilingCommandsEnd( TagNode &hwTimeStamps, LinearStream *commandStream); static void dispatchPerfCountersCommandsStart( CommandQueue &commandQueue, TagNode &hwPerfCounter, LinearStream *commandStream); static void dispatchPerfCountersCommandsEnd( CommandQueue &commandQueue, TagNode &hwPerfCounter, LinearStream *commandStream); static void setupTimestampPacket( LinearStream *cmdStream, WALKER_TYPE *walkerCmd, TagNode *timestampPacketNode, TimestampPacketStorage::WriteOperationType writeOperationType, const RootDeviceEnvironment &rootDeviceEnvironment); 
static void dispatchScheduler( LinearStream &commandStream, DeviceQueueHw &devQueueHw, PreemptionMode preemptionMode, SchedulerKernel &scheduler, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed); static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd); private: using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; static void addAluReadModifyWriteRegister( LinearStream *pCommandStream, uint32_t aluRegister, AluRegisters operation, uint32_t mask); }; template struct EnqueueOperation { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo); static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel); static size_t getSizeRequiredForTimestampPacketWrite(); private: static size_t getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel); static size_t getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue); }; template LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces) { size_t expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo); return commandQueue.getCS(expectedSizeCS); } template IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) { size_t expectedSize = 0; IndirectHeap *ih = nullptr; // clang-format off switch 
(heapType) { case IndirectHeap::DYNAMIC_STATE: expectedSize = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); break; case IndirectHeap::INDIRECT_OBJECT: expectedSize = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); break; case IndirectHeap::SURFACE_STATE: expectedSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); break; } // clang-format on if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) { if (heapType == IndirectHeap::SURFACE_STATE) { expectedSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); } else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT) { DeviceQueueHw *pDevQueue = castToObject>(commandQueue.getContext().getDefaultDeviceQueue()); DEBUG_BREAK_IF(pDevQueue == nullptr); ih = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); } } if (ih == nullptr) ih = &commandQueue.getIndirectHeap(heapType, expectedSize); return *ih; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/gpgpu_walker_base.inl000066400000000000000000000300311363734646600271410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/local_id_gen.h" #include 
"opencl/source/event/perf_counter.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/queue_helpers.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/mem_obj/mem_obj.h" #include #include namespace NEO { // Performs ReadModifyWrite operation on value of a register: Register = Register Operation Mask template void GpgpuWalkerHelper::addAluReadModifyWriteRegister( LinearStream *pCommandStream, uint32_t aluRegister, AluRegisters operation, uint32_t mask) { // Load "Register" value into CS_GPR_R0 typedef typename GfxFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG; typedef typename GfxFamily::MI_MATH MI_MATH; typedef typename GfxFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; auto pCmd = pCommandStream->getSpaceForCmd(); *pCmd = GfxFamily::cmdInitLoadRegisterReg; pCmd->setSourceRegisterAddress(aluRegister); pCmd->setDestinationRegisterAddress(CS_GPR_R0); // Load "Mask" into CS_GPR_R1 typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; auto pCmd2 = pCommandStream->getSpaceForCmd(); *pCmd2 = GfxFamily::cmdInitLoadRegisterImm; pCmd2->setRegisterOffset(CS_GPR_R1); pCmd2->setDataDword(mask); // Add instruction MI_MATH with 4 MI_MATH_ALU_INST_INLINE operands auto pCmd3 = reinterpret_cast(pCommandStream->getSpace(sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE))); reinterpret_cast(pCmd3)->DW0.Value = 0x0; reinterpret_cast(pCmd3)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; reinterpret_cast(pCmd3)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH; // 0x3 - 5 Dwords length cmd (-2): 1 for MI_MATH, 4 for MI_MATH_ALU_INST_INLINE reinterpret_cast(pCmd3)->DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1; pCmd3++; MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(pCmd3); // Setup first operand of MI_MATH - load CS_GPR_R0 into register A 
pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_0); pAluParam++; // Setup second operand of MI_MATH - load CS_GPR_R1 into register B pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_1); pAluParam++; // Setup third operand of MI_MATH - "Operation" on registers A and B pAluParam->DW0.BitField.ALUOpcode = static_cast(operation); pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; // Setup fourth operand of MI_MATH - store result into CS_GPR_R0 pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_0); pAluParam->DW0.BitField.Operand2 = static_cast(AluRegisters::R_ACCU); // LOAD value of CS_GPR_R0 into "Register" auto pCmd4 = pCommandStream->getSpaceForCmd(); *pCmd4 = GfxFamily::cmdInitLoadRegisterReg; pCmd4->setSourceRegisterAddress(CS_GPR_R0); pCmd4->setDestinationRegisterAddress(aluRegister); // Add PIPE_CONTROL to flush caches auto pCmd5 = pCommandStream->getSpaceForCmd(); *pCmd5 = GfxFamily::cmdInitPipeControl; pCmd5->setCommandStreamerStallEnable(true); pCmd5->setDcFlushEnable(true); pCmd5->setTextureCacheInvalidationEnable(true); pCmd5->setPipeControlFlushEnable(true); pCmd5->setStateCacheInvalidationEnable(true); } template void GpgpuWalkerHelper::dispatchProfilingCommandsStart( TagNode &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; // PIPE_CONTROL for global timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); 
MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( *commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, false, hwInfo); //MI_STORE_REGISTER_MEM for context local timestamp timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextStartTS); //low part auto pMICmdLow = commandStream->getSpaceForCmd(); *pMICmdLow = GfxFamily::cmdInitStoreRegisterMem; adjustMiStoreRegMemMode(pMICmdLow); pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); pMICmdLow->setMemoryAddress(timeStampAddress); } template void GpgpuWalkerHelper::dispatchProfilingCommandsEnd( TagNode &hwTimeStamps, LinearStream *commandStream) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; // PIPE_CONTROL for global timestamp auto pPipeControlCmd = commandStream->getSpaceForCmd(); *pPipeControlCmd = GfxFamily::cmdInitPipeControl; pPipeControlCmd->setCommandStreamerStallEnable(true); //MI_STORE_REGISTER_MEM for context local timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextEndTS); //low part auto pMICmdLow = commandStream->getSpaceForCmd(); *pMICmdLow = GfxFamily::cmdInitStoreRegisterMem; adjustMiStoreRegMemMode(pMICmdLow); pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); pMICmdLow->setMemoryAddress(timeStampAddress); } template void GpgpuWalkerHelper::dispatchPerfCountersCommandsStart( CommandQueue &commandQueue, TagNode &hwPerfCounter, LinearStream *commandStream) { const auto pPerformanceCounters = commandQueue.getPerfCounters(); const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()) ? 
MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, true); void *pBuffer = commandStream->getSpace(size); pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, true, size, pBuffer); } template void GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd( CommandQueue &commandQueue, TagNode &hwPerfCounter, LinearStream *commandStream) { const auto pPerformanceCounters = commandQueue.getPerfCounters(); const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()) ? MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, false); void *pBuffer = commandStream->getSpace(size); pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, false, size, pBuffer); } template void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { } template size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) { return (size_t)0; } template void GpgpuWalkerHelper::adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd) { } template size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) { size_t expectedSizeCS = 0; auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); auto &commandQueueHw = static_cast &>(commandQueue); if (blitEnqueue) { size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (commandQueueHw.isCacheFlushForBcsRequired()) { expectedSizeCS += 
MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); } return expectedSizeCS; } Kernel *parentKernel = multiDispatchInfo.peekParentKernel(); for (auto &dispatchInfo : multiDispatchInfo) { expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel()); size_t memObjAuxCount = multiDispatchInfo.getMemObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getMemObjsForAuxTranslation()->size() : 0; expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired()); expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired()); } if (parentKernel) { SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel(); expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler); } if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize(csrDeps); expectedSizeCS += EnqueueOperation::getSizeRequiredForTimestampPacketWrite(); } return expectedSizeCS; } template size_t EnqueueOperation::getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) { if (isCommandWithoutKernel(cmdType)) { return EnqueueOperation::getSizeRequiredCSNonKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue); } else { return EnqueueOperation::getSizeRequiredCSKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, pKernel); } } template size_t EnqueueOperation::getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue) { size_t size = 0; if (reserveProfilingCmdsSpace) { size += 2 * 
sizeof(PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } return size; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl000066400000000000000000000231001363734646600300450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/simd_helper.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/gpgpu_walker_base.inl" namespace NEO { template inline size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, const size_t globalOffsets[3], const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t requiredWorkgroupOrder) { auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2]; auto threadsPerWorkGroup = getThreadsPerWG(simd, localWorkSize); walkerCmd->setThreadWidthCounterMaximum(static_cast(threadsPerWorkGroup)); walkerCmd->setThreadGroupIdXDimension(static_cast(numWorkGroups[0])); walkerCmd->setThreadGroupIdYDimension(static_cast(numWorkGroups[1])); walkerCmd->setThreadGroupIdZDimension(static_cast(numWorkGroups[2])); // compute executionMask - to tell which SIMD lines are active within thread auto remainderSimdLanes = localWorkSize & (simd - 1); uint64_t executionMask = maxNBitValue(remainderSimdLanes); if (!executionMask) executionMask = ~executionMask; using SIMD_SIZE = typename WALKER_TYPE::SIMD_SIZE; walkerCmd->setRightExecutionMask(static_cast(executionMask)); walkerCmd->setBottomExecutionMask(static_cast(0xffffffff)); walkerCmd->setSimdSize(getSimdConfig>(simd)); walkerCmd->setThreadGroupIdStartingX(static_cast(startWorkGroups[0])); 
walkerCmd->setThreadGroupIdStartingY(static_cast(startWorkGroups[1])); walkerCmd->setThreadGroupIdStartingResumeZ(static_cast(startWorkGroups[2])); return localWorkSize; } template void GpgpuWalkerHelper::dispatchScheduler( LinearStream &commandStream, DeviceQueueHw &devQueueHw, PreemptionMode preemptionMode, SchedulerKernel &scheduler, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; bool dcFlush = false; MemorySynchronizationCommands::addPipeControl(commandStream, dcFlush); uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex; const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize; const size_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable; const size_t totalInterfaceDescriptorTableSize = devQueueHw.interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA); // Program media interface descriptor load HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( commandStream, offsetInterfaceDescriptor, totalInterfaceDescriptorTableSize); DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); // Determine SIMD size uint32_t simd = scheduler.getKernelInfo().getMaxSimdSize(); DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20); // Patch our kernel constants *scheduler.globalWorkOffsetX = 0; *scheduler.globalWorkOffsetY = 0; *scheduler.globalWorkOffsetZ = 0; *scheduler.globalWorkSizeX = (uint32_t)scheduler.getGws(); *scheduler.globalWorkSizeY = 1; *scheduler.globalWorkSizeZ = 1; *scheduler.localWorkSizeX = (uint32_t)scheduler.getLws(); *scheduler.localWorkSizeY = 1; *scheduler.localWorkSizeZ = 1; *scheduler.localWorkSizeX2 = (uint32_t)scheduler.getLws(); *scheduler.localWorkSizeY2 = 1; *scheduler.localWorkSizeZ2 = 1; *scheduler.enqueuedLocalWorkSizeX = (uint32_t)scheduler.getLws(); 
*scheduler.enqueuedLocalWorkSizeY = 1; *scheduler.enqueuedLocalWorkSizeZ = 1; *scheduler.numWorkGroupsX = (uint32_t)(scheduler.getGws() / scheduler.getLws()); *scheduler.numWorkGroupsY = 0; *scheduler.numWorkGroupsZ = 0; *scheduler.workDim = 1; // Send our indirect object data size_t localWorkSizes[3] = {scheduler.getLws(), 1, 1}; // Create indirectHeap for IOH that is located at the end of device enqueue DSH size_t curbeOffset = devQueueHw.setSchedulerCrossThreadData(scheduler); IndirectHeap indirectObjectHeap(dsh->getCpuBase(), dsh->getMaxAvailableSpace()); indirectObjectHeap.getSpace(curbeOffset); IndirectHeap *ioh = &indirectObjectHeap; // Program the walker. Invokes execution so all state should already be programmed auto pGpGpuWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pGpGpuWalkerCmd = GfxFamily::cmdInitGpgpuWalker; bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(scheduler); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(scheduler); HardwareCommandsHelper::sendIndirectState( commandStream, *dsh, *ioh, *ssh, scheduler, scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, interfaceDescriptorIndex, preemptionMode, pGpGpuWalkerCmd, nullptr, true); // Implement enabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, true); size_t globalOffsets[3] = {0, 0, 0}; size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1}; GpgpuWalkerHelper::setGpgpuWalkerThreadData(pGpGpuWalkerCmd, globalOffsets, globalOffsets, workGroups, localWorkSizes, simd, 1, true, inlineDataProgrammingRequired, *scheduler.getKernelInfo().patchInfo.threadPayload, 0u); // Implement disabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(&commandStream, scheduler, false); // Do not put BB_START only 
when returning in first Scheduler run if (devQueueHw.getSchedulerReturnInstance() != 1) { MemorySynchronizationCommands::addPipeControl(commandStream, true); // Add BB Start Cmd to the SLB in the Primary Batch Buffer auto *bbStart = static_cast(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_START))); *bbStart = GfxFamily::cmdInitBatchBufferStart; bbStart->setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH); uint64_t slbAddress = devQueueHw.getSlbBuffer()->getGpuAddress(); bbStart->setBatchBufferStartAddressGraphicsaddress472(slbAddress); } } template void GpgpuWalkerHelper::setupTimestampPacket( LinearStream *cmdStream, WALKER_TYPE *walkerCmd, TagNode *timestampPacketNode, TimestampPacketStorage::WriteOperationType writeOperationType, const RootDeviceEnvironment &rootDeviceEnvironment) { if (TimestampPacketStorage::WriteOperationType::AfterWalker == writeOperationType) { uint64_t address = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( *cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, 0, false, *rootDeviceEnvironment.getHardwareInfo()); } } template size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) { size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + HardwareCommandsHelper::getSizeRequiredCS(pKernel) + sizeof(PIPE_CONTROL) * (HardwareCommandsHelper::isPipeControlWArequired(pKernel->getDevice().getHardwareInfo()) ? 
2 : 1); size += HardwareCommandsHelper::getSizeRequiredForCacheFlush(commandQueue, pKernel, 0U); size += PreemptionHelper::getPreemptionWaCsSize(commandQueue.getDevice()); if (reserveProfilingCmdsSpace) { size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } if (reservePerfCounters) { const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()) ? MetricsLibraryApi::GpuCommandBufferType::Compute : MetricsLibraryApi::GpuCommandBufferType::Render; size += commandQueue.getPerfCounters()->getGpuCommandsSize(commandBufferType, true); size += commandQueue.getPerfCounters()->getGpuCommandsSize(commandBufferType, false); } size += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(pKernel); return size; } template size_t EnqueueOperation::getSizeRequiredForTimestampPacketWrite() { return sizeof(PIPE_CONTROL); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/hardware_interface.h000066400000000000000000000071551363734646600267550ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption_mode.h" #include "CL/cl.h" #include namespace NEO { class CommandQueue; class DispatchInfo; class IndirectHeap; class Kernel; class LinearStream; struct HwPerfCounter; struct HwTimeStamps; struct KernelOperation; struct MultiDispatchInfo; template struct TagNode; template using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; template class HardwareInterface { public: using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; static void dispatchWalker( CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, const CsrDependencies &csrDependencies, KernelOperation *blockedCommandsData, TagNode *hwTimeStamps, TagNode *hwPerfCounter, TimestampPacketDependencies *timestampPacketDependencies, TimestampPacketContainer 
*currentTimestampPacketNodes, uint32_t commandType); static void getDefaultDshSpace( const size_t &offsetInterfaceDescriptorTable, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, Kernel *parentKernel, IndirectHeap *dsh, LinearStream *commandStream); static void dispatchWorkarounds( LinearStream *commandStream, CommandQueue &commandQueue, Kernel &kernel, const bool &enable); static void dispatchProfilingPerfStartCommands( TagNode *hwTimeStamps, TagNode *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue); static void dispatchProfilingPerfEndCommands( TagNode *hwTimeStamps, TagNode *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue); static void programWalker( LinearStream &commandStream, Kernel &kernel, CommandQueue &commandQueue, TimestampPacketContainer *currentTimestampPacketNodes, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, size_t offsetInterfaceDescriptorTable, Vec3 &numberOfWorkgroups, Vec3 &startOfWorkgroups); static WALKER_TYPE *allocateWalkerSpace(LinearStream &commandStream, const Kernel &kernel); static void obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh); static void dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, uint32_t commandType, LinearStream &commandStream, bool isMainKernel, size_t currentDispatchIndex, TimestampPacketContainer *currentTimestampPacketNodes, PreemptionMode preemptionMode, uint32_t &interfaceDescriptorIndex, size_t offsetInterfaceDescriptorTable, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh); }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/command_queue/hardware_interface_base.inl000066400000000000000000000266131363734646600303020ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/internal_allocation_storage.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" namespace NEO { template inline WALKER_TYPE *HardwareInterface::allocateWalkerSpace(LinearStream &commandStream, const Kernel &kernel) { auto walkerCmd = static_cast *>(commandStream.getSpace(sizeof(WALKER_TYPE))); *walkerCmd = GfxFamily::cmdInitGpgpuWalker; return walkerCmd; } template void HardwareInterface::dispatchWalker( CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, const CsrDependencies &csrDependencies, KernelOperation *blockedCommandsData, TagNode *hwTimeStamps, TagNode *hwPerfCounter, TimestampPacketDependencies *timestampPacketDependencies, TimestampPacketContainer *currentTimestampPacketNodes, uint32_t commandType) { LinearStream *commandStream = nullptr; IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; auto parentKernel = multiDispatchInfo.peekParentKernel(); auto mainKernel = multiDispatchInfo.peekMainKernel(); auto preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo); for (auto &dispatchInfo : multiDispatchInfo) { // Compute local workgroup sizes if (dispatchInfo.getLocalWorkgroupSize().x == 0) { const auto lws = generateWorkgroupSize(dispatchInfo); const_cast(dispatchInfo).setLWS(lws); } } // Allocate command stream and indirect heaps bool blockedQueue = (blockedCommandsData != nullptr); obtainIndirectHeaps(commandQueue, multiDispatchInfo, blockedQueue, dsh, ioh, ssh); if (blockedQueue) { 
blockedCommandsData->setHeaps(dsh, ioh, ssh); commandStream = blockedCommandsData->commandStream.get(); } else { commandStream = &commandQueue.getCS(0); } TimestampPacketHelper::programCsrDependencies(*commandStream, csrDependencies); dsh->align(HardwareCommandsHelper::alignInterfaceDescriptorData); uint32_t interfaceDescriptorIndex = 0; const size_t offsetInterfaceDescriptorTable = dsh->getUsed(); size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA); getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, parentKernel, dsh, commandStream); // Program media interface descriptor load HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( *commandStream, offsetInterfaceDescriptorTable, totalInterfaceDescriptorTableSize); DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); size_t currentDispatchIndex = 0; for (auto &dispatchInfo : multiDispatchInfo) { dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo()); bool isMainKernel = (dispatchInfo.getKernel() == mainKernel); dispatchKernelCommands(commandQueue, dispatchInfo, commandType, *commandStream, isMainKernel, currentDispatchIndex, currentTimestampPacketNodes, preemptionMode, interfaceDescriptorIndex, offsetInterfaceDescriptorTable, *dsh, *ioh, *ssh); currentDispatchIndex++; dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo()); } if (mainKernel->requiresCacheFlushCommand(commandQueue)) { uint64_t postSyncAddress = 0; if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNodeForPostSync = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); postSyncAddress = timestampPacketNodeForPostSync->getGpuAddress() + 
offsetof(TimestampPacketStorage, packets[0].contextEnd); } HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress); } dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); } template void HardwareInterface::dispatchKernelCommands(CommandQueue &commandQueue, const DispatchInfo &dispatchInfo, uint32_t commandType, LinearStream &commandStream, bool isMainKernel, size_t currentDispatchIndex, TimestampPacketContainer *currentTimestampPacketNodes, PreemptionMode preemptionMode, uint32_t &interfaceDescriptorIndex, size_t offsetInterfaceDescriptorTable, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh) { auto &kernel = *dispatchInfo.getKernel(); DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3)); DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3)); DEBUG_BREAK_IF(!(dispatchInfo.getGWS().y == 1 || dispatchInfo.getDim() >= 2)); DEBUG_BREAK_IF(!(dispatchInfo.getOffset().z == 0 || dispatchInfo.getDim() == 3)); DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2)); // If we don't have a required WGS, compute one opportunistically if (commandType == CL_COMMAND_NDRANGE_KERNEL) { provideLocalWorkGroupSizeHints(commandQueue.getContextPtr(), dispatchInfo); } //Get dispatch geometry uint32_t dim = dispatchInfo.getDim(); Vec3 gws = dispatchInfo.getGWS(); Vec3 offset = dispatchInfo.getOffset(); Vec3 startOfWorkgroups = dispatchInfo.getStartOfWorkgroups(); // Compute local workgroup sizes Vec3 lws = dispatchInfo.getLocalWorkgroupSize(); Vec3 elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws; // Compute number of work groups Vec3 totalNumberOfWorkgroups = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? 
dispatchInfo.getTotalNumberOfWorkgroups() : generateWorkgroupsNumber(gws, lws); Vec3 numberOfWorkgroups = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? dispatchInfo.getNumberOfWorkgroups() : totalNumberOfWorkgroups; size_t globalWorkSizes[3] = {gws.x, gws.y, gws.z}; // Patch our kernel constants *kernel.globalWorkOffsetX = static_cast(offset.x); *kernel.globalWorkOffsetY = static_cast(offset.y); *kernel.globalWorkOffsetZ = static_cast(offset.z); *kernel.globalWorkSizeX = static_cast(gws.x); *kernel.globalWorkSizeY = static_cast(gws.y); *kernel.globalWorkSizeZ = static_cast(gws.z); if (isMainKernel || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) { *kernel.localWorkSizeX = static_cast(lws.x); *kernel.localWorkSizeY = static_cast(lws.y); *kernel.localWorkSizeZ = static_cast(lws.z); } *kernel.localWorkSizeX2 = static_cast(lws.x); *kernel.localWorkSizeY2 = static_cast(lws.y); *kernel.localWorkSizeZ2 = static_cast(lws.z); *kernel.enqueuedLocalWorkSizeX = static_cast(elws.x); *kernel.enqueuedLocalWorkSizeY = static_cast(elws.y); *kernel.enqueuedLocalWorkSizeZ = static_cast(elws.z); if (isMainKernel) { *kernel.numWorkGroupsX = static_cast(totalNumberOfWorkgroups.x); *kernel.numWorkGroupsY = static_cast(totalNumberOfWorkgroups.y); *kernel.numWorkGroupsZ = static_cast(totalNumberOfWorkgroups.z); } *kernel.workDim = dim; // Send our indirect object data size_t localWorkSizes[3] = {lws.x, lws.y, lws.z}; dispatchWorkarounds(&commandStream, commandQueue, kernel, true); if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNode = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); GpgpuWalkerHelper::setupTimestampPacket(&commandStream, nullptr, timestampPacketNode, TimestampPacketStorage::WriteOperationType::BeforeWalker, commandQueue.getDevice().getRootDeviceEnvironment()); } programWalker(commandStream, kernel, commandQueue, currentTimestampPacketNodes, dsh, ioh, ssh, globalWorkSizes, 
localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups); dispatchWorkarounds(&commandStream, commandQueue, kernel, false); } template void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) { auto parentKernel = multiDispatchInfo.peekParentKernel(); if (blockedQueue) { size_t dshSize = 0; size_t colorCalcSize = 0; size_t sshSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); bool iohEqualsDsh = false; if (parentKernel) { dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(); sshSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); iohEqualsDsh = true; colorCalcSize = static_cast(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize); } else { dshSize = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); } commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); dsh->getSpace(colorCalcSize); commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh); if (iohEqualsDsh) { ioh = dsh; } else { commandQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo), ioh); } } else { if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) { commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE); } dsh = &getIndirectHeap(commandQueue, multiDispatchInfo); ioh = &getIndirectHeap(commandQueue, multiDispatchInfo); ssh = &getIndirectHeap(commandQueue, multiDispatchInfo); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/hardware_interface_bdw_plus.inl000066400000000000000000000130641363734646600312030ustar00rootroot00000000000000/* * Copyright 
(C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/command_queue/hardware_interface_base.inl" namespace NEO { template inline void HardwareInterface::getDefaultDshSpace( const size_t &offsetInterfaceDescriptorTable, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, Kernel *parentKernel, IndirectHeap *dsh, LinearStream *commandStream) { size_t numDispatches = multiDispatchInfo.size(); totalInterfaceDescriptorTableSize *= numDispatches; if (!parentKernel) { dsh->getSpace(totalInterfaceDescriptorTableSize); } else { dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed()); } } template inline void HardwareInterface::dispatchWorkarounds( LinearStream *commandStream, CommandQueue &commandQueue, Kernel &kernel, const bool &enable) { if (enable) { PreemptionHelper::applyPreemptionWaCmdsBegin(commandStream, commandQueue.getDevice()); // Implement enabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); } else { // Implement disabling special WA DisableLSQCROPERFforOCL if needed GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); PreemptionHelper::applyPreemptionWaCmdsEnd(commandStream, commandQueue.getDevice()); } } template inline void HardwareInterface::dispatchProfilingPerfStartCommands( TagNode *hwTimeStamps, TagNode *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue) { // If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled if (hwTimeStamps != nullptr) { GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamps, commandStream, commandQueue.getDevice().getHardwareInfo()); } if (hwPerfCounter != nullptr) { 
GpgpuWalkerHelper::dispatchPerfCountersCommandsStart(commandQueue, *hwPerfCounter, commandStream); } } template inline void HardwareInterface::dispatchProfilingPerfEndCommands( TagNode *hwTimeStamps, TagNode *hwPerfCounter, LinearStream *commandStream, CommandQueue &commandQueue) { // If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled if (hwTimeStamps != nullptr) { GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamps, commandStream); } if (hwPerfCounter != nullptr) { GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd(commandQueue, *hwPerfCounter, commandStream); } } template inline void HardwareInterface::programWalker( LinearStream &commandStream, Kernel &kernel, CommandQueue &commandQueue, TimestampPacketContainer *currentTimestampPacketNodes, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, size_t globalWorkSizes[3], size_t localWorkSizes[3], PreemptionMode preemptionMode, size_t currentDispatchIndex, uint32_t &interfaceDescriptorIndex, const DispatchInfo &dispatchInfo, size_t offsetInterfaceDescriptorTable, Vec3 &numberOfWorkgroups, Vec3 &startOfWorkgroups) { auto walkerCmd = allocateWalkerSpace(commandStream, kernel); uint32_t dim = dispatchInfo.getDim(); uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z}; if (currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { auto timestampPacketNode = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); GpgpuWalkerHelper::setupTimestampPacket(&commandStream, walkerCmd, timestampPacketNode, TimestampPacketStorage::WriteOperationType::AfterWalker, commandQueue.getDevice().getRootDeviceEnvironment()); } auto 
isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, kernel, kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), simd, localWorkSizes, offsetInterfaceDescriptorTable, interfaceDescriptorIndex, preemptionMode, walkerCmd, nullptr, true); GpgpuWalkerHelper::setGpgpuWalkerThreadData(walkerCmd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, false, false, *kernel.getKernelInfo().patchInfo.threadPayload, 0u); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/local_id_gen.cpp000066400000000000000000000140461363734646600260670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/local_id_gen.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/utilities/cpu_info.h" #include namespace NEO { struct uint16x8_t; struct uint16x16_t; // This is the initial value of SIMD for local ID // computation. It correlates to the SIMD lane. 
// Must be 32byte aligned for AVX2 usage ALIGNAS(32) const uint16_t initialLocalID[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; // Lookup table for generating LocalIDs based on the SIMD of the kernel void (*LocalIDHelper::generateSimd8)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize) = generateLocalIDsSimd; void (*LocalIDHelper::generateSimd16)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize) = generateLocalIDsSimd; void (*LocalIDHelper::generateSimd32)(void *buffer, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize) = generateLocalIDsSimd; // Initialize the lookup table based on CPU capabilities LocalIDHelper::LocalIDHelper() { bool supportsAVX2 = CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureAvX2); if (supportsAVX2) { LocalIDHelper::generateSimd8 = generateLocalIDsSimd; LocalIDHelper::generateSimd16 = generateLocalIDsSimd; LocalIDHelper::generateSimd32 = generateLocalIDsSimd; } } LocalIDHelper LocalIDHelper::initializer; //traditional function to generate local IDs void generateLocalIDs(void *buffer, uint16_t simd, const std::array &localWorkgroupSize, const std::array &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize) { auto threadsPerWorkGroup = static_cast(getThreadsPerWG(simd, localWorkgroupSize[0] * localWorkgroupSize[1] * localWorkgroupSize[2])); bool useLayoutForImages = isImageOnlyKernel && isCompatibleWithLayoutForImages(localWorkgroupSize, dimensionsOrder, simd); if (useLayoutForImages) { generateLocalIDsWithLayoutForImages(buffer, localWorkgroupSize, simd); } else if (simd == 32) { LocalIDHelper::generateSimd32(buffer, localWorkgroupSize, threadsPerWorkGroup, dimensionsOrder, grfSize != 32); } else 
/// Tells whether a work-group can use the image-oriented local ID layout.
///
/// All of the following must hold:
///  - the first two entries of dimensionsOrder are 0 and 1,
///  - the X size is a multiple of 2 for SIMD8, of 4 for any other simd,
///  - the Y size is a multiple of 4,
///  - the Z size is exactly 1.
///
/// @param localWorkgroupSize local size as {x, y, z}
/// @param dimensionsOrder    dimension walk order
/// @param simd               SIMD width of the kernel
bool isCompatibleWithLayoutForImages(const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, uint16_t simd) {
    const uint8_t xMask = simd == 8u ? 0b1 : 0b11;
    const uint8_t yMask = 0b11;
    return dimensionsOrder.at(0) == 0 &&
           dimensionsOrder.at(1) == 1 &&
           (localWorkgroupSize.at(0) & xMask) == 0 &&
           (localWorkgroupSize.at(1) & yMask) == 0 &&
           localWorkgroupSize.at(2) == 1u;
}
/// Registers needed per thread for one channel of local IDs: two only for
/// SIMD32 on a 32-byte GRF, otherwise one.
inline uint32_t getGRFsPerThread(uint32_t simd, uint32_t grfSize) {
    const bool needsTwoRegisters = (simd == 32) && (grfSize == 32);
    return needsTwoRegisters ? 2 : 1;
}

/// Number of hardware threads required to cover lws work items.
///
/// Equals (lws + simd - 1) / simd for simd 8, 16 and 32; the division is done
/// as a right shift to avoid a CPU DIV instruction. Any other simd value
/// shifts by zero.
inline size_t getThreadsPerWG(uint32_t simd, size_t lws) {
    int shift = 0;
    switch (simd) {
    case 32:
        shift = 5;
        break;
    case 16:
        shift = 4;
        break;
    case 8:
        shift = 3;
        break;
    default:
        break;
    }
    return (lws + simd - 1) >> shift;
}

/// Bytes of local-ID payload for one thread, never less than one GRF.
///
/// @param simd        SIMD width; simd == 1 always uses a single channel
/// @param grfSize     size of one GRF register in bytes
/// @param numChannels number of ID channels emitted for simd != 1
inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
    const uint32_t channels = (simd == 1) ? 1u : numChannels;
    const uint32_t payloadSize = getGRFsPerThread(simd, grfSize) * grfSize * channels;
    return std::max(payloadSize, grfSize);
}
NEO { template inline void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize) { const int passes = simd / Vec::numChannels; int pass = 0; uint32_t xDimNum = dimensionsOrder[0]; uint32_t yDimNum = dimensionsOrder[1]; uint32_t zDimNum = dimensionsOrder[2]; const Vec vLwsX(localWorkgroupSize[xDimNum]); const Vec vLwsY(localWorkgroupSize[yDimNum]); auto zero = Vec::zero(); auto one = Vec::one(); const auto threadSkipSize = ((simd == 32 || chooseMaxRowSize) ? 32 : 16) * sizeof(uint16_t); Vec vSimdX(simd); Vec vSimdY = zero; Vec vSimdZ = zero; Vec xWrap; Vec yWrap; // We need to convert simd into appropriate delta adders do { xWrap = vSimdX >= vLwsX; // xWrap ? lwsX : 0; auto deltaX = blend(vLwsX, zero, xWrap); // x -= xWrap ? lwsX : 0; vSimdX -= deltaX; // xWrap ? 1 : 0; auto deltaY = blend(one, zero, xWrap); // y += xWrap ? 1 : 0; vSimdY += deltaY; yWrap = vSimdY >= vLwsY; // yWrap ? lwsY : 0; auto deltaY2 = blend(vLwsY, zero, yWrap); // y -= yWrap ? lwsY : 0; vSimdY -= deltaY2; // yWrap ? 1 : 0; auto deltaZ = blend(one, zero, yWrap); // z += yWrap ? 1 : 0; vSimdZ += deltaZ; } while (xWrap || yWrap); // Loop for each of the passes do { auto buffer = b; Vec x(&initialLocalID[pass * Vec::numChannels]); Vec y = zero; Vec z = zero; // Convert the initial SIMD lanes to localIDs do { xWrap = x >= vLwsX; // xWrap ? lwsX : 0; auto deltaX = blend(vLwsX, zero, xWrap); // x -= xWrap ? lwsX : 0; x -= deltaX; // xWrap ? 1 : 0; auto deltaY = blend(one, zero, xWrap); // y += xWrap ? 1 : 0; y += deltaY; yWrap = y >= vLwsY; // yWrap ? lwsY : 0; auto deltaY2 = blend(vLwsY, zero, yWrap); // y -= yWrap ? lwsY : 0; y -= deltaY2; // yWrap ? 1 : 0; auto deltaZ = blend(one, zero, yWrap); // z += yWrap ? 
1 : 0; z += deltaZ; } while (xWrap); for (size_t i = 0; i < threadsPerWorkGroup; ++i) { x.store(ptrOffset(buffer, xDimNum * threadSkipSize)); y.store(ptrOffset(buffer, yDimNum * threadSkipSize)); z.store(ptrOffset(buffer, zDimNum * threadSkipSize)); x += vSimdX; y += vSimdY; z += vSimdZ; xWrap = x >= vLwsX; // xWrap ? lwsX : 0; auto deltaX = blend(vLwsX, zero, xWrap); // x -= xWrap ? lwsX : 0; x -= deltaX; // xWrap ? 1 : 0; auto deltaY = blend(one, zero, xWrap); // y += xWrap ? 1 : 0; y += deltaY; yWrap = y >= vLwsY; // yWrap ? lwsY : 0; auto deltaY2 = blend(vLwsY, zero, yWrap); // y -= yWrap ? lwsY : 0; y -= deltaY2; // yWrap ? 1 : 0; auto deltaZ = blend(one, zero, yWrap); // z += yWrap ? 1 : 0; z += deltaZ; buffer = ptrOffset(buffer, 3 * threadSkipSize); } // Adjust buffer for next pass b = ptrOffset(b, Vec::numChannels * sizeof(uint16_t)); } while (++pass < passes); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/local_id_gen_avx2.cpp000066400000000000000000000012661363734646600270270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #if __AVX2__ #include "opencl/source/command_queue/local_id_gen.inl" #include "opencl/source/helpers/uint16_avx2.h" #include namespace NEO { template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); template void generateLocalIDsSimd(void *b, const std::array &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array &dimensionsOrder, bool chooseMaxRowSize); } // namespace NEO #endif compute-runtime-20.13.16352/opencl/source/command_queue/local_id_gen_sse4.cpp000066400000000000000000000015631363734646600270250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/local_id_gen.inl" #include "opencl/source/helpers/uint16_sse4.h" 
//threshold used to determine what kind of device is underneath
//big cores like SKL have 8EU * 7 HW threads per subslice and are considered as highThreadCount devices
constexpr uint32_t highThreadCountThreshold = 56u;

static const uint32_t optimalHardwareThreadCountGeneric[] = {32, 16, 8, 4, 2, 1};

// Primes below 256 in descending order; index MAX_PRIMES - 1 is the smallest (2).
static const uint32_t primeNumbers[] = {
    251, 241, 239, 233, 229, 227, 223, 211,
    199, 197, 193, 191, 181, 179, 173, 167,
    163, 157, 151, 149, 139, 137, 131, 127,
    113, 109, 107, 103, 101, 97, 89, 83,
    79, 73, 71, 67, 61, 59, 53, 47,
    43, 41, 37, 31, 29, 23, 19, 17,
    13, 11, 7, 5, 3, 2};

static const size_t MAX_PRIMES = sizeof(primeNumbers) / sizeof(primeNumbers[0]);

// Multiplies workSize by primeNumber for as long as the product stays within
// maxWorkGroupSize and still divides workItems; returns the grown work size.
static inline uint32_t absorbPrimeFactor(size_t workItems, uint32_t workSize, uint32_t maxWorkGroupSize, uint32_t primeNumber) {
    auto newWorkSize = workSize * primeNumber;
    while (newWorkSize <= maxWorkGroupSize && (workItems % newWorkSize) == 0) {
        workSize = newWorkSize;
        newWorkSize = workSize * primeNumber;
    }
    return workSize;
}

// Recursive template function to test prime factors.
//
// Tries primeNumbers[primeIndex] and then every entry below that index, i.e.
// ever larger primes. The recursion stops early as soon as workSize times the
// current prime exceeds workItems, because no larger prime can fit either.
//
// workItems        - total number of work items that the result must divide
// workSize         - divisor accumulated so far
// maxWorkGroupSize - upper bound for the result
// returns the largest divisor reachable by greedily absorbing prime factors
template <uint32_t primeIndex>
static inline uint32_t factor(size_t workItems, uint32_t workSize, uint32_t maxWorkGroupSize) {
    const auto primeNumber = primeNumbers[primeIndex];

    if (workSize * primeNumber > workItems) {
        return workSize;
    }

    workSize = absorbPrimeFactor(workItems, workSize, maxWorkGroupSize, primeNumber);
    return factor<primeIndex - 1>(workItems, workSize, maxWorkGroupSize);
}

// Terminator of recursive factoring logic: handles the last table entry
// (index 0) and does not recurse further.
template <>
inline uint32_t factor<0>(size_t workItems, uint32_t workSize, uint32_t maxWorkGroupSize) {
    const auto primeNumber = primeNumbers[0];

    if (workSize * primeNumber > workItems) {
        return workSize;
    }

    return absorbPrimeFactor(workItems, workSize, maxWorkGroupSize, primeNumber);
}
4 : (uint32_t(optimalLocalThreads * simdSize) / xDim); workGroupSize[0] = xDim; workGroupSize[1] = yDim; } else { uint32_t xDim, yDim, zDim; xDim = uint32_t(optimalLocalThreads * simdSize); while (xDim > workItems[0]) xDim = xDim >> 1; yDim = uint32_t(optimalLocalThreads * simdSize) / xDim; while (yDim > workItems[1]) yDim = yDim >> 1; UNRECOVERABLE_IF((xDim * yDim) == 0); zDim = uint32_t(optimalLocalThreads * simdSize) / (xDim * yDim); workGroupSize[0] = xDim; workGroupSize[1] = yDim; workGroupSize[2] = zDim; } } void choosePreferredWorkGroupSizeWithRatio(uint32_t xyzFactors[3][1024], uint32_t xyzFactorsLen[3], size_t workGroupSize[3], const size_t workItems[3], WorkSizeInfo wsInfo) { float ratioDiff = 0; float localRatio = float(0xffffffff); ulong localWkgs = 0xffffffff; ulong workGroups; for (cl_uint XFactorsIdx = 0; XFactorsIdx < xyzFactorsLen[0]; ++XFactorsIdx) { for (cl_uint YFactorsIdx = 0; YFactorsIdx < xyzFactorsLen[1]; ++YFactorsIdx) { uint32_t Xdim = xyzFactors[0][xyzFactorsLen[0] - 1 - XFactorsIdx]; uint32_t Ydim = xyzFactors[1][YFactorsIdx]; if ((Xdim * Ydim) > wsInfo.maxWorkGroupSize) { break; } if ((Xdim * Ydim) < wsInfo.minWorkGroupSize) { continue; } workGroups = Math::divideAndRoundUp(workItems[0], Xdim); workGroups *= Math::divideAndRoundUp(workItems[1], Ydim); ratioDiff = log((float)Xdim) - log((float)Ydim); ratioDiff = fabs(wsInfo.targetRatio - ratioDiff); if (wsInfo.useStrictRatio == CL_TRUE) { if (ratioDiff < localRatio) { workGroupSize[0] = Xdim; workGroupSize[1] = Ydim; localRatio = ratioDiff; localWkgs = workGroups; } } else { if ((workGroups < localWkgs) || ((workGroups == localWkgs) && (ratioDiff < localRatio))) { workGroupSize[0] = Xdim; workGroupSize[1] = Ydim; localRatio = ratioDiff; localWkgs = workGroups; } } } } } void choosePreferredWorkGroupSizeWithOutRatio(uint32_t xyzFactors[3][1024], uint32_t xyzFactorsLen[3], size_t workGroupSize[3], const size_t workItems[3], WorkSizeInfo wsInfo, uint32_t workdim) { ulong 
localEuThrdsDispatched = 0xffffffff; ulong workGroups; for (uint32_t ZFactorsIdx = 0; ZFactorsIdx < xyzFactorsLen[2]; ++ZFactorsIdx) { for (uint32_t XFactorsIdx = 0; XFactorsIdx < xyzFactorsLen[0]; ++XFactorsIdx) { for (uint32_t YFactorsIdx = 0; YFactorsIdx < xyzFactorsLen[1]; ++YFactorsIdx) { uint32_t Xdim = xyzFactors[0][xyzFactorsLen[0] - 1 - XFactorsIdx]; uint32_t Ydim = xyzFactors[1][YFactorsIdx]; uint32_t Zdim = xyzFactors[2][ZFactorsIdx]; if ((Xdim * Ydim * Zdim) > wsInfo.maxWorkGroupSize) { break; } if ((Xdim * Ydim * Zdim) < wsInfo.minWorkGroupSize) { continue; } workGroups = Math::divideAndRoundUp(workItems[0], Xdim); workGroups *= Math::divideAndRoundUp(workItems[1], Ydim); workGroups *= Math::divideAndRoundUp(workItems[2], Zdim); cl_ulong euThrdsDispatched; euThrdsDispatched = Math::divideAndRoundUp(Xdim * Ydim * Zdim, wsInfo.simdSize); euThrdsDispatched *= workGroups; if (euThrdsDispatched < localEuThrdsDispatched) { localEuThrdsDispatched = euThrdsDispatched; workGroupSize[0] = Xdim; workGroupSize[1] = Ydim; workGroupSize[2] = Zdim; } } } } } void computeWorkgroupSize1D(uint32_t maxWorkGroupSize, size_t workGroupSize[3], const size_t workItems[3], size_t simdSize) { auto items = workItems[0]; // Determine the LSB set to quickly handle factors of 2 auto numBits = Math::getMinLsbSet(static_cast(items)); // Clamp power of 2 result to maxWorkGroupSize uint32_t workSize = 1u << numBits; //Assumes maxWorkGroupSize is a power of two. 
DEBUG_BREAK_IF((maxWorkGroupSize & (maxWorkGroupSize - 1)) != 0); workSize = std::min(workSize, maxWorkGroupSize); // Try all primes as potential factors workSize = factor(items, workSize, maxWorkGroupSize); workGroupSize[0] = workSize; workGroupSize[1] = 1; workGroupSize[2] = 1; } void computeWorkgroupSize2D(uint32_t maxWorkGroupSize, size_t workGroupSize[3], const size_t workItems[3], size_t simdSize) { uint32_t xFactors[1024]; uint32_t yFactors[1024]; uint32_t xFactorsLen = 0; uint32_t yFactorsLen = 0; uint64_t waste; uint64_t localWSWaste = 0xffffffff; uint64_t euThrdsDispatched; uint64_t localEuThrdsDispatched = 0xffffffff; uint64_t workGroups; uint32_t xDim; uint32_t yDim; for (int i = 0; i < 3; i++) workGroupSize[i] = 1; for (uint32_t i = 2; i <= maxWorkGroupSize; i++) { if ((workItems[0] % i) == 0) { xFactors[xFactorsLen++] = i; } if (((workItems[1] % i) == 0)) { yFactors[yFactorsLen++] = i; } } for (uint32_t xFactorsIdx = 0; xFactorsIdx < xFactorsLen; ++xFactorsIdx) { for (uint32_t yFactorsIdx = 0; yFactorsIdx < yFactorsLen; ++yFactorsIdx) { // Pick a LocalWorkSize that is a multiple as well as appropriate: // 1 <= workGroupSize[ 0 ] <= workItems[ 0 ] // 1 <= workGroupSize[ 1 ] <= workItems[ 1 ] xDim = xFactors[xFactorsLen - 1 - xFactorsIdx]; yDim = yFactors[yFactorsIdx]; if ((xDim * yDim) > maxWorkGroupSize) { // The yDim value is too big, so break out of this loop. // No other entries will work. break; } // Find the wasted channels. workGroups = Math::divideAndRoundUp(workItems[0], xDim); workGroups *= Math::divideAndRoundUp(workItems[1], yDim); // Compaction Mode! 
euThrdsDispatched = Math::divideAndRoundUp(xDim * yDim, simdSize); euThrdsDispatched *= workGroups; waste = simdSize - ((xDim * yDim - 1) & (simdSize - 1)); waste *= workGroups; if (((euThrdsDispatched < localEuThrdsDispatched) || ((euThrdsDispatched == localEuThrdsDispatched) && (waste < localWSWaste)))) { localWSWaste = waste; localEuThrdsDispatched = euThrdsDispatched; workGroupSize[0] = xDim; workGroupSize[1] = yDim; } } } } void computeWorkgroupSizeSquared(uint32_t maxWorkGroupSize, size_t workGroupSize[3], const size_t workItems[3], size_t simdSize, const uint32_t workDim) { for (int i = 0; i < 3; i++) workGroupSize[i] = 1; size_t itemsPowerOfTwoDivisors[3] = {1, 1, 1}; for (auto i = 0u; i < workDim; i++) { uint32_t requiredWorkItemsCount = maxWorkGroupSize; while (requiredWorkItemsCount > 1 && !(Math::isDivisibleByPowerOfTwoDivisor(uint32_t(workItems[i]), requiredWorkItemsCount))) requiredWorkItemsCount >>= 1; itemsPowerOfTwoDivisors[i] = requiredWorkItemsCount; } if (itemsPowerOfTwoDivisors[0] * itemsPowerOfTwoDivisors[1] >= maxWorkGroupSize) { while (itemsPowerOfTwoDivisors[0] * itemsPowerOfTwoDivisors[1] > maxWorkGroupSize) { if (itemsPowerOfTwoDivisors[0] > itemsPowerOfTwoDivisors[1]) itemsPowerOfTwoDivisors[0] >>= 1; else itemsPowerOfTwoDivisors[1] >>= 1; } for (auto i = 0u; i < 3; i++) workGroupSize[i] = itemsPowerOfTwoDivisors[i]; return; } else if (workItems[0] * workItems[1] > maxWorkGroupSize) { computeWorkgroupSize2D(maxWorkGroupSize, workGroupSize, workItems, simdSize); return; } else { for (auto i = 0u; i < workDim; i++) workGroupSize[i] = workItems[i]; return; } } void computeWorkgroupSizeND(WorkSizeInfo wsInfo, size_t workGroupSize[3], const size_t workItems[3], const uint32_t workDim) { for (int i = 0; i < 3; i++) workGroupSize[i] = 1; uint64_t totalNuberOfItems = workItems[0] * workItems[1] * workItems[2]; UNRECOVERABLE_IF(wsInfo.simdSize == 0); //Find biggest power of two which devide each dimension size if (wsInfo.slmTotalSize == 0 && 
!wsInfo.hasBarriers) { if (DebugManager.flags.EnableComputeWorkSizeSquared.get() && workDim == 2 && !wsInfo.imgUsed) { return computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workItems, wsInfo.simdSize, workDim); } size_t itemsPowerOfTwoDivisors[3] = {1, 1, 1}; for (auto i = 0u; i < workDim; i++) { uint32_t requiredWorkItemsCount = uint32_t(wsInfo.simdSize * optimalHardwareThreadCountGeneric[0]); while (requiredWorkItemsCount > 1 && !(Math::isDivisibleByPowerOfTwoDivisor(uint32_t(workItems[i]), requiredWorkItemsCount))) requiredWorkItemsCount >>= 1; itemsPowerOfTwoDivisors[i] = requiredWorkItemsCount; } bool canUseNx4 = (wsInfo.imgUsed && (itemsPowerOfTwoDivisors[0] >= 4 || (itemsPowerOfTwoDivisors[0] >= 2 && wsInfo.simdSize == 8)) && itemsPowerOfTwoDivisors[1] >= 4); //If computed dimension sizes which are powers of two are creating group which is //bigger than maxWorkGroupSize or this group would create more than optimal hardware threads then downsize it uint64_t allItems = itemsPowerOfTwoDivisors[0] * itemsPowerOfTwoDivisors[1] * itemsPowerOfTwoDivisors[2]; if (allItems > wsInfo.simdSize && (allItems > wsInfo.maxWorkGroupSize || allItems > wsInfo.simdSize * optimalHardwareThreadCountGeneric[0])) { computePowerOfTwoLWS(itemsPowerOfTwoDivisors, wsInfo, workGroupSize, workDim, canUseNx4); return; } //If coputed workgroup is at this point in correct size else if (allItems >= wsInfo.simdSize) { itemsPowerOfTwoDivisors[1] = canUseNx4 ? 
4 : itemsPowerOfTwoDivisors[1]; for (auto i = 0u; i < workDim; i++) workGroupSize[i] = itemsPowerOfTwoDivisors[i]; return; } } //If dimensions are not powers of two but total number of items is less than max work group size if (totalNuberOfItems <= wsInfo.maxWorkGroupSize) { for (auto i = 0u; i < workDim; i++) workGroupSize[i] = workItems[i]; return; } else { if (workDim == 1) computeWorkgroupSize1D(wsInfo.maxWorkGroupSize, workGroupSize, workItems, wsInfo.simdSize); else { uint32_t xyzFactors[3][1024]; uint32_t xyzFactorsLen[3] = {}; //check if algorithm should use ratio wsInfo.checkRatio(workItems); //find all divisors for all dimensions for (int i = 0; i < 3; i++) xyzFactors[i][xyzFactorsLen[i]++] = 1; for (auto i = 0u; i < workDim; i++) { for (auto j = 2u; j < wsInfo.maxWorkGroupSize; ++j) { if ((workItems[i] % j) == 0) { xyzFactors[i][xyzFactorsLen[i]++] = j; } } } if (wsInfo.useRatio) { choosePreferredWorkGroupSizeWithRatio(xyzFactors, xyzFactorsLen, workGroupSize, workItems, wsInfo); if (wsInfo.useStrictRatio && workGroupSize[0] * workGroupSize[1] * 2 <= wsInfo.simdSize) { wsInfo.useStrictRatio = false; choosePreferredWorkGroupSizeWithRatio(xyzFactors, xyzFactorsLen, workGroupSize, workItems, wsInfo); } } else choosePreferredWorkGroupSizeWithOutRatio(xyzFactors, xyzFactorsLen, workGroupSize, workItems, wsInfo, workDim); } } } Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { size_t workGroupSize[3] = {}; if (dispatchInfo.getKernel() != nullptr) { if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(dispatchInfo); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim()); } else { auto maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize; auto simd = dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize(); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, 
dispatchInfo.getGWS().z}; if (dispatchInfo.getDim() == 1) { computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simd); } else if (DebugManager.flags.EnableComputeWorkSizeSquared.get() && dispatchInfo.getDim() == 2) { computeWorkgroupSizeSquared(maxWorkGroupSize, workGroupSize, workItems, simd, dispatchInfo.getDim()); } else { computeWorkgroupSize2D(maxWorkGroupSize, workGroupSize, workItems, simd); } } } DBG_LOG(PrintLWSSizes, "Input GWS enqueueBlocked", dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z, " Driver deduced LWS", workGroupSize[0], workGroupSize[1], workGroupSize[2]); return {workGroupSize[0], workGroupSize[1], workGroupSize[2]}; } Vec3 generateWorkgroupSize(const DispatchInfo &dispatchInfo) { return (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) ? computeWorkgroupSize(dispatchInfo) : dispatchInfo.getEnqueuedWorkgroupSize(); } Vec3 computeWorkgroupsNumber(const Vec3 gws, const Vec3 lws) { return (Vec3(gws.x / lws.x + ((gws.x % lws.x) ? 1 : 0), gws.y / lws.y + ((gws.y % lws.y) ? 1 : 0), gws.z / lws.z + ((gws.z % lws.z) ? 1 : 0))); } Vec3 generateWorkgroupsNumber(const Vec3 gws, const Vec3 lws) { return (lws.x > 0) ? computeWorkgroupsNumber(gws, lws) : Vec3(0, 0, 0); } Vec3 generateWorkgroupsNumber(const DispatchInfo &dispatchInfo) { return generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getLocalWorkgroupSize()); } Vec3 canonizeWorkgroup(Vec3 workgroup) { return ((workgroup.x > 0) ? 
Vec3({workgroup.x, std::max(workgroup.y, static_cast(1)), std::max(workgroup.z, static_cast(1))}) : Vec3(0, 0, 0)); } void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo) { if (context != nullptr && context->isProvidingPerformanceHints() && dispatchInfo.getDim() <= 3) { size_t preferredWorkGroupSize[3]; auto lws = computeWorkgroupSize(dispatchInfo); preferredWorkGroupSize[0] = lws.x; preferredWorkGroupSize[1] = lws.y; preferredWorkGroupSize[2] = lws.z; if (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, NULL_LOCAL_WORKGROUP_SIZE, dispatchInfo.getKernel()->getKernelInfo().name.c_str(), preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]); } else { size_t localWorkSizesIn[3] = {dispatchInfo.getEnqueuedWorkgroupSize().x, dispatchInfo.getEnqueuedWorkgroupSize().y, dispatchInfo.getEnqueuedWorkgroupSize().z}; for (auto i = 0u; i < dispatchInfo.getDim(); i++) { if (localWorkSizesIn[i] != preferredWorkGroupSize[i]) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, BAD_LOCAL_WORKGROUP_SIZE, localWorkSizesIn[0], localWorkSizesIn[1], localWorkSizesIn[2], dispatchInfo.getKernel()->getKernelInfo().name.c_str(), preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]); break; } } } } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_queue/resource_barrier.h000066400000000000000000000002271363734646600264660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { struct BarrierCommand {}; } // namespace NEOcompute-runtime-20.13.16352/opencl/source/command_stream/000077500000000000000000000000001363734646600231265ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/command_stream/CMakeLists.txt000066400000000000000000000034171363734646600256730ustar00rootroot00000000000000# # Copyright (C) 
2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/aub_stream_provider.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.inl ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_simulated_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/per_dss_backed_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_STREAM}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_STREAM ${RUNTIME_SRCS_COMMAND_STREAM}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/command_stream/aub_command_stream_receiver.cpp000066400000000000000000000272611363734646600313460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/options.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/os_interface/os_inc_base.h" #include #include #include namespace NEO { AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE] = {}; std::string AUBCommandStreamReceiver::createFullFilePath(const HardwareInfo &hwInfo, const std::string &filename) { std::string hwPrefix = hardwarePrefix[hwInfo.platform.eProductFamily]; // Generate the full filename const auto >SystemInfo = hwInfo.gtSystemInfo; std::stringstream strfilename; auto subDevicesCount = HwHelper::getSubDevicesCount(&hwInfo); uint32_t subSlicesPerSlice = gtSystemInfo.SubSliceCount / gtSystemInfo.SliceCount; strfilename << hwPrefix << "_"; if (subDevicesCount > 1) { strfilename << subDevicesCount << "tx"; } strfilename << gtSystemInfo.SliceCount << "x" << subSlicesPerSlice << "x" << gtSystemInfo.MaxEuPerSubSlice << "_" << filename << ".aub"; // clean-up any fileName issues because of the file system incompatibilities auto fileName = strfilename.str(); for (char &i : fileName) { i = i == '/' ? 
'_' : i; } std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(fileName); return filePath; } CommandStreamReceiver *AUBCommandStreamReceiver::create(const std::string &baseName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); std::string filePath = AUBCommandStreamReceiver::createFullFilePath(*hwInfo, baseName); if (DebugManager.flags.AUBDumpCaptureFileName.get() != "unk") { filePath.assign(DebugManager.flags.AUBDumpCaptureFileName.get()); } if (hwInfo->platform.eRenderCoreFamily >= IGFX_MAX_CORE) { DEBUG_BREAK_IF(!false); return nullptr; } auto pCreate = aubCommandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; return pCreate ? pCreate(filePath, standalone, executionEnvironment, rootDeviceIndex) : nullptr; } } // namespace NEO namespace AubMemDump { using CmdServicesMemTraceMemoryCompare = AubMemDump::CmdServicesMemTraceMemoryCompare; using CmdServicesMemTraceMemoryWrite = AubMemDump::CmdServicesMemTraceMemoryWrite; using CmdServicesMemTraceRegisterPoll = AubMemDump::CmdServicesMemTraceRegisterPoll; using CmdServicesMemTraceRegisterWrite = AubMemDump::CmdServicesMemTraceRegisterWrite; using CmdServicesMemTraceVersion = AubMemDump::CmdServicesMemTraceVersion; static auto sizeMemoryWriteHeader = sizeof(CmdServicesMemTraceMemoryWrite) - sizeof(CmdServicesMemTraceMemoryWrite::data); extern const size_t g_dwordCountMax; void AubFileStream::open(const char *filePath) { fileHandle.open(filePath, std::ofstream::binary); fileName.assign(filePath); } void AubFileStream::close() { fileHandle.close(); fileName.clear(); } void AubFileStream::write(const char *data, size_t size) { fileHandle.write(data, size); } void AubFileStream::flush() { fileHandle.flush(); } bool AubFileStream::init(uint32_t stepping, uint32_t device) { CmdServicesMemTraceVersion header = {}; header.setHeader(); header.dwordCount 
= (sizeof(header) / sizeof(uint32_t)) - 1; header.stepping = stepping; header.metal = 0; header.device = device; header.csxSwizzling = CmdServicesMemTraceVersion::CsxSwizzlingValues::Disabled; //Which recording method used: // Phys is required for GGTT memory to be written directly to phys vs through aperture. header.recordingMethod = CmdServicesMemTraceVersion::RecordingMethodValues::Phy; header.pch = CmdServicesMemTraceVersion::PchValues::Default; header.captureTool = CmdServicesMemTraceVersion::CaptureToolValues::GenKmdCapture; header.primaryVersion = 0; header.secondaryVersion = 0; header.commandLine[0] = 'N'; header.commandLine[1] = 'E'; header.commandLine[2] = 'O'; header.commandLine[3] = 0; write(reinterpret_cast(&header), sizeof(header)); return true; } void AubFileStream::writeMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) { writeMemoryWriteHeader(physAddress, size, addressSpace, hint); // Copy the contents from source to destination. 
write(reinterpret_cast(memory), size); auto sizeRemainder = size % sizeof(uint32_t); if (sizeRemainder) { //if input size is not 4 byte aligned, write extra zeros to AUB uint32_t zero = 0; write(reinterpret_cast(&zero), sizeof(uint32_t) - sizeRemainder); } } void AubFileStream::writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) { CmdServicesMemTraceMemoryWrite header = {}; auto alignedBlockSize = (size + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1); auto dwordCount = (sizeMemoryWriteHeader + alignedBlockSize) / sizeof(uint32_t); DEBUG_BREAK_IF(dwordCount > AubMemDump::g_dwordCountMax); header.setHeader(); header.dwordCount = static_cast(dwordCount - 1); header.address = physAddress; header.repeatMemory = CmdServicesMemTraceMemoryWrite::RepeatMemoryValues::NoRepeat; header.tiling = CmdServicesMemTraceMemoryWrite::TilingValues::NoTiling; header.dataTypeHint = hint; header.addressSpace = addressSpace; header.dataSizeInBytes = static_cast(size); write(reinterpret_cast(&header), sizeMemoryWriteHeader); } void AubFileStream::writeGTT(uint32_t gttOffset, uint64_t entry) { write(reinterpret_cast(&entry), sizeof(entry)); } void AubFileStream::writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) { write(reinterpret_cast(&entry), sizeof(entry)); } void AubFileStream::writeMMIOImpl(uint32_t offset, uint32_t value) { CmdServicesMemTraceRegisterWrite header = {}; header.setHeader(); header.dwordCount = (sizeof(header) / sizeof(uint32_t)) - 1; header.registerOffset = offset; header.messageSourceId = MessageSourceIdValues::Ia; header.registerSize = RegisterSizeValues::Dword; header.registerSpace = RegisterSpaceValues::Mmio; header.writeMaskLow = 0xffffffff; header.writeMaskHigh = 0x00000000; header.data[0] = value; write(reinterpret_cast(&header), sizeof(header)); } void AubFileStream::registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) { 
CmdServicesMemTraceRegisterPoll header = {}; header.setHeader(); header.registerOffset = registerOffset; header.timeoutAction = timeoutAction; header.pollNotEqual = pollNotEqual; header.operationType = CmdServicesMemTraceRegisterPoll::OperationTypeValues::Normal; header.registerSize = CmdServicesMemTraceRegisterPoll::RegisterSizeValues::Dword; header.registerSpace = CmdServicesMemTraceRegisterPoll::RegisterSpaceValues::Mmio; header.pollMaskLow = mask; header.data[0] = value; header.dwordCount = (sizeof(header) / sizeof(uint32_t)) - 1; write(reinterpret_cast(&header), sizeof(header)); } void AubFileStream::expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) { using AubMemDump::CmdServicesMemTraceRegisterCompare; CmdServicesMemTraceRegisterCompare header; memset(&header, 0, sizeof(header)); header.setHeader(); header.data[0] = expectedValue; header.registerOffset = mmioRegister; header.noReadExpect = CmdServicesMemTraceRegisterCompare::NoReadExpectValues::ReadExpect; header.registerSize = CmdServicesMemTraceRegisterCompare::RegisterSizeValues::Dword; header.registerSpace = CmdServicesMemTraceRegisterCompare::RegisterSpaceValues::Mmio; header.readMaskLow = 0xffffffff; header.readMaskHigh = 0xffffffff; header.dwordCount = (sizeof(header) / sizeof(uint32_t)) - 1; write(reinterpret_cast(&header), sizeof(header)); } void AubFileStream::expectMemory(uint64_t physAddress, const void *memory, size_t sizeRemaining, uint32_t addressSpace, uint32_t compareOperation) { using CmdServicesMemTraceMemoryCompare = AubMemDump::CmdServicesMemTraceMemoryCompare; CmdServicesMemTraceMemoryCompare header = {}; header.setHeader(); header.noReadExpect = CmdServicesMemTraceMemoryCompare::NoReadExpectValues::ReadExpect; header.repeatMemory = CmdServicesMemTraceMemoryCompare::RepeatMemoryValues::NoRepeat; header.tiling = CmdServicesMemTraceMemoryCompare::TilingValues::NoTiling; header.crcCompare = CmdServicesMemTraceMemoryCompare::CrcCompareValues::NoCrc; header.compareOperation = 
compareOperation; header.dataTypeHint = CmdServicesMemTraceMemoryCompare::DataTypeHintValues::TraceNotype; header.addressSpace = addressSpace; auto headerSize = sizeof(CmdServicesMemTraceMemoryCompare) - sizeof(CmdServicesMemTraceMemoryCompare::data); auto blockSizeMax = g_dwordCountMax * sizeof(uint32_t) - headerSize; // We have to decompose memory into chunks that can be streamed per iteration while (sizeRemaining > 0) { AubMemDump::setAddress(header, physAddress); auto sizeThisIteration = std::min(sizeRemaining, blockSizeMax); // Round up to the number of dwords auto dwordCount = Math::divideAndRoundUp(headerSize + sizeThisIteration, sizeof(uint32_t)); header.dwordCount = static_cast(dwordCount - 1); header.dataSizeInBytes = static_cast(sizeThisIteration); // Write the header write(reinterpret_cast(&header), headerSize); // Copy the contents from source to destination. write(reinterpret_cast(memory), sizeThisIteration); sizeRemaining -= sizeThisIteration; memory = (uint8_t *)memory + sizeThisIteration; physAddress += sizeThisIteration; auto remainder = sizeThisIteration & (sizeof(uint32_t) - 1); if (remainder) { //if size is not 4 byte aligned, write extra zeros to AUB uint32_t zero = 0; write(reinterpret_cast(&zero), sizeof(uint32_t) - remainder); } } } void AubFileStream::createContext(const AubPpgttContextCreate &cmd) { write(reinterpret_cast(&cmd), sizeof(cmd)); } bool AubFileStream::addComment(const char *message) { using CmdServicesMemTraceComment = AubMemDump::CmdServicesMemTraceComment; CmdServicesMemTraceComment cmd = {}; cmd.setHeader(); cmd.syncOnComment = false; cmd.syncOnSimulatorDisplay = false; auto messageLen = strlen(message) + 1; auto dwordLen = ((messageLen + sizeof(uint32_t) - 1) & ~(sizeof(uint32_t) - 1)) / sizeof(uint32_t); cmd.dwordCount = static_cast(dwordLen + 1); write(reinterpret_cast(&cmd), sizeof(cmd) - sizeof(cmd.comment)); write(message, messageLen); auto remainder = messageLen & (sizeof(uint32_t) - 1); if (remainder) { //if size 
is not 4 byte aligned, write extra zeros to AUB uint32_t zero = 0; write(reinterpret_cast(&zero), sizeof(uint32_t) - remainder); } return true; } std::unique_lock AubFileStream::lockStream() { return std::unique_lock(mutex); } } // namespace AubMemDump compute-runtime-20.13.16352/opencl/source/command_stream/aub_command_stream_receiver.h000066400000000000000000000014731363734646600310100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include namespace NEO { struct HardwareInfo; class CommandStreamReceiver; class ExecutionEnvironment; struct AUBCommandStreamReceiver { static CommandStreamReceiver *create(const std::string &filename, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); static std::string createFullFilePath(const HardwareInfo &hwInfo, const std::string &filename); using AubFileStream = AubMemDump::AubFileStream; }; typedef CommandStreamReceiver *(*AubCommandStreamReceiverCreateFunc)(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/aub_command_stream_receiver_hw.h000066400000000000000000000115631363734646600315070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/array_count.h" #include "shared/source/utilities/spinlock.h" #include "opencl/source/aub/aub_center.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/gen_common/aub_mapper.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/memory_manager/page_table.h" #include "opencl/source/memory_manager/physical_address_allocator.h" #include "command_stream_receiver_simulated_hw.h" namespace NEO { class 
AubSubCaptureManager; template class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw { protected: typedef CommandStreamReceiverSimulatedHw BaseClass; using AUB = typename AUBFamilyMapper::AUB; using ExternalAllocationsContainer = std::vector; using BaseClass::getParametersForWriteMemory; using BaseClass::osContext; public: using CommandStreamReceiverSimulatedCommonHw::initAdditionalMMIO; using CommandStreamReceiverSimulatedCommonHw::aubManager; using CommandStreamReceiverSimulatedCommonHw::hardwareContextController; using CommandStreamReceiverSimulatedCommonHw::engineInfo; using CommandStreamReceiverSimulatedCommonHw::stream; bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; void makeNonResident(GraphicsAllocation &gfxAllocation) override; void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override; void makeResidentExternal(AllocationView &allocationView); void makeNonResidentExternal(uint64_t gpuAddress); AubMemDump::AubFileStream *getAubStream() const { return static_cast(this->stream); } void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override; bool writeMemory(GraphicsAllocation &gfxAllocation) override; MOCKABLE_VIRTUAL bool writeMemory(AllocationView &allocationView); void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue); bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) override; AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) override; void addAubComment(const char *message) override; // Family specific version MOCKABLE_VIRTUAL void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits); void pollForCompletion() override; void pollForCompletionImpl() override; void 
waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override; uint32_t getDumpHandle(); MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle); MOCKABLE_VIRTUAL void dumpAllocation(GraphicsAllocation &gfxAllocation); static CommandStreamReceiver *create(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); AUBCommandStreamReceiverHw(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); ~AUBCommandStreamReceiverHw() override; AUBCommandStreamReceiverHw(const AUBCommandStreamReceiverHw &) = delete; AUBCommandStreamReceiverHw &operator=(const AUBCommandStreamReceiverHw &) = delete; MOCKABLE_VIRTUAL void openFile(const std::string &fileName); MOCKABLE_VIRTUAL bool reopenFile(const std::string &fileName); MOCKABLE_VIRTUAL void initFile(const std::string &fileName); MOCKABLE_VIRTUAL void closeFile(); MOCKABLE_VIRTUAL bool isFileOpen() const; MOCKABLE_VIRTUAL const std::string getFileName(); MOCKABLE_VIRTUAL void initializeEngine(); std::unique_ptr subCaptureManager; uint32_t aubDeviceId; bool standalone; std::unique_ptr::type> ppgtt; std::unique_ptr ggtt; // remap CPU VA -> GGTT VA AddressMapper *gttRemap; MOCKABLE_VIRTUAL bool addPatchInfoComments(); void addGUCStartMessage(uint64_t batchBufferAddress); uint32_t getGUCWorkQueueItemHeader(); CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_AUB; } int getAddressSpaceFromPTEBits(uint64_t entryBits) const; protected: constexpr static uint32_t getMaskAndValueForPollForCompletion(); bool dumpAubNonWritable = false; ExternalAllocationsContainer externalAllocations; uint32_t pollForCompletionTaskCount = 0u; SpinLock pollForCompletionLock; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl000066400000000000000000001016751363734646600330400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/aub/aub_helper.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.inl" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_stream_provider.h" #include "opencl/source/command_stream/aub_subcapture.h" #include "opencl/source/helpers/hardware_context_controller.h" #include "opencl/source/helpers/neo_driver_version.h" #include "opencl/source/memory_manager/memory_banks.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "driver_version.h" #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/aubstream.h" #include #include namespace NEO { template AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : 
BaseClass(executionEnvironment, rootDeviceIndex), standalone(standalone) { executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(this->isLocalMemoryEnabled(), fileName, this->getType()); auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get(); UNRECOVERABLE_IF(nullptr == aubCenter); auto subCaptureCommon = aubCenter->getSubCaptureCommon(); UNRECOVERABLE_IF(nullptr == subCaptureCommon); subCaptureManager = std::make_unique(fileName, *subCaptureCommon); aubManager = aubCenter->getAubManager(); if (!aubCenter->getPhysicalAddressAllocator()) { aubCenter->initPhysicalAddressAllocator(this->createPhysicalAddressAllocator(&this->peekHwInfo())); } auto physicalAddressAllocator = aubCenter->getPhysicalAddressAllocator(); UNRECOVERABLE_IF(nullptr == physicalAddressAllocator); ppgtt = std::make_unique::type>(physicalAddressAllocator); ggtt = std::make_unique(physicalAddressAllocator); gttRemap = aubCenter->getAddressMapper(); UNRECOVERABLE_IF(nullptr == gttRemap); auto streamProvider = aubCenter->getStreamProvider(); UNRECOVERABLE_IF(nullptr == streamProvider); stream = streamProvider->getStream(); UNRECOVERABLE_IF(nullptr == stream); this->dispatchMode = DispatchMode::BatchedDispatch; if (DebugManager.flags.CsrDispatchMode.get()) { this->dispatchMode = (DispatchMode)DebugManager.flags.CsrDispatchMode.get(); } auto debugDeviceId = DebugManager.flags.OverrideAubDeviceId.get(); this->aubDeviceId = debugDeviceId == -1 ? 
this->peekHwInfo().capabilityTable.aubDeviceId : static_cast(debugDeviceId); this->defaultSshSize = 64 * KB; } template AUBCommandStreamReceiverHw::~AUBCommandStreamReceiverHw() { if (osContext) { pollForCompletion(); } this->freeEngineInfo(*gttRemap); } template void AUBCommandStreamReceiverHw::openFile(const std::string &fileName) { auto streamLocked = getAubStream()->lockStream(); initFile(fileName); } template bool AUBCommandStreamReceiverHw::reopenFile(const std::string &fileName) { auto streamLocked = getAubStream()->lockStream(); if (isFileOpen()) { if (fileName != getFileName()) { closeFile(); this->freeEngineInfo(*gttRemap); } } if (!isFileOpen()) { initFile(fileName); return true; } return false; } template void AUBCommandStreamReceiverHw::initFile(const std::string &fileName) { if (aubManager) { if (!aubManager->isOpen()) { aubManager->open(fileName); UNRECOVERABLE_IF(!aubManager->isOpen()); std::ostringstream str; str << "driver version: " << driverVersion; aubManager->addComment(str.str().c_str()); } return; } if (!getAubStream()->isOpen()) { // Open our file stream->open(fileName.c_str()); if (!getAubStream()->isOpen()) { // This UNRECOVERABLE_IF most probably means you are not executing aub tests with correct current directory (containing aub_out folder) // try adding _aub UNRECOVERABLE_IF(true); } // Add the file header stream->init(AubMemDump::SteppingValues::A, aubDeviceId); } } template void AUBCommandStreamReceiverHw::closeFile() { aubManager ? aubManager->close() : stream->close(); } template bool AUBCommandStreamReceiverHw::isFileOpen() const { return aubManager ? aubManager->isOpen() : getAubStream()->isOpen(); } template const std::string AUBCommandStreamReceiverHw::getFileName() { return aubManager ? 
aubManager->getFileName() : getAubStream()->getFileName(); } template void AUBCommandStreamReceiverHw::initializeEngine() { auto streamLocked = getAubStream()->lockStream(); if (hardwareContextController) { hardwareContextController->initialize(); return; } auto csTraits = this->getCsTraits(osContext->getEngineType()); if (engineInfo.pLRCA) { return; } this->initGlobalMMIO(); this->initEngineMMIO(); this->initAdditionalMMIO(); // Write driver version { std::ostringstream str; str << "driver version: " << driverVersion; getAubStream()->addComment(str.str().c_str()); } // Global HW Status Page { const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; engineInfo.pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); engineInfo.ggttHWSP = gttRemap->map(engineInfo.pGlobalHWStatusPage, sizeHWSP); auto physHWSP = ggtt->map(engineInfo.ggttHWSP, sizeHWSP, this->getGTTBits(), this->getMemoryBankForGtt()); // Write our GHWSP { std::ostringstream str; str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttHWSP; getAubStream()->addComment(str.str().c_str()); } AubGTTData data = {0}; this->getGTTData(reinterpret_cast(physHWSP), data); AUB::reserveAddressGGTT(*stream, engineInfo.ggttHWSP, sizeHWSP, physHWSP, data); stream->writeMMIO(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2080), engineInfo.ggttHWSP); } // Allocate the LRCA const size_t sizeLRCA = csTraits.sizeLRCA; const size_t alignLRCA = csTraits.alignLRCA; auto pLRCABase = alignedMalloc(sizeLRCA, alignLRCA); engineInfo.pLRCA = pLRCABase; // Initialize the LRCA to a known state csTraits.initialize(pLRCABase); // Reserve the ring buffer engineInfo.sizeRingBuffer = 0x4 * 0x1000; { const size_t alignRingBuffer = 0x1000; engineInfo.pRingBuffer = alignedMalloc(engineInfo.sizeRingBuffer, alignRingBuffer); engineInfo.ggttRingBuffer = gttRemap->map(engineInfo.pRingBuffer, engineInfo.sizeRingBuffer); auto physRingBuffer = ggtt->map(engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, 
this->getGTTBits(), this->getMemoryBankForGtt()); { std::ostringstream str; str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttRingBuffer; getAubStream()->addComment(str.str().c_str()); } AubGTTData data = {0}; this->getGTTData(reinterpret_cast(physRingBuffer), data); AUB::reserveAddressGGTT(*stream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRingBuffer, data); } // Initialize the ring MMIO registers { uint32_t ringHead = 0x000; uint32_t ringTail = 0x000; auto ringBase = engineInfo.ggttRingBuffer; auto ringCtrl = (uint32_t)((engineInfo.sizeRingBuffer - 0x1000) | 1); csTraits.setRingHead(pLRCABase, ringHead); csTraits.setRingTail(pLRCABase, ringTail); csTraits.setRingBase(pLRCABase, ringBase); csTraits.setRingCtrl(pLRCABase, ringCtrl); } // Write our LRCA { engineInfo.ggttLRCA = gttRemap->map(engineInfo.pLRCA, sizeLRCA); auto lrcAddressPhys = ggtt->map(engineInfo.ggttLRCA, sizeLRCA, this->getGTTBits(), this->getMemoryBankForGtt()); { std::ostringstream str; str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA; getAubStream()->addComment(str.str().c_str()); } AubGTTData data = {0}; this->getGTTData(reinterpret_cast(lrcAddressPhys), data); AUB::reserveAddressGGTT(*stream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys, data); AUB::addMemoryWrite( *stream, lrcAddressPhys, pLRCABase, sizeLRCA, this->getAddressSpace(csTraits.aubHintLRCA), csTraits.aubHintLRCA); } // Create a context to facilitate AUB dumping of memory using PPGTT addContextToken(getDumpHandle()); DEBUG_BREAK_IF(!engineInfo.pLRCA); } template CommandStreamReceiver *AUBCommandStreamReceiverHw::create(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { auto csr = std::make_unique>(fileName, standalone, executionEnvironment, rootDeviceIndex); if (!csr->subCaptureManager->isSubCaptureMode()) { csr->openFile(fileName); } return csr.release(); } template bool AUBCommandStreamReceiverHw::flush(BatchBuffer 
&batchBuffer, ResidencyContainer &allocationsForResidency) { if (subCaptureManager->isSubCaptureMode()) { if (!subCaptureManager->isSubCaptureEnabled()) { if (this->standalone) { *this->tagAddress = this->peekLatestSentTaskCount(); } return true; } } initializeEngine(); // Write our batch buffer auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset); auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset); auto currentOffset = batchBuffer.usedSize; DEBUG_BREAK_IF(currentOffset < batchBuffer.startOffset); auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset; std::unique_ptr> flatBatchBuffer( nullptr, [&](GraphicsAllocation *ptr) { this->getMemoryManager()->freeGraphicsMemory(ptr); }); if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBuffer.reset(this->flatBatchBufferHelper->flattenBatchBuffer(this->rootDeviceIndex, batchBuffer, sizeBatchBuffer, this->dispatchMode)); if (flatBatchBuffer.get() != nullptr) { pBatchBuffer = flatBatchBuffer->getUnderlyingBuffer(); batchBufferGpuAddress = flatBatchBuffer->getGpuAddress(); batchBuffer.commandBufferAllocation = flatBatchBuffer.get(); } } allocationsForResidency.push_back(batchBuffer.commandBufferAllocation); processResidency(allocationsForResidency, 0u); if (!this->standalone || DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { allocationsForResidency.pop_back(); } submitBatchBuffer(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation)); if (this->standalone) { *this->tagAddress = this->peekLatestSentTaskCount(); } if (subCaptureManager->isSubCaptureMode()) { pollForCompletion(); subCaptureManager->disableSubCapture(); } if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { pollForCompletion(); } getAubStream()->flush(); return true; } 
template bool AUBCommandStreamReceiverHw::addPatchInfoComments() { std::map allocationsMap; std::ostringstream str; str << "PatchInfoData" << std::endl; for (auto &patchInfoData : this->flatBatchBufferHelper->getPatchInfoCollection()) { str << std::hex << patchInfoData.sourceAllocation << ";"; str << std::hex << patchInfoData.sourceAllocationOffset << ";"; str << std::hex << patchInfoData.sourceType << ";"; str << std::hex << patchInfoData.targetAllocation << ";"; str << std::hex << patchInfoData.targetAllocationOffset << ";"; str << std::hex << patchInfoData.targetType << ";"; str << std::endl; if (patchInfoData.sourceAllocation) { allocationsMap.insert(std::pair(patchInfoData.sourceAllocation, ppgtt->map(static_cast(patchInfoData.sourceAllocation), 1, 0, MemoryBanks::MainBank))); } if (patchInfoData.targetAllocation) { allocationsMap.insert(std::pair(patchInfoData.targetAllocation, ppgtt->map(static_cast(patchInfoData.targetAllocation), 1, 0, MemoryBanks::MainBank))); } } bool result = getAubStream()->addComment(str.str().c_str()); this->flatBatchBufferHelper->getPatchInfoCollection().clear(); if (!result) { return false; } std::ostringstream allocationStr; allocationStr << "AllocationsList" << std::endl; for (auto &element : allocationsMap) { allocationStr << std::hex << element.first << ";" << element.second << std::endl; } result = getAubStream()->addComment(allocationStr.str().c_str()); if (!result) { return false; } return true; } template void AUBCommandStreamReceiverHw::submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits) { auto streamLocked = getAubStream()->lockStream(); if (hardwareContextController) { if (batchBufferSize) { hardwareContextController->submit(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, MemoryConstants::pageSize64k, false); } return; } auto csTraits = this->getCsTraits(osContext->getEngineType()); { { std::ostringstream str; str << 
"ppgtt: " << std::hex << std::showbase << batchBuffer; getAubStream()->addComment(str.str().c_str()); } auto physBatchBuffer = ppgtt->map(static_cast(batchBufferGpuAddress), batchBufferSize, entryBits, memoryBank); AubHelperHw aubHelperHw(this->isLocalMemoryEnabled()); AUB::reserveAddressPPGTT(*stream, static_cast(batchBufferGpuAddress), batchBufferSize, physBatchBuffer, entryBits, aubHelperHw); AUB::addMemoryWrite( *stream, physBatchBuffer, batchBuffer, batchBufferSize, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary), AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary); } if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { addGUCStartMessage(static_cast(reinterpret_cast(batchBuffer))); addPatchInfoComments(); } // Add a batch buffer start to the ring buffer auto previousTail = engineInfo.tailRingBuffer; { typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::MI_NOOP MI_NOOP; auto pTail = ptrOffset(engineInfo.pRingBuffer, engineInfo.tailRingBuffer); auto ggttTail = ptrOffset(engineInfo.ggttRingBuffer, engineInfo.tailRingBuffer); auto sizeNeeded = sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_LOAD_REGISTER_IMM); auto tailAlignment = sizeof(uint64_t); sizeNeeded = alignUp(sizeNeeded, tailAlignment); if (engineInfo.tailRingBuffer + sizeNeeded >= engineInfo.sizeRingBuffer) { // Pad the remaining ring with NOOPs auto sizeToWrap = engineInfo.sizeRingBuffer - engineInfo.tailRingBuffer; memset(pTail, 0, sizeToWrap); // write remaining ring auto physDumpStart = ggtt->map(ggttTail, sizeToWrap, this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( *stream, physDumpStart, pTail, sizeToWrap, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer), AubMemDump::DataTypeHintValues::TraceCommandBuffer); previousTail = 0; engineInfo.tailRingBuffer = 0; pTail = engineInfo.pRingBuffer; } else 
if (engineInfo.tailRingBuffer == 0) { // Add a LRI if this is our first submission auto lri = GfxFamily::cmdInitLoadRegisterImm; lri.setRegisterOffset(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2244)); lri.setDataDword(0x00010000); *(MI_LOAD_REGISTER_IMM *)pTail = lri; pTail = ((MI_LOAD_REGISTER_IMM *)pTail) + 1; } // Add our BBS auto bbs = GfxFamily::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddressGraphicsaddress472(static_cast(batchBufferGpuAddress)); bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)pTail = bbs; pTail = ((MI_BATCH_BUFFER_START *)pTail) + 1; // Compute our new ring tail. engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer); // Add NOOPs as needed as our tail needs to be aligned while (engineInfo.tailRingBuffer % tailAlignment) { *(MI_NOOP *)pTail = GfxFamily::cmdInitNoop; pTail = ((MI_NOOP *)pTail) + 1; engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer); } UNRECOVERABLE_IF((engineInfo.tailRingBuffer % tailAlignment) != 0); // Only dump the new commands auto ggttDumpStart = ptrOffset(engineInfo.ggttRingBuffer, previousTail); auto dumpStart = ptrOffset(engineInfo.pRingBuffer, previousTail); auto dumpLength = engineInfo.tailRingBuffer - previousTail; // write ring { std::ostringstream str; str << "ggtt: " << std::hex << std::showbase << ggttDumpStart; getAubStream()->addComment(str.str().c_str()); } auto physDumpStart = ggtt->map(ggttDumpStart, dumpLength, this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( *stream, physDumpStart, dumpStart, dumpLength, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer), AubMemDump::DataTypeHintValues::TraceCommandBuffer); // update the ring mmio tail in the LRCA { std::ostringstream str; str << "ggtt: " << std::hex << std::showbase << engineInfo.ggttLRCA + 0x101c; getAubStream()->addComment(str.str().c_str()); } auto physLRCA = 
ggtt->map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer), this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( *stream, physLRCA + 0x101c, &engineInfo.tailRingBuffer, sizeof(engineInfo.tailRingBuffer), this->getAddressSpace(csTraits.aubHintLRCA)); DEBUG_BREAK_IF(engineInfo.tailRingBuffer >= engineInfo.sizeRingBuffer); } // Submit our execlist by submitting to the execlist submit ports { typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; auto ggttLRCA = engineInfo.ggttLRCA; contextDescriptor.sData.LogicalRingCtxAddress = ggttLRCA / 4096; contextDescriptor.sData.ContextID = 0; this->submitLRCA(contextDescriptor); } } template void AUBCommandStreamReceiverHw::pollForCompletion() { const auto lock = std::unique_lock{pollForCompletionLock}; if (this->pollForCompletionTaskCount == this->latestSentTaskCount) { return; } pollForCompletionImpl(); } template void AUBCommandStreamReceiverHw::pollForCompletionImpl() { this->pollForCompletionTaskCount = this->latestSentTaskCount; if (subCaptureManager->isSubCaptureMode()) { if (!subCaptureManager->isSubCaptureEnabled()) { return; } } auto streamLocked = getAubStream()->lockStream(); if (hardwareContextController) { hardwareContextController->pollForCompletion(); return; } const auto mmioBase = this->getCsTraits(osContext->getEngineType()).mmioBase; const bool pollNotEqual = false; const uint32_t mask = getMaskAndValueForPollForCompletion(); const uint32_t value = mask; stream->registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS mask, value, pollNotEqual, 
AubMemDump::CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); } template inline void AUBCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { CommandStreamReceiverSimulatedHw::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); pollForCompletion(); } template void AUBCommandStreamReceiverHw::makeResidentExternal(AllocationView &allocationView) { externalAllocations.push_back(allocationView); } template void AUBCommandStreamReceiverHw::makeNonResidentExternal(uint64_t gpuAddress) { for (auto it = externalAllocations.begin(); it != externalAllocations.end(); it++) { if (it->first == gpuAddress) { externalAllocations.erase(it); break; } } } template void AUBCommandStreamReceiverHw::writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) { { std::ostringstream str; str << "ppgtt: " << std::hex << std::showbase << gpuAddress << " end address: " << gpuAddress + size << " cpu address: " << cpuAddress << " size: " << std::dec << size; getAubStream()->addComment(str.str().c_str()); } AubHelperHw aubHelperHw(this->isLocalMemoryEnabled()); PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { AUB::reserveAddressGGTTAndWriteMmeory(*stream, static_cast(gpuAddress), cpuAddress, physAddress, size, offset, entryBits, aubHelperHw); }; ppgtt->pageWalk(static_cast(gpuAddress), size, 0, entryBits, walker, memoryBank); } template bool AUBCommandStreamReceiverHw::writeMemory(GraphicsAllocation &gfxAllocation) { if (!this->isAubWritable(gfxAllocation)) { return false; } bool ownsLock = !gfxAllocation.isLocked(); uint64_t gpuAddress; void *cpuAddress; size_t size; if (!this->getParametersForWriteMemory(gfxAllocation, gpuAddress, cpuAddress, size)) { return false; } auto streamLocked = 
getAubStream()->lockStream(); if (aubManager) { this->writeMemoryWithAubManager(gfxAllocation); } else { writeMemory(gpuAddress, cpuAddress, size, this->getMemoryBank(&gfxAllocation), this->getPPGTTAdditionalBits(&gfxAllocation)); } streamLocked.unlock(); if (gfxAllocation.isLocked() && ownsLock) { this->getMemoryManager()->unlockResource(&gfxAllocation); } if (AubHelper::isOneTimeAubWritableAllocationType(gfxAllocation.getAllocationType())) { this->setAubWritable(false, gfxAllocation); } return true; } template bool AUBCommandStreamReceiverHw::writeMemory(AllocationView &allocationView) { GraphicsAllocation gfxAllocation(this->rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, reinterpret_cast(allocationView.first), allocationView.first, 0llu, allocationView.second, MemoryPool::MemoryNull); return writeMemory(gfxAllocation); } template void AUBCommandStreamReceiverHw::expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) { if (hardwareContextController) { //Add support for expectMMIO to AubStream return; } this->getAubStream()->expectMMIO(mmioRegister, expectedValue); } template bool AUBCommandStreamReceiverHw::expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) { pollForCompletion(); auto streamLocked = getAubStream()->lockStream(); if (hardwareContextController) { hardwareContextController->expectMemory(reinterpret_cast(gfxAddress), srcAddress, length, compareOperation); } PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { UNRECOVERABLE_IF(offset > length); this->getAubStream()->expectMemory(physAddress, ptrOffset(srcAddress, offset), size, this->getAddressSpaceFromPTEBits(entryBits), compareOperation); }; this->ppgtt->pageWalk(reinterpret_cast(gfxAddress), length, 0, PageTableEntry::nonValidBits, walker, MemoryBanks::BankNotSpecified); return true; } template void AUBCommandStreamReceiverHw::processResidency(const ResidencyContainer 
&allocationsForResidency, uint32_t handleId) { if (subCaptureManager->isSubCaptureMode()) { if (!subCaptureManager->isSubCaptureEnabled()) { return; } } for (auto &externalAllocation : externalAllocations) { if (!writeMemory(externalAllocation)) { DEBUG_BREAK_IF(externalAllocation.second != 0); } } for (auto &gfxAllocation : allocationsForResidency) { if (dumpAubNonWritable) { this->setAubWritable(true, *gfxAllocation); } if (!writeMemory(*gfxAllocation)) { DEBUG_BREAK_IF(!((gfxAllocation->getUnderlyingBufferSize() == 0) || !this->isAubWritable(*gfxAllocation))); } gfxAllocation->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId()); } dumpAubNonWritable = false; } template void AUBCommandStreamReceiverHw::dumpAllocation(GraphicsAllocation &gfxAllocation) { if (EngineHelpers::isBcs(this->osContext->getEngineType())) { return; } if (DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.get()) { if (!gfxAllocation.isAllocDumpable()) { return; } gfxAllocation.setAllocDumpable(false); } auto dumpFormat = AubAllocDump::getDumpFormat(gfxAllocation); if (dumpFormat > AubAllocDump::DumpFormat::NONE) { pollForCompletion(); } auto streamLocked = getAubStream()->lockStream(); if (hardwareContextController) { auto surfaceInfo = std::unique_ptr(AubAllocDump::getDumpSurfaceInfo(gfxAllocation, dumpFormat)); if (nullptr != surfaceInfo) { hardwareContextController->dumpSurface(*surfaceInfo.get()); } return; } AubAllocDump::dumpAllocation(dumpFormat, gfxAllocation, getAubStream(), getDumpHandle()); } template void AUBCommandStreamReceiverHw::makeNonResident(GraphicsAllocation &gfxAllocation) { if (gfxAllocation.isResident(this->osContext->getContextId())) { dumpAllocation(gfxAllocation); this->getEvictionAllocations().push_back(&gfxAllocation); gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId()); } } template AubSubCaptureStatus AUBCommandStreamReceiverHw::checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) { auto status = 
subCaptureManager->checkAndActivateSubCapture(dispatchInfo); if (status.isActive) { std::string subCaptureFile = subCaptureManager->getSubCaptureFileName(dispatchInfo); auto isReopened = reopenFile(subCaptureFile); if (isReopened) { dumpAubNonWritable = true; } } if (this->standalone) { this->programForAubSubCapture(status.wasActiveInPreviousEnqueue, status.isActive); } return status; } template void AUBCommandStreamReceiverHw::addAubComment(const char *message) { auto streamLocked = getAubStream()->lockStream(); if (aubManager) { aubManager->addComment(message); return; } getAubStream()->addComment(message); } template uint32_t AUBCommandStreamReceiverHw::getDumpHandle() { return hashPtrToU32(this); } template void AUBCommandStreamReceiverHw::addGUCStartMessage(uint64_t batchBufferAddress) { typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; auto bufferSize = sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START); AubHelperHw aubHelperHw(this->isLocalMemoryEnabled()); std::unique_ptr> buffer(this->getMemoryManager()->alignedMallocWrapper(bufferSize, MemoryConstants::pageSize), [&](void *ptr) { this->getMemoryManager()->alignedFreeWrapper(ptr); }); LinearStream linearStream(buffer.get(), bufferSize); uint32_t *header = static_cast(linearStream.getSpace(sizeof(uint32_t))); *header = getGUCWorkQueueItemHeader(); MI_BATCH_BUFFER_START *miBatchBufferStart = linearStream.getSpaceForCmd(); DEBUG_BREAK_IF(bufferSize != linearStream.getUsed()); *miBatchBufferStart = GfxFamily::cmdInitBatchBufferStart; miBatchBufferStart->setBatchBufferStartAddressGraphicsaddress472(AUB::ptrToPPGTT(buffer.get())); miBatchBufferStart->setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); auto physBufferAddres = ppgtt->map(reinterpret_cast(buffer.get()), bufferSize, this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()), MemoryBanks::MainBank); AUB::reserveAddressPPGTT(*stream, reinterpret_cast(buffer.get()), bufferSize, physBufferAddres, 
this->getPPGTTAdditionalBits(linearStream.getGraphicsAllocation()), aubHelperHw); AUB::addMemoryWrite( *stream, physBufferAddres, buffer.get(), bufferSize, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype)); PatchInfoData patchInfoData(batchBufferAddress, 0u, PatchInfoAllocationType::Default, reinterpret_cast(buffer.get()), sizeof(uint32_t) + sizeof(MI_BATCH_BUFFER_START) - sizeof(uint64_t), PatchInfoAllocationType::GUCStartMessage); this->flatBatchBufferHelper->setPatchInfoData(patchInfoData); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/aub_command_stream_receiver_hw_bdw_plus.inl000066400000000000000000000017331363734646600337370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl" namespace NEO { template constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { return 0x100; } template void AUBCommandStreamReceiverHw::addContextToken(uint32_t dumpHandle) { // Some simulator versions don't support adding the context token. // This hook allows specialization for those that do. 
} template uint32_t AUBCommandStreamReceiverHw::getGUCWorkQueueItemHeader() { uint32_t GUCWorkQueueItemHeader = 0x00030001; return GUCWorkQueueItemHeader; } template int AUBCommandStreamReceiverHw::getAddressSpaceFromPTEBits(uint64_t entryBits) const { return AubMemDump::AddressSpaceValues::TraceNonlocal; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/aub_stream_provider.h000066400000000000000000000010611363734646600273310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include namespace NEO { class AubStreamProvider { public: virtual ~AubStreamProvider() = default; virtual AubMemDump::AubFileStream *getStream() = 0; }; class AubFileStreamProvider : public AubStreamProvider { public: AubMemDump::AubFileStream *getStream() override { return &stream; }; protected: AubMemDump::AubFileStream stream; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/aub_subcapture.cpp000066400000000000000000000137141363734646600266440ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/aub_subcapture.h" #include "shared/source/utilities/debug_settings_reader.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { AubSubCaptureManager::AubSubCaptureManager(const std::string &fileName, AubSubCaptureCommon &subCaptureCommon) : initialFileName(fileName), subCaptureCommon(subCaptureCommon) { settingsReader.reset(SettingsReader::createOsReader(true, oclRegPath)); } AubSubCaptureManager::~AubSubCaptureManager() = default; bool AubSubCaptureManager::isSubCaptureEnabled() const { auto guard = this->lock(); return subCaptureIsActive || subCaptureWasActiveInPreviousEnqueue; } void 
AubSubCaptureManager::disableSubCapture() { auto guard = this->lock(); subCaptureIsActive = subCaptureWasActiveInPreviousEnqueue = false; }; AubSubCaptureStatus AubSubCaptureManager::checkAndActivateSubCapture(const MultiDispatchInfo &dispatchInfo) { if (dispatchInfo.empty()) { return {false, false}; } auto guard = this->lock(); kernelCurrentIdx = subCaptureCommon.getKernelCurrentIndexAndIncrement(); subCaptureWasActiveInPreviousEnqueue = subCaptureIsActive; subCaptureIsActive = false; switch (subCaptureCommon.subCaptureMode) { case SubCaptureMode::Toggle: subCaptureIsActive = isSubCaptureToggleActive(); break; case SubCaptureMode::Filter: subCaptureIsActive = isSubCaptureFilterActive(dispatchInfo); break; default: DEBUG_BREAK_IF(false); break; } return {subCaptureIsActive, subCaptureWasActiveInPreviousEnqueue}; } AubSubCaptureStatus AubSubCaptureManager::getSubCaptureStatus() const { auto guard = this->lock(); return {this->subCaptureIsActive, this->subCaptureWasActiveInPreviousEnqueue}; } const std::string &AubSubCaptureManager::getSubCaptureFileName(const MultiDispatchInfo &dispatchInfo) { auto guard = this->lock(); if (useToggleFileName) { currentFileName = getToggleFileName(); } if (currentFileName.empty()) { currentFileName = getAubCaptureFileName(); useToggleFileName = false; } switch (subCaptureCommon.subCaptureMode) { case SubCaptureMode::Filter: if (currentFileName.empty()) { currentFileName = generateFilterFileName(); useToggleFileName = false; } break; case SubCaptureMode::Toggle: if (currentFileName.empty()) { currentFileName = generateToggleFileName(dispatchInfo); useToggleFileName = false; } break; default: DEBUG_BREAK_IF(false); break; } return currentFileName; } bool AubSubCaptureManager::isKernelIndexInSubCaptureRange(uint32_t kernelIdx, uint32_t rangeStartIdx, uint32_t rangeEndIdx) const { return ((rangeStartIdx <= kernelIdx) && (kernelIdx <= rangeEndIdx)); } bool AubSubCaptureManager::isSubCaptureToggleActive() const { return 
settingsReader->getSetting("AUBDumpToggleCaptureOnOff", false); } std::string AubSubCaptureManager::getToggleFileName() const { return settingsReader->getSetting("AUBDumpToggleFileName", std::string("")); } std::string AubSubCaptureManager::getAubCaptureFileName() const { if (DebugManager.flags.AUBDumpCaptureFileName.get() != "unk") { return DebugManager.flags.AUBDumpCaptureFileName.get(); } return {}; } std::string AubSubCaptureManager::generateFilterFileName() const { std::string baseFileName = initialFileName.substr(0, initialFileName.length() - strlen(".aub")); std::string filterFileName = baseFileName + "_filter"; filterFileName += "_from_" + std::to_string(subCaptureCommon.subCaptureFilter.dumpKernelStartIdx); filterFileName += "_to_" + std::to_string(subCaptureCommon.subCaptureFilter.dumpKernelEndIdx); if (!subCaptureCommon.subCaptureFilter.dumpKernelName.empty()) { filterFileName += "_" + subCaptureCommon.subCaptureFilter.dumpKernelName; filterFileName += "_from_" + std::to_string(subCaptureCommon.subCaptureFilter.dumpNamedKernelStartIdx); filterFileName += "_to_" + std::to_string(subCaptureCommon.subCaptureFilter.dumpNamedKernelEndIdx); } filterFileName += ".aub"; return filterFileName; } std::string AubSubCaptureManager::generateToggleFileName(const MultiDispatchInfo &dispatchInfo) const { std::string baseFileName = initialFileName.substr(0, initialFileName.length() - strlen(".aub")); std::string toggleFileName = baseFileName + "_toggle"; toggleFileName += "_from_" + std::to_string(kernelCurrentIdx); if (!dispatchInfo.empty()) { toggleFileName += "_" + dispatchInfo.peekMainKernel()->getKernelInfo().name; } toggleFileName += ".aub"; return toggleFileName; } bool AubSubCaptureManager::isSubCaptureFilterActive(const MultiDispatchInfo &dispatchInfo) { auto kernelName = dispatchInfo.peekMainKernel()->getKernelInfo().name; auto subCaptureIsActive = false; if (subCaptureCommon.subCaptureFilter.dumpKernelName.empty()) { if 
(isKernelIndexInSubCaptureRange(kernelCurrentIdx, subCaptureCommon.subCaptureFilter.dumpKernelStartIdx, subCaptureCommon.subCaptureFilter.dumpKernelEndIdx)) { subCaptureIsActive = true; } } else { if (0 == kernelName.compare(subCaptureCommon.subCaptureFilter.dumpKernelName)) { kernelNameMatchesNum = subCaptureCommon.getKernelNameMatchesNumAndIncrement(); if (isKernelIndexInSubCaptureRange(kernelNameMatchesNum, subCaptureCommon.subCaptureFilter.dumpNamedKernelStartIdx, subCaptureCommon.subCaptureFilter.dumpNamedKernelEndIdx)) { subCaptureIsActive = true; } } } return subCaptureIsActive; } std::unique_lock AubSubCaptureManager::lock() const { return std::unique_lock{mutex}; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/aub_subcapture.h000066400000000000000000000056421363734646600263120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/aub_subcapture_status.h" #include #include #include #include namespace NEO { struct MultiDispatchInfo; class SettingsReader; class AubSubCaptureCommon { public: enum class SubCaptureMode { Off = 0, //subcapture off Filter, //subcapture kernel specified by filter (static regkey) Toggle //toggle subcapture on/off (dynamic regkey) } subCaptureMode = SubCaptureMode::Off; struct SubCaptureFilter { std::string dumpKernelName = ""; uint32_t dumpNamedKernelStartIdx = 0; uint32_t dumpNamedKernelEndIdx = static_cast(-1); uint32_t dumpKernelStartIdx = 0; uint32_t dumpKernelEndIdx = static_cast(-1); } subCaptureFilter; inline uint32_t getKernelCurrentIndexAndIncrement() { return kernelCurrentIdx.fetch_add(1); } inline uint32_t getKernelNameMatchesNumAndIncrement() { return kernelNameMatchesNum.fetch_add(1); } protected: std::atomic kernelCurrentIdx{0}; std::atomic kernelNameMatchesNum{0}; }; class AubSubCaptureManager { public: using SubCaptureMode = AubSubCaptureCommon::SubCaptureMode; using 
// These two lines hold the remainder of class AubSubCaptureManager and, after archive residue for
// command_stream_receiver_simulated_common_hw.h, the first part of class template
// CommandStreamReceiverSimulatedCommonHw. The last "#include" of the first line has its operand
// at the start of the second line.
//
//  * class AubSubCaptureManager, remainder:
//      - isSubCaptureMode() is true when subCaptureCommon.subCaptureMode is greater than
//        SubCaptureMode::Off;
//      - public declarations: isSubCaptureEnabled, disableSubCapture, checkAndActivateSubCapture,
//        getSubCaptureStatus, getSubCaptureFileName, the constructor and a virtual destructor;
//      - protected helpers, most marked MOCKABLE_VIRTUAL: toggle/filter queries, file-name
//        builders, isKernelIndexInSubCaptureRange, lock;
//      - state: subCaptureIsActive and subCaptureWasActiveInPreviousEnqueue default to false,
//        kernelCurrentIdx and kernelNameMatchesNum to 0, useToggleFileName to true. The class also
//        holds initialFileName, currentFileName, settingsReader, a reference to the
//        AubSubCaptureCommon object and a mutable std::mutex.
//  * class template CommandStreamReceiverSimulatedCommonHw, derived from CommandStreamReceiverHw,
//    first part:
//      - protected: getParametersForWriteMemory, freeEngineInfo, getDeviceIndex;
//      - public: constructor/destructor, getGTTBits() (returns 0u), the MMIO/GTT helper
//        declarations, setupContext override, expectMemoryEqual / expectMemoryNotEqual;
//      - pure virtuals: pollForCompletion, both writeMemory overloads, writeMemoryWithAubManager,
//        setAubWritable; pollForCompletionImpl() has an empty inline body;
//      - the trailing "virtual bool" starts a declaration that continues on the next line.
//
// NOTE(review): "AddressMapper >tRemap" is presumably a mangled "AddressMapper &gttRemap" (the
// out-of-line definition later in this dump uses the name gttRemap in its body) - confirm against
// the real header. Template parameter lists also appear to be missing ("template class ...").
SubCaptureFilter = AubSubCaptureCommon::SubCaptureFilter; inline bool isSubCaptureMode() const { return subCaptureCommon.subCaptureMode > SubCaptureMode::Off; } bool isSubCaptureEnabled() const; void disableSubCapture(); AubSubCaptureStatus checkAndActivateSubCapture(const MultiDispatchInfo &dispatchInfo); AubSubCaptureStatus getSubCaptureStatus() const; const std::string &getSubCaptureFileName(const MultiDispatchInfo &dispatchInfo); AubSubCaptureManager(const std::string &fileName, AubSubCaptureCommon &subCaptureCommon); virtual ~AubSubCaptureManager(); protected: MOCKABLE_VIRTUAL bool isSubCaptureToggleActive() const; bool isSubCaptureFilterActive(const MultiDispatchInfo &dispatchInfo); MOCKABLE_VIRTUAL std::string getAubCaptureFileName() const; MOCKABLE_VIRTUAL std::string getToggleFileName() const; MOCKABLE_VIRTUAL std::string generateFilterFileName() const; MOCKABLE_VIRTUAL std::string generateToggleFileName(const MultiDispatchInfo &dispatchInfo) const; bool isKernelIndexInSubCaptureRange(uint32_t kernelIdx, uint32_t rangeStartIdx, uint32_t rangeEndIdx) const; MOCKABLE_VIRTUAL std::unique_lock lock() const; bool subCaptureIsActive = false; bool subCaptureWasActiveInPreviousEnqueue = false; uint32_t kernelCurrentIdx = 0; uint32_t kernelNameMatchesNum = 0; bool useToggleFileName = true; std::string initialFileName; std::string currentFileName; std::unique_ptr settingsReader; AubSubCaptureCommon &subCaptureCommon; mutable std::mutex mutex; }; } // namespace NEO command_stream_receiver_simulated_common_hw.h000066400000000000000000000062421363734646600342160ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/gen_common/aub_mapper.h" #include "opencl/source/memory_manager/memory_banks.h" #include 
"third_party/aub_stream/headers/hardware_context.h" namespace aub_stream { class AubManager; struct AubStream; } // namespace aub_stream namespace NEO { class AddressMapper; class GraphicsAllocation; class HardwareContextController; template class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw { protected: using CommandStreamReceiverHw::osContext; using AUB = typename AUBFamilyMapper::AUB; using MiContextDescriptorReg = typename AUB::MiContextDescriptorReg; bool getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const; void freeEngineInfo(AddressMapper >tRemap); MOCKABLE_VIRTUAL uint32_t getDeviceIndex() const; public: CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); ~CommandStreamReceiverSimulatedCommonHw() override; uint64_t getGTTBits() const { return 0u; } void initGlobalMMIO(); void initAdditionalMMIO(); uint64_t getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation); void getGTTData(void *memory, AubGTTData &data); uint32_t getMemoryBankForGtt() const; static const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType); void initEngineMMIO(); void submitLRCA(const MiContextDescriptorReg &contextDescriptor); void setupContext(OsContext &osContext) override; virtual void expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length); virtual void expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length); virtual void pollForCompletion() = 0; virtual void pollForCompletionImpl(){}; virtual bool writeMemory(GraphicsAllocation &gfxAllocation) = 0; virtual void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) = 0; virtual void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) = 0; virtual void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0; virtual bool 
// This line starts with the end of the CommandStreamReceiverSimulatedCommonHw class declaration
// (the "virtual bool" for isAubWritable is on the previous line), then holds archive residue for
// command_stream_receiver_simulated_common_hw_base.inl and that file's first definitions.
//
//  * Class remainder:
//      - pure virtuals isAubWritable, setTbxWritable, isTbxWritable;
//      - getPreferredTagPoolSize() returns 1;
//      - data members: aubManager (defaults to nullptr), hardwareContextController, engineInfo
//        (a struct EngineInfo of CPU pointers, GGTT offsets, ring size and ring tail, initialised
//        with "= {}") and the raw pointer "stream".
//  * initAdditionalMMIO() - when the AubDumpAddMmioRegistersList debug flag is not "unk", writes
//    every (first, second) pair returned by AubHelper::getAdditionalMmioList() through
//    stream->writeMMIO().
//  * setupContext(osContext) - only its signature and opening brace are here; the body is on the
//    next line.
//
// NOTE(review): "stream" has no default member initializer in this declaration, unlike aubManager;
// confirm every derived class assigns it before initAdditionalMMIO() can run.
isAubWritable(GraphicsAllocation &graphicsAllocation) const = 0; virtual void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) = 0; virtual bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const = 0; size_t getPreferredTagPoolSize() const override { return 1; } aub_stream::AubManager *aubManager = nullptr; std::unique_ptr hardwareContextController; struct EngineInfo { void *pLRCA; uint32_t ggttLRCA; void *pGlobalHWStatusPage; uint32_t ggttHWSP; void *pRingBuffer; uint32_t ggttRingBuffer; size_t sizeRingBuffer; uint32_t tailRingBuffer; } engineInfo = {}; AubMemDump::AubStream *stream; }; } // namespace NEO command_stream_receiver_simulated_common_hw_base.inl000066400000000000000000000102631363734646600355410ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/command_stream/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/aub/aub_helper.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw.h" #include "opencl/source/helpers/hardware_context_controller.h" #include "opencl/source/memory_manager/address_mapper.h" #include "third_party/aub_stream/headers/aub_manager.h" namespace NEO { template void CommandStreamReceiverSimulatedCommonHw::initAdditionalMMIO() { if (DebugManager.flags.AubDumpAddMmioRegistersList.get() != "unk") { auto mmioList = AubHelper::getAdditionalMmioList(); for (auto &mmioPair : mmioList) { stream->writeMMIO(mmioPair.first, mmioPair.second); } } } template void CommandStreamReceiverSimulatedCommonHw::setupContext(OsContext &osContext) { 
// This line starts with the body of CommandStreamReceiverSimulatedCommonHw::setupContext() (its
// signature is on the previous line) and ends with "template uint32_t", the start of
// getDeviceIndex() on the next line.
//
//  * setupContext(), body - forwards to CommandStreamReceiverHw::setupContext(), lets
//    getCsTraits(engineType).setContextSaveRestoreFlags() fill "flags" (initially 0) and creates
//    hardwareContextController only when aubManager is set and the context is not low priority.
//  * getParametersForWriteMemory(graphicsAllocation, gpuAddress, cpuAddress, size) - fills the
//    three output references from the allocation: underlying buffer, decanonized GPU address and
//    underlying buffer size. When the default Gmm exists and isRenderCompressed is set, size is
//    replaced by gmmResourceInfo->getSizeAllocation(). Returns false when size is 0; the outputs
//    have already been written at that point. When the allocation has no CPU pointer, one is
//    obtained with getMemoryManager()->lockResource(). Returns true otherwise.
//  * expectMemoryEqual() / expectMemoryNotEqual() - forward to this->expectMemory() with
//    CompareOperationValues::CompareEqual / CompareNotEqual.
//  * freeEngineInfo(gttRemap) - for each of pLRCA, pGlobalHWStatusPage and pRingBuffer: calls
//    alignedFree() on the pointer, calls gttRemap.unmap() with it, then sets it to nullptr.
//
// NOTE(review): in freeEngineInfo() each pointer is handed to unmap() after alignedFree(); only
// the pointer value is passed - confirm AddressMapper::unmap() never dereferences it.
// NOTE(review): std::make_unique is shown without a template argument and the freeEngineInfo
// parameter reads "AddressMapper >tRemap"; both look like extraction damage.
CommandStreamReceiverHw::setupContext(osContext); auto engineType = osContext.getEngineType(); uint32_t flags = 0; getCsTraits(engineType).setContextSaveRestoreFlags(flags); if (aubManager && !osContext.isLowPriority()) { hardwareContextController = std::make_unique(*aubManager, osContext, flags); } } template bool CommandStreamReceiverSimulatedCommonHw::getParametersForWriteMemory(GraphicsAllocation &graphicsAllocation, uint64_t &gpuAddress, void *&cpuAddress, size_t &size) const { cpuAddress = graphicsAllocation.getUnderlyingBuffer(); gpuAddress = GmmHelper::decanonize(graphicsAllocation.getGpuAddress()); size = graphicsAllocation.getUnderlyingBufferSize(); auto gmm = graphicsAllocation.getDefaultGmm(); if (gmm && gmm->isRenderCompressed) { size = gmm->gmmResourceInfo->getSizeAllocation(); } if (size == 0) return false; if (cpuAddress == nullptr) { cpuAddress = this->getMemoryManager()->lockResource(&graphicsAllocation); } return true; } template void CommandStreamReceiverSimulatedCommonHw::expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length) { this->expectMemory(gfxAddress, srcAddress, length, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); } template void CommandStreamReceiverSimulatedCommonHw::expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) { this->expectMemory(gfxAddress, srcAddress, length, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual); } template void CommandStreamReceiverSimulatedCommonHw::freeEngineInfo(AddressMapper >tRemap) { alignedFree(engineInfo.pLRCA); gttRemap.unmap(engineInfo.pLRCA); engineInfo.pLRCA = nullptr; alignedFree(engineInfo.pGlobalHWStatusPage); gttRemap.unmap(engineInfo.pGlobalHWStatusPage); engineInfo.pGlobalHWStatusPage = nullptr; alignedFree(engineInfo.pRingBuffer); gttRemap.unmap(engineInfo.pRingBuffer); engineInfo.pRingBuffer = nullptr; } template uint32_t 
// These two lines finish command_stream_receiver_simulated_common_hw_base.inl, hold all visible
// definitions of ..._bdw_plus.inl and start the CommandStreamReceiverWithAUBDump class template.
// The "template uint32_t" that introduces getDeviceIndex() is on the previous line.
//
//  * getDeviceIndex() - when any bit of osContext->getDeviceBitfield() is set, returns Math::log2()
//    of the bitfield's to_ulong() value; otherwise returns 0u.
//  * Constructor - forwards both arguments to CommandStreamReceiverHw. The destructor is defaulted.
//  * initGlobalMMIO() - writes every pair of AUBFamilyMapper::globalMMIO via stream->writeMMIO().
//  * getPPGTTAdditionalBits() - returns the OR of BIT(presentBit), BIT(writableBit) and
//    BIT(userSupervisorBit); the gfxAllocation argument is not used.
//  * getGTTData() - sets data.present = true and data.localMemory = false; "memory" is not used.
//  * getMemoryBankForGtt() - returns MemoryBanks::getBank(getDeviceIndex()).
//  * getCsTraits(engineType) - dereferences AUBFamilyMapper::csTraits[engineType].
//  * initEngineMMIO() - takes AUBFamilyMapper::perEngineMMIO[engine type] and writes each pair of
//    the list it points to via stream->writeMMIO().
//  * submitLRCA(contextDescriptor) - issues four writeMMIO() calls to the register at
//    computeRegisterOffset(mmioBase, 0x2230), with the values 0, 0, ulData[1], ulData[0] in that
//    order.
//  * class template CommandStreamReceiverWithAUBDump, derived from BaseCSR, first part: copy
//    construction and assignment are deleted; getType() returns CSR_TBX_WITH_AUB when
//    BaseCSR::getType() is CSR_TBX and CSR_HW_WITH_AUB otherwise. The trailing "void" starts a
//    declaration that continues on the next line.
//
// NOTE(review): initEngineMMIO() dereferences mmioList right after DEBUG_BREAK_IF(!mmioList);
// confirm that macro stops execution in every build type, otherwise a null entry is dereferenced.
// NOTE(review): getCsTraits() does no range or null check on csTraits[engineType] in this code.
CommandStreamReceiverSimulatedCommonHw::getDeviceIndex() const { return osContext->getDeviceBitfield().any() ? static_cast(Math::log2(static_cast(osContext->getDeviceBitfield().to_ulong()))) : 0u; } template CommandStreamReceiverSimulatedCommonHw::CommandStreamReceiverSimulatedCommonHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex) {} template CommandStreamReceiverSimulatedCommonHw::~CommandStreamReceiverSimulatedCommonHw() = default; } // namespace NEO command_stream_receiver_simulated_common_hw_bdw_plus.inl000066400000000000000000000043111363734646600364430ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/command_stream/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl" namespace NEO { template void CommandStreamReceiverSimulatedCommonHw::initGlobalMMIO() { for (auto &mmioPair : AUBFamilyMapper::globalMMIO) { stream->writeMMIO(mmioPair.first, mmioPair.second); } } template uint64_t CommandStreamReceiverSimulatedCommonHw::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) { return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::userSupervisorBit); } template void CommandStreamReceiverSimulatedCommonHw::getGTTData(void *memory, AubGTTData &data) { data.present = true; data.localMemory = false; } template uint32_t CommandStreamReceiverSimulatedCommonHw::getMemoryBankForGtt() const { return MemoryBanks::getBank(getDeviceIndex()); } template const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw::getCsTraits(aub_stream::EngineType engineType) { return *AUBFamilyMapper::csTraits[engineType]; } template void CommandStreamReceiverSimulatedCommonHw::initEngineMMIO() { auto mmioList = AUBFamilyMapper::perEngineMMIO[osContext->getEngineType()]; DEBUG_BREAK_IF(!mmioList); for (auto 
&mmioPair : *mmioList) { stream->writeMMIO(mmioPair.first, mmioPair.second); } } template void CommandStreamReceiverSimulatedCommonHw::submitLRCA(const MiContextDescriptorReg &contextDescriptor) { auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase; stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[1]); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[0]); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/command_stream_receiver_with_aub_dump.h000066400000000000000000000032051363734646600330630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include namespace NEO { template class CommandStreamReceiverWithAUBDump : public BaseCSR { protected: using BaseCSR::osContext; public: CommandStreamReceiverWithAUBDump(const std::string &baseName, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); CommandStreamReceiverWithAUBDump(const CommandStreamReceiverWithAUBDump &) = delete; CommandStreamReceiverWithAUBDump &operator=(const CommandStreamReceiverWithAUBDump &) = delete; bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; void makeNonResident(GraphicsAllocation &gfxAllocation) override; AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) override; void setupContext(OsContext &osContext) override; CommandStreamReceiverType getType() override { if (BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX) { return CommandStreamReceiverType::CSR_TBX_WITH_AUB; } return CommandStreamReceiverType::CSR_HW_WITH_AUB; } void 
// This line starts with the end of the CommandStreamReceiverWithAUBDump class declaration (the
// "void" return type of waitForTaskCountWithKmdNotifyFallback is on the previous line), followed
// by archive residue for command_stream_receiver_with_aub_dump.inl and the first part of the
// class constructor.
//
//  * Class remainder: waitForTaskCountWithKmdNotifyFallback() and addAubComment() overrides,
//    getPreferredTagPoolSize() returning 1, and the owning member aubCSR.
//  * extern declaration of commandStreamReceiverFactory[IGFX_MAX_CORE].
//  * Constructor, first part - initialises BaseCSR, then computes:
//      - isAubManager: the root device environment has an aubCenter and its getAubManager() is
//        non-null;
//      - isTbxMode: BaseCSR::getType() equals CSR_TBX;
//      - createAubCsr: a conditional on (isAubManager && isTbxMode) whose two result operands are
//        on the next line.
waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override; size_t getPreferredTagPoolSize() const override { return 1; } void addAubComment(const char *comment) override; std::unique_ptr aubCSR; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl000066400000000000000000000072151363734646600334230ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "opencl/source/aub/aub_center.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[IGFX_MAX_CORE]; template CommandStreamReceiverWithAUBDump::CommandStreamReceiverWithAUBDump(const std::string &baseName, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseCSR(executionEnvironment, rootDeviceIndex) { bool isAubManager = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter && executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter->getAubManager(); bool isTbxMode = CommandStreamReceiverType::CSR_TBX == BaseCSR::getType(); bool createAubCsr = (isAubManager && isTbxMode) ? 
// This line starts with the tail of the CommandStreamReceiverWithAUBDump constructor (its head is
// on the previous line) and ends with "template void", the start of addAubComment() on the next
// line. Every method below treats aubCSR as optional.
//
//  * Constructor, tail - createAubCsr is false only when both isAubManager and isTbxMode hold.
//    When true, aubCSR takes ownership of AUBCommandStreamReceiver::create(baseName, false, ...),
//    its tag allocation is initialised (failure hits UNRECOVERABLE_IF) and the tag value is set to
//    std::numeric_limits::max().
//  * flush() - when aubCSR exists, flushes it first and copies
//    BaseCSR::peekLatestSentTaskCount() into it; the return value is that of BaseCSR::flush().
//  * makeNonResident() - records the allocation's residency task count for this context, calls
//    BaseCSR::makeNonResident(), then (if aubCSR exists) writes the recorded count back before
//    calling aubCSR->makeNonResident().
//  * checkAndActivateAubSubCapture() - takes the status from BaseCSR, replaces it with aubCSR's
//    status when aubCSR exists, passes wasActiveInPreviousEnqueue / isActive to
//    BaseCSR::programForAubSubCapture() and returns the status.
//  * setupContext() - BaseCSR first, then aubCSR.
//  * waitForTaskCountWithKmdNotifyFallback() - aubCSR first, then BaseCSR, with the same arguments.
//
// NOTE(review): the constructor dereferences the pointer returned by
// AUBCommandStreamReceiver::create() without a null check - confirm create() cannot return null
// on this path.
false : true; if (createAubCsr) { aubCSR.reset(AUBCommandStreamReceiver::create(baseName, false, executionEnvironment, rootDeviceIndex)); UNRECOVERABLE_IF(!aubCSR->initializeTagAllocation()); *aubCSR->getTagAddress() = std::numeric_limits::max(); } } template bool CommandStreamReceiverWithAUBDump::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { if (aubCSR) { aubCSR->flush(batchBuffer, allocationsForResidency); aubCSR->setLatestSentTaskCount(BaseCSR::peekLatestSentTaskCount()); } return BaseCSR::flush(batchBuffer, allocationsForResidency); } template void CommandStreamReceiverWithAUBDump::makeNonResident(GraphicsAllocation &gfxAllocation) { auto residencyTaskCount = gfxAllocation.getResidencyTaskCount(this->osContext->getContextId()); BaseCSR::makeNonResident(gfxAllocation); if (aubCSR) { gfxAllocation.updateResidencyTaskCount(residencyTaskCount, this->osContext->getContextId()); aubCSR->makeNonResident(gfxAllocation); } } template AubSubCaptureStatus CommandStreamReceiverWithAUBDump::checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) { auto status = BaseCSR::checkAndActivateAubSubCapture(dispatchInfo); if (aubCSR) { status = aubCSR->checkAndActivateAubSubCapture(dispatchInfo); } BaseCSR::programForAubSubCapture(status.wasActiveInPreviousEnqueue, status.isActive); return status; } template void CommandStreamReceiverWithAUBDump::setupContext(OsContext &osContext) { BaseCSR::setupContext(osContext); if (aubCSR) { aubCSR->setupContext(osContext); } } template void CommandStreamReceiverWithAUBDump::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { if (aubCSR) { aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); } BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); } template void 
// This line starts with CommandStreamReceiverWithAUBDump::addAubComment() (its "template void"
// prefix is on the previous line), followed by archive residue for create_command_stream_impl.cpp
// and the first part of createCommandStreamImpl().
//
//  * addAubComment(comment) - forwards the comment to aubCSR when it exists, then to BaseCSR.
//  * createCommandStreamImpl(executionEnvironment, rootDeviceIndex), first part:
//      - selects funcCreate from commandStreamReceiverFactory, indexed by the root device's
//        platform.eRenderCoreFamily, and returns nullptr when that entry is null;
//      - reads the SetCommandStreamReceiver debug flag; a negative value selects CSR_HW;
//      - dispatches on the type:
//          CSR_HW          -> funcCreate(false, ...)
//          CSR_AUB         -> AUBCommandStreamReceiver::create("aubfile", true, ...)
//          CSR_TBX         -> TbxCommandStreamReceiver::create("", false, ...)
//          CSR_HW_WITH_AUB -> funcCreate(true, ...)
//          CSR_TBX_WITH_AUB -> the call is on the next line.
//
// NOTE(review): the factory array is indexed by eRenderCoreFamily without a comparison against
// IGFX_MAX_CORE, whereas TbxCommandStreamReceiver::create() later in this dump performs that
// check - confirm the index is validated elsewhere.
CommandStreamReceiverWithAUBDump::addAubComment(const char *comment) { if (aubCSR) { aubCSR->addAubComment(comment); } BaseCSR::addAubComment(comment); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/create_command_stream_impl.cpp000066400000000000000000000043761363734646600312010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/device_factory.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/command_stream/tbx_command_stream_receiver.h" namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[IGFX_MAX_CORE]; CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { auto funcCreate = commandStreamReceiverFactory[executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily]; if (funcCreate == nullptr) { return nullptr; } CommandStreamReceiver *commandStreamReceiver = nullptr; int32_t csr = DebugManager.flags.SetCommandStreamReceiver.get(); if (csr < 0) { csr = CommandStreamReceiverType::CSR_HW; } switch (csr) { case CSR_HW: commandStreamReceiver = funcCreate(false, executionEnvironment, rootDeviceIndex); break; case CSR_AUB: commandStreamReceiver = AUBCommandStreamReceiver::create("aubfile", true, executionEnvironment, rootDeviceIndex); break; case CSR_TBX: commandStreamReceiver = TbxCommandStreamReceiver::create("", false, executionEnvironment, rootDeviceIndex); break; case CSR_HW_WITH_AUB: commandStreamReceiver = funcCreate(true, executionEnvironment, rootDeviceIndex); break; case CSR_TBX_WITH_AUB: commandStreamReceiver = 
// This line starts with the tail of createCommandStreamImpl() (the CSR_TBX_WITH_AUB case label and
// assignment target are on the previous line) and ends inside the declaration of class template
// CommandStreamReceiverSimulatedHw.
//
//  * createCommandStreamImpl(), tail - CSR_TBX_WITH_AUB uses
//    TbxCommandStreamReceiver::create("aubfile", true, ...); any other value leaves the result at
//    nullptr; the pointer is returned.
//  * prepareDeviceEnvironmentsImpl(executionEnvironment) - returns
//    DeviceFactory::prepareDeviceEnvironments() when DeviceFactory::isHwModeSelected() is true,
//    otherwise DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride().
//  * Archive residue for create_command_stream_impl.h, which declares the two functions above as
//    extern; a residue entry for the definitions/ directory; and residue for
//    definitions/command_stream_receiver_simulated_hw.h.
//  * class template CommandStreamReceiverSimulatedHw, derived from
//    CommandStreamReceiverSimulatedCommonHw, first part: using-declarations that bring in the
//    base constructors and osContext. The declaration continues on the next line.
TbxCommandStreamReceiver::create("aubfile", true, executionEnvironment, rootDeviceIndex); break; default: break; } return commandStreamReceiver; } bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment) { if (DeviceFactory::isHwModeSelected()) { return DeviceFactory::prepareDeviceEnvironments(executionEnvironment); } return DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/create_command_stream_impl.h000066400000000000000000000006761363734646600306450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" namespace NEO { class ExecutionEnvironment; extern CommandStreamReceiver *createCommandStreamImpl(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); extern bool prepareDeviceEnvironmentsImpl(ExecutionEnvironment &executionEnvironment); } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/definitions/000077500000000000000000000000001363734646600254415ustar00rootroot00000000000000command_stream_receiver_simulated_hw.h000066400000000000000000000036471363734646600351670ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/command_stream/definitions/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw.h" #include "opencl/source/memory_manager/memory_banks.h" #include "opencl/source/memory_manager/physical_address_allocator.h" namespace NEO { class GraphicsAllocation; template class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCommonHw { protected: using CommandStreamReceiverSimulatedCommonHw::CommandStreamReceiverSimulatedCommonHw; using CommandStreamReceiverSimulatedCommonHw::osContext; 
// These two lines hold the remainder of class template CommandStreamReceiverSimulatedHw, all of
// per_dss_backed_buffer.cpp and tbx_command_stream_receiver.cpp, and the first part of
// tbx_command_stream_receiver.h. The last "#include" of the first line has its operand at the
// start of the second line.
//
//  * CommandStreamReceiverSimulatedHw, remainder:
//      - getMemoryBank(allocation) returns MemoryBanks::getBank(getDeviceIndex()); the argument is
//        not used;
//      - getAddressSpace(hint) always returns AddressSpaceValues::TraceNonlocal;
//      - createPhysicalAddressAllocator(hwInfo) returns a PhysicalAddressAllocator created with
//        "new" as a raw pointer; hwInfo is not used;
//      - writeMemoryWithAubManager() has an empty body;
//      - setAubWritable / isAubWritable / setTbxWritable / isTbxWritable forward to the
//        allocation, passing getMemoryBank(&graphicsAllocation) as the bank argument.
//  * CommandStreamReceiver::createPerDssBackedBuffer(device) - returns true without doing
//    anything else in this translation unit.
//  * tbxCommandStreamReceiverFactory[IGFX_MAX_CORE] - defined here and initialised with "= {}".
//  * TbxCommandStreamReceiver::create(baseName, withAubDump, ...) - returns nullptr (after
//    DEBUG_BREAK_IF(!false)) when eRenderCoreFamily >= IGFX_MAX_CORE; otherwise calls the factory
//    entry for that core family, or returns nullptr when the entry is null.
//  * class TbxStream, derived from AubMemDump::AubStream, first part: non-copyable, holds a
//    TbxSockets pointer defaulting to nullptr, and overrides the open/close/init/write*
//    interface. The trailing "void" starts registerPoll() on the next line.
using CommandStreamReceiverSimulatedCommonHw::getDeviceIndex; public: uint32_t getMemoryBank(GraphicsAllocation *allocation) const { return MemoryBanks::getBank(getDeviceIndex()); } int getAddressSpace(int hint) { return AubMemDump::AddressSpaceValues::TraceNonlocal; } PhysicalAddressAllocator *createPhysicalAddressAllocator(const HardwareInfo *hwInfo) { return new PhysicalAddressAllocator(); } void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override{}; void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) override { graphicsAllocation.setAubWritable(writable, getMemoryBank(&graphicsAllocation)); } bool isAubWritable(GraphicsAllocation &graphicsAllocation) const override { return graphicsAllocation.isAubWritable(getMemoryBank(&graphicsAllocation)); } void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) override { graphicsAllocation.setTbxWritable(writable, getMemoryBank(&graphicsAllocation)); } bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const override { return graphicsAllocation.isTbxWritable(getMemoryBank(&graphicsAllocation)); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/per_dss_backed_buffer.cpp000066400000000000000000000004341363734646600301140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" namespace NEO { bool CommandStreamReceiver::createPerDssBackedBuffer(Device &device) { return true; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/tbx_command_stream_receiver.cpp000066400000000000000000000021001363734646600313550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/tbx_command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include 
"shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_info.h" #include namespace NEO { TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE] = {}; CommandStreamReceiver *TbxCommandStreamReceiver::create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (hwInfo->platform.eRenderCoreFamily >= IGFX_MAX_CORE) { DEBUG_BREAK_IF(!false); return nullptr; } auto pCreate = tbxCommandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; return pCreate ? pCreate(baseName, withAubDump, executionEnvironment, rootDeviceIndex) : nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/tbx_command_stream_receiver.h000066400000000000000000000033111363734646600310270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub_mem_dump/aub_mem_dump.h" namespace NEO { class CommandStreamReceiver; class TbxSockets; class ExecutionEnvironment; class TbxStream : public AubMemDump::AubStream { protected: TbxSockets *socket = nullptr; public: TbxStream(); ~TbxStream() override; TbxStream(const TbxStream &) = delete; TbxStream &operator=(const TbxStream &) = delete; void open(const char *options) override; void close() override; bool init(uint32_t stepping, uint32_t device) override; void writeMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) override; void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) override; void writeGTT(uint32_t gttOffset, uint64_t entry) override; void writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) override; void writeMMIOImpl(uint32_t offset, uint32_t value) override; void 
// This line starts with the end of class TbxStream (the "void" return type of registerPoll is on
// the previous line) and ends inside the declaration of class template
// TbxCommandStreamReceiverHw.
//
//  * TbxStream, remainder: registerPoll() override and readMemory(), which is declared without
//    "override".
//  * struct TbxCommandStreamReceiver: the static factory create(baseName, withAubDump,
//    executionEnvironment, rootDeviceIndex) and the alias TbxStream = NEO::TbxStream.
//  * TbxCommandStreamReceiverCreateFunc: pointer-to-function typedef with the same parameter list
//    as create(), returning CommandStreamReceiver *.
//  * Archive residue for tbx_command_stream_receiver_hw.h and its include block.
//  * class template TbxCommandStreamReceiverHw, derived from CommandStreamReceiverSimulatedHw,
//    first part: the BaseClass typedef, the AUB alias, two protected poll helper declarations and
//    public using-declarations for initAdditionalMMIO, aubManager, hardwareContextController,
//    engineInfo and stream. The declaration continues on the next line.
registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override; void readMemory(uint64_t physAddress, void *memory, size_t size); }; struct TbxCommandStreamReceiver { static CommandStreamReceiver *create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); using TbxStream = NEO::TbxStream; }; typedef CommandStreamReceiver *(*TbxCommandStreamReceiverCreateFunc)(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/tbx_command_stream_receiver_hw.h000066400000000000000000000066251363734646600315400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_stream/tbx_command_stream_receiver.h" #include "opencl/source/gen_common/aub_mapper.h" #include "opencl/source/memory_manager/address_mapper.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/memory_manager/page_table.h" #include "command_stream_receiver_simulated_hw.h" #include namespace NEO { class AubSubCaptureManager; class TbxStream; template class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw { protected: typedef CommandStreamReceiverSimulatedHw BaseClass; using AUB = typename AUBFamilyMapper::AUB; using BaseClass::getParametersForWriteMemory; using BaseClass::osContext; uint32_t getMaskAndValueForPollForCompletion() const; bool getpollNotEqualValueForPollForCompletion() const; public: using CommandStreamReceiverSimulatedCommonHw::initAdditionalMMIO; using CommandStreamReceiverSimulatedCommonHw::aubManager; using CommandStreamReceiverSimulatedCommonHw::hardwareContextController; using CommandStreamReceiverSimulatedCommonHw::engineInfo; using CommandStreamReceiverSimulatedCommonHw::stream; 
// Remainder of the TbxCommandStreamReceiverHw class declaration (its head and using-declarations
// are on the previous line), up to the closing "} // namespace NEO" of the header.
//
//  * Overrides declared here: flush, waitForTaskCountWithKmdNotifyFallback, downloadAllocation,
//    processEviction, processResidency, both writeMemory overloads,
//    checkAndActivateAubSubCapture, pollForCompletion.
//  * submitBatchBuffer() is MOCKABLE_VIRTUAL and carries the existing comment "Family specific
//    version".
//  * static create(), constructor, destructor, initializeEngine().
//  * getMemoryManager() forwards to CommandStreamReceiver::getMemoryManager().
//  * getType() returns CommandStreamReceiverType::CSR_TBX.
//  * Data members: tbxStream (held by value), subCaptureManager, aubDeviceId, streamInitialized
//    (defaults to false), physicalAddressAllocator, ppgtt, ggtt, gttRemap (existing comment:
//    "remap CPU VA -> GGTT VA"), allocationsForDownload, dumpTbxNonWritable (defaults to false).
//
// NOTE(review): aubDeviceId has no default member initializer; the constructor later in this dump
// assigns it - confirm no other constructor path leaves it unset.
// NOTE(review): "std::unique_ptr::type> ppgtt" and the argument-less std::unique_ptr / std::set
// members look like template arguments lost in extraction.
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override; void downloadAllocation(GraphicsAllocation &gfxAllocation) override; void processEviction() override; void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override; void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override; bool writeMemory(GraphicsAllocation &gfxAllocation) override; AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) override; // Family specific version MOCKABLE_VIRTUAL void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead); void pollForCompletion() override; static CommandStreamReceiver *create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); TbxCommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); ~TbxCommandStreamReceiverHw() override; void initializeEngine(); MemoryManager *getMemoryManager() { return CommandStreamReceiver::getMemoryManager(); } TbxStream tbxStream; std::unique_ptr subCaptureManager; uint32_t aubDeviceId; bool streamInitialized = false; std::unique_ptr physicalAddressAllocator; std::unique_ptr::type> ppgtt; std::unique_ptr ggtt; // remap CPU VA -> GGTT VA AddressMapper gttRemap; std::set allocationsForDownload = {}; CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_TBX; } bool dumpTbxNonWritable = false; }; } // namespace NEO 
// This line starts with archive residue for tbx_command_stream_receiver_hw.inl, followed by that
// file's licence comment and include block and the first part of the TbxCommandStreamReceiverHw
// constructor. The line ends on "ppgtt =", whose right-hand side is on the next line.
//
//  * Constructor, first part:
//      - initialises BaseClass with both arguments;
//      - physicalAddressAllocator takes ownership of
//        createPhysicalAddressAllocator(&peekHwInfo());
//      - calls initAubCenter(localMemoryEnabled, "", getType()) on the root device environment;
//      - UNRECOVERABLE_IF the environment has no aubCenter afterwards;
//      - aubManager is set from aubCenter->getAubManager().
compute-runtime-20.13.16352/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl000066400000000000000000000520671363734646600320740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/aub/aub_center.h" #include "opencl/source/aub/aub_helper.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_context_controller.h" #include "opencl/source/memory_manager/memory_banks.h" #include "opencl/source/memory_manager/physical_address_allocator.h" #include namespace NEO { template TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(executionEnvironment, rootDeviceIndex) { physicalAddressAllocator.reset(this->createPhysicalAddressAllocator(&this->peekHwInfo())); executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(this->localMemoryEnabled, "", this->getType()); auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get(); UNRECOVERABLE_IF(nullptr == aubCenter); aubManager = aubCenter->getAubManager(); ppgtt = 
// This line starts with the tail of the TbxCommandStreamReceiverHw constructor ("ppgtt =" is on
// the previous line), then holds the destructor and the first part of initializeEngine(). It ends
// on "auto physRCS =", whose initializer is on the next line.
//
//  * Constructor, tail - creates ppgtt and ggtt on physicalAddressAllocator.get(). aubDeviceId is
//    taken from peekHwInfo().capabilityTable.aubDeviceId when the OverrideAubDeviceId debug flag
//    is -1, otherwise from the flag. stream is pointed at the member tbxStream.
//  * Destructor - closes tbxStream only when streamInitialized is set, then calls
//    freeEngineInfo(gttRemap).
//  * initializeEngine(), first part:
//      - with a hardwareContextController: calls its initialize() and returns;
//      - returns early when engineInfo.pLRCA is already set;
//      - otherwise calls initGlobalMMIO(), initEngineMMIO(), initAdditionalMMIO();
//      - status page: allocates 0x1000 bytes aligned to 0x1000, maps it through gttRemap and ggtt,
//        reserves the GGTT range in tbxStream and writes engineInfo.ggttHWSP to the register at
//        computeRegisterOffset(csTraits.mmioBase, 0x2080);
//      - LRCA: allocated with csTraits.sizeLRCA / csTraits.alignLRCA and passed to
//        csTraits.initialize();
//      - ring buffer: size 0x4 * 0x1000, alignment 0x1000, mapped through gttRemap.
//
// NOTE(review): "auto csTraits = this->getCsTraits(...)" takes a copy of the object that
// getCsTraits() is declared to return by const reference - confirm that copying the
// AubMemDump::LrcaHelper base is intended.
// NOTE(review): none of the alignedMalloc() results on this line are checked before use.
std::make_unique::type>(physicalAddressAllocator.get()); ggtt = std::make_unique(physicalAddressAllocator.get()); auto debugDeviceId = DebugManager.flags.OverrideAubDeviceId.get(); this->aubDeviceId = debugDeviceId == -1 ? this->peekHwInfo().capabilityTable.aubDeviceId : static_cast(debugDeviceId); this->stream = &tbxStream; } template TbxCommandStreamReceiverHw::~TbxCommandStreamReceiverHw() { if (streamInitialized) { tbxStream.close(); } this->freeEngineInfo(gttRemap); } template void TbxCommandStreamReceiverHw::initializeEngine() { if (hardwareContextController) { hardwareContextController->initialize(); return; } auto csTraits = this->getCsTraits(osContext->getEngineType()); if (engineInfo.pLRCA) { return; } this->initGlobalMMIO(); this->initEngineMMIO(); this->initAdditionalMMIO(); // Global HW Status Page { const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; engineInfo.pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); engineInfo.ggttHWSP = gttRemap.map(engineInfo.pGlobalHWStatusPage, sizeHWSP); auto physHWSP = ggtt->map(engineInfo.ggttHWSP, sizeHWSP, this->getGTTBits(), this->getMemoryBankForGtt()); // Write our GHWSP AubGTTData data = {0}; this->getGTTData(reinterpret_cast(physHWSP), data); AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttHWSP, sizeHWSP, physHWSP, data); tbxStream.writeMMIO(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2080), engineInfo.ggttHWSP); } // Allocate the LRCA const size_t sizeLRCA = csTraits.sizeLRCA; const size_t alignLRCA = csTraits.alignLRCA; auto pLRCABase = alignedMalloc(sizeLRCA, alignLRCA); engineInfo.pLRCA = pLRCABase; // Initialize the LRCA to a known state csTraits.initialize(pLRCABase); // Reserve the RCS ring buffer engineInfo.sizeRingBuffer = 0x4 * 0x1000; { const size_t alignRCS = 0x1000; engineInfo.pRingBuffer = alignedMalloc(engineInfo.sizeRingBuffer, alignRCS); engineInfo.ggttRingBuffer = gttRemap.map(engineInfo.pRingBuffer, engineInfo.sizeRingBuffer); auto physRCS = 
// This line starts with the rest of TbxCommandStreamReceiverHw::initializeEngine() ("auto physRCS ="
// is on the previous line) and the first part of the static factory create(). It ends inside the
// argument list of an initAubCenter() call.
//
//  * initializeEngine(), remainder:
//      - maps the ring buffer in ggtt and reserves its GGTT range in tbxStream;
//      - programs the ring state in the LRCA image: head 0, tail 0, base = engineInfo.ggttRingBuffer,
//        control = (sizeRingBuffer - 0x1000) | 1;
//      - maps the LRCA through gttRemap and ggtt, reserves its GGTT range and writes the LRCA
//        image with AUB::addMemoryWrite(), using csTraits.aubHintLRCA for both the address-space
//        lookup and the hint;
//      - ends with DEBUG_BREAK_IF(!engineInfo.pLRCA).
//  * create(baseName, withAubDump, ...), first part, withAubDump branch:
//      - reads the hardware info and HwHelper for the core family and asks
//        hwHelper.getEnableLocalMemory();
//      - fullName comes from AUBCommandStreamReceiver::createFullFilePath(*hwInfo, baseName) and is
//        replaced by the AUBDumpCaptureFileName debug flag when that flag is not "unk";
//      - starts the initAubCenter(localMemoryEnabled, ...) call completed on the next line.
//
// NOTE(review): "csr" is declared as a raw pointer without an initializer; it is assigned on the
// next line in both branches of the if/else.
ggtt->map(engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, this->getGTTBits(), this->getMemoryBankForGtt()); AubGTTData data = {0}; this->getGTTData(reinterpret_cast(physRCS), data); AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttRingBuffer, engineInfo.sizeRingBuffer, physRCS, data); } // Initialize the ring MMIO registers { uint32_t ringHead = 0x000; uint32_t ringTail = 0x000; auto ringBase = engineInfo.ggttRingBuffer; auto ringCtrl = (uint32_t)((engineInfo.sizeRingBuffer - 0x1000) | 1); csTraits.setRingHead(pLRCABase, ringHead); csTraits.setRingTail(pLRCABase, ringTail); csTraits.setRingBase(pLRCABase, ringBase); csTraits.setRingCtrl(pLRCABase, ringCtrl); } // Write our LRCA { engineInfo.ggttLRCA = gttRemap.map(engineInfo.pLRCA, sizeLRCA); auto lrcAddressPhys = ggtt->map(engineInfo.ggttLRCA, sizeLRCA, this->getGTTBits(), this->getMemoryBankForGtt()); AubGTTData data = {0}; this->getGTTData(reinterpret_cast(lrcAddressPhys), data); AUB::reserveAddressGGTT(tbxStream, engineInfo.ggttLRCA, sizeLRCA, lrcAddressPhys, data); AUB::addMemoryWrite( tbxStream, lrcAddressPhys, pLRCABase, sizeLRCA, this->getAddressSpace(csTraits.aubHintLRCA), csTraits.aubHintLRCA); } DEBUG_BREAK_IF(!engineInfo.pLRCA); } template CommandStreamReceiver *TbxCommandStreamReceiverHw::create(const std::string &baseName, bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { TbxCommandStreamReceiverHw *csr; if (withAubDump) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); auto localMemoryEnabled = hwHelper.getEnableLocalMemory(*hwInfo); auto fullName = AUBCommandStreamReceiver::createFullFilePath(*hwInfo, baseName); if (DebugManager.flags.AUBDumpCaptureFileName.get() != "unk") { fullName.assign(DebugManager.flags.AUBDumpCaptureFileName.get()); } executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->initAubCenter(localMemoryEnabled, 
fullName, CommandStreamReceiverType::CSR_TBX_WITH_AUB); csr = new CommandStreamReceiverWithAUBDump>(baseName, executionEnvironment, rootDeviceIndex); auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get(); UNRECOVERABLE_IF(nullptr == aubCenter); auto subCaptureCommon = aubCenter->getSubCaptureCommon(); UNRECOVERABLE_IF(nullptr == subCaptureCommon); if (subCaptureCommon->subCaptureMode > AubSubCaptureManager::SubCaptureMode::Off) { csr->subCaptureManager = std::make_unique(fullName, *subCaptureCommon); } if (csr->aubManager) { if (!csr->aubManager->isOpen()) { MultiDispatchInfo dispatchInfo; csr->aubManager->open(csr->subCaptureManager ? csr->subCaptureManager->getSubCaptureFileName(dispatchInfo) : fullName); UNRECOVERABLE_IF(!csr->aubManager->isOpen()); } } } else { csr = new TbxCommandStreamReceiverHw(executionEnvironment, rootDeviceIndex); } if (!csr->aubManager) { // Open our stream csr->stream->open(nullptr); // Add the file header. bool streamInitialized = csr->stream->init(AubMemDump::SteppingValues::A, csr->aubDeviceId); csr->streamInitialized = streamInitialized; } return csr; } template bool TbxCommandStreamReceiverHw::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { if (subCaptureManager) { if (aubManager) { aubManager->pause(false); } } initializeEngine(); // Write our batch buffer auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset); auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset); auto currentOffset = batchBuffer.usedSize; DEBUG_BREAK_IF(currentOffset < batchBuffer.startOffset); auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset; auto overrideRingHead = false; auto submissionTaskCount = this->taskCount + 1; allocationsForResidency.push_back(batchBuffer.commandBufferAllocation); 
batchBuffer.commandBufferAllocation->updateResidencyTaskCount(submissionTaskCount, this->osContext->getContextId()); batchBuffer.commandBufferAllocation->updateTaskCount(submissionTaskCount, osContext->getContextId()); // Write allocations for residency processResidency(allocationsForResidency, 0u); if (subCaptureManager) { if (aubManager) { auto status = subCaptureManager->getSubCaptureStatus(); if (!status.wasActiveInPreviousEnqueue && status.isActive) { overrideRingHead = true; } if (!status.wasActiveInPreviousEnqueue && !status.isActive) { aubManager->pause(true); } } } submitBatchBuffer( batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, this->getMemoryBank(batchBuffer.commandBufferAllocation), this->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation), overrideRingHead); if (subCaptureManager) { pollForCompletion(); subCaptureManager->disableSubCapture(); } return true; } template void TbxCommandStreamReceiverHw::submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) { if (hardwareContextController) { if (batchBufferSize) { hardwareContextController->submit(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, MemoryConstants::pageSize64k, overrideRingHead); } return; } auto csTraits = this->getCsTraits(osContext->getEngineType()); { auto physBatchBuffer = ppgtt->map(static_cast(batchBufferGpuAddress), batchBufferSize, entryBits, memoryBank); AubHelperHw aubHelperHw(this->localMemoryEnabled); AUB::reserveAddressPPGTT(tbxStream, static_cast(batchBufferGpuAddress), batchBufferSize, physBatchBuffer, entryBits, aubHelperHw); AUB::addMemoryWrite( tbxStream, physBatchBuffer, batchBuffer, batchBufferSize, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary), AubMemDump::DataTypeHintValues::TraceBatchBufferPrimary); } // Add a batch buffer start to the RCS auto previousTail = engineInfo.tailRingBuffer; { typedef 
typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::MI_NOOP MI_NOOP; auto pTail = ptrOffset(engineInfo.pRingBuffer, engineInfo.tailRingBuffer); auto ggttTail = ptrOffset(engineInfo.ggttRingBuffer, engineInfo.tailRingBuffer); auto sizeNeeded = sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_NOOP) + sizeof(MI_LOAD_REGISTER_IMM); if (engineInfo.tailRingBuffer + sizeNeeded >= engineInfo.sizeRingBuffer) { // Pad the remaining ring with NOOPs auto sizeToWrap = engineInfo.sizeRingBuffer - engineInfo.tailRingBuffer; memset(pTail, 0, sizeToWrap); // write remaining ring auto physDumpStart = ggtt->map(ggttTail, sizeToWrap, this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( tbxStream, physDumpStart, pTail, sizeToWrap, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer), AubMemDump::DataTypeHintValues::TraceCommandBuffer); previousTail = 0; engineInfo.tailRingBuffer = 0; pTail = engineInfo.pRingBuffer; } else if (engineInfo.tailRingBuffer == 0) { // Add a LRI if this is our first submission auto lri = GfxFamily::cmdInitLoadRegisterImm; lri.setRegisterOffset(AubMemDump::computeRegisterOffset(csTraits.mmioBase, 0x2244)); lri.setDataDword(0x00010000); *(MI_LOAD_REGISTER_IMM *)pTail = lri; pTail = ((MI_LOAD_REGISTER_IMM *)pTail) + 1; } // Add our BBS auto bbs = GfxFamily::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddressGraphicsaddress472(static_cast(batchBufferGpuAddress)); bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)pTail = bbs; pTail = ((MI_BATCH_BUFFER_START *)pTail) + 1; // Add a NOOP as our tail needs to be aligned to a QWORD *(MI_NOOP *)pTail = GfxFamily::cmdInitNoop; pTail = ((MI_NOOP *)pTail) + 1; // Compute our new ring tail. 
engineInfo.tailRingBuffer = (uint32_t)ptrDiff(pTail, engineInfo.pRingBuffer); // Only dump the new commands auto ggttDumpStart = ptrOffset(engineInfo.ggttRingBuffer, previousTail); auto dumpStart = ptrOffset(engineInfo.pRingBuffer, previousTail); auto dumpLength = engineInfo.tailRingBuffer - previousTail; // write RCS auto physDumpStart = ggtt->map(ggttDumpStart, dumpLength, this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( tbxStream, physDumpStart, dumpStart, dumpLength, this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceCommandBuffer), AubMemDump::DataTypeHintValues::TraceCommandBuffer); // update the RCS mmio tail in the LRCA auto physLRCA = ggtt->map(engineInfo.ggttLRCA, sizeof(engineInfo.tailRingBuffer), this->getGTTBits(), this->getMemoryBankForGtt()); AUB::addMemoryWrite( tbxStream, physLRCA + 0x101c, &engineInfo.tailRingBuffer, sizeof(engineInfo.tailRingBuffer), this->getAddressSpace(AubMemDump::DataTypeHintValues::TraceNotype)); DEBUG_BREAK_IF(engineInfo.tailRingBuffer >= engineInfo.sizeRingBuffer); } // Submit our execlist by submitting to the execlist submit ports { typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; auto ggttLRCA = engineInfo.ggttLRCA; contextDescriptor.sData.LogicalRingCtxAddress = ggttLRCA / 4096; contextDescriptor.sData.ContextID = 0; this->submitLRCA(contextDescriptor); } } template void TbxCommandStreamReceiverHw::pollForCompletion() { if (hardwareContextController) { hardwareContextController->pollForCompletion(); return; } typedef typename AubMemDump::CmdServicesMemTraceRegisterPoll CmdServicesMemTraceRegisterPoll; 
auto mmioBase = this->getCsTraits(osContext->getEngineType()).mmioBase; bool pollNotEqual = getpollNotEqualValueForPollForCompletion(); uint32_t mask = getMaskAndValueForPollForCompletion(); uint32_t value = mask; tbxStream.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS mask, value, pollNotEqual, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); } template void TbxCommandStreamReceiverHw::writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) { AubHelperHw aubHelperHw(this->localMemoryEnabled); PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { AUB::reserveAddressGGTTAndWriteMmeory(tbxStream, static_cast(gpuAddress), cpuAddress, physAddress, size, offset, entryBits, aubHelperHw); }; ppgtt->pageWalk(static_cast(gpuAddress), size, 0, entryBits, walker, memoryBank); } template bool TbxCommandStreamReceiverHw::writeMemory(GraphicsAllocation &gfxAllocation) { if (!this->isTbxWritable(gfxAllocation)) { return false; } uint64_t gpuAddress; void *cpuAddress; size_t size; if (!this->getParametersForWriteMemory(gfxAllocation, gpuAddress, cpuAddress, size)) { return false; } if (aubManager) { this->writeMemoryWithAubManager(gfxAllocation); } else { writeMemory(gpuAddress, cpuAddress, size, this->getMemoryBank(&gfxAllocation), this->getPPGTTAdditionalBits(&gfxAllocation)); } if (AubHelper::isOneTimeAubWritableAllocationType(gfxAllocation.getAllocationType())) { this->setTbxWritable(false, gfxAllocation); } return true; } template void TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { this->flushBatchedSubmissions(); while (*this->getTagAddress() < this->latestFlushedTaskCount) { downloadAllocation(*this->getTagAllocation()); } for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { 
downloadAllocation(*graphicsAllocation); } this->allocationsForDownload.clear(); BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); } template void TbxCommandStreamReceiverHw::processEviction() { this->allocationsForDownload.insert(this->getEvictionAllocations().begin(), this->getEvictionAllocations().end()); BaseClass::processEviction(); } template void TbxCommandStreamReceiverHw::processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) { for (auto &gfxAllocation : allocationsForResidency) { if (dumpTbxNonWritable) { this->setTbxWritable(true, *gfxAllocation); } if (!writeMemory(*gfxAllocation)) { DEBUG_BREAK_IF(!((gfxAllocation->getUnderlyingBufferSize() == 0) || !this->isTbxWritable(*gfxAllocation))); } gfxAllocation->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId()); } dumpTbxNonWritable = false; } template void TbxCommandStreamReceiverHw::downloadAllocation(GraphicsAllocation &gfxAllocation) { if (hardwareContextController) { hardwareContextController->readMemory(gfxAllocation.getGpuAddress(), gfxAllocation.getUnderlyingBuffer(), gfxAllocation.getUnderlyingBufferSize(), this->getMemoryBank(&gfxAllocation), MemoryConstants::pageSize64k); return; } auto cpuAddress = gfxAllocation.getUnderlyingBuffer(); auto gpuAddress = gfxAllocation.getGpuAddress(); auto length = gfxAllocation.getUnderlyingBufferSize(); if (length) { PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { DEBUG_BREAK_IF(offset > length); tbxStream.readMemory(physAddress, ptrOffset(cpuAddress, offset), size); }; ppgtt->pageWalk(static_cast(gpuAddress), length, 0, 0, walker, this->getMemoryBank(&gfxAllocation)); } } template uint32_t TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() const { return 0x100; } template bool TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion() const { return false; 
} template AubSubCaptureStatus TbxCommandStreamReceiverHw::checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) { if (!subCaptureManager) { return {false, false}; } auto status = subCaptureManager->checkAndActivateSubCapture(dispatchInfo); if (status.isActive && !status.wasActiveInPreviousEnqueue) { dumpTbxNonWritable = true; } return status; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/command_stream/tbx_stream.cpp000066400000000000000000000041661363734646600260110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/aub/aub_helper.h" #include "opencl/source/command_stream/tbx_command_stream_receiver.h" #include "opencl/source/tbx/tbx_sockets.h" namespace NEO { TbxStream::TbxStream() { } TbxStream::~TbxStream() { delete socket; } void TbxStream::open(const char *options) { } void TbxStream::close() { DEBUG_BREAK_IF(!socket); socket->close(); } bool TbxStream::init(uint32_t stepping, uint32_t device) { socket = TbxSockets::create(); DEBUG_BREAK_IF(!socket); auto tbxServer = DebugManager.flags.TbxServer.get(); auto tbxPort = DebugManager.flags.TbxPort.get(); return socket->init(tbxServer, tbxPort); } void TbxStream::writeMemory(uint64_t addr, const void *memory, size_t size, uint32_t addressSpace, uint32_t hint) { uint32_t type = AubHelper::getMemType(addressSpace); socket->writeMemory(addr, memory, size, type); } void TbxStream::writeGTT(uint32_t gttOffset, uint64_t entry) { socket->writeGTT(gttOffset, entry); } void TbxStream::writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) { uint32_t type = AubHelper::getMemType(addressSpace); socket->writeMemory(physAddress, &entry, sizeof(entry), type); } void TbxStream::writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) { } void 
TbxStream::writeMMIOImpl(uint32_t offset, uint32_t value) { socket->writeMMIO(offset, value); } void TbxStream::registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t desiredValue, bool pollNotEqual, uint32_t timeoutAction) { bool matches = false; bool asyncMMIO = false; do { uint32_t value; socket->readMMIO(registerOffset, &value); matches = ((value & mask) == desiredValue); } while (matches == pollNotEqual && asyncMMIO); } void TbxStream::readMemory(uint64_t physAddress, void *memory, size_t size) { socket->readMemory(physAddress, memory, size); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/compiler_interface/000077500000000000000000000000001363734646600237675ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/compiler_interface/CMakeLists.txt000066400000000000000000000013001363734646600265210ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_COMPILER_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/default_cl_cache_config.cpp ) get_property(NEO_COMPILER_INTERFACE GLOBAL PROPERTY NEO_COMPILER_INTERFACE) get_property(NEO_DEVICE_BINARY_FORMAT GLOBAL PROPERTY NEO_DEVICE_BINARY_FORMAT) list(APPEND RUNTIME_SRCS_COMPILER_INTERFACE ${NEO_COMPILER_INTERFACE}) list(APPEND RUNTIME_SRCS_COMPILER_INTERFACE ${NEO_DEVICE_BINARY_FORMAT}) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMPILER_INTERFACE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMPILER_INTERFACE ${RUNTIME_SRCS_COMPILER_INTERFACE}) compute-runtime-20.13.16352/opencl/source/compiler_interface/default_cache_config.cpp000066400000000000000000000005531363734646600305720ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/default_cache_config.h" #include "opencl/source/compiler_interface/default_cl_cache_config.h" namespace NEO { CompilerCacheConfig 
getDefaultCompilerCacheConfig() { return getDefaultClCompilerCacheConfig(); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/compiler_interface/default_cl_cache_config.cpp000066400000000000000000000014321363734646600312450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "default_cl_cache_config.h" #include "shared/source/utilities/debug_settings_reader.h" #include "opencl/source/os_interface/ocl_reg_path.h" #include "config.h" #include "os_inc.h" #include namespace NEO { CompilerCacheConfig getDefaultClCompilerCacheConfig() { CompilerCacheConfig ret; std::string keyName = oclRegPath; keyName += "cl_cache_dir"; std::unique_ptr settingsReader(SettingsReader::createOsReader(false, keyName)); ret.cacheDir = settingsReader->getSetting(settingsReader->appSpecificLocation(keyName), static_cast(CL_CACHE_LOCATION)); ret.cacheFileExtension = ".cl_cache"; return ret; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/compiler_interface/default_cl_cache_config.h000066400000000000000000000003611363734646600307120ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_cache.h" namespace NEO { CompilerCacheConfig getDefaultClCompilerCacheConfig(); } compute-runtime-20.13.16352/opencl/source/context/000077500000000000000000000000001363734646600216215ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/context/CMakeLists.txt000066400000000000000000000012021363734646600243540ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_CONTEXT ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/context_extra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context.h ${CMAKE_CURRENT_SOURCE_DIR}/context.inl 
${CMAKE_CURRENT_SOURCE_DIR}/context_type.h ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CONTEXT}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_CONTEXT ${RUNTIME_SRCS_CONTEXT}) compute-runtime-20.13.16352/opencl/source/context/context.cpp000066400000000000000000000316371363734646600240230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/scheduler/scheduler_kernel.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/sharing_factory.h" #include "d3d_sharing_functions.h" #include #include namespace NEO { Context::Context( void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data) { properties = nullptr; numProperties = 0; contextCallback = funcNotify; userData = data; memoryManager = nullptr; specialQueue = nullptr; defaultDeviceQueue = 
nullptr; driverDiagnostics = nullptr; sharingFunctions.resize(SharingType::MAX_SHARING_VALUE); schedulerBuiltIn = std::make_unique(); } Context::~Context() { delete[] properties; if (specialQueue) { delete specialQueue; } if (svmAllocsManager) { delete svmAllocsManager; } if (driverDiagnostics) { delete driverDiagnostics; } if (memoryManager && memoryManager->isAsyncDeleterEnabled()) { memoryManager->getDeferredDeleter()->removeClient(); } gtpinNotifyContextDestroy((cl_context)this); for (auto &device : devices) { device->decRefInternal(); } delete static_cast(schedulerBuiltIn->pKernel); delete schedulerBuiltIn->pProgram; schedulerBuiltIn->pKernel = nullptr; schedulerBuiltIn->pProgram = nullptr; } DeviceQueue *Context::getDefaultDeviceQueue() { return defaultDeviceQueue; } void Context::setDefaultDeviceQueue(DeviceQueue *queue) { defaultDeviceQueue = queue; } CommandQueue *Context::getSpecialQueue() { return specialQueue; } void Context::setSpecialQueue(CommandQueue *commandQueue) { specialQueue = commandQueue; } void Context::overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue) { setSpecialQueue(commandQueue); commandQueue->setIsSpecialCommandQueue(true); //decrement ref count that special queue added this->decRefInternal(); }; bool Context::areMultiStorageAllocationsPreferred() { return this->contextType != ContextType::CONTEXT_TYPE_SPECIALIZED; } bool Context::createImpl(const cl_context_properties *properties, const ClDeviceVector &inputDevices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data, cl_int &errcodeRet) { auto propertiesCurrent = properties; bool interopUserSync = false; int32_t driverDiagnosticsUsed = -1; auto sharingBuilder = sharingFactory.build(); std::unique_ptr driverDiagnostics; while (propertiesCurrent && *propertiesCurrent) { errcodeRet = CL_SUCCESS; auto propertyType = propertiesCurrent[0]; auto propertyValue = propertiesCurrent[1]; propertiesCurrent += 2; switch (propertyType) { case 
CL_CONTEXT_PLATFORM: { if (castToObject(reinterpret_cast(propertyValue)) == nullptr) { errcodeRet = CL_INVALID_PLATFORM; return false; } } break; case CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL: driverDiagnosticsUsed = static_cast(propertyValue); break; case CL_CONTEXT_INTEROP_USER_SYNC: interopUserSync = propertyValue > 0; break; default: if (!sharingBuilder->processProperties(propertyType, propertyValue, errcodeRet)) { errcodeRet = processExtraProperties(propertyType, propertyValue); } if (errcodeRet != CL_SUCCESS) { return false; } break; } } auto numProperties = ptrDiff(propertiesCurrent, properties) / sizeof(cl_context_properties); cl_context_properties *propertiesNew = nullptr; // copy the user properties if there are any if (numProperties) { propertiesNew = new cl_context_properties[numProperties + 1]; memcpy_s(propertiesNew, (numProperties + 1) * sizeof(cl_context_properties), properties, numProperties * sizeof(cl_context_properties)); propertiesNew[numProperties] = 0; numProperties++; } if (DebugManager.flags.PrintDriverDiagnostics.get() != -1) { driverDiagnosticsUsed = DebugManager.flags.PrintDriverDiagnostics.get(); } if (driverDiagnosticsUsed >= 0) { driverDiagnostics.reset(new DriverDiagnostics((cl_diagnostics_verbose_level)driverDiagnosticsUsed)); } this->numProperties = numProperties; this->properties = propertiesNew; this->setInteropUserSyncEnabled(interopUserSync); if (!sharingBuilder->finalizeProperties(*this, errcodeRet)) { return false; } this->driverDiagnostics = driverDiagnostics.release(); this->devices = inputDevices; // We currently assume each device uses the same MemoryManager if (devices.size() > 0) { auto device = this->getDevice(0); this->memoryManager = device->getMemoryManager(); if (memoryManager->isAsyncDeleterEnabled()) { memoryManager->getDeferredDeleter()->addClient(); } bool anySvmSupport = false; for (auto &device : devices) { device->incRefInternal(); anySvmSupport |= device->getHardwareInfo().capabilityTable.ftrSvm; } if 
(anySvmSupport) { this->svmAllocsManager = new SVMAllocsManager(this->memoryManager); } } auto commandQueue = CommandQueue::create(this, devices[0], nullptr, true, errcodeRet); DEBUG_BREAK_IF(commandQueue == nullptr); overrideSpecialQueueAndDecrementRefCount(commandQueue); return true; } cl_int Context::getInfo(cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t valueSize = 0; const void *pValue = nullptr; cl_uint numDevices; cl_uint refCount = 0; std::vector devIDs; auto callGetinfo = true; switch (paramName) { case CL_CONTEXT_DEVICES: valueSize = devices.size() * sizeof(cl_device_id); devices.toDeviceIDs(devIDs); pValue = devIDs.data(); break; case CL_CONTEXT_NUM_DEVICES: numDevices = (cl_uint)(devices.size()); valueSize = sizeof(numDevices); pValue = &numDevices; break; case CL_CONTEXT_PROPERTIES: valueSize = this->numProperties * sizeof(cl_context_properties); pValue = this->properties; if (valueSize == 0) { callGetinfo = false; } break; case CL_CONTEXT_REFERENCE_COUNT: refCount = static_cast(this->getReference()); valueSize = sizeof(refCount); pValue = &refCount; break; default: pValue = getOsContextInfo(paramName, &valueSize); break; } if (callGetinfo) { retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, pValue, valueSize)); } else { retVal = CL_SUCCESS; } if (paramValueSizeRet) { *paramValueSizeRet = valueSize; } return retVal; } size_t Context::getNumDevices() const { return devices.size(); } size_t Context::getTotalNumDevices() const { size_t numAvailableDevices = 0u; for (auto &device : devices) { numAvailableDevices += device->getNumAvailableDevices(); } return numAvailableDevices; } ClDevice *Context::getDevice(size_t deviceOrdinal) const { return (ClDevice *)devices[deviceOrdinal]; } cl_int Context::getSupportedImageFormats( Device *device, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint 
*numImageFormatsReturned) { size_t numImageFormats = 0; const bool nv12ExtensionEnabled = device->getSpecializedDevice()->getDeviceInfo().nv12Extension; const bool packedYuvExtensionEnabled = device->getSpecializedDevice()->getDeviceInfo().packedYuvExtension; auto appendImageFormats = [&](ArrayRef formats) { if (imageFormats) { size_t offset = numImageFormats; for (size_t i = 0; i < formats.size() && offset < numEntries; ++i) { imageFormats[offset++] = formats[i].OCLImageFormat; } } numImageFormats += formats.size(); }; if (flags & CL_MEM_READ_ONLY) { if (this->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport >= 20) { appendImageFormats(SurfaceFormats::readOnly20()); } else { appendImageFormats(SurfaceFormats::readOnly12()); } if (Image::isImage2d(imageType) && nv12ExtensionEnabled) { appendImageFormats(SurfaceFormats::planarYuv()); } if (Image::isImage2dOr2dArray(imageType)) { appendImageFormats(SurfaceFormats::readOnlyDepth()); } if (Image::isImage2d(imageType) && packedYuvExtensionEnabled) { appendImageFormats(SurfaceFormats::packedYuv()); } } else if (flags & CL_MEM_WRITE_ONLY) { appendImageFormats(SurfaceFormats::writeOnly()); if (Image::isImage2dOr2dArray(imageType)) { appendImageFormats(SurfaceFormats::readWriteDepth()); } } else if (nv12ExtensionEnabled && (flags & CL_MEM_NO_ACCESS_INTEL)) { if (this->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport >= 20) { appendImageFormats(SurfaceFormats::readOnly20()); } else { appendImageFormats(SurfaceFormats::readOnly12()); } if (Image::isImage2d(imageType)) { appendImageFormats(SurfaceFormats::planarYuv()); } } else { appendImageFormats(SurfaceFormats::readWrite()); if (Image::isImage2dOr2dArray(imageType)) { appendImageFormats(SurfaceFormats::readWriteDepth()); } } if (numImageFormatsReturned) { *numImageFormatsReturned = static_cast(numImageFormats); } return CL_SUCCESS; } SchedulerKernel &Context::getSchedulerKernel() { if (schedulerBuiltIn->pKernel) { return 
*static_cast(schedulerBuiltIn->pKernel); } auto initializeSchedulerProgramAndKernel = [&] { cl_int retVal = CL_SUCCESS; auto src = SchedulerKernel::loadSchedulerKernel(&getDevice(0)->getDevice()); auto program = Program::createFromGenBinary(*getDevice(0)->getExecutionEnvironment(), this, src.resource.data(), src.resource.size(), true, &retVal, &getDevice(0)->getDevice()); DEBUG_BREAK_IF(retVal != CL_SUCCESS); DEBUG_BREAK_IF(!program); retVal = program->processGenBinary(); DEBUG_BREAK_IF(retVal != CL_SUCCESS); schedulerBuiltIn->pProgram = program; auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName); DEBUG_BREAK_IF(!kernelInfo); schedulerBuiltIn->pKernel = Kernel::create( schedulerBuiltIn->pProgram, *kernelInfo, &retVal); UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0); DEBUG_BREAK_IF(retVal != CL_SUCCESS); }; std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel); UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr); return *static_cast(schedulerBuiltIn->pKernel); } bool Context::isDeviceAssociated(const ClDevice &clDevice) const { for (const auto &device : devices) { if (device == &clDevice) { return true; } } return false; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/context/context.h000066400000000000000000000135171363734646600234650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/vec.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/context/context_type.h" #include "opencl/source/context/driver_diagnostics.h" #include "opencl/source/helpers/base_object.h" #include namespace NEO { struct BuiltInKernel; class CommandStreamReceiver; class CommandQueue; class Device; class DeviceQueue; class MemObj; class MemoryManager; class SharingFunctions; class 
SVMAllocsManager; class SchedulerKernel; enum class BlitOperationResult { Unsupported, Fail, Success }; template <> struct OpenCLObjectMapper<_cl_context> { typedef class Context DerivedType; }; class Context : public BaseObject<_cl_context> { public: static const cl_ulong objectMagic = 0xA4234321DC002130LL; bool createImpl(const cl_context_properties *properties, const ClDeviceVector &devices, void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *), void *userData, cl_int &errcodeRet); template static T *create(const cl_context_properties *properties, const ClDeviceVector &devices, void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data, cl_int &errcodeRet) { auto pContext = new T(funcNotify, data); if (!pContext->createImpl(properties, devices, funcNotify, data, errcodeRet)) { delete pContext; pContext = nullptr; } return pContext; } Context &operator=(const Context &) = delete; Context(const Context &) = delete; ~Context() override; cl_int getInfo(cl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int getSupportedImageFormats(Device *device, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_image_format *imageFormats, cl_uint *numImageFormats); size_t getNumDevices() const; size_t getTotalNumDevices() const; ClDevice *getDevice(size_t deviceOrdinal) const; MemoryManager *getMemoryManager() const { return memoryManager; } SVMAllocsManager *getSVMAllocsManager() const { return svmAllocsManager; } DeviceQueue *getDefaultDeviceQueue(); void setDefaultDeviceQueue(DeviceQueue *queue); CommandQueue *getSpecialQueue(); void setSpecialQueue(CommandQueue *commandQueue); void overrideSpecialQueueAndDecrementRefCount(CommandQueue *commandQueue); template Sharing *getSharing(); template void registerSharing(Sharing *sharing); template void providePerformanceHint(cl_diagnostics_verbose_level flags, PerformanceHints performanceHint, Args &&... 
args) { DEBUG_BREAK_IF(contextCallback == nullptr); DEBUG_BREAK_IF(driverDiagnostics == nullptr); char hint[DriverDiagnostics::maxHintStringSize]; snprintf(hint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[performanceHint], std::forward(args)..., 0); if (driverDiagnostics->validFlags(flags)) { if (contextCallback) { contextCallback(hint, &flags, sizeof(flags), userData); } if (DebugManager.flags.PrintDriverDiagnostics.get() != -1) { printf("\n%s\n", hint); } } } template void providePerformanceHintForMemoryTransfer(cl_command_type commandType, bool transferRequired, Args &&... args) { cl_diagnostics_verbose_level verboseLevel = transferRequired ? CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL : CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL; PerformanceHints hint = driverDiagnostics->obtainHintForTransferOperation(commandType, transferRequired); providePerformanceHint(verboseLevel, hint, args...); } cl_bool isProvidingPerformanceHints() const { return driverDiagnostics != nullptr; } bool getInteropUserSyncEnabled() { return interopUserSync; } void setInteropUserSyncEnabled(bool enabled) { interopUserSync = enabled; } bool areMultiStorageAllocationsPreferred(); ContextType peekContextType() { return this->contextType; } MOCKABLE_VIRTUAL BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const; SchedulerKernel &getSchedulerKernel(); bool isDeviceAssociated(const ClDevice &clDevice) const; protected: Context(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *) = nullptr, void *userData = nullptr); // OS specific implementation void *getOsContextInfo(cl_context_info ¶mName, size_t *srcParamSize); cl_int processExtraProperties(cl_context_properties propertyType, cl_context_properties propertyValue); const cl_context_properties *properties; size_t numProperties; void(CL_CALLBACK *contextCallback)(const char *, const void *, size_t, void *); void *userData; std::unique_ptr 
schedulerBuiltIn; ClDeviceVector devices; MemoryManager *memoryManager; SVMAllocsManager *svmAllocsManager = nullptr; CommandQueue *specialQueue; DeviceQueue *defaultDeviceQueue; std::vector> sharingFunctions; DriverDiagnostics *driverDiagnostics; bool interopUserSync = false; cl_bool preferD3dSharedResources = 0u; ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/context/context.inl000066400000000000000000000011061363734646600240070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" namespace NEO { template void Context::registerSharing(Sharing *sharing) { UNRECOVERABLE_IF(!sharing); this->sharingFunctions[Sharing::sharingId].reset(sharing); } template Sharing *Context::getSharing() { if (Sharing::sharingId >= sharingFunctions.size()) { return nullptr; } return reinterpret_cast(sharingFunctions[Sharing::sharingId].get()); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/context/context_extra.cpp000066400000000000000000000011471363734646600252170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { cl_int Context::processExtraProperties(cl_context_properties propertyType, cl_context_properties propertyValue) { return CL_INVALID_PROPERTY; } BlitOperationResult Context::blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const { return BlitOperationResult::Unsupported; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/context/context_type.h000066400000000000000000000004311363734646600245150ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * 
*/ #pragma once #include namespace NEO { enum ContextType : uint32_t { CONTEXT_TYPE_DEFAULT, CONTEXT_TYPE_SPECIALIZED, CONTEXT_TYPE_UNRESTRICTIVE }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/context/driver_diagnostics.cpp000066400000000000000000000324571363734646600262220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "driver_diagnostics.h" #include "shared/source/helpers/debug_helpers.h" namespace NEO { DriverDiagnostics::DriverDiagnostics(cl_diagnostics_verbose_level level) { this->verboseLevel = level; } bool DriverDiagnostics::validFlags(cl_diagnostics_verbose_level flags) const { return !!(verboseLevel & flags); } const char *const DriverDiagnostics::hintFormat[] = { "Performance hint: clCreateBuffer with pointer %p and size %u doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Buffer is not sharing the same physical memory with CPU.", //CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS "Performance hint: clCreateBuffer with pointer %p and size %u meets alignment restrictions and buffer will share the same physical memory with CPU.", //CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS "Performance hint: clCreateBuffer needs to allocate memory for buffer. 
For subsequent operations the buffer will share the same physical memory with CPU.", //CL_BUFFER_NEEDS_ALLOCATE_MEMORY "Performance hint: clCreateImage with pointer %p meets alignment restrictions and image will share the same physical memory with CPU.", //CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS "Performance hint: Driver calls internal clFlush on the command queue each time 1 command is enqueued.", //DRIVER_CALLS_INTERNAL_CL_FLUSH "Performance hint: Profiling adds overhead on all enqueue commands with events.", //PROFILING_ENABLED "Performance hint: Profiled kernels will be executed with disabled preemption.", //PROFILING_ENABLED_WITH_DISABLED_PREEMPTION "Performance hint: Subbuffer created from buffer %p shares the same memory with buffer.", //SUBBUFFER_SHARES_MEMORY "Performance hint: clSVMAlloc with pointer %p and size %u meets alignment restrictions.", //CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS "Performance hint: clEnqueueReadBuffer call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA "Performance hint: clEnqueueReadBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA "Performance hint: Pointer %p and size %u passed to clEnqueueReadBuffer doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. 
Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS "Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will require driver to copy the data.Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA "Performance hint: clEnqueueReadBufferRect call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA "Performance hint: Pointer %p and size %u passed to clEnqueueReadBufferRect doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS "Performance hint: clEnqueueWriteBuffer call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteBuffer call on a buffer %p with pointer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueWriteBufferRect call on a buffer %p require driver to copy the data. Consider using clEnqueueMapBuffer with buffer that shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteBufferRect call on a buffer %p will not require any data copy as the buffer shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA "Performance hint: Pointer %p and size %u passed to clEnqueueReadImage doesn't meet alignment restrictions. Size should be aligned to %u bytes and pointer should be aligned to %u. 
Driver needs to disable L3 caching.", //CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS "Performance hint: clEnqueueReadImage call on an image %p will not require any data copy as the image shares the same physical memory with CPU.", //CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteImage call on an image %p require driver to copy the data.", //CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA "Performance hint: clEnqueueWriteImage call on an image %p will not require any data copy as the image shares the same physical memory with CPU.", //CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA "Performance hint: clEnqueueMapBuffer call on a buffer %p will require driver to make a copy as buffer is not sharing the same physical memory with CPU.", //CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA "Performance hint: clEnqueueMapBuffer call on a buffer %p will not require any data copy as buffer shares the same physical memory with CPU.", //CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueMapImage call on an image %p will require driver to make a copy, as image is not sharing the same physical memory with CPU.", //CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA "Performance hint: clEnqueueMapImage call on an image %p will not require any data copy as image shares the same physical memory with CPU.", //CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueUnmapMemObject call with pointer %p will not require any data copy.", //CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA "Performance hint: clEnqueueUnmapMemObject call with pointer %p will require driver to copy the data to memory object %p.", //CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA "Performance hint: clEnqueueSVMMap call with pointer %p will not require any data copy.", //CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA "Performance hint: Printf detected in kernel %s, it may cause overhead.", //PRINTF_DETECTED_IN_KERNEL "Performance hint: Null local 
workgroup size detected ( kernel name: %s ); following sizes will be used for execution : { %u, %u, %u }.", //NULL_LOCAL_WORKGROUP_SIZE "Performance hint: Local workgroup sizes { %u, %u, %u } selected for this workload ( kernel name: %s ) may not be optimal, consider using following local workgroup size: { %u, %u, %u }.", //BAD_LOCAL_WORKGROUP_SIZE "Performance hint: Kernel %s register pressure is too high, spill fills will be generated, additional surface needs to be allocated of size %u, consider simplifying your kernel.", //REGISTER_PRESSURE_TOO_HIGH "Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"", //KERNEL_ARGUMENT_AUX_TRANSLATION "Performance hint: Buffer %p will use compressed memory.", //BUFFER_IS_COMPRESSED "Performance hint: Buffer %p will not use compressed memory.", //BUFFER_IS_NOT_COMPRESSED "Performance hint: Image %p will use compressed memory.", //IMAGE_IS_COMPRESSED "Performance hint: Image %p will not use compressed memory."}; //IMAGE_IS_NOT_COMPRESSED PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) { PerformanceHints hint; switch (commandType) { case CL_COMMAND_MAP_BUFFER: hint = transferRequired ? CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_MAP_IMAGE: hint = transferRequired ? CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_UNMAP_MEM_OBJECT: hint = transferRequired ? 
CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA : CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_WRITE_BUFFER: hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_READ_BUFFER: hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_WRITE_BUFFER_RECT: hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA; break; case CL_COMMAND_READ_BUFFER_RECT: hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA; break; case CL_COMMAND_WRITE_IMAGE: hint = transferRequired ? CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA; break; case CL_COMMAND_READ_IMAGE: UNRECOVERABLE_IF(transferRequired) hint = CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA; break; default: UNRECOVERABLE_IF(true); } return hint; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/context/driver_diagnostics.h000066400000000000000000000046421363734646600256620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl_ext_intel.h" namespace NEO { enum PerformanceHints { CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, CL_BUFFER_NEEDS_ALLOCATE_MEMORY, CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS, DRIVER_CALLS_INTERNAL_CL_FLUSH, PROFILING_ENABLED, PROFILING_ENABLED_WITH_DISABLED_PREEMPTION, SUBBUFFER_SHARES_MEMORY, CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, 
CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA, PRINTF_DETECTED_IN_KERNEL, NULL_LOCAL_WORKGROUP_SIZE, BAD_LOCAL_WORKGROUP_SIZE, REGISTER_PRESSURE_TOO_HIGH, PRIVATE_MEMORY_USAGE_TOO_HIGH, KERNEL_REQUIRES_COHERENCY, KERNEL_ARGUMENT_AUX_TRANSLATION, BUFFER_IS_COMPRESSED, BUFFER_IS_NOT_COMPRESSED, IMAGE_IS_COMPRESSED, IMAGE_IS_NOT_COMPRESSED }; class DriverDiagnostics { public: DriverDiagnostics(cl_diagnostics_verbose_level level); bool validFlags(cl_diagnostics_verbose_level flags) const; ~DriverDiagnostics() = default; static const char *const hintFormat[]; static const cl_int maxHintStringSize = 1024; PerformanceHints obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired); protected: cl_diagnostics_verbose_level verboseLevel; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/device_queue/000077500000000000000000000000001363734646600226005ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/device_queue/CMakeLists.txt000066400000000000000000000012171363734646600253410ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_DEVICE_QUEUE 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_profiling.inl ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_DEVICE_QUEUE ${RUNTIME_SRCS_DEVICE_QUEUE}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/device_queue/device_queue.cpp000066400000000000000000000202501363734646600257460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/device_queue/device_queue.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/queue_helpers.h" namespace NEO { DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE] = {}; const uint32_t DeviceQueue::numberOfDeviceEnqueues = 128; DeviceQueue::DeviceQueue(Context *context, ClDevice *device, cl_queue_properties &properties) : DeviceQueue() { this->context = context; this->device = device; if (context) { context->incRefInternal(); } commandQueueProperties = getCmdQueueProperties(&properties, CL_QUEUE_PROPERTIES); queueSize = getCmdQueueProperties(&properties, CL_QUEUE_SIZE); if (queueSize == 0) { queueSize = device->getDeviceInfo().queueOnDevicePreferredSize; } allocateResources(); initDeviceQueue(); } DeviceQueue *DeviceQueue::create(Context *context, ClDevice *device, const cl_queue_properties &properties, cl_int &errcodeRet) { errcodeRet = CL_SUCCESS; DeviceQueue *deviceQueue = context->getDefaultDeviceQueue(); auto 
isDefaultDeviceQueue = getCmdQueueProperties(&properties) & static_cast(CL_QUEUE_ON_DEVICE_DEFAULT); if (isDefaultDeviceQueue && deviceQueue) { deviceQueue->retain(); return deviceQueue; } auto funcCreate = deviceQueueFactory[device->getRenderCoreFamily()]; DEBUG_BREAK_IF(nullptr == funcCreate); deviceQueue = funcCreate(context, device, const_cast(properties)); context->setDefaultDeviceQueue(deviceQueue); return deviceQueue; } DeviceQueue::~DeviceQueue() { for (uint32_t i = 0; i < IndirectHeap::NUM_TYPES; i++) { if (heaps[i]) delete heaps[i]; } if (queueBuffer) device->getMemoryManager()->freeGraphicsMemory(queueBuffer); if (eventPoolBuffer) device->getMemoryManager()->freeGraphicsMemory(eventPoolBuffer); if (slbBuffer) device->getMemoryManager()->freeGraphicsMemory(slbBuffer); if (stackBuffer) device->getMemoryManager()->freeGraphicsMemory(stackBuffer); if (queueStorageBuffer) device->getMemoryManager()->freeGraphicsMemory(queueStorageBuffer); if (dshBuffer) device->getMemoryManager()->freeGraphicsMemory(dshBuffer); if (debugQueue) device->getMemoryManager()->freeGraphicsMemory(debugQueue); if (context) { context->setDefaultDeviceQueue(nullptr); context->decRefInternal(); } } Device &DeviceQueue::getDevice() { return device->getDevice(); } cl_int DeviceQueue::getCommandQueueInfo(cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { return getQueueInfo(this, paramName, paramValueSize, paramValue, paramValueSizeRet); } void DeviceQueue::allocateResources() { auto &caps = device->getDeviceInfo(); uint32_t alignedQueueSize = alignUp(queueSize, MemoryConstants::pageSize); auto rootDeviceIndex = device->getRootDeviceIndex(); queueBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, alignedQueueSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); auto eventPoolBufferSize = static_cast(caps.maxOnDeviceEvents) * sizeof(IGIL_DeviceEvent) + sizeof(IGIL_EventPool); 
eventPoolBufferSize = alignUp(eventPoolBufferSize, MemoryConstants::pageSize); eventPoolBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, eventPoolBufferSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); auto maxEnqueue = static_cast(alignedQueueSize) / sizeof(IGIL_CommandHeader); auto expectedStackSize = maxEnqueue * sizeof(uint32_t) * 3; // 3 full loads of commands expectedStackSize = alignUp(expectedStackSize, MemoryConstants::pageSize); stackBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, expectedStackSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); memset(stackBuffer->getUnderlyingBuffer(), 0, stackBuffer->getUnderlyingBufferSize()); auto queueStorageSize = alignedQueueSize * 2; // place for 2 full loads of queue_t queueStorageSize = alignUp(queueStorageSize, MemoryConstants::pageSize); queueStorageBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, queueStorageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); memset(queueStorageBuffer->getUnderlyingBuffer(), 0, queueStorageBuffer->getUnderlyingBufferSize()); auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); const size_t IDTSize = numberOfIDTables * interfaceDescriptorEntries * hwHelper.getInterfaceDescriptorDataSize(); // Additional padding of PAGE_SIZE for PageFaults just after DSH to satisfy hw requirements auto dshSize = (PARALLEL_SCHEDULER_HW_GROUPS + 2) * MAX_DSH_SIZE_PER_ENQUEUE * 8 + IDTSize + colorCalcStateSize + MemoryConstants::pageSize; dshSize = alignUp(dshSize, MemoryConstants::pageSize); dshBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, dshSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); debugQueue = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, 
GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); debugData = (DebugDataBuffer *)debugQueue->getUnderlyingBuffer(); memset(debugQueue->getUnderlyingBuffer(), 0, debugQueue->getUnderlyingBufferSize()); } void DeviceQueue::initDeviceQueue() { auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); auto &caps = device->getDeviceInfo(); memset(queueBuffer->getUnderlyingBuffer(), 0x0, queueBuffer->getUnderlyingBufferSize()); igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = -1; igilCmdQueue->m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; igilCmdQueue->m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); igilCmdQueue->m_magic = IGIL_MAGIC_NUMBER; auto igilEventPool = reinterpret_cast(eventPoolBuffer->getUnderlyingBuffer()); memset(eventPoolBuffer->getUnderlyingBuffer(), 0x0, eventPoolBuffer->getUnderlyingBufferSize()); igilEventPool->m_TimestampResolution = static_cast(device->getProfilingTimerResolution()); igilEventPool->m_size = caps.maxOnDeviceEvents; } void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode *hwTimeStamp, bool isCcsUsed) { setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount, isCcsUsed); addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount); } void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) { return; } void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) { return; } void DeviceQueue::resetDeviceQueue() { return; } void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) { return; } 
IndirectHeap *DeviceQueue::getIndirectHeap(IndirectHeap::Type type) { return nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/device_queue/device_queue.h000066400000000000000000000117441363734646600254230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/execution_model/device_enqueue.h" #include "opencl/source/helpers/base_object.h" namespace NEO { class ClDevice; class CommandQueue; class Context; class Device; class Kernel; class Event; struct MultiDispatchInfo; class SchedulerKernel; struct HwTimeStamps; template struct TagNode; template <> struct OpenCLObjectMapper<_device_queue> { typedef class DeviceQueue DerivedType; }; class DeviceQueue : public BaseObject<_device_queue> { public: static const cl_ulong objectMagic = 0x1734547890087154LL; DeviceQueue() { for (uint32_t i = 0; i < IndirectHeap::NUM_TYPES; i++) { heaps[i] = nullptr; } offsetDsh = 0; } DeviceQueue(Context *context, ClDevice *device, cl_queue_properties &properties); ~DeviceQueue() override; Device &getDevice(); Context &getContext() { return *context; } cl_uint getQueueSize() { return queueSize; } cl_command_queue_properties getCommandQueueProperties() const { return commandQueueProperties; } GraphicsAllocation *getQueueBuffer() { return queueBuffer; } GraphicsAllocation *getEventPoolBuffer() { return eventPoolBuffer; } GraphicsAllocation *getSlbBuffer() { return slbBuffer; } GraphicsAllocation *getStackBuffer() { return stackBuffer; } GraphicsAllocation *getQueueStorageBuffer() { return queueStorageBuffer; } GraphicsAllocation *getDshBuffer() { return dshBuffer; } GraphicsAllocation *getDebugQueue() { return debugQueue; } bool isProfilingEnabled() { return 
!!(commandQueueProperties & CL_QUEUE_PROFILING_ENABLE); } static DeviceQueue *create(Context *context, ClDevice *device, const cl_queue_properties &properties, cl_int &errcodeRet); cl_int getCommandQueueInfo(cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint64_t tagAddress, uint32_t taskCount, TagNode *hwTimeStamp, bool isCcsUsed); virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed); virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount); MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() { auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); auto igilCriticalSection = const_cast(&igilCmdQueue->m_controls.m_CriticalSection); return *igilCriticalSection == ExecutionModelCriticalSection::Free; } virtual void resetDeviceQueue(); virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed); virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type); void acquireEMCriticalSection() { if (DebugManager.flags.EnableNullHardware.get()) { return; } auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); igilCmdQueue->m_controls.m_CriticalSection = ExecutionModelCriticalSection::Taken; } uint32_t getDshOffset() const { return offsetDsh; } enum ExecutionModelCriticalSection { Free = 0, Taken = 1 }; static const uint32_t numberOfIDTables = 2; static const uint32_t interfaceDescriptorEntries = 64; static const uint32_t colorCalcStateSize = 192; static const uint32_t schedulerIDIndex = 62; static const uint32_t numberOfDeviceEnqueues; protected: void 
allocateResources(); void initDeviceQueue(); Context *context = nullptr; ClDevice *device = nullptr; cl_command_queue_properties commandQueueProperties = 0; cl_uint queueSize = 0; GraphicsAllocation *queueBuffer = nullptr; GraphicsAllocation *eventPoolBuffer = nullptr; GraphicsAllocation *slbBuffer = nullptr; GraphicsAllocation *stackBuffer = nullptr; GraphicsAllocation *queueStorageBuffer = nullptr; GraphicsAllocation *dshBuffer = nullptr; GraphicsAllocation *debugQueue = nullptr; DebugDataBuffer *debugData = nullptr; IndirectHeap *heaps[IndirectHeap::NUM_TYPES]; uint32_t offsetDsh; }; typedef DeviceQueue *(*DeviceQueueCreateFunc)( Context *context, ClDevice *device, cl_queue_properties &properties); } // namespace NEO compute-runtime-20.13.16352/opencl/source/device_queue/device_queue_hw.h000066400000000000000000000075441363734646600261240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/program.h" #include "opencl/source/scheduler/scheduler_kernel.h" namespace NEO { template class DeviceQueueHw : public DeviceQueue { using BaseClass = DeviceQueue; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename 
GfxFamily::MI_LOAD_REGISTER_REG; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; using MI_MATH = typename GfxFamily::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; public: DeviceQueueHw(Context *context, ClDevice *device, cl_queue_properties &properties) : BaseClass(context, device, properties) { allocateSlbBuffer(); offsetDsh = colorCalcStateSize + (uint32_t)sizeof(INTERFACE_DESCRIPTOR_DATA) * interfaceDescriptorEntries * numberOfIDTables; igilQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); } static DeviceQueue *create(Context *context, ClDevice *device, cl_queue_properties &properties) { return new (std::nothrow) DeviceQueueHw(context, device, properties); } IndirectHeap *getIndirectHeap(IndirectHeap::Type type) override; LinearStream *getSlbCS() { return &slbCS; } void resetDSH(); size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler); void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override; void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override; void resetDeviceQueue() override; void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override; uint32_t getSchedulerReturnInstance() { return igilQueue->m_controls.m_SchedulerEarlyReturn; } static size_t getCSPrefetchSize(); protected: void allocateSlbBuffer(); size_t getMinimumSlbSize(); size_t getWaCommandsSize(); void addArbCheckCmdWa(); void addMiAtomicCmdWa(uint64_t atomicOpPlaceholder); void addLriCmdWa(bool setArbCheck); void addLriCmd(bool setArbCheck); void addPipeControlCmdWa(bool isNoopCmd = false); void initPipeControl(PIPE_CONTROL *pc); void buildSlbDummyCommands(); void addDcFlushToPipeControlWa(PIPE_CONTROL *pc); void 
addProfilingEndCmds(uint64_t timestampAddress); static size_t getProfilingEndCmdsSize(); MOCKABLE_VIRTUAL void addMediaStateClearCmds(); static size_t getMediaStateClearCmdsSize(); static size_t getExecutionModelCleanupSectionSize(); static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed); LinearStream slbCS; IGIL_CommandQueue *igilQueue = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/device_queue/device_queue_hw_base.inl000066400000000000000000000255031363734646600274440ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" namespace NEO { template void DeviceQueueHw::allocateSlbBuffer() { auto slbSize = getMinimumSlbSize() + getWaCommandsSize(); slbSize *= 128; //num of enqueues slbSize += sizeof(MI_BATCH_BUFFER_START); slbSize = alignUp(slbSize, MemoryConstants::pageSize); slbSize += DeviceQueueHw::getExecutionModelCleanupSectionSize(); slbSize += (4 * MemoryConstants::pageSize); // +4 pages spec restriction slbSize = alignUp(slbSize, MemoryConstants::pageSize); slbBuffer = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), slbSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); } template void DeviceQueueHw::resetDeviceQueue() { auto &caps = device->getDeviceInfo(); auto igilEventPool = reinterpret_cast(eventPoolBuffer->getUnderlyingBuffer()); memset(eventPoolBuffer->getUnderlyingBuffer(), 0x0, eventPoolBuffer->getUnderlyingBufferSize()); 
igilEventPool->m_TimestampResolution = static_cast(device->getProfilingTimerResolution()); igilEventPool->m_size = caps.maxOnDeviceEvents; auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); igilQueue = igilCmdQueue; igilCmdQueue->m_controls.m_StackSize = static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); igilCmdQueue->m_controls.m_StackTop = static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); igilCmdQueue->m_controls.m_PreviousHead = IGIL_DEVICE_QUEUE_HEAD_INIT; igilCmdQueue->m_controls.m_IDTAfterFirstPhase = 1; igilCmdQueue->m_controls.m_CurrentIDToffset = 1; igilCmdQueue->m_controls.m_PreviousStorageTop = static_cast(queueStorageBuffer->getUnderlyingBufferSize()); igilCmdQueue->m_controls.m_PreviousStackTop = static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); igilCmdQueue->m_controls.m_DebugNextBlockID = 0xFFFFFFFF; igilCmdQueue->m_controls.m_QstorageSize = static_cast(queueStorageBuffer->getUnderlyingBufferSize()); igilCmdQueue->m_controls.m_QstorageTop = static_cast(queueStorageBuffer->getUnderlyingBufferSize()); igilCmdQueue->m_controls.m_IsProfilingEnabled = static_cast(isProfilingEnabled()); igilCmdQueue->m_controls.m_IsSimulation = static_cast(device->isSimulation()); igilCmdQueue->m_controls.m_LastScheduleEventNumber = 0; igilCmdQueue->m_controls.m_PreviousNumberOfQueues = 0; igilCmdQueue->m_controls.m_EnqueueMarkerScheduled = 0; igilCmdQueue->m_controls.m_SecondLevelBatchOffset = 0; igilCmdQueue->m_controls.m_TotalNumberOfQueues = 0; igilCmdQueue->m_controls.m_EventTimestampAddress = 0; igilCmdQueue->m_controls.m_ErrorCode = 0; igilCmdQueue->m_controls.m_CurrentScheduleEventNumber = 0; igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder = 0x00; igilCmdQueue->m_controls.m_DebugNextBlockGWS = 0; // set first stack element in surface at value "1", it protects Scheduler in corner case when StackTop is empty after Child execution auto stack = 
static_cast(stackBuffer->getUnderlyingBuffer()); stack += ((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); *stack = 1; igilCmdQueue->m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; igilCmdQueue->m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); igilCmdQueue->m_magic = IGIL_MAGIC_NUMBER; igilCmdQueue->m_controls.m_SchedulerEarlyReturn = DebugManager.flags.SchedulerSimulationReturnInstance.get(); igilCmdQueue->m_controls.m_SchedulerEarlyReturnCounter = 0; buildSlbDummyCommands(); igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = -1; igilCmdQueue->m_controls.m_CriticalSection = ExecutionModelCriticalSection::Free; resetDSH(); } template void DeviceQueueHw::initPipeControl(PIPE_CONTROL *pc) { *pc = GfxFamily::cmdInitPipeControl; pc->setStateCacheInvalidationEnable(0x1); pc->setDcFlushEnable(true); pc->setPipeControlFlushEnable(true); pc->setTextureCacheInvalidationEnable(true); pc->setCommandStreamerStallEnable(true); } template void DeviceQueueHw::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) { // CleanUp Section auto offset = slbCS.getUsed(); auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset; slbCS.getSpace(alignmentSize); offset = slbCS.getUsed(); igilQueue->m_controls.m_CleanupSectionAddress = ptrOffset(slbBuffer->getGpuAddress(), slbCS.getUsed()); GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(&slbCS, *parentKernel, true); using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; if (hwTimeStamp != nullptr) { uint64_t timeStampAddress = hwTimeStamp->getGpuAddress() + offsetof(HwTimeStamps, ContextCompleteTS); igilQueue->m_controls.m_EventTimestampAddress = timeStampAddress; addProfilingEndCmds(timeStampAddress); //enable preemption addLriCmd(false); } uint64_t criticalSectionAddress = (uint64_t)&igilQueue->m_controls.m_CriticalSection; MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( slbCS, 
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, criticalSectionAddress, ExecutionModelCriticalSection::Free, false, device->getHardwareInfo()); MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, tagAddress, taskCount, false, device->getHardwareInfo()); addMediaStateClearCmds(); auto pBBE = slbCS.getSpaceForCmd(); *pBBE = GfxFamily::cmdInitBatchBufferEnd; igilQueue->m_controls.m_CleanupSectionSize = (uint32_t)(slbCS.getUsed() - offset); } template void DeviceQueueHw::resetDSH() { if (heaps[IndirectHeap::DYNAMIC_STATE]) { heaps[IndirectHeap::DYNAMIC_STATE]->replaceBuffer(heaps[IndirectHeap::DYNAMIC_STATE]->getCpuBase(), heaps[IndirectHeap::DYNAMIC_STATE]->getMaxAvailableSpace()); heaps[IndirectHeap::DYNAMIC_STATE]->getSpace(colorCalcStateSize); } } template IndirectHeap *DeviceQueueHw::getIndirectHeap(IndirectHeap::Type type) { UNRECOVERABLE_IF(type != IndirectHeap::DYNAMIC_STATE); if (!heaps[type]) { heaps[type] = new IndirectHeap(dshBuffer); // get space for colorCalc and 2 ID tables at the beginning heaps[type]->getSpace(colorCalcStateSize); } return heaps[type]; } template size_t DeviceQueueHw::setSchedulerCrossThreadData(SchedulerKernel &scheduler) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; size_t offset = dshBuffer->getUnderlyingBufferSize() - scheduler.getCurbeSize() - 4096; // Page size padding auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); igilCmdQueue->m_controls.m_SchedulerDSHOffset = (uint32_t)offset; igilCmdQueue->m_controls.m_SchedulerConstantBufferSize = (uint32_t)scheduler.getCurbeSize(); return offset; } template void DeviceQueueHw::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) { GpgpuWalkerHelper::dispatchScheduler(commandStream, *this, preemptionMode, scheduler, ssh, dsh, 
isCcsUsed); return; } template size_t DeviceQueueHw::getCSPrefetchSize() { return 512; } template void DeviceQueueHw::addLriCmd(bool setArbCheck) { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; auto lri = slbCS.getSpaceForCmd(); *lri = GfxFamily::cmdInitLoadRegisterImm; lri->setRegisterOffset(0x2248); // CTXT_PREMP_DBG offset if (setArbCheck) lri->setDataDword(0x00000100); // set only bit 8 (Preempt On MI_ARB_CHK Only) else lri->setDataDword(0x0); } template size_t DeviceQueueHw::getExecutionModelCleanupSectionSize() { size_t totalSize = 0; totalSize += sizeof(PIPE_CONTROL) + 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE); totalSize += getProfilingEndCmdsSize(); totalSize += getMediaStateClearCmdsSize(); totalSize += 4 * sizeof(PIPE_CONTROL); totalSize += sizeof(MI_BATCH_BUFFER_END); return totalSize; } template size_t DeviceQueueHw::getProfilingEndCmdsSize() { size_t size = 0; size += sizeof(PIPE_CONTROL) + sizeof(MI_STORE_REGISTER_MEM); size += sizeof(MI_LOAD_REGISTER_IMM); return size; } template void DeviceQueueHw::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {} template uint64_t DeviceQueueHw::getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed) { auto blockAllocation = blockInfo->getGraphicsAllocation(); DEBUG_BREAK_IF(!blockAllocation); auto blockKernelStartPointer = blockAllocation ? 
blockAllocation->getGpuAddressToPatch() : 0llu; auto &hardwareInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (blockAllocation && isCcsUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { blockKernelStartPointer += blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } return blockKernelStartPointer; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/device_queue/device_queue_hw_bdw_plus.inl000066400000000000000000000255711363734646600303560ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/device_queue/device_queue_hw_base.inl" #include "opencl/source/program/block_kernel_manager.h" namespace NEO { template size_t DeviceQueueHw::getMinimumSlbSize() { using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; return sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(PIPE_CONTROL) + sizeof(GPGPU_WALKER) + sizeof(MEDIA_STATE_FLUSH) + sizeof(PIPE_CONTROL) + DeviceQueueHw::getCSPrefetchSize(); } template void DeviceQueueHw::buildSlbDummyCommands() { using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); auto slbEndOffset = igilCmdQueue->m_controls.m_SLBENDoffsetInBytes; size_t commandsSize = getMinimumSlbSize() + getWaCommandsSize(); size_t numEnqueues = numberOfDeviceEnqueues; // buildSlbDummyCommands is called from resetDeviceQueue() - reset slbCS each time slbCS.replaceBuffer(slbBuffer->getUnderlyingBuffer(), 
slbBuffer->getUnderlyingBufferSize()); if (slbEndOffset >= 0) { DEBUG_BREAK_IF(slbEndOffset % commandsSize != 0); //We always overwrite at most one enqueue space with BB_START command pointing to cleanup section //if SLBENDoffset is the at the end then BB_START added after scheduler did not corrupt anything so no need to regenerate numEnqueues = (slbEndOffset == static_cast(commandsSize)) ? 0 : 1; slbCS.getSpace(slbEndOffset); } for (size_t i = 0; i < numEnqueues; i++) { auto mediaStateFlush = slbCS.getSpaceForCmd(); *mediaStateFlush = GfxFamily::cmdInitMediaStateFlush; addArbCheckCmdWa(); addMiAtomicCmdWa((uint64_t)&igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder); auto mediaIdLoad = slbCS.getSpaceForCmd(); *mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; mediaIdLoad->setInterfaceDescriptorTotalLength(2048); auto dataStartAddress = colorCalcStateSize; mediaIdLoad->setInterfaceDescriptorDataStartAddress(dataStartAddress + sizeof(INTERFACE_DESCRIPTOR_DATA) * schedulerIDIndex); addLriCmdWa(true); if (isProfilingEnabled()) { addPipeControlCmdWa(); auto pipeControl = slbCS.getSpaceForCmd(); initPipeControl(pipeControl); } else { auto noop = slbCS.getSpace(sizeof(PIPE_CONTROL)); memset(noop, 0x0, sizeof(PIPE_CONTROL)); addPipeControlCmdWa(true); } auto gpgpuWalker = slbCS.getSpaceForCmd(); *gpgpuWalker = GfxFamily::cmdInitGpgpuWalker; gpgpuWalker->setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16); gpgpuWalker->setThreadGroupIdXDimension(1); gpgpuWalker->setThreadGroupIdYDimension(1); gpgpuWalker->setThreadGroupIdZDimension(1); gpgpuWalker->setRightExecutionMask(0xFFFFFFFF); gpgpuWalker->setBottomExecutionMask(0xFFFFFFFF); mediaStateFlush = slbCS.getSpaceForCmd(); *mediaStateFlush = GfxFamily::cmdInitMediaStateFlush; addArbCheckCmdWa(); addPipeControlCmdWa(); auto pipeControl2 = slbCS.getSpaceForCmd(); initPipeControl(pipeControl2); addLriCmdWa(false); auto prefetch = slbCS.getSpace(getCSPrefetchSize()); memset(prefetch, 0x0, 
getCSPrefetchSize()); } // always the same BBStart position (after 128 enqueues) auto bbStartOffset = (commandsSize * 128) - slbCS.getUsed(); slbCS.getSpace(bbStartOffset); auto bbStart = slbCS.getSpaceForCmd(); *bbStart = GfxFamily::cmdInitBatchBufferStart; auto slbPtr = reinterpret_cast(slbBuffer->getUnderlyingBuffer()); bbStart->setBatchBufferStartAddressGraphicsaddress472(slbPtr); igilCmdQueue->m_controls.m_CleanupSectionSize = 0; igilQueue->m_controls.m_CleanupSectionAddress = 0; } template void DeviceQueueHw::addMediaStateClearCmds() { typedef typename GfxFamily::MEDIA_VFE_STATE MEDIA_VFE_STATE; addPipeControlCmdWa(); auto pipeControl = slbCS.getSpaceForCmd(); *pipeControl = GfxFamily::cmdInitPipeControl; pipeControl->setGenericMediaStateClear(true); pipeControl->setCommandStreamerStallEnable(true); addDcFlushToPipeControlWa(pipeControl); PreambleHelper::programVFEState(&slbCS, device->getHardwareInfo(), 0, 0, device->getSharedDeviceInfo().maxFrontEndThreads, aub_stream::EngineType::ENGINE_RCS); } template size_t DeviceQueueHw::getMediaStateClearCmdsSize() { using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE; // PC with GenreicMediaStateClear + WA PC size_t size = 2 * sizeof(PIPE_CONTROL); // VFE state cmds size += sizeof(PIPE_CONTROL); size += sizeof(MEDIA_VFE_STATE); return size; } template void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) { using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; void *pDSH = dynamicStateHeap.getCpuBase(); // Set scheduler ID to last entry in first table, it will have ID == 0, blocks will have following entries. 
auto igilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); igilCmdQueue->m_controls.m_IDTstart = colorCalcStateSize + sizeof(INTERFACE_DESCRIPTOR_DATA) * (interfaceDescriptorEntries - 2); // Parent's dsh is located after ColorCalcState and 2 ID tables igilCmdQueue->m_controls.m_DynamicHeapStart = offsetDsh + alignUp((uint32_t)parentKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); igilCmdQueue->m_controls.m_DynamicHeapSizeInBytes = (uint32_t)dshBuffer->getUnderlyingBufferSize(); igilCmdQueue->m_controls.m_CurrentDSHoffset = igilCmdQueue->m_controls.m_DynamicHeapStart; igilCmdQueue->m_controls.m_ParentDSHOffset = offsetDsh; uint32_t blockIndex = parentIDCount; pDSH = ptrOffset(pDSH, colorCalcStateSize); INTERFACE_DESCRIPTOR_DATA *pIDDestination = static_cast(pDSH); BlockKernelManager *blockManager = parentKernel->getProgram()->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); uint32_t maxBindingTableCount = 0; uint32_t totalBlockSSHSize = 0; igilCmdQueue->m_controls.m_StartBlockID = blockIndex; for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); auto blockKernelStartPointer = getBlockKernelStartPointer(getDevice(), pBlockInfo, isCcsUsed); auto bindingTableCount = pBlockInfo->patchInfo.bindingTableState->Count; maxBindingTableCount = std::max(maxBindingTableCount, bindingTableCount); totalBlockSSHSize += alignUp(pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); auto btOffset = HardwareCommandsHelper::pushBindingTableAndSurfaceStates(surfaceStateHeap, bindingTableCount, pBlockInfo->heapInfo.pSsh, pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize, bindingTableCount, pBlockInfo->patchInfo.bindingTableState->Offset); parentKernel->setReflectionSurfaceBlockBtOffset(i, static_cast(btOffset)); // Determine SIMD size uint32_t simd = pBlockInfo->getMaxSimdSize(); 
DEBUG_BREAK_IF(pBlockInfo->patchInfo.interfaceDescriptorData == nullptr); uint32_t idOffset = pBlockInfo->patchInfo.interfaceDescriptorData->Offset; const INTERFACE_DESCRIPTOR_DATA *pBlockID = static_cast(ptrOffset(pBlockInfo->heapInfo.pDsh, idOffset)); pIDDestination[blockIndex + i] = *pBlockID; pIDDestination[blockIndex + i].setKernelStartPointerHigh(blockKernelStartPointer >> 32); pIDDestination[blockIndex + i].setKernelStartPointer(static_cast(blockKernelStartPointer)); pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL); HardwareCommandsHelper::programBarrierEnable(&pIDDestination[blockIndex + i], pBlockInfo->patchInfo.executionEnvironment->HasBarriers, parentKernel->getDevice().getHardwareInfo()); // Set offset to sampler states, block's DHSOffset is added by scheduler pIDDestination[blockIndex + i].setSamplerStatePointer(static_cast(pBlockInfo->getBorderColorStateSize())); auto threadPayload = pBlockInfo->patchInfo.threadPayload; DEBUG_BREAK_IF(nullptr == threadPayload); auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); auto grfSize = device->getDeviceInfo().grfSize; auto sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels); auto numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); // HW requires a minimum of 1 GRF of perThreadData for each thread in a thread group // when sizeCrossThreadData != 0 numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); pIDDestination[blockIndex + i].setConstantIndirectUrbEntryReadLength(numGrfPerThreadData); } igilCmdQueue->m_controls.m_BTmaxSize = alignUp(maxBindingTableCount * (uint32_t)sizeof(BINDING_TABLE_STATE), INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER::BINDINGTABLEPOINTER_ALIGN_SIZE); igilCmdQueue->m_controls.m_BTbaseOffset = alignUp((uint32_t)surfaceStateHeap.getUsed(), INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER::BINDINGTABLEPOINTER_ALIGN_SIZE); igilCmdQueue->m_controls.m_CurrentSSHoffset = 
igilCmdQueue->m_controls.m_BTbaseOffset; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/device_queue/device_queue_hw_profiling.inl000066400000000000000000000016051363734646600305200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/device_queue/device_queue_hw.h" namespace NEO { template void DeviceQueueHw::addProfilingEndCmds(uint64_t timestampAddress) { auto pPipeControlCmd = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL)); *pPipeControlCmd = GfxFamily::cmdInitPipeControl; pPipeControlCmd->setCommandStreamerStallEnable(true); //low part auto pMICmdLow = (MI_STORE_REGISTER_MEM *)slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM)); *pMICmdLow = GfxFamily::cmdInitStoreRegisterMem; GpgpuWalkerHelper::adjustMiStoreRegMemMode(pMICmdLow); pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); pMICmdLow->setMemoryAddress(timestampAddress); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/000077500000000000000000000000001363734646600207105ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/dll/CMakeLists.txt000066400000000000000000000054441363734646600234570ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DISABLED_GTPIN_SUPPORT) set(GTPIN_INIT_FILE "${NEO_SOURCE_DIR}/opencl/source/gtpin/gtpin_init.cpp") else() set(GTPIN_INIT_FILE "") endif() set(RUNTIME_SRCS_DLL_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_deferred_deleter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_tbx_sockets.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debugger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/get_devices.cpp 
${NEO_SHARED_DIRECTORY}/dll/options_dll.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/resource_info.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/page_table_mngr.cpp ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/helpers/debug_helpers.cpp ${NEO_SHARED_DIRECTORY}/helpers/allow_deferred_deleter.cpp ${NEO_SHARED_DIRECTORY}/utilities/cpuintrinsics.cpp ${NEO_SHARED_DIRECTORY}/utilities/debug_settings_reader_creator.cpp ${NEO_SOURCE_DIR}/opencl/source/api/api.cpp ${NEO_SOURCE_DIR}/opencl/source/compiler_interface/default_cache_config.cpp ${NEO_SOURCE_DIR}/opencl/source/helpers/built_ins_helper.cpp ${GTPIN_INIT_FILE} ${HW_SRC_LINK} ${EXPORTS_FILENAME} ) append_sources_from_properties(RUNTIME_SRCS_DLL_BASE NEO_CORE_SRCS_LINK) set(RUNTIME_SRCS_DLL_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/linux/allocator_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/create_drm_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/devices${BRANCH_DIR_SUFFIX}/devices.inl ${CMAKE_CURRENT_SOURCE_DIR}/linux/devices/devices_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/linux/drm_neo_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/options_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/os_interface.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ) set(RUNTIME_SRCS_DLL_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/windows/create_wddm_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/options_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/os_interface.cpp ${NEO_SHARED_DIRECTORY}/dll/windows/environment_variables.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/sys_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_calls.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp ) target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DLL_BASE}) if(WIN32) if(DEFINED NEO_DLL_RC_FILE) list(APPEND RUNTIME_SRCS_DLL_WINDOWS ${NEO_DLL_RC_FILE}) endif() target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE 
${RUNTIME_SRCS_DLL_WINDOWS}) else() target_sources(${NEO_DYNAMIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DLL_LINUX}) endif() compute-runtime-20.13.16352/opencl/source/dll/create_command_stream.cpp000066400000000000000000000013161363734646600257310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/command_stream/create_command_stream_impl.h" #include "opencl/source/command_stream/tbx_command_stream_receiver.h" namespace NEO { CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { return createCommandStreamImpl(executionEnvironment, rootDeviceIndex); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/create_deferred_deleter.cpp000066400000000000000000000004721363734646600262260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/deferred_deleter.h" namespace NEO { std::unique_ptr createDeferredDeleter() { return std::unique_ptr(new DeferredDeleter()); } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/dll/create_tbx_sockets.cpp000066400000000000000000000004071363734646600252700ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tbx/tbx_sockets_imp.h" using namespace NEO; namespace NEO { TbxSockets *TbxSockets::create() { return new TbxSocketsImp; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/debug_manager.cpp000066400000000000000000000005111363734646600241710ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/os_interface/ocl_reg_path.h" /* NOTE(review): nothing visible in this translation unit uses names from std, so the using-directive below looks unnecessary - confirm before removing. */ using namespace std; namespace NEO { /* Definition of the namespace-scope DebugManager object, constructed from oclRegPath. */ DebugSettingsManager DebugManager(oclRegPath); } compute-runtime-20.13.16352/opencl/source/dll/debugger.cpp000066400000000000000000000016141363734646600232020ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debugger/debugger.h" #include "shared/source/built_ins/sip.h" #include "shared/source/built_ins/sip_kernel_type.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/source_level_debugger/source_level_debugger.h" namespace NEO { /* Creates the debugger for the given hardware description. A SourceLevelDebugger is requested from SourceLevelDebugger::create() only when hwInfo->capabilityTable.debuggerSupported is set. If a debugger object was obtained, it is initialized with a flag saying whether the SIP kernel type reported for this render core family is SipKernelType::DbgCsrLocal. Returns the held pointer, which is empty when debugging is unsupported or no debugger object was produced. hwInfo is dereferenced without a null check, so callers must pass a valid pointer. NOTE(review): the unique_ptr element types are not visible in this text, presumably lost in extraction - verify against the upstream file. */ std::unique_ptr Debugger::create(HardwareInfo *hwInfo) { std::unique_ptr sourceLevelDebugger; /* Ask for a source level debugger only on hardware that advertises debugger support. */ if (hwInfo->capabilityTable.debuggerSupported) { sourceLevelDebugger.reset(SourceLevelDebugger::create()); } /* Initialize only when a debugger object is actually held. */ if (sourceLevelDebugger) { bool localMemorySipAvailable = (SipKernelType::DbgCsrLocal == SipKernel::getSipKernelType(hwInfo->platform.eRenderCoreFamily, true)); sourceLevelDebugger->initialize(localMemorySipAvailable); } return sourceLevelDebugger; } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/dll/get_devices.cpp000066400000000000000000000007241363734646600237000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/device_factory.h" #include "opencl/source/command_stream/create_command_stream_impl.h" namespace NEO { /* Forwards to prepareDeviceEnvironmentsImpl() with the same execution environment and returns its result unchanged. */ bool prepareDeviceEnvironments(ExecutionEnvironment &executionEnvironment) { return prepareDeviceEnvironmentsImpl(executionEnvironment); } } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/dll/linux/000077500000000000000000000000001363734646600220475ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/dll/linux/allocator_helper.cpp000066400000000000000000000005461363734646600260770ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" namespace NEO { /* Returns the size to reserve, computed as (maxNBitValue(47) + 1) divided by 4. Presumably maxNBitValue(47) is the largest 47-bit value, which would make the result one quarter of a 47-bit address range - verify against basic_math.h. */ size_t getSizeToReserve() { return (maxNBitValue(47) + 1) / 4; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/linux/create_drm_memory_manager.cpp000066400000000000000000000015121363734646600277410ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/source/os_interface/os_interface.h" namespace NEO { /* Linux definition of MemoryManager::createMemoryManager. Builds the manager with std::make_unique, passing gemCloseWorkerMode::gemCloseWorkerActive, the current value of the EnableForcePin debug flag, a literal true and the given execution environment, and returns the owning pointer. NOTE(review): the concrete type handed to make_unique is not visible in this text (presumably DrmMemoryManager, judging by the drm_memory_manager.h include), and the meaning of the literal true is defined by that constructor - verify against the upstream file. */ std::unique_ptr MemoryManager::createMemoryManager(ExecutionEnvironment &executionEnvironment) { return std::make_unique(gemCloseWorkerMode::gemCloseWorkerActive, DebugManager.flags.EnableForcePin.get(), true, executionEnvironment); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/linux/devices/000077500000000000000000000000001363734646600234715ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/dll/linux/devices/devices.inl000066400000000000000000000002271363734646600256200ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/dll/linux/devices/devices_base.inl" 
compute-runtime-20.13.16352/opencl/source/dll/linux/devices/devices_base.inl000066400000000000000000000257451363734646600266260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // clang-format off #ifdef SUPPORT_GEN12LP #ifdef SUPPORT_TGLLP DEVICE( IGEN12LP_GT1_MOB_DEVICE_F0_ID, TGLLP_1x6x16, GTTYPE_GT2 ) DEVICE( ITGL_LP_1x6x16_ULT_15W_DEVICE_F0_ID, TGLLP_1x6x16, GTTYPE_GT2 ) DEVICE( ITGL_LP_1x6x16_ULX_5_2W_DEVICE_F0_ID, TGLLP_1x6x16, GTTYPE_GT2 ) DEVICE( ITGL_LP_1x6x16_ULT_12W_DEVICE_F0_ID, TGLLP_1x6x16, GTTYPE_GT2 ) DEVICE( ITGL_LP_1x2x16_HALO_45W_DEVICE_F0_ID, TGLLP_1x2x16, GTTYPE_GT2 ) DEVICE( ITGL_LP_1x2x16_DESK_65W_DEVICE_F0_ID, TGLLP_1x2x16, GTTYPE_GT2 ) DEVICE( ITGL_LP_1x2x16_HALO_WS_45W_DEVICE_F0_ID, TGLLP_1x2x16, GTTYPE_GT2 ) #endif #endif #ifdef SUPPORT_GEN11 #ifdef SUPPORT_ICLLP // GT1 DEVICE( IICL_LP_GT1_MOB_DEVICE_F0_ID, ICLLP_1x4x8, GTTYPE_GT1 ) DEVICE( IICL_LP_1x4x8_LOW_MEDIA_ULT_DEVICE_F0_ID, ICLLP_1x4x8, GTTYPE_GT1 ) DEVICE( IICL_LP_1x4x8_LOW_MEDIA_ULX_DEVICE_F0_ID, ICLLP_1x4x8, GTTYPE_GT1 ) DEVICE( IICL_LP_1x6x8_ULX_DEVICE_F0_ID, ICLLP_1x6x8, GTTYPE_GT1 ) DEVICE( IICL_LP_1x6x8_ULT_DEVICE_F0_ID, ICLLP_1x6x8, GTTYPE_GT1 ) // GT2 DEVICE( IICL_LP_1x8x8_SUPERSKU_DEVICE_F0_ID, ICLLP_1x8x8, GTTYPE_GT2 ) DEVICE( IICL_LP_1x8x8_ULT_DEVICE_F0_ID, ICLLP_1x8x8, GTTYPE_GT2 ) DEVICE( IICL_LP_1x8x8_ULX_DEVICE_F0_ID, ICLLP_1x8x8, GTTYPE_GT2 ) #endif #ifdef SUPPORT_LKF DEVICE( ILKF_1x8x8_DESK_DEVICE_F0_ID, LKF_1x8x8, GTTYPE_GT1 ) #endif #ifdef SUPPORT_EHL DEVICE( IEHL_1x4x8_SUPERSKU_DEVICE_A0_ID, EHL_1x4x8, GTTYPE_GT1 ) DEVICE( IEHL_1x2x4_DEVICE_A0_ID, EHL_1x2x4, GTTYPE_GT1 ) DEVICE( IEHL_1x4x4_DEVICE_A0_ID, EHL_1x4x4, GTTYPE_GT1 ) DEVICE( IEHL_1x4x8_DEVICE_A0_ID, EHL_1x4x8, GTTYPE_GT1 ) DEVICE( IJSL_1x4x4_DEVICE_B0_ID, EHL_1x4x4, GTTYPE_GT1 ) DEVICE( IJSL_1x4x6_DEVICE_B0_ID, EHL_1x4x6, GTTYPE_GT1 ) DEVICE( IJSL_1x4x8_DEVICE_B0_ID, EHL_1x4x8, GTTYPE_GT1 ) #endif #endif #ifdef SUPPORT_GEN9 #ifdef 
SUPPORT_SKL // GT1 DEVICE( ISKL_GT1_DESK_DEVICE_F0_ID, SKL_1x2x6, GTTYPE_GT1 ) DEVICE( ISKL_GT1_DT_DEVICE_F0_ID, SKL_1x2x6, GTTYPE_GT1 ) DEVICE( ISKL_GT1_HALO_MOBL_DEVICE_F0_ID, SKL_1x2x6, GTTYPE_GT1 ) DEVICE( ISKL_GT1_SERV_DEVICE_F0_ID, SKL_1x2x6, GTTYPE_GT1 ) DEVICE( ISKL_GT1_ULT_DEVICE_F0_ID, SKL_1x2x6, GTTYPE_GT1 ) DEVICE( ISKL_GT1_ULX_DEVICE_F0_ID, SKL_1x2x6, GTTYPE_GT1 ) // GT1_5 DEVICE( ISKL_GT1_5_DT_DEVICE_F0_ID, SKL_1x3x6, GTTYPE_GT1_5 ) DEVICE( ISKL_GT1_5_ULT_DEVICE_F0_ID, SKL_1x3x6, GTTYPE_GT1_5 ) DEVICE( ISKL_GT1_5_ULX_DEVICE_F0_ID, SKL_1x3x6, GTTYPE_GT1_5 ) // GT2 DEVICE( ISKL_GT2_DESK_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_GT2_DT_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_GT2_HALO_MOBL_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_GT2_SERV_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_GT2_ULT_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_GT2_ULX_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_GT2_WRK_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_GT2F_ULT_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) DEVICE( ISKL_LP_DEVICE_F0_ID, SKL_1x3x8, GTTYPE_GT2 ) // GT3 DEVICE( ISKL_GT3_DESK_DEVICE_F0_ID, SKL_2x3x8, GTTYPE_GT3 ) DEVICE( ISKL_GT3_HALO_MOBL_DEVICE_F0_ID, SKL_2x3x8, GTTYPE_GT3 ) DEVICE( ISKL_GT3_MEDIA_SERV_DEVICE_F0_ID, SKL_2x3x8, GTTYPE_GT3 ) DEVICE( ISKL_GT3_SERV_DEVICE_F0_ID, SKL_2x3x8, GTTYPE_GT3 ) DEVICE( ISKL_GT3_ULT_DEVICE_F0_ID, SKL_2x3x8, GTTYPE_GT3 ) DEVICE( ISKL_GT3e_ULT_DEVICE_F0_ID_540, SKL_2x3x8, GTTYPE_GT3 ) DEVICE( ISKL_GT3e_ULT_DEVICE_F0_ID_550, SKL_2x3x8, GTTYPE_GT3 ) // GT4 DEVICE( ISKL_GT4_DESK_DEVICE_F0_ID, SKL_3x3x8, GTTYPE_GT4 ) DEVICE( ISKL_GT4_DT_DEVICE_F0_ID, SKL_3x3x8, GTTYPE_GT4 ) DEVICE( ISKL_GT4_HALO_MOBL_DEVICE_F0_ID, SKL_3x3x8, GTTYPE_GT4 ) DEVICE( ISKL_GT4_SERV_DEVICE_F0_ID, SKL_3x3x8, GTTYPE_GT4 ) DEVICE( ISKL_GT4_WRK_DEVICE_F0_ID, SKL_3x3x8, GTTYPE_GT4 ) #endif #ifdef SUPPORT_KBL // GT1 DEVICE( IKBL_GT1_DT_DEVICE_F0_ID, KBL_1x2x6, GTTYPE_GT1 ) DEVICE( IKBL_GT1_HALO_DEVICE_F0_ID, 
KBL_1x2x6, GTTYPE_GT1 ) DEVICE( IKBL_GT1_SERV_DEVICE_F0_ID, KBL_1x2x6, GTTYPE_GT1 ) DEVICE( IKBL_GT1_ULT_DEVICE_F0_ID, KBL_1x2x6, GTTYPE_GT1 ) DEVICE( IKBL_GT1_ULX_DEVICE_F0_ID, KBL_1x3x6, GTTYPE_GT1 ) DEVICE( IKBL_GT1F_HALO_DEVICE_F0_ID, KBL_1x2x6, GTTYPE_GT1 ) // GT1_5 DEVICE( IKBL_GT1_5_ULT_DEVICE_F0_ID, KBL_1x3x6, GTTYPE_GT1_5 ) DEVICE( IKBL_GT1_5_ULX_DEVICE_F0_ID, KBL_1x2x6, GTTYPE_GT1_5 ) // GT2 DEVICE( IKBL_GT2_DT_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2_HALO_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2_R_ULT_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2_SERV_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2_ULT_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2_ULX_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2_WRK_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2_R_ULX_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) DEVICE( IKBL_GT2F_ULT_DEVICE_F0_ID, KBL_1x3x8, GTTYPE_GT2 ) // GT3 DEVICE( IKBL_GT3_15W_ULT_DEVICE_F0_ID, KBL_2x3x8, GTTYPE_GT3 ) DEVICE( IKBL_GT3_28W_ULT_DEVICE_F0_ID, KBL_2x3x8, GTTYPE_GT3 ) DEVICE( IKBL_GT3_HALO_DEVICE_F0_ID, KBL_2x3x8, GTTYPE_GT3 ) DEVICE( IKBL_GT3_SERV_DEVICE_F0_ID, KBL_2x3x8, GTTYPE_GT3 ) DEVICE( IKBL_GT3_ULT_DEVICE_F0_ID, KBL_2x3x8, GTTYPE_GT3 ) // GT4 DEVICE( IKBL_GT4_DT_DEVICE_F0_ID, KBL_3x3x8, GTTYPE_GT4 ) DEVICE( IKBL_GT4_HALO_DEVICE_F0_ID, KBL_3x3x8, GTTYPE_GT4 ) DEVICE( IKBL_GT4_SERV_DEVICE_F0_ID, KBL_3x3x8, GTTYPE_GT4 ) DEVICE( IKBL_GT4_WRK_DEVICE_F0_ID, KBL_3x3x8, GTTYPE_GT4 ) #endif #ifdef SUPPORT_CFL // GT1 DEVICE( ICFL_GT1_DT_DEVICE_F0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_S41_DT_DEVICE_F0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_S61_DT_DEVICE_F0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_41F_2F1F_ULT_DEVICE_F0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_S6_S4_S2_F1F_DT_DEVICE_F0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_U41F_U2F1F_ULT_DEVICE_F0_ID, CFL_1x2x6, GTTYPE_GT1 ) // GT2 DEVICE( ICFL_GT2_DT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( 
ICFL_GT2_HALO_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_HALO_WS_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_S42_DT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_S62_DT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_SERV_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_S82_S6F2_DT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_U42F_U2F1F_ULT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_U42F_U2F2F_ULT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_U42F_U2F2_ULT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_S8_S2_DT_DEVICE_F0_ID, CFL_1x3x8, GTTYPE_GT2 ) // GT3 DEVICE( ICFL_HALO_DEVICE_F0_ID, CFL_2x3x8, GTTYPE_GT3 ) DEVICE( ICFL_GT3_ULT_15W_DEVICE_F0_ID, CFL_2x3x8, GTTYPE_GT3 ) DEVICE( ICFL_GT3_ULT_15W_42EU_DEVICE_F0_ID, CFL_2x3x8, GTTYPE_GT3 ) DEVICE( ICFL_GT3_ULT_28W_DEVICE_F0_ID, CFL_2x3x8, GTTYPE_GT3 ) DEVICE( ICFL_GT3_ULT_DEVICE_F0_ID, CFL_2x3x8, GTTYPE_GT3 ) DEVICE( ICFL_GT3_U43_ULT_DEVICE_F0_ID, CFL_2x3x8, GTTYPE_GT3 ) // CML GT1 DEVICE( ICFL_GT1_ULT_DEVICE_V0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_ULT_DEVICE_A0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_ULT_DEVICE_S0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_ULT_DEVICE_K0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_ULX_DEVICE_S0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_DT_DEVICE_P0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_DT_DEVICE_G0_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_HALO_DEVICE_16_ID, CFL_1x2x6, GTTYPE_GT1 ) DEVICE( ICFL_GT1_HALO_DEVICE_18_ID, CFL_1x2x6, GTTYPE_GT1 ) // CML GT2 DEVICE( ICFL_GT2_ULT_DEVICE_V0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_ULT_DEVICE_A0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_ULT_DEVICE_S0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_ULT_DEVICE_K0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_ULX_DEVICE_S0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_DT_DEVICE_P0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_DT_DEVICE_G0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( 
ICFL_GT2_HALO_DEVICE_15_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_HALO_DEVICE_17_ID, CFL_1x3x8, GTTYPE_GT2 ) // CML WORKSTATION DEVICE( ICFL_GT2_WKS_DEVICE_P0_ID, CFL_1x3x8, GTTYPE_GT2 ) DEVICE( ICFL_GT2_WKS_DEVICE_G0_ID, CFL_1x3x8, GTTYPE_GT2 ) #endif #ifdef SUPPORT_GLK DEVICE( IGLK_GT2_ULT_18EU_DEVICE_F0_ID, GLK_1x3x6, GTTYPE_GTA ) DEVICE( IGLK_GT2_ULT_12EU_DEVICE_F0_ID, GLK_1x2x6, GTTYPE_GTA ) #endif #ifdef SUPPORT_BXT DEVICE(IBXT_A_DEVICE_F0_ID, BXT_1x3x6, GTTYPE_GTA) DEVICE(IBXT_C_DEVICE_F0_ID, BXT_1x3x6, GTTYPE_GTA) DEVICE(IBXT_GT_3x6_DEVICE_ID, BXT_1x3x6, GTTYPE_GTA) DEVICE(IBXT_P_3x6_DEVICE_ID, BXT_1x3x6, GTTYPE_GTA) //18EU APL DEVICE(IBXT_P_12EU_3x6_DEVICE_ID, BXT_1x2x6, GTTYPE_GTA) //12EU APL DEVICE(IBXT_PRO_12EU_3x6_DEVICE_ID, BXT_1x2x6, GTTYPE_GTA) //12EU APL DEVICE(IBXT_PRO_3x6_DEVICE_ID, BXT_1x3x6, GTTYPE_GTA) DEVICE(IBXT_X_DEVICE_F0_ID, BXT_1x3x6, GTTYPE_GTA) #endif #endif #ifdef SUPPORT_GEN8 // GT1 DEVICE( IBDW_GT1_DESK_DEVICE_F0_ID, BDW_1x2x6, GTTYPE_GT1 ) DEVICE( IBDW_GT1_HALO_MOBL_DEVICE_F0_ID, BDW_1x2x6, GTTYPE_GT1 ) DEVICE( IBDW_GT1_SERV_DEVICE_F0_ID, BDW_1x2x6, GTTYPE_GT1 ) DEVICE( IBDW_GT1_ULT_MOBL_DEVICE_F0_ID, BDW_1x2x6, GTTYPE_GT1 ) DEVICE( IBDW_GT1_ULX_DEVICE_F0_ID, BDW_1x2x6, GTTYPE_GT1 ) DEVICE( IBDW_GT1_WRK_DEVICE_F0_ID, BDW_1x2x6, GTTYPE_GT1 ) // GT2 DEVICE( IBDW_GT2_DESK_DEVICE_F0_ID, BDW_1x3x8, GTTYPE_GT2 ) DEVICE( IBDW_GT2_HALO_MOBL_DEVICE_F0_ID, BDW_1x3x8, GTTYPE_GT2 ) DEVICE( IBDW_GT2_SERV_DEVICE_F0_ID, BDW_1x3x8, GTTYPE_GT2 ) DEVICE( IBDW_GT2_ULT_MOBL_DEVICE_F0_ID, BDW_1x3x8, GTTYPE_GT2 ) DEVICE( IBDW_GT2_ULX_DEVICE_F0_ID, BDW_1x3x8, GTTYPE_GT2 ) DEVICE( IBDW_GT2_WRK_DEVICE_F0_ID, BDW_1x3x8, GTTYPE_GT2 ) // GT3 DEVICE( IBDW_GT3_DESK_DEVICE_F0_ID, BDW_2x3x8, GTTYPE_GT3 ) DEVICE( IBDW_GT3_HALO_MOBL_DEVICE_F0_ID, BDW_2x3x8, GTTYPE_GT3 ) DEVICE( IBDW_GT3_SERV_DEVICE_F0_ID, BDW_2x3x8, GTTYPE_GT3 ) DEVICE( IBDW_GT3_ULT_MOBL_DEVICE_F0_ID, BDW_2x3x8, GTTYPE_GT3 ) DEVICE( IBDW_GT3_ULT25W_MOBL_DEVICE_F0_ID, BDW_2x3x8, GTTYPE_GT3 ) 
DEVICE( IBDW_GT3_ULX_DEVICE_F0_ID, BDW_2x3x8, GTTYPE_GT3 ) DEVICE( IBDW_GT3_WRK_DEVICE_F0_ID, BDW_2x3x8, GTTYPE_GT3 ) #endif // clang-format on compute-runtime-20.13.16352/opencl/source/dll/linux/drm_neo_create.cpp000066400000000000000000000076241363734646600255320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/drm_null_device.h" #include "drm/i915_drm.h" #include #include #include #include namespace NEO { const DeviceDescriptor deviceDescriptorTable[] = { #define DEVICE(devId, gt, gtType) {devId, >::hwInfo, >::setupHardwareInfo, gtType}, #include "devices.inl" #undef DEVICE {0, nullptr, nullptr, GTTYPE_UNDEFINED}}; Drm *Drm::create(std::unique_ptr hwDeviceId, RootDeviceEnvironment &rootDeviceEnvironment) { std::unique_ptr drmObject; if (DebugManager.flags.EnableNullHardware.get() == true) { drmObject.reset(new DrmNullDevice(std::move(hwDeviceId), rootDeviceEnvironment)); } else { drmObject.reset(new Drm(std::move(hwDeviceId), rootDeviceEnvironment)); } // Get HW version (I915_drm.h) int ret = drmObject->getDeviceID(drmObject->deviceId); if (ret != 0) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "FATAL: Cannot query device ID parameter!\n"); return nullptr; } // Get HW Revision (I915_drm.h) ret = drmObject->getDeviceRevID(drmObject->revisionId); if (ret != 0) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "FATAL: Cannot query device Rev ID parameter!\n"); 
return nullptr; } const DeviceDescriptor *device = nullptr; GTTYPE eGtType = GTTYPE_UNDEFINED; for (auto &d : deviceDescriptorTable) { if (drmObject->deviceId == d.deviceId) { device = &d; eGtType = d.eGtType; break; } } if (device) { ret = drmObject->setupHardwareInfo(const_cast(device), true); if (ret != 0) { return nullptr; } drmObject->setGtType(eGtType); rootDeviceEnvironment.setHwInfo(device->pHwInfo); } else { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "FATAL: Unknown device: deviceId: %04x, revisionId: %04x\n", drmObject->deviceId, drmObject->revisionId); return nullptr; } // Detect device parameters int hasExecSoftPin = 0; ret = drmObject->getExecSoftPin(hasExecSoftPin); if (ret != 0) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "FATAL: Cannot query Soft Pin parameter!\n"); return nullptr; } if (!hasExecSoftPin) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "FATAL: Device doesn't support Soft-Pin but this is required.\n"); return nullptr; } // Activate the Turbo Boost Frequency feature ret = drmObject->enableTurboBoost(); if (ret != 0) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "WARNING: Failed to request OCL Turbo Boost\n"); } if (!drmObject->queryEngineInfo()) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "WARNING: Failed to query engine info\n"); } if (HwHelper::get(device->pHwInfo->platform.eRenderCoreFamily).getEnableLocalMemory(*device->pHwInfo)) { if (!drmObject->queryMemoryInfo()) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "WARNING: Failed to query memory info\n"); } } return drmObject.release(); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/linux/ocl.exports000066400000000000000000000003451363734646600242540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ { global: 
clGetExtensionFunctionAddress; clIcdGetPlatformIDsKHR; clGetPlatformInfo; GTPin_Init; local: *; }; compute-runtime-20.13.16352/opencl/source/dll/linux/options_linux.cpp000066400000000000000000000012531363734646600254660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "External/Common/GmmLibDllName.h" #include "igc.opencl.h" namespace Os { // Compiler library names const char *frontEndDllName = FCL_LIBRARY_NAME; const char *igcDllName = IGC_LIBRARY_NAME; const char *libvaDllName = "libva.so.2"; const char *sysFsPciPath = "/sys/bus/pci/devices/"; const char *tbxLibName = "libtbxAccess.so"; // Os specific Metrics Library name #if __x86_64__ || __ppc64__ const char *metricsLibraryDllName = "libigdml64.so"; #else const char *metricsLibraryDllName = "libigdml32.so"; #endif } // namespace Os compute-runtime-20.13.16352/opencl/source/dll/linux/os_interface.cpp000066400000000000000000000007651363734646600252240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/os_interface.h" #include "shared/source/gmm_helper/gmm_interface.h" #include "shared/source/os_interface/linux/drm_neo.h" namespace NEO { bool OSInterface::osEnableLocalMemory = true; void OSInterface::setGmmInputArgs(void *args) { reinterpret_cast(args)->FileDescriptor = this->get()->getDrm()->getFileDescriptor(); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/source_level_debugger.cpp000066400000000000000000000005121363734646600257450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/source_level_debugger/source_level_debugger.h" using namespace std; namespace NEO { OsLibrary *SourceLevelDebugger::loadDebugger() { return OsLibrary::load(SourceLevelDebugger::dllName); } } // namespace 
NEO compute-runtime-20.13.16352/opencl/source/dll/windows/000077500000000000000000000000001363734646600224025ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/dll/windows/OpenCLExports.def.in000066400000000000000000000045721363734646600262040ustar00rootroot00000000000000; Copyright (c) 2017 - 2020, Intel Corporation ; ; Permission is hereby granted, free of charge, to any person obtaining a ; copy of this software and associated documentation files (the "Software"), ; to deal in the Software without restriction, including without limitation ; the rights to use, copy, modify, merge, publish, distribute, sublicense, ; and/or sell copies of the Software, and to permit persons to whom the ; Software is furnished to do so, subject to the following conditions: ; ; The above copyright notice and this permission notice shall be included ; in all copies or substantial portions of the Software. ; ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR ; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ; OTHER DEALINGS IN THE SOFTWARE. 
; ${MSVC_DEF_HEADER} LIBRARY "${MSVC_DEF_LIB_NAME}" EXPORTS clGetPlatformIDs clGetPlatformInfo clGetDeviceIDs clGetDeviceInfo clCreateContext clCreateContextFromType clRetainContext clReleaseContext clGetContextInfo clCreateCommandQueue clRetainCommandQueue clReleaseCommandQueue clGetCommandQueueInfo clCreateBuffer clCreateImage2D clCreateImage3D clRetainMemObject clReleaseMemObject clGetSupportedImageFormats clGetMemObjectInfo clGetImageInfo clCreateSampler clRetainSampler clReleaseSampler clGetSamplerInfo clCreateProgramWithSource clCreateProgramWithBinary clRetainProgram clReleaseProgram clBuildProgram clGetProgramInfo clGetProgramBuildInfo clCreateKernel clCreateKernelsInProgram clRetainKernel clReleaseKernel clSetKernelArg clGetKernelInfo clGetKernelSubGroupInfoKHR clGetKernelWorkGroupInfo clWaitForEvents clGetEventInfo clRetainEvent clReleaseEvent clGetEventProfilingInfo clFlush clFinish clEnqueueReadBuffer clEnqueueWriteBuffer clEnqueueCopyBuffer clEnqueueReadImage clEnqueueWriteImage clEnqueueCopyImage clEnqueueCopyImageToBuffer clEnqueueCopyBufferToImage clEnqueueMapBuffer clEnqueueMapImage clEnqueueUnmapMemObject clEnqueueNDRangeKernel clEnqueueTask clEnqueueNativeKernel clEnqueueMarker clEnqueueWaitForEvents clEnqueueBarrier clGetExtensionFunctionAddress ${MSVC_DEF_ADDITIONAL_EXPORTS} compute-runtime-20.13.16352/opencl/source/dll/windows/create_wddm_memory_manager.cpp000066400000000000000000000012561363734646600304520ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" namespace NEO { std::unique_ptr 
MemoryManager::createMemoryManager(ExecutionEnvironment &executionEnvironment) { return std::make_unique(executionEnvironment); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/dll/windows/options_windows.cpp000066400000000000000000000007451363734646600263610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "External/Common/GmmLibDllName.h" #include "igc.opencl.h" namespace Os { const char *frontEndDllName = FCL_LIBRARY_NAME; const char *igcDllName = IGC_LIBRARY_NAME; const char *gdiDllName = "gdi32.dll"; // Os specific Metrics Library name #if _WIN64 const char *metricsLibraryDllName = "igdml64.dll"; #else const char *metricsLibraryDllName = "igdml32.dll"; #endif } // namespace Os compute-runtime-20.13.16352/opencl/source/dll/windows/os_interface.cpp000066400000000000000000000007071363734646600255530ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/windows/wddm/wddm.h" namespace NEO { bool OSInterface::osEnableLocalMemory = true; void OSInterface::setGmmInputArgs(void *args) { this->get()->getWddm()->setGmmInputArg(args); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/enable_gens.cmake000066400000000000000000000104131363734646600234000ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_GENX_CPP_WINDOWS windows/command_stream_receiver windows/gmm_callbacks ) set(RUNTIME_SRCS_GENX_CPP_LINUX linux/command_stream_receiver ) set(RUNTIME_SRCS_GENX_H_BASE aub_mapper.h ) set(RUNTIME_SRCS_GENX_CPP_BASE aub_command_stream_receiver aub_mem_dump buffer command_queue command_stream_receiver_simulated_common_hw experimental_command_buffer gpgpu_walker hardware_commands_helper hw_helper 
hw_info image sampler state_base_address tbx_command_stream_receiver ) macro(macro_for_each_platform) string(TOLOWER ${PLATFORM_IT} PLATFORM_IT_LOWER) foreach(PLATFORM_FILE "hw_cmds_${PLATFORM_IT_LOWER}.h" "hw_info_${PLATFORM_IT_LOWER}.h") if(EXISTS ${CORE_GENX_PREFIX}/${PLATFORM_FILE}) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_H_BASE ${CORE_GENX_PREFIX}/${PLATFORM_FILE}) endif() endforeach() foreach(PLATFORM_FILE "reg_configs.h") if(EXISTS ${GENX_PREFIX}/${PLATFORM_FILE}) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_H_BASE ${GENX_PREFIX}/${PLATFORM_FILE}) endif() endforeach() foreach(PLATFORM_FILE "hw_info_${PLATFORM_IT_LOWER}.inl") list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_BASE ${GENX_PREFIX}/${PLATFORM_FILE}) endforeach() list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_LINUX ${GENX_PREFIX}/linux/hw_info_config_${PLATFORM_IT_LOWER}.inl) endmacro() macro(macro_for_each_gen) set(GENX_PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_TYPE_LOWER}) # Add default GEN files foreach(SRC_IT ${RUNTIME_SRCS_GENX_H_BASE}) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_H_BASE ${GENX_PREFIX}/${SRC_IT}) endforeach() foreach(SRC_IT "state_compute_mode_helper_${GEN_TYPE_LOWER}.cpp") if(EXISTS ${GENX_PREFIX}/${SRC_IT}) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_BASE ${GENX_PREFIX}/${SRC_IT}) endif() endforeach() if(EXISTS "${GENX_PREFIX}/additional_files_${GEN_TYPE_LOWER}.cmake") include("${GENX_PREFIX}/additional_files_${GEN_TYPE_LOWER}.cmake") endif() if(${SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE}}) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_H_BASE ${GENX_PREFIX}/device_enqueue.h) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_H_BASE ${GENX_PREFIX}/scheduler_definitions.h) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_H_BASE ${GENX_PREFIX}/scheduler_builtin_kernel.inl) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_BASE ${GENX_PREFIX}/device_queue_${GEN_TYPE_LOWER}.cpp) endif() foreach(OS_IT "BASE" "WINDOWS" "LINUX") foreach(SRC_IT ${RUNTIME_SRCS_GENX_CPP_${OS_IT}}) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_${OS_IT} 
${GENX_PREFIX}/${SRC_IT}_${GEN_TYPE_LOWER}.cpp) endforeach() endforeach() apply_macro_for_each_platform() list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_BASE ${NEO_SHARED_DIRECTORY}/${GEN_TYPE_LOWER}/image_core_${GEN_TYPE_LOWER}.cpp) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_WINDOWS ${GENX_PREFIX}/windows/hw_info_config_${GEN_TYPE_LOWER}.cpp) list(APPEND RUNTIME_SRCS_${GEN_TYPE}_CPP_LINUX ${GENX_PREFIX}/linux/hw_info_config_${GEN_TYPE_LOWER}.cpp) list(APPEND ${GEN_TYPE}_SRC_LINK_BASE ${GENX_PREFIX}/enable_family_full_ocl_${GEN_TYPE_LOWER}.cpp) list(APPEND RUNTIME_SRCS_GENX_ALL_BASE ${RUNTIME_SRCS_${GEN_TYPE}_H_BASE}) list(APPEND RUNTIME_SRCS_GENX_ALL_BASE ${RUNTIME_SRCS_${GEN_TYPE}_CPP_BASE}) list(APPEND HW_SRC_LINK ${${GEN_TYPE}_SRC_LINK_BASE}) list(APPEND RUNTIME_SRCS_GENX_ALL_WINDOWS ${RUNTIME_SRCS_${GEN_TYPE}_CPP_WINDOWS}) list(APPEND RUNTIME_SRCS_GENX_ALL_LINUX ${RUNTIME_SRCS_${GEN_TYPE}_CPP_LINUX}) if(UNIX) list(APPEND HW_SRC_LINK ${${GEN_TYPE}_SRC_LINK_LINUX}) endif() if(NOT DISABLED_GTPIN_SUPPORT) list(APPEND ${GEN_TYPE}_SRC_LINK_BASE ${GENX_PREFIX}/gtpin_setup_${GEN_TYPE_LOWER}.cpp) endif() endmacro() apply_macro_for_each_gen("SUPPORTED") target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_GENX_ALL_BASE}) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_GENX_ALL_WINDOWS}) else() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_GENX_ALL_LINUX}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_GENX_ALL_BASE ${RUNTIME_SRCS_GENX_ALL_BASE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_GENX_ALL_LINUX ${RUNTIME_SRCS_GENX_ALL_LINUX}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_GENX_ALL_WINDOWS ${RUNTIME_SRCS_GENX_ALL_WINDOWS}) compute-runtime-20.13.16352/opencl/source/event/000077500000000000000000000000001363734646600212565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/event/CMakeLists.txt000066400000000000000000000015151363734646600240200ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel 
Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_EVENT ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/async_events_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/async_events_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.h ${CMAKE_CURRENT_SOURCE_DIR}/event_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_builder.h ${CMAKE_CURRENT_SOURCE_DIR}/event_tracker.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_tracker.h ${CMAKE_CURRENT_SOURCE_DIR}/user_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/user_event.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_timestamps.h ${CMAKE_CURRENT_SOURCE_DIR}/perf_counter.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_EVENT}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_EVENT ${RUNTIME_SRCS_EVENT}) compute-runtime-20.13.16352/opencl/source/event/async_events_handler.cpp000066400000000000000000000061061363734646600261630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/async_events_handler.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_thread.h" #include "opencl/source/event/event.h" #include namespace NEO { AsyncEventsHandler::AsyncEventsHandler() { allowAsyncProcess = false; registerList.reserve(64); list.reserve(64); pendingList.reserve(64); } AsyncEventsHandler::~AsyncEventsHandler() { closeThread(); } void AsyncEventsHandler::registerEvent(Event *event) { std::unique_lock lock(asyncMtx); //Create on first use openThread(); event->incRefInternal(); registerList.push_back(event); asyncCond.notify_one(); } Event *AsyncEventsHandler::processList() { uint32_t lowestTaskCount = CompletionStamp::levelNotReady; Event *sleepCandidate = nullptr; pendingList.clear(); for (auto event : list) { event->updateExecutionStatus(); if (event->peekHasCallbacks() || (event->isExternallySynchronized() && (event->peekExecutionStatus() > CL_COMPLETE))) { 
pendingList.push_back(event); if (event->peekTaskCount() < lowestTaskCount) { sleepCandidate = event; lowestTaskCount = event->peekTaskCount(); } } else { event->decRefInternal(); } } list.swap(pendingList); return sleepCandidate; } void *AsyncEventsHandler::asyncProcess(void *arg) { auto self = reinterpret_cast(arg); std::unique_lock lock(self->asyncMtx, std::defer_lock); Event *sleepCandidate = nullptr; while (true) { lock.lock(); self->transferRegisterList(); if (!self->allowAsyncProcess) { self->processList(); self->releaseEvents(); break; } if (self->list.empty()) { self->asyncCond.wait(lock); } lock.unlock(); sleepCandidate = self->processList(); if (sleepCandidate) { sleepCandidate->wait(true, true); } std::this_thread::yield(); } return nullptr; } void AsyncEventsHandler::closeThread() { std::unique_lock lock(asyncMtx); if (allowAsyncProcess) { allowAsyncProcess = false; asyncCond.notify_one(); lock.unlock(); thread.get()->join(); thread.reset(nullptr); } } void AsyncEventsHandler::openThread() { if (!thread.get()) { DEBUG_BREAK_IF(allowAsyncProcess); allowAsyncProcess = true; thread = Thread::create(asyncProcess, reinterpret_cast(this)); } } void AsyncEventsHandler::transferRegisterList() { std::move(registerList.begin(), registerList.end(), std::back_inserter(list)); registerList.clear(); } void AsyncEventsHandler::releaseEvents() { for (auto event : list) { event->decRefInternal(); } list.clear(); UNRECOVERABLE_IF(!registerList.empty()) // transferred before release } } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/async_events_handler.h000066400000000000000000000015601363734646600256270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include #include #include namespace NEO { class Event; class Thread; class AsyncEventsHandler { public: AsyncEventsHandler(); virtual ~AsyncEventsHandler(); void registerEvent(Event *event); void 
closeThread(); protected: Event *processList(); static void *asyncProcess(void *arg); void releaseEvents(); MOCKABLE_VIRTUAL void openThread(); MOCKABLE_VIRTUAL void transferRegisterList(); std::vector registerList; std::vector list; std::vector pendingList; std::unique_ptr thread; std::mutex asyncMtx; std::condition_variable asyncCond; std::atomic allowAsyncProcess; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/event.cpp000066400000000000000000000623151363734646600231120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/event.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/utilities/range.h" #include "shared/source/utilities/stackvec.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/event_tracker.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #define OCLRT_NUM_TIMESTAMP_BITS (32) namespace NEO { Event::Event( Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : taskLevel(taskLevel), currentCmdQVirtualEvent(false), cmdToSubmit(nullptr), submittedCmd(nullptr), ctx(ctx), cmdQueue(cmdQueue), cmdType(cmdType), dataCalculated(false), taskCount(taskCount) { if 
(NEO::DebugManager.flags.EventsTrackerEnable.get()) { EventsTracker::getEventsTracker().notifyCreation(this); } parentCount = 0; executionStatus = CL_QUEUED; flushStamp.reset(new FlushStampTracker(true)); DBG_LOG(EventsDebugEnable, "Event()", this); // Event can live longer than command queue that created it, // hence command queue refCount must be incremented // non-null command queue is only passed when Base Event object is created // any other Event types must increment refcount when setting command queue if (cmdQueue != nullptr) { cmdQueue->incRefInternal(); } if ((this->ctx == nullptr) && (cmdQueue != nullptr)) { this->ctx = &cmdQueue->getContext(); if (cmdQueue->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { timestampPacketContainer = std::make_unique(); } } if (this->ctx != nullptr) { this->ctx->incRefInternal(); } queueTimeStamp = {0, 0}; submitTimeStamp = {0, 0}; startTimeStamp = 0; endTimeStamp = 0; completeTimeStamp = 0; profilingEnabled = !isUserEvent() && (cmdQueue ? cmdQueue->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE : false); profilingCpuPath = ((cmdType == CL_COMMAND_MAP_BUFFER) || (cmdType == CL_COMMAND_MAP_IMAGE)) && profilingEnabled; perfCountersEnabled = cmdQueue ? cmdQueue->isPerfCountersEnabled() : false; } Event::Event( CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(nullptr, cmdQueue, cmdType, taskLevel, taskCount) { } Event::~Event() { if (NEO::DebugManager.flags.EventsTrackerEnable.get()) { EventsTracker::getEventsTracker().notifyDestruction(this); } DBG_LOG(EventsDebugEnable, "~Event()", this); //no commands should be registred DEBUG_BREAK_IF(this->cmdToSubmit.load()); submitCommand(true); int32_t lastStatus = executionStatus; if (isStatusCompleted(lastStatus) == false) { transitionExecutionStatus(-1); DEBUG_BREAK_IF(peekHasCallbacks() || peekHasChildEvents()); } // Note from OCL spec: // "All callbacks registered for an event object must be called. 
// All enqueued callbacks shall be called before the event object is destroyed." if (peekHasCallbacks()) { executeCallbacks(lastStatus); } { // clean-up submitted command if needed std::unique_ptr submittedCommand(submittedCmd.exchange(nullptr)); } if (cmdQueue != nullptr) { if (timeStampNode != nullptr) { timeStampNode->returnTag(); } if (perfCounterNode != nullptr) { perfCounterNode->returnTag(); } cmdQueue->decRefInternal(); } if (ctx != nullptr) { ctx->decRefInternal(); } // in case event did not unblock child events before unblockEventsBlockedByThis(executionStatus); } cl_int Event::getEventProfilingInfo(cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; const void *src = nullptr; size_t srcSize = 0; // CL_PROFILING_INFO_NOT_AVAILABLE if event refers to the clEnqueueSVMFree command if (isUserEvent() != CL_FALSE || // or is a user event object. !updateStatusAndCheckCompletion() || //if the execution status of the command identified by event is not CL_COMPLETE !profilingEnabled) // the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue, { return CL_PROFILING_INFO_NOT_AVAILABLE; } // if paramValue is NULL, it is ignored switch (paramName) { case CL_PROFILING_COMMAND_QUEUED: src = &queueTimeStamp.CPUTimeinNS; if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { src = &queueTimeStamp.GPUTimeStamp; } srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_SUBMIT: src = &submitTimeStamp.CPUTimeinNS; if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { src = &submitTimeStamp.GPUTimeStamp; } srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_START: calcProfilingData(); src = &startTimeStamp; srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_END: calcProfilingData(); src = &endTimeStamp; srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_COMPLETE: calcProfilingData(); src = &completeTimeStamp; srcSize = sizeof(cl_ulong); break; case 
CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL: if (!perfCountersEnabled) { return CL_INVALID_VALUE; } if (!cmdQueue->getPerfCounters()->getApiReport(paramValueSize, paramValue, paramValueSizeRet, updateStatusAndCheckCompletion())) { return CL_PROFILING_INFO_NOT_AVAILABLE; } return CL_SUCCESS; default: return CL_INVALID_VALUE; } retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, src, srcSize)); if (paramValueSizeRet) { *paramValueSizeRet = srcSize; } return retVal; } // namespace NEO uint32_t Event::getCompletionStamp() const { return this->taskCount; } void Event::updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp) { this->taskCount = taskCount; this->taskLevel = tasklevel; this->flushStamp->setStamp(flushStamp); } cl_ulong Event::getDelta(cl_ulong startTime, cl_ulong endTime) { cl_ulong Max = maxNBitValue(OCLRT_NUM_TIMESTAMP_BITS); cl_ulong Delta = 0; startTime &= Max; endTime &= Max; if (startTime > endTime) { Delta = Max - startTime; Delta += endTime; } else { Delta = endTime - startTime; } return Delta; } bool Event::calcProfilingData() { if (!dataCalculated && !profilingCpuPath) { if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) { const auto timestamps = timestampPacketContainer->peekNodes(); auto contextStartTS = timestamps[0]->tagForCpuAccess->packets[0].contextStart; uint64_t contextEndTS = timestamps[0]->tagForCpuAccess->packets[0].contextEnd; auto globalStartTS = timestamps[0]->tagForCpuAccess->packets[0].globalStart; for (const auto ×tamp : timestamps) { const auto &packet = timestamp->tagForCpuAccess->packets[0]; if (contextStartTS > packet.contextStart) { contextStartTS = packet.contextStart; } if (contextEndTS < packet.contextEnd) { contextEndTS = packet.contextEnd; } if (globalStartTS > packet.globalStart) { globalStartTS = packet.globalStart; } } calculateProfilingDataInternal(contextStartTS, contextEndTS, &contextEndTS, globalStartTS); } else if 
(timeStampNode) { calculateProfilingDataInternal( timeStampNode->tagForCpuAccess->ContextStartTS, timeStampNode->tagForCpuAccess->ContextEndTS, &timeStampNode->tagForCpuAccess->ContextCompleteTS, timeStampNode->tagForCpuAccess->GlobalStartTS); } } return dataCalculated; } void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) { uint64_t gpuDuration = 0; uint64_t cpuDuration = 0; uint64_t gpuCompleteDuration = 0; uint64_t cpuCompleteDuration = 0; double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution; int64_t c0 = queueTimeStamp.CPUTimeinNS - static_cast(queueTimeStamp.GPUTimeStamp * frequency); /* calculation based on equation CpuTime = GpuTime * scalar + const( == c0) scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu) to determine the value of the const we can use one pair of values const = CpuTimeQueue - GpuTimeQueue * scalar */ //If device enqueue has not updated complete timestamp, assign end timestamp gpuDuration = getDelta(contextStartTS, contextEndTS); if (*contextCompleteTS == 0) { *contextCompleteTS = contextEndTS; gpuCompleteDuration = gpuDuration; } else { gpuCompleteDuration = getDelta(contextStartTS, *contextCompleteTS); } cpuDuration = static_cast(gpuDuration * frequency); cpuCompleteDuration = static_cast(gpuCompleteDuration * frequency); startTimeStamp = static_cast(globalStartTS * frequency) + c0; endTimeStamp = startTimeStamp + cpuDuration; completeTimeStamp = startTimeStamp + cpuCompleteDuration; if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { startTimeStamp = contextStartTS; endTimeStamp = contextEndTS; completeTimeStamp = *contextCompleteTS; } dataCalculated = true; } inline bool Event::wait(bool blocking, bool useQuickKmdSleep) { while (this->taskCount == CompletionStamp::levelNotReady) { if (blocking == false) { return false; } } cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp(), useQuickKmdSleep); 
updateExecutionStatus(); DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::levelNotReady && this->executionStatus >= 0); auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION); return true; } void Event::updateExecutionStatus() { if (taskLevel == CompletionStamp::levelNotReady) { return; } int32_t statusSnapshot = executionStatus; if (isStatusCompleted(statusSnapshot)) { executeCallbacks(statusSnapshot); return; } if (peekIsBlocked()) { transitionExecutionStatus(CL_QUEUED); executeCallbacks(CL_QUEUED); return; } if (statusSnapshot == CL_QUEUED) { bool abortBlockedTasks = isStatusCompletedByTermination(statusSnapshot); submitCommand(abortBlockedTasks); transitionExecutionStatus(CL_SUBMITTED); executeCallbacks(CL_SUBMITTED); unblockEventsBlockedByThis(CL_SUBMITTED); // Note : Intentional fallthrough (no return) to check for CL_COMPLETE } if ((cmdQueue != nullptr) && (cmdQueue->isCompleted(getCompletionStamp()))) { transitionExecutionStatus(CL_COMPLETE); executeCallbacks(CL_COMPLETE); unblockEventsBlockedByThis(CL_COMPLETE); auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION); return; } transitionExecutionStatus(CL_SUBMITTED); } void Event::addChild(Event &childEvent) { childEvent.parentCount++; childEvent.incRefInternal(); childEventsToNotify.pushRefFrontOne(childEvent); DBG_LOG(EventsDebugEnable, "addChild: Parent event:", this, "child:", &childEvent); if (DebugManager.flags.TrackParentEvents.get()) { childEvent.parentEvents.push_back(this); } if (executionStatus == CL_COMPLETE) { unblockEventsBlockedByThis(CL_COMPLETE); } } void Event::unblockEventsBlockedByThis(int32_t transitionStatus) { int32_t status = transitionStatus; (void)status; DEBUG_BREAK_IF(!(isStatusCompleted(status) || (peekIsSubmitted(status)))); uint32_t 
taskLevelToPropagate = CompletionStamp::levelNotReady; if (isStatusCompletedByTermination(transitionStatus) == false) { //if we are event on top of the tree , obtain taskLevel from CSR if (taskLevel == CompletionStamp::levelNotReady) { this->taskLevel = getTaskLevel(); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) taskLevelToPropagate = this->taskLevel; } else { taskLevelToPropagate = taskLevel + 1; } } auto childEventRef = childEventsToNotify.detachNodes(); while (childEventRef != nullptr) { auto childEvent = childEventRef->ref; childEvent->unblockEventBy(*this, taskLevelToPropagate, transitionStatus); childEvent->decRefInternal(); auto next = childEventRef->next; delete childEventRef; childEventRef = next; } } bool Event::setStatus(cl_int status) { int32_t prevStatus = executionStatus; DBG_LOG(EventsDebugEnable, "setStatus event", this, " new status", status, "previousStatus", prevStatus); if (isStatusCompleted(prevStatus)) { return false; } if (status == prevStatus) { return false; } if (peekIsBlocked() && (isStatusCompletedByTermination(status) == false)) { return false; } if ((status == CL_SUBMITTED) || (isStatusCompleted(status))) { bool abortBlockedTasks = isStatusCompletedByTermination(status); submitCommand(abortBlockedTasks); } this->incRefInternal(); transitionExecutionStatus(status); if (isStatusCompleted(status) || (status == CL_SUBMITTED)) { unblockEventsBlockedByThis(status); } executeCallbacks(status); this->decRefInternal(); return true; } void Event::transitionExecutionStatus(int32_t newExecutionStatus) const { int32_t prevStatus = executionStatus; DBG_LOG(EventsDebugEnable, "transitionExecutionStatus event", this, " new status", newExecutionStatus, "previousStatus", prevStatus); while (prevStatus > newExecutionStatus) { executionStatus.compare_exchange_weak(prevStatus, newExecutionStatus); } if (NEO::DebugManager.flags.EventsTrackerEnable.get()) { EventsTracker::getEventsTracker().notifyTransitionedExecutionStatus(); } } void 
Event::submitCommand(bool abortTasks) { std::unique_ptr cmdToProcess(cmdToSubmit.exchange(nullptr)); if (cmdToProcess.get() != nullptr) { std::unique_lock lockCSR; if (this->cmdQueue) { lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); } if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) { if (timeStampNode) { this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation()); cmdToProcess->timestamp = timeStampNode; } if (profilingCpuPath) { setSubmitTimeStamp(); setStartTimeStamp(); } else { this->cmdQueue->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp); } if (perfCountersEnabled && perfCounterNode) { this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation()); } } auto &complStamp = cmdToProcess->submit(taskLevel, abortTasks); if (profilingCpuPath && this->isProfilingEnabled() && (this->cmdQueue != nullptr)) { setEndTimeStamp(); } updateTaskCount(complStamp.taskCount); flushStamp->setStamp(complStamp.flushStamp); submittedCmd.exchange(cmdToProcess.release()); } else if (profilingCpuPath && endTimeStamp == 0) { setEndTimeStamp(); } if (this->taskCount == CompletionStamp::levelNotReady) { if (!this->isUserEvent() && this->eventWithoutCommand) { if (this->cmdQueue) { auto lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount()); } } //make sure that task count is synchronized for events with kernels if (!this->eventWithoutCommand && !abortTasks) { this->synchronizeTaskCount(); } } } cl_int Event::waitForEvents(cl_uint numEvents, const cl_event *eventList) { if (numEvents == 0) { return CL_SUCCESS; } //flush all command queues for (const cl_event *it = eventList, *end = eventList + numEvents; it != end; ++it) { Event *event = castToObjectOrAbort(*it); if (event->cmdQueue) { if (event->taskLevel != 
CompletionStamp::levelNotReady) { event->cmdQueue->flush(); } } } using WorkerListT = StackVec; WorkerListT workerList1(eventList, eventList + numEvents); WorkerListT workerList2; workerList2.reserve(numEvents); // pointers to workerLists - for fast swap operations WorkerListT *currentlyPendingEvents = &workerList1; WorkerListT *pendingEventsLeft = &workerList2; while (currentlyPendingEvents->size() > 0) { for (auto &e : *currentlyPendingEvents) { Event *event = castToObjectOrAbort(e); if (event->peekExecutionStatus() < CL_COMPLETE) { return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } if (event->wait(false, false) == false) { pendingEventsLeft->push_back(event); } } std::swap(currentlyPendingEvents, pendingEventsLeft); pendingEventsLeft->clear(); } return CL_SUCCESS; } uint32_t Event::getTaskLevel() { return taskLevel; } inline void Event::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) { int32_t numEventsBlockingThis = --parentCount; DEBUG_BREAK_IF(numEventsBlockingThis < 0); int32_t blockerStatus = transitionStatus; DEBUG_BREAK_IF(!(isStatusCompleted(blockerStatus) || peekIsSubmitted(blockerStatus))); if ((numEventsBlockingThis > 0) && (isStatusCompletedByTermination(blockerStatus) == false)) { return; } DBG_LOG(EventsDebugEnable, "Event", this, "is unblocked by", &event); if (this->taskLevel == CompletionStamp::levelNotReady) { this->taskLevel = std::max(cmdQueue->getGpgpuCommandStreamReceiver().peekTaskLevel(), taskLevel); } else { this->taskLevel = std::max(this->taskLevel.load(), taskLevel); } int32_t statusToPropagate = CL_SUBMITTED; if (isStatusCompletedByTermination(blockerStatus)) { statusToPropagate = blockerStatus; } setStatus(statusToPropagate); //event may be completed after this operation, transtition the state to not block others. 
this->updateExecutionStatus(); } bool Event::updateStatusAndCheckCompletion() { auto currentStatus = updateEventAndReturnCurrentStatus(); return isStatusCompleted(currentStatus); } bool Event::isReadyForSubmission() { return taskLevel != CompletionStamp::levelNotReady ? true : false; } void Event::addCallback(Callback::ClbFuncT fn, cl_int type, void *data) { ECallbackTarget target = translateToCallbackTarget(type); if (target == ECallbackTarget::Invalid) { DEBUG_BREAK_IF(true); return; } incRefInternal(); // Note from spec : // "All callbacks registered for an event object must be called. // All enqueued callbacks shall be called before the event object is destroyed." // That's why each registered calback increments the internal refcount incRefInternal(); DBG_LOG(EventsDebugEnable, "event", this, "addCallback", "ECallbackTarget", (uint32_t)type); callbacks[(uint32_t)target].pushFrontOne(*new Callback(this, fn, type, data)); // Callback added after event reached its "completed" state if (updateStatusAndCheckCompletion()) { int32_t status = executionStatus; DBG_LOG(EventsDebugEnable, "event", this, "addCallback executing callbacks with status", status); executeCallbacks(status); } if (peekHasCallbacks() && !isUserEvent() && DebugManager.flags.EnableAsyncEventsHandler.get()) { platformsImpl[0]->getAsyncEventsHandler()->registerEvent(this); } decRefInternal(); } void Event::executeCallbacks(int32_t executionStatusIn) { int32_t execStatus = executionStatusIn; bool terminated = isStatusCompletedByTermination(execStatus); ECallbackTarget target; if (terminated) { target = ECallbackTarget::Completed; } else { target = translateToCallbackTarget(execStatus); if (target == ECallbackTarget::Invalid) { DEBUG_BREAK_IF(true); return; } } // run through all needed callback targets and execute callbacks for (uint32_t i = 0; i <= (uint32_t)target; ++i) { auto cb = callbacks[i].detachNodes(); auto curr = cb; while (curr != nullptr) { auto next = curr->next; if (terminated) { 
curr->overrideCallbackExecutionStatusTarget(execStatus); } DBG_LOG(EventsDebugEnable, "event", this, "executing callback", "ECallbackTarget", (uint32_t)target); curr->execute(); decRefInternal(); delete curr; curr = next; } } } void Event::tryFlushEvent() { //only if event is not completed, completed event has already been flushed if (cmdQueue && updateStatusAndCheckCompletion() == false) { //flush the command queue only if it is not blocked event if (taskLevel != CompletionStamp::levelNotReady) { cmdQueue->getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); } } } void Event::setQueueTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&queueTimeStamp.CPUTimeinNS); } } void Event::setSubmitTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS); } } void Event::setStartTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&startTimeStamp); } } void Event::setEndTimeStamp() { if (this->profilingEnabled && (this->cmdQueue != nullptr)) { this->cmdQueue->getDevice().getOSTime()->getCpuTime(&endTimeStamp); completeTimeStamp = endTimeStamp; } } TagNode *Event::getHwTimeStampNode() { if (!timeStampNode) { timeStampNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag(); } return timeStampNode; } TagNode *Event::getHwPerfCounterNode() { if (!perfCounterNode && cmdQueue->getPerfCounters()) { const uint32_t gpuReportSize = cmdQueue->getPerfCounters()->getGpuReportSize(); perfCounterNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag(); } return perfCounterNode; } void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) { timestampPacketContainer->assignAndIncrementNodesRefCounts(inputTimestampPacketContainer); } 
TimestampPacketContainer *Event::getTimestampPacketNodes() const { return timestampPacketContainer.get(); } bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList) { bool userEventsDependencies = false; for (uint32_t i = 0; i < numEventsInWaitList; i++) { auto event = castToObjectOrAbort(eventWaitList[i]); if (!event->isReadyForSubmission()) { userEventsDependencies = true; break; } } return userEventsDependencies; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/event.h000066400000000000000000000271261363734646600225600ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/flush_stamp.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/idlist.h" #include "shared/source/utilities/iflist.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/event/hw_timestamps.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/task_information.h" #include "opencl/source/os_interface/performance_counters.h" #include #include #include namespace NEO { template struct TagNode; class CommandQueue; class Context; class Device; class TimestampPacketContainer; template <> struct OpenCLObjectMapper<_cl_event> { typedef class Event DerivedType; }; class Event : public BaseObject<_cl_event>, public IDNode { public: enum class ECallbackTarget : uint32_t { Queued = 0, Submitted, Running, Completed, MAX, Invalid }; struct Callback : public IFNode { typedef void(CL_CALLBACK *ClbFuncT)(cl_event, cl_int, void *); Callback(cl_event event, ClbFuncT clb, cl_int type, void *data) : event(event), callbackFunction(clb), callbackExecutionStatusTarget(type), userData(data) { } void execute() { callbackFunction(event, callbackExecutionStatusTarget, userData); } int32_t getCallbackExecutionStatusTarget() const { return 
callbackExecutionStatusTarget; } // From OCL spec : // "If the callback is called as the result of the command associated with // event being abnormally terminated, an appropriate error code for the error that caused // the termination will be passed to event_command_exec_status instead." // This function allows to override this value void overrideCallbackExecutionStatusTarget(int32_t newCallbackExecutionStatusTarget) { DEBUG_BREAK_IF(newCallbackExecutionStatusTarget >= 0); callbackExecutionStatusTarget = newCallbackExecutionStatusTarget; } private: cl_event event; ClbFuncT callbackFunction; int32_t callbackExecutionStatusTarget; // minimum event execution status that will triger this callback void *userData; }; static const cl_ulong objectMagic = 0x80134213A43C981ALL; Event(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount); Event(const Event &) = delete; Event &operator=(const Event &) = delete; ~Event() override; uint32_t getCompletionStamp(void) const; void updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp); cl_ulong getDelta(cl_ulong startTime, cl_ulong endTime); void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; } bool isCPUProfilingPath() const { return profilingCpuPath; } cl_int getEventProfilingInfo(cl_profiling_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); bool isProfilingEnabled() const { return profilingEnabled; } void setProfilingEnabled(bool profilingEnabled) { this->profilingEnabled = profilingEnabled; } TagNode *getHwTimeStampNode(); void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer); TimestampPacketContainer *getTimestampPacketNodes() const; bool isPerfCountersEnabled() const { return perfCountersEnabled; } void setPerfCountersEnabled(bool perfCountersEnabled) { this->perfCountersEnabled = perfCountersEnabled; } TagNode *getHwPerfCounterNode(); std::unique_ptr flushStamp; 
std::atomic taskLevel; void addChild(Event &e); virtual bool setStatus(cl_int status); static cl_int waitForEvents(cl_uint numEvents, const cl_event *eventList); void setCommand(std::unique_ptr newCmd) { UNRECOVERABLE_IF(cmdToSubmit.load()); cmdToSubmit.exchange(newCmd.release()); eventWithoutCommand = false; } Command *peekCommand() { return cmdToSubmit; } IFNodeRef *peekChildEvents() { return childEventsToNotify.peekHead(); } bool peekHasChildEvents() { return (peekChildEvents() != nullptr); } bool peekHasCallbacks(ECallbackTarget target) { if (target >= ECallbackTarget::MAX) { DEBUG_BREAK_IF(true); return false; } return (callbacks[(uint32_t)target].peekHead() != nullptr); } bool peekHasCallbacks() { for (uint32_t i = 0; i < (uint32_t)ECallbackTarget::MAX; ++i) { if (peekHasCallbacks((ECallbackTarget)i)) { return true; } } return false; } // return the number of events that are blocking this event uint32_t peekNumEventsBlockingThis() const { return parentCount; } // returns true if event is completed (in terms of definition provided by OCL spec) // Note from OLC spec : // "A command is considered complete if its execution status // is CL_COMPLETE or a negative value." bool isStatusCompleted(const int32_t executionStatusSnapshot) { return executionStatusSnapshot <= CL_COMPLETE; } bool updateStatusAndCheckCompletion(); // Note from OCL spec : // "A negative integer value causes all enqueued commands that wait on this user event // to be terminated." 
bool isStatusCompletedByTermination(const int32_t executionStatusSnapshot) const { return executionStatusSnapshot < 0; } bool peekIsSubmitted(const int32_t executionStatusSnapshot) const { return executionStatusSnapshot == CL_SUBMITTED; } bool peekIsCmdSubmitted() { return submittedCmd != nullptr; } //commands blocked by user event depencies bool isReadyForSubmission(); // adds a callback (execution state change listener) to this event's list of callbacks void addCallback(Callback::ClbFuncT fn, cl_int type, void *data); //returns true on success //if(blocking==false), will return with false instead of blocking while waiting for completion virtual bool wait(bool blocking, bool useQuickKmdSleep); bool isUserEvent() const { return (CL_COMMAND_USER == cmdType); } bool isEventWithoutCommand() const { return eventWithoutCommand; } Context *getContext() { return ctx; } CommandQueue *getCommandQueue() { return cmdQueue; } cl_command_type getCommandType() { return cmdType; } virtual uint32_t getTaskLevel(); cl_int peekExecutionStatus() const { return executionStatus; } cl_int updateEventAndReturnCurrentStatus() { updateExecutionStatus(); return executionStatus; } bool peekIsBlocked() const { return (peekNumEventsBlockingThis() > 0); } virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus); void updateTaskCount(uint32_t taskCount) { if (taskCount == CompletionStamp::levelNotReady) { DEBUG_BREAK_IF(true); return; } uint32_t prevTaskCount = this->taskCount.exchange(taskCount); if ((prevTaskCount != CompletionStamp::levelNotReady) && (prevTaskCount > taskCount)) { this->taskCount = prevTaskCount; DEBUG_BREAK_IF(true); } } bool isCurrentCmdQVirtualEvent() { return currentCmdQVirtualEvent; } void setCurrentCmdQVirtualEvent(bool isCurrentVirtualEvent) { currentCmdQVirtualEvent = isCurrentVirtualEvent; } virtual void updateExecutionStatus(); void tryFlushEvent(); uint32_t peekTaskCount() const { return this->taskCount; } void 
setQueueTimeStamp(TimeStampData *queueTimeStamp) { this->queueTimeStamp = *queueTimeStamp; }; void setSubmitTimeStamp(TimeStampData *submitTimeStamp) { this->submitTimeStamp = *submitTimeStamp; }; void setQueueTimeStamp(); void setSubmitTimeStamp(); void setStartTimeStamp(); void setEndTimeStamp(); void setCmdType(uint32_t cmdType) { this->cmdType = cmdType; } std::vector &getParentEvents() { return this->parentEvents; } virtual bool isExternallySynchronized() const { return false; } static bool checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList); protected: Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount); ECallbackTarget translateToCallbackTarget(cl_int execStatus) { switch (execStatus) { default: { DEBUG_BREAK_IF(true); return ECallbackTarget::Invalid; } case CL_QUEUED: return ECallbackTarget::Queued; case CL_SUBMITTED: return ECallbackTarget::Submitted; case CL_RUNNING: return ECallbackTarget::Running; case CL_COMPLETE: return ECallbackTarget::Completed; } } bool calcProfilingData(); MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS); MOCKABLE_VIRTUAL void synchronizeTaskCount() { while (this->taskCount == CompletionStamp::levelNotReady) ; }; // executes all callbacks associated with this event void executeCallbacks(int32_t executionStatus); // transitions event to new execution state // guarantees that newStatus <= oldStatus void transitionExecutionStatus(int32_t newExecutionStatus) const; //vector storing events that needs to be notified when this event is ready to go IFRefList childEventsToNotify; void unblockEventsBlockedByThis(int32_t transitionStatus); void submitCommand(bool abortBlockedTasks); bool currentCmdQVirtualEvent; std::atomic cmdToSubmit; std::atomic submittedCmd; bool eventWithoutCommand = true; Context *ctx; CommandQueue *cmdQueue; cl_command_type 
cmdType; // callbacks to be executed when this event changes its execution state IFList callbacks[(uint32_t)ECallbackTarget::MAX]; // can be accessed only with transitionExecutionState // this is to ensure state consitency event when doning lock-free multithreading // e.g. CL_COMPLETE -> CL_SUBMITTED or CL_SUBMITTED -> CL_QUEUED becomes forbiden mutable std::atomic executionStatus; // Timestamps bool profilingEnabled; bool profilingCpuPath; bool dataCalculated; TimeStampData queueTimeStamp; TimeStampData submitTimeStamp; uint64_t startTimeStamp; uint64_t endTimeStamp; uint64_t completeTimeStamp; bool perfCountersEnabled; TagNode *timeStampNode = nullptr; TagNode *perfCounterNode = nullptr; std::unique_ptr timestampPacketContainer; //number of events this event depends on std::atomic parentCount; //event parents std::vector parentEvents; private: // can be accessed only with updateTaskCount std::atomic taskCount; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/event_builder.cpp000066400000000000000000000037241363734646600246170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/event_builder.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/context/context.h" #include "opencl/source/event/user_event.h" namespace NEO { EventBuilder::~EventBuilder() { UNRECOVERABLE_IF((this->event == nullptr) && ((parentEvents.size() != 0U))); finalize(); } void EventBuilder::addParentEvent(Event &newParentEvent) { bool duplicate = false; for (Event *parent : parentEvents) { if (parent == &newParentEvent) { duplicate = true; break; } } if (!duplicate) { newParentEvent.incRefInternal(); parentEvents.push_back(&newParentEvent); } } void EventBuilder::addParentEvents(ArrayRef newParentEvents) { for (cl_event clEv : newParentEvents) { auto neoEv = 
castToObject(clEv); DEBUG_BREAK_IF(neoEv == nullptr); addParentEvent(neoEv); } } void EventBuilder::finalize() { if ((this->event == nullptr) || finalized) { clear(); return; } if (parentEvents.size() != 0) { UserEvent sentinel; sentinel.addChild(*this->event); for (Event *parent : parentEvents) { //do not add as child if: //parent has no parents and is not blocked if (!(parent->peekIsBlocked() == false && parent->taskLevel != CompletionStamp::levelNotReady) || (!parent->isEventWithoutCommand() && !parent->peekIsCmdSubmitted())) { parent->addChild(*this->event); } } sentinel.setStatus(CL_COMPLETE); } clear(); finalized = true; } void EventBuilder::clear() { for (Event *parent : parentEvents) { parent->decRefInternal(); } parentEvents.clear(); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/event_builder.h000066400000000000000000000026341363734646600242630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/stackvec.h" #include "CL/cl.h" #include namespace NEO { class Event; class EventBuilder { public: template void create(ArgsT &&... 
args) { event = new EventType(std::forward(args)...); } EventBuilder() = default; EventBuilder(const EventBuilder &) = delete; EventBuilder &operator=(const EventBuilder &) = delete; EventBuilder(EventBuilder &&) = delete; EventBuilder &operator=(EventBuilder &&) = delete; ~EventBuilder(); Event *getEvent() const { return event; } void addParentEvent(Event &newParentEvent); void addParentEvent(Event *newParentEvent) { if (newParentEvent != nullptr) { addParentEvent(*newParentEvent); } } void addParentEvents(ArrayRef newParentEvents); void finalize(); Event *finalizeAndRelease() { finalize(); Event *retEvent = this->event; this->event = nullptr; finalized = false; return retEvent; } protected: void clear(); Event *event = nullptr; bool finalized = false; StackVec parentEvents; bool doNotRegister = false; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/event_tracker.cpp000066400000000000000000000174111363734646600246220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/event_tracker.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/cl_helper.h" namespace NEO { std::unique_ptr EventsTracker::globalEvTracker = nullptr; EventsTracker &EventsTracker::getEventsTracker() { static std::mutex initMutex; std::lock_guard autolock(initMutex); if (!EventsTracker::globalEvTracker) EventsTracker::globalEvTracker = std::unique_ptr{new EventsTracker()}; return *EventsTracker::globalEvTracker; } std::string EventsTracker::label(Event *node, const EventIdMap &eventsIdMapping) { std::string retLabel("e"); auto eventTag = eventsIdMapping.find(node); if (eventTag != eventsIdMapping.end()) { auto id = eventTag->second; retLabel += std::to_string(id); } return retLabel; } std::string EventsTracker::label(CommandQueue *cmdQ) { return "cq" + std::to_string(reinterpret_cast(cmdQ)); } void EventsTracker::dumpQueue(CommandQueue *cmdQ, 
std::ostream &out, CmdqSet &dumpedCmdQs) { if ((cmdQ == nullptr) || (dumpedCmdQs.find(cmdQ) != dumpedCmdQs.end())) { return; } out << label(cmdQ) << "[label=\"{------CmdQueue, ptr=" << cmdQ << "------|task count="; auto taskCount = cmdQ->taskCount; auto taskLevel = cmdQ->taskLevel; if (taskCount == CompletionStamp::levelNotReady) { out << "NOT_READY"; } else { out << taskCount; } out << ", level="; if (taskLevel == CompletionStamp::levelNotReady) { out << "NOT_READY"; } else { out << taskLevel; } out << "}\",color=blue];\n"; dumpedCmdQs.insert(cmdQ); } void EventsTracker::dumpNode(Event *node, std::ostream &out, const EventIdMap &eventsIdMapping) { if (node == nullptr) { out << "eNULL[label=\"{ptr=nullptr}\",color=red];\n"; return; } bool isUserEvent = node->isUserEvent(); uint32_t statusId = static_cast(node->peekExecutionStatus()); // clamp to aborted statusId = (statusId > CL_QUEUED) ? (CL_QUEUED + 1) : statusId; const char *color = ((statusId == CL_COMPLETE) || (statusId > CL_QUEUED)) ? "green" : (((statusId == CL_SUBMITTED) && (isUserEvent == false)) ? "yellow" : "red"); std::string eventType = isUserEvent ? "USER_EVENT" : (node->isCurrentCmdQVirtualEvent() ? "---V_EVENT " : "-----EVENT "); std::string commandType = ""; if (isUserEvent == false) { commandType = NEO::cmdTypetoString(node->getCommandType()); } static const char *status[] = { "CL_COMPLETE", "CL_RUNNING", "CL_SUBMITTED", "CL_QUEUED", "ABORTED"}; auto taskCount = node->peekTaskCount(); auto taskLevel = node->taskLevel.load(); out << label(node, eventsIdMapping) << "[label=\"{------" << eventType << " ptr=" << node << "------" "|" << commandType << "|" << status[statusId] << "|" "task count="; if (taskCount == CompletionStamp::levelNotReady) { out << "NOT_READY"; } else { out << taskCount; } out << ", level="; if (taskLevel == CompletionStamp::levelNotReady) { out << "NOT_READY"; } else { out << taskLevel; } out << "|CALLBACKS=" << (node->peekHasCallbacks() ? 
"TRUE" : "FALSE") << "}\",color=" << color << "];\n"; if (node->isCurrentCmdQVirtualEvent()) { out << label(node->getCommandQueue()) << "->" << label(node, eventsIdMapping); out << "[label=\"VIRTUAL_EVENT\"]"; out << ";\n"; } } void EventsTracker::dumpEdge(Event *leftNode, Event *rightNode, std::ostream &out, const EventIdMap &eventsIdMapping) { out << label(leftNode, eventsIdMapping) << "->" << label(rightNode, eventsIdMapping) << ";\n"; } // walk in DFS manner void EventsTracker::dumpGraph(Event *node, std::ostream &out, CmdqSet &dumpedCmdQs, std::set &dumpedEvents, const EventIdMap &eventsIdMapping) { if ((node == nullptr) || (dumpedEvents.find(node) != dumpedEvents.end())) { return; } dumpedEvents.insert(node); if (node->getCommandQueue() != nullptr) { dumpQueue(node->getCommandQueue(), out, dumpedCmdQs); } dumpNode(node, out, eventsIdMapping); auto *childNode = node->peekChildEvents(); while (childNode != nullptr) { dumpGraph(childNode->ref, out, dumpedCmdQs, dumpedEvents, eventsIdMapping); dumpEdge(node, childNode->ref, out, eventsIdMapping); childNode = childNode->next; } } TrackedEvent *EventsTracker::getNodes() { return trackedEvents.detachNodes(); } void EventsTracker::dump() { static std::mutex mutex; std::lock_guard lock(mutex); auto time = std::chrono::system_clock::now(); std::string dumpFileName = "eg_" "reg" + std::to_string(reinterpret_cast(this)) + "_" + std::to_string(time.time_since_epoch().count()) + ".gv"; auto out = createDumpStream(dumpFileName); *out << "digraph events_registry_" << this << " {\n"; *out << "node [shape=record]\n"; *out << "//pragma: somePragmaData" << "\n"; auto allNodes = getNodes(); EventIdMap deadNodeTags; auto curr = allNodes; TrackedEvent *prev = nullptr; EventIdMap eventsIdMapping; while (curr != nullptr) { auto next = curr->next; bool eraseNode = false; if (curr->eventId < 0) { auto prevTag = deadNodeTags.find(curr->ev); if (prevTag == deadNodeTags.end()) { deadNodeTags[curr->ev] = -curr->eventId; } eraseNode = true; 
} else if ((deadNodeTags.find(curr->ev) != deadNodeTags.end()) && (deadNodeTags[curr->ev] > curr->eventId)) { eraseNode = true; } if (eraseNode) { if (prev != nullptr) { prev->next = next; } if (allNodes == curr) { allNodes = nullptr; } delete curr; } else { if (allNodes == nullptr) { allNodes = curr; } prev = curr; eventsIdMapping[curr->ev] = curr->eventId; } curr = next; } auto node = allNodes; CmdqSet dumpedCmdQs; std::set dumpedEvents; while (node != nullptr) { if (node->ev->peekNumEventsBlockingThis() != 0) { node = node->next; continue; } dumpGraph(node->ev, *out, dumpedCmdQs, dumpedEvents, eventsIdMapping); node = node->next; } *out << "\n}\n"; if (allNodes == nullptr) { return; } if (trackedEvents.peekHead() != nullptr) { trackedEvents.peekHead()->getTail()->insertAllNext(*allNodes); } else { auto rest = allNodes->next; trackedEvents.pushFrontOne(*allNodes); if (rest != nullptr) { allNodes->insertAllNext(*rest); } } } void EventsTracker::notifyCreation(Event *eventToTrack) { dump(); auto trackedE = new TrackedEvent{eventToTrack, eventId++}; trackedEvents.pushFrontOne(*trackedE); } void EventsTracker::notifyDestruction(Event *eventToDestroy) { auto trackedE = new TrackedEvent{eventToDestroy, -(eventId++)}; trackedEvents.pushFrontOne(*trackedE); dump(); } void EventsTracker::notifyTransitionedExecutionStatus() { dump(); } std::unique_ptr EventsTracker::createDumpStream(const std::string &filename) { return std::make_unique(filename, std::ios::binary | std::ios::out); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/event_tracker.h000066400000000000000000000034251363734646600242670ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/iflist.h" #include "opencl/source/event/event.h" #include #include namespace NEO { class CommandQueue; struct TrackedEvent : IFNode { TrackedEvent(Event *ev, int64_t eventId) : ev(ev), eventId(eventId) { } 
Event *ev = nullptr; int64_t eventId = 1; }; class EventsTracker { using EventIdMap = std::unordered_map; using CmdqSet = std::set; protected: std::atomic eventId{0}; static std::unique_ptr globalEvTracker; IFList trackedEvents; EventsTracker() = default; public: void dump(); void notifyCreation(Event *eventToTrack); void notifyDestruction(Event *eventToDestroy); void notifyTransitionedExecutionStatus(); MOCKABLE_VIRTUAL ~EventsTracker() = default; MOCKABLE_VIRTUAL TrackedEvent *getNodes(); MOCKABLE_VIRTUAL std::unique_ptr createDumpStream(const std::string &filename); static EventsTracker &getEventsTracker(); static std::string label(Event *node, const EventIdMap &eventsIdMapping); static std::string label(CommandQueue *cmdQ); static void dumpQueue(CommandQueue *cmdQ, std::ostream &out, CmdqSet &dumpedCmdQs); static void dumpEdge(Event *leftNode, Event *rightNode, std::ostream &out, const EventIdMap &eventsIdMapping); static void dumpNode(Event *node, std::ostream &out, const EventIdMap &eventsIdMapping); static void dumpGraph(Event *node, std::ostream &out, CmdqSet &dumpedCmdQs, std::set &dumpedEvents, const EventIdMap &eventsIdMapping); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/hw_timestamps.h000066400000000000000000000014551363734646600243200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" #include namespace NEO { struct HwTimeStamps { void initialize() { GlobalStartTS = 0; ContextStartTS = 0; GlobalEndTS = 0; ContextEndTS = 0; GlobalCompleteTS = 0; ContextCompleteTS = 0; } bool isCompleted() const { return true; } static GraphicsAllocation::AllocationType getAllocationType() { return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER; } uint64_t GlobalStartTS; uint64_t ContextStartTS; uint64_t GlobalEndTS; uint64_t ContextEndTS; uint64_t GlobalCompleteTS; uint64_t ContextCompleteTS; }; } 
// namespace NEO compute-runtime-20.13.16352/opencl/source/event/perf_counter.h000066400000000000000000000015131363734646600241220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/event/hw_timestamps.h" namespace NEO { struct HwPerfCounter { void initialize() { report[0] = 0; } static GraphicsAllocation::AllocationType getAllocationType() { return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER; } bool isCompleted() const { return true; } // Gpu report size is not known during compile time. // Such information will be provided by metrics library dll. // Bellow variable will be allocated dynamically based on information // from metrics library. Take look at CommandStreamReceiver::getEventPerfCountAllocator. uint8_t report[1] = {}; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/user_event.cpp000066400000000000000000000042141363734646600241420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "user_event.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" namespace NEO { UserEvent::UserEvent(Context *ctx) : Event(ctx, nullptr, CL_COMMAND_USER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady) { transitionExecutionStatus(CL_QUEUED); } void UserEvent::updateExecutionStatus() { return; } bool UserEvent::wait(bool blocking, bool useQuickKmdSleep) { while (updateStatusAndCheckCompletion() == false) { if (blocking == false) { return false; } } return true; } uint32_t UserEvent::getTaskLevel() { if (peekExecutionStatus() == CL_COMPLETE) { return 0; } return CompletionStamp::levelNotReady; } bool UserEvent::isInitialEventStatus() const { return 
executionStatus == CL_QUEUED; } VirtualEvent::VirtualEvent(CommandQueue *cmdQ, Context *ctx) : Event(ctx, cmdQ, -1, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady) { transitionExecutionStatus(CL_QUEUED); // internal object - no need for API refcount convertToInternalObject(); } void VirtualEvent::updateExecutionStatus() { ; } bool VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) { while (updateStatusAndCheckCompletion() == false) { if (blocking == false) { return false; } } return true; } uint32_t VirtualEvent::getTaskLevel() { uint32_t taskLevel = 0; if (cmdQueue != nullptr) { auto &csr = cmdQueue->getGpgpuCommandStreamReceiver(); taskLevel = csr.peekTaskLevel(); } return taskLevel; } bool VirtualEvent::setStatus(cl_int status) { // virtual events are just helper events and will have either // "waiting" (after construction) or "complete" (on change if not blocked) execution state if (isStatusCompletedByTermination(status) == false) { status = CL_COMPLETE; } return Event::setStatus(status); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/event/user_event.h000066400000000000000000000015321363734646600236070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "event.h" namespace NEO { class CommandQueue; class Context; class UserEvent : public Event { public: UserEvent(Context *ctx = nullptr); ~UserEvent() override = default; bool wait(bool blocking, bool useQuickKmdSleep) override; void updateExecutionStatus() override; uint32_t getTaskLevel() override; bool isInitialEventStatus() const; }; class VirtualEvent : public Event { public: VirtualEvent(CommandQueue *cmdQ = nullptr, Context *ctx = nullptr); ~VirtualEvent() override = default; bool wait(bool blocking, bool useQuickKmdSleep) override; bool setStatus(cl_int status) override; void updateExecutionStatus() override; uint32_t getTaskLevel() override; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/execution_model/000077500000000000000000000000001363734646600233205ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/execution_model/CMakeLists.txt000066400000000000000000000004411363734646600260570ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_EXECUTION_MODEL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_enqueue.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_EXECUTION_MODEL}) compute-runtime-20.13.16352/opencl/source/execution_model/device_enqueue.h000066400000000000000000000110431363734646600264560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once // Uncomment this macro to build "empty" schedulers //#define WA_DISABLE_SCHEDULERS 1 #if !defined(__OPENCL_VERSION__) #include typedef uint32_t uint; typedef uint64_t ulong; #endif #define OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD (4 * sizeof(uint)) #define OCLRT_SIZEOF_MEDIA_CURBE_LOAD_DEVICE_CMD (4 * sizeof(uint)) #define OCLRT_SIZEOF_MEDIA_STATE_FLUSH (2 * sizeof(uint)) #define OCLRT_SIZEOF_MI_ATOMIC_CMD (11 * sizeof(uint)) #define OCLRT_SIZEOF_MEDIA_VFE_STATE_CMD (9 * sizeof(uint)) #define OCLRT_SIZEOF_MI_ARB_CHECK (1 * sizeof(uint)) #define OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET (4) #define OCLRT_SIZEOF_MI_ATOMIC_CMD_DWORD_OFFSET (11) #define OCLRT_SIZEOF_MEDIA_CURBE_LOAD_DEVICE_CMD_DWORD_OFFSET (4) #define OCLRT_IMM_LOAD_REGISTER_CMD_DEVICE_CMD_DWORD_OFFSET (3) #define OCLRT_SIZEOF_MSFLUSH_DWORD (2) #define OCLRT_SIZEOF_MI_ARB_CHECK_DWORD (1) #define OCLRT_SIZEOF_MEDIA_VFE_STATE_DWORD (9) #define OCLRT_BATCH_BUFFER_END_CMD (83886080) //Constant buffer stuff #define COMPILER_DATA_PARAMETER_GLOBAL_SURFACE (49) #define SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT (50) #define 
SCHEDULER_DATA_PARAMETER_GLOBAL_POINTER_SHIFT (63) #define SCHEDULER_DATA_PARAMETER_SAMPLER_SHIFT (51) #define SCHEDULER_DATA_PARAMETER_SAMPLER_ADDED_VALUE (2 * SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT) #define CS_PREFETCH_SIZE (8 * 64) #define ALL_BITS_SET_DWORD_MASK (0xffffffff) #define DWORD_SIZE_IN_BITS (32) #define CL_sRGB 0x10BF #define CL_sRGBX 0x10C0 #define CL_sRGBA 0x10C1 #define CL_sBGRA 0x10C2 //scheduler currently can spawn up to 8 GPGPU_WALKERS between scheduler runs, so it needs 8 * 3 HW threads for scheduling blocks + 1 HW thread to scheduler next scheduler //each HW group consist of 3 HW threads that are capable of scheduling 1 block //!!! Make sure value of this define equals MAX_NUMBER_OF_PARALLEL_GPGPU_WALKERS in DeviceEnqueueInternalTypes.h #define PARALLEL_SCHEDULER_HW_GROUPS (8) #define PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP (3) #define PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 (3) #define PARALLEL_SCHEDULER_HW_GROUPS_IN_THREADS (PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP * PARALLEL_SCHEDULER_HW_GROUPS) #define PARALLEL_SCHEDULER_NUMBER_HW_THREADS (PARALLEL_SCHEDULER_HW_GROUPS_IN_THREADS + 1) //parallel scheduler 2.0 is compiled in simd8 #define PARALLEL_SCHEDULER_COMPILATION_SIZE_20 (8) #define HW_GROUP_ID_SHIFT(COMPILATION_SIZE) ((COMPILATION_SIZE & 0x10) ? 4 : 3) #define GRF_SIZE (32) #define SIZEOF_3GRFS (3 * GRF_SIZE) //estimation for dynamic payload size #define SCHEDULER_DYNAMIC_PAYLOAD_SIZE (PARALLEL_SCHEDULER_NUMBER_HW_THREADS * SIZEOF_3GRFS) //assume that max DSH per walker is 9472B ( assuming registers can take up to 4KB, and max dynamic payload is around 96B * 56(HW threads) it should be fine. #define MAX_DSH_SIZE_PER_ENQUEUE 9472 #define MAX_BINDING_TABLE_INDEX (253) #define MAX_SSH_PER_KERNEL_SIZE (MAX_BINDING_TABLE_INDEX * 64) //max SSH that can be one kernel. It is 253 binding table entries multiplied by the Surface State size. 
#define OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(ArgOffset) (ArgOffset + MAX_SSH_PER_KERNEL_SIZE) #define OCLRT_IMAGE_MAX_OBJECT_ID (MAX_SSH_PER_KERNEL_SIZE - 1) #define OCLRT_SAMPLER_MIN_OBJECT_ID (MAX_SSH_PER_KERNEL_SIZE) typedef enum tagDebugDataTypes { DBG_DEFAULT = 0, DBG_COMMAND_QUEUE = 1, DBG_EVENTS_UPDATE = 2, DBG_EVENTS_NUMBER = 3, DBG_STACK_UPDATE = 4, DBG_BEFORE_PATCH = 5, DBG_KERNELID = 6, DBG_DSHOFFSET = 7, DBG_IDOFFSET = 8, DBG_AFTER_PATCH = 9, DBG_UNSPECIFIED = 10, DBG_ENQUEUES_NUMBER = 11, DBG_LOCAL_ID, DBG_WKG_ID, DBG_SCHEDULER_END, // Add here new debug enums DBG_MAX } DebugDataTypes; // Struct for debugging kernels typedef struct { DebugDataTypes m_dataType; uint m_dataSize; } DebugDataInfo; typedef struct { enum DDBFlags { DDB_HAS_DATA_INFO = 1, DDB_SCHEDULER_PROFILING = 2, DDB_COMMAND_QUEUE_RAW = 4 } ddbFlags; uint m_size; uint m_stackTop; //index of data stack uint m_dataInfoTop; //index of the top of DataInfo stack, this stacks grows with decrementing address uint m_stackBottom; uint m_dataInfoBottom; //index of the bottom of DataInfo uint m_dataInfoSize; uint m_flags; uint m_offset; //current offset indicates free place uint m_data[100]; //buffer } DebugDataBuffer; #pragma pack(push) #pragma pack(4) #include "DeviceEnqueueInternalTypes.h" #pragma pack(pop) compute-runtime-20.13.16352/opencl/source/gen11/000077500000000000000000000000001363734646600210505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen11/CMakeLists.txt000066400000000000000000000002031363734646600236030ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN11) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/source/gen11/aub_command_stream_receiver_gen11.cpp000066400000000000000000000017141363734646600302560ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" 
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw_bdw_plus.inl" #include "opencl/source/helpers/base_object.h" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; template <> constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { return 0x00008000; } template <> void populateFactoryTable>() { extern AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE]; UNRECOVERABLE_IF(!isInRange(gfxCore, aubCommandStreamReceiverFactory)); aubCommandStreamReceiverFactory[gfxCore] = AUBCommandStreamReceiverHw::create; } template class AUBCommandStreamReceiverHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/aub_mapper.h000066400000000000000000000013271363734646600233370ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/gen_common/aub_mapper_base.h" #include "engine_node.h" namespace NEO { struct ICLFamily; template <> struct AUBFamilyMapper { enum { device = AubMemDump::DeviceValues::Icllp }; using AubTraits = AubMemDump::Traits; static const AubMemDump::LrcaHelper *const csTraits[aub_stream::NUM_ENGINES]; static const MMIOList globalMMIO; static const MMIOList *perEngineMMIO[aub_stream::NUM_ENGINES]; typedef AubMemDump::AubDump AUB; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/aub_mem_dump_gen11.cpp000066400000000000000000000064631363734646600252120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.inl" #include "opencl/source/aub_mem_dump/aub_mem_dump.inl" #include "aub_mapper.h" #include 
"config.h" namespace AubMemDump { enum { device = DeviceValues::Icllp }; // Instantiate these common template implementations. template struct AubDump>; template struct AubDump>; template struct AubPageTableHelper32>; template struct AubPageTableHelper64>; } // namespace AubMemDump namespace NEO { using Family = ICLFamily; static const AubMemDump::LrcaHelperRcs rcs(0x002000); static const AubMemDump::LrcaHelperBcs bcs(0x022000); static const AubMemDump::LrcaHelperVcs vcs(0x1c0000); static const AubMemDump::LrcaHelperVecs vecs(0x1c8000); const AubMemDump::LrcaHelper *const AUBFamilyMapper::csTraits[aub_stream::NUM_ENGINES] = { &rcs, &bcs, &vcs, &vecs}; const MMIOList AUBFamilyMapper::globalMMIO; static const MMIOList mmioListRCS = { MMIOPair(0x000020d8, 0x00020000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x229c), 0xffff8280), MMIOPair(0x0000C800, 0x00000009), MMIOPair(0x0000C804, 0x00000038), MMIOPair(0x0000C808, 0x0000003B), MMIOPair(0x0000C80C, 0x00000039), MMIOPair(0x0000C810, 0x00000037), MMIOPair(0x0000C814, 0x00000039), MMIOPair(0x0000C818, 0x00000037), MMIOPair(0x0000C81C, 0x0000001B), MMIOPair(0x0000C820, 0x00060037), MMIOPair(0x0000C824, 0x00000032), MMIOPair(0x0000C828, 0x00000033), MMIOPair(0x0000C82C, 0x0000003B), MMIOPair(0x0000C8C0, 0x00000037), MMIOPair(0x0000E18C, 0x00200020), }; static const MMIOList mmioListBCS = { MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x229c), 0xffff8280), }; static const MMIOList mmioListVCS = { MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x229c), 0xffff8280), }; static const MMIOList mmioListVECS = { MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x229c), 0xffff8280), }; const MMIOList *AUBFamilyMapper::perEngineMMIO[aub_stream::NUM_ENGINES] = { &mmioListRCS, &mmioListBCS, &mmioListVCS, &mmioListVECS}; } // namespace NEO namespace AubAllocDump { using namespace NEO; template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); 
template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); } // namespace AubAllocDump compute-runtime-20.13.16352/opencl/source/gen11/buffer_gen11.cpp000066400000000000000000000005321363734646600240200ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/mem_obj/buffer_bdw_plus.inl" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/command_queue_gen11.cpp000066400000000000000000000013461363734646600253750ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_plus.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" #include "enqueue_init_dispatch_globals.h" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; template class CommandQueueHw; template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; 
commandQueueFactory[gfxCore] = CommandQueueHw::create; } } // namespace NEO command_stream_receiver_simulated_common_hw_gen11.cpp000066400000000000000000000015021363734646600334600ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen11/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw_bdw_plus.inl" namespace NEO { typedef ICLFamily Family; template <> void CommandStreamReceiverSimulatedCommonHw::submitLRCA(const MiContextDescriptorReg &contextDescriptor) { auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase; stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); // Load our new exec list stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); } template class CommandStreamReceiverSimulatedCommonHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/device_enqueue.h000066400000000000000000000020271363734646600242100ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "../execution_model/device_enqueue.h" // Uncomment this macro to build "empty" schedulers //#define WA_DISABLE_SCHEDULERS 1 #define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G11 (15 * sizeof(uint)) #define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G11 (6 * sizeof(uint)) #define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G11_DWORD_OFFSET (6) #define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G11_DWORD_OFFSET (15) #define SLB_SPACE_FOR_EACH_ENQUEUE_UNALIGNED_GEN11 (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G11 + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G11 + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G11) #define 
SLB_SIZE_ALIGNEMENT_WA_GEN11 52 #define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN11 (SLB_SPACE_FOR_EACH_ENQUEUE_UNALIGNED_GEN11 + SLB_SIZE_ALIGNEMENT_WA_GEN11 + CS_PREFETCH_SIZE) #define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN11 (128) compute-runtime-20.13.16352/opencl/source/gen11/device_queue_gen11.cpp000066400000000000000000000024511363734646600252140ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/device_queue/device_queue_hw_bdw_plus.inl" #include "opencl/source/device_queue/device_queue_hw_profiling.inl" #include "opencl/source/gen11/device_enqueue.h" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; template <> void populateFactoryTable>() { extern DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE]; deviceQueueFactory[gfxCore] = DeviceQueueHw::create; } static const size_t csPrefetchSizeAlignementWa = SLB_SIZE_ALIGNEMENT_WA_GEN11; template <> size_t DeviceQueueHw::getCSPrefetchSize() { return 8 * MemoryConstants::cacheLineSize + csPrefetchSizeAlignementWa; } template <> size_t DeviceQueueHw::getWaCommandsSize() { return 0; } template <> void DeviceQueueHw::addArbCheckCmdWa() {} template <> void DeviceQueueHw::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) {} template <> void DeviceQueueHw::addLriCmdWa(bool setArbCheck) {} template <> void DeviceQueueHw::addPipeControlCmdWa(bool isNoopCmd) {} template class DeviceQueueHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/enable_family_full_ocl_gen11.cpp000066400000000000000000000023321363734646600272150ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/command_queue/command_queue_hw.h" #include 
"opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/device_queue/device_queue_hw.h" #ifdef HAVE_INSTRUMENTATION #include "opencl/source/event/perf_counter.h" #endif #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { typedef ICLFamily Family; struct EnableOCLGen11 { EnableOCLGen11() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLGen11 enable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/experimental_command_buffer_gen11.cpp000066400000000000000000000021001363734646600302640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/experimental_command_buffer.inl" #include "shared/source/helpers/hw_helper.h" namespace NEO { typedef ICLFamily GfxFamily; template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset); template size_t ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept; template size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer(); template size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept; template void ExperimentalCommandBuffer::addTimeStampPipeControl(); template size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept; template void ExperimentalCommandBuffer::addExperimentalCommands(); template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen11/gpgpu_walker_gen11.cpp000066400000000000000000000006741363734646600252450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_info.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_plus.inl" #include "opencl/source/command_queue/hardware_interface_bdw_plus.inl" namespace NEO { template class HardwareInterface; template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/gtpin_setup_gen11.cpp000066400000000000000000000013331363734646600251100ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef ICLFamily Family; static const auto gfxFamily = IGFX_GEN11_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_11; } template class GTPinHwHelperHw; struct GTPinEnableGen11 { GTPinEnableGen11() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen11 gtpinEnable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/hardware_commands_helper_gen11.cpp000066400000000000000000000010771363734646600275710ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper.inl" #include "opencl/source/helpers/hardware_commands_helper_base.inl" namespace NEO { template <> bool HardwareCommandsHelper::doBindingTablePrefetch() { 
return false; } template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/hw_helper_gen11.cpp000066400000000000000000000015021363734646600245220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/flat_batch_buffer_helper_hw.inl" #include "shared/source/helpers/hw_helper_bdw_plus.inl" #include "opencl/source/aub/aub_helper_bdw_plus.inl" namespace NEO { typedef ICLFamily Family; template <> uint32_t HwHelperHw::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const { return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * 8; } template <> std::string HwHelperHw::getExtensions() const { return "cl_intel_subgroup_local_block_io "; } template class AubHelperHw; template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/hw_info_ehl.inl000066400000000000000000000246741363734646600240520ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "ehl"; bool isSimulationEHL(unsigned short deviceId) { switch (deviceId) { case IEHL_1x4x8_SUPERSKU_DEVICE_A0_ID: return true; } return false; }; const PLATFORM EHL::platform = { IGFX_ELKHARTLAKE, PCH_UNKNOWN, IGFX_GEN11_CORE, IGFX_GEN11_CORE, PLATFORM_NONE, // default init 0, // usDeviceID 0, // usRevId. 
0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable EHL::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max36BitAddress, // gpuAddressSpace 83.333, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationEHL, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 12, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Ehl, // aubDeviceId 1, // extraQuantityThreadsPerEU 64, // slmSize sizeof(EHL::GRF), // grfSize false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 false, // ftrSupports64BitMath false, // ftrSvm true, // ftrSupportsCoherency true, // ftrSupportsVmeAvcTextureSampler true, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages true, // ftr64KBpages true, // instrumentationEnabled true, // forceStatelessCompilationFor32Bit "lp", // platformType true, // sourceLevelDebuggerSupported false, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages true, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable EHL::workaroundTable = {}; FeatureTable EHL::featureTable = {}; void EHL::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrL3IACoherency = true; featureTable->ftrPPGTT = true; featureTable->ftrSVM = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrStandardMipTailFormat = true; featureTable->ftrDisplayYTiling = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; 
featureTable->ftrTileMappedResource = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrTileY = true; featureTable->ftrAstcHdr2D = true; featureTable->ftrAstcLdr2D = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; }; const HardwareInfo EHL_1x2x4::hwInfo = { &EHL::platform, &EHL::featureTable, &EHL::workaroundTable, &EHL_1x2x4::gtSystemInfo, EHL::capabilityTable, }; GT_SYSTEM_INFO EHL_1x2x4::gtSystemInfo = {0}; void EHL_1x2x4::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * EHL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 1280; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 56; gtSysInfo->TotalHsThreads = 56; gtSysInfo->TotalDsThreads = 56; gtSysInfo->TotalGsThreads = 56; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = EHL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = EHL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = EHL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo EHL_1x4x4::hwInfo = { &EHL::platform, &EHL::featureTable, &EHL::workaroundTable, &EHL_1x4x4::gtSystemInfo, EHL::capabilityTable, }; GT_SYSTEM_INFO EHL_1x4x4::gtSystemInfo = {0}; void 
EHL_1x4x4::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * EHL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 1280; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 112; gtSysInfo->TotalHsThreads = 112; gtSysInfo->TotalDsThreads = 112; gtSysInfo->TotalGsThreads = 112; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = EHL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = EHL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = EHL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo EHL_1x4x8::hwInfo = { &EHL::platform, &EHL::featureTable, &EHL::workaroundTable, &EHL_1x4x8::gtSystemInfo, EHL::capabilityTable, }; GT_SYSTEM_INFO EHL_1x4x8::gtSystemInfo = {0}; void EHL_1x4x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * EHL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 1280; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 224; gtSysInfo->TotalHsThreads = 224; gtSysInfo->TotalDsThreads = 224; gtSysInfo->TotalGsThreads = 224; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = EHL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = EHL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = EHL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo EHL_1x4x6::hwInfo = { 
&EHL::platform, &EHL::featureTable, &EHL::workaroundTable, &EHL_1x4x6::gtSystemInfo, EHL::capabilityTable, }; GT_SYSTEM_INFO EHL_1x4x6::gtSystemInfo = {0}; void EHL_1x4x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * EHL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 1280; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 168; gtSysInfo->TotalHsThreads = 168; gtSysInfo->TotalDsThreads = 168; gtSysInfo->TotalGsThreads = 168; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = EHL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = EHL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = EHL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo EHL::hwInfo = EHL_1x4x8::hwInfo; const uint64_t EHL::defaultHardwareInfoConfig = 0x100040008; void setupEHLHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100040008) { EHL_1x4x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100040006) { EHL_1x4x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100040004) { EHL_1x4x4::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100020004) { EHL_1x2x4::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x0) { // Default config EHL_1x4x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*EHL::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupEHLHardwareInfoImpl; } // 
namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/hw_info_gen11.cpp000066400000000000000000000005401363734646600241770ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef SUPPORT_ICLLP #include "hw_info_icllp.inl" #endif #ifdef SUPPORT_LKF #include "hw_info_lkf.inl" #endif #ifdef SUPPORT_EHL #include "hw_info_ehl.inl" #endif namespace NEO { const char *GfxFamilyMapper::name = "Gen11"; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/hw_info_icllp.inl000066400000000000000000000225151363734646600243750ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "icllp"; bool isSimulationICLLP(unsigned short deviceId) { switch (deviceId) { case IICL_LP_GT1_MOB_DEVICE_F0_ID: return true; } return false; }; const PLATFORM ICLLP::platform = { IGFX_ICELAKE_LP, PCH_UNKNOWN, IGFX_GEN11_CORE, IGFX_GEN11_CORE, PLATFORM_NONE, // default init 0, // usDeviceID 0, // usRevId. 
0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable ICLLP::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max48BitAddress, // gpuAddressSpace 83.333, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationICLLP, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 21, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Icllp, // aubDeviceId 1, // extraQuantityThreadsPerEU 64, // slmSize sizeof(ICLLP::GRF), // grfSize false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 false, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency true, // ftrSupportsVmeAvcTextureSampler true, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages false, // ftr64KBpages true, // instrumentationEnabled true, // forceStatelessCompilationFor32Bit "lp", // platformType true, // sourceLevelDebuggerSupported true, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages true, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable ICLLP::workaroundTable = {}; FeatureTable ICLLP::featureTable = {}; void ICLLP::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrL3IACoherency = true; featureTable->ftrPPGTT = true; featureTable->ftrSVM = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrStandardMipTailFormat = true; featureTable->ftrDisplayYTiling = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; 
featureTable->ftrTileMappedResource = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrTileY = true; featureTable->ftrAstcHdr2D = true; featureTable->ftrAstcLdr2D = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; }; const HardwareInfo ICLLP_1x8x8::hwInfo = { &ICLLP::platform, &ICLLP::featureTable, &ICLLP::workaroundTable, &ICLLP_1x8x8::gtSystemInfo, ICLLP::capabilityTable, }; GT_SYSTEM_INFO ICLLP_1x8x8::gtSystemInfo = {0}; void ICLLP_1x8x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * ICLLP::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 3072; gtSysInfo->L3BankCount = 8; gtSysInfo->MaxFillRate = 16; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 5; gtSysInfo->MaxEuPerSubSlice = ICLLP::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = ICLLP::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = ICLLP::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo ICLLP_1x4x8::hwInfo = { &ICLLP::platform, &ICLLP::featureTable, &ICLLP::workaroundTable, &ICLLP_1x4x8::gtSystemInfo, ICLLP::capabilityTable, }; 
GT_SYSTEM_INFO ICLLP_1x4x8::gtSystemInfo = {0}; void ICLLP_1x4x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * ICLLP::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 2304; gtSysInfo->L3BankCount = 6; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 364; gtSysInfo->TotalHsThreads = 224; gtSysInfo->TotalDsThreads = 364; gtSysInfo->TotalGsThreads = 224; gtSysInfo->TotalPsThreadsWindowerRange = 128; gtSysInfo->CsrSizeInMb = 5; gtSysInfo->MaxEuPerSubSlice = ICLLP::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = ICLLP::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = ICLLP::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo ICLLP_1x6x8::hwInfo = { &ICLLP::platform, &ICLLP::featureTable, &ICLLP::workaroundTable, &ICLLP_1x6x8::gtSystemInfo, ICLLP::capabilityTable, }; GT_SYSTEM_INFO ICLLP_1x6x8::gtSystemInfo = {0}; void ICLLP_1x6x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * ICLLP::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 2304; gtSysInfo->L3BankCount = 6; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 364; gtSysInfo->TotalHsThreads = 224; gtSysInfo->TotalDsThreads = 364; gtSysInfo->TotalGsThreads = 224; gtSysInfo->TotalPsThreadsWindowerRange = 128; gtSysInfo->CsrSizeInMb = 5; gtSysInfo->MaxEuPerSubSlice = ICLLP::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = ICLLP::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = ICLLP::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) 
{ setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo ICLLP::hwInfo = ICLLP_1x8x8::hwInfo; const uint64_t ICLLP::defaultHardwareInfoConfig = 0x100080008; void setupICLLPHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100080008) { ICLLP_1x8x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100040008) { ICLLP_1x4x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100060008) { ICLLP_1x6x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x0) { // Default config ICLLP_1x8x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*ICLLP::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupICLLPHardwareInfoImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/hw_info_lkf.inl000066400000000000000000000151301363734646600240410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds_lkf.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "lkf"; bool isSimulationLKF(unsigned short deviceId) { switch (deviceId) { case ILKF_1x8x8_DESK_DEVICE_F0_ID: return true; } return false; }; const PLATFORM LKF::platform = { IGFX_LAKEFIELD, PCH_UNKNOWN, IGFX_GEN11_CORE, IGFX_GEN11_CORE, PLATFORM_NONE, // default init 0, // usDeviceID 0, // usRevId. 
0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable LKF::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max36BitAddress, // gpuAddressSpace 83.333, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationLKF, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 12, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Lkf, // aubDeviceId 1, // extraQuantityThreadsPerEU 64, // slmSize sizeof(LKF::GRF), // grfSize false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 false, // ftrSupports64BitMath false, // ftrSvm true, // ftrSupportsCoherency false, // ftrSupportsVmeAvcTextureSampler false, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages true, // ftr64KBpages true, // instrumentationEnabled true, // forceStatelessCompilationFor32Bit "lp", // platformType true, // sourceLevelDebuggerSupported false, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages true, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable LKF::workaroundTable = {}; FeatureTable LKF::featureTable = {}; void LKF::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrL3IACoherency = true; featureTable->ftrPPGTT = true; featureTable->ftrSVM = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrStandardMipTailFormat = true; featureTable->ftrDisplayYTiling = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; 
featureTable->ftrTileMappedResource = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrTileY = true; featureTable->ftrAstcHdr2D = true; featureTable->ftrAstcLdr2D = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; }; const HardwareInfo LKF_1x8x8::hwInfo = { &LKF::platform, &LKF::featureTable, &LKF::workaroundTable, &LKF_1x8x8::gtSystemInfo, LKF::capabilityTable, }; GT_SYSTEM_INFO LKF_1x8x8::gtSystemInfo = {0}; void LKF_1x8x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * LKF::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 2560; gtSysInfo->L3BankCount = 8; gtSysInfo->MaxFillRate = 16; gtSysInfo->TotalVsThreads = 448; gtSysInfo->TotalHsThreads = 448; gtSysInfo->TotalDsThreads = 448; gtSysInfo->TotalGsThreads = 448; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = LKF::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = LKF::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = LKF::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo LKF::hwInfo = LKF_1x8x8::hwInfo; const uint64_t LKF::defaultHardwareInfoConfig = 0x100080008; void setupLKFHardwareInfoImpl(HardwareInfo *hwInfo, bool 
setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100080008) { LKF_1x8x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x0) { // Default config LKF_1x8x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*LKF::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupLKFHardwareInfoImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/image_gen11.cpp000066400000000000000000000030311363734646600236260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds_base.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/image.inl" #include namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; template void ImageHw::setMediaSurfaceRotation(void *memory) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setRotation(MEDIA_SURFACE_STATE::ROTATION_NO_ROTATION_OR_0_DEGREE); surfaceState->setXOffset(0); surfaceState->setYOffset(0); } template void ImageHw::setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceMemoryObjectControlStateIndexToMocsTables(value); } template <> void ImageHw::appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState) { if (hasAlphaChannel(&imageFormat)) { surfaceState->setSampleTapDiscardDisable(RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_ENABLE); } } #include "opencl/source/mem_obj/image_factory_init.inl" } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen11/linux/000077500000000000000000000000001363734646600222075ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen11/linux/command_stream_receiver_gen11.cpp000066400000000000000000000011501363734646600305600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/os_interface/linux/device_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class DrmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/linux/hw_info_config_ehl.inl000066400000000000000000000010601363734646600265160ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->SliceCount = 1; return 0; } template class HwInfoConfigHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/linux/hw_info_config_gen11.cpp000066400000000000000000000006171363734646600266700ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.inl" #include "shared/source/os_interface/hw_info_config_bdw_plus.inl" #ifdef SUPPORT_ICLLP #include "hw_info_config_icllp.inl" #endif #ifdef SUPPORT_LKF #include "hw_info_config_lkf.inl" #endif #ifdef 
SUPPORT_EHL #include "hw_info_config_ehl.inl" #endif compute-runtime-20.13.16352/opencl/source/gen11/linux/hw_info_config_icllp.inl000066400000000000000000000010571363734646600270570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->SliceCount = 1; return 0; } template class HwInfoConfigHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/linux/hw_info_config_lkf.inl000066400000000000000000000010551363734646600265260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->SliceCount = 1; return 0; } template class HwInfoConfigHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/sampler_gen11.cpp000066400000000000000000000005761363734646600242220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen11/scheduler_builtin_kernel.inl000066400000000000000000000055661363734646600266340ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "scheduler_definitions.h" uint GetPatchValueForSLMSize(uint slMsize) { uint PatchValue = 0; if (slMsize == 0) { PatchValue = 0; } else if (slMsize <= (1 * 1024)) { PatchValue = 1; } else if (slMsize <= (2 * 1024)) { PatchValue = 2; } else if (slMsize <= (4 * 1024)) { PatchValue = 3; } else if (slMsize <= (8 * 1024)) { PatchValue = 4; } else if (slMsize <= (16 * 1024)) { PatchValue = 5; } else if (slMsize <= (32 * 1024)) { PatchValue = 6; } else if (slMsize <= (64 * 1024)) { PatchValue = 7; } return PatchValue; } //on SKL we have pipe control in pairs, therefore when we NOOP we need to do this for both pipe controls void NOOPCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; } //on SKL+ with mid thread preemption we need to have 2 pipe controls instead of 1 any time we do post sync operation void PutCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; //first pipe control doing CS stall secondaryBatchBuffer[dwordOffset] = 
PIPE_CONTROL_CSTALL_DWORD0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD1; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; //second pipe control , doing actual timestamp write secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD1; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; } compute-runtime-20.13.16352/opencl/source/gen11/scheduler_definitions.h000066400000000000000000000154751363734646600256060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #define SCHEDULER_COMPILATION_SIZE 8 #define SIZEOF_INTERFACE_DESCRIPTOR_DATA_G11 32 #define NUMBER_OF_INERFACE_DESCRIPTORS 64 #define IDT_BREAKDOWN (NUMBER_OF_INERFACE_DESCRIPTORS - 2) #define MAX_WKG_SIZE 448 #define INTERFACE_DESCRIPTOR_TABLE_SIZE_G11 (NUMBER_OF_INERFACE_DESCRIPTORS * SIZEOF_INTERFACE_DESCRIPTOR_DATA_G11) #define SIZEOF_COLOR_CALCULATOR_STATE_G11 0xC0 #define INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS_G11 SIZEOF_COLOR_CALCULATOR_STATE_G11 #define OCLRT_SIZEOF_SAMPLER_STATE_G11 (16) #define SIZEOF_COLOR_CALCULATOR_STATE SIZEOF_COLOR_CALCULATOR_STATE_G11 #define SIZEOF_INTERFACE_DESCRIPTOR_DATA SIZEOF_INTERFACE_DESCRIPTOR_DATA_G11 #define INTERFACE_DESCRIPTOR_TABLE_SIZE INTERFACE_DESCRIPTOR_TABLE_SIZE_G11 #define INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS_G11 #define OCLRT_SIZEOF_SAMPLER_STATE OCLRT_SIZEOF_SAMPLER_STATE_G11 #define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE (SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN11) 
#define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES (SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN11) //#define OCLRT_MEDIA_VFE_STATE_OFFSET ( MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD ) #define OCLRT_MEDIA_VFE_STATE_OFFSET (0) //address is QWORD in size and starts on DWORD 1 #define MEDIA_VFE_STATE_ADDRESS_OFFSET (OCLRT_MEDIA_VFE_STATE_OFFSET + 1) // DWORD OFFSET #define MEDIA_STATE_FLUSH_INITIAL_OFFSET 0 //bits 0-5 of 1st DWORD #define MEDIA_STATE_FLUSH_INITIAL_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + 1) #define MI_ARB_CHECK_AFTER_MEDIA_STATE_FLUSH_INITIAL_OFFSET 0 #define MI_ATOMIC_CMD_OFFSET 0 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) // DWORD OFFSET of InterfaceDescriptor Length // bits 0 - 16 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORLENGTH_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 2) // DWORD OFFSET of Interface Descriptor Start Address #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORSTARTADDRESS_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 3) #define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET) #define INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD 3 #define INTERFACE_DESCRIPTOR_BINDING_TABLE_POINTER_DWORD 4 #define INTERFACE_DESCRIPTOR_CONSTANT_URB_ENTRY_READ_OFFSET 5 #define INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD 6 #define INTERFACE_DESCRIPTOR_SLMSIZE_DWORD 6 #define INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT 9 #define SAMPLER_STATE_INDIRECT_STATE_MASK (0x7FFFFC0) #define SAMPLER_STATE_BORDER_COLOR_MASK (0xFFFFFFE0) #define SAMPLER_STATE_DESCRIPTOR_BORDER_COLOR_POINTER_DWORD 2 #define GPGPU_WALKER_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G11_DWORD_OFFSET) // DWORD OFFSET of the Interface Descriptor Offset for GPGPU_WALKER // bits 0 - 5 #define 
GPGPU_WALKER_INTERFACE_DESCRIPTOR_ID_OFFSET (GPGPU_WALKER_OFFSET + 1) // DWORD OFFSET of the Indirect data length Offset for GPGPU_WALKER // bits 0 - 16 #define GPGPU_WALKER_INDIRECT_DATA_LENGTH_OFFSET (GPGPU_WALKER_OFFSET + 2) // DWORD OFFSET of the Indirect Start Address for GPGPU_WALKER #define GPGPU_WALKER_INDIRECT_START_ADDRESS_OFFSET (GPGPU_WALKER_OFFSET + 3) // DWORD OFFSET of the Thread Width Counter Maximum for GPGPU_WALKER // bits 0 - 5 #define GPGPU_WALKER_THREAD_WIDTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Height Counter Maximum for GPGPU_WALKER // bits 8 - 13 #define GPGPU_WALKER_THREAD_HEIGHT_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Depth Counter Maximum for GPGPU_WALKER // bits 16 - 21 #define GPGPU_WALKER_THREAD_DEPTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the SIMD Size for GPGPU_WALKER // bits 30 - 31 #define GPGPU_WALKER_SIMDSIZE_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Starting in X pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_X (GPGPU_WALKER_OFFSET + 5) // DWORD OFFSET of the X Dimension for GPGPU_WALKER #define GPGPU_WALKER_XDIM_DWORD (GPGPU_WALKER_OFFSET + 7) // DWORD OFFSET of the Starting in Y pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_Y (GPGPU_WALKER_OFFSET + 8) // DWORD OFFSET of the Y Dimension for GPGPU_WALKER #define GPGPU_WALKER_YDIM_DWORD (GPGPU_WALKER_OFFSET + 10) // DWORD OFFSET of the Starting in Z pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_Z (GPGPU_WALKER_OFFSET + 11) // DWORD OFFSET of the X Dimension for GPGPU_WALKER #define GPGPU_WALKER_ZDIM_DWORD (GPGPU_WALKER_OFFSET + 12) // DWORD OFFSET of the Right or X Mask for GPGPU_WALKER #define GPGPU_WALKER_XMASK_DWORD (GPGPU_WALKER_OFFSET + 13) // DWORD OFFSET of the Bottom or Y Mask for GPGPU_WALKER #define GPGPU_WALKER_YMASK_DWORD (GPGPU_WALKER_OFFSET + 14) #define MEDIA_STATE_FLUSH_OFFSET (GPGPU_WALKER_OFFSET + 
OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G11_DWORD_OFFSET) //bits 0-5 of 1st DWORD of M_S_F command #define MEDIA_STATE_FLUSH_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_OFFSET + 1) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET (MEDIA_STATE_FLUSH_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET_TO_PATCH (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET) #define PIPE_CONTROL_POST_SYNC_DWORD 1 #define PIPE_CONTROL_POST_SYNC_START_BIT 14 #define PIPE_CONTROL_POST_SYNC_END_BIT 15 #define PIPE_CONTROL_GENERATE_TIME_STAMP 3 #define PIPE_CONTROL_NO_POSTSYNC_OPERATION 0 #define PIPE_CONTROL_ADDRESS_FIELD_DWORD 2 #define PIPE_CONTROL_PROFILING_START_TIMESTAMP_ADDRESS_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + PIPE_CONTROL_ADDRESS_FIELD_DWORD) //DWORD 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_END_BIT 31 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_START_BIT 0 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_END_BIT 15 #define PIPE_CONTROL_TIME_STAMP_DWORD0 0x7A000004 #define PIPE_CONTROL_TIME_STAMP_DWORD1 0x0010C4A4 #define PIPE_CONTROL_CSTALL_DWORD0 0x7A000004 #define PIPE_CONTROL_CSTALL_DWORD1 0x001004A4 #define PIPE_CONTROL_TAG_WRITE_DWORD0 0x7A000004 #define PIPE_CONTROL_TAG_WRITE_DWORD1 0x001044A4 // the value of g_cInitMiBatchBufferStartCmdG11 DWORD0 #define OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0 (0x18800101) #if defined WA_LRI_COMMANDS_EXIST #define IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G11_DWORD_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G11_DWORD_OFFSET) #endif #define OCLRT_LOAD_REGISTER_IMM_CMD 0x11000001 #define CTXT_PREMP_DBG_ADDRESS_VALUE 0x2248 #define CTXT_PREMP_ON_MI_ARB_CHECK_ONLY 0x00000100 #define CTXT_PREMP_DEFAULT_VALUE 0x0 #define IMM_LOAD_REGISTER_ADDRESS_DWORD_OFFSET 1 #define IMM_LOAD_REGISTER_VALUE_DWORD_OFFSET 2 
compute-runtime-20.13.16352/opencl/source/gen11/state_base_address_gen11.cpp000066400000000000000000000004301363734646600263630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/state_base_address.h" #include "shared/source/helpers/state_base_address_bdw_plus.inl" namespace NEO { template struct StateBaseAddressHelper; } compute-runtime-20.13.16352/opencl/source/gen11/tbx_command_stream_receiver_gen11.cpp000066400000000000000000000024041363734646600303010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/helpers/array_count.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.inl" #include "opencl/source/helpers/base_object.h" namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; template <> uint32_t TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() const { return 0x80; } template <> bool TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion() const { return true; } template <> void populateFactoryTable>() { extern TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE]; UNRECOVERABLE_IF(!isInRange(gfxCore, tbxCommandStreamReceiverFactory)); tbxCommandStreamReceiverFactory[gfxCore] = TbxCommandStreamReceiverHw::create; } template class TbxCommandStreamReceiverHw; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen11/windows/000077500000000000000000000000001363734646600225425ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen11/windows/command_stream_receiver_gen11.cpp000066400000000000000000000010521363734646600311140ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/os_interface/windows/device_command_stream.inl" #include "opencl/source/os_interface/windows/wddm_device_command_stream.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class WddmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen11/windows/gmm_callbacks_gen11.cpp000066400000000000000000000005431363734646600270220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/source/helpers/windows/gmm_callbacks.inl" using namespace NEO; template struct DeviceCallbacks; template struct TTCallbacks; compute-runtime-20.13.16352/opencl/source/gen11/windows/hw_info_config_gen11.cpp000066400000000000000000000017261363734646600272250ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/hw_info_config.inl" #include "shared/source/os_interface/hw_info_config_bdw_plus.inl" namespace NEO { #ifdef SUPPORT_ICLLP template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif #ifdef SUPPORT_LKF template <> int 
HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif #ifdef SUPPORT_EHL template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/000077500000000000000000000000001363734646600214055ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen12lp/CMakeLists.txt000066400000000000000000000002051363734646600241420ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN12LP) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/source/gen12lp/additional_files_gen12lp.cmake000066400000000000000000000013731363734646600272350ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp/helpers_gen12lp.h ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp/definitions${BRANCH_DIR_SUFFIX}/command_queue_helpers_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp/definitions${BRANCH_DIR_SUFFIX}/hardware_commands_helper_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp${BRANCH_DIR_SUFFIX}/helpers_gen12lp.cpp ) include_directories(${NEO_SOURCE_DIR}/opencl/source/gen12lp/definitions${BRANCH_DIR_SUFFIX}/) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP ${RUNTIME_SRCS_ADDITIONAL_FILES_GEN12LP})compute-runtime-20.13.16352/opencl/source/gen12lp/aub_command_stream_receiver_gen12lp.cpp000066400000000000000000000026321363734646600311500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "shared/source/memory_manager/memory_pool.h" 
#include "opencl/source/aub_mem_dump/aub_alloc_dump.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw_bdw_plus.inl" #include "opencl/source/helpers/base_object.h" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; template <> constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { return 0x00008000; } template <> void AUBCommandStreamReceiverHw::addContextToken(uint32_t dumpHandle) { AUB::createContext(*stream, dumpHandle); } template <> void populateFactoryTable>() { extern AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE]; UNRECOVERABLE_IF(!isInRange(gfxCore, aubCommandStreamReceiverFactory)); aubCommandStreamReceiverFactory[gfxCore] = AUBCommandStreamReceiverHw::create; } template <> uint32_t AUBCommandStreamReceiverHw::getGUCWorkQueueItemHeader() { if (aub_stream::ENGINE_CCS == osContext->getEngineType()) { return 0x00030401; } return 0x00030001; } template class AUBCommandStreamReceiverHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/aub_mapper.h000066400000000000000000000013331363734646600236710ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/gen_common/aub_mapper_base.h" #include "engine_node.h" namespace NEO { struct TGLLPFamily; template <> struct AUBFamilyMapper { enum { device = AubMemDump::DeviceValues::Tgllp }; using AubTraits = AubMemDump::Traits; static const AubMemDump::LrcaHelper *const csTraits[aub_stream::NUM_ENGINES]; static const MMIOList globalMMIO; static const MMIOList *perEngineMMIO[aub_stream::NUM_ENGINES]; typedef AubMemDump::AubDump AUB; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen12lp/aub_mem_dump_gen12lp.cpp000066400000000000000000000244161363734646600261020ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.inl" #include "opencl/source/aub_mem_dump/aub_mem_dump.inl" #include "aub_mapper.h" #include "config.h" #include "reg_configs_common.h" namespace AubMemDump { enum { device = DeviceValues::Tgllp }; // Instantiate these common template implementations. template struct AubDump>; template struct AubDump>; template struct AubPageTableHelper32>; template struct AubPageTableHelper64>; } // namespace AubMemDump namespace NEO { using Family = TGLLPFamily; static const AubMemDump::LrcaHelperRcs rcs(0x002000); static const AubMemDump::LrcaHelperBcs bcs(0x022000); static const AubMemDump::LrcaHelperVcs vcs(0x1c0000); static const AubMemDump::LrcaHelperVecs vecs(0x1c8000); static const AubMemDump::LrcaHelperCcs ccs(0x1a000); const AubMemDump::LrcaHelper *const AUBFamilyMapper::csTraits[aub_stream::NUM_ENGINES] = { &rcs, &bcs, &vcs, &vecs, &ccs}; const MMIOList AUBFamilyMapper::globalMMIO = { // GLOBAL_MOCS MMIOPair(0x00004000, 0x00000008), MMIOPair(0x00004004, 0x00000038), MMIOPair(0x00004008, 0x00000038), MMIOPair(0x0000400C, 0x00000008), MMIOPair(0x00004010, 0x00000018), MMIOPair(0x00004014, 0x00060038), MMIOPair(0x00004018, 0x00000000), MMIOPair(0x0000401C, 0x00000033), MMIOPair(0x00004020, 0x00060037), MMIOPair(0x00004024, 0x0000003B), MMIOPair(0x00004028, 0x00000032), MMIOPair(0x0000402C, 0x00000036), MMIOPair(0x00004030, 0x0000003A), MMIOPair(0x00004034, 0x00000033), MMIOPair(0x00004038, 0x00000037), MMIOPair(0x0000403C, 0x0000003B), MMIOPair(0x00004040, 0x00000030), MMIOPair(0x00004044, 0x00000034), MMIOPair(0x00004048, 0x00000038), MMIOPair(0x0000404C, 0x00000031), MMIOPair(0x00004050, 
0x00000032), MMIOPair(0x00004054, 0x00000036), MMIOPair(0x00004058, 0x0000003A), MMIOPair(0x0000405C, 0x00000033), MMIOPair(0x00004060, 0x00000037), MMIOPair(0x00004064, 0x0000003B), MMIOPair(0x00004068, 0x00000032), MMIOPair(0x0000406C, 0x00000036), MMIOPair(0x00004070, 0x0000003A), MMIOPair(0x00004074, 0x00000033), MMIOPair(0x00004078, 0x00000037), MMIOPair(0x0000407C, 0x0000003B), MMIOPair(0x00004080, 0x00000030), MMIOPair(0x00004084, 0x00000034), MMIOPair(0x00004088, 0x00000038), MMIOPair(0x0000408C, 0x00000031), MMIOPair(0x00004090, 0x00000032), MMIOPair(0x00004094, 0x00000036), MMIOPair(0x00004098, 0x0000003A), MMIOPair(0x0000409C, 0x00000033), MMIOPair(0x000040A0, 0x00000037), MMIOPair(0x000040A4, 0x0000003B), MMIOPair(0x000040A8, 0x00000032), MMIOPair(0x000040AC, 0x00000036), MMIOPair(0x000040B0, 0x0000003A), MMIOPair(0x000040B4, 0x00000033), MMIOPair(0x000040B8, 0x00000037), MMIOPair(0x000040BC, 0x0000003B), MMIOPair(0x000040C0, 0x00000038), MMIOPair(0x000040C4, 0x00000034), MMIOPair(0x000040C8, 0x00000038), MMIOPair(0x000040CC, 0x00000031), MMIOPair(0x000040D0, 0x00000032), MMIOPair(0x000040D4, 0x00000036), MMIOPair(0x000040D8, 0x0000003A), MMIOPair(0x000040DC, 0x00000033), MMIOPair(0x000040E0, 0x00000037), MMIOPair(0x000040E4, 0x0000003B), MMIOPair(0x000040E8, 0x00000032), MMIOPair(0x000040EC, 0x00000036), MMIOPair(0x000040F0, 0x00000038), MMIOPair(0x000040F4, 0x00000038), MMIOPair(0x000040F8, 0x00000038), MMIOPair(0x000040FC, 0x00000038), // LNCF_MOCS MMIOPair(0x0000B020, 0x00300010), MMIOPair(0x0000B024, 0x00300010), MMIOPair(0x0000B028, 0x00300030), MMIOPair(0x0000B02C, 0x00000000), MMIOPair(0x0000B030, 0x0030001F), MMIOPair(0x0000B034, 0x00170013), MMIOPair(0x0000B038, 0x0000001F), MMIOPair(0x0000B03C, 0x00000000), MMIOPair(0x0000B040, 0x00100000), MMIOPair(0x0000B044, 0x00170013), MMIOPair(0x0000B048, 0x0010001F), MMIOPair(0x0000B04C, 0x00170013), MMIOPair(0x0000B050, 0x0030001F), MMIOPair(0x0000B054, 0x00170013), MMIOPair(0x0000B058, 0x0000001F), 
MMIOPair(0x0000B05C, 0x00000000), MMIOPair(0x0000B060, 0x00100000), MMIOPair(0x0000B064, 0x00170013), MMIOPair(0x0000B068, 0x0010001F), MMIOPair(0x0000B06C, 0x00170013), MMIOPair(0x0000B070, 0x0030001F), MMIOPair(0x0000B074, 0x00170013), MMIOPair(0x0000B078, 0x0000001F), MMIOPair(0x0000B07C, 0x00000000), MMIOPair(0x0000B080, 0x00000030), MMIOPair(0x0000B084, 0x00170013), MMIOPair(0x0000B088, 0x0010001F), MMIOPair(0x0000B08C, 0x00170013), MMIOPair(0x0000B090, 0x0030001F), MMIOPair(0x0000B094, 0x00170013), MMIOPair(0x0000B098, 0x00300010), MMIOPair(0x0000B09C, 0x00300010), //PAT_INDEX MMIOPair(0x00004100, 0x0000000), MMIOPair(0x00004104, 0x0000000), MMIOPair(0x00004108, 0x0000000), MMIOPair(0x0000410c, 0x0000000), MMIOPair(0x00004110, 0x0000000), MMIOPair(0x00004114, 0x0000000), MMIOPair(0x00004118, 0x0000000), MMIOPair(0x0000411c, 0x0000000), MMIOPair(0x00004b80, 0xffff1001), //GACB_PERF_CTRL_REG MMIOPair(0x00007000, 0xffff0000), //CACHE_MODE_0 MMIOPair(0x00007004, 0xffff0000), //CACHE_MODE_1 MMIOPair(0x00009008, 0x00000200), //IDICR MMIOPair(0x0000900c, 0x00001b40), //SNPCR MMIOPair(0x0000b120, 0x14000002), //LTCDREG MMIOPair(0x00042080, 0x00000000), //CHICKEN_MISC_1 }; static const MMIOList mmioListRCS = { MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x00002058), 0x00000000), //CTX_WA_PTR_RCSUNIT MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000020a8), 0x00000000), //IMR MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE MMIOPair(0x00002090, 0xffff0000), //CHICKEN_PWR_CTX_RASTER_1 MMIOPair(0x000020e0, 0xffff4000), //FF_SLICE_CS_CHICKEN1_RCSUNIT MMIOPair(0x000020e4, 0xffff0000), //FF_SLICE_CS_CHICKEN2_RCSUNIT MMIOPair(0x000020ec, 0xffff0051), //CS_DEBUG_MODE1 // FORCE_TO_NONPRIV MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d0), 0x00007014), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d4), 0x0000e48c), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 
0x000024d8), 0x0000e18c), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024dc), 0x00004de0), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e0), 0x00004de4), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e4), 0x0000f180), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e8), 0x0000e194), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024ec), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f0), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f8), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024fc), 0x0000e000), MMIOPair(0x00002580, 0xffff0005), //CS_CHICKEN1 MMIOPair(0x0000e194, 0xffff0002), //CHICKEN_SAMPLER_2 MMIOPair(0x0000B134, 0xD0000020) //L3ALLOCREG }; static const MMIOList mmioListBCS = { MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static const MMIOList mmioListVCS = { MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static const MMIOList mmioListVECS = { MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE }; static const MMIOList mmioListCCS = { MMIOPair(0x0000ce90, 0x00010001), //GFX_MULT_CTXT_CTL MMIOPair(0x00014800, 0x00010001), //RCU_MODE MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE // FORCE_TO_NONPRIV MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024d0), 0x00007014), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024d4), 0x0000e48c), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024d8), 0x0000e18c), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024dc), 0x00004de0), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024e0), 0x00004de4), 
MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024e4), 0x0000f180), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024e8), 0x0000e194), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024ec), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024f0), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024f4), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024f8), 0x0000e000), MMIOPair(AubMemDump::computeRegisterOffset(ccs.mmioBase, 0x000024fc), 0x0000e000), MMIOPair(0x0000B234, 0xD0000020) //L3ALLOCREG_CCS0 }; const MMIOList *AUBFamilyMapper::perEngineMMIO[aub_stream::NUM_ENGINES] = { &mmioListRCS, &mmioListBCS, &mmioListVCS, &mmioListVECS, &mmioListCCS}; } // namespace NEO namespace AubAllocDump { using namespace NEO; template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); } // namespace AubAllocDump compute-runtime-20.13.16352/opencl/source/gen12lp/buffer_gen12lp.cpp000066400000000000000000000005401363734646600247110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include 
"opencl/source/mem_obj/buffer_bdw_plus.inl" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/command_queue_gen12lp.cpp000066400000000000000000000012241363734646600262620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_plus.inl" #include "command_queue_helpers_gen12lp.inl" #include "enqueue_init_dispatch_globals.h" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } template class CommandQueueHw; } // namespace NEO command_stream_receiver_simulated_common_hw_gen12lp.cpp000066400000000000000000000031501363734646600343530ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen12lp/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw_bdw_plus.inl" #include "opencl/source/gen12lp/helpers_gen12lp.h" namespace NEO { typedef TGLLPFamily Family; template <> void CommandStreamReceiverSimulatedCommonHw::initGlobalMMIO() { for (auto &mmioPair : AUBFamilyMapper::globalMMIO) { stream->writeMMIO(mmioPair.first, mmioPair.second); } Gen12LPHelpers::initAdditionalGlobalMMIO(*this, *stream); } template <> uint64_t CommandStreamReceiverSimulatedCommonHw::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) { return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | Gen12LPHelpers::getPPGTTAdditionalBits(gfxAllocation); } template <> void CommandStreamReceiverSimulatedCommonHw::getGTTData(void 
*memory, AubGTTData &data) { data = {}; data.present = true; Gen12LPHelpers::adjustAubGTTData(*this, data); } template <> void CommandStreamReceiverSimulatedCommonHw::submitLRCA(const MiContextDescriptorReg &contextDescriptor) { auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase; stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); // Load our new exec list stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); } template class CommandStreamReceiverSimulatedCommonHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/definitions/000077500000000000000000000000001363734646600237205ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen12lp/definitions/command_queue_helpers_gen12lp.inl000066400000000000000000000002351363734646600323200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/enqueue_resource_barrier.h" compute-runtime-20.13.16352/opencl/source/gen12lp/definitions/hardware_commands_helper_gen12lp.inl000066400000000000000000000001321363734646600327650ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-20.13.16352/opencl/source/gen12lp/device_enqueue.h000066400000000000000000000016011363734646600245420ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "../execution_model/device_enqueue.h" // Uncomment this macro to build "empty" schedulers //#define WA_DISABLE_SCHEDULERS 1 #define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP (15 * sizeof(uint)) #define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP (6 * sizeof(uint)) #define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET (6) #define 
OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET (15) #define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN12LP (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP + CS_PREFETCH_SIZE) #define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN12LP (128) compute-runtime-20.13.16352/opencl/source/gen12lp/device_queue_gen12lp.cpp000066400000000000000000000020401363734646600261000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/device_queue/device_queue_hw_bdw_plus.inl" #include "opencl/source/device_queue/device_queue_hw_profiling.inl" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; template <> void populateFactoryTable>() { extern DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE]; deviceQueueFactory[gfxCore] = DeviceQueueHw::create; } template <> size_t DeviceQueueHw::getWaCommandsSize() { return 0; } template <> void DeviceQueueHw::addArbCheckCmdWa() {} template <> void DeviceQueueHw::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) {} template <> void DeviceQueueHw::addLriCmdWa(bool setArbCheck) {} template <> void DeviceQueueHw::addPipeControlCmdWa(bool isNoopCmd) {} template class DeviceQueueHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp000066400000000000000000000022201363734646600301030ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/command_queue/command_queue_hw.h" #include 
"opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" namespace NEO { typedef TGLLPFamily Family; struct EnableOCLGen12LP { EnableOCLGen12LP() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLGen12LP enable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/experimental_command_buffer_gen12lp.cpp000066400000000000000000000021021363734646600311600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/experimental_command_buffer.inl" #include "shared/source/helpers/hw_helper.h" namespace NEO { typedef TGLLPFamily GfxFamily; template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset); template size_t ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept; template size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer(); template size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept; template void ExperimentalCommandBuffer::addTimeStampPipeControl(); template size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept; template void ExperimentalCommandBuffer::addExperimentalCommands(); template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp000066400000000000000000000011431363734646600261270ustar00rootroot00000000000000/* * Copyright (C) 
2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_info.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_plus.inl" #include "opencl/source/command_queue/hardware_interface_bdw_plus.inl" namespace NEO { template class HardwareInterface; template <> void GpgpuWalkerHelper::adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd) { storeCmd->setMmioRemapEnable(true); } template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/gtpin_setup_gen12lp.cpp000066400000000000000000000013471363734646600260070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef TGLLPFamily Family; static const auto gfxFamily = IGFX_GEN12LP_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_12_1; } template class GTPinHwHelperHw; struct GTPinEnableGen12LP { GTPinEnableGen12LP() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen12LP gtpinEnable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/hardware_commands_helper_gen12lp.cpp000066400000000000000000000026411363734646600304610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "hardware_commands_helper_gen12lp.inl" #include "shared/source/gen12lp/hw_cmds.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/gen12lp/helpers_gen12lp.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper.inl" #include "opencl/source/helpers/hardware_commands_helper_base.inl" namespace NEO { 
template <> size_t HardwareCommandsHelper::getSizeRequiredCS(const Kernel *kernel) { size_t size = 2 * sizeof(typename TGLLPFamily::MEDIA_STATE_FLUSH) + sizeof(typename TGLLPFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD); return size; } template <> bool HardwareCommandsHelper::doBindingTablePrefetch() { return false; } template <> bool HardwareCommandsHelper::isPipeControlWArequired(const HardwareInfo &hwInfo) { return (Gen12LPHelpers::pipeControlWaRequired(hwInfo.platform.eProductFamily)) && (hwInfo.platform.usRevId == REVISION_A0); } template <> bool HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) { return (Gen12LPHelpers::pipeControlWaRequired(hwInfo.platform.eProductFamily)) && (hwInfo.platform.usRevId == REVISION_A0); } template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/helpers_gen12lp.cpp000066400000000000000000000036431363734646600251110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gen12lp/helpers_gen12lp.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw.h" namespace NEO { namespace Gen12LPHelpers { bool pipeControlWaRequired(PRODUCT_FAMILY productFamily) { return (productFamily == PRODUCT_FAMILY::IGFX_TIGERLAKE_LP); } bool imagePitchAlignmentWaRequired(PRODUCT_FAMILY productFamily) { return (productFamily == PRODUCT_FAMILY::IGFX_TIGERLAKE_LP); } void adjustCoherencyFlag(PRODUCT_FAMILY productFamily, bool &coherencyFlag) {} bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) { return false; } void initAdditionalGlobalMMIO(const CommandStreamReceiver &commandStreamReceiver, AubMemDump::AubStream &stream) {} uint64_t getPPGTTAdditionalBits(GraphicsAllocation *graphicsAllocation) { return 0; } void adjustAubGTTData(const CommandStreamReceiver &commandStreamReceiver, 
AubGTTData &data) {} void setAdditionalPipelineSelectFields(void *pipelineSelectCmd, const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo) {} bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) { return (hwInfo.platform.usRevId == REVISION_A0); } /* Returns true only when the product family is IGFX_TIGERLAKE_LP and the revision id equals REVISION_A0. Logical && is used instead of bitwise & to state the intent explicitly; the result is unchanged because both operands are side-effect-free comparisons. */ bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) { return ((hwInfo.platform.eProductFamily == IGFX_TIGERLAKE_LP) && (hwInfo.platform.usRevId == REVISION_A0)); } /* Returns true only when the product family is IGFX_TIGERLAKE_LP and the revision id equals REVISION_A0 (same condition as isForceDefaultRCSEngineWARequired). */ bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) { return ((hwInfo.platform.eProductFamily == IGFX_TIGERLAKE_LP) && (hwInfo.platform.usRevId == REVISION_A0)); } bool is3DPipelineSelectWARequired(const HardwareInfo &hwInfo) { return hwInfo.platform.eProductFamily == IGFX_TIGERLAKE_LP; } } // namespace Gen12LPHelpers } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/helpers_gen12lp.h000066400000000000000000000026221363734646600245520ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gen12lp/hw_cmds_base.h" namespace AubMemDump { struct AubStream; } struct AubGTTData; namespace NEO { class CommandStreamReceiver; class GraphicsAllocation; struct PipelineSelectArgs; class Image; namespace Gen12LPHelpers { bool pipeControlWaRequired(PRODUCT_FAMILY productFamily); bool imagePitchAlignmentWaRequired(PRODUCT_FAMILY productFamily); void adjustCoherencyFlag(PRODUCT_FAMILY productFamily, bool &coherencyFlag); bool isLocalMemoryEnabled(const HardwareInfo &hwInfo); void initAdditionalGlobalMMIO(const CommandStreamReceiver &commandStreamReceiver, AubMemDump::AubStream &stream); uint64_t getPPGTTAdditionalBits(GraphicsAllocation *graphicsAllocation); void adjustAubGTTData(const CommandStreamReceiver &commandStreamReceiver, AubGTTData &data); void setAdditionalPipelineSelectFields(void *pipelineSelectCmd, const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo); bool 
isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo); bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo); bool isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo); bool is3DPipelineSelectWARequired(const HardwareInfo &hwInfo); } // namespace Gen12LPHelpers } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/hw_helper_gen12lp.cpp000066400000000000000000000130441363734646600254200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" using Family = NEO::TGLLPFamily; #include "shared/source/helpers/flat_batch_buffer_helper_hw.inl" #include "shared/source/helpers/hw_helper_bdw_plus.inl" #include "shared/source/helpers/hw_helper_tgllp_plus.inl" #include "opencl/source/aub/aub_helper_bdw_plus.inl" #include "opencl/source/gen12lp/helpers_gen12lp.h" #include "engine_node.h" namespace NEO { /* Fills *caps with fixed limits: 3D image max height and width of 2048, the maximum single allocation size, and the stateless-to-stateful-with-offset support flag. hwInfo is not read by this body. */ template <> void HwHelperHw::setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) { caps->image3DMaxHeight = 2048; caps->image3DMaxWidth = 2048; //With stateful messages we have an allocation cap of 4GB //Reason to subtract 8KB is that driver may pad the buffer with additional pages for over fetching.
caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte); caps->isStatelesToStatefullWithOffsetSupported = true; } template <> bool HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const { return Gen12LPHelpers::isOffsetToSkipSetFFIDGPWARequired(hwInfo); } template <> bool HwHelperHw::is3DPipelineSelectWARequired(const HardwareInfo &hwInfo) const { return Gen12LPHelpers::is3DPipelineSelectWARequired(hwInfo); } template <> bool HwHelperHw::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) { return Gen12LPHelpers::isForceDefaultRCSEngineWARequired(hwInfo); } template <> bool HwHelperHw::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) { return Gen12LPHelpers::isForceEmuInt32DivRemSPWARequired(hwInfo); } template <> void HwHelperHw::adjustDefaultEngineType(HardwareInfo *pHwInfo) { if (!pHwInfo->featureTable.ftrCCSNode || isForceDefaultRCSEngineWARequired(*pHwInfo)) { pHwInfo->capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; } } template <> uint32_t HwHelperHw::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const { /* For ICL+ maxThreadCount equals (EUCount * 8). ThreadCount/EUCount=7 is no longer valid, so we have to force 8 in below formula. This is required to allocate enough scratch space. 
*/ return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * 8; } template <> bool HwHelperHw::isLocalMemoryEnabled(const HardwareInfo &hwInfo) const { return Gen12LPHelpers::isLocalMemoryEnabled(hwInfo); } template <> bool HwHelperHw::isPageTableManagerSupported(const HardwareInfo &hwInfo) const { return hwInfo.capabilityTable.ftrRenderCompressedBuffers || hwInfo.capabilityTable.ftrRenderCompressedImages; } template <> bool HwHelperHw::obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const { return false; } template <> bool HwHelperHw::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) { if (graphicsAllocation.getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) { return false; } return true; } template <> void HwHelperHw::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) { coherencyFlag = true; if (pHwInfo->platform.eProductFamily == IGFX_TIGERLAKE_LP && pHwInfo->platform.usRevId == 0x0) { //stepping A0 devices - turn off coherency coherencyFlag = false; } Gen12LPHelpers::adjustCoherencyFlag(pHwInfo->platform.eProductFamily, coherencyFlag); } template <> uint32_t HwHelperHw::getPitchAlignmentForImage(const HardwareInfo *hwInfo) { if (Gen12LPHelpers::imagePitchAlignmentWaRequired(hwInfo->platform.eProductFamily)) { auto stepping = hwInfo->platform.usRevId; if (stepping == 0) { return 64u; } return 4u; } return 4u; } template <> uint32_t HwHelperHw::getMetricsLibraryGenId() const { return static_cast(MetricsLibraryApi::ClientGen::Gen12); } template <> const HwHelper::EngineInstancesContainer HwHelperHw::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const { auto defaultEngine = getChosenEngineType(hwInfo); EngineInstancesContainer engines = { aub_stream::ENGINE_RCS, aub_stream::ENGINE_RCS, // low priority defaultEngine // internal usage }; if (hwInfo.featureTable.ftrCCSNode) { engines.push_back(aub_stream::ENGINE_CCS); } 
return engines; }; template <> void MemorySynchronizationCommands::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { if (Gen12LPHelpers::pipeControlWaRequired(hwInfo.platform.eProductFamily)) { auto stepping = hwInfo.platform.usRevId; if (stepping == 0) { auto pCmd = static_cast(commandStream.getSpace(sizeof(Family::PIPE_CONTROL))); *pCmd = Family::cmdInitPipeControl; pCmd->setCommandStreamerStallEnable(true); } } } template <> std::string HwHelperHw::getExtensions() const { return "cl_intel_subgroup_local_block_io "; } template <> void MemorySynchronizationCommands::setExtraCacheFlushFields(Family::PIPE_CONTROL *pipeControl) { pipeControl->setHdcPipelineFlush(true); pipeControl->setConstantCacheInvalidationEnable(false); } template class AubHelperHw; template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/hw_info_gen12lp.cpp000066400000000000000000000003721363734646600250740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef SUPPORT_TGLLP #include "hw_info_tgllp.inl" #endif namespace NEO { const char *GfxFamilyMapper::name = "Gen12LP"; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/hw_info_tgllp.inl000066400000000000000000000210631363734646600247460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "tgllp"; bool isSimulationTGLLP(unsigned short deviceId) { switch (deviceId) { case IGEN12LP_GT1_MOB_DEVICE_F0_ID: return true; } return false; }; const PLATFORM TGLLP::platform = { IGFX_TIGERLAKE_LP, PCH_UNKNOWN, 
IGFX_GEN12LP_CORE, IGFX_GEN12LP_CORE, PLATFORM_NONE, // default init 0, // usDeviceID 0, // usRevId. 0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable TGLLP::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}, {aub_stream::ENGINE_CCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max64BitAppAddress, // gpuAddressSpace 83.333, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationTGLLP, // isSimulation PreemptionMode::ThreadGroup, // defaultPreemptionMode aub_stream::ENGINE_CCS, // defaultEngineType 0, // maxRenderFrequency 21, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Tgllp, // aubDeviceId 1, // extraQuantityThreadsPerEU 64, // slmSize sizeof(TGLLP::GRF), // grfSize false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 false, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency false, // ftrSupportsVmeAvcTextureSampler false, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages true, // instrumentationEnabled true, // forceStatelessCompilationFor32Bit true, // ftr64KBpages "lp", // platformType true, // sourceLevelDebuggerSupported false, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages true, // supportsDeviceEnqueue false // hostPtrTrackingEnabled }; WorkaroundTable TGLLP::workaroundTable = {}; FeatureTable TGLLP::featureTable = {}; void TGLLP::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrL3IACoherency = true; featureTable->ftrPPGTT = true; featureTable->ftrSVM = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrStandardMipTailFormat = 
true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; featureTable->ftrTileMappedResource = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrTileY = true; featureTable->ftrAstcHdr2D = true; featureTable->ftrAstcLdr2D = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waEnablePreemptionGranularityControlByUMD = true; workaroundTable->waUntypedBufferCompression = true; }; const HardwareInfo TGLLP_1x6x16::hwInfo = { &TGLLP::platform, &TGLLP::featureTable, &TGLLP::workaroundTable, &TGLLP_1x6x16::gtSystemInfo, TGLLP::capabilityTable, }; GT_SYSTEM_INFO TGLLP_1x6x16::gtSystemInfo = {0}; void TGLLP_1x6x16::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * TGLLP::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->DualSubSliceCount = 6; gtSysInfo->L3CacheSizeInKb = 3840; gtSysInfo->L3BankCount = 8; gtSysInfo->MaxFillRate = 16; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = TGLLP::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = TGLLP::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = TGLLP::maxSubslicesSupported; gtSysInfo->MaxDualSubSlicesSupported = TGLLP::maxDualSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; gtSysInfo->CCSInfo.IsValid = true; 
gtSysInfo->CCSInfo.NumberOfCCSEnabled = 1; gtSysInfo->CCSInfo.Instances.CCSEnableMask = 0b1; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo TGLLP_1x2x16::hwInfo = { &TGLLP::platform, &TGLLP::featureTable, &TGLLP::workaroundTable, &TGLLP_1x2x16::gtSystemInfo, TGLLP::capabilityTable, }; GT_SYSTEM_INFO TGLLP_1x2x16::gtSystemInfo = {0}; void TGLLP_1x2x16::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * TGLLP::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->DualSubSliceCount = 2; gtSysInfo->L3CacheSizeInKb = 1920; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 16; gtSysInfo->TotalVsThreads = 224; gtSysInfo->TotalHsThreads = 224; gtSysInfo->TotalDsThreads = 224; gtSysInfo->TotalGsThreads = 224; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = TGLLP::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = TGLLP::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = TGLLP::maxSubslicesSupported; gtSysInfo->MaxDualSubSlicesSupported = TGLLP::maxDualSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; gtSysInfo->CCSInfo.IsValid = true; gtSysInfo->CCSInfo.NumberOfCCSEnabled = 1; gtSysInfo->CCSInfo.Instances.CCSEnableMask = 0b1; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo TGLLP::hwInfo = TGLLP_1x6x16::hwInfo; const uint64_t TGLLP::defaultHardwareInfoConfig = 0x100060010; void setupTGLLPHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100060010) { TGLLP_1x6x16::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100020010) { TGLLP_1x2x16::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); 
} else if (hwInfoConfig == 0x0) { // Default config TGLLP_1x6x16::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*TGLLP::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupTGLLPHardwareInfoImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/image_gen12lp.cpp000066400000000000000000000026351363734646600245310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "opencl/source/gen12lp/helpers_gen12lp.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/image.inl" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; template void ImageHw::setMediaSurfaceRotation(void *memory) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setRotation(MEDIA_SURFACE_STATE::ROTATION_NO_ROTATION_OR_0_DEGREE); surfaceState->setXOffset(0); surfaceState->setYOffset(0); } template void ImageHw::setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceMemoryObjectControlStateIndexToMocsTables(value); } // clang-format off #include "opencl/source/mem_obj/image_tgllp_plus.inl" #include "opencl/source/mem_obj/image_factory_init.inl" // clang-format on } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen12lp/linux/000077500000000000000000000000001363734646600225445ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen12lp/linux/command_stream_receiver_gen12lp.cpp000066400000000000000000000011561363734646600314600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/os_interface/linux/device_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class DrmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/linux/hw_info_config_gen12lp.cpp000066400000000000000000000004271363734646600275610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.inl" #include "shared/source/os_interface/hw_info_config_bdw_plus.inl" #ifdef SUPPORT_TGLLP #include "hw_info_config_tgllp.inl" #endif compute-runtime-20.13.16352/opencl/source/gen12lp/linux/hw_info_config_tgllp.inl000066400000000000000000000010621363734646600274270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { /* Product-specific configuration hook (explicit specialization; its template arguments are not visible in this copy). When osIface is null, hwInfo is left untouched. Otherwise hwInfo->gtSystemInfo.SliceCount is set to 1. Returns 0 in both cases. */ template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->SliceCount = 1; return 0; } template class HwInfoConfigHw; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen12lp/sampler_gen12lp.cpp000066400000000000000000000004271363734646600251070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds_base.h" using Family = NEO::TGLLPFamily; constexpr static auto gfxCore = IGFX_GEN12LP_CORE; #include "opencl/source/sampler/sampler_tgllp_plus.inl" compute-runtime-20.13.16352/opencl/source/gen12lp/scheduler_builtin_kernel.inl000066400000000000000000000057571363734646600271730ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "scheduler_definitions.h" /* Maps slMsize to a patch value: 0 -> 0, then 1..7 for sizes up to (inclusive) 1*1024, 2*1024, 4*1024, 8*1024, 16*1024, 32*1024 and 64*1024 (presumably bytes - confirm). A size above 64*1024 matches no branch and returns the initial value 0. */ uint GetPatchValueForSLMSize(uint slMsize) { //todo: verify this optimization : //return ( SLMSize == 0 ) ? 0 : max( 33 - clz( ( SLMSize - 1 ) >> 10 ), 7 ); uint PatchValue = 0; if (slMsize == 0) { PatchValue = 0; } else if (slMsize <= (1 * 1024)) { PatchValue = 1; } else if (slMsize <= (2 * 1024)) { PatchValue = 2; } else if (slMsize <= (4 * 1024)) { PatchValue = 3; } else if (slMsize <= (8 * 1024)) { PatchValue = 4; } else if (slMsize <= (16 * 1024)) { PatchValue = 5; } else if (slMsize <= (32 * 1024)) { PatchValue = 6; } else if (slMsize <= (64 * 1024)) { PatchValue = 7; } return PatchValue; } //on SKL we have pipe control in pairs, therefore when we NOOP we need to do this for both pipe controls /* Writes 0 to 12 consecutive DWORDs of secondaryBatchBuffer, starting at index dwordOffset + pipeControlOffset. */ void NOOPCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; 
secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; } //on SKL+ with mid thread preemption we need to have 2 pipe controls instead of 1 any time we do post sync operation void PutCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; //first pipe control doing CS stall secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD1; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; //second pipe control , doing actual timestamp write secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD1; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; } compute-runtime-20.13.16352/opencl/source/gen12lp/scheduler_definitions.h000066400000000000000000000156541363734646600261420ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef _DUMMY_WA #endif //POPULATE DEFINES WITH CORRECT VALUES FOR GEN12LP #define SCHEDULER_COMPILATION_SIZE 8 #define SIZEOF_INTERFACE_DESCRIPTOR_DATA_G12LP 32 #define NUMBER_OF_INERFACE_DESCRIPTORS 64 #define IDT_BREAKDOWN (NUMBER_OF_INERFACE_DESCRIPTORS - 2) #define MAX_WKG_SIZE 448 #define INTERFACE_DESCRIPTOR_TABLE_SIZE_G12LP (NUMBER_OF_INERFACE_DESCRIPTORS * SIZEOF_INTERFACE_DESCRIPTOR_DATA_G12LP) #define 
SIZEOF_COLOR_CALCULATOR_STATE_G12LP 0xC0 #define INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS_G12LP SIZEOF_COLOR_CALCULATOR_STATE_G12LP #define OCLRT_SIZEOF_SAMPLER_STATE_G12LP (16) #define SIZEOF_COLOR_CALCULATOR_STATE SIZEOF_COLOR_CALCULATOR_STATE_G12LP #define SIZEOF_INTERFACE_DESCRIPTOR_DATA SIZEOF_INTERFACE_DESCRIPTOR_DATA_G12LP #define INTERFACE_DESCRIPTOR_TABLE_SIZE INTERFACE_DESCRIPTOR_TABLE_SIZE_G12LP #define INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS INTERFACE_DESCRIPTOR_TABLE_START_ADDRESS_G12LP #define OCLRT_SIZEOF_SAMPLER_STATE OCLRT_SIZEOF_SAMPLER_STATE_G12LP #define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE (SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN12LP) #define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES (SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN12LP) //#define OCLRT_MEDIA_VFE_STATE_OFFSET ( MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD ) #define OCLRT_MEDIA_VFE_STATE_OFFSET (0) //address is QWORD in size and starts on DWORD 1 #define MEDIA_VFE_STATE_ADDRESS_OFFSET (OCLRT_MEDIA_VFE_STATE_OFFSET + 1) // DWORD OFFSET #define MEDIA_STATE_FLUSH_INITIAL_OFFSET 0 //bits 0-5 of 1st DWORD #define MEDIA_STATE_FLUSH_INITIAL_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + 1) #define MI_ARB_CHECK_AFTER_MEDIA_STATE_FLUSH_INITIAL_OFFSET 0 #define MI_ATOMIC_CMD_OFFSET 0 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) // DWORD OFFSET of InterfaceDescriptor Length // bits 0 - 16 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORLENGTH_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 2) // DWORD OFFSET of Interface Descriptor Start Address #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORSTARTADDRESS_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 3) #define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET) #define 
INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD 3 #define INTERFACE_DESCRIPTOR_BINDING_TABLE_POINTER_DWORD 4 #define INTERFACE_DESCRIPTOR_CONSTANT_URB_ENTRY_READ_OFFSET 5 #define INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD 6 #define INTERFACE_DESCRIPTOR_SLMSIZE_DWORD 6 #define INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT 9 #define SAMPLER_STATE_INDIRECT_STATE_MASK (0x7FFFFC0) #define SAMPLER_STATE_BORDER_COLOR_MASK (0xFFFFFFE0) #define SAMPLER_STATE_DESCRIPTOR_BORDER_COLOR_POINTER_DWORD 2 #define GPGPU_WALKER_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET) // DWORD OFFSET of the Interface Descriptor Offset for GPGPU_WALKER // bits 0 - 5 #define GPGPU_WALKER_INTERFACE_DESCRIPTOR_ID_OFFSET (GPGPU_WALKER_OFFSET + 1) // DWORD OFFSET of the Indirect data length Offset for GPGPU_WALKER // bits 0 - 16 #define GPGPU_WALKER_INDIRECT_DATA_LENGTH_OFFSET (GPGPU_WALKER_OFFSET + 2) // DWORD OFFSET of the Indirect Start Address for GPGPU_WALKER #define GPGPU_WALKER_INDIRECT_START_ADDRESS_OFFSET (GPGPU_WALKER_OFFSET + 3) // DWORD OFFSET of the Thread Width Counter Maximum for GPGPU_WALKER // bits 0 - 5 #define GPGPU_WALKER_THREAD_WIDTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Height Counter Maximum for GPGPU_WALKER // bits 8 - 13 #define GPGPU_WALKER_THREAD_HEIGHT_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Depth Counter Maximum for GPGPU_WALKER // bits 16 - 21 #define GPGPU_WALKER_THREAD_DEPTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the SIMD Size for GPGPU_WALKER // bits 30 - 31 #define GPGPU_WALKER_SIMDSIZE_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Starting in X pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_X (GPGPU_WALKER_OFFSET + 5) // DWORD OFFSET of the X Dimension for GPGPU_WALKER #define GPGPU_WALKER_XDIM_DWORD (GPGPU_WALKER_OFFSET + 7) // DWORD OFFSET of the Starting in Y pos for GPGPU_WALKER //bits 0 - 31 #define 
GPGPU_WALKER_GROUP_ID_START_Y (GPGPU_WALKER_OFFSET + 8) // DWORD OFFSET of the Y Dimension for GPGPU_WALKER #define GPGPU_WALKER_YDIM_DWORD (GPGPU_WALKER_OFFSET + 10) // DWORD OFFSET of the Starting in Z pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_Z (GPGPU_WALKER_OFFSET + 11) // DWORD OFFSET of the Z Dimension for GPGPU_WALKER #define GPGPU_WALKER_ZDIM_DWORD (GPGPU_WALKER_OFFSET + 12) // DWORD OFFSET of the Right or X Mask for GPGPU_WALKER #define GPGPU_WALKER_XMASK_DWORD (GPGPU_WALKER_OFFSET + 13) // DWORD OFFSET of the Bottom or Y Mask for GPGPU_WALKER #define GPGPU_WALKER_YMASK_DWORD (GPGPU_WALKER_OFFSET + 14) #define MEDIA_STATE_FLUSH_OFFSET (GPGPU_WALKER_OFFSET + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET) //bits 0-5 of 1st DWORD of M_S_F command #define MEDIA_STATE_FLUSH_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_OFFSET + 1) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET (MEDIA_STATE_FLUSH_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET_TO_PATCH (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET) #define PIPE_CONTROL_POST_SYNC_DWORD 1 #define PIPE_CONTROL_POST_SYNC_START_BIT 14 #define PIPE_CONTROL_POST_SYNC_END_BIT 15 #define PIPE_CONTROL_GENERATE_TIME_STAMP 3 #define PIPE_CONTROL_NO_POSTSYNC_OPERATION 0 #define PIPE_CONTROL_ADDRESS_FIELD_DWORD 2 #define PIPE_CONTROL_PROFILING_START_TIMESTAMP_ADDRESS_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + PIPE_CONTROL_ADDRESS_FIELD_DWORD) //DWORD 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_END_BIT 31 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_START_BIT 0 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_END_BIT 15 #define PIPE_CONTROL_TIME_STAMP_DWORD0 0x7A000004 #define PIPE_CONTROL_TIME_STAMP_DWORD1 0x0010C4A4 #define PIPE_CONTROL_CSTALL_DWORD0 0x7A000004 #define PIPE_CONTROL_CSTALL_DWORD1 0x001004A4 #define PIPE_CONTROL_TAG_WRITE_DWORD0 0x7A000004 #define PIPE_CONTROL_TAG_WRITE_DWORD1 0x001044A4 // the value of 
g_cInitMiBatchBufferStartCmdG12 DWORD0 #define OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0 (0x18800101) #if defined WA_LRI_COMMANDS_EXIST #define IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G12LP_DWORD_OFFSET) #endif #define OCLRT_LOAD_REGISTER_IMM_CMD 0x11000001 #define CTXT_PREMP_DBG_ADDRESS_VALUE 0x2248 #define CTXT_PREMP_ON_MI_ARB_CHECK_ONLY 0x00000100 #define CTXT_PREMP_DEFAULT_VALUE 0x0 #define IMM_LOAD_REGISTER_ADDRESS_DWORD_OFFSET 1 #define IMM_LOAD_REGISTER_VALUE_DWORD_OFFSET 2 compute-runtime-20.13.16352/opencl/source/gen12lp/state_base_address_gen12lp.cpp000066400000000000000000000004321363734646600272570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/state_base_address.h" #include "shared/source/helpers/state_base_address_bdw_plus.inl" namespace NEO { template struct StateBaseAddressHelper; } compute-runtime-20.13.16352/opencl/source/gen12lp/state_compute_mode_helper_gen12lp.cpp000066400000000000000000000005651363734646600306660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/state_compute_mode_helper.h" namespace NEO { template <> bool StateComputeModeHelper::isStateComputeModeRequired(const CsrSizeRequestFlags &csrSizeRequestFlags, bool isThreadArbitionPolicyProgrammed) { return false; } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/gen12lp/tbx_command_stream_receiver_gen12lp.cpp000066400000000000000000000025651363734646600312030ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include "shared/source/helpers/array_count.h" #include "shared/source/memory_manager/memory_pool.h" #include 
"opencl/source/aub_mem_dump/aub_mem_dump.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.inl" #include "opencl/source/helpers/base_object.h" namespace NEO { typedef TGLLPFamily Family; static auto gfxCore = IGFX_GEN12LP_CORE; template <> uint32_t TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() const { return 0x80; } template <> bool TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion() const { return true; } template <> void populateFactoryTable>() { extern TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE]; UNRECOVERABLE_IF(!isInRange(gfxCore, tbxCommandStreamReceiverFactory)); tbxCommandStreamReceiverFactory[gfxCore] = TbxCommandStreamReceiverHw::create; } template class TbxCommandStreamReceiverHw; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/windows/000077500000000000000000000000001363734646600230775ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen12lp/windows/command_stream_receiver_gen12lp.cpp000066400000000000000000000010601363734646600320050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/os_interface/windows/device_command_stream.inl" #include "opencl/source/os_interface/windows/wddm_device_command_stream.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class WddmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen12lp/windows/gmm_callbacks_gen12lp.cpp000066400000000000000000000004741363734646600277170ustar00rootroot00000000000000/* * 
Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/gen12lp/hw_cmds.h"

#include "opencl/source/helpers/windows/gmm_callbacks_tgllp_plus.inl"

using namespace NEO;

// Explicit instantiations of the GMM callback helpers.
// NOTE(review): template argument lists are missing in this text (looks
// stripped by extraction) - confirm against the upstream file.
template struct DeviceCallbacks;
template struct TTCallbacks;
compute-runtime-20.13.16352/opencl/source/gen12lp/windows/hw_info_config_gen12lp.cpp000066400000000000000000000020761363734646600301160ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/hw_info_config.inl"
#include "shared/source/os_interface/hw_info_config_bdw_plus.inl"

namespace NEO {

#ifdef SUPPORT_TGLLP
// Copies featureTable.ftrE2ECompression into both render-compression
// capability flags (images and buffers). osIface is not used here.
// Always returns 0.
template <>
int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) {
    hwInfo->capabilityTable.ftrRenderCompressedImages = hwInfo->featureTable.ftrE2ECompression;
    hwInfo->capabilityTable.ftrRenderCompressedBuffers = hwInfo->featureTable.ftrE2ECompression;

    return 0;
}

// Forces both the render and the display core family of the platform to
// IGFX_GEN12LP_CORE.
template <>
void HwInfoConfigHw::adjustPlatformForProductFamily(HardwareInfo *hwInfo) {
    PLATFORM *platform = &hwInfo->platform;
    platform->eRenderCoreFamily = IGFX_GEN12LP_CORE;
    platform->eDisplayCoreFamily = IGFX_GEN12LP_CORE;
}

template class HwInfoConfigHw;
#endif

} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/000077500000000000000000000000001363734646600207765ustar00rootroot00000000000000
compute-runtime-20.13.16352/opencl/source/gen8/CMakeLists.txt000066400000000000000000000002021363734646600235300ustar00rootroot00000000000000
#
# Copyright (C) 2019-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

if(SUPPORT_GEN8)
  add_subdirectories()
endif()
compute-runtime-20.13.16352/opencl/source/gen8/aub_command_stream_receiver_gen8.cpp000066400000000000000000000015061363734646600301310ustar00rootroot00000000000000
/*
 *
Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/array_count.h"

#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
#include "opencl/source/command_stream/aub_command_stream_receiver_hw_bdw_plus.inl"
#include "opencl/source/helpers/base_object.h"

namespace NEO {

typedef BDWFamily Family;
static auto gfxCore = IGFX_GEN8_CORE;

// Registers the AUB command stream receiver factory function in the slot
// indexed by gfxCore, after checking that the index is inside the table.
// NOTE(review): template argument lists are missing in this text (looks
// stripped by extraction) - confirm against the upstream file.
template <>
void populateFactoryTable>() {
    extern AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE];
    UNRECOVERABLE_IF(!isInRange(gfxCore, aubCommandStreamReceiverFactory));
    aubCommandStreamReceiverFactory[gfxCore] = AUBCommandStreamReceiverHw::create;
}

template class AUBCommandStreamReceiverHw;
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/aub_mapper.h000066400000000000000000000013251363734646600232630ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/memory_manager/memory_constants.h"

#include "opencl/source/gen_common/aub_mapper_base.h"

#include "engine_node.h"

namespace NEO {
struct BDWFamily;

// Specialization that ties a hardware family to its AUB dump types.
// It fixes the AUB device value to Bdw and declares (without defining) the
// per-engine context traits table and the global / per-engine MMIO lists,
// each per-engine table having aub_stream::NUM_ENGINES entries.
template <>
struct AUBFamilyMapper {
    enum { device = AubMemDump::DeviceValues::Bdw };

    using AubTraits = AubMemDump::Traits;

    static const AubMemDump::LrcaHelper *const csTraits[aub_stream::NUM_ENGINES];

    static const MMIOList globalMMIO;
    static const MMIOList *perEngineMMIO[aub_stream::NUM_ENGINES];

    typedef AubMemDump::AubDump AUB;
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/aub_mem_dump_gen8.cpp000066400000000000000000000053251363734646600250620ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/helpers/hw_helper.h"

#include "opencl/source/aub_mem_dump/aub_alloc_dump.inl"
#include "opencl/source/aub_mem_dump/aub_mem_dump.inl"

#include "aub_mapper.h"

namespace AubMemDump {

enum {
    device = DeviceValues::Bdw
};

//
Instantiate these common template implementations.
// NOTE(review): template argument lists are missing throughout this text
// (looks stripped by extraction) - confirm against the upstream file.
template struct AubDump>;
template struct AubDump>;

template struct AubPageTableHelper32>;
template struct AubPageTableHelper64>;
} // namespace AubMemDump

namespace NEO {
using Family = BDWFamily;

// One context helper per engine, each constructed with that engine's MMIO base.
static const AubMemDump::LrcaHelperRcs rcs(0x002000);
static const AubMemDump::LrcaHelperBcs bcs(0x022000);
static const AubMemDump::LrcaHelperVcs vcs(0x012000);
static const AubMemDump::LrcaHelperVecs vecs(0x01a000);

// Per-engine context traits, in the order RCS, BCS, VCS, VECS.
const AubMemDump::LrcaHelper *const AUBFamilyMapper::csTraits[aub_stream::NUM_ENGINES] = {
    &rcs,
    &bcs,
    &vcs,
    &vecs};

// No global MMIO writes for this family: the list is default-constructed.
const MMIOList AUBFamilyMapper::globalMMIO;

// Per-engine MMIO lists. Every engine writes 0xffff8280 to register 0x229c
// relative to its own MMIO base; RCS additionally writes 0x00020000 to 0x20d8.
static const MMIOList mmioListRCS = {
    MMIOPair(0x000020d8, 0x00020000),
    MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x229c), 0xffff8280),
};

static const MMIOList mmioListBCS = {
    MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x229c), 0xffff8280),
};

static const MMIOList mmioListVCS = {
    MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x229c), 0xffff8280),
};

static const MMIOList mmioListVECS = {
    MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x229c), 0xffff8280),
};

// Same engine order as csTraits above.
const MMIOList *AUBFamilyMapper::perEngineMMIO[aub_stream::NUM_ENGINES] = {
    &mmioListRCS,
    &mmioListBCS,
    &mmioListVCS,
    &mmioListVECS};
} // namespace NEO

namespace AubAllocDump {
using namespace NEO;

// Explicit instantiations of the allocation-dump helpers for this family.
template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat);

template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType);

template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);

template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);

template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);

template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);

template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context);
} // namespace AubAllocDump
compute-runtime-20.13.16352/opencl/source/gen8/buffer_gen8.cpp000066400000000000000000000005301363734646600236720ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/gen8/hw_cmds.h"

#include "opencl/source/mem_obj/buffer_bdw_plus.inl"

namespace NEO {

typedef BDWFamily Family;
static auto gfxCore = IGFX_GEN8_CORE;

// The included .inl is written against the Family and gfxCore names declared
// just above, so it must stay inside this namespace and after them.
#include "opencl/source/mem_obj/buffer_factory_init.inl"
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/command_queue_gen8.cpp000066400000000000000000000013461363734646600252510ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/memory_manager/unified_memory_manager.h"

#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_plus.inl"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"

#include "enqueue_init_dispatch_globals.h"

namespace NEO {

typedef BDWFamily Family;
static auto gfxCore = IGFX_GEN8_CORE;

template class CommandQueueHw;

// Registers the command queue factory function in the slot indexed by gfxCore.
// Unlike the AUB/TBX variants in sibling files, no range check is done here.
template <>
void populateFactoryTable>() {
    extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE];
    commandQueueFactory[gfxCore] = CommandQueueHw::create;
}

} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/command_stream_receiver_simulated_common_hw_gen8.cpp000066400000000000000000000004721363734646600334200ustar00rootroot00000000000000
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw_bdw_plus.inl"

namespace NEO {
typedef BDWFamily Family;

template class CommandStreamReceiverSimulatedCommonHw;
} //
namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/device_enqueue.h000066400000000000000000000031301363734646600241320ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "../execution_model/device_enqueue.h"

// Workaround switches for this generation. All four are defined
// unconditionally in this header.
#define WA_LRI_COMMANDS_EXIST_GEN8 1
#define WA_PROFILING_PREEMPTION 1
#define WA_SCHEDULER_PREEMPTION 1
#define WA_KERNEL_PREEMPTION 1

// Map the generation-specific switch onto the generic one.
#if defined WA_LRI_COMMANDS_EXIST_GEN8
#define WA_LRI_COMMANDS_EXIST 1
#endif

// Command sizes in bytes (DWORD count * sizeof(uint)).
#define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8 (15 * sizeof(uint))
#define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 (6 * sizeof(uint))
#define OCLRT_LOAD_REGISTER_IMM_CMD_G8 (3 * sizeof(uint))

// The same command sizes expressed in DWORDs.
#define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8_DWORD_OFFSET (15)
#define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8_DWORD_OFFSET (6)

// Space reserved in the second level batch buffer for one enqueue.
// The WA variant adds two LOAD_REGISTER_IMM commands and has no
// MEDIA_VFE_STATE term; the non-WA variant is the other way round.
// Because WA_LRI_COMMANDS_EXIST_GEN8 is defined unconditionally above, the
// #else branch is never selected from this header as written.
#ifdef WA_LRI_COMMANDS_EXIST_GEN8
#define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN8PLUS (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MI_ATOMIC_CMD + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_LOAD_REGISTER_IMM_CMD_G8 + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8 + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + OCLRT_LOAD_REGISTER_IMM_CMD_G8 + CS_PREFETCH_SIZE)
#else
#define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN8PLUS (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MI_ATOMIC_CMD + OCLRT_SIZEOF_MEDIA_VFE_STATE_CMD + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8 + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8 + CS_PREFETCH_SIZE)
#endif //WA_LRI_COMMANDS_EXIST_GEN8

#define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN8PLUS (128)
compute-runtime-20.13.16352/opencl/source/gen8/device_queue_gen8.cpp000066400000000000000000000050421363734646600250670ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
#include "shared/source/gen8/hw_cmds.h"

#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw_bdw_plus.inl"

namespace NEO {

typedef BDWFamily Family;
static auto gfxCore = IGFX_GEN8_CORE;

// Registers the device queue factory function in the slot indexed by gfxCore.
// NOTE(review): template argument lists and cast target types are missing
// throughout this text (looks stripped by extraction) - confirm against the
// upstream file.
template <>
void populateFactoryTable>() {
    extern DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE];
    deviceQueueFactory[gfxCore] = DeviceQueueHw::create;
}

// Size of the workaround commands: one MI_ATOMIC plus two
// MI_LOAD_REGISTER_IMM commands.
template <>
size_t DeviceQueueHw::getWaCommandsSize() {
    return sizeof(Family::MI_ATOMIC) +
           sizeof(Family::MI_LOAD_REGISTER_IMM) +
           sizeof(Family::MI_LOAD_REGISTER_IMM);
}

// Intentionally empty for this family: nothing is emitted.
template <>
void DeviceQueueHw::addArbCheckCmdWa() {}

// Emits an MI_ATOMIC into slbCS: 8-byte increment, QWORD data size, with
// return-data-control and CS stall set. The 64-bit placeholder address is
// split into its low and high 32-bit halves.
template <>
void DeviceQueueHw::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) {
    auto miAtomic = slbCS.getSpaceForCmd();
    *miAtomic = Family::cmdInitAtomic;
    miAtomic->setAtomicOpcode(Family::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT);
    miAtomic->setReturnDataControl(0x1);
    miAtomic->setCsStall(0x1);
    miAtomic->setDataSize(Family::MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD);
    miAtomic->setMemoryAddress(static_cast(atomicOpPlaceholder & 0x0000FFFFFFFFULL));
    miAtomic->setMemoryAddressHigh(static_cast((atomicOpPlaceholder >> 32) & 0x0000FFFFFFFFULL));
}

// Emits an MI_LOAD_REGISTER_IMM into slbCS that writes register 0x2248 with
// either 0x00000100 (setArbCheck == true) or 0x0.
template <>
void DeviceQueueHw::addLriCmdWa(bool setArbCheck) {
    auto lri = slbCS.getSpaceForCmd();
    *lri = Family::cmdInitLoadRegisterImm;
    lri->setRegisterOffset(0x2248); // CTXT_PREMP_DBG offset
    if (setArbCheck)
        lri->setDataDword(0x00000100); // set only bit 8 (Preempt On MI_ARB_CHK Only)
    else
        lri->setDataDword(0x0);
}

// Intentionally empty for this family: nothing is emitted.
template <>
void DeviceQueueHw::addPipeControlCmdWa(bool isNoopCmd) {}

// Emits a PIPE_CONTROL into slbCS with CS stall enabled and a
// write-timestamp post-sync operation targeting timestampAddress
// (split into high and low 32-bit halves).
template <>
void DeviceQueueHw::addProfilingEndCmds(uint64_t timestampAddress) {
    auto pPipeControlCmd = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL));
    *pPipeControlCmd = Family::cmdInitPipeControl;
    pPipeControlCmd->setCommandStreamerStallEnable(true);
    pPipeControlCmd->setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP);
    pPipeControlCmd->setAddressHigh(timestampAddress >> 32);
    pPipeControlCmd->setAddress(timestampAddress & (0xffffffff));
}

// Enables DC flush on the given PIPE_CONTROL.
template <>
void DeviceQueueHw::addDcFlushToPipeControlWa(PIPE_CONTROL *pc) {
    pc->setDcFlushEnable(true);
}

template class DeviceQueueHw;
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/enable_family_full_ocl_gen8.cpp000066400000000000000000000023131363734646600270700ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_stream/command_stream_receiver_hw.h"

#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h"
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/event/perf_counter.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/sampler/sampler.h"

// NOTE(review): the header name of the following #include is missing in this
// text (looks stripped by extraction) - confirm against the upstream file.
#include

namespace NEO {

typedef BDWFamily Family;

// The constructor runs eight populateFactoryTable calls. Because a single
// static instance is defined below, they run during static initialization
// of this translation unit.
struct EnableOCLGen8 {
    EnableOCLGen8() {
        populateFactoryTable>();
        populateFactoryTable>();
        populateFactoryTable>();
        populateFactoryTable>();
        populateFactoryTable>();
        populateFactoryTable>();
        populateFactoryTable>();
        populateFactoryTable>();
    }
};

static EnableOCLGen8 enable;
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/experimental_command_buffer_gen8.cpp000066400000000000000000000021001363734646600301400ustar00rootroot00000000000000
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_stream/experimental_command_buffer.h"
#include "shared/source/command_stream/experimental_command_buffer.inl"
#include "shared/source/helpers/hw_helper.h"

namespace NEO {
typedef BDWFamily GfxFamily;

// Explicit instantiations of the ExperimentalCommandBuffer member templates.
template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset);
template size_t ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept;
template size_t
ExperimentalCommandBuffer::programExperimentalCommandBuffer(); template size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept; template void ExperimentalCommandBuffer::addTimeStampPipeControl(); template size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept; template void ExperimentalCommandBuffer::addExperimentalCommands(); template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/gpgpu_walker_gen8.cpp000066400000000000000000000050501363734646600251120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_info.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_plus.inl" #include "opencl/source/command_queue/hardware_interface_bdw_plus.inl" namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); *pCmd = BDWFamily::cmdInitPipeControl; pCmd->setCommandStreamerStallEnable(true); // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS); } } } template <> size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const Kernel 
*pKernel) { typedef typename BDWFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG; typedef typename BDWFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL; typedef typename BDWFamily::MI_MATH MI_MATH; typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)) * 2; // For 2 WADisableLSQCROPERFforOCL WAs } return n; } template class HardwareInterface; template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/gtpin_setup_gen8.cpp000066400000000000000000000013261363734646600247660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef BDWFamily Family; static const auto gfxFamily = IGFX_GEN8_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_8; } template class GTPinHwHelperHw; struct GTPinEnableGen8 { GTPinEnableGen8() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen8 gtpinEnable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/hardware_commands_helper_gen8.cpp000066400000000000000000000020711363734646600274400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include 
"opencl/source/helpers/hardware_commands_helper.inl" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include namespace NEO { static uint32_t slmSizeId[] = {0, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16}; template <> uint32_t HardwareCommandsHelper::alignSlmSize(uint32_t slmSize) { if (slmSize == 0u) { return 0u; } slmSize = std::max(slmSize, 4096u); slmSize = Math::nextPowerOfTwo(slmSize); return slmSize; } template <> uint32_t HardwareCommandsHelper::computeSlmValues(uint32_t slmSize) { slmSize += (4 * KB - 1); slmSize = slmSize >> 12; slmSize = std::min(slmSize, 15u); slmSize = slmSizeId[slmSize]; return slmSize; } // Explicitly instantiate HardwareCommandsHelper for BDW device family template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/hw_helper_gen8.cpp000066400000000000000000000023261363734646600244030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/flat_batch_buffer_helper_hw.inl" #include "shared/source/helpers/hw_helper_bdw_plus.inl" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub/aub_helper_bdw_plus.inl" namespace NEO { typedef BDWFamily Family; template <> size_t HwHelperHw::getMaxBarrierRegisterPerSlice() const { return 16; } template <> void HwHelperHw::setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) { caps->image3DMaxHeight = 2048; caps->image3DMaxWidth = 2048; caps->maxMemAllocSize = 2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte; caps->isStatelesToStatefullWithOffsetSupported = false; } template <> typename Family::PIPE_CONTROL *MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, bool dcFlush) { return MemorySynchronizationCommands::obtainPipeControl(commandStream, true); } template class AubHelperHw; template class HwHelperHw; template class FlatBatchBufferHelperHw; 
template struct MemorySynchronizationCommands;
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/hw_info_bdw.inl000066400000000000000000000242101363734646600237660ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/gen8/hw_cmds.h"
#include "shared/source/memory_manager/memory_constants.h"

#include "opencl/source/aub_mem_dump/aub_services.h"

#include "engine_node.h"

namespace NEO {

// NOTE(review): template argument lists are missing in this text (looks
// stripped by extraction) - confirm against the upstream file.
const char *HwMapper::abbreviation = "bdw";

// Returns true for the five *_DESK_DEVICE_F0_ID device IDs listed below and
// false for every other ID.
bool isSimulationBDW(unsigned short deviceId) {
    switch (deviceId) {
    case IBDW_GT0_DESK_DEVICE_F0_ID:
    case IBDW_GT1_DESK_DEVICE_F0_ID:
    case IBDW_GT2_DESK_DEVICE_F0_ID:
    case IBDW_GT3_DESK_DEVICE_F0_ID:
    case IBDW_GT4_DESK_DEVICE_F0_ID:
        return true;
    }
    return false;
};

// Default platform description; device and revision IDs are left at 0.
const PLATFORM BDW::platform = {
    IGFX_BROADWELL,
    PCH_UNKNOWN,
    IGFX_GEN8_CORE,
    IGFX_GEN8_CORE,
    PLATFORM_NONE, // default init
    0,             // usDeviceID
    0,             // usRevId. 0 sets the stepping to A0
    0,             // usDeviceID_PCH
    0,             // usRevId_PCH
    GTTYPE_UNDEFINED};

// Positional initializer: the trailing comment on each line names the field
// that the value is meant for, so the order must match the struct definition.
const RuntimeCapabilityTable BDW::capabilityTable{
    EngineDirectSubmissionInitVec{
        {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines
    {50000, 5000, 200000, true, true, true},     // kmdNotifyProperties
    MemoryConstants::max48BitAddress,            // gpuAddressSpace
    80,                                          // defaultProfilingTimerResolution
    MemoryConstants::pageSize,                   // requiredPreemptionSurfaceSize
    &isSimulationBDW,                            // isSimulation
    PreemptionMode::Disabled,                    // defaultPreemptionMode
    aub_stream::ENGINE_RCS,                      // defaultEngineType
    0,                                           // maxRenderFrequency
    21,                                          // clVersionSupport
    CmdServicesMemTraceVersion::DeviceValues::Bdw, // aubDeviceId
    0,                                           // extraQuantityThreadsPerEU
    64,                                          // slmSize
    sizeof(BDW::GRF),                            // grfSize
    false,                                       // blitterOperationsSupported
    true,                                        // ftrSupportsInteger64BitAtomics
    true,                                        // ftrSupportsFP64
    true,                                        // ftrSupports64BitMath
    true,                                        // ftrSvm
    true,                                        // ftrSupportsCoherency
    false,                                       // ftrSupportsVmeAvcTextureSampler
    false,                                       // ftrSupportsVmeAvcPreemption
    false,                                       // ftrRenderCompressedBuffers
    false,                                       // ftrRenderCompressedImages
    false,                                       // ftr64KBpages
    true,                                        // instrumentationEnabled
    true,                                        // forceStatelessCompilationFor32Bit
    "core",                                      // platformType
    false,                                       // sourceLevelDebuggerSupported
    false,                                       // supportsVme
    false,                                       // supportCacheFlushAfterWalker
    true,                                        // supportsImages
    true,                                        // supportsDeviceEnqueue
    true                                         // hostPtrTrackingEnabled
};

WorkaroundTable BDW::workaroundTable = {};
FeatureTable BDW::featureTable = {};

// Sets the feature and workaround flags listed below to true in the tables
// owned by hwInfo. Flags not listed are left untouched.
void BDW::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) {
    FeatureTable *featureTable = &hwInfo->featureTable;
    WorkaroundTable *workaroundTable = &hwInfo->workaroundTable;

    featureTable->ftrL3IACoherency = true;
    featureTable->ftrPPGTT = true;
    featureTable->ftrSVM = true;
    featureTable->ftrIA32eGfxPTEs = true;
    featureTable->ftrFbc = true;
    featureTable->ftrFbc2AddressTranslation = true;
    featureTable->ftrFbcBlitterTracking = true;
    featureTable->ftrFbcCpuTracking = true;
    featureTable->ftrTileY = true;

    workaroundTable->waDisableLSQCROPERFforOCL = true;
    workaroundTable->waReportPerfCountUseGlobalContextID = true;
    workaroundTable->waUseVAlign16OnTileXYBpp816 = true;
    workaroundTable->waModifyVFEStateAfterGPGPUPreemption = true;
    workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true;
}

const HardwareInfo BDW_1x2x6::hwInfo = {
    &BDW::platform,
    &BDW::featureTable,
    &BDW::workaroundTable,
    &BDW_1x2x6::gtSystemInfo,
    BDW::capabilityTable,
};
GT_SYSTEM_INFO BDW_1x2x6::gtSystemInfo = {0};
// Fills hwInfo->gtSystemInfo for the 1x2x6 configuration. ThreadCount is
// derived from the EUCount already stored in hwInfo, so EUCount must be set
// by the caller beforehand. Optionally also fills the feature/workaround
// tables.
void BDW_1x2x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) {
    GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo;
    gtSysInfo->ThreadCount = gtSysInfo->EUCount * BDW::threadsPerEu;
    gtSysInfo->SliceCount = 1;
    gtSysInfo->L3CacheSizeInKb = 384;
    gtSysInfo->L3BankCount = 2;
    gtSysInfo->MaxFillRate = 8;
    gtSysInfo->TotalVsThreads = 336;
    gtSysInfo->TotalHsThreads = 336;
    gtSysInfo->TotalDsThreads = 336;
    gtSysInfo->TotalGsThreads = 336;
    gtSysInfo->TotalPsThreadsWindowerRange = 64;
    gtSysInfo->CsrSizeInMb = 8;
    gtSysInfo->MaxEuPerSubSlice = BDW::maxEuPerSubslice;
    gtSysInfo->MaxSlicesSupported = BDW::maxSlicesSupported;
    gtSysInfo->MaxSubSlicesSupported = BDW::maxSubslicesSupported;
    gtSysInfo->IsL3HashModeEnabled = false;
    gtSysInfo->IsDynamicallyPopulated = false;
    if (setupFeatureTableAndWorkaroundTable) {
        setupFeatureAndWorkaroundTable(hwInfo);
    }
};

const HardwareInfo BDW_1x3x6::hwInfo = {
    &BDW::platform,
    &BDW::featureTable,
    &BDW::workaroundTable,
    &BDW_1x3x6::gtSystemInfo,
    BDW::capabilityTable,
};
GT_SYSTEM_INFO BDW_1x3x6::gtSystemInfo = {0};
// Same as the 1x2x6 variant except for L3CacheSizeInKb (768) and
// L3BankCount (4).
void BDW_1x3x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) {
    GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo;
    gtSysInfo->ThreadCount = gtSysInfo->EUCount * BDW::threadsPerEu;
    gtSysInfo->SliceCount = 1;
    gtSysInfo->L3CacheSizeInKb = 768;
    gtSysInfo->L3BankCount = 4;
    gtSysInfo->MaxFillRate = 8;
    gtSysInfo->TotalVsThreads = 336;
    gtSysInfo->TotalHsThreads = 336;
    gtSysInfo->TotalDsThreads = 336;
    gtSysInfo->TotalGsThreads = 336;
    gtSysInfo->TotalPsThreadsWindowerRange = 64;
    gtSysInfo->CsrSizeInMb = 8;
    gtSysInfo->MaxEuPerSubSlice = BDW::maxEuPerSubslice;
    gtSysInfo->MaxSlicesSupported = BDW::maxSlicesSupported;
    gtSysInfo->MaxSubSlicesSupported = BDW::maxSubslicesSupported;
    gtSysInfo->IsL3HashModeEnabled = false;
    gtSysInfo->IsDynamicallyPopulated = false;
    if (setupFeatureTableAndWorkaroundTable) {
        setupFeatureAndWorkaroundTable(hwInfo);
    }
};

const HardwareInfo BDW_1x3x8::hwInfo = {
    &BDW::platform,
    &BDW::featureTable,
    &BDW::workaroundTable,
    &BDW_1x3x8::gtSystemInfo,
    BDW::capabilityTable,
};
GT_SYSTEM_INFO BDW_1x3x8::gtSystemInfo = {0};
// Writes the same values as the 1x2x6 variant (L3CacheSizeInKb 384,
// L3BankCount 2).
// NOTE(review): the 1x3x6 variant above uses 768 / 4 instead - confirm that
// 384 / 2 is intended for 1x3x8.
void BDW_1x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) {
    GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo;
    gtSysInfo->ThreadCount = gtSysInfo->EUCount * BDW::threadsPerEu;
    gtSysInfo->SliceCount = 1;
    gtSysInfo->L3CacheSizeInKb = 384;
    gtSysInfo->L3BankCount = 2;
    gtSysInfo->MaxFillRate = 8;
    gtSysInfo->TotalVsThreads = 336;
    gtSysInfo->TotalHsThreads = 336;
    gtSysInfo->TotalDsThreads = 336;
    gtSysInfo->TotalGsThreads = 336;
    gtSysInfo->TotalPsThreadsWindowerRange = 64;
    gtSysInfo->CsrSizeInMb = 8;
    gtSysInfo->MaxEuPerSubSlice = BDW::maxEuPerSubslice;
    gtSysInfo->MaxSlicesSupported = BDW::maxSlicesSupported;
    gtSysInfo->MaxSubSlicesSupported = BDW::maxSubslicesSupported;
    gtSysInfo->IsL3HashModeEnabled = false;
    gtSysInfo->IsDynamicallyPopulated = false;
    if (setupFeatureTableAndWorkaroundTable) {
        setupFeatureAndWorkaroundTable(hwInfo);
    }
};

const HardwareInfo BDW_2x3x8::hwInfo = {
    &BDW::platform,
    &BDW::featureTable,
    &BDW::workaroundTable,
    &BDW_2x3x8::gtSystemInfo,
    BDW::capabilityTable,
};
GT_SYSTEM_INFO BDW_2x3x8::gtSystemInfo = {0};
// Two-slice configuration: SliceCount 2, L3CacheSizeInKb 1536, L3BankCount 8
// and MaxFillRate 16; the remaining values match the single-slice variants.
void BDW_2x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) {
    GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo;
    gtSysInfo->ThreadCount = gtSysInfo->EUCount * BDW::threadsPerEu;
    gtSysInfo->SliceCount = 2;
    gtSysInfo->L3CacheSizeInKb = 1536;
    gtSysInfo->L3BankCount = 8;
    gtSysInfo->MaxFillRate = 16;
    gtSysInfo->TotalVsThreads = 336;
    gtSysInfo->TotalHsThreads = 336;
    gtSysInfo->TotalDsThreads = 336;
    gtSysInfo->TotalGsThreads = 336;
    gtSysInfo->TotalPsThreadsWindowerRange = 64;
    gtSysInfo->CsrSizeInMb = 8;
    gtSysInfo->MaxEuPerSubSlice = BDW::maxEuPerSubslice;
    gtSysInfo->MaxSlicesSupported = BDW::maxSlicesSupported;
    gtSysInfo->MaxSubSlicesSupported = BDW::maxSubslicesSupported;
    gtSysInfo->IsL3HashModeEnabled = false;
    gtSysInfo->IsDynamicallyPopulated = false;
    if (setupFeatureTableAndWorkaroundTable) {
        setupFeatureAndWorkaroundTable(hwInfo);
    }
};

// The default hardware info is the 1x3x8 configuration, matching
// defaultHardwareInfoConfig below.
const HardwareInfo BDW::hwInfo = BDW_1x3x8::hwInfo;
const uint64_t BDW::defaultHardwareInfoConfig = 0x100030008;

// Dispatches to the setupHardwareInfo of the configuration selected by
// hwInfoConfig. A value of 0x0 selects the same 1x3x8 setup as 0x100030008;
// any value not listed hits UNRECOVERABLE_IF(true).
void setupBDWHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) {
    if (hwInfoConfig == 0x200030008) {
        BDW_2x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable);
    } else if (hwInfoConfig == 0x100030008) {
        BDW_1x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable);
    } else if (hwInfoConfig == 0x100030006) {
        BDW_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable);
    } else if (hwInfoConfig == 0x100020006) {
        BDW_1x2x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable);
    } else if (hwInfoConfig == 0x0) {
        // Default config
        BDW_1x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable);
    } else {
        UNRECOVERABLE_IF(true);
    }
}

void (*BDW::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupBDWHardwareInfoImpl;
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/hw_info_gen8.cpp000066400000000000000000000003601363734646600240530ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#ifdef SUPPORT_BDW
#include "hw_info_bdw.inl"
#endif

namespace NEO {
const char *GfxFamilyMapper::name = "Gen8";
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/image_gen8.cpp000066400000000000000000000010671363734646600235110ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/gen8/hw_cmds.h"

#include "opencl/source/mem_obj/image.h"
#include "opencl/source/mem_obj/image.inl"

// NOTE(review): the header name of the following #include is missing in this
// text (looks stripped by extraction) - confirm against the upstream file.
#include

namespace NEO {

typedef BDWFamily Family;
static auto gfxCore = IGFX_GEN8_CORE;

// Both specializations are intentionally empty for this family.
template <>
void ImageHw::setMediaSurfaceRotation(void *) {}

template <>
void ImageHw::setSurfaceMemoryObjectControlStateIndexToMocsTable(void *, uint32_t) {}

// The included .inl is written against the Family and gfxCore names declared
// above, so it must stay inside this namespace and after them.
#include "opencl/source/mem_obj/image_factory_init.inl"
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/gen8/linux/000077500000000000000000000000001363734646600221355ustar00rootroot00000000000000
compute-runtime-20.13.16352/opencl/source/gen8/linux/command_stream_receiver_gen8.cpp000066400000000000000000000011501363734646600304340ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
#include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/os_interface/linux/device_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class DrmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/linux/hw_info_config_bdw.inl000066400000000000000000000020061363734646600264510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; // There is no interface to read total slice count from drm/i915, so we // derive this from the number of EUs and subslices. // otherwise there is one slice. 
if (gtSystemInfo->SubSliceCount > 3) { gtSystemInfo->SliceCount = 2; } else { gtSystemInfo->SliceCount = 1; } if (hwInfo->platform.usDeviceID == IBDW_GT3_HALO_MOBL_DEVICE_F0_ID || hwInfo->platform.usDeviceID == IBDW_GT3_SERV_DEVICE_F0_ID) { gtSystemInfo->EdramSizeInKb = 128 * 1024; } return 0; } template class HwInfoConfigHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/linux/hw_info_config_gen8.cpp000066400000000000000000000004231363734646600265370ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.inl" #include "shared/source/os_interface/hw_info_config_bdw_plus.inl" #ifdef SUPPORT_BDW #include "hw_info_config_bdw.inl" #endif compute-runtime-20.13.16352/opencl/source/gen8/sampler_gen8.cpp000066400000000000000000000005741363734646600240740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { typedef BDWFamily Family; static auto gfxCore = IGFX_GEN8_CORE; #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/scheduler_builtin_kernel.inl000066400000000000000000000030441363734646600265470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "scheduler_definitions.h" uint GetPatchValueForSLMSize(uint slMsize) { uint PatchValue; if (slMsize == 0) { PatchValue = 0; } else { uint count4KB = slMsize / 4096; if (slMsize % 4096 != 0) { count4KB++; } PatchValue = GetNextPowerof2(count4KB); } return PatchValue; } //on BDW we have only 1 pipe control void NOOPCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; 
secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; } void PutCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD1; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; }compute-runtime-20.13.16352/opencl/source/gen8/scheduler_definitions.h000066400000000000000000000156611363734646600255310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #define SIZEOF_INTERFACE_DESCRIPTOR_DATA_G8 32 // Generation dependent number // Number of Interface Descriptors is 64 for BDW #define NUMBER_OF_INERFACE_DESCRIPTORS 64 #define IDT_BREAKDOWN (NUMBER_OF_INERFACE_DESCRIPTORS - 2) #define INTERFACE_DESCRIPTOR_TABLE_SIZE_G8 (NUMBER_OF_INERFACE_DESCRIPTORS * SIZEOF_INTERFACE_DESCRIPTOR_DATA_G8) // Based on the alignment ( 64 vs 32 ) requirements this may be Gen dependent #define SIZEOF_COLOR_CALCULATOR_STATE_G8 0xC0 #define OCLRT_SIZEOF_SAMPLER_STATE_G8 (16) #define SIZEOF_COLOR_CALCULATOR_STATE SIZEOF_COLOR_CALCULATOR_STATE_G8 #define SIZEOF_INTERFACE_DESCRIPTOR_DATA SIZEOF_INTERFACE_DESCRIPTOR_DATA_G8 #define INTERFACE_DESCRIPTOR_TABLE_SIZE INTERFACE_DESCRIPTOR_TABLE_SIZE_G8 #define OCLRT_SIZEOF_SAMPLER_STATE OCLRT_SIZEOF_SAMPLER_STATE_G8 #define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE (SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN8PLUS) #define 
SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES (SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN8PLUS) // DWORD OFFSET #define MEDIA_STATE_FLUSH_INITIAL_OFFSET 0 //bits 0-5 of 1st DWORD #define MEDIA_STATE_FLUSH_INITIAL_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + 1) #define MI_ATOMIC_CMD_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) //#define OCLRT_MEDIA_VFE_STATE_OFFSET (MI_ATOMIC_CMD_OFFSET + OCLRT_SIZEOF_MI_ATOMIC_CMD_DWORD_OFFSET) //address is QWORD in size and starts on DWORD 1 //#define MEDIA_VFE_STATE_ADDRESS_OFFSET (OCLRT_MEDIA_VFE_STATE_OFFSET + 1) #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET (MI_ATOMIC_CMD_OFFSET + OCLRT_SIZEOF_MI_ATOMIC_CMD_DWORD_OFFSET) //(OCLRT_MEDIA_VFE_STATE_OFFSET + OCLRT_SIZEOF_MEDIA_VFE_STATE_DWORD) // DWORD OFFSET of Interface Descriptor Start Address #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORSTARTADDRESS_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 3) #define INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD 3 #define INTERFACE_DESCRIPTOR_BINDING_TABLE_POINTER_DWORD 4 #define INTERFACE_DESCRIPTOR_CONSTANT_URB_ENTRY_READ_OFFSET 5 #define INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD 6 #define INTERFACE_DESCRIPTOR_SLMSIZE_DWORD 6 #define INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT 9 #define SAMPLER_STATE_INDIRECT_STATE_MASK (0x7FFFFC0) #define SAMPLER_STATE_BORDER_COLOR_MASK (0xFFFFFFE0) #define SAMPLER_STATE_DESCRIPTOR_BORDER_COLOR_POINTER_DWORD 2 //disable preemption is for Gen8 #if defined WA_LRI_COMMANDS_EXIST #define IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET) #define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET + OCLRT_IMM_LOAD_REGISTER_CMD_DEVICE_CMD_DWORD_OFFSET) #else #define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 
OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET) #endif // WA_LRI_COMMANDS_EXIST #define GPGPU_WALKER_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8_DWORD_OFFSET) // DWORD OFFSET of the Interface Descriptor Offset for GPGPU_WALKER // bits 0 - 5 #define GPGPU_WALKER_INTERFACE_DESCRIPTOR_ID_OFFSET (GPGPU_WALKER_OFFSET + 1) // DWORD OFFSET of the Indirect data length Offset for GPGPU_WALKER // bits 0 - 16 #define GPGPU_WALKER_INDIRECT_DATA_LENGTH_OFFSET (GPGPU_WALKER_OFFSET + 2) // DWORD OFFSET of the Indirect Start Address for GPGPU_WALKER #define GPGPU_WALKER_INDIRECT_START_ADDRESS_OFFSET (GPGPU_WALKER_OFFSET + 3) // DWORD OFFSET of the Thread Width Counter Maximum for GPGPU_WALKER // bits 0 - 5 #define GPGPU_WALKER_THREAD_WIDTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Height Counter Maximum for GPGPU_WALKER // bits 8 - 13 #define GPGPU_WALKER_THREAD_HEIGHT_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Depth Counter Maximum for GPGPU_WALKER // bits 16 - 21 #define GPGPU_WALKER_THREAD_DEPTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the SIMD Size for GPGPU_WALKER // bits 30 - 31 #define GPGPU_WALKER_SIMDSIZE_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Starting in X pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_X (GPGPU_WALKER_OFFSET + 5) // DWORD OFFSET of the X Dimension for GPGPU_WALKER #define GPGPU_WALKER_XDIM_DWORD (GPGPU_WALKER_OFFSET + 7) // DWORD OFFSET of the Starting in Y pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_Y (GPGPU_WALKER_OFFSET + 8) // DWORD OFFSET of the Y Dimension for GPGPU_WALKER #define GPGPU_WALKER_YDIM_DWORD (GPGPU_WALKER_OFFSET + 10) // DWORD OFFSET of the Starting in Z pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_Z (GPGPU_WALKER_OFFSET + 11) // DWORD OFFSET of the X Dimension for GPGPU_WALKER #define GPGPU_WALKER_ZDIM_DWORD (GPGPU_WALKER_OFFSET + 
12) // DWORD OFFSET of the Right or X Mask for GPGPU_WALKER #define GPGPU_WALKER_XMASK_DWORD (GPGPU_WALKER_OFFSET + 13) // DWORD OFFSET of the Bottom or Y Mask for GPGPU_WALKER #define GPGPU_WALKER_YMASK_DWORD (GPGPU_WALKER_OFFSET + 14) #define MEDIA_STATE_FLUSH_OFFSET (GPGPU_WALKER_OFFSET + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G8_DWORD_OFFSET) //bits 0-5 of 1st DWORD of M_S_F command #define MEDIA_STATE_FLUSH_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_OFFSET + 1) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET (MEDIA_STATE_FLUSH_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET_TO_PATCH (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET) #define PIPE_CONTROL_POST_SYNC_DWORD 1 #define PIPE_CONTROL_POST_SYNC_START_BIT 14 #define PIPE_CONTROL_POST_SYNC_END_BIT 15 #define PIPE_CONTROL_GENERATE_TIME_STAMP 3 #define PIPE_CONTROL_NO_POSTSYNC_OPERATION 0 #define PIPE_CONTROL_ADDRESS_FIELD_DWORD 2 #define PIPE_CONTROL_PROFILING_START_TIMESTAMP_ADDRESS_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + PIPE_CONTROL_ADDRESS_FIELD_DWORD) //DWORD 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_END_BIT 31 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_START_BIT 0 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_END_BIT 15 #define PIPE_CONTROL_TIME_STAMP_DWORD0 0x7A000004 #define PIPE_CONTROL_TIME_STAMP_DWORD1 0x0010C4A4 #define PIPE_CONTROL_CSTALL_DWORD0 0x7A000004 #define PIPE_CONTROL_CSTALL_DWORD1 0x001004A4 #define PIPE_CONTROL_TAG_WRITE_DWORD0 0x7A000004 #define PIPE_CONTROL_TAG_WRITE_DWORD1 0x001044A4 // the value of InitMIBBStartCmd_G8 DWORD0 #define OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0 (0x18800101) #if defined WA_LRI_COMMANDS_EXIST #define IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G8_DWORD_OFFSET) #endif #define OCLRT_LOAD_REGISTER_IMM_CMD 0x11000001 #define CTXT_PREMP_DBG_ADDRESS_VALUE 0x2248 #define CTXT_PREMP_ON_MI_ARB_CHECK_ONLY 0x00000100 
#define CTXT_PREMP_DEFAULT_VALUE 0x0 #define IMM_LOAD_REGISTER_ADDRESS_DWORD_OFFSET 1 #define IMM_LOAD_REGISTER_VALUE_DWORD_OFFSET 2 compute-runtime-20.13.16352/opencl/source/gen8/state_base_address_gen8.cpp000066400000000000000000000004301363734646600262370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/state_base_address.h" #include "shared/source/helpers/state_base_address_bdw_plus.inl" namespace NEO { template struct StateBaseAddressHelper; } compute-runtime-20.13.16352/opencl/source/gen8/tbx_command_stream_receiver_gen8.cpp000066400000000000000000000017351363734646600301630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "shared/source/helpers/array_count.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.inl" namespace NEO { typedef BDWFamily Family; static auto gfxCore = IGFX_GEN8_CORE; template <> void populateFactoryTable>() { extern TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE]; UNRECOVERABLE_IF(!isInRange(gfxCore, tbxCommandStreamReceiverFactory)); tbxCommandStreamReceiverFactory[gfxCore] = TbxCommandStreamReceiverHw::create; } template class TbxCommandStreamReceiverHw; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/windows/000077500000000000000000000000001363734646600224705ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen8/windows/command_stream_receiver_gen8.cpp000066400000000000000000000010521363734646600307700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/os_interface/windows/device_command_stream.inl" #include "opencl/source/os_interface/windows/wddm_device_command_stream.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class WddmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen8/windows/gmm_callbacks_gen8.cpp000066400000000000000000000005421363734646600266750ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/source/helpers/windows/gmm_callbacks.inl" using namespace NEO; template struct DeviceCallbacks; template struct TTCallbacks; compute-runtime-20.13.16352/opencl/source/gen8/windows/hw_info_config_gen8.cpp000066400000000000000000000010561363734646600270750ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/hw_info_config.inl" #include "shared/source/os_interface/hw_info_config_bdw_plus.inl" namespace NEO { #ifdef SUPPORT_BDW template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/000077500000000000000000000000001363734646600207775ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen9/CMakeLists.txt000066400000000000000000000002021363734646600235310ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(SUPPORT_GEN9) add_subdirectories() endif() 
compute-runtime-20.13.16352/opencl/source/gen9/aub_command_stream_receiver_gen9.cpp000066400000000000000000000015061363734646600301330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw_bdw_plus.inl" #include "opencl/source/helpers/base_object.h" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; template <> void populateFactoryTable>() { extern AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE]; UNRECOVERABLE_IF(!isInRange(gfxCore, aubCommandStreamReceiverFactory)); aubCommandStreamReceiverFactory[gfxCore] = AUBCommandStreamReceiverHw::create; } template class AUBCommandStreamReceiverHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/aub_mapper.h000066400000000000000000000013251363734646600232640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/gen_common/aub_mapper_base.h" #include "engine_node.h" namespace NEO { struct SKLFamily; template <> struct AUBFamilyMapper { enum { device = AubMemDump::DeviceValues::Skl }; using AubTraits = AubMemDump::Traits; static const AubMemDump::LrcaHelper *const csTraits[aub_stream::NUM_ENGINES]; static const MMIOList globalMMIO; static const MMIOList *perEngineMMIO[aub_stream::NUM_ENGINES]; typedef AubMemDump::AubDump AUB; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/aub_mem_dump_gen9.cpp000066400000000000000000000062361363734646600250660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include 
"opencl/source/aub_mem_dump/aub_alloc_dump.inl" #include "opencl/source/aub_mem_dump/aub_mem_dump.inl" #include "aub_mapper.h" namespace AubMemDump { enum { device = DeviceValues::Skl }; // Instantiate these common template implementations. template struct AubDump>; template struct AubDump>; template struct AubPageTableHelper32>; template struct AubPageTableHelper64>; } // namespace AubMemDump namespace NEO { using Family = SKLFamily; static const AubMemDump::LrcaHelperRcs rcs(0x002000); static const AubMemDump::LrcaHelperBcs bcs(0x022000); static const AubMemDump::LrcaHelperVcs vcs(0x012000); static const AubMemDump::LrcaHelperVecs vecs(0x01a000); const AubMemDump::LrcaHelper *const AUBFamilyMapper::csTraits[aub_stream::NUM_ENGINES] = { &rcs, &bcs, &vcs, &vecs}; const MMIOList AUBFamilyMapper::globalMMIO; static const MMIOList mmioListRCS = { MMIOPair(0x000020d8, 0x00020000), MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x229c), 0xffff8280), MMIOPair(0x0000C800, 0x00000009), MMIOPair(0x0000C804, 0x00000038), MMIOPair(0x0000C808, 0x0000003B), MMIOPair(0x0000C80C, 0x00000039), MMIOPair(0x0000C810, 0x00000037), MMIOPair(0x0000C814, 0x00000039), MMIOPair(0x0000C818, 0x00000037), MMIOPair(0x0000C81C, 0x0000001B), MMIOPair(0x0000C820, 0x00060037), MMIOPair(0x0000C824, 0x00000032), MMIOPair(0x0000C828, 0x00000033), MMIOPair(0x0000C82C, 0x0000003B), }; static const MMIOList mmioListBCS = { MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x229c), 0xffff8280), }; static const MMIOList mmioListVCS = { MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x229c), 0xffff8280), }; static const MMIOList mmioListVECS = { MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x229c), 0xffff8280), }; const MMIOList *AUBFamilyMapper::perEngineMMIO[aub_stream::NUM_ENGINES] = { &mmioListRCS, &mmioListBCS, &mmioListVCS, &mmioListVECS}; } // namespace NEO namespace AubAllocDump { using namespace NEO; template SurfaceInfo 
*getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); } // namespace AubAllocDump compute-runtime-20.13.16352/opencl/source/gen9/buffer_gen9.cpp000066400000000000000000000005301363734646600236740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/mem_obj/buffer_bdw_plus.inl" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; #include "opencl/source/mem_obj/buffer_factory_init.inl" } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/command_queue_gen9.cpp000066400000000000000000000013461363734646600252530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/command_queue_hw_bdw_plus.inl" #include "opencl/source/command_queue/enqueue_resource_barrier.h" #include "enqueue_init_dispatch_globals.h" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; template class CommandQueueHw; template <> void 
populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/command_stream_receiver_simulated_common_hw_gen9.cpp000066400000000000000000000004721363734646600334220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw_bdw_plus.inl" namespace NEO { typedef SKLFamily Family; template class CommandStreamReceiverSimulatedCommonHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/device_enqueue.h000066400000000000000000000023071363734646600241400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "../execution_model/device_enqueue.h" #define WA_PROFILING_PREEMPTION 1 #define WA_SCHEDULER_PREEMPTION 1 #define WA_KERNEL_PREEMPTION 1 #define WA_ARB_CHECK_AFTER_MSF 1 #define WA_MI_ATOMIC_BEFORE_MEDIA_ID_LOAD 1 #define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G9 (15 * sizeof(uint)) #define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9 (6 * sizeof(uint)) #define OCLRT_LOAD_REGISTER_IMM_CMD_G9 (3 * sizeof(uint)) #define OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G9_DWORD_OFFSET (15) #define OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9_DWORD_OFFSET (6) // Changed: MediaVFE state cmd size removal #define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN9 (OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MI_ARB_CHECK + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9 + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9 + OCLRT_SIZEOF_MI_ATOMIC_CMD + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD + OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G9 + OCLRT_SIZEOF_MEDIA_STATE_FLUSH + OCLRT_SIZEOF_MI_ARB_CHECK + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9 + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9 + CS_PREFETCH_SIZE) #define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN9 
(128)compute-runtime-20.13.16352/opencl/source/gen9/device_queue_gen9.cpp000066400000000000000000000040541363734646600250730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/device_queue/device_queue_hw_bdw_plus.inl" #include "opencl/source/device_queue/device_queue_hw_profiling.inl" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; template <> void populateFactoryTable>() { extern DeviceQueueCreateFunc deviceQueueFactory[IGFX_MAX_CORE]; deviceQueueFactory[gfxCore] = DeviceQueueHw::create; } template <> size_t DeviceQueueHw::getWaCommandsSize() { return sizeof(Family::MI_ARB_CHECK) + sizeof(Family::MI_ATOMIC) + sizeof(Family::MI_ARB_CHECK) + sizeof(Family::PIPE_CONTROL) + sizeof(Family::PIPE_CONTROL); } template <> void DeviceQueueHw::addArbCheckCmdWa() { auto arbCheck = slbCS.getSpaceForCmd(); *arbCheck = Family::cmdInitArbCheck; } template <> void DeviceQueueHw::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) { auto miAtomic = slbCS.getSpaceForCmd(); *miAtomic = Family::cmdInitAtomic; miAtomic->setAtomicOpcode(Family::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT); miAtomic->setReturnDataControl(0x1); miAtomic->setCsStall(0x1); miAtomic->setDataSize(Family::MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD); miAtomic->setMemoryAddress(static_cast(atomicOpPlaceholder & 0x0000FFFFFFFFULL)); miAtomic->setMemoryAddressHigh(static_cast((atomicOpPlaceholder >> 32) & 0x0000FFFFFFFFULL)); } template <> void DeviceQueueHw::addLriCmdWa(bool setArbCheck) {} template <> void DeviceQueueHw::addPipeControlCmdWa(bool isNoopCmd) { auto pipeControl = slbCS.getSpaceForCmd(); if (isNoopCmd) memset(pipeControl, 0x0, sizeof(Family::PIPE_CONTROL)); else initPipeControl(pipeControl); } template class DeviceQueueHw; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen9/enable_family_full_ocl_gen9.cpp000066400000000000000000000023131363734646600270720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/event/perf_counter.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" #include namespace NEO { typedef SKLFamily Family; struct EnableOCLGen9 { EnableOCLGen9() { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); } }; static EnableOCLGen9 enable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/experimental_command_buffer_gen9.cpp000066400000000000000000000021001363734646600301420ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/experimental_command_buffer.inl" #include "shared/source/helpers/hw_helper.h" namespace NEO { typedef SKLFamily GfxFamily; template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset); template size_t ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept; template size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer(); template size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept; template void ExperimentalCommandBuffer::addTimeStampPipeControl(); template 
size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept; template void ExperimentalCommandBuffer::addExperimentalCommands(); template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/gpgpu_walker_gen9.cpp000066400000000000000000000050551363734646600251210ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds_base.h" #include "opencl/source/command_queue/gpgpu_walker_bdw_plus.inl" #include "opencl/source/command_queue/hardware_interface_bdw_plus.inl" namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); *pCmd = SKLFamily::cmdInitPipeControl; pCmd->setCommandStreamerStallEnable(true); // Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS); } } } template <> size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) { typedef typename SKLFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG; typedef typename SKLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; 
typedef typename SKLFamily::MI_MATH MI_MATH; typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)) * 2; // For 2 WADisableLSQCROPERFforOCL WAs } return n; } template class HardwareInterface; template class GpgpuWalkerHelper; template struct EnqueueOperation; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/gtpin_setup_gen9.cpp000066400000000000000000000013261363734646600247700ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_hw_helper.inl" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; typedef SKLFamily Family; static const auto gfxFamily = IGFX_GEN9_CORE; template <> uint32_t GTPinHwHelperHw::getGenVersion() { return gtpin::GTPIN_GEN_9; } template class GTPinHwHelperHw; struct GTPinEnableGen9 { GTPinEnableGen9() { gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); } }; static GTPinEnableGen9 gtpinEnable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/hardware_commands_helper_gen9.cpp000066400000000000000000000010471363734646600274440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/hardware_commands_helper.inl" #include "opencl/source/helpers/hardware_commands_helper_base.inl" #include namespace NEO { template <> bool HardwareCommandsHelper::isPipeControlWArequired(const 
HardwareInfo &hwInfo) { return true; } template struct HardwareCommandsHelper; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/hw_helper_gen9.cpp000066400000000000000000000022561363734646600244070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/flat_batch_buffer_helper_hw.inl" #include "shared/source/helpers/hw_helper_bdw_plus.inl" #include "opencl/source/aub/aub_helper_bdw_plus.inl" namespace NEO { typedef SKLFamily Family; template <> SipKernelType HwHelperHw::getSipKernelType(bool debuggingActive) { if (!debuggingActive) { return SipKernelType::Csr; } return SipKernelType::DbgCsrLocal; } template <> void MemorySynchronizationCommands::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { auto pCmd = static_cast(commandStream.getSpace(sizeof(Family::PIPE_CONTROL))); *pCmd = Family::cmdInitPipeControl; pCmd->setCommandStreamerStallEnable(true); } template <> uint32_t HwHelperHw::getMetricsLibraryGenId() const { return static_cast(MetricsLibraryApi::ClientGen::Gen9); } template class AubHelperHw; template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/hw_info_bxt.inl000066400000000000000000000206021363734646600240110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "bxt"; bool isSimulationBXT(unsigned short deviceId) { switch (deviceId) { case IBXT_A_DEVICE_F0_ID: case IBXT_C_DEVICE_F0_ID: return true; } return false; }; const PLATFORM BXT::platform = { IGFX_BROXTON, PCH_UNKNOWN, IGFX_GEN9_CORE, 
IGFX_GEN9_CORE, PLATFORM_MOBILE, // default init 0, // usDeviceID 0, // usRevId. 0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable BXT::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max48BitAddress, // gpuAddressSpace 52.083, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationBXT, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 12, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Bxt, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize sizeof(BXT::GRF), // grfSize false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 true, // ftrSupports64BitMath false, // ftrSvm true, // ftrSupportsCoherency true, // ftrSupportsVmeAvcTextureSampler false, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages false, // ftr64KBpages true, // instrumentationEnabled false, // forceStatelessCompilationFor32Bit "lp", // platformType true, // sourceLevelDebuggerSupported true, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages false, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable BXT::workaroundTable = {}; FeatureTable BXT::featureTable = {}; void BXT::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { PLATFORM *platform = &hwInfo->platform; FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrL3IACoherency = true; featureTable->ftrVEBOX = true; featureTable->ftrULT = true; 
featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftr3dObjectLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; featureTable->ftrLCIA = true; featureTable->ftrPPGTT = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrDisplayYTiling = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrTileY = true; if (platform->usRevId >= 3) { featureTable->ftrGttCacheInvalidation = true; } workaroundTable->waLLCCachingUnsupported = true; workaroundTable->waMsaa8xTileYDepthPitchAlignment = true; workaroundTable->waFbcLinearSurfaceStride = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waEnablePreemptionGranularityControlByUMD = true; workaroundTable->waSendMIFLUSHBeforeVFE = true; workaroundTable->waForcePcBbFullCfgRestore = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; } const HardwareInfo BXT_1x2x6::hwInfo = { &BXT::platform, &BXT::featureTable, &BXT::workaroundTable, &BXT_1x2x6::gtSystemInfo, BXT::capabilityTable, }; GT_SYSTEM_INFO BXT_1x2x6::gtSystemInfo = {0}; void BXT_1x2x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * BXT::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 384; gtSysInfo->L3BankCount = 1; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 112; gtSysInfo->TotalHsThreads = 112; gtSysInfo->TotalDsThreads = 112; gtSysInfo->TotalGsThreads = 112; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; 
gtSysInfo->MaxEuPerSubSlice = BXT::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = BXT::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = BXT::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo BXT_1x3x6::hwInfo = { &BXT::platform, &BXT::featureTable, &BXT::workaroundTable, &BXT_1x3x6::gtSystemInfo, BXT::capabilityTable, }; GT_SYSTEM_INFO BXT_1x3x6::gtSystemInfo = {0}; void BXT_1x3x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * BXT::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 384; gtSysInfo->L3BankCount = 1; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 112; gtSysInfo->TotalHsThreads = 112; gtSysInfo->TotalDsThreads = 112; gtSysInfo->TotalGsThreads = 112; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = BXT::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = BXT::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = BXT::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo BXT::hwInfo = BXT_1x3x6::hwInfo; const uint64_t BXT::defaultHardwareInfoConfig = 0x100030006; void setupBXTHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100020006) { BXT_1x2x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100030006) { BXT_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x0) { // Default config BXT_1x3x6::setupHardwareInfo(hwInfo, 
setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*BXT::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupBXTHardwareInfoImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/hw_info_cfl.inl000066400000000000000000000276361363734646600237760ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "cfl"; bool isSimulationCFL(unsigned short deviceId) { return false; }; const PLATFORM CFL::platform = { IGFX_COFFEELAKE, PCH_UNKNOWN, IGFX_GEN9_CORE, IGFX_GEN9_CORE, PLATFORM_NONE, // default init 0, // usDeviceID 0, // usRevId. 0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable CFL::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max48BitAddress, // gpuAddressSpace 83.333, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationCFL, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 21, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Cfl, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize sizeof(CFL::GRF), // grfSize false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 true, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency true, // ftrSupportsVmeAvcTextureSampler false, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages true, // ftr64KBpages true, // 
instrumentationEnabled true, // forceStatelessCompilationFor32Bit "core", // platformType true, // sourceLevelDebuggerSupported true, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages true, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable CFL::workaroundTable = {}; FeatureTable CFL::featureTable = {}; void CFL::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrL3IACoherency = true; featureTable->ftrVEBOX = true; featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftr3dObjectLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; featureTable->ftrPPGTT = true; featureTable->ftrSVM = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrDisplayYTiling = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrTileY = true; workaroundTable->waEnablePreemptionGranularityControlByUMD = true; workaroundTable->waSendMIFLUSHBeforeVFE = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; workaroundTable->waMsaa8xTileYDepthPitchAlignment = true; workaroundTable->waLosslessCompressionSurfaceStride = true; workaroundTable->waFbcLinearSurfaceStride = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; } const HardwareInfo CFL_1x2x6::hwInfo = { &CFL::platform, &CFL::featureTable, &CFL::workaroundTable, &CFL_1x2x6::gtSystemInfo, CFL::capabilityTable, }; 
GT_SYSTEM_INFO CFL_1x2x6::gtSystemInfo = {0}; void CFL_1x2x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * CFL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 384; gtSysInfo->L3BankCount = 2; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = CFL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = CFL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = CFL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo CFL_1x3x6::hwInfo = { &CFL::platform, &CFL::featureTable, &CFL::workaroundTable, &CFL_1x3x6::gtSystemInfo, CFL::capabilityTable, }; GT_SYSTEM_INFO CFL_1x3x6::gtSystemInfo = {0}; void CFL_1x3x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * CFL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 768; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = CFL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = CFL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = CFL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { 
setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo CFL_1x3x8::hwInfo = { &CFL::platform, &CFL::featureTable, &CFL::workaroundTable, &CFL_1x3x8::gtSystemInfo, CFL::capabilityTable, }; GT_SYSTEM_INFO CFL_1x3x8::gtSystemInfo = {0}; void CFL_1x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * CFL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 768; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = CFL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = CFL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = CFL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo CFL_2x3x8::hwInfo = { &CFL::platform, &CFL::featureTable, &CFL::workaroundTable, &CFL_2x3x8::gtSystemInfo, CFL::capabilityTable, }; GT_SYSTEM_INFO CFL_2x3x8::gtSystemInfo = {0}; void CFL_2x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * CFL::threadsPerEu; gtSysInfo->SliceCount = 2; gtSysInfo->L3CacheSizeInKb = 1536; gtSysInfo->L3BankCount = 8; gtSysInfo->MaxFillRate = 16; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = CFL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = CFL::maxSlicesSupported; 
gtSysInfo->MaxSubSlicesSupported = CFL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo CFL_3x3x8::hwInfo = { &CFL::platform, &CFL::featureTable, &CFL::workaroundTable, &CFL_3x3x8::gtSystemInfo, CFL::capabilityTable, }; GT_SYSTEM_INFO CFL_3x3x8::gtSystemInfo = {0}; void CFL_3x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * CFL::threadsPerEu; gtSysInfo->SliceCount = 3; gtSysInfo->L3CacheSizeInKb = 2304; gtSysInfo->L3BankCount = 12; gtSysInfo->MaxFillRate = 24; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = CFL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = CFL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = CFL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo CFL::hwInfo = CFL_1x3x6::hwInfo; const uint64_t CFL::defaultHardwareInfoConfig = 0x100030006; void setupCFLHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100030008) { CFL_1x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x200030008) { CFL_2x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x300030008) { CFL_3x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100020006) { CFL_1x2x6::setupHardwareInfo(hwInfo, 
setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100030006) { CFL_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x0) { // Default config CFL_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*CFL::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupCFLHardwareInfoImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/hw_info_gen9.cpp000066400000000000000000000007041363734646600240570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef SUPPORT_BXT #include "hw_info_bxt.inl" #endif #ifdef SUPPORT_CFL #include "hw_info_cfl.inl" #endif #ifdef SUPPORT_GLK #include "hw_info_glk.inl" #endif #ifdef SUPPORT_KBL #include "hw_info_kbl.inl" #endif #ifdef SUPPORT_SKL #include "hw_info_skl.inl" #endif namespace NEO { const char *GfxFamilyMapper::name = "Gen9"; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/hw_info_glk.inl000066400000000000000000000200151363734646600237670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "glk"; bool isSimulationGLK(unsigned short deviceId) { return false; }; const PLATFORM GLK::platform = { IGFX_GEMINILAKE, PCH_UNKNOWN, IGFX_GEN9_CORE, IGFX_GEN9_CORE, PLATFORM_MOBILE, // default init 0, // usDeviceID 0, // usRevId. 
0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable GLK::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {30000, 0, 0, true, false, false}, // kmdNotifyProperties MemoryConstants::max48BitAddress, // gpuAddressSpace 52.083, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationGLK, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 12, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Glk, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize sizeof(GLK::GRF), // grfSize false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 true, // ftrSupports64BitMath false, // ftrSvm true, // ftrSupportsCoherency true, // ftrSupportsVmeAvcTextureSampler false, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages false, // ftr64KBpages true, // instrumentationEnabled false, // forceStatelessCompilationFor32Bit "lp", // platformType true, // sourceLevelDebuggerSupported true, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages false, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable GLK::workaroundTable = {}; FeatureTable GLK::featureTable = {}; void GLK::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrL3IACoherency = true; featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftr3dObjectLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = 
true; featureTable->ftrLCIA = true; featureTable->ftrPPGTT = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; featureTable->ftrEnableGuC = true; featureTable->ftrTileMappedResource = true; featureTable->ftrULT = true; featureTable->ftrAstcHdr2D = true; featureTable->ftrAstcLdr2D = true; featureTable->ftrTileY = true; workaroundTable->waLLCCachingUnsupported = true; workaroundTable->waMsaa8xTileYDepthPitchAlignment = true; workaroundTable->waFbcLinearSurfaceStride = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waEnablePreemptionGranularityControlByUMD = true; workaroundTable->waSendMIFLUSHBeforeVFE = true; workaroundTable->waForcePcBbFullCfgRestore = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; } const HardwareInfo GLK_1x3x6::hwInfo = { &GLK::platform, &GLK::featureTable, &GLK::workaroundTable, &GLK_1x3x6::gtSystemInfo, GLK::capabilityTable, }; GT_SYSTEM_INFO GLK_1x3x6::gtSystemInfo = {0}; void GLK_1x3x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * GLK::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 384; gtSysInfo->L3BankCount = 2; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 112; gtSysInfo->TotalHsThreads = 112; gtSysInfo->TotalDsThreads = 112; gtSysInfo->TotalGsThreads = 112; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = GLK::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = GLK::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = GLK::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { 
setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo GLK_1x2x6::hwInfo = { &GLK::platform, &GLK::featureTable, &GLK::workaroundTable, &GLK_1x2x6::gtSystemInfo, GLK::capabilityTable, }; GT_SYSTEM_INFO GLK_1x2x6::gtSystemInfo = {0}; void GLK_1x2x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * GLK::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 384; gtSysInfo->L3BankCount = 2; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 112; gtSysInfo->TotalHsThreads = 112; gtSysInfo->TotalDsThreads = 112; gtSysInfo->TotalGsThreads = 112; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = GLK::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = GLK::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = GLK::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo GLK::hwInfo = GLK_1x3x6::hwInfo; const uint64_t GLK::defaultHardwareInfoConfig = 0x100030006; void setupGLKHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100020006) { GLK_1x2x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100030006) { GLK_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x0) { // Default config GLK_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*GLK::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupGLKHardwareInfoImpl; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen9/hw_info_kbl.inl000066400000000000000000000303201363734646600237620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "kbl"; bool isSimulationKBL(unsigned short deviceId) { return false; }; const PLATFORM KBL::platform = { IGFX_KABYLAKE, PCH_UNKNOWN, IGFX_GEN9_CORE, IGFX_GEN9_CORE, PLATFORM_NONE, // default init 0, // usDeviceID 0, // usRevId. 0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable KBL::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max48BitAddress, // gpuAddressSpace 83.333, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationKBL, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 21, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Kbl, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize sizeof(KBL::GRF), // grfSize false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 true, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency true, // ftrSupportsVmeAvcTextureSampler false, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages true, // ftr64KBpages true, // instrumentationEnabled true, // forceStatelessCompilationFor32Bit "core", // platformType true, // sourceLevelDebuggerSupported true, // supportsVme false, // supportCacheFlushAfterWalker 
true, // supportsImages true, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable KBL::workaroundTable = {}; FeatureTable KBL::featureTable = {}; void KBL::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { PLATFORM *platform = &hwInfo->platform; FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrL3IACoherency = true; featureTable->ftrVEBOX = true; featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftr3dObjectLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; featureTable->ftrPPGTT = true; featureTable->ftrSVM = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrDisplayYTiling = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrTileY = true; workaroundTable->waEnablePreemptionGranularityControlByUMD = true; workaroundTable->waSendMIFLUSHBeforeVFE = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; workaroundTable->waMsaa8xTileYDepthPitchAlignment = true; workaroundTable->waLosslessCompressionSurfaceStride = true; workaroundTable->waFbcLinearSurfaceStride = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; if (platform->usRevId <= 0x6) { workaroundTable->waDisableLSQCROPERFforOCL = true; workaroundTable->waEncryptedEdramOnlyPartials = true; } if (platform->usRevId <= 0x8) { workaroundTable->waForcePcBbFullCfgRestore = true; } } const HardwareInfo KBL_1x2x6::hwInfo = { &KBL::platform, &KBL::featureTable, 
&KBL::workaroundTable, &KBL_1x2x6::gtSystemInfo, KBL::capabilityTable, }; GT_SYSTEM_INFO KBL_1x2x6::gtSystemInfo = {0}; void KBL_1x2x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * KBL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 384; gtSysInfo->L3BankCount = 2; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = KBL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = KBL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = KBL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo KBL_1x3x6::hwInfo = { &KBL::platform, &KBL::featureTable, &KBL::workaroundTable, &KBL_1x3x6::gtSystemInfo, KBL::capabilityTable, }; GT_SYSTEM_INFO KBL_1x3x6::gtSystemInfo = {0}; void KBL_1x3x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * KBL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 768; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = KBL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = KBL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = KBL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if 
(setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo KBL_1x3x8::hwInfo = { &KBL::platform, &KBL::featureTable, &KBL::workaroundTable, &KBL_1x3x8::gtSystemInfo, KBL::capabilityTable, }; GT_SYSTEM_INFO KBL_1x3x8::gtSystemInfo = {0}; void KBL_1x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * KBL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 768; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = KBL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = KBL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = KBL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo KBL_2x3x8::hwInfo = { &KBL::platform, &KBL::featureTable, &KBL::workaroundTable, &KBL_2x3x8::gtSystemInfo, KBL::capabilityTable, }; GT_SYSTEM_INFO KBL_2x3x8::gtSystemInfo = {0}; void KBL_2x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * KBL::threadsPerEu; gtSysInfo->SliceCount = 2; gtSysInfo->L3CacheSizeInKb = 1536; gtSysInfo->L3BankCount = 8; gtSysInfo->MaxFillRate = 16; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = KBL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = 
KBL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = KBL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo KBL_3x3x8::hwInfo = { &KBL::platform, &KBL::featureTable, &KBL::workaroundTable, &KBL_3x3x8::gtSystemInfo, KBL::capabilityTable, }; GT_SYSTEM_INFO KBL_3x3x8::gtSystemInfo = {0}; void KBL_3x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * KBL::threadsPerEu; gtSysInfo->SliceCount = 3; gtSysInfo->L3CacheSizeInKb = 2304; gtSysInfo->L3BankCount = 12; gtSysInfo->MaxFillRate = 23; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = KBL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = KBL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = KBL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo KBL::hwInfo = KBL_1x3x6::hwInfo; const uint64_t KBL::defaultHardwareInfoConfig = 0x100030006; void setupKBLHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100030008) { KBL_1x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x200030008) { KBL_2x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x300030008) { KBL_3x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100020006) { KBL_1x2x6::setupHardwareInfo(hwInfo, 
setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100030006) { KBL_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x0) { // Default config KBL_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*KBL::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupKBLHardwareInfoImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/hw_info_skl.inl000066400000000000000000000316731363734646600240170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "engine_node.h" namespace NEO { const char *HwMapper::abbreviation = "skl"; bool isSimulationSKL(unsigned short deviceId) { switch (deviceId) { case ISKL_GT0_DESK_DEVICE_F0_ID: case ISKL_GT1_DESK_DEVICE_F0_ID: case ISKL_GT2_DESK_DEVICE_F0_ID: case ISKL_GT3_DESK_DEVICE_F0_ID: case ISKL_GT4_DESK_DEVICE_F0_ID: return true; } return false; }; const PLATFORM SKL::platform = { IGFX_SKYLAKE, PCH_UNKNOWN, IGFX_GEN9_CORE, IGFX_GEN9_CORE, PLATFORM_NONE, // default init 0, // usDeviceID 9, // usRevId. 
0 sets the stepping to A0 0, // usDeviceID_PCH 0, // usRevId_PCH GTTYPE_UNDEFINED}; const RuntimeCapabilityTable SKL::capabilityTable{ EngineDirectSubmissionInitVec{ {aub_stream::ENGINE_RCS, {true, true}}}, // directSubmissionEngines {0, 0, 0, false, false, false}, // kmdNotifyProperties MemoryConstants::max48BitAddress, // gpuAddressSpace 83.333, // defaultProfilingTimerResolution MemoryConstants::pageSize, // requiredPreemptionSurfaceSize &isSimulationSKL, // isSimulation PreemptionMode::MidThread, // defaultPreemptionMode aub_stream::ENGINE_RCS, // defaultEngineType 0, // maxRenderFrequency 21, // clVersionSupport CmdServicesMemTraceVersion::DeviceValues::Skl, // aubDeviceId 0, // extraQuantityThreadsPerEU 64, // slmSize sizeof(SKL::GRF), // grfSize false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 true, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency true, // ftrSupportsVmeAvcTextureSampler false, // ftrSupportsVmeAvcPreemption false, // ftrRenderCompressedBuffers false, // ftrRenderCompressedImages true, // ftr64KBpages true, // instrumentationEnabled true, // forceStatelessCompilationFor32Bit "core", // platformType true, // sourceLevelDebuggerSupported true, // supportsVme false, // supportCacheFlushAfterWalker true, // supportsImages true, // supportsDeviceEnqueue true // hostPtrTrackingEnabled }; WorkaroundTable SKL::workaroundTable = {}; FeatureTable SKL::featureTable = {}; void SKL::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { FeatureTable *featureTable = &hwInfo->featureTable; WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; featureTable->ftrGpGpuMidBatchPreempt = true; featureTable->ftrGpGpuThreadGroupLevelPreempt = true; featureTable->ftrL3IACoherency = true; featureTable->ftrGpGpuMidThreadLevelPreempt = true; featureTable->ftr3dMidBatchPreempt = true; featureTable->ftr3dObjectLevelPreempt = true; featureTable->ftrPerCtxtPreemptionGranularityControl = true; 
featureTable->ftrPPGTT = true; featureTable->ftrSVM = true; featureTable->ftrIA32eGfxPTEs = true; featureTable->ftrDisplayYTiling = true; featureTable->ftrTranslationTable = true; featureTable->ftrUserModeTranslationTable = true; featureTable->ftrEnableGuC = true; featureTable->ftrFbc = true; featureTable->ftrFbc2AddressTranslation = true; featureTable->ftrFbcBlitterTracking = true; featureTable->ftrFbcCpuTracking = true; featureTable->ftrVcs2 = featureTable->ftrGT3 || featureTable->ftrGT4; featureTable->ftrVEBOX = true; featureTable->ftrSingleVeboxSlice = featureTable->ftrGT1 || featureTable->ftrGT2; featureTable->ftrTileY = true; workaroundTable->waEnablePreemptionGranularityControlByUMD = true; workaroundTable->waSendMIFLUSHBeforeVFE = true; workaroundTable->waReportPerfCountUseGlobalContextID = true; workaroundTable->waDisableLSQCROPERFforOCL = true; workaroundTable->waMsaa8xTileYDepthPitchAlignment = true; workaroundTable->waLosslessCompressionSurfaceStride = true; workaroundTable->waFbcLinearSurfaceStride = true; workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; workaroundTable->waEncryptedEdramOnlyPartials = true; workaroundTable->waDisableEdramForDisplayRT = true; workaroundTable->waForcePcBbFullCfgRestore = true; workaroundTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; if ((1 << hwInfo->platform.usRevId) & 0x0eu) { workaroundTable->waCompressedResourceRequiresConstVA21 = true; } if ((1 << hwInfo->platform.usRevId) & 0x0fu) { workaroundTable->waDisablePerCtxtPreemptionGranularityControl = true; workaroundTable->waModifyVFEStateAfterGPGPUPreemption = true; } if ((1 << hwInfo->platform.usRevId) & 0x3f) { workaroundTable->waCSRUncachable = true; } } const HardwareInfo SKL_1x2x6::hwInfo = { &SKL::platform, &SKL::featureTable, &SKL::workaroundTable, &SKL_1x2x6::gtSystemInfo, SKL::capabilityTable, }; GT_SYSTEM_INFO SKL_1x2x6::gtSystemInfo = {0}; void SKL_1x2x6::setupHardwareInfo(HardwareInfo *hwInfo, bool 
setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * SKL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 384; gtSysInfo->L3BankCount = 2; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = SKL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = SKL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = SKL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo SKL_1x3x6::hwInfo = { &SKL::platform, &SKL::featureTable, &SKL::workaroundTable, &SKL_1x3x6::gtSystemInfo, SKL::capabilityTable, }; GT_SYSTEM_INFO SKL_1x3x6::gtSystemInfo = {0}; void SKL_1x3x6::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * SKL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 768; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = SKL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = SKL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = SKL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo SKL_1x3x8::hwInfo = { &SKL::platform, &SKL::featureTable, &SKL::workaroundTable, 
&SKL_1x3x8::gtSystemInfo, SKL::capabilityTable, }; GT_SYSTEM_INFO SKL_1x3x8::gtSystemInfo = {0}; void SKL_1x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * SKL::threadsPerEu; gtSysInfo->SliceCount = 1; gtSysInfo->L3CacheSizeInKb = 768; gtSysInfo->L3BankCount = 4; gtSysInfo->MaxFillRate = 8; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = SKL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = SKL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = SKL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo SKL_2x3x8::hwInfo = { &SKL::platform, &SKL::featureTable, &SKL::workaroundTable, &SKL_2x3x8::gtSystemInfo, SKL::capabilityTable, }; GT_SYSTEM_INFO SKL_2x3x8::gtSystemInfo = {0}; void SKL_2x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * SKL::threadsPerEu; gtSysInfo->SliceCount = 2; gtSysInfo->L3CacheSizeInKb = 1536; gtSysInfo->L3BankCount = 8; gtSysInfo->MaxFillRate = 16; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = SKL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = SKL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = SKL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if 
(setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo SKL_3x3x8::hwInfo = { &SKL::platform, &SKL::featureTable, &SKL::workaroundTable, &SKL_3x3x8::gtSystemInfo, SKL::capabilityTable, }; GT_SYSTEM_INFO SKL_3x3x8::gtSystemInfo = {0}; void SKL_3x3x8::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; gtSysInfo->ThreadCount = gtSysInfo->EUCount * SKL::threadsPerEu; gtSysInfo->SliceCount = 3; gtSysInfo->L3CacheSizeInKb = 2304; gtSysInfo->L3BankCount = 12; gtSysInfo->MaxFillRate = 24; gtSysInfo->TotalVsThreads = 336; gtSysInfo->TotalHsThreads = 336; gtSysInfo->TotalDsThreads = 336; gtSysInfo->TotalGsThreads = 336; gtSysInfo->TotalPsThreadsWindowerRange = 64; gtSysInfo->CsrSizeInMb = 8; gtSysInfo->MaxEuPerSubSlice = SKL::maxEuPerSubslice; gtSysInfo->MaxSlicesSupported = SKL::maxSlicesSupported; gtSysInfo->MaxSubSlicesSupported = SKL::maxSubslicesSupported; gtSysInfo->IsL3HashModeEnabled = false; gtSysInfo->IsDynamicallyPopulated = false; if (setupFeatureTableAndWorkaroundTable) { setupFeatureAndWorkaroundTable(hwInfo); } }; const HardwareInfo SKL::hwInfo = SKL_1x3x8::hwInfo; const uint64_t SKL::defaultHardwareInfoConfig = 0x000100030008; void setupSKLHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { if (hwInfoConfig == 0x100030008) { SKL_1x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x200030008) { SKL_2x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x300030008) { SKL_3x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100020006) { SKL_1x2x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if (hwInfoConfig == 0x100030006) { SKL_1x3x6::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else if 
(hwInfoConfig == 0x0) { // Default config SKL_1x3x8::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); } else { UNRECOVERABLE_IF(true); } } void (*SKL::setupHardwareInfo)(HardwareInfo *, bool, uint64_t) = setupSKLHardwareInfoImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/image_gen9.cpp000066400000000000000000000024251363734646600235120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/image.inl" #include namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; template void ImageHw::setMediaSurfaceRotation(void *memory) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setRotation(MEDIA_SURFACE_STATE::ROTATION_NO_ROTATION_OR_0_DEGREE); surfaceState->setXOffset(0); surfaceState->setYOffset(0); } template void ImageHw::setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceMemoryObjectControlStateIndexToMocsTables(value); } #include "opencl/source/mem_obj/image_factory_init.inl" } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/linux/000077500000000000000000000000001363734646600221365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen9/linux/command_stream_receiver_gen9.cpp000066400000000000000000000011501363734646600304360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" 
#include "opencl/source/os_interface/linux/device_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class DrmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/linux/hw_info_config_bxt.inl000066400000000000000000000042751363734646600265050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } Drm *drm = osIface->get()->getDrm(); FeatureTable *featureTable = &hwInfo->featureTable; GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->SliceCount = 1; gtSystemInfo->VEBoxInfo.Instances.Bits.VEBox0Enabled = 1; gtSystemInfo->VEBoxInfo.IsValid = true; int enabled = 0; int retVal = drm->getEnabledPooledEu(enabled); if (retVal == 0) { featureTable->ftrPooledEuEnabled = (enabled != 0); } if (enabled) { int num = 0; retVal = drm->getMinEuInPool(num); if (retVal == 0 && ((num == 3) || (num == 6) || (num == 9))) { gtSystemInfo->EuCountPerPoolMin = static_cast(num); } //in case of failure or not getting right values, fallback to default else { if (gtSystemInfo->SubSliceCount == 3) { // Native 3x6, PooledEU 2x9 gtSystemInfo->EuCountPerPoolMin = 9; } else { // Native 3x6 fused down to 2x6, PooledEU worst case 3+9 gtSystemInfo->EuCountPerPoolMin = 3; } } gtSystemInfo->EuCountPerPoolMax = gtSystemInfo->EUCount - gtSystemInfo->EuCountPerPoolMin; } auto &kmdNotifyProperties = 
hwInfo->capabilityTable.kmdNotifyProperties; kmdNotifyProperties.enableKmdNotify = true; kmdNotifyProperties.enableQuickKmdSleep = true; kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits = true; kmdNotifyProperties.delayKmdNotifyMicroseconds = 50000; kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 5000; kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds = 200000; return 0; } template class HwInfoConfigHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/linux/hw_info_config_cfl.inl000066400000000000000000000026161363734646600264510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; if (gtSystemInfo->SubSliceCount > 3) { gtSystemInfo->SliceCount = 2; } else { gtSystemInfo->SliceCount = 1; } gtSystemInfo->VEBoxInfo.Instances.Bits.VEBox0Enabled = true; gtSystemInfo->VEBoxInfo.IsValid = true; if (hwInfo->platform.usDeviceID == ICFL_GT3_ULT_28W_DEVICE_F0_ID || hwInfo->platform.usDeviceID == ICFL_GT3_ULT_15W_DEVICE_F0_ID) { gtSystemInfo->EdramSizeInKb = 64 * 1024; } auto &kmdNotifyProperties = hwInfo->capabilityTable.kmdNotifyProperties; kmdNotifyProperties.enableKmdNotify = true; kmdNotifyProperties.enableQuickKmdSleep = true; kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits = true; kmdNotifyProperties.delayKmdNotifyMicroseconds = 50000; kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 5000; kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds = 200000; return 0; } template class HwInfoConfigHw; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen9/linux/hw_info_config_gen9.cpp000066400000000000000000000010031363734646600265340ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/hw_info_config.inl" #include "shared/source/os_interface/hw_info_config_bdw_plus.inl" #ifdef SUPPORT_BXT #include "hw_info_config_bxt.inl" #endif #ifdef SUPPORT_CFL #include "hw_info_config_cfl.inl" #endif #ifdef SUPPORT_GLK #include "hw_info_config_glk.inl" #endif #ifdef SUPPORT_KBL #include "hw_info_config_kbl.inl" #endif #ifdef SUPPORT_SKL #include "hw_info_config_skl.inl" #endif compute-runtime-20.13.16352/opencl/source/gen9/linux/hw_info_config_glk.inl000066400000000000000000000042401363734646600264550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } Drm *drm = osIface->get()->getDrm(); FeatureTable *featureTable = &hwInfo->featureTable; GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->VEBoxInfo.Instances.Bits.VEBox0Enabled = 1; gtSystemInfo->VEBoxInfo.IsValid = true; int enabled = 0; int retVal = drm->getEnabledPooledEu(enabled); if (retVal == 0) { featureTable->ftrPooledEuEnabled = (enabled != 0); } if (enabled) { int num = 0; retVal = drm->getMinEuInPool(num); if (retVal == 0 && ((num == 3) || (num == 6) || (num == 9))) { gtSystemInfo->EuCountPerPoolMin = static_cast(num); } //in case of failure or not getting right values, fallback to default else { if (gtSystemInfo->SubSliceCount == 3) { // Native 3x6, PooledEU 2x9 
gtSystemInfo->EuCountPerPoolMin = 9; } else { // Native 3x6 fused down to 2x6, PooledEU worst case 3+9 gtSystemInfo->EuCountPerPoolMin = 3; } } gtSystemInfo->EuCountPerPoolMax = gtSystemInfo->EUCount - gtSystemInfo->EuCountPerPoolMin; } auto &kmdNotifyProperties = hwInfo->capabilityTable.kmdNotifyProperties; kmdNotifyProperties.enableKmdNotify = true; kmdNotifyProperties.enableQuickKmdSleep = true; kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits = true; kmdNotifyProperties.delayKmdNotifyMicroseconds = 50000; kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 5000; kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds = 200000; return 0; } template class HwInfoConfigHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/linux/hw_info_config_kbl.inl000066400000000000000000000026071363734646600264550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; if (gtSystemInfo->SubSliceCount > 3) { gtSystemInfo->SliceCount = 2; } else { gtSystemInfo->SliceCount = 1; } gtSystemInfo->VEBoxInfo.Instances.Bits.VEBox0Enabled = 1; gtSystemInfo->VEBoxInfo.IsValid = true; if (hwInfo->platform.usDeviceID == IKBL_GT3_28W_ULT_DEVICE_F0_ID || hwInfo->platform.usDeviceID == IKBL_GT3_15W_ULT_DEVICE_F0_ID) { gtSystemInfo->EdramSizeInKb = 64 * 1024; } auto &kmdNotifyProperties = hwInfo->capabilityTable.kmdNotifyProperties; kmdNotifyProperties.enableKmdNotify = true; kmdNotifyProperties.enableQuickKmdSleep = true; kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits = true; kmdNotifyProperties.delayKmdNotifyMicroseconds = 50000; kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 5000; 
kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds = 200000; return 0; } template class HwInfoConfigHw; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/linux/hw_info_config_skl.inl000066400000000000000000000034551363734646600265000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { if (nullptr == osIface) { return 0; } GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; if (gtSystemInfo->SubSliceCount > 3) { gtSystemInfo->SliceCount = 2; } else { gtSystemInfo->SliceCount = 1; } gtSystemInfo->VEBoxInfo.Instances.Bits.VEBox0Enabled = 1; gtSystemInfo->VDBoxInfo.Instances.Bits.VDBox0Enabled = 1; gtSystemInfo->VEBoxInfo.IsValid = true; gtSystemInfo->VDBoxInfo.IsValid = true; if (hwInfo->platform.usDeviceID == ISKL_GT3e_ULT_DEVICE_F0_ID_540 || hwInfo->platform.usDeviceID == ISKL_GT3e_ULT_DEVICE_F0_ID_550 || hwInfo->platform.usDeviceID == ISKL_GT3_MEDIA_SERV_DEVICE_F0_ID) { gtSystemInfo->EdramSizeInKb = 64 * 1024; } if (hwInfo->platform.usDeviceID == ISKL_GT4_HALO_MOBL_DEVICE_F0_ID || hwInfo->platform.usDeviceID == ISKL_GT4_WRK_DEVICE_F0_ID) { gtSystemInfo->EdramSizeInKb = 128 * 1024; } auto &kmdNotifyProperties = hwInfo->capabilityTable.kmdNotifyProperties; kmdNotifyProperties.enableKmdNotify = true; kmdNotifyProperties.enableQuickKmdSleep = true; kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits = true; kmdNotifyProperties.delayKmdNotifyMicroseconds = 50000; kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 5000; kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds = 200000; return 0; } template class HwInfoConfigHw; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gen9/sampler_gen9.cpp000066400000000000000000000005741363734646600240760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/scheduler_builtin_kernel.inl000066400000000000000000000052261363734646600265540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "scheduler_definitions.h" uint GetPatchValueForSLMSize(uint slMsize) { uint PatchValue = 0; if (slMsize == 0) { PatchValue = 0; } else if (slMsize <= (1 * 1024)) { PatchValue = 1; } else if (slMsize <= (2 * 1024)) { PatchValue = 2; } else if (slMsize <= (4 * 1024)) { PatchValue = 3; } else if (slMsize <= (8 * 1024)) { PatchValue = 4; } else if (slMsize <= (16 * 1024)) { PatchValue = 5; } else if (slMsize <= (32 * 1024)) { PatchValue = 6; } else if (slMsize <= (64 * 1024)) { PatchValue = 7; } return PatchValue; } void NOOPCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 
0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; } void PutCSStallPipeControl(__global uint *secondaryBatchBuffer, uint dwordOffset, uint pipeControlOffset) { dwordOffset += pipeControlOffset; //first pipe control doing CS stall secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD1; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; //second pipe control, doing actual timestamp write secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = PIPE_CONTROL_CSTALL_DWORD1; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; secondaryBatchBuffer[dwordOffset] = 0; dwordOffset++; } compute-runtime-20.13.16352/opencl/source/gen9/scheduler_definitions.h000066400000000000000000000154261363734646600255310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #define SIZEOF_INTERFACE_DESCRIPTOR_DATA_G9 32 #define NUMBER_OF_INERFACE_DESCRIPTORS 64 #define IDT_BREAKDOWN (NUMBER_OF_INERFACE_DESCRIPTORS - 2) #define INTERFACE_DESCRIPTOR_TABLE_SIZE_G9 (NUMBER_OF_INERFACE_DESCRIPTORS * SIZEOF_INTERFACE_DESCRIPTOR_DATA_G9) #define SIZEOF_COLOR_CALCULATOR_STATE_G9 0xC0 #define OCLRT_SIZEOF_SAMPLER_STATE_G9 (16) #define SIZEOF_COLOR_CALCULATOR_STATE SIZEOF_COLOR_CALCULATOR_STATE_G9 #define SIZEOF_INTERFACE_DESCRIPTOR_DATA SIZEOF_INTERFACE_DESCRIPTOR_DATA_G9 #define INTERFACE_DESCRIPTOR_TABLE_SIZE INTERFACE_DESCRIPTOR_TABLE_SIZE_G9 #define OCLRT_SIZEOF_SAMPLER_STATE OCLRT_SIZEOF_SAMPLER_STATE_G9 #define SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE 
(SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN9) #define SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES (SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES_GEN9) #define OCLRT_MEDIA_VFE_STATE_OFFSET (0) #define MEDIA_VFE_STATE_ADDRESS_OFFSET (OCLRT_MEDIA_VFE_STATE_OFFSET + 1) // DWORD OFFSET #define MEDIA_STATE_FLUSH_INITIAL_OFFSET 0 //bits 0-5 of 1st DWORD #define MEDIA_STATE_FLUSH_INITIAL_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + 1) #define MI_ARB_CHECK_AFTER_MEDIA_STATE_FLUSH_INITIAL_OFFSET (MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) #define MI_ATOMIC_CMD_OFFSET (MI_ARB_CHECK_AFTER_MEDIA_STATE_FLUSH_INITIAL_OFFSET + OCLRT_SIZEOF_MI_ARB_CHECK_DWORD) #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET (MI_ATOMIC_CMD_OFFSET + OCLRT_SIZEOF_MI_ATOMIC_CMD_DWORD_OFFSET) // DWORD OFFSET of Interface Descriptor Start Address #define MEDIA_INTERFACE_DESCRIPTOR_LOAD_INTERFACEDESCRIPTORSTARTADDRESS_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + 3) #define PIPE_CONTROL_STALL_BEFORE_POSTSYNCOP_START_OFFSET (MEDIA_INTERFACE_DESCRIPTOR_LOAD_OFFSET + OCLRT_SIZEOF_MEDIA_INTERFACE_DESCRIPTOR_LOAD_DEVICE_CMD_DWORD_OFFSET) #define PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET (PIPE_CONTROL_STALL_BEFORE_POSTSYNCOP_START_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9_DWORD_OFFSET) #define INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD 3 #define INTERFACE_DESCRIPTOR_BINDING_TABLE_POINTER_DWORD 4 #define INTERFACE_DESCRIPTOR_CONSTANT_URB_ENTRY_READ_OFFSET 5 #define INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD 6 #define INTERFACE_DESCRIPTOR_SLMSIZE_DWORD 6 #define INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT 9 #define SAMPLER_STATE_INDIRECT_STATE_MASK (0x7FFFFC0) #define SAMPLER_STATE_BORDER_COLOR_MASK (0xFFFFFFE0) #define SAMPLER_STATE_DESCRIPTOR_BORDER_COLOR_POINTER_DWORD 2 #define GPGPU_WALKER_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9_DWORD_OFFSET) // DWORD OFFSET of the Interface Descriptor Offset for GPGPU_WALKER // bits 0 - 5 
#define GPGPU_WALKER_INTERFACE_DESCRIPTOR_ID_OFFSET (GPGPU_WALKER_OFFSET + 1) // DWORD OFFSET of the Indirect data length Offset for GPGPU_WALKER // bits 0 - 16 #define GPGPU_WALKER_INDIRECT_DATA_LENGTH_OFFSET (GPGPU_WALKER_OFFSET + 2) // DWORD OFFSET of the Indirect Start Address for GPGPU_WALKER #define GPGPU_WALKER_INDIRECT_START_ADDRESS_OFFSET (GPGPU_WALKER_OFFSET + 3) // DWORD OFFSET of the Thread Width Counter Maximum for GPGPU_WALKER // bits 0 - 5 #define GPGPU_WALKER_THREAD_WIDTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Height Counter Maximum for GPGPU_WALKER // bits 8 - 13 #define GPGPU_WALKER_THREAD_HEIGHT_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Thread Depth Counter Maximum for GPGPU_WALKER // bits 16 - 21 #define GPGPU_WALKER_THREAD_DEPTH_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the SIMD Size for GPGPU_WALKER // bits 30 - 31 #define GPGPU_WALKER_SIMDSIZE_DWORD (GPGPU_WALKER_OFFSET + 4) // DWORD OFFSET of the Starting in X pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_X (GPGPU_WALKER_OFFSET + 5) // DWORD OFFSET of the X Dimension for GPGPU_WALKER #define GPGPU_WALKER_XDIM_DWORD (GPGPU_WALKER_OFFSET + 7) // DWORD OFFSET of the Starting in Y pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_Y (GPGPU_WALKER_OFFSET + 8) // DWORD OFFSET of the Y Dimension for GPGPU_WALKER #define GPGPU_WALKER_YDIM_DWORD (GPGPU_WALKER_OFFSET + 10) // DWORD OFFSET of the Starting in Z pos for GPGPU_WALKER //bits 0 - 31 #define GPGPU_WALKER_GROUP_ID_START_Z (GPGPU_WALKER_OFFSET + 11) // DWORD OFFSET of the X Dimension for GPGPU_WALKER #define GPGPU_WALKER_ZDIM_DWORD (GPGPU_WALKER_OFFSET + 12) // DWORD OFFSET of the Right or X Mask for GPGPU_WALKER #define GPGPU_WALKER_XMASK_DWORD (GPGPU_WALKER_OFFSET + 13) // DWORD OFFSET of the Bottom or Y Mask for GPGPU_WALKER #define GPGPU_WALKER_YMASK_DWORD (GPGPU_WALKER_OFFSET + 14) #define MEDIA_STATE_FLUSH_OFFSET (GPGPU_WALKER_OFFSET + 
OCLRT_GPGPU_WALKER_CMD_DEVICE_CMD_G9_DWORD_OFFSET) //bits 0-5 of 1st DWORD of M_S_F command #define MEDIA_STATE_FLUSH_INTERFACE_DESCRIPTOR_OFFSET (MEDIA_STATE_FLUSH_OFFSET + 1) #define MI_ARB_CHECK_AFTER_MEDIA_STATE_FLUSH_OFFSET (MEDIA_STATE_FLUSH_OFFSET + OCLRT_SIZEOF_MSFLUSH_DWORD) #define PIPE_CONTROL_STALL_BEFORE_POSTSYNCOP_END_OFFSET (MI_ARB_CHECK_AFTER_MEDIA_STATE_FLUSH_OFFSET + OCLRT_SIZEOF_MI_ARB_CHECK_DWORD) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET (PIPE_CONTROL_STALL_BEFORE_POSTSYNCOP_END_OFFSET) #define PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET_TO_PATCH (PIPE_CONTROL_STALL_BEFORE_POSTSYNCOP_END_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9_DWORD_OFFSET) #define PIPE_CONTROL_POST_SYNC_DWORD 1 #define PIPE_CONTROL_POST_SYNC_START_BIT 14 #define PIPE_CONTROL_POST_SYNC_END_BIT 15 #define PIPE_CONTROL_GENERATE_TIME_STAMP 3 #define PIPE_CONTROL_NO_POSTSYNC_OPERATION 0 #define PIPE_CONTROL_ADDRESS_FIELD_DWORD 2 #define PIPE_CONTROL_PROFILING_START_TIMESTAMP_ADDRESS_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET + PIPE_CONTROL_ADDRESS_FIELD_DWORD) //DWORD 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT 2 #define PIPE_CONTROL_GRAPHICS_ADDRESS_END_BIT 31 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_START_BIT 0 #define PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_END_BIT 15 #define PIPE_CONTROL_TIME_STAMP_DWORD0 0x7A000004 #define PIPE_CONTROL_TIME_STAMP_DWORD1 0x0010C4A4 #define PIPE_CONTROL_CSTALL_DWORD0 0x7A000004 #define PIPE_CONTROL_CSTALL_DWORD1 0x001004A4 #define PIPE_CONTROL_TAG_WRITE_DWORD0 0x7A000004 #define PIPE_CONTROL_TAG_WRITE_DWORD1 0x001044A4 #define OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0 (0x18800101) #if defined WA_LRI_COMMANDS_EXIST #define IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET (PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9_DWORD_OFFSET + OCLRT_PIPE_CONTROL_CMD_DEVICE_CMD_G9_DWORD_OFFSET) #endif #define OCLRT_LOAD_REGISTER_IMM_CMD 0x11000001 #define CTXT_PREMP_DBG_ADDRESS_VALUE 0x2248 #define 
CTXT_PREMP_ON_MI_ARB_CHECK_ONLY 0x00000100 #define CTXT_PREMP_DEFAULT_VALUE 0x0 #define IMM_LOAD_REGISTER_ADDRESS_DWORD_OFFSET 1 #define IMM_LOAD_REGISTER_VALUE_DWORD_OFFSET 2 compute-runtime-20.13.16352/opencl/source/gen9/state_base_address_gen9.cpp000066400000000000000000000004301363734646600262410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/state_base_address.h" #include "shared/source/helpers/state_base_address_bdw_plus.inl" namespace NEO { template struct StateBaseAddressHelper; } compute-runtime-20.13.16352/opencl/source/gen9/tbx_command_stream_receiver_gen9.cpp000066400000000000000000000020141363734646600301540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/helpers/array_count.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.inl" #include "opencl/source/helpers/base_object.h" namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; template <> void populateFactoryTable>() { extern TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE]; UNRECOVERABLE_IF(!isInRange(gfxCore, tbxCommandStreamReceiverFactory)); tbxCommandStreamReceiverFactory[gfxCore] = TbxCommandStreamReceiverHw::create; } template class TbxCommandStreamReceiverHw; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/windows/000077500000000000000000000000001363734646600224715ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen9/windows/command_stream_receiver_gen9.cpp000066400000000000000000000010521363734646600307720ustar00rootroot00000000000000/* * Copyright 
(C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/os_interface/windows/device_command_stream.inl" #include "opencl/source/os_interface/windows/wddm_device_command_stream.inl" namespace NEO { template class DeviceCommandStreamReceiver; template class WddmCommandStreamReceiver; template class CommandStreamReceiverWithAUBDump>; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen9/windows/gmm_callbacks_gen9.cpp000066400000000000000000000005421363734646600266770ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/source/helpers/windows/gmm_callbacks.inl" using namespace NEO; template struct DeviceCallbacks; template struct TTCallbacks; compute-runtime-20.13.16352/opencl/source/gen9/windows/hw_info_config_gen9.cpp000066400000000000000000000025501363734646600270770ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/hw_info_config.inl" #include "shared/source/os_interface/hw_info_config_bdw_plus.inl" namespace NEO { #ifdef SUPPORT_BXT template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif #ifdef SUPPORT_CFL template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif #ifdef SUPPORT_GLK template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif #ifdef SUPPORT_KBL template <> int 
HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif #ifdef SUPPORT_SKL template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template class HwInfoConfigHw; #endif } // namespace NEO compute-runtime-20.13.16352/opencl/source/gen_common/000077500000000000000000000000001363734646600222565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gen_common/CMakeLists.txt000066400000000000000000000005031363734646600250140ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_GEN_COMMON ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_mapper.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mapper_base.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_GEN_COMMON}) compute-runtime-20.13.16352/opencl/source/gen_common/aub_mapper.h000066400000000000000000000006101363734646600245370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #ifdef SUPPORT_GEN8 #include "opencl/source/gen8/aub_mapper.h" #endif #ifdef SUPPORT_GEN9 #include "opencl/source/gen9/aub_mapper.h" #endif #ifdef SUPPORT_GEN11 #include "opencl/source/gen11/aub_mapper.h" #endif #ifdef SUPPORT_GEN12LP #include "opencl/source/gen12lp/aub_mapper.h" #endif compute-runtime-20.13.16352/opencl/source/gen_common/aub_mapper_base.h000066400000000000000000000006361363734646600255410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/completion_stamp.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include namespace NEO { template struct AUBFamilyMapper { }; using MMIOPair = std::pair; using MMIOList = std::vector; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gtpin/000077500000000000000000000000001363734646600212565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/gtpin/CMakeLists.txt000066400000000000000000000022561363734646600240230ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DISABLED_GTPIN_SUPPORT) set(RUNTIME_SRCS_GTPIN ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_callbacks.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper.inl ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_init.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_notify.h ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_defs.h ) if(WIN32) set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}\nGTPin_Init") endif() else() set(RUNTIME_SRCS_GTPIN ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_callback_stubs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_notify.h ) endif() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_GTPIN}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_GTPIN ${RUNTIME_SRCS_GTPIN}) set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}" PARENT_SCOPE) compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_callback_stubs.cpp000066400000000000000000000016661363734646600261500ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "CL/cl.h" #include namespace NEO { bool isGTPinInitialized = false; void gtpinNotifyContextCreate(cl_context context) { } void gtpinNotifyContextDestroy(cl_context context) { } void gtpinNotifyKernelCreate(cl_kernel kernel) { } void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { } void gtpinNotifyPreFlushTask(void *pCmdQueue) { } void 
gtpinNotifyFlushTask(uint32_t flushedTaskCount) { } void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { } void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver) { } void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector) { } void gtpinNotifyPlatformShutdown() { } void *gtpinGetIgcInit() { return nullptr; } void setIgcInfo(const void *igcInfo) { } const void *gtpinGetIgcInfo() { return nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_callbacks.cpp000066400000000000000000000236721363734646600251140ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/utilities/spinlock.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_defs.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/program/program.h" #include "CL/cl.h" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" #include #include using namespace gtpin; namespace NEO { extern gtpin::ocl::gtpin_events_t GTPinCallbacks; igc_init_t *pIgcInit = nullptr; std::atomic sequenceCount(1); CommandQueue *pCmdQueueForFlushTask = nullptr; std::deque kernelExecQueue; SpinLock kernelExecQueueLock; void gtpinNotifyContextCreate(cl_context context) { if (isGTPinInitialized) { platform_info_t gtpinPlatformInfo; auto pContext = castToObjectOrAbort(context); auto pDevice = pContext->getDevice(0); UNRECOVERABLE_IF(pDevice == nullptr); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); 
gtpinPlatformInfo.gen_version = (gtpin::GTPIN_GEN_VERSION)gtpinHelper.getGenVersion(); gtpinPlatformInfo.device_id = static_cast(pDevice->getHardwareInfo().platform.usDeviceID); (*GTPinCallbacks.onContextCreate)((context_handle_t)context, >pinPlatformInfo, &pIgcInit); } } void gtpinNotifyContextDestroy(cl_context context) { if (isGTPinInitialized) { (*GTPinCallbacks.onContextDestroy)((context_handle_t)context); } } void gtpinNotifyKernelCreate(cl_kernel kernel) { if (nullptr == kernel) { return; } if (isGTPinInitialized) { auto pKernel = castToObjectOrAbort(kernel); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(); // Enlarge local copy of SSH by 1 SS auto &device = pKernel->getDevice(); GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); if (!gtpinHelper.addSurfaceState(pKernel)) { // Kernel with no SSH or Kernel EM, not supported return; } if (pKernel->isKernelHeapSubstituted()) { // ISA for this kernel was already substituted return; } // Notify GT-Pin that new kernel was created Context *pContext = &(pKernel->getContext()); cl_context context = (cl_context)pContext; auto &kernelInfo = pKernel->getKernelInfo(); instrument_params_in_t paramsIn = {}; paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS; paramsIn.simd = (GTPIN_SIMD_WIDTH)kernelInfo.getMaxSimdSize(); paramsIn.orig_kernel_binary = (uint8_t *)pKernel->getKernelHeap(); paramsIn.orig_kernel_size = static_cast(pKernel->getKernelHeapSize()); paramsIn.buffer_type = GTPIN_BUFFER_BINDFULL; paramsIn.buffer_desc.BTI = static_cast(gtpinBTI); paramsIn.igc_hash_id = kernelInfo.heapInfo.pKernelHeader->ShaderHashCode; paramsIn.kernel_name = (char *)kernelInfo.name.c_str(); paramsIn.igc_info = kernelInfo.igcInfoForGtpin; paramsIn.debug_data = pKernel->getProgram()->getDebugData(); paramsIn.debug_data_size = static_cast(pKernel->getProgram()->getDebugDataSize()); instrument_params_out_t paramsOut = {0}; 
(*GTPinCallbacks.onKernelCreate)((context_handle_t)(cl_context)context, ¶msIn, ¶msOut); // Substitute ISA of created kernel with instrumented code pKernel->substituteKernelHeap(paramsOut.inst_kernel_binary, paramsOut.inst_kernel_size); pKernel->setKernelId(paramsOut.kernel_id); } } void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { if (isGTPinInitialized) { auto pKernel = castToObjectOrAbort(kernel); if (pKernel->getSurfaceStateHeapSize() == 0) { // Kernel with no SSH, not supported return; } Context *pContext = &(pKernel->getContext()); cl_context context = (cl_context)pContext; uint64_t kernelId = pKernel->getKernelId(); command_buffer_handle_t commandBuffer = (command_buffer_handle_t)((uintptr_t)(sequenceCount++)); uint32_t kernelOffset = 0; resource_handle_t resource = 0; // Notify GT-Pin that abstract "command buffer" was created (*GTPinCallbacks.onCommandBufferCreate)((context_handle_t)context, commandBuffer); // Notify GT-Pin that kernel was submited for execution (*GTPinCallbacks.onKernelSubmit)(commandBuffer, kernelId, &kernelOffset, &resource); // Create new record in Kernel Execution Queue describing submited kernel pKernel->setStartOffset(kernelOffset); gtpinkexec_t kExec; kExec.pKernel = pKernel; kExec.gtpinResource = (cl_mem)resource; kExec.commandBuffer = commandBuffer; kExec.pCommandQueue = (CommandQueue *)pCmdQueue; std::unique_lock lock{kernelExecQueueLock}; kernelExecQueue.push_back(kExec); lock.unlock(); // Patch SSH[gtpinBTI] with GT-Pin resource if (!resource) { return; } auto &device = pKernel->getDevice(); GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1; void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); pBuffer->setArgStateful(pSurfaceState, false, false, false, false); } } void 
gtpinNotifyPreFlushTask(void *pCmdQueue) { if (isGTPinInitialized) { pCmdQueueForFlushTask = (CommandQueue *)pCmdQueue; } } void gtpinNotifyFlushTask(uint32_t flushedTaskCount) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) { // Update record in Kernel Execution Queue with kernel's TC kernelExecQueue[n].isTaskCountValid = true; kernelExecQueue[n].taskCount = flushedTaskCount; break; } } pCmdQueueForFlushTask = nullptr; } } void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems;) { if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) { // Notify GT-Pin that execution of "command buffer" was completed (*GTPinCallbacks.onCommandBufferComplete)(kernelExecQueue[n].commandBuffer); // Remove kernel's record from Kernel Execution Queue kernelExecQueue.erase(kernelExecQueue.begin() + n); numElems--; } else { n++; } } } } void gtpinNotifyMakeResident(void *pKernel, void *pCSR) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { // It's time for kernel to make resident its GT-Pin resource CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast(pCSR); cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource; auto pBuffer = castToObjectOrAbort(gtpinBuffer); GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation(); pCommandStreamReceiver->makeResident(*pGfxAlloc); kernelExecQueue[n].isResourceResident = true; break; } } } } void 
gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) { if (isGTPinInitialized) { std::unique_lock lock{kernelExecQueueLock}; size_t numElems = kernelExecQueue.size(); for (size_t n = 0; n < numElems; n++) { if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) { // It's time for kernel to update its residency list with its GT-Pin resource std::vector *pResidencyVector = (std::vector *)pResVec; cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource; auto pBuffer = castToObjectOrAbort(gtpinBuffer); GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation(); GeneralSurface *pSurface = new GeneralSurface(pGfxAlloc); pResidencyVector->push_back(pSurface); kernelExecQueue[n].isResourceResident = true; break; } } } } void gtpinNotifyPlatformShutdown() { if (isGTPinInitialized) { // Clear Kernel Execution Queue kernelExecQueue.clear(); } } void *gtpinGetIgcInit() { return pIgcInit; } void gtpinSetIgcInit(void *pIgcInitPtr) { pIgcInit = static_cast(pIgcInitPtr); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_defs.h000066400000000000000000000015001363734646600235450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/kernel/kernel.h" #include "CL/cl.h" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" namespace NEO { struct GTPinKernelExec { Kernel *pKernel; cl_mem gtpinResource; CommandQueue *pCommandQueue; gtpin::command_buffer_handle_t commandBuffer; uint32_t taskCount; bool isTaskCountValid; bool isResourceResident; GTPinKernelExec() { pKernel = nullptr; gtpinResource = nullptr; pCommandQueue = nullptr; commandBuffer = nullptr; taskCount = 0; isTaskCountValid = false; isResourceResident = false; } }; typedef struct GTPinKernelExec gtpinkexec_t; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_helpers.cpp000066400000000000000000000055271363734646600246360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "gtpin_helpers.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/mem_obj/buffer.h" #include "CL/cl.h" #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" using namespace gtpin; namespace NEO { GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(context_handle_t context, uint32_t reqSize, resource_handle_t *pResource) { cl_int diag = CL_SUCCESS; Context *pContext = castToObject((cl_context)context); if ((pContext == nullptr) || (pResource == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize); void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize); if (hostPtr == nullptr) { return GTPIN_DI_ERROR_ALLOCATION_FAILED; } cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL, size, hostPtr, diag); *pResource = (resource_handle_t)buffer; return GTPIN_DI_SUCCESS; } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, resource_handle_t resource) { cl_mem buffer = (cl_mem)resource; Context *pContext = castToObject((cl_context)context); if ((pContext == nullptr) || (buffer == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } auto pMemObj = castToObject(buffer); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } alignedFree(pMemObj->getHostPtr()); pMemObj->release(); return GTPIN_DI_SUCCESS; } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, resource_handle_t resource, uint8_t **pAddress) { cl_mem buffer = (cl_mem)resource; Context *pContext = 
castToObject((cl_context)context); if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } auto pMemObj = castToObject(buffer); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } *pAddress = (uint8_t *)pMemObj->getHostPtr(); return GTPIN_DI_SUCCESS; } GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(context_handle_t context, resource_handle_t resource) { cl_mem buffer = (cl_mem)resource; Context *pContext = castToObject((cl_context)context); if ((pContext == nullptr) || (buffer == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } auto pMemObj = castToObject(buffer); if (pMemObj == nullptr) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } return GTPIN_DI_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_helpers.h000066400000000000000000000013621363734646600242740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "ocl_igc_shared/gtpin/gtpin_driver_common.h" namespace NEO { gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(gtpin::context_handle_t context, uint32_t size, gtpin::resource_handle_t *pResource); gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(gtpin::context_handle_t context, gtpin::resource_handle_t resource); gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(gtpin::context_handle_t context, gtpin::resource_handle_t resource, uint8_t **pAddress); gtpin::GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(gtpin::context_handle_t context, gtpin::resource_handle_t resource); } // namespace NEO compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_hw_helper.cpp000066400000000000000000000005231363734646600251400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/gtpin/gtpin_hw_helper.h" namespace NEO { GTPinHwHelper 
*gtpinHwHelperFactory[IGFX_MAX_CORE] = {}; GTPinHwHelper >PinHwHelper::get(GFXCORE_FAMILY gfxCore) { return *gtpinHwHelperFactory[gfxCore]; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_hw_helper.h000066400000000000000000000016051363734646600246070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_cmds.h" namespace NEO { class Kernel; class GTPinHwHelper { public: static GTPinHwHelper &get(GFXCORE_FAMILY gfxCore); virtual uint32_t getGenVersion() = 0; virtual bool addSurfaceState(Kernel *pKernel) = 0; virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0; protected: GTPinHwHelper(){}; }; template class GTPinHwHelperHw : public GTPinHwHelper { public: static GTPinHwHelper &get() { static GTPinHwHelperHw gtpinHwHelper; return gtpinHwHelper; } uint32_t getGenVersion() override; bool addSurfaceState(Kernel *pKernel) override; void *getSurfaceState(Kernel *pKernel, size_t bti) override; private: GTPinHwHelperHw(){}; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_hw_helper.inl000066400000000000000000000051321363734646600251410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/string.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/kernel/kernel.h" namespace NEO { template bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; size_t sshSize = pKernel->getSurfaceStateHeapSize(); if ((sshSize == 0) || pKernel->isParentKernel) { // Kernels which do not use SSH or use Execution Model are not supported (yet) return false; } size_t ssSize = sizeof(RENDER_SURFACE_STATE); size_t btsSize = 
sizeof(BINDING_TABLE_STATE); size_t sizeToEnlarge = ssSize + btsSize; size_t currBTOffset = pKernel->getBindingTableOffset(); size_t currSurfaceStateSize = currBTOffset; char *pSsh = static_cast(pKernel->getSurfaceStateHeap()); char *pNewSsh = new char[sshSize + sizeToEnlarge]; memcpy_s(pNewSsh, sshSize + sizeToEnlarge, pSsh, currSurfaceStateSize); RENDER_SURFACE_STATE *pSS = reinterpret_cast(pNewSsh + currSurfaceStateSize); *pSS = GfxFamily::cmdInitRenderSurfaceState; size_t newSurfaceStateSize = currSurfaceStateSize + ssSize; size_t currBTCount = pKernel->getNumberOfBindingTableStates(); memcpy_s(pNewSsh + newSurfaceStateSize, sshSize + sizeToEnlarge - newSurfaceStateSize, pSsh + currBTOffset, currBTCount * btsSize); BINDING_TABLE_STATE *pNewBTS = reinterpret_cast(pNewSsh + newSurfaceStateSize + currBTCount * btsSize); *pNewBTS = GfxFamily::cmdInitBindingTableState; pNewBTS->setSurfaceStatePointer((uint64_t)currBTOffset); pKernel->resizeSurfaceStateHeap(pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize); return true; } template void *GTPinHwHelperHw::getSurfaceState(Kernel *pKernel, size_t bti) { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; if ((nullptr == pKernel->getSurfaceStateHeap()) || (bti >= pKernel->getNumberOfBindingTableStates())) { return nullptr; } auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE)))); auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(), pBts->getSurfaceStatePointer()); return pSurfaceState; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_init.cpp000066400000000000000000000036011363734646600241260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "gtpin_init.h" #include "shared/source/device/device.h" #include "opencl/source/platform/platform.h" #include "CL/cl.h" #include "gtpin_helpers.h" 
using namespace gtpin; using namespace NEO; namespace NEO { bool isGTPinInitialized = false; gtpin::ocl::gtpin_events_t GTPinCallbacks = {0}; } // namespace NEO GTPIN_DI_STATUS GTPin_Init(gtpin::ocl::gtpin_events_t *pGtpinEvents, driver_services_t *pDriverServices, interface_version_t *pDriverVersion) { if (isGTPinInitialized) { return GTPIN_DI_ERROR_INSTANCE_ALREADY_CREATED; } if (pDriverVersion != nullptr) { // GT-Pin is asking to obtain GT-Pin Interface version that is supported pDriverVersion->common = gtpin::GTPIN_COMMON_INTERFACE_VERSION; pDriverVersion->specific = gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION; if ((pDriverServices == nullptr) || (pGtpinEvents == nullptr)) { return GTPIN_DI_SUCCESS; } } if ((pDriverServices == nullptr) || (pGtpinEvents == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } if ((pGtpinEvents->onContextCreate == nullptr) || (pGtpinEvents->onContextDestroy == nullptr) || (pGtpinEvents->onKernelCreate == nullptr) || (pGtpinEvents->onKernelSubmit == nullptr) || (pGtpinEvents->onCommandBufferCreate == nullptr) || (pGtpinEvents->onCommandBufferComplete == nullptr)) { return GTPIN_DI_ERROR_INVALID_ARGUMENT; } pDriverServices->bufferAllocate = NEO::gtpinCreateBuffer; pDriverServices->bufferDeallocate = NEO::gtpinFreeBuffer; pDriverServices->bufferMap = NEO::gtpinMapBuffer; pDriverServices->bufferUnMap = NEO::gtpinUnmapBuffer; GTPinCallbacks = *pGtpinEvents; isGTPinInitialized = true; return GTPIN_DI_SUCCESS; } compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_init.h000066400000000000000000000006111363734646600235710ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" #ifdef __cplusplus extern "C" { #endif gtpin::GTPIN_DI_STATUS GTPin_Init(gtpin::ocl::gtpin_events_t *pGtpinEvents, gtpin::driver_services_t *pDriverServices, gtpin::interface_version_t *pDriverVersion); #ifdef __cplusplus } #endif 
compute-runtime-20.13.16352/opencl/source/gtpin/gtpin_notify.h000066400000000000000000000016211363734646600241400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include namespace NEO { extern bool isGTPinInitialized; void gtpinNotifyContextCreate(cl_context context); void gtpinNotifyContextDestroy(cl_context context); void gtpinNotifyKernelCreate(cl_kernel kernel); void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue); void gtpinNotifyPreFlushTask(void *pCmdQueue); void gtpinNotifyFlushTask(uint32_t flushedTaskCount); void gtpinNotifyTaskCompletion(uint32_t completedTaskCount); void gtpinNotifyMakeResident(void *pKernel, void *pCommandStreamReceiver); void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResidencyVector); void gtpinNotifyPlatformShutdown(); inline bool gtpinIsGTPinInitialized() { return isGTPinInitialized; } void *gtpinGetIgcInit(); void gtpinSetIgcInit(void *pIgcInitPtr); } // namespace NEO compute-runtime-20.13.16352/opencl/source/guidelines/000077500000000000000000000000001363734646600222655ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/guidelines/CMakeLists.txt000066400000000000000000000004651363734646600250320ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_CODING_GUIDELINES ${NEO_SOURCE_DIR}/GUIDELINES.md ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_CODING_GUIDELINES}) set_property(GLOBAL PROPERTY RUNTIME_CODING_GUIDELINES ${RUNTIME_CODING_GUIDELINES}) compute-runtime-20.13.16352/opencl/source/helpers/000077500000000000000000000000001363734646600215775ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/helpers/CMakeLists.txt000066400000000000000000000065441363734646600243500ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
set(RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/base_object.cpp ${CMAKE_CURRENT_SOURCE_DIR}/base_object.h ${CMAKE_CURRENT_SOURCE_DIR}/built_ins_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_blit_properties.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/cl_device_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_device_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/convert_color.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder.h ${CMAKE_CURRENT_SOURCE_DIR}/enable_product.inl ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_properties.h ${CMAKE_CURRENT_SOURCE_DIR}/error_mappers.h ${CMAKE_CURRENT_SOURCE_DIR}/get_info_status_mapper.h ${CMAKE_CURRENT_SOURCE_DIR}/gmm_types_converter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gmm_types_converter.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_context_controller.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hardware_context_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/helper_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mem_properties_parser_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_properties_parser_helper.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/memory_properties_flags_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/mipmap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mipmap.h ${CMAKE_CURRENT_SOURCE_DIR}/per_thread_data.cpp ${CMAKE_CURRENT_SOURCE_DIR}/per_thread_data.h ${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/queue_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers.h 
${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/string_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/surface_formats.cpp ${CMAKE_CURRENT_SOURCE_DIR}/surface_formats.h ${CMAKE_CURRENT_SOURCE_DIR}/task_information.cpp ${CMAKE_CURRENT_SOURCE_DIR}/task_information.h ${CMAKE_CURRENT_SOURCE_DIR}/task_information.inl ${CMAKE_CURRENT_SOURCE_DIR}/uint16_avx2.h ${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4.h ${CMAKE_CURRENT_SOURCE_DIR}/validators.cpp ${CMAKE_CURRENT_SOURCE_DIR}/validators.h ) set(RUNTIME_SRCS_HELPERS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks_tgllp_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/windows/kmd_notify_properties_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/gl_helper.h ) set(RUNTIME_SRCS_HELPERS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/linux/kmd_notify_properties_linux.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_HELPERS_BASE}) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_HELPERS_WINDOWS}) else() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_HELPERS_LINUX}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_HELPERS_LINUX ${RUNTIME_SRCS_HELPERS_LINUX}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_HELPERS_BASE ${RUNTIME_SRCS_HELPERS_BASE}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/helpers/base_object.cpp000066400000000000000000000003211363734646600245370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/base_object.h" namespace NEO { std::thread::id invalidThreadID; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/base_object.h000066400000000000000000000135341363734646600242160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/abort.h" #include "shared/source/helpers/debug_helpers.h" #include 
"shared/source/utilities/reference_tracked_object.h" #include "opencl/source/api/dispatch.h" #include "CL/cl.h" #include #include #include #include #include namespace NEO { #if defined(__clang__) #define NO_SANITIZE __attribute__((no_sanitize("undefined"))) #else #define NO_SANITIZE #endif template struct OpenCLObjectMapper { }; template using DerivedType_t = typename OpenCLObjectMapper::DerivedType; template NO_SANITIZE inline DerivedType *castToObject(typename DerivedType::BaseType *object) { if (object == nullptr) { return nullptr; } auto derivedObject = static_cast(object); if (((derivedObject->getMagic() & DerivedType::maskMagic) == DerivedType::objectMagic) && (derivedObject->dispatch.icdDispatch == &icdGlobalDispatchTable)) { return derivedObject; } return nullptr; } template inline DerivedType *castToObjectOrAbort(typename DerivedType::BaseType *object) { auto derivedObject = castToObject(object); if (derivedObject == nullptr) { abortExecution(); } else { return derivedObject; } } template inline const DerivedType *castToObject(const typename DerivedType::BaseType *object) { return castToObject(const_cast(object)); } template inline DerivedType *castToObject(const void *object) { cl_mem clMem = const_cast(static_cast(object)); return castToObject(clMem); } extern std::thread::id invalidThreadID; class ConditionVariableWithCounter { public: ConditionVariableWithCounter() { waitersCount = 0; } template void wait(Args &&... 
args) { ++waitersCount; cond.wait(std::forward(args)...); --waitersCount; } void notify_one() { // NOLINT cond.notify_one(); } uint32_t peekNumWaiters() { return waitersCount.load(); } private: std::atomic_uint waitersCount; std::condition_variable cond; }; template class TakeOwnershipWrapper { public: TakeOwnershipWrapper(T &obj) : obj(obj) { lock(); } TakeOwnershipWrapper(T &obj, bool lockImmediately) : obj(obj) { if (lockImmediately) { lock(); } } ~TakeOwnershipWrapper() { unlock(); } void unlock() { if (locked) { obj.releaseOwnership(); locked = false; } } void lock() { if (!locked) { obj.takeOwnership(); locked = true; } } private: T &obj; bool locked = false; }; // This class should act as a base class for all CL objects. It will handle the // MT safe and reference things for every CL object. template class BaseObject : public B, public ReferenceTrackedObject> { public: typedef BaseObject ThisType; typedef B BaseType; typedef DerivedType_t DerivedType; const static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; const static cl_ulong deadMagic = 0xFFFFFFFFFFFFFFFFLL; BaseObject(const BaseObject &) = delete; BaseObject &operator=(const BaseObject &) = delete; protected: cl_long magic; mutable std::mutex mtx; mutable ConditionVariableWithCounter cond; mutable std::thread::id owner; mutable uint32_t recursiveOwnageCounter = 0; BaseObject() : magic(DerivedType::objectMagic) { this->incRefApi(); } ~BaseObject() override { magic = deadMagic; } bool isValid() const { return (magic & DerivedType::maskMagic) == DerivedType::objectMagic; } void convertToInternalObject() { this->incRefInternal(); this->decRefApi(); } public: NO_SANITIZE cl_ulong getMagic() const { return this->magic; } virtual void retain() { DEBUG_BREAK_IF(!isValid()); this->incRefApi(); } virtual unique_ptr_if_unused release() { DEBUG_BREAK_IF(!isValid()); return this->decRefApi(); } cl_int getReference() const { DEBUG_BREAK_IF(!isValid()); return this->getRefApiCount(); } MOCKABLE_VIRTUAL void 
takeOwnership() const { DEBUG_BREAK_IF(!isValid()); std::unique_lock theLock(mtx); std::thread::id self = std::this_thread::get_id(); if (owner == invalidThreadID) { owner = self; return; } if (owner == self) { ++recursiveOwnageCounter; return; } cond.wait(theLock, [&] { return owner == invalidThreadID; }); owner = self; recursiveOwnageCounter = 0; } MOCKABLE_VIRTUAL void releaseOwnership() const { DEBUG_BREAK_IF(!isValid()); std::unique_lock theLock(mtx); if (hasOwnership() == false) { DEBUG_BREAK_IF(true); return; } if (recursiveOwnageCounter > 0) { --recursiveOwnageCounter; return; } owner = invalidThreadID; cond.notify_one(); } // checks whether current thread owns object mutex bool hasOwnership() const { DEBUG_BREAK_IF(!isValid()); return (owner == std::this_thread::get_id()); } ConditionVariableWithCounter &getCond() { return this->cond; } }; // Method called by global factory enabler template void populateFactoryTable(); } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/built_ins_helper.cpp000066400000000000000000000022621363734646600256340ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/built_ins_helper.h" #include "shared/source/device/device.h" #include "opencl/source/program/program.h" namespace NEO { const SipKernel &initSipKernel(SipKernelType type, Device &device) { return device.getBuiltIns()->getSipKernel(type, device); } Program *createProgramForSip(ExecutionEnvironment &executionEnvironment, Context *context, std::vector &binary, size_t size, cl_int *errcodeRet, Device *device) { cl_int retVal = 0; auto program = Program::createFromGenBinary(executionEnvironment, context, binary.data(), size, true, &retVal, device); DEBUG_BREAK_IF(retVal != 0); return program; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/built_ins_helper.h000066400000000000000000000012131363734646600252740ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/execution_environment/execution_environment.h" namespace NEO { class Device; const SipKernel &initSipKernel(SipKernelType type, Device &device); Program *createProgramForSip(ExecutionEnvironment &executionEnvironment, Context *context, std::vector &binary, size_t size, int *errcodeRet, Device *device); } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/cache_policy.cpp000066400000000000000000000012011363734646600247170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { bool isL3Capable(void *ptr, size_t size) { return isAligned(ptr) && isAligned(size); } bool isL3Capable(const NEO::GraphicsAllocation &graphicsAllocation) { return isL3Capable(graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize()); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/cl_blit_properties.h000066400000000000000000000115111363734646600256330ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/blit_commands_helper.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "CL/cl.h" namespace NEO { struct ClBlitProperties { static BlitProperties constructProperties(BlitterConstants::BlitDirection blitDirection, CommandStreamReceiver &commandStreamReceiver, const BuiltinOpParams &builtinOpParams) { if (BlitterConstants::BlitDirection::BufferToBuffer == blitDirection) { auto dstOffset = builtinOpParams.dstOffset.x + builtinOpParams.dstMemObj->getOffset(); auto srcOffset = builtinOpParams.srcOffset.x + 
builtinOpParams.srcMemObj->getOffset(); return BlitProperties::constructPropertiesForCopyBuffer(builtinOpParams.dstMemObj->getGraphicsAllocation(), builtinOpParams.srcMemObj->getGraphicsAllocation(), dstOffset, srcOffset, builtinOpParams.size.x); } GraphicsAllocation *gpuAllocation = nullptr; Vec3 copyOffset = 0; void *hostPtr = nullptr; Vec3 hostPtrOffset = 0; uint64_t memObjGpuVa = 0; uint64_t hostAllocGpuVa = 0; GraphicsAllocation *hostAllocation = builtinOpParams.transferAllocation; Vec3 copySize = 0; size_t hostRowPitch = 0; size_t hostSlicePitch = 0; size_t gpuRowPitch = 0; size_t gpuSlicePitch = 0; if (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection) { // write buffer hostPtr = builtinOpParams.srcPtr; hostPtrOffset = builtinOpParams.srcOffset; copyOffset = builtinOpParams.dstOffset; memObjGpuVa = castToUint64(builtinOpParams.dstPtr); hostAllocGpuVa = castToUint64(builtinOpParams.srcPtr); if (builtinOpParams.dstSvmAlloc) { gpuAllocation = builtinOpParams.dstSvmAlloc; hostAllocation = builtinOpParams.srcSvmAlloc; } else { gpuAllocation = builtinOpParams.dstMemObj->getGraphicsAllocation(); memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.dstMemObj->getOffset()); } copySize.x = builtinOpParams.size.x; } if (BlitterConstants::BlitDirection::BufferToHostPtr == blitDirection) { // read buffer hostPtr = builtinOpParams.dstPtr; hostPtrOffset = builtinOpParams.dstOffset; copyOffset = builtinOpParams.srcOffset; memObjGpuVa = castToUint64(builtinOpParams.srcPtr); hostAllocGpuVa = castToUint64(builtinOpParams.dstPtr); if (builtinOpParams.srcSvmAlloc) { gpuAllocation = builtinOpParams.srcSvmAlloc; hostAllocation = builtinOpParams.dstSvmAlloc; } else { gpuAllocation = builtinOpParams.srcMemObj->getGraphicsAllocation(); memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.srcMemObj->getOffset()); } hostRowPitch = builtinOpParams.dstRowPitch; hostSlicePitch = builtinOpParams.dstSlicePitch; gpuRowPitch = builtinOpParams.srcRowPitch; 
gpuSlicePitch = builtinOpParams.srcSlicePitch; copySize = builtinOpParams.size; } UNRECOVERABLE_IF(BlitterConstants::BlitDirection::HostPtrToBuffer != blitDirection && BlitterConstants::BlitDirection::BufferToHostPtr != blitDirection); return BlitProperties::constructPropertiesForReadWriteBuffer(blitDirection, commandStreamReceiver, gpuAllocation, hostAllocation, hostPtr, memObjGpuVa, hostAllocGpuVa, hostPtrOffset, copyOffset, copySize, hostRowPitch, hostSlicePitch, gpuRowPitch, gpuSlicePitch); } static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType) { if (CL_COMMAND_WRITE_BUFFER == commandType) { return BlitterConstants::BlitDirection::HostPtrToBuffer; } else if (CL_COMMAND_READ_BUFFER == commandType || CL_COMMAND_READ_BUFFER_RECT == commandType) { return BlitterConstants::BlitDirection::BufferToHostPtr; } else { UNRECOVERABLE_IF(CL_COMMAND_COPY_BUFFER != commandType); return BlitterConstants::BlitDirection::BufferToBuffer; } } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/cl_device_helpers.cpp000066400000000000000000000005241363734646600257430ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_device_helpers.h" namespace NEO { void ClDeviceHelper::getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, cl_uint ¶m, const void *&src, size_t &size, size_t &retSize) {} } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/cl_device_helpers.h000066400000000000000000000006131363734646600254070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include namespace NEO { class ClDevice; namespace ClDeviceHelper { void getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, cl_uint ¶m, const void *&src, size_t &size, size_t &retSize); }; // namespace ClDeviceHelper } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/helpers/cl_helper.h000066400000000000000000000053551363734646600237150ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "CL/cl.h" #include "CL/cl_gl_ext.h" #include namespace NEO { inline const std::string cmdTypetoString(cl_command_type cmd) { switch (cmd) { case CL_COMMAND_NDRANGE_KERNEL: return "CL_COMMAND_NDRANGE_KERNEL"; case CL_COMMAND_TASK: return "CL_COMMAND_TASK"; case CL_COMMAND_NATIVE_KERNEL: return "CL_COMMAND_NATIVE_KERNEL"; case CL_COMMAND_READ_BUFFER: return "CL_COMMAND_READ_BUFFER"; case CL_COMMAND_WRITE_BUFFER: return "CL_COMMAND_WRITE_BUFFER"; case CL_COMMAND_COPY_BUFFER: return "CL_COMMAND_COPY_BUFFER"; case CL_COMMAND_READ_IMAGE: return "CL_COMMAND_READ_IMAGE"; case CL_COMMAND_WRITE_IMAGE: return "CL_COMMAND_WRITE_IMAGE"; case CL_COMMAND_COPY_IMAGE: return "CL_COMMAND_COPY_IMAGE"; case CL_COMMAND_COPY_IMAGE_TO_BUFFER: return "CL_COMMAND_COPY_IMAGE_TO_BUFFER"; case CL_COMMAND_COPY_BUFFER_TO_IMAGE: return "CL_COMMAND_COPY_BUFFER_TO_IMAGE"; case CL_COMMAND_MAP_BUFFER: return "CL_COMMAND_MAP_BUFFER"; case CL_COMMAND_MAP_IMAGE: return "CL_COMMAND_MAP_IMAGE"; case CL_COMMAND_UNMAP_MEM_OBJECT: return "CL_COMMAND_UNMAP_MEM_OBJECT"; case CL_COMMAND_MARKER: return "CL_COMMAND_MARKER"; case CL_COMMAND_ACQUIRE_GL_OBJECTS: return "CL_COMMAND_ACQUIRE_GL_OBJECTS"; case CL_COMMAND_RELEASE_GL_OBJECTS: return "CL_COMMAND_RELEASE_GL_OBJECTS"; case CL_COMMAND_READ_BUFFER_RECT: return "CL_COMMAND_READ_BUFFER_RECT"; case CL_COMMAND_WRITE_BUFFER_RECT: return "CL_COMMAND_WRITE_BUFFER_RECT"; case CL_COMMAND_COPY_BUFFER_RECT: return "CL_COMMAND_COPY_BUFFER_RECT"; case CL_COMMAND_USER: return "CL_COMMAND_USER"; case CL_COMMAND_BARRIER: return "CL_COMMAND_BARRIER"; case CL_COMMAND_MIGRATE_MEM_OBJECTS: return "CL_COMMAND_MIGRATE_MEM_OBJECTS"; case CL_COMMAND_FILL_BUFFER: return "CL_COMMAND_FILL_BUFFER"; case CL_COMMAND_FILL_IMAGE: return 
"CL_COMMAND_FILL_IMAGE"; case CL_COMMAND_SVM_FREE: return "CL_COMMAND_SVM_FREE"; case CL_COMMAND_SVM_MEMCPY: return "CL_COMMAND_SVM_MEMCPY"; case CL_COMMAND_SVM_MEMFILL: return "CL_COMMAND_SVM_MEMFILL"; case CL_COMMAND_SVM_MAP: return "CL_COMMAND_SVM_MAP"; case CL_COMMAND_SVM_UNMAP: return "CL_COMMAND_SVM_UNMAP"; case CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR: return "CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR"; default: { std::string returnString("CMD_UNKNOWN:" + std::to_string((cl_command_type)cmd)); return returnString; } } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/convert_color.h000066400000000000000000000067201363734646600246330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/basic_math.h" #include "CL/cl.h" #include namespace NEO { inline int32_t selectNormalizingFactor(const cl_channel_type &channelType) { if (channelType == CL_UNORM_INT8) { return 0xFF; } if (channelType == CL_SNORM_INT8) { return 0x7F; } if (channelType == CL_UNORM_INT16) { return 0xFFFF; } if (channelType == CL_SNORM_INT16) { return 0x7fFF; } return 0; } inline void convertFillColor(const void *fillColor, int32_t *iFillColor, const cl_image_format &oldImageFormat, const cl_image_format &newImageFormat) { float fFillColor[4] = {0.0f}; for (auto i = 0; i < 4; i++) { iFillColor[i] = *((int32_t *)fillColor + i); fFillColor[i] = *((float *)fillColor + i); } if (oldImageFormat.image_channel_order == CL_A) { std::swap(iFillColor[0], iFillColor[3]); std::swap(fFillColor[0], fFillColor[3]); } else if (oldImageFormat.image_channel_order == CL_BGRA || oldImageFormat.image_channel_order == CL_sBGRA) { std::swap(iFillColor[0], iFillColor[2]); std::swap(fFillColor[0], fFillColor[2]); } if (oldImageFormat.image_channel_order == CL_sRGBA || oldImageFormat.image_channel_order == CL_sBGRA) { for (auto i = 0; i < 3; i++) { if (fFillColor[i] != fFillColor[i]) { fFillColor[i] = 0.0f; } 
if (fFillColor[i] > 1.0f) { fFillColor[i] = 1.0f; } else if (fFillColor[i] < 0.0f) { fFillColor[i] = 0.0f; } else if (fFillColor[i] < 0.0031308f) { fFillColor[i] = 12.92f * fFillColor[i]; } else { fFillColor[i] = 1.055f * pow(fFillColor[i], 1.0f / 2.4f) - 0.055f; } } } if (newImageFormat.image_channel_data_type == CL_UNSIGNED_INT8) { auto normalizingFactor = selectNormalizingFactor(oldImageFormat.image_channel_data_type); if (normalizingFactor > 0) { for (auto i = 0; i < 4; i++) { if ((oldImageFormat.image_channel_order == CL_sRGBA || oldImageFormat.image_channel_order == CL_sBGRA) && i < 3) { iFillColor[i] = static_cast(normalizingFactor * fFillColor[i] + 0.5f); } else { iFillColor[i] = static_cast(normalizingFactor * fFillColor[i]); } } } for (auto i = 0; i < 4; i++) { iFillColor[i] = iFillColor[i] & 0xFF; } } else if (newImageFormat.image_channel_data_type == CL_UNSIGNED_INT16) { auto normalizingFactor = selectNormalizingFactor(oldImageFormat.image_channel_data_type); if (normalizingFactor > 0) { for (auto i = 0; i < 4; i++) { iFillColor[i] = static_cast(normalizingFactor * fFillColor[i]); } } else if (oldImageFormat.image_channel_data_type == CL_HALF_FLOAT) { //float to half convert. 
for (auto i = 0; i < 4; i++) { uint16_t temp = Math::float2Half(fFillColor[i]); iFillColor[i] = temp; } } for (auto i = 0; i < 4; i++) { iFillColor[i] = iFillColor[i] & 0xFFFF; } } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/csr_deps.cpp000066400000000000000000000007031363734646600241050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/csr_deps.h" #include "opencl/source/helpers/timestamp_packet.h" namespace NEO { void CsrDependencies::makeResident(CommandStreamReceiver &commandStreamReceiver) const { for (auto ×tampPacketContainer : *this) { timestampPacketContainer->makeResident(commandStreamReceiver); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/dispatch_info.cpp000066400000000000000000000021041363734646600251120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" namespace NEO { bool DispatchInfo::usesSlm() const { return (kernel == nullptr) ? false : kernel->slmTotalSize > 0; } bool DispatchInfo::usesStatelessPrintfSurface() const { return (kernel == nullptr) ? false : (kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface != nullptr); } uint32_t DispatchInfo::getRequiredScratchSize() const { return (kernel == nullptr) ? 0 : kernel->getScratchSize(); } uint32_t DispatchInfo::getRequiredPrivateScratchSize() const { return (kernel == nullptr) ? 0 : kernel->getPrivateScratchSize(); } Kernel *MultiDispatchInfo::peekMainKernel() const { if (dispatchInfos.size() == 0) { return nullptr; } return mainKernel ? mainKernel : dispatchInfos.begin()->getKernel(); } Kernel *MultiDispatchInfo::peekParentKernel() const { return (mainKernel && mainKernel->isParentKernel) ? 
mainKernel : nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/dispatch_info.h000066400000000000000000000164161363734646600245720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/registered_method_dispatcher.h" #include "shared/source/helpers/vec.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/utilities/stackvec.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/mem_obj/mem_obj.h" #include #include namespace NEO { class Kernel; struct TimestampPacketDependencies; class DispatchInfo { public: using DispatchCommandMethodT = void(LinearStream &commandStream, TimestampPacketDependencies *timestampPacketDependencies, const HardwareInfo &); using EstimateCommandsMethodT = size_t(size_t, const HardwareInfo &, bool); DispatchInfo() = default; DispatchInfo(Kernel *kernel, uint32_t dim, Vec3 gws, Vec3 elws, Vec3 offset) : kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {} DispatchInfo(Kernel *kernel, uint32_t dim, Vec3 gws, Vec3 elws, Vec3 offset, Vec3 agws, Vec3 lws, Vec3 twgs, Vec3 nwgs, Vec3 swgs) : kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {} bool usesSlm() const; bool usesStatelessPrintfSurface() const; uint32_t getRequiredScratchSize() const; uint32_t getRequiredPrivateScratchSize() const; void setKernel(Kernel *kernel) { this->kernel = kernel; } Kernel *getKernel() const { return kernel; } uint32_t getDim() const { return dim; } void setDim(uint32_t dim) { this->dim = dim; } const Vec3 &getGWS() const { return gws; }; void setGWS(const Vec3 &gws) { this->gws = gws; } const Vec3 &getEnqueuedWorkgroupSize() const { return elws; }; void setEnqueuedWorkgroupSize(const Vec3 &elws) { this->elws = elws; } const Vec3 &getOffset() const { return offset; }; void setOffsets(const 
Vec3 &offset) { this->offset = offset; } const Vec3 &getActualWorkgroupSize() const { return agws; }; void setActualGlobalWorkgroupSize(const Vec3 &agws) { this->agws = agws; } const Vec3 &getLocalWorkgroupSize() const { return lws; }; void setLWS(const Vec3 &lws) { this->lws = lws; } const Vec3 &getTotalNumberOfWorkgroups() const { return twgs; }; void setTotalNumberOfWorkgroups(const Vec3 &twgs) { this->twgs = twgs; } const Vec3 &getNumberOfWorkgroups() const { return nwgs; }; void setNumberOfWorkgroups(const Vec3 &nwgs) { this->nwgs = nwgs; } const Vec3 &getStartOfWorkgroups() const { return swgs; }; void setStartOfWorkgroups(const Vec3 &swgs) { this->swgs = swgs; } bool peekCanBePartitioned() const { return canBePartitioned; } void setCanBePartitioned(bool canBePartitioned) { this->canBePartitioned = canBePartitioned; } RegisteredMethodDispatcher dispatchInitCommands; RegisteredMethodDispatcher dispatchEpilogueCommands; protected: bool canBePartitioned = false; Kernel *kernel = nullptr; uint32_t dim = 0; Vec3 gws{0, 0, 0}; //global work size Vec3 elws{0, 0, 0}; //enqueued local work size Vec3 offset{0, 0, 0}; //global offset Vec3 agws{0, 0, 0}; //actual global work size Vec3 lws{0, 0, 0}; //local work size Vec3 twgs{0, 0, 0}; //total number of work groups Vec3 nwgs{0, 0, 0}; //number of work groups Vec3 swgs{0, 0, 0}; //start of work groups }; struct MultiDispatchInfo { ~MultiDispatchInfo() { for (MemObj *redescribedSurface : redescribedSurfaces) { redescribedSurface->release(); } } explicit MultiDispatchInfo(Kernel *mainKernel) : mainKernel(mainKernel) {} MultiDispatchInfo() = default; MultiDispatchInfo &operator=(const MultiDispatchInfo &) = delete; MultiDispatchInfo(const MultiDispatchInfo &) = delete; bool empty() const { return dispatchInfos.size() == 0; } bool usesSlm() const { for (const auto &dispatchInfo : dispatchInfos) { if (dispatchInfo.usesSlm()) { return true; } } return false; } bool usesStatelessPrintfSurface() const { for (const auto 
&dispatchInfo : dispatchInfos) { if (dispatchInfo.usesStatelessPrintfSurface()) { return true; } } return false; } uint32_t getRequiredScratchSize() const { uint32_t ret = 0; for (const auto &dispatchInfo : dispatchInfos) { ret = std::max(ret, dispatchInfo.getRequiredScratchSize()); } return ret; } uint32_t getRequiredPrivateScratchSize() const { uint32_t ret = 0; for (const auto &dispatchInfo : dispatchInfos) { ret = std::max(ret, dispatchInfo.getRequiredPrivateScratchSize()); } return ret; } void backupUnifiedMemorySyncRequirement() { for (const auto &dispatchInfo : dispatchInfos) { dispatchInfo.getKernel()->setUnifiedMemorySyncRequirement(true); } } DispatchInfo *begin() { return dispatchInfos.begin(); } const DispatchInfo *begin() const { return dispatchInfos.begin(); } std::reverse_iterator rbegin() { return dispatchInfos.rbegin(); } std::reverse_iterator crbegin() const { return dispatchInfos.crbegin(); } DispatchInfo *end() { return dispatchInfos.end(); } const DispatchInfo *end() const { return dispatchInfos.end(); } std::reverse_iterator rend() { return dispatchInfos.rend(); } std::reverse_iterator crend() const { return dispatchInfos.crend(); } void push(const DispatchInfo &dispatchInfo) { dispatchInfos.push_back(dispatchInfo); } size_t size() const { return dispatchInfos.size(); } StackVec &getRedescribedSurfaces() { return redescribedSurfaces; } void pushRedescribedMemObj(std::unique_ptr memObj) { redescribedSurfaces.push_back(memObj.release()); } Kernel *peekParentKernel() const; Kernel *peekMainKernel() const; void setBuiltinOpParams(const BuiltinOpParams &builtinOpParams) { this->builtinOpParams = builtinOpParams; } const BuiltinOpParams &peekBuiltinOpParams() const { return builtinOpParams; } void setMemObjsForAuxTranslation(const MemObjsForAuxTranslation &memObjsForAuxTranslation) { this->memObjsForAuxTranslation = &memObjsForAuxTranslation; } const MemObjsForAuxTranslation *getMemObjsForAuxTranslation() const { return memObjsForAuxTranslation; } 
protected: BuiltinOpParams builtinOpParams = {}; StackVec dispatchInfos; StackVec redescribedSurfaces; const MemObjsForAuxTranslation *memObjsForAuxTranslation = nullptr; Kernel *mainKernel = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/dispatch_info_builder.h000066400000000000000000000705651363734646600263050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" namespace NEO { namespace SplitDispatch { enum class Dim : uint32_t { d1D = 0, d2D = 1, d3D = 2 }; enum class SplitMode : uint32_t { NoSplit = 0, WalkerSplit = 1, // 1 kernel and many GPGPU walkers (e.g. for non-uniform workgroup sizes) KernelSplit = 2 // many kernels and many GPGPU walkers (e.g. for copy kernels) }; // Left | Middle | Right enum class RegionCoordX : uint32_t { Left = 0, Middle = 1, Right = 2 }; // Top // ------ // Middle // ------ // Bottom enum class RegionCoordY : uint32_t { Top = 0, Middle = 1, Bottom = 2 }; // Front / / // / Middle / // / / Back enum class RegionCoordZ : uint32_t { Front = 0, Middle = 1, Back = 2 }; } // namespace SplitDispatch // Compute power in compile time static constexpr uint32_t powConst(uint32_t base, uint32_t currExp) { return (currExp == 1) ? base : base * powConst(base, currExp - 1); } template class DispatchInfoBuilder { public: DispatchInfoBuilder() = default; void setKernel(Kernel *kernel) { for (auto &dispatchInfo : dispatchInfos) { dispatchInfo.setKernel(kernel); } } cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { for (auto &dispatchInfo : dispatchInfos) { if (dispatchInfo.getKernel()) { dispatchInfo.getKernel()->setArgSvmAlloc(argIndex, svmPtr, svmAlloc); } } return CL_SUCCESS; } template cl_int setArgSvm(ArgsT &&... 
args) { for (auto &dispatchInfo : dispatchInfos) { if (dispatchInfo.getKernel()) { dispatchInfo.getKernel()->setArgSvm(std::forward(args)...); } } return CL_SUCCESS; } void setUnifiedMemorySyncRequirement(bool isUnifiedMemorySyncRequired) { for (auto &dispatchInfo : dispatchInfos) { dispatchInfo.getKernel()->setUnifiedMemorySyncRequirement(isUnifiedMemorySyncRequired); } } template typename std::enable_if<(D == SplitDispatch::Dim::d1D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type setArgSvm(SplitDispatch::RegionCoordX x, ArgsT &&... args) { dispatchInfos[getDispatchId(x)].getKernel()->setArgSvm(std::forward(args)...); } template typename std::enable_if<(D == SplitDispatch::Dim::d2D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type setArgSvm(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, ArgsT &&... args) { dispatchInfos[getDispatchId(x, y)].getKernel()->setArgSvm(std::forward(args)...); } template typename std::enable_if<(D == SplitDispatch::Dim::d3D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type setArgSvm(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, SplitDispatch::RegionCoordZ z, ArgsT &&... args) { dispatchInfos[getDispatchId(x, y, z)].getKernel()->setArgSvm(std::forward(args)...); } template cl_int setArg(ArgsT &&... args) { cl_int result = CL_SUCCESS; for (auto &dispatchInfo : dispatchInfos) { if (dispatchInfo.getKernel()) { result = dispatchInfo.getKernel()->setArg(std::forward(args)...); if (result != CL_SUCCESS) { break; } } } return result; } template typename std::enable_if<(D == SplitDispatch::Dim::d1D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type setArg(SplitDispatch::RegionCoordX x, ArgsT &&... 
args) {
        dispatchInfos[getDispatchId(x)].getKernel()->setArg(std::forward(args)...);
    }

    // 2D split: forwards the arguments to setArg on the kernel of region (x, y) only.
    // The kernel's return value is discarded and the kernel pointer is not null-checked.
    template typename std::enable_if<(D == SplitDispatch::Dim::d2D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type
    setArg(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, ArgsT &&... args) {
        dispatchInfos[getDispatchId(x, y)].getKernel()->setArg(std::forward(args)...);
    }

    // 3D split: forwards the arguments to setArg on the kernel of region (x, y, z) only.
    template typename std::enable_if<(D == SplitDispatch::Dim::d3D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type
    setArg(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, SplitDispatch::RegionCoordZ z, ArgsT &&... args) {
        dispatchInfos[getDispatchId(x, y, z)].getKernel()->setArg(std::forward(args)...);
    }

    // 1D split: assigns the kernel of region x.
    template typename std::enable_if<(D == SplitDispatch::Dim::d1D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type
    setKernel(SplitDispatch::RegionCoordX x, Kernel *kern) {
        dispatchInfos[getDispatchId(x)].setKernel(kern);
    }

    // 2D split: assigns the kernel of region (x, y).
    template typename std::enable_if<(D == SplitDispatch::Dim::d2D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type
    setKernel(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, Kernel *kern) {
        dispatchInfos[getDispatchId(x, y)].setKernel(kern);
    }

    // 3D split: assigns the kernel of region (x, y, z).
    template typename std::enable_if<(D == SplitDispatch::Dim::d3D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type
    setKernel(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, SplitDispatch::RegionCoordZ z, Kernel *kern) {
        dispatchInfos[getDispatchId(x, y, z)].setKernel(kern);
    }

    // NoSplit / WalkerSplit: sets the geometry of slot 0 with an explicitly supplied dimension.
    // gws/elws/offset are mandatory; agws, lws, twgs, nwgs and swgs default to {0, 0, 0}.
    // DEBUG_BREAK_IF flags a dim larger than the builder's own dimensionality (Dim + 1).
    template typename std::enable_if<(M == SplitDispatch::SplitMode::NoSplit) || (M == SplitDispatch::SplitMode::WalkerSplit), void>::type
    setDispatchGeometry(const uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 &lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) {
        auto &dispatchInfo = dispatchInfos[0];
        DEBUG_BREAK_IF(dim > static_cast(Dim) + 1);
        dispatchInfo.setDim(dim);
        dispatchInfo.setGWS(gws);
        dispatchInfo.setEnqueuedWorkgroupSize(elws);
        dispatchInfo.setOffsets(offset);
        dispatchInfo.setActualGlobalWorkgroupSize(agws);
        dispatchInfo.setLWS(lws);
        dispatchInfo.setTotalNumberOfWorkgroups(twgs);
        dispatchInfo.setNumberOfWorkgroups(nwgs);
        dispatchInfo.setStartOfWorkgroups(swgs);
    }

    // NoSplit / WalkerSplit: sets the geometry of slot 0; the dimension is taken from the
    // builder's Dim template argument (Dim + 1).
    template typename std::enable_if<(M == SplitDispatch::SplitMode::NoSplit) || (M == SplitDispatch::SplitMode::WalkerSplit), void>::type
    setDispatchGeometry(const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 &lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) {
        auto &dispatchInfo = dispatchInfos[0];
        dispatchInfo.setDim(static_cast(Dim) + 1);
        dispatchInfo.setGWS(gws);
        dispatchInfo.setEnqueuedWorkgroupSize(elws);
        dispatchInfo.setOffsets(offset);
        dispatchInfo.setActualGlobalWorkgroupSize(agws);
        dispatchInfo.setLWS(lws);
        dispatchInfo.setTotalNumberOfWorkgroups(twgs);
        dispatchInfo.setNumberOfWorkgroups(nwgs);
        dispatchInfo.setStartOfWorkgroups(swgs);
    }

    // 1D split: sets the geometry of the slot addressed by region x.
    template typename std::enable_if<(D == SplitDispatch::Dim::d1D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type
    setDispatchGeometry(SplitDispatch::RegionCoordX x, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 &lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) {
        auto &dispatchInfo = dispatchInfos[getDispatchId(x)];
        dispatchInfo.setDim(static_cast(Dim) + 1);
        dispatchInfo.setGWS(gws);
        dispatchInfo.setEnqueuedWorkgroupSize(elws);
        dispatchInfo.setOffsets(offset);
        dispatchInfo.setActualGlobalWorkgroupSize(agws);
        dispatchInfo.setLWS(lws);
        dispatchInfo.setTotalNumberOfWorkgroups(twgs);
        dispatchInfo.setNumberOfWorkgroups(nwgs);
        dispatchInfo.setStartOfWorkgroups(swgs);
    }

    // 2D split: sets the geometry of the slot addressed by region (x, y).
    template typename std::enable_if<(D == SplitDispatch::Dim::d2D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type
setDispatchGeometry(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) { auto &dispatchInfo = dispatchInfos[getDispatchId(x, y)]; dispatchInfo.setDim(static_cast(Dim) + 1); dispatchInfo.setGWS(gws); dispatchInfo.setEnqueuedWorkgroupSize(elws); dispatchInfo.setOffsets(offset); dispatchInfo.setActualGlobalWorkgroupSize(agws); dispatchInfo.setLWS(lws); dispatchInfo.setTotalNumberOfWorkgroups(twgs); dispatchInfo.setNumberOfWorkgroups(nwgs); dispatchInfo.setStartOfWorkgroups(swgs); } template typename std::enable_if<(D == SplitDispatch::Dim::d3D) && (Mode != SplitDispatch::SplitMode::NoSplit), void>::type setDispatchGeometry(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, SplitDispatch::RegionCoordZ z, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset, const Vec3 &agws = {0, 0, 0}, const Vec3 &lws = {0, 0, 0}, const Vec3 &twgs = {0, 0, 0}, const Vec3 &nwgs = {0, 0, 0}, const Vec3 &swgs = {0, 0, 0}) { auto &dispatchInfo = dispatchInfos[getDispatchId(x, y, z)]; dispatchInfo.setDim(static_cast(Dim) + 1); dispatchInfo.setGWS(gws); dispatchInfo.setEnqueuedWorkgroupSize(elws); dispatchInfo.setOffsets(offset); dispatchInfo.setActualGlobalWorkgroupSize(agws); dispatchInfo.setLWS(lws); dispatchInfo.setTotalNumberOfWorkgroups(twgs); dispatchInfo.setNumberOfWorkgroups(nwgs); dispatchInfo.setStartOfWorkgroups(swgs); } void bake(MultiDispatchInfo &target) { for (auto &dispatchInfo : dispatchInfos) { if (((dispatchInfo.getDim() == 1) && (dispatchInfo.getGWS().x > 0)) || ((dispatchInfo.getDim() == 2) && (dispatchInfo.getGWS().x > 0) && (dispatchInfo.getGWS().y > 0)) || ((dispatchInfo.getDim() == 3) && (dispatchInfo.getGWS().x > 0) && (dispatchInfo.getGWS().y > 0) && (dispatchInfo.getGWS().z > 0))) { 
dispatchInfo.setDim(calculateDispatchDim(dispatchInfo.getGWS(), dispatchInfo.getOffset())); dispatchInfo.setGWS(canonizeWorkgroup(dispatchInfo.getGWS())); if (dispatchInfo.getActualWorkgroupSize() == Vec3({0, 0, 0})) { dispatchInfo.setActualGlobalWorkgroupSize(dispatchInfo.getGWS()); } if (((dispatchInfo.getDim() == 1) && (dispatchInfo.getActualWorkgroupSize().x > 0)) || ((dispatchInfo.getDim() == 2) && (dispatchInfo.getActualWorkgroupSize().x > 0) && (dispatchInfo.getActualWorkgroupSize().y > 0)) || ((dispatchInfo.getDim() == 3) && (dispatchInfo.getActualWorkgroupSize().x > 0) && (dispatchInfo.getActualWorkgroupSize().y > 0) && (dispatchInfo.getActualWorkgroupSize().z > 0))) { dispatchInfo.setEnqueuedWorkgroupSize(canonizeWorkgroup(dispatchInfo.getEnqueuedWorkgroupSize())); if (dispatchInfo.getLocalWorkgroupSize().x == 0) { dispatchInfo.setLWS(generateWorkgroupSize(dispatchInfo)); } dispatchInfo.setLWS(canonizeWorkgroup(dispatchInfo.getLocalWorkgroupSize())); if (dispatchInfo.getTotalNumberOfWorkgroups().x == 0) { dispatchInfo.setTotalNumberOfWorkgroups(generateWorkgroupsNumber(dispatchInfo)); } dispatchInfo.setTotalNumberOfWorkgroups(canonizeWorkgroup(dispatchInfo.getTotalNumberOfWorkgroups())); if (dispatchInfo.getNumberOfWorkgroups().x == 0) { dispatchInfo.setNumberOfWorkgroups(dispatchInfo.getTotalNumberOfWorkgroups()); } if (supportsSplit() && needsSplit(dispatchInfo)) { pushSplit(dispatchInfo, target); } else { target.push(dispatchInfo); printDebugString(DebugManager.flags.PrintDebugMessages.get(), stdout, "DIM:%u\tGWS:(%zu, %zu, %zu)\tELWS:(%zu, %zu, %zu)\tOffset:(%zu, %zu, %zu)\tAGWS:(%zu, %zu, %zu)\tLWS:(%zu, %zu, %zu)\tTWGS:(%zu, %zu, %zu)\tNWGS:(%zu, %zu, %zu)\tSWGS:(%zu, %zu, %zu)\n", dispatchInfo.getDim(), dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z, dispatchInfo.getEnqueuedWorkgroupSize().x, dispatchInfo.getEnqueuedWorkgroupSize().y, dispatchInfo.getEnqueuedWorkgroupSize().z, dispatchInfo.getOffset().x, 
dispatchInfo.getOffset().y, dispatchInfo.getOffset().z, dispatchInfo.getActualWorkgroupSize().x, dispatchInfo.getActualWorkgroupSize().y, dispatchInfo.getActualWorkgroupSize().z, dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z, dispatchInfo.getTotalNumberOfWorkgroups().x, dispatchInfo.getTotalNumberOfWorkgroups().y, dispatchInfo.getTotalNumberOfWorkgroups().z, dispatchInfo.getNumberOfWorkgroups().x, dispatchInfo.getNumberOfWorkgroups().y, dispatchInfo.getNumberOfWorkgroups().z, dispatchInfo.getStartOfWorkgroups().x, dispatchInfo.getStartOfWorkgroups().y, dispatchInfo.getStartOfWorkgroups().z); } } } } } DispatchInfo &getDispatchInfo(size_t index) { return dispatchInfos[index]; } static constexpr size_t getMaxNumDispatches() { return numDispatches; } protected: static bool supportsSplit() { return (Mode == SplitDispatch::SplitMode::WalkerSplit); } static bool needsSplit(const DispatchInfo &dispatchInfo) { return (dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x + dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y + dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z != 0); } static void pushSplit(const DispatchInfo &dispatchInfo, MultiDispatchInfo &outMdi) { constexpr auto xMain = SplitDispatch::RegionCoordX::Left; constexpr auto xRight = SplitDispatch::RegionCoordX::Middle; constexpr auto yMain = SplitDispatch::RegionCoordY::Top; constexpr auto yBottom = SplitDispatch::RegionCoordY::Middle; constexpr auto zMain = SplitDispatch::RegionCoordZ::Front; constexpr auto zBack = SplitDispatch::RegionCoordZ::Middle; switch (dispatchInfo.getDim()) { default: break; case 1: { Vec3 mainLWS = {dispatchInfo.getLocalWorkgroupSize().x, 1, 1}; Vec3 rightLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, 1, 1}; Vec3 mainGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), 1, 1}; Vec3 rightGWS = {dispatchInfo.getGWS().x % mainLWS.x, 1, 1}; Vec3 
mainNWGS = {mainGWS.x / mainLWS.x, 1, 1}; Vec3 rightNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), 1, 1}; Vec3 mainSWGS = {0, 0, 0}; Vec3 rightSWGS = {mainNWGS.x, 0, 0}; DispatchInfoBuilder builder1D; builder1D.setKernel(dispatchInfo.getKernel()); builder1D.setDispatchGeometry(xMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainGWS, mainLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainNWGS, mainSWGS); builder1D.setDispatchGeometry(xRight, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightGWS, rightLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightNWGS, rightSWGS); builder1D.bake(outMdi); } break; case 2: { Vec3 mainLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, 1}; Vec3 rightLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, 1}; Vec3 bottomLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, 1}; Vec3 rightbottomLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, 1}; Vec3 mainGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), alignDown(dispatchInfo.getGWS().y, mainLWS.y), 1}; Vec3 rightGWS = {dispatchInfo.getGWS().x % mainLWS.x, alignDown(dispatchInfo.getGWS().y, mainLWS.y), 1}; Vec3 bottomGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), dispatchInfo.getGWS().y % mainLWS.y, 1}; Vec3 rightbottomGWS = {dispatchInfo.getGWS().x % mainLWS.x, dispatchInfo.getGWS().y % mainLWS.y, 1}; Vec3 mainNWGS = {mainGWS.x / mainLWS.x, mainGWS.y / mainLWS.y, 1}; Vec3 rightNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainGWS.y / mainLWS.y, 1}; Vec3 bottomNWGS = {mainGWS.x / mainLWS.x, mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), 1}; Vec3 rightbottomNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, 
mainLWS.x), mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), 1}; Vec3 mainSWGS = {0, 0, 0}; Vec3 rightSWGS = {mainNWGS.x, 0, 0}; Vec3 bottomSWGS = {0, mainNWGS.y, 0}; Vec3 rightbottomSWGS = {mainNWGS.x, mainNWGS.y, 0}; DispatchInfoBuilder builder2D; builder2D.setKernel(dispatchInfo.getKernel()); builder2D.setDispatchGeometry(xMain, yMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainGWS, mainLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainNWGS, mainSWGS); builder2D.setDispatchGeometry(xRight, yMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightGWS, rightLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightNWGS, rightSWGS); builder2D.setDispatchGeometry(xMain, yBottom, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), bottomGWS, bottomLWS, dispatchInfo.getTotalNumberOfWorkgroups(), bottomNWGS, bottomSWGS); builder2D.setDispatchGeometry(xRight, yBottom, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbottomGWS, rightbottomLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbottomNWGS, rightbottomSWGS); builder2D.bake(outMdi); } break; case 3: { Vec3 mainLWS = dispatchInfo.getLocalWorkgroupSize(); Vec3 rightLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z}; Vec3 bottomLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z}; Vec3 rightbottomLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getLocalWorkgroupSize().z}; Vec3 mainbackLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z}; 
Vec3 rightbackLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z}; Vec3 bottombackLWS = {dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z}; Vec3 rightbottombackLWS = {dispatchInfo.getGWS().x % dispatchInfo.getLocalWorkgroupSize().x, dispatchInfo.getGWS().y % dispatchInfo.getLocalWorkgroupSize().y, dispatchInfo.getGWS().z % dispatchInfo.getLocalWorkgroupSize().z}; Vec3 mainGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), alignDown(dispatchInfo.getGWS().y, mainLWS.y), alignDown(dispatchInfo.getGWS().z, mainLWS.z)}; Vec3 rightGWS = {dispatchInfo.getGWS().x % mainLWS.x, alignDown(dispatchInfo.getGWS().y, mainLWS.y), alignDown(dispatchInfo.getGWS().z, mainLWS.z)}; Vec3 bottomGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), dispatchInfo.getGWS().y % mainLWS.y, alignDown(dispatchInfo.getGWS().z, mainLWS.z)}; Vec3 rightbottomGWS = {dispatchInfo.getGWS().x % mainLWS.x, dispatchInfo.getGWS().y % mainLWS.y, alignDown(dispatchInfo.getGWS().z, mainLWS.z)}; Vec3 mainbackGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), alignDown(dispatchInfo.getGWS().y, mainLWS.y), dispatchInfo.getGWS().z % mainLWS.z}; Vec3 rightbackGWS = {dispatchInfo.getGWS().x % mainLWS.x, alignDown(dispatchInfo.getGWS().y, mainLWS.y), dispatchInfo.getGWS().z % mainLWS.z}; Vec3 bottombackGWS = {alignDown(dispatchInfo.getGWS().x, mainLWS.x), dispatchInfo.getGWS().y % mainLWS.y, dispatchInfo.getGWS().z % mainLWS.z}; Vec3 rightbottombackGWS = {dispatchInfo.getGWS().x % mainLWS.x, dispatchInfo.getGWS().y % mainLWS.y, dispatchInfo.getGWS().z % mainLWS.z}; Vec3 mainNWGS = {mainGWS.x / mainLWS.x, mainGWS.y / mainLWS.y, mainGWS.z / mainLWS.z + isIndivisible(mainGWS.z, mainLWS.z)}; Vec3 rightNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainGWS.y 
/ mainLWS.y, mainGWS.z / mainLWS.z}; Vec3 bottomNWGS = {mainGWS.x / mainLWS.x, mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainGWS.z / mainLWS.z}; Vec3 rightbottomNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainGWS.z / mainLWS.z}; Vec3 mainbackNWGS = {mainGWS.x / mainLWS.x, mainGWS.y / mainLWS.y, mainNWGS.z + isIndivisible(mainbackGWS.z, mainLWS.z)}; Vec3 rightbackNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainGWS.y / mainLWS.y, mainNWGS.z + isIndivisible(rightbackGWS.z, mainLWS.z)}; Vec3 bottombackNWGS = {mainGWS.x / mainLWS.x, mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainNWGS.z + isIndivisible(bottombackGWS.z, mainLWS.z)}; Vec3 rightbottombackNWGS = {mainNWGS.x + isIndivisible(rightGWS.x, mainLWS.x), mainNWGS.y + isIndivisible(bottomGWS.y, mainLWS.y), mainNWGS.z + isIndivisible(rightbottombackGWS.z, mainLWS.z)}; Vec3 mainSWGS = {0, 0, 0}; Vec3 rightSWGS = {mainNWGS.x, 0, 0}; Vec3 bottomSWGS = {0, mainNWGS.y, 0}; Vec3 rightbottomSWGS = {mainNWGS.x, mainNWGS.y, 0}; Vec3 mainbackSWGS = {0, 0, mainNWGS.z}; Vec3 rightbackSWGS = {mainNWGS.x, 0, mainNWGS.z}; Vec3 bottombackSWGS = {0, mainNWGS.y, mainNWGS.z}; Vec3 rightbottombackSWGS = {mainNWGS.x, mainNWGS.y, mainNWGS.z}; DispatchInfoBuilder builder3D; builder3D.setKernel(dispatchInfo.getKernel()); builder3D.setDispatchGeometry(xMain, yMain, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainGWS, mainLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainNWGS, mainSWGS); builder3D.setDispatchGeometry(xRight, yMain, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightGWS, rightLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightNWGS, rightSWGS); builder3D.setDispatchGeometry(xMain, yBottom, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), bottomGWS, bottomLWS, 
dispatchInfo.getTotalNumberOfWorkgroups(), bottomNWGS, bottomSWGS); builder3D.setDispatchGeometry(xRight, yBottom, zMain, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbottomGWS, rightbottomLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbottomNWGS, rightbottomSWGS); builder3D.setDispatchGeometry(xMain, yMain, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), mainbackGWS, mainbackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), mainbackNWGS, mainbackSWGS); builder3D.setDispatchGeometry(xRight, yMain, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbackGWS, rightbackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbackNWGS, rightbackSWGS); builder3D.setDispatchGeometry(xMain, yBottom, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), bottombackGWS, bottombackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), bottombackNWGS, bottombackSWGS); builder3D.setDispatchGeometry(xRight, yBottom, zBack, dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize(), dispatchInfo.getOffset(), rightbottombackGWS, rightbottombackLWS, dispatchInfo.getTotalNumberOfWorkgroups(), rightbottombackNWGS, rightbottombackSWGS); builder3D.bake(outMdi); } break; } } static constexpr uint32_t getDispatchId(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y, SplitDispatch::RegionCoordZ z) { return static_cast(x) + static_cast(y) * (static_cast(Mode) + 1) + static_cast(z) * (static_cast(Mode) + 1) * (static_cast(Mode) + 1); } static constexpr uint32_t getDispatchId(SplitDispatch::RegionCoordX x, SplitDispatch::RegionCoordY y) { return static_cast(x) + static_cast(y) * (static_cast(Mode) + 1); } static constexpr uint32_t getDispatchId(SplitDispatch::RegionCoordX x) { return static_cast(x); } static const size_t numDispatches = (Mode == SplitDispatch::SplitMode::WalkerSplit) ? 
1 : powConst((static_cast(Mode) + 1), // 1 (middle) 2 (middle + right/bottom) or 3 (left/top + middle + right/bottom) (static_cast(Dim) + 1)); // 1, 2 or 3 DispatchInfo dispatchInfos[numDispatches]; private: static size_t alignDown(size_t x, size_t y) { return x - x % y; } static size_t isIndivisible(size_t x, size_t y) { return x % y ? 1 : 0; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/enable_product.inl000066400000000000000000000014061363734646600252720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" namespace NEO { template struct EnableGfxProductHw { typedef typename HwMapper::GfxProduct GfxProduct; enum { gfxFamily = HwMapper::gfxFamily }; EnableGfxProductHw() { EnableGfxFamilyHw(gfxFamily)> enableFamily; hardwarePrefix[gfxProduct] = HwMapper::abbreviation; defaultHardwareInfoConfigTable[gfxProduct] = GfxProduct::defaultHardwareInfoConfig; hardwareInfoTable[gfxProduct] = &GfxProduct::hwInfo; hardwareInfoSetup[gfxProduct] = GfxProduct::setupHardwareInfo; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/enqueue_properties.h000066400000000000000000000031101363734646600256660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/blit_commands_helper.h" namespace NEO { struct EnqueueProperties { enum class Operation { Blit, ExplicitCacheFlush, EnqueueWithoutSubmission, DependencyResolveOnGpu, GpuKernel, }; EnqueueProperties() = delete; EnqueueProperties(bool blitEnqueue, bool hasKernels, bool isCacheFlushCmd, bool flushDependenciesOnly, const BlitPropertiesContainer *blitPropertiesContainer) { if (blitEnqueue) { operation = Operation::Blit; this->blitPropertiesContainer = blitPropertiesContainer; return; } if (hasKernels) { operation = Operation::GpuKernel; this->blitPropertiesContainer = 
blitPropertiesContainer; return; } if (isCacheFlushCmd) { operation = Operation::ExplicitCacheFlush; return; } if (flushDependenciesOnly) { operation = Operation::DependencyResolveOnGpu; return; } operation = Operation::EnqueueWithoutSubmission; } bool isFlushWithoutKernelRequired() const { return (operation == Operation::Blit) || (operation == Operation::ExplicitCacheFlush) || (operation == Operation::DependencyResolveOnGpu); } const BlitPropertiesContainer *blitPropertiesContainer = nullptr; Operation operation = Operation::EnqueueWithoutSubmission; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/error_mappers.h000066400000000000000000000054671363734646600246440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once template struct NullObjectErrorMapper { static const cl_int retVal = CL_SUCCESS; }; // clang-format off template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_COMMAND_QUEUE; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_CONTEXT; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_DEVICE; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_EVENT; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_KERNEL; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_MEM_OBJECT; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_PLATFORM; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_PROGRAM; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_SAMPLER; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_VALUE; }; template <> struct NullObjectErrorMapper { static const cl_int retVal = CL_INVALID_VALUE; }; // clang-format on // 
defaults to CL_SUCCESS template struct InvalidObjectErrorMapper { static const cl_int retVal = CL_SUCCESS; }; // clang-format off // Special case the ones we do have proper validation for. template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; template <> struct InvalidObjectErrorMapper { static const cl_int retVal = NullObjectErrorMapper::retVal; }; // clang-format on compute-runtime-20.13.16352/opencl/source/helpers/get_info_status_mapper.h000066400000000000000000000010341363734646600265070ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/get_info_status.h" #include #include static inline cl_int changeGetInfoStatusToCLResultType(GetInfoStatus status) { switch (status) { case GetInfoStatus::SUCCESS: return CL_SUCCESS; case GetInfoStatus::INVALID_CONTEXT: return CL_INVALID_CONTEXT; case GetInfoStatus::INVALID_VALUE: return CL_INVALID_VALUE; } return CL_INVALID_VALUE; } compute-runtime-20.13.16352/opencl/source/helpers/gmm_types_converter.cpp000066400000000000000000000042321363734646600263770ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/gmm_types_converter.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "GL/gl.h" #include "GL/glext.h" using namespace NEO; void GmmTypesConverter::queryImgFromBufferParams(ImageInfo &imgInfo, GraphicsAllocation *gfxAlloc) { // 1D or 2D from buffer if (imgInfo.imgDesc.imageRowPitch > 0) { imgInfo.rowPitch = imgInfo.imgDesc.imageRowPitch; } else { imgInfo.rowPitch = getValidParam(imgInfo.imgDesc.imageWidth) * imgInfo.surfaceFormat->ImageElementSizeInBytes; } imgInfo.slicePitch = imgInfo.rowPitch * getValidParam(imgInfo.imgDesc.imageHeight); imgInfo.size = gfxAlloc->getUnderlyingBufferSize(); imgInfo.qPitch = 0; } uint32_t GmmTypesConverter::getRenderMultisamplesCount(uint32_t numSamples) { if (numSamples == 2) { return 1; } else if (numSamples == 4) { return 2; } else if (numSamples == 8) { return 3; } else if (numSamples == 16) { return 4; } return 0; } GMM_YUV_PLANE GmmTypesConverter::convertPlane(ImagePlane imagePlane) { if (imagePlane == ImagePlane::PLANE_Y) { return GMM_PLANE_Y; } else if (imagePlane == ImagePlane::PLANE_U || imagePlane == ImagePlane::PLANE_UV) { return GMM_PLANE_U; } else if (imagePlane == ImagePlane::PLANE_V) { return GMM_PLANE_V; } return GMM_NO_PLANE; } GMM_CUBE_FACE_ENUM GmmTypesConverter::getCubeFaceIndex(uint32_t target) { switch (target) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: return __GMM_CUBE_FACE_NEG_X; case GL_TEXTURE_CUBE_MAP_POSITIVE_X: return __GMM_CUBE_FACE_POS_X; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: return __GMM_CUBE_FACE_NEG_Y; case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: return __GMM_CUBE_FACE_POS_Y; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: return __GMM_CUBE_FACE_NEG_Z; case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: return __GMM_CUBE_FACE_POS_Z; } return __GMM_NO_CUBE_MAP; } 
compute-runtime-20.13.16352/opencl/source/helpers/gmm_types_converter.h000066400000000000000000000010741363734646600260450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" namespace NEO { enum class ImagePlane; class GraphicsAllocation; struct ImageInfo; struct GmmTypesConverter { static void queryImgFromBufferParams(ImageInfo &imgInfo, GraphicsAllocation *gfxAlloc); static GMM_CUBE_FACE_ENUM getCubeFaceIndex(uint32_t target); static uint32_t getRenderMultisamplesCount(uint32_t numSamples); static GMM_YUV_PLANE convertPlane(ImagePlane imagePlane); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/hardware_commands_helper.h000066400000000000000000000150521363734646600267700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "opencl/source/helpers/per_thread_data.h" #include "opencl/source/kernel/kernel.h" #include #include #include namespace NEO { class CommandQueue; class LinearStream; class IndirectHeap; struct CrossThreadInfo; struct MultiDispatchInfo; template using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; template struct HardwareCommandsHelper : public PerThreadDataHelper { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; static uint32_t alignSlmSize(uint32_t slmSize); static uint32_t computeSlmValues(uint32_t slmSize); static INTERFACE_DESCRIPTOR_DATA *getInterfaceDescriptor( const IndirectHeap &indirectHeap, uint64_t 
offsetInterfaceDescriptor, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor); static void setAdditionalInfo( INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const Kernel &kernel, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const uint32_t threadsPerThreadGroup); inline static uint32_t additionalSizeRequiredDsh(); static size_t sendInterfaceDescriptorData( const IndirectHeap &indirectHeap, uint64_t offsetInterfaceDescriptor, uint64_t kernelStartOffset, size_t sizeCrossThreadData, size_t sizePerThreadData, size_t bindingTablePointer, size_t offsetSamplerState, uint32_t numSamplers, uint32_t threadsPerThreadGroup, const Kernel &kernel, uint32_t bindingTablePrefetchSize, PreemptionMode preemptionMode, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor); static void sendMediaStateFlush( LinearStream &commandStream, size_t offsetInterfaceDescriptorData); static void sendMediaInterfaceDescriptorLoad( LinearStream &commandStream, size_t offsetInterfaceDescriptorData, size_t sizeInterfaceDescriptorData); static size_t sendCrossThreadData( IndirectHeap &indirectHeap, Kernel &kernel, bool inlineDataProgrammingRequired, WALKER_TYPE *walkerCmd, uint32_t &sizeCrossThreadData); static size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount, const void *srcKernelSsh, size_t srcKernelSshSize, size_t numberOfBindingTableStates, size_t offsetOfBindingTable); static size_t sendIndirectState( LinearStream &commandStream, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, Kernel &kernel, uint64_t kernelStartOffset, uint32_t simd, const size_t localWorkSize[3], const uint64_t offsetInterfaceDescriptorTable, uint32_t &interfaceDescriptorIndex, PreemptionMode preemptionMode, WALKER_TYPE *walkerCmd, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, bool localIdsGenerationByRuntime); static void programPerThreadData( size_t &sizePerThreadData, const bool &localIdsGenerationByRuntime, LinearStream &ioh, uint32_t &simd, 
uint32_t &numChannels, const size_t localWorkSize[3], Kernel &kernel, size_t &sizePerThreadDataTotal, size_t &localWorkItems); static void updatePerThreadDataTotal( size_t &sizePerThreadData, uint32_t &simd, uint32_t &numChannels, size_t &sizePerThreadDataTotal, size_t &localWorkItems); inline static bool resetBindingTablePrefetch(Kernel &kernel); static size_t getSizeRequiredCS(const Kernel *kernel); static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); static bool isPipeControlWArequired(const HardwareInfo &hwInfo); static bool isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo); static size_t getSizeRequiredDSH( const Kernel &kernel); static size_t getSizeRequiredIOH( const Kernel &kernel, size_t localWorkSize = 256); static size_t getSizeRequiredSSH( const Kernel &kernel); static size_t getTotalSizeRequiredDSH( const MultiDispatchInfo &multiDispatchInfo); static size_t getTotalSizeRequiredIOH( const MultiDispatchInfo &multiDispatchInfo); static size_t getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo); static size_t getSshSizeForExecutionModel(const Kernel &kernel); static void setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex); static void programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode); static MI_ATOMIC *programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); static void programMiAtomic(MI_ATOMIC &atomic, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA 
*pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); static void adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo); static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t); static const uint32_t alignIndirectStatePointer = 64 * sizeof(uint8_t); static bool doBindingTablePrefetch(); static bool inlineDataProgrammingRequired(const Kernel &kernel); static bool kernelUsesLocalIds(const Kernel &kernel); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/hardware_commands_helper.inl000066400000000000000000000511741363734646600273300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/address_patch.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/local_id_gen.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/scheduler/scheduler_kernel.h" #include namespace NEO { template bool HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) { return false; } template uint32_t HardwareCommandsHelper::alignSlmSize(uint32_t slmSize) { if (slmSize == 0u) { return 0u; } slmSize = std::max(slmSize, 1024u); slmSize = Math::nextPowerOfTwo(slmSize); UNRECOVERABLE_IF(slmSize > 64u * KB); 
return slmSize; } template uint32_t HardwareCommandsHelper::computeSlmValues(uint32_t slmSize) { auto value = std::max(slmSize, 1024u); value = Math::nextPowerOfTwo(value); value = Math::getMinLsbSet(value); value = value - 9; DEBUG_BREAK_IF(value > 7); return value * !!slmSize; } template size_t HardwareCommandsHelper::getSizeRequiredDSH( const Kernel &kernel) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; const auto &patchInfo = kernel.getKernelInfo().patchInfo; auto samplerCount = patchInfo.samplerStateArray ? patchInfo.samplerStateArray->Count : 0; auto totalSize = samplerCount ? alignUp(samplerCount * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE) : 0; auto borderColorSize = patchInfo.samplerStateArray ? patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset : 0; borderColorSize = alignUp(borderColorSize + alignIndirectStatePointer - 1, alignIndirectStatePointer); totalSize += borderColorSize + additionalSizeRequiredDsh(); DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo().isVmeWorkload)); return alignUp(totalSize, alignInterfaceDescriptorData); } template size_t HardwareCommandsHelper::getSizeRequiredIOH( const Kernel &kernel, size_t localWorkSize) { typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE; auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload; DEBUG_BREAK_IF(nullptr == threadPayload); auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); uint32_t grfSize = sizeof(typename GfxFamily::GRF); return alignUp((kernel.getCrossThreadDataSize() + getPerThreadDataSizeTotal(kernel.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, localWorkSize)), WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); } template size_t HardwareCommandsHelper::getSizeRequiredSSH( const Kernel &kernel) { typedef typename 
GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; auto sizeSSH = kernel.getSurfaceStateHeapSize(); sizeSSH += sizeSSH ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; return sizeSSH; } template size_t getSizeRequired(const MultiDispatchInfo &multiDispatchInfo, SizeGetterT &&getSize, ArgsT... args) { size_t totalSize = 0; auto it = multiDispatchInfo.begin(); for (auto e = multiDispatchInfo.end(); it != e; ++it) { totalSize = alignUp(totalSize, MemoryConstants::cacheLineSize); totalSize += getSize(*it, std::forward(args)...); } totalSize = alignUp(totalSize, MemoryConstants::pageSize); return totalSize; } template size_t HardwareCommandsHelper::getTotalSizeRequiredDSH( const MultiDispatchInfo &multiDispatchInfo) { return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(*dispatchInfo.getKernel()); }); } template size_t HardwareCommandsHelper::getTotalSizeRequiredIOH( const MultiDispatchInfo &multiDispatchInfo) { return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredIOH(*dispatchInfo.getKernel(), Math::computeTotalElementsCount(dispatchInfo.getLocalWorkgroupSize())); }); } template size_t HardwareCommandsHelper::getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo) { return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); }); } template size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kernel &kernel) { typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; size_t totalSize = 0; BlockKernelManager *blockManager = kernel.getProgram()->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); uint32_t maxBindingTableCount = 0; totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); 
totalSize += pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count); } SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel(); totalSize += getSizeRequiredSSH(scheduler); totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); return totalSize; } template size_t HardwareCommandsHelper::sendInterfaceDescriptorData( const IndirectHeap &indirectHeap, uint64_t offsetInterfaceDescriptor, uint64_t kernelStartOffset, size_t sizeCrossThreadData, size_t sizePerThreadData, size_t bindingTablePointer, size_t offsetSamplerState, uint32_t numSamplers, uint32_t threadsPerThreadGroup, const Kernel &kernel, uint32_t bindingTablePrefetchSize, PreemptionMode preemptionMode, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; // Allocate some memory for the interface descriptor auto pInterfaceDescriptor = getInterfaceDescriptor(indirectHeap, offsetInterfaceDescriptor, inlineInterfaceDescriptor); *pInterfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData; // Program the kernel start pointer pInterfaceDescriptor->setKernelStartPointerHigh(kernelStartOffset >> 32); pInterfaceDescriptor->setKernelStartPointer((uint32_t)kernelStartOffset); // # of threads in thread group should be based on LWS. 
pInterfaceDescriptor->setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); pInterfaceDescriptor->setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL); setAdditionalInfo(pInterfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData, threadsPerThreadGroup); pInterfaceDescriptor->setBindingTablePointer(static_cast(bindingTablePointer)); pInterfaceDescriptor->setSamplerStatePointer(static_cast(offsetSamplerState)); DEBUG_BREAK_IF(numSamplers > 16); auto samplerCountState = static_cast((numSamplers + 3) / 4); pInterfaceDescriptor->setSamplerCount(samplerCountState); pInterfaceDescriptor->setBindingTableEntryCount(bindingTablePrefetchSize); auto programmableIDSLMSize = static_cast(computeSlmValues(kernel.slmTotalSize)); pInterfaceDescriptor->setSharedLocalMemorySize(programmableIDSLMSize); programBarrierEnable(pInterfaceDescriptor, kernel.getKernelInfo().patchInfo.executionEnvironment->HasBarriers, kernel.getDevice().getHardwareInfo()); PreemptionHelper::programInterfaceDescriptorDataPreemption(pInterfaceDescriptor, preemptionMode); HardwareCommandsHelper::adjustInterfaceDescriptorData(pInterfaceDescriptor, kernel.getDevice().getHardwareInfo()); return (size_t)offsetInterfaceDescriptor; } // Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base addess) // as required by the INTERFACE_DESCRIPTOR_DATA. 
template size_t HardwareCommandsHelper::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, size_t bindingTableCount, const void *srcKernelSsh, size_t srcKernelSshSize, size_t numberOfBindingTableStates, size_t offsetOfBindingTable) { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; if (bindingTableCount == 0) { // according to compiler, kernel does not reference BTIs to stateful surfaces, so there's nothing to patch return 0; } size_t sshSize = srcKernelSshSize; DEBUG_BREAK_IF(srcKernelSsh == nullptr); auto srcSurfaceState = srcKernelSsh; // Align the heap and allocate space for new ssh data dstHeap.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); auto dstSurfaceState = dstHeap.getSpace(sshSize); // Compiler sends BTI table that is already populated with surface state pointers relative to local SSH. // We may need to patch these pointers so that they are relative to surface state base address if (dstSurfaceState == dstHeap.getCpuBase()) { // nothing to patch, we're at the start of heap (which is assumed to be the surface state base address) // we need to simply copy the ssh (including BTIs from compiler) memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize); return offsetOfBindingTable; } // We can copy-over the surface states, but BTIs will need to be patched memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable); uint32_t surfaceStatesOffset = static_cast(ptrDiff(dstSurfaceState, dstHeap.getCpuBase())); // march over BTIs and offset the pointers based on surface state base address auto *dstBtiTableBase = reinterpret_cast(ptrOffset(dstSurfaceState, offsetOfBindingTable)); DEBUG_BREAK_IF(reinterpret_cast(dstBtiTableBase) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0); auto *srcBtiTableBase = reinterpret_cast(ptrOffset(srcSurfaceState, 
offsetOfBindingTable)); BINDING_TABLE_STATE bti = GfxFamily::cmdInitBindingTableState; for (uint32_t i = 0, e = (uint32_t)numberOfBindingTableStates; i != e; ++i) { uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer(); uint32_t offsetedSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset; bti.setSurfaceStatePointer(offsetedSurfaceStateOffset); // patch just the SurfaceStatePointer bits dstBtiTableBase[i] = bti; DEBUG_BREAK_IF(bti.getRawData(0) % sizeof(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE) != 0); } return ptrDiff(dstBtiTableBase, dstHeap.getCpuBase()); } template size_t HardwareCommandsHelper::sendIndirectState( LinearStream &commandStream, IndirectHeap &dsh, IndirectHeap &ioh, IndirectHeap &ssh, Kernel &kernel, uint64_t kernelStartOffset, uint32_t simd, const size_t localWorkSize[3], const uint64_t offsetInterfaceDescriptorTable, uint32_t &interfaceDescriptorIndex, PreemptionMode preemptionMode, WALKER_TYPE *walkerCmd, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, bool localIdsGenerationByRuntime) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32); auto inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel); // Copy the kernel over to the ISH const auto &kernelInfo = kernel.getKernelInfo(); const auto &patchInfo = kernelInfo.patchInfo; auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? 
kernelInfo.patchInfo.bindingTableState->Count : 0, kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); // Copy our sampler state if it exists uint32_t samplerStateOffset = 0; uint32_t samplerCount = 0; if (patchInfo.samplerStateArray) { samplerCount = patchInfo.samplerStateArray->Count; samplerStateOffset = EncodeStates::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset, samplerCount, patchInfo.samplerStateArray->BorderColorOffset, kernel.getDynamicStateHeap()); } auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload; DEBUG_BREAK_IF(nullptr == threadPayload); auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2]; auto threadsPerThreadGroup = static_cast(getThreadsPerWG(simd, localWorkItems)); auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize(); size_t offsetCrossThreadData = HardwareCommandsHelper::sendCrossThreadData( ioh, kernel, inlineDataProgrammingRequired, walkerCmd, sizeCrossThreadData); size_t sizePerThreadDataTotal = 0; size_t sizePerThreadData = 0; HardwareCommandsHelper::programPerThreadData( sizePerThreadData, localIdsGenerationByRuntime, ioh, simd, numChannels, localWorkSize, kernel, sizePerThreadDataTotal, localWorkItems); uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA); DEBUG_BREAK_IF(patchInfo.executionEnvironment == nullptr); auto bindingTablePrefetchSize = std::min(31u, static_cast(kernel.getNumberOfBindingTableStates())); if (resetBindingTablePrefetch(kernel)) { bindingTablePrefetchSize = 0; } HardwareCommandsHelper::sendInterfaceDescriptorData( dsh, offsetInterfaceDescriptor, kernelStartOffset, sizeCrossThreadData, sizePerThreadData, dstBindingTablePointer, samplerStateOffset, samplerCount, threadsPerThreadGroup, kernel, bindingTablePrefetchSize, 
preemptionMode, inlineInterfaceDescriptor); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap); kernel.getPatchInfoDataList().push_back(patchInfoData); } // Program media state flush to set interface descriptor offset sendMediaStateFlush( commandStream, interfaceDescriptorIndex); DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0); walkerCmd->setIndirectDataStartAddress(static_cast(offsetCrossThreadData)); setInterfaceDescriptorOffset(walkerCmd, interfaceDescriptorIndex); auto indirectDataLength = alignUp(static_cast(sizeCrossThreadData + sizePerThreadDataTotal), WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); walkerCmd->setIndirectDataLength(indirectDataLength); return offsetCrossThreadData; } template void HardwareCommandsHelper::updatePerThreadDataTotal( size_t &sizePerThreadData, uint32_t &simd, uint32_t &numChannels, size_t &sizePerThreadDataTotal, size_t &localWorkItems) { uint32_t grfSize = sizeof(typename GfxFamily::GRF); sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfSize, numChannels); uint32_t localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels); localIdSizePerThread = std::max(localIdSizePerThread, grfSize); sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkItems) * localIdSizePerThread; DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group } template void HardwareCommandsHelper::programMiSemaphoreWait(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode) { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; auto miSemaphoreCmd = commandStream.getSpaceForCmd(); *miSemaphoreCmd = GfxFamily::cmdInitMiSemaphoreWait; 
miSemaphoreCmd->setCompareOperation(compareMode); miSemaphoreCmd->setSemaphoreDataDword(compareData); miSemaphoreCmd->setSemaphoreGraphicsAddress(compareAddress); miSemaphoreCmd->setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); } template typename GfxFamily::MI_ATOMIC *HardwareCommandsHelper::programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize) { auto miAtomic = commandStream.getSpaceForCmd(); *miAtomic = GfxFamily::cmdInitAtomic; HardwareCommandsHelper::programMiAtomic(*miAtomic, writeAddress, opcode, dataSize); return miAtomic; } template void HardwareCommandsHelper::programMiAtomic(MI_ATOMIC &atomic, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize) { atomic.setAtomicOpcode(opcode); atomic.setDataSize(dataSize); atomic.setMemoryAddress(static_cast(writeAddress & 0x0000FFFFFFFFULL)); atomic.setMemoryAddressHigh(static_cast(writeAddress >> 32)); } template bool HardwareCommandsHelper::doBindingTablePrefetch() { return true; } template bool HardwareCommandsHelper::inlineDataProgrammingRequired(const Kernel &kernel) { auto checkKernelForInlineData = true; if (DebugManager.flags.EnablePassInlineData.get() != -1) { checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get(); } if (checkKernelForInlineData) { return kernel.getKernelInfo().patchInfo.threadPayload->PassInlineData; } return false; } template bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel) { return (kernel.getKernelInfo().patchInfo.threadPayload->LocalIDXPresent || kernel.getKernelInfo().patchInfo.threadPayload->LocalIDYPresent || kernel.getKernelInfo().patchInfo.threadPayload->LocalIDZPresent); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/hardware_commands_helper_base.inl000066400000000000000000000156271363734646600303250ustar00rootroot00000000000000/* * Copyright (C) 
2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" namespace NEO { template bool HardwareCommandsHelper::isPipeControlWArequired(const HardwareInfo &hwInfo) { return false; } template typename HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA *HardwareCommandsHelper::getInterfaceDescriptor( const IndirectHeap &indirectHeap, uint64_t offsetInterfaceDescriptor, HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) { return static_cast(ptrOffset(indirectHeap.getCpuBase(), (size_t)offsetInterfaceDescriptor)); } template void HardwareCommandsHelper::setAdditionalInfo( INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const Kernel &kernel, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const uint32_t threadsPerThreadGroup) { auto grfSize = sizeof(typename GfxFamily::GRF); DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0); auto numGrfCrossThreadData = static_cast(sizeCrossThreadData / grfSize); DEBUG_BREAK_IF(numGrfCrossThreadData == 0); pInterfaceDescriptor->setCrossThreadConstantDataReadLength(numGrfCrossThreadData); DEBUG_BREAK_IF((sizePerThreadData % grfSize) != 0); auto numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); // at least 1 GRF of perThreadData for each thread in a thread group when sizeCrossThreadData != 0 numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); pInterfaceDescriptor->setConstantIndirectUrbEntryReadLength(numGrfPerThreadData); } template uint32_t HardwareCommandsHelper::additionalSizeRequiredDsh() { return sizeof(INTERFACE_DESCRIPTOR_DATA); } template size_t HardwareCommandsHelper::getSizeRequiredCS(const Kernel *kernel) { size_t size = 2 * sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD); return size; } template size_t 
HardwareCommandsHelper::getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { return kernel->requiresCacheFlushCommand(commandQueue) ? sizeof(typename GfxFamily::PIPE_CONTROL) : 0; } template void HardwareCommandsHelper::sendMediaStateFlush( LinearStream &commandStream, size_t offsetInterfaceDescriptorData) { typedef typename GfxFamily::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto pCmd = (MEDIA_STATE_FLUSH *)commandStream.getSpace(sizeof(MEDIA_STATE_FLUSH)); *pCmd = GfxFamily::cmdInitMediaStateFlush; pCmd->setInterfaceDescriptorOffset((uint32_t)offsetInterfaceDescriptorData); } template void HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( LinearStream &commandStream, size_t offsetInterfaceDescriptorData, size_t sizeInterfaceDescriptorData) { { typedef typename GfxFamily::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto pCmd = (MEDIA_STATE_FLUSH *)commandStream.getSpace(sizeof(MEDIA_STATE_FLUSH)); *pCmd = GfxFamily::cmdInitMediaStateFlush; } { typedef typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; auto pCmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)commandStream.getSpace(sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD)); *pCmd = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; pCmd->setInterfaceDescriptorDataStartAddress((uint32_t)offsetInterfaceDescriptorData); pCmd->setInterfaceDescriptorTotalLength((uint32_t)sizeInterfaceDescriptorData); } } template void HardwareCommandsHelper::programPerThreadData( size_t &sizePerThreadData, const bool &localIdsGenerationByRuntime, LinearStream &ioh, uint32_t &simd, uint32_t &numChannels, const size_t localWorkSize[3], Kernel &kernel, size_t &sizePerThreadDataTotal, size_t &localWorkItems) { uint32_t grfSize = sizeof(typename GfxFamily::GRF); sendPerThreadData( ioh, simd, grfSize, numChannels, localWorkSize, kernel.getKernelInfo().workgroupDimensionsOrder, kernel.usesOnlyImages()); updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, 
sizePerThreadDataTotal, localWorkItems); } template size_t HardwareCommandsHelper::sendCrossThreadData( IndirectHeap &indirectHeap, Kernel &kernel, bool inlineDataProgrammingRequired, WALKER_TYPE *walkerCmd, uint32_t &sizeCrossThreadData) { indirectHeap.align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto offsetCrossThreadData = indirectHeap.getUsed(); char *pDest = static_cast(indirectHeap.getSpace(sizeCrossThreadData)); memcpy_s(pDest, sizeCrossThreadData, kernel.getCrossThreadData(), sizeCrossThreadData); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress()); } return offsetCrossThreadData + static_cast(indirectHeap.getHeapGpuStartOffset()); } template bool HardwareCommandsHelper::resetBindingTablePrefetch(Kernel &kernel) { return kernel.isSchedulerKernel || !doBindingTablePrefetch(); } template void HardwareCommandsHelper::setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex) { walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++); } template void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; auto pipeControl = reinterpret_cast(commandStream->getSpace(sizeof(PIPE_CONTROL))); *pipeControl = GfxFamily::cmdInitPipeControl; pipeControl->setCommandStreamerStallEnable(true); pipeControl->setDcFlushEnable(true); } template void HardwareCommandsHelper::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { pInterfaceDescriptor->setBarrierEnable(value); } template void HardwareCommandsHelper::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo) {} } // 
namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/hardware_context_controller.cpp000066400000000000000000000056041363734646600301140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/hardware_context_controller.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" using namespace NEO; HardwareContextController::HardwareContextController(aub_stream::AubManager &aubManager, OsContext &osContext, uint32_t flags) { auto deviceBitfield = osContext.getDeviceBitfield(); for (uint32_t deviceIndex = 0; deviceIndex < deviceBitfield.size(); deviceIndex++) { if (deviceBitfield.test(deviceIndex)) { hardwareContexts.emplace_back(aubManager.createHardwareContext(deviceIndex, osContext.getEngineType(), flags)); } } } void HardwareContextController::initialize() { for (auto &hardwareContext : hardwareContexts) { hardwareContext->initialize(); } } void HardwareContextController::pollForCompletion() { for (auto &hardwareContext : hardwareContexts) { hardwareContext->pollForCompletion(); } } void HardwareContextController::expectMemory(uint64_t gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) { for (auto &hardwareContext : hardwareContexts) { hardwareContext->expectMemory(gfxAddress, srcAddress, length, compareOperation); } } void HardwareContextController::submit(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) { for (auto &hardwareContext : hardwareContexts) { hardwareContext->submitBatchBuffer(batchBufferGpuAddress, overrideRingHead); } } void HardwareContextController::writeMemory(uint64_t gfxAddress, const void *memory, size_t size, uint32_t memoryBanks, int hint, size_t pageSize) { if (hardwareContexts.size() == 1u) { 
hardwareContexts.at(0)->writeMemory(gfxAddress, memory, size, memoryBanks, hint, pageSize); return; } for (auto bankId = 0u; bankId < hardwareContexts.size(); bankId++) { auto &hardwareContext = hardwareContexts.at(bankId); auto selectedBank = memoryBanks & (1 << bankId); UNRECOVERABLE_IF(selectedBank == 0); hardwareContext->writeMemory(gfxAddress, memory, size, memoryBanks & (1 << bankId), hint, pageSize); } } void HardwareContextController::dumpBufferBIN(uint64_t gfxAddress, size_t size) { hardwareContexts[0]->dumpBufferBIN(gfxAddress, size); } void HardwareContextController::dumpSurface(const aub_stream::SurfaceInfo &surfaceInfo) { hardwareContexts[0]->dumpSurface(surfaceInfo); } void HardwareContextController::readMemory(uint64_t gfxAddress, void *memory, size_t size, uint32_t memoryBanks, size_t pageSize) { hardwareContexts[0]->readMemory(gfxAddress, memory, size, memoryBanks, pageSize); } compute-runtime-20.13.16352/opencl/source/helpers/hardware_context_controller.h000066400000000000000000000023671363734646600275640ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/hardware_context.h" #include #include namespace NEO { class OsContext; class HardwareContextController { public: HardwareContextController() = delete; HardwareContextController(aub_stream::AubManager &aubManager, OsContext &osContext, uint32_t flags); void initialize(); void pollForCompletion(); void expectMemory(uint64_t gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation); void submit(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead); void writeMemory(uint64_t gfxAddress, const void *memory, size_t size, uint32_t memoryBanks, int hint, size_t pageSize); void dumpBufferBIN(uint64_t gfxAddress, size_t size); 
void dumpSurface(const aub_stream::SurfaceInfo &surfaceInfo); void readMemory(uint64_t gfxAddress, void *memory, size_t size, uint32_t memoryBanks, size_t pageSize); std::vector> hardwareContexts; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/helper_options.cpp000066400000000000000000000004761363734646600253440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace NEO { // AUB file folder location const char *folderAUB = "."; // Initial value for HW tag uint32_t initialHardwareTag = std::numeric_limits::max(); } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/linux/000077500000000000000000000000001363734646600227365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/helpers/linux/kmd_notify_properties_linux.cpp000066400000000000000000000005471363734646600313060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/kmd_notify_properties.h" using namespace NEO; void KmdNotifyHelper::updateAcLineStatus() {} int64_t KmdNotifyHelper::getBaseTimeout(const int64_t &multiplier) const { return properties->delayKmdNotifyMicroseconds * multiplier; } compute-runtime-20.13.16352/opencl/source/helpers/mem_properties_parser_helper.cpp000066400000000000000000000042571363734646600302600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/mem_properties_parser_helper.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" namespace NEO { bool NEO::MemoryPropertiesParser::parseMemoryProperties(const cl_mem_properties_intel *properties, MemoryPropertiesFlags &memoryProperties, cl_mem_flags &flags, cl_mem_flags_intel &flagsIntel, cl_mem_alloc_flags_intel &allocflags, ObjType objectType) { if 
(properties == nullptr) { return true; } for (int i = 0; properties[i] != 0; i += 2) { switch (properties[i]) { case CL_MEM_FLAGS: flags |= static_cast(properties[i + 1]); break; case CL_MEM_FLAGS_INTEL: flagsIntel |= static_cast(properties[i + 1]); break; case CL_MEM_ALLOC_FLAGS_INTEL: allocflags |= static_cast(properties[i + 1]); break; default: return false; } } memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, allocflags); switch (objectType) { case MemoryPropertiesParser::ObjType::BUFFER: return isFieldValid(flags, MemObjHelper::validFlagsForBuffer) && isFieldValid(flagsIntel, MemObjHelper::validFlagsForBufferIntel); case MemoryPropertiesParser::ObjType::IMAGE: return isFieldValid(flags, MemObjHelper::validFlagsForImage) && isFieldValid(flagsIntel, MemObjHelper::validFlagsForImageIntel); default: break; } return true; } void MemoryPropertiesParser::fillPoliciesInProperties(AllocationProperties &allocationProperties, const MemoryPropertiesFlags &memoryProperties, const HardwareInfo &hwInfo) { fillCachePolicyInProperties(allocationProperties, memoryProperties.flags.locallyUncachedResource, memoryProperties.flags.readOnly, false); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/mem_properties_parser_helper.h000066400000000000000000000036441363734646600277240ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/bit_helpers.h" #include "shared/source/memory_manager/allocation_properties.h" #include "opencl/extensions/public/cl_ext_private.h" #include "memory_properties_flags.h" namespace NEO { class MemoryPropertiesParser { public: enum class ObjType { UNKNOWN, BUFFER, IMAGE, }; static bool parseMemoryProperties(const cl_mem_properties_intel *properties, MemoryPropertiesFlags &memoryProperties, cl_mem_flags &flags, cl_mem_flags_intel &flagsIntel, cl_mem_alloc_flags_intel &allocflags, ObjType 
objectType); static AllocationProperties getAllocationProperties(uint32_t rootDeviceIndex, MemoryPropertiesFlags memoryProperties, bool allocateMemory, size_t size, GraphicsAllocation::AllocationType type, bool multiStorageResource, const HardwareInfo &hwInfo) { AllocationProperties allocationProperties(rootDeviceIndex, allocateMemory, size, type, multiStorageResource); fillPoliciesInProperties(allocationProperties, memoryProperties, hwInfo); return allocationProperties; } static void fillPoliciesInProperties(AllocationProperties &allocationProperties, const MemoryPropertiesFlags &memoryProperties, const HardwareInfo &hwInfo); static void fillCachePolicyInProperties(AllocationProperties &allocationProperties, bool uncached, bool readOnly, bool deviceOnlyVisibilty) { allocationProperties.flags.uncacheable = uncached; auto cacheFlushRequired = !uncached && !readOnly && !deviceOnlyVisibilty; allocationProperties.flags.flushL3RequiredForRead = cacheFlushRequired; allocationProperties.flags.flushL3RequiredForWrite = cacheFlushRequired; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/memory_properties_flags_helpers.cpp000066400000000000000000000005511363734646600307660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/memory_properties_flags_helpers_base.inl" namespace NEO { void MemoryPropertiesFlagsParser::addExtraMemoryPropertiesFlags(MemoryPropertiesFlags &propertiesFlag, cl_mem_flags flags, cl_mem_flags_intel flagsIntel) { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/memory_properties_flags_helpers.h000066400000000000000000000010721363734646600304320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/extensions/public/cl_ext_private.h" #include "memory_properties_flags.h" namespace NEO { class MemoryPropertiesFlagsParser { public: 
static void addExtraMemoryPropertiesFlags(MemoryPropertiesFlags &propertiesFlags, cl_mem_flags flags, cl_mem_flags_intel flagsIntel); static MemoryPropertiesFlags createMemoryPropertiesFlags(cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem_alloc_flags_intel allocflags); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/memory_properties_flags_helpers_base.inl000066400000000000000000000061671363734646600317710ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/bit_helpers.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "CL/cl_ext_intel.h" namespace NEO { MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem_alloc_flags_intel allocflags) { MemoryPropertiesFlags memoryPropertiesFlags; if (isValueSet(flags, CL_MEM_READ_WRITE)) { memoryPropertiesFlags.flags.readWrite = true; } if (isValueSet(flags, CL_MEM_WRITE_ONLY)) { memoryPropertiesFlags.flags.writeOnly = true; } if (isValueSet(flags, CL_MEM_READ_ONLY)) { memoryPropertiesFlags.flags.readOnly = true; } if (isValueSet(flags, CL_MEM_USE_HOST_PTR)) { memoryPropertiesFlags.flags.useHostPtr = true; } if (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR)) { memoryPropertiesFlags.flags.allocHostPtr = true; } if (isValueSet(flags, CL_MEM_COPY_HOST_PTR)) { memoryPropertiesFlags.flags.copyHostPtr = true; } if (isValueSet(flags, CL_MEM_HOST_WRITE_ONLY)) { memoryPropertiesFlags.flags.hostWriteOnly = true; } if (isValueSet(flags, CL_MEM_HOST_READ_ONLY)) { memoryPropertiesFlags.flags.hostReadOnly = true; } if (isValueSet(flags, CL_MEM_HOST_NO_ACCESS)) { memoryPropertiesFlags.flags.hostNoAccess = true; } if (isValueSet(flags, CL_MEM_KERNEL_READ_AND_WRITE)) { memoryPropertiesFlags.flags.kernelReadAndWrite = true; } if (isValueSet(flags, 
CL_MEM_FORCE_LINEAR_STORAGE_INTEL) || isValueSet(flagsIntel, CL_MEM_FORCE_LINEAR_STORAGE_INTEL)) { memoryPropertiesFlags.flags.forceLinearStorage = true; } if (isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) { memoryPropertiesFlags.flags.accessFlagsUnrestricted = true; } if (isValueSet(flags, CL_MEM_NO_ACCESS_INTEL)) { memoryPropertiesFlags.flags.noAccess = true; } if (isValueSet(flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) || isValueSet(flagsIntel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL)) { memoryPropertiesFlags.flags.allowUnrestrictedSize = true; } if (isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE)) { memoryPropertiesFlags.flags.locallyUncachedResource = true; } if (isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE)) { memoryPropertiesFlags.flags.locallyUncachedInSurfaceState = true; } if (isValueSet(flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL)) { memoryPropertiesFlags.flags.forceSharedPhysicalMemory = true; } if (isValueSet(allocflags, CL_MEM_ALLOC_WRITE_COMBINED_INTEL)) { memoryPropertiesFlags.allocFlags.allocWriteCombined = true; } if (isValueSet(flagsIntel, CL_MEM_48BIT_RESOURCE_INTEL)) { memoryPropertiesFlags.flags.resource48Bit = true; } addExtraMemoryPropertiesFlags(memoryPropertiesFlags, flags, flagsIntel); return memoryPropertiesFlags; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/mipmap.cpp000066400000000000000000000053621363734646600235740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/mipmap.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "opencl/source/mem_obj/image.h" #include #include #include namespace NEO { uint32_t getMipLevelOriginIdx(cl_mem_object_type imageType) { switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: return 1; case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: return 2; case 
CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: return 3; case CL_MEM_OBJECT_IMAGE1D_BUFFER: return 0; default: DEBUG_BREAK_IF(true); return std::numeric_limits::max(); } } uint32_t findMipLevel(cl_mem_object_type imageType, const size_t *origin) { size_t mipLevel = 0; switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: mipLevel = origin[getMipLevelOriginIdx(imageType)]; break; default: mipLevel = 0; break; } return static_cast(mipLevel); } bool isMipMapped(const MemObj *memObj) { auto image = castToObject(memObj); if (image == nullptr) { return false; } return isMipMapped(image->getImageDesc()); } uint32_t getMipOffset(Image *image, const size_t *origin) { if (isMipMapped(image) == false) { return 0; } UNRECOVERABLE_IF(origin == nullptr); auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; size_t offset{}; auto imageType = image->getImageDesc().image_type; auto lod = findMipLevel(imageType, origin); auto baseWidth = image->getImageDesc().image_width; auto baseHeight = image->getImageDesc().image_height; if (lod) { size_t mipHeight = baseHeight; size_t mipWidth = baseWidth; bool translate = false; if (lod >= 2) { translate = true; mipWidth += std::max(baseWidth >> 2, 1); } for (size_t currentLod = 3; currentLod <= lod; currentLod++) { mipHeight += std::max(baseHeight >> currentLod, 1); mipWidth += std::max(baseWidth >> currentLod, 1); } if (imageType == CL_MEM_OBJECT_IMAGE1D) { offset = mipWidth; } else { offset = baseWidth * mipHeight; if (translate) { offset += std::max(baseWidth >> 1, 1); } } } return static_cast(bytesPerPixel * offset); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/mipmap.h000066400000000000000000000010431363734646600232310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once 
#include "CL/cl.h" #include namespace NEO { class MemObj; class Image; uint32_t getMipLevelOriginIdx(cl_mem_object_type imageType); uint32_t findMipLevel(cl_mem_object_type imageType, const size_t *origin); inline bool isMipMapped(const cl_image_desc &imgDesc) { return (imgDesc.num_mip_levels > 1); } bool isMipMapped(const MemObj *memObj); uint32_t getMipOffset(Image *image, const size_t *origin); } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/neo_driver_version.h000066400000000000000000000005401363734646600256500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "driver_version.h" #ifdef QTR #undef QTR #endif #ifdef TOSTR #undef TOSTR #endif #define QTR(a) #a #define TOSTR(b) QTR(b) namespace NEO { constexpr const char *driverVersion = TOSTR(NEO_OCL_DRIVER_VERSION); } #undef QTR #undef TOSTR compute-runtime-20.13.16352/opencl/source/helpers/per_thread_data.cpp000066400000000000000000000041661363734646600254200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/per_thread_data.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/debug_helpers.h" #include namespace NEO { size_t PerThreadDataHelper::sendPerThreadData( LinearStream &indirectHeap, uint32_t simd, uint32_t grfSize, uint32_t numChannels, const size_t localWorkSizes[3], const std::array &workgroupWalkOrder, bool hasKernelOnlyImages) { auto offsetPerThreadData = indirectHeap.getUsed(); if (numChannels) { auto localWorkSize = localWorkSizes[0] * localWorkSizes[1] * localWorkSizes[2]; auto sizePerThreadDataTotal = getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize); auto pDest = indirectHeap.getSpace(sizePerThreadDataTotal); // Generate local IDs DEBUG_BREAK_IF(numChannels != 3); generateLocalIDs(pDest, static_cast(simd), 
std::array{{static_cast(localWorkSizes[0]), static_cast(localWorkSizes[1]), static_cast(localWorkSizes[2])}}, std::array{{workgroupWalkOrder[0], workgroupWalkOrder[1], workgroupWalkOrder[2]}}, hasKernelOnlyImages, grfSize); } return offsetPerThreadData; } uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) { uint32_t multiplier = static_cast(getGRFsPerThread(simd, grfSize)); uint32_t threadPayloadSize = 0; threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize; threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0; threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (grfSize * multiplier) : 0; threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? grfSize : 0; return threadPayloadSize; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/per_thread_data.h000066400000000000000000000027051363734646600250620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/local_id_gen.h" #include "patch_shared.h" #include #include #include namespace NEO { class LinearStream; struct PerThreadDataHelper { static inline uint32_t getLocalIdSizePerThread( uint32_t simd, uint32_t grfSize, uint32_t numChannels) { return getPerThreadSizeLocalIDs(simd, grfSize, numChannels); } static inline size_t getPerThreadDataSizeTotal( uint32_t simd, uint32_t grfSize, uint32_t numChannels, size_t localWorkSize) { return getThreadsPerWG(simd, localWorkSize) * getLocalIdSizePerThread(simd, grfSize, numChannels); } static size_t sendPerThreadData( LinearStream &indirectHeap, uint32_t simd, uint32_t grfSize, uint32_t numChannels, const size_t localWorkSizes[3], const std::array &workgroupWalkOrder, bool hasKernelOnlyImages); static inline uint32_t getNumLocalIdChannels(const iOpenCL::SPatchThreadPayload &threadPayload) { return 
threadPayload.LocalIDXPresent + threadPayload.LocalIDYPresent + threadPayload.LocalIDZPresent; } static uint32_t getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/properties_helper.cpp000066400000000000000000000063101363734646600260360ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/properties_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { void EventsRequest::fillCsrDependencies(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const { for (cl_uint i = 0; i < this->numEventsInWaitList; i++) { auto event = castToObjectOrAbort(this->eventWaitList[i]); if (event->isUserEvent()) { continue; } auto timestampPacketContainer = event->getTimestampPacketNodes(); if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) { continue; } auto sameCsr = (&event->getCommandQueue()->getGpgpuCommandStreamReceiver() == ¤tCsr); bool pushDependency = (CsrDependencies::DependenciesType::OnCsr == depsType && sameCsr) || (CsrDependencies::DependenciesType::OutOfCsr == depsType && !sameCsr) || (CsrDependencies::DependenciesType::All == depsType); if (pushDependency) { csrDeps.push_back(timestampPacketContainer); } } } TransferProperties::TransferProperties(MemObj *memObj, cl_command_type cmdType, cl_map_flags mapFlags, bool blocking, size_t *offsetPtr, size_t *sizePtr, void *ptr, bool doTransferOnCpu) : memObj(memObj), ptr(ptr), cmdType(cmdType), mapFlags(mapFlags), blocking(blocking), doTransferOnCpu(doTransferOnCpu) { // no size or 
offset passed for unmap operation if (cmdType != CL_COMMAND_UNMAP_MEM_OBJECT) { if (memObj->peekClMemObjType() == CL_MEM_OBJECT_BUFFER) { size[0] = *sizePtr; offset[0] = *offsetPtr; if (doTransferOnCpu && (false == MemoryPool::isSystemMemoryPool(memObj->getGraphicsAllocation()->getMemoryPool())) && (memObj->getMemoryManager() != nullptr)) { this->lockedPtr = memObj->getMemoryManager()->lockResource(memObj->getGraphicsAllocation()); } } else { size = {{sizePtr[0], sizePtr[1], sizePtr[2]}}; offset = {{offsetPtr[0], offsetPtr[1], offsetPtr[2]}}; if (isMipMapped(memObj)) { // decompose origin to coordinates and miplevel mipLevel = findMipLevel(memObj->peekClMemObjType(), offsetPtr); mipPtrOffset = getMipOffset(castToObjectOrAbort(memObj), offsetPtr); auto mipLevelIdx = getMipLevelOriginIdx(memObj->peekClMemObjType()); if (mipLevelIdx < offset.size()) { offset[mipLevelIdx] = 0; } } } } } void *TransferProperties::getCpuPtrForReadWrite() { return ptrOffset(lockedPtr ? ptrOffset(lockedPtr, memObj->getOffset()) : memObj->getCpuAddressForMemoryTransfer(), offset[0]); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/properties_helper.h000066400000000000000000000040321363734646600255020ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_deps.h" #include "shared/source/command_stream/queue_throttle.h" #include "opencl/source/api/cl_types.h" #include #include namespace NEO { class MemObj; class Buffer; struct EventsRequest { EventsRequest() = delete; EventsRequest(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *outEvent) : numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {} void fillCsrDependencies(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const; cl_uint numEventsInWaitList; const cl_event *eventWaitList; cl_event 
*outEvent; }; using MemObjSizeArray = std::array; using MemObjOffsetArray = std::array; using MemObjsForAuxTranslation = std::unordered_set; struct TransferProperties { TransferProperties() = delete; TransferProperties(MemObj *memObj, cl_command_type cmdType, cl_map_flags mapFlags, bool blocking, size_t *offsetPtr, size_t *sizePtr, void *ptr, bool doTransferOnCpu); MemObjOffsetArray offset = {}; MemObjSizeArray size = {}; MemObj *memObj = nullptr; void *ptr = nullptr; void *lockedPtr = nullptr; cl_command_type cmdType = 0; cl_map_flags mapFlags = 0; uint32_t mipLevel = 0; uint32_t mipPtrOffset = 0; bool blocking = false; bool doTransferOnCpu = false; void *getCpuPtrForReadWrite(); }; struct MapInfo { MapInfo() = default; MapInfo(void *ptr, size_t ptrLength, MemObjSizeArray size, MemObjOffsetArray offset, uint32_t mipLevel) : size(size), offset(offset), ptrLength(ptrLength), ptr(ptr), mipLevel(mipLevel) { } MemObjSizeArray size = {}; MemObjOffsetArray offset = {}; size_t ptrLength = 0; void *ptr = nullptr; uint32_t mipLevel = 0; bool readOnly = false; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/queue_helpers.cpp000066400000000000000000000017171363734646600251570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/queue_helpers.h" namespace NEO { bool isExtraToken(const cl_queue_properties *property) { return false; } bool verifyExtraTokens(ClDevice *&device, Context &context, const cl_queue_properties *properties) { return true; } void CommandQueue::processProperties(const cl_queue_properties *properties) { } void getIntelQueueInfo(CommandQueue *queue, cl_command_queue_info paramName, GetInfoHelper &getInfoHelper, cl_int &retVal) { retVal = CL_INVALID_VALUE; } bool isCommandWithoutKernel(uint32_t commandType) { return ((commandType == CL_COMMAND_BARRIER) || (commandType == CL_COMMAND_MARKER) || (commandType == CL_COMMAND_MIGRATE_MEM_OBJECTS) || 
(commandType == CL_COMMAND_SVM_MAP) || (commandType == CL_COMMAND_SVM_UNMAP) || (commandType == CL_COMMAND_SVM_FREE)); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/queue_helpers.h000066400000000000000000000110301363734646600246110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/get_info_status_mapper.h" namespace NEO { inline void releaseVirtualEvent(CommandQueue &commandQueue) { if (commandQueue.getRefApiCount() == 1) { commandQueue.releaseVirtualEvent(); } } inline void releaseVirtualEvent(DeviceQueue &commandQueue) { } bool isCommandWithoutKernel(uint32_t commandType); template void retainQueue(cl_command_queue commandQueue, cl_int &retVal) { using BaseType = typename QueueType::BaseType; auto queue = castToObject(static_cast(commandQueue)); if (queue) { queue->retain(); retVal = CL_SUCCESS; } } void getIntelQueueInfo(CommandQueue *queue, cl_command_queue_info paramName, GetInfoHelper &getInfoHelper, cl_int &retVal); template void releaseQueue(cl_command_queue commandQueue, cl_int &retVal) { using BaseType = typename QueueType::BaseType; auto queue = castToObject(static_cast(commandQueue)); if (queue) { releaseVirtualEvent(*queue); queue->release(); retVal = CL_SUCCESS; } } template cl_int getQueueInfo(QueueType *queue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; GetInfoHelper getInfoHelper(paramValue, paramValueSize, paramValueSizeRet); switch (paramName) { case CL_QUEUE_CONTEXT: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(&queue->getContext())); break; case CL_QUEUE_DEVICE: { Device 
&device = queue->getDevice(); retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(device.getSpecializedDevice())); break; } case CL_QUEUE_REFERENCE_COUNT: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getReference())); break; case CL_QUEUE_PROPERTIES: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getCommandQueueProperties())); break; case CL_QUEUE_DEVICE_DEFAULT: retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(queue->getContext().getDefaultDeviceQueue())); break; case CL_QUEUE_SIZE: if (std::is_same::value) { auto devQ = reinterpret_cast(queue); retVal = changeGetInfoStatusToCLResultType(getInfoHelper.set(devQ->getQueueSize())); break; } retVal = CL_INVALID_VALUE; break; default: if (std::is_same::value) { auto cmdQ = reinterpret_cast(queue); getIntelQueueInfo(cmdQ, paramName, getInfoHelper, retVal); break; } retVal = CL_INVALID_VALUE; break; } return retVal; } template void getQueueInfo(cl_command_queue commandQueue, cl_command_queue_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet, cl_int &retVal) { using BaseType = typename QueueType::BaseType; auto queue = castToObject(static_cast(commandQueue)); if (queue) { retVal = getQueueInfo(queue, paramName, paramValueSize, paramValue, paramValueSizeRet); } } template returnType getCmdQueueProperties(const cl_queue_properties *properties, cl_queue_properties propertyName = CL_QUEUE_PROPERTIES) { returnType retVal = 0; while (properties != nullptr && *properties != 0) { if (*properties == propertyName) { ++properties; retVal = static_cast(*properties); return retVal; } ++properties; } return retVal; } bool isExtraToken(const cl_queue_properties *property); bool verifyExtraTokens(ClDevice *&device, Context &context, const cl_queue_properties *properties); } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/sampler_helpers.h000066400000000000000000000026011363734646600251340ustar00rootroot00000000000000/* * 
Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include // It's max SSH size per kernel (MAX_BINDING_TABLE_INDEX * 64) const uint32_t SAMPLER_OBJECT_ID_SHIFT = 253 * 64; // Sampler Patch Token Enums enum SAMPLER_PATCH_ENUM { CLK_DEFAULT_SAMPLER = 0x00, CLK_ADDRESS_NONE = 0x00, CLK_ADDRESS_CLAMP = 0x01, CLK_ADDRESS_CLAMP_TO_EDGE = 0x02, CLK_ADDRESS_REPEAT = 0x03, CLK_ADDRESS_MIRRORED_REPEAT = 0x04, CLK_ADDRESS_MIRRORED_REPEAT_101 = 0x05, CLK_NORMALIZED_COORDS_FALSE = 0x00, CLK_NORMALIZED_COORDS_TRUE = 0x08, CLK_FILTER_NEAREST = 0x00, CLK_FILTER_LINEAR = 0x00, }; inline SAMPLER_PATCH_ENUM GetAddrModeEnum(cl_addressing_mode addressingMode) { switch (addressingMode) { case CL_ADDRESS_REPEAT: return CLK_ADDRESS_REPEAT; case CL_ADDRESS_CLAMP_TO_EDGE: return CLK_ADDRESS_CLAMP_TO_EDGE; case CL_ADDRESS_CLAMP: return CLK_ADDRESS_CLAMP; case CL_ADDRESS_NONE: return CLK_ADDRESS_NONE; case CL_ADDRESS_MIRRORED_REPEAT: return CLK_ADDRESS_MIRRORED_REPEAT; } return CLK_ADDRESS_NONE; } inline SAMPLER_PATCH_ENUM GetNormCoordsEnum(cl_bool normalizedCoords) { if (normalizedCoords == CL_TRUE) { return CLK_NORMALIZED_COORDS_TRUE; } else { return CLK_NORMALIZED_COORDS_FALSE; } } compute-runtime-20.13.16352/opencl/source/helpers/string_helpers.h000066400000000000000000000027561363734646600250120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/stackvec.h" #include "CL/cl.h" #include #include #include const int maximalStackSizeSizes = 16; inline int createCombinedString( std::string &dstString, size_t &dstStringSizeInBytes, uint32_t numStrings, const char **strings, const size_t *lengths) { int retVal = CL_SUCCESS; if (numStrings == 0 || strings == nullptr) { retVal = CL_INVALID_VALUE; } using SourceSizesT = StackVec; SourceSizesT localSizes; if (retVal == CL_SUCCESS) { localSizes.resize(numStrings); 
dstStringSizeInBytes = 1; for (uint32_t i = 0; i < numStrings; i++) { if (strings[i] == nullptr) { retVal = CL_INVALID_VALUE; break; } if ((lengths == nullptr) || (lengths[i] == 0)) { localSizes[i] = strlen((const char *)strings[i]); } else { localSizes[i] = lengths[i]; } dstStringSizeInBytes += localSizes[i]; } } if (retVal == CL_SUCCESS) { dstString.reserve(dstStringSizeInBytes); for (uint32_t i = 0; i < numStrings; i++) { dstString.append(strings[i], localSizes[i]); } // add the null terminator dstString += '\0'; } return retVal; } compute-runtime-20.13.16352/opencl/source/helpers/surface_formats.cpp000066400000000000000000000311551363734646600254730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "surface_formats.h" #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/array_count.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/mem_obj/image.h" #include "validators.h" namespace NEO { // clang-format off #define COMMONFORMATS \ {{CL_RGBA, CL_UNORM_INT8}, {GMM_FORMAT_R8G8B8A8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_UNORM_INT16}, {GMM_FORMAT_R16G16B16A16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_SIGNED_INT8}, {GMM_FORMAT_R8G8B8A8_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SINT , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_SIGNED_INT16}, {GMM_FORMAT_R16G16B16A16_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SINT , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_SIGNED_INT32}, {GMM_FORMAT_R32G32B32A32_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_SINT , 0, 4, 4, 16}}, \ {{CL_RGBA, CL_UNSIGNED_INT8}, {GMM_FORMAT_R8G8B8A8_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UINT , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_UNSIGNED_INT16}, {GMM_FORMAT_R16G16B16A16_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UINT , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_UNSIGNED_INT32}, {GMM_FORMAT_R32G32B32A32_UINT_TYPE, 
GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_UINT , 0, 4, 4, 16}}, \ {{CL_RGBA, CL_HALF_FLOAT}, {GMM_FORMAT_R16G16B16A16_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_FLOAT , 0, 4, 2, 8}}, \ {{CL_RGBA, CL_FLOAT}, {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32B32A32_FLOAT , 0, 4, 4, 16}}, \ {{CL_BGRA, CL_UNORM_INT8}, {GMM_FORMAT_B8G8R8A8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM , 0, 4, 1, 4}}, \ {{CL_R, CL_FLOAT}, {GMM_FORMAT_R32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT , 0, 1, 4, 4}}, \ {{CL_R, CL_UNORM_INT8}, {GMM_FORMAT_R8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_UNORM , 0, 1, 1, 1}}, \ {{CL_R, CL_UNORM_INT16}, {GMM_FORMAT_R16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_UNORM , 0, 1, 2, 2}}, \ {{CL_R, CL_SIGNED_INT8}, {GMM_FORMAT_R8_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_SINT , 0, 1, 1, 1}}, \ {{CL_R, CL_SIGNED_INT16}, {GMM_FORMAT_R16_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_SINT , 0, 1, 2, 2}}, \ {{CL_R, CL_SIGNED_INT32}, {GMM_FORMAT_R32_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_SINT , 0, 1, 4, 4}}, \ {{CL_R, CL_UNSIGNED_INT8}, {GMM_FORMAT_R8_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_UINT , 0, 1, 1, 1}}, \ {{CL_R, CL_UNSIGNED_INT16}, {GMM_FORMAT_R16_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_UINT , 0, 1, 2, 2}}, \ {{CL_R, CL_UNSIGNED_INT32}, {GMM_FORMAT_R32_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_UINT , 0, 1, 4, 4}}, \ {{CL_R, CL_HALF_FLOAT}, {GMM_FORMAT_R16_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_FLOAT , 0, 1, 2, 2}}, \ {{CL_A, CL_UNORM_INT8}, {GMM_FORMAT_A8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_A8_UNORM , 0, 1, 1, 1}}, \ {{CL_RG, CL_UNORM_INT8}, {GMM_FORMAT_R8G8_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_UNORM , 0, 2, 1, 2}}, \ {{CL_RG, CL_UNORM_INT16}, {GMM_FORMAT_R16G16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_UNORM , 0, 2, 2, 4}}, \ {{CL_RG, CL_SIGNED_INT8}, {GMM_FORMAT_R8G8_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_SINT , 0, 2, 1, 2}}, \ {{CL_RG, CL_SIGNED_INT16}, {GMM_FORMAT_R16G16_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_SINT , 
0, 2, 2, 4}}, \ {{CL_RG, CL_SIGNED_INT32}, {GMM_FORMAT_R32G32_SINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32_SINT , 0, 2, 4, 8}}, \ {{CL_RG, CL_UNSIGNED_INT8}, {GMM_FORMAT_R8G8_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_UINT , 0, 2, 1, 2}}, \ {{CL_RG, CL_UNSIGNED_INT16}, {GMM_FORMAT_R16G16_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_UINT , 0, 2, 2, 4}}, \ {{CL_RG, CL_UNSIGNED_INT32}, {GMM_FORMAT_R32G32_UINT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32_UINT , 0, 2, 4, 8}}, \ {{CL_RG, CL_HALF_FLOAT}, {GMM_FORMAT_R16G16_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT , 0, 2, 2, 4}}, \ {{CL_RG, CL_FLOAT}, {GMM_FORMAT_R32G32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32G32_FLOAT , 0, 2, 4, 8}}, \ {{CL_LUMINANCE, CL_UNORM_INT8}, {GMM_FORMAT_GENERIC_8BIT, GFX3DSTATE_SURFACEFORMAT_R8_UNORM , 0, 1, 1, 1}}, \ {{CL_LUMINANCE, CL_UNORM_INT16}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_R16_UNORM , 0, 1, 2, 2}}, \ {{CL_LUMINANCE, CL_HALF_FLOAT}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_R16_FLOAT , 0, 1, 2, 2}}, \ {{CL_LUMINANCE, CL_FLOAT}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT , 0, 1, 4, 4}}, \ {{CL_R, CL_SNORM_INT8}, {GMM_FORMAT_R8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_SNORM , 0, 1, 1, 1}}, \ {{CL_R, CL_SNORM_INT16}, {GMM_FORMAT_R16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_SNORM , 0, 1, 2, 2}}, \ {{CL_RG, CL_SNORM_INT8}, {GMM_FORMAT_R8G8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM , 0, 2, 1, 2}}, \ {{CL_RG, CL_SNORM_INT16}, {GMM_FORMAT_R16G16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM , 0, 2, 2, 4}}, \ {{CL_RGBA, CL_SNORM_INT8}, {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM , 0, 4, 1, 4}}, \ {{CL_RGBA, CL_SNORM_INT16}, {GMM_FORMAT_R16G16B16A16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM , 0, 4, 2, 8}} #define READONLYFORMATS \ {{CL_INTENSITY, CL_UNORM_INT8}, {GMM_FORMAT_GENERIC_8BIT, GFX3DSTATE_SURFACEFORMAT_I8_UNORM , 0, 1, 1, 1}}, \ {{CL_INTENSITY, CL_UNORM_INT16}, {GMM_FORMAT_GENERIC_16BIT, 
GFX3DSTATE_SURFACEFORMAT_I16_UNORM , 0, 1, 2, 2}}, \ {{CL_INTENSITY, CL_HALF_FLOAT}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_I16_FLOAT , 0, 1, 2, 2}}, \ {{CL_INTENSITY, CL_FLOAT}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_I32_FLOAT , 0, 1, 4, 4}}, \ {{CL_A, CL_UNORM_INT16}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_A16_UNORM , 0, 1, 2, 2}}, \ {{CL_A, CL_HALF_FLOAT}, {GMM_FORMAT_GENERIC_16BIT, GFX3DSTATE_SURFACEFORMAT_A16_FLOAT , 0, 1, 2, 2}}, \ {{CL_A, CL_FLOAT}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_A32_FLOAT , 0, 1, 4, 4}} #define SRGBFORMATS \ {{CL_sRGBA, CL_UNORM_INT8}, {GMM_FORMAT_R8G8B8A8_UNORM_SRGB_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB , 0, 4, 1, 4}}, \ {{CL_sBGRA, CL_UNORM_INT8}, {GMM_FORMAT_B8G8R8A8_UNORM_SRGB_TYPE, GFX3DSTATE_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB , 0, 4, 1, 4}} #define DEPTHFORMATS \ {{ CL_DEPTH, CL_FLOAT}, {GMM_FORMAT_R32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT , 0, 1, 4, 4}}, \ {{ CL_DEPTH, CL_UNORM_INT16}, {GMM_FORMAT_R16_UNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_UNORM , 0, 1, 2, 2}} #define DEPTHSTENCILFORMATS \ {{ CL_DEPTH_STENCIL, CL_UNORM_INT24}, {GMM_FORMAT_GENERIC_32BIT, GFX3DSTATE_SURFACEFORMAT_R24_UNORM_X8_TYPELESS , 0, 1, 4, 4}}, \ {{ CL_DEPTH_STENCIL, CL_FLOAT}, {GMM_FORMAT_R32G32_FLOAT_TYPE, GFX3DSTATE_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS, 0, 2, 4, 8}} //Initialize this with the required formats first. 
//Append the optional one later const ClSurfaceFormatInfo SurfaceFormats::readOnlySurfaceFormats12[] = { COMMONFORMATS, READONLYFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::readOnlySurfaceFormats20[] = { COMMONFORMATS, READONLYFORMATS, SRGBFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::writeOnlySurfaceFormats[] = { COMMONFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::readWriteSurfaceFormats[] = { COMMONFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::packedYuvSurfaceFormats[] = { {{CL_YUYV_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_YUY2, GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL , 0, 2, 1, 2}}, {{CL_UYVY_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_UYVY, GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY , 0, 2, 1, 2}}, {{CL_YVYU_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_YVYU, GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUV , 0, 2, 1, 2}}, {{CL_VYUY_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_VYUY, GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY , 0, 2, 1, 2}} }; const ClSurfaceFormatInfo SurfaceFormats::planarYuvSurfaceFormats[] = { {{CL_NV12_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_NV12, GFX3DSTATE_SURFACEFORMAT_NV12 , 0, 1, 1, 1}} }; const ClSurfaceFormatInfo SurfaceFormats::readOnlyDepthSurfaceFormats[] = { DEPTHFORMATS, DEPTHSTENCILFORMATS }; const ClSurfaceFormatInfo SurfaceFormats::readWriteDepthSurfaceFormats[] = { DEPTHFORMATS }; ArrayRef SurfaceFormats::readOnly12() noexcept { return ArrayRef(readOnlySurfaceFormats12); } ArrayRef SurfaceFormats::readOnly20() noexcept { return ArrayRef(readOnlySurfaceFormats20); } ArrayRef SurfaceFormats::writeOnly() noexcept { return ArrayRef(writeOnlySurfaceFormats); } ArrayRef SurfaceFormats::readWrite() noexcept { return ArrayRef(readWriteSurfaceFormats); } ArrayRef SurfaceFormats::packedYuv() noexcept { return ArrayRef(packedYuvSurfaceFormats); } ArrayRef SurfaceFormats::planarYuv() noexcept { return ArrayRef(planarYuvSurfaceFormats); } ArrayRef SurfaceFormats::readOnlyDepth() noexcept { return ArrayRef(readOnlyDepthSurfaceFormats); } ArrayRef 
SurfaceFormats::readWriteDepth() noexcept { return ArrayRef(readWriteDepthSurfaceFormats); } ArrayRef SurfaceFormats::surfaceFormats(cl_mem_flags flags, unsigned int clVersionSupport) noexcept { if (flags & CL_MEM_READ_ONLY) { if(clVersionSupport >= 20 ) { return readOnly20(); } else { return readOnly12(); } } else if (flags & CL_MEM_WRITE_ONLY) { return writeOnly(); } else { return readWrite(); } } ArrayRef SurfaceFormats::surfaceFormats(cl_mem_flags flags, const cl_image_format *imageFormat, unsigned int clVersionSupport) noexcept { if (NEO::IsNV12Image(imageFormat)) { return planarYuv(); } else if (IsPackedYuvImage(imageFormat)) { return packedYuv(); } else if (Image::isDepthFormat(*imageFormat)) { if (flags & CL_MEM_READ_ONLY) { return readOnlyDepth(); } else { return readWriteDepth(); } } else if (flags & CL_MEM_READ_ONLY) { if(clVersionSupport >= 20 ) { return readOnly20(); } else { return readOnly12(); } } else if (flags & CL_MEM_WRITE_ONLY) { return writeOnly(); } else { return readWrite(); } } // clang-format on } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/surface_formats.h000066400000000000000000000034511363734646600251360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/utilities/arrayref.h" #include "CL/cl.h" namespace NEO { struct ClSurfaceFormatInfo { cl_image_format OCLImageFormat; SurfaceFormatInfo surfaceFormat; }; class SurfaceFormats { private: static const ClSurfaceFormatInfo readOnlySurfaceFormats12[]; static const ClSurfaceFormatInfo readOnlySurfaceFormats20[]; static const ClSurfaceFormatInfo writeOnlySurfaceFormats[]; static const ClSurfaceFormatInfo readWriteSurfaceFormats[]; static const ClSurfaceFormatInfo readOnlyDepthSurfaceFormats[]; static const ClSurfaceFormatInfo readWriteDepthSurfaceFormats[]; static const 
ClSurfaceFormatInfo packedYuvSurfaceFormats[]; static const ClSurfaceFormatInfo planarYuvSurfaceFormats[]; public: static ArrayRef readOnly12() noexcept; static ArrayRef readOnly20() noexcept; static ArrayRef writeOnly() noexcept; static ArrayRef readWrite() noexcept; static ArrayRef packedYuv() noexcept; static ArrayRef planarYuv() noexcept; static ArrayRef readOnlyDepth() noexcept; static ArrayRef readWriteDepth() noexcept; static ArrayRef surfaceFormats(cl_mem_flags flags, unsigned int clVersionSupport) noexcept; static ArrayRef surfaceFormats(cl_mem_flags flags, const cl_image_format *imageFormat, unsigned int clVersionSupport) noexcept; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/task_information.cpp000066400000000000000000000514471363734646600256650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/task_information.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/csr_deps.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/surface.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/enqueue_properties.h" #include "opencl/source/helpers/task_information.inl" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { template void 
KernelOperation::ResourceCleaner::operator()(LinearStream *); template void KernelOperation::ResourceCleaner::operator()(IndirectHeap *); CommandMapUnmap::CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, CommandQueue &commandQueue) : Command(commandQueue), memObj(memObj), copySize(copySize), copyOffset(copyOffset), readOnly(readOnly), operationType(operationType) { memObj.incRefInternal(); } CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { if (terminated) { memObj.decRefInternal(); return completionStamp; } auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership(); auto &queueCommandStream = commandQueue.getCS(0); size_t offset = queueCommandStream.getUsed(); MultiDispatchInfo multiDispatch; Device &device = commandQueue.getDevice(); DispatchFlags dispatchFlags( {}, //csrDependencies nullptr, //barrierTimestampPacketNodes {}, //pipelineSelectArgs commandQueue.flushStamp->getStampReference(), //flushStampReference commandQueue.getThrottle(), //throttle PreemptionHelper::taskPreemptionMode(device, multiDispatch), //preemptionMode GrfConfig::DefaultGrfNumber, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy commandQueue.getSliceCount(), //sliceCount true, //blocking true, //dcFlush false, //useSLM true, //guardCommandBufferWithPipeControl false, //GSBA32BitRequired false, //requiresCoherency commandQueue.getPriority() == QueuePriority::LOW, //lowPriority false, //implicitFlush commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false //usePerDssBackedBuffer ); DEBUG_BREAK_IF(taskLevel >= CompletionStamp::levelNotReady); gtpinNotifyPreFlushTask(&commandQueue); completionStamp = 
commandStreamReceiver.flushTask(queueCommandStream, offset, commandQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u), commandQueue.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u), commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), taskLevel, dispatchFlags, commandQueue.getDevice()); if (!memObj.isMemObjZeroCopy()) { commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false); if (operationType == MAP) { memObj.transferDataToHostPtr(copySize, copyOffset); } else if (!readOnly) { DEBUG_BREAK_IF(operationType != UNMAP); memObj.transferDataFromHostPtr(copySize, copyOffset); } } memObj.decRefInternal(); return completionStamp; } CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr printfHandler, PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount) : Command(commandQueue, kernelOperation), flushDC(flushDC), slmUsed(usesSLM), NDRangeKernel(ndRangeKernel), printfHandler(std::move(printfHandler)), kernel(kernel), kernelCount(kernelCount), preemptionMode(preemptionMode) { for (auto surface : surfaces) { this->surfaces.push_back(surface); } UNRECOVERABLE_IF(nullptr == this->kernel); kernel->incRefInternal(); } CommandComputeKernel::~CommandComputeKernel() { kernel->decRefInternal(); } CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminated) { if (terminated) { for (auto surface : surfaces) { delete surface; } surfaces.clear(); return completionStamp; } auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); bool executionModelKernel = kernel->isParentKernel; auto devQueue = commandQueue.getContext().getDefaultDeviceQueue(); auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership(); bool isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); if 
(executionModelKernel) { while (!devQueue->isEMCriticalSectionFree()) ; devQueue->resetDeviceQueue(); devQueue->acquireEMCriticalSection(); } IndirectHeap *dsh = kernelOperation->dsh.get(); IndirectHeap *ioh = kernelOperation->ioh.get(); IndirectHeap *ssh = kernelOperation->ssh.get(); auto requiresCoherency = false; auto anyUncacheableArgs = false; for (auto &surface : surfaces) { DEBUG_BREAK_IF(!surface); surface->makeResident(commandStreamReceiver); requiresCoherency |= surface->IsCoherent; if (!surface->allowsL3Caching()) { anyUncacheableArgs = true; } } if (printfHandler) { printfHandler.get()->makeResident(commandStreamReceiver); } makeTimestampPacketsResident(commandStreamReceiver); if (executionModelKernel) { uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1; devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount, commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed); SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel(); scheduler.setArgs(devQueue->getQueueBuffer(), devQueue->getStackBuffer(), devQueue->getEventPoolBuffer(), devQueue->getSlbBuffer(), dsh->getGraphicsAllocation(), kernel->getKernelReflectionSurface(), devQueue->getQueueStorageBuffer(), ssh->getGraphicsAllocation(), devQueue->getDebugQueue()); devQueue->dispatchScheduler( *kernelOperation->commandStream, scheduler, preemptionMode, ssh, dsh, isCcsUsed); scheduler.makeResident(commandStreamReceiver); // Update SLM usage slmUsed |= scheduler.slmTotalSize > 0; this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver); } if (kernelOperation->blitPropertiesContainer.size() > 0) { auto &bcsCsr = *commandQueue.getBcsCommandStreamReceiver(); CsrDependencies csrDeps; eventsRequest.fillCsrDependencies(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All); BlitProperties::setupDependenciesForAuxTranslation(kernelOperation->blitPropertiesContainer, 
*timestampPacketDependencies, *currentTimestampPacketNodes, csrDeps, commandQueue.getGpgpuCommandStreamReceiver(), bcsCsr); auto bcsTaskCount = bcsCsr.blitBuffer(kernelOperation->blitPropertiesContainer, false); commandQueue.updateBcsTaskCount(bcsTaskCount); } DispatchFlags dispatchFlags( {}, //csrDependencies nullptr, //barrierTimestampPacketNodes {false, kernel->isVmeKernel()}, //pipelineSelectArgs commandQueue.flushStamp->getStampReference(), //flushStampReference commandQueue.getThrottle(), //throttle preemptionMode, //preemptionMode kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy commandQueue.getSliceCount(), //sliceCount true, //blocking flushDC, //dcFlush slmUsed, //useSLM true, //guardCommandBufferWithPipeControl NDRangeKernel, //GSBA32BitRequired requiresCoherency, //requiresCoherency commandQueue.getPriority() == QueuePriority::LOW, //lowPriority false, //implicitFlush commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired kernel->requiresPerDssBackedBuffer() //usePerDssBackedBuffer ); if (timestampPacketDependencies) { eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies->barrierNodes; } dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = kernel->requiresSpecialPipelineSelectMode(); if (anyUncacheableArgs) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3CacheOff; } else if (!kernel->areStatelessWritesUsed()) { dispatchFlags.l3CacheSettings = L3CachingSettings::l3AndL1On; } if (commandQueue.dispatchHints != 0) { dispatchFlags.engineHints = commandQueue.dispatchHints; dispatchFlags.epilogueRequired = true; } DEBUG_BREAK_IF(taskLevel >= CompletionStamp::levelNotReady); 
gtpinNotifyPreFlushTask(&commandQueue); completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, 0, *dsh, *ioh, *ssh, taskLevel, dispatchFlags, commandQueue.getDevice()); if (gtpinIsGTPinInitialized()) { gtpinNotifyFlushTask(completionStamp.taskCount); } if (printfHandler) { commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false); printfHandler.get()->printEnqueueOutput(); } for (auto surface : surfaces) { delete surface; } surfaces.clear(); return completionStamp; } void CommandWithoutKernel::dispatchBlitOperation() { auto bcsCsr = commandQueue.getBcsCommandStreamReceiver(); UNRECOVERABLE_IF(bcsCsr == nullptr); UNRECOVERABLE_IF(kernelOperation->blitPropertiesContainer.size() != 1); auto &blitProperties = *kernelOperation->blitPropertiesContainer.begin(); eventsRequest.fillCsrDependencies(blitProperties.csrDependencies, *bcsCsr, CsrDependencies::DependenciesType::All); blitProperties.csrDependencies.push_back(×tampPacketDependencies->cacheFlushNodes); blitProperties.csrDependencies.push_back(×tampPacketDependencies->previousEnqueueNodes); blitProperties.csrDependencies.push_back(×tampPacketDependencies->barrierNodes); blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0]; auto bcsTaskCount = bcsCsr->blitBuffer(kernelOperation->blitPropertiesContainer, false); commandQueue.updateBcsTaskCount(bcsTaskCount); } CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminated) { if (terminated) { return completionStamp; } auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); if (!kernelOperation) { completionStamp.taskCount = commandStreamReceiver.peekTaskCount(); completionStamp.taskLevel = commandStreamReceiver.peekTaskLevel(); completionStamp.flushStamp = commandStreamReceiver.obtainCurrentFlushStamp(); return completionStamp; } auto lockCSR = commandStreamReceiver.obtainUniqueOwnership(); if (kernelOperation->blitEnqueue) { if 
(commandStreamReceiver.isStallingPipeControlOnNextFlushRequired()) { timestampPacketDependencies->barrierNodes.add(commandStreamReceiver.getTimestampPacketAllocator()->getTag()); } dispatchBlitOperation(); } DispatchFlags dispatchFlags( {}, //csrDependencies ×tampPacketDependencies->barrierNodes, //barrierTimestampPacketNodes {}, //pipelineSelectArgs commandQueue.flushStamp->getStampReference(), //flushStampReference commandQueue.getThrottle(), //throttle commandQueue.getDevice().getPreemptionMode(), //preemptionMode GrfConfig::DefaultGrfNumber, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy commandQueue.getSliceCount(), //sliceCount true, //blocking false, //dcFlush false, //useSLM true, //guardCommandBufferWithPipeControl false, //GSBA32BitRequired false, //requiresCoherency commandQueue.getPriority() == QueuePriority::LOW, //lowPriority false, //implicitFlush commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired false //usePerDssBackedBuffer ); UNRECOVERABLE_IF(!commandStreamReceiver.peekTimestampPacketWriteEnabled()); eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr); makeTimestampPacketsResident(commandStreamReceiver); gtpinNotifyPreFlushTask(&commandQueue); completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, 0, commandQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u), commandQueue.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u), commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), taskLevel, dispatchFlags, commandQueue.getDevice()); return completionStamp; } void Command::setEventsRequest(EventsRequest &eventsRequest) { this->eventsRequest = eventsRequest; if (eventsRequest.numEventsInWaitList > 0) { eventsWaitlist.resize(eventsRequest.numEventsInWaitList); auto size = 
eventsRequest.numEventsInWaitList * sizeof(cl_event); memcpy_s(&eventsWaitlist[0], size, eventsRequest.eventWaitList, size); this->eventsRequest.eventWaitList = &eventsWaitlist[0]; } } void Command::setTimestampPacketNode(TimestampPacketContainer ¤t, TimestampPacketDependencies &&dependencies) { currentTimestampPacketNodes = std::make_unique(); currentTimestampPacketNodes->assignAndIncrementNodesRefCounts(current); timestampPacketDependencies = std::make_unique(); *timestampPacketDependencies = std::move(dependencies); } Command::~Command() { auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { for (cl_event &eventFromWaitList : eventsWaitlist) { auto event = castToObjectOrAbort(eventFromWaitList); event->decRefInternal(); } } } void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamReceiver) { if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { for (cl_event &eventFromWaitList : eventsWaitlist) { auto event = castToObjectOrAbort(eventFromWaitList); if (event->getTimestampPacketNodes()) { event->getTimestampPacketNodes()->makeResident(commandStreamReceiver); } } } if (currentTimestampPacketNodes) { currentTimestampPacketNodes->makeResident(commandStreamReceiver); } if (timestampPacketDependencies) { timestampPacketDependencies->cacheFlushNodes.makeResident(commandStreamReceiver); timestampPacketDependencies->previousEnqueueNodes.makeResident(commandStreamReceiver); } } Command::Command(CommandQueue &commandQueue) : commandQueue(commandQueue) {} Command::Command(CommandQueue &commandQueue, std::unique_ptr &kernelOperation) : commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)) {} } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/task_information.h000066400000000000000000000122551363734646600253240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#pragma once #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/utilities/iflist.h" #include "opencl/source/helpers/properties_helper.h" #include #include namespace NEO { class CommandQueue; class CommandStreamReceiver; class InternalAllocationStorage; class Kernel; class MemObj; class Surface; class PrintfHandler; struct HwTimeStamps; class TimestampPacketContainer; template struct TagNode; enum MapOperationType { MAP, UNMAP }; struct KernelOperation { protected: struct ResourceCleaner { ResourceCleaner() = delete; ResourceCleaner(InternalAllocationStorage *storageForAllocations) : storageForAllocations(storageForAllocations){}; template void operator()(ObjectT *object); InternalAllocationStorage *storageForAllocations = nullptr; } resourceCleaner{nullptr}; using LinearStreamUniquePtrT = std::unique_ptr; using IndirectHeapUniquePtrT = std::unique_ptr; public: KernelOperation() = delete; KernelOperation(LinearStream *commandStream, InternalAllocationStorage &storageForAllocations) { resourceCleaner.storageForAllocations = &storageForAllocations; this->commandStream = LinearStreamUniquePtrT(commandStream, resourceCleaner); } void setHeaps(IndirectHeap *dsh, IndirectHeap *ioh, IndirectHeap *ssh) { this->dsh = IndirectHeapUniquePtrT(dsh, resourceCleaner); this->ioh = IndirectHeapUniquePtrT(ioh, resourceCleaner); this->ssh = IndirectHeapUniquePtrT(ssh, resourceCleaner); } ~KernelOperation() { if (ioh.get() == dsh.get()) { ioh.release(); } } LinearStreamUniquePtrT commandStream{nullptr, resourceCleaner}; IndirectHeapUniquePtrT dsh{nullptr, resourceCleaner}; IndirectHeapUniquePtrT ioh{nullptr, resourceCleaner}; IndirectHeapUniquePtrT ssh{nullptr, resourceCleaner}; BlitPropertiesContainer 
blitPropertiesContainer; bool blitEnqueue = false; size_t surfaceStateHeapSizeEM = 0; }; class Command : public IFNode { public: // returns command's taskCount obtained from completion stamp // as acquired from command stream receiver virtual CompletionStamp &submit(uint32_t taskLevel, bool terminated) = 0; Command() = delete; Command(CommandQueue &commandQueue); Command(CommandQueue &commandQueue, std::unique_ptr &kernelOperation); virtual ~Command(); virtual LinearStream *getCommandStream() { return nullptr; } void setTimestampPacketNode(TimestampPacketContainer ¤t, TimestampPacketDependencies &&dependencies); void setEventsRequest(EventsRequest &eventsRequest); void makeTimestampPacketsResident(CommandStreamReceiver &commandStreamReceiver); TagNode *timestamp = nullptr; CompletionStamp completionStamp = {}; protected: CommandQueue &commandQueue; std::unique_ptr kernelOperation; std::unique_ptr currentTimestampPacketNodes; std::unique_ptr timestampPacketDependencies; EventsRequest eventsRequest = {0, nullptr, nullptr}; std::vector eventsWaitlist; }; class CommandMapUnmap : public Command { public: CommandMapUnmap(MapOperationType operationType, MemObj &memObj, MemObjSizeArray ©Size, MemObjOffsetArray ©Offset, bool readOnly, CommandQueue &commandQueue); ~CommandMapUnmap() override = default; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; private: MemObj &memObj; MemObjSizeArray copySize; MemObjOffsetArray copyOffset; bool readOnly; MapOperationType operationType; }; class CommandComputeKernel : public Command { public: CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr printfHandler, PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount); ~CommandComputeKernel() override; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; LinearStream *getCommandStream() override { return 
kernelOperation->commandStream.get(); } protected: std::vector surfaces; bool flushDC; bool slmUsed; bool NDRangeKernel; std::unique_ptr printfHandler; Kernel *kernel; uint32_t kernelCount; PreemptionMode preemptionMode; }; class CommandWithoutKernel : public Command { public: using Command::Command; CompletionStamp &submit(uint32_t taskLevel, bool terminated) override; void dispatchBlitOperation(); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/task_information.inl000066400000000000000000000007331363734646600256550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/task_information.h" namespace NEO { template void KernelOperation::ResourceCleaner::operator()(ObjectT *object) { storageForAllocations->storeAllocation(std::unique_ptr(object->getGraphicsAllocation()), REUSABLE_ALLOCATION); delete object; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/uint16_avx2.h000066400000000000000000000060111363734646600240340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include #include namespace NEO { #if __AVX2__ struct uint16x16_t { enum { numChannels = 16 }; __m256i value; uint16x16_t() { value = _mm256_setzero_si256(); } uint16x16_t(__m256i value) : value(value) { } uint16x16_t(uint16_t a) { value = _mm256_set1_epi16(a); //AVX } explicit uint16x16_t(const void *alignedPtr) { load(alignedPtr); } inline uint16_t get(unsigned int element) { DEBUG_BREAK_IF(element >= numChannels); return reinterpret_cast(&value)[element]; } static inline uint16x16_t zero() { return uint16x16_t(static_cast(0u)); } static inline uint16x16_t one() { return uint16x16_t(static_cast(1u)); } static inline uint16x16_t mask() { return uint16x16_t(static_cast(0xffffu)); } inline void 
load(const void *alignedPtr) { DEBUG_BREAK_IF(!isAligned<32>(alignedPtr)); value = _mm256_load_si256(reinterpret_cast(alignedPtr)); //AVX } inline void loadUnaligned(const void *ptr) { value = _mm256_loadu_si256(reinterpret_cast(ptr)); //AVX } inline void store(void *alignedPtr) { DEBUG_BREAK_IF(!isAligned<32>(alignedPtr)); _mm256_store_si256(reinterpret_cast<__m256i *>(alignedPtr), value); //AVX } inline void storeUnaligned(void *ptr) { _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), value); //AVX } inline operator bool() const { return _mm256_testz_si256(value, mask().value) ? false : true; //AVX } inline uint16x16_t &operator-=(const uint16x16_t &a) { value = _mm256_sub_epi16(value, a.value); //AVX2 return *this; } inline uint16x16_t &operator+=(const uint16x16_t &a) { value = _mm256_add_epi16(value, a.value); //AVX2 return *this; } inline friend uint16x16_t operator>=(const uint16x16_t &a, const uint16x16_t &b) { uint16x16_t result; result.value = _mm256_xor_si256(mask().value, _mm256_cmpgt_epi16(b.value, a.value)); //AVX2 return result; } inline friend uint16x16_t operator&&(const uint16x16_t &a, const uint16x16_t &b) { uint16x16_t result; result.value = _mm256_and_si256(a.value, b.value); //AVX2 return result; } // NOTE: uint16x16_t::blend behaves like mask ? 
a : b inline friend uint16x16_t blend(const uint16x16_t &a, const uint16x16_t &b, const uint16x16_t &mask) { uint16x16_t result; // Have to swap arguments to get intended calling semantics result.value = _mm256_blendv_epi8(b.value, a.value, mask.value); //AVX2 return result; } }; #endif // __AVX2__ } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/uint16_sse4.h000066400000000000000000000057121363734646600240410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include #include namespace NEO { struct uint16x8_t { enum { numChannels = 8 }; __m128i value; uint16x8_t() { value = _mm_setzero_si128(); } uint16x8_t(__m128i value) : value(value) { } uint16x8_t(uint16_t a) { value = _mm_set1_epi16(a); //SSE2 } explicit uint16x8_t(const void *alignedPtr) { load(alignedPtr); } inline uint16_t get(unsigned int element) { DEBUG_BREAK_IF(element >= numChannels); return reinterpret_cast(&value)[element]; } static inline uint16x8_t zero() { return uint16x8_t(static_cast(0u)); } static inline uint16x8_t one() { return uint16x8_t(static_cast(1u)); } static inline uint16x8_t mask() { return uint16x8_t(static_cast(0xffffu)); } inline void load(const void *alignedPtr) { DEBUG_BREAK_IF(!isAligned<16>(alignedPtr)); value = _mm_load_si128(reinterpret_cast(alignedPtr)); //SSE2 } inline void loadUnaligned(const void *ptr) { value = _mm_loadu_si128(reinterpret_cast(ptr)); //SSE2 } inline void store(void *alignedPtr) { DEBUG_BREAK_IF(!isAligned<16>(alignedPtr)); _mm_store_si128(reinterpret_cast<__m128i *>(alignedPtr), value); //SSE2 } inline void storeUnaligned(void *ptr) { _mm_storeu_si128(reinterpret_cast<__m128i *>(ptr), value); //SSE2 } inline operator bool() const { return _mm_test_all_zeros(value, mask().value) ? false : true; //SSE4.1 alternatives? 
} inline uint16x8_t &operator-=(const uint16x8_t &a) { value = _mm_sub_epi16(value, a.value); //SSE2 return *this; } inline uint16x8_t &operator+=(const uint16x8_t &a) { value = _mm_add_epi16(value, a.value); //SSE2 return *this; } inline friend uint16x8_t operator>=(const uint16x8_t &a, const uint16x8_t &b) { uint16x8_t result; result.value = _mm_xor_si128(mask().value, _mm_cmplt_epi16(a.value, b.value)); //SSE2 return result; } inline friend uint16x8_t operator&&(const uint16x8_t &a, const uint16x8_t &b) { uint16x8_t result; result.value = _mm_and_si128(a.value, b.value); //SSE2 return result; } // NOTE: uint16x8_t::blend behaves like mask ? a : b inline friend uint16x8_t blend(const uint16x8_t &a, const uint16x8_t &b, const uint16x8_t &mask) { uint16x8_t result; // Have to swap arguments to get intended calling semantics result.value = _mm_blendv_epi8(b.value, a.value, mask.value); //SSE4.1 alternatives? return result; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/validators.cpp000066400000000000000000000103011363734646600244460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/validators.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/source/sampler/sampler.h" namespace NEO { cl_int validateObject(void *ptr) { return ptr != nullptr ? CL_SUCCESS : CL_INVALID_VALUE; } cl_int validateObject(cl_context object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_CONTEXT; } cl_int validateObject(cl_device_id object) { return castToObject(object) != nullptr ? 
CL_SUCCESS : CL_INVALID_DEVICE; } cl_int validateObject(cl_platform_id object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_PLATFORM; } cl_int validateObject(cl_command_queue object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_COMMAND_QUEUE; } cl_int validateObject(cl_event object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_EVENT; } cl_int validateObject(cl_mem object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_MEM_OBJECT; } cl_int validateObject(cl_sampler object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_SAMPLER; } cl_int validateObject(cl_program object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_PROGRAM; } cl_int validateObject(cl_kernel object) { return castToObject(object) != nullptr ? CL_SUCCESS : CL_INVALID_KERNEL; } cl_int validateObject(const EventWaitList &eventWaitList) { if ((!eventWaitList.first) != (!eventWaitList.second)) return CL_INVALID_EVENT_WAIT_LIST; for (cl_uint i = 0; i < eventWaitList.first; i++) { if (validateObject(eventWaitList.second[i]) != CL_SUCCESS) return CL_INVALID_EVENT_WAIT_LIST; } return CL_SUCCESS; } cl_int validateObject(const DeviceList &deviceList) { if ((!deviceList.first) != (!deviceList.second)) return CL_INVALID_VALUE; for (cl_uint i = 0; i < deviceList.first; i++) { if (validateObject(deviceList.second[i]) != CL_SUCCESS) return CL_INVALID_DEVICE; } return CL_SUCCESS; } cl_int validateObject(const MemObjList &memObjList) { if ((!memObjList.first) != (!memObjList.second)) return CL_INVALID_VALUE; for (cl_uint i = 0; i < memObjList.first; i++) { if (validateObject(memObjList.second[i]) != CL_SUCCESS) return CL_INVALID_MEM_OBJECT; } return CL_SUCCESS; } cl_int validateObject(const NonZeroBufferSize &nzbs) { return nzbs ? 
CL_SUCCESS : CL_INVALID_BUFFER_SIZE; } cl_int validateObject(const PatternSize &ps) { switch ((cl_int)ps) { case 128: case 64: case 32: case 16: case 8: case 4: case 2: case 1: return CL_SUCCESS; default: break; } return CL_INVALID_VALUE; } cl_int validateYuvOperation(const size_t *origin, const size_t *region) { if (!origin || !region) return CL_INVALID_VALUE; return ((origin[0] % 2 == 0) && (region[0] % 2 == 0)) ? CL_SUCCESS : CL_INVALID_VALUE; } bool IsPackedYuvImage(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; return (channelOrder == CL_YUYV_INTEL) || (channelOrder == CL_UYVY_INTEL) || (channelOrder == CL_YVYU_INTEL) || (channelOrder == CL_VYUY_INTEL); } bool IsNV12Image(const cl_image_format *imageFormat) { return imageFormat->image_channel_order == CL_NV12_INTEL; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/validators.h000066400000000000000000000055251363734646600241270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/error_mappers.h" #include namespace NEO { // Provide some aggregators... typedef std::pair EventWaitList; typedef std::pair DeviceList; typedef std::pair MemObjList; // Custom validators enum NonZeroBufferSize : size_t; enum PatternSize : size_t; template CLType WithCastToInternal(CLType clObject, InternalType **internalObject) { *internalObject = NEO::castToObject(clObject); return (*internalObject) ? clObject : nullptr; } // This is the default instance of validateObject. // It should be specialized for specific types. template inline cl_int validateObject(Type object) { return CL_SUCCESS; } // Example of specialization. 
cl_int validateObject(void *ptr); cl_int validateObject(cl_context context); cl_int validateObject(cl_device_id device); cl_int validateObject(cl_platform_id platform); cl_int validateObject(cl_command_queue commandQueue); cl_int validateObject(cl_event platform); cl_int validateObject(cl_mem mem); cl_int validateObject(cl_sampler sampler); cl_int validateObject(cl_program program); cl_int validateObject(cl_kernel kernel); cl_int validateObject(const EventWaitList &eventWaitList); cl_int validateObject(const DeviceList &deviceList); cl_int validateObject(const MemObjList &memObjList); cl_int validateObject(const NonZeroBufferSize &nzbs); cl_int validateObject(const PatternSize &ps); // This is the sentinel for the follow variadic template definition. inline cl_int validateObjects() { return CL_SUCCESS; } // This provides variadic object validation. // It automatically checks for nullptrs and then passes // onto type specific validator. template inline cl_int validateObjects(const Type &object, Types... rest) { auto retVal = validateObject(object); return CL_SUCCESS != retVal ? retVal : validateObjects(rest...); } template inline cl_int validateObjects(Type *object, Types... rest) { auto retVal = object ? validateObject(object) : NullObjectErrorMapper::retVal; return CL_SUCCESS != retVal ? retVal : validateObjects(rest...); } template bool areNotNullptr() { return true; } template bool areNotNullptr(T t, RT... 
rt) { return (t != nullptr) && areNotNullptr(rt...); } cl_int validateYuvOperation(const size_t *origin, const size_t *region); bool IsPackedYuvImage(const cl_image_format *imageFormat); bool IsNV12Image(const cl_image_format *imageFormat); } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/windows/000077500000000000000000000000001363734646600232715ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/helpers/windows/gl_helper.h000066400000000000000000000012521363734646600254030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "GL/gl.h" namespace Os { extern const char *openglDllName; } namespace NEO { class glFunctionHelper { public: glFunctionHelper::glFunctionHelper(OsLibrary *glLibrary, const std::string &functionName) { glFunctionPtr = (*glLibrary)[functionName]; } ConvertibleProcAddr operator[](const char *name) { return ConvertibleProcAddr{glFunctionPtr(name)}; } protected: // clang-format off PROC(__stdcall *glFunctionPtr)(LPCSTR Arg1) = nullptr; // clang-format on }; }; // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/windows/gmm_callbacks_tgllp_plus.inl000066400000000000000000000041431363734646600310230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/os_interface/windows/wddm_device_command_stream.h" namespace NEO { template long __stdcall DeviceCallbacks::notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate) { auto csr = reinterpret_cast 
*>(csrHandle); if (DebugManager.flags.SetCommandStreamReceiver.get() == CSR_HW_WITH_AUB) { auto csrWithAub = static_cast> *>(csr); auto aubCsr = static_cast *>(csrWithAub->aubCSR.get()); if (allocate) { AllocationView externalAllocation(gfxAddress, gfxSize); aubCsr->makeResidentExternal(externalAllocation); } else { aubCsr->makeNonResidentExternal(gfxAddress); } } return 1; } template int __stdcall TTCallbacks::writeL3Address(void *queueHandle, uint64_t l3GfxAddress, uint64_t regOffset) { auto csr = reinterpret_cast *>(queueHandle); auto lri1 = LriHelper::program(&csr->getCS(0), static_cast(regOffset & 0xFFFFFFFF), static_cast(l3GfxAddress & 0xFFFFFFFF)); lri1->setMmioRemapEnable(true); auto lri2 = LriHelper::program(&csr->getCS(0), static_cast(regOffset >> 32), static_cast(l3GfxAddress >> 32)); lri2->setMmioRemapEnable(true); return 1; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/helpers/windows/kmd_notify_properties_windows.cpp000066400000000000000000000012031363734646600321620ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/source/os_interface/windows/sys_calls.h" using namespace NEO; void KmdNotifyHelper::updateAcLineStatus() { SYSTEM_POWER_STATUS systemPowerStatus = {}; auto powerStatusRetValue = SysCalls::getSystemPowerStatus(&systemPowerStatus); if (powerStatusRetValue == 1) { acLineConnected = (systemPowerStatus.ACLineStatus == 1); } } int64_t KmdNotifyHelper::getBaseTimeout(const int64_t &multiplier) const { return properties->delayKmdNotifyMicroseconds; } compute-runtime-20.13.16352/opencl/source/instrumentation/000077500000000000000000000000001363734646600234005ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/instrumentation/.clang-tidy000066400000000000000000000034521363734646600254400ustar00rootroot00000000000000--- Checks: 
'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,readability-identifier-naming,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*' # WarningsAsErrors: '.*' HeaderFilterRegex: '/runtime/|/core/|/offline_compiler/' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.MethodCase value: camelBack - key: readability-identifier-naming.ParameterCase value: camelBack - key: readability-identifier-naming.StructMemberCase value: CamelCase - key: readability-identifier-naming.ClassMemberCase value: CamelCase - key: readability-identifier-naming.ClassMethodCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... 
compute-runtime-20.13.16352/opencl/source/instrumentation/CMakeLists.txt000066400000000000000000000006561363734646600261470ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_INSTRUMENTATION ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/instrumentation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/instrumentation.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_INSTRUMENTATION}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_INSTRUMENTATION ${RUNTIME_SRCS_INSTRUMENTATION}) compute-runtime-20.13.16352/opencl/source/instrumentation/instrumentation.cpp000066400000000000000000000003041363734646600273440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "instrumentation.h" namespace NEO { const bool haveInstrumentation = false; } // namespace NEO compute-runtime-20.13.16352/opencl/source/instrumentation/instrumentation.h000066400000000000000000000135331363734646600270210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { extern const bool haveInstrumentation; } // namespace NEO namespace MetricsLibraryApi { // Dummy macros. #define ML_STDCALL #define METRICS_LIBRARY_CONTEXT_CREATE_1_0 "create" #define METRICS_LIBRARY_CONTEXT_DELETE_1_0 "delete" // Dummy enumerators. 
enum class ClientApi : uint32_t { OpenCL }; enum class ClientGen : uint32_t { Unknown, Gen9, Gen11, Gen12 }; enum class ValueType : uint32_t { Uint32, Last }; enum class GpuConfigurationActivationType : uint32_t { Tbs, EscapeCode }; enum class ObjectType : uint32_t { QueryHwCounters, MarkerStreamUser, OverrideFlushCaches, ConfigurationHwCountersUser, ConfigurationHwCountersOa }; enum class ParameterType : uint32_t { QueryHwCountersReportApiSize, QueryHwCountersReportGpuSize }; enum class StatusCode : uint32_t { Failed, IncorrectObject, Success }; enum class GpuCommandBufferType : uint32_t { Render, Compute }; enum class ClientOptionsType : uint32_t { Compute }; // Dummy handles. struct Handle { void *data; bool IsValid() const { return data != nullptr; } // NOLINT }; struct QueryHandle_1_0 : Handle {}; struct ConfigurationHandle_1_0 : Handle {}; struct ContextHandle_1_0 : Handle {}; // Dummy structures. struct ClientCallbacks_1_0 {}; struct ClientDataWindows_1_0 { void *Device; void *Adapter; void *Escape; bool KmdInstrumentationEnabled; }; struct ClientDataLinux_1_0 { void *Reserved; }; struct ClientOptionsComputeData_1_0 { bool Asynchronous; }; struct ClientOptionsData_1_0 { ClientOptionsType Type; ClientOptionsComputeData_1_0 Compute; }; struct ClientData_1_0 { union { ClientDataWindows_1_0 Windows; ClientDataLinux_1_0 Linux; }; ClientOptionsData_1_0 *ClientOptions; uint32_t ClientOptionsCount; }; struct ConfigurationActivateData_1_0 { GpuConfigurationActivationType Type; }; struct ClientType_1_0 { ClientApi Api; ClientGen Gen; }; struct TypedValue_1_0 { uint32_t ValueUInt32; ValueType Type; }; struct GpuMemory_1_0 { uint64_t GpuAddress; void *CpuAddress; }; struct CommandBufferQueryHwCounters_1_0 { QueryHandle_1_0 Handle; ConfigurationHandle_1_0 HandleUserConfiguration; bool Begin; uint32_t Slot; }; struct CommandBufferMarkerStreamUser_1_0 { uint32_t Value; }; struct CommandBufferOverride_1_0 { bool Enable; }; struct CommandBufferSize_1_0 { uint32_t 
GpuMemorySize; }; struct ConfigurationCreateData_1_0 { ContextHandle_1_0 HandleContext; ObjectType Type; }; struct CommandBufferData_1_0 { ContextHandle_1_0 HandleContext; ObjectType CommandsType; GpuCommandBufferType Type; GpuMemory_1_0 Allocation; void *Data; uint32_t Size; CommandBufferQueryHwCounters_1_0 QueryHwCounters; CommandBufferMarkerStreamUser_1_0 MarkerStreamUser; CommandBufferOverride_1_0 Override; }; struct QueryCreateData_1_0 { ContextHandle_1_0 HandleContext; ObjectType Type; uint32_t Slots; }; struct GetReportQuery_1_0 { QueryHandle_1_0 Handle; uint32_t Slot; uint32_t SlotsCount; uint32_t DataSize; void *Data; }; struct GetReportData_1_0 { ObjectType Type; GetReportQuery_1_0 Query; }; struct ContextCreateData_1_0 { ClientData_1_0 *ClientData; ClientCallbacks_1_0 *ClientCallbacks; struct Interface_1_0 *Api; }; // Dummy functions. using ContextCreateFunction_1_0 = StatusCode(ML_STDCALL *)(ClientType_1_0 clientType, struct ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle); using ContextDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const ContextHandle_1_0 handle); using GetParameterFunction_1_0 = StatusCode(ML_STDCALL *)(const ParameterType parameter, ValueType *type, TypedValue_1_0 *value); using CommandBufferGetFunction_1_0 = StatusCode(ML_STDCALL *)(const CommandBufferData_1_0 *data); using CommandBufferGetSizeFunction_1_0 = StatusCode(ML_STDCALL *)(const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size); using QueryCreateFunction_1_0 = StatusCode(ML_STDCALL *)(const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle); using QueryDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const QueryHandle_1_0 handle); using ConfigurationCreateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle); using ConfigurationActivateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData); using 
ConfigurationDeactivateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle); using ConfigurationDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle); using GetDataFunction_1_0 = StatusCode(ML_STDCALL *)(GetReportData_1_0 *data); // Dummy interface. struct Interface_1_0 { GetParameterFunction_1_0 GetParameter; CommandBufferGetFunction_1_0 CommandBufferGet; CommandBufferGetSizeFunction_1_0 CommandBufferGetSize; QueryCreateFunction_1_0 QueryCreate; QueryDeleteFunction_1_0 QueryDelete; ConfigurationCreateFunction_1_0 ConfigurationCreate; ConfigurationActivateFunction_1_0 ConfigurationActivate; ConfigurationDeactivateFunction_1_0 ConfigurationDeactivate; ConfigurationDeleteFunction_1_0 ConfigurationDelete; GetDataFunction_1_0 GetData; }; }; // namespace MetricsLibraryApicompute-runtime-20.13.16352/opencl/source/kernel/000077500000000000000000000000001363734646600214155ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/kernel/CMakeLists.txt000066400000000000000000000014001363734646600241500ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/get_additional_kernel_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_transformer.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_extra.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_KERNEL}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_KERNEL ${RUNTIME_SRCS_KERNEL}) 
compute-runtime-20.13.16352/opencl/source/kernel/get_additional_kernel_info.cpp000066400000000000000000000004371363734646600274470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" namespace NEO { void Kernel::getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/kernel/image_transformer.cpp000066400000000000000000000034241363734646600256300ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/image_transformer.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/program/kernel_info.h" namespace NEO { void ImageTransformer::registerImage3d(uint32_t argIndex) { if (std::find(argIndexes.begin(), argIndexes.end(), argIndex) == argIndexes.end()) { argIndexes.push_back(argIndex); } } void ImageTransformer::transformImagesTo2dArray(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh) { for (auto const &argIndex : argIndexes) { if (kernelInfo.kernelArgInfo.at(argIndex).isTransformable) { auto clMemObj = static_cast(kernelArguments.at(argIndex).object); auto image = castToObjectOrAbort(clMemObj); auto surfaceState = ptrOffset(ssh, kernelInfo.kernelArgInfo.at(argIndex).offsetHeap); image->transformImage3dTo2dArray(surfaceState); } } transformed = true; } void ImageTransformer::transformImagesTo3d(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh) { for (auto const &argIndex : argIndexes) { auto clMemObj = static_cast(kernelArguments.at(argIndex).object); auto image = castToObjectOrAbort(clMemObj); auto surfaceState = ptrOffset(ssh, kernelInfo.kernelArgInfo.at(argIndex).offsetHeap); image->transformImage2dArrayTo3d(surfaceState); } transformed = false; } bool 
ImageTransformer::didTransform() const { return transformed; } bool ImageTransformer::hasRegisteredImages3d() const { return !argIndexes.empty(); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/kernel/image_transformer.h000066400000000000000000000012671363734646600253000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/kernel/kernel.h" namespace NEO { class ImageTransformer { public: void registerImage3d(uint32_t argIndex); void transformImagesTo2dArray(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh); void transformImagesTo3d(const KernelInfo &kernelInfo, const std::vector &kernelArguments, void *ssh); bool didTransform() const; bool hasRegisteredImages3d() const; protected: bool transformed = false; std::vector argIndexes; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/kernel/kernel.cpp000066400000000000000000003451721363734646600234150ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/kernel_helpers.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include 
"opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/execution_model/device_enqueue.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/per_thread_data.h" #include "opencl/source/helpers/sampler_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/kernel/image_transformer.h" #include "opencl/source/kernel/kernel.inl" #include "opencl/source/kernel/kernel_info_cl.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/sampler/sampler.h" #include "patch_list.h" #include #include #include using namespace iOpenCL; namespace NEO { class Surface; uint32_t Kernel::dummyPatchLocation = 0xbaddf00d; Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg, bool schedulerKernel) : globalWorkOffsetX(&Kernel::dummyPatchLocation), globalWorkOffsetY(&Kernel::dummyPatchLocation), globalWorkOffsetZ(&Kernel::dummyPatchLocation), localWorkSizeX(&Kernel::dummyPatchLocation), localWorkSizeY(&Kernel::dummyPatchLocation), localWorkSizeZ(&Kernel::dummyPatchLocation), localWorkSizeX2(&Kernel::dummyPatchLocation), localWorkSizeY2(&Kernel::dummyPatchLocation), localWorkSizeZ2(&Kernel::dummyPatchLocation), globalWorkSizeX(&Kernel::dummyPatchLocation), globalWorkSizeY(&Kernel::dummyPatchLocation), 
globalWorkSizeZ(&Kernel::dummyPatchLocation), enqueuedLocalWorkSizeX(&Kernel::dummyPatchLocation), enqueuedLocalWorkSizeY(&Kernel::dummyPatchLocation), enqueuedLocalWorkSizeZ(&Kernel::dummyPatchLocation), numWorkGroupsX(&Kernel::dummyPatchLocation), numWorkGroupsY(&Kernel::dummyPatchLocation), numWorkGroupsZ(&Kernel::dummyPatchLocation), maxWorkGroupSizeForCrossThreadData(&Kernel::dummyPatchLocation), workDim(&Kernel::dummyPatchLocation), dataParameterSimdSize(&Kernel::dummyPatchLocation), parentEventOffset(&Kernel::dummyPatchLocation), preferredWkgMultipleOffset(&Kernel::dummyPatchLocation), slmTotalSize(kernelInfoArg.workloadInfo.slmStaticSize), isBuiltIn(false), isParentKernel((kernelInfoArg.patchInfo.executionEnvironment != nullptr) ? (kernelInfoArg.patchInfo.executionEnvironment->HasDeviceEnqueue != 0) : false), isSchedulerKernel(schedulerKernel), program(programArg), context(nullptr), device(deviceArg), kernelInfo(kernelInfoArg), numberOfBindingTableStates(0), localBindingTableOffset(0), sshLocalSize(0), crossThreadData(nullptr), crossThreadDataSize(0), privateSurface(nullptr), privateSurfaceSize(0), kernelReflectionSurface(nullptr), usingSharedObjArgs(false) { program->retain(); imageTransformer.reset(new ImageTransformer); maxKernelWorkGroupSize = static_cast(device.getSharedDeviceInfo().maxWorkGroupSize); } Kernel::~Kernel() { delete[] crossThreadData; crossThreadData = nullptr; crossThreadDataSize = 0; if (privateSurface) { program->peekExecutionEnvironment().memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(privateSurface); privateSurface = nullptr; } if (kernelReflectionSurface) { program->peekExecutionEnvironment().memoryManager->freeGraphicsMemory(kernelReflectionSurface); kernelReflectionSurface = nullptr; } for (uint32_t i = 0; i < patchedArgumentsNum; i++) { if (kernelInfo.kernelArgInfo.at(i).isSampler) { auto sampler = castToObject(kernelArguments.at(i).object); if (sampler) { sampler->decRefInternal(); } } } kernelArgHandlers.clear(); 
program->release(); } // Checks if patch offset is invalid (undefined) inline bool isInvalidOffset(uint32_t offset) { return (offset == KernelArgInfo::undefinedOffset); } // Checks if patch offset is valid inline bool isValidOffset(uint32_t offset) { return isInvalidOffset(offset) == false; } // If dstOffsetBytes is not an invalid offset, then patches dst at dstOffsetBytes // with src casted to DstT type. template inline void patch(const SrcT &src, void *dst, uint32_t dstOffsetBytes) { if (isInvalidOffset(dstOffsetBytes)) { return; } DstT *patchLocation = reinterpret_cast(ptrOffset(dst, dstOffsetBytes)); *patchLocation = static_cast(src); } template void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const PatchTokenT &patch) { uint32_t crossThreadDataOffset = patch.DataParamOffset; uint32_t pointerSize = patch.DataParamSize; uint32_t sshOffset = patch.SurfaceStateHeapOffset; void *crossThreadData = getCrossThreadData(); void *ssh = getSurfaceStateHeap(); if (crossThreadData != nullptr) { auto pp = ptrOffset(crossThreadData, crossThreadDataOffset); uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); patchWithRequiredSize(pp, pointerSize, addressToPatch); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast(getCrossThreadData()), crossThreadDataOffset, PatchInfoAllocationType::IndirectObjectHeap, pointerSize); this->patchInfoDataList.push_back(patchInfoData); } } if (ssh) { auto surfaceState = ptrOffset(ssh, sshOffset); void *addressToPatch = reinterpret_cast(allocation.getGpuAddressToPatch()); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, &allocation, 0, 0); } } template void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, 
const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization &patch); template void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const SPatchAllocateStatelessPrivateSurface &patch); template void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization &patch); cl_int Kernel::initialize() { cl_int retVal = CL_OUT_OF_HOST_MEMORY; do { const auto &workloadInfo = kernelInfo.workloadInfo; const auto &heapInfo = kernelInfo.heapInfo; const auto &patchInfo = kernelInfo.patchInfo; reconfigureKernel(); auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto maxSimdSize = getKernelInfo().getMaxSimdSize(); if (maxSimdSize != 1 && maxSimdSize < hwHelper.getMinimalSIMDSize()) { return CL_INVALID_KERNEL; } crossThreadDataSize = patchInfo.dataParameterStream ? patchInfo.dataParameterStream->DataParameterStreamSize : 0; // now allocate our own cross-thread data, if necessary if (crossThreadDataSize) { crossThreadData = new char[crossThreadDataSize]; if (kernelInfo.crossThreadData) { memcpy_s(crossThreadData, crossThreadDataSize, kernelInfo.crossThreadData, crossThreadDataSize); } else { memset(crossThreadData, 0x00, crossThreadDataSize); } auto crossThread = reinterpret_cast(crossThreadData); globalWorkOffsetX = workloadInfo.globalWorkOffsetOffsets[0] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[0]) : globalWorkOffsetX; globalWorkOffsetY = workloadInfo.globalWorkOffsetOffsets[1] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[1]) : globalWorkOffsetY; globalWorkOffsetZ = workloadInfo.globalWorkOffsetOffsets[2] != WorkloadInfo::undefinedOffset ? 
ptrOffset(crossThread, workloadInfo.globalWorkOffsetOffsets[2]) : globalWorkOffsetZ; localWorkSizeX = workloadInfo.localWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[0]) : localWorkSizeX; localWorkSizeY = workloadInfo.localWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[1]) : localWorkSizeY; localWorkSizeZ = workloadInfo.localWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets[2]) : localWorkSizeZ; localWorkSizeX2 = workloadInfo.localWorkSizeOffsets2[0] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[0]) : localWorkSizeX2; localWorkSizeY2 = workloadInfo.localWorkSizeOffsets2[1] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[1]) : localWorkSizeY2; localWorkSizeZ2 = workloadInfo.localWorkSizeOffsets2[2] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.localWorkSizeOffsets2[2]) : localWorkSizeZ2; globalWorkSizeX = workloadInfo.globalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[0]) : globalWorkSizeX; globalWorkSizeY = workloadInfo.globalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[1]) : globalWorkSizeY; globalWorkSizeZ = workloadInfo.globalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.globalWorkSizeOffsets[2]) : globalWorkSizeZ; enqueuedLocalWorkSizeX = workloadInfo.enqueuedLocalWorkSizeOffsets[0] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[0]) : enqueuedLocalWorkSizeX; enqueuedLocalWorkSizeY = workloadInfo.enqueuedLocalWorkSizeOffsets[1] != WorkloadInfo::undefinedOffset ? 
ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[1]) : enqueuedLocalWorkSizeY; enqueuedLocalWorkSizeZ = workloadInfo.enqueuedLocalWorkSizeOffsets[2] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.enqueuedLocalWorkSizeOffsets[2]) : enqueuedLocalWorkSizeZ; numWorkGroupsX = workloadInfo.numWorkGroupsOffset[0] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[0]) : numWorkGroupsX; numWorkGroupsY = workloadInfo.numWorkGroupsOffset[1] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[1]) : numWorkGroupsY; numWorkGroupsZ = workloadInfo.numWorkGroupsOffset[2] != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.numWorkGroupsOffset[2]) : numWorkGroupsZ; maxWorkGroupSizeForCrossThreadData = workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.maxWorkGroupSizeOffset) : maxWorkGroupSizeForCrossThreadData; workDim = workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.workDimOffset) : workDim; dataParameterSimdSize = workloadInfo.simdSizeOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.simdSizeOffset) : dataParameterSimdSize; parentEventOffset = workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.parentEventOffset) : parentEventOffset; preferredWkgMultipleOffset = workloadInfo.preferredWkgMultipleOffset != WorkloadInfo::undefinedOffset ? ptrOffset(crossThread, workloadInfo.preferredWkgMultipleOffset) : preferredWkgMultipleOffset; *maxWorkGroupSizeForCrossThreadData = maxKernelWorkGroupSize; *dataParameterSimdSize = maxSimdSize; *preferredWkgMultipleOffset = maxSimdSize; *parentEventOffset = WorkloadInfo::invalidParentEvent; } // allocate our own SSH, if necessary sshLocalSize = heapInfo.pKernelHeader ? 
heapInfo.pKernelHeader->SurfaceStateHeapSize : 0; if (sshLocalSize) { pSshLocal = std::make_unique(sshLocalSize); // copy the ssh into our local copy memcpy_s(pSshLocal.get(), sshLocalSize, heapInfo.pSsh, sshLocalSize); } numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0; localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0; // patch crossthread data and ssh with inline surfaces, if necessary privateSurfaceSize = patchInfo.pAllocateStatelessPrivateSurface ? patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize : 0; if (privateSurfaceSize) { privateSurfaceSize *= device.getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo().getMaxSimdSize(); DEBUG_BREAK_IF(privateSurfaceSize == 0); if ((is32Bit() || device.getMemoryManager()->peekForce32BitAllocations()) && (privateSurfaceSize > std::numeric_limits::max())) { retVal = CL_OUT_OF_RESOURCES; break; } privateSurface = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), static_cast(privateSurfaceSize), GraphicsAllocation::AllocationType::PRIVATE_SURFACE}); if (privateSurface == nullptr) { retVal = CL_OUT_OF_RESOURCES; break; } const auto &patch = patchInfo.pAllocateStatelessPrivateSurface; patchWithImplicitSurface(reinterpret_cast(privateSurface->getGpuAddressToPatch()), *privateSurface, *patch); } if (patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization) { DEBUG_BREAK_IF(program->getConstantSurface() == nullptr); uintptr_t constMemory = isBuiltIn ? 
(uintptr_t)program->getConstantSurface()->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface()->getGpuAddressToPatch(); const auto &patch = patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization; patchWithImplicitSurface(reinterpret_cast(constMemory), *program->getConstantSurface(), *patch); } if (patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization) { DEBUG_BREAK_IF(program->getGlobalSurface() == nullptr); uintptr_t globalMemory = isBuiltIn ? (uintptr_t)program->getGlobalSurface()->getUnderlyingBuffer() : (uintptr_t)program->getGlobalSurface()->getGpuAddressToPatch(); const auto &patch = patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization; patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(), *patch); } if (patchInfo.pAllocateStatelessEventPoolSurface) { if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0); } } if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0); } } if (kernelInfo.patchInfo.executionEnvironment) { if (!kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) { setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased); } } patchBlocksSimdSize(); provideInitializationHints(); // resolve the new kernel info to account for kernel handlers // I think by this time we have decoded the binary and know the number of args etc. 
// double check this assumption bool usingBuffers = false; bool usingImages = false; auto numArgs = kernelInfo.kernelArgInfo.size(); kernelArguments.resize(numArgs); slmSizes.resize(numArgs); kernelArgHandlers.resize(numArgs); kernelArgRequiresCacheFlush.resize(numArgs); for (uint32_t i = 0; i < numArgs; ++i) { storeKernelArg(i, NONE_OBJ, nullptr, nullptr, 0); slmSizes[i] = 0; // set the argument handler auto &argInfo = kernelInfo.kernelArgInfo[i]; if (argInfo.metadata.addressQualifier == KernelArgMetadata::AddrLocal) { kernelArgHandlers[i] = &Kernel::setArgLocal; } else if (argInfo.isAccelerator) { kernelArgHandlers[i] = &Kernel::setArgAccelerator; } else if (argInfo.metadata.typeQualifiers.pipeQual) { kernelArgHandlers[i] = &Kernel::setArgPipe; kernelArguments[i].type = PIPE_OBJ; } else if (argInfo.isImage) { kernelArgHandlers[i] = &Kernel::setArgImage; kernelArguments[i].type = IMAGE_OBJ; usingImages = true; } else if (argInfo.isSampler) { kernelArgHandlers[i] = &Kernel::setArgSampler; kernelArguments[i].type = SAMPLER_OBJ; } else if (argInfo.isBuffer) { kernelArgHandlers[i] = &Kernel::setArgBuffer; kernelArguments[i].type = BUFFER_OBJ; usingBuffers = true; allBufferArgsStateful &= static_cast(argInfo.pureStatefulBufferAccess); this->auxTranslationRequired |= !kernelInfo.kernelArgInfo[i].pureStatefulBufferAccess && HwHelper::renderCompressedBuffersSupported(hwInfo); } else if (argInfo.isDeviceQueue) { kernelArgHandlers[i] = &Kernel::setArgDevQueue; kernelArguments[i].type = DEVICE_QUEUE_OBJ; } else { kernelArgHandlers[i] = &Kernel::setArgImmediate; } } auxTranslationRequired &= hwHelper.requiresAuxResolves(); if (DebugManager.flags.DisableAuxTranslation.get()) { auxTranslationRequired = false; } if (usingImages && !usingBuffers) { usingImagesOnly = true; } if (isParentKernel) { program->allocateBlockPrivateSurfaces(device.getRootDeviceIndex()); } retVal = CL_SUCCESS; } while (false); return retVal; } cl_int Kernel::cloneKernel(Kernel *pSourceKernel) { // copy 
cross thread data to store arguments set to source kernel with clSetKernelArg on immediate data (non-pointer types) memcpy_s(crossThreadData, crossThreadDataSize, pSourceKernel->crossThreadData, pSourceKernel->crossThreadDataSize); DEBUG_BREAK_IF(pSourceKernel->crossThreadDataSize != crossThreadDataSize); // copy arguments set to source kernel with clSetKernelArg or clSetKernelArgSVMPointer for (uint32_t i = 0; i < pSourceKernel->kernelArguments.size(); i++) { if (0 == pSourceKernel->getKernelArgInfo(i).size) { // skip copying arguments that haven't been set to source kernel continue; } switch (pSourceKernel->kernelArguments[i].type) { case NONE_OBJ: // all arguments with immediate data (non-pointer types) have been copied in cross thread data storeKernelArg(i, NONE_OBJ, nullptr, nullptr, pSourceKernel->getKernelArgInfo(i).size); patchedArgumentsNum++; kernelArguments[i].isPatched = true; break; case SVM_OBJ: setArgSvm(i, pSourceKernel->getKernelArgInfo(i).size, const_cast(pSourceKernel->getKernelArgInfo(i).value), pSourceKernel->getKernelArgInfo(i).pSvmAlloc, pSourceKernel->getKernelArgInfo(i).svmFlags); break; case SVM_ALLOC_OBJ: setArgSvmAlloc(i, const_cast(pSourceKernel->getKernelArgInfo(i).value), (GraphicsAllocation *)pSourceKernel->getKernelArgInfo(i).object); break; default: setArg(i, pSourceKernel->getKernelArgInfo(i).size, pSourceKernel->getKernelArgInfo(i).value); break; } } // copy additional information other than argument values set to source kernel with clSetKernelExecInfo for (auto gfxAlloc : pSourceKernel->kernelSvmGfxAllocations) { kernelSvmGfxAllocations.push_back(gfxAlloc); } this->isBuiltIn = pSourceKernel->isBuiltIn; return CL_SUCCESS; } cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal; const void *pSrc = nullptr; size_t srcSize = 0; cl_uint numArgs = 0; const _cl_program *prog; const _cl_context *ctxt; cl_uint refCount = 0; uint64_t 
nonCannonizedGpuAddress = 0llu; switch (paramName) { case CL_KERNEL_FUNCTION_NAME: pSrc = kernelInfo.name.c_str(); srcSize = kernelInfo.name.length() + 1; break; case CL_KERNEL_NUM_ARGS: srcSize = sizeof(cl_uint); numArgs = (cl_uint)kernelInfo.kernelArgInfo.size(); pSrc = &numArgs; break; case CL_KERNEL_CONTEXT: ctxt = &program->getContext(); srcSize = sizeof(ctxt); pSrc = &ctxt; break; case CL_KERNEL_PROGRAM: prog = program; srcSize = sizeof(prog); pSrc = &prog; break; case CL_KERNEL_REFERENCE_COUNT: refCount = static_cast(this->getReference()); srcSize = sizeof(refCount); pSrc = &refCount; break; case CL_KERNEL_ATTRIBUTES: pSrc = kernelInfo.attributes.c_str(); srcSize = kernelInfo.attributes.length() + 1; break; case CL_KERNEL_BINARY_PROGRAM_INTEL: pSrc = getKernelHeap(); srcSize = getKernelHeapSize(); break; case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL: nonCannonizedGpuAddress = GmmHelper::decanonize(kernelInfo.kernelAllocation->getGpuAddress()); pSrc = &nonCannonizedGpuAddress; srcSize = sizeof(nonCannonizedGpuAddress); break; default: getAdditionalInfo(paramName, pSrc, srcSize); break; } retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, pSrc, srcSize)); if (paramValueSizeRet) { *paramValueSizeRet = srcSize; } return retVal; } cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal; const void *pSrc = nullptr; size_t srcSize = 0; auto numArgs = (cl_uint)kernelInfo.kernelArgInfo.size(); const auto &argInfo = kernelInfo.kernelArgInfo[argIndx]; if (argIndx >= numArgs) { retVal = CL_INVALID_ARG_INDEX; return retVal; } cl_kernel_arg_address_qualifier addressQualifier; cl_kernel_arg_access_qualifier accessQualifier; cl_kernel_arg_type_qualifier typeQualifier; switch (paramName) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: addressQualifier = asClKernelArgAddressQualifier(argInfo.metadata.getAddressQualifier()); srcSize = 
sizeof(addressQualifier); pSrc = &addressQualifier; break; case CL_KERNEL_ARG_ACCESS_QUALIFIER: accessQualifier = asClKernelArgAccessQualifier(argInfo.metadata.getAccessQualifier()); srcSize = sizeof(accessQualifier); pSrc = &accessQualifier; break; case CL_KERNEL_ARG_TYPE_QUALIFIER: typeQualifier = asClKernelArgTypeQualifier(argInfo.metadata.typeQualifiers); srcSize = sizeof(typeQualifier); pSrc = &typeQualifier; break; case CL_KERNEL_ARG_TYPE_NAME: srcSize = argInfo.metadataExtended->type.length() + 1; pSrc = argInfo.metadataExtended->type.c_str(); break; case CL_KERNEL_ARG_NAME: srcSize = argInfo.metadataExtended->argName.length() + 1; pSrc = argInfo.metadataExtended->argName.c_str(); break; default: break; } retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, pSrc, srcSize)); if (paramValueSizeRet) { *paramValueSizeRet = srcSize; } return retVal; } cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal = CL_INVALID_VALUE; struct size_t3 { size_t val[3]; } requiredWorkGroupSize; cl_ulong localMemorySize; const auto &patchInfo = kernelInfo.patchInfo; size_t preferredWorkGroupSizeMultiple = 0; cl_ulong scratchSize; cl_ulong privateMemSize; size_t maxWorkgroupSize; const auto &hwInfo = getDevice().getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); switch (paramName) { case CL_KERNEL_WORK_GROUP_SIZE: maxWorkgroupSize = this->maxKernelWorkGroupSize; if (DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()) { auto divisionSize = 32 / patchInfo.executionEnvironment->LargestCompiledSIMDSize; maxWorkgroupSize /= divisionSize; } retVal = changeGetInfoStatusToCLResultType(info.set(maxWorkgroupSize)); break; case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: DEBUG_BREAK_IF(!patchInfo.executionEnvironment); 
requiredWorkGroupSize.val[0] = patchInfo.executionEnvironment->RequiredWorkGroupSizeX; requiredWorkGroupSize.val[1] = patchInfo.executionEnvironment->RequiredWorkGroupSizeY; requiredWorkGroupSize.val[2] = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; retVal = changeGetInfoStatusToCLResultType(info.set(requiredWorkGroupSize)); break; case CL_KERNEL_LOCAL_MEM_SIZE: localMemorySize = patchInfo.localsurface ? patchInfo.localsurface->TotalInlineLocalMemorySize : 0; retVal = changeGetInfoStatusToCLResultType(info.set(localMemorySize)); break; case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: DEBUG_BREAK_IF(!patchInfo.executionEnvironment); preferredWorkGroupSizeMultiple = patchInfo.executionEnvironment->LargestCompiledSIMDSize; if (hwHelper.isFusedEuDispatchEnabled(hwInfo)) { preferredWorkGroupSizeMultiple *= 2; } retVal = changeGetInfoStatusToCLResultType((info.set(preferredWorkGroupSizeMultiple))); break; case CL_KERNEL_SPILL_MEM_SIZE_INTEL: scratchSize = kernelInfo.patchInfo.mediavfestate ? kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace : 0; retVal = changeGetInfoStatusToCLResultType(info.set(scratchSize)); break; case CL_KERNEL_PRIVATE_MEM_SIZE: privateMemSize = kernelInfo.patchInfo.pAllocateStatelessPrivateSurface ? 
kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize : 0; retVal = changeGetInfoStatusToCLResultType(info.set(privateMemSize)); break; default: retVal = CL_INVALID_VALUE; break; } return retVal; } cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { size_t numDimensions = 0; size_t WGS = 1; auto maxSimdSize = static_cast(getKernelInfo().getMaxSimdSize()); auto maxRequiredWorkGroupSize = static_cast(getKernelInfo().getMaxRequiredWorkGroupSize(maxKernelWorkGroupSize)); auto largestCompiledSIMDSize = static_cast(getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); if ((paramName == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) || (paramName == CL_KERNEL_MAX_NUM_SUB_GROUPS) || (paramName == CL_KERNEL_COMPILE_NUM_SUB_GROUPS)) { if (device.getEnabledClVersion() < 21) { return CL_INVALID_VALUE; } } if ((paramName == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR) || (paramName == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR)) { if (!inputValue) { return CL_INVALID_VALUE; } if (inputValueSize % sizeof(size_t) != 0) { return CL_INVALID_VALUE; } numDimensions = inputValueSize / sizeof(size_t); if (numDimensions == 0 || numDimensions > static_cast(device.getDeviceInfo().maxWorkItemDimensions)) { return CL_INVALID_VALUE; } } if (paramName == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) { if (!paramValue) { return CL_INVALID_VALUE; } if (paramValueSize % sizeof(size_t) != 0) { return CL_INVALID_VALUE; } numDimensions = paramValueSize / sizeof(size_t); if (numDimensions == 0 || numDimensions > static_cast(device.getDeviceInfo().maxWorkItemDimensions)) { return CL_INVALID_VALUE; } } switch (paramName) { case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR: { for (size_t i = 0; i < numDimensions; i++) { WGS *= ((size_t *)inputValue)[i]; } return 
changeGetInfoStatusToCLResultType(info.set(std::min(WGS, maxSimdSize))); } case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR: { for (size_t i = 0; i < numDimensions; i++) { WGS *= ((size_t *)inputValue)[i]; } return changeGetInfoStatusToCLResultType( info.set((WGS / maxSimdSize) + std::min(static_cast(1), WGS % maxSimdSize))); // add 1 if WGS % maxSimdSize != 0 } case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT: { auto subGroupsNum = *(size_t *)inputValue; auto workGroupSize = subGroupsNum * largestCompiledSIMDSize; // return workgroup size in first dimension, the rest shall be 1 in positive case if (workGroupSize > maxRequiredWorkGroupSize) { workGroupSize = 0; } // If no work group size can accommodate the requested number of subgroups, return 0 in each element of the returned array. switch (numDimensions) { case 1: return changeGetInfoStatusToCLResultType(info.set(workGroupSize)); case 2: struct size_t2 { size_t val[2]; } workGroupSize2; workGroupSize2.val[0] = workGroupSize; workGroupSize2.val[1] = (workGroupSize > 0) ? 1 : 0; return changeGetInfoStatusToCLResultType(info.set(workGroupSize2)); case 3: default: struct size_t3 { size_t val[3]; } workGroupSize3; workGroupSize3.val[0] = workGroupSize; workGroupSize3.val[1] = (workGroupSize > 0) ? 1 : 0; workGroupSize3.val[2] = (workGroupSize > 0) ? 
1 : 0; return changeGetInfoStatusToCLResultType(info.set(workGroupSize3)); } } case CL_KERNEL_MAX_NUM_SUB_GROUPS: { // round-up maximum number of subgroups return changeGetInfoStatusToCLResultType(info.set(Math::divideAndRoundUp(maxRequiredWorkGroupSize, largestCompiledSIMDSize))); } case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: { return changeGetInfoStatusToCLResultType(info.set(static_cast(getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber))); } case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: { return changeGetInfoStatusToCLResultType(info.set(getKernelInfo().requiredSubGroupSize)); } default: return CL_INVALID_VALUE; } } const void *Kernel::getKernelHeap() const { return kernelInfo.heapInfo.pKernelHeap; } size_t Kernel::getKernelHeapSize() const { return kernelInfo.heapInfo.pKernelHeader->KernelHeapSize; } void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) { KernelInfo *pKernelInfo = const_cast(&kernelInfo); void **pKernelHeap = const_cast(&pKernelInfo->heapInfo.pKernelHeap); *pKernelHeap = newKernelHeap; SKernelBinaryHeaderCommon *pHeader = const_cast(pKernelInfo->heapInfo.pKernelHeader); pHeader->KernelHeapSize = static_cast(newKernelHeapSize); pKernelInfo->isKernelHeapSubstituted = true; auto memoryManager = device.getMemoryManager(); auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize(); bool status = false; if (currentAllocationSize >= newKernelHeapSize) { status = memoryManager->copyMemoryToAllocation(pKernelInfo->kernelAllocation, newKernelHeap, newKernelHeapSize); } else { memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation); pKernelInfo->kernelAllocation = nullptr; status = pKernelInfo->createKernelAllocation(device.getRootDeviceIndex(), memoryManager); } UNRECOVERABLE_IF(!status); } bool Kernel::isKernelHeapSubstituted() const { return kernelInfo.isKernelHeapSubstituted; } uint64_t Kernel::getKernelId() const { return kernelInfo.kernelId; } void 
Kernel::setKernelId(uint64_t newKernelId) { KernelInfo *pKernelInfo = const_cast(&kernelInfo); pKernelInfo->kernelId = newKernelId; } uint32_t Kernel::getStartOffset() const { return this->startOffset; } void Kernel::setStartOffset(uint32_t offset) { this->startOffset = offset; } void *Kernel::getSurfaceStateHeap() const { return kernelInfo.usesSsh ? pSshLocal.get() : nullptr; } size_t Kernel::getDynamicStateHeapSize() const { return kernelInfo.heapInfo.pKernelHeader->DynamicStateHeapSize; } const void *Kernel::getDynamicStateHeap() const { return kernelInfo.heapInfo.pDsh; } size_t Kernel::getSurfaceStateHeapSize() const { return kernelInfo.usesSsh ? sshLocalSize : 0; } size_t Kernel::getNumberOfBindingTableStates() const { return numberOfBindingTableStates; } void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) { pSshLocal.reset(static_cast(pNewSsh)); sshLocalSize = static_cast(newSshSize); numberOfBindingTableStates = newBindingTableCount; localBindingTableOffset = newBindingTableOffset; } uint32_t Kernel::getScratchSizeValueToProgramMediaVfeState(int scratchSize) { scratchSize >>= MemoryConstants::kiloByteShiftSize; uint32_t valueToProgram = 0; while (scratchSize >>= 1) { valueToProgram++; } return valueToProgram; } cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { cl_int retVal = CL_SUCCESS; bool updateExposedKernel = true; auto argWasUncacheable = false; if (getKernelInfo().builtinDispatchBuilder != nullptr) { updateExposedKernel = getKernelInfo().builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal); } if (updateExposedKernel) { if (argIndex >= kernelArgHandlers.size()) { return CL_INVALID_ARG_INDEX; } argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable; auto argHandler = kernelArgHandlers[argIndex]; retVal = (this->*argHandler)(argIndex, argSize, argVal); } if (retVal == CL_SUCCESS) { if 
(!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } auto argIsUncacheable = kernelArguments[argIndex].isStatelessUncacheable; statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0); resolveArgs(); } return retVal; } cl_int Kernel::setArg(uint32_t argIndex, uint32_t argVal) { return setArg(argIndex, sizeof(argVal), &argVal); } cl_int Kernel::setArg(uint32_t argIndex, uint64_t argVal) { return setArg(argIndex, sizeof(argVal), &argVal); } cl_int Kernel::setArg(uint32_t argIndex, cl_mem argVal) { return setArg(argIndex, sizeof(argVal), &argVal); } cl_int Kernel::setArg(uint32_t argIndex, cl_mem argVal, uint32_t mipLevel) { return setArgImageWithMipLevel(argIndex, sizeof(argVal), &argVal, mipLevel); } void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc) { if (isInvalidOffset(argInfo.offsetBufferOffset)) { return svmPtr; } void *ptrToPatch = svmPtr; if (svmAlloc != nullptr) { ptrToPatch = reinterpret_cast(svmAlloc->getGpuAddressToPatch()); } constexpr uint32_t minimumAlignment = 4; ptrToPatch = alignDown(ptrToPatch, minimumAlignment); DEBUG_BREAK_IF(ptrDiff(svmPtr, ptrToPatch) != static_cast(ptrDiff(svmPtr, ptrToPatch))); uint32_t offsetToPatch = static_cast(ptrDiff(svmPtr, ptrToPatch)); patch(offsetToPatch, getCrossThreadData(), argInfo.offsetBufferOffset); return ptrToPatch; } cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags) { void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc); setArgImmediate(argIndex, sizeof(void *), &svmPtr); storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags); if (requiresSshForBuffers()) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); 
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0); } if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } addAllocationToCacheFlushVector(argIndex, svmAlloc); return CL_SUCCESS; } cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { DBG_LOG_INPUTS("setArgBuffer svm_alloc", svmAlloc); const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t)); void *ptrToPatch = patchBufferOffset(kernelArgInfo, svmPtr, svmAlloc); auto patchLocation = ptrOffset(getCrossThreadData(), kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size; patchWithRequiredSize(patchLocation, patchSize, reinterpret_cast(svmPtr)); if (requiresSshForBuffers()) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); size_t allocSize = 0; size_t offset = 0; if (svmAlloc != nullptr) { allocSize = svmAlloc->getUnderlyingBufferSize(); offset = ptrDiff(ptrToPatch, svmAlloc->getGpuAddressToPatch()); allocSize -= offset; } Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, allocSize, ptrToPatch, offset, svmAlloc, 0, 0); } if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } addAllocationToCacheFlushVector(argIndex, svmAlloc); return CL_SUCCESS; } void Kernel::storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argObject, const void *argValue, size_t argSize, GraphicsAllocation *argSvmAlloc, cl_mem_flags argSvmFlags) { kernelArguments[argIndex].type = argType; kernelArguments[argIndex].object = argObject; kernelArguments[argIndex].value = argValue; kernelArguments[argIndex].size = argSize; 
kernelArguments[argIndex].pSvmAlloc = argSvmAlloc; kernelArguments[argIndex].svmFlags = argSvmFlags; } const void *Kernel::getKernelArg(uint32_t argIndex) const { return kernelArguments[argIndex].object; } const Kernel::SimpleKernelArgInfo &Kernel::getKernelArgInfo(uint32_t argIndex) const { return kernelArguments[argIndex]; } void Kernel::setSvmKernelExecInfo(GraphicsAllocation *argValue) { kernelSvmGfxAllocations.push_back(argValue); if (allocationForCacheFlush(argValue)) { svmAllocationsRequireCacheFlush = true; } } void Kernel::clearSvmKernelExecInfo() { kernelSvmGfxAllocations.clear(); svmAllocationsRequireCacheFlush = false; } void Kernel::setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL) { this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = infoValue; return; } if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL) { this->unifiedMemoryControls.indirectHostAllocationsAllowed = infoValue; return; } if (infoType == CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL) { this->unifiedMemoryControls.indirectSharedAllocationsAllowed = infoValue; return; } } void Kernel::setUnifiedMemoryExecInfo(GraphicsAllocation *unifiedMemoryAllocation) { kernelUnifiedMemoryGfxAllocations.push_back(unifiedMemoryAllocation); } void Kernel::clearUnifiedMemoryExecInfo() { kernelUnifiedMemoryGfxAllocations.clear(); } cl_int Kernel::setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { switch (executionType) { case CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL: this->executionType = KernelExecutionType::Default; break; case CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL: this->executionType = KernelExecutionType::Concurrent; break; default: { return CL_INVALID_VALUE; } } return CL_SUCCESS; } void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *globalWorkSize, const size_t *globalWorkOffset, size_t *localWorkSize) { UNRECOVERABLE_IF((workDim == 0) 
|| (workDim > 3)); UNRECOVERABLE_IF(globalWorkOffset == nullptr); UNRECOVERABLE_IF(globalWorkSize == nullptr); Vec3 elws{0, 0, 0}; Vec3 gws{ globalWorkSize[0], (workDim > 1) ? globalWorkSize[1] : 0, (workDim > 2) ? globalWorkSize[2] : 0}; Vec3 offset{ globalWorkOffset[0], (workDim > 1) ? globalWorkOffset[1] : 0, (workDim > 2) ? globalWorkOffset[2] : 0}; const DispatchInfo dispatchInfo{this, workDim, gws, elws, offset}; auto suggestedLws = computeWorkgroupSize(dispatchInfo); localWorkSize[0] = suggestedLws.x; if (workDim > 1) localWorkSize[1] = suggestedLws.y; if (workDim > 2) localWorkSize[2] = suggestedLws.z; } uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const { auto &hardwareInfo = getDevice().getHardwareInfo(); auto executionEnvironment = kernelInfo.patchInfo.executionEnvironment; auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount; if (dssCount == 0) { dssCount = hardwareInfo.gtSystemInfo.SubSliceCount; } auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto availableThreadCount = hwHelper.calculateAvailableThreadCount( hardwareInfo.platform.eProductFamily, ((executionEnvironment != nullptr) ? executionEnvironment->NumGRFRequired : GrfConfig::DefaultGrfNumber), hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); auto hasBarriers = ((executionEnvironment != nullptr) ? 
executionEnvironment->HasBarriers : 0u); return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(), availableThreadCount, dssCount, dssCount * KB * hardwareInfo.capabilityTable.slmSize, hwHelper.alignSlmSize(slmTotalSize), static_cast(hwHelper.getMaxBarrierRegisterPerSlice()), hwHelper.getBarriersCountFromHasBarriers(hasBarriers), workDim, localWorkSize); } inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) { auto numArgs = kernelInfo.kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; auto pageFaultManager = device.getMemoryManager()->getPageFaultManager(); if (pageFaultManager && this->isUnifiedMemorySyncRequired) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(pSVMAlloc->getGpuAddress())); } commandStreamReceiver.makeResident(*pSVMAlloc); } else if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); auto memObj = castToObjectOrAbort(clMem); auto image = castToObject(clMem); if (image && image->isImageFromImage()) { commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); } commandStreamReceiver.makeResident(*memObj->getGraphicsAllocation()); if (memObj->getMcsAllocation()) { commandStreamReceiver.makeResident(*memObj->getMcsAllocation()); } } } } } void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { if (privateSurface) { commandStreamReceiver.makeResident(*privateSurface); } if (program->getConstantSurface()) { commandStreamReceiver.makeResident(*(program->getConstantSurface())); } if (program->getGlobalSurface()) { commandStreamReceiver.makeResident(*(program->getGlobalSurface())); } if (program->getExportedFunctionsSurface()) { 
commandStreamReceiver.makeResident(*(program->getExportedFunctionsSurface())); } for (auto gfxAlloc : kernelSvmGfxAllocations) { commandStreamReceiver.makeResident(*gfxAlloc); } auto pageFaultManager = program->peekExecutionEnvironment().memoryManager->getPageFaultManager(); for (auto gfxAlloc : kernelUnifiedMemoryGfxAllocations) { commandStreamReceiver.makeResident(*gfxAlloc); if (pageFaultManager) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(gfxAlloc->getGpuAddress())); } } if (unifiedMemoryControls.indirectSharedAllocationsAllowed && pageFaultManager) { pageFaultManager->moveAllocationsWithinUMAllocsManagerToGpuDomain(this->getContext().getSVMAllocsManager()); } makeArgsResident(commandStreamReceiver); auto kernelIsaAllocation = this->kernelInfo.kernelAllocation; if (kernelIsaAllocation) { commandStreamReceiver.makeResident(*kernelIsaAllocation); } gtpinNotifyMakeResident(this, &commandStreamReceiver); if (unifiedMemoryControls.indirectDeviceAllocationsAllowed || unifiedMemoryControls.indirectHostAllocationsAllowed || unifiedMemoryControls.indirectSharedAllocationsAllowed) { this->getContext().getSVMAllocsManager()->makeInternalAllocationsResident(commandStreamReceiver, unifiedMemoryControls.generateMask()); } } void Kernel::getResidency(std::vector &dst) { if (privateSurface) { GeneralSurface *surface = new GeneralSurface(privateSurface); dst.push_back(surface); } if (program->getConstantSurface()) { GeneralSurface *surface = new GeneralSurface(program->getConstantSurface()); dst.push_back(surface); } if (program->getGlobalSurface()) { GeneralSurface *surface = new GeneralSurface(program->getGlobalSurface()); dst.push_back(surface); } if (program->getExportedFunctionsSurface()) { GeneralSurface *surface = new GeneralSurface(program->getExportedFunctionsSurface()); dst.push_back(surface); } for (auto gfxAlloc : kernelSvmGfxAllocations) { GeneralSurface *surface = new GeneralSurface(gfxAlloc); dst.push_back(surface); } auto numArgs = 
kernelInfo.kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; dst.push_back(new GeneralSurface(pSVMAlloc)); } else if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); auto memObj = castToObject(clMem); DEBUG_BREAK_IF(memObj == nullptr); dst.push_back(new MemObjSurface(memObj)); } } } auto kernelIsaAllocation = this->kernelInfo.kernelAllocation; if (kernelIsaAllocation) { GeneralSurface *surface = new GeneralSurface(kernelIsaAllocation); dst.push_back(surface); } gtpinNotifyUpdateResidencyList(this, &dst); } bool Kernel::requiresCoherency() { auto numArgs = kernelInfo.kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; if (pSVMAlloc->isCoherent()) { return true; } } if (Kernel::isMemObj(kernelArguments[argIndex].type)) { auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); auto memObj = castToObjectOrAbort(clMem); if (memObj->getGraphicsAllocation()->isCoherent()) { return true; } } } } return false; } cl_int Kernel::setArgLocal(uint32_t argIndex, size_t argSize, const void *argVal) { auto crossThreadData = reinterpret_cast(getCrossThreadData()); storeKernelArg(argIndex, SLM_OBJ, nullptr, argVal, argSize); slmSizes[argIndex] = argSize; // Extract our current slmOffset auto slmOffset = *ptrOffset(crossThreadData, kernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset); // Add our size slmOffset += static_cast(argSize); // Update all slm offsets after this argIndex ++argIndex; while (argIndex < slmSizes.size()) { const auto &kernelArgInfo 
= kernelInfo.kernelArgInfo[argIndex]; auto slmAlignment = kernelArgInfo.slmAlignment; // If an local argument, alignment should be non-zero if (slmAlignment) { // Align to specified alignment slmOffset = alignUp(slmOffset, slmAlignment); // Patch our new offset into cross thread data auto patchLocation = ptrOffset(crossThreadData, kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); *patchLocation = slmOffset; } slmOffset += static_cast(slmSizes[argIndex]); ++argIndex; } slmTotalSize = kernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB); return CL_SUCCESS; } cl_int Kernel::setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal) { if (argSize != sizeof(cl_mem *)) return CL_INVALID_ARG_SIZE; const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; auto clMem = reinterpret_cast(argVal); patchBufferOffset(kernelArgInfo, nullptr, nullptr); if (clMem && *clMem) { auto clMemObj = *clMem; DBG_LOG_INPUTS("setArgBuffer cl_mem", clMemObj); storeKernelArg(argIndex, BUFFER_OBJ, clMemObj, argVal, argSize); auto buffer = castToObject(clMemObj); if (!buffer) return CL_INVALID_MEM_OBJECT; if (buffer->peekSharingHandler()) { usingSharedObjArgs = true; } auto patchLocation = ptrOffset(getCrossThreadData(), kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size; uint64_t addressToPatch = buffer->setArgStateless(patchLocation, patchSize, !this->isBuiltIn); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { PatchInfoData patchInfoData(addressToPatch - buffer->getOffset(), static_cast(buffer->getOffset()), PatchInfoAllocationType::KernelArg, reinterpret_cast(getCrossThreadData()), static_cast(kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset), PatchInfoAllocationType::IndirectObjectHeap, patchSize); this->patchInfoDataList.push_back(patchInfoData); } bool disableL3 = false; bool forceNonAuxMode = false; bool isAuxTranslationKernel = 
(AuxTranslationDirection::None != auxTranslationDirection); if (isAuxTranslationKernel) { if (((AuxTranslationDirection::AuxToNonAux == auxTranslationDirection) && argIndex == 1) || ((AuxTranslationDirection::NonAuxToAux == auxTranslationDirection) && argIndex == 0)) { forceNonAuxMode = true; } disableL3 = (argIndex == 0); } else if (buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED && !kernelArgInfo.pureStatefulBufferAccess) { forceNonAuxMode = true; } if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly); } kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable(); auto allocationForCacheFlush = buffer->getGraphicsAllocation(); //if we make object uncacheable for surface state and there are not stateless accessess , then ther is no need to flush caches if (buffer->isMemObjUncacheableForSurfaceState() && kernelArgInfo.pureStatefulBufferAccess) { allocationForCacheFlush = nullptr; } addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush); return CL_SUCCESS; } else { auto patchLocation = ptrOffset(getCrossThreadData(), kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); patchWithRequiredSize(patchLocation, kernelArgInfo.kernelArgPatchInfoVector[0].size, 0u); storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize); if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0); } return CL_SUCCESS; } } cl_int Kernel::setArgPipe(uint32_t argIndex, size_t argSize, const void *argVal) { if (argSize != sizeof(cl_mem *)) { return CL_INVALID_ARG_SIZE; } const auto &kernelArgInfo = 
kernelInfo.kernelArgInfo[argIndex]; auto clMem = reinterpret_cast(argVal); if (clMem && *clMem) { auto clMemObj = *clMem; DBG_LOG_INPUTS("setArgPipe cl_mem", clMemObj); storeKernelArg(argIndex, PIPE_OBJ, clMemObj, argVal, argSize); auto memObj = castToObject(clMemObj); if (!memObj) { return CL_INVALID_MEM_OBJECT; } auto pipe = castToObject(clMemObj); if (!pipe) { return CL_INVALID_ARG_VALUE; } if (memObj->getContext() != &(this->getContext())) { return CL_INVALID_MEM_OBJECT; } auto patchLocation = ptrOffset(getCrossThreadData(), kernelArgInfo.kernelArgPatchInfoVector[0].crossthreadOffset); auto patchSize = kernelArgInfo.kernelArgPatchInfoVector[0].size; pipe->setPipeArg(patchLocation, patchSize); if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, pipe->getSize(), pipe->getCpuAddress(), 0, pipe->getGraphicsAllocation(), 0, 0); } return CL_SUCCESS; } else { return CL_INVALID_MEM_OBJECT; } } cl_int Kernel::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) { return setArgImageWithMipLevel(argIndex, argSize, argVal, 0u); } cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, size_t argSize, const void *argVal, uint32_t mipLevel) { auto retVal = CL_INVALID_ARG_VALUE; patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], nullptr, nullptr); auto clMemObj = *(static_cast(argVal)); auto pImage = castToObject(clMemObj); if (pImage && argSize == sizeof(cl_mem *)) { if (pImage->peekSharingHandler()) { usingSharedObjArgs = true; } const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; DBG_LOG_INPUTS("setArgImage cl_mem", clMemObj); storeKernelArg(argIndex, IMAGE_OBJ, clMemObj, argVal, argSize); auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); DEBUG_BREAK_IF(!kernelArgInfo.isImage); // Sets SS structure if (kernelArgInfo.isMediaImage) { DEBUG_BREAK_IF(!kernelInfo.isVmeWorkload); 
pImage->setMediaImageArg(surfaceState); } else { pImage->setImageArg(surfaceState, kernelArgInfo.isMediaBlockImage, mipLevel); } auto crossThreadData = reinterpret_cast(getCrossThreadData()); auto &imageDesc = pImage->getImageDesc(); auto &imageFormat = pImage->getImageFormat(); if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D) { imageTransformer->registerImage3d(argIndex); } patch(imageDesc.image_width, crossThreadData, kernelArgInfo.offsetImgWidth); patch(imageDesc.image_height, crossThreadData, kernelArgInfo.offsetImgHeight); patch(imageDesc.image_depth, crossThreadData, kernelArgInfo.offsetImgDepth); patch(imageDesc.num_samples, crossThreadData, kernelArgInfo.offsetNumSamples); patch(imageDesc.image_array_size, crossThreadData, kernelArgInfo.offsetArraySize); patch(imageFormat.image_channel_data_type, crossThreadData, kernelArgInfo.offsetChannelDataType); patch(imageFormat.image_channel_order, crossThreadData, kernelArgInfo.offsetChannelOrder); patch(kernelArgInfo.offsetHeap, crossThreadData, kernelArgInfo.offsetObjectId); patch(imageDesc.num_mip_levels, crossThreadData, kernelArgInfo.offsetNumMipLevels); auto pixelSize = pImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; patch(pImage->getGraphicsAllocation()->getGpuAddress(), crossThreadData, kernelArgInfo.offsetFlatBaseOffset); patch((imageDesc.image_width * pixelSize) - 1, crossThreadData, kernelArgInfo.offsetFlatWidth); patch((imageDesc.image_height * pixelSize) - 1, crossThreadData, kernelArgInfo.offsetFlatHeight); patch(imageDesc.image_row_pitch - 1, crossThreadData, kernelArgInfo.offsetFlatPitch); addAllocationToCacheFlushVector(argIndex, pImage->getGraphicsAllocation()); retVal = CL_SUCCESS; } return retVal; } cl_int Kernel::setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal) { auto retVal = CL_INVALID_ARG_VALUE; if (argVal) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() <= 0); 
storeKernelArg(argIndex, NONE_OBJ, nullptr, nullptr, argSize); auto crossThreadData = getCrossThreadData(); auto crossThreadDataEnd = ptrOffset(crossThreadData, getCrossThreadDataSize()); for (const auto &kernelArgPatchInfo : kernelArgInfo.kernelArgPatchInfoVector) { DEBUG_BREAK_IF(kernelArgPatchInfo.size <= 0); auto pDst = ptrOffset(crossThreadData, kernelArgPatchInfo.crossthreadOffset); auto pSrc = ptrOffset(argVal, kernelArgPatchInfo.sourceOffset); DEBUG_BREAK_IF(!(ptrOffset(pDst, kernelArgPatchInfo.size) <= crossThreadDataEnd)); UNUSED_VARIABLE(crossThreadDataEnd); if (kernelArgPatchInfo.sourceOffset < argSize) { size_t maxBytesToCopy = argSize - kernelArgPatchInfo.sourceOffset; size_t bytesToCopy = std::min(static_cast(kernelArgPatchInfo.size), maxBytesToCopy); memcpy_s(pDst, kernelArgPatchInfo.size, pSrc, bytesToCopy); } } retVal = CL_SUCCESS; } return retVal; } cl_int Kernel::setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal) { auto retVal = CL_INVALID_SAMPLER; if (!argVal) { return retVal; } auto clSamplerObj = *(static_cast(argVal)); auto pSampler = castToObject(clSamplerObj); if (pSampler) { pSampler->incRefInternal(); } if (kernelArguments.at(argIndex).object) { auto oldSampler = castToObject(kernelArguments.at(argIndex).object); UNRECOVERABLE_IF(!oldSampler); oldSampler->decRefInternal(); } if (pSampler && argSize == sizeof(cl_sampler *)) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; storeKernelArg(argIndex, SAMPLER_OBJ, clSamplerObj, argVal, argSize); auto dsh = getDynamicStateHeap(); auto samplerState = ptrOffset(dsh, kernelArgInfo.offsetHeap); pSampler->setArg(const_cast(samplerState)); auto crossThreadData = reinterpret_cast(getCrossThreadData()); patch(pSampler->getSnapWaValue(), crossThreadData, kernelArgInfo.offsetSamplerSnapWa); patch(GetAddrModeEnum(pSampler->addressingMode), crossThreadData, kernelArgInfo.offsetSamplerAddressingMode); patch(GetNormCoordsEnum(pSampler->normalizedCoordinates), 
crossThreadData, kernelArgInfo.offsetSamplerNormalizedCoords); patch(SAMPLER_OBJECT_ID_SHIFT + kernelArgInfo.offsetHeap, crossThreadData, kernelArgInfo.offsetObjectId); retVal = CL_SUCCESS; } return retVal; } cl_int Kernel::setArgAccelerator(uint32_t argIndex, size_t argSize, const void *argVal) { auto retVal = CL_INVALID_ARG_VALUE; if (argSize != sizeof(cl_accelerator_intel)) { return CL_INVALID_ARG_SIZE; } if (!argVal) { return retVal; } auto clAcceleratorObj = *(static_cast(argVal)); DBG_LOG_INPUTS("setArgAccelerator cl_mem", clAcceleratorObj); const auto pAccelerator = castToObject(clAcceleratorObj); if (pAccelerator) { storeKernelArg(argIndex, ACCELERATOR_OBJ, clAcceleratorObj, argVal, argSize); const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; if (kernelArgInfo.samplerArgumentType == iOpenCL::SAMPLER_OBJECT_VME) { auto crossThreadData = getCrossThreadData(); const auto pVmeAccelerator = castToObjectOrAbort(pAccelerator); auto pDesc = static_cast(pVmeAccelerator->getDescriptor()); DEBUG_BREAK_IF(!pDesc); auto pVmeMbBlockTypeDst = reinterpret_cast(ptrOffset(crossThreadData, kernelArgInfo.offsetVmeMbBlockType)); *pVmeMbBlockTypeDst = pDesc->mb_block_type; auto pVmeSubpixelMode = reinterpret_cast(ptrOffset(crossThreadData, kernelArgInfo.offsetVmeSubpixelMode)); *pVmeSubpixelMode = pDesc->subpixel_mode; auto pVmeSadAdjustMode = reinterpret_cast(ptrOffset(crossThreadData, kernelArgInfo.offsetVmeSadAdjustMode)); *pVmeSadAdjustMode = pDesc->sad_adjust_mode; auto pVmeSearchPathType = reinterpret_cast(ptrOffset(crossThreadData, kernelArgInfo.offsetVmeSearchPathType)); *pVmeSearchPathType = pDesc->search_path_type; retVal = CL_SUCCESS; } else if (kernelArgInfo.samplerArgumentType == iOpenCL::SAMPLER_OBJECT_VE) { retVal = CL_SUCCESS; } } return retVal; } cl_int Kernel::setArgDevQueue(uint32_t argIndex, size_t argSize, const void *argVal) { if (argVal == nullptr) { return CL_INVALID_ARG_VALUE; } if (argSize != sizeof(cl_command_queue)) { return 
CL_INVALID_ARG_SIZE; } auto clDeviceQueue = *(static_cast(argVal)); auto pDeviceQueue = castToObject(clDeviceQueue); if (pDeviceQueue == nullptr) { return CL_INVALID_DEVICE_QUEUE; } storeKernelArg(argIndex, DEVICE_QUEUE_OBJ, clDeviceQueue, argVal, argSize); const auto &kernelArgPatchInfo = kernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0]; auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData()), kernelArgPatchInfo.crossthreadOffset); patchWithRequiredSize(patchLocation, kernelArgPatchInfo.size, static_cast(pDeviceQueue->getQueueBuffer()->getGpuAddressToPatch())); return CL_SUCCESS; } void Kernel::setKernelArgHandler(uint32_t argIndex, KernelArgHandler handler) { if (kernelArgHandlers.size() <= argIndex) { kernelArgHandlers.resize(argIndex + 1); } kernelArgHandlers[argIndex] = handler; } void Kernel::unsetArg(uint32_t argIndex) { if (kernelArguments[argIndex].isPatched) { patchedArgumentsNum--; kernelArguments[argIndex].isPatched = false; if (kernelArguments[argIndex].isStatelessUncacheable) { statelessUncacheableArgsCount--; kernelArguments[argIndex].isStatelessUncacheable = false; } } } void Kernel::createReflectionSurface() { if (this->isParentKernel && kernelReflectionSurface == nullptr) { auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); BlockKernelManager *blockManager = program->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); ObjectCounts objectCount; getParentObjectCounts(objectCount); uint32_t parentImageCount = objectCount.imageCount; uint32_t parentSamplerCount = objectCount.samplerCount; size_t maxConstantBufferSize = 0; std::vector *curbeParamsForBlocks = new std::vector[blockCount]; uint64_t *tokenMask = new uint64_t[blockCount]; uint32_t *sshTokenOffsetsFromKernelData = new uint32_t[blockCount]; size_t kernelReflectionSize = alignUp(sizeof(IGIL_KernelDataHeader) + blockCount * sizeof(IGIL_KernelAddressData), sizeof(void *)); 
uint32_t kernelDataOffset = static_cast(kernelReflectionSize); uint32_t parentSSHAlignedSize = alignUp(this->kernelInfo.heapInfo.pKernelHeader->SurfaceStateHeapSize, hwHelper.getBindingTableStateAlignement()); uint32_t btOffset = parentSSHAlignedSize; for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); size_t samplerStateAndBorderColorSize = 0; uint32_t firstSSHTokenIndex = 0; ReflectionSurfaceHelper::getCurbeParams(curbeParamsForBlocks[i], tokenMask[i], firstSSHTokenIndex, *pBlockInfo, hwInfo); maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize)); samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(hwInfo); samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment); samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize(); samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, sizeof(void *)); sshTokenOffsetsFromKernelData[i] = offsetof(IGIL_KernelData, m_data) + sizeof(IGIL_KernelCurbeParams) * firstSSHTokenIndex; kernelReflectionSize += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParamsForBlocks[i].size(), sizeof(void *)); kernelReflectionSize += parentSamplerCount * sizeof(IGIL_SamplerParams) + samplerStateAndBorderColorSize; } maxConstantBufferSize = alignUp(maxConstantBufferSize, sizeof(void *)); kernelReflectionSize += blockCount * alignUp(maxConstantBufferSize, sizeof(void *)); kernelReflectionSize += parentImageCount * sizeof(IGIL_ImageParamters); kernelReflectionSize += parentSamplerCount * sizeof(IGIL_ParentSamplerParams); kernelReflectionSurface = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelReflectionSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo 
*pBlockInfo = blockManager->getBlockKernelInfo(i); uint32_t newKernelDataOffset = ReflectionSurfaceHelper::setKernelData(kernelReflectionSurface->getUnderlyingBuffer(), kernelDataOffset, curbeParamsForBlocks[i], tokenMask[i], maxConstantBufferSize, parentSamplerCount, *pBlockInfo, device.getHardwareInfo()); uint32_t offset = static_cast(offsetof(IGIL_KernelDataHeader, m_data) + sizeof(IGIL_KernelAddressData) * i); uint32_t samplerHeapOffset = static_cast(alignUp(kernelDataOffset + sizeof(IGIL_KernelData) + curbeParamsForBlocks[i].size() * sizeof(IGIL_KernelCurbeParams), sizeof(void *))); uint32_t samplerHeapSize = static_cast(alignUp(pBlockInfo->getSamplerStateArraySize(device.getHardwareInfo()), Sampler::samplerStateArrayAlignment) + pBlockInfo->getBorderColorStateSize()); uint32_t constantBufferOffset = alignUp(samplerHeapOffset + samplerHeapSize, sizeof(void *)); uint32_t samplerParamsOffset = 0; if (parentSamplerCount) { samplerParamsOffset = newKernelDataOffset - sizeof(IGIL_SamplerParams) * parentSamplerCount; IGIL_SamplerParams *pSamplerParams = (IGIL_SamplerParams *)ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), samplerParamsOffset); uint32_t sampler = 0; for (uint32_t argID = 0; argID < pBlockInfo->kernelArgInfo.size(); argID++) { if (pBlockInfo->kernelArgInfo[argID].isSampler) { pSamplerParams[sampler].m_ArgID = argID; pSamplerParams[sampler].m_SamplerStateOffset = pBlockInfo->kernelArgInfo[argID].offsetHeap; sampler++; } } } ReflectionSurfaceHelper::setKernelAddressData(kernelReflectionSurface->getUnderlyingBuffer(), offset, kernelDataOffset, samplerHeapOffset, constantBufferOffset, samplerParamsOffset, sshTokenOffsetsFromKernelData[i] + kernelDataOffset, btOffset, *pBlockInfo, device.getHardwareInfo()); if (samplerHeapSize > 0) { void *pDst = ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), samplerHeapOffset); const void *pSrc = ptrOffset(pBlockInfo->heapInfo.pDsh, pBlockInfo->getBorderColorOffset()); memcpy_s(pDst, samplerHeapSize, 
pSrc, samplerHeapSize); } void *pDst = ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), constantBufferOffset); const char *pSrc = pBlockInfo->crossThreadData; memcpy_s(pDst, pBlockInfo->getConstantBufferSize(), pSrc, pBlockInfo->getConstantBufferSize()); btOffset += pBlockInfo->patchInfo.bindingTableState->Offset; kernelDataOffset = newKernelDataOffset; } uint32_t samplerOffset = 0; if (parentSamplerCount) { samplerOffset = kernelDataOffset + parentImageCount * sizeof(IGIL_ImageParamters); } ReflectionSurfaceHelper::setKernelDataHeader(kernelReflectionSurface->getUnderlyingBuffer(), blockCount, parentImageCount, parentSamplerCount, kernelDataOffset, samplerOffset); delete[] curbeParamsForBlocks; delete[] tokenMask; delete[] sshTokenOffsetsFromKernelData; // Patch constant values once after reflection surface creation patchBlocksCurbeWithConstantValues(); } if (DebugManager.flags.ForceDispatchScheduler.get()) { if (this->isSchedulerKernel && kernelReflectionSurface == nullptr) { kernelReflectionSurface = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}); } } } void Kernel::getParentObjectCounts(ObjectCounts &objectCount) { objectCount.imageCount = 0; objectCount.samplerCount = 0; DEBUG_BREAK_IF(!isParentKernel); for (const auto &arg : this->kernelArguments) { if (arg.type == SAMPLER_OBJ) { objectCount.samplerCount++; } else if (arg.type == IMAGE_OBJ) { objectCount.imageCount++; } } } bool Kernel::hasPrintfOutput() const { return getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface != nullptr; } size_t Kernel::getInstructionHeapSizeForExecutionModel() const { BlockKernelManager *blockManager = program->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); size_t totalSize = 0; if (isParentKernel) { totalSize = kernelBinaryAlignement - 1; // for initial alignment for (uint32_t i = 0; i < 
blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); totalSize += pBlockInfo->heapInfo.pKernelHeader->KernelHeapSize; totalSize = alignUp(totalSize, kernelBinaryAlignement); } } return totalSize; } void Kernel::patchBlocksCurbeWithConstantValues() { BlockKernelManager *blockManager = program->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); uint64_t globalMemoryGpuAddress = program->getGlobalSurface() != nullptr ? program->getGlobalSurface()->getGpuAddressToPatch() : 0; uint64_t constantMemoryGpuAddress = program->getConstantSurface() != nullptr ? program->getConstantSurface()->getGpuAddressToPatch() : 0; for (uint32_t blockID = 0; blockID < blockCount; blockID++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(blockID); uint64_t globalMemoryCurbeOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t globalMemoryPatchSize = 0; uint64_t constantMemoryCurbeOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t constantMemoryPatchSize = 0; if (pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization) { globalMemoryCurbeOffset = pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; globalMemoryPatchSize = pBlockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamSize; } if (pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization) { constantMemoryCurbeOffset = pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; constantMemoryPatchSize = pBlockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamSize; } ReflectionSurfaceHelper::patchBlocksCurbeWithConstantValues(kernelReflectionSurface->getUnderlyingBuffer(), blockID, globalMemoryCurbeOffset, globalMemoryPatchSize, globalMemoryGpuAddress, constantMemoryCurbeOffset, constantMemoryPatchSize, constantMemoryGpuAddress, 
ReflectionSurfaceHelper::undefinedOffset, 0, 0); } } void Kernel::ReflectionSurfaceHelper::getCurbeParams(std::vector &curbeParamsOut, uint64_t &tokenMaskOut, uint32_t &firstSSHTokenIndex, const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) { size_t numArgs = kernelInfo.kernelArgInfo.size(); size_t patchTokenCount = +kernelInfo.kernelNonArgInfo.size(); uint64_t tokenMask = 0; tokenMaskOut = 0; firstSSHTokenIndex = 0; curbeParamsOut.reserve(patchTokenCount * 5); uint32_t bindingTableIndex = 253; for (uint32_t argNumber = 0; argNumber < numArgs; argNumber++) { IGIL_KernelCurbeParams curbeParam; bindingTableIndex = 253; auto sizeOfkernelArgForSSH = kernelInfo.gpuPointerSize; if (kernelInfo.kernelArgInfo[argNumber].isBuffer) { curbeParam.m_patchOffset = kernelInfo.kernelArgInfo[argNumber].kernelArgPatchInfoVector[0].crossthreadOffset; curbeParam.m_parameterSize = kernelInfo.gpuPointerSize; curbeParam.m_parameterType = COMPILER_DATA_PARAMETER_GLOBAL_SURFACE; curbeParam.m_sourceOffset = argNumber; curbeParamsOut.push_back(curbeParam); tokenMask |= shiftLeftBy(63); } else if (kernelInfo.kernelArgInfo[argNumber].isImage) { if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetImgWidth)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_IMAGE_WIDTH + 50, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetImgWidth, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetImgHeight)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_IMAGE_HEIGHT + 50, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetImgHeight, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetImgDepth)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_IMAGE_DEPTH + 50, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetImgDepth, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetChannelDataType)) { 
curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE + 50, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetChannelDataType, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetChannelOrder)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_IMAGE_CHANNEL_ORDER + 50, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetChannelOrder, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetArraySize)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_IMAGE_ARRAY_SIZE + 50, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetArraySize, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetObjectId)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_OBJECT_ID + 50, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetObjectId, argNumber}); } tokenMask |= shiftLeftBy(50); if (kernelInfo.patchInfo.bindingTableState) { auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); const void *ssh = static_cast(kernelInfo.heapInfo.pSsh) + kernelInfo.patchInfo.bindingTableState->Offset; for (uint32_t i = 0; i < kernelInfo.patchInfo.bindingTableState->Count; i++) { uint32_t pointer = hwHelper.getBindingTableStateSurfaceStatePointer(ssh, i); if (pointer == kernelInfo.kernelArgInfo[argNumber].offsetHeap) { bindingTableIndex = i; break; } } DEBUG_BREAK_IF(!((bindingTableIndex != 253) || (kernelInfo.patchInfo.bindingTableState->Count == 0))); } } else if (kernelInfo.kernelArgInfo[argNumber].isSampler) { if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetSamplerSnapWa)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED + 100, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetSamplerSnapWa, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetSamplerAddressingMode)) { 
curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_SAMPLER_ADDRESS_MODE + 100, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetSamplerAddressingMode, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetSamplerNormalizedCoords)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS + 100, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetSamplerNormalizedCoords, argNumber}); } if (isValidOffset(kernelInfo.kernelArgInfo[argNumber].offsetObjectId)) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_OBJECT_ID + 100, sizeof(uint32_t), kernelInfo.kernelArgInfo[argNumber].offsetObjectId, argNumber}); } tokenMask |= shiftLeftBy(51); } else { bindingTableIndex = 0; sizeOfkernelArgForSSH = 0; } curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{1024, sizeOfkernelArgForSSH, bindingTableIndex, argNumber}); if (kernelInfo.kernelArgInfo[argNumber].slmAlignment != 0) { DEBUG_BREAK_IF(kernelInfo.kernelArgInfo[argNumber].kernelArgPatchInfoVector.size() != 1); uint32_t offset = kernelInfo.kernelArgInfo[argNumber].kernelArgPatchInfoVector[0].crossthreadOffset; uint32_t srcOffset = kernelInfo.kernelArgInfo[argNumber].slmAlignment; curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES, 0, offset, srcOffset}); tokenMask |= shiftLeftBy(DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES); } } for (auto param : kernelInfo.patchInfo.dataParameterBuffersKernelArgs) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_KERNEL_ARGUMENT, param->DataSize, param->Offset, param->ArgumentNumber}); tokenMask |= shiftLeftBy(DATA_PARAMETER_KERNEL_ARGUMENT); } for (uint32_t i = 0; i < 3; i++) { const uint32_t sizeOfParam = 4; if (kernelInfo.workloadInfo.enqueuedLocalWorkSizeOffsets[i] != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, 
sizeOfParam, kernelInfo.workloadInfo.enqueuedLocalWorkSizeOffsets[i], i * sizeOfParam}); tokenMask |= shiftLeftBy(DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE); } if (kernelInfo.workloadInfo.globalWorkOffsetOffsets[i] != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_GLOBAL_WORK_OFFSET, sizeOfParam, kernelInfo.workloadInfo.globalWorkOffsetOffsets[i], i * sizeOfParam}); tokenMask |= shiftLeftBy(DATA_PARAMETER_GLOBAL_WORK_OFFSET); } if (kernelInfo.workloadInfo.globalWorkSizeOffsets[i] != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_GLOBAL_WORK_SIZE, sizeOfParam, kernelInfo.workloadInfo.globalWorkSizeOffsets[i], i * sizeOfParam}); tokenMask |= shiftLeftBy(DATA_PARAMETER_GLOBAL_WORK_SIZE); } if (kernelInfo.workloadInfo.localWorkSizeOffsets[i] != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_LOCAL_WORK_SIZE, sizeOfParam, kernelInfo.workloadInfo.localWorkSizeOffsets[i], i * sizeOfParam}); tokenMask |= shiftLeftBy(DATA_PARAMETER_LOCAL_WORK_SIZE); } if (kernelInfo.workloadInfo.localWorkSizeOffsets2[i] != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_LOCAL_WORK_SIZE, sizeOfParam, kernelInfo.workloadInfo.localWorkSizeOffsets2[i], i * sizeOfParam}); tokenMask |= shiftLeftBy(DATA_PARAMETER_LOCAL_WORK_SIZE); } if (kernelInfo.workloadInfo.numWorkGroupsOffset[i] != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_NUM_WORK_GROUPS, sizeOfParam, kernelInfo.workloadInfo.numWorkGroupsOffset[i], i * sizeOfParam}); tokenMask |= shiftLeftBy(DATA_PARAMETER_NUM_WORK_GROUPS); } } if (kernelInfo.workloadInfo.parentEventOffset != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_PARENT_EVENT, sizeof(uint32_t), kernelInfo.workloadInfo.parentEventOffset, 0}); tokenMask |= 
shiftLeftBy(DATA_PARAMETER_PARENT_EVENT); } if (kernelInfo.workloadInfo.workDimOffset != WorkloadInfo::undefinedOffset) { curbeParamsOut.emplace_back(IGIL_KernelCurbeParams{DATA_PARAMETER_WORK_DIMENSIONS, sizeof(uint32_t), kernelInfo.workloadInfo.workDimOffset, 0}); tokenMask |= shiftLeftBy(DATA_PARAMETER_WORK_DIMENSIONS); } std::sort(curbeParamsOut.begin(), curbeParamsOut.end(), compareFunction); tokenMaskOut = tokenMask; firstSSHTokenIndex = static_cast(curbeParamsOut.size() - numArgs); } uint32_t Kernel::ReflectionSurfaceHelper::setKernelData(void *reflectionSurface, uint32_t offset, std::vector &curbeParamsIn, uint64_t tokenMaskIn, size_t maxConstantBufferSize, size_t samplerCount, const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) { uint32_t offsetToEnd = 0; IGIL_KernelData *kernelData = reinterpret_cast(ptrOffset(reflectionSurface, offset)); size_t samplerHeapSize = alignUp(kernelInfo.getSamplerStateArraySize(hwInfo), Sampler::samplerStateArrayAlignment) + kernelInfo.getBorderColorStateSize(); kernelData->m_numberOfCurbeParams = static_cast(curbeParamsIn.size()); // number of paramters to patch kernelData->m_numberOfCurbeTokens = static_cast(curbeParamsIn.size() - kernelInfo.kernelArgInfo.size()); kernelData->m_numberOfSamplerStates = static_cast(kernelInfo.getSamplerStateArrayCount()); kernelData->m_SizeOfSamplerHeap = static_cast(samplerHeapSize); kernelData->m_SamplerBorderColorStateOffsetOnDSH = kernelInfo.patchInfo.samplerStateArray ? kernelInfo.patchInfo.samplerStateArray->BorderColorOffset : 0; kernelData->m_SamplerStateArrayOffsetOnDSH = kernelInfo.patchInfo.samplerStateArray ? kernelInfo.patchInfo.samplerStateArray->Offset : (uint32_t)-1; kernelData->m_sizeOfConstantBuffer = kernelInfo.getConstantBufferSize(); kernelData->m_PatchTokensMask = tokenMaskIn; kernelData->m_ScratchSpacePatchValue = 0; kernelData->m_SIMDSize = kernelInfo.patchInfo.executionEnvironment ? 
kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize : 0; kernelData->m_HasBarriers = kernelInfo.patchInfo.executionEnvironment ? kernelInfo.patchInfo.executionEnvironment->HasBarriers : 0; kernelData->m_RequiredWkgSizes[0] = kernelInfo.reqdWorkGroupSize[0] != WorkloadInfo::undefinedOffset ? static_cast(kernelInfo.reqdWorkGroupSize[0]) : 0; kernelData->m_RequiredWkgSizes[1] = kernelInfo.reqdWorkGroupSize[1] != WorkloadInfo::undefinedOffset ? static_cast(kernelInfo.reqdWorkGroupSize[1]) : 0; kernelData->m_RequiredWkgSizes[2] = kernelInfo.reqdWorkGroupSize[2] != WorkloadInfo::undefinedOffset ? static_cast(kernelInfo.reqdWorkGroupSize[2]) : 0; kernelData->m_InilineSLMSize = kernelInfo.workloadInfo.slmStaticSize; bool localIdRequired = false; if (kernelInfo.patchInfo.threadPayload) { if (kernelInfo.patchInfo.threadPayload->LocalIDFlattenedPresent || kernelInfo.patchInfo.threadPayload->LocalIDXPresent || kernelInfo.patchInfo.threadPayload->LocalIDYPresent || kernelInfo.patchInfo.threadPayload->LocalIDZPresent) { localIdRequired = true; } kernelData->m_PayloadSize = PerThreadDataHelper::getThreadPayloadSize(*kernelInfo.patchInfo.threadPayload, kernelData->m_SIMDSize, hwInfo.capabilityTable.grfSize); } kernelData->m_NeedLocalIDS = localIdRequired ? 1 : 0; kernelData->m_DisablePreemption = 0u; bool concurrentExecAllowed = true; if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface) { if (kernelInfo.patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize > 0) { concurrentExecAllowed = false; } } kernelData->m_CanRunConcurently = concurrentExecAllowed ? 
1 : 0; if (DebugManager.flags.DisableConcurrentBlockExecution.get()) { kernelData->m_CanRunConcurently = false; } IGIL_KernelCurbeParams *kernelCurbeParams = kernelData->m_data; for (uint32_t i = 0; i < curbeParamsIn.size(); i++) { kernelCurbeParams[i] = curbeParamsIn[i]; } offsetToEnd = static_cast(offset + alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParamsIn.size(), sizeof(void *)) + alignUp(samplerHeapSize, sizeof(void *)) + alignUp(maxConstantBufferSize, sizeof(void *)) + sizeof(IGIL_SamplerParams) * samplerCount); return offsetToEnd; } void Kernel::ReflectionSurfaceHelper::setKernelAddressDataBtOffset(void *reflectionSurface, uint32_t blockID, uint32_t btOffset) { uint32_t offset = static_cast(offsetof(IGIL_KernelDataHeader, m_data) + sizeof(IGIL_KernelAddressData) * blockID); IGIL_KernelAddressData *kernelAddressData = reinterpret_cast(ptrOffset(reflectionSurface, offset)); kernelAddressData->m_BTSoffset = btOffset; } void Kernel::ReflectionSurfaceHelper::setKernelAddressData(void *reflectionSurface, uint32_t offset, uint32_t kernelDataOffset, uint32_t samplerHeapOffset, uint32_t constantBufferOffset, uint32_t samplerParamsOffset, uint32_t sshTokensOffset, uint32_t btOffset, const KernelInfo &kernelInfo, const HardwareInfo &hwInfo) { IGIL_KernelAddressData *kernelAddressData = reinterpret_cast(ptrOffset(reflectionSurface, offset)); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); kernelAddressData->m_KernelDataOffset = kernelDataOffset; kernelAddressData->m_SamplerHeapOffset = samplerHeapOffset; kernelAddressData->m_SamplerParamsOffset = samplerParamsOffset; kernelAddressData->m_ConstantBufferOffset = constantBufferOffset; kernelAddressData->m_SSHTokensOffset = sshTokensOffset; kernelAddressData->m_BTSoffset = btOffset; kernelAddressData->m_BTSize = static_cast(kernelInfo.patchInfo.bindingTableState ? 
kernelInfo.patchInfo.bindingTableState->Count * hwHelper.getBindingTableStateSize() : 0); } template <> void Kernel::ReflectionSurfaceHelper::patchBlocksCurbe(void *reflectionSurface, uint32_t blockID, uint64_t defaultDeviceQueueCurbeOffset, uint32_t patchSizeDefaultQueue, uint64_t defaultDeviceQueueGpuAddress, uint64_t eventPoolCurbeOffset, uint32_t patchSizeEventPool, uint64_t eventPoolGpuAddress, uint64_t deviceQueueCurbeOffset, uint32_t patchSizeDeviceQueue, uint64_t deviceQueueGpuAddress, uint64_t printfBufferOffset, uint32_t patchSizePrintfBuffer, uint64_t printfBufferGpuAddress, uint64_t privateSurfaceOffset, uint32_t privateSurfaceSize, uint64_t privateSurfaceGpuAddress) { IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface); // Reflection surface must be initialized prior to patching blocks curbe on KRS DEBUG_BREAK_IF(blockID >= pKernelHeader->m_numberOfKernels); IGIL_KernelAddressData *addressData = pKernelHeader->m_data; // const buffer offsets must be set DEBUG_BREAK_IF(addressData[blockID].m_ConstantBufferOffset == 0); void *pCurbe = ptrOffset(reflectionSurface, addressData[blockID].m_ConstantBufferOffset); if (defaultDeviceQueueCurbeOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)defaultDeviceQueueCurbeOffset); patchWithRequiredSize(patchedPointer, patchSizeDefaultQueue, (uintptr_t)defaultDeviceQueueGpuAddress); } if (eventPoolCurbeOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)eventPoolCurbeOffset); patchWithRequiredSize(patchedPointer, patchSizeEventPool, (uintptr_t)eventPoolGpuAddress); } if (deviceQueueCurbeOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)deviceQueueCurbeOffset); patchWithRequiredSize(patchedPointer, patchSizeDeviceQueue, (uintptr_t)deviceQueueGpuAddress); } if (printfBufferOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)printfBufferOffset); patchWithRequiredSize(patchedPointer, 
patchSizePrintfBuffer, (uintptr_t)printfBufferGpuAddress); } if (privateSurfaceOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)privateSurfaceOffset); patchWithRequiredSize(patchedPointer, privateSurfaceSize, (uintptr_t)privateSurfaceGpuAddress); } } void Kernel::ReflectionSurfaceHelper::patchBlocksCurbeWithConstantValues(void *reflectionSurface, uint32_t blockID, uint64_t globalMemoryCurbeOffset, uint32_t globalMemoryPatchSize, uint64_t globalMemoryGpuAddress, uint64_t constantMemoryCurbeOffset, uint32_t constantMemoryPatchSize, uint64_t constantMemoryGpuAddress, uint64_t privateMemoryCurbeOffset, uint32_t privateMemoryPatchSize, uint64_t privateMemoryGpuAddress) { IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface); // Reflection surface must be initialized prior to patching blocks curbe on KRS DEBUG_BREAK_IF(blockID >= pKernelHeader->m_numberOfKernels); IGIL_KernelAddressData *addressData = pKernelHeader->m_data; // const buffer offsets must be set DEBUG_BREAK_IF(addressData[blockID].m_ConstantBufferOffset == 0); void *pCurbe = ptrOffset(reflectionSurface, addressData[blockID].m_ConstantBufferOffset); if (globalMemoryCurbeOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)globalMemoryCurbeOffset); patchWithRequiredSize(patchedPointer, globalMemoryPatchSize, (uintptr_t)globalMemoryGpuAddress); } if (constantMemoryCurbeOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)constantMemoryCurbeOffset); patchWithRequiredSize(patchedPointer, constantMemoryPatchSize, (uintptr_t)constantMemoryGpuAddress); } if (privateMemoryCurbeOffset != undefinedOffset) { auto *patchedPointer = ptrOffset(pCurbe, (size_t)privateMemoryCurbeOffset); patchWithRequiredSize(patchedPointer, privateMemoryPatchSize, (uintptr_t)privateMemoryGpuAddress); } } void Kernel::ReflectionSurfaceHelper::setParentImageParams(void *reflectionSurface, std::vector &parentArguments, const KernelInfo 
&parentKernelInfo) { IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface); IGIL_ImageParamters *pImageParameters = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset)); uint32_t numArgs = (uint32_t)parentArguments.size(); for (uint32_t i = 0; i < numArgs; i++) { if (parentArguments[i].type == Kernel::kernelArgType::IMAGE_OBJ) { const Image *image = castToObject((cl_mem)parentArguments[i].object); if (image) { pImageParameters->m_ArraySize = (uint32_t)image->getImageDesc().image_array_size; pImageParameters->m_Depth = (uint32_t)image->getImageDesc().image_depth; pImageParameters->m_Height = (uint32_t)image->getImageDesc().image_height; pImageParameters->m_Width = (uint32_t)image->getImageDesc().image_width; pImageParameters->m_NumMipLevels = (uint32_t)image->getImageDesc().num_mip_levels; pImageParameters->m_NumSamples = (uint32_t)image->getImageDesc().num_samples; pImageParameters->m_ChannelDataType = (uint32_t)image->getImageFormat().image_channel_data_type; pImageParameters->m_ChannelOrder = (uint32_t)image->getImageFormat().image_channel_data_type; pImageParameters->m_ObjectID = (uint32_t)parentKernelInfo.kernelArgInfo[i].offsetHeap; pImageParameters++; } } } } void Kernel::ReflectionSurfaceHelper::setParentSamplerParams(void *reflectionSurface, std::vector &parentArguments, const KernelInfo &parentKernelInfo) { IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface); IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset)); uint32_t numArgs = (uint32_t)parentArguments.size(); for (uint32_t i = 0; i < numArgs; i++) { if (parentArguments[i].type == Kernel::kernelArgType::SAMPLER_OBJ) { const Sampler *sampler = castToObject((cl_sampler)parentArguments[i].object); if (sampler) { pParentSamplerParams->CoordinateSnapRequired = (uint32_t)sampler->getSnapWaValue(); pParentSamplerParams->m_AddressingMode = 
(uint32_t)sampler->addressingMode;
                pParentSamplerParams->NormalizedCoords = (uint32_t)sampler->normalizedCoordinates;
                pParentSamplerParams->m_ObjectID = OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID((uint32_t)parentKernelInfo.kernelArgInfo[i].offsetHeap);
                // Move on to the next table entry; only resolved samplers consume one.
                pParentSamplerParams++;
            }
        }
    }
}

// Re-applies every kernel argument whose object resolves to a memory object that has
// a sharing handler, by calling setArg() again with the same cl_mem handle.
void Kernel::resetSharedObjectsPatchAddresses() {
    for (size_t i = 0; i < getKernelArgsNumber(); i++) {
        auto clMem = (cl_mem)kernelArguments[i].object;
        // NOTE(review): the template argument of castToObject is not visible in this view.
        auto memObj = castToObject(clMem);
        if (memObj && memObj->peekSharingHandler()) {
            setArg((uint32_t)i, sizeof(cl_mem), &clMem);
        }
    }
}

// Reports performance hints about this kernel through the program's context.
// Nothing is reported when the program has no context or the context is not
// providing performance hints.
void Kernel::provideInitializationHints() {
    const auto &patchInfo = kernelInfo.patchInfo;

    Context *context = program->getContextPtr();
    if (context == nullptr || !context->isProvidingPerformanceHints())
        return;
    // Hint 1: the kernel uses a private surface (privateSurfaceSize is non-zero).
    if (privateSurfaceSize) {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH,
                                        kernelInfo.name.c_str(), privateSurfaceSize);
    }
    // Hint 2: the kernel needs scratch space. The reported value is the per-thread
    // scratch size scaled by computeUnitsUsedForScratch and the kernel's max SIMD size.
    if (patchInfo.mediavfestate) {
        auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace;
        scratchSize *= device.getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo().getMaxSimdSize();
        if (scratchSize > 0) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH,
                                            kernelInfo.name.c_str(), scratchSize);
        }
    }
}

// Patches the default device queue buffer of devQueue into this kernel. Does nothing
// unless the kernel has a pAllocateStatelessDefaultDeviceQueueSurface patch token.
void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
    const auto &patchInfo = kernelInfo.patchInfo;
    if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
        // Stateless path: write the queue buffer's GPU address into cross-thread data.
        if (crossThreadData) {
            auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData()),
                                           patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset);
            patchWithRequiredSize(patchLocation, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize,
                                  static_cast(devQueue->getQueueBuffer()->getGpuAddressToPatch()));
        }
        // Stateful path: program a surface state for the queue buffer.
        if (requiresSshForBuffers()) {
            auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()),
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
            Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
                                    (void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0);
        }
    }
}

// Patches the event pool buffer of devQueue into this kernel. Does nothing unless the
// kernel has a pAllocateStatelessEventPoolSurface patch token.
//   devQueue - device queue that owns the event pool buffer; dereferenced without a
//              null check
void Kernel::patchEventPool(DeviceQueue *devQueue) {
    const auto &patchInfo = kernelInfo.patchInfo;
    if (patchInfo.pAllocateStatelessEventPoolSurface) {
        // Stateless path: write the buffer's GPU address into cross-thread data at the
        // offset and with the size given by the patch token.
        // NOTE(review): the target types of the casts below are not visible in this view.
        if (crossThreadData) {
            auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData()),
                                           patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset);
            patchWithRequiredSize(patchLocation, patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize,
                                  static_cast(devQueue->getEventPoolBuffer()->getGpuAddressToPatch()));
        }
        // Stateful path: program the surface state located at SurfaceStateHeapOffset
        // inside the surface state heap.
        if (requiresSshForBuffers()) {
            auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()),
                                          patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
            Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(),
                                    (void *)devQueue->getEventPoolBuffer()->getGpuAddress(), 0, devQueue->getEventPoolBuffer(), 0, 0);
        }
    }
}

// Writes the max SIMD size of every child block kernel into this kernel's
// cross-thread data. kernelInfo.childrenKernelsIdOffset supplies pairs of
// (block id, byte offset into crossThreadData).
void Kernel::patchBlocksSimdSize() {
    BlockKernelManager *blockManager = program->getBlockKernelManager();

    for (auto &idOffset : kernelInfo.childrenKernelsIdOffset) {

        // The block id must refer to a block known to the manager (debug builds only).
        DEBUG_BREAK_IF(!(idOffset.first < static_cast(blockManager->getCount())));

        const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(idOffset.first);
        // The value is stored as a uint32_t directly at the given byte offset.
        uint32_t *simdSize = reinterpret_cast(&crossThreadData[idOffset.second]);
        *simdSize = blockInfo->getMaxSimdSize();
    }
}

// Returns true when the kernel has a pAllocateSyncBuffer patch token.
bool Kernel::usesSyncBuffer() {
    return (kernelInfo.patchInfo.pAllocateSyncBuffer != nullptr);
}

// Patches the sync buffer allocation into this kernel.
//   device        - device passed on to Buffer::setSurfaceState()
//   gfxAllocation - allocation that backs the sync buffer
//   bufferOffset  - offset added to the allocation's GPU address before patching
// pAllocateSyncBuffer is dereferenced without a null check.
void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) {
    auto &patchInfo = kernelInfo.patchInfo;
    auto bufferPatchAddress = ptrOffset(getCrossThreadData(), patchInfo.pAllocateSyncBuffer->DataParamOffset);
patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize,
                          ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset));

    // Stateful path: the surface state is programmed with the allocation's underlying
    // buffer pointer and its full underlying size.
    if (requiresSshForBuffers()) {
        auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()),
                                      patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset);
        auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
        auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize();
        Buffer::setSurfaceState(&device, surfaceState, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0);
    }
}

// Explicit instantiation of patchReflectionSurface.
// NOTE(review): its template argument is not visible in this view.
template void Kernel::patchReflectionSurface(DeviceQueue *, PrintfHandler *);

// Returns true when the number of patched arguments equals
// kernelInfo.argumentsToPatchNum.
bool Kernel::isPatched() const {
    return patchedArgumentsNum == kernelInfo.argumentsToPatchNum;
}

// Validates the memory flags of an image argument against the access qualifier of
// the kernel argument at argIndex.
//   argIndex - index into kernelArgInfo; not range-checked here
//   argSize  - not used by this function
//   argValue - pointer to the cl_mem handle being set
// Returns CL_SUCCESS when the argument is not an image or the flags are compatible,
// and CL_INVALID_ARG_VALUE when the handle does not resolve to a MemObj or when the
// flags conflict with the qualifier.
cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex,
                                                size_t argSize,
                                                const void *argValue) const {
    if (getKernelInfo().kernelArgInfo[argIndex].isImage) {
        // NOTE(review): the target type of this cast is not visible in this view.
        cl_mem mem = *(static_cast(argValue));
        MemObj *pMemObj = nullptr;
        WithCastToInternal(mem, &pMemObj);
        if (pMemObj) {
            auto accessQualifier = getKernelInfo().kernelArgInfo[argIndex].metadata.accessQualifier;
            cl_mem_flags flags = pMemObj->getMemoryPropertiesFlags();
            // (flags | BIT) == flags holds exactly when BIT is already set in flags:
            // a read-only argument rejects write-only memory and vice versa.
            if ((accessQualifier == KernelArgMetadata::AccessReadOnly && ((flags | CL_MEM_WRITE_ONLY) == flags)) ||
                (accessQualifier == KernelArgMetadata::AccessWriteOnly && ((flags | CL_MEM_READ_ONLY) == flags))) {
                return CL_INVALID_ARG_VALUE;
            }
        } else {
            return CL_INVALID_ARG_VALUE;
        }
    }
    return CL_SUCCESS;
}

// Chooses between the 2D-array and the 3D representation of the registered images.
// Does nothing unless all arguments are patched, the image transformer has registered
// 3D images and canTransformImages() is true.
void Kernel::resolveArgs() {
    if (!Kernel::isPatched() || !imageTransformer->hasRegisteredImages3d() || !canTransformImages())
        return;
    bool canTransformImageTo2dArray = true;
    // The 2D-array form is only used when every sampler argument is transformable;
    // the scan stops at the first one that is not.
    for (uint32_t i = 0; i < patchedArgumentsNum; i++) {
        if (kernelInfo.kernelArgInfo.at(i).isSampler) {
            // The cast result is dereferenced without a null check.
            auto sampler = castToObject(kernelArguments.at(i).object);
            if (sampler->isTransformable()) {
                canTransformImageTo2dArray = true;
            } else {
                canTransformImageTo2dArray = false;
                break;
            }
        }
    }
    if (canTransformImageTo2dArray) {
        imageTransformer->transformImagesTo2dArray(kernelInfo,
kernelArguments, getSurfaceStateHeap());
    } else if (imageTransformer->didTransform()) {
        // A previous call transformed the images; switch them back to 3D.
        imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap());
    }
}

// Returns true when the device's render core family lies in the inclusive range
// IGFX_GEN9_CORE .. IGFX_GEN11LP_CORE.
bool Kernel::canTransformImages() const {
    return device.getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE && device.getHardwareInfo().platform.eRenderCoreFamily <= IGFX_GEN11LP_CORE;
}

// Collects into memObjsForAuxTranslation every buffer argument that is not accessed
// in a purely stateful way and whose graphics allocation has the type
// BUFFER_COMPRESSED. For each collected buffer a KERNEL_ARGUMENT_AUX_TRANSLATION
// performance hint is reported when the program's context provides hints.
void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation) {
    // Upper bound: at most one entry per kernel argument.
    memObjsForAuxTranslation.reserve(getKernelArgsNumber());
    for (uint32_t i = 0; i < getKernelArgsNumber(); i++) {
        if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) {
            // NOTE(review): the template argument of castToObject is not visible in this view.
            auto buffer = castToObject(getKernelArg(i));
            if (buffer && buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
                memObjsForAuxTranslation.insert(buffer);

                auto &context = this->program->getContext();
                if (context.isProvidingPerformanceHints()) {
                    // metadataExtended is dereferenced without a null check.
                    context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
                                                   kernelInfo.name.c_str(), i, kernelInfo.kernelArgInfo.at(i).metadataExtended->argName.c_str());
                }
            }
        }
    }
}

// Appends to out the allocations that need a cache flush for this kernel. Nothing is
// appended when HwHelper::cacheFlushAfterWalkerSupported() returns false for the
// device. Entries are appended in this order: non-null per-argument allocations, the
// program's global surface (if any), then - only when svmAllocationsRequireCacheFlush
// is set - the SVM allocations for which allocationForCacheFlush() is true.
void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const {
    if (false == HwHelper::cacheFlushAfterWalkerSupported(device.getHardwareInfo())) {
        return;
    }
    // Slots of arguments that do not need a flush hold nullptr and are skipped.
    for (GraphicsAllocation *alloc : this->kernelArgRequiresCacheFlush) {
        if (nullptr == alloc) {
            continue;
        }

        out.push_back(alloc);
    }

    auto global = getProgram()->getGlobalSurface();
    if (global != nullptr) {
        out.push_back(global);
    }

    if (svmAllocationsRequireCacheFlush) {
        for (GraphicsAllocation *alloc : kernelSvmGfxAllocations) {
            if (allocationForCacheFlush(alloc)) {
                out.push_back(alloc);
            }
        }
    }
}

// Returns argAllocation->isFlushL3Required(); argAllocation must not be null.
bool Kernel::allocationForCacheFlush(GraphicsAllocation *argAllocation) const {
    return argAllocation->isFlushL3Required();
}

void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation
*argAllocation) { if (argAllocation == nullptr) { kernelArgRequiresCacheFlush[argIndex] = nullptr; } else { if (allocationForCacheFlush(argAllocation)) { kernelArgRequiresCacheFlush[argIndex] = argAllocation; } else { kernelArgRequiresCacheFlush[argIndex] = nullptr; } } } void Kernel::setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset) { DEBUG_BREAK_IF(blockID >= program->getBlockKernelManager()->getCount()); ReflectionSurfaceHelper::setKernelAddressDataBtOffset(getKernelReflectionSurface()->getUnderlyingBuffer(), blockID, offset); } bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() { return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf(); } uint64_t Kernel::getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed) const { uint64_t kernelStartOffset = 0; if (kernelInfo.getGraphicsAllocation()) { kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) { kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad; } } kernelStartOffset += getStartOffset(); auto &hardwareInfo = getDevice().getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } return kernelStartOffset; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/kernel/kernel.h000066400000000000000000000514251363734646600230550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include 
"shared/source/device/device.h" #include "shared/source/helpers/address_patch.h" #include "shared/source/helpers/preamble.h" #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/utilities/stackvec.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/properties_helper.h" #include "opencl/source/kernel/kernel_execution_type.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" #include namespace NEO { struct CompletionStamp; class Buffer; class CommandStreamReceiver; class GraphicsAllocation; class ImageTransformer; class Surface; class PrintfHandler; template <> struct OpenCLObjectMapper<_cl_kernel> { typedef class Kernel DerivedType; }; class Kernel : public BaseObject<_cl_kernel> { public: static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL; static const uint32_t kernelBinaryAlignement = 64; enum kernelArgType { NONE_OBJ, IMAGE_OBJ, BUFFER_OBJ, PIPE_OBJ, SVM_OBJ, SVM_ALLOC_OBJ, SAMPLER_OBJ, ACCELERATOR_OBJ, DEVICE_QUEUE_OBJ, SLM_OBJ }; struct SimpleKernelArgInfo { kernelArgType type; void *object; const void *value; size_t size; GraphicsAllocation *pSvmAlloc; cl_mem_flags svmFlags; bool isPatched = false; bool isStatelessUncacheable = false; }; typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); template static kernel_t *create(program_t *program, const KernelInfo &kernelInfo, cl_int *errcodeRet) { cl_int retVal; kernel_t *pKernel = nullptr; auto clDevice = program->getDevice().template getSpecializedDevice(); pKernel = new kernel_t(program, kernelInfo, *clDevice); retVal = pKernel->initialize(); if (retVal != CL_SUCCESS) { delete pKernel; pKernel = nullptr; } if (errcodeRet) { *errcodeRet = retVal; } if (FileLoggerInstance().enabled()) { std::string source; 
program->getSource(source); FileLoggerInstance().dumpKernel(kernelInfo.name, source); } return pKernel; } Kernel &operator=(const Kernel &) = delete; Kernel(const Kernel &) = delete; ~Kernel() override; static bool isMemObj(kernelArgType kernelArg) { return kernelArg == BUFFER_OBJ || kernelArg == IMAGE_OBJ || kernelArg == PIPE_OBJ; } bool isAuxTranslationRequired() const { return auxTranslationRequired; } char *getCrossThreadData() const { return crossThreadData; } uint32_t getCrossThreadDataSize() const { return crossThreadDataSize; } cl_int initialize(); MOCKABLE_VIRTUAL cl_int cloneKernel(Kernel *pSourceKernel); MOCKABLE_VIRTUAL bool canTransformImages() const; MOCKABLE_VIRTUAL bool isPatched() const; // API entry points cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags); cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc); void setSvmKernelExecInfo(GraphicsAllocation *argValue); void clearSvmKernelExecInfo(); cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; void getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const; cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_int getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_int getSubGroupInfo(cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; const void *getKernelHeap() const; void *getSurfaceStateHeap() const; const void *getDynamicStateHeap() const; size_t getKernelHeapSize() const; size_t getSurfaceStateHeapSize() const; size_t 
getDynamicStateHeapSize() const; size_t getNumberOfBindingTableStates() const; size_t getBindingTableOffset() const { return localBindingTableOffset; } void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize); bool isKernelHeapSubstituted() const; uint64_t getKernelId() const; void setKernelId(uint64_t newKernelId); uint32_t getStartOffset() const; void setStartOffset(uint32_t offset); const std::vector &getKernelArguments() const { return kernelArguments; } size_t getKernelArgsNumber() const { return kernelInfo.kernelArgInfo.size(); } bool requiresSshForBuffers() const { return kernelInfo.requiresSshForBuffers; } const KernelInfo &getKernelInfo() const { return kernelInfo; } const ClDevice &getDevice() const { return device; } Context &getContext() const { return context ? *context : program->getContext(); } void setContext(Context *context) { this->context = context; } Program *getProgram() const { return program; } static uint32_t getScratchSizeValueToProgramMediaVfeState(int scratchSize); uint32_t getScratchSize() { return kernelInfo.patchInfo.mediavfestate ? kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace : 0; } uint32_t getPrivateScratchSize() { return kernelInfo.patchInfo.mediaVfeStateSlot1 ? 
kernelInfo.patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0; } void createReflectionSurface(); template void patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printfHandler); void patchDefaultDeviceQueue(DeviceQueue *devQueue); void patchEventPool(DeviceQueue *devQueue); void patchBlocksSimdSize(); bool usesSyncBuffer(); void patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset); GraphicsAllocation *getKernelReflectionSurface() const { return kernelReflectionSurface; } size_t getInstructionHeapSizeForExecutionModel() const; // Helpers cl_int setArg(uint32_t argIndex, uint32_t argValue); cl_int setArg(uint32_t argIndex, uint64_t argValue); cl_int setArg(uint32_t argIndex, cl_mem argValue); cl_int setArg(uint32_t argIndex, cl_mem argValue, uint32_t mipLevel); // Handlers void setKernelArgHandler(uint32_t argIndex, KernelArgHandler handler); void unsetArg(uint32_t argIndex); cl_int setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgPipe(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgImage(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgImageWithMipLevel(uint32_t argIndex, size_t argSize, const void *argVal, uint32_t mipLevel); cl_int setArgLocal(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgAccelerator(uint32_t argIndex, size_t argSize, const void *argVal); cl_int setArgDevQueue(uint32_t argIndex, size_t argSize, const void *argVal); void storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argObject, const void *argValue, size_t argSize, GraphicsAllocation *argSvmAlloc = nullptr, cl_mem_flags argSvmFlags = 0); const void *getKernelArg(uint32_t argIndex) const; const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const; bool getAllowNonUniform() 
const { return program->getAllowNonUniform(); } bool isVmeKernel() const { return kernelInfo.isVmeWorkload; } bool requiresSpecialPipelineSelectMode() const { return specialPipelineSelectMode; } //residency for kernel surfaces MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver); MOCKABLE_VIRTUAL void getResidency(std::vector &dst); bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; } bool hasPrintfOutput() const; void setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset); cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const; uint32_t *globalWorkOffsetX; uint32_t *globalWorkOffsetY; uint32_t *globalWorkOffsetZ; uint32_t *localWorkSizeX; uint32_t *localWorkSizeY; uint32_t *localWorkSizeZ; uint32_t *localWorkSizeX2; uint32_t *localWorkSizeY2; uint32_t *localWorkSizeZ2; uint32_t *globalWorkSizeX; uint32_t *globalWorkSizeY; uint32_t *globalWorkSizeZ; uint32_t *enqueuedLocalWorkSizeX; uint32_t *enqueuedLocalWorkSizeY; uint32_t *enqueuedLocalWorkSizeZ; uint32_t *numWorkGroupsX; uint32_t *numWorkGroupsY; uint32_t *numWorkGroupsZ; uint32_t *maxWorkGroupSizeForCrossThreadData; uint32_t maxKernelWorkGroupSize = 0; uint32_t *workDim; uint32_t *dataParameterSimdSize; uint32_t *parentEventOffset; uint32_t *preferredWkgMultipleOffset; static uint32_t dummyPatchLocation; std::vector slmSizes; uint32_t allBufferArgsStateful = CL_TRUE; uint32_t slmTotalSize; bool isBuiltIn; const bool isParentKernel; const bool isSchedulerKernel; uint32_t getThreadArbitrationPolicy() const { return threadArbitrationPolicy; } KernelExecutionType getExecutionType() const { return executionType; } bool isUsingSyncBuffer() const { return (kernelInfo.patchInfo.pAllocateSyncBuffer != nullptr); } bool checkIfIsParentKernelAndBlocksUsesPrintf(); bool 
is32Bit() const { return kernelInfo.gpuPointerSize == 4; } int32_t getDebugSurfaceBti() const { if (kernelInfo.patchInfo.pAllocateSystemThreadSurface) { return kernelInfo.patchInfo.pAllocateSystemThreadSurface->BTI; } return -1; } size_t getPerThreadSystemThreadSurfaceSize() const { if (kernelInfo.patchInfo.pAllocateSystemThreadSurface) { return kernelInfo.patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize; } return 0; } std::vector &getPatchInfoDataList() { return patchInfoDataList; }; bool usesOnlyImages() const { return usingImagesOnly; } void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation); MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const; using CacheFlushAllocationsVec = StackVec; void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const; void setAuxTranslationDirection(AuxTranslationDirection auxTranslationDirection) { this->auxTranslationDirection = auxTranslationDirection; } void setUnifiedMemorySyncRequirement(bool isUnifiedMemorySyncRequired) { this->isUnifiedMemorySyncRequired = isUnifiedMemorySyncRequired; } void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue); void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue); void clearUnifiedMemoryExecInfo(); bool areStatelessWritesUsed() { return containsStatelessWrites; } int setKernelThreadArbitrationPolicy(uint32_t propertyValue); cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType); void setThreadArbitrationPolicy(uint32_t policy) { this->threadArbitrationPolicy = policy; } void getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *globalWorkSize, const size_t *globalWorkOffset, size_t *localWorkSize); uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize) const; uint64_t getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed) const; bool 
requiresPerDssBackedBuffer() const; protected: struct ObjectCounts { uint32_t imageCount; uint32_t samplerCount; }; class ReflectionSurfaceHelper { public: static const uint64_t undefinedOffset = (uint64_t)-1; static void setKernelDataHeader(void *reflectionSurface, uint32_t numberOfBlocks, uint32_t parentImages, uint32_t parentSamplers, uint32_t imageOffset, uint32_t samplerOffset) { IGIL_KernelDataHeader *kernelDataHeader = reinterpret_cast(reflectionSurface); kernelDataHeader->m_numberOfKernels = numberOfBlocks; kernelDataHeader->m_ParentKernelImageCount = parentImages; kernelDataHeader->m_ParentSamplerCount = parentSamplers; kernelDataHeader->m_ParentImageDataOffset = imageOffset; kernelDataHeader->m_ParentSamplerParamsOffset = samplerOffset; } static uint32_t setKernelData(void *reflectionSurface, uint32_t offset, std::vector &curbeParamsIn, uint64_t tokenMaskIn, size_t maxConstantBufferSize, size_t samplerCount, const KernelInfo &kernelInfo, const HardwareInfo &hwInfo); static void setKernelAddressData(void *reflectionSurface, uint32_t offset, uint32_t kernelDataOffset, uint32_t samplerHeapOffset, uint32_t constantBufferOffset, uint32_t samplerParamsOffset, uint32_t sshTokensOffset, uint32_t btOffset, const KernelInfo &kernelInfo, const HardwareInfo &hwInfo); static void getCurbeParams(std::vector &curbeParamsOut, uint64_t &tokenMaskOut, uint32_t &firstSSHTokenIndex, const KernelInfo &kernelInfo, const HardwareInfo &hwInfo); static bool compareFunction(IGIL_KernelCurbeParams argFirst, IGIL_KernelCurbeParams argSecond) { if (argFirst.m_parameterType == argSecond.m_parameterType) { if (argFirst.m_parameterType == iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE) { return argFirst.m_patchOffset < argSecond.m_patchOffset; } else { return argFirst.m_sourceOffset < argSecond.m_sourceOffset; } } else { return argFirst.m_parameterType < argSecond.m_parameterType; } } static void setKernelAddressDataBtOffset(void *reflectionSurface, uint32_t blockID, uint32_t btOffset); static 
void setParentImageParams(void *reflectionSurface, std::vector &parentArguments, const KernelInfo &parentKernelInfo); static void setParentSamplerParams(void *reflectionSurface, std::vector &parentArguments, const KernelInfo &parentKernelInfo); template static void patchBlocksCurbe(void *reflectionSurface, uint32_t blockID, uint64_t defaultDeviceQueueCurbeOffset, uint32_t patchSizeDefaultQueue, uint64_t defaultDeviceQueueGpuAddress, uint64_t eventPoolCurbeOffset, uint32_t patchSizeEventPool, uint64_t eventPoolGpuAddress, uint64_t deviceQueueCurbeOffset, uint32_t patchSizeDeviceQueue, uint64_t deviceQueueGpuAddress, uint64_t printfBufferOffset, uint32_t printfBufferSize, uint64_t printfBufferGpuAddress, uint64_t privateSurfaceOffset, uint32_t privateSurfaceSize, uint64_t privateSurfaceGpuAddress); static void patchBlocksCurbeWithConstantValues(void *reflectionSurface, uint32_t blockID, uint64_t globalMemoryCurbeOffset, uint32_t globalMemoryPatchSize, uint64_t globalMemoryGpuAddress, uint64_t constantMemoryCurbeOffset, uint32_t constantMemoryPatchSize, uint64_t constantMemoryGpuAddress, uint64_t privateMemoryCurbeOffset, uint32_t privateMemoryPatchSize, uint64_t privateMemoryGpuAddress); }; void makeArgsResident(CommandStreamReceiver &commandStreamReceiver); void *patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc); // Sets-up both crossThreadData and ssh for given implicit (private/constant, etc.) 
allocation template void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const PatchTokenT &patch); void getParentObjectCounts(ObjectCounts &objectCount); Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg, bool schedulerKernel = false); void provideInitializationHints(); void patchBlocksCurbeWithConstantValues(); void resolveArgs(); void reconfigureKernel(); void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; Program *program; Context *context; const ClDevice &device; const KernelInfo &kernelInfo; std::vector kernelArguments; std::vector kernelArgHandlers; std::vector kernelSvmGfxAllocations; std::vector kernelUnifiedMemoryGfxAllocations; AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; size_t numberOfBindingTableStates; size_t localBindingTableOffset; std::unique_ptr pSshLocal; uint32_t sshLocalSize; char *crossThreadData; uint32_t crossThreadDataSize; GraphicsAllocation *privateSurface; uint64_t privateSurfaceSize; GraphicsAllocation *kernelReflectionSurface; bool usingSharedObjArgs; bool usingImagesOnly = false; bool auxTranslationRequired = false; bool containsStatelessWrites = true; uint32_t patchedArgumentsNum = 0; uint32_t startOffset = 0; uint32_t statelessUncacheableArgsCount = 0; uint32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; KernelExecutionType executionType = KernelExecutionType::Default; std::vector patchInfoDataList; std::unique_ptr imageTransformer; bool specialPipelineSelectMode = false; bool svmAllocationsRequireCacheFlush = false; std::vector kernelArgRequiresCacheFlush; UnifiedMemoryControls unifiedMemoryControls; bool isUnifiedMemorySyncRequired = true; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/kernel/kernel.inl000066400000000000000000000106531363734646600234060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/program/printf_handler.h" namespace NEO { template void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printfHandler) { void *reflectionSurface = kernelReflectionSurface->getUnderlyingBuffer(); BlockKernelManager *blockManager = program->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); // clang-format off uint64_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ? pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset : ReflectionSurfaceHelper::undefinedOffset; uint64_t eventPoolOffset = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface ? pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset : ReflectionSurfaceHelper::undefinedOffset; uint64_t deviceQueueOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface ? pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize : 0; uint32_t eventPoolSize = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface ? pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize : 0; uint32_t deviceQueueSize = 0; uint64_t printfBufferOffset = pBlockInfo->patchInfo.pAllocateStatelessPrintfSurface ? pBlockInfo->patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset : ReflectionSurfaceHelper::undefinedOffset; uint32_t printfBufferPatchSize = pBlockInfo->patchInfo.pAllocateStatelessPrintfSurface ? 
pBlockInfo->patchInfo.pAllocateStatelessPrintfSurface->DataParamSize : 0; uint64_t printfGpuAddress = 0; // clang-format on uint64_t privateSurfaceOffset = ReflectionSurfaceHelper::undefinedOffset; uint32_t privateSurfacePatchSize = 0; uint64_t privateSurfaceGpuAddress = 0; auto privateSurface = blockManager->getPrivateSurface(i); UNRECOVERABLE_IF(pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface != nullptr && pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize && privateSurface == nullptr); if (privateSurface) { privateSurfaceOffset = pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamOffset; privateSurfacePatchSize = pBlockInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamSize; privateSurfaceGpuAddress = privateSurface->getGpuAddressToPatch(); } if (printfHandler) { GraphicsAllocation *printfSurface = printfHandler->getSurface(); if (printfSurface) printfGpuAddress = printfSurface->getGpuAddress(); } for (const auto &arg : pBlockInfo->kernelArgInfo) { if (arg.isDeviceQueue) { deviceQueueOffset = arg.kernelArgPatchInfoVector[0].crossthreadOffset; deviceQueueSize = arg.kernelArgPatchInfoVector[0].size; break; } } ReflectionSurfaceHelper::patchBlocksCurbe(reflectionSurface, i, defaultQueueOffset, defaultQueueSize, devQueue->getQueueBuffer()->getGpuAddress(), eventPoolOffset, eventPoolSize, devQueue->getEventPoolBuffer()->getGpuAddress(), deviceQueueOffset, deviceQueueSize, devQueue->getQueueBuffer()->getGpuAddress(), printfBufferOffset, printfBufferPatchSize, printfGpuAddress, privateSurfaceOffset, privateSurfacePatchSize, privateSurfaceGpuAddress); } ReflectionSurfaceHelper::setParentImageParams(reflectionSurface, this->kernelArguments, this->kernelInfo); ReflectionSurfaceHelper::setParentSamplerParams(reflectionSurface, this->kernelArguments, this->kernelInfo); } } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/kernel/kernel_execution_type.h000066400000000000000000000003251363734646600261720ustar00rootroot00000000000000
/*
 * Copyright (C) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

namespace NEO {
// Execution-type tag for a kernel: Default (0) or Concurrent (1).
enum class KernelExecutionType {
    Default = 0x0u,
    Concurrent = 0x1u
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/kernel/kernel_extra.cpp000066400000000000000000000023361363734646600246100ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/debug_settings/debug_settings_manager.h"

#include "opencl/source/kernel/kernel.h"

namespace NEO {
// This implementation never requests a cache flush command; the command queue
// argument is not inspected.
bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
    return false;
}
// Intentionally empty in this implementation.
void Kernel::reconfigureKernel() {
}
// Translates a CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_*_INTEL value into
// the matching ThreadArbitrationPolicy and stores it on the kernel.
// Returns CL_SUCCESS for the three recognised policies. Any other value resets
// the stored policy to ThreadArbitrationPolicy::NotPresent and returns
// CL_INVALID_VALUE.
int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
    if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) {
        this->threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
    } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL) {
        this->threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;
    } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL) {
        this->threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobinAfterDependency;
    } else {
        // Note: the previously stored policy is overwritten even though the call fails.
        this->threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
        return CL_INVALID_VALUE;
    }
    return CL_SUCCESS;
}

// Returns the current value of the ForcePerDssBackedBufferProgramming debug flag.
bool Kernel::requiresPerDssBackedBuffer() const {
    return DebugManager.flags.ForcePerDssBackedBufferProgramming.get();
}
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/kernel/kernel_info_cl.h000066400000000000000000000033271363734646600245440ustar00rootroot00000000000000
/*
 * Copyright (C) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "opencl/source/program/kernel_arg_info.h"

#include "CL/cl.h"

namespace NEO {

// Converts the internal access qualifier into the corresponding
// CL_KERNEL_ARG_ACCESS_* constant; values not listed below yield 0.
constexpr cl_kernel_arg_access_qualifier asClKernelArgAccessQualifier(KernelArgMetadata::AccessQualifier accessQualifier) {
    using namespace KernelArgMetadata;
    switch (accessQualifier) {
    default:
        return 0U;
    case AccessNone:
        return CL_KERNEL_ARG_ACCESS_NONE;
    case AccessReadOnly:
        return CL_KERNEL_ARG_ACCESS_READ_ONLY;
    case AccessWriteOnly:
        return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
    case AccessReadWrite:
        return CL_KERNEL_ARG_ACCESS_READ_WRITE;
    }
}

// Converts the internal address-space qualifier into the corresponding
// CL_KERNEL_ARG_ADDRESS_* constant; values not listed below yield 0.
constexpr cl_kernel_arg_address_qualifier asClKernelArgAddressQualifier(KernelArgMetadata::AddressSpaceQualifier addressQualifier) {
    using namespace KernelArgMetadata;
    switch (addressQualifier) {
    default:
        return 0U;
    case AddrGlobal:
        return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    case AddrLocal:
        return CL_KERNEL_ARG_ADDRESS_LOCAL;
    case AddrPrivate:
        return CL_KERNEL_ARG_ADDRESS_PRIVATE;
    case AddrConstant:
        return CL_KERNEL_ARG_ADDRESS_CONSTANT;
    }
}

// Builds the CL_KERNEL_ARG_TYPE_* bitmask by OR-ing one bit for every
// qualifier flag (const, volatile, restrict, pipe) that is set.
constexpr cl_kernel_arg_type_qualifier asClKernelArgTypeQualifier(KernelArgMetadata::TypeQualifiers typeQualifiers) {
    using namespace KernelArgMetadata;
    cl_kernel_arg_type_qualifier ret = 0U;
    ret |= (typeQualifiers.constQual) ? CL_KERNEL_ARG_TYPE_CONST : 0U;
    ret |= (typeQualifiers.volatileQual) ? CL_KERNEL_ARG_TYPE_VOLATILE : 0U;
    ret |= (typeQualifiers.restrictQual) ? CL_KERNEL_ARG_TYPE_RESTRICT : 0U;
    ret |= (typeQualifiers.pipeQual) ? CL_KERNEL_ARG_TYPE_PIPE : 0U;
    return ret;
}

} // namespace NEO
compute-runtime-20.13.16352/opencl/source/mem_obj/000077500000000000000000000000001363734646600215455ustar00rootroot00000000000000
compute-runtime-20.13.16352/opencl/source/mem_obj/CMakeLists.txt000066400000000000000000000025311363734646600243060ustar00rootroot00000000000000
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

set(RUNTIME_SRCS_MEM_OBJ
  ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/buffer.h
  ${CMAKE_CURRENT_SOURCE_DIR}/buffer_base.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/buffer_bdw_plus.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/buffer_factory_init.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/image.h
  ${CMAKE_CURRENT_SOURCE_DIR}/image.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/image_tgllp_plus.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/image_factory_init.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.h
  ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.h
  ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mem_obj_helper.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper.h
  ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
  ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/buffer_ext.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl
)

target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_MEM_OBJ ${RUNTIME_SRCS_MEM_OBJ})
add_subdirectories()
compute-runtime-20.13.16352/opencl/source/mem_obj/buffer.cpp000066400000000000000000000655761363734646600235450ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/mem_obj/buffer.h"

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/memory_manager/host_ptr_manager.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/memory_manager/unified_memory_manager.h"

#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/memory_properties_flags_helpers.h"
#include "opencl/source/helpers/validators.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"

// NOTE(review): in this copy of the file every "<...>" span had been stripped
// (cast target types, isAligned<> / ArrayRef<> arguments) and "&copy..." had
// been turned into a (c) sign. Those tokens are reconstructed below from the
// types of the surrounding expressions - confirm against the upstream file.

namespace NEO {

// Per-core-family table of buffer factory functions, indexed by eRenderCoreFamily.
BufferFuncs bufferFactory[IGFX_MAX_CORE] = {};

// Forwards all arguments to MemObj with object type CL_MEM_OBJECT_BUFFER, then
// stamps the buffer magic and records `size` as the minimal host pointer size.
Buffer::Buffer(Context *context,
               MemoryPropertiesFlags memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               GraphicsAllocation *gfxAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : MemObj(context,
             CL_MEM_OBJECT_BUFFER,
             memoryProperties,
             flags,
             flagsIntel,
             size,
             memoryStorage,
             hostPtr,
             gfxAllocation,
             zeroCopy,
             isHostPtrSVM,
             isObjectRedescribed) {
    magic = objectMagic;
    setHostPtrMinSize(size);
}

// Creates an empty buffer object with no context, storage or allocation.
Buffer::Buffer() : MemObj(nullptr, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 0, nullptr, nullptr, nullptr, false, false, false) {
}

Buffer::~Buffer() = default;

// A buffer is a sub-buffer when it has an associated (parent) memory object.
bool Buffer::isSubBuffer() {
    return this->associatedMemObject != nullptr;
}

// Checks whether `offset` may be used as a sub-buffer origin.
// Compressed buffers additionally require alignment to the device's
// memBaseAddressAlign (reported in bits, hence the division by 8); every
// buffer requires 4-byte alignment.
bool Buffer::isValidSubBufferOffset(size_t offset) {
    if (this->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
        // From spec: "origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value"
        if (!isAligned(offset, this->getContext()->getDevice(0)->getDeviceInfo().memBaseAddressAlign / 8u)) {
            return false;
        }
    }
    cl_uint address_align = 32; // 4 byte alignment
    if ((offset & (address_align / 8 - 1)) == 0) {
        return true;
    }

    return false;
}

// API-level entry: validates the context handle, the memory properties, the
// requested size and the hostPtr/flags combination, then creates the buffer.
// On failure `retVal` holds the error code and `buffer` is left untouched.
//   CL_INVALID_VALUE       - memory properties rejected by MemObjHelper
//   CL_INVALID_BUFFER_SIZE - size is 0, or exceeds maxMemAllocSize without
//                            CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL
//   CL_INVALID_HOST_PTR    - hostPtr presence does not match
//                            CL_MEM_COPY_HOST_PTR / CL_MEM_USE_HOST_PTR
void Buffer::validateInputAndCreateBuffer(cl_context &context,
                                          MemoryPropertiesFlags memoryProperties,
                                          cl_mem_flags flags,
                                          cl_mem_flags_intel flagsIntel,
                                          size_t size,
                                          void *hostPtr,
                                          cl_int &retVal,
                                          cl_mem &buffer) {
    Context *pContext = nullptr;
    retVal = validateObjects(WithCastToInternal(context, &pContext));
    if (retVal != CL_SUCCESS) {
        return;
    }

    if (!MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, *pContext)) {
        retVal = CL_INVALID_VALUE;
        return;
    }

    auto pDevice = pContext->getDevice(0);
    bool allowCreateBuffersWithUnrestrictedSize = isValueSet(flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) ||
                                                  isValueSet(flagsIntel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL);

    if (size == 0 || (size > pDevice->getHardwareCapabilities().maxMemAllocSize && !allowCreateBuffersWithUnrestrictedSize)) {
        retVal = CL_INVALID_BUFFER_SIZE;
        return;
    }

    /* Check the host ptr and data */
    bool expectHostPtr = (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) != 0;
    if ((hostPtr == nullptr) == expectHostPtr) {
        retVal = CL_INVALID_HOST_PTR;
        return;
    }

    // create the buffer
    buffer = create(pContext, memoryProperties, flags, flagsIntel, size, hostPtr, retVal);
}

// Convenience overload: derives the memory properties from `flags` alone
// (no Intel flags) and forwards to the full create().
Buffer *Buffer::create(Context *context,
                       cl_mem_flags flags,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    return create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, size, hostPtr, errcodeRet);
}

// Creates a buffer of `size` bytes in `context`.
//
// Steps: choose the allocation type, validate hostPtr (checkMemory), decide
// whether zero-copy is possible and whether new memory must be allocated,
// reuse an SVM allocation when hostPtr points into one, allocate (with a
// host-memory retry for read-only USE_HOST_PTR buffers), construct the
// hardware-specific Buffer object, create a map allocation for non-zero-copy
// USE_HOST_PTR buffers, and finally copy the initial contents from hostPtr.
//
// Returns the new buffer, or nullptr with `errcodeRet` set to
// CL_INVALID_HOST_PTR (from checkMemory), CL_OUT_OF_HOST_MEMORY (allocation or
// object construction failed) or CL_OUT_OF_RESOURCES (initial copy failed).
Buffer *Buffer::create(Context *context,
                       MemoryPropertiesFlags memoryProperties,
                       cl_mem_flags flags,
                       cl_mem_flags_intel flagsIntel,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    Buffer *pBuffer = nullptr;
    errcodeRet = CL_SUCCESS;

    GraphicsAllocation *memory = nullptr;
    GraphicsAllocation *mapAllocation = nullptr;
    bool zeroCopyAllowed = true;
    bool isHostPtrSVM = false;

    bool alignementSatisfied = true;
    bool allocateMemory = true;
    bool copyMemoryFromHostPtr = false;
    auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex();
    MemoryManager *memoryManager = context->getMemoryManager();
    UNRECOVERABLE_IF(!memoryManager);

    GraphicsAllocation::AllocationType allocationType = getGraphicsAllocationType(
        memoryProperties,
        *context,
        HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo()),
        memoryManager->isLocalMemorySupported(rootDeviceIndex),
        HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(), size));

    checkMemory(memoryProperties, size, hostPtr, errcodeRet, alignementSatisfied, copyMemoryFromHostPtr, memoryManager);

    if (errcodeRet != CL_SUCCESS) {
        return nullptr;
    }

    if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
        zeroCopyAllowed = false;
        allocateMemory = true;
    }

    if (allocationType == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
        if (memoryProperties.flags.useHostPtr) {
            if (alignementSatisfied) {
                // Aligned user memory can back the buffer directly.
                allocateMemory = false;
                zeroCopyAllowed = true;
            } else {
                zeroCopyAllowed = false;
                allocateMemory = true;
            }
        }
    }

    if (memoryProperties.flags.useHostPtr) {
        if (DebugManager.flags.DisableZeroCopyForUseHostPtr.get()) {
            zeroCopyAllowed = false;
            allocateMemory = true;
        }

        // hostPtr pointing into an SVM allocation: reuse that allocation.
        auto svmManager = context->getSVMAllocsManager();
        if (svmManager) {
            auto svmData = svmManager->getSVMAlloc(hostPtr);
            if (svmData) {
                memory = svmData->gpuAllocation;
                allocationType = memory->getAllocationType();
                isHostPtrSVM = true;
                zeroCopyAllowed = memory->getAllocationType() == GraphicsAllocation::AllocationType::SVM_ZERO_COPY;
                copyMemoryFromHostPtr = false;
                allocateMemory = false;
                mapAllocation = svmData->cpuAllocation;
            }
        }
    }

    if (context->isSharedContext) {
        zeroCopyAllowed = true;
        copyMemoryFromHostPtr = false;
        allocateMemory = false;
    }

    if (hostPtr && context->isProvidingPerformanceHints()) {
        if (zeroCopyAllowed) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, hostPtr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
        }
    }

    if (DebugManager.flags.DisableZeroCopyForBuffers.get()) {
        zeroCopyAllowed = false;
    }

    if (allocateMemory && context->isProvidingPerformanceHints()) {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
    }

    if (!memory) {
        AllocationProperties allocProperties = MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryProperties, allocateMemory, size, allocationType, context->areMultiStorageAllocationsPreferred(), context->getDevice(0)->getHardwareInfo());
        memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr);
    }

    if (allocateMemory && memory && MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) {
        memoryManager->addAllocationToHostPtrManager(memory);
    }

    //if allocation failed for CL_MEM_USE_HOST_PTR case retry with non zero copy path
    if (memoryProperties.flags.useHostPtr && !memory && Buffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)) {
        allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
        zeroCopyAllowed = false;
        copyMemoryFromHostPtr = true;
        AllocationProperties allocProperties = MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryProperties, true, size, allocationType, context->areMultiStorageAllocationsPreferred(), context->getDevice(0)->getHardwareInfo());
        memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
    }

    if (!memory) {
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return nullptr;
    }

    if (!MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) {
        // Non-system memory cannot be shared with the host pointer, so the
        // initial contents have to be copied (unless the memory is SVM).
        zeroCopyAllowed = false;
        if (hostPtr) {
            if (!isHostPtrSVM) {
                copyMemoryFromHostPtr = true;
            }
        }
    } else if (allocationType == GraphicsAllocation::AllocationType::BUFFER) {
        allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    }

    memory->setAllocationType(allocationType);
    memory->setMemObjectsAllocationWithWritableFlags(!(memoryProperties.flags.readOnly || memoryProperties.flags.hostReadOnly || memoryProperties.flags.hostNoAccess));

    pBuffer = createBufferHw(context,
                             memoryProperties,
                             flags,
                             flagsIntel,
                             size,
                             memory->getUnderlyingBuffer(),
                             (memoryProperties.flags.useHostPtr) ? hostPtr : nullptr,
                             memory,
                             zeroCopyAllowed,
                             isHostPtrSVM,
                             false);

    if (!pBuffer) {
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        // An SVM-backed allocation was only looked up through the context's SVM
        // manager above, not allocated here, so it must not be freed on this path.
        if (!isHostPtrSVM) {
            memoryManager->removeAllocationFromHostPtrManager(memory);
            memoryManager->freeGraphicsMemory(memory);
        }
        return nullptr;
    }

    printDebugString(DebugManager.flags.LogMemoryObject.get(), stdout,
                     "\nCreated Buffer: Handle %p, hostPtr %p, size %llu, memoryStorage %p, GPU address %#llx, memoryPool:%du\n",
                     pBuffer, hostPtr, size, memory->getUnderlyingBuffer(), memory->getGpuAddress(), memory->getMemoryPool());

    if (memoryProperties.flags.useHostPtr) {
        if (!zeroCopyAllowed && !isHostPtrSVM) {
            // Separate allocation wrapping the user pointer, used for map operations.
            AllocationProperties properties{rootDeviceIndex, false, size, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false};
            properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
            mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
        }
    }

    Buffer::provideCompressionHint(allocationType, context, pBuffer);

    pBuffer->mapAllocation = mapAllocation;
    pBuffer->setHostPtrMinSize(size);

    if (copyMemoryFromHostPtr) {
        auto gmm = memory->getDefaultGmm();
        bool gpuCopyRequired = (gmm && gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool());

        if (gpuCopyRequired) {
            // Try the blitter first; fall back to a blocking write on the special queue.
            auto blitMemoryToAllocationResult = context->blitMemoryToAllocation(*pBuffer, memory, hostPtr, {size, 1, 1});

            if (blitMemoryToAllocationResult != BlitOperationResult::Success) {
                auto cmdQ = context->getSpecialQueue();
                if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0, size, hostPtr, nullptr, 0, nullptr, nullptr)) {
                    errcodeRet = CL_OUT_OF_RESOURCES;
                }
            }
        } else {
            memcpy_s(memory->getUnderlyingBuffer(), size, hostPtr, size);
        }
    }

    if (errcodeRet != CL_SUCCESS) {
        pBuffer->release();
        return nullptr;
    }

    if (DebugManager.flags.MakeAllBuffersResident.get()) {
        auto graphicsAllocation = pBuffer->getGraphicsAllocation();
        context->getDevice(0u)->getRootDeviceEnvironment().memoryOperationsInterface->makeResident(ArrayRef<GraphicsAllocation *>(&graphicsAllocation, 1));
    }

    return pBuffer;
}

// Wraps an externally provided graphics allocation in a Buffer and attaches
// the sharing handler. Returns nullptr when the hardware factory fails to
// create the object (the handler is then left untouched).
Buffer *Buffer::createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler,
                                   GraphicsAllocation *graphicsAllocation) {
    auto sharedBuffer = createBufferHw(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, graphicsAllocation->getUnderlyingBufferSize(), nullptr, nullptr, graphicsAllocation, false, false, false);

    // createBufferHw may return nullptr; do not dereference it in that case.
    if (sharedBuffer) {
        sharedBuffer->setSharingHandler(sharingHandler);
    }
    return sharedBuffer;
}

// Validates hostPtr against the use/copy-host-ptr properties.
// Outputs:
//   errcodeRet            - CL_SUCCESS or CL_INVALID_HOST_PTR
//   alignementSatisfied   - false when, for useHostPtr, hostPtr or size is not
//                           cache-line aligned or hostPtr lies below the
//                           memory manager's minimal address
//   copyMemoryFromHostPtr - true when buffer contents must be copied from hostPtr
void Buffer::checkMemory(MemoryPropertiesFlags memoryProperties,
                         size_t size,
                         void *hostPtr,
                         cl_int &errcodeRet,
                         bool &alignementSatisfied,
                         bool &copyMemoryFromHostPtr,
                         MemoryManager *memoryManager) {
    errcodeRet = CL_SUCCESS;
    alignementSatisfied = true;
    copyMemoryFromHostPtr = false;
    uintptr_t minAddress = 0;
    auto memRestrictions = memoryManager->getAlignedMallocRestrictions();
    if (memRestrictions) {
        minAddress = memRestrictions->minAddress;
    }

    // A host pointer is only legal together with useHostPtr or copyHostPtr.
    if (hostPtr) {
        if (!(memoryProperties.flags.useHostPtr || memoryProperties.flags.copyHostPtr)) {
            errcodeRet = CL_INVALID_HOST_PTR;
            return;
        }
    }

    if (memoryProperties.flags.useHostPtr) {
        if (hostPtr) {
            // Reject pointers that refer to a driver-owned host-pointer fragment.
            auto fragment = memoryManager->getHostPtrManager()->getFragment(hostPtr);
            if (fragment && fragment->driverAllocation) {
                errcodeRet = CL_INVALID_HOST_PTR;
                return;
            }
            if (alignUp(hostPtr, MemoryConstants::cacheLineSize) != hostPtr ||
                alignUp(size, MemoryConstants::cacheLineSize) != size ||
                minAddress > reinterpret_cast<uintptr_t>(hostPtr)) {
                alignementSatisfied = false;
                copyMemoryFromHostPtr = true;
            }
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }

    if (memoryProperties.flags.copyHostPtr) {
        if (hostPtr) {
            copyMemoryFromHostPtr = true;
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }
    return;
}

// Selects the allocation type for a new buffer:
//   shared context or forceSharedPhysicalMemory -> BUFFER_HOST_MEMORY
//   useHostPtr without local memory             -> BUFFER_HOST_MEMORY
//   suitable for render compression             -> BUFFER_COMPRESSED
//   otherwise                                   -> BUFFER
GraphicsAllocation::AllocationType Buffer::getGraphicsAllocationType(const MemoryPropertiesFlags &properties, Context &context,
                                                                     bool renderCompressedBuffers, bool isLocalMemoryEnabled,
                                                                     bool preferCompression) {
    if (context.isSharedContext || properties.flags.forceSharedPhysicalMemory) {
        return GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    }

    if (properties.flags.useHostPtr && !isLocalMemoryEnabled) {
        return GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    }

    if (MemObjHelper::isSuitableForRenderCompression(renderCompressedBuffers, properties, context, preferCompression)) {
        return GraphicsAllocation::AllocationType::BUFFER_COMPRESSED;
    }
    return GraphicsAllocation::AllocationType::BUFFER;
}

bool Buffer::isReadOnlyMemoryPermittedByFlags(const MemoryPropertiesFlags &properties) {
    // Host won't access or will only read and kernel will only read
    return (properties.flags.hostNoAccess || properties.flags.hostReadOnly) && properties.flags.readOnly;
}

// Creates a sub-buffer that views `region` of this buffer. The sub-buffer
// shares this buffer's graphics allocation, inherits its zero-copy / SVM
// state and sharing handler, and holds an internal reference on the parent.
Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
                                cl_mem_flags_intel flagsIntel,
                                const cl_buffer_region *region,
                                cl_int &errcodeRet) {
    DEBUG_BREAK_IF(nullptr == createFunction);
    MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0);
    auto buffer = createFunction(this->context, memoryProperties, flags, 0, region->size,
                                 ptrOffset(this->memoryStorage, region->origin),
                                 this->hostPtr ? ptrOffset(this->hostPtr, region->origin) : nullptr,
                                 this->graphicsAllocation,
                                 this->isZeroCopy, this->isHostPtrSVM, false);

    if (this->context->isProvidingPerformanceHints()) {
        this->context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, SUBBUFFER_SHARES_MEMORY, static_cast<cl_mem>(this));
    }

    buffer->associatedMemObject = this;
    buffer->offset = region->origin;
    buffer->setParentSharingHandler(this->getSharingHandler());
    this->incRefInternal();

    errcodeRet = CL_SUCCESS;
    return buffer;
}

// Writes this buffer's GPU address (plus the sub-buffer offset) into `memory`
// using `patchSize` bytes and returns the patched address. With
// set32BitAddressing the allocation's "address to patch" is used instead of
// the full GPU address.
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
    // Subbuffers have offset that graphicsAllocation is not aware of
    uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
    DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) ||
                     (this->getCpuAddress() == nullptr && this->getGraphicsAllocation()->peekSharedHandle())));

    patchWithRequiredSize(memory, patchSize, addressToPatch);

    return addressToPatch;
}

// Fills in defaulted (zero) row/slice pitches for a rectangular transfer and
// validates them against `region` and the buffer size.
// Returns false when a pitch is smaller than the region it must hold, a slice
// pitch is not a multiple of its row pitch, a row pitch is zero, or the
// rectangle reaches past the end of the buffer.
bool Buffer::bufferRectPitchSet(const size_t *bufferOrigin,
                                const size_t *region,
                                size_t &bufferRowPitch,
                                size_t &bufferSlicePitch,
                                size_t &hostRowPitch,
                                size_t &hostSlicePitch) {
    if (bufferRowPitch == 0)
        bufferRowPitch = region[0];
    if (bufferSlicePitch == 0)
        bufferSlicePitch = region[1] * bufferRowPitch;

    if (hostRowPitch == 0)
        hostRowPitch = region[0];
    if (hostSlicePitch == 0)
        hostSlicePitch = region[1] * hostRowPitch;

    // A zero row pitch (only possible when region[0] == 0) would make the
    // modulo checks below divide by zero; such a rectangle is invalid anyway.
    if (bufferRowPitch == 0 || hostRowPitch == 0) {
        return false;
    }

    if (bufferRowPitch < region[0] ||
        hostRowPitch < region[0]) {
        return false;
    }
    if ((bufferSlicePitch < region[1] * bufferRowPitch || bufferSlicePitch % bufferRowPitch != 0) ||
        (hostSlicePitch < region[1] * hostRowPitch || hostSlicePitch % hostRowPitch != 0)) {
        return false;
    }

    if ((bufferOrigin[2] + region[2] - 1) * bufferSlicePitch + (bufferOrigin[1] + region[1] - 1) * bufferRowPitch + bufferOrigin[0] + region[0] > this->getSize()) {
        return false;
    }
    return true;
}

// Copies `copySize` bytes from src + copyOffset to dst + copyOffset.
void Buffer::transferData(void *dst, void *src, size_t copySize, size_t copyOffset) {
    DBG_LOG(LogMemoryObject, __FUNCTION__, " hostPtr: ", hostPtr, ", size: ", copySize, ", offset: ", copyOffset, ", memoryStorage: ", memoryStorage);
    auto dstPtr = ptrOffset(dst, copyOffset);
    auto srcPtr = ptrOffset(src, copyOffset);
    memcpy_s(dstPtr, copySize, srcPtr, copySize);
}

// Copies the given range (first dimension only) from buffer storage to hostPtr.
void Buffer::transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    transferData(hostPtr, memoryStorage, copySize[0], copyOffset[0]);
}

// Copies the given range (first dimension only) from hostPtr to buffer storage.
void Buffer::transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    transferData(memoryStorage, hostPtr, copySize[0], copyOffset[0]);
}

// Returns the number of bytes, counted from the start of the host memory, that
// a rectangular region with the given origin and pitches touches.
// region[1] and region[2] are expected to be non-zero (the "- 1" terms would
// wrap around otherwise).
size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch) {
    size_t hostPtrOffsetInBytes = origin[2] * slicePitch + origin[1] * rowPitch + origin[0];
    size_t hostPtrRegionSizeInbytes = region[0] + rowPitch * (region[1] - 1) + slicePitch * (region[2] - 1);
    size_t hostPtrSize = hostPtrOffsetInBytes + hostPtrRegionSizeInbytes;
    return hostPtrSize;
}

// CPU read/write is disallowed when explicitly disabled, when the buffer is
// compressed, or when the allocation carries a shared handle.
bool Buffer::isReadWriteOnCpuAllowed() {
    if (forceDisallowCPUCopy) {
        return false;
    }

    if (this->isCompressed()) {
        return false;
    }

    if (graphicsAllocation->peekSharedHandle() != 0) {
        return false;
    }

    return true;
}

// Heuristic: is a CPU copy preferable to a GPU transfer for this pointer/size?
// Only ever true for allocations placed in system memory.
bool Buffer::isReadWriteOnCpuPreffered(void *ptr, size_t size) {
    if (MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) {
        //if buffer is not zero copy and pointer is aligned it will be more beneficial to do the transfer on GPU
        if (!isMemObjZeroCopy() && (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) == 0) {
            return false;
        }

        //on low power devices larger transfers are better on the GPU
        if (context->getDevice(0)->getDeviceInfo().platformLP && size > maxBufferSizeForReadWriteOnCpu) {
            return false;
        }
        return true;
    }

    return false;
}

// Creates the hardware-specific Buffer through the factory registered for the
// context's render core family and remembers the factory on the new object.
// Returns nullptr when the factory does.
Buffer *Buffer::createBufferHw(Context *context,
                               MemoryPropertiesFlags memoryProperties,
                               cl_mem_flags flags,
                               cl_mem_flags_intel flagsIntel,
                               size_t size,
                               void *memoryStorage,
                               void *hostPtr,
                               GraphicsAllocation *gfxAllocation,
                               bool zeroCopy,
                               bool isHostPtrSVM,
                               bool isImageRedescribed) {
    const auto device = context->getDevice(0);
    const auto &hwInfo = device->getHardwareInfo();

    auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == funcCreate);
    auto pBuffer = funcCreate(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                              zeroCopy, isHostPtrSVM, isImageRedescribed);
    DEBUG_BREAK_IF(nullptr == pBuffer);
    if (pBuffer) {
        pBuffer->createFunction = funcCreate;
    }
    return pBuffer;
}

// Creates a context-less hardware Buffer for `device` and binds it to the
// device's execution environment and root device environment.
// Returns nullptr when the factory fails to create the object.
Buffer *Buffer::createBufferHwFromDevice(const Device *device,
                                         cl_mem_flags flags,
                                         cl_mem_flags_intel flagsIntel,
                                         size_t size,
                                         void *memoryStorage,
                                         void *hostPtr,
                                         GraphicsAllocation *gfxAllocation,
                                         size_t offset,
                                         bool zeroCopy,
                                         bool isHostPtrSVM,
                                         bool isImageRedescribed) {

    const auto &hwInfo = device->getHardwareInfo();

    auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == funcCreate);
    MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0);
    auto pBuffer = funcCreate(nullptr, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                              zeroCopy, isHostPtrSVM, isImageRedescribed);
    // Mirror createBufferHw: the factory result may be null.
    DEBUG_BREAK_IF(nullptr == pBuffer);
    if (!pBuffer) {
        return nullptr;
    }
    pBuffer->offset = offset;
    pBuffer->executionEnvironment = device->getExecutionEnvironment();
    pBuffer->rootDeviceEnvironment = pBuffer->executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()].get();
    return pBuffer;
}

// Returns the MOCS value for this buffer: the regular OCL-buffer usage unless
// L3 is disabled, the object is uncacheable for surface state, or it is a
// writable zero-copy buffer whose address or size is not cache-line aligned -
// in which case the "cacheline misaligned" usage is returned.
uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const {
    uint64_t bufferAddress = 0;
    size_t bufferSize = 0;
    if (getGraphicsAllocation()) {
        bufferAddress = getGraphicsAllocation()->getGpuAddress();
        bufferSize = getGraphicsAllocation()->getUnderlyingBufferSize();
    } else {
        bufferAddress = reinterpret_cast<uint64_t>(getHostPtr());
        bufferSize = getSize();
    }
    bufferAddress += this->offset;

    bool readOnlyMemObj = isValueSet(getMemoryPropertiesFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
    bool alignedMemObj = isAligned<MemoryConstants::cacheLineSize>(bufferAddress) &&
                         isAligned<MemoryConstants::cacheLineSize>(bufferSize);

    auto gmmHelper = rootDeviceEnvironment->getGmmHelper();
    if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    } else {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    }
}

// When the allocation has a default Gmm, its isRenderCompressed flag decides;
// otherwise the buffer counts as compressed iff its allocation type is
// BUFFER_COMPRESSED.
bool Buffer::isCompressed() const {
    if (this->getGraphicsAllocation()->getDefaultGmm()) {
        return this->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed;
    }
    if (this->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
        return true;
    }

    return false;
}

// Programs `surfaceState` for a raw (SVM) memory range by building a temporary
// context-less Buffer around it. The temporary object is detached from
// `gfxAlloc` before deletion.
void Buffer::setSurfaceState(const Device *device,
                             void *surfaceState,
                             size_t svmSize,
                             void *svmPtr,
                             size_t offset,
                             GraphicsAllocation *gfxAlloc,
                             cl_mem_flags flags,
                             cl_mem_flags_intel flagsIntel) {
    auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, gfxAlloc, offset, true, false, false);
    // Without the temporary buffer the surface state cannot be programmed.
    UNRECOVERABLE_IF(buffer == nullptr);
    buffer->setArgStateful(surfaceState, false, false, false, false);
    buffer->graphicsAllocation = nullptr;
    delete buffer;
}

// Emits a neutral performance hint stating whether the buffer is compressed,
// but only when hints are enabled and the hardware supports compressed buffers.
void Buffer::provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
                                    Context *context,
                                    Buffer *buffer) {
    if (context->isProvidingPerformanceHints() && HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo())) {
        if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_COMPRESSED, buffer);
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_NOT_COMPRESSED, buffer);
        }
    }
}
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/mem_obj/buffer.h000066400000000000000000000235651363734646600232020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/basic_math.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/context/context_type.h" #include "opencl/source/mem_obj/mem_obj.h" #include "igfxfmid.h" #include "memory_properties_flags.h" namespace NEO { class Device; class Buffer; class ClDevice; class MemoryManager; typedef Buffer *(*BufferCreatFunc)(Context *context, MemoryPropertiesFlags memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, GraphicsAllocation *gfxAllocation, bool zeroCopy, bool isHostPtrSVM, bool isImageRedescribed); typedef struct { BufferCreatFunc createBufferFunction; } BufferFuncs; extern BufferFuncs bufferFactory[IGFX_MAX_CORE]; class Buffer : public MemObj { public: constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB; constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; constexpr static cl_ulong objectMagic = MemObj::objectMagic | 0x02; bool forceDisallowCPUCopy = false; ~Buffer() override; static void validateInputAndCreateBuffer(cl_context &context, MemoryPropertiesFlags memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_int &retVal, cl_mem &buffer); static Buffer *create(Context *context, cl_mem_flags flags, size_t size, void *hostPtr, cl_int &errcodeRet); static Buffer *create(Context *context, MemoryPropertiesFlags properties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_int &errcodeRet); static Buffer *createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler, GraphicsAllocation *graphicsAllocation); static Buffer *createBufferHw(Context 
*context, MemoryPropertiesFlags memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, GraphicsAllocation *gfxAllocation, bool zeroCopy, bool isHostPtrSVM, bool isImageRedescribed); static Buffer *createBufferHwFromDevice(const Device *device, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, GraphicsAllocation *gfxAllocation, size_t offset, bool zeroCopy, bool isHostPtrSVM, bool isImageRedescribed); Buffer *createSubBuffer(cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const cl_buffer_region *region, cl_int &errcodeRet); static void setSurfaceState(const Device *device, void *surfaceState, size_t svmSize, void *svmPtr, size_t offset, GraphicsAllocation *gfxAlloc, cl_mem_flags flags, cl_mem_flags_intel flagsIntel); static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType, Context *context, Buffer *buffer); BufferCreatFunc createFunction = nullptr; bool isSubBuffer(); bool isValidSubBufferOffset(size_t offset); uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); } uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing); virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) = 0; bool bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, size_t &bufferSlicePitch, size_t &hostRowPitch, size_t &hostSlicePitch); static size_t calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch); void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; bool isReadWriteOnCpuAllowed(); bool isReadWriteOnCpuPreffered(void *ptr, size_t size); uint32_t getMocsValue(bool disableL3Cache, 
bool isReadOnlyArgument) const; bool isCompressed() const; protected: Buffer(Context *context, MemoryPropertiesFlags memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, GraphicsAllocation *gfxAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed); Buffer(); static void checkMemory(MemoryPropertiesFlags memoryProperties, size_t size, void *hostPtr, cl_int &errcodeRet, bool &isZeroCopy, bool ©MemoryFromHostPtr, MemoryManager *memMngr); static GraphicsAllocation::AllocationType getGraphicsAllocationType(const MemoryPropertiesFlags &properties, Context &context, bool renderCompressedBuffers, bool localMemoryEnabled, bool preferCompression); static bool isReadOnlyMemoryPermittedByFlags(const MemoryPropertiesFlags &properties); void transferData(void *dst, void *src, size_t copySize, size_t copyOffset); }; template class BufferHw : public Buffer { public: BufferHw(Context *context, MemoryPropertiesFlags memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, GraphicsAllocation *gfxAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed) : Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation, zeroCopy, isHostPtrSVM, isObjectRedescribed) {} void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override; void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument); void appendSurfaceStateExt(void *memory); static Buffer *create(Context *context, MemoryPropertiesFlags memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *memoryStorage, void *hostPtr, GraphicsAllocation *gfxAllocation, bool zeroCopy, bool isHostPtrSVM, bool isObjectRedescribed) { auto buffer = new BufferHw(context, memoryProperties, flags, 
flagsIntel, size, memoryStorage, hostPtr, gfxAllocation, zeroCopy, isHostPtrSVM, isObjectRedescribed); buffer->surfaceType = SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; return buffer; } typedef typename GfxFamily::RENDER_SURFACE_STATE SURFACE_STATE; typename SURFACE_STATE::SURFACE_TYPE surfaceType; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/buffer_base.inl000066400000000000000000000074231363734646600245220ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/bit_helpers.h" #include "shared/source/helpers/hw_cmds.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/buffer.h" #include "buffer_ext.inl" namespace NEO { union SURFACE_STATE_BUFFER_LENGTH { uint32_t Length; struct SurfaceState { uint32_t Width : BITFIELD_RANGE(0, 6); uint32_t Height : BITFIELD_RANGE(7, 20); uint32_t Depth : BITFIELD_RANGE(21, 31); } SurfaceState; }; template void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; auto surfaceState = reinterpret_cast(memory); // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address auto bufferAddress = (getGraphicsAllocation() != nullptr) ? 
getGraphicsAllocation()->getGpuAddress() : castToUint64(getHostPtr()); bufferAddress += this->offset; auto bufferAddressAligned = alignDown(bufferAddress, 4); auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned); auto surfaceSize = alignUp(getSize() + bufferOffset, alignSizeForAuxTranslation ? 512 : 4); SURFACE_STATE_BUFFER_LENGTH Length = {0}; Length.Length = static_cast(surfaceSize - 1); surfaceState->setWidth(Length.SurfaceState.Width + 1); surfaceState->setHeight(Length.SurfaceState.Height + 1); surfaceState->setDepth(Length.SurfaceState.Depth + 1); if (bufferAddress != 0) { surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER); } else { surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL); } surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW); surfaceState->setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4); surfaceState->setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4); surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR); surfaceState->setVerticalLineStride(0); surfaceState->setVerticalLineStrideOffset(0); surfaceState->setMemoryObjectControlState(getMocsValue(disableL3, isReadOnlyArgument)); surfaceState->setSurfaceBaseAddress(bufferAddressAligned); Gmm *gmm = graphicsAllocation ? 
graphicsAllocation->getDefaultGmm() : nullptr; if (gmm && gmm->isRenderCompressed && !forceNonAuxMode && GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == graphicsAllocation->getAllocationType()) { // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } else { surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT); surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument); appendSurfaceStateExt(memory); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/buffer_bdw_plus.inl000066400000000000000000000005241363734646600254220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/buffer_base.inl" namespace NEO { template void BufferHw::appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnly) { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/buffer_factory_init.inl000066400000000000000000000004321363734646600262730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> void populateFactoryTable>() { extern BufferFuncs bufferFactory[IGFX_MAX_CORE]; bufferFactory[gfxCore].createBufferFunction = BufferHw::create; } compute-runtime-20.13.16352/opencl/source/mem_obj/definitions/000077500000000000000000000000001363734646600240605ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/mem_obj/definitions/buffer_ext.inl000066400000000000000000000004131363734646600267130ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/buffer.h" namespace NEO { template void BufferHw::appendSurfaceStateExt(void *memory) { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/definitions/image_ext.inl000066400000000000000000000004111363734646600265220ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/image.h" namespace NEO { template void ImageHw::appendSurfaceStateExt(void *memory) { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/image.cpp000066400000000000000000001655431363734646600233510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/image.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/utilities/compiler_support.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/cl_device/cl_device_get_cap.inl" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include 
"opencl/source/platform/platform.h" #include "igfxfmid.h" #include namespace NEO { ImageFuncs imageFactory[IGFX_MAX_CORE] = {}; Image::Image(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_image_format imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo &surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets) : MemObj(context, imageDesc.image_type, memoryProperties, flags, flagsIntel, size, graphicsAllocation->getUnderlyingBuffer(), hostPtr, graphicsAllocation, zeroCopy, false, isObjectRedescribed), createFunction(nullptr), imageFormat(std::move(imageFormat)), imageDesc(imageDesc), surfaceFormatInfo(surfaceFormatInfo), cubeFaceIndex(__GMM_NO_CUBE_MAP), mediaPlaneType(0), baseMipLevel(baseMipLevel), mipCount(mipCount) { magic = objectMagic; if (surfaceOffsets) setSurfaceOffsets(surfaceOffsets->offset, surfaceOffsets->xOffset, surfaceOffsets->yOffset, surfaceOffsets->yOffsetForUVplane); else setSurfaceOffsets(0, 0, 0, 0); } void Image::transferData(void *dest, size_t destRowPitch, size_t destSlicePitch, void *src, size_t srcRowPitch, size_t srcSlicePitch, std::array copyRegion, std::array copyOrigin) { size_t pixelSize = surfaceFormatInfo.surfaceFormat.ImageElementSizeInBytes; size_t lineWidth = copyRegion[0] * pixelSize; DBG_LOG(LogMemoryObject, __FUNCTION__, "memcpy dest:", dest, "sizeRowToCopy:", lineWidth, "src:", src); if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { // For 1DArray type, array region and origin are stored on 2nd position. For 2Darray its on 3rd position. 
std::swap(copyOrigin[1], copyOrigin[2]); std::swap(copyRegion[1], copyRegion[2]); } for (size_t slice = copyOrigin[2]; slice < (copyOrigin[2] + copyRegion[2]); slice++) { auto srcSliceOffset = ptrOffset(src, srcSlicePitch * slice); auto dstSliceOffset = ptrOffset(dest, destSlicePitch * slice); for (size_t height = copyOrigin[1]; height < (copyOrigin[1] + copyRegion[1]); height++) { auto srcRowOffset = ptrOffset(srcSliceOffset, srcRowPitch * height); auto dstRowOffset = ptrOffset(dstSliceOffset, destRowPitch * height); memcpy_s(ptrOffset(dstRowOffset, copyOrigin[0] * pixelSize), lineWidth, ptrOffset(srcRowOffset, copyOrigin[0] * pixelSize), lineWidth); } } } Image::~Image() = default; Image *Image::create(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet) { UNRECOVERABLE_IF(surfaceFormat == nullptr); Image *image = nullptr; GraphicsAllocation *memory = nullptr; GraphicsAllocation *mapAllocation = nullptr; MemoryManager *memoryManager = context->getMemoryManager(); Buffer *parentBuffer = castToObject(imageDesc->mem_object); Image *parentImage = castToObject(imageDesc->mem_object); auto &hwHelper = HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); auto clientContext = context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(); do { size_t imageWidth = imageDesc->image_width; size_t imageHeight = 1; size_t imageDepth = 1; size_t imageCount = 1; size_t hostPtrMinSize = 0; cl_image_desc imageDescriptor = *imageDesc; ImageInfo imgInfo = {}; void *hostPtrToSet = nullptr; if (memoryProperties.flags.useHostPtr) { hostPtrToSet = const_cast(hostPtr); } imgInfo.imgDesc = Image::convertDescriptor(imageDescriptor); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.mipCount = 
imageDesc->num_mip_levels; Gmm *gmm = nullptr; if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { imageCount = imageDesc->image_array_size; } switch (imageDesc->image_type) { case CL_MEM_OBJECT_IMAGE3D: imageDepth = imageDesc->image_depth; CPP_ATTRIBUTE_FALLTHROUGH; case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageHeight = imageDesc->image_height; case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE1D_BUFFER: break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); break; } if (parentImage) { imageWidth = parentImage->getImageDesc().image_width; imageHeight = parentImage->getImageDesc().image_height; imageDepth = 1; if (IsNV12Image(&parentImage->getImageFormat())) { if (imageDesc->image_depth == 1) { // UV Plane imageWidth /= 2; imageHeight /= 2; imgInfo.plane = GMM_PLANE_U; } else { imgInfo.plane = GMM_PLANE_Y; } } imgInfo.surfaceFormat = &parentImage->surfaceFormatInfo.surfaceFormat; imageDescriptor = parentImage->getImageDesc(); } auto hostPtrRowPitch = imageDesc->image_row_pitch ? imageDesc->image_row_pitch : imageWidth * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; auto hostPtrSlicePitch = imageDesc->image_slice_pitch ? 
imageDesc->image_slice_pitch : hostPtrRowPitch * imageHeight; imgInfo.linearStorage = !hwHelper.tilingAllowed(context->isSharedContext, Image::isImage1d(*imageDesc), memoryProperties.flags.forceLinearStorage); imgInfo.preferRenderCompression = MemObjHelper::isSuitableForRenderCompression(!imgInfo.linearStorage, memoryProperties, *context, true); imgInfo.preferRenderCompression &= !Image::isFormatRedescribable(surfaceFormat->OCLImageFormat); if (!context->getDevice(0)->getSharedDeviceInfo().imageSupport && !imgInfo.linearStorage) { errcodeRet = CL_INVALID_OPERATION; return nullptr; } switch (imageDesc->image_type) { case CL_MEM_OBJECT_IMAGE3D: hostPtrMinSize = hostPtrSlicePitch * imageDepth; break; case CL_MEM_OBJECT_IMAGE2D: if (IsNV12Image(&surfaceFormat->OCLImageFormat)) { hostPtrMinSize = hostPtrRowPitch * imageHeight + hostPtrRowPitch * imageHeight / 2; } else { hostPtrMinSize = hostPtrRowPitch * imageHeight; } hostPtrSlicePitch = 0; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D_ARRAY: hostPtrMinSize = hostPtrSlicePitch * imageCount; break; case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: hostPtrMinSize = hostPtrRowPitch; hostPtrSlicePitch = 0; break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); break; } bool zeroCopy = false; bool transferNeeded = false; if (((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || (imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D)) && (parentBuffer != nullptr)) { if (!hwHelper.checkResourceCompatibility(*parentBuffer->getGraphicsAllocation())) { errcodeRet = CL_INVALID_MEM_OBJECT; return nullptr; } memory = parentBuffer->getGraphicsAllocation(); // Image from buffer - we never allocate memory, we use what buffer provides zeroCopy = true; hostPtr = parentBuffer->getHostPtr(); hostPtrToSet = const_cast(hostPtr); parentBuffer->incRefInternal(); GmmTypesConverter::queryImgFromBufferParams(imgInfo, memory); UNRECOVERABLE_IF(imgInfo.offset != 0); imgInfo.offset = 
parentBuffer->getOffset(); if (memoryManager->peekVirtualPaddingSupport() && (imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D)) { // Retrieve sizes from GMM and apply virtual padding if buffer storage is not big enough auto queryGmmImgInfo(imgInfo); auto gmm = std::make_unique(clientContext, queryGmmImgInfo, StorageInfo{}); auto gmmAllocationSize = gmm->gmmResourceInfo->getSizeAllocation(); if (gmmAllocationSize > memory->getUnderlyingBufferSize()) { memory = memoryManager->createGraphicsAllocationWithPadding(memory, gmmAllocationSize); } } } else if (parentImage != nullptr) { memory = parentImage->getGraphicsAllocation(); memory->getDefaultGmm()->queryImageParams(imgInfo); } else { errcodeRet = CL_OUT_OF_HOST_MEMORY; if (memoryProperties.flags.useHostPtr) { if (!context->isSharedContext) { AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, false, memoryProperties, context->getDevice(0)->getHardwareInfo()); memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr); if (memory) { if (memory->getUnderlyingBuffer() != hostPtr) { zeroCopy = false; transferNeeded = true; } else { zeroCopy = true; } } } else { gmm = new Gmm(clientContext, imgInfo, StorageInfo{}); memory = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, false, imgInfo.size, GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE, false}, hostPtr); memory->setDefaultGmm(gmm); zeroCopy = true; } if (memory) { AllocationProperties properties{rootDeviceIndex, false, hostPtrMinSize, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true; mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr); } } else { AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, true, memoryProperties, 
context->getDevice(0)->getHardwareInfo()); memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties); if (memory && MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) { zeroCopy = true; } } } transferNeeded |= memoryProperties.flags.copyHostPtr; if (!memory) { break; } if (parentBuffer == nullptr) { memory->setAllocationType(GraphicsAllocation::AllocationType::IMAGE); } memory->setMemObjectsAllocationWithWritableFlags(!memoryProperties.flags.readOnly && !memoryProperties.flags.hostReadOnly && !memoryProperties.flags.hostNoAccess); DBG_LOG(LogMemoryObject, __FUNCTION__, "hostPtr:", hostPtr, "size:", memory->getUnderlyingBufferSize(), "memoryStorage:", memory->getUnderlyingBuffer(), "GPU address:", std::hex, memory->getGpuAddress()); if (parentImage) { imageDescriptor.image_height = imageHeight; imageDescriptor.image_width = imageWidth; imageDescriptor.image_type = CL_MEM_OBJECT_IMAGE2D; imageDescriptor.image_depth = 1; imageDescriptor.image_array_size = 0; imageDescriptor.image_row_pitch = 0; imageDescriptor.image_slice_pitch = 0; imageDescriptor.mem_object = imageDesc->mem_object; parentImage->incRefInternal(); imgInfo.imgDesc = Image::convertDescriptor(imageDescriptor); } image = createImageHw(context, memoryProperties, flags, flagsIntel, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat, imageDescriptor, zeroCopy, memory, false, 0, 0, surfaceFormat); if (context->isProvidingPerformanceHints() && HwHelper::renderCompressedImagesSupported(context->getDevice(0)->getHardwareInfo())) { if (memory->getDefaultGmm()) { if (memory->getDefaultGmm()->isRenderCompressed) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_COMPRESSED, image); } else { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_NOT_COMPRESSED, image); } } } if (imageDesc->image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY) { 
image->imageDesc.image_array_size = 0; } if ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || ((imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) && (imageDesc->mem_object != nullptr))) { image->associatedMemObject = castToObject(imageDesc->mem_object); } // Driver needs to store rowPitch passed by the app in order to synchronize the host_ptr later on map call image->setHostPtrRowPitch(imageDesc->image_row_pitch ? imageDesc->image_row_pitch : hostPtrRowPitch); image->setHostPtrSlicePitch(hostPtrSlicePitch); image->setImageCount(imageCount); image->setHostPtrMinSize(hostPtrMinSize); image->setImageRowPitch(imgInfo.rowPitch); image->setImageSlicePitch(imgInfo.slicePitch); image->setQPitch(imgInfo.qPitch); image->setSurfaceOffsets(imgInfo.offset, imgInfo.xOffset, imgInfo.yOffset, imgInfo.yOffsetForUVPlane); image->setMipCount(imgInfo.mipCount); if (parentImage) { image->setMediaPlaneType(static_cast(imageDesc->image_depth)); image->setParentSharingHandler(parentImage->getSharingHandler()); } if (parentBuffer) { image->setParentSharingHandler(parentBuffer->getSharingHandler()); } errcodeRet = CL_SUCCESS; if (context->isProvidingPerformanceHints() && image->isMemObjZeroCopy()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS, static_cast(image)); } if (transferNeeded) { std::array copyOrigin = {{0, 0, 0}}; std::array copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}}; if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { copyRegion = {{imageWidth, imageCount, 1}}; } else { copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}}; } if (!imgInfo.linearStorage || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) { auto cmdQ = context->getSpecialQueue(); if (IsNV12Image(&image->getImageFormat())) { errcodeRet = image->writeNV12Planes(hostPtr, hostPtrRowPitch); } else { errcodeRet = cmdQ->enqueueWriteImage(image, CL_TRUE, ©Origin[0], ©Region[0], 
hostPtrRowPitch, hostPtrSlicePitch, hostPtr, mapAllocation, 0, nullptr, nullptr); } } else { image->transferData(memory->getUnderlyingBuffer(), imgInfo.rowPitch, imgInfo.slicePitch, const_cast(hostPtr), hostPtrRowPitch, hostPtrSlicePitch, copyRegion, copyOrigin); } } image->mapAllocation = mapAllocation; if (errcodeRet != CL_SUCCESS) { image->release(); image = nullptr; memory = nullptr; break; } } while (false); return image; } Image *Image::createImageHw(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo) { const auto device = context->getDevice(0); const auto &hwInfo = device->getHardwareInfo(); auto funcCreate = imageFactory[hwInfo.platform.eRenderCoreFamily].createImageFunction; DEBUG_BREAK_IF(nullptr == funcCreate); auto image = funcCreate(context, memoryProperties, flags, flagsIntel, size, hostPtr, imageFormat, imageDesc, zeroCopy, graphicsAllocation, isObjectRedescribed, baseMipLevel, mipCount, surfaceFormatInfo, nullptr); DEBUG_BREAK_IF(nullptr == image); image->createFunction = funcCreate; return image; } Image *Image::createSharedImage(Context *context, SharingHandler *sharingHandler, const McsSurfaceInfo &mcsSurfaceInfo, GraphicsAllocation *graphicsAllocation, GraphicsAllocation *mcsAllocation, cl_mem_flags flags, const ClSurfaceFormatInfo *surfaceFormat, ImageInfo &imgInfo, uint32_t cubeFaceIndex, uint32_t baseMipLevel, uint32_t mipCount) { auto sharedImage = createImageHw(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, graphicsAllocation->getUnderlyingBufferSize(), nullptr, surfaceFormat->OCLImageFormat, Image::convertDescriptor(imgInfo.imgDesc), false, graphicsAllocation, false, 
baseMipLevel, mipCount, surfaceFormat); sharedImage->setSharingHandler(sharingHandler); sharedImage->setMcsAllocation(mcsAllocation); sharedImage->setQPitch(imgInfo.qPitch); sharedImage->setHostPtrRowPitch(imgInfo.imgDesc.imageRowPitch); sharedImage->setHostPtrSlicePitch(imgInfo.imgDesc.imageSlicePitch); sharedImage->setCubeFaceIndex(cubeFaceIndex); sharedImage->setSurfaceOffsets(imgInfo.offset, imgInfo.xOffset, imgInfo.yOffset, imgInfo.yOffsetForUVPlane); sharedImage->setMcsSurfaceInfo(mcsSurfaceInfo); return sharedImage; } cl_int Image::validate(Context *context, const MemoryPropertiesFlags &memoryProperties, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr) { auto pClDevice = context->getDevice(0); size_t srcSize = 0; size_t retSize = 0; const size_t *maxWidth = nullptr; const size_t *maxHeight = nullptr; const uint32_t *pitchAlignment = nullptr; const uint32_t *baseAddressAlignment = nullptr; if (!surfaceFormat) { return CL_IMAGE_FORMAT_NOT_SUPPORTED; } Image *parentImage = castToObject(imageDesc->mem_object); Buffer *parentBuffer = castToObject(imageDesc->mem_object); if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE2D) { pClDevice->getCap(reinterpret_cast(maxWidth), srcSize, retSize); pClDevice->getCap(reinterpret_cast(maxHeight), srcSize, retSize); if (imageDesc->image_width > *maxWidth || imageDesc->image_height > *maxHeight) { return CL_INVALID_IMAGE_SIZE; } if (parentBuffer) { // Image 2d from buffer pClDevice->getCap(reinterpret_cast(pitchAlignment), srcSize, retSize); pClDevice->getCap(reinterpret_cast(baseAddressAlignment), srcSize, retSize); const auto rowSize = imageDesc->image_row_pitch != 0 ? 
imageDesc->image_row_pitch : alignUp(imageDesc->image_width * surfaceFormat->surfaceFormat.NumChannels * surfaceFormat->surfaceFormat.PerChannelSizeInBytes, *pitchAlignment); const auto minimumBufferSize = imageDesc->image_height * rowSize; if ((imageDesc->image_row_pitch % (*pitchAlignment)) || ((parentBuffer->getMemoryPropertiesFlags() & CL_MEM_USE_HOST_PTR) && (reinterpret_cast(parentBuffer->getHostPtr()) % (*baseAddressAlignment))) || (minimumBufferSize > parentBuffer->getSize())) { return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } else if (memoryProperties.flags.useHostPtr || memoryProperties.flags.copyHostPtr) { return CL_INVALID_VALUE; } } if (parentImage && !IsNV12Image(&parentImage->getImageFormat())) { // Image 2d from image 2d if (!parentImage->hasSameDescriptor(*imageDesc) || !parentImage->hasValidParentImageFormat(surfaceFormat->OCLImageFormat)) { return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } } if (!(parentImage && IsNV12Image(&parentImage->getImageFormat())) && (imageDesc->image_width == 0 || imageDesc->image_height == 0)) { return CL_INVALID_IMAGE_DESCRIPTOR; } } if (hostPtr == nullptr) { if (imageDesc->image_row_pitch != 0 && imageDesc->mem_object == nullptr) { return CL_INVALID_IMAGE_DESCRIPTOR; } } else { if (imageDesc->image_row_pitch != 0) { if (imageDesc->image_row_pitch % surfaceFormat->surfaceFormat.ImageElementSizeInBytes != 0 || imageDesc->image_row_pitch < imageDesc->image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes) { return CL_INVALID_IMAGE_DESCRIPTOR; } } } if (parentBuffer && imageDesc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { return CL_INVALID_IMAGE_DESCRIPTOR; } if (parentImage && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { return CL_INVALID_IMAGE_DESCRIPTOR; } return validateImageTraits(context, memoryProperties, &surfaceFormat->OCLImageFormat, imageDesc, hostPtr); } cl_int Image::validateImageFormat(const cl_image_format *imageFormat) { if (!imageFormat) { 
return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } bool isValidFormat = isValidSingleChannelFormat(imageFormat) || isValidIntensityFormat(imageFormat) || isValidLuminanceFormat(imageFormat) || isValidDepthFormat(imageFormat) || isValidDoubleChannelFormat(imageFormat) || isValidTripleChannelFormat(imageFormat) || isValidRGBAFormat(imageFormat) || isValidSRGBFormat(imageFormat) || isValidARGBFormat(imageFormat) || isValidDepthStencilFormat(imageFormat) || isValidYUVFormat(imageFormat); if (isValidFormat) { return CL_SUCCESS; } return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } cl_int Image::validatePlanarYUV(Context *context, const MemoryPropertiesFlags &memoryProperties, const cl_image_desc *imageDesc, const void *hostPtr) { cl_int errorCode = CL_SUCCESS; auto pClDevice = context->getDevice(0); const size_t *maxWidth = nullptr; const size_t *maxHeight = nullptr; size_t srcSize = 0; size_t retSize = 0; while (true) { Image *memObject = castToObject(imageDesc->mem_object); if (memObject != nullptr) { if (memObject->memObjectType == CL_MEM_OBJECT_IMAGE2D) { if (imageDesc->image_depth != 1 && imageDesc->image_depth != 0) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; } } break; } if (imageDesc->mem_object != nullptr) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; break; } if (!memoryProperties.flags.hostNoAccess) { errorCode = CL_INVALID_VALUE; break; } else { if (imageDesc->image_height % 4 || imageDesc->image_width % 4 || imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; break; } } pClDevice->getCap(reinterpret_cast(maxWidth), srcSize, retSize); pClDevice->getCap(reinterpret_cast(maxHeight), srcSize, retSize); if (imageDesc->image_width > *maxWidth || imageDesc->image_height > *maxHeight) { errorCode = CL_INVALID_IMAGE_SIZE; break; } break; } return errorCode; } cl_int Image::validatePackedYUV(const MemoryPropertiesFlags &memoryProperties, const cl_image_desc *imageDesc) { cl_int errorCode = CL_SUCCESS; while (true) { if 
(!memoryProperties.flags.readOnly) { errorCode = CL_INVALID_VALUE; break; } else { if (imageDesc->image_width % 2 != 0 || imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D) { errorCode = CL_INVALID_IMAGE_DESCRIPTOR; break; } } break; } return errorCode; } cl_int Image::validateImageTraits(Context *context, const MemoryPropertiesFlags &memoryProperties, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr) { if (IsNV12Image(imageFormat)) return validatePlanarYUV(context, memoryProperties, imageDesc, hostPtr); else if (IsPackedYuvImage(imageFormat)) return validatePackedYUV(memoryProperties, imageDesc); return CL_SUCCESS; } size_t Image::calculateHostPtrSize(const size_t *region, size_t rowPitch, size_t slicePitch, size_t pixelSize, uint32_t imageType) { DEBUG_BREAK_IF(!((rowPitch != 0) && (slicePitch != 0))); size_t sizeToReturn = 0u; switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: sizeToReturn = region[0] * pixelSize; break; case CL_MEM_OBJECT_IMAGE2D: sizeToReturn = (region[1] - 1) * rowPitch + region[0] * pixelSize; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: sizeToReturn = (region[1] - 1) * slicePitch + region[0] * pixelSize; break; case CL_MEM_OBJECT_IMAGE3D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: sizeToReturn = (region[2] - 1) * slicePitch + (region[1] - 1) * rowPitch + region[0] * pixelSize; break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); break; } DEBUG_BREAK_IF(sizeToReturn == 0); return sizeToReturn; } void Image::calculateHostPtrOffset(size_t *imageOffset, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, uint32_t imageType, size_t bytesPerPixel) { size_t computedImageRowPitch = rowPitch ? rowPitch : region[0] * bytesPerPixel; size_t computedImageSlicePitch = slicePitch ? 
slicePitch : region[1] * computedImageRowPitch * bytesPerPixel; switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE2D: DEBUG_BREAK_IF(slicePitch != 0 && slicePitch < computedImageRowPitch * region[1]); CPP_ATTRIBUTE_FALLTHROUGH; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: *imageOffset = origin[2] * computedImageSlicePitch + origin[1] * computedImageRowPitch + origin[0] * bytesPerPixel; break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: *imageOffset = origin[1] * computedImageSlicePitch + origin[0] * bytesPerPixel; break; default: DEBUG_BREAK_IF("Unsupported cl_image_type"); *imageOffset = 0; break; } } // Called by clGetImageParamsINTEL to obtain image row pitch and slice pitch // Assumption: all parameters are already validated be calling function cl_int Image::getImageParams(Context *context, cl_mem_flags memFlags, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch) { cl_int retVal = CL_SUCCESS; auto clientContext = context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(); ImageInfo imgInfo = {}; cl_image_desc imageDescriptor = *imageDesc; imgInfo.imgDesc = Image::convertDescriptor(imageDescriptor); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; auto gmm = std::make_unique(clientContext, imgInfo, StorageInfo{}); *imageRowPitch = imgInfo.rowPitch; *imageSlicePitch = imgInfo.slicePitch; return retVal; } const cl_image_desc &Image::getImageDesc() const { return imageDesc; } const cl_image_format &Image::getImageFormat() const { return imageFormat; } const ClSurfaceFormatInfo &Image::getSurfaceFormatInfo() const { return surfaceFormatInfo; } cl_mem_object_type Image::convertType(const ImageType type) { switch (type) { case ImageType::Image2D: return CL_MEM_OBJECT_IMAGE2D; case ImageType::Image3D: return CL_MEM_OBJECT_IMAGE3D; case ImageType::Image2DArray: return CL_MEM_OBJECT_IMAGE2D_ARRAY; case 
ImageType::Image1D: return CL_MEM_OBJECT_IMAGE1D; case ImageType::Image1DArray: return CL_MEM_OBJECT_IMAGE1D_ARRAY; case ImageType::Image1DBuffer: return CL_MEM_OBJECT_IMAGE1D_BUFFER; default: break; } return 0; } ImageType Image::convertType(const cl_mem_object_type type) { switch (type) { case CL_MEM_OBJECT_IMAGE2D: return ImageType::Image2D; case CL_MEM_OBJECT_IMAGE3D: return ImageType::Image3D; case CL_MEM_OBJECT_IMAGE2D_ARRAY: return ImageType::Image2DArray; case CL_MEM_OBJECT_IMAGE1D: return ImageType::Image1D; case CL_MEM_OBJECT_IMAGE1D_ARRAY: return ImageType::Image1DArray; case CL_MEM_OBJECT_IMAGE1D_BUFFER: return ImageType::Image1DBuffer; default: break; } return ImageType::Invalid; } ImageDescriptor Image::convertDescriptor(const cl_image_desc &imageDesc) { ImageDescriptor desc = {}; desc.fromParent = imageDesc.mem_object != nullptr; desc.imageArraySize = imageDesc.image_array_size; desc.imageDepth = imageDesc.image_depth; desc.imageHeight = imageDesc.image_height; desc.imageRowPitch = imageDesc.image_row_pitch; desc.imageSlicePitch = imageDesc.image_slice_pitch; desc.imageType = convertType(imageDesc.image_type); desc.imageWidth = imageDesc.image_width; desc.numMipLevels = imageDesc.num_mip_levels; desc.numSamples = imageDesc.num_samples; return desc; } cl_image_desc Image::convertDescriptor(const ImageDescriptor &imageDesc) { cl_image_desc desc = {}; desc.mem_object = nullptr; desc.image_array_size = imageDesc.imageArraySize; desc.image_depth = imageDesc.imageDepth; desc.image_height = imageDesc.imageHeight; desc.image_row_pitch = imageDesc.imageRowPitch; desc.image_slice_pitch = imageDesc.imageSlicePitch; desc.image_type = convertType(imageDesc.imageType); desc.image_width = imageDesc.imageWidth; desc.num_mip_levels = imageDesc.numMipLevels; desc.num_samples = imageDesc.numSamples; return desc; } cl_int Image::getImageInfo(cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t srcParamSize 
= 0; void *srcParam = nullptr; auto imageDesc = getImageDesc(); auto surfFmtInfo = getSurfaceFormatInfo(); size_t retParam; size_t array_size = imageDesc.image_array_size * (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY); size_t SlicePitch = hostPtrSlicePitch * !(imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D || imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D || imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER); switch (paramName) { case CL_IMAGE_FORMAT: srcParamSize = sizeof(cl_image_format); srcParam = &(surfFmtInfo.OCLImageFormat); break; case CL_IMAGE_ELEMENT_SIZE: srcParamSize = sizeof(size_t); srcParam = &(surfFmtInfo.surfaceFormat.ImageElementSizeInBytes); break; case CL_IMAGE_ROW_PITCH: srcParamSize = sizeof(size_t); if (mcsSurfaceInfo.multisampleCount > 1) { retParam = imageDesc.image_width * surfFmtInfo.surfaceFormat.ImageElementSizeInBytes * imageDesc.num_samples; } else { retParam = hostPtrRowPitch; } srcParam = &retParam; break; case CL_IMAGE_SLICE_PITCH: srcParamSize = sizeof(size_t); srcParam = &SlicePitch; break; case CL_IMAGE_WIDTH: srcParamSize = sizeof(size_t); retParam = imageDesc.image_width; if (this->baseMipLevel) { retParam = imageDesc.image_width >> this->baseMipLevel; retParam = std::max(retParam, (size_t)1); } srcParam = &retParam; break; case CL_IMAGE_HEIGHT: srcParamSize = sizeof(size_t); retParam = imageDesc.image_height * !((imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D) || (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) || (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)); if ((retParam != 0) && (this->baseMipLevel > 0)) { retParam = retParam >> this->baseMipLevel; retParam = std::max(retParam, (size_t)1); } srcParam = &retParam; break; case CL_IMAGE_DEPTH: srcParamSize = sizeof(size_t); retParam = imageDesc.image_depth * (imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D); if ((retParam != 0) && (this->baseMipLevel > 0)) { retParam = retParam >> 
this->baseMipLevel; retParam = std::max(retParam, (size_t)1); } srcParam = &retParam; break; case CL_IMAGE_ARRAY_SIZE: srcParamSize = sizeof(size_t); srcParam = &(array_size); break; case CL_IMAGE_BUFFER: srcParamSize = sizeof(cl_mem); srcParam = &(imageDesc.buffer); break; case CL_IMAGE_NUM_MIP_LEVELS: srcParamSize = sizeof(cl_uint); srcParam = &(imageDesc.num_mip_levels); break; case CL_IMAGE_NUM_SAMPLES: srcParamSize = sizeof(cl_uint); srcParam = &(imageDesc.num_samples); break; default: getOsSpecificImageInfo(paramName, &srcParamSize, &srcParam); break; } retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, srcParam, srcParamSize)); if (paramValueSizeRet) { *paramValueSizeRet = srcParamSize; } return retVal; } Image *Image::redescribeFillImage() { const uint32_t redescribeTable[3][3] = { {17, 27, 5}, // {CL_R, CL_UNSIGNED_INT8}, {CL_RG, CL_UNSIGNED_INT8}, {CL_RGBA, CL_UNSIGNED_INT8} {18, 28, 6}, // {CL_R, CL_UNSIGNED_INT16}, {CL_RG, CL_UNSIGNED_INT16}, {CL_RGBA, CL_UNSIGNED_INT16} {19, 29, 7} // {CL_R, CL_UNSIGNED_INT32}, {CL_RG, CL_UNSIGNED_INT32}, {CL_RGBA, CL_UNSIGNED_INT32} }; auto imageFormatNew = this->imageFormat; auto imageDescNew = this->imageDesc; const ClSurfaceFormatInfo *surfaceFormat = nullptr; uint32_t redescribeTableCol = this->surfaceFormatInfo.surfaceFormat.NumChannels / 2; uint32_t redescribeTableRow = this->surfaceFormatInfo.surfaceFormat.PerChannelSizeInBytes / 2; ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); uint32_t surfaceFormatIdx = redescribeTable[redescribeTableRow][redescribeTableCol]; surfaceFormat = &readWriteSurfaceFormats[surfaceFormatIdx]; imageFormatNew.image_channel_order = surfaceFormat->OCLImageFormat.image_channel_order; imageFormatNew.image_channel_data_type = surfaceFormat->OCLImageFormat.image_channel_data_type; DEBUG_BREAK_IF(nullptr == createFunction); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags | 
CL_MEM_USE_HOST_PTR, flagsIntel, 0); auto image = createFunction(context, memoryProperties, flags | CL_MEM_USE_HOST_PTR, flagsIntel, this->getSize(), this->getCpuAddress(), imageFormatNew, imageDescNew, this->isMemObjZeroCopy(), this->getGraphicsAllocation(), true, this->baseMipLevel, this->mipCount, surfaceFormat, &this->surfaceOffsets); image->setQPitch(this->getQPitch()); image->setCubeFaceIndex(this->getCubeFaceIndex()); image->associatedMemObject = this->associatedMemObject; return image; } static const uint32_t redescribeTableBytes[] = { 17, // {CL_R, CL_UNSIGNED_INT8} 1 byte 18, // {CL_R, CL_UNSIGNED_INT16} 2 byte 19, // {CL_R, CL_UNSIGNED_INT32} 4 byte 29, // {CL_RG, CL_UNSIGNED_INT32} 8 byte 7 // {CL_RGBA, CL_UNSIGNED_INT32} 16 byte }; bool Image::isFormatRedescribable(cl_image_format format) { const ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); for (auto indexInRedescribeTable = 0u; indexInRedescribeTable < sizeof(redescribeTableBytes) / sizeof(uint32_t); indexInRedescribeTable++) { const uint32_t formatIndex = redescribeTableBytes[indexInRedescribeTable]; const cl_image_format nonRedescribableFormat = readWriteSurfaceFormats[formatIndex].OCLImageFormat; if (nonRedescribableFormat.image_channel_data_type == format.image_channel_data_type && nonRedescribableFormat.image_channel_order == format.image_channel_order) { return false; } } return true; } Image *Image::redescribe() { const uint32_t bytesPerPixel = this->surfaceFormatInfo.surfaceFormat.NumChannels * surfaceFormatInfo.surfaceFormat.PerChannelSizeInBytes; const uint32_t exponent = Math::log2(bytesPerPixel); DEBUG_BREAK_IF(exponent >= 5u); const uint32_t surfaceFormatIdx = redescribeTableBytes[exponent % 5]; const ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); const ClSurfaceFormatInfo *surfaceFormat = &readWriteSurfaceFormats[surfaceFormatIdx]; auto imageFormatNew = this->imageFormat; imageFormatNew.image_channel_order = 
surfaceFormat->OCLImageFormat.image_channel_order; imageFormatNew.image_channel_data_type = surfaceFormat->OCLImageFormat.image_channel_data_type; DEBUG_BREAK_IF(nullptr == createFunction); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags | CL_MEM_USE_HOST_PTR, flagsIntel, 0); auto image = createFunction(context, memoryProperties, flags | CL_MEM_USE_HOST_PTR, flagsIntel, this->getSize(), this->getCpuAddress(), imageFormatNew, this->imageDesc, this->isMemObjZeroCopy(), this->getGraphicsAllocation(), true, this->baseMipLevel, this->mipCount, surfaceFormat, &this->surfaceOffsets); image->setQPitch(this->getQPitch()); image->setCubeFaceIndex(this->getCubeFaceIndex()); image->associatedMemObject = this->associatedMemObject; return image; } void Image::transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { transferData(hostPtr, hostPtrRowPitch, hostPtrSlicePitch, graphicsAllocation->getUnderlyingBuffer(), imageDesc.image_row_pitch, imageDesc.image_slice_pitch, copySize, copyOffset); } void Image::transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { transferData(memoryStorage, imageDesc.image_row_pitch, imageDesc.image_slice_pitch, hostPtr, hostPtrRowPitch, hostPtrSlicePitch, copySize, copyOffset); } cl_int Image::writeNV12Planes(const void *hostPtr, size_t hostPtrRowPitch) { CommandQueue *cmdQ = context->getSpecialQueue(); size_t origin[3] = {0, 0, 0}; size_t region[3] = {this->imageDesc.image_width, this->imageDesc.image_height, 1}; cl_int retVal = 0; cl_image_desc imageDesc = {0}; cl_image_format imageFormat = {0}; // Make NV12 planes readable and writable both on device and host cl_mem_flags flags = CL_MEM_READ_WRITE; // Plane Y imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; // image_width & image_height are ignored for plane extraction imageDesc.image_width = 0; 
imageDesc.image_height = 0; // set mem_object to the full NV12 image imageDesc.mem_object = this; // get access to the Y plane (CL_R) imageDesc.image_depth = 0; const ClSurfaceFormatInfo *surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); // Create NV12 UV Plane image std::unique_ptr imageYPlane(Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); retVal = cmdQ->enqueueWriteImage(imageYPlane.get(), CL_TRUE, origin, region, hostPtrRowPitch, 0, hostPtr, nullptr, 0, nullptr, nullptr); // UV Plane is two times smaller than Plane Y region[0] = region[0] / 2; region[1] = region[1] / 2; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane imageFormat.image_channel_order = CL_RG; hostPtr = static_cast(static_cast(hostPtr) + (hostPtrRowPitch * this->imageDesc.image_height)); surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); // Create NV12 UV Plane image std::unique_ptr imageUVPlane(Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); retVal = cmdQ->enqueueWriteImage(imageUVPlane.get(), CL_TRUE, origin, region, hostPtrRowPitch, 0, hostPtr, nullptr, 0, nullptr, nullptr); return retVal; } const ClSurfaceFormatInfo *Image::getSurfaceFormatFromTable(cl_mem_flags flags, const cl_image_format *imageFormat, unsigned int clVersionSupport) { if (!imageFormat) { DEBUG_BREAK_IF("Invalid format"); return nullptr; } ArrayRef formats = SurfaceFormats::surfaceFormats(flags, imageFormat, clVersionSupport); for (auto &format : formats) { if (format.OCLImageFormat.image_channel_data_type == imageFormat->image_channel_data_type && format.OCLImageFormat.image_channel_order 
== imageFormat->image_channel_order) { return &format; } } DEBUG_BREAK_IF("Invalid format"); return nullptr; } bool Image::isImage1d(const cl_image_desc &imageDesc) { auto imageType = imageDesc.image_type; auto buffer = castToObject(imageDesc.buffer); return (imageType == CL_MEM_OBJECT_IMAGE1D || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY || imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER || buffer); } bool Image::isImage2d(cl_mem_object_type imageType) { return imageType == CL_MEM_OBJECT_IMAGE2D; } bool Image::isImage2dOr2dArray(cl_mem_object_type imageType) { return imageType == CL_MEM_OBJECT_IMAGE2D || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY; } bool Image::isDepthFormat(const cl_image_format &imageFormat) { return imageFormat.image_channel_order == CL_DEPTH || imageFormat.image_channel_order == CL_DEPTH_STENCIL; } Image *Image::validateAndCreateImage(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet) { if (!MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, imageDesc->mem_object, *context)) { errcodeRet = CL_INVALID_VALUE; return nullptr; } bool isHostPtrUsed = (hostPtr != nullptr); bool areHostPtrFlagsUsed = memoryProperties.flags.copyHostPtr || memoryProperties.flags.useHostPtr; if (isHostPtrUsed != areHostPtrFlagsUsed) { errcodeRet = CL_INVALID_HOST_PTR; return nullptr; } errcodeRet = Image::validateImageFormat(imageFormat); if (errcodeRet != CL_SUCCESS) { return nullptr; } const auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); errcodeRet = Image::validate(context, memoryProperties, surfaceFormat, imageDesc, hostPtr); if (errcodeRet != CL_SUCCESS) { return nullptr; } return Image::create(context, memoryProperties, flags, flagsIntel, surfaceFormat, imageDesc, 
hostPtr, errcodeRet); } bool Image::isValidSingleChannelFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_A) || (channelOrder == CL_R) || (channelOrder == CL_Rx); bool isValidDataType = (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT) || (dataType == CL_SIGNED_INT8) || (dataType == CL_SIGNED_INT16) || (dataType == CL_SIGNED_INT32) || (dataType == CL_UNSIGNED_INT8) || (dataType == CL_UNSIGNED_INT16) || (dataType == CL_UNSIGNED_INT32); return isValidOrder && isValidDataType; } bool Image::isValidIntensityFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_INTENSITY) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT); } bool Image::isValidLuminanceFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_LUMINANCE) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT); } bool Image::isValidDepthFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_DEPTH) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT16) || (dataType == CL_FLOAT); } bool Image::isValidDoubleChannelFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = 
(channelOrder == CL_RG) || (channelOrder == CL_RGx) || (channelOrder == CL_RA); bool isValidDataType = (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT) || (dataType == CL_SIGNED_INT8) || (dataType == CL_SIGNED_INT16) || (dataType == CL_SIGNED_INT32) || (dataType == CL_UNSIGNED_INT8) || (dataType == CL_UNSIGNED_INT16) || (dataType == CL_UNSIGNED_INT32); return isValidOrder && isValidDataType; } bool Image::isValidTripleChannelFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_RGB) || (channelOrder == CL_RGBx); bool isValidDataType = (dataType == CL_UNORM_SHORT_565) || (dataType == CL_UNORM_SHORT_555) || (dataType == CL_UNORM_INT_101010); return isValidOrder && isValidDataType; } bool Image::isValidRGBAFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_RGBA) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT8) || (dataType == CL_UNORM_INT16) || (dataType == CL_SNORM_INT8) || (dataType == CL_SNORM_INT16) || (dataType == CL_HALF_FLOAT) || (dataType == CL_FLOAT) || (dataType == CL_SIGNED_INT8) || (dataType == CL_SIGNED_INT16) || (dataType == CL_SIGNED_INT32) || (dataType == CL_UNSIGNED_INT8) || (dataType == CL_UNSIGNED_INT16) || (dataType == CL_UNSIGNED_INT32); } bool Image::isValidSRGBFormat(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_sRGB) || (channelOrder == CL_sRGBx) || (channelOrder == CL_sRGBA) || (channelOrder == CL_sBGRA); bool isValidDataType = (dataType == CL_UNORM_INT8); return isValidOrder && isValidDataType; } bool Image::isValidARGBFormat(const cl_image_format 
*imageFormat) { auto channelOrder = imageFormat->image_channel_order; auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = (channelOrder == CL_ARGB) || (channelOrder == CL_BGRA) || (channelOrder == CL_ABGR); bool isValidDataType = (dataType == CL_UNORM_INT8) || (dataType == CL_SNORM_INT8) || (dataType == CL_SIGNED_INT8) || (dataType == CL_UNSIGNED_INT8); return isValidOrder && isValidDataType; } bool Image::isValidDepthStencilFormat(const cl_image_format *imageFormat) { if (imageFormat->image_channel_order != CL_DEPTH_STENCIL) { return false; } auto dataType = imageFormat->image_channel_data_type; return (dataType == CL_UNORM_INT24) || (dataType == CL_FLOAT); } bool Image::isValidYUVFormat(const cl_image_format *imageFormat) { auto dataType = imageFormat->image_channel_data_type; bool isValidOrder = IsNV12Image(imageFormat) || IsPackedYuvImage(imageFormat); bool isValidDataType = (dataType == CL_UNORM_INT8); return isValidOrder && isValidDataType; } bool Image::hasAlphaChannel(const cl_image_format *imageFormat) { auto channelOrder = imageFormat->image_channel_order; return (channelOrder == CL_A) || (channelOrder == CL_Rx) || (channelOrder == CL_RA) || (channelOrder == CL_RGx) || (channelOrder == CL_RGBx) || (channelOrder == CL_RGBA) || (channelOrder == CL_BGRA) || (channelOrder == CL_ARGB) || (channelOrder == CL_INTENSITY) || (channelOrder == CL_sRGBA) || (channelOrder == CL_sBGRA) || (channelOrder == CL_sRGBx) || (channelOrder == CL_ABGR); } size_t Image::calculateOffsetForMapping(const MemObjOffsetArray &origin) const { size_t rowPitch = mappingOnCpuAllowed() ? imageDesc.image_row_pitch : getHostPtrRowPitch(); size_t slicePitch = mappingOnCpuAllowed() ? 
imageDesc.image_slice_pitch : getHostPtrSlicePitch(); size_t offset = getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0]; switch (imageDesc.image_type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: offset += slicePitch * origin[1]; break; case CL_MEM_OBJECT_IMAGE2D: offset += rowPitch * origin[1]; break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: offset += rowPitch * origin[1] + slicePitch * origin[2]; break; default: break; } return offset; } cl_int Image::validateRegionAndOrigin(const size_t *origin, const size_t *region, const cl_image_desc &imgDesc) { if (region[0] == 0 || region[1] == 0 || region[2] == 0) { return CL_INVALID_VALUE; } bool notMipMapped = (false == isMipMapped(imgDesc)); if ((imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D || imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) && (((origin[1] > 0) && notMipMapped) || origin[2] > 0 || region[1] > 1 || region[2] > 1)) { return CL_INVALID_VALUE; } if ((imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D || imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (((origin[2] > 0) && notMipMapped) || region[2] > 1)) { return CL_INVALID_VALUE; } if (notMipMapped) { return CL_SUCCESS; } uint32_t mipLevel = findMipLevel(imgDesc.image_type, origin); if (mipLevel < imgDesc.num_mip_levels) { return CL_SUCCESS; } else { return CL_INVALID_MIP_LEVEL; } } bool Image::hasSameDescriptor(const cl_image_desc &imageDesc) const { return this->imageDesc.image_type == imageDesc.image_type && this->imageDesc.image_width == imageDesc.image_width && this->imageDesc.image_height == imageDesc.image_height && this->imageDesc.image_depth == imageDesc.image_depth && this->imageDesc.image_array_size == imageDesc.image_array_size && this->hostPtrRowPitch == imageDesc.image_row_pitch && this->hostPtrSlicePitch == imageDesc.image_slice_pitch && this->imageDesc.num_mip_levels == imageDesc.num_mip_levels && this->imageDesc.num_samples == imageDesc.num_samples; } bool Image::hasValidParentImageFormat(const 
cl_image_format &imageFormat) const { if (this->imageFormat.image_channel_data_type != imageFormat.image_channel_data_type) { return false; } switch (this->imageFormat.image_channel_order) { case CL_BGRA: return imageFormat.image_channel_order == CL_sBGRA; case CL_sBGRA: return imageFormat.image_channel_order == CL_BGRA; case CL_RGBA: return imageFormat.image_channel_order == CL_sRGBA; case CL_sRGBA: return imageFormat.image_channel_order == CL_RGBA; case CL_RGB: return imageFormat.image_channel_order == CL_sRGB; case CL_sRGB: return imageFormat.image_channel_order == CL_RGB; case CL_RGBx: return imageFormat.image_channel_order == CL_sRGBx; case CL_sRGBx: return imageFormat.image_channel_order == CL_RGBx; case CL_R: return imageFormat.image_channel_order == CL_DEPTH; default: return false; } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/image.h000066400000000000000000000425211363734646600230040ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/string.h" #include "shared/source/image/image_surface_state.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { class Image; struct KernelInfo; struct SurfaceFormatInfo; typedef Image *(*ImageCreatFunc)(Context *context, const MemoryPropertiesFlags &memoryProperties, uint64_t flags, uint64_t flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isImageRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets); typedef struct { ImageCreatFunc createImageFunction; } ImageFuncs; class Image : public MemObj { public: const static cl_ulong maskMagic = 
0xFFFFFFFFFFFFFFFFLL; static const cl_ulong objectMagic = MemObj::objectMagic | 0x01; ~Image() override; static Image *create(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet); static Image *validateAndCreateImage(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr, cl_int &errcodeRet); static Image *createImageHw(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo = nullptr); static Image *createSharedImage(Context *context, SharingHandler *sharingHandler, const McsSurfaceInfo &mcsSurfaceInfo, GraphicsAllocation *graphicsAllocation, GraphicsAllocation *mcsAllocation, cl_mem_flags flags, const ClSurfaceFormatInfo *surfaceFormat, ImageInfo &imgInfo, uint32_t cubeFaceIndex, uint32_t baseMipLevel, uint32_t mipCount); static cl_int validate(Context *context, const MemoryPropertiesFlags &memoryProperties, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, const void *hostPtr); static cl_int validateImageFormat(const cl_image_format *imageFormat); static int32_t validatePlanarYUV(Context *context, const MemoryPropertiesFlags &memoryProperties, const cl_image_desc *imageDesc, const void *hostPtr); static int32_t validatePackedYUV(const MemoryPropertiesFlags &memoryProperties, const cl_image_desc *imageDesc); static cl_int validateImageTraits(Context *context, const MemoryPropertiesFlags 
&memoryProperties, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr); static size_t calculateHostPtrSize(const size_t *region, size_t rowPitch, size_t slicePitch, size_t pixelSize, uint32_t imageType); static void calculateHostPtrOffset(size_t *imageOffset, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, uint32_t imageType, size_t bytesPerPixel); static cl_int getImageParams(Context *context, cl_mem_flags flags, const ClSurfaceFormatInfo *surfaceFormat, const cl_image_desc *imageDesc, size_t *imageRowPitch, size_t *imageSlicePitch); static bool isImage1d(const cl_image_desc &imageDesc); static bool isImage2d(cl_mem_object_type imageType); static bool isImage2dOr2dArray(cl_mem_object_type imageType); static bool isDepthFormat(const cl_image_format &imageFormat); static bool hasSlices(cl_mem_object_type type) { return (type == CL_MEM_OBJECT_IMAGE3D) || (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) || (type == CL_MEM_OBJECT_IMAGE2D_ARRAY); } static ImageType convertType(const cl_mem_object_type type); static cl_mem_object_type convertType(const ImageType type); static ImageDescriptor convertDescriptor(const cl_image_desc &imageDesc); static cl_image_desc convertDescriptor(const ImageDescriptor &imageDesc); cl_int getImageInfo(cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); virtual void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel) = 0; virtual void setMediaImageArg(void *memory) = 0; virtual void setMediaSurfaceRotation(void *memory) = 0; virtual void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) = 0; const cl_image_desc &getImageDesc() const; const cl_image_format &getImageFormat() const; const ClSurfaceFormatInfo &getSurfaceFormatInfo() const; void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray 
©Offset) override; static bool isFormatRedescribable(cl_image_format format); Image *redescribe(); Image *redescribeFillImage(); ImageCreatFunc createFunction; uint32_t getQPitch() { return qPitch; } void setQPitch(uint32_t qPitch) { this->qPitch = qPitch; } size_t getHostPtrRowPitch() const { return hostPtrRowPitch; } void setHostPtrRowPitch(size_t pitch) { this->hostPtrRowPitch = pitch; } size_t getHostPtrSlicePitch() const { return hostPtrSlicePitch; } void setHostPtrSlicePitch(size_t pitch) { this->hostPtrSlicePitch = pitch; } size_t getImageCount() const { return imageCount; } void setImageCount(size_t imageCount) { this->imageCount = imageCount; } void setImageRowPitch(size_t rowPitch) { imageDesc.image_row_pitch = rowPitch; } void setImageSlicePitch(size_t slicePitch) { imageDesc.image_slice_pitch = slicePitch; } void setSurfaceOffsets(uint64_t offset, uint32_t xOffset, uint32_t yOffset, uint32_t yOffsetForUVPlane) { surfaceOffsets.offset = offset; surfaceOffsets.xOffset = xOffset; surfaceOffsets.yOffset = yOffset; surfaceOffsets.yOffsetForUVplane = yOffsetForUVPlane; } void getSurfaceOffsets(SurfaceOffsets &surfaceOffsetsOut) { surfaceOffsetsOut = this->surfaceOffsets; } void setCubeFaceIndex(uint32_t index) { cubeFaceIndex = index; } uint32_t getCubeFaceIndex() { return cubeFaceIndex; } void setMediaPlaneType(cl_uint type) { mediaPlaneType = type; } cl_uint getMediaPlaneType() const { return mediaPlaneType; } int peekBaseMipLevel() { return baseMipLevel; } void setBaseMipLevel(int level) { this->baseMipLevel = level; } uint32_t peekMipCount() { return mipCount; } void setMipCount(uint32_t mipCountNew) { this->mipCount = mipCountNew; } static const ClSurfaceFormatInfo *getSurfaceFormatFromTable(cl_mem_flags flags, const cl_image_format *imageFormat, unsigned int clVersionSupport); static cl_int validateRegionAndOrigin(const size_t *origin, const size_t *region, const cl_image_desc &imgDesc); cl_int writeNV12Planes(const void *hostPtr, size_t 
hostPtrRowPitch); void setMcsSurfaceInfo(const McsSurfaceInfo &info) { mcsSurfaceInfo = info; } const McsSurfaceInfo &getMcsSurfaceInfo() { return mcsSurfaceInfo; } size_t calculateOffsetForMapping(const MemObjOffsetArray &origin) const override; virtual void transformImage2dArrayTo3d(void *memory) = 0; virtual void transformImage3dTo2dArray(void *memory) = 0; bool hasSameDescriptor(const cl_image_desc &imageDesc) const; bool hasValidParentImageFormat(const cl_image_format &imageFormat) const; bool isImageFromBuffer() const { return castToObject(static_cast(associatedMemObject)) ? true : false; } bool isImageFromImage() const { return castToObject(static_cast(associatedMemObject)) ? true : false; } protected: Image(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, cl_image_format imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo &surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets = nullptr); void getOsSpecificImageInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam); void transferData(void *dst, size_t dstRowPitch, size_t dstSlicePitch, void *src, size_t srcRowPitch, size_t srcSlicePitch, std::array copyRegion, std::array copyOrigin); cl_image_format imageFormat; cl_image_desc imageDesc; ClSurfaceFormatInfo surfaceFormatInfo; McsSurfaceInfo mcsSurfaceInfo = {}; uint32_t qPitch = 0; size_t hostPtrRowPitch = 0; size_t hostPtrSlicePitch = 0; size_t imageCount = 0; uint32_t cubeFaceIndex; cl_uint mediaPlaneType; SurfaceOffsets surfaceOffsets = {0}; uint32_t baseMipLevel = 0; uint32_t mipCount = 1; static bool isValidSingleChannelFormat(const cl_image_format *imageFormat); static bool isValidIntensityFormat(const cl_image_format *imageFormat); static bool isValidLuminanceFormat(const cl_image_format *imageFormat); 
static bool isValidDepthFormat(const cl_image_format *imageFormat); static bool isValidDoubleChannelFormat(const cl_image_format *imageFormat); static bool isValidTripleChannelFormat(const cl_image_format *imageFormat); static bool isValidRGBAFormat(const cl_image_format *imageFormat); static bool isValidSRGBFormat(const cl_image_format *imageFormat); static bool isValidARGBFormat(const cl_image_format *imageFormat); static bool isValidDepthStencilFormat(const cl_image_format *imageFormat); static bool isValidYUVFormat(const cl_image_format *imageFormat); static bool hasAlphaChannel(const cl_image_format *imageFormat); }; template class ImageHw : public Image { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; public: ImageHw(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo &surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets = nullptr) : Image(context, memoryProperties, flags, flagsIntel, size, hostPtr, imageFormat, imageDesc, zeroCopy, graphicsAllocation, isObjectRedescribed, baseMipLevel, mipCount, surfaceFormatInfo, surfaceOffsets) { if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { this->imageDesc.image_depth = 0; } switch (imageDesc.image_type) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE1D_ARRAY: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D; 
break; default: case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D; break; case CL_MEM_OBJECT_IMAGE3D: surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D; break; } } void setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel) override; void setAuxParamsForMultisamples(RENDER_SURFACE_STATE *surfaceState); MOCKABLE_VIRTUAL void setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm); void setMediaImageArg(void *memory) override; void setMediaSurfaceRotation(void *memory) override; void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override; void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState); void appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState); void appendSurfaceStateExt(void *memory); void transformImage2dArrayTo3d(void *memory) override; void transformImage3dTo2dArray(void *memory) override; static Image *create(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets) { UNRECOVERABLE_IF(surfaceFormatInfo == nullptr); return new ImageHw(context, memoryProperties, flags, flagsIntel, size, hostPtr, imageFormat, imageDesc, zeroCopy, graphicsAllocation, isObjectRedescribed, baseMipLevel, mipCount, *surfaceFormatInfo, surfaceOffsets); } static int getShaderChannelValue(int inputShaderChannel, cl_channel_order imageChannelOrder) { if (imageChannelOrder == CL_A) { if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED || inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN || inputShaderChannel == 
RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) { return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } else if (imageChannelOrder == CL_R || imageChannelOrder == CL_RA || imageChannelOrder == CL_Rx) { if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN || inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) { return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } else if (imageChannelOrder == CL_RG || imageChannelOrder == CL_RGx) { if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) { return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } return inputShaderChannel; } typename RENDER_SURFACE_STATE::SURFACE_TYPE surfaceType; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/image.inl000066400000000000000000000227041363734646600233400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_cmds.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "image_ext.inl" namespace NEO { union SURFACE_STATE_BUFFER_LENGTH { uint32_t Length; struct SurfaceState { uint32_t Width : BITFIELD_RANGE(0, 6); uint32_t Height : BITFIELD_RANGE(7, 20); uint32_t Depth : BITFIELD_RANGE(21, 31); } SurfaceState; }; template void ImageHw::setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel) { using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast(memory); auto gmm = getGraphicsAllocation()->getDefaultGmm(); auto gmmHelper = rootDeviceEnvironment->getGmmHelper(); auto imageDescriptor = 
Image::convertDescriptor(getImageDesc()); ImageInfo imgInfo; imgInfo.imgDesc = imageDescriptor; imgInfo.qPitch = qPitch; imgInfo.surfaceFormat = &getSurfaceFormatInfo().surfaceFormat; setImageSurfaceState(surfaceState, imgInfo, getGraphicsAllocation()->getDefaultGmm(), *gmmHelper, cubeFaceIndex, getGraphicsAllocation()->getGpuAddress(), surfaceOffsets, IsNV12Image(&this->getImageFormat())); if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { // image1d_buffer is image1d created from buffer. The length of buffer could be larger // than the maximal image width. Mock image1d_buffer with SURFACE_TYPE_SURFTYPE_BUFFER. SURFACE_STATE_BUFFER_LENGTH Length = {0}; Length.Length = static_cast(getImageDesc().image_width - 1); surfaceState->setWidth(static_cast(Length.SurfaceState.Width + 1)); surfaceState->setHeight(static_cast(Length.SurfaceState.Height + 1)); surfaceState->setDepth(static_cast(Length.SurfaceState.Depth + 1)); surfaceState->setSurfacePitch(static_cast(getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes)); surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER); } else { setImageSurfaceStateDimensions(surfaceState, imgInfo, cubeFaceIndex, surfaceType); if (setAsMediaBlockImage) { uint32_t elSize = static_cast(getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); surfaceState->setWidth(static_cast((getImageDesc().image_width * elSize) / sizeof(uint32_t))); } } surfaceState->setSurfaceMinLod(this->baseMipLevel + mipLevel); surfaceState->setMipCountLod((this->mipCount > 0) ? 
(this->mipCount - 1) : 0); setMipTailStartLod(surfaceState, gmm); cl_channel_order imgChannelOrder = getSurfaceFormatInfo().OCLImageFormat.image_channel_order; int shaderChannelValue = ImageHw::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, imgChannelOrder); surfaceState->setShaderChannelSelectRed(static_cast(shaderChannelValue)); if (imgChannelOrder == CL_LUMINANCE) { surfaceState->setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); surfaceState->setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); } else { shaderChannelValue = ImageHw::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, imgChannelOrder); surfaceState->setShaderChannelSelectGreen(static_cast(shaderChannelValue)); shaderChannelValue = ImageHw::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, imgChannelOrder); surfaceState->setShaderChannelSelectBlue(static_cast(shaderChannelValue)); } surfaceState->setNumberOfMultisamples((typename RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES)mcsSurfaceInfo.multisampleCount); if (imageDesc.num_samples > 1) { setAuxParamsForMultisamples(surfaceState); } else if (gmm && gmm->isRenderCompressed) { setAuxParamsForCCS(surfaceState, gmm); } appendSurfaceStateDepthParams(surfaceState); appendSurfaceStateParams(surfaceState); appendSurfaceStateExt(surfaceState); } template void ImageHw::setAuxParamsForMultisamples(RENDER_SURFACE_STATE *surfaceState) { using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; if (getMcsAllocation()) { auto mcsGmm = getMcsAllocation()->getDefaultGmm(); if (mcsGmm->unifiedAuxTranslationCapable() && mcsGmm->hasMultisampleControlSurface()) { setAuxParamsForMCSCCS(surfaceState, mcsGmm); surfaceState->setAuxiliarySurfacePitch(mcsGmm->getUnifiedAuxPitchTiles()); surfaceState->setAuxiliarySurfaceQpitch(mcsGmm->getAuxQPitch()); setClearColorParams(surfaceState, mcsGmm); setUnifiedAuxBaseAddress(surfaceState, mcsGmm); } 
else if (mcsGmm->unifiedAuxTranslationCapable()) { setAuxParamsForCCS(surfaceState, mcsGmm); } else { surfaceState->setAuxiliarySurfaceMode((typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE)1); surfaceState->setAuxiliarySurfacePitch(mcsSurfaceInfo.pitch); surfaceState->setAuxiliarySurfaceQpitch(mcsSurfaceInfo.qPitch); surfaceState->setAuxiliarySurfaceBaseAddress(mcsAllocation->getGpuAddress()); } } else if (isDepthFormat(imageFormat) && surfaceState->getSurfaceFormat() != SURFACE_FORMAT::SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS) { surfaceState->setMultisampledSurfaceStorageFormat(RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL); } } template void ImageHw::appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState) { } template inline void ImageHw::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState) { } template void ImageHw::setMediaImageArg(void *memory) { using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE; using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; SURFACE_FORMAT surfaceFormat = MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y8_UNORM_VA; auto gmmHelper = rootDeviceEnvironment->getGmmHelper(); auto surfaceState = reinterpret_cast(memory); *surfaceState = GfxFamily::cmdInitMediaSurfaceState; setMediaSurfaceRotation(reinterpret_cast(surfaceState)); DEBUG_BREAK_IF(surfaceFormat == MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y1_UNORM); surfaceState->setWidth(static_cast(getImageDesc().image_width)); surfaceState->setHeight(static_cast(getImageDesc().image_height)); surfaceState->setPictureStructure(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE); auto gmm = getGraphicsAllocation()->getDefaultGmm(); auto tileMode = static_cast(gmm->gmmResourceInfo->getTileModeSurfaceState()); surfaceState->setTileMode(tileMode); surfaceState->setSurfacePitch(static_cast(getImageDesc().image_row_pitch)); surfaceState->setSurfaceFormat(surfaceFormat); 
surfaceState->setHalfPitchForChroma(false); surfaceState->setInterleaveChroma(false); surfaceState->setXOffsetForUCb(0); surfaceState->setYOffsetForUCb(0); surfaceState->setXOffsetForVCr(0); surfaceState->setYOffsetForVCr(0); setSurfaceMemoryObjectControlStateIndexToMocsTable( reinterpret_cast(surfaceState), gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE)); if (IsNV12Image(&this->getImageFormat())) { surfaceState->setInterleaveChroma(true); surfaceState->setYOffsetForUCb(this->surfaceOffsets.yOffsetForUVplane); } surfaceState->setVerticalLineStride(0); surfaceState->setVerticalLineStrideOffset(0); surfaceState->setSurfaceBaseAddress(getGraphicsAllocation()->getGpuAddress() + this->surfaceOffsets.offset); } template void ImageHw::transformImage2dArrayTo3d(void *memory) { DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D); using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D); surfaceState->setSurfaceArray(false); } template void ImageHw::transformImage3dTo2dArray(void *memory) { DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D); using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto surfaceState = reinterpret_cast(memory); surfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); surfaceState->setSurfaceArray(true); } template void ImageHw::setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/image_factory_init.inl000066400000000000000000000004641363734646600261110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ template class ImageHw; template <> void populateFactoryTable>() { extern ImageFuncs imageFactory[IGFX_MAX_CORE]; imageFactory[gfxCore].createImageFunction = ImageHw::create; } 
compute-runtime-20.13.16352/opencl/source/mem_obj/image_tgllp_plus.inl000066400000000000000000000012041363734646600255750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> void ImageHw::setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) { surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE); } template <> void ImageHw::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState) { const auto gmm = this->graphicsAllocation->getDefaultGmm(); if (gmm) { const bool isDepthResource = gmm->gmmResourceInfo->getResourceFlags()->Gpu.Depth; surfaceState->setDepthStencilResource(isDepthResource); } } compute-runtime-20.13.16352/opencl/source/mem_obj/map_operations_handler.cpp000066400000000000000000000040321363734646600267650ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/map_operations_handler.h" #include "shared/source/helpers/ptr_math.h" using namespace NEO; size_t MapOperationsHandler::size() const { std::lock_guard lock(mtx); return mappedPointers.size(); } bool MapOperationsHandler::add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel) { std::lock_guard lock(mtx); MapInfo mapInfo(ptr, ptrLength, size, offset, mipLevel); mapInfo.readOnly = (mapFlags == CL_MAP_READ); if (isOverlapping(mapInfo)) { return false; } mappedPointers.push_back(mapInfo); return true; } bool MapOperationsHandler::isOverlapping(MapInfo &inputMapInfo) { if (inputMapInfo.readOnly) { return false; } auto inputStartPtr = inputMapInfo.ptr; auto inputEndPtr = ptrOffset(inputStartPtr, inputMapInfo.ptrLength); for (auto &mapInfo : mappedPointers) { auto mappedStartPtr = mapInfo.ptr; auto mappedEndPtr = ptrOffset(mappedStartPtr, mapInfo.ptrLength); // Requested ptr starts before or inside existing ptr 
range and overlapping end if (inputStartPtr < mappedEndPtr && inputEndPtr >= mappedStartPtr) { return true; } } return false; } bool MapOperationsHandler::find(void *mappedPtr, MapInfo &outMapInfo) { std::lock_guard lock(mtx); for (auto &mapInfo : mappedPointers) { if (mapInfo.ptr == mappedPtr) { outMapInfo = mapInfo; return true; } } return false; } void MapOperationsHandler::remove(void *mappedPtr) { std::lock_guard lock(mtx); auto endIter = mappedPointers.end(); for (auto it = mappedPointers.begin(); it != endIter; it++) { if (it->ptr == mappedPtr) { std::iter_swap(it, mappedPointers.end() - 1); mappedPointers.pop_back(); break; } } } compute-runtime-20.13.16352/opencl/source/mem_obj/map_operations_handler.h000066400000000000000000000012731363734646600264360ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/helpers/properties_helper.h" #include #include namespace NEO { class MapOperationsHandler { public: virtual ~MapOperationsHandler() = default; bool add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel); void remove(void *mappedPtr); bool find(void *mappedPtr, MapInfo &outMapInfo); size_t size() const; protected: bool isOverlapping(MapInfo &inputMapInfo); std::vector mappedPointers; mutable std::mutex mtx; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/mem_obj.cpp000066400000000000000000000301531363734646600236630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/mem_obj.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/bit_helpers.h" #include "shared/source/helpers/get_info.h" 
#include "shared/source/memory_manager/deferred_deleter.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"

#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/get_info_status_mapper.h"

// NOTE(review): the header name of this include was stripped in the extracted text. <mutex>
// is chosen because std::unique_lock is used below - confirm against the original file.
#include <mutex>

namespace NEO {

// Stores the creation parameters. When a context is given, takes an internal reference on it
// and caches its memory manager plus the execution / root-device environment of device 0.
MemObj::MemObj(Context *context,
               cl_mem_object_type memObjectType,
               const MemoryPropertiesFlags &memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               GraphicsAllocation *gfxAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : context(context), memObjectType(memObjectType), memoryProperties(memoryProperties), flags(flags), flagsIntel(flagsIntel), size(size),
      memoryStorage(memoryStorage), hostPtr(hostPtr),
      isZeroCopy(zeroCopy), isHostPtrSVM(isHostPtrSVM), isObjectRedescribed(isObjectRedescribed),
      graphicsAllocation(gfxAllocation) {

    if (context) {
        context->incRefInternal();
        memoryManager = context->getMemoryManager();
        auto device = context->getDevice(0);
        executionEnvironment = device->getExecutionEnvironment();
        rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()].get();
    }
}

// Releases the allocations this object is responsible for, then runs the destructor
// callbacks in reverse registration order and drops the context reference.
MemObj::~MemObj() {
    // needWait: engines must finish with the allocation before it is destroyed.
    bool needWait = false;
    if (allocatedMapPtr != nullptr) {
        needWait = true;
    }
    if (mapOperationsHandler.size() > 0 && !getCpuAddressForMapping()) {
        needWait = true;
    }
    if (!destructorCallbacks.empty()) {
        needWait = true;
    }

    if (memoryManager && !isObjectRedescribed) {
        if (peekSharingHandler()) {
            peekSharingHandler()->releaseReusedGraphicsAllocation();
        }
        if (graphicsAllocation && !associatedMemObject && !isHostPtrSVM && graphicsAllocation->peekReuseCount() == 0) {
            memoryManager->removeAllocationFromHostPtrManager(graphicsAllocation);
            bool doAsyncDestructions = DebugManager.flags.EnableAsyncDestroyAllocations.get();
            if (!doAsyncDestructions) {
                needWait = true;
            }
            if (needWait && graphicsAllocation->isUsed()) {
                memoryManager->waitForEnginesCompletion(*graphicsAllocation);
            }
            destroyGraphicsAllocation(graphicsAllocation, doAsyncDestructions);
            graphicsAllocation = nullptr;
        }
        if (!associatedMemObject) {
            releaseMapAllocation();
            releaseAllocatedMapPtr();
        }
        if (mcsAllocation) {
            destroyGraphicsAllocation(mcsAllocation, false);
        }

        if (associatedMemObject) {
            // Only destroy our allocation when it is not the parent's.
            if (associatedMemObject->getGraphicsAllocation() != this->getGraphicsAllocation()) {
                destroyGraphicsAllocation(graphicsAllocation, false);
            }
            associatedMemObject->decRefInternal();
        }
    }
    if (!destructorCallbacks.empty()) {
        for (auto iter = destructorCallbacks.rbegin(); iter != destructorCallbacks.rend(); iter++) {
            (*iter)->invoke(this);
            delete *iter;
        }
    }

    if (context) {
        context->decRefInternal();
    }
}

// Calls the user-supplied notification function with the mem object and its user data.
void MemObj::DestructorCallback::invoke(cl_mem memObj) {
    this->funcNotify(memObj, userData);
}

// Picks the source value for paramName, copies it out through ::getInfo, and converts that
// status to a CL result. When paramValueSizeRet is non-null it receives the source size.
// Parameter names not handled here are delegated to getOsSpecificMemObjectInfo().
// The three cast targets below were stripped in the extracted text and are restored from the
// types of the variables they initialise (cl_mem / cl_uint).
cl_int MemObj::getMemObjectInfo(cl_mem_info paramName,
                                size_t paramValueSize,
                                void *paramValue,
                                size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t srcParamSize = 0;
    void *srcParam = nullptr;
    cl_bool usesSVMPointer;
    cl_uint refCnt = 0;
    cl_uint mapCount = 0;
    cl_mem clAssociatedMemObject = static_cast<cl_mem>(this->associatedMemObject);
    cl_context ctx = nullptr;
    uint64_t internalHandle = 0llu;

    switch (paramName) {
    case CL_MEM_TYPE:
        srcParamSize = sizeof(memObjectType);
        srcParam = &memObjectType;
        break;

    case CL_MEM_FLAGS:
        srcParamSize = sizeof(flags);
        srcParam = &flags;
        break;

    case CL_MEM_SIZE:
        srcParamSize = sizeof(size);
        srcParam = &size;
        break;

    case CL_MEM_HOST_PTR:
        srcParamSize = sizeof(hostPtr);
        srcParam = &hostPtr;
        break;

    case CL_MEM_CONTEXT:
        srcParamSize = sizeof(context);
        ctx = context;
        srcParam = &ctx;
        break;

    case CL_MEM_USES_SVM_POINTER:
        usesSVMPointer = isHostPtrSVM && isValueSet(flags, CL_MEM_USE_HOST_PTR);
        srcParamSize = sizeof(cl_bool);
        srcParam = &usesSVMPointer;
        break;

    case CL_MEM_OFFSET:
        srcParamSize = sizeof(offset);
        srcParam = &offset;
        break;

    case CL_MEM_ASSOCIATED_MEMOBJECT:
        srcParamSize = sizeof(clAssociatedMemObject);
        srcParam = &clAssociatedMemObject;
        break;

    case CL_MEM_MAP_COUNT:
        srcParamSize = sizeof(mapCount);
        mapCount = static_cast<cl_uint>(mapOperationsHandler.size());
        srcParam = &mapCount;
        break;

    case CL_MEM_REFERENCE_COUNT:
        refCnt = static_cast<cl_uint>(this->getReference());
        srcParamSize = sizeof(refCnt);
        srcParam = &refCnt;
        break;

    case CL_MEM_ALLOCATION_HANDLE_INTEL:
        internalHandle = this->getGraphicsAllocation()->peekInternalHandle(this->memoryManager);
        srcParamSize = sizeof(internalHandle);
        srcParam = &internalHandle;
        break;

    default:
        getOsSpecificMemObjectInfo(paramName, &srcParamSize, &srcParam);
        break;
    }

    retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, srcParam, srcParamSize));

    if (paramValueSizeRet) {
        *paramValueSizeRet = srcParamSize;
    }

    return retVal;
}

// Appends a destructor callback; the destructor invokes and deletes them in reverse order.
// NOTE(review): `std::unique_lock theLock(mtx)` is kept exactly as found. The declaration of
// `mtx` is not visible here; an explicit `<...>` mutex type may have been stripped - confirm.
cl_int MemObj::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                                     void *userData) {
    auto cb = new DestructorCallback(funcNotify, userData);

    std::unique_lock theLock(mtx);
    destructorCallbacks.push_back(cb);
    return CL_SUCCESS;
}

void *MemObj::getCpuAddress() const {
    return memoryStorage;
}

void *MemObj::getHostPtr() const {
    return hostPtr;
}

size_t MemObj::getSize() const {
    return size;
}

void MemObj::setAllocatedMapPtr(void *allocatedMapPtr) {
    this->allocatedMapPtr = allocatedMapPtr;
}

bool MemObj::isMemObjZeroCopy() const {
    return isZeroCopy;
}

bool MemObj::isMemObjWithHostPtrSVM() const {
    return isHostPtrSVM;
}

// True when CL_MEM_LOCALLY_UNCACHED_RESOURCE is set in the Intel flags.
bool MemObj::isMemObjUncacheable() const {
    return isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}

// True when either of the two locally-uncached Intel flags is set.
bool MemObj::isMemObjUncacheableForSurfaceState() const {
    return isAnyBitSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}

GraphicsAllocation *MemObj::getGraphicsAllocation() const {
    return graphicsAllocation;
}

// Replaces the graphics allocation. The previous one is handed to the memory manager for
// destruction unless a sharing handler exists and the allocation is still being reused.
// NOTE(review): `TakeOwnershipWrapper lock(*this)` is kept exactly as found; if the wrapper is
// a template, its explicit argument was stripped - confirm.
void MemObj::resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation) {
    TakeOwnershipWrapper lock(*this);

    if (graphicsAllocation != nullptr && (peekSharingHandler() == nullptr || graphicsAllocation->peekReuseCount() == 0)) {
        memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
    }

    graphicsAllocation = newGraphicsAllocation;
}

// Host reads are invalid for HOST_WRITE_ONLY and HOST_NO_ACCESS objects.
bool MemObj::readMemObjFlagsInvalid() {
    return isValueSet(flags, CL_MEM_HOST_WRITE_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
}

// Host writes are invalid for HOST_READ_ONLY and HOST_NO_ACCESS objects.
bool MemObj::writeMemObjFlagsInvalid() {
    return isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
}

// A map request is invalid when it asks for a direction the host-access flags forbid.
bool MemObj::mapMemObjFlagsInvalid(cl_map_flags mapFlags) {
    return (writeMemObjFlagsInvalid() && (mapFlags & CL_MAP_WRITE)) ||
           (readMemObjFlagsInvalid() && (mapFlags & CL_MAP_READ));
}

void MemObj::setHostPtrMinSize(size_t size) {
    hostPtrMinSize = size;
}

// hostPtr for CL_MEM_USE_HOST_PTR objects, memoryStorage otherwise.
void *MemObj::getCpuAddressForMapping() {
    void *ptrToReturn = nullptr;
    if (isValueSet(flags, CL_MEM_USE_HOST_PTR)) {
        ptrToReturn = this->hostPtr;
    } else {
        ptrToReturn = this->memoryStorage;
    }
    return ptrToReturn;
}

// hostPtr only for zero-copy CL_MEM_USE_HOST_PTR objects, memoryStorage otherwise.
void *MemObj::getCpuAddressForMemoryTransfer() {
    void *ptrToReturn = nullptr;
    if (isValueSet(flags, CL_MEM_USE_HOST_PTR) && this->isMemObjZeroCopy()) {
        ptrToReturn = this->hostPtr;
    } else {
        ptrToReturn = this->memoryStorage;
    }
    return ptrToReturn;
}

// Frees the system memory allocated for mapping (if any) and clears the pointer.
void MemObj::releaseAllocatedMapPtr() {
    if (allocatedMapPtr) {
        DEBUG_BREAK_IF(isValueSet(flags, CL_MEM_USE_HOST_PTR));
        memoryManager->freeSystemMemory(allocatedMapPtr);
    }
    allocatedMapPtr = nullptr;
}

// Destroys the map allocation unless the host pointer is SVM.
void MemObj::releaseMapAllocation() {
    if (mapAllocation && !isHostPtrSVM) {
        destroyGraphicsAllocation(mapAllocation, false);
    }
}

// asyncDestroy selects checkGpuUsageAndDestroyGraphicsAllocations(); otherwise the
// allocation is freed directly.
void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy) {
    if (asyncDestroy) {
        memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation);
    } else {
        memoryManager->freeGraphicsMemory(allocation);
    }
}

// Returns false only when source and destination resolve to the same address AND the command
// is one of the buffer/image read/write commands listed below; true in every other case.
bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {
    auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObject);
    auto hostStorage = ptrOffset(hostPtr, offsetInHostPtr);
    auto isMemTransferNeeded = !((bufferStorage == hostStorage) &&
                                 (cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER ||
                                  cmdType == CL_COMMAND_WRITE_BUFFER_RECT || cmdType == CL_COMMAND_READ_BUFFER_RECT ||
                                  cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE));
    return isMemTransferNeeded;
}

// Base CPU pointer used for map operations:
//   - sub-objects defer to their associated (parent) object,
//   - CL_MEM_USE_HOST_PTR objects return the host pointer,
//   - otherwise an existing map allocation's buffer is returned, or page-aligned system memory
//     is allocated, wrapped in a MAP_ALLOCATION and remembered for later calls.
// NOTE(review): `TakeOwnershipWrapper memObjOwnership(*this)` is kept exactly as found; see
// the note on resetGraphicsAllocation().
void *MemObj::getBasePtrForMap(uint32_t rootDeviceIndex) {
    if (associatedMemObject) {
        return associatedMemObject->getBasePtrForMap(rootDeviceIndex);
    }
    if (getMemoryPropertiesFlags() & CL_MEM_USE_HOST_PTR) {
        return getHostPtr();
    } else {
        TakeOwnershipWrapper memObjOwnership(*this);
        if (getMapAllocation()) {
            return getMapAllocation()->getUnderlyingBuffer();
        } else {
            auto memory = memoryManager->allocateSystemMemory(getSize(), MemoryConstants::pageSize);
            setAllocatedMapPtr(memory);
            AllocationProperties properties{rootDeviceIndex, false, getSize(), GraphicsAllocation::AllocationType::MAP_ALLOCATION, false};
            auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, memory);
            setMapAllocation(allocation);
            return getAllocatedMapPtr();
        }
    }
}

// Thin forwarder to MapOperationsHandler::add().
bool MemObj::addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags,
                          MemObjSizeArray &size, MemObjOffsetArray &offset,
                          uint32_t mipLevel) {
    return mapOperationsHandler.add(ptr, ptrLength, mapFlags, size, offset,
                                    mipLevel);
}

// True when the allocation has a Gmm whose surface-state tile mode is non-zero.
bool MemObj::isTiledAllocation() const {
    auto gmm = graphicsAllocation->getDefaultGmm();
    return gmm && (gmm->gmmResourceInfo->getTileModeSurfaceState() != 0);
}

// CPU mapping is allowed only for non-tiled, non-shared, non-mipmapped, non-render-compressed
// allocations in a system memory pool, and only while DisableZeroCopyForBuffers is off.
bool MemObj::mappingOnCpuAllowed() const {
    return !isTiledAllocation() && !peekSharingHandler() && !isMipMapped(this) && !DebugManager.flags.DisableZeroCopyForBuffers.get() &&
           !(graphicsAllocation->getDefaultGmm() && graphicsAllocation->getDefaultGmm()->isRenderCompressed) &&
           MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
}
} // namespace NEO
// Archive member header, kept verbatim:
// compute-runtime-20.13.16352/opencl/source/mem_obj/mem_obj.h000066400000000000000000000146761363734646600233440ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/debug_settings/debug_settings_manager.h"

#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/api/cl_types.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/mipmap.h"
#include "opencl/source/mem_obj/map_operations_handler.h"
#include "opencl/source/sharings/sharing.h"

#include "memory_properties_flags.h"

// NOTE(review): three standard header names were stripped here in the extracted text.
// <cstdint> and <memory> are restored because the class uses uint32_t and std::shared_ptr;
// the third name could not be recovered - confirm all three against the original file.
#include <cstdint>
#include <memory>

namespace NEO {
class ExecutionEnvironment;
struct RootDeviceEnvironment;
class GraphicsAllocation;
struct KernelInfo;
class MemoryManager;
class Context;

template <>
struct OpenCLObjectMapper<_cl_mem> {
    typedef class MemObj DerivedType;
};

class MemObj : public BaseObject<_cl_mem> {
  public:
    constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFF00LL;
    constexpr static cl_ulong objectMagic = 0xAB2212340CACDD00LL;

    MemObj(Context *context,
           cl_mem_object_type memObjectType,
           const MemoryPropertiesFlags &memoryProperties,
           cl_mem_flags flags,
           cl_mem_flags_intel flagsIntel,
           size_t size,
           void *memoryStorage,
           void *hostPtr,
           GraphicsAllocation *gfxAllocation,
           bool zeroCopy,
           bool isHostPtrSVM,
           bool isObjectRedescrbied);
    ~MemObj() override;

    cl_int getMemObjectInfo(cl_mem_info paramName,
                            size_t paramValueSize,
                            void *paramValue,
                            size_t *paramValueSizeRet);
    cl_int setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                                 void *userData);

    void *getCpuAddress() const;
    void *getHostPtr() const;
    bool getIsObjectRedescribed() const { return isObjectRedescribed; };
    size_t getSize() const;

    bool addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel);
    // The declaration below is completed by the next source line, which is left unmodified.
    bool findMappedPtr(void
*mappedPtr, MapInfo &outMapInfo) { return mapOperationsHandler.find(mappedPtr, outMapInfo); } void removeMappedPtr(void *mappedPtr) { mapOperationsHandler.remove(mappedPtr); } void *getBasePtrForMap(uint32_t rootDeviceIndex); MOCKABLE_VIRTUAL void setAllocatedMapPtr(void *allocatedMapPtr); void *getAllocatedMapPtr() const { return allocatedMapPtr; } void setHostPtrMinSize(size_t size); void releaseAllocatedMapPtr(); void releaseMapAllocation(); bool isMemObjZeroCopy() const; bool isMemObjWithHostPtrSVM() const; bool isMemObjUncacheable() const; bool isMemObjUncacheableForSurfaceState() const; virtual void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; virtual void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; GraphicsAllocation *getGraphicsAllocation() const; void resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation); GraphicsAllocation *getMcsAllocation() { return mcsAllocation; } void setMcsAllocation(GraphicsAllocation *alloc) { mcsAllocation = alloc; } bool readMemObjFlagsInvalid(); bool writeMemObjFlagsInvalid(); bool mapMemObjFlagsInvalid(cl_map_flags mapFlags); MOCKABLE_VIRTUAL bool isTiledAllocation() const; void *getCpuAddressForMapping(); void *getCpuAddressForMemoryTransfer(); std::shared_ptr &getSharingHandler() { return sharingHandler; } SharingHandler *peekSharingHandler() const { return sharingHandler.get(); } void setSharingHandler(SharingHandler *sharingHandler) { this->sharingHandler.reset(sharingHandler); } void setParentSharingHandler(std::shared_ptr &handler) { sharingHandler = handler; } unsigned int acquireCount = 0; Context *getContext() const { return context; } void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy); bool checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *ptr, cl_command_type cmdType); bool mappingOnCpuAllowed() const; virtual size_t 
calculateOffsetForMapping(const MemObjOffsetArray &offset) const { return offset[0]; } size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); } cl_mem_object_type peekClMemObjType() const { return memObjectType; } size_t getOffset() const { return offset; } MemoryManager *getMemoryManager() const { return memoryManager; } void setMapAllocation(GraphicsAllocation *allocation) { mapAllocation = allocation; } GraphicsAllocation *getMapAllocation() const { if (associatedMemObject) { return associatedMemObject->getMapAllocation(); } return mapAllocation; } const cl_mem_flags &getMemoryPropertiesFlags() const { return flags; } const cl_mem_flags &getMemoryPropertiesFlagsIntel() const { return flagsIntel; } protected: void getOsSpecificMemObjectInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam); Context *context; cl_mem_object_type memObjectType; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; size_t size; size_t hostPtrMinSize = 0; void *memoryStorage; void *hostPtr; void *allocatedMapPtr = nullptr; MapOperationsHandler mapOperationsHandler; size_t offset = 0; MemObj *associatedMemObject = nullptr; cl_uint refCount = 0; ExecutionEnvironment *executionEnvironment = nullptr; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; bool isZeroCopy; bool isHostPtrSVM; bool isObjectRedescribed; MemoryManager *memoryManager = nullptr; GraphicsAllocation *graphicsAllocation; GraphicsAllocation *mcsAllocation = nullptr; GraphicsAllocation *mapAllocation = nullptr; std::shared_ptr sharingHandler; class DestructorCallback { public: DestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *), void *userData) : funcNotify(funcNotify), userData(userData){}; void invoke(cl_mem memObj); private: void(CL_CALLBACK *funcNotify)(cl_mem, void *); void *userData; }; std::vector destructorCallbacks; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/mem_obj/mem_obj_helper.cpp000066400000000000000000000013261363734646600252220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/mem_obj_helper_common.inl" #include "memory_properties_flags.h" namespace NEO { bool MemObjHelper::isSuitableForRenderCompression(bool renderCompressed, const MemoryPropertiesFlags &properties, Context &context, bool preferCompression) { return renderCompressed && preferCompression; } bool MemObjHelper::validateExtraMemoryProperties(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const Context &context) { return true; } const uint64_t MemObjHelper::extraFlags = 0; const uint64_t MemObjHelper::extraFlagsIntel = 0; } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/mem_obj_helper.h000066400000000000000000000037141363734646600246720ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/bit_helpers.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/helpers/mem_properties_parser_helper.h" #include "opencl/source/mem_obj/mem_obj.h" #include "CL/cl.h" #include "memory_properties_flags.h" namespace NEO { class MemObjHelper { public: static const uint64_t extraFlags; static const uint64_t extraFlagsIntel; static const uint64_t commonFlags; static const uint64_t commonFlagsIntel; static const uint64_t validFlagsForBuffer; static const uint64_t validFlagsForBufferIntel; static const uint64_t validFlagsForImage; static const uint64_t validFlagsForImageIntel; static bool validateMemoryPropertiesForBuffer(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, 
const Context &context); static bool validateMemoryPropertiesForImage(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent, const Context &context); static AllocationProperties getAllocationPropertiesWithImageInfo(uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryPropertiesFlags &memoryProperties, const HardwareInfo &hwInfo); static bool checkMemFlagsForSubBuffer(cl_mem_flags flags); static SVMAllocsManager::SvmAllocationProperties getSvmAllocationProperties(cl_mem_flags flags); static bool isSuitableForRenderCompression(bool renderCompressed, const MemoryPropertiesFlags &properties, Context &context, bool preferCompression); protected: static bool validateExtraMemoryProperties(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const Context &context); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/mem_obj_helper_common.inl000066400000000000000000000140551363734646600265750ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/mem_obj_helper.h" namespace NEO { bool MemObjHelper::validateMemoryPropertiesForBuffer(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, const Context &context) { /* Check all the invalid flags combination. 
*/ if ((isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY)) || (isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) || (isValueSet(flags, CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) || (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR)) || (isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) || (isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) || (isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY)) || (isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) { return false; } return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel, context); } bool MemObjHelper::validateMemoryPropertiesForImage(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent, const Context &context) { /* Check all the invalid flags combination. */ if ((!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) && (isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY) || isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY) || isValueSet(flags, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY) || isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR) || isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR) || isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS) || isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS) || isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_WRITE) || isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_WRITE_ONLY) || isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_ONLY))) { return false; } auto parentMemObj = castToObject(parent); if (parentMemObj != nullptr && flags) { auto parentFlags = parentMemObj->getMemoryPropertiesFlags(); /* Check whether flags are compatible with parent. 
*/ if (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR) || isValueSet(flags, CL_MEM_COPY_HOST_PTR) || isValueSet(flags, CL_MEM_USE_HOST_PTR) || ((!isValueSet(parentFlags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) && (!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) && ((isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) || (isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_ONLY)) || (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) || (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_WRITE_ONLY)) || (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_WRITE)) || (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_WRITE_ONLY)) || (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_ONLY)) || (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_WRITE_ONLY)) || (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_READ_ONLY))))) { return false; } } return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel, context); } AllocationProperties MemObjHelper::getAllocationPropertiesWithImageInfo(uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryPropertiesFlags &memoryProperties, const HardwareInfo &hwInfo) { AllocationProperties allocationProperties{rootDeviceIndex, allocateMemory, imgInfo, GraphicsAllocation::AllocationType::IMAGE}; MemoryPropertiesParser::fillPoliciesInProperties(allocationProperties, memoryProperties, hwInfo); return allocationProperties; } bool MemObjHelper::checkMemFlagsForSubBuffer(cl_mem_flags flags) { const cl_mem_flags allValidFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; return isFieldValid(flags, allValidFlags); } SVMAllocsManager::SvmAllocationProperties 
MemObjHelper::getSvmAllocationProperties(cl_mem_flags flags) { SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = isValueSet(flags, CL_MEM_SVM_FINE_GRAIN_BUFFER); svmProperties.hostPtrReadOnly = isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS); svmProperties.readOnly = isValueSet(flags, CL_MEM_READ_ONLY); return svmProperties; } const uint64_t MemObjHelper::commonFlags = extraFlags | CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; const uint64_t MemObjHelper::commonFlagsIntel = extraFlagsIntel | CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_48BIT_RESOURCE_INTEL; const uint64_t MemObjHelper::validFlagsForBuffer = commonFlags | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; const uint64_t MemObjHelper::validFlagsForBufferIntel = commonFlagsIntel | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; const uint64_t MemObjHelper::validFlagsForImage = commonFlags | CL_MEM_NO_ACCESS_INTEL | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_FORCE_LINEAR_STORAGE_INTEL; const uint64_t MemObjHelper::validFlagsForImageIntel = commonFlagsIntel; } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/pipe.cpp000066400000000000000000000100101363734646600231760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/pipe.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" namespace NEO { Pipe::Pipe(Context *context, cl_mem_flags flags, 
cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, void *memoryStorage, GraphicsAllocation *gfxAllocation) : MemObj(context, CL_MEM_OBJECT_PIPE, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, static_cast(packetSize * (maxPackets + 1) + intelPipeHeaderReservedSpace), memoryStorage, nullptr, gfxAllocation, false, false, false), pipePacketSize(packetSize), pipeMaxPackets(maxPackets) { magic = objectMagic; } Pipe *Pipe::create(Context *context, cl_mem_flags flags, cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, cl_int &errcodeRet) { Pipe *pPipe = nullptr; errcodeRet = CL_SUCCESS; MemoryManager *memoryManager = context->getMemoryManager(); DEBUG_BREAK_IF(!memoryManager); MemoryPropertiesFlags memoryPropertiesFlags = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); while (true) { auto size = static_cast(packetSize * (maxPackets + 1) + intelPipeHeaderReservedSpace); auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); AllocationProperties allocProperties = MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryPropertiesFlags, true, size, GraphicsAllocation::AllocationType::PIPE, false, context->getDevice(0)->getHardwareInfo()); GraphicsAllocation *memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties); if (!memory) { errcodeRet = CL_OUT_OF_HOST_MEMORY; break; } pPipe = new (std::nothrow) Pipe(context, flags, packetSize, maxPackets, properties, memory->getUnderlyingBuffer(), memory); if (!pPipe) { memoryManager->freeGraphicsMemory(memory); memory = nullptr; errcodeRet = CL_OUT_OF_HOST_MEMORY; break; } // Initialize pipe_control_intel_t structure located at the beginning of the surface memset(memory->getUnderlyingBuffer(), 0, intelPipeHeaderReservedSpace); *reinterpret_cast(memory->getUnderlyingBuffer()) = maxPackets + 1; break; } return pPipe; } cl_int Pipe::getPipeInfo(cl_image_info paramName, size_t 
paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t srcParamSize = 0; void *srcParam = nullptr; switch (paramName) { case CL_PIPE_PACKET_SIZE: srcParamSize = sizeof(cl_uint); srcParam = &(pipePacketSize); break; case CL_PIPE_MAX_PACKETS: srcParamSize = sizeof(cl_uint); srcParam = &(pipeMaxPackets); break; default: break; } retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, srcParam, srcParamSize)); if (paramValueSizeRet) { *paramValueSizeRet = srcParamSize; } return retVal; } void Pipe::setPipeArg(void *memory, uint32_t patchSize) { patchWithRequiredSize(memory, patchSize, static_cast(getGraphicsAllocation()->getGpuAddressToPatch())); } Pipe::~Pipe() = default; } // namespace NEO compute-runtime-20.13.16352/opencl/source/mem_obj/pipe.h000066400000000000000000000022531363734646600226550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/mem_obj/buffer.h" namespace NEO { class Pipe : public MemObj { public: static const size_t intelPipeHeaderReservedSpace = 128; static const cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; static const cl_ulong objectMagic = MemObj::objectMagic | 0x03; static Pipe *create( Context *context, cl_mem_flags flags, cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, cl_int &errcodeRet); ~Pipe() override; cl_int getPipeInfo(cl_image_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); void setPipeArg(void *memory, uint32_t patchSize); protected: Pipe(Context *context, cl_mem_flags flags, cl_uint packetSize, cl_uint maxPackets, const cl_pipe_properties *properties, void *memoryStorage, GraphicsAllocation *gfxAllocation); cl_uint pipePacketSize; cl_uint pipeMaxPackets; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/memory_manager/000077500000000000000000000000001363734646600231375ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/memory_manager/CMakeLists.txt000066400000000000000000000017211363734646600257000ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/address_mapper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/address_mapper.h ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_banks.h ${CMAKE_CURRENT_SOURCE_DIR}/os_agnostic_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_agnostic_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/os_agnostic_memory_manager_allocate_in_device_pool.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.inl ${CMAKE_CURRENT_SOURCE_DIR}/physical_address_allocator.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEMORY_MANAGER}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_MEMORY_MANAGER ${RUNTIME_SRCS_MEMORY_MANAGER}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/memory_manager/address_mapper.cpp000066400000000000000000000030351363734646600266350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/memory_manager/address_mapper.h" #include "shared/source/helpers/aligned_memory.h" #include #include namespace NEO { AddressMapper::AddressMapper() : nextPage(1) { } AddressMapper::~AddressMapper() { for (auto &m : mapping) delete m; } uint32_t AddressMapper::map(void *vm, size_t size) { void *aligned = alignDown(vm, MemoryConstants::pageSize); size_t alignedSize = alignSizeWholePage(vm, size); auto it = mapping.begin(); for (; it != mapping.end(); it++) { if ((*it)->vm == aligned) { if 
((*it)->size == alignedSize) { return (*it)->ggtt; } break; } } if (it != mapping.end()) { delete *it; mapping.erase(it); } uint32_t numPages = static_cast(alignedSize / MemoryConstants::pageSize); auto tmp = nextPage.fetch_add(numPages); MapInfo *m = new MapInfo; m->vm = aligned; m->size = alignedSize; m->ggtt = static_cast(tmp * MemoryConstants::pageSize); mapping.push_back(m); return m->ggtt; } void AddressMapper::unmap(void *vm) { void *aligned = alignDown(vm, MemoryConstants::pageSize); auto it = mapping.begin(); for (; it != mapping.end(); it++) { if ((*it)->vm == aligned) { break; } } if (it != mapping.end()) { delete *it; mapping.erase(it); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/address_mapper.h000066400000000000000000000010651363734646600263030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include #include namespace NEO { class AddressMapper { public: AddressMapper(); ~AddressMapper(); // maps to continuous region uint32_t map(void *vm, size_t size); // unmaps void unmap(void *vm); protected: struct MapInfo { void *vm; size_t size; uint32_t ggtt; }; std::vector mapping; std::atomic nextPage; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp000066400000000000000000000016541363734646600327450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/debug_helpers.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/page_fault_manager/cpu_page_fault_manager.h" #include "opencl/source/command_queue/command_queue.h" namespace NEO { void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) { auto commandQueue = static_cast(cmdQ); auto retVal = commandQueue->enqueueSVMMap(true, CL_MAP_WRITE, ptr, size, 0, nullptr, nullptr, 
false); UNRECOVERABLE_IF(retVal); } void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) { auto commandQueue = static_cast(cmdQ); auto retVal = commandQueue->enqueueSVMUnmap(ptr, 0, nullptr, nullptr, false); UNRECOVERABLE_IF(retVal); retVal = commandQueue->finish(); UNRECOVERABLE_IF(retVal); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/mem_obj_surface.h000066400000000000000000000014701363734646600264320ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/surface.h" #include "opencl/source/mem_obj/mem_obj.h" namespace NEO { class MemObjSurface : public Surface { public: MemObjSurface(MemObj *memObj) : Surface(memObj->getGraphicsAllocation()->isCoherent()), memObj(memObj) { memObj->incRefInternal(); } ~MemObjSurface() override { memObj->decRefInternal(); memObj = nullptr; }; void makeResident(CommandStreamReceiver &csr) override { DEBUG_BREAK_IF(!memObj); csr.makeResident(*memObj->getGraphicsAllocation()); } Surface *duplicate() override { return new MemObjSurface(this->memObj); }; protected: class MemObj *memObj; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/memory_banks.h000066400000000000000000000005161363734646600260000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace MemoryBanks { constexpr uint32_t BankNotSpecified{0}; constexpr uint32_t MainBank{0}; inline uint32_t getBank(uint32_t deviceOrdinal) { return MemoryBanks::MainBank; } } // namespace MemoryBanks compute-runtime-20.13.16352/opencl/source/memory_manager/os_agnostic_memory_manager.cpp000066400000000000000000000427041363734646600312440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/host_ptr_manager.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/os_memory.h" #include "opencl/source/aub/aub_center.h" #include "opencl/source/helpers/surface_formats.h" #include namespace NEO { OsAgnosticMemoryManager::OsAgnosticMemoryManager(bool aubUsage, ExecutionEnvironment &executionEnvironment) : MemoryManager(executionEnvironment) { // 4 x sizeof(Heap32) + 2 x sizeof(Standard/Standard64k) size_t reservedCpuAddressRangeSize = is64bit ? (4 * 4 + 2 * (aubUsage ? 
32 : 4)) * GB : 0; for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) { auto gpuAddressSpace = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace; getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, reservedCpuAddressRangeSize, rootDeviceIndex, gfxPartitions.size()); } } OsAgnosticMemoryManager::~OsAgnosticMemoryManager() = default; struct OsHandle { }; GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) { auto sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize); MemoryAllocation *memoryAllocation = nullptr; if (fakeBigAllocations && allocationData.size > bigAllocation) { memoryAllocation = createMemoryAllocation( allocationData.type, nullptr, (void *)dummyAddress, static_cast(dummyAddress), allocationData.size, counter, MemoryPool::System4KBPages, allocationData.rootDeviceIndex, allocationData.flags.uncacheable, allocationData.flags.flushL3, false); counter++; return memoryAllocation; } auto ptr = allocateSystemMemory(sizeAligned, allocationData.alignment ? 
alignUp(allocationData.alignment, MemoryConstants::pageSize) : MemoryConstants::pageSize); if (ptr != nullptr) { memoryAllocation = createMemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast(ptr), allocationData.size, counter, MemoryPool::System4KBPages, allocationData.rootDeviceIndex, allocationData.flags.uncacheable, allocationData.flags.flushL3, false); if (allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU) { //add 2MB padding in case mapPtr is not 2MB aligned size_t reserveSize = sizeAligned + allocationData.alignment; void *gpuPtr = reserveCpuAddressRange(reserveSize, allocationData.rootDeviceIndex); if (!gpuPtr) { delete memoryAllocation; alignedFreeWrapper(ptr); return nullptr; } memoryAllocation->setReservedAddressRange(gpuPtr, reserveSize); gpuPtr = alignUp(gpuPtr, allocationData.alignment); memoryAllocation->setCpuPtrAndGpuAddress(ptr, reinterpret_cast(gpuPtr)); } } counter++; return memoryAllocation; } GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) { auto alignedPtr = alignDown(allocationData.hostPtr, MemoryConstants::pageSize); auto offsetInPage = ptrDiff(allocationData.hostPtr, alignedPtr); auto memoryAllocation = createMemoryAllocation(allocationData.type, nullptr, const_cast(allocationData.hostPtr), reinterpret_cast(alignedPtr), allocationData.size, counter, MemoryPool::System4KBPages, allocationData.rootDeviceIndex, false, allocationData.flags.flushL3, false); memoryAllocation->setAllocationOffset(offsetInPage); counter++; return memoryAllocation; } GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemory64kb(const AllocationData &allocationData) { AllocationData allocationData64kb = allocationData; allocationData64kb.size = alignUp(allocationData.size, MemoryConstants::pageSize64k); allocationData64kb.alignment = MemoryConstants::pageSize64k; auto memoryAllocation = allocateGraphicsMemoryWithAlignment(allocationData64kb); if 
(memoryAllocation) { static_cast(memoryAllocation)->overrideMemoryPool(MemoryPool::System64KBPages); } return memoryAllocation; } GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) { auto heap = useInternal32BitAllocator(allocationData.type) ? HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY : HeapIndex::HEAP_EXTERNAL; auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); if (allocationData.hostPtr) { auto allocationSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size); auto gpuVirtualAddress = gfxPartition->heapAllocate(heap, allocationSize); if (!gpuVirtualAddress) { return nullptr; } uint64_t offset = static_cast(reinterpret_cast(allocationData.hostPtr) & MemoryConstants::pageMask); MemoryAllocation *memAlloc = new MemoryAllocation( allocationData.rootDeviceIndex, allocationData.type, nullptr, const_cast(allocationData.hostPtr), GmmHelper::canonize(gpuVirtualAddress + offset), allocationData.size, counter, MemoryPool::System4KBPagesWith32BitGpuAddressing, false, false); memAlloc->set32BitAllocation(true); memAlloc->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(heap))); memAlloc->sizeToFree = allocationSize; counter++; return memAlloc; } auto allocationSize = alignUp(allocationData.size, MemoryConstants::pageSize); void *ptrAlloc = nullptr; auto gpuAddress = gfxPartition->heapAllocate(heap, allocationSize); if (allocationData.size < 0xfffff000) { if (fakeBigAllocations) { ptrAlloc = reinterpret_cast(dummyAddress); } else { ptrAlloc = alignedMallocWrapper(allocationSize, MemoryConstants::allocationAlignment); } } MemoryAllocation *memoryAllocation = nullptr; if (ptrAlloc != nullptr) { memoryAllocation = new MemoryAllocation(allocationData.rootDeviceIndex, allocationData.type, ptrAlloc, ptrAlloc, GmmHelper::canonize(gpuAddress), allocationData.size, counter, MemoryPool::System4KBPagesWith32BitGpuAddressing, false, allocationData.flags.flushL3); 
memoryAllocation->set32BitAllocation(true); memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(heap))); memoryAllocation->sizeToFree = allocationSize; } counter++; return memoryAllocation; } GraphicsAllocation *OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) { auto graphicsAllocation = createMemoryAllocation(properties.allocationType, nullptr, reinterpret_cast(1), 1, 4096u, static_cast(handle), MemoryPool::SystemCpuInaccessible, properties.rootDeviceIndex, false, false, requireSpecificBitness); graphicsAllocation->setSharedHandle(handle); graphicsAllocation->set32BitAllocation(requireSpecificBitness); if (properties.imgInfo) { Gmm *gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[properties.rootDeviceIndex]->getGmmClientContext(), *properties.imgInfo, createStorageInfoFromProperties(properties)); graphicsAllocation->setDefaultGmm(gmm); } return graphicsAllocation; } void OsAgnosticMemoryManager::addAllocationToHostPtrManager(GraphicsAllocation *gfxAllocation) { FragmentStorage fragment = {}; fragment.driverAllocation = true; fragment.fragmentCpuPointer = gfxAllocation->getUnderlyingBuffer(); fragment.fragmentSize = alignUp(gfxAllocation->getUnderlyingBufferSize(), MemoryConstants::pageSize); fragment.osInternalStorage = new OsHandle(); fragment.residency = new ResidencyData(); hostPtrManager->storeFragment(fragment); } void OsAgnosticMemoryManager::removeAllocationFromHostPtrManager(GraphicsAllocation *gfxAllocation) { auto buffer = gfxAllocation->getUnderlyingBuffer(); auto fragment = hostPtrManager->getFragment(buffer); if (fragment && fragment->driverAllocation) { OsHandle *osStorageToRelease = fragment->osInternalStorage; ResidencyData *residencyDataToRelease = fragment->residency; if (hostPtrManager->releaseHostPtr(buffer)) { delete osStorageToRelease; delete residencyDataToRelease; } } } void 
OsAgnosticMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) { for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) { delete gfxAllocation->getGmm(handleId); } if ((uintptr_t)gfxAllocation->getUnderlyingBuffer() == dummyAddress) { delete gfxAllocation; return; } if (gfxAllocation->fragmentsStorage.fragmentCount) { cleanGraphicsMemoryCreatedFromHostPtr(gfxAllocation); delete gfxAllocation; return; } auto memoryAllocation = static_cast(gfxAllocation); auto sizeToFree = memoryAllocation->sizeToFree; if (sizeToFree) { auto gpuAddressToFree = GmmHelper::decanonize(memoryAllocation->getGpuAddress()) & ~MemoryConstants::pageMask; auto gfxPartition = getGfxPartition(memoryAllocation->getRootDeviceIndex()); gfxPartition->freeGpuAddressRange(gpuAddressToFree, sizeToFree); } alignedFreeWrapper(gfxAllocation->getDriverAllocatedCpuPtr()); if (gfxAllocation->getReservedAddressPtr()) { releaseReservedCpuAddressRange(gfxAllocation->getReservedAddressPtr(), gfxAllocation->getReservedAddressSize(), gfxAllocation->getRootDeviceIndex()); } auto rootDeviceIndex = gfxAllocation->getRootDeviceIndex(); if (executionEnvironment.rootDeviceEnvironments.size() > rootDeviceIndex) { auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get(); if (aubCenter && aubCenter->getAubManager() && DebugManager.flags.EnableFreeMemory.get()) { aubCenter->getAubManager()->freeMemory(gfxAllocation->getGpuAddress(), gfxAllocation->getUnderlyingBufferSize()); } } delete gfxAllocation; } uint64_t OsAgnosticMemoryManager::getSystemSharedMemory(uint32_t rootDeviceIndex) { return 16 * GB; } GraphicsAllocation *OsAgnosticMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) { auto allocation = createMemoryAllocation(allocationData.type, nullptr, const_cast(allocationData.hostPtr), reinterpret_cast(allocationData.hostPtr), allocationData.size, counter++, 
MemoryPool::System4KBPages, allocationData.rootDeviceIndex, false, allocationData.flags.flushL3, false); allocation->fragmentsStorage = handleStorage; return allocation; } void OsAgnosticMemoryManager::turnOnFakingBigAllocations() { this->fakeBigAllocations = true; } MemoryManager::AllocationStatus OsAgnosticMemoryManager::populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) { for (unsigned int i = 0; i < maxFragmentsCount; i++) { if (!handleStorage.fragmentStorageData[i].osHandleStorage && handleStorage.fragmentStorageData[i].cpuPtr) { handleStorage.fragmentStorageData[i].osHandleStorage = new OsHandle(); handleStorage.fragmentStorageData[i].residency = new ResidencyData(); FragmentStorage newFragment = {}; newFragment.fragmentCpuPointer = const_cast(handleStorage.fragmentStorageData[i].cpuPtr); newFragment.fragmentSize = handleStorage.fragmentStorageData[i].fragmentSize; newFragment.osInternalStorage = handleStorage.fragmentStorageData[i].osHandleStorage; newFragment.residency = handleStorage.fragmentStorageData[i].residency; hostPtrManager->storeFragment(newFragment); } } return AllocationStatus::Success; } void OsAgnosticMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) { for (unsigned int i = 0; i < maxFragmentsCount; i++) { if (handleStorage.fragmentStorageData[i].freeTheFragment) { auto aubCenter = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.get(); if (aubCenter && aubCenter->getAubManager() && DebugManager.flags.EnableFreeMemory.get()) { aubCenter->getAubManager()->freeMemory((uint64_t)handleStorage.fragmentStorageData[i].cpuPtr, handleStorage.fragmentStorageData[i].fragmentSize); } delete handleStorage.fragmentStorageData[i].osHandleStorage; delete handleStorage.fragmentStorageData[i].residency; } } } GraphicsAllocation *OsAgnosticMemoryManager::allocateShareableMemory(const AllocationData &allocationData) { auto gmm = 
std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr, allocationData.size, false); GraphicsAllocation *alloc = nullptr; auto ptr = allocateSystemMemory(alignUp(allocationData.size, MemoryConstants::pageSize), MemoryConstants::pageSize); if (ptr != nullptr) { alloc = createMemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast(ptr), allocationData.size, counter, MemoryPool::SystemCpuInaccessible, allocationData.rootDeviceIndex, allocationData.flags.uncacheable, allocationData.flags.flushL3, false); counter++; } if (alloc) { alloc->setDefaultGmm(gmm.release()); } return alloc; } GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr gmm) { GraphicsAllocation *alloc = nullptr; if (allocationData.imgInfo->linearStorage && allocationData.imgInfo->mipCount == 0) { alloc = allocateGraphicsMemoryWithAlignment(allocationData); if (alloc) { alloc->setDefaultGmm(gmm.release()); } return alloc; } auto ptr = allocateSystemMemory(alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize), MemoryConstants::pageSize); if (ptr != nullptr) { alloc = createMemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast(ptr), allocationData.imgInfo->size, counter, MemoryPool::SystemCpuInaccessible, allocationData.rootDeviceIndex, allocationData.flags.uncacheable, allocationData.flags.flushL3, false); counter++; } if (alloc) { alloc->setDefaultGmm(gmm.release()); } return alloc; } void *OsAgnosticMemoryManager::reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) { void *reservePtr = allocateSystemMemory(size, MemoryConstants::preferredAlignment); return reservePtr; } void OsAgnosticMemoryManager::releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) { alignedFreeWrapper(reserved); } MemoryAllocation 
*OsAgnosticMemoryManager::createMemoryAllocation(GraphicsAllocation::AllocationType allocationType, void *driverAllocatedCpuPointer, void *pMem, uint64_t gpuAddress, size_t memSize, uint64_t count, MemoryPool::Type pool, uint32_t rootDeviceIndex, bool uncacheable, bool flushL3Required, bool requireSpecificBitness) { if (!isLimitedRange(rootDeviceIndex)) { return new MemoryAllocation(rootDeviceIndex, allocationType, driverAllocatedCpuPointer, pMem, gpuAddress, memSize, count, pool, uncacheable, flushL3Required); } size_t alignedSize = alignSizeWholePage(pMem, memSize); auto heap = (force32bitAllocations || requireSpecificBitness) ? HeapIndex::HEAP_EXTERNAL : HeapIndex::HEAP_STANDARD; auto gfxPartition = getGfxPartition(rootDeviceIndex); uint64_t limitedGpuAddress = gfxPartition->heapAllocate(heap, alignedSize); auto memoryAllocation = new MemoryAllocation(rootDeviceIndex, allocationType, driverAllocatedCpuPointer, pMem, limitedGpuAddress, memSize, count, pool, uncacheable, flushL3Required); if (heap == HeapIndex::HEAP_EXTERNAL) { memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(heap))); } memoryAllocation->sizeToFree = alignedSize; return memoryAllocation; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/os_agnostic_memory_manager.h000066400000000000000000000114711363734646600307060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/basic_math.h" #include "shared/source/memory_manager/memory_manager.h" namespace NEO { constexpr size_t bigAllocation = 1 * MB; constexpr uintptr_t dummyAddress = 0xFFFFF000u; class MemoryAllocation : public GraphicsAllocation { public: const unsigned long long id; size_t sizeToFree = 0; const bool uncacheable; void setSharedHandle(osHandle handle) { sharingInfo.sharedHandle = handle; } MemoryAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void 
*cpuPtrIn, uint64_t gpuAddress, uint64_t baseAddress, size_t sizeIn, MemoryPool::Type pool) : GraphicsAllocation(rootDeviceIndex, allocationType, cpuPtrIn, gpuAddress, baseAddress, sizeIn, pool), id(0), uncacheable(false) {} MemoryAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn, MemoryPool::Type pool) : GraphicsAllocation(rootDeviceIndex, allocationType, cpuPtrIn, sizeIn, sharedHandleIn, pool), id(0), uncacheable(false) {} MemoryAllocation(uint32_t rootDeviceIndex, AllocationType allocationType, void *driverAllocatedCpuPointer, void *pMem, uint64_t gpuAddress, size_t memSize, uint64_t count, MemoryPool::Type pool, bool uncacheable, bool flushL3Required) : GraphicsAllocation(rootDeviceIndex, allocationType, pMem, gpuAddress, 0u, memSize, pool), id(count), uncacheable(uncacheable) { this->driverAllocatedCpuPointer = driverAllocatedCpuPointer; overrideMemoryPool(pool); allocationInfo.flags.flushL3Required = flushL3Required; } void overrideMemoryPool(MemoryPool::Type pool); }; class OsAgnosticMemoryManager : public MemoryManager { public: using MemoryManager::allocateGraphicsMemory; OsAgnosticMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(false, executionEnvironment) {} OsAgnosticMemoryManager(bool aubUsage, ExecutionEnvironment &executionEnvironment); ~OsAgnosticMemoryManager() override; GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override; GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) override { return nullptr; } void addAllocationToHostPtrManager(GraphicsAllocation *gfxAllocation) override; void removeAllocationFromHostPtrManager(GraphicsAllocation *gfxAllocation) override; void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override; AllocationStatus populateOsHandles(OsHandleStorage 
&handleStorage, uint32_t rootDeviceIndex) override; void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override; uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override; uint64_t getLocalMemorySize(uint32_t rootDeviceIndex) override; void turnOnFakingBigAllocations(); void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override; void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override; protected: GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override; GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) override; GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override; GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override; GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) override; GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr gmm) override; void *lockResourceImpl(GraphicsAllocation &graphicsAllocation) override { return graphicsAllocation.getUnderlyingBuffer(); } void unlockResourceImpl(GraphicsAllocation &graphicsAllocation) override {} GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) override; GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override; MemoryAllocation *createMemoryAllocation(GraphicsAllocation::AllocationType allocationType, void *driverAllocatedCpuPointer, void *pMem, uint64_t gpuAddress, size_t memSize, uint64_t count, MemoryPool::Type pool, uint32_t rootDeviceIndex, bool uncacheable, bool flushL3Required, bool requireSpecificBitness); private: unsigned long long counter = 0; bool fakeBigAllocations = false; }; } // namespace NEO 
os_agnostic_memory_manager_allocate_in_device_pool.cpp000066400000000000000000000037231363734646600360650ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/memory_manager/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" namespace NEO { GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) { status = AllocationStatus::Error; switch (allocationData.type) { case GraphicsAllocation::AllocationType::IMAGE: case GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY: break; default: if (!allocationData.flags.useSystemMemory && !(allocationData.flags.allow32Bit && this->force32bitAllocations)) { GraphicsAllocation *allocation = nullptr; if (allocationData.type == GraphicsAllocation::AllocationType::SVM_GPU) { void *cpuAllocation = allocateSystemMemory(allocationData.size, allocationData.alignment); if (!cpuAllocation) { return nullptr; } uint64_t gpuAddress = reinterpret_cast(allocationData.hostPtr); allocation = new MemoryAllocation(allocationData.rootDeviceIndex, allocationData.type, cpuAllocation, cpuAllocation, gpuAddress, allocationData.size, counter++, MemoryPool::LocalMemory, false, false); } else { allocation = allocateGraphicsMemory(allocationData); } if (allocation) { allocation->storageInfo = allocationData.storageInfo; allocation->setFlushL3Required(allocationData.flags.flushL3); status = AllocationStatus::Success; } return allocation; } } status = AllocationStatus::RetryInNonDevicePool; return nullptr; } uint64_t OsAgnosticMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) { return 0 * GB; } void MemoryAllocation::overrideMemoryPool(MemoryPool::Type pool) { this->memoryPool = pool; } } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/memory_manager/page_table.cpp000066400000000000000000000054551363734646600257370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/memory_manager/page_table.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/memory_manager/page_table.inl" namespace NEO { uintptr_t PTE::map(uintptr_t vm, size_t size, uint64_t entryBits, uint32_t memoryBank) { const size_t shift = 12; const auto mask = static_cast(maxNBitValue(bits)); size_t indexStart = (vm >> shift) & mask; size_t indexEnd = ((vm + size - 1) >> shift) & mask; uintptr_t res = -1; bool updateEntryBits = entryBits != PageTableEntry::nonValidBits; uint64_t newEntryBits = entryBits & MemoryConstants::pageMask; newEntryBits |= 0x1; for (size_t index = indexStart; index <= indexEnd; index++) { if (entries[index] == 0x0) { uint64_t tmp = allocator->reserve4kPage(memoryBank); entries[index] = reinterpret_cast(tmp | newEntryBits); } else if (updateEntryBits) { entries[index] = reinterpret_cast((reinterpret_cast(entries[index]) & MemoryConstants::page4kEntryMask) | newEntryBits); } res = std::min(reinterpret_cast(entries[index]) & MemoryConstants::page4kEntryMask, res); } return (res & ~newEntryBits) + (vm & (pageSize - 1)); } void PTE::pageWalk(uintptr_t vm, size_t size, size_t offset, uint64_t entryBits, PageWalker &pageWalker, uint32_t memoryBank) { static const uint32_t bits = 9; const size_t shift = 12; const auto mask = static_cast(maxNBitValue(bits)); size_t indexStart = (vm >> shift) & mask; size_t indexEnd = ((vm + size - 1) >> shift) & mask; uint64_t res = -1; uintptr_t rem = vm & (pageSize - 1); bool updateEntryBits = entryBits != PageTableEntry::nonValidBits; uint64_t newEntryBits = entryBits & MemoryConstants::pageMask; newEntryBits |= 0x1; for (size_t index = indexStart; index <= indexEnd; index++) { if (entries[index] == 0x0) { uint64_t tmp = 
allocator->reserve4kPage(memoryBank); entries[index] = reinterpret_cast(tmp | newEntryBits); } else if (updateEntryBits) { entries[index] = reinterpret_cast((reinterpret_cast(entries[index]) & MemoryConstants::page4kEntryMask) | newEntryBits); } res = reinterpret_cast(entries[index]) & MemoryConstants::page4kEntryMask; size_t lSize = std::min(pageSize - rem, size); pageWalker((res & ~0x1) + rem, lSize, offset, reinterpret_cast(entries[index]) & MemoryConstants::pageMask); size -= lSize; offset += lSize; rem = 0; } } template class PageTable; template class PageTable; } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/page_table.h000066400000000000000000000047411363734646600254010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/basic_math.h" #include "opencl/source/memory_manager/physical_address_allocator.h" #include #include #include #include #include #include namespace NEO { class GraphicsAllocation; typedef std::function PageWalker; template class PageTable { public: PageTable(PhysicalAddressAllocator *physicalAddressAllocator) : allocator(physicalAddressAllocator) { entries.fill(nullptr); }; virtual ~PageTable() { for (auto &e : entries) delete e; } virtual uintptr_t map(uintptr_t vm, size_t size, uint64_t entryBits, uint32_t memoryBank); virtual void pageWalk(uintptr_t vm, size_t size, size_t offset, uint64_t entryBits, PageWalker &pageWalker, uint32_t memoryBank); static const size_t pageSize = 1 << 12; static size_t getBits() { return T::getBits() + bits; } protected: std::array entries; PhysicalAddressAllocator *allocator = nullptr; }; template <> inline PageTable::~PageTable() { } class PTE : public PageTable { public: PTE(PhysicalAddressAllocator *physicalAddressAllocator) : PageTable(physicalAddressAllocator) {} uintptr_t map(uintptr_t vm, size_t size, uint64_t entryBits, uint32_t memoryBank) override; void 
pageWalk(uintptr_t vm, size_t size, size_t offset, uint64_t entryBits, PageWalker &pageWalker, uint32_t memoryBank) override; static const uint32_t level = 0; static const uint32_t bits = 9; }; class PDE : public PageTable { public: PDE(PhysicalAddressAllocator *physicalAddressAllocator) : PageTable(physicalAddressAllocator) { } }; class PDP : public PageTable { public: PDP(PhysicalAddressAllocator *physicalAddressAllocator) : PageTable(physicalAddressAllocator) { } }; class PML4 : public PageTable { public: PML4(PhysicalAddressAllocator *physicalAddressAllocator) : PageTable(physicalAddressAllocator) { } }; class PDPE : public PageTable { public: PDPE(PhysicalAddressAllocator *physicalAddressAllocator) : PageTable(physicalAddressAllocator) { } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/page_table.inl000066400000000000000000000051401363734646600257260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { template <> inline uintptr_t PageTable::map(uintptr_t vm, size_t size, uint64_t entryBits, uint32_t memoryBank) { return 0; } template <> inline size_t PageTable::getBits() { return 9; } template <> inline void PageTable::pageWalk(uintptr_t vm, size_t size, size_t offset, uint64_t entryBits, PageWalker &pageWalker, uint32_t memoryBank) { } template inline uintptr_t PageTable::map(uintptr_t vm, size_t size, uint64_t entryBits, uint32_t memoryBank) { const size_t shift = T::getBits() + 12; const uintptr_t mask = static_cast(maxNBitValue(bits)); size_t indexStart = (vm >> shift) & mask; size_t indexEnd = ((vm + size - 1) >> shift) & mask; uintptr_t res = -1; uintptr_t vmMask = (uintptr_t(-1) >> (sizeof(void *) * 8 - shift - bits)); auto maskedVm = vm & vmMask; for (size_t index = indexStart; index <= indexEnd; index++) { uintptr_t vmStart = (uintptr_t(1) << shift) * index; vmStart = std::max(vmStart, maskedVm); uintptr_t vmEnd = (uintptr_t(1) << shift) * 
(index + 1) - 1; vmEnd = std::min(vmEnd, maskedVm + size - 1); if (entries[index] == nullptr) { entries[index] = new T(allocator); } res = std::min((entries[index])->map(vmStart, vmEnd - vmStart + 1, entryBits, memoryBank), res); } return res; } template inline void PageTable::pageWalk(uintptr_t vm, size_t size, size_t offset, uint64_t entryBits, PageWalker &pageWalker, uint32_t memoryBank) { const size_t shift = T::getBits() + 12; const uintptr_t mask = static_cast(maxNBitValue(bits)); size_t indexStart = (vm >> shift) & mask; size_t indexEnd = ((vm + size - 1) >> shift) & mask; uintptr_t vmMask = (uintptr_t(-1) >> (sizeof(void *) * 8 - shift - bits)); auto maskedVm = vm & vmMask; for (size_t index = indexStart; index <= indexEnd; index++) { uintptr_t vmStart = (uintptr_t(1) << shift) * index; vmStart = std::max(vmStart, maskedVm); uintptr_t vmEnd = (uintptr_t(1) << shift) * (index + 1) - 1; vmEnd = std::min(vmEnd, maskedVm + size - 1); if (entries[index] == nullptr) { entries[index] = new T(allocator); } entries[index]->pageWalk(vmStart, vmEnd - vmStart + 1, offset, entryBits, pageWalker, memoryBank); offset += (vmEnd - vmStart + 1); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/memory_manager/physical_address_allocator.h000066400000000000000000000027011363734646600306710ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/memory_manager/memory_banks.h" #include #include namespace NEO { class PhysicalAddressAllocator { public: PhysicalAddressAllocator() { mainAllocator.store(initialPageAddress); } virtual ~PhysicalAddressAllocator() = default; uint64_t reserve4kPage(uint32_t memoryBank) { return reservePage(memoryBank, MemoryConstants::pageSize, MemoryConstants::pageSize); } uint64_t 
reserve64kPage(uint32_t memoryBank) { return reservePage(memoryBank, MemoryConstants::pageSize64k, MemoryConstants::pageSize64k); } virtual uint64_t reservePage(uint32_t memoryBank, size_t pageSize, size_t alignement) { UNRECOVERABLE_IF(memoryBank != MemoryBanks::MainBank); std::unique_lock lock(pageReserveMutex); auto currentAddress = mainAllocator.load(); auto alignmentSize = alignUp(currentAddress, alignement) - currentAddress; mainAllocator += alignmentSize; return mainAllocator.fetch_add(pageSize); } protected: std::atomic mainAllocator; std::mutex pageReserveMutex; const uint64_t initialPageAddress = 0x1000; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/000077500000000000000000000000001363734646600225765ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/os_interface/CMakeLists.txt000066400000000000000000000012211363734646600253320ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_OS_INTERFACE_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.cpp ${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.h ${CMAKE_CURRENT_SOURCE_DIR}/ocl_reg_path.h ${CMAKE_CURRENT_SOURCE_DIR}/os_inc_base.h ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_BASE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_OS_INTERFACE_BASE ${RUNTIME_SRCS_OS_INTERFACE_BASE}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/os_interface/linux/000077500000000000000000000000001363734646600237355ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/os_interface/linux/CMakeLists.txt000066400000000000000000000017261363734646600265030ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_OS_INTERFACE_LINUX 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/api_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_init_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream.inl ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream.inl ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/ocl_reg_path.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metrics_library.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_linux.h ) if(UNIX) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_LINUX}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_OS_INTERFACE_LINUX ${RUNTIME_SRCS_OS_INTERFACE_LINUX}) add_subdirectories() compute-runtime-20.13.16352/opencl/source/os_interface/linux/api_linux.cpp000066400000000000000000000021231363734646600264270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "opencl/source/api/api.h" #include "opencl/source/api/dispatch.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj.h" void NEO::MemObj::getOsSpecificMemObjectInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam) { switch (paramName) { #ifdef LIBVA case CL_MEM_VA_API_MEDIA_SURFACE_INTEL: peekSharingHandler()->getMemObjectInfo(*srcParamSize, *srcParam); break; #endif default: break; } } void NEO::Image::getOsSpecificImageInfo(const cl_image_info ¶mName, size_t *srcParamSize, void **srcParam) { switch (paramName) { #ifdef LIBVA case CL_IMAGE_VA_API_PLANE_INTEL: *srcParamSize = sizeof(cl_uint); *srcParam = &mediaPlaneType; break; #endif default: break; } } void *NEO::Context::getOsContextInfo(cl_context_info ¶mName, size_t *srcParamSize) { return nullptr; } 
compute-runtime-20.13.16352/opencl/source/os_interface/linux/d3d_sharing_functions.h000066400000000000000000000004631363734646600303660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { namespace D3DTypesHelper { struct D3D9 { }; struct D3D10 { }; struct D3D11 { }; } // namespace D3DTypesHelper template class D3DSharingFunctions { }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/linux/device_caps_init_linux.cpp000066400000000000000000000003231363734646600311460ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" namespace NEO { void ClDevice::initializeExtraCaps() { } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/linux/device_command_stream.inl000066400000000000000000000014501363734646600307510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/device_command_stream.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" namespace NEO { template CommandStreamReceiver *DeviceCommandStreamReceiver::create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { if (withAubDump) { return new CommandStreamReceiverWithAUBDump>("aubfile", executionEnvironment, rootDeviceIndex); } else { return new DrmCommandStreamReceiver(executionEnvironment, rootDeviceIndex); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/linux/drm_command_stream.h000066400000000000000000000044661363734646600277530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/device_command_stream.h" 
#include "shared/source/os_interface/linux/drm_gem_close_worker.h"

#include "drm/i915_drm.h"

#include <vector>

namespace NEO {
class BufferObject;
class Drm;
class DrmAllocation;
class DrmMemoryManager;

// Command stream receiver that submits batch buffers through the Linux DRM (i915) interface.
template <typename GfxFamily>
class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
  protected:
    typedef DeviceCommandStreamReceiver<GfxFamily> BaseClass;
    using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiver::getTagAddress;
    using BaseClass::getScratchPatchAddress;
    using BaseClass::makeNonResident;
    using BaseClass::makeResident;
    using BaseClass::mediaVfeStateDirty;
    using BaseClass::osContext;
    using BaseClass::requiredScratchSize;

  public:
    // NOTE(review): the original comment here described passing a drm pointer, but this
    // constructor takes none. The Drm is obtained from the root device's OS interface in the
    // constructor definition.
    DrmCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex,
                             gemCloseWorkerMode mode = gemCloseWorkerMode::gemCloseWorkerActive);

    bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
    void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
    void makeNonResident(GraphicsAllocation &gfxAllocation) override;
    bool waitForFlushStamp(FlushStamp &flushStampToWait) override;

    DrmMemoryManager *getMemoryManager() const;
    GmmPageTableMngr *createPageTableManager() override;

    gemCloseWorkerMode peekGemCloseWorkerOperationMode() const {
        return this->gemCloseWorkerOperationMode;
    }

  protected:
    void makeResidentBufferObjects(const DrmAllocation *drmAllocation, uint32_t handleId);
    void makeResident(BufferObject *bo);
    void flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency);
    void exec(const BatchBuffer &batchBuffer, uint32_t drmContextId);

    // Buffer objects to submit with the next exec(); cleared after each submission.
    std::vector<BufferObject *> residency;
    // Scratch storage passed to BufferObject::exec().
    // NOTE(review): element type lost in extraction; drm_i915_gem_exec_object2 presumed.
    std::vector<drm_i915_gem_exec_object2> execObjectsStorage;
    Drm *drm;
    gemCloseWorkerMode gemCloseWorkerOperationMode;
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/os_interface/linux/drm_command_stream.inl000066400000000000000000000150551363734646600303020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/linux/drm_allocation.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/drm_engine_mapper.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" #include #include namespace NEO { template DrmCommandStreamReceiver::DrmCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, gemCloseWorkerMode mode) : BaseClass(executionEnvironment, rootDeviceIndex), gemCloseWorkerOperationMode(mode) { this->drm = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->get()->getDrm(); residency.reserve(512); execObjectsStorage.reserve(512); } template bool DrmCommandStreamReceiver::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { DrmAllocation *alloc = static_cast(batchBuffer.commandBufferAllocation); DEBUG_BREAK_IF(!alloc); BufferObject *bb = alloc->getBO(); if (bb == nullptr) { return false; } if (this->lastSentSliceCount != batchBuffer.sliceCount) { if 
(drm->setQueueSliceCount(batchBuffer.sliceCount)) { this->lastSentSliceCount = batchBuffer.sliceCount; } } this->flushStamp->setStamp(bb->peekHandle()); this->flushInternal(batchBuffer, allocationsForResidency); if (this->gemCloseWorkerOperationMode == gemCloseWorkerMode::gemCloseWorkerActive) { bb->reference(); this->getMemoryManager()->peekGemCloseWorker()->push(bb); } return true; } template void DrmCommandStreamReceiver::exec(const BatchBuffer &batchBuffer, uint32_t drmContextId) { DrmAllocation *alloc = static_cast(batchBuffer.commandBufferAllocation); DEBUG_BREAK_IF(!alloc); BufferObject *bb = alloc->getBO(); DEBUG_BREAK_IF(!bb); auto engineFlag = static_cast(osContext)->getEngineFlag(); // Residency hold all allocation except command buffer, hence + 1 auto requiredSize = this->residency.size() + 1; if (requiredSize > this->execObjectsStorage.size()) { this->execObjectsStorage.resize(requiredSize); } int err = bb->exec(static_cast(alignUp(batchBuffer.usedSize - batchBuffer.startOffset, 8)), batchBuffer.startOffset, engineFlag | I915_EXEC_NO_RELOC, batchBuffer.requiresCoherency, drmContextId, this->residency.data(), this->residency.size(), this->execObjectsStorage.data()); UNRECOVERABLE_IF(err != 0); this->residency.clear(); } template void DrmCommandStreamReceiver::makeResident(BufferObject *bo) { if (bo) { if (bo->peekIsReusableAllocation()) { for (auto bufferObject : this->residency) { if (bufferObject == bo) { return; } } } residency.push_back(bo); } } template void DrmCommandStreamReceiver::processResidency(const ResidencyContainer &inputAllocationsForResidency, uint32_t handleId) { for (auto &alloc : inputAllocationsForResidency) { auto drmAlloc = static_cast(alloc); if (drmAlloc->fragmentsStorage.fragmentCount) { for (unsigned int f = 0; f < drmAlloc->fragmentsStorage.fragmentCount; f++) { const auto osContextId = osContext->getContextId(); if (!drmAlloc->fragmentsStorage.fragmentStorageData[f].residency->resident[osContextId]) { 
makeResident(drmAlloc->fragmentsStorage.fragmentStorageData[f].osHandleStorage->bo); drmAlloc->fragmentsStorage.fragmentStorageData[f].residency->resident[osContextId] = true; } } } else { makeResidentBufferObjects(drmAlloc, handleId); } } } template void DrmCommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) { // Vector is moved to command buffer inside flush. // If flush wasn't called we need to make all objects non-resident. // If makeNonResident is called before flush, vector will be cleared. if (gfxAllocation.isResident(this->osContext->getContextId())) { if (this->residency.size() != 0) { this->residency.clear(); } for (auto fragmentId = 0u; fragmentId < gfxAllocation.fragmentsStorage.fragmentCount; fragmentId++) { gfxAllocation.fragmentsStorage.fragmentStorageData[fragmentId].residency->resident[osContext->getContextId()] = false; } } gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId()); } template DrmMemoryManager *DrmCommandStreamReceiver::getMemoryManager() const { return static_cast(CommandStreamReceiver::getMemoryManager()); } template GmmPageTableMngr *DrmCommandStreamReceiver::createPageTableManager() { GmmPageTableMngr *gmmPageTableMngr = GmmPageTableMngr::create(this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]->getGmmClientContext(), TT_TYPE::AUXTT, nullptr); gmmPageTableMngr->setCsrHandle(this); this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]->pageTableManager.reset(gmmPageTableMngr); return gmmPageTableMngr; } template bool DrmCommandStreamReceiver::waitForFlushStamp(FlushStamp &flushStamp) { drm_i915_gem_wait wait = {}; wait.bo_handle = static_cast(flushStamp); wait.timeout_ns = -1; drm->ioctl(DRM_IOCTL_I915_GEM_WAIT, &wait); return true; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl000066400000000000000000000014431363734646600321750ustar00rootroot00000000000000/* * Copyright (C) 
2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_allocation.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" namespace NEO { template void DrmCommandStreamReceiver::makeResidentBufferObjects(const DrmAllocation *drmAllocation, uint32_t handleId) { auto bo = drmAllocation->getBO(); makeResident(bo); } template void DrmCommandStreamReceiver::flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) { this->processResidency(allocationsForResidency, 0u); this->exec(batchBuffer, static_cast(osContext)->getDrmContextIds()[0]); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/linux/ocl_reg_path.cpp000066400000000000000000000003001363734646600270600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { const char *oclRegPath = ""; } compute-runtime-20.13.16352/opencl/source/os_interface/linux/os_metrics_library.cpp000066400000000000000000000031701363734646600303350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/metrics_library.h" namespace NEO { ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::oaConfigurationActivate ////////////////////////////////////////////////////// bool MetricsLibrary::oaConfigurationActivate( const ConfigurationHandle_1_0 &handle) { ConfigurationActivateData_1_0 data = {}; data.Type = GpuConfigurationActivationType::Tbs; return api->functions.ConfigurationActivate( handle, &data) == StatusCode::Success; } ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::oaConfigurationDeactivate ////////////////////////////////////////////////////// bool MetricsLibrary::oaConfigurationDeactivate( const 
ConfigurationHandle_1_0 &handle) { return api->functions.ConfigurationDeactivate( handle) == StatusCode::Success; } ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::userConfigurationCreate ////////////////////////////////////////////////////// bool MetricsLibrary::userConfigurationCreate( const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle) { // Not supported on Linux. return true; } ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::userConfigurationDelete ////////////////////////////////////////////////////// bool MetricsLibrary::userConfigurationDelete( const ConfigurationHandle_1_0 &handle) { // Not supported on Linux. return true; } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/os_interface/linux/performance_counters_linux.cpp000066400000000000000000000040111363734646600320770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "performance_counters_linux.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" namespace NEO { //////////////////////////////////////////////////// // PerformanceCounters::create //////////////////////////////////////////////////// std::unique_ptr PerformanceCounters::create(Device *device) { auto counter = std::make_unique(); auto gen = device->getHardwareInfo().platform.eRenderCoreFamily; auto &hwHelper = HwHelper::get(gen); UNRECOVERABLE_IF(counter == nullptr); counter->clientType.Gen = static_cast(hwHelper.getMetricsLibraryGenId()); return counter; } ////////////////////////////////////////////////////// // PerformanceCountersLinux::enableCountersConfiguration ////////////////////////////////////////////////////// bool PerformanceCountersLinux::enableCountersConfiguration() { // Release previous counters configuration so the user // can change configuration between kernels. releaseCountersConfiguration(); // Create oa configuration. 
if (!metricsLibrary->oaConfigurationCreate( context, oaConfiguration)) { DEBUG_BREAK_IF(true); return false; } // Enable oa configuration. if (!metricsLibrary->oaConfigurationActivate( oaConfiguration)) { DEBUG_BREAK_IF(true); return false; } return true; } ////////////////////////////////////////////////////// // PerformanceCountersLinux::releaseCountersConfiguration ////////////////////////////////////////////////////// void PerformanceCountersLinux::releaseCountersConfiguration() { // Oa configuration. if (oaConfiguration.IsValid()) { metricsLibrary->oaConfigurationDeactivate(oaConfiguration); metricsLibrary->oaConfigurationDelete(oaConfiguration); oaConfiguration.data = nullptr; } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/linux/performance_counters_linux.h000066400000000000000000000011651363734646600315530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/os_interface/performance_counters.h" namespace NEO { class PerformanceCountersLinux : virtual public PerformanceCounters { public: PerformanceCountersLinux() = default; ~PerformanceCountersLinux() override = default; ///////////////////////////////////////////////////// // Gpu oa/mmio configuration. 
///////////////////////////////////////////////////// bool enableCountersConfiguration() override; void releaseCountersConfiguration() override; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/metrics_library.cpp000066400000000000000000000146371363734646600265070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/metrics_library.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/os_interface/os_inc_base.h" namespace NEO { /////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::MetricsLibrary /////////////////////////////////////////////////////// MetricsLibrary::MetricsLibrary() { api = std::make_unique(); osLibrary.reset(OsLibrary::load(Os::metricsLibraryDllName)); } ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::open ////////////////////////////////////////////////////// bool MetricsLibrary::open() { UNRECOVERABLE_IF(osLibrary.get() == nullptr); if (osLibrary->isLoaded()) { api->contextCreate = reinterpret_cast(osLibrary->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0)); api->contextDelete = reinterpret_cast(osLibrary->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0)); } else { api->contextCreate = nullptr; api->contextDelete = nullptr; } if (!api->contextCreate) { return false; } if (!api->contextDelete) { return false; } return true; } ////////////////////////////////////////////////////// // MetricsLibrary::createContext ////////////////////////////////////////////////////// bool MetricsLibrary::contextCreate( const ClientType_1_0 &clientType, ClientData_1_0 &clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle) { MetricsLibraryApi::ClientOptionsData_1_0 clientOptions[1] = {}; clientOptions[0].Type = MetricsLibraryApi::ClientOptionsType::Compute; clientOptions[0].Compute.Asynchronous = true; clientData.ClientOptionsCount = 
1; clientData.ClientOptions = clientOptions; createData.Api = &api->functions; createData.ClientCallbacks = &api->callbacks; createData.ClientData = &clientData; return api->contextCreate( clientType, &createData, &handle) == StatusCode::Success; } ////////////////////////////////////////////////////// // MetricsLibrary::contextDelete ////////////////////////////////////////////////////// bool MetricsLibrary::contextDelete( const ContextHandle_1_0 &handle) { return api->contextDelete(handle) == StatusCode::Success; } ////////////////////////////////////////////////////// // MetricsLibrary::hwCountersCreate ////////////////////////////////////////////////////// bool MetricsLibrary::hwCountersCreate( const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 user, QueryHandle_1_0 &query) { QueryCreateData_1_0 data = {}; data.HandleContext = context; data.Type = ObjectType::QueryHwCounters; data.Slots = slots; return api->functions.QueryCreate( &data, &query) == StatusCode::Success; } ////////////////////////////////////////////////////// // MetricsLibrary::hwCountersDelete ////////////////////////////////////////////////////// bool MetricsLibrary::hwCountersDelete( const QueryHandle_1_0 &query) { return api->functions.QueryDelete(query) == StatusCode::Success; } ////////////////////////////////////////////////////// // MetricsLibrary::hwCountersGetReport ////////////////////////////////////////////////////// bool MetricsLibrary::hwCountersGetReport( const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data) { GetReportData_1_0 report = {}; report.Type = ObjectType::QueryHwCounters; report.Query.Handle = handle; report.Query.Slot = slot; report.Query.SlotsCount = slotsCount; report.Query.Data = data; report.Query.DataSize = dataSize; return api->functions.GetData(&report) == StatusCode::Success; } ////////////////////////////////////////////////////// // 
MetricsLibrary::hwCountersGetApiReportSize ////////////////////////////////////////////////////// uint32_t MetricsLibrary::hwCountersGetApiReportSize() { ValueType type = ValueType::Uint32; TypedValue_1_0 value = {}; return api->functions.GetParameter(ParameterType::QueryHwCountersReportApiSize, &type, &value) == StatusCode::Success ? value.ValueUInt32 : 0; } ////////////////////////////////////////////////////// // MetricsLibrary::hwCountersGetGpuReportSize ////////////////////////////////////////////////////// uint32_t MetricsLibrary::hwCountersGetGpuReportSize() { ValueType type = ValueType::Uint32; TypedValue_1_0 value = {}; return api->functions.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &type, &value) == StatusCode::Success ? value.ValueUInt32 : 0; } ////////////////////////////////////////////////////// // MetricsLibrary::commandBufferGet ////////////////////////////////////////////////////// bool MetricsLibrary::commandBufferGet( CommandBufferData_1_0 &data) { return api->functions.CommandBufferGet( &data) == StatusCode::Success; } ////////////////////////////////////////////////////// // MetricsLibrary::commandBufferGetSize ////////////////////////////////////////////////////// bool MetricsLibrary::commandBufferGetSize( const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize) { return api->functions.CommandBufferGetSize( &commandBufferData, &commandBufferSize) == StatusCode::Success; } ////////////////////////////////////////////////////// // MetricsLibrary::oaConfigurationCreate ////////////////////////////////////////////////////// bool MetricsLibrary::oaConfigurationCreate( const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle) { ConfigurationCreateData_1_0 data = {}; data.HandleContext = context; data.Type = ObjectType::ConfigurationHwCountersOa; return api->functions.ConfigurationCreate( &data, &handle) == StatusCode::Success; } ////////////////////////////////////////////////////// // 
MetricsLibrary::oaConfigurationDelete ////////////////////////////////////////////////////// bool MetricsLibrary::oaConfigurationDelete( const ConfigurationHandle_1_0 &handle) { return api->functions.ConfigurationDelete(handle) == StatusCode::Success; } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/os_interface/metrics_library.h000066400000000000000000000070161363734646600261450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "instrumentation.h" #include namespace NEO { using MetricsLibraryApi::ClientApi; using MetricsLibraryApi::ClientCallbacks_1_0; using MetricsLibraryApi::ClientData_1_0; using MetricsLibraryApi::ClientGen; using MetricsLibraryApi::ClientType_1_0; using MetricsLibraryApi::CommandBufferData_1_0; using MetricsLibraryApi::CommandBufferSize_1_0; using MetricsLibraryApi::ConfigurationActivateData_1_0; using MetricsLibraryApi::ConfigurationCreateData_1_0; using MetricsLibraryApi::ConfigurationHandle_1_0; using MetricsLibraryApi::ContextCreateData_1_0; using MetricsLibraryApi::ContextCreateFunction_1_0; using MetricsLibraryApi::ContextDeleteFunction_1_0; using MetricsLibraryApi::ContextHandle_1_0; using MetricsLibraryApi::GetReportData_1_0; using MetricsLibraryApi::GpuConfigurationActivationType; using MetricsLibraryApi::GpuMemory_1_0; using MetricsLibraryApi::Interface_1_0; using MetricsLibraryApi::ObjectType; using MetricsLibraryApi::ParameterType; using MetricsLibraryApi::QueryCreateData_1_0; using MetricsLibraryApi::QueryHandle_1_0; using MetricsLibraryApi::StatusCode; using MetricsLibraryApi::TypedValue_1_0; using MetricsLibraryApi::ValueType; class MetricsLibraryInterface { public: ContextCreateFunction_1_0 contextCreate = nullptr; ContextDeleteFunction_1_0 contextDelete = nullptr; Interface_1_0 functions = {}; ClientCallbacks_1_0 callbacks = {}; }; class MetricsLibrary { public: MetricsLibrary(); 
MOCKABLE_VIRTUAL ~MetricsLibrary(){}; // Library open function. MOCKABLE_VIRTUAL bool open(); // Context create / destroy functions. MOCKABLE_VIRTUAL bool contextCreate(const ClientType_1_0 &client, ClientData_1_0 &clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle); MOCKABLE_VIRTUAL bool contextDelete(const ContextHandle_1_0 &handle); // HwCounters functions. MOCKABLE_VIRTUAL bool hwCountersCreate(const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 mmio, QueryHandle_1_0 &handle); MOCKABLE_VIRTUAL bool hwCountersDelete(const QueryHandle_1_0 &handle); MOCKABLE_VIRTUAL bool hwCountersGetReport(const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data); MOCKABLE_VIRTUAL uint32_t hwCountersGetApiReportSize(); MOCKABLE_VIRTUAL uint32_t hwCountersGetGpuReportSize(); // Oa configuration functions. MOCKABLE_VIRTUAL bool oaConfigurationCreate(const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle); MOCKABLE_VIRTUAL bool oaConfigurationDelete(const ConfigurationHandle_1_0 &handle); MOCKABLE_VIRTUAL bool oaConfigurationActivate(const ConfigurationHandle_1_0 &handle); MOCKABLE_VIRTUAL bool oaConfigurationDeactivate(const ConfigurationHandle_1_0 &handle); // User mmio configuration functions. MOCKABLE_VIRTUAL bool userConfigurationCreate(const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle); MOCKABLE_VIRTUAL bool userConfigurationDelete(const ConfigurationHandle_1_0 &handle); // Command buffer functions. 
MOCKABLE_VIRTUAL bool commandBufferGet(CommandBufferData_1_0 &data); MOCKABLE_VIRTUAL bool commandBufferGetSize(const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize); public: std::unique_ptr osLibrary; std::unique_ptr api; }; } // namespace NEOcompute-runtime-20.13.16352/opencl/source/os_interface/ocl_reg_path.h000066400000000000000000000002521363734646600253740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { extern const char *oclRegPath; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/os_inc_base.h000066400000000000000000000007201363734646600252120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" namespace Os { // Compiler library names extern const char *frontEndDllName; extern const char *igcDllName; extern const char *testDllName; // OS specific directory separator extern const char *fileSeparator; // Os specific Metrics Library name extern const char *metricsLibraryDllName; }; // namespace Os compute-runtime-20.13.16352/opencl/source/os_interface/performance_counters.cpp000066400000000000000000000173271363734646600275370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/performance_counters.h" #include "shared/source/utilities/tag_allocator.h" using namespace MetricsLibraryApi; namespace NEO { ////////////////////////////////////////////////////// // PerformanceCounters constructor. 
////////////////////////////////////////////////////// PerformanceCounters::PerformanceCounters() { metricsLibrary = std::make_unique(); UNRECOVERABLE_IF(metricsLibrary == nullptr); } ////////////////////////////////////////////////////// // PerformanceCounters::getReferenceNumber ////////////////////////////////////////////////////// uint32_t PerformanceCounters::getReferenceNumber() { std::lock_guard lockMutex(mutex); return referenceCounter; } ////////////////////////////////////////////////////// // PerformanceCounters::enable ////////////////////////////////////////////////////// bool PerformanceCounters::enable(bool ccsEngine) { std::lock_guard lockMutex(mutex); if (referenceCounter == 0) { available = openMetricsLibrary(); this->usingCcsEngine = ccsEngine; } referenceCounter++; return available && (this->usingCcsEngine == ccsEngine); } ////////////////////////////////////////////////////// // PerformanceCounters::shutdown ////////////////////////////////////////////////////// void PerformanceCounters::shutdown() { std::lock_guard lockMutex(mutex); if (referenceCounter >= 1) { if (referenceCounter == 1) { available = false; closeMetricsLibrary(); } referenceCounter--; } } ////////////////////////////////////////////////////// // PerformanceCounters::getMetricsLibraryInterface ////////////////////////////////////////////////////// MetricsLibrary *PerformanceCounters::getMetricsLibraryInterface() { return metricsLibrary.get(); } ////////////////////////////////////////////////////// // PerformanceCounters::setMetricsLibraryInterface ////////////////////////////////////////////////////// void PerformanceCounters::setMetricsLibraryInterface(std::unique_ptr newMetricsLibrary) { metricsLibrary = std::move(newMetricsLibrary); } ////////////////////////////////////////////////////// // PerformanceCounters::getMetricsLibraryContext ////////////////////////////////////////////////////// ContextHandle_1_0 PerformanceCounters::getMetricsLibraryContext() { return context; 
} ////////////////////////////////////////////////////// // PerformanceCounters::openMetricsLibrary ////////////////////////////////////////////////////// bool PerformanceCounters::openMetricsLibrary() { // Open metrics library. bool result = metricsLibrary->open(); DEBUG_BREAK_IF(!result); // Create metrics library context. if (result) { result = metricsLibrary->contextCreate( clientType, clientData, contextData, context); // Validate gpu report size. DEBUG_BREAK_IF(!metricsLibrary->hwCountersGetGpuReportSize()); } // Error handling. if (!result) { closeMetricsLibrary(); } return result; } ////////////////////////////////////////////////////// // PerformanceCounters::closeMetricsLibrary ////////////////////////////////////////////////////// void PerformanceCounters::closeMetricsLibrary() { // Destroy oa/user mmio configuration. releaseCountersConfiguration(); // Destroy hw counters query. if (query.IsValid()) { metricsLibrary->hwCountersDelete(query); } // Destroy metrics library context. if (context.IsValid()) { metricsLibrary->contextDelete(context); } } ////////////////////////////////////////////////////// // PerformanceCounters::getQueryHandle ////////////////////////////////////////////////////// QueryHandle_1_0 PerformanceCounters::getQueryHandle() { if (!query.IsValid()) { metricsLibrary->hwCountersCreate( context, 1, userConfiguration, query); } DEBUG_BREAK_IF(!query.IsValid()); return query; } ////////////////////////////////////////////////////// // PerformanceCounters::getGpuCommandsSize ////////////////////////////////////////////////////// uint32_t PerformanceCounters::getGpuCommandsSize( const MetricsLibraryApi::GpuCommandBufferType commandBufferType, const bool begin) { CommandBufferData_1_0 bufferData = {}; CommandBufferSize_1_0 bufferSize = {}; if (begin) { // Load currently activated (through metrics discovery) oa/user mmio configuration and use it. 
// It will allow to change counters configuration between subsequent clEnqueueNDCommandRange calls. if (!enableCountersConfiguration()) { return 0; } } bufferData.HandleContext = context; bufferData.CommandsType = ObjectType::QueryHwCounters; bufferData.Type = commandBufferType; bufferData.QueryHwCounters.Begin = begin; bufferData.QueryHwCounters.Handle = getQueryHandle(); bufferData.QueryHwCounters.HandleUserConfiguration = userConfiguration; return metricsLibrary->commandBufferGetSize(bufferData, bufferSize) ? bufferSize.GpuMemorySize : 0; } ////////////////////////////////////////////////////// // PerformanceCounters::getGpuCommands ////////////////////////////////////////////////////// bool PerformanceCounters::getGpuCommands( const MetricsLibraryApi::GpuCommandBufferType commandBufferType, TagNode &performanceCounters, const bool begin, const uint32_t bufferSize, void *pBuffer) { // Command Buffer data. CommandBufferData_1_0 bufferData = {}; bufferData.HandleContext = context; bufferData.CommandsType = ObjectType::QueryHwCounters; bufferData.Data = pBuffer; bufferData.Size = bufferSize; bufferData.Type = commandBufferType; // Gpu memory allocation for query hw counters. bufferData.Allocation.CpuAddress = reinterpret_cast(performanceCounters.tagForCpuAccess); bufferData.Allocation.GpuAddress = performanceCounters.getGpuAddress(); // Query hw counters specific data. 
bufferData.QueryHwCounters.Begin = begin; bufferData.QueryHwCounters.Handle = getQueryHandle(); bufferData.QueryHwCounters.HandleUserConfiguration = userConfiguration; return metricsLibrary->commandBufferGet(bufferData); } ////////////////////////////////////////////////////// // PerformanceCounters::getApiReportSize ////////////////////////////////////////////////////// uint32_t PerformanceCounters::getApiReportSize() { return metricsLibrary->hwCountersGetApiReportSize(); } ////////////////////////////////////////////////////// // PerformanceCounters::getGpuReportSize ////////////////////////////////////////////////////// uint32_t PerformanceCounters::getGpuReportSize() { return metricsLibrary->hwCountersGetGpuReportSize(); } ////////////////////////////////////////////////////// // PerformanceCounters::getApiReport ////////////////////////////////////////////////////// bool PerformanceCounters::getApiReport(const size_t inputParamSize, void *pInputParam, size_t *pOutputParamSize, bool isEventComplete) { const uint32_t outputSize = metricsLibrary->hwCountersGetApiReportSize(); if (pOutputParamSize) { *pOutputParamSize = outputSize; } if (pInputParam == nullptr && inputParamSize == 0 && pOutputParamSize) { return true; } if (pInputParam == nullptr || isEventComplete == false) { return false; } if (inputParamSize < outputSize) { return false; } return metricsLibrary->hwCountersGetReport(query, 0, 1, outputSize, pInputParam); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/performance_counters.h000066400000000000000000000102131363734646600271670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/perf_counter.h" #include "opencl/source/os_interface/metrics_library.h" #include namespace NEO { ////////////////////////////////////////////////////// // Forward declaration. 
////////////////////////////////////////////////////// template struct TagNode; ////////////////////////////////////////////////////// // Performance counters implementation. ////////////////////////////////////////////////////// class PerformanceCounters { public: ////////////////////////////////////////////////////// // Constructor/destructor. ////////////////////////////////////////////////////// PerformanceCounters(); virtual ~PerformanceCounters() = default; ////////////////////////////////////////////////////// // Performance counters creation. ////////////////////////////////////////////////////// static std::unique_ptr create(class Device *device); bool enable(bool ccsEngine); void shutdown(); uint32_t getReferenceNumber(); ///////////////////////////////////////////////////// // Gpu oa/mmio configuration. ///////////////////////////////////////////////////// virtual bool enableCountersConfiguration() = 0; virtual void releaseCountersConfiguration() = 0; ////////////////////////////////////////////////////// // Gpu commands. ////////////////////////////////////////////////////// uint32_t getGpuCommandsSize(const MetricsLibraryApi::GpuCommandBufferType commandBufferType, const bool begin); bool getGpuCommands(const MetricsLibraryApi::GpuCommandBufferType commandBufferType, TagNode &performanceCounters, const bool begin, const uint32_t bufferSize, void *pBuffer); ///////////////////////////////////////////////////// // Gpu/Api reports. ///////////////////////////////////////////////////// uint32_t getApiReportSize(); uint32_t getGpuReportSize(); bool getApiReport(const size_t inputParamSize, void *pClientData, size_t *pOutputSize, bool isEventComplete); ///////////////////////////////////////////////////// // Metrics Library interface. 
///////////////////////////////////////////////////// MetricsLibrary *getMetricsLibraryInterface(); void setMetricsLibraryInterface(std::unique_ptr newMetricsLibrary); bool openMetricsLibrary(); void closeMetricsLibrary(); ///////////////////////////////////////////////////// // Metrics Library context/query handles. ///////////////////////////////////////////////////// ContextHandle_1_0 getMetricsLibraryContext(); QueryHandle_1_0 getQueryHandle(); protected: ///////////////////////////////////////////////////// // Common members. ///////////////////////////////////////////////////// std::mutex mutex; uint32_t referenceCounter = 0; bool available = false; bool usingCcsEngine = false; ///////////////////////////////////////////////////// // Metrics Library interface. ///////////////////////////////////////////////////// std::unique_ptr metricsLibrary = {}; ///////////////////////////////////////////////////// // Metrics Library client data. ///////////////////////////////////////////////////// ClientData_1_0 clientData = {}; ClientType_1_0 clientType = {ClientApi::OpenCL, ClientGen::Unknown}; ///////////////////////////////////////////////////// // Metrics Library context. ///////////////////////////////////////////////////// ContextCreateData_1_0 contextData = {}; ContextHandle_1_0 context = {}; ///////////////////////////////////////////////////// // Metrics Library oa/mmio counters configuration. ///////////////////////////////////////////////////// ConfigurationHandle_1_0 oaConfiguration = {}; ConfigurationHandle_1_0 userConfiguration = {}; ///////////////////////////////////////////////////// // Metrics Library query object. 
///////////////////////////////////////////////////// QueryHandle_1_0 query = {}; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/000077500000000000000000000000001363734646600242705ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/os_interface/windows/CMakeLists.txt000066400000000000000000000020501363734646600270250ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_OS_INTERFACE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/api_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d10_11_sharing_functions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d9_sharing_functions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_init_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream.inl ${CMAKE_CURRENT_SOURCE_DIR}/ocl_reg_path.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_metrics_library.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_win.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm_device_command_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm_device_command_stream.inl ) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_WINDOWS} ) endif() add_subdirectories() set_property(GLOBAL PROPERTY RUNTIME_SRCS_OS_INTERFACE_WINDOWS ${RUNTIME_SRCS_OS_INTERFACE_WINDOWS}) compute-runtime-20.13.16352/opencl/source/os_interface/windows/api_win.cpp000066400000000000000000000765471363734646600264450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/utilities/api_intercept.h" #include "opencl/source/api/api.h" #include "opencl/source/api/dispatch.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include 
"opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" using namespace NEO; void NEO::MemObj::getOsSpecificMemObjectInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam) { switch (paramName) { case CL_MEM_D3D10_RESOURCE_KHR: *srcParamSize = sizeof(ID3D10Resource *); *srcParam = static_cast *>(peekSharingHandler())->getResourceHandler(); break; case CL_MEM_D3D11_RESOURCE_KHR: *srcParamSize = sizeof(ID3D11Resource *); *srcParam = static_cast *>(peekSharingHandler())->getResourceHandler(); break; case CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR: *srcParamSize = sizeof(cl_dx9_surface_info_khr); *srcParam = &static_cast(peekSharingHandler())->getSurfaceInfo(); break; case CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR: *srcParamSize = sizeof(cl_dx9_media_adapter_type_khr); *srcParam = &static_cast(peekSharingHandler())->getAdapterType(); break; case CL_MEM_DX9_RESOURCE_INTEL: *srcParamSize = sizeof(IDirect3DSurface9 *); *srcParam = &static_cast(peekSharingHandler())->getSurfaceInfo().resource; break; case CL_MEM_DX9_SHARED_HANDLE_INTEL: *srcParamSize = sizeof(HANDLE); *srcParam = &static_cast(peekSharingHandler())->getSurfaceInfo().shared_handle; break; } } void NEO::Image::getOsSpecificImageInfo(const cl_mem_info ¶mName, size_t *srcParamSize, void **srcParam) { switch (paramName) { case CL_IMAGE_D3D10_SUBRESOURCE_KHR: *srcParamSize = sizeof(unsigned int); *srcParam = &static_cast *>(peekSharingHandler())->getSubresource(); break; case CL_IMAGE_D3D11_SUBRESOURCE_KHR: *srcParamSize = sizeof(unsigned int); *srcParam = &static_cast *>(peekSharingHandler())->getSubresource(); break; case CL_IMAGE_DX9_MEDIA_PLANE_KHR: case CL_IMAGE_DX9_PLANE_INTEL: *srcParamSize = sizeof(cl_uint); *srcParam = &static_cast(peekSharingHandler())->getPlane(); break; } } void 
*NEO::Context::getOsContextInfo(cl_context_info ¶mName, size_t *srcParamSize) { switch (paramName) { case CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR: case CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR: *srcParamSize = sizeof(cl_bool); return &preferD3dSharedResources; default: break; } return nullptr; } cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL(cl_platform_id platform, cl_dx9_device_source_intel dx9DeviceSource, void *dx9Object, cl_dx9_device_set_intel dx9DeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { Platform *platformInternal = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "dx9DeviceSource", dx9DeviceSource, "dx9Object", dx9Object, "dx9DeviceSet", dx9DeviceSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } cl_device_id device = platformInternal->getClDevice(0); GetInfoHelper::set(devices, device); GetInfoHelper::set(numDevices, 1u); retVal = CL_SUCCESS; return retVal; } cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL(cl_context context, cl_mem_flags flags, IDirect3DSurface9 *resource, HANDLE sharedHandle, UINT plane, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "sharedHandle", sharedHandle, "plane", plane); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); cl_mem_flags validFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY; if ((flags & (~validFlags)) != 0) { err.set(CL_INVALID_VALUE); return nullptr; } if (!resource) { err.set(CL_INVALID_DX9_RESOURCE_INTEL); return nullptr; } cl_dx9_surface_info_khr surfaceInfo = {resource, sharedHandle}; auto ctx = castToObject(context); if (ctx) { return D3DSurface::create(ctx, &surfaceInfo, flags, 0, plane, errcodeRet); } else { err.set(CL_INVALID_CONTEXT); return nullptr; } } cl_int CL_API_CALL 
clEnqueueAcquireDX9ObjectsINTEL(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL); return retVal; } cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL(cl_command_queue commandQueue, cl_uint numObjects, cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { cmdQ->finish(); break; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL 
clGetDeviceIDsFromDX9MediaAdapterKHR(cl_platform_id platform, cl_uint numMediaAdapters, cl_dx9_media_adapter_type_khr *mediaAdapterType, void *mediaAdapters, cl_dx9_media_adapter_set_khr mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { Platform *platformInternal = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "numMediaAdapters", numMediaAdapters, "mediaAdapterType", mediaAdapterType, "mediaAdapters", mediaAdapters, "mediaAdapterSet", mediaAdapterSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } cl_device_id device = platformInternal->getClDevice(0); GetInfoHelper::set(devices, device); GetInfoHelper::set(numDevices, 1u); retVal = CL_SUCCESS; return retVal; } cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR(cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, void *surfaceInfo, cl_uint plane, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "adapterType", adapterType, "surfaceInfo", surfaceInfo, "plane", plane); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); auto localSurfaceInfo = (cl_dx9_surface_info_khr *)surfaceInfo; auto ctx = castToObject(context); return D3DSurface::create(ctx, localSurfaceInfo, flags, adapterType, plane, errcodeRet); } cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), 
numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR); return retVal; } cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject) { if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { cmdQ->finish(); break; } } else { retVal = CL_INVALID_MEM_OBJECT; return retVal; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR(cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { DXGI_ADAPTER_DESC dxgiDesc = {{0}}; IDXGIAdapter *dxgiAdapter = nullptr; ID3D10Device *d3dDevice = nullptr; D3DSharingFunctions sharingFcns((ID3D10Device *)nullptr); cl_uint localNumDevices = 0; cl_int retCode = CL_SUCCESS; 
Platform *platformInternal = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "d3dDeviceSource", d3dDeviceSource, "d3dObject", d3dObject, "d3dDeviceSet", d3dDeviceSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } if (DebugManager.injectFcn) { sharingFcns.getDxgiDescFcn = (D3DSharingFunctions::GetDxgiDescFcn)DebugManager.injectFcn; } ClDevice *device = platformInternal->getClDevice(0); switch (d3dDeviceSource) { case CL_D3D10_DEVICE_KHR: d3dDevice = (ID3D10Device *)d3dObject; break; case CL_D3D10_DXGI_ADAPTER_KHR: dxgiAdapter = (IDXGIAdapter *)d3dObject; break; default: GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_INVALID_VALUE; return retVal; } sharingFcns.getDxgiDescFcn(&dxgiDesc, dxgiAdapter, d3dDevice); if (dxgiDesc.VendorId != INTEL_VENDOR_ID || dxgiDesc.DeviceId != device->getHardwareInfo().platform.usDeviceID) { GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_DEVICE_NOT_FOUND; return retVal; } switch (d3dDeviceSet) { case CL_PREFERRED_DEVICES_FOR_D3D10_KHR: case CL_ALL_DEVICES_FOR_D3D10_KHR: GetInfoHelper::set(devices, static_cast(device)); localNumDevices = 1; break; default: retCode = CL_INVALID_VALUE; break; } GetInfoHelper::set(numDevices, localNumDevices); return retCode; } cl_mem CL_API_CALL clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, 0)) { err.set(CL_INVALID_D3D10_RESOURCE_KHR); return nullptr; } return D3DBuffer::create(ctx, resource, flags, errcodeRet); } cl_mem 
CL_API_CALL clCreateFromD3D10Texture2DKHR(cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D10_RESOURCE_KHR); return nullptr; } return D3DTexture::create2d(ctx, resource, flags, subresource, errcodeRet); } cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR(cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D10_RESOURCE_KHR); return nullptr; } return D3DTexture::create3d(ctx, resource, flags, subresource, errcodeRet); } cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return 
retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObj = castToObject(memObjects[object]); if (memObj->acquireCount >= 1) { retVal = CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR; return retVal; } } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR); return retVal; } cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject->acquireCount == 0) { retVal = CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR; return retVal; } if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { cmdQ->finish(); break; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR(cl_platform_id platform, cl_d3d11_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d11_device_set_khr 
d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { DXGI_ADAPTER_DESC dxgiDesc = {{0}}; IDXGIAdapter *dxgiAdapter = nullptr; ID3D11Device *d3dDevice = nullptr; D3DSharingFunctions sharingFcns((ID3D11Device *)nullptr); cl_uint localNumDevices = 0; Platform *platformInternal = nullptr; auto retVal = validateObjects(WithCastToInternal(platform, &platformInternal)); API_ENTER(&retVal); DBG_LOG_INPUTS("platform", platform, "d3dDeviceSource", d3dDeviceSource, "d3dObject", d3dObject, "d3dDeviceSet", d3dDeviceSet, "numEntries", numEntries, "devices", devices, "numDevices", numDevices); if (retVal != CL_SUCCESS) { return retVal; } if (DebugManager.injectFcn) { sharingFcns.getDxgiDescFcn = (D3DSharingFunctions::GetDxgiDescFcn)DebugManager.injectFcn; } ClDevice *device = platformInternal->getClDevice(0); switch (d3dDeviceSource) { case CL_D3D11_DEVICE_KHR: d3dDevice = (ID3D11Device *)d3dObject; break; case CL_D3D11_DXGI_ADAPTER_KHR: dxgiAdapter = (IDXGIAdapter *)d3dObject; break; default: GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_INVALID_VALUE; return retVal; break; } sharingFcns.getDxgiDescFcn(&dxgiDesc, dxgiAdapter, d3dDevice); if (dxgiDesc.VendorId != INTEL_VENDOR_ID || dxgiDesc.DeviceId != device->getHardwareInfo().platform.usDeviceID) { GetInfoHelper::set(numDevices, localNumDevices); retVal = CL_DEVICE_NOT_FOUND; return retVal; } switch (d3dDeviceSet) { case CL_PREFERRED_DEVICES_FOR_D3D11_KHR: case CL_ALL_DEVICES_FOR_D3D11_KHR: GetInfoHelper::set(devices, static_cast(device)); localNumDevices = 1; break; default: retVal = CL_INVALID_VALUE; break; } GetInfoHelper::set(numDevices, localNumDevices); return retVal; } cl_mem CL_API_CALL clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; 
err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, 0)) { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } return D3DBuffer::create(ctx, resource, flags, errcodeRet); } cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR(cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } return D3DTexture::create2d(ctx, resource, flags, subresource, errcodeRet); } cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR(cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "resource", resource, "subresource", subresource); ErrorCodeHelper err(errcodeRet, CL_SUCCESS); Context *ctx = nullptr; err.set(validateObjects(WithCastToInternal(context, &ctx))); if (err.localErrcode != CL_SUCCESS) { return nullptr; } if (ctx->getSharing>()->isTracked(resource, subresource)) { err.set(CL_INVALID_D3D11_RESOURCE_KHR); return nullptr; } return D3DTexture::create3d(ctx, resource, flags, subresource, errcodeRet); } cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, 
"numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObj = castToObject(memObjects[object]); if (memObj->acquireCount >= 1) { retVal = CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR; return retVal; } } retVal = cmdQ->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR); return retVal; } cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { CommandQueue *cmdQ = nullptr; auto retVal = validateObjects(WithCastToInternal(commandQueue, &cmdQ)); API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); if (retVal != CL_SUCCESS) { return retVal; } retVal = validateObjects(MemObjList(numObjects, memObjects)); if (retVal != CL_SUCCESS) { return retVal; } for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (memObject->acquireCount == 0) { retVal = CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR; return retVal; } if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { cmdQ->finish(); break; } } retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, 
eventWaitList, event, CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { cmdQ->finish(); } return retVal; } cl_int CL_API_CALL clGetSupportedDX9MediaSurfaceFormatsINTEL(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, D3DFORMAT *dx9Formats, cl_uint *numImageFormats) { if (validateObject(context) != CL_SUCCESS) { return CL_INVALID_CONTEXT; } if ((imageType != CL_MEM_OBJECT_BUFFER) && (imageType != CL_MEM_OBJECT_IMAGE2D) && (imageType != CL_MEM_OBJECT_IMAGE3D)) { return CL_INVALID_VALUE; } if (((flags & CL_MEM_READ_WRITE) == 0) && ((flags & CL_MEM_WRITE_ONLY) == 0) && ((flags & CL_MEM_READ_ONLY) == 0)) { return CL_INVALID_VALUE; } cl_uint i = 0; switch (plane) { case 0: for (auto format : D3DSurface::D3DtoClFormatConversions) { if (i >= numEntries) { break; } dx9Formats[i++] = format.first; } *numImageFormats = static_cast(D3DSurface::D3DtoClFormatConversions.size()); break; case 1: for (auto format : D3DSurface::D3DPlane1Formats) { if (i >= numEntries) { break; } dx9Formats[i++] = format; } *numImageFormats = static_cast(D3DSurface::D3DPlane1Formats.size()); break; case 2: for (auto format : D3DSurface::D3DPlane2Formats) { if (i >= numEntries) { break; } dx9Formats[i++] = format; } *numImageFormats = static_cast(D3DSurface::D3DPlane2Formats.size()); break; default: *numImageFormats = 0; } return CL_SUCCESS; } cl_int CL_API_CALL clGetSupportedD3D10TextureFormatsINTEL(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, DXGI_FORMAT *formats, cl_uint *numTextureFormats) { return getSupportedDXTextureFormats(context, imageType, 0, numEntries, formats, numTextureFormats); } cl_int CL_API_CALL clGetSupportedD3D11TextureFormatsINTEL(cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, DXGI_FORMAT *formats, cl_uint *numTextureFormats) { return getSupportedDXTextureFormats(context, 
imageType, plane, numEntries, formats, numTextureFormats); } compute-runtime-20.13.16352/opencl/source/os_interface/windows/d3d10_11_sharing_functions.cpp000066400000000000000000000323641363734646600317230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.inl" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/sharing_factory.h" using namespace NEO; template class D3DSharingFunctions; template class D3DSharingFunctions; const uint32_t D3DSharingFunctions::sharingId = SharingType::D3D10_SHARING; const uint32_t D3DSharingFunctions::sharingId = SharingType::D3D11_SHARING; static const DXGI_FORMAT DXGIFormats[] = { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R32_TYPELESS, 
DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_R1_UNORM, DXGI_FORMAT_R9G9B9E5_SHAREDEXP, DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_B8G8R8X8_TYPELESS, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_AYUV, DXGI_FORMAT_Y410, DXGI_FORMAT_Y416, DXGI_FORMAT_NV12, DXGI_FORMAT_P010, DXGI_FORMAT_P016, DXGI_FORMAT_420_OPAQUE, DXGI_FORMAT_YUY2, DXGI_FORMAT_Y210, DXGI_FORMAT_Y216, DXGI_FORMAT_NV11, DXGI_FORMAT_AI44, DXGI_FORMAT_IA44, DXGI_FORMAT_P8, DXGI_FORMAT_A8P8, DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_P208, DXGI_FORMAT_V208, DXGI_FORMAT_V408, DXGI_FORMAT_FORCE_UINT}; template void D3DSharingFunctions::createQuery(D3DQuery **query) { 
D3DQueryDesc desc = {}; d3dDevice->CreateQuery(&desc, query); } template void D3DSharingFunctions::updateDevice(D3DResource *resource) { resource->GetDevice(&d3dDevice); } template void D3DSharingFunctions::fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width) { desc.ByteWidth = width; desc.MiscFlags = D3DResourceFlags::MISC_SHARED; } template void D3DSharingFunctions::fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource) { desc.Width = srcDesc->Width; desc.Height = srcDesc->Height; desc.MipLevels = 1; desc.ArraySize = 1; desc.Format = srcDesc->Format; desc.MiscFlags = D3DResourceFlags::MISC_SHARED; desc.SampleDesc.Count = srcDesc->SampleDesc.Count; desc.SampleDesc.Quality = srcDesc->SampleDesc.Quality; for (uint32_t i = 0u; i < (subresource % srcDesc->MipLevels); i++) { desc.Width /= 2; desc.Height /= 2; } } template void D3DSharingFunctions::fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource) { desc.Width = srcDesc->Width; desc.Height = srcDesc->Height; desc.Depth = srcDesc->Depth; desc.MipLevels = 1; desc.Format = srcDesc->Format; desc.MiscFlags = D3DResourceFlags::MISC_SHARED; for (uint32_t i = 0u; i < (subresource % srcDesc->MipLevels); i++) { desc.Width /= 2; desc.Height /= 2; desc.Depth /= 2; } } template void D3DSharingFunctions::createBuffer(D3DBufferObj **buffer, unsigned int width) { D3DBufferDesc stagingDesc = {}; fillCreateBufferDesc(stagingDesc, width); d3dDevice->CreateBuffer(&stagingDesc, nullptr, buffer); } template void D3DSharingFunctions::createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource) { D3DTexture2dDesc stagingDesc = {}; fillCreateTexture2dDesc(stagingDesc, desc, subresource); d3dDevice->CreateTexture2D(&stagingDesc, nullptr, texture); } template void D3DSharingFunctions::createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource) { D3DTexture3dDesc stagingDesc = {}; 
fillCreateTexture3dDesc(stagingDesc, desc, subresource); d3dDevice->CreateTexture3D(&stagingDesc, nullptr, texture); } template void D3DSharingFunctions::checkFormatSupport(DXGI_FORMAT format, UINT *pFormat) { d3dDevice->CheckFormatSupport(format, pFormat); } template std::vector &D3DSharingFunctions::retrieveTextureFormats(cl_mem_object_type imageType, cl_uint plane) { auto cached = textureFormatCache.find(imageType); if (cached == textureFormatCache.end()) { bool success; std::tie(cached, success) = textureFormatCache.emplace(imageType, std::vector(0)); if (!success) { return DXGINoFormats; } std::vector &cached_formats = cached->second; std::vector planarFormats(0); cached_formats.reserve(arrayCount(DXGIFormats)); for (auto DXGIFormat : DXGIFormats) { UINT format = 0; checkFormatSupport(DXGIFormat, &format); if (memObjectFormatSupport(imageType, format)) { cached_formats.push_back(DXGIFormat); if (D3DSharing::isFormatWithPlane1(DXGIFormat)) { planarFormats.push_back(DXGIFormat); } } } cached_formats.shrink_to_fit(); textureFormatPlane1Cache.emplace(imageType, planarFormats); } if (plane == 1) { return textureFormatPlane1Cache.find(imageType)->second; } return cached->second; } template <> bool D3DSharingFunctions::memObjectFormatSupport(cl_mem_object_type objectType, UINT format) { auto d3dformat = static_cast(format); return ((objectType & CL_MEM_OBJECT_BUFFER) && (d3dformat & D3D10_FORMAT_SUPPORT_BUFFER)) || ((objectType & CL_MEM_OBJECT_IMAGE2D) && (d3dformat & D3D10_FORMAT_SUPPORT_TEXTURE2D)) || ((objectType & CL_MEM_OBJECT_IMAGE3D) && (d3dformat & D3D10_FORMAT_SUPPORT_TEXTURE3D)); } template <> bool D3DSharingFunctions::memObjectFormatSupport(cl_mem_object_type objectType, UINT format) { auto d3dformat = static_cast(format); return ((objectType & CL_MEM_OBJECT_BUFFER) && (d3dformat & D3D11_FORMAT_SUPPORT_BUFFER)) || ((objectType & CL_MEM_OBJECT_IMAGE2D) && (d3dformat & D3D11_FORMAT_SUPPORT_TEXTURE2D)) || ((objectType & CL_MEM_OBJECT_IMAGE3D) && (d3dformat & 
D3D11_FORMAT_SUPPORT_TEXTURE3D)); } template void D3DSharingFunctions::getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer) { buffer->GetDesc(bufferDesc); } template void D3DSharingFunctions::getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture) { texture->GetDesc(textureDesc); } template void D3DSharingFunctions::getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture) { texture->GetDesc(textureDesc); } template void D3DSharingFunctions::getSharedHandle(D3DResource *resource, void **handle) { IDXGIResource *dxgiResource = nullptr; resource->QueryInterface(__uuidof(IDXGIResource), (void **)&dxgiResource); dxgiResource->GetSharedHandle(handle); dxgiResource->Release(); } template void D3DSharingFunctions::getSharedNTHandle(D3DResource *resource, void **handle) { IDXGIResource *dxgiResource = nullptr; IDXGIResource1 *dxgiResource1 = nullptr; resource->QueryInterface(__uuidof(IDXGIResource), (void **)&dxgiResource); dxgiResource->QueryInterface(__uuidof(IDXGIResource1), (void **)&dxgiResource1); dxgiResource1->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE, nullptr, handle); dxgiResource1->Release(); dxgiResource->Release(); } template void D3DSharingFunctions::addRef(D3DResource *resource) { resource->AddRef(); } template void D3DSharingFunctions::release(IUnknown *resource) { resource->Release(); } template void D3DSharingFunctions::lockRect(D3DTexture2d *resource, D3DLOCKED_RECT *lockedRect, uint32_t flags) { } template void D3DSharingFunctions::unlockRect(D3DTexture2d *resource) { } template void D3DSharingFunctions::updateSurface(D3DTexture2d *src, D3DTexture2d *dst) { } template void D3DSharingFunctions::getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) { } template <> void D3DSharingFunctions::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource) { d3dDevice->CopySubresourceRegion(dst, dstSubresource, 0, 0, 0, 
src, srcSubresource, nullptr); } template <> void D3DSharingFunctions::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource) { d3d11DeviceContext->CopySubresourceRegion(dst, dstSubresource, 0, 0, 0, src, srcSubresource, nullptr); } template <> void D3DSharingFunctions::flushAndWait(D3DQuery *query) { query->End(); d3dDevice->Flush(); while (query->GetData(nullptr, 0, 0) != S_OK) ; } template <> void D3DSharingFunctions::flushAndWait(D3DQuery *query) { d3d11DeviceContext->End(query); d3d11DeviceContext->Flush(); while (d3d11DeviceContext->GetData(query, nullptr, 0, 0) != S_OK) ; } template <> void D3DSharingFunctions::getDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::getDeviceContext(D3DQuery *query) { d3dDevice->GetImmediateContext(&d3d11DeviceContext); } template <> void D3DSharingFunctions::releaseDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::releaseDeviceContext(D3DQuery *query) { d3d11DeviceContext->Release(); d3d11DeviceContext = nullptr; } template void D3DSharingFunctions::getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) { if (!adapter) { IDXGIDevice *dxgiDevice = nullptr; device->QueryInterface(__uuidof(IDXGIDevice), (void **)&dxgiDevice); dxgiDevice->GetAdapter(&adapter); dxgiDevice->Release(); } else { adapter->AddRef(); } adapter->GetDesc(dxgiDesc); adapter->Release(); } template D3DSharingFunctions *Context::getSharing>(); template D3DSharingFunctions *Context::getSharing>(); compute-runtime-20.13.16352/opencl/source/os_interface/windows/d3d9_sharing_functions.cpp000066400000000000000000000117301363734646600313440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.inl" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/sharing_factory.h" using namespace NEO; 
template class D3DSharingFunctions; const uint32_t D3DSharingFunctions::sharingId = SharingType::D3D9_SHARING; template <> void D3DSharingFunctions::createQuery(D3DQuery **query) { D3DQUERYTYPE queryType = D3DQUERYTYPE_EVENT; d3dDevice->CreateQuery(queryType, query); } template <> void D3DSharingFunctions::updateDevice(D3DResource *resource) { resource->GetDevice(&d3dDevice); } template <> void D3DSharingFunctions::fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width) { } template <> std::vector &D3DSharingFunctions::retrieveTextureFormats(cl_mem_object_type imageType, cl_uint plane) { return DXGINoFormats; } template <> void D3DSharingFunctions::fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource) { } template <> void D3DSharingFunctions::fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource) { } template <> void D3DSharingFunctions::createBuffer(D3DBufferObj **buffer, unsigned int width) { } template <> void D3DSharingFunctions::createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource) { d3dDevice->CreateOffscreenPlainSurface(desc->Width, desc->Height, desc->Format, D3DPOOL_SYSTEMMEM, texture, nullptr); } template <> void D3DSharingFunctions::createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource) { } template <> void D3DSharingFunctions::checkFormatSupport(DXGI_FORMAT format, UINT *pFormat) { } template <> bool D3DSharingFunctions::memObjectFormatSupport(cl_mem_object_type object, UINT format) { return false; } template <> void D3DSharingFunctions::getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer) { } template <> void D3DSharingFunctions::getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture) { texture->GetDesc(textureDesc); } template <> void D3DSharingFunctions::getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture) { } template <> void 
D3DSharingFunctions::getSharedHandle(D3DResource *resource, void **handle) { } template <> void D3DSharingFunctions::getSharedNTHandle(D3DResource *resource, void **handle) { } template <> void D3DSharingFunctions::addRef(D3DResource *resource) { resource->AddRef(); } template <> void D3DSharingFunctions::release(IUnknown *resource) { if (resource) { resource->Release(); } } template <> void D3DSharingFunctions::lockRect(D3DTexture2d *d3dresource, D3DLOCKED_RECT *lockedRect, uint32_t flags) { d3dresource->LockRect(lockedRect, nullptr, flags); } template <> void D3DSharingFunctions::unlockRect(D3DTexture2d *d3dresource) { d3dresource->UnlockRect(); } template <> void D3DSharingFunctions::getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface) { d3dDevice->GetRenderTargetData(renderTarget, dstSurface); } template <> void D3DSharingFunctions::copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource) { } template <> void D3DSharingFunctions::updateSurface(D3DTexture2d *src, D3DTexture2d *dst) { d3dDevice->UpdateSurface(src, nullptr, dst, nullptr); } template <> void D3DSharingFunctions::flushAndWait(D3DQuery *query) { query->Issue(D3DISSUE_END); while (query->GetData(nullptr, 0, D3DGETDATA_FLUSH) != S_OK) ; } template <> void D3DSharingFunctions::getDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::releaseDeviceContext(D3DQuery *query) { } template <> void D3DSharingFunctions::getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) { if (!adapter) { IDXGIDevice *dxgiDevice = nullptr; device->QueryInterface(__uuidof(IDXGIDevice), (void **)&dxgiDevice); dxgiDevice->GetAdapter(&adapter); dxgiDevice->Release(); } else { adapter->AddRef(); } adapter->GetDesc(dxgiDesc); adapter->Release(); } template D3DSharingFunctions *Context::getSharing>(); 
compute-runtime-20.13.16352/opencl/source/os_interface/windows/d3d_sharing_functions.h000066400000000000000000000164631363734646600307300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/api/dispatch.h" #include "opencl/source/sharings/sharing.h" #include "DXGI1_2.h" #include #include namespace NEO { namespace D3DTypesHelper { struct D3D9 { typedef IDirect3DDevice9 D3DDevice; typedef IDirect3DQuery9 D3DQuery; typedef D3DQUERYTYPE D3DQueryDesc; typedef IDirect3DResource9 D3DResource; typedef struct { } D3DBufferDesc; typedef void *D3DBufferObj; typedef D3DSURFACE_DESC D3DTexture2dDesc; typedef struct { } D3DTexture3dDesc; typedef IDirect3DSurface9 D3DTexture2d; typedef struct { } D3DTexture3d; }; struct D3D10 { typedef ID3D10Device D3DDevice; typedef ID3D10Query D3DQuery; typedef D3D10_QUERY_DESC D3DQueryDesc; typedef ID3D10Resource D3DResource; typedef D3D10_BUFFER_DESC D3DBufferDesc; typedef ID3D10Buffer D3DBufferObj; typedef D3D10_TEXTURE2D_DESC D3DTexture2dDesc; typedef D3D10_TEXTURE3D_DESC D3DTexture3dDesc; typedef ID3D10Texture2D D3DTexture2d; typedef ID3D10Texture3D D3DTexture3d; }; struct D3D11 { typedef ID3D11Device D3DDevice; typedef ID3D11Query D3DQuery; typedef D3D11_QUERY_DESC D3DQueryDesc; typedef ID3D11Resource D3DResource; typedef D3D11_BUFFER_DESC D3DBufferDesc; typedef ID3D11Buffer D3DBufferObj; typedef D3D11_TEXTURE2D_DESC D3DTexture2dDesc; typedef D3D11_TEXTURE3D_DESC D3DTexture3dDesc; typedef ID3D11Texture2D D3DTexture2d; typedef ID3D11Texture3D D3DTexture3d; }; } // namespace D3DTypesHelper enum D3DResourceFlags { USAGE_RENDERTARGET = 1, MISC_SHARED = 2, MISC_SHARED_KEYEDMUTEX = 256, MISC_SHARED_NTHANDLE = 2048 }; template class D3DSharingFunctions : public SharingFunctions { typedef typename D3D::D3DDevice D3DDevice; typedef typename D3D::D3DQuery 
D3DQuery; typedef typename D3D::D3DQueryDesc D3DQueryDesc; typedef typename D3D::D3DResource D3DResource; typedef typename D3D::D3DBufferDesc D3DBufferDesc; typedef typename D3D::D3DBufferObj D3DBufferObj; typedef typename D3D::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D::D3DTexture3dDesc D3DTexture3dDesc; typedef typename D3D::D3DTexture2d D3DTexture2d; typedef typename D3D::D3DTexture3d D3DTexture3d; public: typedef void (*GetDxgiDescFcn)(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device); D3DSharingFunctions(D3DDevice *d3dDevice) : d3dDevice(d3dDevice) { trackedResources.reserve(128); getDxgiDescFcn = &this->getDxgiDesc; }; uint32_t getId() const override { return D3DSharingFunctions::sharingId; } D3DSharingFunctions() = delete; static const uint32_t sharingId; MOCKABLE_VIRTUAL void createQuery(D3DQuery **query); MOCKABLE_VIRTUAL void createBuffer(D3DBufferObj **buffer, unsigned int width); MOCKABLE_VIRTUAL void createTexture2d(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource); MOCKABLE_VIRTUAL void createTexture3d(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource); MOCKABLE_VIRTUAL void getBufferDesc(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer); MOCKABLE_VIRTUAL void getTexture2dDesc(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture); MOCKABLE_VIRTUAL void getTexture3dDesc(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture); MOCKABLE_VIRTUAL void getSharedHandle(D3DResource *resource, void **handle); MOCKABLE_VIRTUAL void getSharedNTHandle(D3DResource *resource, void **handle); MOCKABLE_VIRTUAL void addRef(D3DResource *resource); MOCKABLE_VIRTUAL void release(IUnknown *resource); MOCKABLE_VIRTUAL void copySubresourceRegion(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource); MOCKABLE_VIRTUAL void flushAndWait(D3DQuery *query); MOCKABLE_VIRTUAL void getDeviceContext(D3DQuery *query); MOCKABLE_VIRTUAL void releaseDeviceContext(D3DQuery *query); 
MOCKABLE_VIRTUAL void lockRect(D3DTexture2d *d3dResource, D3DLOCKED_RECT *lockedRect, uint32_t flags); MOCKABLE_VIRTUAL void unlockRect(D3DTexture2d *d3dResource); MOCKABLE_VIRTUAL void getRenderTargetData(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface); MOCKABLE_VIRTUAL void updateSurface(D3DTexture2d *src, D3DTexture2d *dst); MOCKABLE_VIRTUAL void updateDevice(D3DResource *resource); MOCKABLE_VIRTUAL void checkFormatSupport(DXGI_FORMAT format, UINT *pFormat); MOCKABLE_VIRTUAL bool memObjectFormatSupport(cl_mem_object_type object, UINT format); GetDxgiDescFcn getDxgiDescFcn = nullptr; bool isTracked(D3DResource *resource, cl_uint subresource) { return std::find(trackedResources.begin(), trackedResources.end(), std::make_pair(resource, subresource)) != trackedResources.end(); } void track(D3DResource *resource, cl_uint subresource) { trackedResources.push_back(std::make_pair(resource, subresource)); } void untrack(D3DResource *resource, cl_uint subresource) { auto element = std::find(trackedResources.begin(), trackedResources.end(), std::make_pair(resource, subresource)); DEBUG_BREAK_IF(element == trackedResources.end()); trackedResources.erase(element); } void setDevice(D3DDevice *d3dDevice) { this->d3dDevice = d3dDevice; } D3DDevice *getDevice() { return d3dDevice; } void fillCreateBufferDesc(D3DBufferDesc &desc, unsigned int width); void fillCreateTexture2dDesc(D3DTexture2dDesc &desc, D3DTexture2dDesc *srcDesc, cl_uint subresource); void fillCreateTexture3dDesc(D3DTexture3dDesc &desc, D3DTexture3dDesc *srcDesc, cl_uint subresource); std::vector &retrieveTextureFormats(cl_mem_object_type imageType, cl_uint plane); protected: D3DDevice *d3dDevice = nullptr; ID3D11DeviceContext *d3d11DeviceContext = nullptr; std::vector DXGINoFormats; std::vector> trackedResources; std::map> textureFormatCache; std::map> textureFormatPlane1Cache; static void getDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device); }; template static inline cl_int 
getSupportedDXTextureFormats(cl_context context, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, DXGI_FORMAT *formats, cl_uint *numImageFormats) { Context *pContext = castToObject(context); if (!pContext) { return CL_INVALID_CONTEXT; } auto pSharing = pContext->getSharing>(); if (!pSharing) { return CL_INVALID_CONTEXT; } auto supported_formats = pSharing->retrieveTextureFormats(imageType, plane); if (formats != nullptr) { memcpy_s(formats, sizeof(DXGI_FORMAT) * numEntries, supported_formats.data(), sizeof(DXGI_FORMAT) * std::min(static_cast(numEntries), supported_formats.size())); } if (numImageFormats) { *numImageFormats = static_cast(supported_formats.size()); } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/device_caps_init_win.cpp000066400000000000000000000017261363734646600311470ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef _WIN32 #include "shared/source/device/device.h" #include "opencl/source/cl_device/cl_device.h" namespace NEO { void ClDevice::initializeExtraCaps() { deviceExtensions += "cl_intel_simultaneous_sharing "; deviceInfo.deviceExtensions = deviceExtensions.c_str(); simultaneousInterops = {CL_GL_CONTEXT_KHR, CL_WGL_HDC_KHR, CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_ADAPTER_D3D9EX_KHR, CL_CONTEXT_D3D9EX_DEVICE_INTEL, CL_CONTEXT_ADAPTER_DXVA_KHR, CL_CONTEXT_DXVA_DEVICE_INTEL, CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR, 0}; } } // namespace NEO #endif compute-runtime-20.13.16352/opencl/source/os_interface/windows/device_command_stream.inl000066400000000000000000000020661363734646600313100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // Need to suppress warining 4005 caused by hw_cmds.h and wddm.h order. 
// Current order must be preserved due to two versions of igfxfmid.h #pragma warning(push) #pragma warning(disable : 4005) #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/helpers/hw_cmds.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/os_interface/windows/wddm_device_command_stream.h" #pragma warning(pop) namespace NEO { template CommandStreamReceiver *DeviceCommandStreamReceiver::create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { if (withAubDump) { return new CommandStreamReceiverWithAUBDump>("aubfile", executionEnvironment, rootDeviceIndex); } else { return new WddmCommandStreamReceiver(executionEnvironment, rootDeviceIndex); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/gl/000077500000000000000000000000001363734646600246725ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/os_interface/windows/gl/CMakeLists.txt000066400000000000000000000011041363734646600274260ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_OS_INTERFACE_WINDOWS_GL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gl_arb_sync_event_os.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_os.h ) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_WINDOWS_GL}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_OS_INTERFACE_WINDOWS_GL ${RUNTIME_SRCS_OS_INTERFACE_WINDOWS_GL}) compute-runtime-20.13.16352/opencl/source/os_interface/windows/gl/gl_arb_sync_event_os.cpp000066400000000000000000000144731363734646600315730ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" 
#include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/gdi_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/context/context.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include namespace NEO { void destroySync(Gdi &gdi, D3DKMT_HANDLE sync) { if (!sync) { return; } D3DKMT_DESTROYSYNCHRONIZATIONOBJECT destroySyncInfo = {0}; destroySyncInfo.hSyncObject = sync; NTSTATUS status = gdi.destroySynchronizationObject(&destroySyncInfo); DEBUG_BREAK_IF(0 != status); } void destroyEvent(OSInterface &osInterface, HANDLE event) { if (!event) { return; } auto ret = osInterface.get()->closeHandle(event); DEBUG_BREAK_IF(TRUE != ret); } void cleanupArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo) { if (nullptr == glSyncInfo) { return; } auto gdi = osInterface.get()->getWddm()->getGdi(); UNRECOVERABLE_IF(nullptr == gdi); destroySync(*gdi, glSyncInfo->serverSynchronizationObject); destroySync(*gdi, glSyncInfo->clientSynchronizationObject); destroySync(*gdi, glSyncInfo->submissionSynchronizationObject); destroyEvent(osInterface, glSyncInfo->event); destroyEvent(osInterface, glSyncInfo->submissionEvent); destroyArbSyncEventName(glSyncInfo->eventName); destroyArbSyncEventName(glSyncInfo->submissionEventName); } bool setupArbSyncObject(GLSharingFunctions &sharing, OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { auto &sharingFunctions = static_cast(sharing); glSyncInfo.hContextToBlock = static_cast(sharingFunctions.getGLContextHandle()); auto glDevice = static_cast(sharingFunctions.getGLDeviceHandle()); auto wddm = osInterface.get()->getWddm(); 
D3DKMT_CREATESYNCHRONIZATIONOBJECT serverSyncInitInfo = {0}; serverSyncInitInfo.hDevice = glDevice; serverSyncInitInfo.Info.Type = D3DDDI_SEMAPHORE; serverSyncInitInfo.Info.Semaphore.MaxCount = 32; serverSyncInitInfo.Info.Semaphore.InitialCount = 0; NTSTATUS serverSyncInitStatus = wddm->getGdi()->createSynchronizationObject(&serverSyncInitInfo); glSyncInfo.serverSynchronizationObject = serverSyncInitInfo.hSyncObject; glSyncInfo.eventName = createArbSyncEventName(); glSyncInfo.event = osInterface.get()->createEvent(NULL, TRUE, FALSE, glSyncInfo.eventName); D3DKMT_CREATESYNCHRONIZATIONOBJECT2 clientSyncInitInfo = {0}; clientSyncInitInfo.hDevice = glDevice; clientSyncInitInfo.Info.Type = D3DDDI_CPU_NOTIFICATION; clientSyncInitInfo.Info.CPUNotification.Event = glSyncInfo.event; NTSTATUS clientSyncInitStatus = wddm->getGdi()->createSynchronizationObject2(&clientSyncInitInfo); glSyncInfo.clientSynchronizationObject = clientSyncInitInfo.hSyncObject; D3DKMT_CREATESYNCHRONIZATIONOBJECT2 submissionSyncEventInfo = {0}; glSyncInfo.submissionEventName = createArbSyncEventName(); glSyncInfo.submissionEvent = osInterface.get()->createEvent(NULL, TRUE, FALSE, glSyncInfo.submissionEventName); submissionSyncEventInfo.hDevice = glDevice; submissionSyncEventInfo.Info.Type = D3DDDI_CPU_NOTIFICATION; submissionSyncEventInfo.Info.CPUNotification.Event = glSyncInfo.submissionEvent; auto submissionSyncInitStatus = wddm->getGdi()->createSynchronizationObject2(&submissionSyncEventInfo); glSyncInfo.submissionSynchronizationObject = submissionSyncEventInfo.hSyncObject; glSyncInfo.waitCalled = false; bool setupFailed = (glSyncInfo.event == nullptr) || (glSyncInfo.submissionEvent == nullptr) || (0 != serverSyncInitStatus) || (0 != clientSyncInitStatus) || (0 != submissionSyncInitStatus); if (setupFailed) { DEBUG_BREAK_IF(true); cleanupArbSyncObject(osInterface, &glSyncInfo); return false; } return true; } void signalArbSyncObject(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo) { auto 
osContextWin = static_cast(&osContext); UNRECOVERABLE_IF(!osContextWin); auto wddm = osContextWin->getWddm(); D3DKMT_SIGNALSYNCHRONIZATIONOBJECT signalServerClientSyncInfo = {0}; signalServerClientSyncInfo.hContext = osContextWin->getWddmContextHandle(); signalServerClientSyncInfo.Flags.SignalAtSubmission = 0; // Wait for GPU to complete processing command buffer signalServerClientSyncInfo.ObjectHandleArray[0] = glSyncInfo.serverSynchronizationObject; signalServerClientSyncInfo.ObjectHandleArray[1] = glSyncInfo.clientSynchronizationObject; signalServerClientSyncInfo.ObjectCount = 2; NTSTATUS status = wddm->getGdi()->signalSynchronizationObject(&signalServerClientSyncInfo); if (0 != status) { DEBUG_BREAK_IF(true); return; } D3DKMT_SIGNALSYNCHRONIZATIONOBJECT signalSubmissionSyncInfo = {0}; signalSubmissionSyncInfo.hContext = osContextWin->getWddmContextHandle(); signalSubmissionSyncInfo.Flags.SignalAtSubmission = 1; // Don't wait for GPU to complete processing command buffer signalSubmissionSyncInfo.ObjectHandleArray[0] = glSyncInfo.submissionSynchronizationObject; signalSubmissionSyncInfo.ObjectCount = 1; status = wddm->getGdi()->signalSynchronizationObject(&signalSubmissionSyncInfo); DEBUG_BREAK_IF(0 != status); } void serverWaitForArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { auto wddm = osInterface.get()->getWddm(); D3DKMT_WAITFORSYNCHRONIZATIONOBJECT waitForSyncInfo = {0}; waitForSyncInfo.hContext = glSyncInfo.hContextToBlock; waitForSyncInfo.ObjectCount = 1; waitForSyncInfo.ObjectHandleArray[0] = glSyncInfo.serverSynchronizationObject; NTSTATUS status = wddm->getGdi()->waitForSynchronizationObject(&waitForSyncInfo); if (status != 0) { DEBUG_BREAK_IF(true); return; } glSyncInfo.waitCalled = true; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/gl/gl_options.cpp000066400000000000000000000002541363734646600275540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include namespace Os { const char *openglDllName = "opengl32.dll"; } compute-runtime-20.13.16352/opencl/source/os_interface/windows/gl/gl_sharing_os.h000066400000000000000000000004171363734646600276630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/windows_wrapper.h" #include #define OSAPI WINAPI typedef uint32_t GLType; typedef HDC GLDisplay; typedef HGLRC GLContext; compute-runtime-20.13.16352/opencl/source/os_interface/windows/gl/gl_sharing_win.cpp000066400000000000000000000034661363734646600304010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/windows/windows_wrapper.h" #include "opencl/source/helpers/windows/gl_helper.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include #include #include namespace Os { extern const char *openglDllName; } namespace NEO { cl_int GLSharingFunctions::getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, size_t numEntries, cl_GLenum *formats, uint32_t *numImageFormats) { if (flags != CL_MEM_READ_ONLY && flags != CL_MEM_WRITE_ONLY && flags != CL_MEM_READ_WRITE && flags != CL_MEM_KERNEL_READ_AND_WRITE) { return CL_INVALID_VALUE; } if (imageType != CL_MEM_OBJECT_IMAGE1D && imageType != CL_MEM_OBJECT_IMAGE2D && imageType != CL_MEM_OBJECT_IMAGE3D && imageType != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageType != CL_MEM_OBJECT_IMAGE1D_BUFFER) { return CL_INVALID_VALUE; } const auto formatsCount = GlSharing::gLToCLFormats.size(); if (numImageFormats != nullptr) { *numImageFormats = static_cast(formatsCount); } if (formats != nullptr && formatsCount > 0) { auto elementsToCopy = std::min(numEntries, formatsCount); uint32_t i = 0; for (auto &x : 
GlSharing::gLToCLFormats) { formats[i++] = x.first; if (i == elementsToCopy) { break; } } } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/ocl_reg_path.cpp000066400000000000000000000003341363734646600274220ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { const char *oclRegPath = "Software\\Intel\\IGFX\\OCL\\"; } compute-runtime-20.13.16352/opencl/source/os_interface/windows/os_metrics_library.cpp000066400000000000000000000035511363734646600306730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/metrics_library.h" namespace NEO { ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::oaConfigurationActivate ////////////////////////////////////////////////////// bool MetricsLibrary::oaConfigurationActivate( const ConfigurationHandle_1_0 &handle) { ConfigurationActivateData_1_0 data = {}; data.Type = GpuConfigurationActivationType::EscapeCode; return api->functions.ConfigurationActivate( handle, &data) == StatusCode::Success; } ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::oaConfigurationDeactivate ////////////////////////////////////////////////////// bool MetricsLibrary::oaConfigurationDeactivate( const ConfigurationHandle_1_0 &handle) { return api->functions.ConfigurationDeactivate( handle) == StatusCode::Success; } ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::userConfigurationCreate ////////////////////////////////////////////////////// bool MetricsLibrary::userConfigurationCreate( const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle) { ConfigurationCreateData_1_0 data = {}; data.HandleContext = context; data.Type = ObjectType::ConfigurationHwCountersUser; 
return api->functions.ConfigurationCreate( &data, &handle) == StatusCode::Success; } ////////////////////////////////////////////////////// // FUNCTION: MetricsLibrary::userConfigurationDelete ////////////////////////////////////////////////////// bool MetricsLibrary::userConfigurationDelete( const ConfigurationHandle_1_0 &handle) { return api->functions.ConfigurationDelete(handle) == StatusCode::Success; } } // namespace NEOcompute-runtime-20.13.16352/opencl/source/os_interface/windows/performance_counters_win.cpp000066400000000000000000000062701363734646600321010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "performance_counters_win.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/os_time_win.h" namespace NEO { ///////////////////////////////////////////////////// // PerformanceCounters::create ///////////////////////////////////////////////////// std::unique_ptr PerformanceCounters::create(Device *device) { auto counter = std::make_unique(); auto osInterface = device->getOSTime()->getOSInterface()->get(); auto gen = device->getHardwareInfo().platform.eRenderCoreFamily; auto &hwHelper = HwHelper::get(gen); UNRECOVERABLE_IF(counter == nullptr); counter->clientData.Windows.Adapter = reinterpret_cast(static_cast(osInterface->getAdapterHandle())); counter->clientData.Windows.Device = reinterpret_cast(static_cast(osInterface->getDeviceHandle())); counter->clientData.Windows.Device = reinterpret_cast(static_cast(osInterface->getDeviceHandle())); counter->clientData.Windows.Escape = osInterface->getEscapeHandle(); counter->clientData.Windows.KmdInstrumentationEnabled = device->getHardwareInfo().capabilityTable.instrumentationEnabled; counter->contextData.ClientData = &counter->clientData; counter->clientType.Gen = 
static_cast(hwHelper.getMetricsLibraryGenId()); return counter; } ////////////////////////////////////////////////////// // PerformanceCountersWin::enableCountersConfiguration ////////////////////////////////////////////////////// bool PerformanceCountersWin::enableCountersConfiguration() { // Release previous counters configuration so the user // can change configuration between kernels. releaseCountersConfiguration(); // Create mmio user configuration. if (!metricsLibrary->userConfigurationCreate( context, userConfiguration)) { DEBUG_BREAK_IF(true); return false; } // Create oa configuration. if (!metricsLibrary->oaConfigurationCreate( context, oaConfiguration)) { DEBUG_BREAK_IF(true); return false; } // Enable oa configuration. if (!metricsLibrary->oaConfigurationActivate( oaConfiguration)) { DEBUG_BREAK_IF(true); return false; } return true; } ////////////////////////////////////////////////////// // PerformanceCountersWin::releaseCountersConfiguration ////////////////////////////////////////////////////// void PerformanceCountersWin::releaseCountersConfiguration() { // Mmio user configuration. if (userConfiguration.IsValid()) { metricsLibrary->userConfigurationDelete(userConfiguration); userConfiguration.data = nullptr; } // Oa configuration. 
if (oaConfiguration.IsValid()) { metricsLibrary->oaConfigurationDeactivate(oaConfiguration); metricsLibrary->oaConfigurationDelete(oaConfiguration); oaConfiguration.data = nullptr; } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/performance_counters_win.h000066400000000000000000000011571363734646600315450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/os_interface/performance_counters.h" namespace NEO { class PerformanceCountersWin : virtual public PerformanceCounters { public: PerformanceCountersWin() = default; ~PerformanceCountersWin() override = default; ///////////////////////////////////////////////////// // Gpu oa/mmio configuration. ///////////////////////////////////////////////////// bool enableCountersConfiguration() override; void releaseCountersConfiguration() override; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/wddm_device_command_stream.h000066400000000000000000000025261363734646600317710ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/device_command_stream.h" struct COMMAND_BUFFER_HEADER_REC; namespace NEO { class GmmPageTableMngr; class GraphicsAllocation; class WddmMemoryManager; class Wddm; template class WddmCommandStreamReceiver : public DeviceCommandStreamReceiver { typedef DeviceCommandStreamReceiver BaseClass; public: WddmCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); virtual ~WddmCommandStreamReceiver(); bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override; void processEviction() override; bool waitForFlushStamp(FlushStamp &flushStampToWait) override; 
WddmMemoryManager *getMemoryManager() const; Wddm *peekWddm() const { return wddm; } GmmPageTableMngr *createPageTableManager() override; bool initDirectSubmission(Device &device, OsContext &osContext) override; protected: void kmDafLockAllocations(ResidencyContainer &allocationsForResidency); Wddm *wddm; COMMAND_BUFFER_HEADER_REC *commandBufferHeader; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/os_interface/windows/wddm_device_command_stream.inl000066400000000000000000000213131363734646600323170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // Need to suppress warining 4005 caused by hw_cmds.h and wddm.h order. // Current order must be preserved due to two versions of igfxfmid.h #pragma warning(push) #pragma warning(disable : 4005) #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/direct_submission/windows/wddm_direct_submission.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "opencl/source/os_interface/windows/wddm_device_command_stream.h" #pragma warning(pop) #include "shared/source/os_interface/windows/gdi_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" namespace NEO { // Initialize COMMAND_BUFFER_HEADER Type PatchList Streamer Perf Tag DECLARE_COMMAND_BUFFER(CommandBufferHeader, UMD_OCL, FALSE, FALSE, PERFTAG_OCL); template 
WddmCommandStreamReceiver::WddmCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(executionEnvironment, rootDeviceIndex) { notifyAubCaptureImpl = DeviceCallbacks::notifyAubCapture; this->wddm = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->get()->getWddm(); PreemptionMode preemptionMode = PreemptionHelper::getDefaultPreemptionMode(peekHwInfo()); commandBufferHeader = new COMMAND_BUFFER_HEADER; *commandBufferHeader = CommandBufferHeader; if (preemptionMode != PreemptionMode::Disabled) { commandBufferHeader->NeedsMidBatchPreEmptionSupport = true; } this->dispatchMode = DispatchMode::BatchedDispatch; if (DebugManager.flags.CsrDispatchMode.get()) { this->dispatchMode = (DispatchMode)DebugManager.flags.CsrDispatchMode.get(); } } template WddmCommandStreamReceiver::~WddmCommandStreamReceiver() { if (commandBufferHeader) delete commandBufferHeader; } template bool WddmCommandStreamReceiver::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { auto commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset); allocationsForResidency.push_back(batchBuffer.commandBufferAllocation); batchBuffer.commandBufferAllocation->updateResidencyTaskCount(this->taskCount, this->osContext->getContextId()); this->processResidency(allocationsForResidency, 0u); if (directSubmission.get()) { return directSubmission->dispatchCommandBuffer(batchBuffer, *(flushStamp.get())); } COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandBufferHeader); pHeader->RequiresCoherency = batchBuffer.requiresCoherency; pHeader->UmdRequestedSliceState = 0; pHeader->UmdRequestedEUCount = wddm->getRequestedEUCount(); const uint32_t maxRequestedSubsliceCount = 7; switch (batchBuffer.throttle) { case QueueThrottle::LOW: case QueueThrottle::MEDIUM: pHeader->UmdRequestedSubsliceCount = 0; break; case QueueThrottle::HIGH: pHeader->UmdRequestedSubsliceCount = 
(wddm->getGtSysInfo()->SubSliceCount <= maxRequestedSubsliceCount) ? wddm->getGtSysInfo()->SubSliceCount : 0; break; } if (wddm->isKmDafEnabled()) { this->kmDafLockAllocations(allocationsForResidency); } auto osContextWin = static_cast(osContext); WddmSubmitArguments submitArgs = {}; submitArgs.contextHandle = osContextWin->getWddmContextHandle(); submitArgs.hwQueueHandle = osContextWin->getHwQueue().handle; submitArgs.monitorFence = &osContextWin->getResidencyController().getMonitoredFence(); auto status = wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader, submitArgs); flushStamp->setStamp(submitArgs.monitorFence->lastSubmittedFence); return status; } template void WddmCommandStreamReceiver::processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) { bool success = static_cast(osContext)->getResidencyController().makeResidentResidencyAllocations(allocationsForResidency); DEBUG_BREAK_IF(!success); } template void WddmCommandStreamReceiver::processEviction() { static_cast(osContext)->getResidencyController().makeNonResidentEvictionAllocations(this->getEvictionAllocations()); this->getEvictionAllocations().clear(); } template WddmMemoryManager *WddmCommandStreamReceiver::getMemoryManager() const { return static_cast(CommandStreamReceiver::getMemoryManager()); } template bool WddmCommandStreamReceiver::waitForFlushStamp(FlushStamp &flushStampToWait) { return wddm->waitFromCpu(flushStampToWait, static_cast(osContext)->getResidencyController().getMonitoredFence()); } template GmmPageTableMngr *WddmCommandStreamReceiver::createPageTableManager() { GMM_TRANSLATIONTABLE_CALLBACKS ttCallbacks = {}; ttCallbacks.pfWriteL3Adr = TTCallbacks::writeL3Address; auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex].get(); GmmPageTableMngr *gmmPageTableMngr = GmmPageTableMngr::create(rootDeviceEnvironment->getGmmClientContext(), TT_TYPE::AUXTT, &ttCallbacks); 
gmmPageTableMngr->setCsrHandle(this); rootDeviceEnvironment->pageTableManager.reset(gmmPageTableMngr); return gmmPageTableMngr; } template void WddmCommandStreamReceiver::kmDafLockAllocations(ResidencyContainer &allocationsForResidency) { for (auto &graphicsAllocation : allocationsForResidency) { if ((GraphicsAllocation::AllocationType::LINEAR_STREAM == graphicsAllocation->getAllocationType()) || (GraphicsAllocation::AllocationType::FILL_PATTERN == graphicsAllocation->getAllocationType()) || (GraphicsAllocation::AllocationType::COMMAND_BUFFER == graphicsAllocation->getAllocationType())) { wddm->kmDafLock(static_cast(graphicsAllocation)->getDefaultHandle()); } } } template bool WddmCommandStreamReceiver::initDirectSubmission(Device &device, OsContext &osContext) { bool ret = true; if (DebugManager.flags.EnableDirectSubmission.get() == 1) { auto contextEngineType = osContext.getEngineType(); const DirectSubmissionProperties &directSubmissionProperty = device.getHardwareInfo().capabilityTable.directSubmissionEngines.data[contextEngineType]; bool startDirect = true; if (!osContext.isDefaultContext()) { startDirect = directSubmissionProperty.useNonDefault; } if (osContext.isLowPriority()) { startDirect = directSubmissionProperty.useLowPriority; } if (osContext.isInternalEngine()) { startDirect = directSubmissionProperty.useInternal; } if (osContext.isRootDevice()) { startDirect = directSubmissionProperty.useRootDevice; } if (directSubmissionProperty.engineSupported && startDirect) { if (contextEngineType == ENGINE_TYPE_BCS) { directSubmission = std::make_unique>(device, std::make_unique>(), osContext); } else { directSubmission = std::make_unique>(device, std::make_unique>(), osContext); } ret = directSubmission->initialize(directSubmissionProperty.submitOnInit); this->dispatchMode = DispatchMode::ImmediateDispatch; } } return ret; } } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/platform/000077500000000000000000000000001363734646600217615ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/platform/CMakeLists.txt000066400000000000000000000010121363734646600245130ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_PLATFORM ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/extensions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/extensions.h ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.h ${CMAKE_CURRENT_SOURCE_DIR}/platform_info.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_PLATFORM}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_PLATFORM ${RUNTIME_SRCS_PLATFORM}) compute-runtime-20.13.16352/opencl/source/platform/extensions.cpp000066400000000000000000000066011363734646600246670ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/platform/extensions.h" #include "shared/source/helpers/hw_info.h" #include namespace NEO { const char *deviceExtensionsList = "cl_khr_byte_addressable_store " "cl_khr_fp16 " "cl_khr_global_int32_base_atomics " "cl_khr_global_int32_extended_atomics " "cl_khr_icd " "cl_khr_local_int32_base_atomics " "cl_khr_local_int32_extended_atomics " "cl_intel_subgroups " "cl_intel_required_subgroup_size " "cl_intel_subgroups_short " "cl_khr_spir " "cl_intel_accelerator " "cl_intel_driver_diagnostics " "cl_khr_priority_hints " "cl_khr_throttle_hints " "cl_khr_create_command_queue " "cl_intel_subgroups_char " "cl_intel_subgroups_long "; std::string getExtensionsList(const HardwareInfo &hwInfo) { std::string allExtensionsList; allExtensionsList.reserve(1000); allExtensionsList.append(deviceExtensionsList); if (hwInfo.capabilityTable.clVersionSupport >= 21) { allExtensionsList += "cl_khr_subgroups "; allExtensionsList += "cl_khr_il_program "; if 
(hwInfo.capabilityTable.supportsVme) { allExtensionsList += "cl_intel_spirv_device_side_avc_motion_estimation "; } if (hwInfo.capabilityTable.supportsImages) { allExtensionsList += "cl_intel_spirv_media_block_io "; } allExtensionsList += "cl_intel_spirv_subgroups "; allExtensionsList += "cl_khr_spirv_no_integer_wrap_decoration "; } if (hwInfo.capabilityTable.ftrSupportsFP64) { allExtensionsList += "cl_khr_fp64 "; } if (hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics) { allExtensionsList += "cl_khr_int64_base_atomics "; allExtensionsList += "cl_khr_int64_extended_atomics "; } if (hwInfo.capabilityTable.supportsImages) { allExtensionsList += "cl_khr_3d_image_writes "; } if (hwInfo.capabilityTable.supportsVme) { allExtensionsList += "cl_intel_motion_estimation cl_intel_device_side_avc_motion_estimation "; } return allExtensionsList; } std::string removeLastSpace(std::string &processedString) { if (processedString.size() > 0) { if (*processedString.rbegin() == ' ') { processedString.pop_back(); } } return processedString; } std::string convertEnabledExtensionsToCompilerInternalOptions(const char *enabledExtensions) { std::string extensionsList = enabledExtensions; extensionsList.reserve(1000); removeLastSpace(extensionsList); std::string::size_type pos = 0; while ((pos = extensionsList.find(" ", pos)) != std::string::npos) { extensionsList.replace(pos, 1, ",+"); } extensionsList = " -cl-ext=-all,+" + extensionsList + ",+cl_khr_3d_image_writes "; return extensionsList; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/platform/extensions.h000066400000000000000000000010471363734646600243330ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include namespace NEO { namespace Extensions { constexpr const char *const sharingFormatQuery = "cl_intel_sharing_format_query "; } extern const char *deviceExtensionsList; std::string 
getExtensionsList(const HardwareInfo &hwInfo); std::string removeLastSpace(std::string &s); std::string convertEnabledExtensionsToCompilerInternalOptions(const char *deviceExtensions); } // namespace NEO compute-runtime-20.13.16352/opencl/source/platform/platform.cpp000066400000000000000000000173241363734646600243200ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "platform.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/root_device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/built_ins_helper.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/platform/extensions.h" #include "opencl/source/sharings/sharing_factory.h" #include "CL/cl_ext.h" #include "gmm_client_context.h" #include #include namespace NEO { std::vector> platformsImpl; Platform::Platform(ExecutionEnvironment &executionEnvironmentIn) : executionEnvironment(executionEnvironmentIn) { clDevices.reserve(4); setAsyncEventsHandler(std::unique_ptr(new AsyncEventsHandler())); executionEnvironment.incRefInternal(); } 
Platform::~Platform() { asyncEventsHandler->closeThread(); for (auto clDevice : this->clDevices) { clDevice->decRefInternal(); } gtpinNotifyPlatformShutdown(); executionEnvironment.decRefInternal(); } cl_int Platform::getInfo(cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { auto retVal = CL_INVALID_VALUE; const std::string *param = nullptr; size_t paramSize = 0; uint64_t pVal = 0; switch (paramName) { case CL_PLATFORM_HOST_TIMER_RESOLUTION: pVal = static_cast(this->clDevices[0]->getPlatformHostTimerResolution()); paramSize = sizeof(uint64_t); retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, &pVal, paramSize)); break; case CL_PLATFORM_PROFILE: param = &platformInfo->profile; break; case CL_PLATFORM_VERSION: param = &platformInfo->version; break; case CL_PLATFORM_NAME: param = &platformInfo->name; break; case CL_PLATFORM_VENDOR: param = &platformInfo->vendor; break; case CL_PLATFORM_EXTENSIONS: param = &platformInfo->extensions; break; case CL_PLATFORM_ICD_SUFFIX_KHR: param = &platformInfo->icdSuffixKhr; break; default: break; } // Case for string parameters if (param) { paramSize = param->length() + 1; retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, param->c_str(), paramSize)); } if (paramValueSizeRet) { *paramValueSizeRet = paramSize; } return retVal; } bool Platform::initialize(std::vector> devices) { TakeOwnershipWrapper platformOwnership(*this); if (devices.empty()) { return false; } if (state == StateInited) { return true; } if (DebugManager.flags.LoopAtPlatformInitialize.get()) { while (DebugManager.flags.LoopAtPlatformInitialize.get()) this->initializationLoopHelper(); } state = StateIniting; DEBUG_BREAK_IF(this->platformInfo); this->platformInfo.reset(new PlatformInfo); for (auto &inputDevice : devices) { ClDevice *pClDevice = nullptr; auto pDevice = inputDevice.release(); UNRECOVERABLE_IF(!pDevice); pClDevice = new ClDevice{*pDevice, this}; 
this->clDevices.push_back(pClDevice); this->platformInfo->extensions = pClDevice->getDeviceInfo().deviceExtensions; switch (pClDevice->getEnabledClVersion()) { case 21: this->platformInfo->version = "OpenCL 2.1 "; break; case 20: this->platformInfo->version = "OpenCL 2.0 "; break; default: this->platformInfo->version = "OpenCL 1.2 "; break; } } for (auto &clDevice : clDevices) { auto hwInfo = clDevice->getHardwareInfo(); if (clDevice->getPreemptionMode() == PreemptionMode::MidThread || clDevice->isDebuggerActive()) { auto sipType = SipKernel::getSipKernelType(hwInfo.platform.eRenderCoreFamily, clDevice->isDebuggerActive()); initSipKernel(sipType, clDevice->getDevice()); } } this->fillGlobalDispatchTable(); DEBUG_BREAK_IF(DebugManager.flags.CreateMultipleSubDevices.get() > 1 && !this->clDevices[0]->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()); state = StateInited; return true; } void Platform::fillGlobalDispatchTable() { sharingFactory.fillGlobalDispatchTable(); } bool Platform::isInitialized() { TakeOwnershipWrapper platformOwnership(*this); bool ret = (this->state == StateInited); return ret; } ClDevice *Platform::getClDevice(size_t deviceOrdinal) { TakeOwnershipWrapper platformOwnership(*this); if (this->state != StateInited || deviceOrdinal >= clDevices.size()) { return nullptr; } auto pClDevice = clDevices[deviceOrdinal]; DEBUG_BREAK_IF(pClDevice == nullptr); return pClDevice; } size_t Platform::getNumDevices() const { TakeOwnershipWrapper platformOwnership(*this); if (this->state != StateInited) { return 0; } return clDevices.size(); } ClDevice **Platform::getClDevices() { TakeOwnershipWrapper platformOwnership(*this); if (this->state != StateInited) { return nullptr; } return clDevices.data(); } const PlatformInfo &Platform::getPlatformInfo() const { DEBUG_BREAK_IF(!platformInfo); return *platformInfo; } AsyncEventsHandler *Platform::getAsyncEventsHandler() { return asyncEventsHandler.get(); } std::unique_ptr 
Platform::setAsyncEventsHandler(std::unique_ptr handler) { asyncEventsHandler.swap(handler); return handler; } std::unique_ptr (*Platform::createFunc)(ExecutionEnvironment &) = [](ExecutionEnvironment &executionEnvironment) -> std::unique_ptr { return std::make_unique(executionEnvironment); }; std::vector Platform::groupDevices(DeviceVector devices) { std::map platformsMap; std::vector outDevices; for (auto &device : devices) { auto productFamily = device->getHardwareInfo().platform.eProductFamily; auto result = platformsMap.find(productFamily); if (result == platformsMap.end()) { platformsMap.insert({productFamily, platformsMap.size()}); outDevices.push_back(DeviceVector{}); } auto platformId = platformsMap[productFamily]; outDevices[platformId].push_back(std::move(device)); } std::sort(outDevices.begin(), outDevices.end(), [](DeviceVector &lhs, DeviceVector &rhs) -> bool { return lhs[0]->getHardwareInfo().platform.eProductFamily > rhs[0]->getHardwareInfo().platform.eProductFamily; }); return outDevices; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/platform/platform.h000066400000000000000000000044471363734646600237670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/cl_device/cl_device_vector.h" #include "opencl/source/helpers/base_object.h" #include "platform_info.h" #include #include #include namespace NEO { class CompilerInterface; class Device; class AsyncEventsHandler; class ExecutionEnvironment; class GmmHelper; class GmmClientContext; struct HardwareInfo; template <> struct OpenCLObjectMapper<_cl_platform_id> { typedef class Platform DerivedType; }; using DeviceVector = std::vector>; class Platform : public BaseObject<_cl_platform_id> { public: static const cl_ulong objectMagic = 0x8873ACDEF2342133LL; Platform(ExecutionEnvironment &executionEnvironment); ~Platform() override; Platform(const Platform 
&) = delete; Platform &operator=(Platform const &) = delete; cl_int getInfo(cl_platform_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); MOCKABLE_VIRTUAL bool initialize(std::vector> devices); bool isInitialized(); size_t getNumDevices() const; ClDevice **getClDevices(); ClDevice *getClDevice(size_t deviceOrdinal); const PlatformInfo &getPlatformInfo() const; AsyncEventsHandler *getAsyncEventsHandler(); std::unique_ptr setAsyncEventsHandler(std::unique_ptr handler); ExecutionEnvironment *peekExecutionEnvironment() const { return &executionEnvironment; } static std::unique_ptr (*createFunc)(ExecutionEnvironment &executionEnvironment); static std::vector groupDevices(DeviceVector devices); protected: enum { StateNone, StateIniting, StateInited, }; cl_uint state = StateNone; void fillGlobalDispatchTable(); MOCKABLE_VIRTUAL void initializationLoopHelper(){}; std::unique_ptr platformInfo; ClDeviceVector clDevices; std::unique_ptr asyncEventsHandler; ExecutionEnvironment &executionEnvironment; }; extern std::vector> platformsImpl; } // namespace NEO compute-runtime-20.13.16352/opencl/source/platform/platform_info.h000066400000000000000000000006071363734646600247740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include struct PlatformInfo { std::string profile = "FULL_PROFILE"; std::string version = ""; std::string name = "Intel(R) OpenCL HD Graphics"; std::string vendor = "Intel(R) Corporation"; std::string extensions; std::string icdSuffixKhr = "INTEL"; }; compute-runtime-20.13.16352/opencl/source/program/000077500000000000000000000000001363734646600216045ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/program/CMakeLists.txt000066400000000000000000000026411363734646600243470ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_PROGRAM 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/additional_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/build.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create.inl ${CMAKE_CURRENT_SOURCE_DIR}/get_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heap_info.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/internal_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_from_patchtokens.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_from_patchtokens.h ${CMAKE_CURRENT_SOURCE_DIR}/link.cpp ${CMAKE_CURRENT_SOURCE_DIR}/patch_info.h ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/process_device_binary.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_intermediate_binary.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_PROGRAM}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_PROGRAM ${RUNTIME_SRCS_PROGRAM}) compute-runtime-20.13.16352/opencl/source/program/additional_options.cpp000066400000000000000000000003261363734646600261740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/program.h" namespace NEO { void Program::applyAdditionalOptions() { } }; // namespace NEO compute-runtime-20.13.16352/opencl/source/program/block_kernel_manager.cpp000066400000000000000000000040771363734646600264440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/block_kernel_manager.h" #include 
"shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/program/kernel_info.h" namespace NEO { void BlockKernelManager::addBlockKernelInfo(KernelInfo *blockKernelInfo) { blockKernelInfoArray.push_back(blockKernelInfo); blockUsesPrintf |= (blockKernelInfo->patchInfo.pAllocateStatelessPrintfSurface != nullptr); } const KernelInfo *BlockKernelManager::getBlockKernelInfo(size_t ordinal) { DEBUG_BREAK_IF(ordinal >= blockKernelInfoArray.size()); return blockKernelInfoArray[ordinal]; } BlockKernelManager::~BlockKernelManager() { for (auto &i : blockKernelInfoArray) delete i; } void BlockKernelManager::pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal) { if (blockPrivateSurfaceArray.size() < blockKernelInfoArray.size()) { blockPrivateSurfaceArray.resize(blockKernelInfoArray.size(), nullptr); } DEBUG_BREAK_IF(ordinal >= blockPrivateSurfaceArray.size()); blockPrivateSurfaceArray[ordinal] = allocation; } GraphicsAllocation *BlockKernelManager::getPrivateSurface(size_t ordinal) { // Ff queried ordinal is out of bound return nullptr, // this happens when no private surface was not pushed if (ordinal < blockPrivateSurfaceArray.size()) return blockPrivateSurfaceArray[ordinal]; return nullptr; } void BlockKernelManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver) { auto blockCount = blockKernelInfoArray.size(); for (uint32_t surfaceIndex = 0; surfaceIndex < blockCount; surfaceIndex++) { auto surface = getPrivateSurface(surfaceIndex); if (surface) { commandStreamReceiver.makeResident(*surface); } surface = blockKernelInfoArray[surfaceIndex]->getGraphicsAllocation(); if (surface) { commandStreamReceiver.makeResident(*surface); } } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/block_kernel_manager.h000066400000000000000000000017641363734646600261110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include namespace NEO { class GraphicsAllocation; class CommandStreamReceiver; struct KernelInfo; class BlockKernelManager { public: BlockKernelManager() = default; virtual ~BlockKernelManager(); void addBlockKernelInfo(KernelInfo *); const KernelInfo *getBlockKernelInfo(size_t ordinal); size_t getCount() const { return blockKernelInfoArray.size(); } bool getIfBlockUsesPrintf() const { return blockUsesPrintf; } void pushPrivateSurface(GraphicsAllocation *allocation, size_t ordinal); GraphicsAllocation *getPrivateSurface(size_t ordinal); void makeInternalAllocationsResident(CommandStreamReceiver &); protected: bool blockUsesPrintf = false; std::vector blockKernelInfoArray; std::vector blockPrivateSurfaceArray; }; } // namespace NEOcompute-runtime-20.13.16352/opencl/source/program/build.cpp000066400000000000000000000223371363734646600234160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/source/utilities/time_measure_wrapper.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" #include "opencl/source/utilities/logger.h" #include "compiler_options.h" #include #include #include namespace NEO { cl_int Program::build( cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void 
*userData, bool enableCaching) { cl_int retVal = CL_SUCCESS; do { if (((deviceList == nullptr) && (numDevices != 0)) || ((deviceList != nullptr) && (numDevices == 0))) { retVal = CL_INVALID_VALUE; break; } if ((funcNotify == nullptr) && (userData != nullptr)) { retVal = CL_INVALID_VALUE; break; } // if a device_list is specified, make sure it points to our device // NOTE: a null device_list is ok - it means "all devices" if (deviceList && validateObject(*deviceList) != CL_SUCCESS) { retVal = CL_INVALID_DEVICE; break; } // check to see if a previous build request is in progress if (buildStatus == CL_BUILD_IN_PROGRESS) { retVal = CL_INVALID_OPERATION; break; } if (isCreatedFromBinary == false) { buildStatus = CL_BUILD_IN_PROGRESS; options = (buildOptions) ? buildOptions : ""; extractInternalOptions(options); applyAdditionalOptions(); CompilerInterface *pCompilerInterface = pDevice->getCompilerInterface(); if (!pCompilerInterface) { retVal = CL_OUT_OF_HOST_MEMORY; break; } TranslationInput inputArgs = {IGC::CodeType::oclC, IGC::CodeType::oclGenBin}; if (createdFrom != CreatedFrom::SOURCE) { inputArgs.srcType = isSpirV ? IGC::CodeType::spirV : IGC::CodeType::llvmBc; inputArgs.src = ArrayRef(irBinary.get(), irBinarySize); } else { inputArgs.src = ArrayRef(sourceCode.c_str(), sourceCode.size()); } if (inputArgs.src.size() == 0) { retVal = CL_INVALID_PROGRAM; break; } if (isKernelDebugEnabled()) { std::string filename; appendKernelDebugOptions(); notifyDebuggerWithSourceCode(filename); if (!filename.empty()) { // Add "-s" flag first so it will be ignored by clang in case the options already have this flag set. 
options = std::string("-s ") + filename + " " + options; } } auto clDevice = this->pDevice->getSpecializedDevice(); UNRECOVERABLE_IF(clDevice == nullptr); auto compilerExtensionsOptions = clDevice->peekCompilerExtensions(); if (internalOptions.find(compilerExtensionsOptions) == std::string::npos) { CompilerOptions::concatenateAppend(internalOptions, compilerExtensionsOptions); } inputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); inputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); inputArgs.GTPinInput = gtpinGetIgcInit(); inputArgs.specializedValues = this->specConstantsValues; DBG_LOG(LogApiCalls, "Build Options", inputArgs.apiOptions.begin(), "\nBuild Internal Options", inputArgs.internalOptions.begin()); inputArgs.allowCaching = enableCaching; NEO::TranslationOutput compilerOuput = {}; auto compilerErr = pCompilerInterface->build(*this->pDevice, inputArgs, compilerOuput); this->updateBuildLog(this->pDevice, compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size()); this->updateBuildLog(this->pDevice, compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size()); retVal = asClError(compilerErr); if (retVal != CL_SUCCESS) { break; } if (inputArgs.srcType == IGC::CodeType::oclC) { this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem); this->irBinarySize = compilerOuput.intermediateRepresentation.size; this->isSpirV = compilerOuput.intermediateCodeType == IGC::CodeType::spirV; } this->replaceDeviceBinary(std::move(compilerOuput.deviceBinary.mem), compilerOuput.deviceBinary.size); this->debugData = std::move(compilerOuput.debugData.mem); this->debugDataSize = compilerOuput.debugData.size; } updateNonUniformFlag(); if (DebugManager.flags.PrintProgramBinaryProcessingTime.get()) { retVal = TimeMeasureWrapper::functionExecution(*this, &Program::processGenBinary); } else { retVal = processGenBinary(); } if (retVal != CL_SUCCESS) { break; } if 
(isKernelDebugEnabled()) { processDebugData(); auto clDevice = this->pDevice->getSpecializedDevice(); UNRECOVERABLE_IF(clDevice == nullptr); if (clDevice->getSourceLevelDebugger()) { for (auto kernelInfo : kernelInfoArray) { clDevice->getSourceLevelDebugger()->notifyKernelDebugData(&kernelInfo->debugData, kernelInfo->name, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.pKernelHeader->KernelHeapSize); } } } separateBlockKernels(); } while (false); if (retVal != CL_SUCCESS) { buildStatus = CL_BUILD_ERROR; programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE; } else { buildStatus = CL_BUILD_SUCCESS; programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; } if (funcNotify != nullptr) { (*funcNotify)(this, userData); } return retVal; } bool Program::appendKernelDebugOptions() { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::debugKernelEnable); CompilerOptions::concatenateAppend(options, CompilerOptions::generateDebugInfo); auto clDevice = this->pDevice->getSpecializedDevice(); UNRECOVERABLE_IF(clDevice == nullptr); auto debugger = clDevice->getSourceLevelDebugger(); if (debugger && debugger->isOptimizationDisabled()) { CompilerOptions::concatenateAppend(options, CompilerOptions::optDisable); } return true; } void Program::notifyDebuggerWithSourceCode(std::string &filename) { auto clDevice = this->pDevice->getSpecializedDevice(); UNRECOVERABLE_IF(clDevice == nullptr); if (clDevice->getSourceLevelDebugger()) { clDevice->getSourceLevelDebugger()->notifySourceCode(sourceCode.c_str(), sourceCode.size(), filename); } } cl_int Program::build(const Device *pDevice, const char *buildOptions, bool enableCaching, std::unordered_map &builtinsMap) { cl_device_id deviceId = pDevice->getSpecializedDevice(); auto ret = this->build(1, &deviceId, buildOptions, nullptr, nullptr, enableCaching); if (ret != CL_SUCCESS) { return ret; } for (auto &ki : this->kernelInfoArray) { auto fit = builtinsMap.find(ki->name); if (fit == builtinsMap.end()) { continue; } 
ki->builtinDispatchBuilder = fit->second; } return ret; } void Program::extractInternalOptions(const std::string &options) { auto tokenized = CompilerOptions::tokenize(options); for (auto &optionString : internalOptionsToExtract) { auto element = std::find(tokenized.begin(), tokenized.end(), optionString); if (element == tokenized.end()) { continue; } if (isFlagOption(optionString)) { CompilerOptions::concatenateAppend(internalOptions, optionString); } else if ((element + 1 != tokenized.end()) && isOptionValueValid(optionString, *(element + 1))) { CompilerOptions::concatenateAppend(internalOptions, optionString); CompilerOptions::concatenateAppend(internalOptions, *(element + 1)); } } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/compile.cpp000066400000000000000000000150001363734646600237340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/platform/platform.h" #include "compiler_options.h" #include "program.h" #include namespace NEO { cl_int Program::compile( cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) { cl_int retVal = CL_SUCCESS; do { if (((deviceList == nullptr) && (numDevices != 0)) || ((deviceList != nullptr) && (numDevices == 0))) { 
retVal = CL_INVALID_VALUE; break; } if (numInputHeaders == 0) { if ((headerIncludeNames != nullptr) || (inputHeaders != nullptr)) { retVal = CL_INVALID_VALUE; break; } } else { if ((headerIncludeNames == nullptr) || (inputHeaders == nullptr)) { retVal = CL_INVALID_VALUE; break; } } if ((funcNotify == nullptr) && (userData != nullptr)) { retVal = CL_INVALID_VALUE; break; } // if a device_list is specified, make sure it points to our device // NOTE: a null device_list is ok - it means "all devices" if ((deviceList != nullptr) && validateObject(*deviceList) != CL_SUCCESS) { retVal = CL_INVALID_DEVICE; break; } if (buildStatus == CL_BUILD_IN_PROGRESS) { retVal = CL_INVALID_OPERATION; break; } if ((createdFrom == CreatedFrom::IL) || (this->programBinaryType == CL_PROGRAM_BINARY_TYPE_INTERMEDIATE)) { retVal = CL_SUCCESS; break; } buildStatus = CL_BUILD_IN_PROGRESS; options = (buildOptions != nullptr) ? buildOptions : ""; for (const auto optionString : {CompilerOptions::gtpinRera, CompilerOptions::greaterThan4gbBuffersRequired}) { size_t pos = options.find(optionString); if (pos != std::string::npos) { options.erase(pos, optionString.length()); CompilerOptions::concatenateAppend(internalOptions, optionString); } } // create ELF writer to process all sources to be compiled NEO::Elf::ElfEncoder<> elfEncoder(true, true, 1U); elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_SOURCE; elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SOURCE, "CLMain", sourceCode); for (cl_uint i = 0; i < numInputHeaders; i++) { auto program = inputHeaders[i]; if (program == nullptr) { retVal = CL_INVALID_PROGRAM; break; } auto pHeaderProgObj = castToObject(program); if (pHeaderProgObj == nullptr) { retVal = CL_INVALID_PROGRAM; break; } std::string includeHeaderSource; retVal = pHeaderProgObj->getSource(includeHeaderSource); if (retVal != CL_SUCCESS) { break; } elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_HEADER, ConstStringRef(headerIncludeNames[i], strlen(headerIncludeNames[i])), 
includeHeaderSource); } if (retVal != CL_SUCCESS) { break; } std::vector compileData = elfEncoder.encode(); CompilerInterface *pCompilerInterface = pDevice->getCompilerInterface(); if (!pCompilerInterface) { retVal = CL_OUT_OF_HOST_MEMORY; break; } TranslationInput inputArgs = {IGC::CodeType::elf, IGC::CodeType::undefined}; // set parameters for compilation auto clDevice = this->pDevice->getSpecializedDevice(); UNRECOVERABLE_IF(clDevice == nullptr); auto compilerExtensionsOptions = clDevice->peekCompilerExtensions(); CompilerOptions::concatenateAppend(internalOptions, compilerExtensionsOptions); if (isKernelDebugEnabled()) { std::string filename; appendKernelDebugOptions(); notifyDebuggerWithSourceCode(filename); if (!filename.empty()) { options = std::string("-s ") + filename + " " + options; } } inputArgs.src = ArrayRef(reinterpret_cast(compileData.data()), compileData.size()); inputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); inputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); TranslationOutput compilerOuput; auto compilerErr = pCompilerInterface->compile(*this->pDevice, inputArgs, compilerOuput); this->updateBuildLog(this->pDevice, compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size()); this->updateBuildLog(this->pDevice, compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size()); retVal = asClError(compilerErr); if (retVal != CL_SUCCESS) { break; } this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem); this->irBinarySize = compilerOuput.intermediateRepresentation.size; this->isSpirV = compilerOuput.intermediateCodeType == IGC::CodeType::spirV; this->debugData = std::move(compilerOuput.debugData.mem); this->debugDataSize = compilerOuput.debugData.size; updateNonUniformFlag(); } while (false); if (retVal != CL_SUCCESS) { buildStatus = CL_BUILD_ERROR; programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE; } else { buildStatus = 
CL_BUILD_SUCCESS; programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; } internalOptions.clear(); if (funcNotify != nullptr) { (*funcNotify)(this, userData); } return retVal; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/create.cpp000066400000000000000000000017041363734646600235550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/create.inl" #include "opencl/source/program/program.h" namespace NEO { template Program *Program::create(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int &); template Program *Program::create(cl_context, cl_uint, const char **, const size_t *, cl_int &); template Program *Program::create(const char *, Context *, ClDevice &, bool, cl_int *); template Program *Program::create(const char *, Context *, Device &, bool, cl_int *); template Program *Program::createFromIL(Context *, const void *, size_t length, cl_int &); template Program *Program::createFromGenBinary(ExecutionEnvironment &executionEnvironment, Context *context, const void *binary, size_t size, bool isBuiltIn, cl_int *errcodeRet, Device *device); } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/create.inl000066400000000000000000000121161363734646600235540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/string_helpers.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "compiler_options.h" namespace NEO { template T *Program::create( cl_context context, cl_uint numDevices, const cl_device_id 
*deviceList, const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus, cl_int &errcodeRet) { auto pContext = castToObject(context); DEBUG_BREAK_IF(!pContext); auto program = new T(*pContext->getDevice(0)->getExecutionEnvironment(), pContext, false, &pContext->getDevice(0)->getDevice()); auto retVal = program->createProgramFromBinary(binaries[0], lengths[0]); program->createdFrom = CreatedFrom::BINARY; if (binaryStatus) { DEBUG_BREAK_IF(retVal != CL_SUCCESS); *binaryStatus = CL_SUCCESS; } if (retVal != CL_SUCCESS) { delete program; program = nullptr; } errcodeRet = retVal; return program; } template T *Program::create( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int &errcodeRet) { std::string combinedString; size_t combinedStringSize = 0; T *program = nullptr; auto pContext = castToObject(context); DEBUG_BREAK_IF(!pContext); auto retVal = createCombinedString( combinedString, combinedStringSize, count, strings, lengths); if (CL_SUCCESS == retVal) { program = new T(*pContext->getDevice(0)->getExecutionEnvironment(), pContext, false, &pContext->getDevice(0)->getDevice()); program->sourceCode.swap(combinedString); program->createdFrom = CreatedFrom::SOURCE; } errcodeRet = retVal; return program; } template T *Program::create( const char *nullTerminatedString, Context *context, ClDevice &device, bool isBuiltIn, cl_int *errcodeRet) { cl_int retVal = CL_SUCCESS; T *program = nullptr; if (nullTerminatedString == nullptr) { retVal = CL_INVALID_VALUE; } if (retVal == CL_SUCCESS) { program = new T(*device.getExecutionEnvironment()); program->sourceCode = nullTerminatedString; program->createdFrom = CreatedFrom::SOURCE; program->context = context; program->isBuiltIn = isBuiltIn; if (program->context && !program->isBuiltIn) { program->context->incRefInternal(); } program->pDevice = &device.getDevice(); program->numDevices = 1; if (is32bit || DebugManager.flags.DisableStatelessToStatefulOptimization.get() || 
device.areSharedSystemAllocationsAllowed()) { CompilerOptions::concatenateAppend(program->internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } } if (errcodeRet) { *errcodeRet = retVal; } return program; } template T *Program::create( const char *nullTerminatedString, Context *context, Device &device, bool isBuiltIn, cl_int *errcodeRet) { return Program::create(nullTerminatedString, context, *device.getSpecializedDevice(), isBuiltIn, errcodeRet); } template T *Program::createFromGenBinary( ExecutionEnvironment &executionEnvironment, Context *context, const void *binary, size_t size, bool isBuiltIn, cl_int *errcodeRet, Device *device) { cl_int retVal = CL_SUCCESS; T *program = nullptr; if ((binary == nullptr) || (size == 0)) { retVal = CL_INVALID_VALUE; } if (CL_SUCCESS == retVal) { program = new T(executionEnvironment, context, isBuiltIn, device); program->numDevices = 1; program->replaceDeviceBinary(makeCopy(binary, size), size); program->isCreatedFromBinary = true; program->programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; program->buildStatus = CL_BUILD_SUCCESS; program->createdFrom = CreatedFrom::BINARY; } if (errcodeRet) { *errcodeRet = retVal; } return program; } template T *Program::createFromIL(Context *ctx, const void *il, size_t length, cl_int &errcodeRet) { errcodeRet = CL_SUCCESS; if ((il == nullptr) || (length == 0)) { errcodeRet = CL_INVALID_BINARY; return nullptr; } T *program = new T(*ctx->getDevice(0)->getExecutionEnvironment(), ctx, false, &ctx->getDevice(0)->getDevice()); errcodeRet = program->createProgramFromBinary(il, length); program->createdFrom = CreatedFrom::IL; if (errcodeRet != CL_SUCCESS) { delete program; program = nullptr; } return program; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/get_info.cpp000066400000000000000000000136471363734646600241150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/helpers/get_info.h" #include "shared/source/device/device.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/program/kernel_info.h" #include "program.h" namespace NEO { cl_int Program::getInfo(cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; const void *pSrc = nullptr; size_t srcSize = 0; size_t retSize = 0; std::string kernelNamesString; cl_device_id device_id = pDevice->getSpecializedDevice(); cl_uint refCount = 0; size_t numKernels; cl_context clContext = context; switch (paramName) { case CL_PROGRAM_CONTEXT: pSrc = &clContext; retSize = srcSize = sizeof(clContext); break; case CL_PROGRAM_BINARIES: packDeviceBinary(); pSrc = packedDeviceBinary.get(); retSize = sizeof(void **); srcSize = packedDeviceBinarySize; if (paramValue != nullptr) { if (paramValueSize < retSize) { retVal = CL_INVALID_VALUE; break; } paramValueSize = srcSize; paramValue = *(void **)paramValue; } break; case CL_PROGRAM_BINARY_SIZES: packDeviceBinary(); pSrc = &packedDeviceBinarySize; retSize = srcSize = sizeof(size_t *); break; case CL_PROGRAM_KERNEL_NAMES: kernelNamesString = concatenateKernelNames(kernelInfoArray); pSrc = kernelNamesString.c_str(); retSize = srcSize = kernelNamesString.length() + 1; if (buildStatus != CL_BUILD_SUCCESS) { retVal = CL_INVALID_PROGRAM_EXECUTABLE; } break; case CL_PROGRAM_NUM_KERNELS: numKernels = kernelInfoArray.size(); pSrc = &numKernels; retSize = srcSize = sizeof(numKernels); if (buildStatus != CL_BUILD_SUCCESS) { retVal = CL_INVALID_PROGRAM_EXECUTABLE; } break; case CL_PROGRAM_NUM_DEVICES: pSrc = &numDevices; retSize = srcSize = sizeof(cl_uint); break; case CL_PROGRAM_DEVICES: pSrc = &device_id; retSize = srcSize = sizeof(cl_device_id); break; case 
CL_PROGRAM_REFERENCE_COUNT: refCount = static_cast(this->getReference()); retSize = srcSize = sizeof(refCount); pSrc = &refCount; break; case CL_PROGRAM_SOURCE: if (createdFrom == CreatedFrom::SOURCE) { pSrc = sourceCode.c_str(); retSize = srcSize = strlen(sourceCode.c_str()) + 1; } else { if (paramValueSizeRet) { *paramValueSizeRet = 0; } return CL_SUCCESS; } break; case CL_PROGRAM_IL: if (createdFrom != CreatedFrom::IL) { if (paramValueSizeRet) { *paramValueSizeRet = 0; } return CL_SUCCESS; } pSrc = irBinary.get(); retSize = srcSize = irBinarySize; break; case CL_PROGRAM_DEBUG_INFO_SIZES_INTEL: retSize = srcSize = sizeof(debugDataSize); pSrc = &debugDataSize; break; case CL_PROGRAM_DEBUG_INFO_INTEL: pSrc = debugData.get(); retSize = numDevices * sizeof(void **); srcSize = debugDataSize; if (paramValue != nullptr) { if (paramValueSize < retSize) { retVal = CL_INVALID_VALUE; break; } paramValueSize = srcSize; paramValue = *(void **)paramValue; } break; default: retVal = CL_INVALID_VALUE; break; } retVal = (retVal == CL_SUCCESS) ? 
changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, pSrc, srcSize)) : retVal; if (paramValueSizeRet) { *paramValueSizeRet = retSize; } return retVal; } cl_int Program::getBuildInfo(cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal = CL_SUCCESS; const void *pSrc = nullptr; size_t srcSize = 0; size_t retSize = 0; cl_device_id device_id = pDevice->getSpecializedDevice(); if (device != device_id) { return CL_INVALID_DEVICE; } auto pClDev = castToObject(device); switch (paramName) { case CL_PROGRAM_BUILD_STATUS: srcSize = retSize = sizeof(cl_build_status); pSrc = &buildStatus; break; case CL_PROGRAM_BUILD_OPTIONS: srcSize = retSize = strlen(options.c_str()) + 1; pSrc = options.c_str(); break; case CL_PROGRAM_BUILD_LOG: { const char *pBuildLog = getBuildLog(&pClDev->getDevice()); if (pBuildLog != nullptr) { pSrc = pBuildLog; srcSize = retSize = strlen(pBuildLog) + 1; } else { pSrc = ""; srcSize = retSize = 1; } } break; case CL_PROGRAM_BINARY_TYPE: srcSize = retSize = sizeof(cl_program_binary_type); pSrc = &programBinaryType; break; case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: pSrc = &globalVarTotalSize; retSize = srcSize = sizeof(size_t); break; default: retVal = CL_INVALID_VALUE; break; } retVal = (retVal == CL_SUCCESS) ? 
changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, pSrc, srcSize)) : retVal; if (paramValueSizeRet) { *paramValueSizeRet = retSize; } return retVal; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/heap_info.h000066400000000000000000000006241363734646600237070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "patch_info.h" #include namespace NEO { struct HeapInfo { const SKernelBinaryHeaderCommon *pKernelHeader = nullptr; const void *pKernelHeap = nullptr; const void *pGsh = nullptr; const void *pDsh = nullptr; const void *pSsh = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/internal_options.cpp000066400000000000000000000011471363734646600257020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/program.h" #include "compiler_options.h" #include namespace NEO { const std::vector Program::internalOptionsToExtract = {CompilerOptions::gtpinRera, CompilerOptions::greaterThan4gbBuffersRequired}; bool Program::isFlagOption(ConstStringRef option) { return true; } bool Program::isOptionValueValid(ConstStringRef option, ConstStringRef value) { return false; } }; // namespace NEO compute-runtime-20.13.16352/opencl/source/program/kernel_arg_info.h000066400000000000000000000051511363734646600251030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_options/compiler_options_base.h" #include "shared/source/kernel/kernel_arg_descriptor.h" #include "shared/source/utilities/const_stringref.h" #include #include #include #include namespace NEO { struct KernelArgPatchInfo { uint32_t crossthreadOffset = 0; uint32_t size = 0; uint32_t sourceOffset = 0; }; struct KernelArgInfo { KernelArgInfo() { 
this->metadataExtended = std::make_unique(); } ~KernelArgInfo() = default; KernelArgInfo(const KernelArgInfo &rhs) = delete; KernelArgInfo &operator=(const KernelArgInfo &) = delete; KernelArgInfo(KernelArgInfo &&) = default; KernelArgInfo &operator=(KernelArgInfo &&) = default; static constexpr uint32_t undefinedOffset = (uint32_t)-1; ArgTypeTraits metadata; std::unique_ptr metadataExtended; uint32_t slmAlignment = 0; bool isImage = false; bool isMediaImage = false; bool isMediaBlockImage = false; bool isSampler = false; bool isAccelerator = false; bool isDeviceQueue = false; bool isBuffer = false; bool pureStatefulBufferAccess = false; bool isReadOnly = false; bool needPatch = false; bool isTransformable = false; uint32_t offsetHeap = 0; std::vector kernelArgPatchInfoVector; uint32_t samplerArgumentType = 0; uint32_t offsetImgWidth = undefinedOffset; uint32_t offsetImgHeight = undefinedOffset; uint32_t offsetImgDepth = undefinedOffset; uint32_t offsetChannelDataType = undefinedOffset; uint32_t offsetChannelOrder = undefinedOffset; uint32_t offsetArraySize = undefinedOffset; uint32_t offsetNumSamples = undefinedOffset; uint32_t offsetSamplerSnapWa = undefinedOffset; uint32_t offsetSamplerAddressingMode = undefinedOffset; uint32_t offsetSamplerNormalizedCoords = undefinedOffset; uint32_t offsetVmeMbBlockType = undefinedOffset; uint32_t offsetVmeSubpixelMode = undefinedOffset; uint32_t offsetVmeSadAdjustMode = undefinedOffset; uint32_t offsetVmeSearchPathType = undefinedOffset; uint32_t offsetObjectId = undefinedOffset; uint32_t offsetBufferOffset = undefinedOffset; uint32_t offsetNumMipLevels = undefinedOffset; uint32_t offsetFlatBaseOffset = undefinedOffset; uint32_t offsetFlatWidth = undefinedOffset; uint32_t offsetFlatHeight = undefinedOffset; uint32_t offsetFlatPitch = undefinedOffset; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/kernel_info.cpp000066400000000000000000000461351363734646600246140ustar00rootroot00000000000000/* * 
Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" #include #include #include #include #include namespace NEO { bool useKernelDescriptor = false; struct KernelArgumentType { const char *argTypeQualifier; uint64_t argTypeQualifierValue; }; std::map typeSizeMap = { {"char", sizeof(cl_char)}, {"char2", sizeof(cl_char2)}, {"char3", sizeof(cl_char3)}, {"char4", sizeof(cl_char4)}, {"char8", sizeof(cl_char8)}, {"char16", sizeof(cl_char16)}, {"uchar", sizeof(cl_uchar)}, {"uchar2", sizeof(cl_uchar2)}, {"uchar3", sizeof(cl_uchar3)}, {"uchar4", sizeof(cl_uchar4)}, {"uchar8", sizeof(cl_uchar8)}, {"uchar16", sizeof(cl_uchar16)}, {"short", sizeof(cl_short)}, {"short2", sizeof(cl_short2)}, {"short3", sizeof(cl_short3)}, {"short4", sizeof(cl_short4)}, {"short8", sizeof(cl_short8)}, {"short16", sizeof(cl_short16)}, {"ushort", sizeof(cl_ushort)}, {"ushort2", sizeof(cl_ushort2)}, {"ushort3", sizeof(cl_ushort3)}, {"ushort4", sizeof(cl_ushort4)}, {"ushort8", sizeof(cl_ushort8)}, {"ushort16", sizeof(cl_ushort16)}, {"int", sizeof(cl_int)}, {"int2", sizeof(cl_int2)}, {"int3", sizeof(cl_int3)}, {"int4", sizeof(cl_int4)}, {"int8", sizeof(cl_int8)}, {"int16", sizeof(cl_int16)}, {"uint", sizeof(cl_uint)}, {"uint2", sizeof(cl_uint2)}, {"uint3", sizeof(cl_uint3)}, {"uint4", sizeof(cl_uint4)}, {"uint8", sizeof(cl_uint8)}, {"uint16", sizeof(cl_uint16)}, {"long", sizeof(cl_long)}, {"long2", sizeof(cl_long2)}, 
{"long3", sizeof(cl_long3)}, {"long4", sizeof(cl_long4)}, {"long8", sizeof(cl_long8)}, {"long16", sizeof(cl_long16)}, {"ulong", sizeof(cl_ulong)}, {"ulong2", sizeof(cl_ulong2)}, {"ulong3", sizeof(cl_ulong3)}, {"ulong4", sizeof(cl_ulong4)}, {"ulong8", sizeof(cl_ulong8)}, {"ulong16", sizeof(cl_ulong16)}, {"half", sizeof(cl_half)}, {"float", sizeof(cl_float)}, {"float2", sizeof(cl_float2)}, {"float3", sizeof(cl_float3)}, {"float4", sizeof(cl_float4)}, {"float8", sizeof(cl_float8)}, {"float16", sizeof(cl_float16)}, #ifdef cl_khr_fp16 {"half2", sizeof(cl_half2)}, {"half3", sizeof(cl_half3)}, {"half4", sizeof(cl_half4)}, {"half8", sizeof(cl_half8)}, {"half16", sizeof(cl_half16)}, #endif {"double", sizeof(cl_double)}, {"double2", sizeof(cl_double2)}, {"double3", sizeof(cl_double3)}, {"double4", sizeof(cl_double4)}, {"double8", sizeof(cl_double8)}, {"double16", sizeof(cl_double16)}, }; WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface) { this->maxWorkGroupSize = maxWorkGroupSize; this->hasBarriers = hasBarriers; this->simdSize = simdSize; this->slmTotalSize = slmTotalSize; this->coreFamily = coreFamily; this->numThreadsPerSubSlice = numThreadsPerSubSlice; this->localMemSize = localMemSize; this->imgUsed = imgUsed; this->yTiledSurfaces = yTiledSurface; setMinWorkGroupSize(); } WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) { this->maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize; auto pExecutionEnvironment = dispatchInfo.getKernel()->getKernelInfo().patchInfo.executionEnvironment; this->hasBarriers = (pExecutionEnvironment != nullptr) && (pExecutionEnvironment->HasBarriers); this->simdSize = (uint32_t)dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize(); this->slmTotalSize = (uint32_t)dispatchInfo.getKernel()->slmTotalSize; this->coreFamily = 
dispatchInfo.getKernel()->getDevice().getHardwareInfo().platform.eRenderCoreFamily; this->numThreadsPerSubSlice = (uint32_t)dispatchInfo.getKernel()->getDevice().getSharedDeviceInfo().maxNumEUsPerSubSlice * dispatchInfo.getKernel()->getDevice().getSharedDeviceInfo().numThreadsPerEU; this->localMemSize = (uint32_t)dispatchInfo.getKernel()->getDevice().getSharedDeviceInfo().localMemSize; setIfUseImg(dispatchInfo.getKernel()); setMinWorkGroupSize(); } void WorkSizeInfo::setIfUseImg(Kernel *pKernel) { auto ParamsCount = pKernel->getKernelArgsNumber(); for (auto i = 0u; i < ParamsCount; i++) { if (pKernel->getKernelInfo().kernelArgInfo[i].isImage) { imgUsed = true; yTiledSurfaces = true; } } } void WorkSizeInfo::setMinWorkGroupSize() { minWorkGroupSize = 0; if (hasBarriers) { uint32_t maxBarriersPerHSlice = (coreFamily >= IGFX_GEN9_CORE) ? 32 : 16; minWorkGroupSize = numThreadsPerSubSlice * simdSize / maxBarriersPerHSlice; } if (slmTotalSize > 0) { minWorkGroupSize = std::max(maxWorkGroupSize / ((localMemSize / slmTotalSize)), minWorkGroupSize); } } void WorkSizeInfo::checkRatio(const size_t workItems[3]) { if (slmTotalSize > 0) { useRatio = true; targetRatio = log((float)workItems[0]) - log((float)workItems[1]); useStrictRatio = false; } else if (yTiledSurfaces == true) { useRatio = true; targetRatio = YTilingRatioValue; useStrictRatio = true; } } KernelInfo::~KernelInfo() { kernelArgInfo.clear(); patchInfo.stringDataMap.clear(); delete[] crossThreadData; } void KernelInfo::storePatchToken(const SPatchExecutionEnvironment *execEnv) { this->patchInfo.executionEnvironment = execEnv; if (execEnv->RequiredWorkGroupSizeX != 0) { this->reqdWorkGroupSize[0] = execEnv->RequiredWorkGroupSizeX; this->reqdWorkGroupSize[1] = execEnv->RequiredWorkGroupSizeY; this->reqdWorkGroupSize[2] = execEnv->RequiredWorkGroupSizeZ; DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeY > 0)); DEBUG_BREAK_IF(!(execEnv->RequiredWorkGroupSizeZ > 0)); } this->workgroupWalkOrder[0] = 0; 
this->workgroupWalkOrder[1] = 1; this->workgroupWalkOrder[2] = 2; if (execEnv->WorkgroupWalkOrderDims) { constexpr auto dimensionMask = 0b11; constexpr auto dimensionSize = 2; this->workgroupWalkOrder[0] = execEnv->WorkgroupWalkOrderDims & dimensionMask; this->workgroupWalkOrder[1] = (execEnv->WorkgroupWalkOrderDims >> dimensionSize) & dimensionMask; this->workgroupWalkOrder[2] = (execEnv->WorkgroupWalkOrderDims >> dimensionSize * 2) & dimensionMask; this->requiresWorkGroupOrder = true; } for (uint32_t i = 0; i < 3; ++i) { // inverts the walk order mapping (from ORDER_ID->DIM_ID to DIM_ID->ORDER_ID) this->workgroupDimensionsOrder[this->workgroupWalkOrder[i]] = i; } if (execEnv->CompiledForGreaterThan4GBBuffers == false) { this->requiresSshForBuffers = true; } } void KernelInfo::storeArgInfo(uint32_t argNum, ArgTypeTraits metadata, std::unique_ptr metadataExtended) { resizeKernelArgInfoAndRegisterParameter(argNum); auto &argInfo = kernelArgInfo[argNum]; argInfo.metadata = metadata; argInfo.metadataExtended = std::move(metadataExtended); argInfo.isReadOnly |= argInfo.metadata.typeQualifiers.constQual; } void KernelInfo::storeKernelArgument( const SPatchDataParameterBuffer *pDataParameterKernelArg) { uint32_t argNum = pDataParameterKernelArg->ArgumentNumber; uint32_t dataSize = pDataParameterKernelArg->DataSize; uint32_t offset = pDataParameterKernelArg->Offset; uint32_t sourceOffset = pDataParameterKernelArg->SourceOffset; storeKernelArgPatchInfo(argNum, dataSize, offset, sourceOffset, 0); } void KernelInfo::storeKernelArgument( const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg) { uint32_t argNum = pStatelessGlobalKernelArg->ArgumentNumber; uint32_t offsetSSH = pStatelessGlobalKernelArg->SurfaceStateHeapOffset; usesSsh |= true; storeKernelArgPatchInfo(argNum, pStatelessGlobalKernelArg->DataParamSize, pStatelessGlobalKernelArg->DataParamOffset, 0, offsetSSH); kernelArgInfo[argNum].isBuffer = true; 
patchInfo.statelessGlobalMemObjKernelArgs.push_back(pStatelessGlobalKernelArg); } void KernelInfo::storeKernelArgument( const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg) { uint32_t argNum = pImageMemObjKernelArg->ArgumentNumber; uint32_t offsetSurfaceState = pImageMemObjKernelArg->Offset; usesSsh |= true; storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState); kernelArgInfo[argNum].isImage = true; if (pImageMemObjKernelArg->Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA) { kernelArgInfo[argNum].isMediaImage = true; } if (pImageMemObjKernelArg->Type == iOpenCL::IMAGE_MEMORY_OBJECT_2D_MEDIA_BLOCK) { kernelArgInfo[argNum].isMediaBlockImage = true; } kernelArgInfo[argNum].metadata.argByValSize = sizeof(cl_mem); kernelArgInfo[argNum].isTransformable = pImageMemObjKernelArg->Transformable != 0; patchInfo.imageMemObjKernelArgs.push_back(pImageMemObjKernelArg); if (NEO::KernelArgMetadata::AccessUnknown == kernelArgInfo[argNum].metadata.accessQualifier) { auto accessQual = pImageMemObjKernelArg->Writeable ? 
NEO::KernelArgMetadata::AccessReadWrite : NEO::KernelArgMetadata::AccessReadOnly; kernelArgInfo[argNum].metadata.accessQualifier = accessQual; } } void KernelInfo::storeKernelArgument( const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg) { uint32_t argNum = pGlobalMemObjKernelArg->ArgumentNumber; uint32_t offsetSurfaceState = pGlobalMemObjKernelArg->Offset; usesSsh |= true; storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState); kernelArgInfo[argNum].isBuffer = true; } void KernelInfo::storeKernelArgument( const SPatchSamplerKernelArgument *pSamplerArgument) { uint32_t argNum = pSamplerArgument->ArgumentNumber; uint32_t offsetSurfaceState = pSamplerArgument->Offset; storeKernelArgPatchInfo(argNum, 0, 0, 0, offsetSurfaceState); kernelArgInfo[argNum].samplerArgumentType = pSamplerArgument->Type; if (pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_TEXTURE) { DEBUG_BREAK_IF(pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VME && pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VE && pSamplerArgument->Type != iOpenCL::SAMPLER_OBJECT_VD); kernelArgInfo[argNum].isAccelerator = true; isVmeWorkload = true; } else { kernelArgInfo[argNum].isSampler = true; } } void KernelInfo::storeKernelArgument( const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg) { uint32_t argNum = pStatelessConstMemObjKernelArg->ArgumentNumber; uint32_t offsetSSH = pStatelessConstMemObjKernelArg->SurfaceStateHeapOffset; usesSsh |= true; storeKernelArgPatchInfo(argNum, pStatelessConstMemObjKernelArg->DataParamSize, pStatelessConstMemObjKernelArg->DataParamOffset, 0, offsetSSH); kernelArgInfo[argNum].isBuffer = true; kernelArgInfo[argNum].isReadOnly = true; patchInfo.statelessGlobalMemObjKernelArgs.push_back(reinterpret_cast(pStatelessConstMemObjKernelArg)); } void KernelInfo::storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg) { uint32_t argNum = pStatelessDeviceQueueKernelArg->ArgumentNumber; 
resizeKernelArgInfoAndRegisterParameter(argNum); kernelArgInfo[argNum].isDeviceQueue = true; storeKernelArgPatchInfo(argNum, pStatelessDeviceQueueKernelArg->DataParamSize, pStatelessDeviceQueueKernelArg->DataParamOffset, 0, pStatelessDeviceQueueKernelArg->SurfaceStateHeapOffset); } void KernelInfo::storePatchToken( const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg) { usesSsh |= true; patchInfo.pAllocateStatelessPrivateSurface = pStatelessPrivateSurfaceArg; } void KernelInfo::storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg) { usesSsh |= true; patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = pStatelessConstantMemorySurfaceWithInitializationArg; } void KernelInfo::storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg) { usesSsh |= true; patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = pStatelessGlobalMemorySurfaceWithInitializationArg; } void KernelInfo::storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg) { usesSsh |= true; patchInfo.pAllocateStatelessPrintfSurface = pStatelessPrintfSurfaceArg; } void KernelInfo::storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg) { usesSsh |= true; patchInfo.pAllocateStatelessEventPoolSurface = pStatelessEventPoolSurfaceArg; } void KernelInfo::storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg) { usesSsh |= true; patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = pStatelessDefaultDeviceQueueSurfaceArg; } void KernelInfo::storePatchToken(const SPatchString *pStringArg) { uint32_t stringIndex = pStringArg->Index; if (pStringArg->StringSize > 0) { const char *stringData = reinterpret_cast(pStringArg + 1); patchInfo.stringDataMap.emplace(stringIndex, 
std::string(stringData, stringData + pStringArg->StringSize)); } } void KernelInfo::storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo) { this->patchInfo.pKernelAttributesInfo = pKernelAttributesInfo; attributes = reinterpret_cast(pKernelAttributesInfo) + sizeof(SPatchKernelAttributesInfo); auto start = attributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); auto stop = attributes.find(")", start); std::stringstream requiredSubGroupSizeStr(attributes.substr(start, stop - start)); requiredSubGroupSizeStr >> requiredSubGroupSize; } } void KernelInfo::storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface) { usesSsh |= true; patchInfo.pAllocateSystemThreadSurface = pSystemThreadSurface; } void KernelInfo::storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer) { usesSsh |= true; patchInfo.pAllocateSyncBuffer = pAllocateSyncBuffer; } void KernelInfo::storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t dataOffset, uint32_t sourceOffset, uint32_t offsetSSH) { resizeKernelArgInfoAndRegisterParameter(argNum); KernelArgPatchInfo kernelArgPatchInfo; kernelArgPatchInfo.crossthreadOffset = dataOffset; kernelArgPatchInfo.size = dataSize; kernelArgPatchInfo.sourceOffset = sourceOffset; kernelArgInfo[argNum].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); kernelArgInfo[argNum].offsetHeap = offsetSSH; } size_t KernelInfo::getSamplerStateArrayCount() const { size_t count = patchInfo.samplerStateArray ? 
(size_t)patchInfo.samplerStateArray->Count : 0; return count; } size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const { size_t samplerStateArraySize = getSamplerStateArrayCount() * Sampler::getSamplerStateSize(hwInfo); return samplerStateArraySize; } size_t KernelInfo::getBorderColorStateSize() const { size_t borderColorSize = 0; if (patchInfo.samplerStateArray) { borderColorSize = patchInfo.samplerStateArray->Offset - patchInfo.samplerStateArray->BorderColorOffset; } return borderColorSize; } size_t KernelInfo::getBorderColorOffset() const { size_t borderColorOffset = 0; if (patchInfo.samplerStateArray) { borderColorOffset = patchInfo.samplerStateArray->BorderColorOffset; } return borderColorOffset; } uint32_t KernelInfo::getConstantBufferSize() const { return patchInfo.dataParameterStream ? patchInfo.dataParameterStream->DataParameterStreamSize : 0; } bool KernelInfo::createKernelAllocation(uint32_t rootDeviceIndex, MemoryManager *memoryManager) { UNRECOVERABLE_IF(kernelAllocation); auto kernelIsaSize = heapInfo.pKernelHeader->KernelHeapSize; kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, kernelIsaSize, GraphicsAllocation::AllocationType::KERNEL_ISA}); if (!kernelAllocation) { return false; } return memoryManager->copyMemoryToAllocation(kernelAllocation, heapInfo.pKernelHeap, kernelIsaSize); } void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) { if (nullptr == this->crossThreadData) { return; } uint32_t privateMemoryStatelessSizeOffset = this->workloadInfo.privateMemoryStatelessSizeOffset; uint32_t localMemoryStatelessWindowSizeOffset = this->workloadInfo.localMemoryStatelessWindowSizeOffset; uint32_t localMemoryStatelessWindowStartAddressOffset = this->workloadInfo.localMemoryStatelessWindowStartAddressOffset; if (localMemoryStatelessWindowStartAddressOffset != WorkloadInfo::undefinedOffset) { *(uintptr_t 
*)&(this->crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast(constants.slmWindow); } if (localMemoryStatelessWindowSizeOffset != WorkloadInfo::undefinedOffset) { *(uint32_t *)&(this->crossThreadData[localMemoryStatelessWindowSizeOffset]) = constants.slmWindowSize; } uint32_t privateMemorySize = 0U; if (this->patchInfo.pAllocateStatelessPrivateSurface) { privateMemorySize = this->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize * constants.computeUnitsUsedForScratch * this->getMaxSimdSize(); } if (privateMemoryStatelessSizeOffset != WorkloadInfo::undefinedOffset) { *(uint32_t *)&(this->crossThreadData[privateMemoryStatelessSizeOffset]) = privateMemorySize; } if (this->workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) { *(uint32_t *)&(this->crossThreadData[this->workloadInfo.maxWorkGroupSizeOffset]) = constants.maxWorkGroupSize; } } std::string concatenateKernelNames(ArrayRef kernelInfos) { std::string semiColonDelimitedKernelNameStr; for (const auto &kernelInfo : kernelInfos) { if (!semiColonDelimitedKernelNameStr.empty()) { semiColonDelimitedKernelNameStr += ';'; } semiColonDelimitedKernelNameStr += kernelInfo->name; } return semiColonDelimitedKernelNameStr; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/kernel_info.h000066400000000000000000000223131363734646600242510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/kernel/kernel_descriptor.h" #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/const_stringref.h" #include "opencl/source/program/heap_info.h" #include "opencl/source/program/kernel_arg_info.h" #include "patch_info.h" #include #include #include #include #include #include #include #include namespace gtpin { typedef struct igc_info_s igc_info_t; } namespace NEO { class 
BuiltinDispatchInfoBuilder; class Device; class Kernel; struct KernelInfo; class DispatchInfo; struct KernelArgumentType; class GraphicsAllocation; class MemoryManager; extern bool useKernelDescriptor; extern std::map typeSizeMap;
// Per-kernel bookkeeping of 32-bit offsets plus the static SLM size. Every
// offset member starts out as undefinedOffset; slmStaticSize starts at 0.
// KernelInfo::apply uses maxWorkGroupSizeOffset and the privateMemory* /
// localMemory* offsets as indices into KernelInfo::crossThreadData and skips
// those still equal to undefinedOffset.
// NOTE(review): the remaining offsets presumably index crossThreadData as well - confirm against their users.
struct WorkloadInfo { enum : uint32_t { undefinedOffset = std::numeric_limits::max() }; enum : uint32_t { invalidParentEvent = std::numeric_limits::max() }; uint32_t globalWorkOffsetOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; uint32_t globalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; uint32_t localWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; uint32_t localWorkSizeOffsets2[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; uint32_t enqueuedLocalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; uint32_t numWorkGroupsOffset[3] = {undefinedOffset, undefinedOffset, undefinedOffset}; uint32_t maxWorkGroupSizeOffset = undefinedOffset; uint32_t workDimOffset = undefinedOffset; uint32_t slmStaticSize = 0; uint32_t simdSizeOffset = undefinedOffset; uint32_t parentEventOffset = undefinedOffset; uint32_t preferredWkgMultipleOffset = undefinedOffset; uint32_t privateMemoryStatelessSizeOffset = undefinedOffset; uint32_t localMemoryStatelessWindowSizeOffset = undefinedOffset; uint32_t localMemoryStatelessWindowStartAddressOffset = undefinedOffset; };
// Numerically this constant equals ln(4).
// NOTE(review): its role in work-size selection is not visible in this file - confirm at the use site.
static const float YTilingRatioValue = 1.3862943611198906188344642429164f;
// Inputs and flags used when choosing a work-group size. The first eight
// members have no default initializer here, so they rely on the constructors
// (defined elsewhere) to be set; the flags default to false and targetRatio to 0.
struct WorkSizeInfo { uint32_t maxWorkGroupSize; uint32_t minWorkGroupSize; bool hasBarriers; uint32_t simdSize; uint32_t slmTotalSize; GFXCORE_FAMILY coreFamily; uint32_t numThreadsPerSubSlice; uint32_t localMemSize; bool imgUsed = false; bool yTiledSurfaces = false; bool useRatio = false; bool useStrictRatio = false; float targetRatio = 0; WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface); WorkSizeInfo(const DispatchInfo &dispatchInfo); void setIfUseImg(Kernel *pKernel); void setMinWorkGroupSize(); void checkRatio(const size_t workItems[3]); };
// Device-level values that KernelInfo::apply writes into a kernel's
// cross-thread data: the SLM window address and size, the number of compute
// units used for scratch (a factor of the private memory size) and the
// maximum work-group size.
struct DeviceInfoKernelPayloadConstants { void *slmWindow = nullptr; uint32_t slmWindowSize = 0U; uint32_t computeUnitsUsedForScratch = 0U; uint32_t maxWorkGroupSize = 0U; };
// Decoded description of a single kernel: heaps, patch tokens, per-argument
// info and workload offsets. Non-copyable (copy constructor and copy
// assignment are deleted). The storeKernelArgument / storePatchToken overloads
// record one decoded patch token each.
struct KernelInfo { public: KernelInfo() = default; KernelInfo(const KernelInfo &) = delete; KernelInfo &operator=(const KernelInfo &) = delete; ~KernelInfo(); void storeArgInfo(uint32_t argNum, ArgTypeTraits metadata, std::unique_ptr metadataExtended); void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg); void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg); void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg); void storeKernelArgument(const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg); void storeKernelArgument(const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg); void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg); void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg); void storePatchToken(const SPatchExecutionEnvironment *execEnv); void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg); void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg); void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg); void storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg); void storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg); void storePatchToken(const
SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg); void storePatchToken(const SPatchString *pStringArg); void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo); void storePatchToken(const SPatchAllocateSystemThreadSurface *pSystemThreadSurface); void storePatchToken(const SPatchAllocateSyncBuffer *pAllocateSyncBuffer); GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; } void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) { if (kernelArgInfo.size() <= argCount) { kernelArgInfo.resize(argCount + 1); } if (!kernelArgInfo[argCount].needPatch) { kernelArgInfo[argCount].needPatch = true; argumentsToPatchNum++; } } void storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t crossthreadOffset, uint32_t sourceOffset, uint32_t offsetSSH); size_t getSamplerStateArrayCount() const; size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const; size_t getBorderColorStateSize() const; size_t getBorderColorOffset() const; unsigned int getMaxSimdSize() const { const auto executionEnvironment = patchInfo.executionEnvironment; if (executionEnvironment == nullptr || executionEnvironment->LargestCompiledSIMDSize == 1) { return 1; } if (executionEnvironment->CompiledSIMD32) { return 32; } if (executionEnvironment->CompiledSIMD16) { return 16; } return 8; } bool hasDeviceEnqueue() const { return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->HasDeviceEnqueue : false; } bool requiresSubgroupIndependentForwardProgress() const { return patchInfo.executionEnvironment ? 
!!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false; } size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const { auto requiredWorkGroupSizeX = patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto requiredWorkGroupSizeY = patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto requiredWorkGroupSizeZ = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ; if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) { maxRequiredWorkGroupSize = maxWorkGroupSize; } return maxRequiredWorkGroupSize; } uint32_t getConstantBufferSize() const; int32_t getArgNumByName(const char *name) const { int32_t argNum = 0; for (auto &arg : kernelArgInfo) { if (arg.metadataExtended && (arg.metadataExtended->argName == name)) { return argNum; } ++argNum; } return -1; } bool createKernelAllocation(uint32_t rootDeviceIndex, MemoryManager *memoryManager); void apply(const DeviceInfoKernelPayloadConstants &constants); std::string name; std::string attributes; HeapInfo heapInfo = {}; PatchInfo patchInfo = {}; std::vector kernelArgInfo; std::vector kernelNonArgInfo; WorkloadInfo workloadInfo = {}; std::vector> childrenKernelsIdOffset; bool usesSsh = false; bool requiresSshForBuffers = false; bool isVmeWorkload = false; char *crossThreadData = nullptr; size_t reqdWorkGroupSize[3] = {WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset, WorkloadInfo::undefinedOffset}; size_t requiredSubGroupSize = 0; std::array workgroupWalkOrder = {{0, 1, 2}}; std::array workgroupDimensionsOrder = {{0, 1, 2}}; bool requiresWorkGroupOrder = false; uint32_t gpuPointerSize = 0; const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr; uint32_t argumentsToPatchNum = 0; uint32_t systemKernelOffset = 0; uint64_t kernelId = 0; bool isKernelHeapSubstituted = false; GraphicsAllocation *kernelAllocation = nullptr; 
DebugData debugData; bool computeMode = false; const gtpin::igc_info_t *igcInfoForGtpin = nullptr; KernelDescriptor kernelDescriptor; }; std::string concatenateKernelNames(ArrayRef kernelInfos); } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/kernel_info_from_patchtokens.cpp000066400000000000000000000300131363734646600302260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/kernel_info_from_patchtokens.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/kernel/kernel_descriptor_from_patchtokens.h" #include "opencl/source/program/kernel_info.h" #include namespace NEO { using namespace iOpenCL; template inline void storeTokenIfNotNull(KernelInfo &kernelInfo, T *token) { if (token != nullptr) { kernelInfo.storePatchToken(token); } } template inline uint32_t getOffset(T *token) { if (token != nullptr) { return token->Offset; } return WorkloadInfo::undefinedOffset; } void populateKernelInfoArgMetadata(KernelInfo &dstKernelInfoArg, const SPatchKernelArgumentInfo *src) { if (nullptr == src) { return; } uint32_t argNum = src->ArgumentNumber; auto inlineData = PatchTokenBinary::getInlineData(src); auto metadataExtended = std::make_unique(); metadataExtended->addressQualifier = parseLimitedString(inlineData.addressQualifier.begin(), inlineData.addressQualifier.size()); metadataExtended->accessQualifier = parseLimitedString(inlineData.accessQualifier.begin(), inlineData.accessQualifier.size()); metadataExtended->argName = parseLimitedString(inlineData.argName.begin(), inlineData.argName.size()); auto argTypeFull = parseLimitedString(inlineData.typeName.begin(), inlineData.typeName.size()); const char *argTypeDelim = strchr(argTypeFull.data(), ';'); if (nullptr == argTypeDelim) { argTypeDelim = argTypeFull.data() + argTypeFull.size(); } metadataExtended->type = std::string(argTypeFull.data(), argTypeDelim).c_str(); 
metadataExtended->typeQualifiers = parseLimitedString(inlineData.typeQualifiers.begin(), inlineData.typeQualifiers.size()); ArgTypeTraits metadata = {}; metadata.accessQualifier = KernelArgMetadata::parseAccessQualifier(metadataExtended->accessQualifier); metadata.addressQualifier = KernelArgMetadata::parseAddressSpace(metadataExtended->addressQualifier); metadata.typeQualifiers = KernelArgMetadata::parseTypeQualifiers(metadataExtended->typeQualifiers); dstKernelInfoArg.storeArgInfo(argNum, metadata, std::move(metadataExtended)); }
// Transfers one decoded kernel argument (src) into dstKernelInfo and into its
// per-argument record dstKernelInfoArg, in this order: argument metadata, the
// object token (if any), offsets specific to the object type, offsets specific
// to the specialized object type, by-value data parameters, and the object id
// offset. Offsets whose token is absent become WorkloadInfo::undefinedOffset
// (see getOffset).
void populateKernelInfoArg(KernelInfo &dstKernelInfo, KernelArgInfo &dstKernelInfoArg, const PatchTokenBinary::KernelArgFromPatchtokens &src) { populateKernelInfoArgMetadata(dstKernelInfo, src.argInfo); if (src.objectArg != nullptr) {
// Dispatch on the object token. The default label handles the image token:
// any value not listed in a case below must be the image token, otherwise
// UNRECOVERABLE_IF fires.
switch (src.objectArg->Token) { default: UNRECOVERABLE_IF(PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT != src.objectArg->Token); dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); break; case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT: dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); break; case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); break; case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT: dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); break; case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT: dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); break; case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT: dstKernelInfo.storeKernelArgument(reinterpret_cast(src.objectArg)); break; } }
// Offsets that depend on the object type. Any type without a case below must
// be ArgObjectType::None.
switch (src.objectType) { default: UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectType::None != src.objectType); break; case PatchTokenBinary::ArgObjectType::Buffer: dstKernelInfoArg.offsetBufferOffset = getOffset(src.metadata.buffer.bufferOffset); dstKernelInfoArg.pureStatefulBufferAccess = (src.metadata.buffer.pureStateful != nullptr); break; case PatchTokenBinary::ArgObjectType::Image: dstKernelInfoArg.offsetImgWidth = getOffset(src.metadata.image.width); dstKernelInfoArg.offsetImgHeight = getOffset(src.metadata.image.height); dstKernelInfoArg.offsetImgDepth = getOffset(src.metadata.image.depth); dstKernelInfoArg.offsetChannelDataType = getOffset(src.metadata.image.channelDataType); dstKernelInfoArg.offsetChannelOrder = getOffset(src.metadata.image.channelOrder); dstKernelInfoArg.offsetArraySize = getOffset(src.metadata.image.arraySize); dstKernelInfoArg.offsetNumSamples = getOffset(src.metadata.image.numSamples); dstKernelInfoArg.offsetNumMipLevels = getOffset(src.metadata.image.numMipLevels); dstKernelInfoArg.offsetFlatBaseOffset = getOffset(src.metadata.image.flatBaseOffset); dstKernelInfoArg.offsetFlatWidth = getOffset(src.metadata.image.flatWidth); dstKernelInfoArg.offsetFlatHeight = getOffset(src.metadata.image.flatHeight); dstKernelInfoArg.offsetFlatPitch = getOffset(src.metadata.image.flatPitch); break; case PatchTokenBinary::ArgObjectType::Sampler: dstKernelInfoArg.offsetSamplerSnapWa = getOffset(src.metadata.sampler.coordinateSnapWaRequired); dstKernelInfoArg.offsetSamplerAddressingMode = getOffset(src.metadata.sampler.addressMode); dstKernelInfoArg.offsetSamplerNormalizedCoords = getOffset(src.metadata.sampler.normalizedCoords); break;
// The SLM token is dereferenced without a null check.
// NOTE(review): presumably the decoder guarantees it is set for Slm arguments - confirm.
case PatchTokenBinary::ArgObjectType::Slm: dstKernelInfoArg.slmAlignment = src.metadata.slm.token->SourceOffset; break; }
// Offsets that depend on the specialized object type; only Vme carries any.
switch (src.objectTypeSpecialized) { default: UNRECOVERABLE_IF(PatchTokenBinary::ArgObjectTypeSpecialized::None != src.objectTypeSpecialized); break; case PatchTokenBinary::ArgObjectTypeSpecialized::Vme: dstKernelInfoArg.offsetVmeMbBlockType = getOffset(src.metadataSpecialized.vme.mbBlockType); dstKernelInfoArg.offsetVmeSubpixelMode = getOffset(src.metadataSpecialized.vme.subpixelMode); dstKernelInfoArg.offsetVmeSadAdjustMode = getOffset(src.metadataSpecialized.vme.sadAdjustMode); dstKernelInfoArg.offsetVmeSearchPathType = getOffset(src.metadataSpecialized.vme.searchPathType); break; }
// Every by-value token is stored as a kernel argument; those typed
// DATA_PARAMETER_KERNEL_ARGUMENT are additionally collected in
// patchInfo.dataParameterBuffersKernelArgs.
for (auto &byValArg : src.byValMap) { dstKernelInfo.storeKernelArgument(byValArg); if (byValArg->Type == DATA_PARAMETER_KERNEL_ARGUMENT) { dstKernelInfo.patchInfo.dataParameterBuffersKernelArgs.push_back(byValArg); } } dstKernelInfoArg.offsetObjectId = getOffset(src.objectId); }
// Fills dst from a decoded patch-token kernel. src.header must not be null.
// The heap pointers and token pointers stored in dst point into src's data
// and are not copied.
void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes) { UNRECOVERABLE_IF(nullptr == src.header); dst.heapInfo.pKernelHeader = src.header; dst.name = std::string(src.name.begin(), src.name.end()).c_str(); dst.heapInfo.pKernelHeap = src.isa.begin(); dst.heapInfo.pGsh = src.heaps.generalState.begin(); dst.heapInfo.pDsh = src.heaps.dynamicState.begin(); dst.heapInfo.pSsh = src.heaps.surfaceState.begin(); storeTokenIfNotNull(dst, src.tokens.executionEnvironment); dst.patchInfo.samplerStateArray = src.tokens.samplerStateArray; dst.patchInfo.bindingTableState = src.tokens.bindingTableState; dst.usesSsh = src.tokens.bindingTableState && (src.tokens.bindingTableState->Count > 0); dst.patchInfo.localsurface = src.tokens.allocateLocalSurface; dst.workloadInfo.slmStaticSize = src.tokens.allocateLocalSurface ?
src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U; dst.patchInfo.mediavfestate = src.tokens.mediaVfeState[0]; dst.patchInfo.mediaVfeStateSlot1 = src.tokens.mediaVfeState[1]; dst.patchInfo.interfaceDescriptorDataLoad = src.tokens.mediaInterfaceDescriptorLoad; dst.patchInfo.interfaceDescriptorData = src.tokens.interfaceDescriptorData; dst.patchInfo.threadPayload = src.tokens.threadPayload; dst.patchInfo.dataParameterStream = src.tokens.dataParameterStream;
// kernelArgInfo is sized to the number of decoded arguments up front and each
// entry is then filled from the decoded argument with the same index.
dst.kernelArgInfo.resize(src.tokens.kernelArgs.size()); for (size_t i = 0U; i < src.tokens.kernelArgs.size(); ++i) { auto &decodedKernelArg = src.tokens.kernelArgs[i]; auto &kernelInfoArg = dst.kernelArgInfo[i]; populateKernelInfoArg(dst, kernelInfoArg, decodedKernelArg); }
// Optional tokens: each one is stored only when it is present in the binary.
storeTokenIfNotNull(dst, src.tokens.kernelAttributesInfo); storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrivateSurface); storeTokenIfNotNull(dst, src.tokens.allocateStatelessConstantMemorySurfaceWithInitialization); storeTokenIfNotNull(dst, src.tokens.allocateStatelessGlobalMemorySurfaceWithInitialization); storeTokenIfNotNull(dst, src.tokens.allocateStatelessPrintfSurface); storeTokenIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface); storeTokenIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface); storeTokenIfNotNull(dst, src.tokens.allocateSyncBuffer); for (auto &str : src.tokens.strings) { dst.storePatchToken(str); }
// isVmeWorkload may already have been set while storing a sampler argument;
// an inline VME sampler token sets it as well.
dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr); dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U; storeTokenIfNotNull(dst, src.tokens.allocateSystemThreadSurface);
// Cross-thread payload offsets; absent tokens yield WorkloadInfo::undefinedOffset.
for (uint32_t i = 0; i < 3U; ++i) { dst.workloadInfo.localWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]); dst.workloadInfo.localWorkSizeOffsets2[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize2[i]); dst.workloadInfo.globalWorkOffsetOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkOffset[i]); dst.workloadInfo.enqueuedLocalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]); dst.workloadInfo.globalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkSize[i]); dst.workloadInfo.numWorkGroupsOffset[i] = getOffset(src.tokens.crossThreadPayloadArgs.numWorkGroups[i]); } dst.workloadInfo.maxWorkGroupSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.maxWorkGroupSize); dst.workloadInfo.workDimOffset = getOffset(src.tokens.crossThreadPayloadArgs.workDimensions); dst.workloadInfo.simdSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.simdSize); dst.workloadInfo.parentEventOffset = getOffset(src.tokens.crossThreadPayloadArgs.parentEvent); dst.workloadInfo.preferredWkgMultipleOffset = getOffset(src.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple); dst.workloadInfo.privateMemoryStatelessSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize); dst.workloadInfo.localMemoryStatelessWindowSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize); dst.workloadInfo.localMemoryStatelessWindowStartAddressOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress);
// One (argument number, offset) pair per child block SIMD size token.
for (auto &childSimdSize : src.tokens.crossThreadPayloadArgs.childBlockSimdSize) { dst.childrenKernelsIdOffset.push_back({childSimdSize->ArgumentNumber, childSimdSize->Offset}); }
// The GT-Pin payload is read from the bytes that directly follow the token header.
if (src.tokens.gtpinInfo) { dst.igcInfoForGtpin = reinterpret_cast(src.tokens.gtpinInfo + 1); } dst.gpuPointerSize = gpuPointerSizeInBytes;
// crossThreadData is allocated with new[] and zero-filled, sized by the data
// parameter stream; it stays nullptr when that size is 0 or the token is absent.
// NOTE(review): ownership presumably rests with ~KernelInfo (not visible here) - confirm.
if (dst.patchInfo.dataParameterStream && dst.patchInfo.dataParameterStream->DataParameterStreamSize) { uint32_t crossThreadDataSize = dst.patchInfo.dataParameterStream->DataParameterStreamSize; dst.crossThreadData = new char[crossThreadDataSize]; memset(dst.crossThreadData, 0x00, crossThreadDataSize); }
// The kernel descriptor is populated only when the global useKernelDescriptor switch is set.
if (useKernelDescriptor) { populateKernelDescriptor(dst.kernelDescriptor, src, gpuPointerSizeInBytes); } }
} // namespace NEO compute-runtime-20.13.16352/opencl/source/program/kernel_info_from_patchtokens.h000066400000000000000000000005621363734646600277010ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { struct KernelInfo; namespace PatchTokenBinary { struct KernelFromPatchtokens; } void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes); } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/link.cpp000066400000000000000000000171261363734646600232540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/source/utilities/stackvec.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" #include "compiler_options.h" #include
namespace NEO {
// Links the IR of the given input programs into this program.
//
// The IR of every input program is packed into an ELF container and handed to
// the compiler interface. Without the create-library option the result is a
// device binary (program type EXECUTABLE); with it the result is intermediate
// representation (program type LIBRARY).
//
// Returns CL_SUCCESS or the first error encountered:
//   CL_INVALID_VALUE       inconsistent deviceList/numDevices, no input
//                          programs, or userData without funcNotify
//   CL_INVALID_DEVICE      the first entry of deviceList is not a valid object
//   CL_INVALID_OPERATION   a build of this program is already in progress
//   CL_INVALID_PROGRAM     an input program is null, not a program object, or
//                          carries no IR
//   CL_OUT_OF_HOST_MEMORY  no compiler interface is available
//   otherwise the compiler error translated by asClError, or the result of
//   processGenBinary.
//
// On failure buildStatus becomes CL_BUILD_ERROR and the binary type NONE.
// internalOptions is cleared and funcNotify (when set) is invoked on both the
// success and the failure path.
cl_int Program::link( cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) { cl_int retVal = CL_SUCCESS; bool isCreateLibrary;
// Single-pass do/while: `break` is used as an early exit to the common epilogue.
do { if (((deviceList == nullptr) && (numDevices != 0)) || ((deviceList != nullptr) && (numDevices == 0))) { retVal = CL_INVALID_VALUE; break; } if ((numInputPrograms == 0) || (inputPrograms == nullptr)) { retVal = CL_INVALID_VALUE; break; } if ((funcNotify == nullptr) && (userData != nullptr)) { retVal = CL_INVALID_VALUE; break; }
// Only the first device in deviceList is validated.
if ((deviceList != nullptr) && validateObject(*deviceList) != CL_SUCCESS) { retVal = CL_INVALID_DEVICE; break; } if (buildStatus == CL_BUILD_IN_PROGRESS) { retVal = CL_INVALID_OPERATION; break; } options = (buildOptions != nullptr) ? buildOptions : ""; if (isKernelDebugEnabled()) { appendKernelDebugOptions(); } isCreateLibrary = CompilerOptions::contains(options, CompilerOptions::createLibrary); buildStatus = CL_BUILD_IN_PROGRESS; NEO::Elf::ElfEncoder<> elfEncoder(true, false, 1U); elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS; StackVec inputProgramsInternal;
// Append one ELF section per input program (SPIR-V or LLVM binary). A `break`
// inside this loop leaves only the loop; the retVal check that follows the
// loop then leaves the do/while.
for (cl_uint i = 0; i < numInputPrograms; i++) { auto program = inputPrograms[i]; if (program == nullptr) { retVal = CL_INVALID_PROGRAM; break; } auto pInputProgObj = castToObject(program); if (pInputProgObj == nullptr) { retVal = CL_INVALID_PROGRAM; break; } inputProgramsInternal.push_back(pInputProgObj); if ((pInputProgObj->irBinary == nullptr) || (pInputProgObj->irBinarySize == 0)) { retVal = CL_INVALID_PROGRAM; break; } auto sectionType = pInputProgObj->getIsSpirV() ? NEO::Elf::SHT_OPENCL_SPIRV : NEO::Elf::SHT_OPENCL_LLVM_BINARY; ConstStringRef sectionName = pInputProgObj->getIsSpirV() ? NEO::Elf::SectionNamesOpenCl::spirvObject : NEO::Elf::SectionNamesOpenCl::llvmObject; elfEncoder.appendSection(sectionType, sectionName, ArrayRef(reinterpret_cast(pInputProgObj->irBinary.get()), pInputProgObj->irBinarySize)); } if (retVal != CL_SUCCESS) { break; } auto clLinkInput = elfEncoder.encode(); CompilerInterface *pCompilerInterface = pDevice->getCompilerInterface(); if (!pCompilerInterface) { retVal = CL_OUT_OF_HOST_MEMORY; break; } TranslationInput inputArgs = {IGC::CodeType::elf, IGC::CodeType::undefined}; inputArgs.src = ArrayRef(reinterpret_cast(clLinkInput.data()), clLinkInput.size()); inputArgs.apiOptions = ArrayRef(options.c_str(), options.length()); inputArgs.internalOptions = ArrayRef(internalOptions.c_str(), internalOptions.length()); inputArgs.GTPinInput = gtpinGetIgcInit();
// Executable path: link to a device binary, then process it. The build logs
// are recorded before the compiler error is checked, so they are kept on
// failure too.
if (!isCreateLibrary) { inputArgs.outType = IGC::CodeType::oclGenBin; NEO::TranslationOutput compilerOuput = {}; auto compilerErr = pCompilerInterface->link(this->getDevice(), inputArgs, compilerOuput); this->updateBuildLog(this->pDevice, compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size()); this->updateBuildLog(this->pDevice, compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size()); retVal = asClError(compilerErr); if (retVal != CL_SUCCESS) { break; } this->replaceDeviceBinary(std::move(compilerOuput.deviceBinary.mem), compilerOuput.deviceBinary.size); this->debugData = std::move(compilerOuput.debugData.mem); this->debugDataSize = compilerOuput.debugData.size; retVal = processGenBinary(); if (retVal != CL_SUCCESS) { break; } programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
// With kernel debugging enabled, every kernel's debug data is reported to the source level debugger.
if (isKernelDebugEnabled()) { processDebugData(); auto clDevice = this->getDevice().getSpecializedDevice(); UNRECOVERABLE_IF(clDevice == nullptr); for (auto kernelInfo : kernelInfoArray) { clDevice->getSourceLevelDebugger()->notifyKernelDebugData(&kernelInfo->debugData, kernelInfo->name, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.pKernelHeader->KernelHeapSize); } } } else {
// Library path: the output stays intermediate representation (LLVM bitcode is requested).
inputArgs.outType = IGC::CodeType::llvmBc; NEO::TranslationOutput compilerOuput = {}; auto compilerErr = pCompilerInterface->createLibrary(*this->pDevice, inputArgs, compilerOuput); this->updateBuildLog(this->pDevice, compilerOuput.frontendCompilerLog.c_str(), compilerOuput.frontendCompilerLog.size()); this->updateBuildLog(this->pDevice, compilerOuput.backendCompilerLog.c_str(), compilerOuput.backendCompilerLog.size()); retVal = asClError(compilerErr); if (retVal != CL_SUCCESS) { break; } this->irBinary = std::move(compilerOuput.intermediateRepresentation.mem); this->irBinarySize = compilerOuput.intermediateRepresentation.size; this->isSpirV = (compilerOuput.intermediateCodeType == IGC::CodeType::spirV); this->debugData = std::move(compilerOuput.debugData.mem); this->debugDataSize = compilerOuput.debugData.size; programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; } updateNonUniformFlag(&*inputProgramsInternal.begin(), inputProgramsInternal.size()); separateBlockKernels(); } while (false);
// Common epilogue, reached on success and on every early exit above.
if (retVal != CL_SUCCESS) { buildStatus = CL_BUILD_ERROR; programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE; } else { buildStatus = CL_BUILD_SUCCESS; } internalOptions.clear(); if (funcNotify != nullptr) { (*funcNotify)(this, userData); } return retVal; }
} // namespace NEO compute-runtime-20.13.16352/opencl/source/program/patch_info.h000066400000000000000000000071721363734646600240760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "patch_g7.h" #include "patch_list.h" #include #include #include #include namespace NEO { using iOpenCL::SKernelBinaryHeaderCommon; using iOpenCL::SPatchAllocateLocalSurface; using iOpenCL::SPatchAllocateStatelessConstantMemorySurfaceWithInitialization; using iOpenCL::SPatchAllocateStatelessDefaultDeviceQueueSurface; using iOpenCL::SPatchAllocateStatelessEventPoolSurface; using
iOpenCL::SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization; using iOpenCL::SPatchAllocateStatelessPrintfSurface; using iOpenCL::SPatchAllocateStatelessPrivateSurface; using iOpenCL::SPatchAllocateSyncBuffer; using iOpenCL::SPatchAllocateSystemThreadSurface; using iOpenCL::SPatchBindingTableState; using iOpenCL::SPatchDataParameterBuffer; using iOpenCL::SPatchDataParameterStream; using iOpenCL::SPatchExecutionEnvironment; using iOpenCL::SPatchGlobalMemoryObjectKernelArgument; using iOpenCL::SPatchGtpinFreeGRFInfo; using iOpenCL::SPatchImageMemoryObjectKernelArgument; using iOpenCL::SPatchInterfaceDescriptorData; using iOpenCL::SPatchKernelArgumentInfo; using iOpenCL::SPatchKernelAttributesInfo; using iOpenCL::SPatchMediaInterfaceDescriptorLoad; using iOpenCL::SPatchMediaVFEState; using iOpenCL::SPatchSamplerKernelArgument; using iOpenCL::SPatchSamplerStateArray; using iOpenCL::SPatchStatelessConstantMemoryObjectKernelArgument; using iOpenCL::SPatchStatelessDeviceQueueKernelArgument; using iOpenCL::SPatchStatelessGlobalMemoryObjectKernelArgument; using iOpenCL::SPatchStateSIP; using iOpenCL::SPatchString; using iOpenCL::SPatchThreadPayload; using iOpenCL::SProgramBinaryHeader;
// Patch tokens decoded for one kernel. Every token pointer defaults to nullptr,
// meaning "token not present", and is stored as a pointer to const; the
// containers hold per-argument tokens and the strings collected from
// SPatchString tokens (keyed by string index).
// NOTE(review): the pointers presumably refer into the program binary and are not owned here - confirm lifetime with the decoder.
struct PatchInfo { const SPatchMediaInterfaceDescriptorLoad *interfaceDescriptorDataLoad = nullptr; const SPatchAllocateLocalSurface *localsurface = nullptr; const SPatchMediaVFEState *mediavfestate = nullptr; const SPatchMediaVFEState *mediaVfeStateSlot1 = nullptr; const SPatchInterfaceDescriptorData *interfaceDescriptorData = nullptr; const SPatchSamplerStateArray *samplerStateArray = nullptr; const SPatchBindingTableState *bindingTableState = nullptr; ::std::vector dataParameterBuffersKernelArgs; ::std::vector statelessGlobalMemObjKernelArgs; ::std::vector imageMemObjKernelArgs; const SPatchDataParameterStream *dataParameterStream = nullptr; const SPatchThreadPayload *threadPayload = nullptr; const SPatchExecutionEnvironment *executionEnvironment = nullptr; const SPatchKernelAttributesInfo *pKernelAttributesInfo = nullptr; const SPatchAllocateStatelessPrivateSurface *pAllocateStatelessPrivateSurface = nullptr; const SPatchAllocateSyncBuffer *pAllocateSyncBuffer = nullptr; const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; const SPatchAllocateStatelessPrintfSurface *pAllocateStatelessPrintfSurface = nullptr; const SPatchAllocateStatelessEventPoolSurface *pAllocateStatelessEventPoolSurface = nullptr; const SPatchAllocateStatelessDefaultDeviceQueueSurface *pAllocateStatelessDefaultDeviceQueueSurface = nullptr; const SPatchAllocateSystemThreadSurface *pAllocateSystemThreadSurface = nullptr; ::std::unordered_map stringDataMap; };
} // namespace NEO compute-runtime-20.13.16352/opencl/source/program/printf_handler.cpp000066400000000000000000000061251363734646600253130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "printf_handler.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/program/print_formatter.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" namespace NEO { PrintfHandler::PrintfHandler(ClDevice &deviceArg) : device(deviceArg) {} PrintfHandler::~PrintfHandler() { device.getMemoryManager()->freeGraphicsMemory(printfSurface); } PrintfHandler *PrintfHandler::create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &device) { if (multiDispatchInfo.usesStatelessPrintfSurface()) { return new PrintfHandler(device); } auto mainKernel = multiDispatchInfo.peekMainKernel(); if
((mainKernel != nullptr) && mainKernel->checkIfIsParentKernelAndBlocksUsesPrintf()) { return new PrintfHandler(device); } return nullptr; } /* Allocates the printf surface for the main kernel of multiDispatchInfo and patches its GPU address into the kernel's cross-thread data. Returns early, allocating nothing, when the device reports printfBufferSize == 0. NOTE(review): the allocation result and patchInfo.pAllocateStatelessPrintfSurface are dereferenced without a null check - confirm that callers guarantee both. */ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) { auto printfSurfaceSize = device.getSharedDeviceInfo().printfBufferSize; if (printfSurfaceSize == 0) { return; } kernel = multiDispatchInfo.peekMainKernel(); printfSurface = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), printfSurfaceSize, GraphicsAllocation::AllocationType::PRINTF_SURFACE}); /* seed the start of the surface with printfSurfaceInitialDataSize */ *reinterpret_cast(printfSurface->getUnderlyingBuffer()) = printfSurfaceInitialDataSize; /* patch location inside cross-thread data is given by DataParamOffset / DataParamSize of the printf surface patch token */ auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel->getCrossThreadData()), kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset); patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch()); /* kernels that require SSH for buffers additionally get a surface state programmed at SurfaceStateHeapOffset */ if (kernel->requiresSshForBuffers()) { auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset); void *addressToPatch = printfSurface->getUnderlyingBuffer(); size_t sizeToPatch = printfSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device.getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0); } } /* Makes the printf surface resident on the given command stream receiver. */ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) { commandStreamReceiver.makeResident(*printfSurface); } /* Builds a PrintFormatter over the printf surface's underlying buffer and the kernel's stringDataMap, then prints the kernel output. */ void PrintfHandler::printEnqueueOutput() { PrintFormatter printFormatter(reinterpret_cast(printfSurface->getUnderlyingBuffer()), static_cast(printfSurface->getUnderlyingBufferSize()), kernel->is32Bit(), kernel->getKernelInfo().patchInfo.stringDataMap); printFormatter.printKernelOutput(); } } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/program/printf_handler.h000066400000000000000000000016351363734646600247610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/kernel/kernel.h" namespace NEO { class ClDevice; struct MultiDispatchInfo; /* Owns the printf surface used by one enqueue. Instances are obtained through create(); the destructor releases the surface through the device's memory manager. */ class PrintfHandler { public: /* Returns a new handler when the dispatch uses a stateless printf surface, or when its main kernel is a parent kernel whose blocks use printf; returns nullptr otherwise. */ static PrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &deviceArg); ~PrintfHandler(); /* Allocates the printf surface and patches it into the main kernel. */ void prepareDispatch(const MultiDispatchInfo &multiDispatchInfo); /* Makes the printf surface resident on commandStreamReceiver. */ void makeResident(CommandStreamReceiver &commandStreamReceiver); /* Formats and prints what the kernel wrote into the surface. */ void printEnqueueOutput(); /* Printf surface allocation; nullptr until prepareDispatch() has allocated one. */ GraphicsAllocation *getSurface() { return printfSurface; } protected: /* Construction is restricted; use create(). */ PrintfHandler(ClDevice &device); /* Value written at the start of a freshly allocated printf surface. */ static const uint32_t printfSurfaceInitialDataSize = sizeof(uint32_t); ClDevice &device; Kernel *kernel = nullptr; GraphicsAllocation *printfSurface = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/process_device_binary.cpp000066400000000000000000000255361363734646600266640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/program/program_info.h" #include "shared/source/program/program_initialization.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" #include "program_debug_data.h" #include using 
namespace iOpenCL; namespace NEO { extern bool familyEnabled[]; /* Returns the KernelInfo whose name equals kernelName (strcmp match), or nullptr when kernelName is null or no kernel matches. */ const KernelInfo *Program::getKernelInfo( const char *kernelName) const { if (kernelName == nullptr) { return nullptr; } auto it = std::find_if(kernelInfoArray.begin(), kernelInfoArray.end(), [=](const KernelInfo *kInfo) { return (0 == strcmp(kInfo->name.c_str(), kernelName)); }); return (it != kernelInfoArray.end()) ? *it : nullptr; } /* Number of entries in kernelInfoArray. */ size_t Program::getNumKernels() const { return kernelInfoArray.size(); } /* Returns kernelInfoArray[ordinal]. An out-of-range ordinal only triggers DEBUG_BREAK_IF; the index is not otherwise checked. */ const KernelInfo *Program::getKernelInfo(size_t ordinal) const { DEBUG_BREAK_IF(ordinal >= kernelInfoArray.size()); return kernelInfoArray[ordinal]; } /* Links the program using linkerInput. Describes the global, constant and exported-function segments to the Linker, lets it patch temporary copies of the kernel ISA when requiresPatchingOfInstructionSegments is set, stores the relocated symbols in this->symbols and copies the patched ISA into the kernel allocations. Returns CL_SUCCESS when there is no linkerInput or linking succeeds; on failure the linker error message is added to the build log and CL_INVALID_BINARY is returned. */ cl_int Program::linkBinary() { if (linkerInput == nullptr) { return CL_SUCCESS; } Linker linker(*linkerInput); Linker::SegmentInfo globals; Linker::SegmentInfo constants; Linker::SegmentInfo exportedFunctions; Linker::PatchableSegment globalsForPatching; Linker::PatchableSegment constantsForPatching; if (this->globalSurface != nullptr) { globals.gpuAddress = static_cast(this->globalSurface->getGpuAddress()); globals.segmentSize = this->globalSurface->getUnderlyingBufferSize(); globalsForPatching.hostPointer = this->globalSurface->getUnderlyingBuffer(); globalsForPatching.segmentSize = this->globalSurface->getUnderlyingBufferSize(); } if (this->constantSurface != nullptr) { constants.gpuAddress = static_cast(this->constantSurface->getGpuAddress()); constants.segmentSize = this->constantSurface->getUnderlyingBufferSize(); constantsForPatching.hostPointer = this->constantSurface->getUnderlyingBuffer(); constantsForPatching.segmentSize = this->constantSurface->getUnderlyingBufferSize(); } if (this->linkerInput->getExportedFunctionsSegmentId() >= 0) { // Exported functions reside in instruction heap of one of kernels auto exportedFunctionHeapId = this->linkerInput->getExportedFunctionsSegmentId(); this->exportedFunctionsSurface = this->kernelInfoArray[exportedFunctionHeapId]->getGraphicsAllocation(); exportedFunctions.gpuAddress = 
static_cast(exportedFunctionsSurface->getGpuAddressToPatch()); exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize(); } Linker::PatchableSegments isaSegmentsForPatching; std::vector> patchedIsaTempStorage; /* the linker patches temporary copies of each kernel heap; the copies are uploaded after a successful link */ if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) { patchedIsaTempStorage.reserve(this->kernelInfoArray.size()); for (const auto &kernelInfo : this->kernelInfoArray) { auto &kernHeapInfo = kernelInfo->heapInfo; const char *originalIsa = reinterpret_cast(kernHeapInfo.pKernelHeap); patchedIsaTempStorage.push_back(std::vector(originalIsa, originalIsa + kernHeapInfo.pKernelHeader->KernelHeapSize)); isaSegmentsForPatching.push_back(Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.pKernelHeader->KernelHeapSize}); } } Linker::UnresolvedExternals unresolvedExternalsInfo; bool linkSuccess = linker.link(globals, constants, exportedFunctions, globalsForPatching, constantsForPatching, isaSegmentsForPatching, unresolvedExternalsInfo); this->symbols = linker.extractRelocatedSymbols(); if (false == linkSuccess) { std::vector kernelNames; for (const auto &kernelInfo : this->kernelInfoArray) { kernelNames.push_back("kernel : " + kernelInfo->name); } auto error = constructLinkerErrorMessage(unresolvedExternalsInfo, kernelNames); updateBuildLog(pDevice, error.c_str(), error.size()); return CL_INVALID_BINARY; } else if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) { for (const auto &kernelInfo : this->kernelInfoArray) { if (nullptr == kernelInfo->getGraphicsAllocation()) { continue; } auto &kernHeapInfo = kernelInfo->heapInfo; /* index of this kernel in kernelInfoArray, computed as a pointer difference on the range-for reference; it selects the matching patched ISA segment */ auto segmentId = &kernelInfo - &this->kernelInfoArray[0]; this->pDevice->getMemoryManager()->copyMemoryToAllocation(kernelInfo->getGraphicsAllocation(), isaSegmentsForPatching[segmentId].hostPointer, kernHeapInfo.pKernelHeader->KernelHeapSize); } } return CL_SUCCESS; } /* Decodes unpackedDeviceBinary into a ProgramInfo and passes it to processProgramInfo(). The current kernel infos and the constant/global surfaces are released first. Returns CL_INVALID_BINARY when there is no unpacked binary or decoding fails; decode warnings and errors are emitted through printDebugString. */ cl_int Program::processGenBinary() { if (nullptr == this->unpackedDeviceBinary) { return 
CL_INVALID_BINARY; } cleanCurrentKernelInfo(); if (this->constantSurface || this->globalSurface) { pDevice->getMemoryManager()->freeGraphicsMemory(this->constantSurface); pDevice->getMemoryManager()->freeGraphicsMemory(this->globalSurface); this->constantSurface = nullptr; this->globalSurface = nullptr; } ProgramInfo programInfo; auto blob = ArrayRef(reinterpret_cast(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize); SingleDeviceBinary binary = {}; binary.deviceBinary = blob; std::string decodeErrors; std::string decodeWarnings; DecodeError decodeError; DeviceBinaryFormat singleDeviceBinaryFormat; std::tie(decodeError, singleDeviceBinaryFormat) = NEO::decodeSingleDeviceBinary(programInfo, binary, decodeErrors, decodeWarnings); if (decodeWarnings.empty() == false) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if (DecodeError::Success != decodeError) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); return CL_INVALID_BINARY; } return this->processProgramInfo(programInfo); } /* Takes over the decoded contents of src (linker input and kernel infos), allocates the global constant and global variable surfaces, creates a kernel allocation for every kernel with a non-empty heap, applies the device constants to each kernel and finally calls linkBinary(). Returns CL_OUT_OF_RESOURCES when the kernels need more inline SLM than is available (0 when there is no device), CL_OUT_OF_HOST_MEMORY when a kernel allocation cannot be created, otherwise the result of linkBinary(). */ cl_int Program::processProgramInfo(ProgramInfo &src) { size_t slmNeeded = getMaxInlineSlmNeeded(src); size_t slmAvailable = 0U; NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; if (this->pDevice) { slmAvailable = static_cast(this->pDevice->getDeviceInfo().localMemSize); deviceInfoConstants.maxWorkGroupSize = (uint32_t)this->pDevice->getDeviceInfo().maxWorkGroupSize; deviceInfoConstants.computeUnitsUsedForScratch = this->pDevice->getDeviceInfo().computeUnitsUsedForScratch; deviceInfoConstants.slmWindowSize = (uint32_t)this->pDevice->getDeviceInfo().localMemSize; if (requiresLocalMemoryWindowVA(src)) { deviceInfoConstants.slmWindow = this->executionEnvironment.memoryManager->getReservedMemory(MemoryConstants::slmWindowSize, MemoryConstants::slmWindowAlignment); } } if (slmNeeded > slmAvailable) { return CL_OUT_OF_RESOURCES; } this->linkerInput = 
std::move(src.linkerInput); this->kernelInfoArray = std::move(src.kernelInfos); auto svmAllocsManager = context ? context->getSVMAllocsManager() : nullptr; if (src.globalConstants.size != 0) { UNRECOVERABLE_IF(nullptr == pDevice); this->constantSurface = allocateGlobalsSurface(svmAllocsManager, *pDevice, src.globalConstants.size, true, linkerInput.get(), src.globalConstants.initData); } if (src.globalVariables.size != 0) { UNRECOVERABLE_IF(nullptr == pDevice); this->globalSurface = allocateGlobalsSurface(svmAllocsManager, *pDevice, src.globalVariables.size, false, linkerInput.get(), src.globalVariables.initData); } this->globalVarTotalSize = src.globalVariables.size; for (auto &kernelInfo : this->kernelInfoArray) { cl_int retVal = CL_SUCCESS; if (kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && this->pDevice) { retVal = kernelInfo->createKernelAllocation(this->pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY; } DEBUG_BREAK_IF(kernelInfo->heapInfo.pKernelHeader->KernelHeapSize && !this->pDevice); if (retVal != CL_SUCCESS) { return retVal; } /* remember kernels that use device enqueue or require subgroup independent forward progress; separateBlockKernels() consults both lists */ if (kernelInfo->hasDeviceEnqueue()) { parentKernelInfoArray.push_back(kernelInfo); } if (kernelInfo->requiresSubgroupIndependentForwardProgress()) { subgroupKernelInfoArray.push_back(kernelInfo); } kernelInfo->apply(deviceInfoConstants); } return linkBinary(); } /* Walks the IGC debug-data blob, if present, and points every kernel's debugData (vIsa / genIsa pointers and sizes) at its part of the blob. The blob is read as a program header followed, for each kernel, by a kernel header, the kernel name, the vISA debug data and the Gen ISA debug data. The i-th entry must carry the name of kernelInfoArray[i], otherwise UNRECOVERABLE_IF fires. */ void Program::processDebugData() { if (debugData != nullptr) { SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast(debugData.get()); DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != kernelInfoArray.size()); const SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast(ptrOffset(programDebugHeader, sizeof(SProgramDebugDataHeaderIGC))); const char *kernelName = nullptr; const char *kernelDebugData = nullptr; for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) { kernelName = reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC))); auto kernelInfo = 
kernelInfoArray[i]; UNRECOVERABLE_IF(kernelInfo->name.compare(0, kernelInfo->name.size(), kernelName) != 0); kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize); kernelInfo->debugData.vIsa = kernelDebugData; kernelInfo->debugData.genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes); kernelInfo->debugData.vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes; kernelInfo->debugData.genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes; /* advance past both debug sections to reach the next kernel header */ kernelDebugData = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes + kernelDebugHeader->SizeGenIsaDbgInBytes); kernelDebugHeader = reinterpret_cast(kernelDebugData); } } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/process_intermediate_binary.cpp000066400000000000000000000007531363734646600300710ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "program.h" namespace NEO { /* Stores a copy of the given intermediate binary as irBinary, marks the program as CL_PROGRAM_BINARY_TYPE_INTERMEDIATE with build status CL_BUILD_NONE and records whether the binary is SPIR-V. Always returns CL_SUCCESS. */ cl_int Program::processSpirBinary( const void *pBinary, size_t binarySize, bool isSpirV) { programBinaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE; this->irBinary = makeCopy(pBinary, binarySize); this->irBinarySize = binarySize; buildStatus = CL_BUILD_NONE; this->isSpirV = isSpirV; return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/program.cpp000066400000000000000000000475621363734646600237750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "program.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/debug_helpers.h" #include 
"shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/program/kernel_info.h" #include "compiler_options.h" #include namespace NEO { const std::string Program::clOptNameClVer("-cl-std=CL"); Program::Program(ExecutionEnvironment &executionEnvironment) : Program(executionEnvironment, nullptr, false, nullptr) { numDevices = 0; } Program::Program(ExecutionEnvironment &executionEnvironment, Context *context, bool isBuiltIn, Device *device) : executionEnvironment(executionEnvironment), context(context), pDevice(device), isBuiltIn(isBuiltIn) { if (this->context && !this->isBuiltIn) { this->context->incRefInternal(); } blockKernelManager = new BlockKernelManager(); ClDevice *pClDevice = nullptr; if (context != nullptr) { pClDevice = context->getDevice(0); if (pDevice == nullptr) { pDevice = &pClDevice->getDevice(); } } else if (pDevice != nullptr) { auto pSpecializedDevice = castToObject(pDevice->getSpecializedDevice()); if (pSpecializedDevice != nullptr) { pClDevice = pSpecializedDevice; } } numDevices = 1; char paramValue[32] = {}; bool force32BitAddressess = false; if (pClDevice) { pClDevice->getDeviceInfo(CL_DEVICE_VERSION, 32, paramValue, nullptr); if (strstr(paramValue, "2.1")) { internalOptions = "-ocl-version=210 "; } else if (strstr(paramValue, "2.0")) { internalOptions = "-ocl-version=200 "; } else if (strstr(paramValue, "1.2")) { internalOptions = "-ocl-version=120 "; } force32BitAddressess = pClDevice->getSharedDeviceInfo().force32BitAddressess; if (force32BitAddressess) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit); } if 
(pClDevice->areSharedSystemAllocationsAllowed() || DebugManager.flags.DisableStatelessToStatefulOptimization.get()) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } if (DebugManager.flags.UseBindlessBuffers.get()) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::bindlessBuffers); } if (DebugManager.flags.UseBindlessImages.get()) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::bindlessImages); } kernelDebugEnabled = pClDevice->isDebuggerActive(); auto enableStatelessToStatefullWithOffset = pClDevice->getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported; if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) { enableStatelessToStatefullWithOffset = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0; } if (enableStatelessToStatefullWithOffset) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg); } auto &hwHelper = HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.isForceEmuInt32DivRemSPWARequired(pClDevice->getHardwareInfo())) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP); } } CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::preserveVec3Type); } Program::~Program() { cleanCurrentKernelInfo(); freeBlockResources(); delete blockKernelManager; if (constantSurface) { if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && (context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(constantSurface->getGpuAddress())))) { context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(constantSurface->getGpuAddress())); } else { this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(constantSurface); } constantSurface = nullptr; } if (globalSurface) { if ((nullptr != context) && (nullptr != context->getSVMAllocsManager()) && 
(context->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(globalSurface->getGpuAddress())))) { context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(globalSurface->getGpuAddress())); } else { this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(globalSurface); } globalSurface = nullptr; } if (context && !isBuiltIn) { context->decRefInternal(); } } cl_int Program::createProgramFromBinary( const void *pBinary, size_t binarySize) { cl_int retVal = CL_INVALID_BINARY; this->irBinary.reset(); this->irBinarySize = 0U; this->isSpirV = false; this->unpackedDeviceBinary.reset(); this->unpackedDeviceBinarySize = 0U; this->packedDeviceBinary.reset(); this->packedDeviceBinarySize = 0U; this->createdFrom = CreatedFrom::BINARY; ArrayRef archive(reinterpret_cast(pBinary), binarySize); bool isSpirV = NEO::isSpirVBitcode(archive); if (isSpirV || NEO::isLlvmBitcode(archive)) { this->programBinaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE; retVal = processSpirBinary(archive.begin(), archive.size(), isSpirV); } else if (isAnyDeviceBinaryFormat(archive)) { this->programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; this->isCreatedFromBinary = true; auto productAbbreviation = hardwarePrefix[pDevice->getHardwareInfo().platform.eProductFamily]; TargetDevice targetDevice = {}; targetDevice.coreFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; targetDevice.stepping = pDevice->getHardwareInfo().platform.usRevId; targetDevice.maxPointerSizeInBytes = sizeof(uintptr_t); std::string decodeErrors; std::string decodeWarnings; auto singleDeviceBinary = unpackSingleDeviceBinary(archive, ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice, decodeErrors, decodeWarnings); if (decodeWarnings.empty() == false) { printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str()); } if (singleDeviceBinary.intermediateRepresentation.empty() && singleDeviceBinary.deviceBinary.empty()) { 
retVal = CL_INVALID_BINARY; printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str()); } else { retVal = CL_SUCCESS; this->irBinary = makeCopy(reinterpret_cast(singleDeviceBinary.intermediateRepresentation.begin()), singleDeviceBinary.intermediateRepresentation.size()); this->irBinarySize = singleDeviceBinary.intermediateRepresentation.size(); this->isSpirV = NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(this->irBinary.get()), this->irBinarySize)); this->options = singleDeviceBinary.buildOptions.str(); if ((false == singleDeviceBinary.deviceBinary.empty()) && (false == DebugManager.flags.RebuildPrecompiledKernels.get())) { this->unpackedDeviceBinary = makeCopy(reinterpret_cast(singleDeviceBinary.deviceBinary.begin()), singleDeviceBinary.deviceBinary.size()); this->unpackedDeviceBinarySize = singleDeviceBinary.deviceBinary.size(); this->packedDeviceBinary = makeCopy(reinterpret_cast(archive.begin()), archive.size()); this->packedDeviceBinarySize = archive.size(); } else { this->isCreatedFromBinary = false; } switch (singleDeviceBinary.format) { default: break; case DeviceBinaryFormat::OclLibrary: this->programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; break; case DeviceBinaryFormat::OclCompiledObject: this->programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; break; } } } return retVal; } cl_int Program::setProgramSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) { if (!isSpirV) { return CL_INVALID_PROGRAM; } static std::mutex mutex; std::lock_guard lock(mutex); if (!areSpecializationConstantsInitialized) { auto pCompilerInterface = this->pDevice->getCompilerInterface(); if (nullptr == pCompilerInterface) { return CL_OUT_OF_HOST_MEMORY; } SpecConstantInfo specConstInfo; auto retVal = pCompilerInterface->getSpecConstantsInfo(this->getDevice(), ArrayRef(irBinary.get(), irBinarySize), specConstInfo); if (retVal != TranslationOutput::ErrorCode::Success) { return CL_INVALID_VALUE; } 
this->specConstantsIds.reset(specConstInfo.idsBuffer.release()); this->specConstantsSizes.reset(specConstInfo.sizesBuffer.release()); areSpecializationConstantsInitialized = true; } return updateSpecializationConstant(specId, specSize, specValue); } /* Records specValue for specId in specConstantsValues. Returns CL_INVALID_SPEC_ID when specId is not among specConstantsIds, CL_INVALID_VALUE when specSize differs from the size recorded for that id, and CL_SUCCESS otherwise. The value is copied into a zero-initialized uint64_t. */ cl_int Program::updateSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) { for (uint32_t i = 0; i < specConstantsIds->GetSize(); i++) { if (specConstantsIds->GetMemory()[i] == specId) { if (specConstantsSizes->GetMemory()[i] == static_cast(specSize)) { uint64_t specConstValue = 0u; memcpy_s(&specConstValue, sizeof(uint64_t), specValue, specSize); specConstantsValues[specId] = specConstValue; return CL_SUCCESS; } else { return CL_INVALID_VALUE; } } } return CL_INVALID_SPEC_ID; } /* Replaces the device this program refers to. */ void Program::setDevice(Device *device) { this->pDevice = device; } /* Copies sourceCode into binary and returns CL_SUCCESS. When the program has no source, binary is left empty and CL_INVALID_PROGRAM is returned. */ cl_int Program::getSource(std::string &binary) const { cl_int retVal = CL_INVALID_PROGRAM; binary = {}; if (!sourceCode.empty()) { binary = sourceCode; retVal = CL_SUCCESS; } return retVal; } /* Adds pErrorString to the build log kept for pDevice; entries after the first are separated by a newline. Null, zero-length and empty strings are ignored, and a terminating NUL counted in errorStringSize is dropped. */ void Program::updateBuildLog(const Device *pDevice, const char *pErrorString, size_t errorStringSize) { if ((pErrorString == nullptr) || (errorStringSize == 0) || (pErrorString[0] == '\0')) { return; } if (pErrorString[errorStringSize - 1] == '\0') { --errorStringSize; } auto it = buildLog.find(pDevice); if (it == buildLog.end()) { buildLog[pDevice].assign(pErrorString, pErrorString + errorStringSize); return; } buildLog[pDevice].append("\n"); buildLog[pDevice].append(pErrorString, pErrorString + errorStringSize); } /* Returns the build log recorded for pDevice as a C string, or nullptr when nothing was logged for that device. */ const char *Program::getBuildLog(const Device *pDevice) const { const char *entry = nullptr; auto it = buildLog.find(pDevice); if (it != buildLog.end()) { entry = it->second.c_str(); } return entry; } /* Moves block (child) kernels out of kernelInfoArray into blockKernelManager. A kernel is treated as a block kernel when its name contains "_dispatch_" and the part before the last "_dispatch_" equals the name of a kernel in parentKernelInfoArray or subgroupKernelInfoArray. Does nothing when both of those arrays are empty. */ void Program::separateBlockKernels() { if ((0 == parentKernelInfoArray.size()) && (0 == subgroupKernelInfoArray.size())) { return; } auto allKernelInfos(kernelInfoArray); kernelInfoArray.clear(); for (auto &i : allKernelInfos) { auto end = 
i->name.rfind("_dispatch_"); if (end != std::string::npos) { bool baseKernelFound = false; std::string baseKernelName(i->name, 0, end); for (auto &j : parentKernelInfoArray) { if (j->name.compare(baseKernelName) == 0) { baseKernelFound = true; break; } } if (!baseKernelFound) { for (auto &j : subgroupKernelInfoArray) { if (j->name.compare(baseKernelName) == 0) { baseKernelFound = true; break; } } } if (baseKernelFound) { //Parent or subgroup kernel found -> child kernel blockKernelManager->addBlockKernelInfo(i); } else { kernelInfoArray.push_back(i); } } else { //Regular kernel found kernelInfoArray.push_back(i); } } allKernelInfos.clear(); } /* For every block kernel that declares a stateless private surface with a non-zero per-thread size and has no private surface yet, allocates one of PerThreadPrivateMemorySize * computeUnitsUsedForScratch * max SIMD size bytes on rootDeviceIndex and registers it with blockKernelManager. */ void Program::allocateBlockPrivateSurfaces(uint32_t rootDeviceIndex) { size_t blockCount = blockKernelManager->getCount(); for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *info = blockKernelManager->getBlockKernelInfo(i); if (info->patchInfo.pAllocateStatelessPrivateSurface) { size_t privateSize = info->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize; if (privateSize > 0 && blockKernelManager->getPrivateSurface(i) == nullptr) { privateSize *= getDevice().getDeviceInfo().computeUnitsUsedForScratch * info->getMaxSimdSize(); auto *privateSurface = this->executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, privateSize, GraphicsAllocation::AllocationType::PRIVATE_SURFACE}); blockKernelManager->pushPrivateSurface(privateSurface, i); } } } } /* Frees each block kernel's private surface (after clearing the manager's slot for it) and its kernel allocation. */ void Program::freeBlockResources() { size_t blockCount = blockKernelManager->getCount(); for (uint32_t i = 0; i < blockCount; i++) { auto *privateSurface = blockKernelManager->getPrivateSurface(i); if (privateSurface != nullptr) { blockKernelManager->pushPrivateSurface(nullptr, i); this->executionEnvironment.memoryManager->freeGraphicsMemory(privateSurface); } auto kernelInfo = blockKernelManager->getBlockKernelInfo(i); DEBUG_BREAK_IF(!kernelInfo->kernelAllocation); if (kernelInfo->kernelAllocation) { 
this->executionEnvironment.memoryManager->freeGraphicsMemory(kernelInfo->kernelAllocation); } } } /* Deletes every KernelInfo in kernelInfoArray together with its kernel allocation and clears the array. Before an allocation is released, an instruction cache flush is registered on each engine whose OS context used it. */ void Program::cleanCurrentKernelInfo() { for (auto &kernelInfo : kernelInfoArray) { if (kernelInfo->kernelAllocation) { //register cache flush in all csrs where kernel allocation was used for (auto &engine : this->executionEnvironment.memoryManager->getRegisteredEngines()) { auto contextId = engine.osContext->getContextId(); if (kernelInfo->kernelAllocation->isUsedByOsContext(contextId)) { engine.commandStreamReceiver->registerInstructionCacheFlush(); } } this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation); } delete kernelInfo; } kernelInfoArray.clear(); } /* Derives programOptionVersion (major * 10 + minor; 12 when the option is absent) from the text following clOptNameClVer in the build options, and sets allowNonUniform when that version is at least 20 and the uniform work-group size option is not present. NOTE(review): this overload only ever sets allowNonUniform to true and never resets it - confirm that is intended when options change between builds. */ void Program::updateNonUniformFlag() { //Look for -cl-std=CL substring and extract value behind which can be 1.2 2.0 2.1 and convert to value auto pos = options.find(clOptNameClVer); if (pos == std::string::npos) { programOptionVersion = 12u; //Default is 1.2 } else { std::stringstream ss{options.c_str() + pos + clOptNameClVer.size()}; uint32_t majorV = 0u, minorV = 0u; char dot = 0u; ss >> majorV; ss >> dot; ss >> minorV; programOptionVersion = majorV * 10u + minorV; } if (programOptionVersion >= 20u && (false == CompilerOptions::contains(options, CompilerOptions::uniformWorkgroupSize))) { allowNonUniform = true; } } /* Sets allowNonUniform to the logical AND of getAllowNonUniform() over all input programs (true when numInputPrograms is 0). */ void Program::updateNonUniformFlag(const Program **inputPrograms, size_t numInputPrograms) { bool allowNonUniform = true; for (cl_uint i = 0; i < numInputPrograms; i++) { allowNonUniform = allowNonUniform && inputPrograms[i]->getAllowNonUniform(); } this->allowNonUniform = allowNonUniform; } /* Installs newBinary as the packed device binary when isAnyPackedDeviceBinaryFormat() recognizes it, otherwise as the unpacked device binary; the other representation is reset. */ void Program::replaceDeviceBinary(std::unique_ptr newBinary, size_t newBinarySize) { if (isAnyPackedDeviceBinaryFormat(ArrayRef(reinterpret_cast(newBinary.get()), newBinarySize))) { this->packedDeviceBinary = std::move(newBinary); this->packedDeviceBinarySize = newBinarySize; this->unpackedDeviceBinary.reset(); this->unpackedDeviceBinarySize = 0U; } else 
{ this->packedDeviceBinary.reset(); this->packedDeviceBinarySize = 0U; this->unpackedDeviceBinary = std::move(newBinary); this->unpackedDeviceBinarySize = newBinarySize; } } cl_int Program::packDeviceBinary() { if (nullptr != packedDeviceBinary) { return CL_SUCCESS; } auto gfxCore = pDevice->getHardwareInfo().platform.eRenderCoreFamily; auto stepping = pDevice->getHardwareInfo().platform.usRevId; if (nullptr != this->unpackedDeviceBinary.get()) { SingleDeviceBinary singleDeviceBinary; singleDeviceBinary.buildOptions = this->options; singleDeviceBinary.targetDevice.coreFamily = gfxCore; singleDeviceBinary.targetDevice.stepping = stepping; singleDeviceBinary.deviceBinary = ArrayRef(reinterpret_cast(this->unpackedDeviceBinary.get()), this->unpackedDeviceBinarySize); singleDeviceBinary.intermediateRepresentation = ArrayRef(reinterpret_cast(this->irBinary.get()), this->irBinarySize); std::string packWarnings; std::string packErrors; auto packedDeviceBinary = NEO::packDeviceBinary(singleDeviceBinary, packErrors, packWarnings); if (packedDeviceBinary.empty()) { DEBUG_BREAK_IF(true); return CL_OUT_OF_HOST_MEMORY; } this->packedDeviceBinary = makeCopy(packedDeviceBinary.data(), packedDeviceBinary.size()); this->packedDeviceBinarySize = packedDeviceBinary.size(); } else if (nullptr != this->irBinary.get()) { NEO::Elf::ElfEncoder<> elfEncoder(true, true, 1U); if (this->programBinaryType == CL_PROGRAM_BINARY_TYPE_LIBRARY) { elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_LIBRARY; } else { elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS; } elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV, NEO::Elf::SectionNamesOpenCl::spirvObject, ArrayRef::fromAny(this->irBinary.get(), this->irBinarySize)); elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_OPTIONS, NEO::Elf::SectionNamesOpenCl::buildOptions, this->options); auto elfData = elfEncoder.encode(); this->packedDeviceBinary = makeCopy(elfData.data(), elfData.size()); this->packedDeviceBinarySize = 
elfData.size(); } else { return CL_INVALID_PROGRAM; } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/program/program.h000066400000000000000000000241361363734646600234320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/linker.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/program/program_info.h" #include "shared/source/utilities/const_stringref.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" #include "cif/builtins/memory/buffer/buffer.h" #include "patch_list.h" #include #include namespace NEO { namespace PatchTokenBinary { struct ProgramFromPatchtokens; } class BlockKernelManager; class BuiltinDispatchInfoBuilder; class ClDevice; class Context; class CompilerInterface; class Device; class ExecutionEnvironment; struct KernelInfo; template <> struct OpenCLObjectMapper<_cl_program> { typedef class Program DerivedType; }; constexpr cl_int asClError(TranslationOutput::ErrorCode err) { switch (err) { default: return CL_OUT_OF_HOST_MEMORY; case TranslationOutput::ErrorCode::Success: return CL_SUCCESS; case TranslationOutput::ErrorCode::CompilerNotAvailable: return CL_COMPILER_NOT_AVAILABLE; case TranslationOutput::ErrorCode::CompilationFailure: return CL_COMPILE_PROGRAM_FAILURE; case TranslationOutput::ErrorCode::BuildFailure: return CL_BUILD_PROGRAM_FAILURE; case TranslationOutput::ErrorCode::LinkFailure: return CL_LINK_PROGRAM_FAILURE; } } class Program : public BaseObject<_cl_program> { public: static const cl_ulong objectMagic = 0x5651C89100AAACFELL; enum class CreatedFrom { SOURCE, IL, BINARY, UNKNOWN }; // Create program from binary template static T *create( cl_context context, cl_uint numDevices, const cl_device_id *deviceList, const size_t *lengths, const 
unsigned char **binaries, cl_int *binaryStatus, cl_int &errcodeRet); // Create program from source template static T *create( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int &errcodeRet); template static T *create( const char *nullTerminatedString, Context *context, ClDevice &device, bool isBuiltIn, cl_int *errcodeRet); template static T *create( const char *nullTerminatedString, Context *context, Device &device, bool isBuiltIn, cl_int *errcodeRet); template static T *createFromGenBinary( ExecutionEnvironment &executionEnvironment, Context *context, const void *binary, size_t size, bool isBuiltIn, cl_int *errcodeRet, Device *device); template static T *createFromIL(Context *context, const void *il, size_t length, cl_int &errcodeRet); Program(ExecutionEnvironment &executionEnvironment, Context *context, bool isBuiltIn, Device *device); ~Program() override; Program(const Program &) = delete; Program &operator=(const Program &) = delete; cl_int build(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData, bool enableCaching); cl_int build(const Device *pDevice, const char *buildOptions, bool enableCaching, std::unordered_map &builtinsMap); MOCKABLE_VIRTUAL cl_int processGenBinary(); MOCKABLE_VIRTUAL cl_int processProgramInfo(ProgramInfo &dst); cl_int compile(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions, cl_uint numInputHeaders, const cl_program *inputHeaders, const char **headerIncludeNames, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData); cl_int link(cl_uint numDevices, const cl_device_id *deviceList, const char *buildOptions, cl_uint numInputPrograms, const cl_program *inputPrograms, void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData); cl_int setProgramSpecializationConstant(cl_uint specId, size_t specSize, const void 
*specValue); MOCKABLE_VIRTUAL cl_int updateSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue); size_t getNumKernels() const; const KernelInfo *getKernelInfo(const char *kernelName) const; const KernelInfo *getKernelInfo(size_t ordinal) const; cl_int getInfo(cl_program_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); cl_int getBuildInfo(cl_device_id device, cl_program_build_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_build_status getBuildStatus() const { return buildStatus; } Context &getContext() const { return *context; } Context *getContextPtr() const { return context; } ExecutionEnvironment &peekExecutionEnvironment() const { return executionEnvironment; } const Device &getDevice() const { UNRECOVERABLE_IF(pDevice == nullptr); return *pDevice; } void setDevice(Device *device); cl_int processSpirBinary(const void *pBinary, size_t binarySize, bool isSpirV); cl_int getSource(std::string &binary) const; void processDebugData(); void updateBuildLog(const Device *pDevice, const char *pErrorString, const size_t errorStringSize); const char *getBuildLog(const Device *pDevice) const; cl_uint getProgramBinaryType() const { return programBinaryType; } bool getIsSpirV() const { return isSpirV; } GraphicsAllocation *getConstantSurface() const { return constantSurface; } GraphicsAllocation *getGlobalSurface() const { return globalSurface; } GraphicsAllocation *getExportedFunctionsSurface() const { return exportedFunctionsSurface; } BlockKernelManager *getBlockKernelManager() const { return blockKernelManager; } void allocateBlockPrivateSurfaces(uint32_t rootDeviceIndex); void freeBlockResources(); void cleanCurrentKernelInfo(); const std::string &getOptions() const { return options; } const std::string &getInternalOptions() const { return internalOptions; } bool getAllowNonUniform() const { return allowNonUniform; } bool getIsBuiltIn() const { return isBuiltIn; } 
uint32_t getProgramOptionVersion() const { return programOptionVersion; } void enableKernelDebug() { kernelDebugEnabled = true; } bool isKernelDebugEnabled() { return kernelDebugEnabled; } char *getDebugData() { return debugData.get(); } size_t getDebugDataSize() { return debugDataSize; } const Linker::RelocatedSymbolsMap &getSymbols() const { return this->symbols; } LinkerInput *getLinkerInput() const { return this->linkerInput.get(); } MOCKABLE_VIRTUAL void replaceDeviceBinary(std::unique_ptr newBinary, size_t newBinarySize); protected: Program(ExecutionEnvironment &executionEnvironment); MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize); cl_int packDeviceBinary(); MOCKABLE_VIRTUAL cl_int linkBinary(); void separateBlockKernels(); void updateNonUniformFlag(); void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms); void extractInternalOptions(const std::string &options); MOCKABLE_VIRTUAL bool isFlagOption(ConstStringRef option); MOCKABLE_VIRTUAL bool isOptionValueValid(ConstStringRef option, ConstStringRef value); MOCKABLE_VIRTUAL void applyAdditionalOptions(); MOCKABLE_VIRTUAL bool appendKernelDebugOptions(); void notifyDebuggerWithSourceCode(std::string &filename); static const std::string clOptNameClVer; cl_program_binary_type programBinaryType = CL_PROGRAM_BINARY_TYPE_NONE; bool isSpirV = false; std::unique_ptr irBinary; size_t irBinarySize = 0U; std::unique_ptr unpackedDeviceBinary; size_t unpackedDeviceBinarySize = 0U; std::unique_ptr packedDeviceBinary; size_t packedDeviceBinarySize = 0U; std::unique_ptr debugData; size_t debugDataSize = 0U; CreatedFrom createdFrom = CreatedFrom::UNKNOWN; std::vector kernelInfoArray; std::vector parentKernelInfoArray; std::vector subgroupKernelInfoArray; GraphicsAllocation *constantSurface = nullptr; GraphicsAllocation *globalSurface = nullptr; GraphicsAllocation *exportedFunctionsSurface = nullptr; size_t globalVarTotalSize = 0U; cl_build_status buildStatus = 
CL_BUILD_NONE; bool isCreatedFromBinary = false; std::string sourceCode; std::string options; std::string internalOptions; static const std::vector internalOptionsToExtract; uint32_t programOptionVersion = 12U; bool allowNonUniform = false; std::unique_ptr linkerInput; Linker::RelocatedSymbolsMap symbols; std::map buildLog; bool areSpecializationConstantsInitialized = false; CIF::RAII::UPtr_t specConstantsIds; CIF::RAII::UPtr_t specConstantsSizes; specConstValuesMap specConstantsValues; BlockKernelManager *blockKernelManager = nullptr; ExecutionEnvironment &executionEnvironment; Context *context = nullptr; Device *pDevice = nullptr; cl_uint numDevices = 0U; bool isBuiltIn = false; bool kernelDebugEnabled = false; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sampler/000077500000000000000000000000001363734646600216005ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/sampler/CMakeLists.txt000066400000000000000000000010511363734646600243350ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_SAMPLER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_factory_init.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tgllp_plus.inl ) add_subdirectories() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SAMPLER}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SAMPLER ${RUNTIME_SRCS_SAMPLER}) compute-runtime-20.13.16352/opencl/source/sampler/sampler.cpp000066400000000000000000000163651363734646600237620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/cl_device/cl_device.h" #include 
"opencl/source/context/context.h" #include "opencl/source/helpers/get_info_status_mapper.h" #include "patch_list.h" #include namespace NEO { SamplerCreateFunc samplerFactory[IGFX_MAX_CORE] = {}; getSamplerStateSizeHwFunc getSamplerStateSizeHw[IGFX_MAX_CORE] = {}; Sampler::Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax) : context(context), normalizedCoordinates(normalizedCoordinates), addressingMode(addressingMode), filterMode(filterMode), mipFilterMode(mipFilterMode), lodMin(lodMin), lodMax(lodMax) { } Sampler::Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode) : Sampler(context, normalizedCoordinates, addressingMode, filterMode, CL_FILTER_NEAREST, 0.0f, std::numeric_limits::max()) { } Sampler *Sampler::create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax, cl_int &errcodeRet) { errcodeRet = CL_SUCCESS; Sampler *sampler = nullptr; DEBUG_BREAK_IF(nullptr == context); const auto device = context->getDevice(0); const auto &hwInfo = device->getHardwareInfo(); auto funcCreate = samplerFactory[hwInfo.platform.eRenderCoreFamily]; DEBUG_BREAK_IF(nullptr == funcCreate); sampler = funcCreate(context, normalizedCoordinates, addressingMode, filterMode, mipFilterMode, lodMin, lodMax); if (sampler == nullptr) { errcodeRet = CL_OUT_OF_HOST_MEMORY; } return sampler; } size_t Sampler::getSamplerStateSize(const HardwareInfo &hwInfo) { return getSamplerStateSizeHw[hwInfo.platform.eRenderCoreFamily](); } template struct SetOnce { SetOnce(ParameterType defaultValue, ParameterType min, ParameterType max) : value(defaultValue), min(min), max(max) { } cl_int setValue(ParameterType property) { if (alreadySet) { return CL_INVALID_VALUE; } if ((property < min) || (property > 
max)) { return CL_INVALID_VALUE; } this->value = property; alreadySet = true; return CL_SUCCESS; } bool alreadySet = false; ParameterType value; ParameterType min; ParameterType max; }; Sampler *Sampler::create(Context *context, const cl_sampler_properties *samplerProperties, cl_int &errcodeRet) { SetOnce normalizedCoords(CL_TRUE, CL_FALSE, CL_TRUE); SetOnce filterMode(CL_FILTER_NEAREST, CL_FILTER_NEAREST, CL_FILTER_LINEAR); SetOnce addressingMode(CL_ADDRESS_CLAMP, CL_ADDRESS_NONE, CL_ADDRESS_MIRRORED_REPEAT); SetOnce mipFilterMode(CL_FILTER_NEAREST, CL_FILTER_NEAREST, CL_FILTER_LINEAR); SetOnce lodMin(0.0f, 0.0f, std::numeric_limits::max()); SetOnce lodMax(std::numeric_limits::max(), 0.0f, std::numeric_limits::max()); errcodeRet = CL_SUCCESS; if (samplerProperties) { cl_ulong samType; while ((samType = *samplerProperties) != 0) { ++samplerProperties; auto samValue = *samplerProperties; switch (samType) { case CL_SAMPLER_NORMALIZED_COORDS: errcodeRet = normalizedCoords.setValue(static_cast(samValue)); break; case CL_SAMPLER_ADDRESSING_MODE: errcodeRet = addressingMode.setValue(static_cast(samValue)); break; case CL_SAMPLER_FILTER_MODE: errcodeRet = filterMode.setValue(static_cast(samValue)); break; case CL_SAMPLER_MIP_FILTER_MODE: errcodeRet = mipFilterMode.setValue(static_cast(samValue)); break; case CL_SAMPLER_LOD_MIN: { SamplerLodProperty lodData; lodData.data = samValue; errcodeRet = lodMin.setValue(lodData.lod); break; } case CL_SAMPLER_LOD_MAX: { SamplerLodProperty lodData; lodData.data = samValue; errcodeRet = lodMax.setValue(lodData.lod); break; } default: errcodeRet = CL_INVALID_VALUE; break; } ++samplerProperties; } } Sampler *sampler = nullptr; if (errcodeRet == CL_SUCCESS) { sampler = create(context, normalizedCoords.value, addressingMode.value, filterMode.value, mipFilterMode.value, lodMin.value, lodMax.value, errcodeRet); } return sampler; } unsigned int Sampler::getSnapWaValue() const { if (filterMode == CL_FILTER_NEAREST && addressingMode == 
CL_ADDRESS_CLAMP) { return iOpenCL::CONSTANT_REGISTER_BOOLEAN_TRUE; } else { return iOpenCL::CONSTANT_REGISTER_BOOLEAN_FALSE; } } cl_int Sampler::getInfo(cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal; size_t valueSize = 0; const void *pValue = nullptr; cl_uint refCount = 0; switch (paramName) { case CL_SAMPLER_CONTEXT: valueSize = sizeof(cl_device_id); pValue = &this->context; break; case CL_SAMPLER_NORMALIZED_COORDS: valueSize = sizeof(cl_bool); pValue = &this->normalizedCoordinates; break; case CL_SAMPLER_ADDRESSING_MODE: valueSize = sizeof(cl_addressing_mode); pValue = &this->addressingMode; break; case CL_SAMPLER_FILTER_MODE: valueSize = sizeof(cl_filter_mode); pValue = &this->filterMode; break; case CL_SAMPLER_MIP_FILTER_MODE: valueSize = sizeof(cl_filter_mode); pValue = &this->mipFilterMode; break; case CL_SAMPLER_LOD_MIN: valueSize = sizeof(float); pValue = &this->lodMin; break; case CL_SAMPLER_LOD_MAX: valueSize = sizeof(float); pValue = &this->lodMax; break; case CL_SAMPLER_REFERENCE_COUNT: refCount = static_cast(this->getReference()); valueSize = sizeof(refCount); pValue = &refCount; break; default: break; } retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, pValue, valueSize)); if (paramValueSizeRet) { *paramValueSizeRet = valueSize; } return retVal; } bool Sampler::isTransformable() const { return addressingMode == CL_ADDRESS_CLAMP_TO_EDGE && filterMode == CL_FILTER_NEAREST && normalizedCoordinates == CL_FALSE; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sampler/sampler.h000066400000000000000000000114221363734646600234140ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" namespace NEO { class Context; struct HardwareInfo; template <> struct OpenCLObjectMapper<_cl_sampler> { typedef 
class Sampler DerivedType; }; union SamplerLodProperty { cl_sampler_properties data; float lod; }; class Sampler : public BaseObject<_cl_sampler> { public: static const cl_ulong objectMagic = 0x4684913AC213EF00LL; static const uint32_t samplerStateArrayAlignment = 64; static Sampler *create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax, cl_int &errcodeRet); static Sampler *create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_int &errcodeRet) { return Sampler::create(context, normalizedCoordinates, addressingMode, filterMode, CL_FILTER_NEAREST, 0.0f, std::numeric_limits::max(), errcodeRet); } static Sampler *create(Context *context, const cl_sampler_properties *samplerProperties, cl_int &errcodeRet); cl_int getInfo(cl_sampler_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet); virtual void setArg(void *memory) = 0; static size_t getSamplerStateSize(const HardwareInfo &hwInfo); bool isTransformable() const; Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax); Sampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode); unsigned int getSnapWaValue() const; cl_context context; cl_bool normalizedCoordinates; cl_addressing_mode addressingMode; cl_filter_mode filterMode; cl_filter_mode mipFilterMode; float lodMin; float lodMax; }; template struct SamplerHw : public Sampler { void setArg(void *memory) override; void appendSamplerStateParams(typename GfxFamily::SAMPLER_STATE *state); static constexpr float getGenSamplerMaxLod() { return 14.0f; } SamplerHw(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, 
cl_filter_mode mipFilterMode, float lodMin, float lodMax) : Sampler(context, normalizedCoordinates, addressingMode, filterMode, mipFilterMode, lodMin, lodMax) { } SamplerHw(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode) : Sampler(context, normalizedCoordinates, addressingMode, filterMode) { } static Sampler *create(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax) { return new SamplerHw(context, normalizedCoordinates, addressingMode, filterMode, mipFilterMode, lodMin, lodMax); } static size_t getSamplerStateSize(); }; typedef Sampler *(*SamplerCreateFunc)(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode, float lodMin, float lodMax); typedef size_t (*getSamplerStateSizeHwFunc)(); template <> inline Sampler *castToObject(const void *object) { auto clSamplerObj = reinterpret_cast(object); return castToObject(const_cast(clSamplerObj)); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sampler/sampler.inl000066400000000000000000000101101363734646600237400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/numeric.h" #include namespace NEO { template void SamplerHw::setArg(void *memory) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; auto samplerState = reinterpret_cast(memory); samplerState->setNonNormalizedCoordinateEnable(!this->normalizedCoordinates); auto addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; auto addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; auto addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; switch (this->addressingMode) { case CL_ADDRESS_NONE: case CL_ADDRESS_CLAMP: addressControlModeX = 
SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; break; case CL_ADDRESS_CLAMP_TO_EDGE: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; break; case CL_ADDRESS_MIRRORED_REPEAT: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; break; case CL_ADDRESS_REPEAT: addressControlModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; addressControlModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; break; } auto minMode = SAMPLER_STATE::MIN_MODE_FILTER_NEAREST; auto magMode = SAMPLER_STATE::MAG_MODE_FILTER_NEAREST; auto mipMode = SAMPLER_STATE::MIP_MODE_FILTER_NEAREST; if (CL_FILTER_LINEAR == filterMode) { minMode = SAMPLER_STATE::MIN_MODE_FILTER_LINEAR; magMode = SAMPLER_STATE::MAG_MODE_FILTER_LINEAR; } if (CL_FILTER_LINEAR == mipFilterMode) { mipMode = SAMPLER_STATE::MIP_MODE_FILTER_LINEAR; } samplerState->setMinModeFilter(minMode); samplerState->setMagModeFilter(magMode); samplerState->setMipModeFilter(mipMode); samplerState->setTcxAddressControlMode(addressControlModeX); samplerState->setTcyAddressControlMode(addressControlModeY); samplerState->setTczAddressControlMode(addressControlModeZ); if (CL_FILTER_NEAREST != filterMode) { samplerState->setRAddressMinFilterRoundingEnable(true); samplerState->setRAddressMagFilterRoundingEnable(true); samplerState->setVAddressMinFilterRoundingEnable(true); samplerState->setVAddressMagFilterRoundingEnable(true); samplerState->setUAddressMinFilterRoundingEnable(true); 
samplerState->setUAddressMagFilterRoundingEnable(true); } else { samplerState->setRAddressMinFilterRoundingEnable(false); samplerState->setRAddressMagFilterRoundingEnable(false); samplerState->setVAddressMinFilterRoundingEnable(false); samplerState->setVAddressMagFilterRoundingEnable(false); samplerState->setUAddressMinFilterRoundingEnable(false); samplerState->setUAddressMagFilterRoundingEnable(false); } FixedU4D8 minLodValue = FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMin)); FixedU4D8 maxLodValue = FixedU4D8(std::min(getGenSamplerMaxLod(), this->lodMax)); samplerState->setMinLod(minLodValue.getRawAccess()); samplerState->setMaxLod(maxLodValue.getRawAccess()); appendSamplerStateParams(samplerState); } template void SamplerHw::appendSamplerStateParams(typename GfxFamily::SAMPLER_STATE *state) { } template size_t SamplerHw::getSamplerStateSize() { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; return sizeof(SAMPLER_STATE); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sampler/sampler_factory_init.inl000066400000000000000000000011341363734646600265200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // Sampler factory table initialization. 
// Family, gfxCore came from outside, do not set them here unless you // really know what you are doing template struct SamplerHw; template <> void populateFactoryTable>() { extern SamplerCreateFunc samplerFactory[IGFX_MAX_CORE]; extern getSamplerStateSizeHwFunc getSamplerStateSizeHw[IGFX_MAX_CORE]; samplerFactory[gfxCore] = SamplerHw::create; getSamplerStateSizeHw[gfxCore] = SamplerHw::getSamplerStateSize; } compute-runtime-20.13.16352/opencl/source/sampler/sampler_tgllp_plus.inl000066400000000000000000000011751363734646600262200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.inl" namespace NEO { using SAMPLER_STATE = typename Family::SAMPLER_STATE; template <> void SamplerHw::appendSamplerStateParams(SAMPLER_STATE *state) { if (DebugManager.flags.ForceSamplerLowFilteringPrecision.get()) { state->setLowQualityFilter(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE); } } #include "opencl/source/sampler/sampler_factory_init.inl" } // namespace NEO compute-runtime-20.13.16352/opencl/source/scheduler/000077500000000000000000000000001363734646600221135ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/scheduler/CMakeLists.txt000066400000000000000000000010341363734646600246510ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_SCHEDULER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cl ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SCHEDULER}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SCHEDULER ${RUNTIME_SRCS_SCHEDULER}) if(NOT (TARGET ${SCHEDULER_BINARY_LIB_NAME})) include(scheduler_binary.cmake) endif() 
compute-runtime-20.13.16352/opencl/source/scheduler/scheduler.cl000066400000000000000000004507421363734646600244250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifndef SCHEDULER_EMULATION #include "device_enqueue.h" #endif // float passed as int extern float __intel__getProfilingTimerResolution(); #ifndef EMULATION_ENTER_FUNCTION #define EMULATION_ENTER_FUNCTION( ) #endif #ifndef NULL #define NULL 0 #endif #define SIMD8 0 #define SIMD16 1 #define SIMD32 2 #define SHORT_SIZE_IN_BYTES 2 #define DWORD_SIZE_IN_BYTES 4 #define QWORD_SIZE_IN_BYTES 8 #define MAX_GLOBAL_ARGS 255 #define MASK_LOW16_BITS 0xFFFF //Currently setting to 8 #define MAX_WALKERS_IN_PARALELL PARALLEL_SCHEDULER_HW_GROUPS //Need 4 uints per walker ( command packet offset, slb offest, dsh offset, idt offset, + 1 to store total #define PARALLEL_SCHEDULER_OFFSETS_NUMBER 4 #define PARALLEL_SCHEDULER_LOCAL_MEM_SIZE ( MAX_WALKERS_IN_PARALELL * PARALLEL_SCHEDULER_OFFSETS_NUMBER + 1 ) //Last index #define TOTAL_ENQUEUES_FOUND ( PARALLEL_SCHEDULER_LOCAL_MEM_SIZE - 1 ) //CURBE STUFF, only entries that really needs to be patched #define SCHEDULER_DATA_PARAMETER_KERNEL_ARGUMENT 1 #define SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE 2 #define SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_SIZE 3 #define SCHEDULER_DATA_PARAMETER_NUM_WORK_GROUPS 4 #define SCHEDULER_DATA_PARAMETER_WORK_DIMENSIONS 5 #define SCHEDULER_DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES 8 #define SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_OFFSET 16 #define SCHEDULER_DATA_PARAMETER_NUM_HARDWARE_THREADS 17 #define SCHEDULER_DATA_PARAMETER_PARENT_EVENT 22 #define SCHEDULER_DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE 28 #define SCHEDULER_DATA_PARAMETER_IMAGE_WIDTH ( 9 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_HEIGHT ( 10 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_DEPTH ( 11 + 
SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE ( 12 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_CHANNEL_ORDER ( 13 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_ARRAY_SIZE ( 18 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_NUM_SAMPLES ( 20 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS ( 27 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_OBJECT_ID ( 35 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER ( 39 + SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define DATA_PARAMETER_SAMPLER_ADDRESS_MODE ( 14 + SCHEDULER_DATA_PARAMETER_SAMPLER_ADDED_VALUE ) #define DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS ( 15 + SCHEDULER_DATA_PARAMETER_SAMPLER_ADDED_VALUE ) #define DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED ( 21 + SCHEDULER_DATA_PARAMETER_SAMPLER_ADDED_VALUE ) #define SCHEDULER_DATA_PARAMETER_SAMPLER_OBJECT_ID ( 35 + SCHEDULER_DATA_PARAMETER_SAMPLER_ADDED_VALUE ) //CURBE STUFF, only entries that really needs to be patched #define SCHEDULER_DATA_PARAMETER_KERNEL_ARGUMENT_MASK ( 1 << 1 ) #define SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE_MASK ( 1 << 2 ) #define SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_SIZE_MASK ( 1 << 3 ) #define SCHEDULER_DATA_PARAMETER_NUM_WORK_GROUPS_MASK ( 1 << 4 ) #define SCHEDULER_DATA_PARAMETER_WORK_DIMENSIONS_MASK ( 1 << 5 ) #define SCHEDULER_DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES_MASK ( 1 << 8 ) #define SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_OFFSET_MASK ( 1 << 16 ) #define SCHEDULER_DATA_PARAMETER_NUM_HARDWARE_THREADS_MASK ( 1 << 17 ) #define SCHEDULER_DATA_PARAMETER_PARENT_EVENT_MASK ( 1 << 22 ) #define SCHEDULER_DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE_MASK ( 1 << 28 ) #define 
SCHEDULER_DATA_PARAMETER_IMAGE_CURBE_ENTRIES ( ( ulong ) 1 << SCHEDULER_DATA_PARAMETER_IMAGES_CURBE_SHIFT ) #define SCHEDULER_DATA_PARAMETER_GLOBAL_POINTER ( ( ( ulong ) 1 ) << SCHEDULER_DATA_PARAMETER_GLOBAL_POINTER_SHIFT ) #define SCHEDULER_DATA_PARAMETER_SAMPLER_MASK ( ( ( ulong ) 1 ) << SCHEDULER_DATA_PARAMETER_SAMPLER_SHIFT ) //Error codes #define SCHEDULER_CURBE_TOKEN_MISSED 10 #define SCHEDULER_CURBE_ARGUMENTS_SIZE_MISMATCH 11 #define CAN_BE_RECLAIMED 123456 #define SCHEDULER_MSF_INITIAL 1 #define SCHEDULER_MSF_SECOND 2 //Uncomment to enable logging debug data //#define ENABLE_DEBUG_BUFFER 1 #ifdef ENABLE_DEBUG_BUFFER //Update DebugDataInfo types in device_enqueue.h and PrintDebugDataBuffer() in cmd_queue_device.cpp //Flags #define DDB_HAS_DATA_INFO ( 1 << 0 ) #define DDB_SCHEDULER_PROFILING ( 1 << 1 ) #define DDB_ALL ( 0xffffffff ) #endif //Turn this to 1 to turn on debug calls, notice that it will cause up to 10 x longer time to build scheduler //#define SCHEDULER_DEBUG_MODE 1 //#define DISABLE_RESOURCE_RECLAMATION 1 /* Resource reclamation procedure 1. Move all new command packets from queue_t to qstorage 2. In case there is place in storage for whole queue, reclaim space on queue 3. Construct stack basing on new commands added in the qstorage 4. Browse stack to find next item for execution 5. 
When you take item from the stack and schedule it , reclaim place on qstorage buffer */ typedef struct { uint3 ActualLocalSize; uint3 WalkerDimSize; uint3 WalkerStartPoint; } IGIL_WalkerData; typedef struct { uint3 LocalWorkSize; uint3 TotalDimSize; IGIL_WalkerData WalkerArray[ 8 ]; } IGIL_WalkerEnumeration; inline void patchDword( __global uint* patchedDword, uint startOffset, uint endOffset, uint value ) { uint LeftMask = ALL_BITS_SET_DWORD_MASK >> ( DWORD_SIZE_IN_BITS - endOffset - 1 ); uint RightMask = ALL_BITS_SET_DWORD_MASK << ( startOffset ); uint CleanMask = ~( RightMask & LeftMask ); *patchedDword &= CleanMask; *patchedDword |= ( value << startOffset ); } inline __global IGIL_KernelAddressData* IGIL_GetKernelAddressData( __global IGIL_KernelDataHeader* pKernelReflection, uint blockId ) { return ( __global IGIL_KernelAddressData* ) ( &pKernelReflection->m_data[ blockId ] ); } __global IGIL_KernelData* IGIL_GetKernelData( __global IGIL_KernelDataHeader* pKernelReflection, uint blockId ) { __global IGIL_KernelAddressData* pKernelAddressData = IGIL_GetKernelAddressData( pKernelReflection, blockId ); uint Offset = pKernelAddressData->m_KernelDataOffset; __global char* pKernelReflectionRaw = ( __global char * ) pKernelReflection; return ( __global IGIL_KernelData* ) ( &pKernelReflectionRaw[ Offset ] ); } inline __global IGIL_CommandHeader* TEMP_IGIL_GetCommandHeader( __global IGIL_CommandQueue* q, uint offset ) { __global uchar *pQueueRaw = (__global uchar *) q; __global IGIL_CommandHeader* pCommand = ( __global IGIL_CommandHeader* )( pQueueRaw + offset ); return pCommand; } //Make sure enough command packets are in command queue before calling this function. 
__global IGIL_CommandHeader* TEMP_IGIL_GetNthCommandHeader( __global IGIL_CommandQueue* q, uint initialOffset, uint number )
{
    __global uchar *pQueueRaw = (__global uchar *) q;
    __global IGIL_CommandHeader* pCommand = ( __global IGIL_CommandHeader* )( pQueueRaw + initialOffset );
    uint Offset = initialOffset;
    //Traverse queue_t unless nth command packet is found
    while( number > 0 )
    {
        //Each packet stores its own size, it is used to jump to the next packet
        Offset += pCommand->m_commandSize;
        pCommand = TEMP_IGIL_GetCommandHeader( q, Offset );
        number--;
    }
    return pCommand;
}

//Same traversal as TEMP_IGIL_GetNthCommandHeader, but returns byte offset of nth packet instead of its pointer.
//Make sure enough command packets are in command queue before calling this function.
uint TEMP_IGIL_GetNthCommandHeaderOffset( __global IGIL_CommandQueue* q, uint initialOffset, uint number )
{
    __global uchar *pQueueRaw = (__global uchar *) q;
    __global IGIL_CommandHeader* pCommand = ( __global IGIL_CommandHeader* )( pQueueRaw + initialOffset );
    uint Offset = initialOffset;
    //Traverse queue_t unless nth command packet is found
    while( number > 0 )
    {
        Offset += pCommand->m_commandSize;
        pCommand = TEMP_IGIL_GetCommandHeader( q, Offset );
        number--;
    }
    return Offset;
}

//Returns command header located offset bytes from the beginning of queue storage.
__global IGIL_CommandHeader* GetCommandHeaderFromStorage( __global uint* queueStorage, uint offset )
{
    __global uchar *pQueueRaw = ( __global uchar * ) queueStorage;
    __global IGIL_CommandHeader* pCommand = ( __global IGIL_CommandHeader* )( pQueueRaw + offset );
    return pCommand;
}

//Reinterprets command header pointer as queue_t.
inline queue_t TEMP_IGIL_GetQueueT( IGIL_CommandHeader * queuePtr )
{
    return __builtin_astype( queuePtr, queue_t );
}

//Returns pointer to the first device event, events are placed right after the pool header.
inline __global IGIL_DeviceEvent* TEMP_IGIL_GetDeviceEvents( __global IGIL_EventPool *pool )
{
    return ( __global IGIL_DeviceEvent * )( pool + 1 );
}

//Returns pointer to the device event with given id.
inline __global IGIL_DeviceEvent* TEMP_IGIL_GetDeviceEvent( __global IGIL_EventPool *pool, uint eventId )
{
    __global IGIL_DeviceEvent * pEvent = ( __global IGIL_DeviceEvent * )( pool + 1 );
    return ( __global IGIL_DeviceEvent * )( pEvent + eventId );
}

//Stores new state in the event with given id, always returns true.
bool SetEventState( __global IGIL_EventPool *pool, uint eventId, int state )
{
    __global IGIL_DeviceEvent* pDeviceEvent = TEMP_IGIL_GetDeviceEvent( pool, eventId );
    pDeviceEvent->m_state = state;
    return true;
}

//Marks event as unused, event handle value is used as an index into the events of the pool.
void TEMP_IGIL_FreeEvent( clk_event_t event, __global IGIL_EventPool *pool )
{
    //Offset into the event data in the pool
    __global IGIL_DeviceEvent *events = TEMP_IGIL_GetDeviceEvents( pool );
    atomic_xchg( &events[ ( uint )(size_t)__builtin_astype( event, void* ) ].m_state, IGIL_EVENT_UNUSED );
}

//Dword by dword global to global copy done by the calling work item only.
//numBytes is rounded down to full dwords, trailing 1-3 bytes are not copied.
void IGILLOCAL_MEMCPY_GTOG( __global void* pDst, __global void* pSrc, int numBytes )
{
    numBytes = numBytes >> 2;
    for( int i = 0; i < numBytes; i++ )
    {
        ( ( __global uint* )pDst ) [ i ] = ( ( __global int* )pSrc )[ i ];
    }
}

//Global memcpy running on all witems possible, make sure it's run from all hw threads.
//Every work item copies one dword per iteration, dwords are interleaved between work items by local id.
void GLOBAL_MEMCPYUINT( __global void* pDst, __global void* pSrc, int numBytes )
{
    uint total_local_size = get_local_size( 0 );
    uint LoopCtr = numBytes / ( total_local_size * DWORD_SIZE_IN_BYTES );
    uint LeftOver = numBytes % ( total_local_size * DWORD_SIZE_IN_BYTES );
    uint Lid = get_local_id( 0 );
    uint i = 0;
    //Main copy
    for( i = 0; i < LoopCtr; i++ )
    {
        ( ( __global uint* )pDst ) [ Lid + total_local_size * i ] = ( ( __global uint* )pSrc )[ Lid + total_local_size * i ];
    }
    //Copy what's left
    if( LeftOver != 0 )
    {
        //Only work items that still have a dword within the remainder take part, i == LoopCtr here
        if( Lid * DWORD_SIZE_IN_BYTES < LeftOver )
        {
            ( ( __global uint* )pDst ) [ Lid + total_local_size * i ] = ( ( __global uint* )pSrc )[ Lid + total_local_size * i ];
        }
    }
}

//In SIMD8 to fully use cachelines copy in portions of uint2 , 8 bytes x 8 witems = 64 bytes cacheline size.
//Global memcpy running on all witems possible, make sure it's run from all hw threads.
void GLOBAL_MEMCPY( __global void* pDst, __global void* pSrc, int numBytes )
{
    //In case I need dword copy use uint version of this function.
    if( ( numBytes % ( DWORD_SIZE_IN_BYTES * 2 ) ) != 0 )
    {
        GLOBAL_MEMCPYUINT( pDst, pSrc, numBytes );
    }
    else
    {
        uint total_local_size = get_local_size( 0 );
        uint LoopCtr = numBytes / ( total_local_size * DWORD_SIZE_IN_BYTES * 2 );
        uint LeftOver = numBytes % ( total_local_size * DWORD_SIZE_IN_BYTES * 2 );
        uint Lid = get_local_id( 0 );
        uint i = 0;
        //Main copy
        for( i = 0; i < LoopCtr; i++ )
        {
            ( ( __global uint2* )pDst ) [ Lid + total_local_size * i ] = ( ( __global uint2* )pSrc )[ Lid + total_local_size * i ];
        }
        //Copy what's left
        if( LeftOver != 0 )
        {
            if( Lid * DWORD_SIZE_IN_BYTES * 2 < LeftOver )
            {
                ( ( __global uint2* )pDst ) [ Lid + total_local_size * i ] = ( ( __global uint2* )pSrc )[ Lid + total_local_size * i ];
            }
        }
    }
}

//This works only for 32 bit types
//Rounds number up to the nearest power of 2, powers of 2 are returned unchanged.
//For 0 and for values above 2^31 the result wraps to 0.
uint GetNextPowerof2( uint number )
{
    number --;
    //Propagate the highest set bit to all lower bits
    number |= number >> 1;
    number |= number >> 2;
    number |= number >> 4;
    number |= number >> 8;
    number |= number >> 16;
    number ++;
    return number;
}

//Rounds a up to the nearest multiple of b, arguments are evaluated more than once.
#define OCLRT_ALIGN( a, b ) ( ( ( ( a ) % ( b ) ) != 0 ) ? ( ( a ) - ( ( a ) % ( b ) ) + ( b ) ) : ( a ) )
//Returns bigger of two values, arguments are evaluated more than once.
#define OCLRT_MAX( a, b ) ( ( ( a ) > ( b ) ) ? ( a ) : ( b ) )

#ifndef SCHEDULER_EMULATION
#include "scheduler_definitions.h"
#endif

#ifdef ENABLE_DEBUG_BUFFER
//Adds private uint* data to debug buffer
// ddb - global buffer to keep data
// src - source data
// numberOfElements
// localID - WI ID , when 0xffffffff all WI copy data
// returns 0 when data was copied, else -1
//NOTE(review): free space is checked before atomic_add reserves it, so concurrent callers can pass the check together - confirm it is acceptable for debug usage.
int AddToDebugBufferParallel( __global DebugDataBuffer* ddb, uint* pSrc, uint numberOfElements, uint localID )
{
    if( ddb->m_flags == 0 )
    {
        //All work items in local group copies data
        if( localID == 0xffffffff )
        {
            //Check if there is enough place for new data in ddb for every workitem in local group
            if( ( ( ddb->m_size / sizeof( uint ) - ddb->m_offset ) * get_local_size(0) ) >= numberOfElements )
            {
                //Reserve the range [ startIndex, startIndex + numberOfElements ) for this work item
                uint startIndex = atomic_add( &ddb->m_offset, numberOfElements );
                uint i;
                int srcPos;
                for( i = startIndex, srcPos = 0; i < startIndex + numberOfElements; i++, srcPos++ )
                {
                    ddb->m_data[ i ] = pSrc[ srcPos ];
                }
                return 0;
            }
        }
        else
        {
            //Check if there is enough place for new data in ddb for one workitem in local group
            if( ( ddb->m_size / sizeof( uint ) - ddb->m_offset ) >= numberOfElements )
            {
                if( get_local_id(0) == localID )
                {
                    uint startIndex = atomic_add( &ddb->m_offset, numberOfElements );
                    uint i;
                    int srcPos;
                    for( i = startIndex, srcPos = 0; i < startIndex + numberOfElements; i++, srcPos++ )
                    {
                        ddb->m_data[ i ] = pSrc[ srcPos ];
                    }
                    return 0;
                }
            }
        }
    }
    return -1;
}

//Adds private uint to debug buffer
int AddToDBParallel(__global DebugDataBuffer* ddb, uint pSrc, uint localID)
{
    return AddToDebugBufferParallel(ddb, &pSrc, 1, localID);
}

//Adds global uint* data to debug buffer
// ddb - global buffer to keep data
// src - source data
// numberOfElements
// localID - WI ID , when 0xffffffff all WI copy data
// returns 0 when data was copied, else -1
int AddGlobalToDebugBufferParallel( __global DebugDataBuffer* ddb, __global uint* pSrc, uint numberOfElements, uint localID )
{
    if( ddb->m_flags == 0 )
    {
        //All work items in local group copies data
        if( localID == 0xffffffff )
        {
            //Check if there is enough place for new data in ddb for every workitem in local group
            if( ( ( ddb->m_size / sizeof( uint ) - ddb->m_offset ) * get_local_size(0) ) >= numberOfElements )
            {
                uint startIndex = atomic_add( &ddb->m_offset, numberOfElements );
                uint i;
                int srcPos;
                for( i = startIndex, srcPos = 0; i < startIndex + numberOfElements; i++, srcPos++ )
                {
                    ddb->m_data[ i ] = pSrc[ srcPos ];
                }
                return 0;
            }
        }
        else
        {
            //Check if there is enough place for new data in ddb for one workitem in local group
            if( ( ddb->m_size / sizeof( uint ) - ddb->m_offset ) >= numberOfElements )
            {
                if( get_local_id(0) == localID )
                {
                    uint startIndex = atomic_add( &ddb->m_offset, numberOfElements );
                    uint i;
                    int srcPos;
                    for( i = startIndex, srcPos = 0; i < startIndex + numberOfElements; i++, srcPos++ )
                    {
                        ddb->m_data[ i ] = pSrc[ srcPos ];
                    }
                    return 0;
                }
            }
        }
    }
    return -1;
}

//Adds private uint data to debug buffer
// ddb - global buffer to keep data
// src - source data
// dataType - enum defining added data type
// returns 0 when data was copied, else -1
//NOTE(review): space check subtracts m_dataInfoTop from m_stackTop while it requires m_dataInfoTop > m_stackTop,
//if these fields are unsigned the difference wraps - confirm whether m_dataInfoTop - m_stackTop was intended.
int AddToDebugBuffer( __global DebugDataBuffer* ddb, __private ulong src, uint dataType, uint localID )
{
    if( ddb->m_flags == 0 || ddb->m_flags == DDB_HAS_DATA_INFO )
    {
        //All work items in local group copies data
        if( localID == 0xffffffff )
        {
            //Check if there is enough place for new data in ddb
            if( ( ( ddb->m_stackTop - ddb->m_dataInfoTop ) >= 4 * get_local_size( 0 ) ) && ( ddb->m_dataInfoTop > ddb->m_stackTop ) )
            {
                //Check flags
                if( ddb->m_flags == 0 || ddb->m_flags == DDB_HAS_DATA_INFO )
                {
                    //Returned value is not used, the call is kept because it advances m_offset
                    uint startIndex = atomic_add( &ddb->m_offset, 1 );
                    uint dataIndex = atomic_sub( &ddb->m_dataInfoTop, ddb->m_dataInfoSize );
                    uint stackTop = atomic_add( &ddb->m_stackTop, 8 );
                    __global uchar* pCharDebugQueue = ( __global uchar * )ddb;
                    __global uint* dest = ( __global uint* )&pCharDebugQueue[ stackTop ];
                    __global DebugDataInfo* debugInfo;
                    //Store 64 bit value as two dwords, lower dword first
                    dest[ 0 ] = ( uint )src & 0xffffffff;
                    dest[ 1 ] = ( src & 0xffffffff00000000 ) >> 32;
                    debugInfo = ( __global DebugDataInfo* )( &pCharDebugQueue[ dataIndex ] );
                    debugInfo->m_dataType = ( DebugDataTypes )dataType;
                    debugInfo->m_dataSize = 8;
                    ddb->m_flags |= DDB_HAS_DATA_INFO;
                    return 0;
                }
            }
        }
        else
        {
            //Check if there is enough place for new data in ddb
            if( ( ( ddb->m_stackTop - ddb->m_dataInfoTop ) >= 8 ) && ( ddb->m_dataInfoTop > ddb->m_stackTop ) )
            {
                if( get_local_id( 0 ) == localID )
                {
                    //Returned value is not used, the call is kept because it advances m_offset
                    uint startIndex = atomic_add( &ddb->m_offset, 1 );
                    uint dataIndex = atomic_sub( &ddb->m_dataInfoTop, ddb->m_dataInfoSize );
                    uint stackTop = atomic_add( &ddb->m_stackTop, 8 );
                    __global uchar* pCharDebugQueue = ( __global uchar * )ddb;
                    __global uint* dest = ( __global uint* )&pCharDebugQueue[ stackTop ];
                    __global DebugDataInfo* debugInfo;
                    dest[ 0 ] = ( uint )src & 0xffffffff;
                    dest[ 1 ] = ( src & 0xffffffff00000000 ) >> 32;
                    debugInfo = ( __global DebugDataInfo* )( &pCharDebugQueue[ dataIndex ] );
                    debugInfo->m_dataType = ( DebugDataTypes )dataType;
                    debugInfo->m_dataSize = 8;
                    ddb->m_flags |= DDB_HAS_DATA_INFO;
                    return 0;
                }
            }
        }
    }
    return -1;
}

//Adds data to debug buffer
// ddb - global buffer to keep data
// src - source
// bytes - number of bytes from src to put into ddb
// dataType - enum defining added data type
// returns 0 when data was copied, else -1
//NOTE(review): the guard below lets in every work item with non zero local id and updates ddb without atomics,
//confirm whether get_local_id( 0 ) == 0 was intended.
int AddGlobalToDebugBuffer( __global DebugDataBuffer* ddb, __global uchar* src, uint bytes, uint dataType )
{
    if( get_local_id( 0 ) )
    {
        //Check if there is enough place for new data in ddb
        if( ( ( ddb->m_stackTop - ddb->m_dataInfoTop ) >= bytes ) && ( ddb->m_dataInfoTop > ddb->m_stackTop ) )
        {
            //Check flags
            if( ddb->m_flags == 0 || ddb->m_flags == DDB_HAS_DATA_INFO )
            {
                __global uchar* pCharDebugQueue = ( __global uchar * )ddb;
                __global DebugDataInfo* debugInfo;
                IGILLOCAL_MEMCPY_GTOG( ( &pCharDebugQueue[ ddb->m_stackTop ] ), ( src ), ( int )bytes );
                debugInfo = ( __global DebugDataInfo* )( &pCharDebugQueue[ ddb->m_dataInfoTop ] );
                debugInfo->m_dataType = ( DebugDataTypes )dataType;
                debugInfo->m_dataSize = bytes;
                //Data info entries grow down, data stack grows up
                ddb->m_dataInfoTop = ddb->m_dataInfoTop - ddb->m_dataInfoSize;
                ddb->m_stackTop = ddb->m_stackTop + bytes;
                ddb->m_offset = ddb->m_offset + ( bytes / 4 );
                ddb->m_flags |= DDB_HAS_DATA_INFO;
                return 0;
            }
        }
    }
    return -1;
}

//Same as AddGlobalToDebugBuffer but without a check of local id, localID parameter is not used.
// returns 0 when data was copied, else -1
int AddGlobalToDebugBufferAllIds( __global DebugDataBuffer* ddb, __global uchar* src, uint bytes, uint dataType, uint localID )
{
    if( ddb->m_flags == 0 || ddb->m_flags == DDB_HAS_DATA_INFO )
    {
        //Check if there is enough place for new data in ddb
        if( ( ( ddb->m_stackTop - ddb->m_dataInfoTop ) >= bytes ) && ( ddb->m_dataInfoTop > ddb->m_stackTop ) )
        {
            //Check flags
            if( ddb->m_flags == 0 || ddb->m_flags == DDB_HAS_DATA_INFO )
            {
                __global uchar* pCharDebugQueue = ( __global uchar * )ddb;
                __global DebugDataInfo* debugInfo;
                IGILLOCAL_MEMCPY_GTOG( ( &pCharDebugQueue[ ddb->m_stackTop ] ), ( src ), ( int )bytes );
                debugInfo = ( __global DebugDataInfo* )( &pCharDebugQueue[ ddb->m_dataInfoTop ] );
                debugInfo->m_dataType = ( DebugDataTypes )dataType;
                debugInfo->m_dataSize = bytes;
                ddb->m_dataInfoTop = ddb->m_dataInfoTop - ddb->m_dataInfoSize;
                ddb->m_stackTop = ddb->m_stackTop + bytes;
                ddb->m_offset = ddb->m_offset + ( bytes / 4 );
                ddb->m_flags |= DDB_HAS_DATA_INFO;
                return 0;
            }
        }
    }
    return -1;
}
#endif

#define MAX_SLB_OFFSET ( SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE * SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES )

#ifndef SCHEDULER_EMULATION
#include "scheduler_builtin_kernel.inl"
#endif

//SOME COMMON CODE FUNCTIONS
//COMMON CODE STARTS HERE
//Patches number of hw threads and SLM size in the interface descriptor of given block,
//descriptor of the block is located at index blockId + 1 behind the color calculator state.
//Not thread safe - make sure it's called in thread safe fashion.
void patchIDData( __global char* dsh, uint blockId, uint numberOfHwThreads, uint slmSize )
{
    __global char* DSHIData = ( __global char* )( dsh + SIZEOF_COLOR_CALCULATOR_STATE + ( ( blockId + 1 ) * SIZEOF_INTERFACE_DESCRIPTOR_DATA ) );
    __global uint* DSHIntData = ( __global uint* )( DSHIData );

    //Barrier enable is pre-patched on the host.
    patchDword( ( &DSHIntData[ INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD ] ), 0, INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT, numberOfHwThreads );

    //Patch SLM.
    uint SLMPatchValue = GetPatchValueForSLMSize( slmSize );
    patchDword( ( &DSHIntData[ INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD ] ), 16, 20, SLMPatchValue );
}

/*
this is how it works :
When constructing primary batch, first IDT table is also constructed, for all blocks, it is constructed as follows:
[0] - parent id
[1 .. x ] block id
[last aligned ] scheduler

now when we enter SLB, we forgot about first IDT, and we point all interface descriptor loads to point at scheduler which was last in the first IDT, to be first in the new IDT.
This way we can copy Interface Descriptors for blocks from the first IDT and assign Interface Descriptors dynamically in scheduler.
*/
//Copies interface descriptor of block ( blockId + blockStartId ) to slot ( IDT_BREAKDOWN + interfaceDescriptorOffset )
//and patches number of hw threads and SLM size in the copy.
void CopyAndPatchIDData( __global char* dsh, uint blockId, uint numberOfHwThreads, uint slmSize, uint interfaceDescriptorOffset, uint blockStartId )
{
    __global char* DSHIData = ( __global char* )( dsh + SIZEOF_COLOR_CALCULATOR_STATE + ( ( blockId + blockStartId ) * SIZEOF_INTERFACE_DESCRIPTOR_DATA ) );
    __global uint* DSHIntData = ( __global uint* )( DSHIData );

    //Copy to ID InterfaceDescriptorOffset
    __global char* DSHDestIData = ( __global char* )( dsh + SIZEOF_COLOR_CALCULATOR_STATE + ( ( IDT_BREAKDOWN + interfaceDescriptorOffset ) * SIZEOF_INTERFACE_DESCRIPTOR_DATA ) );
    __global uint* DSHDestIntData = ( __global uint* )( DSHDestIData );
    __global uint* DSHDestIntStartData = DSHDestIntData;

    for( int i = 0; i < ( SIZEOF_INTERFACE_DESCRIPTOR_DATA / 4 ); i++ )
    {
        DSHDestIntData[ i ] = DSHIntData[ i ];
    }

    //Barrier enable is pre-patched on the host.
    patchDword( ( &DSHDestIntStartData[ INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD ] ), 0, INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT, numberOfHwThreads );

    //Patch SLM.
    uint SLMPatchValue = GetPatchValueForSLMSize( slmSize );
    patchDword( ( &DSHDestIntStartData[ INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD ] ), 16, 20, SLMPatchValue );
}

//Version of CopyAndPatchIDData that additionally patches binding table offset, sampler state offset and number of samplers.
//DebugQueue parameter exists only when ENABLE_DEBUG_BUFFER is defined and is not used in the body.
void CopyAndPatchIDData20( __global char* dsh, uint blockId, uint numberOfHwThreads, uint slmSize, uint interfaceDescriptorOffset, uint blockStartId, uint bToffset, uint dshOffset, uint numOfSamplers
#ifdef ENABLE_DEBUG_BUFFER
    , __global DebugDataBuffer* DebugQueue
#endif
    )
{
    EMULATION_ENTER_FUNCTION( );

    __global char* DSHIData = ( __global char* )( dsh + SIZEOF_COLOR_CALCULATOR_STATE + ( ( blockId + blockStartId ) * SIZEOF_INTERFACE_DESCRIPTOR_DATA ) );
    __global uint* DSHIntData = ( __global uint* )( DSHIData );

    //Copy to ID InterfaceDescriptorOffset
    __global char* DSHDestIData = ( __global char* )( dsh + SIZEOF_COLOR_CALCULATOR_STATE + ( ( IDT_BREAKDOWN + interfaceDescriptorOffset ) * SIZEOF_INTERFACE_DESCRIPTOR_DATA ) );
    __global uint* DSHDestIntData = ( __global uint* )( DSHDestIData );
    __global uint* DSHDestIntStartData = DSHDestIntData;

    for( int i = 0; i < ( SIZEOF_INTERFACE_DESCRIPTOR_DATA / 4 ); i++ )
    {
        DSHDestIntData[ i ] = DSHIntData[ i ];
    }

    //Barrier enable is pre-patched on the host.
    patchDword( ( &DSHDestIntStartData[ INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD ] ), 0, INTERFACE_DESCRIPTOR_HWTHREADS_UPPER_BIT, numberOfHwThreads );

    //Patch BT offset
    patchDword( ( &DSHDestIntStartData[ INTERFACE_DESCRIPTOR_BINDING_TABLE_POINTER_DWORD ] ), 5, 15, ( bToffset >> 5 ) );

    //Patch SLM.
    uint PatchValue = GetPatchValueForSLMSize( slmSize );
    patchDword( ( &DSHDestIntStartData[ INTERFACE_DESCRIPTOR_HWTHREADS_NUMBER_DWORD ] ), 16, 20, PatchValue );

    //Add dshOffset to the sampler state pointer that is already stored in bits 5-31
    PatchValue = ( DSHDestIntStartData[ INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD ] & 0xffffffe0 ) + ( dshOffset );
    patchDword( ( &DSHDestIntStartData[ INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD ] ), 5, 31, ( ( PatchValue ) >> 5 ) );

    //Samplers in multiple of 4
    numOfSamplers = ( numOfSamplers + 3 ) / 4;
    patchDword( ( &DSHDestIntStartData[ INTERFACE_DESCRIPTOR_SAMPLER_STATE_TABLE_DWORD ] ), 2, 4, numOfSamplers );
}

//Patches GPGPU_WALKER command placed secondLevelBatchOffset bytes from the beginning of secondaryBatchBuffer:
//interface descriptor id, thread width, simd size, dimensions, start points, execution masks
//and indirect data length and start address.
void patchGpGpuWalker(
    uint secondLevelBatchOffset,
    __global uint* secondaryBatchBuffer,
    uint interfaceDescriptorOffset,
    uint simdSize,
    uint totalLocalWorkSize,
    uint3 dimSize,
    uint3 startPoint,
    uint numberOfHwThreadsPerWg,
    uint indirectPayloadSize,
    uint ioHoffset )
{
    EMULATION_ENTER_FUNCTION( );

    //SlbOffset is expressed in bytes and for cmd it is needed to convert it to dwords
    uint CmdPacketStart = secondLevelBatchOffset / DWORD_SIZE_IN_BYTES;
    //INTERFACE_DESCRIPTOR for GPGPU_WALKER
    //INTERFACE DESCRIPTOR is one plus the block id
    uint PatchOffset = CmdPacketStart + GPGPU_WALKER_INTERFACE_DESCRIPTOR_ID_OFFSET;
    //Patch id data
    patchDword( &( secondaryBatchBuffer[ PatchOffset ] ), 0, 5, ( interfaceDescriptorOffset ) );

    PatchOffset = CmdPacketStart + GPGPU_WALKER_THREAD_WIDTH_DWORD;
    //THREAD_WIDTH for GPGPU_WALKER
    patchDword( &( secondaryBatchBuffer[ PatchOffset ] ), 0, 5, ( numberOfHwThreadsPerWg - 1 ) );

    PatchOffset = CmdPacketStart + GPGPU_WALKER_SIMDSIZE_DWORD;
    //SIMD SIZE for GPGPU_WALKER
    //Double Check the bits for SIMDSize
    if( simdSize == 8 )
    {
        patchDword( &( secondaryBatchBuffer[ PatchOffset ] ), 30, 31, SIMD8 );
    }
    else if ( simdSize == 16 )
    {
        patchDword( &( secondaryBatchBuffer[ PatchOffset ] ), 30, 31, SIMD16 );
    }
    else
    {
        patchDword( &( secondaryBatchBuffer[ PatchOffset ] ), 30, 31, SIMD32 );
    }

    //XDIM for GPGPU_WALKER
    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_XDIM_DWORD ] = dimSize.x;
    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_GROUP_ID_START_X ] = startPoint.x;

    //YDIM
    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_YDIM_DWORD ] = dimSize.y;
    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_GROUP_ID_START_Y ] = startPoint.y;

    //ZDIM for GPGPU_WALKER
    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_ZDIM_DWORD ] = dimSize.z;
    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_GROUP_ID_START_Z ] = startPoint.z;

    //XMASK for GPGPU_WALKER
    //Unsigned literal is used, shift count may reach 31 and must not go through the sign bit of int
    uint mask = ( 1u << ( totalLocalWorkSize % simdSize ) ) - 1;
    //Local size that is a multiple of simd size means that all lanes of the last thread are enabled
    if( mask == 0 )
        mask = ~0;

    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_XMASK_DWORD ] = mask;

    //YMASK for GPGPU_WALKER
    uint YMask = ~0;

    secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_YMASK_DWORD ] = YMask;

    patchDword( &( secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_INDIRECT_DATA_LENGTH_OFFSET ] ), 0, 16, indirectPayloadSize );
    patchDword( &( secondaryBatchBuffer[ CmdPacketStart + GPGPU_WALKER_INDIRECT_START_ADDRESS_OFFSET ] ), 0, 31, ioHoffset );
}

//Patches interface descriptor offset in one of the MEDIA_STATE_FLUSH commands of given enqueue.
// msfNumber - SCHEDULER_MSF_INITIAL or SCHEDULER_MSF_SECOND
// returns 0 when command was patched, -1 when msfNumber is not recognized
int PatchMediaStateFlush(
    uint secondLevelBatchOffset,
    __global uint* secondaryBatchBuffer,
    uint interfaceDescriptorOffset,
    uint msfNumber )
{
    //SlbOffset is expressed in bytes and for cmd it is needed to convert it to dwords
    uint CmdPacketStart = secondLevelBatchOffset / DWORD_SIZE_IN_BYTES;
    uint MsfOffset;

    if( msfNumber == SCHEDULER_MSF_INITIAL )
    {
        MsfOffset = MEDIA_STATE_FLUSH_INITIAL_INTERFACE_DESCRIPTOR_OFFSET;
    }
    else if ( msfNumber == SCHEDULER_MSF_SECOND )
    {
        MsfOffset = MEDIA_STATE_FLUSH_INTERFACE_DESCRIPTOR_OFFSET;
    }
    else
    {
        return -1;
    }

    patchDword( &( secondaryBatchBuffer[ CmdPacketStart + MsfOffset ] ), 0, 5, interfaceDescriptorOffset );

    return 0;
}

#if defined WA_LRI_COMMANDS_EXIST
//Writes MI_LOAD_REGISTER_IMM command at enqueueOffset dwords from the beginning of given enqueue:
//command dword, register address ( bits 2-22 ) and value.
void PatchMiLoadRegisterImm(
    uint secondLevelBatchOffset,
    __global uint* secondaryBatchBuffer,
    uint enqueueOffset,
    uint registerAddress,
    uint value )
{
    //SlbOffset is expressed in bytes and for cmd it is needed to convert it to dwords
    uint CmdPacketStart = secondLevelBatchOffset / DWORD_SIZE_IN_BYTES;

    secondaryBatchBuffer[ CmdPacketStart + enqueueOffset ] = OCLRT_LOAD_REGISTER_IMM_CMD;
    patchDword( &( secondaryBatchBuffer[ CmdPacketStart + enqueueOffset + IMM_LOAD_REGISTER_ADDRESS_DWORD_OFFSET ] ), 2, 22, registerAddress >> 2 );
    secondaryBatchBuffer[ CmdPacketStart + enqueueOffset + IMM_LOAD_REGISTER_VALUE_DWORD_OFFSET ] = value;
}

//Appends MI_LOAD_REGISTER_IMM command for CTXT_PREMP_DBG register at *dwordOffset
//and advances *dwordOffset by the 3 dwords that were written.
void AddMiLoadRegisterImm(
    __global uint* secondaryBatchBuffer,
    __private uint* dwordOffset,
    uint value )
{
    secondaryBatchBuffer[ *dwordOffset ] = OCLRT_LOAD_REGISTER_IMM_CMD;
    ( *dwordOffset )++;
    //Address dword is cleared first, then register address is patched into bits 2-22
    secondaryBatchBuffer[ *dwordOffset ] = 0;
    patchDword( &( secondaryBatchBuffer[ *dwordOffset ] ), 2, 22, CTXT_PREMP_DBG_ADDRESS_VALUE >> 2 );
    ( *dwordOffset )++;
    secondaryBatchBuffer[ *dwordOffset ] = value; //CTXT_PREMP_ON_MI_ARB_CHECK_ONLY or CTXT_PREMP_DEFAULT_VALUE
    ( *dwordOffset )++;
}

//Programs LRI command of given enqueue that loads CTXT_PREMP_ON_MI_ARB_CHECK_ONLY.
void SetDisablePreemptionRegister(
    uint secondLevelBatchOffset,
    __global uint* secondaryBatchBuffer )
{
    PatchMiLoadRegisterImm( secondLevelBatchOffset,
                            secondaryBatchBuffer,
                            IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET,
                            CTXT_PREMP_DBG_ADDRESS_VALUE,
                            CTXT_PREMP_ON_MI_ARB_CHECK_ONLY );
}

//Programs LRI command of given enqueue that loads CTXT_PREMP_DEFAULT_VALUE.
void SetEnablePreemptionRegister(
    uint secondLevelBatchOffset,
    __global uint* secondaryBatchBuffer )
{
    PatchMiLoadRegisterImm( secondLevelBatchOffset,
                            secondaryBatchBuffer,
                            IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET,
                            CTXT_PREMP_DBG_ADDRESS_VALUE,
                            CTXT_PREMP_DEFAULT_VALUE );
}

//Fills with zeros OCLRT_IMM_LOAD_REGISTER_CMD_DEVICE_CMD_DWORD_OFFSET dwords
//starting at cmdOffset dwords from the beginning of given enqueue.
void NoopPreemptionCommand(
    uint secondLevelBatchOffset,
    uint cmdOffset,
    __global uint* secondaryBatchBuffer )
{
    uint CmdPacketStart = cmdOffset + secondLevelBatchOffset / DWORD_SIZE_IN_BYTES;
    for( int i = 0; i < OCLRT_IMM_LOAD_REGISTER_CMD_DEVICE_CMD_DWORD_OFFSET; i++ )
    {
        secondaryBatchBuffer[ CmdPacketStart + i ] = 0;
    }
}
#endif //WA_LRI_COMMANDS_EXIST

//Patches commands that launch scheduler itself in given SLB enqueue: GPGPU_WALKER, both MEDIA_STATE_FLUSH
//commands and, when space for them exists, LRI preemption commands. dsh parameter is not used here.
//PQueue is needed for SLBOffset
void AddCmdsInSLBforScheduler20Parallel( uint slbOffset,
                                         __global IGIL_CommandQueue* pQueue,
                                         __global uint * secondaryBatchBuffer,
                                         __global char * dsh )
{
    EMULATION_ENTER_FUNCTION( );
#ifdef SCHEDULER_EMULATION
    uint3 StartPoint = { 0, 0, 0 };
    uint3 DimSize = { get_num_groups( 0 ), 1, 1 };
#else
    uint3 StartPoint = ( uint3 )( 0 );
    uint3 DimSize = ( uint3 )( get_num_groups( 0 ), 1, 1 );
#endif
    patchGpGpuWalker( slbOffset,
                      secondaryBatchBuffer,
                      0,
                      PARALLEL_SCHEDULER_COMPILATION_SIZE_20,
                      get_local_size(0),
                      DimSize,
                      StartPoint,
                      PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20,
                      SIZEOF_3GRFS * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 + pQueue->m_controls.m_SchedulerConstantBufferSize,
                      pQueue->m_controls.m_SchedulerDSHOffset );

    PatchMediaStateFlush( slbOffset, secondaryBatchBuffer, 0, SCHEDULER_MSF_INITIAL );
    PatchMediaStateFlush( slbOffset, secondaryBatchBuffer, 0, SCHEDULER_MSF_SECOND );

    //When commands exists and scheduler does not require preemption off, noop the commands space
#if defined WA_LRI_COMMANDS_EXIST
#if defined WA_SCHEDULER_PREEMPTION
    if( pQueue->m_controls.m_EventTimestampAddress == 0u )
    {
        SetEnablePreemptionRegister( slbOffset, secondaryBatchBuffer );
        SetDisablePreemptionRegister( slbOffset, secondaryBatchBuffer );
    }
    else
    {
        NoopPreemptionCommand( slbOffset, IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET, secondaryBatchBuffer );
        NoopPreemptionCommand( slbOffset, IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET, secondaryBatchBuffer );
    }
#else
    //This is case, where LRI preemption is not required around scheduler WALKERs, but space for LRI commands exists, make sure they are nooped then
    NoopPreemptionCommand( slbOffset, IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET, secondaryBatchBuffer );
    NoopPreemptionCommand( slbOffset, IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET, secondaryBatchBuffer );
#endif //WA_SCHEDULER_PREEMPTION
#endif //WA_LRI_COMMANDS_EXIST
}

//Generates local ids in parallel, work items with local id below simdSize write one ushort
//of X, Y and Z ids for every hw thread. For simdSize 32 every id takes 2 GRFs per hw thread.
//Always returns 0.
int generateLocalIDSParallel20(
    __global char* dsh,
    uint3 localSize,
    uint hwThreads,
    uint simdSize )
{
    uint it, currX, currY, currZ, FlattendID;
    uint Max = 1;
    if( simdSize == 32 )
    {
        Max = 2;
    }

    //Update full GRFs, each WI generate ID for one work item in x,y and z
    //in case we generate SIMD8 payload using 16 wi , idle half of them
    if( get_local_id( 0 ) < simdSize )
    {
        for( it = 0; it < hwThreads; it++ )
        {
            for( uint multip = 0; multip < Max; multip++ )
            {
                //We are in simd 8, each wi process generation for 1 wi
                FlattendID = get_local_id( 0 ) + it * simdSize + 16 * ( multip );

                currX = FlattendID % localSize.x;
                currY = ( FlattendID / localSize.x ) % localSize.y;
                currZ = ( FlattendID / ( localSize.x * localSize.y ) );//not needed % localSize.z;

                //X ids go first, then Y ids and Z ids, each set takes Max GRFs
                *( __global ushort * )( dsh + get_local_id( 0 ) * 2 + it * GRF_SIZE * 3 * Max + multip * GRF_SIZE ) = ( ushort )currX;
                *( __global ushort * )( dsh + get_local_id( 0 ) * 2 + it * GRF_SIZE * 3 * Max + GRF_SIZE * Max + multip * GRF_SIZE ) = ( ushort )currY;
                *( __global ushort * )( dsh + get_local_id( 0 ) * 2 + it * GRF_SIZE * 3 * Max + GRF_SIZE * Max + GRF_SIZE * Max + multip * GRF_SIZE ) = ( ushort )currZ;
            }
        }
    }
    return 0;
}

//Function generate local ids.
//SIMD16 version
//Done by the calling work item only, for every hw thread 3 GRFs are written: X ids, Y ids and Z ids.
//Always returns 0.
int generateLocalIDSsimd16(
    __global char* dsh,
    uint3 localSize,
    uint hwThreads)
{
    typedef union
    {
        ushort16 vectors;
        ushort varray[ 16 ];
    }vectorUnion;

    __private vectorUnion LidX;
    __private vectorUnion LidY;
    __private vectorUnion LidZ;

    __private ushort currX = 0;
    __private ushort currY = 0;
    __private ushort currZ = 0;

    //Assuming full load of hw thread , remainder done separately
    for(uint it = 0; it < hwThreads; it++ )
    {
        //This will be unrolled by compiler
        for(uint x = 0; x < 16; x++ )
        {
            LidX.varray[ x ] = currX++;
            LidY.varray[ x ] = currY;
            LidZ.varray[ x ] = currZ;

            //Carry from X to Y and from Y to Z, ids continue across hw threads
            if( currX == localSize.x )
            {
                currX = 0;
                currY++;
            }

            if( currY == localSize.y )
            {
                currY = 0;
                currZ++;
            }
        }

        *( __global ushort16 * )( dsh + it * GRF_SIZE * 3 ) = LidX.vectors;
        *( __global ushort16 * )( dsh + it * GRF_SIZE * 3 + GRF_SIZE ) = LidY.vectors;
        *( __global ushort16 * )( dsh + it * GRF_SIZE * 3 + GRF_SIZE + GRF_SIZE ) = LidZ.vectors;
    }

    return 0;
}

//Function generate local ids.
//SIMD8 version
//Done by the calling work item only, for every hw thread 3 GRFs are written: X ids, Y ids and Z ids.
//Always returns 0.
int generateLocalIDSsimd8(
    __global char* dsh,
    uint3 localSize,
    uint hwThreads)
{
    typedef union
    {
        ushort8 vectors;
        ushort varray[ 8 ];
    }vectorUnion;

    __private vectorUnion LidX;
    __private vectorUnion LidY;
    __private vectorUnion LidZ;

    __private ushort currX = 0;
    __private ushort currY = 0;
    __private ushort currZ = 0;

    //Assuming full load of hw thread , remainder done separately
    for(uint it = 0; it < hwThreads; it++ )
    {
        //This will be unrolled by compiler
        for(uint x = 0; x < 8; x++ )
        {
            LidX.varray[ x ] = currX++;
            LidY.varray[ x ] = currY;
            LidZ.varray[ x ] = currZ;

            //Carry from X to Y and from Y to Z, ids continue across hw threads
            if( currX == localSize.x )
            {
                currX = 0;
                currY++;
            }

            if( currY == localSize.y )
            {
                currY = 0;
                currZ++;
            }
        }

        *( __global ushort8 * )( dsh + it * GRF_SIZE * 3 ) = LidX.vectors;
        *( __global ushort8 * )( dsh + it * GRF_SIZE * 3 + GRF_SIZE ) = LidY.vectors;
        *( __global ushort8 * )( dsh + it * GRF_SIZE * 3 + GRF_SIZE + GRF_SIZE ) = LidZ.vectors;
    }

    return 0;
}

//Function patches a curbe parameter , this version of function supports only these curbe tokens that may appear only once
// currentIndex - index of the token in pKernelCurbeParams
// tokenType - expected token type, verified only when SCHEDULER_DEBUG_MODE is enabled
// value - dword written to pDsh at m_patchOffset of the token
// returns index of the next token, or -1 in debug mode when token type does not match
int PatchDSH1Token( int currentIndex, uint tokenType, __global IGIL_KernelCurbeParams* pKernelCurbeParams, __global char* pDsh,
                    uint value )
{
    EMULATION_ENTER_FUNCTION( );

    uint PatchOffset;
#if SCHEDULER_DEBUG_MODE
    //If we are here it means that mask is ok and there is a curbe token that needs to be patched, verify its type first
    if( pKernelCurbeParams[ currentIndex ].m_parameterType != tokenType )
    {
        return -1;
    }
#endif
    PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;
    *( __global uint * )( &pDsh[ PatchOffset ] ) = value;
    currentIndex++;
    return currentIndex;
}

//Patches offsets of local memory kernel arguments. First token is patched with 0, every next token
//of the same type is patched with the running sum of aligned sizes of the preceding local memory arguments.
//Sizes are read from command packet data, behind dependencies, scalar arguments and global captured buffers.
// returns index of the first token that is not of tokenType, or -1 in debug mode when first token type does not match
int PatchLocalMemEntities( int currentIndex,
                           uint tokenType,
                           __global IGIL_KernelCurbeParams* pKernelCurbeParams,
                           __global char* pDsh,
                           __global IGIL_CommandHeader* pCommand )
{
    uint PatchOffset;
#if SCHEDULER_DEBUG_MODE
    //If we are here it means that mask is ok and there is at least one curbe token that needs to be patched, verify its type first
    if( pKernelCurbeParams[ currentIndex ].m_parameterType != tokenType )
    {
        return -1;
    }
#endif
    //First patch is with 0
    PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;

    //SUM_OF_LOCAL_MEMORY_KERNEL_ARGS can be a 4 or 8 byte patch
    if( pKernelCurbeParams[currentIndex].m_parameterSize == sizeof( ulong ) )
    {
        *( __global ulong * )( &pDsh[PatchOffset] ) = 0;
    }
    else
    {
        *( __global uint * )( &pDsh[ PatchOffset ] ) = 0;
    }
    currentIndex++;

    uint Alignement;
    uint iter = 0;
    uint CurrentSum = 0;
    uint CurrentValue;

    //For each global captured there will be uint with index and ulong with address.
    uint GlobalPointersSize = ( pCommand->m_numGlobalCapturedBuffer * ( sizeof( ulong ) + sizeof( uint ) ) ) / sizeof( uint );

    __global uint* pLocalMemSizes = &pCommand->m_data[ pCommand->m_numDependencies + pCommand->m_numScalarArguments + GlobalPointersSize ];

    //Check if there is second surface
    while( pKernelCurbeParams[ currentIndex ].m_parameterType == tokenType )
    {
        PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;

        //Value needs to be aligned to the value stored in sourceoffset
        Alignement = OCLRT_MAX( DWORD_SIZE_IN_BYTES, pKernelCurbeParams[ currentIndex ].m_sourceOffset );

        CurrentValue = pLocalMemSizes[ iter ];
        CurrentValue = OCLRT_ALIGN( CurrentValue, Alignement );
        CurrentSum += CurrentValue;

        //SUM_OF_LOCAL_MEMORY_KERNEL_ARGS can be a 4 or 8 byte patch
        if( pKernelCurbeParams[currentIndex].m_parameterSize == sizeof( ulong ) )
        {
            *( __global ulong * )( &pDsh[PatchOffset] ) = ( ulong )CurrentSum;
        }
        else
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = CurrentSum;
        }

        currentIndex++;
        iter++;
    }
    return currentIndex;
}

//Function patches a curbe parameter , this version of function supports only these curbe tokens that may appear only once
//Only work item with local id equal to PARALLEL_SCHEDULER_COMPILATION_SIZE_20 writes, all work items get the next index.
//tokenType is not used here.
int PatchDSH1TokenParallel20( int currentIndex, uint tokenType, __global IGIL_KernelCurbeParams* pKernelCurbeParams, __global char* pDsh,
                              uint value )
{
    EMULATION_ENTER_FUNCTION( );

    uint PatchOffset;
    if( get_local_id( 0 ) == PARALLEL_SCHEDULER_COMPILATION_SIZE_20 )
    {
        PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;
        *( __global uint * ) ( &pDsh[ PatchOffset ] ) = value;
    }
    currentIndex++;
    return currentIndex;
}

//Function patches a curbe parameter, this version of function works on 3d curbe tokens
//It assumes that at least 3 tokens exists, then checks if 3 additional patches are needed
//Token with index currentIndex + n is patched by work item with local id PARALLEL_SCHEDULER_COMPILATION_SIZE_20 + n.
//NOTE(review): work items with local id below PARALLEL_SCHEDULER_COMPILATION_SIZE_20 also pass the check below
//and compute WorkingOffset below currentIndex - confirm that callers never enter with such ids.
int PatchDSH6TokensParallel20( int currentIndex, uint tokenType, __global IGIL_KernelCurbeParams* pKernelCurbeParams, __global char* pDsh,
                               uint value1, uint value2, uint value3 )
{
    EMULATION_ENTER_FUNCTION( );

    uint PatchOffset, SourceOffset;
    uint WorkingOffset;
    uint ShiftSize;

    //Check if we patch 3 or 6 curbe tokens
    if( pKernelCurbeParams[ currentIndex + 3 ].m_parameterType == tokenType )
    {
        ShiftSize = 6;
    }
    else
    {
        ShiftSize = 3;
    }

    if( get_local_id( 0 ) < PARALLEL_SCHEDULER_COMPILATION_SIZE_20 + ShiftSize )
    {
        WorkingOffset = currentIndex + get_local_id( 0 ) - PARALLEL_SCHEDULER_COMPILATION_SIZE_20;
        PatchOffset = pKernelCurbeParams[ WorkingOffset ].m_patchOffset;
        SourceOffset = pKernelCurbeParams[ WorkingOffset ].m_sourceOffset;

        //m_sourceOffset selects component: 0 - first, 4 - second, 8 - third
        if( SourceOffset == 0 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = value1;
        }
        else if( SourceOffset == 4 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = value2;
        }
        else if( SourceOffset == 8 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = value3;
        }
    }

    currentIndex += ShiftSize;
    return currentIndex;
}

//Patches local work size curbe tokens, first 3 tokens are patched with enqLocal values,
//optional second set of 3 tokens is patched with cutLocal values.
// returns index of the first token behind the patched ones
int PatchLocalWorkSizes( int currentIndex, uint tokenType, __global IGIL_KernelCurbeParams* pKernelCurbeParams, __global char* pDsh,
                         uint enqLocalX, uint enqLocalY, uint enqLocalZ, uint cutLocalX, uint cutLocalY, uint cutLocalZ )
{
    EMULATION_ENTER_FUNCTION( );

    uint PatchOffset, SourceOffset;

    //Tokens are sorted by m_sourceOffset, it means that first 3 keys are always used to compute global_id and are always present
    for( uint it = 0; it < 3; it++ )
    {
        PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;
        SourceOffset = pKernelCurbeParams[ currentIndex ].m_sourceOffset;

        if( SourceOffset == 0 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = enqLocalX;
        }
        else if( SourceOffset == 4 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = enqLocalY;
        }
        else if( SourceOffset == 8 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = enqLocalZ;
        }
        currentIndex++;
    }
    //If there are 3 more tokens, it means that get_local_size is used within a kernel, to deal with it patch with the second set of variables
    if( pKernelCurbeParams[ currentIndex ].m_parameterType == tokenType )
    {
        for( uint it = 0; it < 3; it++ )
        {
            PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;
            SourceOffset = pKernelCurbeParams[ currentIndex ].m_sourceOffset;

            if( SourceOffset == 0 )
            {
                *( __global uint * )( &pDsh[ PatchOffset ] ) = cutLocalX;
            }
            else if( SourceOffset == 4 )
            {
                *( __global uint * )( &pDsh[ PatchOffset ] ) = cutLocalY;
            }
            else if( SourceOffset == 8 )
            {
                *( __global uint * )( &pDsh[ PatchOffset ] ) = cutLocalZ;
            }
            currentIndex++;
        }
    }
    return currentIndex;
}

//Function patches a curbe parameter, this version of function works on 3d curbe tokens
//It assumes that at least 3 tokens exists, then checks if 3 additional patches are needed
//Patching is done by a single work item, all work items get the index behind the patched tokens.
//NOTE(review): single threaded version is called with SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE while the number
//of tokens is counted using tokenType - confirm callers always pass that token type.
int PatchLocalWorkSizesParallel( int currentIndex, uint tokenType, __global IGIL_KernelCurbeParams* pKernelCurbeParams, __global char* pDsh,
                                 uint enqLocalX, uint enqLocalY, uint enqLocalZ, uint cutLocalX, uint cutLocalY, uint cutLocalZ )
{
    EMULATION_ENTER_FUNCTION( );

    uint ShiftSize;

    //Check if we patch 3 or 6 curbe tokens
    if( pKernelCurbeParams[ currentIndex + 3 ].m_parameterType == tokenType )
    {
        ShiftSize = 6;
    }
    else
    {
        ShiftSize = 3;
    }

    //Use single threaded version
    if( get_local_id( 0 ) == PARALLEL_SCHEDULER_COMPILATION_SIZE_20 )
    {
        PatchLocalWorkSizes( currentIndex, SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE, pKernelCurbeParams, pDsh, enqLocalX, enqLocalY, enqLocalZ, cutLocalX, cutLocalY, cutLocalZ );
    }

    currentIndex += ShiftSize;
    return currentIndex;
}

//Function patches a curbe parameter, this version of function works
//on 3d curbe tokens
//It assumes that at least 3 tokens exists, then checks if 3 additional patches are needed
// currentIndex - index of the first token in pKernelCurbeParams
// tokenType - type of the patched tokens, used to detect second set of 3 tokens
// value1, value2, value3 - values for tokens with m_sourceOffset equal to 0, 4 and 8
// returns index of the first token behind the patched ones, or -1 in debug mode when first token type does not match
int PatchDSH6Tokens( int currentIndex, uint tokenType, __global IGIL_KernelCurbeParams* pKernelCurbeParams, __global char* pDsh,
                     uint value1, uint value2, uint value3 )
{
    EMULATION_ENTER_FUNCTION( );

    uint PatchOffset, SourceOffset;
#if SCHEDULER_DEBUG_MODE
    //If we are here it means that mask is ok and there are at least 3 curbe tokens that needs to be patched, do it right away
    if( pKernelCurbeParams[ currentIndex ].m_parameterType != tokenType )
    {
        return -1;
    }
#endif
    for( uint it = 0; it < 3; it++ )
    {
        PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;
        SourceOffset = pKernelCurbeParams[ currentIndex ].m_sourceOffset;

        if( SourceOffset == 0 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = value1;
        }
        else if( SourceOffset == 4 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = value2;
        }
        else if( SourceOffset == 8 )
        {
            *( __global uint * )( &pDsh[ PatchOffset ] ) = value3;
        }
        currentIndex++;
    }
    //Check if there are 3 more.
    if( pKernelCurbeParams[ currentIndex ].m_parameterType == tokenType )
    {
        //Second set of tokens is patched with the same values
        for( uint it = 0; it < 3; it++ )
        {
            PatchOffset = pKernelCurbeParams[ currentIndex ].m_patchOffset;
            SourceOffset = pKernelCurbeParams[ currentIndex ].m_sourceOffset;

            if( SourceOffset == 0 )
            {
                *( __global uint * )( &pDsh[ PatchOffset ] ) = value1;
            }
            else if( SourceOffset == 4 )
            {
                *( __global uint * )( &pDsh[ PatchOffset ] ) = value2;
            }
            else if( SourceOffset == 8 )
            {
                *( __global uint * )( &pDsh[ PatchOffset ] ) = value3;
            }
            currentIndex++;
        }
    }

    return currentIndex;
}

//Common code
//Returns pointer located offset bytes from the beginning of kernel reflection header.
inline __global char* GetPtrToCurbeData( uint offset, __global IGIL_KernelDataHeader * pKernelReflection )
{
    __global char * pRawKernelReflection = ( __global char * )pKernelReflection;
    return ( pRawKernelReflection + offset );
}

//Returns pointer located offset bytes from the beginning of kernel reflection header.
__global char* GetPtrToKernelReflectionOffset( uint offset, __global IGIL_KernelDataHeader * pKernelReflection )
{
    __global char * pRawKernelReflection = ( __global char * )pKernelReflection;
    return ( pRawKernelReflection + offset );
}

void InitWalkerDataParallel( __local IGIL_WalkerEnumeration* pWalkerEnumData,
                             uint workDim,
                             uint* pWalkerCount,
                             uint3 edgeArray,
                             uint3 globalDim,
                             uint3 globalSizes,
                             uint3 localSizes )
{
    EMULATION_ENTER_FUNCTION( );

    pWalkerEnumData->TotalDimSize.x = globalDim.x;
    pWalkerEnumData->TotalDimSize.y = globalDim.y;
    pWalkerEnumData->TotalDimSize.z = globalDim.z;

    pWalkerEnumData->WalkerArray[ 0 ].ActualLocalSize.x = localSizes.x;
    pWalkerEnumData->WalkerArray[ 0 ].WalkerStartPoint.x = 0;
    pWalkerEnumData->WalkerArray[ 0 ].WalkerDimSize.x = globalDim.x;

    pWalkerEnumData->WalkerArray[ 0 ].ActualLocalSize.y = localSizes.y;
    pWalkerEnumData->WalkerArray[ 0 ].WalkerStartPoint.y = 0;
    pWalkerEnumData->WalkerArray[ 0 ].WalkerDimSize.y = globalDim.y;

    pWalkerEnumData->WalkerArray[ 0 ].ActualLocalSize.z = localSizes.z;
    pWalkerEnumData->WalkerArray[ 0 ].WalkerStartPoint.z = 0;
    pWalkerEnumData->WalkerArray[ 0 ].WalkerDimSize.z = globalDim.z;

    uint WalkerCount = 1;

    if( edgeArray.x != 0 )
    {
pWalkerEnumData->TotalDimSize.x++; pWalkerEnumData->WalkerArray[ 1 ].ActualLocalSize.x = edgeArray.x; pWalkerEnumData->WalkerArray[ 1 ].WalkerStartPoint.x = globalDim.x; pWalkerEnumData->WalkerArray[ 1 ].WalkerDimSize.x = pWalkerEnumData->TotalDimSize.x; pWalkerEnumData->WalkerArray[ 1 ].ActualLocalSize.y = localSizes.y; pWalkerEnumData->WalkerArray[ 1 ].WalkerStartPoint.y = 0; pWalkerEnumData->WalkerArray[ 1 ].WalkerDimSize.y = globalDim.y; pWalkerEnumData->WalkerArray[ 1 ].ActualLocalSize.z = localSizes.z; pWalkerEnumData->WalkerArray[ 1 ].WalkerStartPoint.z = 0; pWalkerEnumData->WalkerArray[ 1 ].WalkerDimSize.z = globalDim.z; WalkerCount++; } if( workDim > 1 ) { if( edgeArray.y != 0 ) { pWalkerEnumData->TotalDimSize.y++; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.x = localSizes.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.x = 0; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.x = globalDim.x; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.y = edgeArray.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.y = globalDim.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.y = pWalkerEnumData->TotalDimSize.y; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.z = localSizes.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.z = 0; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.z = globalDim.z; WalkerCount++; } if( ( edgeArray.x != 0 ) & ( edgeArray.y != 0 ) ) { pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.x = edgeArray.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.x = globalDim.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.x = pWalkerEnumData->TotalDimSize.x; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.y = edgeArray.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.y = globalDim.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.y = pWalkerEnumData->TotalDimSize.y; 
pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.z = localSizes.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.z = 0; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.z = globalDim.z; WalkerCount++; } if( workDim > 2 ) { if( edgeArray.z != 0 ) { pWalkerEnumData->TotalDimSize.z++; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.x = localSizes.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.x = 0; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.x = globalDim.x; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.y = localSizes.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.y = 0; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.y = globalDim.y; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.z = edgeArray.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.z = globalDim.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.z = pWalkerEnumData->TotalDimSize.z; WalkerCount++; } if( ( edgeArray.x != 0 ) & ( edgeArray.z != 0 ) ) { pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.x = edgeArray.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.x = globalDim.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.x = pWalkerEnumData->TotalDimSize.x; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.y = localSizes.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.y = 0; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.y = globalDim.y; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.z = edgeArray.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.z = globalDim.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.z = pWalkerEnumData->TotalDimSize.z; WalkerCount++; } if( ( edgeArray.y != 0 ) & ( edgeArray.z != 0 ) ) { pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.x = localSizes.x; pWalkerEnumData->WalkerArray[ WalkerCount 
].WalkerStartPoint.x = 0; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.x = globalDim.x; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.y = edgeArray.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.y = globalDim.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.y = pWalkerEnumData->TotalDimSize.y; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.z = edgeArray.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.z = globalDim.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.z = pWalkerEnumData->TotalDimSize.z; WalkerCount++; } if( ( edgeArray.x != 0 ) & ( edgeArray.y != 0 ) & ( edgeArray.z != 0 ) ) { pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.x = edgeArray.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.x = globalDim.x; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.x = pWalkerEnumData->TotalDimSize.x; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.y = edgeArray.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.y = globalDim.y; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.y = pWalkerEnumData->TotalDimSize.y; pWalkerEnumData->WalkerArray[ WalkerCount ].ActualLocalSize.z = edgeArray.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerStartPoint.z = globalDim.z; pWalkerEnumData->WalkerArray[ WalkerCount ].WalkerDimSize.z = pWalkerEnumData->TotalDimSize.z; WalkerCount++; } } } *pWalkerCount = WalkerCount; } //Compute number of Walkers needed for this command packet, this function assumes that command packet is initialized inline int GetWalkerCount( __global IGIL_CommandHeader* pCommand ) { int WalkerCount = 1; for( uint dim = 0; ( dim < pCommand->m_range.m_dispatchDimensions ); dim++ ) { if( ( pCommand->m_range.m_globalWorkSize[ dim ] % pCommand->m_range.m_localWorkSize[ dim ] ) != 0 ) { WalkerCount *= 2; } } return WalkerCount; } //This function intializes command packet, checks for null case, sets 
//proper LWS sizes and return WalkerCount needed for this packet
//NOTE: despite the sentence above the function is declared void and returns nothing;
//the walker count is computed separately by GetWalkerCount( ).
//Only m_localWorkSize[ 0 ] == 0 is treated as the "NULL" ( unspecified ) local size case.
//In that case dimension 0 gets min( 16, m_globalWorkSize[ 0 ] ) and dimensions 1 and 2 get 1.
//The command packet is modified in place.
inline void InitializeCommandPacket( __global IGIL_CommandHeader* pCommand )
{
    EMULATION_ENTER_FUNCTION( );
    //Check for NULL case
    if( pCommand->m_range.m_localWorkSize[ 0 ] == 0 )
    {
        //If null case detected use 16 x 1 x 1 lws
        if( pCommand->m_range.m_globalWorkSize[ 0 ] >= 16 )
        {
            pCommand->m_range.m_localWorkSize[ 0 ] = 16;
        }
        else
        {
            //Global size smaller than 16: one work group covers the whole X range
            pCommand->m_range.m_localWorkSize[ 0 ] = pCommand->m_range.m_globalWorkSize[ 0 ];
        }
        pCommand->m_range.m_localWorkSize[ 1 ] = 1;
        pCommand->m_range.m_localWorkSize[ 2 ] = 1;
    }
}

//Patches the address for pipe control
//  secondaryBatchBuffer - batch buffer addressed in dwords
//  slBoffset            - byte offset into the buffer, converted to dwords below
//  address              - 64 bit address; its low part is written to the address dword,
//                         bits 32..63 to the dword that follows it
//  pipeControlOffset    - dword offset of the pipe control command relative to slBoffset
//Both address dwords are zeroed before being patched. The post sync field is then set
//to PIPE_CONTROL_GENERATE_TIME_STAMP.
void PatchPipeControlProfilingAddres( __global uint* secondaryBatchBuffer, uint slBoffset, ulong address, uint pipeControlOffset )
{
    EMULATION_ENTER_FUNCTION( );
    //SlbOffset is expressed in bytes and for cmd it is needed to convert it to dwords
    uint PostSyncDwordOffset = ( slBoffset / DWORD_SIZE_IN_BYTES ) + pipeControlOffset + PIPE_CONTROL_POST_SYNC_DWORD;
    uint DwordOffset = ( slBoffset / DWORD_SIZE_IN_BYTES ) + pipeControlOffset + PIPE_CONTROL_ADDRESS_FIELD_DWORD;
    //Patch P_C event timestamp address in SLB in 3rd and 4th dword
    secondaryBatchBuffer[ DwordOffset ] = 0;
    patchDword( &secondaryBatchBuffer[ DwordOffset ], PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT, PIPE_CONTROL_GRAPHICS_ADDRESS_END_BIT, ( uint )( address >> PIPE_CONTROL_GRAPHICS_ADDRESS_START_BIT ) );
    DwordOffset++;
    secondaryBatchBuffer[ DwordOffset ] = 0;
    //High part of the address; unlike the low part it is passed without an explicit ( uint ) cast
    patchDword( &secondaryBatchBuffer[ DwordOffset ], PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_START_BIT, PIPE_CONTROL_GRAPHICS_ADDRESS_HIGH_END_BIT, ( address >> 32 ) );
    //Patch Timestamp bit
    patchDword( &secondaryBatchBuffer[ PostSyncDwordOffset ], PIPE_CONTROL_POST_SYNC_START_BIT, PIPE_CONTROL_POST_SYNC_END_BIT, PIPE_CONTROL_GENERATE_TIME_STAMP );
}

//Sets the post sync field of a pipe control command to PIPE_CONTROL_NO_POSTSYNC_OPERATION.
//Offsets have the same meaning as in PatchPipeControlProfilingAddres( ); the address
//dwords are not touched.
void DisablePostSyncBitInPipeControl( __global uint* secondaryBatchBuffer, uint slBoffset, uint pipeControlOffset )
{
    //SlbOffset is expressed in bytes and for cmd it is needed to convert it to dwords
    uint PostSyncDwordOffset = ( slBoffset / DWORD_SIZE_IN_BYTES ) + pipeControlOffset + PIPE_CONTROL_POST_SYNC_DWORD;
    //Patch the post sync operation field so that no post sync operation is requested
    patchDword( &secondaryBatchBuffer[ PostSyncDwordOffset ], PIPE_CONTROL_POST_SYNC_START_BIT, PIPE_CONTROL_POST_SYNC_END_BIT, PIPE_CONTROL_NO_POSTSYNC_OPERATION );
}

//Prepares the dynamic state heap and second level batch entries for one walker of an enqueued block.
//Steps, in order:
//  1. copy sampler heap + constant buffer of block blockId from the kernel reflection data to dsh[ dshOffset ]
//  2. patch curbe tokens; token groups are visited in the fixed order of the checks below and
//     only when the corresponding bit is set in the block's m_PatchTokensMask
//  3. generate local ids ( SIMD8 or SIMD16 variant ) behind the constant buffer
//  4. patch interface descriptor, GPGPU walker and both media state flush commands
//  pWalkerMain / walkerPos select the walker whose actual local size, dim size and start point are used.
//Returns 0. With SCHEDULER_DEBUG_MODE enabled returns -1 after storing an error code in
//pCommandHeader->m_commandState when a token group is missing or the scalar sizes mismatch.
int PatchDSH( __global IGIL_CommandQueue* pQueue, __global IGIL_KernelDataHeader * pKernelReflection, __global char* dsh, uint blockId, __global IGIL_CommandHeader* pCommandHeader, __global uint* secondaryBatchBuffer, uint dshOffset, uint intefaceDescriptorOffset, __local IGIL_WalkerEnumeration* pWalkerMain, uint walkerPos )
{
    EMULATION_ENTER_FUNCTION( );
    __global IGIL_KernelAddressData* pKernelAddressData = IGIL_GetKernelAddressData( pKernelReflection, blockId );
    __global IGIL_KernelData* pBlockData = IGIL_GetKernelData( pKernelReflection, blockId );
    ulong PatchMask = pBlockData->m_PatchTokensMask;
    //NOTE: unsigned, while the Patch* helpers return int; the debug checks below compare it
    //against -1, which relies on -1 being converted to uint for the comparison
    uint CurrentIndex = 0;
    __global char* pDsh = ( __global char* )&dsh[ dshOffset ];
    __global IGIL_KernelCurbeParams* pKernelCurbeParams = ( __global IGIL_KernelCurbeParams* )&pBlockData->m_data;
    uint NumberOfDepencies = pCommandHeader->m_numDependencies;
    uint PatchOffset;
    //Scalar arguments start in m_data right after the dependency entries
    __global char* pScalarData = ( __global char* )( &pCommandHeader->m_data[ NumberOfDepencies ] );
    __global char *pDshOnKRS = GetPtrToKernelReflectionOffset( pKernelAddressData->m_SamplerHeapOffset, pKernelReflection );
    uint SizeOfScalarsFromCurbe = 0;
    uint CurbeSize;
    uint TotalLocalSize;
    uint ThreadPayloadSize;
    uint NumberOfHWThreads;
    uint WorkDim;
    uint3 GlobalOffset;
    uint3 GlobalSizes;
    uint3 ActualLocalSize;
    GlobalOffset.x = ( uint )pCommandHeader->m_range.m_globalWorkOffset[ 0 ];
    GlobalOffset.y = ( uint )pCommandHeader->m_range.m_globalWorkOffset[ 1 ];
    GlobalOffset.z = ( uint )pCommandHeader->m_range.m_globalWorkOffset[ 2 ];
    GlobalSizes.x = ( uint )pCommandHeader->m_range.m_globalWorkSize[ 0 ];
    GlobalSizes.y = ( uint )pCommandHeader->m_range.m_globalWorkSize[ 1 ];
    GlobalSizes.z = ( uint )pCommandHeader->m_range.m_globalWorkSize[ 2 ];
    ActualLocalSize.x = pWalkerMain->WalkerArray[ walkerPos ].ActualLocalSize.x;
    ActualLocalSize.y = pWalkerMain->WalkerArray[ walkerPos ].ActualLocalSize.y;
    ActualLocalSize.z = pWalkerMain->WalkerArray[ walkerPos ].ActualLocalSize.z;
    WorkDim = pCommandHeader->m_range.m_dispatchDimensions;
    TotalLocalSize = ActualLocalSize.x * ActualLocalSize.y * ActualLocalSize.z;
    //Number of HW threads = TotalLocalSize / SIMD size, rounded up
    NumberOfHWThreads = TotalLocalSize / pBlockData->m_SIMDSize;
    if( TotalLocalSize % pBlockData->m_SIMDSize )
    {
        NumberOfHWThreads++;
    }
    //Payload is sized as 3 * GRF_SIZE bytes per HW thread
    ThreadPayloadSize = NumberOfHWThreads * 3 * GRF_SIZE;
    //Copy constant buffer to designated area on DSH.
    //pDshOnKRS seems to be in the global address space, not the private address space
    //Copy SamplerState and Constant Buffer at once
    IGILLOCAL_MEMCPY_GTOG( pDsh, pDshOnKRS, pBlockData->m_sizeOfConstantBuffer + pBlockData->m_SizeOfSamplerHeap );
    if( PatchMask & SCHEDULER_DATA_PARAMETER_KERNEL_ARGUMENT_MASK )
    {
        //Kernel argument tokens are consumed one by one; each copies m_parameterSize bytes
        //from the packed scalar data to the token's patch offset
        while( pKernelCurbeParams[ CurrentIndex ].m_parameterType == SCHEDULER_DATA_PARAMETER_KERNEL_ARGUMENT )
        {
            CurbeSize = pKernelCurbeParams[ CurrentIndex ].m_parameterSize;
            SizeOfScalarsFromCurbe += CurbeSize;
            PatchOffset = pKernelCurbeParams[ CurrentIndex ].m_patchOffset;
            //pScalarData is in the global address space, not the private address space
            IGILLOCAL_MEMCPY_GTOG( &pDsh[ PatchOffset ], pScalarData, CurbeSize );
            pScalarData += CurbeSize;
            CurrentIndex++;
        }
#if SCHEDULER_DEBUG_MODE
        if( pCommandHeader->m_sizeOfScalarArguments != SizeOfScalarsFromCurbe )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_ARGUMENTS_SIZE_MISMATCH;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE_MASK )
    {
        //First triple gets the enqueued local size, the optional second triple the walker's actual local size
        CurrentIndex = PatchLocalWorkSizes( CurrentIndex, SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE, pKernelCurbeParams, pDsh, pWalkerMain->LocalWorkSize.x, pWalkerMain->LocalWorkSize.y, pWalkerMain->LocalWorkSize.z, ActualLocalSize.x, ActualLocalSize.y, ActualLocalSize.z );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_SIZE_MASK )
    {
        CurrentIndex = PatchDSH6Tokens( CurrentIndex, SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_SIZE, pKernelCurbeParams, pDsh, GlobalSizes.x, GlobalSizes.y, GlobalSizes.z );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_NUM_WORK_GROUPS_MASK )
    {
        CurrentIndex = PatchDSH6Tokens( CurrentIndex, SCHEDULER_DATA_PARAMETER_NUM_WORK_GROUPS, pKernelCurbeParams, pDsh, pWalkerMain->TotalDimSize.x, pWalkerMain->TotalDimSize.y, pWalkerMain->TotalDimSize.z );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_WORK_DIMENSIONS_MASK )
    {
        CurrentIndex = PatchDSH1Token( CurrentIndex, SCHEDULER_DATA_PARAMETER_WORK_DIMENSIONS, pKernelCurbeParams, pDsh, WorkDim );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES_MASK )
    {
        CurrentIndex = PatchLocalMemEntities( CurrentIndex, SCHEDULER_DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES, pKernelCurbeParams, pDsh, pCommandHeader );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_OFFSET_MASK )
    {
        CurrentIndex = PatchDSH6Tokens( CurrentIndex, SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_OFFSET, pKernelCurbeParams, pDsh, GlobalOffset.x, GlobalOffset.y, GlobalOffset.z );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_NUM_HARDWARE_THREADS_MASK )
    {
        CurrentIndex = PatchDSH1Token( CurrentIndex, SCHEDULER_DATA_PARAMETER_NUM_HARDWARE_THREADS, pKernelCurbeParams, pDsh, NumberOfHWThreads );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_PARENT_EVENT_MASK )
    {
        CurrentIndex = PatchDSH1Token( CurrentIndex, SCHEDULER_DATA_PARAMETER_PARENT_EVENT, pKernelCurbeParams, pDsh, pCommandHeader->m_event );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    if( PatchMask & SCHEDULER_DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE_MASK )
    {
        CurrentIndex = PatchDSH6Tokens( CurrentIndex, SCHEDULER_DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, pKernelCurbeParams, pDsh, pWalkerMain->LocalWorkSize.x, pWalkerMain->LocalWorkSize.y, pWalkerMain->LocalWorkSize.z );
#if SCHEDULER_DEBUG_MODE
        if( ( CurrentIndex == -1 ) || ( CurrentIndex >= pBlockData->m_numberOfCurbeParams ) )
        {
            pCommandHeader->m_commandState = SCHEDULER_CURBE_TOKEN_MISSED;
            return -1;
        }
#endif
    }
    //NOTE(review): every other check above tests a *_MASK constant; this one tests
    //SCHEDULER_DATA_PARAMETER_GLOBAL_POINTER directly - confirm that this constant is a mask bit
    if( PatchMask & SCHEDULER_DATA_PARAMETER_GLOBAL_POINTER )
    {
        if( pCommandHeader->m_numGlobalCapturedBuffer > 0 )
        {
            //Handle global pointers patching in stateless mode, info about layout in declaration of IGIL_CommandHeader
            //Indexes follow the scalar arguments in m_data, the pointer values follow the indexes
            __global uint* pGlobalIndexes = ( __global uint* ) ( &pCommandHeader->m_data[ NumberOfDepencies + pCommandHeader->m_numScalarArguments ] );
            __global uint* pGlobalPtrs = ( __global uint* ) ( &pCommandHeader->m_data[ NumberOfDepencies + pCommandHeader->m_numScalarArguments + pCommandHeader->m_numGlobalCapturedBuffer ] );
            uint StartIndex = CurrentIndex;
            //Argument in command header are not in correct sequence, that's why proper key needs to be located
            for( uint glIdx = 0 ; glIdx < pCommandHeader->m_numGlobalCapturedBuffer; glIdx++)
            {
                //Reset CurrentIndex as we need to start from the beginning.
                CurrentIndex = StartIndex;
                while( pKernelCurbeParams[ CurrentIndex ].m_parameterType == COMPILER_DATA_PARAMETER_GLOBAL_SURFACE )
                {
                    //Patch only if exact match occurs
                    if( pKernelCurbeParams[ CurrentIndex ].m_sourceOffset == *pGlobalIndexes )
                    {
                        PatchOffset = pKernelCurbeParams[ CurrentIndex ].m_patchOffset;
                        //64 bit patching
                        if( pKernelCurbeParams[ CurrentIndex ].m_parameterSize == 8 )
                        {
                            __global uint* pDst = (__global uint *) &pDsh[PatchOffset];
                            pDst[ 0 ] = pGlobalPtrs[ 0 ];
                            pDst[ 1 ] = pGlobalPtrs[ 1 ];
                        }
                        else
                        {
                            //Any other size: only the first ( low ) dword of the pointer is written
                            __global uint* pDst = ( __global uint* ) &pDsh[ PatchOffset ];
                            *pDst = ( uint ) *pGlobalPtrs;
                        }
                    }
                    CurrentIndex++;
                }
                //Every captured pointer occupies two dwords, every index one dword
                pGlobalPtrs += 2;
                pGlobalIndexes++;
            }
        }
    }
    //Now generate local IDS
    //Any SIMD size other than 8 takes the SIMD16 path
    if( pBlockData->m_SIMDSize == 8 )
    {
        generateLocalIDSsimd8( &pDsh[ pBlockData->m_sizeOfConstantBuffer ], ActualLocalSize, NumberOfHWThreads );
    }
    else
    {
        generateLocalIDSsimd16( &pDsh[ pBlockData->m_sizeOfConstantBuffer ], ActualLocalSize, NumberOfHWThreads );
    }
    uint TotalSLMSize = pCommandHeader->m_totalLocalSize + pBlockData->m_InilineSLMSize;
    //Update Interface Descriptor Data with SLM size / number of HW threads.
    CopyAndPatchIDData( dsh, blockId, NumberOfHWThreads, TotalSLMSize, intefaceDescriptorOffset, pQueue->m_controls.m_StartBlockID );
    //Add WalkerStartSize
    patchGpGpuWalker( pQueue->m_controls.m_SecondLevelBatchOffset, secondaryBatchBuffer, intefaceDescriptorOffset, pBlockData->m_SIMDSize, TotalLocalSize, pWalkerMain->WalkerArray[ walkerPos ].WalkerDimSize, pWalkerMain->WalkerArray[ walkerPos ].WalkerStartPoint, NumberOfHWThreads, pBlockData->m_sizeOfConstantBuffer + ThreadPayloadSize, dshOffset );
    PatchMediaStateFlush( pQueue->m_controls.m_SecondLevelBatchOffset, secondaryBatchBuffer, intefaceDescriptorOffset, SCHEDULER_MSF_INITIAL );
    PatchMediaStateFlush( pQueue->m_controls.m_SecondLevelBatchOffset, secondaryBatchBuffer, intefaceDescriptorOffset, SCHEDULER_MSF_SECOND );
    return 0;
}

//Returns: isSRGB(ChannelOrder) ? ChannelOrder : 0;
//The range test is done with a single unsigned comparison: values below CL_sRGB wrap
//around to a large AsSrgb and therefore fail the test as well.
//NOTE(review): NumSrgbFormats is CL_sBGRA - CL_sRGB and the comparison is strict, so
//channelOrder == CL_sBGRA itself yields 0 - confirm whether excluding it is intended.
inline uint GetSRGBChannelOrder( uint channelOrder )
{
    const uint AsSrgb = channelOrder - CL_sRGB;
    const uint NumSrgbFormats = CL_sBGRA - CL_sRGB;
    if( AsSrgb < NumSrgbFormats )
        return channelOrder;
    else
        return 0;
}

void PatchDSHParallelWithDynamicDSH20( uint slbOffsetBase, uint dshOffsetBase, uint intefaceDescriptorOffsetBase, __global IGIL_KernelDataHeader * pKernelReflection, __global char* dsh, uint blockId, __global IGIL_CommandHeader* pCommandHeader, __global uint* secondaryBatchBuffer, __global IGIL_CommandQueue* pQueue, __global IGIL_EventPool* eventsPool, __global char* ssh, uint btOffset, __local IGIL_WalkerEnumeration* pWalkerEnum, __local uint* objectIds
#ifdef ENABLE_DEBUG_BUFFER
, __global DebugDataBuffer* DebugQueue
#endif
)
{
    EMULATION_ENTER_FUNCTION( );
    __global IGIL_KernelAddressData* pKernelAddressData = IGIL_GetKernelAddressData( pKernelReflection, blockId );
    __global IGIL_KernelData* pBlockData = IGIL_GetKernelData( pKernelReflection, blockId );
    ulong PatchMask = pBlockData->m_PatchTokensMask;
    uint CurrentIndex = 0;
    __global IGIL_KernelCurbeParams* pKernelCurbeParams = ( __global IGIL_KernelCurbeParams*
)&pBlockData->m_data; uint NumberOfDepencies = pCommandHeader->m_numDependencies; uint PatchOffset; uint CurbeSize; //Get pointer to the Sampler State __global char *pDshOnKRS = GetPtrToKernelReflectionOffset( pKernelAddressData->m_SamplerHeapOffset, pKernelReflection ); uint WalkerCount = GetWalkerCount( pCommandHeader ); __global char *pKernelReflectionChar = ( __global char * ) pKernelReflection; __global IGIL_KernelCurbeParams* pSSHdata = ( __global IGIL_KernelCurbeParams* )&pKernelReflectionChar[ pKernelAddressData->m_SSHTokensOffset ]; //WALKER variables that will be propagated to SLB uint3 LocalSizes; uint3 GlobalSizes; uint3 GlobalOffset; uint3 EdgeArray; uint3 XYZDim; //X is always there GlobalOffset.x = ( uint )pCommandHeader->m_range.m_globalWorkOffset[ 0 ]; GlobalSizes.x = ( uint )pCommandHeader->m_range.m_globalWorkSize[ 0 ]; LocalSizes.x = ( uint )pCommandHeader->m_range.m_localWorkSize[ 0 ]; EdgeArray.x = GlobalSizes.x % LocalSizes.x; uint WorkDim = pCommandHeader->m_range.m_dispatchDimensions; XYZDim.x = GlobalSizes.x / LocalSizes.x; if( WorkDim > 1 ) { GlobalOffset.y = ( uint )pCommandHeader->m_range.m_globalWorkOffset[ 1 ]; GlobalSizes.y = ( uint )pCommandHeader->m_range.m_globalWorkSize[ 1 ]; LocalSizes.y = ( uint )pCommandHeader->m_range.m_localWorkSize[ 1 ]; EdgeArray.y = GlobalSizes.y % LocalSizes.y; XYZDim.y = GlobalSizes.y / LocalSizes.y; if( WorkDim > 2 ) { GlobalOffset.z = ( uint )pCommandHeader->m_range.m_globalWorkOffset[ 2 ]; GlobalSizes.z = ( uint )pCommandHeader->m_range.m_globalWorkSize[ 2 ]; LocalSizes.z = ( uint )pCommandHeader->m_range.m_localWorkSize[ 2 ]; XYZDim.z = GlobalSizes.z / LocalSizes.z; EdgeArray.z = GlobalSizes.z % LocalSizes.z; } else { GlobalOffset.z = 0; GlobalSizes.z = 1; LocalSizes.z = 1; EdgeArray.z = 0; XYZDim.z = 1; } } else { GlobalOffset.y = 0; GlobalOffset.z = 0; GlobalSizes.y = 1; GlobalSizes.z = 1; LocalSizes.y = 1; LocalSizes.z = 1; EdgeArray.z = 0; EdgeArray.y = 0; XYZDim.z = 1; XYZDim.y = 1; } if( 
get_local_id( 0 ) == 0 ) { InitWalkerDataParallel( pWalkerEnum, WorkDim, &WalkerCount, EdgeArray, XYZDim, GlobalSizes, LocalSizes ); } uint SLBOffset = slbOffsetBase; uint DshOffset = dshOffsetBase; uint IntefaceDescriptorOffset = intefaceDescriptorOffsetBase; __global uint* pArgumentIds = NULL; __global uint* pObjectIds = NULL; __global char* pLocalIdsOnDSH = NULL; uint SamplerHeapSize = pBlockData->m_SizeOfSamplerHeap; //Object ID is in fact surface state offset for parent in case of surfaces using SSH, copy SSH from parent to child. //Copy binding table state of this kernel to allocated place on ssh GLOBAL_MEMCPY( &ssh[ btOffset ], &ssh[ pKernelAddressData->m_BTSoffset ] , pKernelAddressData->m_BTSize ); for( uint WalkerID = 0; WalkerID < WalkerCount; WalkerID++ ) { //Update the offsets if( WalkerID > 0 ) { SLBOffset += SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE; SLBOffset %= MAX_SLB_OFFSET; IntefaceDescriptorOffset++; DshOffset += MAX_DSH_SIZE_PER_ENQUEUE; } __global char* pDsh = ( __global char* )&dsh[ DshOffset ]; pLocalIdsOnDSH = &pDsh[ pBlockData->m_sizeOfConstantBuffer + SamplerHeapSize ]; //Copy Sampler State and constant buffer on all threads GLOBAL_MEMCPY( pDsh, pDshOnKRS, pBlockData->m_sizeOfConstantBuffer + SamplerHeapSize ); barrier( CLK_GLOBAL_MEM_FENCE ); //Update BorderColorPointer on all threads if( pBlockData->m_numberOfSamplerStates ) { uint SamplerId = get_local_id( 0 ); __global uint* pSamplerState; while( SamplerId < pBlockData->m_numberOfSamplerStates ) { pSamplerState = ( __global uint* )&dsh[ DshOffset + pBlockData->m_SamplerStateArrayOffsetOnDSH + SamplerId * OCLRT_SIZEOF_SAMPLER_STATE ]; uint PatchValue = DshOffset >> 5; patchDword( &pSamplerState[ SAMPLER_STATE_DESCRIPTOR_BORDER_COLOR_POINTER_DWORD ], 5, 31, PatchValue ); SamplerId += PARALLEL_SCHEDULER_COMPILATION_SIZE_20 * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20; } } //Setup SSH if needed, do it only for first Walker as all Walkers will re-use the same binding table layout. 
if( ( pCommandHeader->m_numGlobalArguments > 0 ) & ( WalkerID == 0 ) ) { //Global arguments are after scalars, global pointers slm sizes and events uint offset = pCommandHeader->m_numDependencies + pCommandHeader->m_numScalarArguments + pCommandHeader->m_numOfLocalPtrSizes + ( pCommandHeader->m_numGlobalCapturedBuffer * ( sizeof( ulong ) + sizeof( uint ) ) / sizeof( uint ) ); pArgumentIds = &pCommandHeader->m_data[ offset ]; //Object IDS are located after Argument IDs pObjectIds = &pCommandHeader->m_data[ offset + pCommandHeader->m_numGlobalArguments ]; //Setup local memory for fast access for Curbe patching uint ArgId = get_local_id( 0 ); //Only third group Updates ObjectIDS, this will be synchronized with condition below if( ( ArgId >> HW_GROUP_ID_SHIFT( PARALLEL_SCHEDULER_COMPILATION_SIZE_20 ) ) == 2 ) { ArgId = ArgId - ( PARALLEL_SCHEDULER_COMPILATION_SIZE_20 << 1 ); while( ArgId < pCommandHeader->m_numGlobalArguments ) { objectIds[ pArgumentIds[ ArgId ] ] = pObjectIds[ ArgId ]; ArgId += PARALLEL_SCHEDULER_COMPILATION_SIZE_20; } } #ifdef SCHEDULER_EMULATION //Synchronization needed for Emulation, ObjectIDS needs to be set by whole HW group, on GPU there is implicit synchronization in HW group barrier( CLK_GLOBAL_MEM_FENCE ); #endif if( get_local_id( 0 ) == PARALLEL_SCHEDULER_COMPILATION_SIZE_20 * 2 ) { __global uint* pBindingTable = ( __global uint* ) &ssh[ btOffset ]; //To properly set up binding table point to parents surface state heap for( uint ArgumentID = 0 ; ArgumentID < pCommandHeader->m_numGlobalArguments; ArgumentID++ ) { uint ArgId = pArgumentIds[ ArgumentID ]; //Locate proper Arg ID //Get ssh offset, lookup table already provided if( objectIds[ ArgId ] < MAX_SSH_PER_KERNEL_SIZE ) { if( pSSHdata[ ArgId ].m_sourceOffset == ArgId ) { pBindingTable[ pSSHdata[ ArgId ].m_patchOffset ] = objectIds[ ArgId ]; } else { pQueue->m_controls.m_ErrorCode += 10; uint CurrentArg = 0; while( CurrentArg < pKernelAddressData->m_BTSize / 4 ) { if( pSSHdata[ CurrentArg 
].m_sourceOffset == ArgId ) { pBindingTable[ pSSHdata[ CurrentArg ].m_patchOffset ] = objectIds[ ArgId ]; break; } CurrentArg++; } } } } } } if( ( PatchMask & SCHEDULER_DATA_PARAMETER_SAMPLER_MASK ) ) { if( get_local_id( 0 ) == 2 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20 ) { for( uint ArgumentID = 0; ArgumentID < pCommandHeader->m_numGlobalArguments; ArgumentID++ ) { uint ArgId = pArgumentIds[ ArgumentID ]; if( ( objectIds[ ArgId ] >= MAX_SSH_PER_KERNEL_SIZE ) ) { uint SamplerCount = 0; //Get pointer to Parent's samplers ( arguments ) data stored on KRS __global IGIL_SamplerParams* pSamplerParamsOnKRS = ( __global IGIL_SamplerParams* )GetPtrToKernelReflectionOffset( pKernelAddressData->m_SamplerParamsOffset, pKernelReflection ); //Iterate through all samplers passed from parent and copy state to proper SSA offset while( pKernelReflection->m_ParentSamplerCount > SamplerCount ) { //Get offset in parent's SSA from ObjectID, offset to beginning of SSA is included ( before SSA is BorderColorPointer ) so this is relative to parent's DSH heap PatchOffset = objectIds[ ArgId ] - MAX_SSH_PER_KERNEL_SIZE; if( pSamplerParamsOnKRS->m_ArgID == ArgId ) { IGILLOCAL_MEMCPY_GTOG( &pDsh[ pSamplerParamsOnKRS->m_SamplerStateOffset ], &dsh[ pQueue->m_controls.m_ParentDSHOffset + PatchOffset ], OCLRT_SIZEOF_SAMPLER_STATE ); break; } pSamplerParamsOnKRS = pSamplerParamsOnKRS + 1; SamplerCount = SamplerCount + 1; } } } } } __global char* pScalarData = ( __global char* ) ( &pCommandHeader->m_data[ NumberOfDepencies ] ); CurrentIndex = 0; uint TotalLocalSize = pWalkerEnum->WalkerArray[ WalkerID ].ActualLocalSize.x * pWalkerEnum->WalkerArray[ WalkerID ].ActualLocalSize.y * pWalkerEnum->WalkerArray[ WalkerID ].ActualLocalSize.z; uint NumberOfHWThreads = TotalLocalSize / pBlockData->m_SIMDSize; if( TotalLocalSize % pBlockData->m_SIMDSize != 0 ) { NumberOfHWThreads++; } uint ThreadPayloadSize = NumberOfHWThreads * pBlockData->m_PayloadSize; //Move pointer to Constant Buffer Offset pDsh = ( 
__global char* )&dsh[ DshOffset + SamplerHeapSize ]; if( ( get_local_id( 0 ) >= PARALLEL_SCHEDULER_COMPILATION_SIZE_20 ) & ( get_local_id( 0 ) < PARALLEL_SCHEDULER_COMPILATION_SIZE_20 + 6 ) ) { if( PatchMask & SCHEDULER_DATA_PARAMETER_KERNEL_ARGUMENT_MASK ) { while( pKernelCurbeParams[ CurrentIndex ].m_parameterType == SCHEDULER_DATA_PARAMETER_KERNEL_ARGUMENT ) { CurbeSize = pKernelCurbeParams[ CurrentIndex ].m_parameterSize; PatchOffset = pKernelCurbeParams[ CurrentIndex ].m_patchOffset; IGILLOCAL_MEMCPY_GTOG( &pDsh[ PatchOffset ], pScalarData, CurbeSize ); pScalarData += CurbeSize; CurrentIndex++; } } if( PatchMask & SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE_MASK ) { CurrentIndex = PatchLocalWorkSizesParallel( CurrentIndex, SCHEDULER_DATA_PARAMETER_LOCAL_WORK_SIZE, pKernelCurbeParams, pDsh, LocalSizes.x, LocalSizes.y, LocalSizes.z, pWalkerEnum->WalkerArray[ WalkerID ].ActualLocalSize.x, pWalkerEnum->WalkerArray[ WalkerID ].ActualLocalSize.y, pWalkerEnum->WalkerArray[ WalkerID ].ActualLocalSize.z ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_SIZE_MASK ) { CurrentIndex = PatchDSH6TokensParallel20( CurrentIndex, SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_SIZE, pKernelCurbeParams, pDsh, GlobalSizes.x, GlobalSizes.y, GlobalSizes.z ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_NUM_WORK_GROUPS_MASK ) { CurrentIndex = PatchDSH6TokensParallel20( CurrentIndex, SCHEDULER_DATA_PARAMETER_NUM_WORK_GROUPS, pKernelCurbeParams, pDsh, pWalkerEnum->TotalDimSize.x, pWalkerEnum->TotalDimSize.y, pWalkerEnum->TotalDimSize.z ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_WORK_DIMENSIONS_MASK ) { CurrentIndex = PatchDSH1TokenParallel20( CurrentIndex, SCHEDULER_DATA_PARAMETER_WORK_DIMENSIONS, pKernelCurbeParams, pDsh, WorkDim ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES_MASK ) { CurrentIndex = PatchLocalMemEntities( CurrentIndex, SCHEDULER_DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES, pKernelCurbeParams, pDsh, pCommandHeader ); } 
if( PatchMask & SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_OFFSET_MASK ) { CurrentIndex = PatchDSH6TokensParallel20( CurrentIndex, SCHEDULER_DATA_PARAMETER_GLOBAL_WORK_OFFSET, pKernelCurbeParams, pDsh, GlobalOffset.x, GlobalOffset.y, GlobalOffset.z ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_NUM_HARDWARE_THREADS_MASK ) { CurrentIndex = PatchDSH1TokenParallel20( CurrentIndex, SCHEDULER_DATA_PARAMETER_NUM_HARDWARE_THREADS, pKernelCurbeParams, pDsh, NumberOfHWThreads ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_PARENT_EVENT_MASK ) { CurrentIndex = PatchDSH1TokenParallel20( CurrentIndex, SCHEDULER_DATA_PARAMETER_PARENT_EVENT, pKernelCurbeParams, pDsh, pCommandHeader->m_event ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE_MASK ) { CurrentIndex = PatchDSH6TokensParallel20( CurrentIndex, SCHEDULER_DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE, pKernelCurbeParams, pDsh, LocalSizes.x, LocalSizes.y, LocalSizes.z ); } if( PatchMask & SCHEDULER_DATA_PARAMETER_GLOBAL_POINTER ) { if( pCommandHeader->m_numGlobalCapturedBuffer > 0 ) { //Handle global pointers patching in stateless mode, info about layout in declaration of IGIL_CommandHeader __global uint* pGlobalIndexes = ( __global uint* ) ( &pCommandHeader->m_data[ NumberOfDepencies + pCommandHeader->m_numScalarArguments ] ); __global uint* pGlobalPtrs = ( __global uint* ) ( &pCommandHeader->m_data[ NumberOfDepencies + pCommandHeader->m_numScalarArguments + pCommandHeader->m_numGlobalCapturedBuffer ] ); uint StartIndex = CurrentIndex; //Argument in command header are not in correct sequence, that's why proper key needs to be located for( uint glIdx = 0 ; glIdx < pCommandHeader->m_numGlobalCapturedBuffer; glIdx++) { //Reset CurrentIndex as we need to start from the beginning. 
CurrentIndex = StartIndex; while( pKernelCurbeParams[ CurrentIndex ].m_parameterType == COMPILER_DATA_PARAMETER_GLOBAL_SURFACE ) { //Patch only if exact match occurs if( pKernelCurbeParams[ CurrentIndex ].m_sourceOffset == *pGlobalIndexes ) { PatchOffset = pKernelCurbeParams[ CurrentIndex ].m_patchOffset; //64 bit patching if( pKernelCurbeParams[ CurrentIndex ].m_parameterSize == 8 ) { __global uint* pDst = (__global uint *) &pDsh[PatchOffset]; pDst[0] = pGlobalPtrs[0]; pDst[1] = pGlobalPtrs[1]; } else { __global uint* pDst = ( __global uint* ) &pDsh[ PatchOffset ]; *pDst = ( uint ) *pGlobalPtrs; } } CurrentIndex++; } pGlobalPtrs += 2; pGlobalIndexes++; } } while( pKernelCurbeParams[ CurrentIndex ].m_parameterType == COMPILER_DATA_PARAMETER_GLOBAL_SURFACE ) { CurrentIndex++; } } //Patch images curbe entries if( ( PatchMask & SCHEDULER_DATA_PARAMETER_IMAGE_CURBE_ENTRIES ) | ( PatchMask & SCHEDULER_DATA_PARAMETER_SAMPLER_MASK ) ) { if( ( pArgumentIds != NULL ) & ( pObjectIds != NULL ) ) { //pKernelReflectionChar is a global address pointer __global IGIL_ImageParamters *pImageParams = ( __global IGIL_ImageParamters * ) &pKernelReflectionChar[ pKernelReflection->m_ParentImageDataOffset ]; __global IGIL_ParentSamplerParams *pParentSamplerParams = ( __global IGIL_ParentSamplerParams* ) &pKernelReflectionChar[ pKernelReflection->m_ParentSamplerParamsOffset ]; //First obtain argument ID uint WorkID = get_local_id( 0 ) - PARALLEL_SCHEDULER_COMPILATION_SIZE_20; while( WorkID + CurrentIndex < pBlockData->m_numberOfCurbeTokens ) { uint ArgId = pKernelCurbeParams[ CurrentIndex + WorkID ].m_sourceOffset; uint ObjectID = objectIds[ ArgId ]; uint CurrentImage = 0; uint CurrentSampler = 0; uint PatchValue = 0; uint TokenType = pKernelCurbeParams[ CurrentIndex + WorkID ].m_parameterType; uint PatchOffset = pKernelCurbeParams[ CurrentIndex + WorkID ].m_patchOffset; uint PatchValueInvalid = 0; //If Images if( ObjectID < OCLRT_IMAGE_MAX_OBJECT_ID ) { //Locate proper parent Image while( 
( pImageParams[ CurrentImage ].m_ObjectID != ObjectID ) & ( CurrentImage < pKernelReflection->m_ParentKernelImageCount ) ) { CurrentImage++; } //Proper image is located under CurrentImage patch the token if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_WIDTH ) { PatchValue = pImageParams[ CurrentImage ].m_Width; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_HEIGHT ) { PatchValue = pImageParams[ CurrentImage ].m_Height; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_DEPTH ) { PatchValue = pImageParams[ CurrentImage ].m_Depth; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE ) { PatchValue = pImageParams[ CurrentImage ].m_ChannelDataType; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_CHANNEL_ORDER ) { PatchValue = pImageParams[ CurrentImage ].m_ChannelOrder; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER ) { PatchValue = GetSRGBChannelOrder( pImageParams[ CurrentImage ].m_ChannelOrder ); } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_ARRAY_SIZE ) { PatchValue = pImageParams[ CurrentImage ].m_ArraySize; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_NUM_SAMPLES ) { PatchValue = pImageParams[ CurrentImage ].m_NumSamples; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS ) { PatchValue = pImageParams[ CurrentImage ].m_NumMipLevels; } else if( TokenType == SCHEDULER_DATA_PARAMETER_IMAGE_OBJECT_ID ) { PatchValue = ObjectID; } else { PatchValueInvalid = 1; } } //If Sampler else if( ObjectID >= OCLRT_SAMPLER_MIN_OBJECT_ID ) { //Mark PatchValue invalid if SamplerParams will not be found PatchValueInvalid = 1; //Locate proper parent Image while( CurrentSampler < pKernelReflection->m_ParentSamplerCount ) { if( pParentSamplerParams[ CurrentSampler ].m_ObjectID == ObjectID ) { PatchValueInvalid = 0; if( TokenType == DATA_PARAMETER_SAMPLER_ADDRESS_MODE ) { PatchValue = pParentSamplerParams[ CurrentSampler ].m_AddressingMode; } else if( TokenType == 
DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS ) { PatchValue = pParentSamplerParams[ CurrentSampler ].NormalizedCoords; } else if( TokenType == DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED ) { PatchValue = pParentSamplerParams[ CurrentSampler ].CoordinateSnapRequired; } else if( TokenType == SCHEDULER_DATA_PARAMETER_SAMPLER_OBJECT_ID ) { PatchValue = ObjectID; } else { PatchValueInvalid = 1; } CurrentSampler = pKernelReflection->m_ParentSamplerCount; } CurrentSampler++; } } else { PatchValueInvalid = 1; } if( PatchValueInvalid == 0 ) { *( __global uint * ) ( &pDsh[ PatchOffset ] ) = PatchValue; } CurrentIndex += 6; } } else { pQueue->m_controls.m_ErrorCode += 7; } } } #ifdef SCHEDULER_EMULATION barrier( CLK_GLOBAL_MEM_FENCE ); #endif if( get_local_id( 0 ) == 0 ) { #if defined WA_LRI_COMMANDS_EXIST bool ShouldDisablePreemption = false; #endif //Profiling support if( pQueue->m_controls.m_IsProfilingEnabled != 0 ) { bool DisableTimeStampStart = true; bool DisableTimeStampEnd = true; if( ( ( uint )pCommandHeader->m_event != IGIL_EVENT_INVALID_HANDLE ) & ( ( WalkerID == 0 ) | ( WalkerID == WalkerCount - 1 ) ) ) { //Event is propagated to childs as "parent event", to avoid overwriting the same start value, only generate timestamp write //For the first command for this event, this means we look for event with no children ( so compare to 1 ). 
clk_event_t EventID = __builtin_astype( ( void* ) ( ( ulong ) pCommandHeader->m_event ), clk_event_t ); __global IGIL_DeviceEvent *events = TEMP_IGIL_GetDeviceEvents( eventsPool ); if( events[ ( uint )(size_t)__builtin_astype( EventID, void* ) ].m_numChildren == 1 ) { #if defined WA_LRI_COMMANDS_EXIST && defined WA_PROFILING_PREEMPTION //This is a case, where profiling of block kernels occurs - presence of event in EM workload //In such case, disable preemption around all WALKERs for that block kernel and event ShouldDisablePreemption = true; #endif if( WalkerID == 0 ) { //Emit pipecontrol with timestamp write ulong Address = ( ulong )&( events[ ( uint )(size_t)__builtin_astype( EventID, void* ) ].m_profilingCmdStart ); //Timestamp start PatchPipeControlProfilingAddres( secondaryBatchBuffer, SLBOffset, Address, PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET ); DisableTimeStampStart = false; } if( WalkerID == WalkerCount - 1 ) { ulong Address = ( ulong )&( events[ ( uint )(size_t)__builtin_astype( EventID, void* ) ].m_profilingCmdEnd ); //Timestamp end PatchPipeControlProfilingAddres( secondaryBatchBuffer, SLBOffset, Address, PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET ); DisableTimeStampEnd = false; } } } if( DisableTimeStampStart ) { DisablePostSyncBitInPipeControl( secondaryBatchBuffer, SLBOffset, PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET ); } if( DisableTimeStampEnd ) { DisablePostSyncBitInPipeControl( secondaryBatchBuffer, SLBOffset, PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET ); } } else { //Optimized path, in case block can be run concurently noop pipe control after such block. 
uint DwordOffset = SLBOffset / DWORD_SIZE_IN_BYTES; if( pBlockData->m_CanRunConcurently != 0 ) { NOOPCSStallPipeControl( secondaryBatchBuffer, DwordOffset, PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET ); } else { PutCSStallPipeControl( secondaryBatchBuffer, DwordOffset, PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET ); } } #if defined WA_LRI_COMMANDS_EXIST bool NoopPreemptionDisabling = true; bool NoopPreemptionEnabling = true; #if defined WA_KERNEL_PREEMPTION //This is case, where block kernel should have disabled preemption because of its sampler usage around all WALKERs of that block kernel //Preemption should be disabled when EM event profiling is used OR kernel data indicate such behavior ShouldDisablePreemption |= ( pBlockData->m_DisablePreemption != 0 ); #endif #if defined WA_PROFILING_PREEMPTION //m_EventTimestampAddress != NULL means profiling of the whole workload is enabled (preemption around whole chained BB is disabled) //So disabling preemption should be permitted only when workload profiling is off, in other cases noop all LRI commands //For m_EventTimestampAddress != NULL preemption is enabled before BB_END ShouldDisablePreemption &= ( pQueue->m_controls.m_EventTimestampAddress == 0 ); #endif if( ShouldDisablePreemption != false ) { if( WalkerID == 0 ) { SetDisablePreemptionRegister( SLBOffset, secondaryBatchBuffer ); NoopPreemptionDisabling = false; } if( WalkerID == WalkerCount - 1 ) { SetEnablePreemptionRegister( SLBOffset, secondaryBatchBuffer ); NoopPreemptionEnabling = false; } } if( NoopPreemptionDisabling ) { NoopPreemptionCommand( SLBOffset, IMM_LOAD_REGISTER_FOR_DISABLE_PREEMPTION_OFFSET, secondaryBatchBuffer ); } if( NoopPreemptionEnabling ) { NoopPreemptionCommand( SLBOffset, IMM_LOAD_REGISTER_FOR_ENABLE_PREEMPTION_OFFSET, secondaryBatchBuffer ); } #endif //WA_LRI_COMMANDS_EXIST } //Witems from 0 to 16 are responsible for local ids generation. 
if( ( get_local_id( 0 ) < 16 ) & ( pBlockData->m_NeedLocalIDS != 0 ) ) { //Now generate local IDS generateLocalIDSParallel20( pLocalIdsOnDSH, pWalkerEnum->WalkerArray[ WalkerID ].ActualLocalSize, NumberOfHWThreads, pBlockData->m_SIMDSize ); } //3rd HW thread will take care of patching media curbe load and GPPGU_WALKER command if( get_local_id( 0 ) == PARALLEL_SCHEDULER_COMPILATION_SIZE_20 * 2 ) { uint TotalSLMSize = pCommandHeader->m_totalLocalSize + pBlockData->m_InilineSLMSize; //Update Interface Descriptor Data with SLM size / number of HW threads. CopyAndPatchIDData20(dsh, blockId, NumberOfHWThreads, TotalSLMSize, IntefaceDescriptorOffset, pQueue->m_controls.m_StartBlockID, btOffset, DshOffset, pBlockData->m_numberOfSamplerStates #ifdef ENABLE_DEBUG_BUFFER , DebugQueue #endif ); patchGpGpuWalker( SLBOffset, secondaryBatchBuffer, IntefaceDescriptorOffset, pBlockData->m_SIMDSize, TotalLocalSize, pWalkerEnum->WalkerArray[ WalkerID ].WalkerDimSize, pWalkerEnum->WalkerArray[ WalkerID ].WalkerStartPoint, NumberOfHWThreads, pBlockData->m_sizeOfConstantBuffer + ThreadPayloadSize, SamplerHeapSize + DshOffset ); PatchMediaStateFlush( SLBOffset, secondaryBatchBuffer, IntefaceDescriptorOffset, SCHEDULER_MSF_INITIAL ); PatchMediaStateFlush( SLBOffset, secondaryBatchBuffer, IntefaceDescriptorOffset, SCHEDULER_MSF_SECOND ); } } } uint CheckEventStatus( __global IGIL_CommandHeader* pCommand, __global IGIL_EventPool* eventsPool ) { if( pCommand->m_numDependencies == 0 ) { return 0; } else { __global IGIL_DeviceEvent* pDeviceEvent; //Events are stored at the begining of command packet dynamic payload for( uint i = 0; i < pCommand->m_numDependencies; i++ ) { pDeviceEvent = TEMP_IGIL_GetDeviceEvent( eventsPool, pCommand->m_data[ i ] ); if( pDeviceEvent->m_state != CL_COMPLETE ) { return 1; } } } return 0; } void DecreaseEventDependenciesParallel( __global IGIL_CommandHeader* pCommand, __global IGIL_EventPool* eventsPool ) { __global IGIL_DeviceEvent* pDeviceEvent; //Events are 
stored at the begining of command packet dynamic payload for( uint i = 0; i < pCommand->m_numDependencies; i++ ) { pDeviceEvent = TEMP_IGIL_GetDeviceEvent( eventsPool, pCommand->m_data[ i ] ); int OldDependants = atomic_dec( &pDeviceEvent->m_numDependents ); if( ( pDeviceEvent->m_refCount <= 0 ) & ( ( OldDependants - 1 ) <= 0 ) & ( pDeviceEvent->m_numChildren <= 0 ) ) { TEMP_IGIL_FreeEvent( __builtin_astype( ( void* )( ( ulong )pCommand->m_data[ i ] ), clk_event_t ), eventsPool ); } } } //Update status of the event and all events that are depending on this event void UpdateEventsTreeStatusParallel( clk_event_t eventId, __global IGIL_EventPool* eventsPool, bool isProfilingEnabled ) { __global IGIL_DeviceEvent *events = TEMP_IGIL_GetDeviceEvents( eventsPool ); __global IGIL_DeviceEvent *pEvent; do { pEvent = &events[ (uint) (size_t)__builtin_astype( eventId, void* ) ]; int OldNumChild = atomic_dec( &pEvent->m_numChildren ); if( ( OldNumChild - 1 ) <= 0 ) { pEvent->m_state = CL_COMPLETE; if( ( pEvent->m_refCount <= 0 ) & ( pEvent->m_numDependents <= 0 ) & ( pEvent->m_numChildren <= 0 ) ) { TEMP_IGIL_FreeEvent( eventId, eventsPool ); } //This event transitions to CL_COMPLETE state, update it profiling informations. 
if( isProfilingEnabled != 0 ) { //CL COMPLETE time is before this scheduler starts pEvent->m_profilingCmdComplete = eventsPool->m_CLcompleteTimestamp; //Check if this event has profiling pointer, if so update profiling data, all times should be there atm if( pEvent->m_pProfiling != 0 ) { __global ulong* retValues = ( __global ulong * )pEvent->m_pProfiling; ulong StartTime = pEvent->m_profilingCmdStart; ulong EndTime = pEvent->m_profilingCmdEnd; ulong CompleteTime = pEvent->m_profilingCmdComplete; ulong CLEndTransitionTime = 0; ulong CLCompleteTransitionTime = 0; //Check if timer didn't reset by hitting max value if( CompleteTime > StartTime ) { CLEndTransitionTime = EndTime - StartTime; CLCompleteTransitionTime = CompleteTime - StartTime; } //If we hit this else it means that GPU timer reset to 0, compute proper delta else { if( EndTime < StartTime ) { CLEndTransitionTime = PROFILING_MAX_TIMER_VALUE - StartTime + EndTime; } else { CLEndTransitionTime = EndTime - StartTime; } CLCompleteTransitionTime = PROFILING_MAX_TIMER_VALUE - StartTime + CompleteTime; } //First value is END - START timestamp retValues[ 0 ] = ( ulong )( ( float )CLEndTransitionTime * eventsPool->m_TimestampResolution ); //Second value is COMPLETE - START timestamp retValues[ 1 ] = ( ulong )( ( float )CLCompleteTransitionTime * eventsPool->m_TimestampResolution ); } } //Signal parent because we completed eventId = __builtin_astype( ( void* )( ( ulong )pEvent->m_parentEvent ), clk_event_t ); } } while ( ( ( uint )(size_t)__builtin_astype( eventId, void* ) != IGIL_EVENT_INVALID_HANDLE ) & ( pEvent->m_numChildren <= 0 ) ); } void GlobalBarrier( __global volatile uint* syncSurface ) { //Make sure each WKG item hit the barrier. 
barrier( CLK_GLOBAL_MEM_FENCE ); //Now first thread of each wkg writes to designated place on SyncSurface if ( get_local_id( 0 ) == 0 ) { syncSurface[ get_group_id( 0 ) ] = 1; } //Higher wkg ids tend to not have work to do in all cases, therefore I choose last wkg to wait for the others, as it is most likely it will hit this code sooner. if( get_group_id( 0 ) == ( get_num_groups( 0 ) - 1 ) ) { //24 -48 case uint Value; do { Value = 1; for( uint i = get_local_id( 0 ); i < get_num_groups( 0 ); i += get_local_size( 0 ) ) { Value = Value & syncSurface[ i ]; } } while( Value == 0 ); barrier( CLK_GLOBAL_MEM_FENCE ); for( uint i = get_local_id( 0 ); i < get_num_groups( 0 ); i += get_local_size( 0 ) ) { syncSurface[ i ] = 0; } } if( get_local_id( 0 ) == 0 ) { while( syncSurface[ get_group_id( 0 ) ] != 0 ); } barrier( CLK_GLOBAL_MEM_FENCE ); } void GlobalBarrierUpdateQueue( __global volatile uint* syncSurface, __global IGIL_CommandQueue* pQueue ) { //Make sure each WKG item hit the barrier. barrier( CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); //Now first thread of each wkg writes to designated place on SyncSurface if ( get_local_id(0) == 0 ) { syncSurface[get_group_id(0)] = 1; } //Higher wkg ids tend to not have work to do in all cases, therefore I choose last wkg to wait for the others, as it is most likely it will hit this code sooner. 
if( get_group_id(0) == ( get_num_groups( 0 ) - 1 ) ) { uint Value; do { Value = 1; for( uint i = get_local_id( 0 ); i < get_num_groups( 0 ); i += get_local_size( 0 ) ) { Value = Value & syncSurface[ i ]; } } while( Value == 0 ); barrier( CLK_GLOBAL_MEM_FENCE ); pQueue->m_controls.m_IDTAfterFirstPhase = pQueue->m_controls.m_CurrentIDToffset; barrier( CLK_GLOBAL_MEM_FENCE ); for( uint i = get_local_id( 0 ); i < get_num_groups( 0 ); i += get_local_size( 0 ) ) { syncSurface[ i ] = 0; } } if( get_local_id(0) == 0 ) { while( syncSurface[ get_group_id(0) ] != 0 ); } barrier( CLK_GLOBAL_MEM_FENCE ); } #ifdef SCHEDULER_EMULATION __local int IDTOffset; __local int DSHOffset; __local int SLBOffset; __local int StackOffset; __local int QStorageOffset; __local int MarkerOffset; __local int BTSoffset; __local IGIL_WalkerEnumeration WalkerEnum; __local uint ObjectIDS[ MAX_GLOBAL_ARGS ]; #endif #define WA_INT_DESC_MAX 62 __kernel __attribute__((intel_reqd_sub_group_size(PARALLEL_SCHEDULER_COMPILATION_SIZE_20))) void SchedulerParallel20( __global IGIL_CommandQueue* pQueue, __global uint* commandsStack, __global IGIL_EventPool* eventsPool, __global uint* secondaryBatchBuffer, //SLB that will be used to put commands in. 
__global char* dsh, //Pointer to the start of Dynamic State Heap __global IGIL_KernelDataHeader* kernelData, //This is kernel reflection surface __global volatile uint* queueStorageBuffer, __global char* ssh, //Pointer to Surface state heap with BT and SS __global DebugDataBuffer* debugQueue ) { EMULATION_ENTER_FUNCTION( ); #ifdef WA_DISABLE_SCHEDULERS return; #endif #ifdef DEBUG //Early return enabled when m_SchedulerEarlyReturn is > 0, if( pQueue->m_controls.m_SchedulerEarlyReturn > 0 ) { if( pQueue->m_controls.m_SchedulerEarlyReturn == 1 ) { return; } if( get_global_id( 0 ) == 0 ) { pQueue->m_controls.m_SchedulerEarlyReturnCounter++; } GlobalBarrier( queueStorageBuffer ); if( pQueue->m_controls.m_SchedulerEarlyReturnCounter == pQueue->m_controls.m_SchedulerEarlyReturn ) { if( ( ( get_group_id( 0 ) == 1) == ( get_num_groups( 0 ) > 1 ) ) & ( get_local_id( 0 ) == 0 ) ) { #ifdef ENABLE_DEBUG_BUFFER //Set START time of current (last) scheduler if( ( pQueue->m_controls.m_IsProfilingEnabled != 0 ) & ( DebugQueue != 0 ) & ( DebugQueue->m_flags == DDB_SCHEDULER_PROFILING ) ) { *( ( __global ulong * ) ( &DebugQueue->m_data[ atomic_add( &DebugQueue->m_offset, 2 ) ] ) ) = EventsPool->m_CLcompleteTimestamp; } #endif pQueue->m_controls.Temporary[ 2 ]++; //SlbOffset is expressed in bytes and for cmd it is needed to convert it to dwords __private uint DwordOffset = ( pQueue->m_controls.m_SecondLevelBatchOffset % MAX_SLB_OFFSET ) / DWORD_SIZE_IN_BYTES; //BB_START 1st DWORD secondaryBatchBuffer[ DwordOffset ] = OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0; DwordOffset++; //BB_START 2nd DWORD - Address, 3rd DWORD Address high secondaryBatchBuffer[ DwordOffset++ ] = (uint)(pQueue->m_controls.m_CleanupSectionAddress & 0xFFFFFFFF); secondaryBatchBuffer[ DwordOffset ] = (uint)((pQueue->m_controls.m_CleanupSectionAddress >> 32) & 0xFFFFFFFF); } return; } } #endif //First check if there are any new command packets on queue_t __global IGIL_CommandHeader* pCommand = 0; uint GroupID = get_group_id( 
0 ); #ifndef SCHEDULER_EMULATION __local int IDTOffset; __local int DSHOffset; __local int SLBOffset; __local int StackOffset; __local int QStorageOffset; __local int MarkerOffset; __local int BTSoffset; __local IGIL_WalkerEnumeration WalkerEnum; __local uint ObjectIDS[ MAX_GLOBAL_ARGS ]; #endif if( pQueue->m_controls.m_LastScheduleEventNumber > 0 ) { //Check if there are any events that needs updating, each wkg uses all hw threads in wkg to update events if( GroupID * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 < pQueue->m_controls.m_LastScheduleEventNumber ) { clk_event_t EventID; if( get_local_id( 0 ) % PARALLEL_SCHEDULER_COMPILATION_SIZE_20 == 0 ) { uint ID = ( GroupID * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 ) + ( get_local_id( 0 ) / PARALLEL_SCHEDULER_COMPILATION_SIZE_20 ); while( ID < pQueue->m_controls.m_LastScheduleEventNumber ) { EventID = __builtin_astype( ( void* )( ( ulong )pQueue->m_controls.m_EventDependencies[ ID ] ), clk_event_t ); UpdateEventsTreeStatusParallel( EventID, eventsPool, ( pQueue->m_controls.m_IsProfilingEnabled != 0 ) ); ID += get_num_groups( 0 ) * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20; } } } GlobalBarrier( queueStorageBuffer ); } //Queue parsing section uint NumberOfEnqueues = pQueue->m_controls.m_TotalNumberOfQueues - pQueue->m_controls.m_PreviousNumberOfQueues; if( NumberOfEnqueues > 0 ) { uint InitialOffset = pQueue->m_controls.m_PreviousHead; bool PacketScheduled = true; uint offset = 0; for( uint CurrentPacket = GroupID; CurrentPacket < NumberOfEnqueues; CurrentPacket += get_num_groups( 0 ) ) { if( CurrentPacket == GroupID ) { offset = TEMP_IGIL_GetNthCommandHeaderOffset( pQueue, InitialOffset, CurrentPacket ); } else { offset = TEMP_IGIL_GetNthCommandHeaderOffset( pQueue, offset, get_num_groups( 0 ) ); } pCommand = TEMP_IGIL_GetCommandHeader( pQueue, offset ); //Initialize command packet with proper lws if( get_local_id( 0 ) == 0 ) { InitializeCommandPacket( pCommand ); } //Can I run this command ? 
if( CheckEventStatus( pCommand, eventsPool ) == 0 ) { //Is it marker command ? if( pCommand->m_kernelId != IGIL_KERNEL_ID_ENQUEUE_MARKER ) { //Is there enough IDT space for me ? if( get_local_id( 0 ) == 0 ) { int WalkerNeeded = GetWalkerCount( pCommand ); //Optimization - check if IDT has free space for me if( pQueue->m_controls.m_CurrentIDToffset + WalkerNeeded <= WA_INT_DESC_MAX ) { uint Temp = atomic_add( &pQueue->m_controls.m_CurrentIDToffset, WalkerNeeded ); if( Temp + WalkerNeeded <= WA_INT_DESC_MAX ) { IDTOffset = Temp; DSHOffset = atomic_add( &pQueue->m_controls.m_CurrentDSHoffset, ( MAX_DSH_SIZE_PER_ENQUEUE * WalkerNeeded ) ); SLBOffset = ( ( atomic_add( &pQueue->m_controls.m_SecondLevelBatchOffset, ( SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE * WalkerNeeded ) ) ) % MAX_SLB_OFFSET ); BTSoffset = atomic_add( &pQueue->m_controls.m_CurrentSSHoffset, pQueue->m_controls.m_BTmaxSize ); } else { IDTOffset = -1; } } else { IDTOffset = -1; } } //Now barrier and check if we can go with scheduling barrier( CLK_LOCAL_MEM_FENCE ); if( IDTOffset != -1 ) { //This packet is all set, schedule it and we are done with it. 
//Patch DSH has media curbe load and patch gpgpu walker inside PatchDSHParallelWithDynamicDSH20( SLBOffset, DSHOffset, IDTOffset, kernelData, dsh, pCommand->m_kernelId, pCommand, secondaryBatchBuffer, pQueue, eventsPool, ssh, BTSoffset, &WalkerEnum, ObjectIDS #ifdef ENABLE_DEBUG_BUFFER , DebugQueue #endif ); PacketScheduled = true; } else { PacketScheduled = false; } } else //For marker we need to update returned event status { //Check if there is space to track event if( get_local_id( 0 ) == 0 ) { uint Temp = atomic_inc( &pQueue->m_controls.m_EnqueueMarkerScheduled ); if( Temp < MAX_NUMBER_OF_ENQUEUE_MARKER ) { MarkerOffset = Temp; } else { MarkerOffset = -1; } } barrier( CLK_LOCAL_MEM_FENCE ); if( MarkerOffset != -1 ) { PacketScheduled = true; } else { PacketScheduled = false; } } //Update event dependencies if any, if there are event waiting for status change, put them on the list. if( PacketScheduled == true ) { if( get_local_id( 0 ) == 0 ) { if( ( uint )pCommand->m_event != IGIL_EVENT_INVALID_HANDLE ) { pQueue->m_controls.m_EventDependencies[ atomic_inc( &pQueue->m_controls.m_CurrentScheduleEventNumber ) ] = pCommand->m_event; } //Remove event dependencies setting. if( pCommand->m_numDependencies > 0 ) { DecreaseEventDependenciesParallel( pCommand, eventsPool ); } } } } //Can't schedule it right now, move to storage. 
else { if( pQueue->m_controls.m_IsSimulation ) { barrier( CLK_LOCAL_MEM_FENCE ); } PacketScheduled = false; } //Allocation failure, move command to stack storage and update stack pointers if( PacketScheduled == false ) { if( get_local_id( 0 ) == 0 ) { StackOffset = atomic_dec( &pQueue->m_controls.m_StackTop ) - 1; QStorageOffset = atomic_sub( &pQueue->m_controls.m_QstorageTop, pCommand->m_commandSize ) - pCommand->m_commandSize; commandsStack[ StackOffset ] = QStorageOffset; } barrier( CLK_LOCAL_MEM_FENCE ); __global char* ptrQueue = ( __global char* )pQueue; GLOBAL_MEMCPY( ( __global void* )&queueStorageBuffer[ QStorageOffset / 4 ], (__global void * )&ptrQueue[ offset ] , pCommand->m_commandSize ); } else if( pQueue->m_controls.m_IsSimulation ) { barrier( CLK_LOCAL_MEM_FENCE ); } } //In case there were new enqueues on queue_t, all work items must hit the global barrier before they can start taking items from the stack. GlobalBarrierUpdateQueue( queueStorageBuffer, pQueue ); } //Check stack only when there are free IDTS if( ( pQueue->m_controls.m_IDTAfterFirstPhase < WA_INT_DESC_MAX ) & ( pQueue->m_controls.m_PreviousStackTop != pQueue->m_controls.m_StackSize ) ) { //Start stack browsing uint MyID = get_group_id( 0 ); //Start browsing from the begining of the previous stack top uint CurrentOffset = pQueue->m_controls.m_PreviousStackTop + MyID; uint CommandOffset = 0; while( CurrentOffset < pQueue->m_controls.m_StackSize ) { CommandOffset = commandsStack[ CurrentOffset ]; if( CommandOffset != 0 ) { pCommand = GetCommandHeaderFromStorage( ( __global uint* )queueStorageBuffer, CommandOffset ); //Can I run this command ? if( CheckEventStatus( pCommand, eventsPool ) == 0 ) { //Is it marker command ? if( pCommand->m_kernelId != IGIL_KERNEL_ID_ENQUEUE_MARKER ) { //Is there enough IDT space for me ? 
if( get_local_id( 0 ) == 0 ) { int WalkerNeeded = GetWalkerCount( pCommand ); //Optimization - check if IDT has free space for me if( pQueue->m_controls.m_CurrentIDToffset + WalkerNeeded <= WA_INT_DESC_MAX ) { uint Temp = atomic_add( &pQueue->m_controls.m_CurrentIDToffset, WalkerNeeded ); if( Temp + WalkerNeeded <= WA_INT_DESC_MAX ) { IDTOffset = Temp; DSHOffset = atomic_add( &pQueue->m_controls.m_CurrentDSHoffset, ( MAX_DSH_SIZE_PER_ENQUEUE * WalkerNeeded ) ); SLBOffset = ( ( atomic_add( &pQueue->m_controls.m_SecondLevelBatchOffset, ( SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE * WalkerNeeded ) ) ) % MAX_SLB_OFFSET ); BTSoffset = atomic_add( &pQueue->m_controls.m_CurrentSSHoffset, pQueue->m_controls.m_BTmaxSize ); } else { IDTOffset = -1; } } else { IDTOffset = -1; } } //Now barrier and check if we can go with scheduling barrier( CLK_LOCAL_MEM_FENCE ); if( IDTOffset != -1 ) { //This packet is all set, schedule it and we are done with it. //Patch DSH has media curbe load and patch gpgpu walker inside PatchDSHParallelWithDynamicDSH20( SLBOffset, DSHOffset, IDTOffset, kernelData, dsh, pCommand->m_kernelId, pCommand, secondaryBatchBuffer, pQueue, eventsPool, ssh, BTSoffset, &WalkerEnum, ObjectIDS #ifdef ENABLE_DEBUG_BUFFER , DebugQueue #endif ); pCommand->m_commandState = CAN_BE_RECLAIMED; //Reset stack offset commandsStack[ CurrentOffset ] = 0; //Update event status if( get_local_id( 0 ) == 0 ) { //Add events dependant on this command to list of events neeeded to be updated. if( ( uint )pCommand->m_event != IGIL_EVENT_INVALID_HANDLE ) { pQueue->m_controls.m_EventDependencies[ atomic_inc( &pQueue->m_controls.m_CurrentScheduleEventNumber ) ] = pCommand->m_event; } //Remove event dependencies setting. 
if( pCommand->m_numDependencies > 0 ) { DecreaseEventDependenciesParallel( pCommand, eventsPool ); } } } } else // For marker we need to update returned event status { barrier( CLK_GLOBAL_MEM_FENCE ); //Check if there is space to track event if( get_local_id( 0 ) == 0 ) { uint Temp = atomic_inc( &pQueue->m_controls.m_EnqueueMarkerScheduled ); if( Temp < MAX_NUMBER_OF_ENQUEUE_MARKER ) { pCommand->m_commandState = CAN_BE_RECLAIMED; commandsStack[ CurrentOffset ] = 0; //Add events dependant on this command to list of events neeeded to be updated. if( ( uint )pCommand->m_event != IGIL_EVENT_INVALID_HANDLE ) { pQueue->m_controls.m_EventDependencies[ atomic_inc( &pQueue->m_controls.m_CurrentScheduleEventNumber ) ] = pCommand->m_event; } //Remove event dependencies setting. if( pCommand->m_numDependencies > 0 ) { DecreaseEventDependenciesParallel( pCommand, eventsPool ); } } } } } } CurrentOffset += get_num_groups( 0 ); if( pQueue->m_controls.m_IsSimulation ) { barrier( CLK_LOCAL_MEM_FENCE ); } } } //Finish execution and check end conditons //Execute this global barrier only when needed, i.e. stack browsing was executed or new item were added on the stack if( ( pQueue->m_controls.m_PreviousStackTop != pQueue->m_controls.m_StackSize ) | ( pQueue->m_controls.m_StackTop != pQueue->m_controls.m_PreviousStackTop ) ) { GlobalBarrier( queueStorageBuffer ); } //Cleanup & resource reclamation section //We are after global sync section, we can do anything to globals right now. 
if( ( get_local_id( 0 ) == 0 ) & ( get_group_id( 0 ) == 0 ) ) { { pQueue->m_controls.m_CurrentDSHoffset = pQueue->m_controls.m_DynamicHeapStart; pQueue->m_controls.m_IDTAfterFirstPhase = 1; pQueue->m_controls.m_PreviousNumberOfQueues = pQueue->m_controls.m_TotalNumberOfQueues; pQueue->m_controls.m_LastScheduleEventNumber = pQueue->m_controls.m_CurrentScheduleEventNumber; pQueue->m_controls.m_CurrentScheduleEventNumber = 0; } } //Schedule scheduler if( ( (get_group_id( 0 ) == 1) == (get_num_groups( 0 ) > 1) ) & ( get_local_id(0) == 0 ) ) { pQueue->m_controls.m_SecondLevelBatchOffset = ( pQueue->m_controls.m_SecondLevelBatchOffset % MAX_SLB_OFFSET ); //If we scheduled any blocks, put scheduler right after if( ( pQueue->m_controls.m_CurrentIDToffset > 1 ) | ( pQueue->m_controls.m_EnqueueMarkerScheduled > 0 ) ) { AddCmdsInSLBforScheduler20Parallel( pQueue->m_controls.m_SecondLevelBatchOffset, pQueue, secondaryBatchBuffer, dsh ); //If we have profiling enabled, we need CL_COMPLETE time, which is before next scheduler starts if( pQueue->m_controls.m_IsProfilingEnabled != 0 ) { ulong Address = ( ulong ) &( eventsPool->m_CLcompleteTimestamp ); //Emit pipecontrol with timestamp write PatchPipeControlProfilingAddres( secondaryBatchBuffer, pQueue->m_controls.m_SecondLevelBatchOffset, Address, PIPE_CONTROL_FOR_TIMESTAMP_START_OFFSET ); //Bit after scheduler may be set by some other command, reset it to 0 DisablePostSyncBitInPipeControl( secondaryBatchBuffer, pQueue->m_controls.m_SecondLevelBatchOffset, PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET_TO_PATCH ); #ifdef ENABLE_DEBUG_BUFFER if( ( DebugQueue != 0 ) & ( DebugQueue->m_flags == DDB_SCHEDULER_PROFILING ) ) { //Store Current scheduler START time *((__global ulong * ) (&DebugQueue->m_data[ atomic_add( &DebugQueue->m_offset, 2 ) ] )) = EventsPool->m_CLcompleteTimestamp; //Set address to store next scheduler's END time PatchPipeControlProfilingAddres( secondaryBatchBuffer, pQueue->m_controls.m_SecondLevelBatchOffset, ( ulong 
)(&DebugQueue->m_data[ atomic_add( &DebugQueue->m_offset, 2 ) ]), PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET ); } #endif } //Program pipe controls around scheduler to make sure it is not executed concurently to blocks else { //Locate previous pipe control int PreviousOffset = pQueue->m_controls.m_SecondLevelBatchOffset - SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE; //If offset is negative it means we are first command after chaining if( PreviousOffset < 0 ) { PreviousOffset += MAX_SLB_OFFSET; } //Tighten previous pipecontrol uint DwordOffset = PreviousOffset / DWORD_SIZE_IN_BYTES; PutCSStallPipeControl( secondaryBatchBuffer, DwordOffset, PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET ); //Now put pipe control after scheduler DwordOffset = pQueue->m_controls.m_SecondLevelBatchOffset / DWORD_SIZE_IN_BYTES; PutCSStallPipeControl( secondaryBatchBuffer, DwordOffset, PIPE_CONTROL_FOR_TIMESTAMP_END_OFFSET ); } pQueue->m_controls.m_SecondLevelBatchOffset += SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE; pQueue->m_controls.m_CurrentIDToffset = 1; pQueue->m_controls.m_EnqueueMarkerScheduled = 0; pQueue->m_controls.Temporary[1]++; pQueue->m_controls.m_CurrentSSHoffset = pQueue->m_controls.m_BTbaseOffset; } //Nothing to schedule, return to the host else { #ifdef ENABLE_DEBUG_BUFFER //Set START time of current (last) scheduler if( ( pQueue->m_controls.m_IsProfilingEnabled != 0 ) & ( DebugQueue != 0 ) & ( DebugQueue->m_flags == DDB_SCHEDULER_PROFILING ) ) { *((__global ulong * ) (&DebugQueue->m_data[ atomic_add( &DebugQueue->m_offset, 2 ) ] )) = EventsPool->m_CLcompleteTimestamp; } #endif pQueue->m_controls.Temporary[2]++; pQueue->m_controls.m_SLBENDoffsetInBytes = ( int ) pQueue->m_controls.m_SecondLevelBatchOffset; //SlbOffset is expressed in bytes and for cmd it is needed to convert it to dwords __private uint DwordOffset = pQueue->m_controls.m_SecondLevelBatchOffset / DWORD_SIZE_IN_BYTES; //BB_START 1st DWORD secondaryBatchBuffer[ DwordOffset ] = OCLRT_BATCH_BUFFER_BEGIN_CMD_DWORD0; 
DwordOffset++; //BB_START 2nd DWORD - Address, 3rd DWORD Address high secondaryBatchBuffer[ DwordOffset++ ] = (uint)( pQueue->m_controls.m_CleanupSectionAddress & 0xFFFFFFFF ); secondaryBatchBuffer[ DwordOffset++ ] = (uint)( ( pQueue->m_controls.m_CleanupSectionAddress >> 32 ) & 0xFFFFFFFF ); } } //Parallel stack compaction if( ( ( get_group_id( 0 ) == 2 ) == ( get_num_groups( 0 ) > 2 ) ) & ( get_local_id( 0 ) == 0 ) ) { uint Current = pQueue->m_controls.m_StackTop + get_local_id( 0 ); uint StackSize = pQueue->m_controls.m_StackSize; uint Found = 0; while( ( Current < StackSize ) && ( Found == 0 ) ) { __global uint * pCmdStackBlock = (__global uint *)( commandsStack + Current ); //We have found an element if( *pCmdStackBlock != 0 ) { Found = 1; } else { Current += get_local_size( 0 ); } } if ( Found == 1 ) { atomic_min( &pQueue->m_controls.m_StackTop, Current ); atomic_min( &pQueue->m_controls.m_PreviousStackTop, Current ); } } //Qstorage compaction if( ( ( get_group_id( 0 ) == 3 ) == ( get_num_groups( 0 ) > 3 ) ) & ( get_local_id( 0 ) == 0 ) ) { uint ReclaimFurhter = 1; while( ( pQueue->m_controls.m_QstorageTop < pQueue->m_controls.m_QstorageSize ) & ( ReclaimFurhter == 1 ) ) { pCommand = GetCommandHeaderFromStorage( ( __global uint* ) queueStorageBuffer, pQueue->m_controls.m_QstorageTop ); if( pCommand->m_commandState == CAN_BE_RECLAIMED ) { pQueue->m_controls.m_QstorageTop += pCommand->m_commandSize; } else { ReclaimFurhter = 0; } } pQueue->m_controls.m_PreviousStorageTop = pQueue->m_controls.m_QstorageTop; #ifndef DISABLE_RESOURCE_RECLAMATION //Reclaim space on queue_t, do this only if there is enough space //1 KB is used for global barrier, make sure this space will never be used. 
if( pQueue->m_controls.m_QstorageTop - 1024 > pQueue->m_size ) { //In this case we can take full queue_t next time we enter scheduler, so reclaim full space on queue_t pQueue->m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; } #endif pQueue->m_controls.m_PreviousHead = pQueue->m_head; } } compute-runtime-20.13.16352/opencl/source/scheduler/scheduler_binary.cmake000066400000000000000000000075451363734646600264520ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_custom_target(scheduler) set(SCHEDULER_OUTDIR_WITH_ARCH "${TargetDir}/scheduler/${NEO_ARCH}") set_target_properties(scheduler PROPERTIES FOLDER "scheduler") set (SCHEDULER_KERNEL scheduler.cl) if(DEFINED NEO__IGC_INCLUDE_DIR) list(APPEND __cloc__options__ "-I$") endif() if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") list(APPEND __cloc__options__ "-D DEBUG") endif() set(SCHEDULER_INCLUDE_DIR ${TargetDir}) function(compile_kernel target gen_type platform_type kernel) get_family_name_with_type(${gen_type} ${platform_type}) string(TOLOWER ${gen_type} gen_type_lower) # get filename set(OUTPUTDIR "${SCHEDULER_OUTDIR_WITH_ARCH}/${gen_type_lower}") list(APPEND __cloc__options__ "-I ../${gen_type_lower}") get_filename_component(BASENAME ${kernel} NAME_WE) set(OUTPUTPATH "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.bin") set(SCHEDULER_CPP "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}.cpp") if(WIN32) set(cloc_cmd_prefix ocloc) else() if(DEFINED NEO__IGC_LIBRARY_PATH) set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH}:$ $) else() set(cloc_cmd_prefix LD_LIBRARY_PATH=$ $) endif() endif() list(APPEND __cloc__options__ "-cl-kernel-arg-info") list(APPEND __cloc__options__ "-cl-std=CL2.0") list(APPEND __cloc__options__ "-cl-intel-disable-a64WA") add_custom_command( OUTPUT ${OUTPUTPATH} COMMAND ${cloc_cmd_prefix} -q -file ${kernel} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} -cl-intel-greater-than-4GB-buffer-required -${NEO_BITS} 
-out_dir ${OUTPUTDIR} -cpp_file -options "$" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${kernel} ocloc copy_compiler_files ) set(SCHEDULER_CPP ${SCHEDULER_CPP} PARENT_SCOPE) add_custom_target(${target} DEPENDS ${OUTPUTPATH}) set_target_properties(${target} PROPERTIES FOLDER "opencl/source/scheduler/${gen_type_lower}") endfunction() macro(macro_for_each_gen) foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) if(${GEN_TYPE}_HAS_${PLATFORM_TYPE} AND SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE}) get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE}) set(PLATFORM_2_0_LOWER ${DEFAULT_SUPPORTED_2_0_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM}) if(COMPILE_BUILT_INS AND PLATFORM_2_0_LOWER) compile_kernel(scheduler_${family_name_with_type} ${GEN_TYPE} ${PLATFORM_TYPE} ${SCHEDULER_KERNEL}) add_dependencies(scheduler scheduler_${family_name_with_type}) list(APPEND SCHEDULER_TARGETS scheduler_${family_name_with_type}) list(APPEND GENERATED_SCHEDULER_CPPS ${SCHEDULER_CPP}) endif() endif() endforeach() source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${GENERATED_SCHEDULER_CPPS}) endmacro() apply_macro_for_each_gen("SUPPORTED") add_library(${SCHEDULER_BINARY_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt) if(COMPILE_BUILT_INS) target_sources(${SCHEDULER_BINARY_LIB_NAME} PUBLIC ${GENERATED_SCHEDULER_CPPS}) set_source_files_properties(${GENERATED_SCHEDULER_CPPS} PROPERTIES GENERATED TRUE) foreach(SCHEDULER_TARGET ${SCHEDULER_TARGETS}) add_dependencies(${SCHEDULER_BINARY_LIB_NAME} ${SCHEDULER_TARGET}) endforeach() endif(COMPILE_BUILT_INS) set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX) set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${SCHEDULER_BINARY_LIB_NAME} PROPERTIES FOLDER "scheduler") add_dependencies(${SCHEDULER_BINARY_LIB_NAME} scheduler) target_include_directories(${SCHEDULER_BINARY_LIB_NAME} PRIVATE ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} 
${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) compute-runtime-20.13.16352/opencl/source/scheduler/scheduler_kernel.cpp000066400000000000000000000074771363734646600261540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/scheduler/scheduler_kernel.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/cl_device/cl_device.h" #include namespace NEO { void SchedulerKernel::setArgs(GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation *queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue) { setArgSvmAlloc(0, reinterpret_cast(queue->getGpuAddress()), queue); setArgSvmAlloc(1, reinterpret_cast(commandsStack->getGpuAddress()), commandsStack); setArgSvmAlloc(2, reinterpret_cast(eventsPool->getGpuAddress()), eventsPool); setArgSvmAlloc(3, reinterpret_cast(secondaryBatchBuffer->getGpuAddress()), secondaryBatchBuffer); setArgSvmAlloc(4, reinterpret_cast(dsh->getGpuAddress()), dsh); setArgSvmAlloc(5, reinterpret_cast(reflectionSurface->getGpuAddress()), reflectionSurface); setArgSvmAlloc(6, reinterpret_cast(queueStorageBuffer->getGpuAddress()), queueStorageBuffer); setArgSvmAlloc(7, reinterpret_cast(ssh->getGpuAddress()), ssh); if (debugQueue) setArgSvmAlloc(8, reinterpret_cast(debugQueue->getGpuAddress()), debugQueue); DBG_LOG(PrintEMDebugInformation, "Scheduler Surfaces: \nqueue=", queue->getUnderlyingBuffer(), " \nstack=", commandsStack->getUnderlyingBuffer(), " \nevents=", eventsPool->getUnderlyingBuffer(), " \nslb=", secondaryBatchBuffer->getUnderlyingBuffer(), "\ndsh=", dsh->getUnderlyingBuffer(), " \nkrs=", reflectionSurface->getUnderlyingBuffer(), " \nstorage=", queueStorageBuffer->getUnderlyingBuffer(), "\nssh=", ssh->getUnderlyingBuffer()); } 
void SchedulerKernel::computeGws() { auto &devInfo = device.getDeviceInfo(); auto &hwInfo = device.getHardwareInfo(); auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); size_t hWThreadsPerSubSlice = devInfo.maxComputUnits / hwInfo.gtSystemInfo.SubSliceCount; size_t wkgsPerSubSlice = hWThreadsPerSubSlice / PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20; wkgsPerSubSlice = std::min(wkgsPerSubSlice, helper.getMaxBarrierRegisterPerSlice()); gws = wkgsPerSubSlice * hwInfo.gtSystemInfo.SubSliceCount * PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20; if (device.isSimulation()) { gws = PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20; } if (DebugManager.flags.SchedulerGWS.get() != 0) { DEBUG_BREAK_IF(DebugManager.flags.SchedulerGWS.get() % (PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20) != 0); gws = DebugManager.flags.SchedulerGWS.get(); } DBG_LOG(PrintEMDebugInformation, "Scheduler GWS: ", gws); printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Scheduler GWS: %" PRIu64, static_cast(gws)); } BuiltinCode SchedulerKernel::loadSchedulerKernel(Device *device) { std::string schedulerResourceName = getFamilyNameWithType(device->getHardwareInfo()) + "_0_scheduler.builtin_kernel.bin"; BuiltinCode ret; auto storage = std::make_unique(""); ret.resource = storage.get()->load(schedulerResourceName); ret.type = BuiltinCode::ECodeType::Binary; ret.targetDevice = device; return ret; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/scheduler/scheduler_kernel.h000066400000000000000000000036451363734646600256120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "opencl/source/kernel/kernel.h" #include namespace NEO { class SchedulerKernel : public Kernel { public: static constexpr const char *schedulerName = 
"SchedulerParallel20"; friend Kernel; ~SchedulerKernel() override = default; size_t getLws() { return PARALLEL_SCHEDULER_HWTHREADS_IN_HW_GROUP20 * PARALLEL_SCHEDULER_COMPILATION_SIZE_20; } size_t getGws() { return gws; } void setGws(size_t newGws) { gws = newGws; } size_t getCurbeSize() { size_t crossTrheadDataSize = kernelInfo.patchInfo.dataParameterStream ? kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize : 0; size_t dshSize = kernelInfo.heapInfo.pKernelHeader ? kernelInfo.heapInfo.pKernelHeader->DynamicStateHeapSize : 0; crossTrheadDataSize = alignUp(crossTrheadDataSize, 64); dshSize = alignUp(dshSize, 64); return alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64) + crossTrheadDataSize + dshSize; } void setArgs(GraphicsAllocation *queue, GraphicsAllocation *commandsStack, GraphicsAllocation *eventsPool, GraphicsAllocation *secondaryBatchBuffer, GraphicsAllocation *dsh, GraphicsAllocation *reflectionSurface, GraphicsAllocation *queueStorageBuffer, GraphicsAllocation *ssh, GraphicsAllocation *debugQueue = nullptr); static BuiltinCode loadSchedulerKernel(Device *device); protected: SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg) : Kernel(programArg, kernelInfoArg, deviceArg, true), gws(0) { computeGws(); }; void computeGws(); size_t gws; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/000077500000000000000000000000001363734646600217535ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/sharings/CMakeLists.txt000066400000000000000000000026461363734646600245230ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # file(GLOB_RECURSE SHARING_ENABLE_CPPS enable*.cpp) add_library (${SHARINGS_ENABLE_LIB_NAME} OBJECT EXCLUDE_FROM_ALL ${SHARING_ENABLE_CPPS}) set_target_properties (${SHARINGS_ENABLE_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties (${SHARINGS_ENABLE_LIB_NAME} PROPERTIES FOLDER 
"opencl runtime") target_include_directories(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) set(RUNTIME_SRCS_SHARINGS ${CMAKE_CURRENT_SOURCE_DIR}/sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory.h ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory.inl ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS ${RUNTIME_SRCS_SHARINGS}) if(WIN32) target_include_directories(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${NEO_SOURCE_DIR}/opencl/source/os_interface/windows) else() target_include_directories(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${NEO_SOURCE_DIR}/opencl/source/os_interface/linux) endif() add_subdirectories() create_project_source_tree(${SHARINGS_ENABLE_LIB_NAME} ${NEO_SOURCE_DIR}/runtime) set(MSVC_DEF_ADDITIONAL_EXPORTS ${MSVC_DEF_ADDITIONAL_EXPORTS} PARENT_SCOPE) compute-runtime-20.13.16352/opencl/source/sharings/d3d/000077500000000000000000000000001363734646600224255ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/sharings/d3d/CMakeLists.txt000066400000000000000000000013151363734646600251650ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(RUNTIME_SRCS_SHARINGS_D3D ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_d3d_api.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_surface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_surface.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_texture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_texture.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_D3D}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_D3D 
${RUNTIME_SRCS_SHARINGS_D3D})compute-runtime-20.13.16352/opencl/source/sharings/d3d/cl_d3d_api.h000066400000000000000000000144031363734646600245610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include "CL/cl.h" #include "CL/cl_d3d10.h" #include "CL/cl_d3d11.h" #include "CL/cl_dx9_media_sharing.h" #define CL_DX9_MEDIA_SHARING_INTEL_EXT #include "CL/cl_dx9_media_sharing_intel.h" extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9INTEL( cl_platform_id platform, cl_dx9_device_source_intel dx9DeviceSource, void *dx9Object, cl_dx9_device_set_intel dx9DeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices); extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceINTEL( cl_context context, cl_mem_flags flags, IDirect3DSurface9 *resource, HANDLE sharedHandle, UINT plane, cl_int *errcodeRet); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9ObjectsINTEL( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL( cl_command_queue commandQueue, cl_uint numObjects, cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( cl_platform_id platform, cl_uint numMediaAdapters, cl_dx9_media_adapter_type_khr *mediaAdapterType, void *mediaAdapters, cl_dx9_media_adapter_set_khr mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, void *surfaceInfo, cl_uint plane, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL 
clEnqueueAcquireDX9MediaSurfacesKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10BufferKHR( cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( cl_platform_id platform, cl_d3d11_device_source_khr d3dDeviceSource, void 
*d3dObject, cl_d3d11_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11BufferKHR( cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcodeRet) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedDX9MediaSurfaceFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, D3DFORMAT *dx9Formats, cl_uint *numImageFormats) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedD3D10TextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, DXGI_FORMAT *dx10Formats, cl_uint *numImageFormats) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetSupportedD3D11TextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint plane, cl_uint numEntries, DXGI_FORMAT *dx11Formats, cl_uint *numImageFormats) 
CL_API_SUFFIX__VERSION_1_2; compute-runtime-20.13.16352/opencl/source/sharings/d3d/d3d_buffer.h000066400000000000000000000037171363734646600246110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" namespace NEO { class Buffer; class Context; template class D3DBuffer : public D3DSharing { typedef typename D3D::D3DBufferObj D3DBufferObj; typedef typename D3D::D3DBufferDesc D3DBufferDesc; public: static Buffer *create(Context *context, D3DBufferObj *d3dBuffer, cl_mem_flags flags, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); auto sharingFcns = context->getSharing>(); void *sharedHandle = nullptr; D3DBufferDesc bufferDesc = {}; sharingFcns->getBufferDesc(&bufferDesc, d3dBuffer); bool sharedResource = false; D3DBufferObj *bufferStaging = nullptr; if (bufferDesc.MiscFlags & D3DResourceFlags::MISC_SHARED) { bufferStaging = d3dBuffer; sharedResource = true; } else { sharingFcns->createBuffer(&bufferStaging, bufferDesc.ByteWidth); } sharingFcns->getSharedHandle(bufferStaging, &sharedHandle); AllocationProperties properties = {context->getDevice(0)->getRootDeviceIndex(), false, 0, GraphicsAllocation::AllocationType::SHARED_BUFFER, false}; auto alloc = context->getMemoryManager()->createGraphicsAllocationFromSharedHandle(toOsHandle(sharedHandle), properties, true); auto d3dBufferObj = new D3DBuffer(context, d3dBuffer, bufferStaging, sharedResource); return Buffer::createSharedBuffer(context, flags, d3dBufferObj, alloc); } ~D3DBuffer() override = default; protected: D3DBuffer(Context *context, D3DBufferObj *d3dBuffer, D3DBufferObj *bufferStaging, bool sharedResource) : D3DSharing(context, d3dBuffer, bufferStaging, 0, sharedResource){}; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/sharings/d3d/d3d_sharing.cpp000066400000000000000000000067501363734646600253260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "shared/source/gmm_helper/gmm.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" using namespace NEO; template class D3DSharing; template class D3DSharing; template class D3DSharing; template D3DSharing::D3DSharing(Context *context, D3DResource *resource, D3DResource *resourceStaging, unsigned int subresource, bool sharedResource) : sharedResource(sharedResource), subresource(subresource), resource(resource), resourceStaging(resourceStaging), context(context) { sharingFunctions = context->getSharing>(); if (sharingFunctions) { sharingFunctions->addRef(resource); sharingFunctions->createQuery(&this->d3dQuery); sharingFunctions->track(resource, subresource); } }; template D3DSharing::~D3DSharing() { if (sharingFunctions) { sharingFunctions->untrack(resource, subresource); if (!sharedResource) { sharingFunctions->release(resourceStaging); } sharingFunctions->release(resource); sharingFunctions->release(d3dQuery); } }; template void D3DSharing::synchronizeObject(UpdateData &updateData) { sharingFunctions->getDeviceContext(d3dQuery); if (!sharedResource) { sharingFunctions->copySubresourceRegion(resourceStaging, 0, resource, subresource); sharingFunctions->flushAndWait(d3dQuery); } else if (!context->getInteropUserSyncEnabled()) { sharingFunctions->flushAndWait(d3dQuery); } sharingFunctions->releaseDeviceContext(d3dQuery); updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } template void D3DSharing::releaseResource(MemObj *memObject) { if (!sharedResource) { sharingFunctions->getDeviceContext(d3dQuery); sharingFunctions->copySubresourceRegion(resource, subresource, resourceStaging, 0); if 
(!context->getInteropUserSyncEnabled()) { sharingFunctions->flushAndWait(d3dQuery); } sharingFunctions->releaseDeviceContext(d3dQuery); } } template void D3DSharing::updateImgInfoAndDesc(Gmm *gmm, ImageInfo &imgInfo, ImagePlane imagePlane, cl_uint arrayIndex) { gmm->updateImgInfoAndDesc(imgInfo, arrayIndex); if (imagePlane == ImagePlane::PLANE_U || imagePlane == ImagePlane::PLANE_V || imagePlane == ImagePlane::PLANE_UV) { imgInfo.imgDesc.imageWidth /= 2; imgInfo.imgDesc.imageHeight /= 2; if (imagePlane != ImagePlane::PLANE_UV) { imgInfo.imgDesc.imageRowPitch /= 2; } } } template const ClSurfaceFormatInfo *D3DSharing::findSurfaceFormatInfo(GMM_RESOURCE_FORMAT_ENUM gmmFormat, cl_mem_flags flags, unsigned int clVersionSupport) { ArrayRef formats = SurfaceFormats::surfaceFormats(flags, clVersionSupport); for (auto &format : formats) { if (gmmFormat == format.surfaceFormat.GMMSurfaceFormat) { return &format; } } return nullptr; } template bool D3DSharing::isFormatWithPlane1(DXGI_FORMAT format) { switch (format) { case DXGI_FORMAT_NV12: case DXGI_FORMAT_P010: case DXGI_FORMAT_P016: return true; } return false; } compute-runtime-20.13.16352/opencl/source/sharings/d3d/d3d_sharing.h000066400000000000000000000032351363734646600247660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "d3d_sharing_functions.h" enum GMM_RESOURCE_FORMAT_ENUM; namespace NEO { enum class ImagePlane; class Context; class Gmm; struct ClSurfaceFormatInfo; struct ImageInfo; template class D3DSharing : public SharingHandler { typedef typename D3D::D3DQuery D3DQuery; typedef typename D3D::D3DResource D3DResource; public: D3DSharing(Context *context, D3DResource *resource, D3DResource *resourceStaging, unsigned int subresource, bool sharedResource); ~D3DSharing() override; void synchronizeObject(UpdateData &updateData) override; void releaseResource(MemObj *memObject) 
override; D3DResource **getResourceHandler() { return &resource; } void *getResourceStaging() { return resourceStaging; } unsigned int &getSubresource() { return subresource; } typename D3DQuery *getQuery() { return d3dQuery; } bool isSharedResource() { return sharedResource; } static const ClSurfaceFormatInfo *findSurfaceFormatInfo(GMM_RESOURCE_FORMAT_ENUM gmmFormat, cl_mem_flags flags, unsigned int clVersionSupport); static bool isFormatWithPlane1(DXGI_FORMAT format); protected: static void updateImgInfoAndDesc(Gmm *gmm, ImageInfo &imgInfo, ImagePlane imagePlane, cl_uint arrayIndex); Context *context; D3DSharingFunctions *sharingFunctions = nullptr; D3DResource *resource = nullptr; D3DResource *resourceStaging = nullptr; D3DQuery *d3dQuery = nullptr; bool sharedResource = false; unsigned int subresource = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/d3d/d3d_surface.cpp000066400000000000000000000260011363734646600253120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/d3d/d3d_surface.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "mmsystem.h" using namespace NEO; D3DSurface::D3DSurface(Context *context, cl_dx9_surface_info_khr *surfaceInfo, D3D9Surface *surfaceStaging, cl_uint plane, ImagePlane imagePlane, cl_dx9_media_adapter_type_khr adapterType, bool sharedResource, bool lockable) : D3DSharing(context, surfaceInfo->resource, surfaceStaging, plane, sharedResource), adapterType(adapterType), surfaceInfo(*surfaceInfo), 
lockable(lockable), plane(plane), imagePlane(imagePlane), d3d9Surface(surfaceInfo->resource), d3d9SurfaceStaging(surfaceStaging) { if (sharingFunctions) { resourceDevice = sharingFunctions->getDevice(); } }; Image *D3DSurface::create(Context *context, cl_dx9_surface_info_khr *surfaceInfo, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, cl_uint plane, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); D3D9Surface *surfaceStaging = nullptr; ImageInfo imgInfo = {}; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; if (!context || !context->getSharing>() || !context->getSharing>()->getDevice()) { err.set(CL_INVALID_CONTEXT); return nullptr; } auto sharingFcns = context->getSharing>(); if (sharingFcns->isTracked(surfaceInfo->resource, plane)) { err.set(CL_INVALID_DX9_RESOURCE_INTEL); return nullptr; } sharingFcns->updateDevice(surfaceInfo->resource); imgInfo.imgDesc.imageType = ImageType::Image2D; D3D9SurfaceDesc surfaceDesc = {}; sharingFcns->getTexture2dDesc(&surfaceDesc, surfaceInfo->resource); imgInfo.imgDesc.imageWidth = surfaceDesc.Width; imgInfo.imgDesc.imageHeight = surfaceDesc.Height; if (surfaceDesc.Pool != D3DPOOL_DEFAULT) { err.set(CL_INVALID_DX9_RESOURCE_INTEL); return nullptr; } err.set(findImgFormat(surfaceDesc.Format, imgFormat, plane, imagePlane)); if (err.localErrcode != CL_SUCCESS) { return nullptr; } imgInfo.plane = GmmTypesConverter::convertPlane(imagePlane); auto *clSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; bool isSharedResource = false; bool lockable = false; auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); GraphicsAllocation *alloc = nullptr; if (surfaceInfo->shared_handle) { isSharedResource = true; AllocationProperties allocProperties(rootDeviceIndex, false, 0u, 
GraphicsAllocation::AllocationType::SHARED_IMAGE, false); alloc = context->getMemoryManager()->createGraphicsAllocationFromSharedHandle(toOsHandle(surfaceInfo->shared_handle), allocProperties, false); updateImgInfoAndDesc(alloc->getDefaultGmm(), imgInfo, imagePlane, 0u); } else { lockable = !(surfaceDesc.Usage & D3DResourceFlags::USAGE_RENDERTARGET) || imagePlane != ImagePlane::NO_PLANE; if (!lockable) { sharingFcns->createTexture2d(&surfaceStaging, &surfaceDesc, 0u); } if (imagePlane == ImagePlane::PLANE_U || imagePlane == ImagePlane::PLANE_V || imagePlane == ImagePlane::PLANE_UV) { imgInfo.imgDesc.imageWidth /= 2; imgInfo.imgDesc.imageHeight /= 2; } MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, true, memoryProperties, context->getDevice(0)->getHardwareInfo()); allocProperties.allocationType = GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY; alloc = context->getMemoryManager()->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); imgInfo.imgDesc.imageRowPitch = imgInfo.rowPitch; imgInfo.imgDesc.imageSlicePitch = imgInfo.slicePitch; } DEBUG_BREAK_IF(!alloc); auto surface = new D3DSurface(context, surfaceInfo, surfaceStaging, plane, imagePlane, adapterType, isSharedResource, lockable); return Image::createSharedImage(context, surface, mcsSurfaceInfo, alloc, nullptr, flags, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); } void D3DSurface::synchronizeObject(UpdateData &updateData) { D3DLOCKED_RECT lockedRect = {}; sharingFunctions->setDevice(resourceDevice); if (sharedResource && !context->getInteropUserSyncEnabled()) { sharingFunctions->flushAndWait(d3dQuery); } else if (!sharedResource) { if (lockable) { sharingFunctions->lockRect(d3d9Surface, &lockedRect, D3DLOCK_READONLY); } else { sharingFunctions->getRenderTargetData(d3d9Surface, d3d9SurfaceStaging); 
sharingFunctions->lockRect(d3d9SurfaceStaging, &lockedRect, D3DLOCK_READONLY); } auto image = castToObjectOrAbort(updateData.memObject); auto sys = lockedRect.pBits; auto gpu = context->getMemoryManager()->lockResource(image->getGraphicsAllocation()); auto pitch = static_cast(lockedRect.Pitch); auto height = static_cast(image->getImageDesc().image_height); image->getGraphicsAllocation()->getDefaultGmm()->resourceCopyBlt(sys, gpu, pitch, height, 1u, imagePlane); context->getMemoryManager()->unlockResource(updateData.memObject->getGraphicsAllocation()); if (lockable) { sharingFunctions->unlockRect(d3d9Surface); } else { sharingFunctions->unlockRect(d3d9SurfaceStaging); } sharingFunctions->flushAndWait(d3dQuery); } updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } void D3DSurface::releaseResource(MemObj *memObject) { D3DLOCKED_RECT lockedRect = {}; auto image = castToObject(memObject); if (!image) { return; } sharingFunctions->setDevice(resourceDevice); if (!sharedResource) { if (lockable) { sharingFunctions->lockRect(d3d9Surface, &lockedRect, 0); } else { sharingFunctions->lockRect(d3d9SurfaceStaging, &lockedRect, 0); } auto sys = lockedRect.pBits; auto gpu = context->getMemoryManager()->lockResource(image->getGraphicsAllocation()); auto pitch = static_cast(lockedRect.Pitch); auto height = static_cast(image->getImageDesc().image_height); image->getGraphicsAllocation()->getDefaultGmm()->resourceCopyBlt(sys, gpu, pitch, height, 0u, imagePlane); context->getMemoryManager()->unlockResource(memObject->getGraphicsAllocation()); if (lockable) { sharingFunctions->unlockRect(d3d9Surface); } else { sharingFunctions->unlockRect(d3d9SurfaceStaging); sharingFunctions->updateSurface(d3d9SurfaceStaging, d3d9Surface); } } } const std::map D3DSurface::D3DtoClFormatConversions = { {D3DFMT_R32F, {CL_R, CL_FLOAT}}, {D3DFMT_R16F, {CL_R, CL_HALF_FLOAT}}, {D3DFMT_L16, {CL_R, CL_UNORM_INT16}}, {D3DFMT_A8, {CL_A, CL_UNORM_INT8}}, {D3DFMT_L8, {CL_R, CL_UNORM_INT8}}, 
{D3DFMT_G32R32F, {CL_RG, CL_FLOAT}}, {D3DFMT_G16R16F, {CL_RG, CL_HALF_FLOAT}}, {D3DFMT_G16R16, {CL_RG, CL_UNORM_INT16}}, {D3DFMT_A8L8, {CL_RG, CL_UNORM_INT8}}, {D3DFMT_A32B32G32R32F, {CL_RGBA, CL_FLOAT}}, {D3DFMT_A16B16G16R16F, {CL_RGBA, CL_HALF_FLOAT}}, {D3DFMT_A16B16G16R16, {CL_RGBA, CL_UNORM_INT16}}, {D3DFMT_X8B8G8R8, {CL_RGBA, CL_UNORM_INT8}}, {D3DFMT_A8B8G8R8, {CL_RGBA, CL_UNORM_INT8}}, {D3DFMT_A8R8G8B8, {CL_BGRA, CL_UNORM_INT8}}, {D3DFMT_X8R8G8B8, {CL_BGRA, CL_UNORM_INT8}}, {D3DFMT_YUY2, {CL_YUYV_INTEL, CL_UNORM_INT8}}, {D3DFMT_UYVY, {CL_UYVY_INTEL, CL_UNORM_INT8}}, // The specific channel_order for NV12 is selected in findImgFormat {static_cast(MAKEFOURCC('N', 'V', '1', '2')), {CL_R | CL_RG, CL_UNORM_INT8}}, {static_cast(MAKEFOURCC('Y', 'V', '1', '2')), {CL_R, CL_UNORM_INT8}}, {static_cast(MAKEFOURCC('Y', 'V', 'Y', 'U')), {CL_YVYU_INTEL, CL_UNORM_INT8}}, {static_cast(MAKEFOURCC('V', 'Y', 'U', 'Y')), {CL_VYUY_INTEL, CL_UNORM_INT8}}}; const std::vector D3DSurface::D3DPlane1Formats = { static_cast(MAKEFOURCC('N', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; const std::vector D3DSurface::D3DPlane2Formats = {static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; cl_int D3DSurface::findImgFormat(D3DFORMAT d3dFormat, cl_image_format &imgFormat, cl_uint plane, ImagePlane &imagePlane) { imagePlane = ImagePlane::NO_PLANE; static const cl_image_format unknown_format = {0, 0}; auto element = D3DtoClFormatConversions.find(d3dFormat); if (element == D3DtoClFormatConversions.end()) { imgFormat = unknown_format; return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } imgFormat = element->second; switch (d3dFormat) { case static_cast(MAKEFOURCC('N', 'V', '1', '2')): switch (plane) { case 0: imgFormat.image_channel_order = CL_R; imagePlane = ImagePlane::PLANE_Y; return CL_SUCCESS; case 1: imgFormat.image_channel_order = CL_RG; imagePlane = ImagePlane::PLANE_UV; return CL_SUCCESS; default: imgFormat = unknown_format; return CL_INVALID_VALUE; } case static_cast(MAKEFOURCC('Y', 
'V', '1', '2')): switch (plane) { case 0: imagePlane = ImagePlane::PLANE_Y; return CL_SUCCESS; case 1: imagePlane = ImagePlane::PLANE_V; return CL_SUCCESS; case 2: imagePlane = ImagePlane::PLANE_U; return CL_SUCCESS; default: imgFormat = unknown_format; return CL_INVALID_VALUE; } } if (plane > 0) { return CL_INVALID_VALUE; } return CL_SUCCESS; } int D3DSurface::validateUpdateData(UpdateData &updateData) { auto image = castToObject(updateData.memObject); if (!image) { return CL_INVALID_MEM_OBJECT; } return CL_SUCCESS; } compute-runtime-20.13.16352/opencl/source/sharings/d3d/d3d_surface.h000066400000000000000000000041211363734646600247560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/d3d/d3d_sharing.h" #include struct ErrorCodeHelper; namespace NEO { enum class ImagePlane; class Image; class Context; class D3DSurface : public D3DSharing { typedef typename D3DTypesHelper::D3D9::D3DTexture2dDesc D3D9SurfaceDesc; typedef typename D3DTypesHelper::D3D9::D3DTexture2d D3D9Surface; typedef typename D3DTypesHelper::D3D9::D3DResource D3DResource; typedef typename D3DTypesHelper::D3D9::D3DDevice D3DDevice; public: static Image *create(Context *context, cl_dx9_surface_info_khr *surfaceInfo, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapterType, cl_uint plane, cl_int *retCode); static const std::map D3DtoClFormatConversions; static const std::vector D3DPlane1Formats; static const std::vector D3DPlane2Formats; static cl_int findImgFormat(D3DFORMAT d3dFormat, cl_image_format &imgFormat, cl_uint plane, ImagePlane &imagePlane); void synchronizeObject(UpdateData &updateData) override; void releaseResource(MemObj *memObject) override; int validateUpdateData(UpdateData &updateData) override; cl_dx9_surface_info_khr &getSurfaceInfo() { return surfaceInfo; } cl_dx9_media_adapter_type_khr &getAdapterType() { return adapterType; } cl_uint &getPlane() { return plane; } 
~D3DSurface() override = default; const bool lockable = false; protected: D3DSurface(Context *context, cl_dx9_surface_info_khr *surfaceInfo, D3D9Surface *surfaceStaging, cl_uint plane, ImagePlane imagePlane, cl_dx9_media_adapter_type_khr adapterType, bool sharedResource, bool lockable); cl_dx9_media_adapter_type_khr adapterType = 0u; cl_dx9_surface_info_khr surfaceInfo = {}; cl_uint plane = 0; ImagePlane imagePlane; D3D9Surface *d3d9Surface = nullptr; D3D9Surface *d3d9SurfaceStaging = nullptr; D3DDevice *resourceDevice = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/d3d/d3d_texture.cpp000066400000000000000000000203521363734646600253650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/d3d/d3d_texture.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" using namespace NEO; template class D3DTexture; template class D3DTexture; template Image *D3DTexture::create2d(Context *context, D3DTexture2d *d3dTexture, cl_mem_flags flags, cl_uint subresource, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); auto sharingFcns = context->getSharing>(); ImagePlane imagePlane = ImagePlane::NO_PLANE; void *sharedHandle = nullptr; cl_uint arrayIndex = 0u; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; ImageInfo imgInfo = {}; imgInfo.imgDesc.imageType = ImageType::Image2D; D3DTexture2dDesc textureDesc = {}; sharingFcns->getTexture2dDesc(&textureDesc, d3dTexture); if 
((textureDesc.Format == DXGI_FORMAT_NV12) || (textureDesc.Format == DXGI_FORMAT_P010) || (textureDesc.Format == DXGI_FORMAT_P016)) { if ((subresource % 2) == 0) { imagePlane = ImagePlane::PLANE_Y; } else { imagePlane = ImagePlane::PLANE_UV; } imgInfo.plane = GmmTypesConverter::convertPlane(imagePlane); arrayIndex = subresource / 2u; } else if (subresource >= textureDesc.MipLevels * textureDesc.ArraySize) { err.set(CL_INVALID_VALUE); return nullptr; } bool sharedResource = false; D3DTexture2d *textureStaging = nullptr; if ((textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED || textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED_KEYEDMUTEX) && subresource % textureDesc.MipLevels == 0) { textureStaging = d3dTexture; sharedResource = true; } else { sharingFcns->createTexture2d(&textureStaging, &textureDesc, subresource); } GraphicsAllocation *alloc = nullptr; auto memoryManager = context->getMemoryManager(); auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); if (textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED_NTHANDLE) { sharingFcns->getSharedNTHandle(textureStaging, &sharedHandle); alloc = memoryManager->createGraphicsAllocationFromNTHandle(sharedHandle, rootDeviceIndex); } else { sharingFcns->getSharedHandle(textureStaging, &sharedHandle); AllocationProperties allocProperties(rootDeviceIndex, nullptr, false, GraphicsAllocation::AllocationType::SHARED_IMAGE, false); alloc = memoryManager->createGraphicsAllocationFromSharedHandle(toOsHandle(sharedHandle), allocProperties, false); } DEBUG_BREAK_IF(!alloc); updateImgInfoAndDesc(alloc->getDefaultGmm(), imgInfo, imagePlane, arrayIndex); auto d3dTextureObj = new D3DTexture(context, d3dTexture, subresource, textureStaging, sharedResource); const ClSurfaceFormatInfo *clSurfaceFormat = nullptr; if ((textureDesc.Format == DXGI_FORMAT_NV12) || (textureDesc.Format == DXGI_FORMAT_P010) || (textureDesc.Format == DXGI_FORMAT_P016)) { clSurfaceFormat = findYuvSurfaceFormatInfo(textureDesc.Format, 
imagePlane, flags); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; } else { clSurfaceFormat = findSurfaceFormatInfo(alloc->getDefaultGmm()->gmmResourceInfo->getResourceFormat(), flags, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; } auto hwInfo = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); if (alloc->getDefaultGmm()->unifiedAuxTranslationCapable()) { alloc->getDefaultGmm()->isRenderCompressed = hwHelper.isPageTableManagerSupported(*hwInfo) ? memoryManager->mapAuxGpuVA(alloc) : true; } return Image::createSharedImage(context, d3dTextureObj, mcsSurfaceInfo, alloc, nullptr, flags, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); } template Image *D3DTexture::create3d(Context *context, D3DTexture3d *d3dTexture, cl_mem_flags flags, cl_uint subresource, cl_int *retCode) { ErrorCodeHelper err(retCode, CL_SUCCESS); auto sharingFcns = context->getSharing>(); void *sharedHandle = nullptr; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; ImageInfo imgInfo = {}; imgInfo.imgDesc.imageType = ImageType::Image3D; D3DTexture3dDesc textureDesc = {}; sharingFcns->getTexture3dDesc(&textureDesc, d3dTexture); if (subresource >= textureDesc.MipLevels) { err.set(CL_INVALID_VALUE); return nullptr; } bool sharedResource = false; D3DTexture3d *textureStaging = nullptr; if ((textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED || textureDesc.MiscFlags & D3DResourceFlags::MISC_SHARED_KEYEDMUTEX) && subresource == 0) { textureStaging = d3dTexture; sharedResource = true; } else { sharingFcns->createTexture3d(&textureStaging, &textureDesc, subresource); } GraphicsAllocation *alloc = nullptr; auto memoryManager = context->getMemoryManager(); auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); if (textureDesc.MiscFlags & 
D3DResourceFlags::MISC_SHARED_NTHANDLE) { sharingFcns->getSharedNTHandle(textureStaging, &sharedHandle); alloc = memoryManager->createGraphicsAllocationFromNTHandle(sharedHandle, rootDeviceIndex); } else { sharingFcns->getSharedHandle(textureStaging, &sharedHandle); AllocationProperties allocProperties(rootDeviceIndex, nullptr, false, GraphicsAllocation::AllocationType::SHARED_IMAGE, false); alloc = memoryManager->createGraphicsAllocationFromSharedHandle(toOsHandle(sharedHandle), allocProperties, false); } DEBUG_BREAK_IF(!alloc); updateImgInfoAndDesc(alloc->getDefaultGmm(), imgInfo, ImagePlane::NO_PLANE, 0u); auto d3dTextureObj = new D3DTexture(context, d3dTexture, subresource, textureStaging, sharedResource); auto *clSurfaceFormat = findSurfaceFormatInfo(alloc->getDefaultGmm()->gmmResourceInfo->getResourceFormat(), flags, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); imgInfo.qPitch = alloc->getDefaultGmm()->queryQPitch(GMM_RESOURCE_TYPE::RESOURCE_3D); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; auto hwInfo = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); if (alloc->getDefaultGmm()->unifiedAuxTranslationCapable()) { alloc->getDefaultGmm()->isRenderCompressed = hwHelper.isPageTableManagerSupported(*hwInfo) ? 
memoryManager->mapAuxGpuVA(alloc) : true; } return Image::createSharedImage(context, d3dTextureObj, mcsSurfaceInfo, alloc, nullptr, flags, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); } template const ClSurfaceFormatInfo *D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT dxgiFormat, ImagePlane imagePlane, cl_mem_flags flags) { cl_image_format imgFormat = {}; if (imagePlane == ImagePlane::PLANE_Y) { imgFormat.image_channel_order = CL_R; } else { imgFormat.image_channel_order = CL_RG; } if ((dxgiFormat == DXGI_FORMAT_P010) || (dxgiFormat == DXGI_FORMAT_P016)) { imgFormat.image_channel_data_type = CL_UNORM_INT16; } else { imgFormat.image_channel_data_type = CL_UNORM_INT8; } return Image::getSurfaceFormatFromTable(flags, &imgFormat, 12); } compute-runtime-20.13.16352/opencl/source/sharings/d3d/d3d_texture.h000066400000000000000000000023371363734646600250350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/d3d/d3d_sharing.h" namespace NEO { class Context; class Image; template class D3DTexture : public D3DSharing { typedef typename D3D::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D::D3DTexture3dDesc D3DTexture3dDesc; typedef typename D3D::D3DTexture2d D3DTexture2d; typedef typename D3D::D3DTexture3d D3DTexture3d; typedef typename D3D::D3DResource D3DResource; public: ~D3DTexture() override = default; static Image *create2d(Context *context, D3DTexture2d *d3dTexture, cl_mem_flags flags, cl_uint subresource, cl_int *retCode); static Image *create3d(Context *context, D3DTexture3d *d3dTexture, cl_mem_flags flags, cl_uint subresource, cl_int *retCode); static const ClSurfaceFormatInfo *findYuvSurfaceFormatInfo(DXGI_FORMAT dxgiFormat, ImagePlane imagePlane, cl_mem_flags flags); protected: D3DTexture(Context *context, D3DResource *d3dTexture, cl_uint subresource, D3DResource *textureStaging, bool sharedResource) : D3DSharing(context, d3dTexture, 
textureStaging, subresource, sharedResource){}; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/d3d/enable_d3d.cpp000066400000000000000000000173571363734646600251260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef WIN32 #include "opencl/source/sharings/d3d/enable_d3d.h" #include "shared/source/os_interface/driver_info.h" #include "opencl/source/api/api.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include namespace NEO { bool D3DSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) { if (contextData.get() == nullptr) { contextData = std::make_unique>(); } bool res = false; switch (propertyType) { case CL_CONTEXT_ADAPTER_D3D9_KHR: case CL_CONTEXT_ADAPTER_D3D9EX_KHR: case CL_CONTEXT_ADAPTER_DXVA_KHR: case CL_CONTEXT_D3D9_DEVICE_INTEL: case CL_CONTEXT_D3D9EX_DEVICE_INTEL: case CL_CONTEXT_DXVA_DEVICE_INTEL: contextData->pDevice = (D3DTypesHelper::D3D9::D3DDevice *)propertyValue; contextData->argumentsDefined = true; res = true; break; } return res; } bool D3DSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) { if (contextData.get() == nullptr) { contextData = std::make_unique>(); } bool res = false; switch (propertyType) { case CL_CONTEXT_D3D10_DEVICE_KHR: contextData->pDevice = (D3DTypesHelper::D3D10::D3DDevice *)propertyValue; contextData->argumentsDefined = true; res = true; break; } return res; } bool D3DSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) 
{ if (contextData.get() == nullptr) { contextData = std::make_unique>(); } bool res = false; switch (propertyType) { case CL_CONTEXT_D3D11_DEVICE_KHR: contextData->pDevice = (D3DTypesHelper::D3D11::D3DDevice *)propertyValue; contextData->argumentsDefined = true; res = true; break; } return res; } template <> void Context::registerSharing(D3DSharingFunctions *sharing) { this->sharingFunctions[D3DSharingFunctions::sharingId].reset(sharing); this->preferD3dSharedResources = 1u; } template <> void Context::registerSharing(D3DSharingFunctions *sharing) { this->sharingFunctions[D3DSharingFunctions::sharingId].reset(sharing); this->preferD3dSharedResources = 1u; } template bool D3DSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() == nullptr) return true; if (contextData->argumentsDefined) { context.registerSharing(new D3DSharingFunctions(contextData->pDevice)); } return true; } template std::unique_ptr D3DSharingBuilderFactory::createContextBuilder() { return std::make_unique>(); }; std::string D3DSharingBuilderFactory::getExtensions() { return extensionEnabled ? 
"cl_intel_dx9_media_sharing cl_khr_dx9_media_sharing " : ""; } std::string D3DSharingBuilderFactory::getExtensions() { return "cl_khr_d3d10_sharing "; } std::string D3DSharingBuilderFactory::getExtensions() { return "cl_khr_d3d11_sharing cl_intel_d3d11_nv12_media_sharing "; } void D3DSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clGetDeviceIDsFromDX9MediaAdapterKHR = clGetDeviceIDsFromDX9MediaAdapterKHR; icdGlobalDispatchTable.clCreateFromDX9MediaSurfaceKHR = clCreateFromDX9MediaSurfaceKHR; icdGlobalDispatchTable.clEnqueueReleaseDX9MediaSurfacesKHR = clEnqueueReleaseDX9MediaSurfacesKHR; icdGlobalDispatchTable.clEnqueueAcquireDX9MediaSurfacesKHR = clEnqueueAcquireDX9MediaSurfacesKHR; crtGlobalDispatchTable.clGetDeviceIDsFromDX9INTEL = clGetDeviceIDsFromDX9INTEL; crtGlobalDispatchTable.clCreateFromDX9MediaSurfaceINTEL = clCreateFromDX9MediaSurfaceINTEL; crtGlobalDispatchTable.clEnqueueAcquireDX9ObjectsINTEL = clEnqueueAcquireDX9ObjectsINTEL; crtGlobalDispatchTable.clEnqueueReleaseDX9ObjectsINTEL = clEnqueueReleaseDX9ObjectsINTEL; } void D3DSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clCreateFromD3D10BufferKHR = clCreateFromD3D10BufferKHR; icdGlobalDispatchTable.clCreateFromD3D10Texture2DKHR = clCreateFromD3D10Texture2DKHR; icdGlobalDispatchTable.clCreateFromD3D10Texture3DKHR = clCreateFromD3D10Texture3DKHR; icdGlobalDispatchTable.clEnqueueAcquireD3D10ObjectsKHR = clEnqueueAcquireD3D10ObjectsKHR; icdGlobalDispatchTable.clEnqueueReleaseD3D10ObjectsKHR = clEnqueueReleaseD3D10ObjectsKHR; icdGlobalDispatchTable.clGetDeviceIDsFromD3D10KHR = clGetDeviceIDsFromD3D10KHR; } void D3DSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clCreateFromD3D11BufferKHR = clCreateFromD3D11BufferKHR; icdGlobalDispatchTable.clCreateFromD3D11Texture2DKHR = clCreateFromD3D11Texture2DKHR; icdGlobalDispatchTable.clCreateFromD3D11Texture3DKHR = clCreateFromD3D11Texture3DKHR; 
icdGlobalDispatchTable.clEnqueueAcquireD3D11ObjectsKHR = clEnqueueAcquireD3D11ObjectsKHR; icdGlobalDispatchTable.clEnqueueReleaseD3D11ObjectsKHR = clEnqueueReleaseD3D11ObjectsKHR; icdGlobalDispatchTable.clGetDeviceIDsFromD3D11KHR = clGetDeviceIDsFromD3D11KHR; } void *D3DSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedDX9MediaSurfaceFormatsINTEL") { return ((void *)(clGetSupportedDX9MediaSurfaceFormatsINTEL)); } return nullptr; } void *D3DSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedD3D10TextureFormatsINTEL") { return ((void *)(clGetSupportedD3D10TextureFormatsINTEL)); } return nullptr; } void *D3DSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedD3D11TextureFormatsINTEL") { return ((void *)(clGetSupportedD3D11TextureFormatsINTEL)); } return nullptr; } void D3DSharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo) { extensionEnabled = driverInfo->getMediaSharingSupport(); } void D3DSharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo) {} void D3DSharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo) {} static SharingFactory::RegisterSharing, D3DSharingFunctions> D3D9Sharing; static SharingFactory::RegisterSharing, D3DSharingFunctions> D3D10Sharing; static SharingFactory::RegisterSharing, D3DSharingFunctions> D3D11Sharing; } // namespace NEO #endif compute-runtime-20.13.16352/opencl/source/sharings/d3d/enable_d3d.h000066400000000000000000000023111363734646600245530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { class Context; 
class DriverInfo; template struct D3DCreateContextProperties { typename D3D::D3DDevice *pDevice = nullptr; bool argumentsDefined = false; }; template class D3DSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr> contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; template class D3DSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions() override; void fillGlobalDispatchTable() override; void *getExtensionFunctionAddress(const std::string &functionName) override; void setExtensionEnabled(DriverInfo *driverInfo) override; bool extensionEnabled = true; }; } // namespace NEOcompute-runtime-20.13.16352/opencl/source/sharings/gl/000077500000000000000000000000001363734646600223555ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/sharings/gl/CMakeLists.txt000066400000000000000000000023771363734646600251260ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(RUNTIME_SRCS_SHARINGS_GL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_gl_api_intel.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_arb_sync_event.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_cl_image_format.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_context_guard.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_sync_event.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_texture.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_GL}) add_subdirectories() set(ADDITIONAL_EXPORTS "clEnqueueMarkerWithSyncObjectINTEL" "clGetCLObjectInfoINTEL" "clGetCLEventInfoINTEL" "clReleaseGlSharedEventINTEL") foreach(EXPORT_NAME ${ADDITIONAL_EXPORTS}) 
set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}\n${EXPORT_NAME}") endforeach(EXPORT_NAME) set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}" PARENT_SCOPE) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_GL ${RUNTIME_SRCS_SHARINGS_GL}) compute-runtime-20.13.16352/opencl/source/sharings/gl/cl_gl_api_intel.h000066400000000000000000000005431363734646600256340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl_gl.h" extern cl_int CL_API_CALL clGetSupportedGLTextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_GLenum *glFormats, cl_uint *numTextureFormats); compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_arb_sync_event.h000066400000000000000000000035611363734646600262160ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/event.h" struct _tagCLGLSyncInfo; typedef _tagCLGLSyncInfo CL_GL_SYNC_INFO; namespace NEO { class Context; class GLSharingFunctions; class OsInterface; class OsContext; char *createArbSyncEventName(); void destroyArbSyncEventName(char *name); void cleanupArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo); bool setupArbSyncObject(GLSharingFunctions &sharing, OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); void signalArbSyncObject(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo); void serverWaitForArbSyncObject(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); class GlArbSyncEvent : public Event { public: GlArbSyncEvent() = delete; ~GlArbSyncEvent() override; void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) override; static GlArbSyncEvent *create(Event &baseEvent); CL_GL_SYNC_INFO *getSyncInfo() { return glSyncInfo.get(); } protected: GlArbSyncEvent(Context &context); MOCKABLE_VIRTUAL bool 
setBaseEvent(Event &ev); Event *baseEvent = nullptr; OSInterface *osInterface = nullptr; std::unique_ptr glSyncInfo; }; } // namespace NEO extern "C" CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithSyncObjectINTEL(cl_command_queue commandQueue, cl_event *event, cl_context *context); extern "C" CL_API_ENTRY cl_int CL_API_CALL clGetCLObjectInfoINTEL(cl_mem memObj, void *pResourceInfo); extern "C" CL_API_ENTRY cl_int CL_API_CALL clGetCLEventInfoINTEL(cl_event event, CL_GL_SYNC_INFO **pSyncInfoHandleRet, cl_context *pClContextRet); extern "C" CL_API_ENTRY cl_int CL_API_CALL clReleaseGlSharedEventINTEL(cl_event event); compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_buffer.h000066400000000000000000000021061363734646600244600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/gl_sharing.h" #include "CL/cl_gl.h" struct _tagCLGLBufferInfo; namespace NEO { class Buffer; class Context; class GlBuffer : public GlSharing { public: static Buffer *createSharedGlBuffer(Context *context, cl_mem_flags flags, unsigned int bufferId, cl_int *errcodeRet); void synchronizeObject(UpdateData &updateData) override; void releaseReusedGraphicsAllocation() override; protected: GlBuffer(GLSharingFunctions *sharingFunctions, unsigned int glObjectId) : GlSharing(sharingFunctions, CL_GL_OBJECT_BUFFER, glObjectId){}; void releaseResource(MemObj *memObject) override; void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) override; void popGraphicsAllocationFromReuse(GraphicsAllocation *graphicsAllocation); static GraphicsAllocation *createGraphicsAllocation(Context *context, unsigned int bufferId, _tagCLGLBufferInfo &bufferInfo); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_cl_image_format.cpp000066400000000000000000000013641363734646600266570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/gl_sharing.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "GL/gl.h" #include "config.h" namespace NEO { bool GlTexture::setClImageFormat(int glFormat, cl_image_format &clImgFormat) { auto clFormat = GlSharing::gLToCLFormats.find(static_cast(glFormat)); if (clFormat != GlSharing::gLToCLFormats.end()) { clImgFormat.image_channel_data_type = clFormat->second.image_channel_data_type; clImgFormat.image_channel_order = clFormat->second.image_channel_order; return true; } return false; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_context_guard.h000066400000000000000000000010131363734646600260510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/os_interface/windows/gl/gl_sharing_os.h" #include "opencl/source/sharings/gl/gl_sharing.h" namespace NEO { class GLContextGuard { public: GLContextGuard() = delete; GLContextGuard(GLSharingFunctions &sharingFcns); ~GLContextGuard(); protected: GLSharingFunctions *sharingFunctions; GLContext currentContextHandle; GLDisplay currentDisplayHandle; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_sharing.cpp000066400000000000000000000062731363734646600252060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/gl_sharing.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { const uint32_t GLSharingFunctions::sharingId = SharingType::CLGL_SHARING; const std::unordered_map GlSharing::gLToCLFormats = { {GL_RGBA8, 
{CL_RGBA, CL_UNORM_INT8}}, {GL_RGBA8I, {CL_RGBA, CL_SIGNED_INT8}}, {GL_RGBA16, {CL_RGBA, CL_UNORM_INT16}}, {GL_RGBA16I, {CL_RGBA, CL_SIGNED_INT16}}, {GL_RGBA32I, {CL_RGBA, CL_SIGNED_INT32}}, {GL_RGBA8UI, {CL_RGBA, CL_UNSIGNED_INT8}}, {GL_RGBA16UI, {CL_RGBA, CL_UNSIGNED_INT16}}, {GL_RGBA32UI, {CL_RGBA, CL_UNSIGNED_INT32}}, {GL_RGBA16F, {CL_RGBA, CL_HALF_FLOAT}}, {GL_RGBA32F, {CL_RGBA, CL_FLOAT}}, {GL_RGBA, {CL_RGBA, CL_UNORM_INT8}}, {GL_RGBA8_SNORM, {CL_RGBA, CL_SNORM_INT8}}, {GL_RGBA16_SNORM, {CL_RGBA, CL_SNORM_INT16}}, {GL_BGRA, {CL_BGRA, CL_UNORM_INT8}}, {GL_R8, {CL_R, CL_UNORM_INT8}}, {GL_R8_SNORM, {CL_R, CL_SNORM_INT8}}, {GL_R16, {CL_R, CL_UNORM_INT16}}, {GL_R16_SNORM, {CL_R, CL_SNORM_INT16}}, {GL_R16F, {CL_R, CL_HALF_FLOAT}}, {GL_R32F, {CL_R, CL_FLOAT}}, {GL_R8I, {CL_R, CL_SIGNED_INT8}}, {GL_R16I, {CL_R, CL_SIGNED_INT16}}, {GL_R32I, {CL_R, CL_SIGNED_INT32}}, {GL_R8UI, {CL_R, CL_UNSIGNED_INT8}}, {GL_R16UI, {CL_R, CL_UNSIGNED_INT16}}, {GL_R32UI, {CL_R, CL_UNSIGNED_INT32}}, {GL_DEPTH_COMPONENT32F, {CL_DEPTH, CL_FLOAT}}, {GL_DEPTH_COMPONENT16, {CL_DEPTH, CL_UNORM_INT16}}, {GL_DEPTH24_STENCIL8, {CL_DEPTH_STENCIL, CL_UNORM_INT24}}, {GL_DEPTH32F_STENCIL8, {CL_DEPTH_STENCIL, CL_FLOAT}}, {GL_SRGB8_ALPHA8, {CL_sRGBA, CL_UNORM_INT8}}, {GL_RG8, {CL_RG, CL_UNORM_INT8}}, {GL_RG8_SNORM, {CL_RG, CL_SNORM_INT8}}, {GL_RG16, {CL_RG, CL_UNORM_INT16}}, {GL_RG16_SNORM, {CL_RG, CL_SNORM_INT16}}, {GL_RG16F, {CL_RG, CL_HALF_FLOAT}}, {GL_RG32F, {CL_RG, CL_FLOAT}}, {GL_RG8I, {CL_RG, CL_SIGNED_INT8}}, {GL_RG16I, {CL_RG, CL_SIGNED_INT16}}, {GL_RG32I, {CL_RG, CL_SIGNED_INT32}}, {GL_RG8UI, {CL_RG, CL_UNSIGNED_INT8}}, {GL_RG16UI, {CL_RG, CL_UNSIGNED_INT16}}, {GL_RG32UI, {CL_RG, CL_UNSIGNED_INT32}}, {GL_RGB10, {CL_RGBA, CL_UNORM_INT16}}}; int GlSharing::synchronizeHandler(UpdateData &updateData) { GLContextGuard guard(*sharingFunctions); synchronizeObject(updateData); return CL_SUCCESS; } char *createArbSyncEventName() { static std::atomic synchCounter{0}; uint32_t id = synchCounter++; 
constexpr int maxDigitsForId = std::numeric_limits::digits10; static const char prefix[] = "NEO_SYNC_"; constexpr int nameMaxLen = sizeof(prefix) + maxDigitsForId + 1; char *ret = new char[nameMaxLen]; snprintf(ret, nameMaxLen, "%s_%d", prefix, id); return ret; } void destroyArbSyncEventName(char *name) { delete[] name; } template GLSharingFunctions *Context::getSharing(); } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_sharing.h000066400000000000000000000042461363734646600246510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "opencl/source/sharings/sharing.h" #include "CL/cl.h" #include "GL/gl.h" #include "GL/glext.h" #include #include #include struct _tagCLGLSyncInfo; typedef struct _tagCLGLSyncInfo CL_GL_SYNC_INFO, *PCL_GL_SYNC_INFO; namespace NEO { class Event; class GlArbSyncEvent; class GLSharingFunctions; class OSInterface; class OsContext; typedef unsigned int OS_HANDLE; typedef struct CLGLContextInfo { OS_HANDLE DeviceHandle; OS_HANDLE ContextHandle; } ContextInfo; class GLSharingFunctions : public SharingFunctions { public: GLSharingFunctions() = default; uint32_t getId() const override { return GLSharingFunctions::sharingId; } static const uint32_t sharingId; static cl_int getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, size_t numEntries, cl_GLenum *formats, uint32_t *numImageFormats); virtual GLboolean initGLFunctions() = 0; virtual bool isOpenGlSharingSupported() = 0; }; class GlSharing : public SharingHandler { public: GlSharing(GLSharingFunctions *sharingFunctions, unsigned int glObjectType, unsigned int glObjectId) : sharingFunctions(sharingFunctions), clGlObjectType(glObjectType), clGlObjectId(glObjectId){}; GLSharingFunctions *peekFunctionsHandler() { return sharingFunctions; } void getGlObjectInfo(unsigned int *pClGlObjectType, unsigned int *pClGlObjectId) { if 
(pClGlObjectType) { *pClGlObjectType = clGlObjectType; } if (pClGlObjectId) { *pClGlObjectId = clGlObjectId; } } static const std::unordered_map gLToCLFormats; protected: int synchronizeHandler(UpdateData &updateData) override; GLSharingFunctions *sharingFunctions = nullptr; unsigned int clGlObjectType = 0u; unsigned int clGlObjectId = 0u; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_sync_event.h000066400000000000000000000013511363734646600253650ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/event.h" #include struct _tagGLCLSyncInfo; typedef _tagGLCLSyncInfo GL_CL_SYNC_INFO; namespace NEO { class Context; class GlSyncEvent : public Event { public: GlSyncEvent() = delete; GlSyncEvent(Context &context, const GL_CL_SYNC_INFO &sync); ~GlSyncEvent() override; static GlSyncEvent *create(Context &context, cl_GLsync sync, cl_int *errCode); void updateExecutionStatus() override; uint32_t getTaskLevel() override; bool isExternallySynchronized() const override { return true; } protected: std::unique_ptr glSync; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/gl_texture.h000066400000000000000000000032661363734646600247170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/gl_sharing.h" #include "CL/cl_gl.h" namespace NEO { class Context; class Image; class GlTexture : GlSharing { public: static Image *createSharedGlTexture(Context *context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet); void synchronizeObject(UpdateData &updateData) override; cl_int getGlTextureInfo(cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; cl_GLint getMiplevel() 
const { return miplevel; } CL_GL_RESOURCE_INFO *getTextureInfo() { return &textureInfo; } cl_GLenum getTarget() const { return target; } static bool setClImageFormat(int glFormat, cl_image_format &clImgFormat); static cl_mem_object_type getClMemObjectType(cl_GLenum glType); static cl_gl_object_type getClGlObjectType(cl_GLenum glType); static cl_GLenum getBaseTargetType(cl_GLenum target); protected: GlTexture(GLSharingFunctions *sharingFunctions, unsigned int glObjectType, unsigned int glObjectId, CL_GL_RESOURCE_INFO texInfo, cl_GLenum target, cl_GLint miplevel) : GlSharing(sharingFunctions, glObjectType, glObjectId), target(target), miplevel(miplevel), textureInfo(texInfo){}; static uint32_t getClObjectType(cl_GLenum glType, bool returnClGlObjectType); void releaseResource(MemObj *memObject) override; cl_GLenum target; cl_GLint miplevel; CL_GL_RESOURCE_INFO textureInfo; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/000077500000000000000000000000001363734646600240475ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/CMakeLists.txt000066400000000000000000000023631363734646600266130ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(RUNTIME_SRCS_SHARINGS_GL_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_gl_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_arb_sync_event_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_buffer_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_context_guard_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_windows.h ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sync_event_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_texture_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/win_enable_gl.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_GL_WINDOWS}) set(RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/win_enable_gl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/win_enable_gl.h ) target_sources(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS}) endif() set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_GL_WINDOWS ${RUNTIME_SRCS_SHARINGS_GL_WINDOWS}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS ${RUNTIME_SRCS_SHARINGS_GL_ENABLE_WINDOWS}) compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/cl_gl_api.cpp000066400000000000000000000364411363734646600264740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "shared/source/utilities/api_intercept.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/source/sharings/gl/gl_sync_event.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/tracing/tracing_notify.h" #include "CL/cl.h" #include "CL/cl_gl.h" #include "config.h" using namespace NEO; cl_mem CL_API_CALL clCreateFromGLBuffer(cl_context context, cl_mem_flags flags, cl_GLuint bufobj, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLBuffer, &context, &flags, &bufobj, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "bufobj", bufobj); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper 
err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { cl_mem buffer = nullptr; TRACING_EXIT(clCreateFromGLBuffer, &buffer); return buffer; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); cl_mem buffer = nullptr; TRACING_EXIT(clCreateFromGLBuffer, &buffer); return buffer; } cl_mem buffer = GlBuffer::createSharedGlBuffer(pContext, flags, bufobj, errcodeRet); TRACING_EXIT(clCreateFromGLBuffer, &buffer); return buffer; } cl_mem CL_API_CALL clCreateFromGLTexture(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLTexture, &context, &flags, &target, &miplevel, &texture, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "target", target, "miplevel", miplevel, "texture", texture); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); cl_mem image = nullptr; if (returnCode != CL_SUCCESS) { TRACING_EXIT(clCreateFromGLTexture, &image); return image; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); TRACING_EXIT(clCreateFromGLTexture, &image); return image; } image = GlTexture::createSharedGlTexture(pContext, flags, target, miplevel, texture, errcodeRet); DBG_LOG_INPUTS("image", image); TRACING_EXIT(clCreateFromGLTexture, &image); return image; } // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLTexture2D, &context, &flags, &target, &miplevel, &texture, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "target", target, "miplevel", miplevel, "texture", texture); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, 
returnCode); cl_mem image = nullptr; if (returnCode != CL_SUCCESS) { TRACING_EXIT(clCreateFromGLTexture2D, &image); return image; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); TRACING_EXIT(clCreateFromGLTexture2D, &image); return image; } image = GlTexture::createSharedGlTexture(pContext, flags, target, miplevel, texture, errcodeRet); DBG_LOG_INPUTS("image", image); TRACING_EXIT(clCreateFromGLTexture2D, &image); return image; } // deprecated OpenCL 1.1 cl_mem CL_API_CALL clCreateFromGLTexture3D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLTexture3D, &context, &flags, &target, &miplevel, &texture, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "target", target, "miplevel", miplevel, "texture", texture); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); cl_mem image = nullptr; if (returnCode != CL_SUCCESS) { TRACING_EXIT(clCreateFromGLTexture3D, &image); return image; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); TRACING_EXIT(clCreateFromGLTexture3D, &image); return image; } image = GlTexture::createSharedGlTexture(pContext, flags, target, miplevel, texture, errcodeRet); DBG_LOG_INPUTS("image", image); TRACING_EXIT(clCreateFromGLTexture3D, &image); return image; } cl_mem CL_API_CALL clCreateFromGLRenderbuffer(cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcodeRet) { TRACING_ENTER(clCreateFromGLRenderbuffer, &context, &flags, &renderbuffer, &errcodeRet); API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "flags", flags, "renderbuffer", renderbuffer); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { cl_mem buffer = 
nullptr; TRACING_EXIT(clCreateFromGLRenderbuffer, &buffer); return buffer; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); cl_mem buffer = nullptr; TRACING_EXIT(clCreateFromGLRenderbuffer, &buffer); return buffer; } cl_mem buffer = GlTexture::createSharedGlTexture(pContext, flags, GL_RENDERBUFFER_EXT, 0, renderbuffer, errcodeRet); TRACING_EXIT(clCreateFromGLRenderbuffer, &buffer); return buffer; } cl_int CL_API_CALL clGetGLObjectInfo(cl_mem memobj, cl_gl_object_type *glObjectType, cl_GLuint *glObjectName) { TRACING_ENTER(clGetGLObjectInfo, &memobj, &glObjectType, &glObjectName); cl_int retValue = CL_SUCCESS; API_ENTER(&retValue); DBG_LOG_INPUTS("memobj", memobj, "glObjectType", glObjectType, "glObjectName", glObjectName); retValue = validateObjects(memobj); if (retValue == CL_SUCCESS) { auto pMemObj = castToObject(memobj); auto handler = (GlSharing *)pMemObj->peekSharingHandler(); if (handler != nullptr) { handler->getGlObjectInfo(glObjectType, glObjectName); } else { retValue = CL_INVALID_GL_OBJECT; TRACING_EXIT(clGetGLObjectInfo, &retValue); return retValue; } } TRACING_EXIT(clGetGLObjectInfo, &retValue); return retValue; } cl_int CL_API_CALL clGetGLTextureInfo(cl_mem memobj, cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { TRACING_ENTER(clGetGLTextureInfo, &memobj, ¶mName, ¶mValueSize, ¶mValue, ¶mValueSizeRet); cl_int retValue = CL_SUCCESS; API_ENTER(&retValue); DBG_LOG_INPUTS("memobj", memobj, "paramName", paramName, "paramValueSize", paramValueSize, "paramValueSize", FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", FileLoggerInstance().getInput(paramValueSizeRet, 0)); retValue = validateObjects(memobj); if (retValue == CL_SUCCESS) { auto pMemObj = castToObject(memobj); auto glTexture = (GlTexture *)pMemObj->peekSharingHandler(); retValue = glTexture->getGlTextureInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); } 
TRACING_EXIT(clGetGLTextureInfo, &retValue); return retValue; } cl_int CL_API_CALL clEnqueueAcquireGLObjects(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueAcquireGLObjects, &commandQueue, &numObjects, &memObjects, &numEventsInWaitList, &eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { if (pCommandQueue->getContext().getSharing() == nullptr) { retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clEnqueueAcquireGLObjects, &retVal); return retVal; } for (auto id = 0u; id < numEventsInWaitList; id++) { auto event = castToObjectOrAbort(eventWaitList[id]); event->updateExecutionStatus(); if ((event->peekExecutionStatus() > CL_COMPLETE) && (event->isExternallySynchronized())) { if (DebugManager.flags.EnableAsyncEventsHandler.get()) { platformsImpl[0]->getAsyncEventsHandler()->registerEvent(event); } } } retVal = pCommandQueue->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_GL_OBJECTS); } TRACING_EXIT(clEnqueueAcquireGLObjects, &retVal); return retVal; } cl_int CL_API_CALL clEnqueueReleaseGLObjects(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { TRACING_ENTER(clEnqueueReleaseGLObjects, &commandQueue, &numObjects, &memObjects, &numEventsInWaitList, 
&eventWaitList, &event); cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", memObjects, "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; retVal = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue), EventWaitList(numEventsInWaitList, eventWaitList)); if (retVal == CL_SUCCESS) { if (pCommandQueue->getContext().getSharing() == nullptr) { retVal = CL_INVALID_CONTEXT; TRACING_EXIT(clEnqueueReleaseGLObjects, &retVal); return retVal; } pCommandQueue->finish(); retVal = pCommandQueue->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_GL_OBJECTS); } TRACING_EXIT(clEnqueueReleaseGLObjects, &retVal); return retVal; } cl_event CL_API_CALL clCreateEventFromGLsyncKHR(cl_context context, cl_GLsync sync, cl_int *errcodeRet) { API_ENTER(errcodeRet); DBG_LOG_INPUTS("context", context, "sync", sync); Context *pContext = nullptr; auto returnCode = validateObjects(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { return nullptr; } if (pContext->getSharing() == nullptr) { err.set(CL_INVALID_CONTEXT); return nullptr; } return GlSyncEvent::create(*pContext, sync, errcodeRet); } cl_int CL_API_CALL clGetGLContextInfoKHR(const cl_context_properties *properties, cl_gl_context_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { cl_int retVal = CL_SUCCESS; API_ENTER(&retVal); DBG_LOG_INPUTS("properties", properties, "paramName", paramName, "paramValueSize", paramValueSize, "paramValue", FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", FileLoggerInstance().getInput(paramValueSizeRet, 0)); 
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); uint32_t GLHGLRCHandle = 0; uint32_t GLHDCHandle = 0; uint32_t propertyType = 0; uint32_t propertyValue = 0; Platform *platform = nullptr; if (properties != nullptr) { while (*properties != 0) { propertyType = static_cast(properties[0]); propertyValue = static_cast(properties[1]); switch (propertyType) { case CL_CONTEXT_PLATFORM: { platform = castToObject(reinterpret_cast(properties[1])); } break; case CL_GL_CONTEXT_KHR: GLHGLRCHandle = propertyValue; break; case CL_WGL_HDC_KHR: GLHDCHandle = propertyValue; break; } properties += 2; } } if ((GLHDCHandle == 0) || (GLHGLRCHandle == 0)) { retVal = CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; return retVal; } auto glSharing = std::make_unique(); glSharing->initGLFunctions(); if (glSharing->isOpenGlSharingSupported() == false) { retVal = CL_INVALID_CONTEXT; return retVal; } if (paramName == CL_DEVICES_FOR_GL_CONTEXT_KHR || paramName == CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR) { if (platform) { info.set(platform->getClDevice(0)); } else { info.set(platformsImpl[0]->getClDevice(0)); } return retVal; } retVal = CL_INVALID_VALUE; return retVal; } cl_int CL_API_CALL clGetSupportedGLTextureFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, cl_GLenum *glFormats, cl_uint *numTextureFormats) { if (numTextureFormats) { *numTextureFormats = 0; } Context *pContext = castToObjectOrAbort(context); auto pSharing = pContext->getSharing(); if (!pSharing) { return CL_INVALID_CONTEXT; } return pSharing->getSupportedFormats(flags, imageType, numEntries, glFormats, numTextureFormats); } compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/gl_arb_sync_event_windows.cpp000066400000000000000000000113131363734646600320070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include 
"shared/source/device/device.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include namespace NEO { GlArbSyncEvent::GlArbSyncEvent(Context &context) : Event(&context, nullptr, CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady), glSyncInfo(std::make_unique()) { } bool GlArbSyncEvent::setBaseEvent(Event &ev) { UNRECOVERABLE_IF(this->baseEvent != nullptr); UNRECOVERABLE_IF(ev.getContext() == nullptr); UNRECOVERABLE_IF(ev.getCommandQueue() == nullptr); auto cmdQueue = ev.getCommandQueue(); auto osInterface = cmdQueue->getGpgpuCommandStreamReceiver().getOSInterface(); UNRECOVERABLE_IF(osInterface == nullptr); if (false == ctx->getSharing()->glArbSyncObjectSetup(*osInterface, *glSyncInfo)) { return false; } this->baseEvent = &ev; this->cmdQueue = cmdQueue; this->cmdQueue->incRefInternal(); this->baseEvent->incRefInternal(); this->osInterface = osInterface; ev.addChild(*this); return true; } GlArbSyncEvent::~GlArbSyncEvent() { if (baseEvent != nullptr) { ctx->getSharing()->glArbSyncObjectCleanup(*osInterface, glSyncInfo.get()); baseEvent->decRefInternal(); } } GlArbSyncEvent *GlArbSyncEvent::create(Event &baseEvent) { if (baseEvent.getContext() == nullptr) { return nullptr; } auto arbSyncEvent = new GlArbSyncEvent(*baseEvent.getContext()); if (false == arbSyncEvent->setBaseEvent(baseEvent)) { delete arbSyncEvent; arbSyncEvent = nullptr; } return arbSyncEvent; } void GlArbSyncEvent::unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus) { DEBUG_BREAK_IF(&event != this->baseEvent); if ((transitionStatus > CL_SUBMITTED) || (transitionStatus < 0)) { return; } 
ctx->getSharing()->glArbSyncObjectSignal(event.getCommandQueue()->getGpgpuCommandStreamReceiver().getOsContext(), *glSyncInfo); ctx->getSharing()->glArbSyncObjectWaitServer(*osInterface, *glSyncInfo); } } // namespace NEO extern "C" CL_API_ENTRY cl_int CL_API_CALL clEnqueueMarkerWithSyncObjectINTEL(cl_command_queue commandQueue, cl_event *event, cl_context *context) { return CL_INVALID_OPERATION; } extern "C" CL_API_ENTRY cl_int CL_API_CALL clGetCLObjectInfoINTEL(cl_mem memObj, void *pResourceInfo) { return CL_INVALID_OPERATION; } extern "C" CL_API_ENTRY cl_int CL_API_CALL clGetCLEventInfoINTEL(cl_event event, PCL_GL_SYNC_INFO *pSyncInfoHandleRet, cl_context *pClContextRet) { if ((nullptr == pSyncInfoHandleRet) || (nullptr == pClContextRet)) { return CL_INVALID_ARG_VALUE; } auto neoEvent = NEO::castToObject(event); if (nullptr == neoEvent) { return CL_INVALID_EVENT; } if (neoEvent->getCommandType() != CL_COMMAND_RELEASE_GL_OBJECTS) { *pSyncInfoHandleRet = nullptr; *pClContextRet = static_cast(neoEvent->getContext()); return CL_SUCCESS; } auto sharing = neoEvent->getContext()->getSharing(); if (sharing == nullptr) { return CL_INVALID_OPERATION; } NEO::GlArbSyncEvent *arbSyncEvent = sharing->getOrCreateGlArbSyncEvent(*neoEvent); if (nullptr == arbSyncEvent) { return CL_OUT_OF_RESOURCES; } neoEvent->updateExecutionStatus(); CL_GL_SYNC_INFO *syncInfo = arbSyncEvent->getSyncInfo(); *pSyncInfoHandleRet = syncInfo; *pClContextRet = static_cast(neoEvent->getContext()); return CL_SUCCESS; } extern "C" CL_API_ENTRY cl_int CL_API_CALL clReleaseGlSharedEventINTEL(cl_event event) { auto neoEvent = NEO::castToObject(event); if (nullptr == neoEvent) { return CL_INVALID_EVENT; } auto arbSyncEvent = neoEvent->getContext()->getSharing()->getGlArbSyncEvent(*neoEvent); neoEvent->getContext()->getSharing()->removeGlArbSyncEventMapping(*neoEvent); if (nullptr != arbSyncEvent) { arbSyncEvent->release(); } neoEvent->release(); return CL_SUCCESS; } 
compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/gl_buffer_windows.cpp000066400000000000000000000160121363734646600302600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "config.h" using namespace NEO; Buffer *GlBuffer::createSharedGlBuffer(Context *context, cl_mem_flags flags, unsigned int bufferId, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); CL_GL_BUFFER_INFO bufferInfo = {0}; bufferInfo.bufferName = bufferId; GLSharingFunctionsWindows *sharingFunctions = context->getSharing(); if (sharingFunctions->acquireSharedBufferINTEL(&bufferInfo) == GL_FALSE) { errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto graphicsAllocation = GlBuffer::createGraphicsAllocation(context, bufferId, bufferInfo); if (!graphicsAllocation) { errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto glHandler = new GlBuffer(sharingFunctions, bufferId); return Buffer::createSharedBuffer(context, flags, glHandler, graphicsAllocation); } void GlBuffer::synchronizeObject(UpdateData &updateData) { auto sharingFunctions = static_cast(this->sharingFunctions); CL_GL_BUFFER_INFO bufferInfo = {}; bufferInfo.bufferName = this->clGlObjectId; sharingFunctions->acquireSharedBufferINTEL(&bufferInfo); updateData.sharedHandle = bufferInfo.globalShareHandle; updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; updateData.memObject->getGraphicsAllocation()->setAllocationOffset(bufferInfo.bufferOffset); const auto 
currentSharedHandle = updateData.memObject->getGraphicsAllocation()->peekSharedHandle(); if (currentSharedHandle != updateData.sharedHandle) { updateData.updateData = new CL_GL_BUFFER_INFO(bufferInfo); } } void GlBuffer::resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) { const auto memObject = updateData->memObject; if (currentSharedHandle != updateData->sharedHandle) { const auto bufferInfo = std::unique_ptr(static_cast(updateData->updateData)); auto oldGraphicsAllocation = memObject->getGraphicsAllocation(); popGraphicsAllocationFromReuse(oldGraphicsAllocation); Context *context = memObject->getContext(); auto newGraphicsAllocation = createGraphicsAllocation(context, clGlObjectId, *bufferInfo); if (newGraphicsAllocation == nullptr) { updateData->synchronizationStatus = SynchronizeStatus::SYNCHRONIZE_ERROR; } else { updateData->synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } memObject->resetGraphicsAllocation(newGraphicsAllocation); if (updateData->synchronizationStatus == SynchronizeStatus::ACQUIRE_SUCCESFUL) { memObject->getGraphicsAllocation()->setAllocationOffset(bufferInfo->bufferOffset); } } } void GlBuffer::popGraphicsAllocationFromReuse(GraphicsAllocation *graphicsAllocation) { auto sharingFunctions = static_cast(this->sharingFunctions); std::unique_lock lock(sharingFunctions->mutex); auto &graphicsAllocations = sharingFunctions->graphicsAllocationsForGlBufferReuse; auto foundIter = std::find_if(graphicsAllocations.begin(), graphicsAllocations.end(), [&graphicsAllocation](const std::pair &entry) { return entry.second == graphicsAllocation; }); if (foundIter != graphicsAllocations.end()) { std::iter_swap(foundIter, graphicsAllocations.end() - 1); graphicsAllocations.pop_back(); } graphicsAllocation->decReuseCount(); } void GlBuffer::releaseReusedGraphicsAllocation() { auto sharingFunctions = static_cast(this->sharingFunctions); std::unique_lock lock(sharingFunctions->mutex); auto &allocationsVector = 
sharingFunctions->graphicsAllocationsForGlBufferReuse; auto itEnd = allocationsVector.end(); for (auto it = allocationsVector.begin(); it != itEnd; it++) { if (it->first == clGlObjectId) { it->second->decReuseCount(); if (it->second->peekReuseCount() == 0) { std::iter_swap(it, itEnd - 1); allocationsVector.pop_back(); } break; } } } GraphicsAllocation *GlBuffer::createGraphicsAllocation(Context *context, unsigned int bufferId, _tagCLGLBufferInfo &bufferInfo) { GLSharingFunctionsWindows *sharingFunctions = context->getSharing(); auto &allocationsVector = sharingFunctions->graphicsAllocationsForGlBufferReuse; GraphicsAllocation *graphicsAllocation = nullptr; bool reusedAllocation = false; std::unique_lock lock(sharingFunctions->mutex); auto endIter = allocationsVector.end(); auto foundIter = std::find_if(allocationsVector.begin(), endIter, [&bufferId](const std::pair &entry) { return entry.first == bufferId; }); if (foundIter != endIter) { graphicsAllocation = foundIter->second; reusedAllocation = true; } if (!graphicsAllocation) { AllocationProperties properties = {context->getDevice(0)->getRootDeviceIndex(), false, 0, GraphicsAllocation::AllocationType::SHARED_BUFFER, false}; // couldn't find allocation for reuse - create new graphicsAllocation = context->getMemoryManager()->createGraphicsAllocationFromSharedHandle(bufferInfo.globalShareHandle, properties, true); } if (!graphicsAllocation) { return nullptr; } graphicsAllocation->incReuseCount(); // decremented in releaseReusedGraphicsAllocation() called from MemObj destructor if (!reusedAllocation) { sharingFunctions->graphicsAllocationsForGlBufferReuse.push_back(std::make_pair(bufferId, graphicsAllocation)); if (bufferInfo.pGmmResInfo) { DEBUG_BREAK_IF(graphicsAllocation->getDefaultGmm() != nullptr); auto clientContext = context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(); graphicsAllocation->setDefaultGmm(new Gmm(clientContext, bufferInfo.pGmmResInfo)); } } return graphicsAllocation; } void 
GlBuffer::releaseResource(MemObj *memObject) { auto sharingFunctions = static_cast(this->sharingFunctions); CL_GL_BUFFER_INFO bufferInfo = {}; bufferInfo.bufferName = this->clGlObjectId; sharingFunctions->releaseSharedBufferINTEL(&bufferInfo); } compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/gl_context_guard_windows.cpp000066400000000000000000000024121363734646600316540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" namespace NEO { GLContextGuard::GLContextGuard(GLSharingFunctions &sharingFcns) : sharingFunctions(&sharingFcns) { auto &sharing = *static_cast(sharingFunctions); currentContextHandle = sharing.getCurrentContext(); currentDisplayHandle = sharing.getCurrentDisplay(); auto ctxToMakeCurrent = sharing.getContextHandle(); if (currentContextHandle == 0) { ctxToMakeCurrent = sharing.getBackupContextHandle(); } if (currentContextHandle != sharing.getContextHandle() && currentContextHandle != sharing.getBackupContextHandle()) { if (sharing.makeCurrent(ctxToMakeCurrent) == GL_FALSE) { while (sharing.makeCurrent(sharing.getBackupContextHandle()) == GL_FALSE) { ; } } } } GLContextGuard::~GLContextGuard() { auto &sharing = *static_cast(sharingFunctions); if (currentContextHandle != sharing.getContextHandle()) { sharing.makeCurrent(currentContextHandle, currentDisplayHandle); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/gl_sharing_windows.cpp000066400000000000000000000133401363734646600304430ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/context/context.inl" #include "opencl/source/helpers/windows/gl_helper.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" namespace NEO 
{ GLSharingFunctionsWindows::GLSharingFunctionsWindows(GLType glhdcType, GLContext glhglrcHandle, GLContext glhglrcHandleBkpCtx, GLDisplay glhdcHandle) : GLHDCType(glhdcType), GLHGLRCHandle(glhglrcHandle), GLHGLRCHandleBkpCtx(glhglrcHandleBkpCtx), GLHDCHandle(glhdcHandle) { initGLFunctions(); updateOpenGLContext(); createBackupContext(); } GLSharingFunctionsWindows::~GLSharingFunctionsWindows() { if (pfnWglDeleteContext) { pfnWglDeleteContext(GLHGLRCHandleBkpCtx); } } bool GLSharingFunctionsWindows::isGlSharingEnabled() { static bool oglLibAvailable = std::unique_ptr(OsLibrary::load(Os::openglDllName)).get() != nullptr; return oglLibAvailable; } void GLSharingFunctionsWindows::createBackupContext() { if (pfnWglCreateContext) { GLHGLRCHandleBkpCtx = pfnWglCreateContext(GLHDCHandle); pfnWglShareLists(GLHGLRCHandle, GLHGLRCHandleBkpCtx); } } GLboolean GLSharingFunctionsWindows::setSharedOCLContextState() { ContextInfo CtxInfo = {0}; GLboolean retVal = GLSetSharedOCLContextState(GLHDCHandle, GLHGLRCHandle, CL_TRUE, &CtxInfo); if (retVal == GL_FALSE) { return GL_FALSE; } GLContextHandle = CtxInfo.ContextHandle; GLDeviceHandle = CtxInfo.DeviceHandle; return retVal; } bool GLSharingFunctionsWindows::isOpenGlExtensionSupported(const unsigned char *pExtensionString) { bool LoadedNull = (glGetStringi == nullptr) || (glGetIntegerv == nullptr); if (LoadedNull) { return false; } cl_int NumberOfExtensions = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &NumberOfExtensions); for (cl_int i = 0; i < NumberOfExtensions; i++) { std::basic_string pString = glGetStringi(GL_EXTENSIONS, i); if (pString == pExtensionString) { return true; } } return false; } bool GLSharingFunctionsWindows::isOpenGlSharingSupported() { std::basic_string Vendor = glGetString(GL_VENDOR); const unsigned char intelVendor[] = "Intel"; if ((Vendor.empty()) || (Vendor != intelVendor)) { return false; } std::basic_string Version = glGetString(GL_VERSION); if (Version.empty()) { return false; } bool IsOpenGLES = false; const 
unsigned char versionES[] = "OpenGL ES"; if (Version.find(versionES) != std::string::npos) { IsOpenGLES = true; } if (IsOpenGLES == true) { const unsigned char versionES1[] = "OpenGL ES 1."; if (Version.find(versionES1) != std::string::npos) { const unsigned char supportGLOES[] = "GL_OES_framebuffer_object"; if (isOpenGlExtensionSupported(supportGLOES) == false) { return false; } } } else { if (Version[0] < '3') { const unsigned char supportGLEXT[] = "GL_EXT_framebuffer_object"; if (isOpenGlExtensionSupported(supportGLEXT) == false) { return false; } } } return true; } GlArbSyncEvent *GLSharingFunctionsWindows::getGlArbSyncEvent(Event &baseEvent) { std::lock_guard lock{glArbEventMutex}; auto it = glArbEventMapping.find(&baseEvent); if (it != glArbEventMapping.end()) { return it->second; } return nullptr; } void GLSharingFunctionsWindows::removeGlArbSyncEventMapping(Event &baseEvent) { std::lock_guard lock{glArbEventMutex}; auto it = glArbEventMapping.find(&baseEvent); if (it == glArbEventMapping.end()) { DEBUG_BREAK_IF(it == glArbEventMapping.end()); return; } glArbEventMapping.erase(it); } GLboolean GLSharingFunctionsWindows::initGLFunctions() { glLibrary.reset(OsLibrary::load(Os::openglDllName)); if (glLibrary->isLoaded()) { glFunctionHelper wglLibrary(glLibrary.get(), "wglGetProcAddress"); GLGetCurrentContext = (*glLibrary)["wglGetCurrentContext"]; GLGetCurrentDisplay = (*glLibrary)["wglGetCurrentDC"]; glGetString = (*glLibrary)["glGetString"]; glGetIntegerv = (*glLibrary)["glGetIntegerv"]; pfnWglCreateContext = (*glLibrary)["wglCreateContext"]; pfnWglDeleteContext = (*glLibrary)["wglDeleteContext"]; pfnWglShareLists = (*glLibrary)["wglShareLists"]; wglMakeCurrent = (*glLibrary)["wglMakeCurrent"]; GLSetSharedOCLContextState = wglLibrary["wglSetSharedOCLContextStateINTEL"]; GLAcquireSharedBuffer = wglLibrary["wglAcquireSharedBufferINTEL"]; GLReleaseSharedBuffer = wglLibrary["wglReleaseSharedBufferINTEL"]; GLAcquireSharedRenderBuffer = 
wglLibrary["wglAcquireSharedRenderBufferINTEL"]; GLReleaseSharedRenderBuffer = wglLibrary["wglReleaseSharedRenderBufferINTEL"]; GLAcquireSharedTexture = wglLibrary["wglAcquireSharedTextureINTEL"]; GLReleaseSharedTexture = wglLibrary["wglReleaseSharedTextureINTEL"]; GLRetainSync = wglLibrary["wglRetainSyncINTEL"]; GLReleaseSync = wglLibrary["wglReleaseSyncINTEL"]; GLGetSynciv = wglLibrary["wglGetSyncivINTEL"]; glGetStringi = wglLibrary["glGetStringi"]; } this->pfnGlArbSyncObjectCleanup = cleanupArbSyncObject; this->pfnGlArbSyncObjectSetup = setupArbSyncObject; this->pfnGlArbSyncObjectSignal = signalArbSyncObject; this->pfnGlArbSyncObjectWaitServer = serverWaitForArbSyncObject; return 1; } template GLSharingFunctionsWindows *Context::getSharing(); } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/gl_sharing_windows.h000066400000000000000000000217521363734646600301160ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/os_interface/windows/gl/gl_sharing_os.h" #include "opencl/source/sharings/gl/gl_sharing.h" #include namespace NEO { //OpenGL API names typedef GLboolean(OSAPI *PFNOGLSetSharedOCLContextStateINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLboolean state, GLvoid *pContextInfo); typedef GLboolean(OSAPI *PFNOGLAcquireSharedBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pBufferInfo); typedef GLboolean(OSAPI *PFNOGLAcquireSharedRenderBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLboolean(OSAPI *PFNOGLAcquireSharedTextureINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLboolean(OSAPI *PFNOGLReleaseSharedBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pBufferInfo); typedef GLboolean(OSAPI 
*PFNOGLReleaseSharedRenderBufferINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLboolean(OSAPI *PFNOGLReleaseSharedTextureINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pResourceInfo); typedef GLContext(OSAPI *PFNOGLGetCurrentContext)(); typedef GLDisplay(OSAPI *PFNOGLGetCurrentDisplay)(); typedef GLboolean(OSAPI *PFNOGLMakeCurrent)(GLDisplay hdcHandle, void *draw, void *read, GLContext contextHandle); typedef GLboolean(OSAPI *PFNOGLRetainSyncINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pSyncInfo); typedef GLboolean(OSAPI *PFNOGLReleaseSyncINTEL)(GLDisplay hdcHandle, GLContext contextHandle, GLContext backupContextHandle, GLvoid *pSync); typedef void(OSAPI *PFNOGLGetSyncivINTEL)(GLvoid *pSync, GLenum pname, GLint *value); typedef const GLubyte *(OSAPI *PFNglGetString)(GLenum name); typedef const GLubyte *(OSAPI *PFNglGetStringi)(GLenum name, GLuint index); typedef void(OSAPI *PFNglGetIntegerv)(GLenum pname, GLint *params); typedef void(OSAPI *PFNglBindTexture)(GLenum target, GLuint texture); //wgl typedef BOOL(OSAPI *PFNwglMakeCurrent)(HDC, HGLRC); typedef GLContext(OSAPI *PFNwglCreateContext)(GLDisplay hdcHandle); typedef int(OSAPI *PFNwglShareLists)(GLContext contextHandle, GLContext backupContextHandle); typedef BOOL(OSAPI *PFNwglDeleteContext)(HGLRC hglrcHandle); typedef bool (*PFNglArbSyncObjectSetup)(GLSharingFunctions &sharing, OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); typedef void (*PFNglArbSyncObjectCleanup)(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo); typedef void (*PFNglArbSyncObjectSignal)(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo); typedef void (*PFNglArbSyncObjectWaitServer)(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo); class GLSharingFunctionsWindows : public GLSharingFunctions { public: GLSharingFunctionsWindows() = default; 
GLSharingFunctionsWindows(GLType glhdcType, GLContext glhglrcHandle, GLContext glhglrcHandleBkpCtx, GLDisplay glhdcHandle); ~GLSharingFunctionsWindows() override; OS_HANDLE getGLDeviceHandle() const { return GLDeviceHandle; } OS_HANDLE getGLContextHandle() const { return GLContextHandle; } GLboolean initGLFunctions() override; bool isOpenGlSharingSupported() override; static bool isGlSharingEnabled(); // Arb sync event template auto getOrCreateGlArbSyncEvent(Event &baseEvent) -> decltype(EventType::create(baseEvent)); GlArbSyncEvent *getGlArbSyncEvent(Event &baseEvent); void removeGlArbSyncEventMapping(Event &baseEvent); // Gl functions GLboolean acquireSharedBufferINTEL(GLvoid *pBufferInfo) { return GLAcquireSharedBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pBufferInfo); } GLboolean releaseSharedBufferINTEL(GLvoid *pBufferInfo) { return GLReleaseSharedBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pBufferInfo); } GLboolean acquireSharedRenderBuffer(GLvoid *pResourceInfo) { return GLAcquireSharedRenderBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean releaseSharedRenderBuffer(GLvoid *pResourceInfo) { return GLReleaseSharedRenderBuffer(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean acquireSharedTexture(GLvoid *pResourceInfo) { return GLAcquireSharedTexture(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean releaseSharedTexture(GLvoid *pResourceInfo) { return GLReleaseSharedTexture(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pResourceInfo); } GLboolean retainSync(GLvoid *pSyncInfo) { return GLRetainSync(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pSyncInfo); } GLboolean releaseSync(GLvoid *pSync) { return GLReleaseSync(GLHDCHandle, GLHGLRCHandle, GLHGLRCHandleBkpCtx, pSync); } void getSynciv(GLvoid *pSync, GLenum pname, GLint *value) { return GLGetSynciv(pSync, pname, value); } GLContext getCurrentContext() { return GLGetCurrentContext(); } 
GLDisplay getCurrentDisplay() { return GLGetCurrentDisplay(); } GLboolean makeCurrent(GLContext contextHandle, GLDisplay displayHandle = 0) { if (displayHandle == 0) { displayHandle = GLHDCHandle; } return this->wglMakeCurrent(displayHandle, contextHandle); } GLContext getBackupContextHandle() { return GLHGLRCHandleBkpCtx; } GLContext getContextHandle() { return GLHGLRCHandle; } bool glArbSyncObjectSetup(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { return pfnGlArbSyncObjectSetup(*this, osInterface, glSyncInfo); } void glArbSyncObjectCleanup(OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo) { pfnGlArbSyncObjectCleanup(osInterface, glSyncInfo); } void glArbSyncObjectSignal(OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo) { pfnGlArbSyncObjectSignal(osContext, glSyncInfo); } void glArbSyncObjectWaitServer(OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { pfnGlArbSyncObjectWaitServer(osInterface, glSyncInfo); } // Buffer reuse std::mutex mutex; std::vector> graphicsAllocationsForGlBufferReuse; protected: void updateOpenGLContext() { if (GLSetSharedOCLContextState) { setSharedOCLContextState(); } } GLboolean setSharedOCLContextState(); void createBackupContext(); bool isOpenGlExtensionSupported(const unsigned char *pExtentionString); // Handles GLType GLHDCType = 0; GLContext GLHGLRCHandle = 0; GLContext GLHGLRCHandleBkpCtx = 0; GLDisplay GLHDCHandle = 0; OS_HANDLE GLDeviceHandle = 0; OS_HANDLE GLContextHandle = 0; // GL functions std::unique_ptr glLibrary; PFNOGLSetSharedOCLContextStateINTEL GLSetSharedOCLContextState = nullptr; PFNOGLAcquireSharedBufferINTEL GLAcquireSharedBuffer = nullptr; PFNOGLReleaseSharedBufferINTEL GLReleaseSharedBuffer = nullptr; PFNOGLAcquireSharedRenderBufferINTEL GLAcquireSharedRenderBuffer = nullptr; PFNOGLReleaseSharedRenderBufferINTEL GLReleaseSharedRenderBuffer = nullptr; PFNOGLAcquireSharedTextureINTEL GLAcquireSharedTexture = nullptr; PFNOGLReleaseSharedTextureINTEL GLReleaseSharedTexture = nullptr; 
PFNOGLGetCurrentContext GLGetCurrentContext = nullptr; PFNOGLGetCurrentDisplay GLGetCurrentDisplay = nullptr; PFNglGetString glGetString = nullptr; PFNglGetStringi glGetStringi = nullptr; PFNglGetIntegerv glGetIntegerv = nullptr; PFNwglCreateContext pfnWglCreateContext = nullptr; PFNwglMakeCurrent wglMakeCurrent = nullptr; PFNwglShareLists pfnWglShareLists = nullptr; PFNwglDeleteContext pfnWglDeleteContext = nullptr; PFNOGLRetainSyncINTEL GLRetainSync = nullptr; PFNOGLReleaseSyncINTEL GLReleaseSync = nullptr; PFNOGLGetSyncivINTEL GLGetSynciv = nullptr; PFNglArbSyncObjectSetup pfnGlArbSyncObjectSetup = nullptr; PFNglArbSyncObjectCleanup pfnGlArbSyncObjectCleanup = nullptr; PFNglArbSyncObjectSignal pfnGlArbSyncObjectSignal = nullptr; PFNglArbSyncObjectWaitServer pfnGlArbSyncObjectWaitServer = nullptr; // support for GL_ARB_cl_event std::mutex glArbEventMutex; std::unordered_map glArbEventMapping; }; template inline auto GLSharingFunctionsWindows::getOrCreateGlArbSyncEvent(Event &baseEvent) -> decltype(EventType::create(baseEvent)) { std::lock_guard lock{glArbEventMutex}; auto it = glArbEventMapping.find(&baseEvent); if (it != glArbEventMapping.end()) { return it->second; } auto arbEvent = EventType::create(baseEvent); if (nullptr == arbEvent) { return arbEvent; } glArbEventMapping[&baseEvent] = arbEvent; return arbEvent; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/gl_sync_event_windows.cpp000066400000000000000000000042031363734646600311630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/context/context.h" #include "opencl/source/event/async_events_handler.h" #include 
"opencl/source/event/event_builder.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/gl/gl_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" namespace NEO { GlSyncEvent::GlSyncEvent(Context &context, const GL_CL_SYNC_INFO &sync) : Event(&context, nullptr, CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady), glSync(std::make_unique(sync)) { transitionExecutionStatus(CL_SUBMITTED); } GlSyncEvent::~GlSyncEvent() { ctx->getSharing()->releaseSync(glSync->pSync); } GlSyncEvent *GlSyncEvent::create(Context &context, cl_GLsync sync, cl_int *errCode) { GLContextGuard guard(*context.getSharing()); ErrorCodeHelper err(errCode, CL_SUCCESS); GL_CL_SYNC_INFO syncInfo = {sync, nullptr}; context.getSharing()->retainSync(&syncInfo); DEBUG_BREAK_IF(!syncInfo.pSync); EventBuilder eventBuilder; eventBuilder.create(context, syncInfo); return static_cast(eventBuilder.finalizeAndRelease()); } void GlSyncEvent::updateExecutionStatus() { GLContextGuard guard(*ctx->getSharing()); int retVal = 0; ctx->getSharing()->getSynciv(glSync->pSync, GL_SYNC_STATUS, &retVal); if (retVal == GL_SIGNALED) { setStatus(CL_COMPLETE); } } uint32_t GlSyncEvent::getTaskLevel() { if (peekExecutionStatus() == CL_COMPLETE) { return 0; } return CompletionStamp::levelNotReady; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/gl_texture_windows.cpp000066400000000000000000000255101363734646600305120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include 
"shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "CL/cl_gl.h" #include "config.h" #include namespace NEO { Image *GlTexture::createSharedGlTexture(Context *context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_INVALID_GL_OBJECT); auto clientContext = context->getDevice(0)->getRootDeviceEnvironment().getGmmClientContext(); auto memoryManager = context->getMemoryManager(); cl_image_desc imgDesc = {}; cl_image_format imgFormat = {}; McsSurfaceInfo mcsSurfaceInfo = {}; CL_GL_RESOURCE_INFO texInfo = {}; texInfo.name = texture; texInfo.target = getBaseTargetType(target); GLSharingFunctionsWindows *sharingFunctions = context->getSharing(); if (target == GL_RENDERBUFFER_EXT) { sharingFunctions->acquireSharedRenderBuffer(&texInfo); } else { sharingFunctions->acquireSharedTexture(&texInfo); } errorCode.set(CL_SUCCESS); AllocationProperties allocProperties(context->getDevice(0)->getRootDeviceIndex(), false, 0u, GraphicsAllocation::AllocationType::SHARED_IMAGE, false); auto alloc = memoryManager->createGraphicsAllocationFromSharedHandle(texInfo.globalShareHandle, allocProperties, false); if (alloc == nullptr) { errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } if (texInfo.pGmmResInfo) { DEBUG_BREAK_IF(alloc->getDefaultGmm() != nullptr); alloc->setDefaultGmm(new Gmm(clientContext, texInfo.pGmmResInfo)); } auto gmm = alloc->getDefaultGmm(); imgDesc.image_type = getClMemObjectType(target); if (target == GL_TEXTURE_BUFFER) { imgDesc.image_width = texInfo.textureBufferWidth; 
imgDesc.image_row_pitch = texInfo.textureBufferSize; } else { imgDesc.image_width = gmm->gmmResourceInfo->getBaseWidth(); imgDesc.image_row_pitch = gmm->gmmResourceInfo->getRenderPitch(); if (imgDesc.image_row_pitch == 0) { size_t alignedWidth = alignUp(imgDesc.image_width, gmm->gmmResourceInfo->getHAlign()); size_t bpp = gmm->gmmResourceInfo->getBitsPerPixel() >> 3; imgDesc.image_row_pitch = alignedWidth * bpp; } } uint32_t numSamples = static_cast(gmm->gmmResourceInfo->getNumSamples()); imgDesc.num_samples = getValidParam(numSamples, 0u, 1u); imgDesc.image_height = gmm->gmmResourceInfo->getBaseHeight(); imgDesc.image_array_size = gmm->gmmResourceInfo->getArraySize(); if (target == GL_TEXTURE_3D) { imgDesc.image_depth = gmm->gmmResourceInfo->getBaseDepth(); } if (imgDesc.image_array_size > 1 || imgDesc.image_depth > 1) { GMM_REQ_OFFSET_INFO GMMReqInfo = {}; GMMReqInfo.ArrayIndex = imgDesc.image_array_size > 1 ? 1 : 0; GMMReqInfo.Slice = imgDesc.image_depth > 1 ? 1 : 0; GMMReqInfo.ReqLock = 1; gmm->gmmResourceInfo->getOffset(GMMReqInfo); imgDesc.image_slice_pitch = GMMReqInfo.Lock.Offset; } else { imgDesc.image_slice_pitch = alloc->getUnderlyingBufferSize(); } uint32_t cubeFaceIndex = GmmTypesConverter::getCubeFaceIndex(target); auto qPitch = gmm->queryQPitch(gmm->gmmResourceInfo->getResourceType()); if (setClImageFormat(texInfo.glInternalFormat, imgFormat) == false) { memoryManager->freeGraphicsMemory(alloc); errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto surfaceFormatInfoAddress = Image::getSurfaceFormatFromTable(flags, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); if (!surfaceFormatInfoAddress) { memoryManager->freeGraphicsMemory(alloc); errorCode.set(CL_INVALID_GL_OBJECT); return nullptr; } auto surfaceFormatInfo = *surfaceFormatInfoAddress; if (texInfo.glInternalFormat != GL_RGB10) { surfaceFormatInfo.surfaceFormat.GenxSurfaceFormat = (GFX3DSTATE_SURFACEFORMAT)texInfo.glHWFormat; } GraphicsAllocation 
*mcsAlloc = nullptr; if (texInfo.globalShareHandleMCS) { AllocationProperties allocProperties(context->getDevice(0)->getRootDeviceIndex(), 0, GraphicsAllocation::AllocationType::MCS); mcsAlloc = memoryManager->createGraphicsAllocationFromSharedHandle(texInfo.globalShareHandleMCS, allocProperties, false); if (texInfo.pGmmResInfoMCS) { DEBUG_BREAK_IF(mcsAlloc->getDefaultGmm() != nullptr); mcsAlloc->setDefaultGmm(new Gmm(clientContext, texInfo.pGmmResInfoMCS)); } mcsSurfaceInfo.pitch = getValidParam(static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo->getRenderPitch() / 128)); mcsSurfaceInfo.qPitch = mcsAlloc->getDefaultGmm()->gmmResourceInfo->getQPitch(); } mcsSurfaceInfo.multisampleCount = GmmTypesConverter::getRenderMultisamplesCount(static_cast(imgDesc.num_samples)); if (miplevel < 0) { imgDesc.num_mip_levels = gmm->gmmResourceInfo->getMaxLod() + 1; } ImageInfo imgInfo = {}; imgInfo.imgDesc = Image::convertDescriptor(imgDesc); imgInfo.surfaceFormat = &surfaceFormatInfo.surfaceFormat; imgInfo.qPitch = qPitch; auto glTexture = new GlTexture(sharingFunctions, getClGlObjectType(target), texture, texInfo, target, std::max(miplevel, 0)); auto hwInfo = context->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (alloc->getDefaultGmm()->unifiedAuxTranslationCapable()) { alloc->getDefaultGmm()->isRenderCompressed = hwHelper.isPageTableManagerSupported(hwInfo) ? 
memoryManager->mapAuxGpuVA(alloc) : true; } return Image::createSharedImage(context, glTexture, mcsSurfaceInfo, alloc, mcsAlloc, flags, &surfaceFormatInfo, imgInfo, cubeFaceIndex, std::max(miplevel, 0), imgInfo.imgDesc.numMipLevels); } // namespace NEO void GlTexture::synchronizeObject(UpdateData &updateData) { auto sharingFunctions = static_cast(this->sharingFunctions); CL_GL_RESOURCE_INFO resourceInfo = {0}; resourceInfo.name = this->clGlObjectId; if (target == GL_RENDERBUFFER_EXT) { sharingFunctions->acquireSharedRenderBuffer(&resourceInfo); } else { sharingFunctions->acquireSharedTexture(&resourceInfo); // Set texture buffer offset acquired from OpenGL layer in graphics allocation updateData.memObject->getGraphicsAllocation()->setAllocationOffset(resourceInfo.textureBufferOffset); } updateData.sharedHandle = resourceInfo.globalShareHandle; updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } cl_int GlTexture::getGlTextureInfo(cl_gl_texture_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); if (paramName == CL_GL_TEXTURE_TARGET) { info.set(target); } else if (paramName == CL_GL_MIPMAP_LEVEL) { info.set(miplevel); } else if (paramName == CL_GL_NUM_SAMPLES) { info.set(textureInfo.numberOfSamples > 1 ? textureInfo.numberOfSamples : 0); } else { return CL_INVALID_VALUE; } return CL_SUCCESS; } cl_mem_object_type GlTexture::getClMemObjectType(cl_GLenum glType) { return static_cast(getClObjectType(glType, false)); } cl_gl_object_type GlTexture::getClGlObjectType(cl_GLenum glType) { return static_cast(getClObjectType(glType, true)); } uint32_t GlTexture::getClObjectType(cl_GLenum glType, bool returnClGlObjectType) { // return cl_gl_object_type if returnClGlObjectType is ture, otherwise cl_mem_object_type uint32_t retValue = 0; switch (glType) { case GL_TEXTURE_1D: retValue = returnClGlObjectType ? 
CL_GL_OBJECT_TEXTURE1D : CL_MEM_OBJECT_IMAGE1D; break; case GL_TEXTURE_1D_ARRAY: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE1D_ARRAY : CL_MEM_OBJECT_IMAGE1D_ARRAY; break; case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE: case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_2D_MULTISAMPLE: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE2D : CL_MEM_OBJECT_IMAGE2D; break; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE2D_ARRAY : CL_MEM_OBJECT_IMAGE2D_ARRAY; break; case GL_TEXTURE_3D: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE3D : CL_MEM_OBJECT_IMAGE3D; break; case GL_TEXTURE_BUFFER: retValue = returnClGlObjectType ? CL_GL_OBJECT_TEXTURE_BUFFER : CL_MEM_OBJECT_IMAGE1D_BUFFER; break; case GL_RENDERBUFFER_EXT: retValue = returnClGlObjectType ? 
CL_GL_OBJECT_RENDERBUFFER : CL_MEM_OBJECT_IMAGE2D; break; default: retValue = 0; break; } return retValue; } cl_GLenum GlTexture::getBaseTargetType(cl_GLenum target) { cl_GLenum returnTarget = 0; switch (target) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: returnTarget = GL_TEXTURE_CUBE_MAP_ARB; break; default: returnTarget = target; break; } return returnTarget; } void GlTexture::releaseResource(MemObj *memObject) { auto sharingFunctions = static_cast(this->sharingFunctions); if (target == GL_RENDERBUFFER_EXT) { sharingFunctions->releaseSharedRenderBuffer(&textureInfo); } else { sharingFunctions->releaseSharedTexture(&textureInfo); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/win_enable_gl.cpp000066400000000000000000000101261363734646600273400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/windows/win_enable_gl.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/gl/cl_gl_api_intel.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include namespace NEO { bool GlSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) { if (contextData.get() == nullptr) { contextData = std::make_unique(); } bool res = false; switch (propertyType) { case CL_GL_CONTEXT_KHR: contextData->GLHGLRCHandle = (GLContext)propertyValue; res = true; break; case CL_WGL_HDC_KHR: contextData->GLHDCType = (GLType)CL_WGL_HDC_KHR; 
contextData->GLHDCHandle = (GLDisplay)propertyValue; res = true; break; case CL_GLX_DISPLAY_KHR: contextData->GLHDCType = (GLType)CL_GLX_DISPLAY_KHR; contextData->GLHDCHandle = (GLDisplay)propertyValue; res = true; break; case CL_EGL_DISPLAY_KHR: contextData->GLHDCType = (GLType)CL_EGL_DISPLAY_KHR; contextData->GLHDCHandle = (GLDisplay)propertyValue; res = true; break; case CL_CGL_SHAREGROUP_KHR: errcodeRet = CL_INVALID_PROPERTY; res = true; break; } return res; } bool GlSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() == nullptr) return true; if (contextData->GLHGLRCHandle) { context.registerSharing(new GLSharingFunctionsWindows(contextData->GLHDCType, contextData->GLHGLRCHandle, nullptr, contextData->GLHDCHandle)); } contextData.reset(nullptr); return true; } std::unique_ptr GlSharingBuilderFactory::createContextBuilder() { return std::make_unique(); }; void GlSharingBuilderFactory::fillGlobalDispatchTable() { icdGlobalDispatchTable.clCreateFromGLBuffer = clCreateFromGLBuffer; icdGlobalDispatchTable.clCreateFromGLTexture = clCreateFromGLTexture; icdGlobalDispatchTable.clCreateFromGLTexture2D = clCreateFromGLTexture2D; icdGlobalDispatchTable.clCreateFromGLTexture3D = clCreateFromGLTexture3D; icdGlobalDispatchTable.clCreateFromGLRenderbuffer = clCreateFromGLRenderbuffer; icdGlobalDispatchTable.clGetGLObjectInfo = clGetGLObjectInfo; icdGlobalDispatchTable.clGetGLTextureInfo = clGetGLTextureInfo; icdGlobalDispatchTable.clEnqueueAcquireGLObjects = clEnqueueAcquireGLObjects; icdGlobalDispatchTable.clEnqueueReleaseGLObjects = clEnqueueReleaseGLObjects; icdGlobalDispatchTable.clCreateEventFromGLsyncKHR = clCreateEventFromGLsyncKHR; icdGlobalDispatchTable.clGetGLContextInfoKHR = clGetGLContextInfoKHR; } std::string GlSharingBuilderFactory::getExtensions() { if (DebugManager.flags.AddClGlSharing.get()) { return "cl_khr_gl_sharing " "cl_khr_gl_depth_images " "cl_khr_gl_event " "cl_khr_gl_msaa_sharing "; } else if 
(GLSharingFunctionsWindows::isGlSharingEnabled()) { return "cl_khr_gl_sharing " "cl_khr_gl_depth_images " "cl_khr_gl_event " "cl_khr_gl_msaa_sharing "; } return ""; } void *GlSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { if (DebugManager.flags.EnableFormatQuery.get() && functionName == "clGetSupportedGLTextureFormatsINTEL") { return ((void *)(clGetSupportedGLTextureFormatsINTEL)); } return nullptr; } static SharingFactory::RegisterSharing glSharing; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/gl/windows/win_enable_gl.h000066400000000000000000000021161363734646600270050ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { class Context; struct GlCreateContextProperties { GLType GLHDCType = 0; GLContext GLHGLRCHandle = 0; GLDisplay GLHDCHandle = 0; }; class GlSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; class GlSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions() override; void fillGlobalDispatchTable() override; void *getExtensionFunctionAddress(const std::string &functionName) override; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/sharing.cpp000066400000000000000000000031171363734646600241140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sharing.h" #include "opencl/source/mem_obj/mem_obj.h" #include "CL/cl.h" #include namespace NEO { int 
SharingHandler::acquire(MemObj *memObj) { if (acquireCount == 0) { UpdateData updateData; auto currentSharedHandle = memObj->getGraphicsAllocation()->peekSharedHandle(); updateData.sharedHandle = currentSharedHandle; updateData.memObject = memObj; int result = synchronizeHandler(updateData); resolveGraphicsAllocationChange(currentSharedHandle, &updateData); if (result != CL_SUCCESS) { return result; } if (updateData.synchronizationStatus != SynchronizeStatus::ACQUIRE_SUCCESFUL) { return CL_OUT_OF_RESOURCES; } DEBUG_BREAK_IF(memObj->getGraphicsAllocation()->peekSharedHandle() != updateData.sharedHandle); } acquireCount++; return CL_SUCCESS; } int SharingHandler::synchronizeHandler(UpdateData &updateData) { auto result = validateUpdateData(updateData); if (result == CL_SUCCESS) { synchronizeObject(updateData); } return result; } int SharingHandler::validateUpdateData(UpdateData &updateData) { return CL_SUCCESS; } void SharingHandler::resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) { } void SharingHandler::release(MemObj *memObject) { DEBUG_BREAK_IF(acquireCount <= 0); acquireCount--; if (acquireCount == 0) { releaseResource(memObject); } } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/sharing.h000066400000000000000000000025501363734646600235610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { class Context; class MemObj; enum SynchronizeStatus { SHARED_OBJECT_NOT_CHANGED, SHARED_OBJECT_REQUIRES_UPDATE, ACQUIRE_SUCCESFUL, SYNCHRONIZE_ERROR }; struct UpdateData { SynchronizeStatus synchronizationStatus; osHandle sharedHandle; MemObj *memObject = nullptr; void *updateData = nullptr; }; class SharingFunctions { public: virtual uint32_t getId() const = 0; virtual ~SharingFunctions() = default; }; class SharingHandler { public: int acquire(MemObj *memObj); 
void release(MemObj *memObject); virtual ~SharingHandler() = default; virtual void getMemObjectInfo(size_t ¶mValueSize, void *¶mValue){}; virtual void releaseReusedGraphicsAllocation(){}; protected: virtual int synchronizeHandler(UpdateData &updateData); virtual int validateUpdateData(UpdateData &updateData); virtual void synchronizeObject(UpdateData &updateData) { updateData.synchronizationStatus = SYNCHRONIZE_ERROR; } virtual void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData); virtual void releaseResource(MemObj *memObject){}; unsigned int acquireCount = 0u; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/sharing_factory.cpp000066400000000000000000000047701363734646600256510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sharing_factory.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/platform/extensions.h" namespace NEO { std::unique_ptr SharingFactory::build() { auto res = std::make_unique(); for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; res->sharings.push_back(builder->createContextBuilder()); } return res; } std::string SharingFactory::getExtensions() { std::string res; bool sharingAvailable = false; for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; res += builder->getExtensions(); sharingAvailable = true; } if (DebugManager.flags.EnableFormatQuery.get() && sharingAvailable) { res += Extensions::sharingFormatQuery; } return res; } void SharingFactory::fillGlobalDispatchTable() { for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; builder->fillGlobalDispatchTable(); } } void *SharingFactory::getExtensionFunctionAddress(const std::string &functionName) { for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; auto ret = 
builder->getExtensionFunctionAddress(functionName); if (ret != nullptr) return ret; } return nullptr; } bool SharingFactory::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) { for (auto &sharing : sharings) { if (sharing->processProperties(propertyType, propertyValue, errcodeRet)) return true; } return false; } bool SharingFactory::finalizeProperties(Context &context, int32_t &errcodeRet) { for (auto &sharing : sharings) { if (!sharing->finalizeProperties(context, errcodeRet)) return false; } return true; } SharingBuilderFactory *SharingFactory::sharingContextBuilder[SharingType::MAX_SHARING_VALUE] = { nullptr, }; void SharingFactory::verifyExtensionSupport(DriverInfo *driverInfo) { for (auto &builder : sharingContextBuilder) { if (builder == nullptr) continue; builder->setExtensionEnabled(driverInfo); } }; void SharingBuilderFactory::setExtensionEnabled(DriverInfo *driverInfo){}; SharingFactory sharingFactory; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/sharing_factory.h000066400000000000000000000036331363734646600253130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include #include #include namespace NEO { class Context; class DriverInfo; enum SharingType { CLGL_SHARING = 0, VA_SHARING = 1, D3D9_SHARING = 2, D3D10_SHARING = 3, D3D11_SHARING = 4, UNIFIED_SHARING = 5, MAX_SHARING_VALUE = 6 }; class SharingContextBuilder { public: virtual ~SharingContextBuilder() = default; virtual bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) = 0; virtual bool finalizeProperties(Context &context, int32_t &errcodeRet) = 0; }; class SharingBuilderFactory { public: virtual ~SharingBuilderFactory() = default; virtual std::unique_ptr createContextBuilder() = 0; virtual std::string getExtensions() = 0; virtual void 
fillGlobalDispatchTable() {} virtual void *getExtensionFunctionAddress(const std::string &functionName) = 0; virtual void setExtensionEnabled(DriverInfo *driverInfo); }; class SharingFactory { protected: static SharingBuilderFactory *sharingContextBuilder[SharingType::MAX_SHARING_VALUE]; std::vector> sharings; public: template class RegisterSharing { public: RegisterSharing(); }; static std::unique_ptr build(); bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet); bool finalizeProperties(Context &context, int32_t &errcodeRet); std::string getExtensions(); void fillGlobalDispatchTable(); void *getExtensionFunctionAddress(const std::string &functionName); void verifyExtensionSupport(DriverInfo *driverInfo); }; extern SharingFactory sharingFactory; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/sharing_factory.inl000066400000000000000000000005221363734646600256400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" namespace NEO { template SharingFactory::RegisterSharing::RegisterSharing() { sharingContextBuilder[T::sharingId] = new F; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/000077500000000000000000000000001363734646600233765ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/sharings/unified/CMakeLists.txt000066400000000000000000000015501363734646600261370ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_SHARINGS_UNIFIED ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enable_unified.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_image.h 
${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_types.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_UNIFIED}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_SHARINGS_UNIFIED ${RUNTIME_SRCS_SHARINGS_UNIFIED}) add_subdirectories() target_sources(${SHARINGS_ENABLE_LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/unified_enable_name.cpp) compute-runtime-20.13.16352/opencl/source/sharings/unified/enable_unified.cpp000066400000000000000000000035011363734646600270320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/enable_unified.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include "opencl/source/sharings/unified/unified_sharing.h" #include "opencl/source/sharings/unified/unified_sharing_types.h" #include namespace NEO { bool UnifiedSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) { switch (propertyType) { case static_cast(UnifiedSharingContextType::DeviceHandle): case static_cast(UnifiedSharingContextType::DeviceGroup): this->contextData = std::make_unique(); return true; default: return false; } } bool UnifiedSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() != nullptr) { if (context.getInteropUserSyncEnabled()) { context.registerSharing(new UnifiedSharingFunctions()); } contextData.reset(nullptr); } return true; } std::unique_ptr UnifiedSharingBuilderFactory::createContextBuilder() { return std::make_unique(); }; void *UnifiedSharingBuilderFactory::getExtensionFunctionAddress(const 
std::string &functionName) { return nullptr; } static SharingFactory::RegisterSharing unifiedSharing; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/enable_unified.h000066400000000000000000000016311363734646600265010ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { class Context; struct UnifiedCreateContextProperties { }; class UnifiedSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; class UnifiedSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions() override; void *getExtensionFunctionAddress(const std::string &functionName) override; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/unified_buffer.cpp000066400000000000000000000021261363734646600270570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "unified_buffer.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/buffer.h" #include "config.h" using namespace NEO; Buffer *UnifiedBuffer::createSharedUnifiedBuffer(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription extMem, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); auto graphicsAllocation = UnifiedBuffer::createGraphicsAllocation(context, extMem, GraphicsAllocation::AllocationType::SHARED_BUFFER); if (!graphicsAllocation) { 
errorCode.set(CL_INVALID_MEM_OBJECT); return nullptr; } UnifiedSharingFunctions *sharingFunctions = context->getSharing(); auto sharingHandler = new UnifiedBuffer(sharingFunctions, extMem.type); return Buffer::createSharedBuffer(context, flags, sharingHandler, graphicsAllocation); } compute-runtime-20.13.16352/opencl/source/sharings/unified/unified_buffer.h000066400000000000000000000007421363734646600265260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/unified/unified_sharing.h" namespace NEO { class Buffer; class Context; class UnifiedBuffer : public UnifiedSharing { using UnifiedSharing::UnifiedSharing; public: static Buffer *createSharedUnifiedBuffer(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription description, cl_int *errcodeRet); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/unified_enable_name.cpp000066400000000000000000000004071363734646600300340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/enable_unified.h" namespace NEO { std::string UnifiedSharingBuilderFactory::getExtensions() { return ""; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/unified_image.cpp000066400000000000000000000050511363734646600266700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "unified_image.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/get_info.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include 
"opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" namespace NEO { Image *UnifiedImage::createSharedUnifiedImage(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription description, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); UnifiedSharingFunctions *sharingFunctions = context->getSharing(); auto *clSurfaceFormat = Image::getSurfaceFormatFromTable(flags, imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); ImageInfo imgInfo = {}; imgInfo.imgDesc = Image::convertDescriptor(*imageDesc); imgInfo.surfaceFormat = &clSurfaceFormat->surfaceFormat; GraphicsAllocation *graphicsAllocation = createGraphicsAllocation(context, description, GraphicsAllocation::AllocationType::SHARED_IMAGE); if (!graphicsAllocation) { errorCode.set(CL_INVALID_MEM_OBJECT); return nullptr; } graphicsAllocation->getDefaultGmm()->updateOffsetsInImgInfo(imgInfo, 0u); auto &memoryManager = *context->getMemoryManager(); if (graphicsAllocation->getDefaultGmm()->unifiedAuxTranslationCapable()) { const auto &hwInfo = context->getDevice(0)->getHardwareInfo(); const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); graphicsAllocation->getDefaultGmm()->isRenderCompressed = hwHelper.isPageTableManagerSupported(hwInfo) ? 
memoryManager.mapAuxGpuVA(graphicsAllocation) : true; } const uint32_t baseMipmapIndex = 0u; const uint32_t sharedMipmapsCount = imageDesc->num_mip_levels; auto sharingHandler = new UnifiedImage(sharingFunctions, description.type); return Image::createSharedImage(context, sharingHandler, McsSurfaceInfo{}, graphicsAllocation, nullptr, flags, clSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, baseMipmapIndex, sharedMipmapsCount); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/unified_image.h000066400000000000000000000011151363734646600263320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/unified/unified_sharing.h" namespace NEO { class Image; class Context; class UnifiedImage : public UnifiedSharing { using UnifiedSharing::UnifiedSharing; public: static Image *createSharedUnifiedImage(Context *context, cl_mem_flags flags, UnifiedSharingMemoryDescription description, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, cl_int *errcodeRet); }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/unified_sharing.cpp000066400000000000000000000035621363734646600272460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/unified_sharing.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/source/context/context.h" #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { const uint32_t UnifiedSharingFunctions::sharingId = SharingType::UNIFIED_SHARING; UnifiedSharing::UnifiedSharing(UnifiedSharingFunctions *sharingFunctions, UnifiedSharingHandleType memoryType) : sharingFunctions(sharingFunctions), memoryType(memoryType) { } void UnifiedSharing::synchronizeObject(UpdateData &updateData) { updateData.synchronizationStatus 
= SynchronizeStatus::ACQUIRE_SUCCESFUL; } void UnifiedSharing::releaseResource(MemObj *memObject) { } GraphicsAllocation *UnifiedSharing::createGraphicsAllocation(Context *context, UnifiedSharingMemoryDescription description, GraphicsAllocation::AllocationType allocationType) { auto memoryManager = context->getMemoryManager(); switch (description.type) { case UnifiedSharingHandleType::Win32Nt: { return memoryManager->createGraphicsAllocationFromNTHandle(description.handle, 0u); } case UnifiedSharingHandleType::LinuxFd: case UnifiedSharingHandleType::Win32Shared: { const AllocationProperties properties{0u, false, 0u, allocationType, false}; return memoryManager->createGraphicsAllocationFromSharedHandle(toOsHandle(description.handle), properties, false); } default: return nullptr; } } template <> UnifiedSharingFunctions *Context::getSharing() { UNRECOVERABLE_IF(UnifiedSharingFunctions::sharingId >= sharingFunctions.size()) return reinterpret_cast(sharingFunctions[UnifiedSharingFunctions::sharingId].get()); } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/unified/unified_sharing.h000066400000000000000000000025141363734646600267070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/unified/unified_sharing_types.h" #include "CL/cl.h" #include #include #include namespace NEO { class UnifiedSharingFunctions : public SharingFunctions { public: uint32_t getId() const override { return UnifiedSharingFunctions::sharingId; } static const uint32_t sharingId; }; class UnifiedSharing : public SharingHandler { public: UnifiedSharing(UnifiedSharingFunctions *sharingFunctions, UnifiedSharingHandleType memoryType); UnifiedSharingFunctions *peekFunctionsHandler() { return sharingFunctions; } UnifiedSharingHandleType 
namespace NEO {

using UnifiedSharingMemoryProperties = uint64_t;

// Context property names recognized by the unified sharing context builder.
enum class UnifiedSharingContextType {
    DeviceHandle = 0x300B,
    DeviceGroup = 0x300C
};

// Kind of external handle carried by UnifiedSharingMemoryDescription.
enum class UnifiedSharingHandleType {
    LinuxFd = 1,
    Win32Shared = 2,
    Win32Nt = 3
};

// Describes an externally created memory object to import.
// Every member has an initializer so a default-constructed description holds defined values.
struct UnifiedSharingMemoryDescription {
    // Zero matches no enumerator; UnifiedSharing::createGraphicsAllocation() returns nullptr
    // for any handle type it does not list.
    UnifiedSharingHandleType type{};
    void *handle = nullptr;
    unsigned long long size = 0;
};

} // namespace NEO
${RUNTIME_SRCS_SHARINGS_VA}) if(NEO__LIBVA_FOUND) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_SHARINGS_VA}) endif(NEO__LIBVA_FOUND) compute-runtime-20.13.16352/opencl/source/sharings/va/cl_va_api.cpp000066400000000000000000000136501363734646600250070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "shared/source/utilities/api_intercept.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/va/va_sharing.h" #include "opencl/source/sharings/va/va_surface.h" #include "CL/cl.h" #include using namespace NEO; cl_mem CL_API_CALL clCreateFromVA_APIMediaSurfaceINTEL(cl_context context, cl_mem_flags flags, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet) { cl_int returnCode = CL_SUCCESS; API_ENTER(&returnCode); DBG_LOG_INPUTS("context", context, "flags", flags, "VASurfaceID", surface, "plane", plane); Context *pContext = nullptr; cl_mem image = nullptr; returnCode = validateObject(WithCastToInternal(context, &pContext)); ErrorCodeHelper err(errcodeRet, returnCode); if (returnCode != CL_SUCCESS) { return nullptr; } if (!VASurface::validate(flags, plane)) { returnCode = CL_INVALID_VALUE; err.set(returnCode); return nullptr; } image = VASurface::createSharedVaSurface(pContext, pContext->getSharing(), flags, surface, plane, errcodeRet); DBG_LOG_INPUTS("image", image); return image; } cl_int CL_API_CALL clGetDeviceIDsFromVA_APIMediaAdapterINTEL(cl_platform_id platform, cl_va_api_device_source_intel mediaAdapterType, void *mediaAdapter, cl_va_api_device_set_intel mediaAdapterSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { cl_int status = CL_SUCCESS; API_ENTER(&status); DBG_LOG_INPUTS("platform", platform, 
"mediaAdapterType", mediaAdapterType, "mediaAdapter", mediaAdapter, "mediaAdapterSet", mediaAdapterSet, "numEntries", numEntries); Platform *pPlatform = nullptr; status = validateObjects(WithCastToInternal(platform, &pPlatform)); if (status != CL_SUCCESS) { status = CL_INVALID_PLATFORM; } else { cl_device_id device = pPlatform->getClDevice(0); GetInfoHelper::set(devices, device); GetInfoHelper::set(numDevices, 1u); } return status; } cl_int CL_API_CALL clEnqueueAcquireVA_APIMediaSurfacesINTEL(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int status = CL_SUCCESS; API_ENTER(&status); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", FileLoggerInstance().getMemObjects(reinterpret_cast(memObjects), numObjects), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; status = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue)); if (status == CL_SUCCESS) { status = pCommandQueue->enqueueAcquireSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL); } return status; } cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL(cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int status = CL_SUCCESS; API_ENTER(&status); DBG_LOG_INPUTS("commandQueue", commandQueue, "numObjects", numObjects, "memObjects", FileLoggerInstance().getMemObjects(reinterpret_cast(memObjects), numObjects), "numEventsInWaitList", numEventsInWaitList, "eventWaitList", FileLoggerInstance().getEvents(reinterpret_cast(eventWaitList), numEventsInWaitList), "event", 
FileLoggerInstance().getEvents(reinterpret_cast(event), 1)); CommandQueue *pCommandQueue = nullptr; status = validateObjects(WithCastToInternal(commandQueue, &pCommandQueue)); if (status == CL_SUCCESS) { status = pCommandQueue->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL); } return status; } cl_int CL_API_CALL clGetSupportedVA_APIMediaSurfaceFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, VAImageFormat *vaApiFormats, cl_uint *numImageFormats) { if (numImageFormats) { *numImageFormats = 0; } Context *pContext = castToObjectOrAbort(context); auto pSharing = pContext->getSharing(); if (!pSharing) { return CL_INVALID_CONTEXT; } return pSharing->getSupportedFormats(flags, imageType, numEntries, vaApiFormats, numImageFormats); } compute-runtime-20.13.16352/opencl/source/sharings/va/cl_va_api.h000066400000000000000000000006101363734646600244440ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "CL/cl.h" #include "CL/cl_va_api_media_sharing_intel.h" cl_int CL_API_CALL clGetSupportedVA_APIMediaSurfaceFormatsINTEL( cl_context context, cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, VAImageFormat *vaApiFormats, cl_uint *numImageFormats); compute-runtime-20.13.16352/opencl/source/sharings/va/enable_va.cpp000066400000000000000000000063071363734646600250070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #ifdef LIBVA #include "opencl/source/sharings/va/enable_va.h" #include "opencl/source/api/api.h" #include "opencl/source/context/context.h" #include "opencl/source/context/context.inl" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/sharing_factory.inl" #include "opencl/source/sharings/va/cl_va_api.h" #include 
"opencl/source/sharings/va/va_sharing.h" #include namespace NEO { bool VaSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) { if (contextData.get() == nullptr) { contextData = std::make_unique(); } bool res = false; switch (propertyType) { case CL_CONTEXT_VA_API_DISPLAY_INTEL: contextData->vaDisplay = (VADisplay)propertyValue; res = true; break; } return res; } bool VaSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (contextData.get() == nullptr) return true; if (contextData->vaDisplay) { context.registerSharing(new VASharingFunctions(contextData->vaDisplay)); if (!context.getSharing()->isValidVaDisplay()) { errcodeRet = CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL; return false; } context.getSharing()->querySupportedVaImageFormats(contextData->vaDisplay); } return true; } std::unique_ptr VaSharingBuilderFactory::createContextBuilder() { return std::make_unique(); }; std::string VaSharingBuilderFactory::getExtensions() { if (VASharingFunctions::isVaLibraryAvailable()) { return "cl_intel_va_api_media_sharing "; } return ""; } void VaSharingBuilderFactory::fillGlobalDispatchTable() { crtGlobalDispatchTable.clCreateFromVA_APIMediaSurfaceINTEL = clCreateFromVA_APIMediaSurfaceINTEL; crtGlobalDispatchTable.clGetDeviceIDsFromVA_APIMediaAdapterINTEL = clGetDeviceIDsFromVA_APIMediaAdapterINTEL; crtGlobalDispatchTable.clEnqueueReleaseVA_APIMediaSurfacesINTEL = clEnqueueReleaseVA_APIMediaSurfacesINTEL; crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL = clEnqueueAcquireVA_APIMediaSurfacesINTEL; } #define RETURN_FUNC_PTR_IF_EXIST(name) \ { \ if (functionName == #name) { \ return ((void *)(name)); \ } \ } void *VaSharingBuilderFactory::getExtensionFunctionAddress(const std::string &functionName) { RETURN_FUNC_PTR_IF_EXIST(clCreateFromVA_APIMediaSurfaceINTEL); RETURN_FUNC_PTR_IF_EXIST(clGetDeviceIDsFromVA_APIMediaAdapterINTEL); 
RETURN_FUNC_PTR_IF_EXIST(clEnqueueAcquireVA_APIMediaSurfacesINTEL); RETURN_FUNC_PTR_IF_EXIST(clEnqueueReleaseVA_APIMediaSurfacesINTEL); if (DebugManager.flags.EnableFormatQuery.get()) { RETURN_FUNC_PTR_IF_EXIST(clGetSupportedVA_APIMediaSurfaceFormatsINTEL); } return nullptr; } static SharingFactory::RegisterSharing vaSharing; } // namespace NEO #endif compute-runtime-20.13.16352/opencl/source/sharings/va/enable_va.h000066400000000000000000000020201363734646600244400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include namespace NEO { class Context; struct VaCreateContextProperties { VADisplay vaDisplay = nullptr; }; class VaSharingContextBuilder : public SharingContextBuilder { protected: std::unique_ptr contextData; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; class VaSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override; std::string getExtensions() override; void fillGlobalDispatchTable() override; void *getExtensionFunctionAddress(const std::string &functionName) override; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/va/va_sharing.cpp000066400000000000000000000007471363734646600252160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.inl" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include "opencl/source/sharings/va/va_sharing_functions.h" namespace NEO { const uint32_t VASharingFunctions::sharingId = SharingType::VA_SHARING; template VASharingFunctions 
*Context::getSharing(); } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/va/va_sharing.h000066400000000000000000000012731363734646600246560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include "opencl/source/sharings/va/va_sharing_functions.h" namespace NEO { class VASharing : public SharingHandler { public: VASharing(VASharingFunctions *sharingFunctions, VAImageID imageId) : sharingFunctions(sharingFunctions), imageId(imageId){}; VASharingFunctions *peekFunctionsHandler() { return sharingFunctions; } protected: VASharingFunctions *sharingFunctions = nullptr; VAImageID imageId; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/va/va_sharing_defines.h000066400000000000000000000015211363734646600263470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include "CL/cl_va_api_media_sharing_intel.h" typedef int (*VADisplayIsValidPFN)(VADisplay vaDisplay); typedef VAStatus (*VADeriveImagePFN)(VADisplay vaDisplay, VASurfaceID vaSurface, VAImage *vaImage); typedef VAStatus (*VADestroyImagePFN)(VADisplay vaDisplay, VAImageID vaImageId); typedef VAStatus (*VAExtGetSurfaceHandlePFN)(VADisplay vaDisplay, VASurfaceID *vaSurface, unsigned int *handleId); typedef VAStatus (*VASyncSurfacePFN)(VADisplay vaDisplay, VASurfaceID vaSurface); typedef void *(*VAGetLibFuncPFN)(VADisplay vaDisplay, const char *func); typedef VAStatus (*VAQueryImageFormatsPFN)(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats); typedef int (*VAMaxNumImageFormatsPFN)(VADisplay vaDisplay); 
compute-runtime-20.13.16352/opencl/source/sharings/va/va_sharing_functions.cpp000066400000000000000000000100161363734646600272740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "va_sharing_functions.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/sharings/va/va_surface.h" #include namespace Os { extern const char *libvaDllName; } namespace NEO { std::function VASharingFunctions::fdlopen = dlopen; std::function VASharingFunctions::fdlsym = dlsym; std::function VASharingFunctions::fdlclose = dlclose; VASharingFunctions::VASharingFunctions(VADisplay vaDisplay) : vaDisplay(vaDisplay) { initFunctions(); }; VASharingFunctions::~VASharingFunctions() { if (libHandle != nullptr) { fdlclose(libHandle); libHandle = nullptr; } } bool VASharingFunctions::isVaLibraryAvailable() { auto lib = fdlopen(Os::libvaDllName, RTLD_LAZY); if (lib) { fdlclose(lib); return true; } return false; } void VASharingFunctions::initFunctions() { if (DebugManager.flags.EnableVaLibCalls.get()) { libHandle = fdlopen(Os::libvaDllName, RTLD_LAZY); if (libHandle) { vaDisplayIsValidPFN = reinterpret_cast(fdlsym(libHandle, "vaDisplayIsValid")); vaDeriveImagePFN = reinterpret_cast(fdlsym(libHandle, "vaDeriveImage")); vaDestroyImagePFN = reinterpret_cast(fdlsym(libHandle, "vaDestroyImage")); vaSyncSurfacePFN = reinterpret_cast(fdlsym(libHandle, "vaSyncSurface")); vaGetLibFuncPFN = reinterpret_cast(fdlsym(libHandle, "vaGetLibFunc")); vaExtGetSurfaceHandlePFN = reinterpret_cast(getLibFunc("DdiMedia_ExtGetSurfaceHandle")); vaQueryImageFormatsPFN = reinterpret_cast(fdlsym(libHandle, "vaQueryImageFormats")); vaMaxNumImageFormatsPFN = reinterpret_cast(fdlsym(libHandle, "vaMaxNumImageFormats")); } else { vaDisplayIsValidPFN = nullptr; vaDeriveImagePFN = nullptr; vaDestroyImagePFN = nullptr; vaSyncSurfacePFN = nullptr; vaGetLibFuncPFN = nullptr; vaExtGetSurfaceHandlePFN = nullptr; 
vaQueryImageFormatsPFN = nullptr; vaMaxNumImageFormatsPFN = nullptr; } } } void VASharingFunctions::querySupportedVaImageFormats(VADisplay vaDisplay) { UNRECOVERABLE_IF(supportedFormats.size() != 0); int maxFormats = this->maxNumImageFormats(vaDisplay); if (maxFormats > 0) { std::unique_ptr allVaFormats(new VAImageFormat[maxFormats]); this->queryImageFormats(vaDisplay, allVaFormats.get(), &maxFormats); for (int i = 0; i < maxFormats; i++) { if (VASurface::isSupportedFourCC(allVaFormats[i].fourcc)) { supportedFormats.emplace_back(allVaFormats[i]); } } } } cl_int VASharingFunctions::getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, VAImageFormat *formats, cl_uint *numImageFormats) { if (flags != CL_MEM_READ_ONLY && flags != CL_MEM_WRITE_ONLY && flags != CL_MEM_READ_WRITE) { return CL_INVALID_VALUE; } if (imageType != CL_MEM_OBJECT_IMAGE2D) { return CL_INVALID_VALUE; } if (numImageFormats != nullptr) { *numImageFormats = static_cast(supportedFormats.size()); } if (formats != nullptr && supportedFormats.size() > 0) { uint32_t elementsToCopy = std::min(numEntries, static_cast(supportedFormats.size())); memcpy_s(formats, elementsToCopy * sizeof(VAImageFormat), &supportedFormats[0], elementsToCopy * sizeof(VAImageFormat)); } return CL_SUCCESS; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/va/va_sharing_functions.h000066400000000000000000000055771363734646600267610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/va/va_sharing_defines.h" #include #include namespace NEO { class VASharingFunctions : public SharingFunctions { public: VASharingFunctions(VADisplay vaDisplay); ~VASharingFunctions() override; uint32_t getId() const override { return VASharingFunctions::sharingId; } static const uint32_t sharingId; MOCKABLE_VIRTUAL bool isValidVaDisplay() { return 
vaDisplayIsValidPFN(vaDisplay) == 1; } MOCKABLE_VIRTUAL VAStatus deriveImage(VASurfaceID vaSurface, VAImage *vaImage) { return vaDeriveImagePFN(vaDisplay, vaSurface, vaImage); } MOCKABLE_VIRTUAL VAStatus destroyImage(VAImageID vaImageId) { return vaDestroyImagePFN(vaDisplay, vaImageId); } MOCKABLE_VIRTUAL VAStatus extGetSurfaceHandle(VASurfaceID *vaSurface, unsigned int *handleId) { return vaExtGetSurfaceHandlePFN(vaDisplay, vaSurface, handleId); } MOCKABLE_VIRTUAL VAStatus syncSurface(VASurfaceID vaSurface) { return vaSyncSurfacePFN(vaDisplay, vaSurface); } MOCKABLE_VIRTUAL VAStatus queryImageFormats(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats) { return vaQueryImageFormatsPFN(vaDisplay, formatList, numFormats); } MOCKABLE_VIRTUAL int maxNumImageFormats(VADisplay vaDisplay) { if (vaMaxNumImageFormatsPFN) { return vaMaxNumImageFormatsPFN(vaDisplay); } return 0; } void *getLibFunc(const char *func) { if (vaGetLibFuncPFN) { return vaGetLibFuncPFN(vaDisplay, func); } return nullptr; } void initFunctions(); void querySupportedVaImageFormats(VADisplay vaDisplay); cl_int getSupportedFormats(cl_mem_flags flags, cl_mem_object_type imageType, cl_uint numEntries, VAImageFormat *formats, cl_uint *numImageFormats); static std::function fdlopen; static std::function fdlsym; static std::function fdlclose; static bool isVaLibraryAvailable(); protected: void *libHandle = nullptr; VADisplay vaDisplay = nullptr; VADisplayIsValidPFN vaDisplayIsValidPFN = [](VADisplay vaDisplay) { return 0; }; VADeriveImagePFN vaDeriveImagePFN; VADestroyImagePFN vaDestroyImagePFN; VASyncSurfacePFN vaSyncSurfacePFN; VAExtGetSurfaceHandlePFN vaExtGetSurfaceHandlePFN; VAGetLibFuncPFN vaGetLibFuncPFN; VAQueryImageFormatsPFN vaQueryImageFormatsPFN; VAMaxNumImageFormatsPFN vaMaxNumImageFormatsPFN; std::vector supportedFormats; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/sharings/va/va_surface.cpp000066400000000000000000000122431363734646600252050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/va/va_surface.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/get_info.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" namespace NEO { Image *VASurface::createSharedVaSurface(Context *context, VASharingFunctions *sharingFunctions, cl_mem_flags flags, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet) { ErrorCodeHelper errorCode(errcodeRet, CL_SUCCESS); auto memoryManager = context->getMemoryManager(); unsigned int sharedHandle = 0; VAImage vaImage = {}; cl_image_desc imgDesc = {}; cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8}; cl_channel_order channelOrder = CL_RG; cl_channel_type channelType = CL_UNORM_INT8; ImageInfo imgInfo = {}; VAImageID imageId = 0; McsSurfaceInfo mcsSurfaceInfo = {}; sharingFunctions->deriveImage(*surface, &vaImage); imageId = vaImage.image_id; imgDesc.image_width = vaImage.width; imgDesc.image_height = vaImage.height; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgInfo.imgDesc = Image::convertDescriptor(imgDesc); if (plane == 0) { imgInfo.plane = GMM_PLANE_Y; channelOrder = CL_R; } else if (plane == 1) { imgInfo.plane = GMM_PLANE_U; channelOrder = CL_RG; } else { UNRECOVERABLE_IF(true); } auto gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); //vaImage.format.fourcc == VA_FOURCC_NV12 if (DebugManager.flags.EnableExtendedVaFormats.get() && vaImage.format.fourcc == VA_FOURCC_P010) { channelType = CL_UNORM_INT16; gmmSurfaceFormat = getExtendedSurfaceFormatInfo(vaImage.format.fourcc); } 
imgInfo.surfaceFormat = &gmmSurfaceFormat->surfaceFormat; cl_image_format imgFormat = {channelOrder, channelType}; auto imgSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); sharingFunctions->extGetSurfaceHandle(surface, &sharedHandle); AllocationProperties properties(context->getDevice(0)->getRootDeviceIndex(), false, imgInfo, GraphicsAllocation::AllocationType::SHARED_IMAGE); auto alloc = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false); imgDesc.image_row_pitch = imgInfo.rowPitch; imgDesc.image_slice_pitch = 0u; imgInfo.slicePitch = 0u; imgInfo.surfaceFormat = &imgSurfaceFormat->surfaceFormat; if (plane == 1) { imgDesc.image_width /= 2; imgDesc.image_height /= 2; imgInfo.offset = vaImage.offsets[1]; imgInfo.yOffset = 0; imgInfo.xOffset = 0; imgInfo.yOffsetForUVPlane = static_cast(imgInfo.offset / vaImage.pitches[0]); } imgInfo.imgDesc = Image::convertDescriptor(imgDesc); sharingFunctions->destroyImage(vaImage.image_id); auto vaSurface = new VASurface(sharingFunctions, imageId, plane, surface, context->getInteropUserSyncEnabled()); auto image = Image::createSharedImage(context, vaSurface, mcsSurfaceInfo, alloc, nullptr, flags, imgSurfaceFormat, imgInfo, __GMM_NO_CUBE_MAP, 0, 0); image->setMediaPlaneType(plane); return image; } void VASurface::synchronizeObject(UpdateData &updateData) { if (!interopUserSync) { sharingFunctions->syncSurface(*surfaceId); } updateData.synchronizationStatus = SynchronizeStatus::ACQUIRE_SUCCESFUL; } void VASurface::getMemObjectInfo(size_t ¶mValueSize, void *¶mValue) { paramValueSize = sizeof(surfaceId); paramValue = &surfaceId; } bool VASurface::validate(cl_mem_flags flags, cl_uint plane) { switch (flags) { case CL_MEM_READ_ONLY: case CL_MEM_WRITE_ONLY: case CL_MEM_READ_WRITE: break; default: return false; } if (plane > 1) { return false; } return true; } const ClSurfaceFormatInfo 
*VASurface::getExtendedSurfaceFormatInfo(uint32_t formatFourCC) { if (formatFourCC == VA_FOURCC_P010) { static const ClSurfaceFormatInfo formatInfo = {{CL_NV12_INTEL, CL_UNORM_INT16}, {GMM_RESOURCE_FORMAT::GMM_FORMAT_P010, static_cast(NUM_GFX3DSTATE_SURFACEFORMATS), // not used for plane images 0, 1, 2, 2}}; return &formatInfo; } return nullptr; } bool VASurface::isSupportedFourCC(int fourcc) { if ((fourcc == VA_FOURCC_NV12) || (DebugManager.flags.EnableExtendedVaFormats.get() && fourcc == VA_FOURCC_P010)) { return true; } return false; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/sharings/va/va_surface.h000066400000000000000000000023451363734646600246540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/va/va_sharing.h" namespace NEO { class Context; class Image; class VASurface : VASharing { public: static Image *createSharedVaSurface(Context *context, VASharingFunctions *sharingFunctions, cl_mem_flags flags, VASurfaceID *surface, cl_uint plane, cl_int *errcodeRet); void synchronizeObject(UpdateData &updateData) override; void getMemObjectInfo(size_t ¶mValueSize, void *¶mValue) override; static bool validate(cl_mem_flags flags, cl_uint plane); static const ClSurfaceFormatInfo *getExtendedSurfaceFormatInfo(uint32_t formatFourCC); static bool isSupportedFourCC(int fourcc); protected: VASurface(VASharingFunctions *sharingFunctions, VAImageID imageId, cl_uint plane, VASurfaceID *surfaceId, bool interopUserSync) : VASharing(sharingFunctions, imageId), plane(plane), surfaceId(surfaceId), interopUserSync(interopUserSync){}; cl_uint plane; VASurfaceID *surfaceId; bool interopUserSync; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/source/tbx/000077500000000000000000000000001363734646600207325ustar00rootroot00000000000000
compute-runtime-20.13.16352/opencl/source/tbx/CMakeLists.txt000066400000000000000000000007241363734646600234750ustar00rootroot00000000000000
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Sources of the TBX socket layer, added to the static runtime library and
# also published as a global property of the same name.
set(RUNTIME_SRCS_TBX
  ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
  ${CMAKE_CURRENT_SOURCE_DIR}/tbx_proto.h
  ${CMAKE_CURRENT_SOURCE_DIR}/tbx_sockets.h
  ${CMAKE_CURRENT_SOURCE_DIR}/tbx_sockets_imp.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/tbx_sockets_imp.h
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_TBX})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_TBX ${RUNTIME_SRCS_TBX})
compute-runtime-20.13.16352/opencl/source/tbx/tbx_proto.h000066400000000000000000000224401363734646600231250ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

// NOTE(review): the include target is missing in this copy of the file
// (presumably <cstdint>, given the uint32_t fields below) — confirm against the original.
#include

// Message identifiers carried in HAS_HDR. Most messages come as a *_REQ_TYPE /
// *_RES_TYPE pair. NUM_OF_MSG_TYPE is the count sentinel.
enum HAS_MSG_TYPE {
    HAS_MMIO_REQ_TYPE = 0,
    HAS_MMIO_RES_TYPE = 1,
    HAS_GTT_REQ_TYPE = 2,
    HAS_GTT_RES_TYPE = 3,
    HAS_WRITE_DATA_REQ_TYPE = 4,
    HAS_READ_DATA_REQ_TYPE = 5,
    HAS_READ_DATA_RES_TYPE = 6,
    HAS_MARKER_REQ_TYPE = 7,
    HAS_MARKER_RES_TYPE = 8,
    HAS_REPORT_REND_END_REQ_TYPE = 9,
    HAS_REPORT_REND_END_RES_TYPE = 10,
    HAS_CONTROL_REQ_TYPE = 11,
    HAS_PARAMS_REQ_TYPE = 12,
    HAS_PARAMS_RES_TYPE = 13,
    HAS_PCICFG_REQ_TYPE = 14,
    HAS_PCICFG_RES_TYPE = 15,
    HAS_GTT_PARAMS_REQ_TYPE = 16,
    HAS_EVENT_REQ_TYPE = 17,
    HAS_INNER_VAR_REQ_TYPE = 18,
    HAS_INNER_VAR_RES_TYPE = 19,
    HAS_INNER_VAR_LIST_REQ_TYPE = 20,
    HAS_INNER_VAR_LIST_RES_TYPE = 21,
    HAS_FUNNY_IO_REQ_TYPE = 22,
    HAS_FUNNY_IO_RES_TYPE = 23,
    HAS_IO_REQ_TYPE = 24,
    HAS_IO_RES_TYPE = 25,
    HAS_RPC_REQ_TYPE = 26,
    HAS_RPC_RES_TYPE = 27,
    HAS_CL_FLUSH_REQ_TYPE = 28,
    HAS_CL_FLUSH_RES_TYPE = 29,
    HAS_SYNC_ALL_PAGES_REQ_TYPE = 30,
    HAS_SYNC_ALL_PAGES_RES_TYPE = 31,
    HAS_GD2_MESSAGE_TYPE = 32,
    HAS_SIMTIME_RES_TYPE = 33,
    HAS_RL_STATUS_RES_TYPE = 34,
    NUM_OF_MSG_TYPE
};

// Header placed in front of every message body. The senders in
// tbx_sockets_imp.cpp set `size` to the size of the body struct that follows
// and `trans_id` to a per-connection counter that responses are matched against.
struct HAS_HDR {
    union {
        uint32_t msg_type; // raw view of the message id
        HAS_MSG_TYPE type; // same storage, typed view
    };
    uint32_t trans_id;
    uint32_t size;
};

// Values for HAS_MMIO_REQ::msg_type.
enum {
    MSG_TYPE_MMIO = 0,
    MSG_TYPE_IO,
    MSG_TYPE_FUNNY_IO
};

// Each body struct below carries an anonymous enum whose HAS_MSG_TYPE
// enumerator names the message id that struct is sent under.

// Register read/write request; the flag word packs exactly 32 bits.
struct HAS_MMIO_REQ {
    uint32_t write : 1;
    uint32_t size : 3;
    uint32_t dev_idx : 2;
    uint32_t msg_type : 3;
    uint32_t reserved : 7;
    uint32_t delay : 16;

    uint32_t offset;
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_MMIO_REQ_TYPE };
};

// HAS_MMIO_REQ followed by a source id; shares the MMIO request message id.
struct HAS_MMIO_EXT_REQ {
    struct HAS_MMIO_REQ mmio_req;
    uint32_t sourceid : 8;
    uint32_t reserved1 : 24;
    enum { HAS_MSG_TYPE = HAS_MMIO_REQ_TYPE };
};

struct HAS_MMIO_RES {
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_MMIO_RES_TYPE };
};

// GTT access with a 32-bit entry.
struct HAS_GTT32_REQ {
    uint32_t write : 1;
    uint32_t reserved : 31;

    uint32_t offset;
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_GTT_REQ_TYPE };
};

struct HAS_GTT32_RES {
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_GTT_RES_TYPE };
};

// GTT access with a 64-bit entry split into low (`data`) and high (`data_h`) halves.
struct HAS_GTT64_REQ {
    uint32_t write : 1;
    uint32_t reserved : 31;

    uint32_t offset;
    uint32_t data;
    uint32_t data_h;
    enum { HAS_MSG_TYPE = HAS_GTT_REQ_TYPE };
};

struct HAS_GTT64_RES {
    uint32_t data;
    uint32_t data_h;
    enum { HAS_MSG_TYPE = HAS_GTT_RES_TYPE };
};

// Memory write request; `size` bytes of payload follow it on the wire
// (see TbxSocketsImp::writeMemory). `address_h` holds address bits above 32
// and is only 8 bits wide.
struct HAS_WRITE_DATA_REQ {
    uint32_t addr_type : 1;
    uint32_t mask_exist : 1;
    uint32_t frontdoor : 1;
    uint32_t take_ownership : 1;
    uint32_t model_owned : 1;
    uint32_t cacheline_disable : 1;
    uint32_t memory_type : 2;
    uint32_t reserved : 16;
    uint32_t address_h : 8;

    uint32_t address;
    uint32_t size;
    enum { HAS_MSG_TYPE = HAS_WRITE_DATA_REQ_TYPE };
};

// Memory read request for `size` bytes at address_h:address.
struct HAS_READ_DATA_REQ {
    uint32_t addr_type : 1;
    uint32_t frontdoor : 1;
    uint32_t ownership_req : 1;
    uint32_t model_owned : 1;
    uint32_t cacheline_disable : 1;
    uint32_t reserved : 19;
    uint32_t address_h : 8;

    uint32_t address;
    uint32_t size;
    enum { HAS_MSG_TYPE = HAS_READ_DATA_REQ_TYPE };
};

// Reply header to a read request; the data bytes are received separately
// after it (see TbxSocketsImp::readMemory).
struct HAS_READ_DATA_RES {
    uint32_t addr_type : 1;
    uint32_t mask_exist : 1;
    uint32_t last_page : 1;
    uint32_t ownership_res : 1;
    uint32_t reserved : 20;
    uint32_t address_h : 8;

    uint32_t address;
    uint32_t size;
    enum { HAS_MSG_TYPE = HAS_READ_DATA_RES_TYPE };
};

// Control word: the low 16 bits are option values, the high 16 bits are a
// same-named *_mask bit for each option. TbxSocketsImp::init sets a value bit
// together with its mask bit.
struct HAS_CONTROL_REQ {
    uint32_t reset : 1;          // [0:0]
    uint32_t has : 1;            // [1:1]
    uint32_t rd_on_demand : 1;   // [2:2]
    uint32_t write_mask : 1;     // [3:3]
    uint32_t time_adv : 1;       // [4:4]
    uint32_t async_msg : 1;      // [5:5]
    uint32_t quit : 1;           // [6:6]
    uint32_t cncry_enb : 1;      // [7:7]
    uint32_t stime_enb : 1;      // [8:8]
    uint32_t full_reset : 1;     // [9:9]
    uint32_t auto_ownership : 1; // [10:10]
    uint32_t backdoor_model : 1; // [11:11]
    uint32_t flush : 1;          // [12:12]
    uint32_t reserved : 3;       // [15:13]

    uint32_t reset_mask : 1;          // [16:16]
    uint32_t has_mask : 1;            // [17:17]
    uint32_t rd_on_demand_mask : 1;   // [18:18]
    uint32_t write_mask_mask : 1;     // [19:19]
    uint32_t time_adv_mask : 1;       // [20:20]
    uint32_t async_msg_mask : 1;      // [21:21]
    uint32_t quit_mask : 1;           // [22:22]
    uint32_t cncry_enb_mask : 1;      // [23:23]
    uint32_t stime_enb_mask : 1;      // [24:24]
    uint32_t full_reset_mask : 1;     // [25:25]
    uint32_t auto_ownership_mask : 1; // [26:26]
    uint32_t backdoor_model_mask : 1; // [27:27]
    uint32_t flush_mask : 1;          // [28:28]
    uint32_t reserved_mask : 3;       // [31:29]
    enum { HAS_MSG_TYPE = HAS_CONTROL_REQ_TYPE };
};

struct HAS_REPORT_REND_END_REQ {
    uint32_t timeout;
    enum { HAS_MSG_TYPE = HAS_REPORT_REND_END_REQ_TYPE };
};

struct HAS_REPORT_REND_END_RES {
    uint32_t timeout : 1;
    uint32_t reserved : 31;
    enum { HAS_MSG_TYPE = HAS_REPORT_REND_END_RES_TYPE };
};

// PCI configuration-space access addressed by bus/device/function.
struct HAS_PCICFG_REQ {
    uint32_t write : 1;
    uint32_t size : 3;
    uint32_t bus : 8;
    uint32_t device : 5;
    uint32_t function : 3;
    uint32_t reserved : 12;

    uint32_t offset;
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_PCICFG_REQ_TYPE };
};

// NOTE(review): unlike the other bodies this one declares no HAS_MSG_TYPE
// enumerator — confirm whether that is intentional.
struct HAS_PCICFG_RES {
    uint32_t data;
};

struct HAS_GTT_PARAMS_REQ {
    uint32_t base;
    uint32_t base_h : 8;
    uint32_t size : 24;
    enum { HAS_MSG_TYPE = HAS_GTT_PARAMS_REQ_TYPE };
};

// Older event layout without the device index; shares the message id with HAS_EVENT_REQ.
struct HAS_EVENT_OBSOLETE_REQ {
    uint32_t offset;
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_EVENT_REQ_TYPE };
};

struct HAS_EVENT_REQ {
    uint32_t offset;
    uint32_t data;
    uint32_t dev_idx : 2;
    uint32_t reserved : 30;
    enum { HAS_MSG_TYPE = HAS_EVENT_REQ_TYPE };
};

struct HAS_INNER_VAR_REQ {
    uint32_t write : 1;
    uint32_t non_dword : 16;
    uint32_t reserved : 15;

    uint32_t id;
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_INNER_VAR_REQ_TYPE };
};

struct HAS_INNER_VAR_RES {
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_INNER_VAR_RES_TYPE };
};

struct HAS_INNER_VAR_LIST_RES {
    uint32_t size;
    enum { HAS_MSG_TYPE = HAS_INNER_VAR_LIST_RES_TYPE };
};

struct HAS_INTERNAL_VAR_LIST_ENTRY_RES {
    uint32_t id;
    uint32_t min;
    uint32_t max;
    uint32_t desc_size;
};

struct HAS_FUNNY_IO_REQ {
    uint32_t write : 1;
    uint32_t reserved : 28;
    uint32_t size : 3;

    uint32_t offset;
    uint32_t value;
    enum { HAS_MSG_TYPE = HAS_FUNNY_IO_REQ_TYPE };
};

struct HAS_FUNNY_IO_RES {
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_FUNNY_IO_RES_TYPE };
};

struct HAS_IO_REQ {
    uint32_t write : 1;
    uint32_t dev_idx : 2;
    uint32_t reserved : 26;
    uint32_t size : 3;

    uint32_t offset;
    uint32_t value;
    enum { HAS_MSG_TYPE = HAS_IO_REQ_TYPE };
};

struct HAS_IO_RES {
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_IO_RES_TYPE };
};

struct HAS_RPC_REQ {
    uint32_t size;
    enum { HAS_MSG_TYPE = HAS_RPC_REQ_TYPE };
};

struct HAS_RPC_RES {
    uint32_t status;
    uint32_t size;
    enum { HAS_MSG_TYPE = HAS_RPC_RES_TYPE };
};

struct HAS_CL_FLUSH_REQ {
    uint32_t reserved : 23;
    uint32_t ignore : 1;
    uint32_t address_h : 8;
    uint32_t address;
    uint32_t size;
    uint32_t delay;
    enum { HAS_MSG_TYPE = HAS_CL_FLUSH_REQ_TYPE };
};

struct HAS_CL_FLUSH_RES {
    uint32_t data;
    enum { HAS_MSG_TYPE = HAS_CL_FLUSH_RES_TYPE };
};

// 64-bit value split into low and high 32-bit halves.
struct HAS_SIMTIME_RES {
    uint32_t data_l;
    uint32_t data_h;
    enum { HAS_MSG_TYPE = HAS_SIMTIME_RES_TYPE };
};

// NOTE(review): data[1] looks like the start of a variable-length payload — confirm.
struct HAS_GD2_MESSAGE {
    uint32_t subOpcode;
    uint32_t data[1];
    enum { HAS_MSG_TYPE = HAS_GD2_MESSAGE_TYPE };
};

// Overlay of the message bodies; which member is valid is given by HAS_HDR::msg_type.
union HAS_MSG_BODY {
    struct HAS_MMIO_REQ mmio_req;
    struct HAS_MMIO_EXT_REQ mmio_req_ext;
    struct HAS_MMIO_RES mmio_res;
    struct HAS_GTT32_REQ gtt32_req;
    struct HAS_GTT32_RES gtt32_res;
    struct HAS_GTT64_REQ gtt64_req;
    struct HAS_GTT64_RES gtt64_res;
    struct HAS_WRITE_DATA_REQ write_req;
    struct HAS_READ_DATA_REQ read_req;
    struct HAS_READ_DATA_RES read_res;
    struct HAS_CONTROL_REQ control_req;
    struct HAS_REPORT_REND_END_REQ render_req;
    struct HAS_REPORT_REND_END_RES render_res;
    struct HAS_PCICFG_REQ pcicfg_req;
    struct HAS_PCICFG_RES pcicfg_res;
    struct HAS_GTT_PARAMS_REQ gtt_params_req;
    struct HAS_EVENT_REQ event_req;
    struct HAS_EVENT_OBSOLETE_REQ event_obsolete_req;
    struct HAS_INNER_VAR_REQ inner_var_req;
    struct HAS_INNER_VAR_RES inner_var_res;
    struct HAS_INNER_VAR_LIST_RES inner_var_list_res;
    struct HAS_IO_REQ io_req;
    struct HAS_IO_RES io_res;
    struct HAS_RPC_REQ rpc_req;
    struct HAS_RPC_RES rpc_res;
    struct HAS_CL_FLUSH_REQ flush_req;
    struct HAS_CL_FLUSH_RES flush_res;
    struct HAS_SIMTIME_RES stime_res;
    struct HAS_GD2_MESSAGE gd2_message_req;
};

// Complete message: header immediately followed by the body overlay.
struct HAS_MSG {
    struct HAS_HDR hdr;
    union HAS_MSG_BODY u;
};
compute-runtime-20.13.16352/opencl/source/tbx/tbx_sockets.h000066400000000000000000000014121363734646600234310ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

// NOTE(review): the include target is missing in this copy of the file
// (presumably <string>, given std::string below) — confirm against the original.
#include

namespace NEO {

// Abstract transport to a TBX server. Instances are obtained through create();
// the constructor is protected. Every operation reports success as a bool.
class TbxSockets {
  protected:
    TbxSockets() = default;

  public:
    virtual ~TbxSockets() = default;
    // Connects to the server at the given host name or dotted IP address and port.
    virtual bool init(const std::string &hostNameOrIp, uint16_t port) = 0;
    virtual void close() = 0;

    virtual bool writeGTT(uint32_t offset, uint64_t entry) = 0;

    virtual bool readMemory(uint64_t addr, void *memory, size_t size) = 0;
    virtual bool writeMemory(uint64_t addr, const void *memory, size_t size, uint32_t type) = 0;

    virtual bool readMMIO(uint32_t offset, uint32_t *value) = 0;
    virtual bool writeMMIO(uint32_t offset, uint32_t value) = 0;

    static TbxSockets *create();
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/source/tbx/tbx_sockets_imp.cpp000066400000000000000000000223051363734646600246350ustar00rootroot00000000000000
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/tbx/tbx_sockets_imp.h"

#include "shared/source/helpers/debug_helpers.h"
#include
"shared/source/helpers/string.h" #ifdef WIN32 #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif typedef int socklen_t; #else #include #include #include #include #include typedef struct sockaddr SOCKADDR; #define SOCKET_ERROR -1 #define INVALID_SOCKET -1 #define WSAECONNRESET -1 #endif #include "tbx_proto.h" #include namespace NEO { TbxSocketsImp::TbxSocketsImp(std::ostream &err) : cerrStream(err) { } void TbxSocketsImp::close() { if (0 != m_socket) { #ifdef WIN32 ::shutdown(m_socket, 0x02 /*SD_BOTH*/); ::closesocket(m_socket); ::WSACleanup(); #else ::shutdown(m_socket, SHUT_RDWR); ::close(m_socket); #endif m_socket = 0; } } void TbxSocketsImp::logErrorInfo(const char *tag) { #ifdef WIN32 cerrStream << tag << "TbxSocketsImp Error: <" << WSAGetLastError() << ">" << std::endl; #else cerrStream << tag << strerror(errno) << std::endl; #endif DEBUG_BREAK_IF(!false); } bool TbxSocketsImp::init(const std::string &hostNameOrIp, uint16_t port) { do { #ifdef WIN32 WSADATA wsaData; auto iResult = ::WSAStartup(MAKEWORD(2, 2), &wsaData); if (iResult != NO_ERROR) { cerrStream << "Error at WSAStartup()" << std::endl; break; } #endif m_socket = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (m_socket == INVALID_SOCKET) { logErrorInfo("Error at socket(): "); break; } if (!connectToServer(hostNameOrIp, port)) { break; } HAS_MSG cmd; memset(&cmd, 0, sizeof(cmd)); cmd.hdr.msg_type = HAS_CONTROL_REQ_TYPE; cmd.hdr.size = sizeof(HAS_CONTROL_REQ); cmd.hdr.trans_id = transID++; cmd.u.control_req.time_adv_mask = 1; cmd.u.control_req.time_adv = 0; cmd.u.control_req.async_msg_mask = 1; cmd.u.control_req.async_msg = 0; cmd.u.control_req.has_mask = 1; cmd.u.control_req.has = 1; sendWriteData(&cmd, sizeof(HAS_HDR) + cmd.hdr.size); } while (false); return m_socket != INVALID_SOCKET; } bool TbxSocketsImp::connectToServer(const std::string &hostNameOrIp, uint16_t port) { do { sockaddr_in clientService; if (::isalpha(hostNameOrIp.at(0))) { auto hostData = 
::gethostbyname(hostNameOrIp.c_str()); if (hostData == nullptr) { cerrStream << "Host name look up failed for " << hostNameOrIp.c_str() << std::endl; break; } memcpy_s(&clientService.sin_addr, sizeof(clientService.sin_addr), hostData->h_addr, hostData->h_length); } else { clientService.sin_addr.s_addr = inet_addr(hostNameOrIp.c_str()); } clientService.sin_family = AF_INET; clientService.sin_port = htons(port); if (::connect(m_socket, (SOCKADDR *)&clientService, sizeof(clientService)) == SOCKET_ERROR) { logErrorInfo("Failed to connect: "); cerrStream << "Is TBX server process running on host system [ " << hostNameOrIp.c_str() << ", port " << port << "]?" << std::endl; break; } } while (false); return !!m_socket; } bool TbxSocketsImp::readMMIO(uint32_t offset, uint32_t *data) { bool success; do { HAS_MSG cmd; memset(&cmd, 0, sizeof(cmd)); cmd.hdr.msg_type = HAS_MMIO_REQ_TYPE; cmd.hdr.size = sizeof(HAS_MMIO_REQ); cmd.hdr.trans_id = transID++; cmd.u.mmio_req.offset = offset; cmd.u.mmio_req.data = 0; cmd.u.mmio_req.write = 0; cmd.u.mmio_req.delay = 0; cmd.u.mmio_req.msg_type = MSG_TYPE_MMIO; cmd.u.mmio_req.size = sizeof(uint32_t); success = sendWriteData(&cmd, sizeof(HAS_HDR) + cmd.hdr.size); if (!success) { break; } HAS_MSG resp; success = getResponseData((char *)(&resp), sizeof(HAS_HDR) + sizeof(HAS_MMIO_RES)); if (!success) { break; } if (resp.hdr.msg_type != HAS_MMIO_RES_TYPE || cmd.hdr.trans_id != resp.hdr.trans_id) { *data = 0xdeadbeef; success = false; break; } *data = resp.u.mmio_res.data; success = true; } while (false); DEBUG_BREAK_IF(!success); return success; } bool TbxSocketsImp::writeMMIO(uint32_t offset, uint32_t value) { HAS_MSG cmd; memset(&cmd, 0, sizeof(cmd)); cmd.hdr.msg_type = HAS_MMIO_REQ_TYPE; cmd.hdr.size = sizeof(HAS_MMIO_REQ); cmd.hdr.trans_id = transID++; cmd.u.mmio_req.msg_type = MSG_TYPE_MMIO; cmd.u.mmio_req.offset = offset; cmd.u.mmio_req.data = value; cmd.u.mmio_req.write = 1; cmd.u.mmio_req.size = sizeof(uint32_t); return 
sendWriteData(&cmd, sizeof(HAS_HDR) + cmd.hdr.size); } bool TbxSocketsImp::readMemory(uint64_t addrOffset, void *data, size_t size) { HAS_MSG cmd; memset(&cmd, 0, sizeof(cmd)); cmd.hdr.msg_type = HAS_READ_DATA_REQ_TYPE; cmd.hdr.trans_id = transID++; cmd.hdr.size = sizeof(HAS_READ_DATA_REQ); cmd.u.read_req.address = static_cast(addrOffset); cmd.u.read_req.address_h = static_cast(addrOffset >> 32); cmd.u.read_req.addr_type = 0; cmd.u.read_req.size = static_cast(size); cmd.u.read_req.ownership_req = 0; cmd.u.read_req.frontdoor = 0; cmd.u.read_req.cacheline_disable = cmd.u.read_req.frontdoor; bool success; do { success = sendWriteData(&cmd, sizeof(HAS_HDR) + sizeof(HAS_READ_DATA_REQ)); if (!success) { break; } HAS_MSG resp; success = getResponseData(&resp, sizeof(HAS_HDR) + sizeof(HAS_READ_DATA_RES)); if (!success) { break; } if (resp.hdr.msg_type != HAS_READ_DATA_RES_TYPE || resp.hdr.trans_id != cmd.hdr.trans_id) { cerrStream << "Out of sequence read data packet?" << std::endl; success = false; break; } success = getResponseData(data, size); } while (false); DEBUG_BREAK_IF(!success); return success; } bool TbxSocketsImp::writeMemory(uint64_t physAddr, const void *data, size_t size, uint32_t type) { HAS_MSG cmd; memset(&cmd, 0, sizeof(cmd)); cmd.hdr.msg_type = HAS_WRITE_DATA_REQ_TYPE; cmd.hdr.trans_id = transID++; cmd.hdr.size = sizeof(HAS_WRITE_DATA_REQ); cmd.u.write_req.address = static_cast(physAddr); cmd.u.write_req.address_h = static_cast(physAddr >> 32); cmd.u.write_req.addr_type = 0; cmd.u.write_req.size = static_cast(size); cmd.u.write_req.take_ownership = 0; cmd.u.write_req.frontdoor = 0; cmd.u.write_req.cacheline_disable = cmd.u.write_req.frontdoor; cmd.u.write_req.memory_type = type; bool success; do { success = sendWriteData(&cmd, sizeof(HAS_HDR) + sizeof(HAS_WRITE_DATA_REQ)); if (!success) { break; } success = sendWriteData(data, size); if (!success) { cerrStream << "Problem sending write data?" 
<< std::endl; break; } } while (false); DEBUG_BREAK_IF(!success); return success; } bool TbxSocketsImp::writeGTT(uint32_t offset, uint64_t entry) { HAS_MSG cmd; memset(&cmd, 0, sizeof(cmd)); cmd.hdr.msg_type = HAS_GTT_REQ_TYPE; cmd.hdr.size = sizeof(HAS_GTT64_REQ); cmd.hdr.trans_id = transID++; cmd.u.gtt64_req.write = 1; cmd.u.gtt64_req.offset = offset / sizeof(uint64_t); // the TBX server expects GTT index here, not offset cmd.u.gtt64_req.data = static_cast(entry & 0xffffffff); cmd.u.gtt64_req.data_h = static_cast(entry >> 32); return sendWriteData(&cmd, sizeof(HAS_HDR) + cmd.hdr.size); } bool TbxSocketsImp::sendWriteData(const void *buffer, size_t sizeInBytes) { size_t totalSent = 0; auto dataBuffer = reinterpret_cast(buffer); do { auto bytesSent = ::send(m_socket, &dataBuffer[totalSent], static_cast(sizeInBytes - totalSent), 0); if (bytesSent == 0 || bytesSent == WSAECONNRESET) { logErrorInfo("Connection Closed."); return false; } if (bytesSent == SOCKET_ERROR) { logErrorInfo("Error on send()"); return false; } totalSent += bytesSent; } while (totalSent < sizeInBytes); return true; } bool TbxSocketsImp::getResponseData(void *buffer, size_t sizeInBytes) { size_t totalRecv = 0; auto dataBuffer = static_cast(buffer); do { auto bytesRecv = ::recv(m_socket, &dataBuffer[totalRecv], static_cast(sizeInBytes - totalRecv), 0); if (bytesRecv == 0 || bytesRecv == WSAECONNRESET) { logErrorInfo("Connection Closed."); return false; } if (bytesRecv == SOCKET_ERROR) { logErrorInfo("Error on recv()"); return false; } totalRecv += bytesRecv; } while (totalRecv < sizeInBytes); return true; } } // namespace NEO compute-runtime-20.13.16352/opencl/source/tbx/tbx_sockets_imp.h000066400000000000000000000023121363734646600242760ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/tbx/tbx_sockets.h" #include "os_socket.h" #include namespace NEO { class TbxSocketsImp : public TbxSockets { 
public: TbxSocketsImp(std::ostream &err = std::cerr); ~TbxSocketsImp() override = default; bool init(const std::string &hostNameOrIp, uint16_t port) override; void close() override; bool writeGTT(uint32_t gttOffset, uint64_t entry) override; bool readMemory(uint64_t offset, void *data, size_t size) override; bool writeMemory(uint64_t offset, const void *data, size_t size, uint32_t type) override; bool readMMIO(uint32_t offset, uint32_t *data) override; bool writeMMIO(uint32_t offset, uint32_t data) override; protected: std::ostream &cerrStream; SOCKET m_socket = 0; bool connectToServer(const std::string &hostNameOrIp, uint16_t port); bool sendWriteData(const void *buffer, size_t sizeInBytes); bool getResponseData(void *buffer, size_t sizeInBytes); inline uint32_t getNextTransID() { return transID++; } void logErrorInfo(const char *tag); uint32_t transID = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/source/tracing/000077500000000000000000000000001363734646600215645ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/tracing/CMakeLists.txt000066400000000000000000000010221363734646600243170ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_TRACING ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/tracing_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing_api.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_handle.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_notify.h ${CMAKE_CURRENT_SOURCE_DIR}/tracing_types.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_TRACING}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_TRACING ${RUNTIME_SRCS_TRACING}) compute-runtime-20.13.16352/opencl/source/tracing/tracing_api.cpp000066400000000000000000000136371363734646600245620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tracing/tracing_api.h" #include 
"opencl/source/tracing/tracing_handle.h" #include "opencl/source/tracing/tracing_notify.h" namespace HostSideTracing { // [XYZZ..Z] - { X - enabled/disabled bit, Y - locked/unlocked bit, ZZ..Z - client count bits } std::atomic tracingState(0); std::vector tracingHandle; std::atomic tracingCorrelationId(0); bool addTracingClient() { uint32_t state = tracingState.load(std::memory_order_acquire); state = TRACING_SET_ENABLED_BIT(state); state = TRACING_UNSET_LOCKED_BIT(state); AtomicBackoff backoff; while (!tracingState.compare_exchange_weak(state, state + 1, std::memory_order_release, std::memory_order_acquire)) { if (!TRACING_GET_ENABLED_BIT(state)) { return false; } else if (TRACING_GET_LOCKED_BIT(state)) { DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(state) != 0); state = TRACING_UNSET_LOCKED_BIT(state); backoff.pause(); } else { backoff.pause(); } } return true; } void removeTracingClient() { DEBUG_BREAK_IF(!TRACING_GET_ENABLED_BIT(tracingState.load(std::memory_order_acquire))); DEBUG_BREAK_IF(TRACING_GET_LOCKED_BIT(tracingState.load(std::memory_order_acquire))); DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(tracingState.load(std::memory_order_acquire)) == 0); tracingState.fetch_sub(1, std::memory_order_acq_rel); } static void LockTracingState() { uint32_t state = tracingState.load(std::memory_order_acquire); state = TRACING_ZERO_CLIENT_COUNTER(state); state = TRACING_UNSET_LOCKED_BIT(state); AtomicBackoff backoff; while (!tracingState.compare_exchange_weak(state, TRACING_SET_LOCKED_BIT(state), std::memory_order_release, std::memory_order_acquire)) { state = TRACING_ZERO_CLIENT_COUNTER(state); state = TRACING_UNSET_LOCKED_BIT(state); backoff.pause(); } DEBUG_BREAK_IF(!TRACING_GET_LOCKED_BIT(tracingState.load(std::memory_order_acquire))); DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(tracingState.load(std::memory_order_acquire)) > 0); } static void UnlockTracingState() { DEBUG_BREAK_IF(!TRACING_GET_LOCKED_BIT(tracingState.load(std::memory_order_acquire))); 
DEBUG_BREAK_IF(TRACING_GET_CLIENT_COUNTER(tracingState.load(std::memory_order_acquire)) > 0); tracingState.fetch_and(~TRACING_STATE_LOCKED_BIT, std::memory_order_acq_rel); } } // namespace HostSideTracing using namespace HostSideTracing; cl_int CL_API_CALL clCreateTracingHandleINTEL(cl_device_id device, cl_tracing_callback callback, void *userData, cl_tracing_handle *handle) { if (device == nullptr || callback == nullptr || handle == nullptr) { return CL_INVALID_VALUE; } *handle = new _cl_tracing_handle; if (*handle == nullptr) { return CL_OUT_OF_HOST_MEMORY; } (*handle)->device = device; (*handle)->handle = new TracingHandle(callback, userData); if ((*handle)->handle == nullptr) { delete *handle; return CL_OUT_OF_HOST_MEMORY; } return CL_SUCCESS; } cl_int CL_API_CALL clSetTracingPointINTEL(cl_tracing_handle handle, cl_function_id fid, cl_bool enable) { if (handle == nullptr) { return CL_INVALID_VALUE; } DEBUG_BREAK_IF(handle->handle == nullptr); if (static_cast(fid) >= CL_FUNCTION_COUNT) { return CL_INVALID_VALUE; } handle->handle->setTracingPoint(fid, enable); return CL_SUCCESS; } cl_int CL_API_CALL clDestroyTracingHandleINTEL(cl_tracing_handle handle) { if (handle == nullptr) { return CL_INVALID_VALUE; } DEBUG_BREAK_IF(handle->handle == nullptr); delete handle->handle; delete handle; return CL_SUCCESS; } cl_int CL_API_CALL clEnableTracingINTEL(cl_tracing_handle handle) { if (handle == nullptr) { return CL_INVALID_VALUE; } LockTracingState(); DEBUG_BREAK_IF(handle->handle == nullptr); for (size_t i = 0; i < tracingHandle.size(); ++i) { if (tracingHandle[i] == handle->handle) { UnlockTracingState(); return CL_INVALID_VALUE; } } if (tracingHandle.size() == TRACING_MAX_HANDLE_COUNT) { UnlockTracingState(); return CL_OUT_OF_RESOURCES; } tracingHandle.push_back(handle->handle); if (tracingHandle.size() == 1) { tracingState.fetch_or(TRACING_STATE_ENABLED_BIT, std::memory_order_acq_rel); } UnlockTracingState(); return CL_SUCCESS; } cl_int CL_API_CALL 
clDisableTracingINTEL(cl_tracing_handle handle) { if (handle == nullptr) { return CL_INVALID_VALUE; } LockTracingState(); DEBUG_BREAK_IF(handle->handle == nullptr); for (size_t i = 0; i < tracingHandle.size(); ++i) { if (tracingHandle[i] == handle->handle) { if (tracingHandle.size() == 1) { tracingState.fetch_and(~TRACING_STATE_ENABLED_BIT, std::memory_order_acq_rel); std::vector().swap(tracingHandle); } else { tracingHandle[i] = tracingHandle[tracingHandle.size() - 1]; tracingHandle.pop_back(); } UnlockTracingState(); return CL_SUCCESS; } } UnlockTracingState(); return CL_INVALID_VALUE; } cl_int CL_API_CALL clGetTracingStateINTEL(cl_tracing_handle handle, cl_bool *enable) { if (handle == nullptr || enable == nullptr) { return CL_INVALID_VALUE; } LockTracingState(); *enable = CL_FALSE; DEBUG_BREAK_IF(handle->handle == nullptr); for (size_t i = 0; i < tracingHandle.size(); ++i) { if (tracingHandle[i] == handle->handle) { *enable = CL_TRUE; break; } } UnlockTracingState(); return CL_SUCCESS; } compute-runtime-20.13.16352/opencl/source/tracing/tracing_api.h000066400000000000000000000050721363734646600242210ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/tracing/tracing_types.h" #ifdef __cplusplus extern "C" { #endif /*! Function creates a tracing handle object \param[in] device Device to create tracing handle for \param[in] callback User-defined callback that will be called along with traced API function \param[in] userData Pointer to any data user would like to pass into the callback, can be zero \param[out] handle Tracing handle object that describes current tracing session \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clCreateTracingHandleINTEL(cl_device_id device, cl_tracing_callback callback, void *userData, cl_tracing_handle *handle); /*! Function allows to specify which target API call should be traced. 
By default function will NOT be traced \param[in] handle Tracing handle object \param[in] fid Target function identifier \param[in] enable Flag to enable/disable tracing for target function \return Status code for current operation Thread Safety: no */ cl_int CL_API_CALL clSetTracingPointINTEL(cl_tracing_handle handle, cl_function_id fid, cl_bool enable); /*! Function destroys the tracing handle object and releases all the associated resources \param[in] handle Tracing handle object \return Status code for current operation Thread Safety: no */ cl_int CL_API_CALL clDestroyTracingHandleINTEL(cl_tracing_handle handle); /*! Function enables the tracing process for the handle. Multiple handles can be enabled at a time \param[in] handle Tracing handle object \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clEnableTracingINTEL(cl_tracing_handle handle); /*! Function disables the tracing process for the handle. It will wait until all currently running callbacks are done \param[in] handle Tracing handle object \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clDisableTracingINTEL(cl_tracing_handle handle); /*! 
Function requests the tracing state for the handle \param[in] handle Tracing handle object \param[out] enable Returns TRUE if tracing handle is in use and FALSE otherwise \return Status code for current operation Thread Safety: yes */ cl_int CL_API_CALL clGetTracingStateINTEL(cl_tracing_handle handle, cl_bool *enable); #ifdef __cplusplus } #endif compute-runtime-20.13.16352/opencl/source/tracing/tracing_handle.h000066400000000000000000000022001363734646600246710ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/tracing/tracing_types.h" #include #include namespace HostSideTracing { struct TracingHandle { public: TracingHandle(cl_tracing_callback callback, void *userData) : callback(callback), userData(userData) {} void call(cl_function_id fid, cl_callback_data *callbackData) { callback(fid, callbackData, userData); } void setTracingPoint(cl_function_id fid, bool enable) { DEBUG_BREAK_IF(static_cast(fid) >= CL_FUNCTION_COUNT); mask[static_cast(fid)] = enable; } bool getTracingPoint(cl_function_id fid) const { DEBUG_BREAK_IF(static_cast(fid) >= CL_FUNCTION_COUNT); return mask[static_cast(fid)]; } private: cl_tracing_callback callback; void *userData; std::bitset mask; }; } // namespace HostSideTracing struct _cl_tracing_handle { cl_device_id device; HostSideTracing::TracingHandle *handle; }; compute-runtime-20.13.16352/opencl/source/tracing/tracing_notify.h000066400000000000000000011025261363734646600247630ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/cpuintrinsics.h" #include "opencl/source/tracing/tracing_handle.h" #include #include #include namespace HostSideTracing { #define TRACING_SET_ENABLED_BIT(state) ((state) | (HostSideTracing::TRACING_STATE_ENABLED_BIT)) #define TRACING_UNSET_ENABLED_BIT(state) 
((state) & (~HostSideTracing::TRACING_STATE_ENABLED_BIT)) #define TRACING_GET_ENABLED_BIT(state) ((state) & (HostSideTracing::TRACING_STATE_ENABLED_BIT)) #define TRACING_SET_LOCKED_BIT(state) ((state) | (HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_UNSET_LOCKED_BIT(state) ((state) & (~HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_GET_LOCKED_BIT(state) ((state) & (HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_ZERO_CLIENT_COUNTER(state) ((state) & (HostSideTracing::TRACING_STATE_ENABLED_BIT | HostSideTracing::TRACING_STATE_LOCKED_BIT)) #define TRACING_GET_CLIENT_COUNTER(state) ((state) & (~(HostSideTracing::TRACING_STATE_ENABLED_BIT | HostSideTracing::TRACING_STATE_LOCKED_BIT))) #define TRACING_ENTER(name, ...) \ bool isHostSideTracingEnabled_##name = false; \ HostSideTracing::name##Tracer tracer_##name; \ if (TRACING_GET_ENABLED_BIT(HostSideTracing::tracingState.load(std::memory_order_acquire))) { \ isHostSideTracingEnabled_##name = HostSideTracing::addTracingClient(); \ if (isHostSideTracingEnabled_##name) { \ tracer_##name.enter(__VA_ARGS__); \ } \ } #define TRACING_EXIT(name, ...) 
\ if (isHostSideTracingEnabled_##name) { \ tracer_##name.exit(__VA_ARGS__); \ HostSideTracing::removeTracingClient(); \ } typedef enum _tracing_notify_state_t { TRACING_NOTIFY_STATE_NOTHING_CALLED = 0, TRACING_NOTIFY_STATE_ENTER_CALLED = 1, TRACING_NOTIFY_STATE_EXIT_CALLED = 2, } tracing_notify_state_t; constexpr size_t TRACING_MAX_HANDLE_COUNT = 16; constexpr uint32_t TRACING_STATE_ENABLED_BIT = 0x80000000u; constexpr uint32_t TRACING_STATE_LOCKED_BIT = 0x40000000u; extern std::atomic tracingState; extern std::vector tracingHandle; extern std::atomic tracingCorrelationId; bool addTracingClient(); void removeTracingClient(); class AtomicBackoff { public: AtomicBackoff() {} void pause() { if (count < loopsBeforeYield) { for (uint32_t i = 0; i < count; i++) { NEO::CpuIntrinsics::pause(); } count *= 2; } else { std::this_thread::yield(); } } private: static const uint32_t loopsBeforeYield = 16; uint32_t count = 1; }; class clBuildProgramTracer { public: clBuildProgramTracer() {} void enter(cl_program *program, cl_uint *numDevices, const cl_device_id **deviceList, const char **options, void(CL_CALLBACK **funcNotify)(cl_program program, void *userData), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.numDevices = numDevices; params.deviceList = deviceList; params.options = options; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clBuildProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clBuildProgram)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clBuildProgram, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clBuildProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clBuildProgram, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clBuildProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clBuildProgram params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCloneKernelTracer { public: clCloneKernelTracer() {} void enter(cl_kernel *sourceKernel, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sourceKernel = sourceKernel; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCloneKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCloneKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCloneKernel, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_kernel *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = 
CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCloneKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCloneKernel, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCloneKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCloneKernel params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCompileProgramTracer { public: clCompileProgramTracer() {} void enter(cl_program *program, cl_uint *numDevices, const cl_device_id **deviceList, const char **options, cl_uint *numInputHeaders, const cl_program **inputHeaders, const char ***headerIncludeNames, void(CL_CALLBACK **funcNotify)(cl_program program, void *userData), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.numDevices = numDevices; params.deviceList = deviceList; params.options = options; params.numInputHeaders = numInputHeaders; params.inputHeaders = inputHeaders; params.headerIncludeNames = headerIncludeNames; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCompileProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clCompileProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCompileProgram, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCompileProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCompileProgram, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCompileProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCompileProgram params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateBufferTracer { public: clCreateBufferTracer() {} void enter(cl_context *context, cl_mem_flags *flags, size_t *size, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.size = size; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateBuffer)) { data.correlationData = 
correlationData + i; handle->call(CL_FUNCTION_clCreateBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateCommandQueueTracer { public: clCreateCommandQueueTracer() {} void enter(cl_context *context, cl_device_id *device, cl_command_queue_properties *properties, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.device = device; params.properties = properties; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateCommandQueue, 
&data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_command_queue *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateCommandQueue, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateCommandQueue params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateCommandQueueWithPropertiesTracer { public: clCreateCommandQueueWithPropertiesTracer() {} void enter(cl_context *context, cl_device_id *device, const cl_queue_properties **properties, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.device = device; params.properties = properties; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateCommandQueueWithProperties"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueueWithProperties)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clCreateCommandQueueWithProperties, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_command_queue *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateCommandQueueWithProperties)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateCommandQueueWithProperties, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateCommandQueueWithPropertiesTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateCommandQueueWithProperties params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateContextTracer { public: clCreateContextTracer() {} void enter(const cl_context_properties **properties, cl_uint *numDevices, const cl_device_id **devices, void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *), void **userData, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.properties = properties; params.numDevices = numDevices; params.devices = devices; params.funcNotify = funcNotify; params.userData = userData; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateContext"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); 
++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContext, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_context *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContext, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateContextTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateContext params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateContextFromTypeTracer { public: clCreateContextFromTypeTracer() {} void enter(const cl_context_properties **properties, cl_device_type *deviceType, void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *), void **userData, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.properties = properties; params.deviceType = deviceType; params.funcNotify = funcNotify; params.userData = userData; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateContextFromType"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= 
TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContextFromType)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContextFromType, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_context *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateContextFromType)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateContextFromType, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateContextFromTypeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateContextFromType params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateImageTracer { public: clCreateImageTracer() {} void enter(cl_context *context, cl_mem_flags *flags, const cl_image_format **imageFormat, const cl_image_desc **imageDesc, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.imageFormat = imageFormat; params.imageDesc = imageDesc; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; 
DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateImage params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateImage2DTracer { public: clCreateImage2DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, const cl_image_format **imageFormat, size_t *imageWidth, size_t *imageHeight, size_t *imageRowPitch, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.imageFormat = imageFormat; params.imageWidth = imageWidth; params.imageHeight = imageHeight; params.imageRowPitch = imageRowPitch; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); 
data.functionName = "clCreateImage2D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage2D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage2D, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage2D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage2D, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateImage2DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateImage2D params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateImage3DTracer { public: clCreateImage3DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, const cl_image_format **imageFormat, size_t *imageWidth, size_t *imageHeight, size_t *imageDepth, size_t *imageRowPitch, size_t *imageSlicePitch, void **hostPtr, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.imageFormat = imageFormat; params.imageWidth = imageWidth; params.imageHeight = imageHeight; params.imageDepth = imageDepth; params.imageRowPitch = 
imageRowPitch; params.imageSlicePitch = imageSlicePitch; params.hostPtr = hostPtr; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateImage3D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage3D, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateImage3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateImage3D, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateImage3DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateImage3D params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateKernelTracer { public: clCreateKernelTracer() {} void enter(cl_program *program, const char **kernelName, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.kernelName = kernelName; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; 
data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateKernel, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_kernel *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateKernel, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateKernel params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateKernelsInProgramTracer { public: clCreateKernelsInProgramTracer() {} void enter(cl_program *program, cl_uint *numKernels, cl_kernel **kernels, cl_uint **numKernelsRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.numKernels = numKernels; params.kernels = kernels; params.numKernelsRet = numKernelsRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); 
data.functionName = "clCreateKernelsInProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernelsInProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateKernelsInProgram, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateKernelsInProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateKernelsInProgram, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateKernelsInProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateKernelsInProgram params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreatePipeTracer { public: clCreatePipeTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_uint *pipePacketSize, cl_uint *pipeMaxPackets, const cl_pipe_properties **properties, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.pipePacketSize = pipePacketSize; params.pipeMaxPackets = pipeMaxPackets; params.properties = properties; params.errcodeRet = errcodeRet; data.site = 
CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreatePipe"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreatePipe)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreatePipe, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreatePipe)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreatePipe, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreatePipeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreatePipe params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithBinaryTracer { public: clCreateProgramWithBinaryTracer() {} void enter(cl_context *context, cl_uint *numDevices, const cl_device_id **deviceList, const size_t **lengths, const unsigned char ***binaries, cl_int **binaryStatus, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.numDevices = numDevices; params.deviceList = deviceList; params.lengths = lengths; params.binaries = binaries; 
params.binaryStatus = binaryStatus; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithBinary"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBinary)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithBinary, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBinary)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithBinary, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithBinaryTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithBinary params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithBuiltInKernelsTracer { public: clCreateProgramWithBuiltInKernelsTracer() {} void enter(cl_context *context, cl_uint *numDevices, const cl_device_id **deviceList, const char **kernelNames, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); 
params.context = context; params.numDevices = numDevices; params.deviceList = deviceList; params.kernelNames = kernelNames; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithBuiltInKernels"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBuiltInKernels)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithBuiltInKernels, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithBuiltInKernels)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithBuiltInKernels, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithBuiltInKernelsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithBuiltInKernels params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithILTracer { public: clCreateProgramWithILTracer() {} void enter(cl_context *context, const void **il, size_t *length, cl_int **errcodeRet) 
{ DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.il = il; params.length = length; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithIL"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithIL)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithIL, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithIL)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithIL, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithILTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithIL params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateProgramWithSourceTracer { public: clCreateProgramWithSourceTracer() {} void enter(cl_context *context, cl_uint *count, const char ***strings, const size_t **lengths, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != 
TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.count = count; params.strings = strings; params.lengths = lengths; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateProgramWithSource"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithSource)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithSource, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateProgramWithSource)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateProgramWithSource, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateProgramWithSourceTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateProgramWithSource params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateSamplerTracer { public: clCreateSamplerTracer() {} void enter(cl_context *context, cl_bool *normalizedCoords, cl_addressing_mode *addressingMode, cl_filter_mode *filterMode, cl_int 
**errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.normalizedCoords = normalizedCoords; params.addressingMode = addressingMode; params.filterMode = filterMode; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateSampler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSampler, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_sampler *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSampler, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateSamplerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateSampler params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateSamplerWithPropertiesTracer { public: clCreateSamplerWithPropertiesTracer() {} void enter(cl_context *context, const cl_sampler_properties **samplerProperties, cl_int 
**errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.samplerProperties = samplerProperties; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateSamplerWithProperties"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSamplerWithProperties)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSamplerWithProperties, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_sampler *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSamplerWithProperties)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSamplerWithProperties, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateSamplerWithPropertiesTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateSamplerWithProperties params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateSubBufferTracer { public: clCreateSubBufferTracer() {} void enter(cl_mem *buffer, cl_mem_flags *flags, cl_buffer_create_type *bufferCreateType, const 
void **bufferCreateInfo, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.buffer = buffer; params.flags = flags; params.bufferCreateType = bufferCreateType; params.bufferCreateInfo = bufferCreateInfo; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateSubBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSubBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSubBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateSubBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateSubBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateSubBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateSubBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateUserEventTracer { public: clCreateUserEventTracer() {} void enter(cl_context *context, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != 
TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateUserEvent"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateUserEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateUserEvent, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_event *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateUserEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateUserEvent, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateUserEventTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateUserEvent params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueBarrierTracer { public: clEnqueueBarrierTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); 
data.functionName = "clEnqueueBarrier"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrier)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueBarrier, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrier)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueBarrier, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueBarrierTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueBarrier params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueBarrierWithWaitListTracer { public: clEnqueueBarrierWithWaitListTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName 
= "clEnqueueBarrierWithWaitList"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrierWithWaitList)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueBarrierWithWaitList, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueBarrierWithWaitList)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueBarrierWithWaitList, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueBarrierWithWaitListTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueBarrierWithWaitList params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyBufferTracer { public: clEnqueueCopyBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcBuffer, cl_mem *dstBuffer, size_t *srcOffset, size_t *dstOffset, size_t *cb, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcBuffer = srcBuffer; params.dstBuffer = dstBuffer; params.srcOffset = srcOffset; params.dstOffset = 
dstOffset; params.cb = cb; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyBufferRectTracer { public: clEnqueueCopyBufferRectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcBuffer, cl_mem *dstBuffer, const size_t **srcOrigin, const size_t **dstOrigin, const size_t **region, size_t *srcRowPitch, size_t *srcSlicePitch, 
size_t *dstRowPitch, size_t *dstSlicePitch, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcBuffer = srcBuffer; params.dstBuffer = dstBuffer; params.srcOrigin = srcOrigin; params.dstOrigin = dstOrigin; params.region = region; params.srcRowPitch = srcRowPitch; params.srcSlicePitch = srcSlicePitch; params.dstRowPitch = dstRowPitch; params.dstSlicePitch = dstSlicePitch; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyBufferRect"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferRect, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferRect, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyBufferRectTracer() { DEBUG_BREAK_IF(state == 
TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyBufferRect params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyBufferToImageTracer { public: clEnqueueCopyBufferToImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcBuffer, cl_mem *dstImage, size_t *srcOffset, const size_t **dstOrigin, const size_t **region, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcBuffer = srcBuffer; params.dstImage = dstImage; params.srcOffset = srcOffset; params.dstOrigin = dstOrigin; params.region = region; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyBufferToImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferToImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferToImage, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == 
nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyBufferToImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyBufferToImage, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyBufferToImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyBufferToImage params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyImageTracer { public: clEnqueueCopyImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcImage, cl_mem *dstImage, const size_t **srcOrigin, const size_t **dstOrigin, const size_t **region, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcImage = srcImage; params.dstImage = dstImage; params.srcOrigin = srcOrigin; params.dstOrigin = dstOrigin; params.region = region; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImage, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; 
data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImage, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyImage params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueCopyImageToBufferTracer { public: clEnqueueCopyImageToBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *srcImage, cl_mem *dstBuffer, const size_t **srcOrigin, const size_t **region, size_t *dstOffset, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.srcImage = srcImage; params.dstBuffer = dstBuffer; params.srcOrigin = srcOrigin; params.region = region; params.dstOffset = dstOffset; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueCopyImageToBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImageToBuffer)) { 
data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImageToBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueCopyImageToBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueCopyImageToBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueCopyImageToBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueCopyImageToBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueFillBufferTracer { public: clEnqueueFillBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, const void **pattern, size_t *patternSize, size_t *offset, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.pattern = pattern; params.patternSize = patternSize; params.offset = offset; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueFillBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); 
DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueFillBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueFillBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueFillImageTracer { public: clEnqueueFillImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *image, const void **fillColor, const size_t **origin, const size_t **region, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.fillColor = fillColor; params.origin = origin; params.region = region; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, 
std::memory_order_acq_rel); data.functionName = "clEnqueueFillImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillImage, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueFillImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueFillImage, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueFillImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueFillImage params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMapBufferTracer { public: clEnqueueMapBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingMap, cl_map_flags *mapFlags, size_t *offset, size_t *cb, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingMap = blockingMap; params.mapFlags = mapFlags; params.offset = offset; 
params.cb = cb; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMapBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMapBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMapBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMapImageTracer { public: clEnqueueMapImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *image, cl_bool *blockingMap, cl_map_flags *mapFlags, const size_t **origin, const size_t **region, size_t **imageRowPitch, size_t **imageSlicePitch, 
cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.blockingMap = blockingMap; params.mapFlags = mapFlags; params.origin = origin; params.region = region; params.imageRowPitch = imageRowPitch; params.imageSlicePitch = imageSlicePitch; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMapImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapImage, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMapImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMapImage, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMapImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMapImage params; cl_callback_data data; 
uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMarkerTracer { public: clEnqueueMarkerTracer() {} void enter(cl_command_queue *commandQueue, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMarker"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMarker)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarker, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMarker)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarker, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMarkerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMarker params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMarkerWithWaitListTracer { public: 
clEnqueueMarkerWithWaitListTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMarkerWithWaitList"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMarkerWithWaitList)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarkerWithWaitList, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMarkerWithWaitList)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMarkerWithWaitList, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMarkerWithWaitListTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMarkerWithWaitList params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = 
TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueMigrateMemObjectsTracer { public: clEnqueueMigrateMemObjectsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numMemObjects, const cl_mem **memObjects, cl_mem_migration_flags *flags, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numMemObjects = numMemObjects; params.memObjects = memObjects; params.flags = flags; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueMigrateMemObjects"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMigrateMemObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMigrateMemObjects, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueMigrateMemObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueMigrateMemObjects, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueMigrateMemObjectsTracer() { 
DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueMigrateMemObjects params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueNDRangeKernelTracer { public: clEnqueueNDRangeKernelTracer() {} void enter(cl_command_queue *commandQueue, cl_kernel *kernel, cl_uint *workDim, const size_t **globalWorkOffset, const size_t **globalWorkSize, const size_t **localWorkSize, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.kernel = kernel; params.workDim = workDim; params.globalWorkOffset = globalWorkOffset; params.globalWorkSize = globalWorkSize; params.localWorkSize = localWorkSize; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueNDRangeKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNDRangeKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueNDRangeKernel, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { 
TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNDRangeKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueNDRangeKernel, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueNDRangeKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueNDRangeKernel params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueNativeKernelTracer { public: clEnqueueNativeKernelTracer() {} void enter(cl_command_queue *commandQueue, void(CL_CALLBACK **userFunc)(void *), void **args, size_t *cbArgs, cl_uint *numMemObjects, const cl_mem **memList, const void ***argsMemLoc, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.userFunc = userFunc; params.args = args; params.cbArgs = cbArgs; params.numMemObjects = numMemObjects; params.memList = memList; params.argsMemLoc = argsMemLoc; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueNativeKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNativeKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueNativeKernel, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } 
void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueNativeKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueNativeKernel, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueNativeKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueNativeKernel params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReadBufferTracer { public: clEnqueueReadBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingRead, size_t *offset, size_t *cb, void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingRead = blockingRead; params.offset = offset; params.cb = cb; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueReadBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReadBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueReadBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReadBufferRectTracer { public: clEnqueueReadBufferRectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingRead, const size_t **bufferOrigin, const size_t **hostOrigin, const size_t **region, size_t *bufferRowPitch, size_t *bufferSlicePitch, size_t *hostRowPitch, size_t *hostSlicePitch, void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingRead = blockingRead; params.bufferOrigin = bufferOrigin; params.hostOrigin = hostOrigin; params.region = region; params.bufferRowPitch = bufferRowPitch; params.bufferSlicePitch = bufferSlicePitch; params.hostRowPitch = hostRowPitch; params.hostSlicePitch = hostSlicePitch; params.ptr = ptr; params.numEventsInWaitList = 
numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueReadBufferRect"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBufferRect, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadBufferRect, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReadBufferRectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueReadBufferRect params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReadImageTracer { public: clEnqueueReadImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *image, cl_bool *blockingRead, const size_t **origin, const size_t **region, size_t *rowPitch, size_t *slicePitch, void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, 
cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.blockingRead = blockingRead; params.origin = origin; params.region = region; params.rowPitch = rowPitch; params.slicePitch = slicePitch; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueReadImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadImage, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReadImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReadImage, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReadImageTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueReadImage params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class 
clEnqueueSVMFreeTracer { public: clEnqueueSVMFreeTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numSvmPointers, void ***svmPointers, void(CL_CALLBACK **pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void **svmPointers, void *userData), void **userData, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numSvmPointers = numSvmPointers; params.svmPointers = svmPointers; params.pfnFreeFunc = pfnFreeFunc; params.userData = userData; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMFree"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMFree)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMFree, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMFree)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMFree, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } 
~clEnqueueSVMFreeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMFree params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMapTracer { public: clEnqueueSVMMapTracer() {} void enter(cl_command_queue *commandQueue, cl_bool *blockingMap, cl_map_flags *mapFlags, void **svmPtr, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.blockingMap = blockingMap; params.mapFlags = mapFlags; params.svmPtr = svmPtr; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMap"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMap)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMap, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMap)) { 
data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMap, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMapTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMap params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMemFillTracer { public: clEnqueueSVMMemFillTracer() {} void enter(cl_command_queue *commandQueue, void **svmPtr, const void **pattern, size_t *patternSize, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.svmPtr = svmPtr; params.pattern = pattern; params.patternSize = patternSize; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMemFill"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemFill)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemFill, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < 
tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemFill)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemFill, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMemFillTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMemFill params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMemcpyTracer { public: clEnqueueSVMMemcpyTracer() {} void enter(cl_command_queue *commandQueue, cl_bool *blockingCopy, void **dstPtr, const void **srcPtr, size_t *size, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.blockingCopy = blockingCopy; params.dstPtr = dstPtr; params.srcPtr = srcPtr; params.size = size; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMemcpy"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemcpy)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemcpy, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; 
data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMemcpy)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMemcpy, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMemcpyTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMemcpy params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMMigrateMemTracer { public: clEnqueueSVMMigrateMemTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numSvmPointers, const void ***svmPointers, const size_t **sizes, const cl_mem_migration_flags *flags, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numSvmPointers = numSvmPointers; params.svmPointers = svmPointers; params.sizes = sizes; params.flags = flags; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMMigrateMem"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMigrateMem)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clEnqueueSVMMigrateMem, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMMigrateMem)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMMigrateMem, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMMigrateMemTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMMigrateMem params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueSVMUnmapTracer { public: clEnqueueSVMUnmapTracer() {} void enter(cl_command_queue *commandQueue, void **svmPtr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.svmPtr = svmPtr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueSVMUnmap"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMUnmap)) { 
data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMUnmap, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueSVMUnmap)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueSVMUnmap, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueSVMUnmapTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueSVMUnmap params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueTaskTracer { public: clEnqueueTaskTracer() {} void enter(cl_command_queue *commandQueue, cl_kernel *kernel, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.kernel = kernel; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueTask"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueTask)) { 
data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueTask, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueTask)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueTask, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueTaskTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueTask params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueUnmapMemObjectTracer { public: clEnqueueUnmapMemObjectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *memobj, void **mappedPtr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.memobj = memobj; params.mappedPtr = mappedPtr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueUnmapMemObject"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clEnqueueUnmapMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueUnmapMemObject, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueUnmapMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueUnmapMemObject, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueUnmapMemObjectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueUnmapMemObject params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWaitForEventsTracer { public: clEnqueueWaitForEventsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numEvents, const cl_event **eventList) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numEvents = numEvents; params.eventList = eventList; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWaitForEvents"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWaitForEvents)) { 
data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWaitForEvents, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWaitForEvents)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWaitForEvents, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWaitForEventsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWaitForEvents params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWriteBufferTracer { public: clEnqueueWriteBufferTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingWrite, size_t *offset, size_t *cb, const void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingWrite = blockingWrite; params.offset = offset; params.cb = cb; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWriteBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= 
TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWriteBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWriteBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWriteBufferRectTracer { public: clEnqueueWriteBufferRectTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *buffer, cl_bool *blockingWrite, const size_t **bufferOrigin, const size_t **hostOrigin, const size_t **region, size_t *bufferRowPitch, size_t *bufferSlicePitch, size_t *hostRowPitch, size_t *hostSlicePitch, const void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.buffer = buffer; params.blockingWrite = blockingWrite; params.bufferOrigin = bufferOrigin; params.hostOrigin = hostOrigin; params.region = region; params.bufferRowPitch = bufferRowPitch; 
params.bufferSlicePitch = bufferSlicePitch; params.hostRowPitch = hostRowPitch; params.hostSlicePitch = hostSlicePitch; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWriteBufferRect"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBufferRect, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteBufferRect)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteBufferRect, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWriteBufferRectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWriteBufferRect params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueWriteImageTracer { public: clEnqueueWriteImageTracer() {} void enter(cl_command_queue *commandQueue, cl_mem *image, 
cl_bool *blockingWrite, const size_t **origin, const size_t **region, size_t *inputRowPitch, size_t *inputSlicePitch, const void **ptr, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.image = image; params.blockingWrite = blockingWrite; params.origin = origin; params.region = region; params.inputRowPitch = inputRowPitch; params.inputSlicePitch = inputSlicePitch; params.ptr = ptr; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueWriteImage"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteImage, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueWriteImage)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueWriteImage, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueWriteImageTracer() { DEBUG_BREAK_IF(state == 
TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueWriteImage params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clFinishTracer { public: clFinishTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clFinish"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFinish)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFinish, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFinish)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFinish, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clFinishTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clFinish params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clFlushTracer { public: clFlushTracer() {} void 
enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clFlush"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFlush)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFlush, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clFlush)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clFlush, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clFlushTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clFlush params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetCommandQueueInfoTracer { public: clGetCommandQueueInfoTracer() {} void enter(cl_command_queue *commandQueue, cl_command_queue_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.paramName = paramName; 
params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetCommandQueueInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetCommandQueueInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetCommandQueueInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetCommandQueueInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetCommandQueueInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetCommandQueueInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetCommandQueueInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetContextInfoTracer { public: clGetContextInfoTracer() {} void enter(cl_context *context, cl_context_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; 
params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetContextInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetContextInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetContextInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetContextInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetContextInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetContextInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetContextInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetDeviceAndHostTimerTracer { public: clGetDeviceAndHostTimerTracer() {} void enter(cl_device_id *device, cl_ulong **deviceTimestamp, cl_ulong **hostTimestamp) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; params.deviceTimestamp = 
deviceTimestamp; params.hostTimestamp = hostTimestamp; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetDeviceAndHostTimer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceAndHostTimer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceAndHostTimer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceAndHostTimer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceAndHostTimer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetDeviceAndHostTimerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetDeviceAndHostTimer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetDeviceIDsTracer { public: clGetDeviceIDsTracer() {} void enter(cl_platform_id *platform, cl_device_type *deviceType, cl_uint *numEntries, cl_device_id **devices, cl_uint **numDevices) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; params.deviceType = deviceType; params.numEntries 
= numEntries; params.devices = devices; params.numDevices = numDevices; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetDeviceIDs"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceIDs, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceIDs, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetDeviceIDsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetDeviceIDs params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetDeviceInfoTracer { public: clGetDeviceInfoTracer() {} void enter(cl_device_id *device, cl_device_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = 
paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetDeviceInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetDeviceInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetDeviceInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetDeviceInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetDeviceInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetEventInfoTracer { public: clGetEventInfoTracer() {} void enter(cl_event *event, cl_event_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; 
params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetEventInfo"; data.functionParams = static_cast<const void *>(&params); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetEventInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetEventInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetEventInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetEventInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetEventInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetEventInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; };

// Tracing helper for clGetEventProfilingInfo. enter() hands the call's
// arguments to the registered tracing handles; exit() hands them the return
// value. The `state` member debug-checks the order enter() -> exit().
class clGetEventProfilingInfoTracer {
  public:
    clGetEventProfilingInfoTracer() {}

    // Stores the argument pointers in `params`, takes a new correlation id and
    // calls every handle whose CL_FUNCTION_clGetEventProfilingInfo tracing
    // point is set, with data.site == CL_CALLBACK_SITE_ENTER.
    void enter(cl_event *event,
               cl_profiling_info *paramName,
               size_t *paramValueSize,
               void **paramValue,
               size_t **paramValueSizeRet) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED);

        params.event = event;
        params.paramName = paramName;
        params.paramValueSize = paramValueSize;
        params.paramValue = paramValue;
        params.paramValueSizeRet = paramValueSizeRet;

        data.site = CL_CALLBACK_SITE_ENTER;
        data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel);
        data.functionName = "clGetEventProfilingInfo";
        // The parameter block is published to callbacks as an untyped pointer.
        data.functionParams = static_cast<const void *>(&params);
        data.functionReturnValue = nullptr; // nothing has been returned yet

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetEventProfilingInfo)) {
                // Handle i always sees slot i of correlationData.
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetEventProfilingInfo, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_ENTER_CALLED;
    }

    // Calls the handles again with data.site == CL_CALLBACK_SITE_EXIT and
    // `retVal` as the function return value. Expects a preceding enter().
    void exit(cl_int *retVal) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED);

        data.site = CL_CALLBACK_SITE_EXIT;
        data.functionReturnValue = retVal;

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetEventProfilingInfo)) {
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetEventProfilingInfo, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_EXIT_CALLED;
    }

    // Debug-checks that enter() was not left without a matching exit().
    ~clGetEventProfilingInfoTracer() {
        DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED);
    }

  private:
    cl_params_clGetEventProfilingInfo params;
    cl_callback_data data;
    uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; // indexed by handle position
    tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED;
};

class clGetExtensionFunctionAddressTracer { public: clGetExtensionFunctionAddressTracer() {} void enter(const char **funcName) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.funcName = funcName; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel);
data.functionName = "clGetExtensionFunctionAddress"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddress)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddress, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddress)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddress, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetExtensionFunctionAddressTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetExtensionFunctionAddress params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetExtensionFunctionAddressForPlatformTracer { public: clGetExtensionFunctionAddressForPlatformTracer() {} void enter(cl_platform_id *platform, const char **funcName) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; params.funcName = funcName; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = 
"clGetExtensionFunctionAddressForPlatform"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetExtensionFunctionAddressForPlatform, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetExtensionFunctionAddressForPlatformTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetExtensionFunctionAddressForPlatform params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetHostTimerTracer { public: clGetHostTimerTracer() {} void enter(cl_device_id *device, cl_ulong **hostTimestamp) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; params.hostTimestamp = hostTimestamp; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetHostTimer"; 
data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetHostTimer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetHostTimer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetHostTimer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetHostTimer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetHostTimerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetHostTimer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetImageInfoTracer { public: clGetImageInfoTracer() {} void enter(cl_mem *image, cl_image_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.image = image; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetImageInfo"; data.functionParams = 
static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetImageInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetImageInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetImageInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetImageInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetImageInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetImageInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetKernelArgInfoTracer { public: clGetKernelArgInfoTracer() {} void enter(cl_kernel *kernel, cl_uint *argIndx, cl_kernel_arg_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.argIndx = argIndx; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = 
"clGetKernelArgInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelArgInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelArgInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelArgInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelArgInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetKernelArgInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetKernelArgInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetKernelInfoTracer { public: clGetKernelInfoTracer() {} void enter(cl_kernel *kernel, cl_kernel_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName 
= "clGetKernelInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetKernelInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetKernelInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetKernelSubGroupInfoTracer { public: clGetKernelSubGroupInfoTracer() {} void enter(cl_kernel *kernel, cl_device_id *device, cl_kernel_sub_group_info *paramName, size_t *inputValueSize, const void **inputValue, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.device = device; params.paramName = paramName; params.inputValueSize = inputValueSize; params.inputValue = inputValue; params.paramValueSize = paramValueSize; params.paramValue = paramValue; 
params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetKernelSubGroupInfo"; data.functionParams = static_cast<const void *>(&params); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelSubGroupInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelSubGroupInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetKernelSubGroupInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetKernelSubGroupInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetKernelSubGroupInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetKernelSubGroupInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; };

// Tracing helper for clGetKernelWorkGroupInfo. enter() hands the call's
// arguments to the registered tracing handles; exit() hands them the return
// value. The `state` member debug-checks the order enter() -> exit().
class clGetKernelWorkGroupInfoTracer {
  public:
    clGetKernelWorkGroupInfoTracer() {}

    // Stores the argument pointers in `params`, takes a new correlation id and
    // calls every handle whose CL_FUNCTION_clGetKernelWorkGroupInfo tracing
    // point is set, with data.site == CL_CALLBACK_SITE_ENTER.
    void enter(cl_kernel *kernel,
               cl_device_id *device,
               cl_kernel_work_group_info *paramName,
               size_t *paramValueSize,
               void **paramValue,
               size_t **paramValueSizeRet) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED);

        params.kernel = kernel;
        params.device = device;
        params.paramName = paramName;
        params.paramValueSize = paramValueSize;
        params.paramValue = paramValue;
        params.paramValueSizeRet = paramValueSizeRet;

        data.site = CL_CALLBACK_SITE_ENTER;
        data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel);
        data.functionName = "clGetKernelWorkGroupInfo";
        // The parameter block is published to callbacks as an untyped pointer.
        data.functionParams = static_cast<const void *>(&params);
        data.functionReturnValue = nullptr; // nothing has been returned yet

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetKernelWorkGroupInfo)) {
                // Handle i always sees slot i of correlationData.
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetKernelWorkGroupInfo, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_ENTER_CALLED;
    }

    // Calls the handles again with data.site == CL_CALLBACK_SITE_EXIT and
    // `retVal` as the function return value. Expects a preceding enter().
    void exit(cl_int *retVal) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED);

        data.site = CL_CALLBACK_SITE_EXIT;
        data.functionReturnValue = retVal;

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetKernelWorkGroupInfo)) {
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetKernelWorkGroupInfo, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_EXIT_CALLED;
    }

    // Debug-checks that enter() was not left without a matching exit().
    ~clGetKernelWorkGroupInfoTracer() {
        DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED);
    }

  private:
    cl_params_clGetKernelWorkGroupInfo params;
    cl_callback_data data;
    uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; // indexed by handle position
    tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED;
};

class clGetMemObjectInfoTracer { public: clGetMemObjectInfoTracer() {} void enter(cl_mem *memobj, cl_mem_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state !=
TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetMemObjectInfo"; data.functionParams = static_cast<const void *>(&params); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetMemObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetMemObjectInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetMemObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetMemObjectInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetMemObjectInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetMemObjectInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; };

// Tracing helper for clGetPipeInfo. enter() hands the call's arguments to
// the registered tracing handles; exit() hands them the return value.
// The `state` member debug-checks the order enter() -> exit().
class clGetPipeInfoTracer {
  public:
    clGetPipeInfoTracer() {}

    // Stores the argument pointers in `params`, takes a new correlation id and
    // calls every handle whose CL_FUNCTION_clGetPipeInfo tracing point is set,
    // with data.site == CL_CALLBACK_SITE_ENTER.
    void enter(cl_mem *pipe,
               cl_pipe_info *paramName,
               size_t *paramValueSize,
               void **paramValue,
               size_t **paramValueSizeRet) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED);

        params.pipe = pipe;
        params.paramName = paramName;
        params.paramValueSize = paramValueSize;
        params.paramValue = paramValue;
        params.paramValueSizeRet = paramValueSizeRet;

        data.site = CL_CALLBACK_SITE_ENTER;
        data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel);
        data.functionName = "clGetPipeInfo";
        // The parameter block is published to callbacks as an untyped pointer.
        data.functionParams = static_cast<const void *>(&params);
        data.functionReturnValue = nullptr; // nothing has been returned yet

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetPipeInfo)) {
                // Handle i always sees slot i of correlationData.
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetPipeInfo, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_ENTER_CALLED;
    }

    // Calls the handles again with data.site == CL_CALLBACK_SITE_EXIT and
    // `retVal` as the function return value. Expects a preceding enter().
    void exit(cl_int *retVal) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED);

        data.site = CL_CALLBACK_SITE_EXIT;
        data.functionReturnValue = retVal;

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetPipeInfo)) {
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetPipeInfo, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_EXIT_CALLED;
    }

    // Debug-checks that enter() was not left without a matching exit().
    ~clGetPipeInfoTracer() {
        DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED);
    }

  private:
    cl_params_clGetPipeInfo params;
    cl_callback_data data;
    uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; // indexed by handle position
    tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED;
};

class clGetPlatformIDsTracer { public: clGetPlatformIDsTracer() {} void enter(cl_uint *numEntries, cl_platform_id **platforms, cl_uint **numPlatforms) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.numEntries = numEntries;
params.platforms = platforms; params.numPlatforms = numPlatforms; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetPlatformIDs"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformIDs, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformIDs)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformIDs, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetPlatformIDsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetPlatformIDs params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetPlatformInfoTracer { public: clGetPlatformInfoTracer() {} void enter(cl_platform_id *platform, cl_platform_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; params.paramName = paramName; params.paramValueSize = paramValueSize; 
params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetPlatformInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetPlatformInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetPlatformInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetPlatformInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetPlatformInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetProgramBuildInfoTracer { public: clGetProgramBuildInfoTracer() {} void enter(cl_program *program, cl_device_id *device, cl_program_build_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.device = device; 
params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetProgramBuildInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramBuildInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramBuildInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramBuildInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramBuildInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetProgramBuildInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetProgramBuildInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetProgramInfoTracer { public: clGetProgramInfoTracer() {} void enter(cl_program *program, cl_program_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != 
TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetProgramInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetProgramInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetProgramInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetProgramInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetProgramInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetSamplerInfoTracer { public: clGetSamplerInfoTracer() {} void enter(cl_sampler *sampler, cl_sampler_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != 
TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sampler = sampler; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetSamplerInfo"; data.functionParams = static_cast<const void *>(&params); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetSamplerInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetSamplerInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetSamplerInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetSamplerInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetSamplerInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetSamplerInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; };

// Tracing helper for clGetSupportedImageFormats. enter() hands the call's
// arguments to the registered tracing handles; exit() hands them the return
// value. The `state` member debug-checks the order enter() -> exit().
class clGetSupportedImageFormatsTracer {
  public:
    clGetSupportedImageFormatsTracer() {}

    // Stores the argument pointers in `params`, takes a new correlation id and
    // calls every handle whose CL_FUNCTION_clGetSupportedImageFormats tracing
    // point is set, with data.site == CL_CALLBACK_SITE_ENTER.
    void enter(cl_context *context,
               cl_mem_flags *flags,
               cl_mem_object_type *imageType,
               cl_uint *numEntries,
               cl_image_format **imageFormats,
               cl_uint **numImageFormats) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED);

        params.context = context;
        params.flags = flags;
        params.imageType = imageType;
        params.numEntries = numEntries;
        params.imageFormats = imageFormats;
        params.numImageFormats = numImageFormats;

        data.site = CL_CALLBACK_SITE_ENTER;
        data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel);
        data.functionName = "clGetSupportedImageFormats";
        // The parameter block is published to callbacks as an untyped pointer.
        data.functionParams = static_cast<const void *>(&params);
        data.functionReturnValue = nullptr; // nothing has been returned yet

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetSupportedImageFormats)) {
                // Handle i always sees slot i of correlationData.
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetSupportedImageFormats, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_ENTER_CALLED;
    }

    // Calls the handles again with data.site == CL_CALLBACK_SITE_EXIT and
    // `retVal` as the function return value. Expects a preceding enter().
    void exit(cl_int *retVal) {
        DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED);

        data.site = CL_CALLBACK_SITE_EXIT;
        data.functionReturnValue = retVal;

        DEBUG_BREAK_IF(tracingHandle.size() == 0);
        DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT);
        for (size_t i = 0; i < tracingHandle.size(); ++i) {
            TracingHandle *handle = tracingHandle[i];
            DEBUG_BREAK_IF(handle == nullptr);
            if (handle->getTracingPoint(CL_FUNCTION_clGetSupportedImageFormats)) {
                data.correlationData = correlationData + i;
                handle->call(CL_FUNCTION_clGetSupportedImageFormats, &data);
            }
        }

        state = TRACING_NOTIFY_STATE_EXIT_CALLED;
    }

    // Debug-checks that enter() was not left without a matching exit().
    ~clGetSupportedImageFormatsTracer() {
        DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED);
    }

  private:
    cl_params_clGetSupportedImageFormats params;
    cl_callback_data data;
    uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; // indexed by handle position
    tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED;
};

class clLinkProgramTracer { public: clLinkProgramTracer() {} void enter(cl_context *context,
cl_uint *numDevices, const cl_device_id **deviceList, const char **options, cl_uint *numInputPrograms, const cl_program **inputPrograms, void(CL_CALLBACK **funcNotify)(cl_program program, void *userData), void **userData, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.numDevices = numDevices; params.deviceList = deviceList; params.options = options; params.numInputPrograms = numInputPrograms; params.inputPrograms = inputPrograms; params.funcNotify = funcNotify; params.userData = userData; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clLinkProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clLinkProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clLinkProgram, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_program *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clLinkProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clLinkProgram, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clLinkProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clLinkProgram params; 
cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseCommandQueueTracer { public: clReleaseCommandQueueTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseCommandQueue, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseCommandQueue, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseCommandQueue params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseContextTracer { public: 
clReleaseContextTracer() {} void enter(cl_context *context) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseContext"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseContext, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseContext, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseContextTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseContext params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseDeviceTracer { public: clReleaseDeviceTracer() {} void enter(cl_device_id *device) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, 
std::memory_order_acq_rel); data.functionName = "clReleaseDevice"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseDevice, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseDevice, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseDeviceTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseDevice params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseEventTracer { public: clReleaseEventTracer() {} void enter(cl_event *event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseEvent"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i 
< tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseEvent, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseEvent, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseEventTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseEvent params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseKernelTracer { public: clReleaseKernelTracer() {} void enter(cl_kernel *kernel) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseKernel, &data); } } state 
= TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseKernel, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseKernel params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseMemObjectTracer { public: clReleaseMemObjectTracer() {} void enter(cl_mem *memobj) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseMemObject"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseMemObject, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); 
DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseMemObject, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseMemObjectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseMemObject params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseProgramTracer { public: clReleaseProgramTracer() {} void enter(cl_program *program) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseProgram, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clReleaseProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseProgram, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseProgram params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clReleaseSamplerTracer { public: clReleaseSamplerTracer() {} void enter(cl_sampler *sampler) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sampler = sampler; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clReleaseSampler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseSampler, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clReleaseSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clReleaseSampler, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clReleaseSamplerTracer() { DEBUG_BREAK_IF(state == 
TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clReleaseSampler params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainCommandQueueTracer { public: clRetainCommandQueueTracer() {} void enter(cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainCommandQueue, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainCommandQueue, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainCommandQueue params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = 
TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainContextTracer { public: clRetainContextTracer() {} void enter(cl_context *context) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainContext"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainContext, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainContext)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainContext, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainContextTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainContext params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainDeviceTracer { public: clRetainDeviceTracer() {} void enter(cl_device_id *device) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.device = device; data.site = CL_CALLBACK_SITE_ENTER; 
data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainDevice"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainDevice, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainDevice)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainDevice, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainDeviceTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainDevice params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainEventTracer { public: clRetainEventTracer() {} void enter(cl_event *event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainEvent"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= 
TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainEvent, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainEvent)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainEvent, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainEventTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainEvent params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainKernelTracer { public: clRetainKernelTracer() {} void enter(cl_kernel *kernel) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainKernel"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainKernel)) { data.correlationData = correlationData + i; 
handle->call(CL_FUNCTION_clRetainKernel, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainKernel)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainKernel, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainKernelTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainKernel params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainMemObjectTracer { public: clRetainMemObjectTracer() {} void enter(cl_mem *memobj) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainMemObject"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainMemObject, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; 
DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainMemObject)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainMemObject, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainMemObjectTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainMemObject params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainProgramTracer { public: clRetainProgramTracer() {} void enter(cl_program *program) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.program = program; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainProgram"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainProgram, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if 
(handle->getTracingPoint(CL_FUNCTION_clRetainProgram)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainProgram, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainProgramTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainProgram params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clRetainSamplerTracer { public: clRetainSamplerTracer() {} void enter(cl_sampler *sampler) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.sampler = sampler; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clRetainSampler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainSampler, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clRetainSampler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clRetainSampler, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clRetainSamplerTracer() { DEBUG_BREAK_IF(state == 
TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clRetainSampler params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSVMAllocTracer { public: clSVMAllocTracer() {} void enter(cl_context *context, cl_svm_mem_flags *flags, size_t *size, cl_uint *alignment) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.size = size; params.alignment = alignment; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSVMAlloc"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMAlloc)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMAlloc, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void **retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMAlloc)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMAlloc, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSVMAllocTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSVMAlloc params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; 
tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSVMFreeTracer { public: clSVMFreeTracer() {} void enter(cl_context *context, void **svmPointer) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.svmPointer = svmPointer; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSVMFree"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMFree)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMFree, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(void *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSVMFree)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSVMFree, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSVMFreeTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSVMFree params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetCommandQueuePropertyTracer { public: clSetCommandQueuePropertyTracer() {} void enter(cl_command_queue *commandQueue, cl_command_queue_properties *properties, cl_bool *enable, 
cl_command_queue_properties **oldProperties) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.properties = properties; params.enable = enable; params.oldProperties = oldProperties; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetCommandQueueProperty"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetCommandQueueProperty)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetCommandQueueProperty, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetCommandQueueProperty)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetCommandQueueProperty, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetCommandQueuePropertyTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetCommandQueueProperty params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetDefaultDeviceCommandQueueTracer { public: clSetDefaultDeviceCommandQueueTracer() {} void enter(cl_context *context, cl_device_id 
*device, cl_command_queue *commandQueue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.device = device; params.commandQueue = commandQueue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetDefaultDeviceCommandQueue"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetDefaultDeviceCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetDefaultDeviceCommandQueue, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetDefaultDeviceCommandQueue)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetDefaultDeviceCommandQueue, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetDefaultDeviceCommandQueueTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetDefaultDeviceCommandQueue params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetEventCallbackTracer { public: clSetEventCallbackTracer() {} void enter(cl_event *event, cl_int *commandExecCallbackType, void(CL_CALLBACK 
**funcNotify)(cl_event, cl_int, void *), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; params.commandExecCallbackType = commandExecCallbackType; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetEventCallback"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetEventCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetEventCallback, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetEventCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetEventCallback, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetEventCallbackTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetEventCallback params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetKernelArgTracer { public: clSetKernelArgTracer() {} void enter(cl_kernel *kernel, cl_uint *argIndex, size_t *argSize, const void **argValue) { 
DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.argIndex = argIndex; params.argSize = argSize; params.argValue = argValue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetKernelArg"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArg)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArg, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArg)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArg, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetKernelArgTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetKernelArg params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetKernelArgSVMPointerTracer { public: clSetKernelArgSVMPointerTracer() {} void enter(cl_kernel *kernel, cl_uint *argIndex, const void **argValue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.argIndex = argIndex; params.argValue 
= argValue; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetKernelArgSVMPointer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArgSVMPointer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArgSVMPointer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelArgSVMPointer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelArgSVMPointer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetKernelArgSVMPointerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetKernelArgSVMPointer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetKernelExecInfoTracer { public: clSetKernelExecInfoTracer() {} void enter(cl_kernel *kernel, cl_kernel_exec_info *paramName, size_t *paramValueSize, const void **paramValue) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.kernel = kernel; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; 
data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetKernelExecInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelExecInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelExecInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetKernelExecInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetKernelExecInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetKernelExecInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetKernelExecInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetMemObjectDestructorCallbackTracer { public: clSetMemObjectDestructorCallbackTracer() {} void enter(cl_mem *memobj, void(CL_CALLBACK **funcNotify)(cl_mem, void *), void **userData) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.funcNotify = funcNotify; params.userData = userData; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = 
tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clSetMemObjectDestructorCallback"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetMemObjectDestructorCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetMemObjectDestructorCallback, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetMemObjectDestructorCallback)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetMemObjectDestructorCallback, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetMemObjectDestructorCallbackTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetMemObjectDestructorCallback params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clSetUserEventStatusTracer { public: clSetUserEventStatusTracer() {} void enter(cl_event *event, cl_int *executionStatus) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.event = event; params.executionStatus = executionStatus; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); 
data.functionName = "clSetUserEventStatus"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetUserEventStatus)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetUserEventStatus, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clSetUserEventStatus)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clSetUserEventStatus, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clSetUserEventStatusTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clSetUserEventStatus params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clUnloadCompilerTracer { public: clUnloadCompilerTracer() {} void enter() { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clUnloadCompiler"; data.functionParams = nullptr; data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { 
TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadCompiler, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadCompiler, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clUnloadCompilerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clUnloadPlatformCompilerTracer { public: clUnloadPlatformCompilerTracer() {} void enter(cl_platform_id *platform) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.platform = platform; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clUnloadPlatformCompiler"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadPlatformCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadPlatformCompiler, &data); } } 
state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clUnloadPlatformCompiler)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clUnloadPlatformCompiler, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clUnloadPlatformCompilerTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clUnloadPlatformCompiler params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clWaitForEventsTracer { public: clWaitForEventsTracer() {} void enter(cl_uint *numEvents, const cl_event **eventList) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.numEvents = numEvents; params.eventList = eventList; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clWaitForEvents"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clWaitForEvents)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clWaitForEvents, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = 
CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clWaitForEvents)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clWaitForEvents, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clWaitForEventsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clWaitForEvents params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; #ifdef _WIN32 class clCreateFromGLBufferTracer { public: clCreateFromGLBufferTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLuint *bufobj, int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.bufobj = bufobj; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLBuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLBuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; 
DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLBuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLBuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLBufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLBuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLRenderbufferTracer { public: clCreateFromGLRenderbufferTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLuint *renderbuffer, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.renderbuffer = renderbuffer; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLRenderbuffer"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLRenderbuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLRenderbuffer, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; 
DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLRenderbuffer)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLRenderbuffer, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLRenderbufferTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLRenderbuffer params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLTextureTracer { public: clCreateFromGLTextureTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLenum *target, cl_GLint *miplevel, cl_GLuint *texture, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.target = target; params.miplevel = miplevel; params.texture = texture; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLTexture"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = 
CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLTextureTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLTexture params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLTexture2DTracer { public: clCreateFromGLTexture2DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLenum *target, cl_GLint *miplevel, cl_GLuint *texture, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.target = target; params.miplevel = miplevel; params.texture = texture; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLTexture2D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture2D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture2D, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != 
TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture2D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture2D, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLTexture2DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLTexture2D params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clCreateFromGLTexture3DTracer { public: clCreateFromGLTexture3DTracer() {} void enter(cl_context *context, cl_mem_flags *flags, cl_GLenum *target, cl_GLint *miplevel, cl_GLuint *texture, cl_int **errcodeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.context = context; params.flags = flags; params.target = target; params.miplevel = miplevel; params.texture = texture; params.errcodeRet = errcodeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clCreateFromGLTexture3D"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture3D, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void 
exit(cl_mem *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clCreateFromGLTexture3D)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clCreateFromGLTexture3D, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clCreateFromGLTexture3DTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clCreateFromGLTexture3D params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueAcquireGLObjectsTracer { public: clEnqueueAcquireGLObjectsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numObjects, const cl_mem **memObjects, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numObjects = numObjects; params.memObjects = memObjects; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueAcquireGLObjects"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueAcquireGLObjects)) { 
data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueAcquireGLObjects, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueAcquireGLObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueAcquireGLObjects, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueAcquireGLObjectsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueAcquireGLObjects params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clEnqueueReleaseGLObjectsTracer { public: clEnqueueReleaseGLObjectsTracer() {} void enter(cl_command_queue *commandQueue, cl_uint *numObjects, const cl_mem **memObjects, cl_uint *numEventsInWaitList, const cl_event **eventWaitList, cl_event **event) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.commandQueue = commandQueue; params.numObjects = numObjects; params.memObjects = memObjects; params.numEventsInWaitList = numEventsInWaitList; params.eventWaitList = eventWaitList; params.event = event; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clEnqueueReleaseGLObjects"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); 
++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReleaseGLObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReleaseGLObjects, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clEnqueueReleaseGLObjects)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clEnqueueReleaseGLObjects, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clEnqueueReleaseGLObjectsTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clEnqueueReleaseGLObjects params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetGLObjectInfoTracer { public: clGetGLObjectInfoTracer() {} void enter(cl_mem *memobj, cl_gl_object_type **glObjectType, cl_GLuint **glObjectName) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.glObjectType = glObjectType; params.glObjectName = glObjectName; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetGLObjectInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle 
== nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLObjectInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLObjectInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLObjectInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetGLObjectInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetGLObjectInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; class clGetGLTextureInfoTracer { public: clGetGLTextureInfoTracer() {} void enter(cl_mem *memobj, cl_gl_texture_info *paramName, size_t *paramValueSize, void **paramValue, size_t **paramValueSizeRet) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_NOTHING_CALLED); params.memobj = memobj; params.paramName = paramName; params.paramValueSize = paramValueSize; params.paramValue = paramValue; params.paramValueSizeRet = paramValueSizeRet; data.site = CL_CALLBACK_SITE_ENTER; data.correlationId = tracingCorrelationId.fetch_add(1, std::memory_order_acq_rel); data.functionName = "clGetGLTextureInfo"; data.functionParams = static_cast(¶ms); data.functionReturnValue = nullptr; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; 
DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLTextureInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLTextureInfo, &data); } } state = TRACING_NOTIFY_STATE_ENTER_CALLED; } void exit(cl_int *retVal) { DEBUG_BREAK_IF(state != TRACING_NOTIFY_STATE_ENTER_CALLED); data.site = CL_CALLBACK_SITE_EXIT; data.functionReturnValue = retVal; DEBUG_BREAK_IF(tracingHandle.size() == 0); DEBUG_BREAK_IF(tracingHandle.size() >= TRACING_MAX_HANDLE_COUNT); for (size_t i = 0; i < tracingHandle.size(); ++i) { TracingHandle *handle = tracingHandle[i]; DEBUG_BREAK_IF(handle == nullptr); if (handle->getTracingPoint(CL_FUNCTION_clGetGLTextureInfo)) { data.correlationData = correlationData + i; handle->call(CL_FUNCTION_clGetGLTextureInfo, &data); } } state = TRACING_NOTIFY_STATE_EXIT_CALLED; } ~clGetGLTextureInfoTracer() { DEBUG_BREAK_IF(state == TRACING_NOTIFY_STATE_ENTER_CALLED); } private: cl_params_clGetGLTextureInfo params; cl_callback_data data; uint64_t correlationData[TRACING_MAX_HANDLE_COUNT]; tracing_notify_state_t state = TRACING_NOTIFY_STATE_NOTHING_CALLED; }; #endif } // namespace HostSideTracing compute-runtime-20.13.16352/opencl/source/tracing/tracing_types.h000066400000000000000000001022201363734646600246050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" #include "CL/cl_gl.h" struct _cl_tracing_handle; typedef _cl_tracing_handle *cl_tracing_handle; //! Enumeration of callback call sites typedef enum _cl_callback_site { CL_CALLBACK_SITE_ENTER = 0, //!< Before the function CL_CALLBACK_SITE_EXIT = 1 //!< After the function } cl_callback_site; /*! \brief Callback data structure The structure contains information about the traced function. Function name allows to determine which function is currently traced. Call site is used to determine if the callback was called at the beginning or at the end of function. 
Correlation ID and Data fields make it possible to associate the callback on
    enter with the callback on exit and to pass any piece of data between them.
    Function arguments and the return value are available both for reading and
    writing. The return value is available only within the on-exit callback.
 */
typedef struct _cl_callback_data {
    cl_callback_site site;      //!< Call site, can be ENTER or EXIT
    cl_uint correlationId;      //!< Correlation identifier, the same for ENTER
                                //!< and EXIT callbacks
    cl_ulong *correlationData;  //!< Pointer to correlation data repository,
                                //!< can be used to move data from ENTER to
                                //!< EXIT callback
    const char *functionName;   //!< Name of the traced function
    const void *functionParams; //!< Traced function arguments, should be
                                //!< cast to the appropriate params structure
    void *functionReturnValue;  //!< Return value for the traced function
} cl_callback_data;

//! Enumeration of supported functions for tracing.
//! Identifiers are numbered consecutively from 0; CL_FUNCTION_COUNT is the
//! last entry and equals the number of identifiers listed before it.
typedef enum _cl_function_id {
    CL_FUNCTION_clBuildProgram = 0,
    CL_FUNCTION_clCloneKernel = 1,
    CL_FUNCTION_clCompileProgram = 2,
    CL_FUNCTION_clCreateBuffer = 3,
    CL_FUNCTION_clCreateCommandQueue = 4,
    CL_FUNCTION_clCreateCommandQueueWithProperties = 5,
    CL_FUNCTION_clCreateContext = 6,
    CL_FUNCTION_clCreateContextFromType = 7,
    CL_FUNCTION_clCreateFromGLBuffer = 8,
    CL_FUNCTION_clCreateFromGLRenderbuffer = 9,
    CL_FUNCTION_clCreateFromGLTexture = 10,
    CL_FUNCTION_clCreateFromGLTexture2D = 11,
    CL_FUNCTION_clCreateFromGLTexture3D = 12,
    CL_FUNCTION_clCreateImage = 13,
    CL_FUNCTION_clCreateImage2D = 14,
    CL_FUNCTION_clCreateImage3D = 15,
    CL_FUNCTION_clCreateKernel = 16,
    CL_FUNCTION_clCreateKernelsInProgram = 17,
    CL_FUNCTION_clCreatePipe = 18,
    CL_FUNCTION_clCreateProgramWithBinary = 19,
    CL_FUNCTION_clCreateProgramWithBuiltInKernels = 20,
    CL_FUNCTION_clCreateProgramWithIL = 21,
    CL_FUNCTION_clCreateProgramWithSource = 22,
    CL_FUNCTION_clCreateSampler = 23,
    CL_FUNCTION_clCreateSamplerWithProperties = 24,
    CL_FUNCTION_clCreateSubBuffer = 25,
    CL_FUNCTION_clCreateSubDevices = 26,
    CL_FUNCTION_clCreateUserEvent = 27,
    CL_FUNCTION_clEnqueueAcquireGLObjects = 28,
    CL_FUNCTION_clEnqueueBarrier = 29,
    CL_FUNCTION_clEnqueueBarrierWithWaitList = 30,
    CL_FUNCTION_clEnqueueCopyBuffer = 31,
    CL_FUNCTION_clEnqueueCopyBufferRect = 32,
    CL_FUNCTION_clEnqueueCopyBufferToImage = 33,
    CL_FUNCTION_clEnqueueCopyImage = 34,
    CL_FUNCTION_clEnqueueCopyImageToBuffer = 35,
    CL_FUNCTION_clEnqueueFillBuffer = 36,
    CL_FUNCTION_clEnqueueFillImage = 37,
    CL_FUNCTION_clEnqueueMapBuffer = 38,
    CL_FUNCTION_clEnqueueMapImage = 39,
    CL_FUNCTION_clEnqueueMarker = 40,
    CL_FUNCTION_clEnqueueMarkerWithWaitList = 41,
    CL_FUNCTION_clEnqueueMigrateMemObjects = 42,
    CL_FUNCTION_clEnqueueNDRangeKernel = 43,
    CL_FUNCTION_clEnqueueNativeKernel = 44,
    CL_FUNCTION_clEnqueueReadBuffer = 45,
    CL_FUNCTION_clEnqueueReadBufferRect = 46,
    CL_FUNCTION_clEnqueueReadImage = 47,
    CL_FUNCTION_clEnqueueReleaseGLObjects = 48,
    CL_FUNCTION_clEnqueueSVMFree = 49,
    CL_FUNCTION_clEnqueueSVMMap = 50,
    CL_FUNCTION_clEnqueueSVMMemFill = 51,
    CL_FUNCTION_clEnqueueSVMMemcpy = 52,
    CL_FUNCTION_clEnqueueSVMMigrateMem = 53,
    CL_FUNCTION_clEnqueueSVMUnmap = 54,
    CL_FUNCTION_clEnqueueTask = 55,
    CL_FUNCTION_clEnqueueUnmapMemObject = 56,
    CL_FUNCTION_clEnqueueWaitForEvents = 57,
    CL_FUNCTION_clEnqueueWriteBuffer = 58,
    CL_FUNCTION_clEnqueueWriteBufferRect = 59,
    CL_FUNCTION_clEnqueueWriteImage = 60,
    CL_FUNCTION_clFinish = 61,
    CL_FUNCTION_clFlush = 62,
    CL_FUNCTION_clGetCommandQueueInfo = 63,
    CL_FUNCTION_clGetContextInfo = 64,
    CL_FUNCTION_clGetDeviceAndHostTimer = 65,
    CL_FUNCTION_clGetDeviceIDs = 66,
    CL_FUNCTION_clGetDeviceInfo = 67,
    CL_FUNCTION_clGetEventInfo = 68,
    CL_FUNCTION_clGetEventProfilingInfo = 69,
    CL_FUNCTION_clGetExtensionFunctionAddress = 70,
    CL_FUNCTION_clGetExtensionFunctionAddressForPlatform = 71,
    CL_FUNCTION_clGetGLObjectInfo = 72,
    CL_FUNCTION_clGetGLTextureInfo = 73,
    CL_FUNCTION_clGetHostTimer = 74,
    CL_FUNCTION_clGetImageInfo = 75,
    CL_FUNCTION_clGetKernelArgInfo = 76,
    CL_FUNCTION_clGetKernelInfo = 77,
    CL_FUNCTION_clGetKernelSubGroupInfo = 78,
    CL_FUNCTION_clGetKernelWorkGroupInfo = 79,
    CL_FUNCTION_clGetMemObjectInfo = 80,
    CL_FUNCTION_clGetPipeInfo = 81,
    CL_FUNCTION_clGetPlatformIDs = 82,
    CL_FUNCTION_clGetPlatformInfo = 83,
    CL_FUNCTION_clGetProgramBuildInfo = 84,
    CL_FUNCTION_clGetProgramInfo = 85,
    CL_FUNCTION_clGetSamplerInfo = 86,
    CL_FUNCTION_clGetSupportedImageFormats = 87,
    CL_FUNCTION_clLinkProgram = 88,
    CL_FUNCTION_clReleaseCommandQueue = 89,
    CL_FUNCTION_clReleaseContext = 90,
    CL_FUNCTION_clReleaseDevice = 91,
    CL_FUNCTION_clReleaseEvent = 92,
    CL_FUNCTION_clReleaseKernel = 93,
    CL_FUNCTION_clReleaseMemObject = 94,
    CL_FUNCTION_clReleaseProgram = 95,
    CL_FUNCTION_clReleaseSampler = 96,
    CL_FUNCTION_clRetainCommandQueue = 97,
    CL_FUNCTION_clRetainContext = 98,
    CL_FUNCTION_clRetainDevice = 99,
    CL_FUNCTION_clRetainEvent = 100,
    CL_FUNCTION_clRetainKernel = 101,
    CL_FUNCTION_clRetainMemObject = 102,
    CL_FUNCTION_clRetainProgram = 103,
    CL_FUNCTION_clRetainSampler = 104,
    CL_FUNCTION_clSVMAlloc = 105,
    CL_FUNCTION_clSVMFree = 106,
    CL_FUNCTION_clSetCommandQueueProperty = 107,
    CL_FUNCTION_clSetDefaultDeviceCommandQueue = 108,
    CL_FUNCTION_clSetEventCallback = 109,
    CL_FUNCTION_clSetKernelArg = 110,
    CL_FUNCTION_clSetKernelArgSVMPointer = 111,
    CL_FUNCTION_clSetKernelExecInfo = 112,
    CL_FUNCTION_clSetMemObjectDestructorCallback = 113,
    CL_FUNCTION_clSetUserEventStatus = 114,
    CL_FUNCTION_clUnloadCompiler = 115,
    CL_FUNCTION_clUnloadPlatformCompiler = 116,
    CL_FUNCTION_clWaitForEvents = 117,
    CL_FUNCTION_COUNT = 118,
} cl_function_id;

/*!
User-defined tracing callback prototype \param[in] fid Identifier of the function for which the callback is called \param[in] callbackData Data structure with information about the traced function \param[in] userData User-defined data pointer passed through clCreateTracingHandleINTEL() function Thread Safety: must be guaranteed by customer */ typedef void (*cl_tracing_callback)(cl_function_id fid, cl_callback_data *callbackData, void *userData); typedef struct _cl_params_clBuildProgram { cl_program *program; cl_uint *numDevices; const cl_device_id **deviceList; const char **options; void(CL_CALLBACK **funcNotify)(cl_program program, void *userData); void **userData; } cl_params_clBuildProgram; typedef struct _cl_params_clCloneKernel { cl_kernel *sourceKernel; cl_int **errcodeRet; } cl_params_clCloneKernel; typedef struct _cl_params_clCompileProgram { cl_program *program; cl_uint *numDevices; const cl_device_id **deviceList; const char **options; cl_uint *numInputHeaders; const cl_program **inputHeaders; const char ***headerIncludeNames; void(CL_CALLBACK **funcNotify)(cl_program program, void *userData); void **userData; } cl_params_clCompileProgram; typedef struct _cl_params_clCreateBuffer { cl_context *context; cl_mem_flags *flags; size_t *size; void **hostPtr; cl_int **errcodeRet; } cl_params_clCreateBuffer; typedef struct _cl_params_clCreateCommandQueue { cl_context *context; cl_device_id *device; cl_command_queue_properties *properties; cl_int **errcodeRet; } cl_params_clCreateCommandQueue; typedef struct _cl_params_clCreateCommandQueueWithProperties { cl_context *context; cl_device_id *device; const cl_queue_properties **properties; cl_int **errcodeRet; } cl_params_clCreateCommandQueueWithProperties; typedef struct _cl_params_clCreateContext { const cl_context_properties **properties; cl_uint *numDevices; const cl_device_id **devices; void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *); void **userData; cl_int **errcodeRet; } 
cl_params_clCreateContext; typedef struct _cl_params_clCreateContextFromType { const cl_context_properties **properties; cl_device_type *deviceType; void(CL_CALLBACK **funcNotify)(const char *, const void *, size_t, void *); void **userData; cl_int **errcodeRet; } cl_params_clCreateContextFromType; typedef struct _cl_params_clCreateFromGLBuffer { cl_context *context; cl_mem_flags *flags; cl_GLuint *bufobj; int **errcodeRet; } cl_params_clCreateFromGLBuffer; typedef struct _cl_params_clCreateFromGLRenderbuffer { cl_context *context; cl_mem_flags *flags; cl_GLuint *renderbuffer; cl_int **errcodeRet; } cl_params_clCreateFromGLRenderbuffer; typedef struct _cl_params_clCreateFromGLTexture { cl_context *context; cl_mem_flags *flags; cl_GLenum *target; cl_GLint *miplevel; cl_GLuint *texture; cl_int **errcodeRet; } cl_params_clCreateFromGLTexture; typedef struct _cl_params_clCreateFromGLTexture2D { cl_context *context; cl_mem_flags *flags; cl_GLenum *target; cl_GLint *miplevel; cl_GLuint *texture; cl_int **errcodeRet; } cl_params_clCreateFromGLTexture2D; typedef struct _cl_params_clCreateFromGLTexture3D { cl_context *context; cl_mem_flags *flags; cl_GLenum *target; cl_GLint *miplevel; cl_GLuint *texture; cl_int **errcodeRet; } cl_params_clCreateFromGLTexture3D; typedef struct _cl_params_clCreateImage { cl_context *context; cl_mem_flags *flags; const cl_image_format **imageFormat; const cl_image_desc **imageDesc; void **hostPtr; cl_int **errcodeRet; } cl_params_clCreateImage; typedef struct _cl_params_clCreateImage2D { cl_context *context; cl_mem_flags *flags; const cl_image_format **imageFormat; size_t *imageWidth; size_t *imageHeight; size_t *imageRowPitch; void **hostPtr; cl_int **errcodeRet; } cl_params_clCreateImage2D; typedef struct _cl_params_clCreateImage3D { cl_context *context; cl_mem_flags *flags; const cl_image_format **imageFormat; size_t *imageWidth; size_t *imageHeight; size_t *imageDepth; size_t *imageRowPitch; size_t *imageSlicePitch; void **hostPtr; cl_int 
**errcodeRet; } cl_params_clCreateImage3D; typedef struct _cl_params_clCreateKernel { cl_program *program; const char **kernelName; cl_int **errcodeRet; } cl_params_clCreateKernel; typedef struct _cl_params_clCreateKernelsInProgram { cl_program *program; cl_uint *numKernels; cl_kernel **kernels; cl_uint **numKernelsRet; } cl_params_clCreateKernelsInProgram; typedef struct _cl_params_clCreatePipe { cl_context *context; cl_mem_flags *flags; cl_uint *pipePacketSize; cl_uint *pipeMaxPackets; const cl_pipe_properties **properties; cl_int **errcodeRet; } cl_params_clCreatePipe; typedef struct _cl_params_clCreateProgramWithBinary { cl_context *context; cl_uint *numDevices; const cl_device_id **deviceList; const size_t **lengths; const unsigned char ***binaries; cl_int **binaryStatus; cl_int **errcodeRet; } cl_params_clCreateProgramWithBinary; typedef struct _cl_params_clCreateProgramWithBuiltInKernels { cl_context *context; cl_uint *numDevices; const cl_device_id **deviceList; const char **kernelNames; cl_int **errcodeRet; } cl_params_clCreateProgramWithBuiltInKernels; typedef struct _cl_params_clCreateProgramWithIL { cl_context *context; const void **il; size_t *length; cl_int **errcodeRet; } cl_params_clCreateProgramWithIL; typedef struct _cl_params_clCreateProgramWithSource { cl_context *context; cl_uint *count; const char ***strings; const size_t **lengths; cl_int **errcodeRet; } cl_params_clCreateProgramWithSource; typedef struct _cl_params_clCreateSampler { cl_context *context; cl_bool *normalizedCoords; cl_addressing_mode *addressingMode; cl_filter_mode *filterMode; cl_int **errcodeRet; } cl_params_clCreateSampler; typedef struct _cl_params_clCreateSamplerWithProperties { cl_context *context; const cl_sampler_properties **samplerProperties; cl_int **errcodeRet; } cl_params_clCreateSamplerWithProperties; typedef struct _cl_params_clCreateSubBuffer { cl_mem *buffer; cl_mem_flags *flags; cl_buffer_create_type *bufferCreateType; const void **bufferCreateInfo; cl_int 
**errcodeRet; } cl_params_clCreateSubBuffer; typedef struct _cl_params_clCreateSubDevices { cl_device_id *inDevice; const cl_device_partition_property **properties; cl_uint *numDevices; cl_device_id **outDevices; cl_uint **numDevicesRet; } cl_params_clCreateSubDevices; typedef struct _cl_params_clCreateUserEvent { cl_context *context; cl_int **errcodeRet; } cl_params_clCreateUserEvent; typedef struct _cl_params_clEnqueueAcquireGLObjects { cl_command_queue *commandQueue; cl_uint *numObjects; const cl_mem **memObjects; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueAcquireGLObjects; typedef struct _cl_params_clEnqueueBarrier { cl_command_queue *commandQueue; } cl_params_clEnqueueBarrier; typedef struct _cl_params_clEnqueueBarrierWithWaitList { cl_command_queue *commandQueue; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueBarrierWithWaitList; typedef struct _cl_params_clEnqueueCopyBuffer { cl_command_queue *commandQueue; cl_mem *srcBuffer; cl_mem *dstBuffer; size_t *srcOffset; size_t *dstOffset; size_t *cb; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyBuffer; typedef struct _cl_params_clEnqueueCopyBufferRect { cl_command_queue *commandQueue; cl_mem *srcBuffer; cl_mem *dstBuffer; const size_t **srcOrigin; const size_t **dstOrigin; const size_t **region; size_t *srcRowPitch; size_t *srcSlicePitch; size_t *dstRowPitch; size_t *dstSlicePitch; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyBufferRect; typedef struct _cl_params_clEnqueueCopyBufferToImage { cl_command_queue *commandQueue; cl_mem *srcBuffer; cl_mem *dstImage; size_t *srcOffset; const size_t **dstOrigin; const size_t **region; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyBufferToImage; typedef struct _cl_params_clEnqueueCopyImage 
{ cl_command_queue *commandQueue; cl_mem *srcImage; cl_mem *dstImage; const size_t **srcOrigin; const size_t **dstOrigin; const size_t **region; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyImage; typedef struct _cl_params_clEnqueueCopyImageToBuffer { cl_command_queue *commandQueue; cl_mem *srcImage; cl_mem *dstBuffer; const size_t **srcOrigin; const size_t **region; size_t *dstOffset; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueCopyImageToBuffer; typedef struct _cl_params_clEnqueueFillBuffer { cl_command_queue *commandQueue; cl_mem *buffer; const void **pattern; size_t *patternSize; size_t *offset; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueFillBuffer; typedef struct _cl_params_clEnqueueFillImage { cl_command_queue *commandQueue; cl_mem *image; const void **fillColor; const size_t **origin; const size_t **region; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueFillImage; typedef struct _cl_params_clEnqueueMapBuffer { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingMap; cl_map_flags *mapFlags; size_t *offset; size_t *cb; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; cl_int **errcodeRet; } cl_params_clEnqueueMapBuffer; typedef struct _cl_params_clEnqueueMapImage { cl_command_queue *commandQueue; cl_mem *image; cl_bool *blockingMap; cl_map_flags *mapFlags; const size_t **origin; const size_t **region; size_t **imageRowPitch; size_t **imageSlicePitch; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; cl_int **errcodeRet; } cl_params_clEnqueueMapImage; typedef struct _cl_params_clEnqueueMarker { cl_command_queue *commandQueue; cl_event **event; } cl_params_clEnqueueMarker; typedef struct _cl_params_clEnqueueMarkerWithWaitList { cl_command_queue 
*commandQueue; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueMarkerWithWaitList; typedef struct _cl_params_clEnqueueMigrateMemObjects { cl_command_queue *commandQueue; cl_uint *numMemObjects; const cl_mem **memObjects; cl_mem_migration_flags *flags; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueMigrateMemObjects; typedef struct _cl_params_clEnqueueNDRangeKernel { cl_command_queue *commandQueue; cl_kernel *kernel; cl_uint *workDim; const size_t **globalWorkOffset; const size_t **globalWorkSize; const size_t **localWorkSize; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueNDRangeKernel; typedef struct _cl_params_clEnqueueNativeKernel { cl_command_queue *commandQueue; void(CL_CALLBACK **userFunc)(void *); void **args; size_t *cbArgs; cl_uint *numMemObjects; const cl_mem **memList; const void ***argsMemLoc; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueNativeKernel; typedef struct _cl_params_clEnqueueReadBuffer { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingRead; size_t *offset; size_t *cb; void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueReadBuffer; typedef struct _cl_params_clEnqueueReadBufferRect { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingRead; const size_t **bufferOrigin; const size_t **hostOrigin; const size_t **region; size_t *bufferRowPitch; size_t *bufferSlicePitch; size_t *hostRowPitch; size_t *hostSlicePitch; void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueReadBufferRect; typedef struct _cl_params_clEnqueueReadImage { cl_command_queue *commandQueue; cl_mem *image; cl_bool *blockingRead; const size_t **origin; const size_t **region; size_t *rowPitch; size_t *slicePitch; void 
**ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueReadImage; typedef struct _cl_params_clEnqueueReleaseGLObjects { cl_command_queue *commandQueue; cl_uint *numObjects; const cl_mem **memObjects; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueReleaseGLObjects; typedef struct _cl_params_clEnqueueSVMFree { cl_command_queue *commandQueue; cl_uint *numSvmPointers; void ***svmPointers; void(CL_CALLBACK **pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void **svmPointers, void *userData); void **userData; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMFree; typedef struct _cl_params_clEnqueueSVMMap { cl_command_queue *commandQueue; cl_bool *blockingMap; cl_map_flags *mapFlags; void **svmPtr; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMap; typedef struct _cl_params_clEnqueueSVMMemFill { cl_command_queue *commandQueue; void **svmPtr; const void **pattern; size_t *patternSize; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMemFill; typedef struct _cl_params_clEnqueueSVMMemcpy { cl_command_queue *commandQueue; cl_bool *blockingCopy; void **dstPtr; const void **srcPtr; size_t *size; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMemcpy; typedef struct _cl_params_clEnqueueSVMMigrateMem { cl_command_queue *commandQueue; cl_uint *numSvmPointers; const void ***svmPointers; const size_t **sizes; const cl_mem_migration_flags *flags; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueSVMMigrateMem; typedef struct _cl_params_clEnqueueSVMUnmap { cl_command_queue *commandQueue; void **svmPtr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; 
cl_event **event; } cl_params_clEnqueueSVMUnmap; typedef struct _cl_params_clEnqueueTask { cl_command_queue *commandQueue; cl_kernel *kernel; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueTask; typedef struct _cl_params_clEnqueueUnmapMemObject { cl_command_queue *commandQueue; cl_mem *memobj; void **mappedPtr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueUnmapMemObject; typedef struct _cl_params_clEnqueueWaitForEvents { cl_command_queue *commandQueue; cl_uint *numEvents; const cl_event **eventList; } cl_params_clEnqueueWaitForEvents; typedef struct _cl_params_clEnqueueWriteBuffer { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingWrite; size_t *offset; size_t *cb; const void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueWriteBuffer; typedef struct _cl_params_clEnqueueWriteBufferRect { cl_command_queue *commandQueue; cl_mem *buffer; cl_bool *blockingWrite; const size_t **bufferOrigin; const size_t **hostOrigin; const size_t **region; size_t *bufferRowPitch; size_t *bufferSlicePitch; size_t *hostRowPitch; size_t *hostSlicePitch; const void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueWriteBufferRect; typedef struct _cl_params_clEnqueueWriteImage { cl_command_queue *commandQueue; cl_mem *image; cl_bool *blockingWrite; const size_t **origin; const size_t **region; size_t *inputRowPitch; size_t *inputSlicePitch; const void **ptr; cl_uint *numEventsInWaitList; const cl_event **eventWaitList; cl_event **event; } cl_params_clEnqueueWriteImage; typedef struct _cl_params_clFinish { cl_command_queue *commandQueue; } cl_params_clFinish; typedef struct _cl_params_clFlush { cl_command_queue *commandQueue; } cl_params_clFlush; typedef struct _cl_params_clGetCommandQueueInfo { cl_command_queue *commandQueue; cl_command_queue_info 
*paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetCommandQueueInfo; typedef struct _cl_params_clGetContextInfo { cl_context *context; cl_context_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetContextInfo; typedef struct _cl_params_clGetDeviceAndHostTimer { cl_device_id *device; cl_ulong **deviceTimestamp; cl_ulong **hostTimestamp; } cl_params_clGetDeviceAndHostTimer; typedef struct _cl_params_clGetDeviceIDs { cl_platform_id *platform; cl_device_type *deviceType; cl_uint *numEntries; cl_device_id **devices; cl_uint **numDevices; } cl_params_clGetDeviceIDs; typedef struct _cl_params_clGetDeviceInfo { cl_device_id *device; cl_device_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetDeviceInfo; typedef struct _cl_params_clGetEventInfo { cl_event *event; cl_event_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetEventInfo; typedef struct _cl_params_clGetEventProfilingInfo { cl_event *event; cl_profiling_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetEventProfilingInfo; typedef struct _cl_params_clGetExtensionFunctionAddress { const char **funcName; } cl_params_clGetExtensionFunctionAddress; typedef struct _cl_params_clGetExtensionFunctionAddressForPlatform { cl_platform_id *platform; const char **funcName; } cl_params_clGetExtensionFunctionAddressForPlatform; typedef struct _cl_params_clGetGLObjectInfo { cl_mem *memobj; cl_gl_object_type **glObjectType; cl_GLuint **glObjectName; } cl_params_clGetGLObjectInfo; typedef struct _cl_params_clGetGLTextureInfo { cl_mem *memobj; cl_gl_texture_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetGLTextureInfo; typedef struct _cl_params_clGetHostTimer { cl_device_id *device; cl_ulong **hostTimestamp; } 
cl_params_clGetHostTimer; typedef struct _cl_params_clGetImageInfo { cl_mem *image; cl_image_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetImageInfo; typedef struct _cl_params_clGetKernelArgInfo { cl_kernel *kernel; cl_uint *argIndx; cl_kernel_arg_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelArgInfo; typedef struct _cl_params_clGetKernelInfo { cl_kernel *kernel; cl_kernel_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelInfo; typedef struct _cl_params_clGetKernelSubGroupInfo { cl_kernel *kernel; cl_device_id *device; cl_kernel_sub_group_info *paramName; size_t *inputValueSize; const void **inputValue; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelSubGroupInfo; typedef struct _cl_params_clGetKernelWorkGroupInfo { cl_kernel *kernel; cl_device_id *device; cl_kernel_work_group_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetKernelWorkGroupInfo; typedef struct _cl_params_clGetMemObjectInfo { cl_mem *memobj; cl_mem_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetMemObjectInfo; typedef struct _cl_params_clGetPipeInfo { cl_mem *pipe; cl_pipe_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetPipeInfo; typedef struct _cl_params_clGetPlatformIDs { cl_uint *numEntries; cl_platform_id **platforms; cl_uint **numPlatforms; } cl_params_clGetPlatformIDs; typedef struct _cl_params_clGetPlatformInfo { cl_platform_id *platform; cl_platform_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetPlatformInfo; typedef struct _cl_params_clGetProgramBuildInfo { cl_program *program; cl_device_id *device; cl_program_build_info *paramName; size_t 
*paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetProgramBuildInfo; typedef struct _cl_params_clGetProgramInfo { cl_program *program; cl_program_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetProgramInfo; typedef struct _cl_params_clGetSamplerInfo { cl_sampler *sampler; cl_sampler_info *paramName; size_t *paramValueSize; void **paramValue; size_t **paramValueSizeRet; } cl_params_clGetSamplerInfo; typedef struct _cl_params_clGetSupportedImageFormats { cl_context *context; cl_mem_flags *flags; cl_mem_object_type *imageType; cl_uint *numEntries; cl_image_format **imageFormats; cl_uint **numImageFormats; } cl_params_clGetSupportedImageFormats; typedef struct _cl_params_clLinkProgram { cl_context *context; cl_uint *numDevices; const cl_device_id **deviceList; const char **options; cl_uint *numInputPrograms; const cl_program **inputPrograms; void(CL_CALLBACK **funcNotify)(cl_program program, void *userData); void **userData; cl_int **errcodeRet; } cl_params_clLinkProgram; typedef struct _cl_params_clReleaseCommandQueue { cl_command_queue *commandQueue; } cl_params_clReleaseCommandQueue; typedef struct _cl_params_clReleaseContext { cl_context *context; } cl_params_clReleaseContext; typedef struct _cl_params_clReleaseDevice { cl_device_id *device; } cl_params_clReleaseDevice; typedef struct _cl_params_clReleaseEvent { cl_event *event; } cl_params_clReleaseEvent; typedef struct _cl_params_clReleaseKernel { cl_kernel *kernel; } cl_params_clReleaseKernel; typedef struct _cl_params_clReleaseMemObject { cl_mem *memobj; } cl_params_clReleaseMemObject; typedef struct _cl_params_clReleaseProgram { cl_program *program; } cl_params_clReleaseProgram; typedef struct _cl_params_clReleaseSampler { cl_sampler *sampler; } cl_params_clReleaseSampler; typedef struct _cl_params_clRetainCommandQueue { cl_command_queue *commandQueue; } cl_params_clRetainCommandQueue; typedef struct 
_cl_params_clRetainContext { cl_context *context; } cl_params_clRetainContext; typedef struct _cl_params_clRetainDevice { cl_device_id *device; } cl_params_clRetainDevice; typedef struct _cl_params_clRetainEvent { cl_event *event; } cl_params_clRetainEvent; typedef struct _cl_params_clRetainKernel { cl_kernel *kernel; } cl_params_clRetainKernel; typedef struct _cl_params_clRetainMemObject { cl_mem *memobj; } cl_params_clRetainMemObject; typedef struct _cl_params_clRetainProgram { cl_program *program; } cl_params_clRetainProgram; typedef struct _cl_params_clRetainSampler { cl_sampler *sampler; } cl_params_clRetainSampler; typedef struct _cl_params_clSVMAlloc { cl_context *context; cl_svm_mem_flags *flags; size_t *size; cl_uint *alignment; } cl_params_clSVMAlloc; typedef struct _cl_params_clSVMFree { cl_context *context; void **svmPointer; } cl_params_clSVMFree; typedef struct _cl_params_clSetCommandQueueProperty { cl_command_queue *commandQueue; cl_command_queue_properties *properties; cl_bool *enable; cl_command_queue_properties **oldProperties; } cl_params_clSetCommandQueueProperty; typedef struct _cl_params_clSetDefaultDeviceCommandQueue { cl_context *context; cl_device_id *device; cl_command_queue *commandQueue; } cl_params_clSetDefaultDeviceCommandQueue; typedef struct _cl_params_clSetEventCallback { cl_event *event; cl_int *commandExecCallbackType; void(CL_CALLBACK **funcNotify)(cl_event, cl_int, void *); void **userData; } cl_params_clSetEventCallback; typedef struct _cl_params_clSetKernelArg { cl_kernel *kernel; cl_uint *argIndex; size_t *argSize; const void **argValue; } cl_params_clSetKernelArg; typedef struct _cl_params_clSetKernelArgSVMPointer { cl_kernel *kernel; cl_uint *argIndex; const void **argValue; } cl_params_clSetKernelArgSVMPointer; typedef struct _cl_params_clSetKernelExecInfo { cl_kernel *kernel; cl_kernel_exec_info *paramName; size_t *paramValueSize; const void **paramValue; } cl_params_clSetKernelExecInfo; typedef struct 
_cl_params_clSetMemObjectDestructorCallback { cl_mem *memobj; void(CL_CALLBACK **funcNotify)(cl_mem, void *); void **userData; } cl_params_clSetMemObjectDestructorCallback; typedef struct _cl_params_clSetUserEventStatus { cl_event *event; cl_int *executionStatus; } cl_params_clSetUserEventStatus; typedef struct _cl_params_clUnloadCompiler { } cl_params_clUnloadCompiler; typedef struct _cl_params_clUnloadPlatformCompiler { cl_platform_id *platform; } cl_params_clUnloadPlatformCompiler; typedef struct _cl_params_clWaitForEvents { cl_uint *numEvents; const cl_event **eventList; } cl_params_clWaitForEvents; compute-runtime-20.13.16352/opencl/source/utilities/000077500000000000000000000000001363734646600221505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/source/utilities/CMakeLists.txt000066400000000000000000000011171363734646600247100ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(RUNTIME_SRCS_UTILITIES_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/logger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/logger.h ) set_property(GLOBAL PROPERTY RUNTIME_SRCS_UTILITIES_BASE ${RUNTIME_SRCS_UTILITIES_BASE}) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_UTILITIES_BASE}) if(WIN32) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${NEO_CORE_UTILITIES_WINDOWS}) else() target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${NEO_CORE_UTILITIES_LINUX}) endif() compute-runtime-20.13.16352/opencl/source/utilities/logger.cpp000066400000000000000000000303671363734646600241440ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/utilities/logger.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/timestamp_packet.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include 
"opencl/source/mem_obj/mem_obj.h" #include #include namespace NEO { FileLogger &FileLoggerInstance() { static FileLogger fileLoggerInstance(std::string("igdrcl.log"), DebugManager.flags); return fileLoggerInstance; } template FileLogger::FileLogger(std::string filename, const DebugVariables &flags) { logFileName = filename; std::remove(logFileName.c_str()); dumpKernels = flags.DumpKernels.get(); dumpKernelArgsEnabled = flags.DumpKernelArgs.get(); logApiCalls = flags.LogApiCalls.get(); logAllocationMemoryPool = flags.LogAllocationMemoryPool.get(); } template FileLogger::~FileLogger() = default; template void FileLogger::writeToFile(std::string filename, const char *str, size_t length, std::ios_base::openmode mode) { std::ofstream outFile(filename, mode); if (outFile.is_open()) { outFile.write(str, length); outFile.close(); } } template void FileLogger::dumpKernel(const std::string &name, const std::string &src) { if (false == enabled()) { return; } if (dumpKernels) { DBG_LOG(LogApiCalls, "Kernel size", src.size(), src.c_str()); writeToFile(name + ".txt", src.c_str(), src.size(), std::ios::trunc); } } template void FileLogger::logApiCall(const char *function, bool enter, int32_t errorCode) { if (false == enabled()) { return; } if (logApiCalls) { std::unique_lock theLock(mtx); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream ss; ss << "ThreadID: " << thisThread << " "; if (enter) ss << "Function Enter: "; else ss << "Function Leave (" << errorCode << "): "; ss << function << std::endl; auto str = ss.str(); writeToFile(logFileName, str.c_str(), str.size(), std::ios::app); } } template void FileLogger::logAllocation(GraphicsAllocation const *graphicsAllocation) { if (false == enabled()) { return; } if (logAllocationMemoryPool) { std::thread::id thisThread = std::this_thread::get_id(); std::stringstream ss; ss << " ThreadID: " << thisThread; ss << " AllocationType: " << getAllocationTypeString(graphicsAllocation); ss << " MemoryPool: " << 
graphicsAllocation->getMemoryPool(); ss << graphicsAllocation->getAllocationInfoString(); ss << std::endl; auto str = ss.str(); std::unique_lock theLock(mtx); writeToFile(logFileName, str.c_str(), str.size(), std::ios::app); } } template size_t FileLogger::getInput(const size_t *input, int32_t index) { if (enabled() == false) return 0; return input != nullptr ? input[index] : 0; } template const std::string FileLogger::getEvents(const uintptr_t *input, uint32_t numOfEvents) { if (false == enabled()) { return ""; } std::stringstream os; for (uint32_t i = 0; i < numOfEvents; i++) { if (input != nullptr) { cl_event event = ((cl_event *)input)[i]; os << "cl_event " << event << ", Event " << (Event *)event << ", "; } } return os.str(); } template const std::string FileLogger::getMemObjects(const uintptr_t *input, uint32_t numOfObjects) { if (false == enabled()) { return ""; } std::stringstream os; for (uint32_t i = 0; i < numOfObjects; i++) { if (input != nullptr) { cl_mem mem = const_cast(reinterpret_cast(input)[i]); os << "cl_mem " << mem << ", MemObj " << static_cast(mem) << ", "; } } return os.str(); } template void FileLogger::dumpBinaryProgram(int32_t numDevices, const size_t *lengths, const unsigned char **binaries) { if (false == enabled()) { return; } if (dumpKernels) { if (lengths != nullptr && binaries != nullptr && lengths[0] != 0 && binaries[0] != nullptr) { std::unique_lock theLock(mtx); writeToFile("programBinary.bin", reinterpret_cast(binaries[0]), lengths[0], std::ios::trunc | std::ios::binary); } } } template void FileLogger::dumpKernelArgs(const Kernel *kernel) { if (false == enabled()) { return; } if (dumpKernelArgsEnabled && kernel != nullptr) { std::unique_lock theLock(mtx); std::ofstream outFile; for (unsigned int i = 0; i < kernel->getKernelInfo().kernelArgInfo.size(); i++) { std::string type; std::string fileName; const char *ptr = nullptr; size_t size = 0; uint64_t flags = 0; std::unique_ptr argVal = nullptr; auto &argInfo = 
kernel->getKernelInfo().kernelArgInfo[i]; if (argInfo.metadata.addressQualifier == KernelArgMetadata::AddrLocal) { type = "local"; } else if (argInfo.isImage) { type = "image"; auto clMem = (const cl_mem)kernel->getKernelArg(i); auto memObj = castToObject(clMem); if (memObj != nullptr) { ptr = static_cast(memObj->getCpuAddress()); size = memObj->getSize(); flags = memObj->getMemoryPropertiesFlags(); } } else if (argInfo.isSampler) { type = "sampler"; } else if (argInfo.isBuffer) { type = "buffer"; auto clMem = (const cl_mem)kernel->getKernelArg(i); auto memObj = castToObject(clMem); if (memObj != nullptr) { ptr = static_cast(memObj->getCpuAddress()); size = memObj->getSize(); flags = memObj->getMemoryPropertiesFlags(); } } else { type = "immediate"; auto crossThreadData = kernel->getCrossThreadData(); auto crossThreadDataSize = kernel->getCrossThreadDataSize(); argVal = std::unique_ptr(new char[crossThreadDataSize]); size_t totalArgSize = 0; for (const auto &kernelArgPatchInfo : argInfo.kernelArgPatchInfoVector) { auto pSource = ptrOffset(crossThreadData, kernelArgPatchInfo.crossthreadOffset); auto pDestination = ptrOffset(argVal.get(), kernelArgPatchInfo.sourceOffset); memcpy_s(pDestination, kernelArgPatchInfo.size, pSource, kernelArgPatchInfo.size); totalArgSize += kernelArgPatchInfo.size; } size = totalArgSize; ptr = argVal.get(); } if (ptr && size) { fileName = kernel->getKernelInfo().name + "_arg_" + std::to_string(i) + "_" + type + "_size_" + std::to_string(size) + "_flags_" + std::to_string(flags) + ".bin"; writeToFile(fileName, ptr, size, std::ios::trunc | std::ios::binary); } } } } template void FileLogger::dumpKernelArgs(const MultiDispatchInfo *multiDispatchInfo) { if (enabled() == false) { return; } if (dumpKernelArgsEnabled == false || multiDispatchInfo == nullptr) { return; } for (auto &dispatchInfo : *multiDispatchInfo) { dumpKernelArgs(dispatchInfo.getKernel()); } } template const char *FileLogger::getAllocationTypeString(GraphicsAllocation const 
*graphicsAllocation) { if (false == enabled()) { return nullptr; } auto type = graphicsAllocation->getAllocationType(); switch (type) { case GraphicsAllocation::AllocationType::BUFFER: return "BUFFER"; case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED: return "BUFFER_COMPRESSED"; case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY: return "BUFFER_HOST_MEMORY"; case GraphicsAllocation::AllocationType::COMMAND_BUFFER: return "COMMAND_BUFFER"; case GraphicsAllocation::AllocationType::CONSTANT_SURFACE: return "CONSTANT_SURFACE"; case GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER: return "DEVICE_QUEUE_BUFFER"; case GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR: return "EXTERNAL_HOST_PTR"; case GraphicsAllocation::AllocationType::FILL_PATTERN: return "FILL_PATTERN"; case GraphicsAllocation::AllocationType::GLOBAL_SURFACE: return "GLOBAL_SURFACE"; case GraphicsAllocation::AllocationType::IMAGE: return "IMAGE"; case GraphicsAllocation::AllocationType::INDIRECT_OBJECT_HEAP: return "INDIRECT_OBJECT_HEAP"; case GraphicsAllocation::AllocationType::INSTRUCTION_HEAP: return "INSTRUCTION_HEAP"; case GraphicsAllocation::AllocationType::INTERNAL_HEAP: return "INTERNAL_HEAP"; case GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY: return "INTERNAL_HOST_MEMORY"; case GraphicsAllocation::AllocationType::KERNEL_ISA: return "KERNEL_ISA"; case GraphicsAllocation::AllocationType::LINEAR_STREAM: return "LINEAR_STREAM"; case GraphicsAllocation::AllocationType::MAP_ALLOCATION: return "MAP_ALLOCATION"; case GraphicsAllocation::AllocationType::MCS: return "MCS"; case GraphicsAllocation::AllocationType::PIPE: return "PIPE"; case GraphicsAllocation::AllocationType::PREEMPTION: return "PREEMPTION"; case GraphicsAllocation::AllocationType::PRINTF_SURFACE: return "PRINTF_SURFACE"; case GraphicsAllocation::AllocationType::PRIVATE_SURFACE: return "PRIVATE_SURFACE"; case GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER: return "PROFILING_TAG_BUFFER"; 
case GraphicsAllocation::AllocationType::SCRATCH_SURFACE: return "SCRATCH_SURFACE"; case GraphicsAllocation::AllocationType::SHARED_BUFFER: return "SHARED_BUFFER"; case GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE: return "SHARED_CONTEXT_IMAGE"; case GraphicsAllocation::AllocationType::SHARED_IMAGE: return "SHARED_IMAGE"; case GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY: return "SHARED_RESOURCE_COPY"; case GraphicsAllocation::AllocationType::SURFACE_STATE_HEAP: return "SURFACE_STATE_HEAP"; case GraphicsAllocation::AllocationType::SVM_CPU: return "SVM_CPU"; case GraphicsAllocation::AllocationType::SVM_GPU: return "SVM_GPU"; case GraphicsAllocation::AllocationType::SVM_ZERO_COPY: return "SVM_ZERO_COPY"; case GraphicsAllocation::AllocationType::TAG_BUFFER: return "TAG_BUFFER"; case GraphicsAllocation::AllocationType::GLOBAL_FENCE: return "GLOBAL_FENCE"; case GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER: return "TIMESTAMP_PACKET_TAG_BUFFER"; case GraphicsAllocation::AllocationType::UNKNOWN: return "UNKNOWN"; case GraphicsAllocation::AllocationType::WRITE_COMBINED: return "WRITE_COMBINED"; default: return "ILLEGAL_VALUE"; } } template class FileLogger; template class FileLogger; template class FileLogger; } // namespace NEO compute-runtime-20.13.16352/opencl/source/utilities/logger.h000066400000000000000000000151741363734646600236100ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include #include #include #include #include #include #include namespace NEO { class Kernel; struct MultiDispatchInfo; template class FileLogger { public: FileLogger(std::string filename, const DebugVariables &flags); ~FileLogger(); FileLogger(const FileLogger &) = delete; FileLogger &operator=(const FileLogger &) = delete; static constexpr bool enabled() { return DebugLevel == DebugFunctionalityLevel::Full; } 
void dumpKernel(const std::string &name, const std::string &src); void logApiCall(const char *function, bool enter, int32_t errorCode); void logAllocation(GraphicsAllocation const *graphicsAllocation); size_t getInput(const size_t *input, int32_t index); const std::string getEvents(const uintptr_t *input, uint32_t numOfEvents); const std::string getMemObjects(const uintptr_t *input, uint32_t numOfObjects); MOCKABLE_VIRTUAL void writeToFile(std::string filename, const char *str, size_t length, std::ios_base::openmode mode); void dumpBinaryProgram(int32_t numDevices, const size_t *lengths, const unsigned char **binaries); void dumpKernelArgs(const Kernel *kernel); void dumpKernelArgs(const MultiDispatchInfo *multiDispatchInfo); const std::string getSizes(const uintptr_t *input, uint32_t workDim, bool local) { if (false == enabled()) { return ""; } std::stringstream os; std::string workSize; if (local) { workSize = "localWorkSize"; } else { workSize = "globalWorkSize"; } for (uint32_t i = 0; i < workDim; i++) { if (input != nullptr) { os << workSize << "[" << i << "]: \t" << input[i] << "\n"; } } return os.str(); } const std::string infoPointerToString(const void *paramValue, size_t paramSize) { if (false == enabled()) { return ""; } std::stringstream os; if (paramValue) { switch (paramSize) { case sizeof(uint32_t): os << *(uint32_t *)paramValue; break; case sizeof(uint64_t): os << *(uint64_t *)paramValue; break; case sizeof(uint8_t): os << (uint32_t)(*(uint8_t *)paramValue); break; default: break; } } return os.str(); } // Expects pairs of args (even number of args) template void logInputs(Types &&... 
params) { if (enabled()) { if (logApiCalls) { std::unique_lock theLock(mtx); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream ss; ss << "------------------------------\n"; printInputs(ss, "ThreadID", thisThread, params...); ss << "------------------------------" << std::endl; writeToFile(logFileName, ss.str().c_str(), ss.str().length(), std::ios::app); } } } template void logLazyEvaluateArgs(bool predicate, FT &&callable) { if (enabled()) { if (predicate) { callable(); } } } template void log(bool enableLog, Types... params) { if (enabled()) { if (enableLog) { std::unique_lock theLock(mtx); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream ss; print(ss, "ThreadID", thisThread, params...); writeToFile(logFileName, ss.str().c_str(), ss.str().length(), std::ios::app); } } } const char *getLogFileName() { return logFileName.c_str(); } void setLogFileName(std::string filename) { logFileName = filename; } const char *getAllocationTypeString(GraphicsAllocation const *graphicsAllocation); bool peekLogApiCalls() { return logApiCalls; } protected: std::mutex mtx; std::string logFileName; bool dumpKernels = false; bool dumpKernelArgsEnabled = false; bool logApiCalls = false; bool logAllocationMemoryPool = false; // Required for variadic template with 0 args passed void printInputs(std::stringstream &ss) {} // Prints inputs in format: InputName: InputValue \newline template void printInputs(std::stringstream &ss, T1 first, Types... params) { if (enabled()) { const size_t argsLeft = sizeof...(params); ss << "\t" << first; if (argsLeft % 2) { ss << ": "; } else { ss << std::endl; } printInputs(ss, params...); } } // Required for variadic template with 0 args passed void print(std::stringstream &ss) {} template void print(std::stringstream &ss, T1 first, Types... 
params) { if (enabled()) { const size_t argsLeft = sizeof...(params); ss << first << " "; if (argsLeft == 0) { ss << std::endl; } print(ss, params...); } } }; extern FileLogger &FileLoggerInstance(); template class LoggerApiEnterWrapper { public: LoggerApiEnterWrapper(const char *funcName, const int *errorCode) : funcName(funcName), errorCode(errorCode) { if (Enabled) { FileLoggerInstance().logApiCall(funcName, true, 0); } } ~LoggerApiEnterWrapper() { if (Enabled) { FileLoggerInstance().logApiCall(funcName, false, (errorCode != nullptr) ? *errorCode : 0); } } const char *funcName; const int *errorCode; }; }; // namespace NEO #define DBG_LOG_LAZY_EVALUATE_ARGS(LOGGER, PREDICATE, LOG_FUNCTION, ...) \ LOGGER.logLazyEvaluateArgs(PREDICATE, [&] { LOGGER.LOG_FUNCTION(__VA_ARGS__); }) #define DBG_LOG(PREDICATE, ...) \ DBG_LOG_LAZY_EVALUATE_ARGS(NEO::FileLoggerInstance(), NEO::DebugManager.flags.PREDICATE.get(), log, NEO::DebugManager.flags.PREDICATE.get(), __VA_ARGS__) #define DBG_LOG_INPUTS(...) 
\ DBG_LOG_LAZY_EVALUATE_ARGS(NEO::FileLoggerInstance(), NEO::FileLoggerInstance().peekLogApiCalls(), logInputs, __VA_ARGS__) compute-runtime-20.13.16352/opencl/test/000077500000000000000000000000001363734646600176145ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/000077500000000000000000000000001363734646600216325ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/.clang-tidy000066400000000000000000000035441363734646600236740ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-core.StackAddressEscape,-clang-analyzer-optin.performance.Padding,-clang-analyzer-security.insecureAPI.strcpy,-clang-analyzer-cplusplus.NewDeleteLeaks,-clang-analyzer-core.CallAndMessage,-clang-analyzer-core.uninitialized.Assign,-clang-analyzer-unix.MismatchedDeallocator,-clang-analyzer-core.NonNullParamChecker,-clang-analyzer-core.NullDereference,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-optin.cplusplus.VirtualCall' # WarningsAsErrors: '.*' HeaderFilterRegex: '/runtime/|/core/|/offline_compiler/' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: 
modernize-use-default-member-init.UseAssignment value: '1' ... compute-runtime-20.13.16352/opencl/test/unit_test/CMakeLists.txt000066400000000000000000000516411363734646600244010ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # project(igdrcl_tests) set(OPENCL_TEST_PROJECTS_FOLDER "opencl runtime") set(PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER "${OPENCL_TEST_PROJECTS_FOLDER}/test platforms") set(OPENCL_UNIT_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # disable optimizations for ults if(UNIX) string(REPLACE "-O2" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) #disable fortify source as this requires optimization to be on string(REPLACE "-Wp,-D_FORTIFY_SOURCE=2" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0") set(CMAKE_CXX_FLAGS_RELEASEINTERNAL "${CMAKE_CXX_FLAGS_RELEASEINTERNAL} -O0") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O0") set(CMAKE_C_FLAGS_RELEASEINTERNAL "${CMAKE_C_FLAGS_RELEASEINTERNAL} -O0") endif() if(WIN32) string(REPLACE "/O2" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "/O2" "/Od" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REPLACE "/O2" "/Od" CMAKE_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE}) string(REPLACE "/O2" "/Od" CMAKE_CXX_FLAGS_RELEASEINTERNAL ${CMAKE_CXX_FLAGS_RELEASEINTERNAL}) string(REPLACE "/O2" "/Od" CMAKE_C_FLAGS_RELEASEINTERNAL ${CMAKE_C_FLAGS_RELEASEINTERNAL}) endif() function(ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION) set(NEO_SUPPORTED_PRODUCT_FAMILIES ${ALL_PRODUCT_FAMILY_LIST}) string(REPLACE ";" "," NEO_SUPPORTED_PRODUCT_FAMILIES "${NEO_SUPPORTED_PRODUCT_FAMILIES}") add_definitions(-DSUPPORTED_TEST_PRODUCT_FAMILIES=${NEO_SUPPORTED_PRODUCT_FAMILIES}) endfunction() ADD_SUPPORTED_TEST_PRODUCT_FAMILIES_DEFINITION() link_libraries(${ASAN_LIBS} ${TSAN_LIBS}) add_custom_target(unit_tests) add_custom_target(prepare_test_kernels) 
add_custom_target(run_unit_tests ALL) set(IGDRCL_SRCS_tests_local ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/libult/os_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ult_configuration.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/tests_configuration.h ) if(WIN32) list(APPEND IGDRCL_SRCS_tests_local ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/wddm_create.cpp) endif() add_subdirectory(libult) hide_subdir(libult) hide_subdir(linux) if(UNIX) add_subdirectory(linux) add_custom_command( TARGET run_unit_tests POST_BUILD COMMAND echo running tests for linux dynamic library - .so in ${TargetDir} COMMAND igdrcl_linux_dll_tests ${IGDRCL_TESTS_LISTENER_OPTION} COMMAND WORKING_DIRECTORY ${TargetDir} ) if(PRE_ULT_COMMAND) add_custom_command( TARGET unit_tests POST_BUILD COMMAND echo running ${PRE_ULT_COMMAND} utility before ULTs execution COMMAND ${PRE_ULT_COMMAND} ) endif() endif() set(NEO_IGDRCL_TESTS__TARGET_OBJECTS $ $ $ $ $ $ $ $ $ ) if(DEFINED AUB_STREAM_DIR) add_library (aub_stream_mock_lib OBJECT EXCLUDE_FROM_ALL ${NEO_SOURCE_DIR}/opencl/test/unit_test/aub_stream_mocks/aub_stream_interface_mock.cpp ) list(APPEND NEO_IGDRCL_TESTS__TARGET_OBJECTS $ $ ) else() list(APPEND IGDRCL_SRCS_tests_local ${NEO_SOURCE_DIR}/opencl/source/aub/aub_stream_interface.cpp) endif() add_executable(igdrcl_tests ${NEO_IGDRCL_TESTS__TARGET_OBJECTS} ${IGDRCL_SRCS_tests_local} ) include(${NEO_SOURCE_DIR}/opencl/test/unit_test/core_unit_tests_files.cmake) hide_subdir(gen_common) add_subdirectory(gen_common) if(NOT GTEST_EXCEPTION_OPTIONS) set(GTEST_EXCEPTION_OPTIONS --gtest_catch_exceptions=1) endif() message(STATUS "GTest exception options set to ${GTEST_EXCEPTION_OPTIONS}") if(GTEST_FILTERING_PATTERN) set(GTEST_FILTER_OPTION "--gtest_filter=${GTEST_FILTERING_PATTERN}") message(STATUS "GTest filter for regular tests: ${GTEST_FILTERING_PATTERN}") endif() if(NOT MSVC) set_source_files_properties(helpers/uint16_sse4_tests.cpp PROPERTIES COMPILE_FLAGS -msse4.2) 
endif() target_link_libraries(igdrcl_tests ${NEO_STATICALLY_LINKED_LIBRARIES_MOCKABLE}) target_link_libraries(igdrcl_tests igdrcl_mocks) option(SHOW_VERBOSE_UTESTS_RESULTS "Use the default/verbose test output" OFF) if(NOT SHOW_VERBOSE_UTESTS_RESULTS) set(IGDRCL_TESTS_LISTENER_OPTION "--disable_default_listener") else() set(IGDRCL_TESTS_LISTENER_OPTION "--enable_default_listener") endif() target_include_directories(igdrcl_tests PRIVATE ${NEO_SOURCE_DIR}/opencl/test/unit_test/mocks${BRANCH_DIR_SUFFIX} ${ENGINE_NODE_DIR} ${KHRONOS_GL_HEADERS_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ) target_link_libraries(igdrcl_tests gmock-gtest ${IGDRCL_EXTRA_LIBS}) set(BUILT_IN_KERNEL_DIR "${NEO_SOURCE_DIR}/shared/source/built_ins") function(neo_copy_test_files target product) set(outputdir "${TargetDir}/${product}") add_custom_target(${target}) add_custom_command( TARGET ${target} POST_BUILD COMMAND echo deleting and re-creating ${product} cache directory... COMMAND ${CMAKE_COMMAND} -E remove_directory ${outputdir}/cl_cache COMMAND ${CMAKE_COMMAND} -E make_directory ${outputdir}/cl_cache COMMAND echo copying built-in kernel files from ${BUILT_IN_KERNEL_DIR}/kernels to ${outputdir}/test_files COMMAND ${CMAKE_COMMAND} -E copy_directory ${BUILT_IN_KERNEL_DIR}/kernels ${outputdir}/test_files COMMAND echo copying test files from ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files to ${outputdir}/test_files COMMAND ${CMAKE_COMMAND} -E copy_directory ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files ${outputdir}/test_files COMMAND WORKING_DIRECTORY ${TargetDir} DEPENDS ${NEO_SOURCE_DIR}/opencl/test/unit_test/test_files ) add_dependencies(${target} copy_compiler_files) set_target_properties(${target} PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}") endfunction() add_dependencies(unit_tests igdrcl_tests test_dynamic_lib prepare_test_kernels ) set_target_properties(igdrcl_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_property(TARGET igdrcl_tests APPEND_STRING 
PROPERTY COMPILE_FLAGS ${ASAN_FLAGS})

# On UNIX, build the unit tests with debug information.
if(UNIX)
  set_property(TARGET igdrcl_tests APPEND_STRING PROPERTY COMPILE_FLAGS " -g")
endif()

# Group the helper targets under the OpenCL test folder in IDE generators.
set_target_properties(unit_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER})
set_target_properties(prepare_test_kernels PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER})
set_target_properties(run_unit_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER})

target_include_directories(igdrcl_tests BEFORE PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros${BRANCH_DIR_SUFFIX})

# Prefix used to invoke the offline compiler (ocloc) in the custom commands
# below. It is only set here when the including scope has not defined it.
# NOTE(review): the bare "$" tokens below look like CMake generator
# expressions ("$<...>") whose angle-bracket contents were lost in this copy
# of the file - confirm against the upstream CMakeLists.txt.
if(NOT DEFINED cloc_cmd_prefix)
  if(WIN32)
    set(cloc_cmd_prefix ocloc)
  else()
    if(DEFINED NEO__IGC_LIBRARY_PATH)
      set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH}:$ $)
    else()
      set(cloc_cmd_prefix LD_LIBRARY_PATH=$ $)
    endif()
  endif()
endif()

# neo_gen_kernels(platform_name_with_type platform_name suffix [files...])
#
# For every kernel source passed in ARGN, registers a custom command that runs
# ${cloc_cmd_prefix} for device "platform_name" from the directory of the
# source file. The declared outputs are <basename>_<suffix>.spv/.bin/.gen in
# ${TargetDir}/<suffix>/test_files/${NEO_ARCH}/.
# All declared outputs are appended to
# kernels_to_compile_<platform_name_with_type> in the caller's scope.
function(neo_gen_kernels platform_name_with_type platform_name suffix)
  # outputdir already ends with "/", so it is concatenated directly below.
  set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}/")
  set(kernels_to_compile)

  foreach(filepath ${ARGN})
    get_filename_component(filename ${filepath} NAME)
    get_filename_component(basename ${filepath} NAME_WE)
    get_filename_component(workdir ${filepath} DIRECTORY)

    set(outputpath_base "${outputdir}${basename}_${suffix}")
    set(output_files
      ${outputpath_base}.spv
      ${outputpath_base}.bin
      ${outputpath_base}.gen
    )

    add_custom_command(
      OUTPUT ${output_files}
      COMMAND ${cloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir}
      WORKING_DIRECTORY ${workdir}
      DEPENDS ${filepath} ocloc
    )

    list(APPEND kernels_to_compile ${output_files})
  endforeach()

  # Publish the accumulated outputs to the caller.
  list(APPEND kernels_to_compile_${platform_name_with_type} ${kernels_to_compile})
  set(kernels_to_compile_${platform_name_with_type} ${kernels_to_compile_${platform_name_with_type}} PARENT_SCOPE)
endfunction()

# neo_gen_kernels_with_options(platform_name_with_type platform_name suffix filepath [options...])
#
# "filepath" is iterated as a list (callers in this file pass either a single
# path or a quoted list). Every extra argument (ARGN) is one option string.
# One command is registered per (file, option) pair. The option, with spaces
# replaced by underscores, is appended to each declared output name after the
# extension, and the compiler is run with "-options <option> -options_name".
# The working directory is ${CMAKE_CURRENT_SOURCE_DIR}/<directory of file>/.
function(neo_gen_kernels_with_options platform_name_with_type platform_name suffix filepath)
  set(kernels_to_compile)

  foreach(filearg ${filepath})
    get_filename_component(filename ${filearg} NAME)
    get_filename_component(basename ${filearg} NAME_WE)
    get_filename_component(base_workdir ${filearg} DIRECTORY)

    set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}/")
    set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/")

    foreach(arg ${ARGN})
      string(REPLACE " " "_" argwospaces ${arg})

      # NOTE(review): outputdir already ends with "/", so this path contains
      # "//", unlike neo_gen_kernels above.
      set(outputpath_base "${outputdir}/${basename}_${suffix}")
      set(output_files
        ${outputpath_base}.spv${argwospaces}
        ${outputpath_base}.bin${argwospaces}
        ${outputpath_base}.gen${argwospaces}
      )

      add_custom_command(
        OUTPUT ${output_files}
        COMMAND ${cloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -options ${arg} -options_name
        WORKING_DIRECTORY ${workdir}
        DEPENDS ${filearg} ocloc
      )

      list(APPEND kernels_to_compile ${output_files})
    endforeach()
  endforeach()

  list(APPEND kernels_to_compile_${platform_name_with_type} ${kernels_to_compile})
  set(kernels_to_compile_${platform_name_with_type} ${kernels_to_compile_${platform_name_with_type}} PARENT_SCOPE)
endfunction()

# neo_gen_kernels_with_internal_options(platform_name_with_type platform_name suffix filepath [options...])
#
# Like neo_gen_kernels_with_options, but each option is passed through
# "-internal_options" and the declared output names carry no option suffix.
# NOTE(review): because the output names do not depend on the option, passing
# more than one option would declare the same outputs more than once. The
# callers in this file pass a single option string.
function(neo_gen_kernels_with_internal_options platform_name_with_type platform_name suffix filepath)
  set(kernels_to_compile)

  foreach(filearg ${filepath})
    get_filename_component(filename ${filearg} NAME)
    get_filename_component(basename ${filearg} NAME_WE)
    get_filename_component(base_workdir ${filearg} DIRECTORY)

    set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}/")
    set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/")

    foreach(arg ${ARGN})
      set(outputpath_base "${outputdir}/${basename}_${suffix}")
      set(output_files
        ${outputpath_base}.spv
        ${outputpath_base}.bin
        ${outputpath_base}.gen
      )

      add_custom_command(
        OUTPUT ${output_files}
        COMMAND ${cloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -internal_options ${arg}
        WORKING_DIRECTORY ${workdir}
        DEPENDS ${filearg} ocloc
      )

      list(APPEND kernels_to_compile ${output_files})
    endforeach()
  endforeach()

  list(APPEND kernels_to_compile_${platform_name_with_type} ${kernels_to_compile})
  set(kernels_to_compile_${platform_name_with_type} ${kernels_to_compile_${platform_name_with_type}} PARENT_SCOPE)
endfunction()

# Internal option used by neo_gen_kernel_with_kernel_debug_options below.
set(TEST_KERNEL_kernel_debug_enable
  "-cl-kernel-debug-enable"
)

# neo_gen_kernel_with_kernel_debug_options(platform_name_with_type platform_name suffix filepath)
#
# Registers one command that compiles "filepath" with
# "-internal_options ${TEST_KERNEL_kernel_debug_enable} -options -g".
# The output base name is derived from the option string (and handed to the
# compiler through "-output"), not from the source file name: "basename" is
# computed but not used. A .dbg file is declared in addition to .spv/.bin/.gen.
function(neo_gen_kernel_with_kernel_debug_options platform_name_with_type platform_name suffix filepath)
  get_filename_component(filename ${filepath} NAME)
  get_filename_component(basename ${filepath} NAME_WE)
  get_filename_component(base_workdir ${filepath} DIRECTORY)

  set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}/")
  set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/${base_workdir}/")

  string(REPLACE " " "_" argwospaces ${TEST_KERNEL_kernel_debug_enable})

  set(outputpath_base "${outputdir}/${argwospaces}_${suffix}")
  set(output_files
    ${outputpath_base}.spv
    ${outputpath_base}.bin
    ${outputpath_base}.gen
    ${outputpath_base}.dbg
  )

  add_custom_command(
    OUTPUT ${output_files}
    COMMAND ${cloc_cmd_prefix} -q -file ${filename} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -output ${argwospaces} -internal_options ${TEST_KERNEL_kernel_debug_enable} -options "-g"
    WORKING_DIRECTORY ${workdir}
    DEPENDS ${filepath} ocloc
  )

  list(APPEND kernels_to_compile_${platform_name_with_type} ${output_files})
  set(kernels_to_compile_${platform_name_with_type} ${kernels_to_compile_${platform_name_with_type}} PARENT_SCOPE)
endfunction()

# neo_gen_kernel_from_ll(platform_name_with_type platform_name suffix filepath output_name compile_options)
#
# Registers one command that compiles "filepath" with "-llvm_input" and names
# the outputs <output_name>_<suffix>.bin/.gen. compile_options is wrapped in
# literal double quotes before being passed through "-internal_options".
# The working directory is always ${CMAKE_CURRENT_SOURCE_DIR}/test_files/;
# "basename" is computed but not used.
function(neo_gen_kernel_from_ll platform_name_with_type platform_name suffix filepath output_name compile_options)
  get_filename_component(filename ${filepath} NAME)
  get_filename_component(basename ${filepath} NAME_WE)

  set(outputdir "${TargetDir}/${suffix}/test_files/${NEO_ARCH}")
  set(workdir "${CMAKE_CURRENT_SOURCE_DIR}/test_files/")

  set(outputpath_base "${outputdir}/${output_name}_${suffix}")
  set(output_files
    ${outputpath_base}.bin
    ${outputpath_base}.gen
  )

  string(CONCAT compile_options \" ${compile_options} \" )

  add_custom_command(
    OUTPUT ${output_files}
    COMMAND ${cloc_cmd_prefix} -q -file ${filename} -output ${output_name} -device ${platform_name} -${NEO_BITS} -out_dir ${outputdir} -internal_options ${compile_options} -llvm_input
    WORKING_DIRECTORY ${workdir}
    DEPENDS ${filepath} ocloc
  )

  list(APPEND kernels_to_compile_${platform_name_with_type} ${output_files})
  set(kernels_to_compile_${platform_name_with_type} ${kernels_to_compile_${platform_name_with_type}} PARENT_SCOPE)
endfunction()

# Kernel sources and option sets consumed by macro_for_each_gen below.
set(TEST_KERNEL test_files/CopyBuffer_simd16.cl)

# Each entry is one option string; an entry containing spaces stays a single
# argument when passed to neo_gen_kernels_with_options.
set(TEST_KERNEL_options
  "-cl-fast-relaxed-math"
  "-cl-finite-math-only"
  "-cl-kernel-arg-info"
  "-x spir -spir-std=1.2"
)

set(TEST_KERNEL_2_0_options
  "-cl-std=CL2.0"
)

set(TEST_KERNEL_2_0
  test_files/simple_block_kernel.cl
  test_files/simple_nonuniform.cl
)

set(TEST_KERNEL_STATELESS_internal_options
  "-cl-intel-greater-than-4GB-buffer-required"
)

set(TEST_KERNEL_STATELESS_internal_options_gen9lp
  "-cl-intel-greater-than-4GB-buffer-required -m32"
)

set(TEST_KERNEL_STATELESS
  test_files/stateless_kernel.cl
)

set(TEST_KERNEL_VME
  ${CMAKE_CURRENT_SOURCE_DIR}/test_files/vme_kernels.cl
  ${CMAKE_CURRENT_SOURCE_DIR}/test_files/media_kernels_backend.cl
  ${CMAKE_CURRENT_SOURCE_DIR}/test_files/media_kernels_frontend.cl
)

set(TEST_KERNEL_SIP_DEBUG_options
  "-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0"
)

set(TEST_KERNEL_SIP_DEBUG_LOCAL_options
  "-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0"
)

# Generic kernel list: every .cl under test_files, minus the kernels that are
# compiled separately below (2.0, stateless, VME) and shouldfail.cl.
file(GLOB_RECURSE TEST_KERNELS test_files/*.cl)
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/shouldfail.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/simple_block_kernel.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/simple_nonuniform.cl")
list(REMOVE_ITEM TEST_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/test_files/stateless_kernel.cl")
list(REMOVE_ITEM TEST_KERNELS ${TEST_KERNEL_VME} )

# Per-gen body: for each platform type the current gen has, registers the
# kernel compilation commands and a prepare_test_kernels_<family> target that
# depends on their outputs.
# NOTE(review): GEN_TYPE, GEN_TYPE_LOWER and family_name_with_type are not set
# here; presumably they come from apply_macro_for_each_gen and
# get_family_name_with_type, which are defined elsewhere - confirm.
macro(macro_for_each_gen)
  foreach(PLATFORM_TYPE ${PLATFORM_TYPES})
    if(${GEN_TYPE}_HAS_${PLATFORM_TYPE})
      get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE})
      string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
      set(PLATFORM_LOWER ${DEFAULT_SUPPORTED_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM})
      set(PLATFORM_2_0_LOWER ${DEFAULT_SUPPORTED_2_0_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM})
      set(PLATFORM_VME_LOWER ${DEFAULT_SUPPORTED_VME_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM})

      # Start from the generic list and drop the kernels blacklisted for this gen.
      set(PLATFORM_TEST_KERNELS ${TEST_KERNELS})
      foreach(KERNEL_TO_REMOVE ${${GEN_TYPE}_TEST_KERNELS_BLACKLIST})
        set(KERNEL_TO_REMOVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/test_files/${KERNEL_TO_REMOVE}")
        list(REMOVE_ITEM PLATFORM_TEST_KERNELS ${KERNEL_TO_REMOVE_PATH})
      endforeach()

      # Kernel commands are only registered for MSVC or 64-bit builds.
      if(MSVC OR CMAKE_SIZEOF_VOID_P EQUAL 8)
        neo_gen_kernels(${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${PLATFORM_TEST_KERNELS})
        neo_gen_kernels_with_options(${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${TEST_KERNEL} ${TEST_KERNEL_options})

        # Temporarily disabled debug kernel generation on gen8
        if(NOT ("${GEN_TYPE_LOWER}" STREQUAL "gen8"))
          neo_gen_kernel_with_kernel_debug_options(${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${TEST_KERNEL})
        endif()

        # Gen9lp needs extra -m32 flag
        if( ("${GEN_TYPE_LOWER}" STREQUAL "gen9") AND ("${PLATFORM_TYPE_LOWER}" STREQUAL "lp"))
          neo_gen_kernels_with_internal_options(${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${TEST_KERNEL_STATELESS} ${TEST_KERNEL_STATELESS_internal_options_gen9lp})
        else()
          neo_gen_kernels_with_internal_options(${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${TEST_KERNEL_STATELESS} ${TEST_KERNEL_STATELESS_internal_options})
        endif()

        # These variables are only consumed by the commented-out SIP kernel
        # generation below.
        set(sip_kernel_file_name)
        set(sip_kernel_output_file)
        set(sip_debug_kernel_output_file)
        set(sip_debug_local_kernel_output_file)

        list(APPEND sip_kernel_file_name "opencl/test/unit_test/test_files/sip_dummy_kernel_${NEO_BITS}.ll")
        list(APPEND sip_debug_kernel_output_file "sip_dummy_kernel_debug_${NEO_BITS}")
        list(APPEND sip_debug_local_kernel_output_file "sip_dummy_kernel_debug_local_${NEO_BITS}")

        # Temporarily disabled sip kernel generation
        # if("${GEN_TYPE_LOWER}" STREQUAL "gen9" )
        #   neo_gen_kernel_from_ll(${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${sip_kernel_file_name} ${sip_debug_local_kernel_output_file} ${TEST_KERNEL_SIP_DEBUG_LOCAL_options})
        # endif()
        # neo_gen_kernel_from_ll(${family_name_with_type} ${PLATFORM_LOWER} ${family_name_with_type} ${sip_kernel_file_name} ${sip_debug_kernel_output_file} ${TEST_KERNEL_SIP_DEBUG_options})

        # TEST_KERNEL_2_0 is quoted so the whole list arrives as the single
        # "filepath" argument.
        if(PLATFORM_2_0_LOWER)
          neo_gen_kernels_with_options(${family_name_with_type} ${PLATFORM_2_0_LOWER} ${family_name_with_type} "${TEST_KERNEL_2_0}" ${TEST_KERNEL_2_0_options})
        endif()
        if(PLATFORM_VME_LOWER)
          neo_gen_kernels(${family_name_with_type} ${PLATFORM_VME_LOWER} ${family_name_with_type} ${TEST_KERNEL_VME})
        endif()
      endif()

      # Per-family aggregate target, hooked into the global prepare_test_kernels.
      add_custom_target(prepare_test_kernels_${family_name_with_type} DEPENDS ${kernels_to_compile_${family_name_with_type}} copy_compiler_files)
      set_target_properties(prepare_test_kernels_${family_name_with_type} PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${family_name_with_type}")
      add_dependencies(prepare_test_kernels prepare_test_kernels_${family_name_with_type})
    endif()
  endforeach()
endmacro()

apply_macro_for_each_gen("TESTED")
add_subdirectories()
create_project_source_tree(igdrcl_tests ${NEO_SOURCE_DIR}/runtime)

# Precompiled header setup. UltSources is captured here, so it holds the
# sources attached to igdrcl_tests up to this point.
set(UltPchHeader "${CMAKE_CURRENT_SOURCE_DIR}/igdrcl_tests_pch.h")
set(UltPchSource "${CMAKE_CURRENT_SOURCE_DIR}/igdrcl_tests_pch.cpp")
get_target_property(UltSources igdrcl_tests SOURCES)

if(MSVC AND NOT DISABLE_ULT_PCH_WIN)
  # MSVC: igdrcl_tests_pch.cpp creates the .pch (/Yc); every source captured
  # in UltSources uses it (/Yu) and force-includes the header (/FI).
  set(UltPchBinary "${CMAKE_CURRENT_BINARY_DIR}/igdrcl_tests_pch.pch")
  set(IGDRCL_SRCS_ult_pch ${UltPchSource} ${UltPchHeader})
  target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_ult_pch})

  set_source_files_properties(${UltSources}
    PROPERTIES COMPILE_FLAGS "/Yu${UltPchHeader} /FI${UltPchHeader} /Fp${UltPchBinary}"
    OBJECT_DEPENDS "${UltPchBinary}")

  set_source_files_properties(${UltPchSource}
    PROPERTIES COMPILE_FLAGS "/Yc${UltPchHeader} /FI${UltPchHeader} /Fp${UltPchBinary}"
    OBJECT_OUTPUTS "${UltPchBinary}")
elseif(USE_ULT_PCH)
  # Other compilers: the header is compiled as a C++ header inside a helper
  # static library; the resulting object file is then copied next to a copy of
  # the header as <header>.gch.
  set(UltPchHeaderInBuildDir "${CMAKE_CURRENT_BINARY_DIR}/igdrcl_tests_pch.h")
  set(UltPchBinaryGch "${UltPchHeaderInBuildDir}.gch")
  set(UltPchBinary "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/igdrcl_ult_pch.dir/igdrcl_tests_pch.h.o")

  add_library(igdrcl_ult_pch STATIC EXCLUDE_FROM_ALL ${UltPchHeader})
  add_dependencies(igdrcl_tests igdrcl_ult_pch)

  # NOTE(review): the bare "$" tokens in the next two commands look like
  # generator expressions whose "<...>" contents were lost in this copy -
  # confirm against the upstream CMakeLists.txt.
  target_include_directories(igdrcl_ult_pch PRIVATE $ $)
  target_compile_definitions(igdrcl_ult_pch PRIVATE $)

  target_include_directories(igdrcl_tests PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
  if(NOT USE_ASAN)
    set_source_files_properties(${UltSources} PROPERTIES COMPILE_FLAGS "-include ${UltPchHeaderInBuildDir} -msse4" OBJECT_DEPENDS ${UltPchBinaryGch})
  endif()

  set_source_files_properties(${UltPchHeader} PROPERTIES LANGUAGE "CXX" COMPILE_FLAGS "-x c++-header -msse4 -gdwarf-2")

  add_custom_command(
    OUTPUT ${UltPchBinaryGch}
    COMMAND cp "${UltPchHeader}" "${UltPchHeaderInBuildDir}"
    COMMAND cp "${UltPchBinary}" "${UltPchBinaryGch}"
    DEPENDS ${UltPchBinary})
endif()

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# !!                                                                             !!
# !!  DONT ADD ANY SOURCES HERE!                                                 !!
# !!                                                                             !!
# !!  You are below PCH logic!                                                   !!
# !!  This is to keep PCH dependencies correctly without creating new target     !!
# !!                                                                             !!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
compute-runtime-20.13.16352/opencl/test/unit_test/abort.cpp000066400000000000000000000003641363734646600234500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/abort.h" #include namespace NEO { void abortExecution() { throw std::exception(); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/accelerators/000077500000000000000000000000001363734646600243015ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/accelerators/CMakeLists.txt000066400000000000000000000004451363734646600270440ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_accelerators ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/media_image_arg_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_accelerators}) compute-runtime-20.13.16352/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp000066400000000000000000000114511363734646600313030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/surface.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" using namespace NEO; class MediaImageSetArgTest : public DeviceFixture, public testing::Test { public: MediaImageSetArgTest() { memset(&kernelHeader, 0, sizeof(kernelHeader)); } protected: void SetUp() override { DeviceFixture::SetUp(); pKernelInfo = std::make_unique(); program = 
std::make_unique(*pDevice->getExecutionEnvironment()); kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; pKernelInfo->isVmeWorkload = true; pKernelInfo->kernelArgInfo.resize(2); pKernelInfo->kernelArgInfo[1].offsetHeap = 0x00; pKernelInfo->kernelArgInfo[0].offsetHeap = 0x40; pKernelInfo->kernelArgInfo[1].isMediaImage = true; pKernelInfo->kernelArgInfo[0].isMediaImage = true; pKernelInfo->kernelArgInfo[1].isImage = true; pKernelInfo->kernelArgInfo[0].isImage = true; pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); ASSERT_EQ(true, pKernel->isVmeKernel()); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); ASSERT_NE(nullptr, srcImage); } void TearDown() override { delete srcImage; delete pKernel; delete context; DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockContext *context; std::unique_ptr program; MockKernel *pKernel = nullptr; SKernelBinaryHeaderCommon kernelHeader; std::unique_ptr pKernelInfo; char surfaceStateHeap[0x80]; Image *srcImage = nullptr; }; HWTEST_F(MediaImageSetArgTest, setKernelArgImage) { typedef typename FamilyType::MEDIA_SURFACE_STATE MEDIA_SURFACE_STATE; auto pSurfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); srcImage->setMediaImageArg(const_cast(pSurfaceState)); SurfaceOffsets surfaceOffsets; srcImage->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress() + surfaceOffsets.offset, pSurfaceState->getSurfaceBaseAddress()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(MediaImageSetArgTest, 
clSetKernelArgImage) { typedef typename FamilyType::MEDIA_SURFACE_STATE MEDIA_SURFACE_STATE; cl_mem memObj = srcImage; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto pSurfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); uint64_t surfaceAddress = pSurfaceState->getSurfaceBaseAddress(); ASSERT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); EXPECT_EQ(srcImage->getImageDesc().image_width, pSurfaceState->getWidth()); EXPECT_EQ(srcImage->getImageDesc().image_height, pSurfaceState->getHeight()); typename FamilyType::MEDIA_SURFACE_STATE::TILE_MODE tileMode; if (srcImage->isTiledAllocation()) { tileMode = FamilyType::MEDIA_SURFACE_STATE::TILE_MODE_TILEMODE_YMAJOR; } else { tileMode = FamilyType::MEDIA_SURFACE_STATE::TILE_MODE_TILEMODE_LINEAR; } EXPECT_EQ(tileMode, pSurfaceState->getTileMode()); EXPECT_EQ(MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y8_UNORM_VA, pSurfaceState->getSurfaceFormat()); EXPECT_EQ(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE, pSurfaceState->getPictureStructure()); std::vector surfaces; pKernel->getResidency(surfaces); for (auto &surface : surfaces) { delete surface; } EXPECT_EQ(1u, surfaces.size()); } compute-runtime-20.13.16352/opencl/test/unit_test/api/000077500000000000000000000000001363734646600224035ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/CMakeLists.txt000066400000000000000000000170701363734646600251500ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_api ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/additional_extensions_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api_tests_wrapper1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api_tests_wrapper2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/api_tests_wrapper3.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/cl_add_comment_to_aub_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_api_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_api_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/cl_build_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_clone_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_compile_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_command_queue_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_command_queue_with_properties_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_context_from_type_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_context_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_kernels_in_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_perf_counters_command_queue_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_pipe_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_program_with_binary_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_program_with_built_in_kernels_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sampler_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sampler_with_properties_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sub_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_sub_devices_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_user_event_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_user_event_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_barrier_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_barrier_with_wait_list_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_buffer_rect_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_buffer_to_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_copy_image_to_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_fill_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_fill_image_tests.inl 
${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_map_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_map_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_marker_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_marker_with_wait_list_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_migrate_mem_objects_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_native_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_nd_range_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_read_buffer_rect_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_read_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_read_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_free_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_map_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_mem_fill_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_memcpy_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_migrate_mem_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_svm_unmap_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_task_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_unmap_mem_object_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_verify_memory.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_wait_for_events_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_write_buffer_rect_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_write_buffer_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_write_image_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_finish_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_flush_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_function_pointers_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_context_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_and_host_timer.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_ids_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_device_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_event_profiling_info_tests.inl 
${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_for_platform_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_image_params_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_arg_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_khr_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_sub_group_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_suggested_local_work_size_intel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_kernel_work_group_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_mem_object_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_pipe_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_ids_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_ids_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_program_build_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_program_info_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_supported_image_formats_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_icd_get_platform_ids_khr_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_accelerator_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_motion_estimation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_intel_tracing_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_link_program_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_mem_locally_uncached_resource_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_command_queue_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_context_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_event_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_kernel_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_mem_obj_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/cl_release_program_tests.inl 
${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_mem_obj_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_command_queue_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_context_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_device_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_retain_release_sampler_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_default_device_command_queue_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_event_callback_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_kernel_arg_svm_pointer_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_kernel_exec_info_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_mem_object_destructor_callback_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_mem_object_destructor_callback_tests_mt.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_performance_configuration_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_set_program_specialization_constant_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_svm_alloc_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_svm_free_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_unified_shared_memory_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_unload_compiler_tests.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/cl_unload_platform_compiler_tests.inl
)
# Attach the API test sources to igdrcl_tests, then process subdirectories.
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_api})
add_subdirectories()
compute-runtime-20.13.16352/opencl/test/unit_test/api/additional_extensions_tests.cpp000066400000000000000000000005711363734646600307230ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/api/additional_extensions.h"

#include "test.h"

using namespace NEO;

// getAdditionalExtensionFunctionAddress is expected to return nullptr for the
// name "clFunction".
TEST(AdditionalExtension, GivenFuncNameWhenGetingFunctionAddressThenReturnNullptr) {
    auto address = getAdditionalExtensionFunctionAddress("clFunction");
    EXPECT_EQ(nullptr, address);
}
compute-runtime-20.13.16352/opencl/test/unit_test/api/api_tests_wrapper1.cpp000066400000000000000000000044611363734646600267280ustar00rootroot00000000000000
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 *
SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_add_comment_to_aub_tests.inl" #include "opencl/test/unit_test/api/cl_build_program_tests.inl" #include "opencl/test/unit_test/api/cl_clone_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_compile_program_tests.inl" #include "opencl/test/unit_test/api/cl_create_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_create_context_from_type_tests.inl" #include "opencl/test/unit_test/api/cl_create_context_tests.inl" #include "opencl/test/unit_test/api/cl_create_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_create_kernels_in_program_tests.inl" #include "opencl/test/unit_test/api/cl_create_perf_counters_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_create_pipe_tests.inl" #include "opencl/test/unit_test/api/cl_create_program_with_binary_tests.inl" #include "opencl/test/unit_test/api/cl_create_sampler_tests.inl" #include "opencl/test/unit_test/api/cl_create_sampler_with_properties_tests.inl" #include "opencl/test/unit_test/api/cl_create_sub_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_create_sub_devices_tests.inl" #include "opencl/test/unit_test/api/cl_create_user_event_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_barrier_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_barrier_with_wait_list_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_buffer_rect_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_buffer_to_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_copy_image_to_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_fill_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_fill_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl" #include 
"opencl/test/unit_test/api/cl_enqueue_map_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_marker_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_marker_with_wait_list_tests.inl" #include "opencl/test/unit_test/api/cl_function_pointers_tests.inl" #include "opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl" compute-runtime-20.13.16352/opencl/test/unit_test/api/api_tests_wrapper2.cpp000066400000000000000000000045641363734646600267350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_enqueue_migrate_mem_objects_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_native_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_read_buffer_rect_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_free_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_map_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_mem_fill_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_memcpy_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_svm_unmap_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_task_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_verify_memory.inl" #include "opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_write_buffer_rect_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_write_buffer_tests.inl" #include "opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl" #include "opencl/test/unit_test/api/cl_finish_tests.inl" #include "opencl/test/unit_test/api/cl_flush_tests.inl" #include 
"opencl/test/unit_test/api/cl_get_context_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_device_and_host_timer.inl" #include "opencl/test/unit_test/api/cl_get_device_ids_tests.inl" #include "opencl/test/unit_test/api/cl_get_device_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_event_profiling_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_extension_function_address_for_platform_tests.inl" #include "opencl/test/unit_test/api/cl_get_extension_function_address_tests.inl" #include "opencl/test/unit_test/api/cl_get_image_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_image_params_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_arg_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl" compute-runtime-20.13.16352/opencl/test/unit_test/api/api_tests_wrapper3.cpp000066400000000000000000000052011363734646600267230ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl" #include "opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_pipe_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_platform_ids_tests.inl" #include "opencl/test/unit_test/api/cl_get_platform_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_program_build_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_program_info_tests.inl" #include "opencl/test/unit_test/api/cl_get_supported_image_formats_tests.inl" #include 
"opencl/test/unit_test/api/cl_icd_get_platform_ids_khr_tests.inl" #include "opencl/test/unit_test/api/cl_intel_accelerator_tests.inl" #include "opencl/test/unit_test/api/cl_intel_tracing_tests.inl" #include "opencl/test/unit_test/api/cl_link_program_tests.inl" #include "opencl/test/unit_test/api/cl_release_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_release_context_tests.inl" #include "opencl/test/unit_test/api/cl_release_event_tests.inl" #include "opencl/test/unit_test/api/cl_release_kernel_tests.inl" #include "opencl/test/unit_test/api/cl_release_mem_obj_tests.inl" #include "opencl/test/unit_test/api/cl_release_program_tests.inl" #include "opencl/test/unit_test/api/cl_retain_mem_obj_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_context_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_device_tests.inl" #include "opencl/test/unit_test/api/cl_retain_release_sampler_tests.inl" #include "opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl" #include "opencl/test/unit_test/api/cl_set_event_callback_tests.inl" #include "opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl" #include "opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl" #include "opencl/test/unit_test/api/cl_set_mem_object_destructor_callback_tests.inl" #include "opencl/test/unit_test/api/cl_set_performance_configuration_tests.inl" #include "opencl/test/unit_test/api/cl_set_program_specialization_constant_tests.inl" #include "opencl/test/unit_test/api/cl_svm_alloc_tests.inl" #include "opencl/test/unit_test/api/cl_svm_free_tests.inl" #include "opencl/test/unit_test/api/cl_unload_compiler_tests.inl" #include "opencl/test/unit_test/api/cl_unload_platform_compiler_tests.inl" 
compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_add_comment_to_aub_tests.inl000066400000000000000000000051621363734646600306160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_manager.h" using namespace NEO; namespace ULT { struct clAddCommentToAubTest : api_tests { void SetUp() override { api_tests::SetUp(); pDevice = pContext->getDevice(0); } void TearDown() override { api_tests::TearDown(); } ClDevice *pDevice = nullptr; }; TEST_F(clAddCommentToAubTest, givenProperCommentNullptrAubCenterWhenAddCommentToAubThenSuccessIsReturned) { auto retVal = clAddCommentINTEL(pDevice, "comment"); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clAddCommentToAubTest, givenInvalidDeviceWhenAddCommentToAubThenErrorIsReturned) { auto retVal = clAddCommentINTEL(nullptr, "comment"); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clAddCommentToAubTest, givenNullptrCommentWhenAddCommentToAubThenErrorIsReturned) { auto retVal = clAddCommentINTEL(pDevice, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clAddCommentToAubTest, givenAubCenterAndProperCommentButNullptrAubManagerWhenAddCommentToAubThenErrorIsReturned) { pPlatform->peekExecutionEnvironment()->rootDeviceEnvironments[testedRootDeviceIndex]->aubCenter.reset(new MockAubCenter()); auto retVal = clAddCommentINTEL(pDevice, "comment"); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clAddCommentToAubTest, givenProperCommentAubCenterAndAubManagerWhenAddCommentToAubThenSuccessIsReturned) { struct AubManagerCommentMock : public 
MockAubManager { using MockAubManager::MockAubManager; void addComment(const char *message) override { addCommentCalled = true; EXPECT_STREQ("comment", message); } bool addCommentCalled = false; }; auto mockAubCenter = new MockAubCenter(); auto mockAubManager = new AubManagerCommentMock; mockAubCenter->aubManager.reset(mockAubManager); pPlatform->peekExecutionEnvironment()->rootDeviceEnvironments[testedRootDeviceIndex]->aubCenter.reset(mockAubCenter); EXPECT_FALSE(mockAubManager->addCommentCalled); auto retVal = clAddCommentINTEL(pDevice, "comment"); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockAubManager->addCommentCalled); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_api_tests.cpp000066400000000000000000000025151363734646600255630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" namespace NEO { void api_fixture_using_aligned_memory_manager::SetUp() { retVal = CL_SUCCESS; retSize = 0; device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; cl_device_id deviceId = device; context = Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); Context *ctxPtr = reinterpret_cast(context); commandQueue = new MockCommandQueue(context, device, 0); program = new MockProgram(*device->getExecutionEnvironment(), ctxPtr, false, &device->getDevice()); Program *prgPtr = reinterpret_cast(program); kernel = new MockKernel(prgPtr, program->mockKernelInfo, *device); ASSERT_NE(nullptr, kernel); } void api_fixture_using_aligned_memory_manager::TearDown() { delete kernel; delete commandQueue; context->release(); program->release(); delete device; } } // 
namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_api_tests.h000066400000000000000000000064231363734646600252320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/tracing/tracing_api.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/helpers/ult_limits.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" #include namespace NEO { class Context; class MockClDevice; struct RootDeviceEnvironment; template struct ApiFixture : PlatformFixture { ApiFixture() = default; ~ApiFixture() = default; virtual void SetUp() { DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); PlatformFixture::SetUp(); if (rootDeviceIndex != 0u) { rootDeviceEnvironmentBackup.swap(pPlatform->peekExecutionEnvironment()->rootDeviceEnvironments[0]); } auto pDevice = pPlatform->getClDevice(testedRootDeviceIndex); ASSERT_NE(nullptr, pDevice); testedClDevice = pDevice; pContext = Context::create(nullptr, ClDeviceVector(&testedClDevice, 1), nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); pCommandQueue = new MockCommandQueue(pContext, pDevice, nullptr); pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, &pDevice->getDevice()); pKernel = new MockKernel(pProgram, pProgram->mockKernelInfo, *pDevice); ASSERT_NE(nullptr, pKernel); } virtual void TearDown() { pKernel->release(); pCommandQueue->release(); pContext->release(); pProgram->release(); if (rootDeviceIndex != 0u) { 
rootDeviceEnvironmentBackup.swap(pPlatform->peekExecutionEnvironment()->rootDeviceEnvironments[0]); } PlatformFixture::TearDown(); } DebugManagerStateRestore restorer; cl_int retVal = CL_SUCCESS; size_t retSize = 0; CommandQueue *pCommandQueue = nullptr; Context *pContext = nullptr; MockKernel *pKernel = nullptr; MockProgram *pProgram = nullptr; constexpr static uint32_t numRootDevices = maxRootDeviceCount; constexpr static uint32_t testedRootDeviceIndex = rootDeviceIndex; cl_device_id testedClDevice = nullptr; std::unique_ptr rootDeviceEnvironmentBackup; }; struct api_tests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; struct api_fixture_using_aligned_memory_manager { public: virtual void SetUp(); virtual void TearDown(); cl_int retVal; size_t retSize; CommandQueue *commandQueue; Context *context; MockKernel *kernel; MockProgram *program; MockClDevice *device; }; using api_test_using_aligned_memory_manager = Test; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_build_program_tests.inl000066400000000000000000000136161363734646600276440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clBuildProgramTests; namespace ULT { TEST_F(clBuildProgramTests, GivenSourceAsInputWhenCreatingProgramWithSourceThenProgramBuildSucceeds) { cl_program pProgram = nullptr; 
std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sourceArray[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sourceArray, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, num_devices, devices, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pProgram = clCreateProgramWithSource( nullptr, 1, sourceArray, &sourceSize, nullptr); EXPECT_EQ(nullptr, pProgram); } TEST_F(clBuildProgramTests, GivenBinaryAsInputWhenCreatingProgramWithSourceThenProgramBuildSucceeds) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; std::unique_ptr pBinary = nullptr; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal); pBinary.reset(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, num_devices, devices, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, GivenProgramCreatedFromBinaryWhenBuildProgramWithOptionsIsCalledThenStoredOptionsAreUsed) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); 
auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal); auto pInternalProgram = castToObject(pProgram); pBinary.reset(); auto storedOptionsSize = pInternalProgram->getOptions().size(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); const char *newBuildOption = "cl-fast-relaxed-math"; retVal = clBuildProgram( pProgram, num_devices, devices, newBuildOption, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); auto optionsAfterBuildSize = pInternalProgram->getOptions().size(); EXPECT_EQ(optionsAfterBuildSize, storedOptionsSize); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, GivenSpirAsInputWhenCreatingProgramFromBinaryThenProgramBuildSucceeds) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; unsigned char llvm[16] = "BC\xc0\xde_unique"; size_t binarySize = sizeof(llvm); const unsigned char *binaries[1] = {llvm}; pProgram = clCreateProgramWithBinary( pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); MockCompilerDebugVars igcDebugVars; SProgramBinaryHeader progBin = {}; progBin.Magic = iOpenCL::MAGIC_CL; progBin.Version = iOpenCL::CURRENT_ICBE_VERSION; progBin.Device = pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily; progBin.GPUPointerSizeInBytes = sizeof(uintptr_t); igcDebugVars.binaryToReturn = &progBin; igcDebugVars.binaryToReturnSize = sizeof(progBin); auto prevDebugVars = getIgcDebugVars(); setIgcDebugVars(igcDebugVars); retVal = clBuildProgram( pProgram, num_devices, devices, "-x spir -spir-std=1.2", nullptr, nullptr); setIgcDebugVars(prevDebugVars); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); 
EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clBuildProgramTests, GivenNullAsInputWhenCreatingProgramThenInvalidProgramErrorIsReturned) { retVal = clBuildProgram( nullptr, 1, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_clone_kernel_tests.inl000066400000000000000000000044171363734646600274550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCloneKernelTests; namespace ULT { TEST_F(clCloneKernelTests, GivenNullKernelWhenCloningKernelThenNullIsReturned) { auto kernel = clCloneKernel(nullptr, nullptr); EXPECT_EQ(nullptr, kernel); } TEST_F(clCloneKernelTests, GivenNullKernelWhenCloningKernelThenInvalidKernelErrorIsReturned) { clCloneKernel(nullptr, &retVal); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clCloneKernelTests, GivenValidKernelWhenCloningKernelThenSuccessIsReturned) { cl_kernel pSourceKernel = nullptr; cl_kernel pClonedKernel = nullptr; cl_program pProgram = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal); pBinary.reset(); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, num_devices, devices, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); pSourceKernel = clCreateKernel( pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, 
pSourceKernel); ASSERT_EQ(CL_SUCCESS, retVal); pClonedKernel = clCloneKernel( pSourceKernel, &retVal); EXPECT_NE(nullptr, pClonedKernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(pClonedKernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(pSourceKernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_compile_program_tests.inl000066400000000000000000000066151363734646600301760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCompileProgramTests; namespace ULT { TEST_F(clCompileProgramTests, GivenKernelAsSingleSourceWhenCompilingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("copybuffer", false); testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, num_devices, devices, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCompileProgramTests, GivenKernelAsSourceWithHeaderWhenCompilingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; cl_program pHeader = nullptr; size_t 
sourceSize = 0; std::string testFile; const char *simpleHeaderName = "simple_header.h"; testFile.append(clFiles); testFile.append("/copybuffer_with_header.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); testFile.clear(); testFile.append(clFiles); testFile.append("simple_header.h"); auto pHeaderSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pHeaderSource); const char *headerSources[1] = {pHeaderSource.get()}; pHeader = clCreateProgramWithSource( pContext, 1, headerSources, &sourceSize, &retVal); EXPECT_NE(nullptr, pHeader); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, num_devices, devices, nullptr, 1, &pHeader, &simpleHeaderName, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pHeader); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCompileProgramTests, GivenNullProgramWhenCompilingProgramThenInvalidProgramErrorIsReturned) { retVal = clCompileProgram( nullptr, 1, nullptr, "", 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_buffer_tests.cpp000066400000000000000000000311761363734646600276130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include 
"opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateBufferTests; namespace ClCreateBufferTests { class clCreateBufferTemplateTests : public ApiFixture<>, public testing::TestWithParam { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; struct clCreateBufferValidFlagsTests : public clCreateBufferTemplateTests { cl_uchar pHostPtr[64]; }; TEST_P(clCreateBufferValidFlagsTests, GivenValidFlagsWhenCreatingBufferThenBufferIsCreated) { cl_mem_flags flags = GetParam() | CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer(pContext, flags, 64, pHostPtr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, flags, 0}; buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 64, pHostPtr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); }; static cl_mem_flags validFlags[] = { CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS, }; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlags, clCreateBufferValidFlagsTests, testing::ValuesIn(validFlags)); using clCreateBufferInvalidFlagsTests = clCreateBufferTemplateTests; TEST_P(clCreateBufferInvalidFlagsTests, GivenInvalidFlagsWhenCreatingBufferThenBufferIsNotCreated) { cl_mem_flags flags = GetParam(); auto buffer = clCreateBuffer(pContext, flags, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, flags, 0}; buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); }; cl_mem_flags invalidFlags[] = { 
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR, CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS | CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, 0xffcc, }; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlags, clCreateBufferInvalidFlagsTests, testing::ValuesIn(invalidFlags)); using clCreateBufferValidFlagsIntelTests = clCreateBufferTemplateTests; TEST_P(clCreateBufferValidFlagsIntelTests, GivenValidFlagsIntelWhenCreatingBufferThenBufferIsCreated) { cl_mem_properties_intel properties[] = {CL_MEM_FLAGS_INTEL, GetParam(), 0}; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 64, nullptr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); }; static cl_mem_flags validFlagsIntel[] = { CL_MEM_LOCALLY_UNCACHED_RESOURCE, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, CL_MEM_48BIT_RESOURCE_INTEL}; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlagsIntel, clCreateBufferValidFlagsIntelTests, testing::ValuesIn(validFlagsIntel)); using clCreateBufferInvalidFlagsIntelTests = clCreateBufferTemplateTests; TEST_P(clCreateBufferInvalidFlagsIntelTests, GivenInvalidFlagsIntelWhenCreatingBufferThenBufferIsNotCreated) { cl_mem_properties_intel properties[] = {CL_MEM_FLAGS_INTEL, GetParam(), 0}; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); }; cl_mem_flags invalidFlagsIntel[] = { 0xffcc, }; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlagsIntel, clCreateBufferInvalidFlagsIntelTests, testing::ValuesIn(invalidFlagsIntel)); using clCreateBufferInvalidProperties = clCreateBufferTemplateTests; TEST_F(clCreateBufferInvalidProperties, GivenInvalidPropertyKeyWhenCreatingBufferThenBufferIsNotCreated) { cl_mem_properties_intel 
properties[] = {(cl_mem_properties_intel(1) << 31), 0, 0}; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, 64, nullptr, &retVal); EXPECT_EQ(nullptr, buffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); }; TEST_F(clCreateBufferTests, GivenValidParametersWhenCreatingBufferThenSuccessIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; unsigned char pHostMem[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer(pContext, flags, bufferSize, pHostMem, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, GivenNullContextWhenCreatingBufferThenInvalidContextErrorIsReturned) { unsigned char *pHostMem = nullptr; cl_mem_flags flags = 0; static const unsigned int bufferSize = 16; clCreateBuffer(nullptr, flags, bufferSize, pHostMem, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreateBufferTests, GivenBufferSizeZeroWhenCreatingBufferThenInvalidBufferSizeErrorIsReturned) { uint8_t hostData = 0; clCreateBuffer(pContext, CL_MEM_USE_HOST_PTR, 0, &hostData, &retVal); ASSERT_EQ(CL_INVALID_BUFFER_SIZE, retVal); } TEST_F(clCreateBufferTests, GivenInvalidHostPointerWhenCreatingBufferThenInvalidHostPointerErrorIsReturned) { uint32_t hostData = 0; cl_mem_flags flags = 0; clCreateBuffer(pContext, flags, sizeof(uint32_t), &hostData, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); } TEST_F(clCreateBufferTests, GivenNullHostPointerAndMemCopyHostPtrFlagWhenCreatingBufferThenInvalidHostPointerErrorIsReturned) { cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; clCreateBuffer(pContext, flags, sizeof(uint32_t), nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); } TEST_F(clCreateBufferTests, GivenNullHostPointerAndMemUseHostPtrFlagWhenCreatingBufferThenInvalidHostPointerErrorIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; clCreateBuffer(pContext, flags, 
sizeof(uint32_t), nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); } TEST_F(clCreateBufferTests, GivenMemWriteOnlyFlagAndMemReadWriteFlagWhenCreatingBufferThenInvalidValueErrorIsReturned) { cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE; clCreateBuffer(pContext, flags, 16, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeWhenCreatingBufferThenInvalidBufferSizeErrorIsReturned) { auto pDevice = pContext->getDevice(0); size_t size = static_cast(pDevice->getHardwareCapabilities().maxMemAllocSize) + 1; auto buffer = clCreateBuffer(pContext, CL_MEM_ALLOC_HOST_PTR, size, nullptr, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, buffer); } TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeWhenCreateBufferWithPropertiesINTELThenInvalidBufferSizeErrorIsReturned) { auto pDevice = pContext->getDevice(0); size_t size = static_cast(pDevice->getHardwareCapabilities().maxMemAllocSize) + 1; auto buffer = clCreateBufferWithPropertiesINTEL(pContext, nullptr, size, nullptr, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, buffer); } TEST_F(clCreateBufferTests, GivenBufferSizeOverMaxMemAllocSizeAndClMemAllowUnrestirctedSizeFlagWhenCreatingBufferThenClSuccessIsReturned) { auto pDevice = pContext->getDevice(0); uint64_t bigSize = GB * 5; size_t size = static_cast(bigSize); cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); if (memoryManager->peekForce32BitAllocations() || is32bit) { GTEST_SKIP(); } auto buffer = clCreateBuffer(pContext, flags, size, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, 
GivenBufferSizeOverMaxMemAllocSizeAndClMemAllowUnrestirctedSizeFlagWhenCreatingBufferWithPropertiesINTELThenClSuccesssIsReturned) { auto pDevice = pContext->getDevice(0); uint64_t bigSize = GB * 5; size_t size = static_cast(bigSize); cl_mem_properties_intel properties[] = {CL_MEM_FLAGS_INTEL, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, 0}; auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); if (memoryManager->peekForce32BitAllocations() || is32bit) { GTEST_SKIP(); } auto buffer = clCreateBufferWithPropertiesINTEL(pContext, properties, size, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTests, GivenNullHostPointerAndMemCopyHostPtrFlagWhenCreatingBufferThenNullIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; unsigned char pHostMem[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer(pContext, flags, bufferSize, pHostMem, nullptr); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } using clCreateBufferTestsWithRestrictions = api_test_using_aligned_memory_manager; TEST_F(clCreateBufferTestsWithRestrictions, GivenMemoryManagerRestrictionsWhenMinIsLessThanHostPtrThenUseZeroCopy) { std::unique_ptr hostMem(nullptr); unsigned char *destMem = nullptr; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; const unsigned int bufferSize = MemoryConstants::pageSize * 3; const unsigned int destBufferSize = MemoryConstants::pageSize; cl_mem buffer = nullptr; uintptr_t minAddress = 0; MockAllocSysMemAgnosticMemoryManager *memMngr = reinterpret_cast(device->getMemoryManager()); memMngr->ptrRestrictions = &memMngr->testRestrictions; EXPECT_EQ(minAddress, memMngr->ptrRestrictions->minAddress); hostMem.reset(new unsigned char[bufferSize]); destMem = hostMem.get(); destMem += 
MemoryConstants::pageSize; destMem -= (reinterpret_cast(destMem) % MemoryConstants::pageSize); buffer = clCreateBuffer(context, flags, destBufferSize, destMem, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); Buffer *bufferObj = NEO::castToObject(buffer); EXPECT_TRUE(bufferObj->isMemObjZeroCopy()); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateBufferTestsWithRestrictions, GivenMemoryManagerRestrictionsWhenMinIsLessThanHostPtrThenCreateCopy) { std::unique_ptr hostMem(nullptr); unsigned char *destMem = nullptr; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; const unsigned int realBufferSize = MemoryConstants::pageSize * 3; const unsigned int destBufferSize = MemoryConstants::pageSize; cl_mem buffer = nullptr; MockAllocSysMemAgnosticMemoryManager *memMngr = reinterpret_cast(device->getMemoryManager()); memMngr->ptrRestrictions = &memMngr->testRestrictions; hostMem.reset(new unsigned char[realBufferSize]); destMem = hostMem.get(); destMem += MemoryConstants::pageSize; destMem -= (reinterpret_cast(destMem) % MemoryConstants::pageSize); memMngr->ptrRestrictions->minAddress = reinterpret_cast(destMem) + 1; buffer = clCreateBuffer(context, flags, destBufferSize, destMem, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); Buffer *bufferObj = NEO::castToObject(buffer); EXPECT_FALSE(bufferObj->isMemObjZeroCopy()); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ClCreateBufferTests compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_command_queue_tests.inl000066400000000000000000000112641363734646600311600ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include 
"opencl/test/unit_test/mocks/mock_device.h" #include "test.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateCommandQueueTest; namespace ULT { TEST_F(clCreateCommandQueueTest, GivenCorrectParametersWhenCreatingCommandQueueThenCommandQueueIsCreatedAndSuccessIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cmdQ = clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateCommandQueueTest, GivenNullContextWhenCreatingCommandQueueThenInvalidContextErrorIsReturned) { clCreateCommandQueue(nullptr, devices[testedRootDeviceIndex], 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreateCommandQueueTest, GivenNullDeviceWhenCreatingCommandQueueThenInvalidDeviceErrorIsReturned) { clCreateCommandQueue(pContext, nullptr, 0, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreateCommandQueueTest, GivenDeviceNotAssociatedWithContextWhenCreatingCommandQueueThenInvalidDeviceErrorIsReturned) { EXPECT_NE(devices[0], devices[testedRootDeviceIndex]); clCreateCommandQueue(pContext, devices[0], 0, &retVal); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreateCommandQueueTest, GivenInvalidPropertiesWhenCreatingCommandQueueThenInvalidValueErrorIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0xf0000; cmdQ = clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], properties, &retVal); ASSERT_EQ(nullptr, cmdQ); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenQueueIsSucesfullyCreated) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto cmdq = clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], ooq, &retVal); EXPECT_NE(nullptr, cmdq); EXPECT_EQ(retVal, CL_SUCCESS); retVal = 
clReleaseCommandQueue(cmdq); } HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToBatchingMode) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto clDevice = castToObject(devices[testedRootDeviceIndex]); auto mockDevice = reinterpret_cast(&clDevice->getDevice()); auto &csr = mockDevice->getUltCommandStreamReceiver(); EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode); auto cmdq = clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], ooq, &retVal); EXPECT_EQ(DispatchMode::BatchedDispatch, csr.dispatchMode); retVal = clReleaseCommandQueue(cmdq); } HWTEST_F(clCreateCommandQueueTest, GivenForcedDispatchModeAndOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverDoesntSwitchToBatchingMode) { DebugManagerStateRestore restorer; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto clDevice = castToObject(devices[testedRootDeviceIndex]); auto mockDevice = reinterpret_cast(&clDevice->getDevice()); auto &csr = mockDevice->getUltCommandStreamReceiver(); EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode); auto cmdq = clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], ooq, &retVal); EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode); retVal = clReleaseCommandQueue(cmdq); } HWTEST_F(clCreateCommandQueueTest, GivenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToNTo1SubmissionModel) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto clDevice = castToObject(devices[testedRootDeviceIndex]); auto mockDevice = reinterpret_cast(&clDevice->getDevice()); auto &csr = mockDevice->getUltCommandStreamReceiver(); EXPECT_FALSE(csr.isNTo1SubmissionModelEnabled()); auto cmdq = clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], ooq, 
&retVal); EXPECT_TRUE(csr.isNTo1SubmissionModelEnabled()); retVal = clReleaseCommandQueue(cmdq); } } // namespace ULT cl_create_command_queue_with_properties_tests.cpp000066400000000000000000000512531363734646600344120ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "CL/cl_ext.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct CommandQueueWithPropertiesTest : public ApiFixture<>, public ::testing::WithParamInterface>, public ::testing::Test { CommandQueueWithPropertiesTest() : queuePriority(CL_QUEUE_PRIORITY_MED_KHR), queueThrottle(CL_QUEUE_THROTTLE_MED_KHR) { } void SetUp() override { std::tie(commandQueueProperties, queueSize, queuePriority, queueThrottle) = GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_command_queue_properties commandQueueProperties = 0; cl_uint queueSize = 0; cl_queue_priority_khr queuePriority; cl_queue_throttle_khr queueThrottle; }; struct clCreateCommandQueueWithPropertiesApi : public ApiFixture<>, public MemoryManagementFixture, public ::testing::Test { clCreateCommandQueueWithPropertiesApi() { } void SetUp() override { platformsImpl.clear(); MemoryManagementFixture::SetUp(); ApiFixture::SetUp(); } void TearDown() override { 
ApiFixture::TearDown(); MemoryManagementFixture::TearDown(); } }; typedef CommandQueueWithPropertiesTest clCreateCommandQueueWithPropertiesTests; TEST_P(clCreateCommandQueueWithPropertiesTests, GivenPropertiesWhenCreatingCommandQueueThenExpectedResultIsReturned) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties[] = { CL_QUEUE_PROPERTIES, 0, CL_QUEUE_SIZE, 0, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_MED_KHR, 0}; const auto minimumCreateDeviceQueueFlags = static_cast(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); const auto deviceQueueShouldBeCreated = (commandQueueProperties & minimumCreateDeviceQueueFlags) == minimumCreateDeviceQueueFlags; if (deviceQueueShouldBeCreated && !castToObject(this->devices[testedRootDeviceIndex])->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { return; } bool queueOnDeviceUsed = false; bool priorityHintsUsed = false; bool throttleHintsUsed = false; cl_queue_properties *pProp = &properties[0]; if (commandQueueProperties) { *pProp++ = CL_QUEUE_PROPERTIES; *pProp++ = (cl_queue_properties)commandQueueProperties; } if ((commandQueueProperties & CL_QUEUE_ON_DEVICE) && queueSize) { *pProp++ = CL_QUEUE_SIZE; *pProp++ = queueSize; } if (commandQueueProperties & CL_QUEUE_ON_DEVICE) { queueOnDeviceUsed = true; } if (queuePriority) { *pProp++ = CL_QUEUE_PRIORITY_KHR; *pProp++ = queuePriority; priorityHintsUsed = true; } if (queueThrottle) { *pProp++ = CL_QUEUE_THROTTLE_KHR; *pProp++ = queueThrottle; throttleHintsUsed = true; } *pProp++ = 0; cmdQ = clCreateCommandQueueWithProperties( pContext, devices[testedRootDeviceIndex], properties, &retVal); if (queueOnDeviceUsed && priorityHintsUsed) { EXPECT_EQ(nullptr, cmdQ); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); return; } else if (queueOnDeviceUsed && throttleHintsUsed) { EXPECT_EQ(nullptr, cmdQ); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); return; } else { EXPECT_EQ(CL_SUCCESS, retVal); 
ASSERT_NE(nullptr, cmdQ); } auto deviceQ = static_cast(cmdQ); auto deviceQueueObj = castToObject(deviceQ); auto commandQueueObj = castToObject(cmdQ); if (deviceQueueShouldBeCreated) { // created device queue ASSERT_NE(deviceQueueObj, nullptr); ASSERT_EQ(commandQueueObj, nullptr); } else { // created host queue ASSERT_EQ(deviceQueueObj, nullptr); ASSERT_NE(commandQueueObj, nullptr); } retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); auto icdStoredFunction = icdGlobalDispatchTable.clCreateCommandQueueWithProperties; auto pFunction = &clCreateCommandQueueWithProperties; EXPECT_EQ(icdStoredFunction, pFunction); } static cl_command_queue_properties commandQueueProperties[] = { 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, CL_QUEUE_PROFILING_ENABLE}; static cl_uint queueSizes[] = { 0, 2000}; cl_queue_priority_khr queuePriorities[] = { 0, CL_QUEUE_PRIORITY_LOW_KHR, CL_QUEUE_PRIORITY_MED_KHR, CL_QUEUE_PRIORITY_HIGH_KHR}; cl_queue_throttle_khr queueThrottles[] = { 0, CL_QUEUE_THROTTLE_LOW_KHR, CL_QUEUE_THROTTLE_MED_KHR, CL_QUEUE_THROTTLE_HIGH_KHR}; INSTANTIATE_TEST_CASE_P(api, clCreateCommandQueueWithPropertiesTests, ::testing::Combine( ::testing::ValuesIn(commandQueueProperties), ::testing::ValuesIn(queueSizes), ::testing::ValuesIn(queuePriorities), ::testing::ValuesIn(queueThrottles))); TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullContextWhenCreatingCommandQueueWithPropertiesThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto cmdQ = clCreateCommandQueueWithProperties( nullptr, nullptr, 0, &retVal); EXPECT_EQ(cmdQ, nullptr); EXPECT_EQ(retVal, CL_INVALID_CONTEXT); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullContextWhenCreatingCommandQueueWithPropertiesKHRThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto cmdQ = clCreateCommandQueueWithPropertiesKHR( nullptr, nullptr, 0, &retVal); 
EXPECT_EQ(cmdQ, nullptr); EXPECT_EQ(retVal, CL_INVALID_CONTEXT); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenOoqPropertiesWhenQueueIsCreatedThenSuccessIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ooq, &retVal); EXPECT_NE(nullptr, cmdq); EXPECT_EQ(retVal, CL_SUCCESS); retVal = clReleaseCommandQueue(cmdq); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenQueueOnDeviceWithoutOoqPropertiesWhenQueueIsCreatedThenErrorIsReturned) { cl_queue_properties ondevice[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE, 0, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullContextAndOoqPropertiesWhenCreatingCommandQueueWithPropertiesThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0}; auto cmdq = clCreateCommandQueueWithProperties(nullptr, devices[testedRootDeviceIndex], ooq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_CONTEXT); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullDeviceWhenCreatingCommandQueueWithPropertiesThenInvalidDeviceErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, nullptr, ooq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_DEVICE); } TEST_F(clCreateCommandQueueWithPropertiesApi, 
GivenDeviceNotAssociatedWithContextWhenCreatingCommandQueueWithPropertiesThenInvalidDeviceErrorIsReturned) { cl_int retVal = CL_OUT_OF_HOST_MEMORY; EXPECT_NE(devices[0], devices[testedRootDeviceIndex]); auto cmdq = clCreateCommandQueueWithProperties(pContext, devices[0], nullptr, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_DEVICE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenSizeWhichExceedsMaxDeviceQueueSizeWhenCreatingCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, CL_QUEUE_SIZE, (cl_uint)0xffffffff, 0, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ooq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenQueueOnDeviceWithoutOutOfOrderExecModePropertyWhenCreatingCommandQueueWithPropertiesThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties odq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE, 0, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], odq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenDefaultDeviceQueueWithoutQueueOnDevicePropertyWhenCreatingCommandQueueWithPropertiesThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ddq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0}; auto cmdq = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ddq, &retVal); EXPECT_EQ(nullptr, cmdq); EXPECT_EQ(retVal, CL_INVALID_VALUE); } HWCMDTEST_F(IGFX_GEN8_CORE, clCreateCommandQueueWithPropertiesApi, GivenNumberOfDevicesGreaterThanMaxWhenCreatingCommandQueueWithPropertiesThenOutOfResourcesErrorIsReturned) { if 
(!this->pContext->getDevice(0u)->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto pDevice = castToObject(devices[testedRootDeviceIndex]); cl_queue_properties odq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, 0, 0}; auto cmdq1 = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], odq, &retVal); EXPECT_NE(nullptr, cmdq1); EXPECT_EQ(retVal, CL_SUCCESS); auto cmdq2 = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], odq, &retVal); if (pDevice->getSharedDeviceInfo().maxOnDeviceQueues > 1) { EXPECT_NE(nullptr, cmdq2); EXPECT_EQ(retVal, CL_SUCCESS); } else { EXPECT_EQ(nullptr, cmdq2); EXPECT_EQ(retVal, CL_OUT_OF_RESOURCES); } clReleaseCommandQueue(cmdq1); if (cmdq2) { clReleaseCommandQueue(cmdq2); } } HWCMDTEST_F(IGFX_GEN8_CORE, clCreateCommandQueueWithPropertiesApi, GivenFailedAllocationWhenCreatingCommandQueueWithPropertiesThenOutOfHostMemoryErrorIsReturned) { if (!this->pContext->getDevice(0u)->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { GTEST_SKIP(); } InjectedFunction method = [this](size_t failureIndex) { cl_queue_properties ooq[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0}; auto retVal = CL_INVALID_VALUE; auto cmdq = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ooq, &retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, cmdq); clReleaseCommandQueue(cmdq); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, cmdq); } }; injectFailureOnIndex(method, 0); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenHighPriorityWhenCreatingOoqCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = 
{CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenLowPriorityWhenCreatingOoqCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenMedPriorityWhenCreatingOoqCommandQueueWithPropertiesThenInvalidQueuePropertiesErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_MED_KHR, 0, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ondevice, &retVal); EXPECT_EQ(nullptr, cmdqd); EXPECT_EQ(retVal, CL_INVALID_QUEUE_PROPERTIES); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenInvalidPropertiesWhenCreatingOoqCommandQueueWithPropertiesThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; auto commandQueue = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], &properties, &retVal); EXPECT_EQ(nullptr, commandQueue); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, givenInvalidPropertiesOnSubsequentTokenWhenQueueIsCreatedThenReturnError) { cl_int retVal = CL_SUCCESS; 
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_DEVICE_PARTITION_EQUALLY, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto commandQueue = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], properties, &retVal); EXPECT_EQ(nullptr, commandQueue); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenNullPropertiesWhenCreatingCommandQueueWithPropertiesThenSuccessIsReturned) { cl_int retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], nullptr, &retVal); EXPECT_NE(nullptr, commandQueue); EXPECT_EQ(retVal, CL_SUCCESS); clReleaseCommandQueue(commandQueue); } TEST_F(clCreateCommandQueueWithPropertiesApi, GivenLowPriorityWhenCreatingCommandQueueWithPropertiesThenSuccessIsReturned) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ondevice, &retVal); EXPECT_NE(nullptr, cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); retVal = clReleaseCommandQueue(cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); } HWTEST_F(clCreateCommandQueueWithPropertiesApi, GivenLowPriorityWhenCreatingCommandQueueThenSelectRcsEngine) { cl_queue_properties properties[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], properties, nullptr); auto commandQueueObj = castToObject(cmdQ); auto &osContext = commandQueueObj->getGpgpuCommandStreamReceiver().getOsContext(); EXPECT_EQ(HwHelperHw::lowPriorityEngineType, osContext.getEngineType()); EXPECT_TRUE(osContext.isLowPriority()); clReleaseCommandQueue(cmdQ); } using LowPriorityCommandQueueTest = ::testing::Test; HWTEST_F(LowPriorityCommandQueueTest, GivenDeviceWithSubdevicesWhenCreatingLowPriorityCommandQueueThenEngineFromFirstSubdeviceIsTaken) { 
DebugManagerStateRestore restorer; VariableBackup mockDeviceFlagBackup{&MockDevice::createSingleDevice, false}; DebugManager.flags.CreateMultipleSubDevices.set(2); MockContext context; cl_queue_properties properties[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; EXPECT_EQ(2u, context.getDevice(0)->getNumAvailableDevices()); auto cmdQ = clCreateCommandQueueWithProperties(&context, context.getDevice(0), properties, nullptr); auto commandQueueObj = castToObject(cmdQ); auto subDevice = context.getDevice(0)->getDeviceById(0); auto engine = subDevice->getEngine(HwHelperHw::lowPriorityEngineType, true); EXPECT_EQ(engine.commandStreamReceiver, &commandQueueObj->getGpgpuCommandStreamReceiver()); EXPECT_EQ(engine.osContext, &commandQueueObj->getGpgpuCommandStreamReceiver().getOsContext()); clReleaseCommandQueue(cmdQ); } std::pair priorityParams[3]{ std::make_pair(CL_QUEUE_PRIORITY_LOW_KHR, QueuePriority::LOW), std::make_pair(CL_QUEUE_PRIORITY_MED_KHR, QueuePriority::MEDIUM), std::make_pair(CL_QUEUE_PRIORITY_HIGH_KHR, QueuePriority::HIGH)}; class clCreateCommandQueueWithPropertiesApiPriority : public clCreateCommandQueueWithPropertiesApi, public ::testing::WithParamInterface> { }; TEST_P(clCreateCommandQueueWithPropertiesApiPriority, GivenValidPriorityWhenCreatingCommandQueueWithPropertiesThenCorrectPriorityIsSetInternally) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_PRIORITY_KHR, GetParam().first, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ondevice, &retVal); EXPECT_NE(nullptr, cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); auto commandQueue = castToObject(cmdqd); EXPECT_EQ(commandQueue->getPriority(), GetParam().second); retVal = clReleaseCommandQueue(cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); } INSTANTIATE_TEST_CASE_P(AllValidPriorities, clCreateCommandQueueWithPropertiesApiPriority, ::testing::ValuesIn(priorityParams)); std::pair throttleParams[3]{ std::make_pair(CL_QUEUE_THROTTLE_LOW_KHR, 
QueueThrottle::LOW), std::make_pair(CL_QUEUE_THROTTLE_MED_KHR, QueueThrottle::MEDIUM), std::make_pair(CL_QUEUE_THROTTLE_HIGH_KHR, QueueThrottle::HIGH)}; class clCreateCommandQueueWithPropertiesApiThrottle : public clCreateCommandQueueWithPropertiesApi, public ::testing::WithParamInterface> { }; TEST_P(clCreateCommandQueueWithPropertiesApiThrottle, GivenThrottlePropertiesWhenCreatingCommandQueueWithPropertiesThenCorrectThrottleIsSetInternally) { cl_int retVal = CL_SUCCESS; cl_queue_properties ondevice[] = {CL_QUEUE_THROTTLE_KHR, GetParam().first, 0}; auto cmdqd = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], ondevice, &retVal); EXPECT_NE(nullptr, cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); auto commandQueue = castToObject(cmdqd); EXPECT_EQ(commandQueue->getThrottle(), GetParam().second); retVal = clReleaseCommandQueue(cmdqd); EXPECT_EQ(retVal, CL_SUCCESS); } INSTANTIATE_TEST_CASE_P(AllValidThrottleValues, clCreateCommandQueueWithPropertiesApiThrottle, ::testing::ValuesIn(throttleParams)); } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_context_from_type_tests.inl000066400000000000000000000104751363734646600321110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_platform.h" #include "cl_api_tests.h" using namespace NEO; struct clCreateContextFromTypeTests : public ApiFixture<0u>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; namespace ULT { void CL_CALLBACK contextCallBack(const char *, const void *, size_t, void *) { } TEST_F(clCreateContextFromTypeTests, GivenOnlyGpuDeviceTypeAndReturnValueWhenCreatingContextFromTypeThenCallSucceeds) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, 
context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); retVal = clReleaseContext(context); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextFromTypeTests, GivenCpuTypeWhenCreatingContextFromTypeThenInvalidValueErrorIsReturned) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_CPU, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_DEVICE_NOT_FOUND, retVal); } TEST_F(clCreateContextFromTypeTests, GivenNullCallbackFunctionAndNotNullUserDataWhenCreatingContextFromTypeThenInvalidValueErrorIsReturned) { cl_int a; auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, &a, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateContextFromTypeTests, GivenCallbackFunctionWhenCreatingContextFromTypeThenCallSucceeds) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, contextCallBack, nullptr, &retVal); ASSERT_NE(nullptr, context); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextFromTypeTests, GivenOnlyGpuDeviceTypeWhenCreatingContextFromTypeThenCallSucceeds) { auto context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, nullptr, nullptr); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); retVal = clReleaseContext(context); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextFromTypeTests, GivenInvalidContextCreationPropertiesWhenCreatingContextFromTypeThenInvalidPlatformErrorIsReturned) { cl_context_properties invalidProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) nullptr, 0}; auto context = clCreateContextFromType(invalidProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); } TEST_F(clCreateContextFromTypeTests, 
GivenNonDefaultPlatformInContextCreationPropertiesWhenCreatingContextFromTypeThenSuccessIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, clContext); clReleaseContext(clContext); } TEST_F(clCreateContextFromTypeTests, GivenNonDefaultPlatformWithInvalidIcdDispatchInContextCreationPropertiesWhenCreatingContextFromTypeThenInvalidPlatformErrorIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); nonDefaultPlatformCl->dispatch.icdDispatch = reinterpret_cast(nonDefaultPlatform.get()); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, clContext); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_context_tests.inl000066400000000000000000000104771363734646600300270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; typedef api_tests clCreateContextTests; namespace ClCreateContextTests { static int cbInvoked = 0; void CL_CALLBACK eventCallBack(const char *, const void *, size_t, void *) { cbInvoked++; } TEST_F(clCreateContextTests, returnsSuccess) { auto context = clCreateContext(nullptr, 1u, &testedClDevice, 
nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextTests, noRet) { auto context = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, nullptr, nullptr); ASSERT_NE(nullptr, context); EXPECT_NE(nullptr, context->dispatch.icdDispatch); EXPECT_NE(nullptr, context->dispatch.crtDispatch); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextTests, returnsFail) { auto context = clCreateContext(nullptr, 0, &testedClDevice, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); cl_int someData = 25; context = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, &someData, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateContextTests, invalidDevices) { cl_device_id devList[2]; devList[0] = devices[testedRootDeviceIndex]; devList[1] = (cl_device_id)ptrGarbage; auto context = clCreateContext(nullptr, 2, devList, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clCreateContextTests, nullDevices) { auto context = clCreateContext(nullptr, 2, nullptr, nullptr, nullptr, &retVal); ASSERT_EQ(nullptr, context); ASSERT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateContextTests, nullUserData) { auto context = clCreateContext(nullptr, 1u, &testedClDevice, eventCallBack, nullptr, &retVal); ASSERT_NE(nullptr, context); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateContextTests, givenInvalidContextCreationPropertiesThenContextCreationFails) { cl_context_properties invalidProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) nullptr, 0}; auto context = clCreateContext(invalidProperties, 1u, &testedClDevice, nullptr, nullptr, &retVal); 
EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); } TEST_F(clCreateContextTests, GivenNonDefaultPlatformInContextCreationPropertiesWhenCreatingContextThenSuccessIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); cl_device_id clDevice = nonDefaultPlatform->getClDevice(0); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContext(properties, 1, &clDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, clContext); clReleaseContext(clContext); } TEST_F(clCreateContextFromTypeTests, GivenNonDefaultPlatformWithInvalidIcdDispatchInContextCreationPropertiesWhenCreatingContextThenInvalidPlatformErrorIsReturned) { auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); nonDefaultPlatformCl->dispatch.icdDispatch = reinterpret_cast(nonDefaultPlatform.get()); cl_device_id clDevice = nonDefaultPlatform->getClDevice(0); cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; auto clContext = clCreateContext(properties, 1, &clDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, clContext); } } // namespace ClCreateContextTests compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_image_tests.cpp000066400000000000000000001013061363734646600274150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" using 
namespace NEO; namespace ClCreateImageTests { template struct clCreateImageTests : public ApiFixture<>, public T { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; typedef clCreateImageTests<::testing::Test> clCreateImageTest; TEST_F(clCreateImageTest, GivenNullHostPtrWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateImageTest, GivenDeviceThatDoesntSupportImagesWhenCreatingTiledImageThenInvalidOperationErrorIsReturned) { MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), 0)}; MockContext mockContext{&mockClDevice}; mockClDevice.sharedDeviceInfo.imageSupport = CL_FALSE; cl_bool imageSupportInfo = CL_TRUE; auto status = clGetDeviceInfo(&mockClDevice, CL_DEVICE_IMAGE_SUPPORT, sizeof(imageSupportInfo), &imageSupportInfo, nullptr); EXPECT_EQ(CL_SUCCESS, status); cl_bool expectedValue = CL_FALSE; EXPECT_EQ(expectedValue, imageSupportInfo); auto image = clCreateImage( &mockContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); if (UnitTestHelper::tiledImagesSupported) { EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(nullptr, image); } else { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = 
clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } HWTEST_F(clCreateImageTest, GivenDeviceThatDoesntSupportImagesWhenCreatingNonTiledImageThenCreate) { MockClDevice mockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), 0)}; MockContext mockContext{&mockClDevice}; mockClDevice.sharedDeviceInfo.imageSupport = CL_FALSE; cl_bool imageSupportInfo = CL_TRUE; auto status = clGetDeviceInfo(&mockClDevice, CL_DEVICE_IMAGE_SUPPORT, sizeof(imageSupportInfo), &imageSupportInfo, nullptr); EXPECT_EQ(CL_SUCCESS, status); cl_bool expectedValue = CL_FALSE; EXPECT_EQ(expectedValue, imageSupportInfo); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_height = 1; auto image = clCreateImage( &mockContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenNonNullHostPtrAndAlignedRowPitchWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { char hostPtr[4096]; imageDesc.image_row_pitch = 128; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenNonNullHostPtrAndUnalignedRowPitchWhenCreatingImageThenInvalidImageDescriptotErrorIsReturned) { char hostPtr[4096]; imageDesc.image_row_pitch = 129; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenNonNullHostPtrAndSmallRowPitchWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { char hostPtr[4096]; imageDesc.image_row_pitch = 4; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 
&imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenUnrestrictedIntelFlagWhenCreatingImageWithInvalidFlagCombinationThenImageIsCreatedAndSuccessReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL; auto image = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenNotNullHostPtrAndNoHostPtrFlagWhenCreatingImageThenInvalidHostPtrErrorIsReturned) { char hostPtr[4096]; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, hostPtr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenInvalidFlagBitsWhenCreatingImageThenInvalidValueErrorIsReturned) { cl_mem_flags flags = (1 << 12); auto image = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenInvalidFlagBitsWhenCreatingImageFromAnotherImageThenInvalidValueErrorIsReturned) { imageFormat.image_channel_order = CL_NV12_INTEL; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); imageFormat.image_channel_order = CL_RG; imageDesc.mem_object = image; cl_mem_flags flags = (1 << 30); auto imageFromImageObject = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(imageFromImageObject); 
EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenInvalidRowPitchWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { imageDesc.image_row_pitch = 655; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenNullHostPtrAndCopyHostPtrFlagWhenCreatingImageThenInvalidHostPtrErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenNullHostPtrAndMemUseHostPtrFlagWhenCreatingImageThenInvalidHostPtrErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, image); } TEST_F(clCreateImageTest, GivenNullHostPtrAndNonZeroRowPitchWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { imageDesc.image_row_pitch = 4; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenNonZeroPitchWhenCreatingImageFromBufferThenImageIsCreatedAndSuccessReturned) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 4096 * 9, nullptr, nullptr); auto &helper = HwHelper::get(renderCoreFamily); HardwareInfo hardwareInfo = *defaultHwInfo; imageDesc.mem_object = buffer; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 17; imageDesc.image_height = 17; imageDesc.image_row_pitch = helper.getPitchAlignmentForImage(&hardwareInfo) * 17; auto image = 
clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_NE(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageTest, GivenNotNullHostPtrAndRowPitchIsNotGreaterThanWidthTimesElementSizeWhenCreatingImageThenInvalidImageDescriptorErrorIsReturned) { imageDesc.image_row_pitch = 64; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTest, GivenNullContextWhenCreatingImageThenInvalidContextErrorIsReturned) { auto image = clCreateImage( nullptr, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } typedef clCreateImageTests<::testing::Test> clCreateImageTestYUV; TEST_F(clCreateImageTestYUV, GivenInvalidGlagWhenCreatingYuvImageThenInvalidValueErrorIsReturned) { imageFormat.image_channel_order = CL_YUYV_INTEL; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateImageTestYUV, Given1DImageTypeWhenCreatingYuvImageThenInvalidImageDescriptorErrorIsReturned) { imageFormat.image_channel_order = CL_YUYV_INTEL; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); EXPECT_EQ(nullptr, image); retVal = clReleaseMemObject(image); 
EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}

// Parameterized fixture: each instance receives one value from validFlags.
// NOTE(review): the TestWithParam template argument is not visible here - confirm
// against the original file.
typedef clCreateImageTests<::testing::TestWithParam> clCreateImageValidFlags;

// Single flags for which image creation is expected to succeed.
static cl_mem_flags validFlags[] = {
    CL_MEM_READ_WRITE,
    CL_MEM_WRITE_ONLY,
    CL_MEM_READ_ONLY,
    CL_MEM_USE_HOST_PTR,
    CL_MEM_ALLOC_HOST_PTR,
    CL_MEM_COPY_HOST_PTR,
    CL_MEM_HOST_WRITE_ONLY,
    CL_MEM_HOST_READ_ONLY,
    CL_MEM_HOST_NO_ACCESS,
    CL_MEM_NO_ACCESS_INTEL,
    CL_MEM_FORCE_LINEAR_STORAGE_INTEL,
    CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL,
};

INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageValidFlags, ::testing::ValuesIn(validFlags));

// Creates an image with each valid flag and expects CL_SUCCESS. A host buffer is
// allocated only for the two flags that need a host pointer.
TEST_P(clCreateImageValidFlags, GivenValidFlagsWhenCreatingImageThenImageIsCreatedAndSuccessReturned) {
    cl_mem_flags flags = GetParam();
    // NOTE(review): template arguments of unique_ptr / make_unique are not visible
    // here - confirm against the original file.
    std::unique_ptr ptr;
    char *hostPtr = nullptr;

    if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR) {
        // Size is width * height * 4, rounded up to a multiple of the page size
        // (the factor 4 is presumably bytes per pixel - TODO confirm).
        ptr = std::make_unique(alignUp(imageDesc.image_width * imageDesc.image_height * 4, MemoryConstants::pageSize));
        hostPtr = ptr.get();
    }

    auto image = clCreateImage(
        pContext, flags, &imageFormat, &imageDesc, hostPtr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, image);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Parameterized fixture: each instance receives one value from
// invalidFlagsCombinations.
typedef clCreateImageTests<::testing::TestWithParam> clCreateImageInvalidFlags;

// Flag pairs for which image creation is expected to fail with CL_INVALID_VALUE.
static cl_mem_flags invalidFlagsCombinations[] = {
    CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY,
    CL_MEM_READ_WRITE | CL_MEM_READ_ONLY,
    CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY,
    CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR,
    CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR,
    CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY,
    CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS,
    CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS,
    CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_WRITE,
    CL_MEM_NO_ACCESS_INTEL | CL_MEM_WRITE_ONLY,
    CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_ONLY};

INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageInvalidFlags, ::testing::ValuesIn(invalidFlagsCombinations));

TEST_P(clCreateImageInvalidFlags,
GivenInvalidFlagsCombinationsWhenCreatingImageThenInvalidValueErrorIsReturned) {
    // A non-null host pointer and a row pitch are supplied so that the flag
    // combination is the only thing the test varies; the call is expected to fail.
    char ptr[10];
    imageDesc.image_row_pitch = 128;

    cl_mem_flags flags = GetParam();

    auto image = clCreateImage(
        pContext, flags, &imageFormat, &imageDesc, ptr, &retVal);
    ASSERT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_EQ(nullptr, image);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}

// Test parameter pairing the flags of a parent image with the flags used for an
// image created from that parent.
struct ImageFlags {
    cl_mem_flags parentFlags; // flags for the parent (first) image
    cl_mem_flags flags;       // flags for the image created from the parent
};

// Parent/child combinations where one side is CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL.
static ImageFlags flagsWithUnrestrictedIntel[] = {
    {CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, CL_MEM_READ_WRITE},
    {CL_MEM_READ_WRITE, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL}};

typedef clCreateImageTests<::testing::TestWithParam> clCreateImageFlagsUnrestrictedIntel;

INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageFlagsUnrestrictedIntel, ::testing::ValuesIn(flagsWithUnrestrictedIntel));

// Creates a CL_NV12_INTEL parent image, then a CL_RG image from it, and expects
// both creations and both releases to succeed.
TEST_P(clCreateImageFlagsUnrestrictedIntel, GivenFlagsIncludingUnrestrictedIntelWhenCreatingImageThenImageIsCreatedAndSuccessReturned) {
    imageFormat.image_channel_order = CL_NV12_INTEL;
    ImageFlags imageFlags = GetParam();
    cl_mem_flags parentFlags = imageFlags.parentFlags;
    cl_mem_flags flags = imageFlags.flags;

    // The parent is always created with CL_MEM_HOST_NO_ACCESS added to its flags.
    auto image = clCreateImage(
        pContext, parentFlags | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, image);

    imageDesc.mem_object = image;
    imageFormat.image_channel_order = CL_RG;

    auto imageFromImageObject = clCreateImage(
        pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, imageFromImageObject);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(imageFromImageObject);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Parent/child flag combinations for which creating the child image is expected
// to succeed.
static ImageFlags validFlagsAndParentFlags[] = {
    {CL_MEM_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS},
    {CL_MEM_READ_ONLY, CL_MEM_HOST_NO_ACCESS},
    {CL_MEM_NO_ACCESS_INTEL, CL_MEM_HOST_NO_ACCESS},
    {CL_MEM_HOST_NO_ACCESS, CL_MEM_READ_WRITE}};

typedef
clCreateImageTests<::testing::TestWithParam> clCreateImageValidFlagsAndParentFlagsCombinations;

INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageValidFlagsAndParentFlagsCombinations, ::testing::ValuesIn(validFlagsAndParentFlags));

// Creates a CL_NV12_INTEL parent image with the parameter's parent flags, then a
// CL_RG image from it with the parameter's child flags; both are expected to succeed.
TEST_P(clCreateImageValidFlagsAndParentFlagsCombinations, GivenValidFlagsAndParentFlagsWhenCreatingImageThenImageIsCreatedAndSuccessReturned) {
    imageFormat.image_channel_order = CL_NV12_INTEL;
    ImageFlags imageFlags = GetParam();
    cl_mem_flags parentFlags = imageFlags.parentFlags;
    cl_mem_flags flags = imageFlags.flags;

    // The parent is always created with CL_MEM_HOST_NO_ACCESS added to its flags.
    auto image = clCreateImage(
        pContext, parentFlags | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, image);

    imageDesc.mem_object = image;
    imageFormat.image_channel_order = CL_RG;

    auto imageFromImageObject = clCreateImage(
        pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, imageFromImageObject);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(imageFromImageObject);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Parent/child flag combinations for which creating the child image is expected
// to fail.
static ImageFlags invalidFlagsAndParentFlags[] = {
    {CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE},
    {CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY},
    {CL_MEM_READ_ONLY, CL_MEM_READ_WRITE},
    {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY},
    {CL_MEM_NO_ACCESS_INTEL, CL_MEM_READ_WRITE},
    {CL_MEM_NO_ACCESS_INTEL, CL_MEM_WRITE_ONLY},
    {CL_MEM_NO_ACCESS_INTEL, CL_MEM_READ_ONLY},
    {CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_WRITE_ONLY},
    {CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_READ_ONLY}};

typedef clCreateImageTests<::testing::TestWithParam> clCreateImageInvalidFlagsAndParentFlagsCombinations;

INSTANTIATE_TEST_CASE_P(CreateImageWithFlags, clCreateImageInvalidFlagsAndParentFlagsCombinations, ::testing::ValuesIn(invalidFlagsAndParentFlags));

// NOTE(review): the test name mentions "InvalidMemObjectError"; within the part of
// the body visible after this line the child creation is checked against
// CL_INVALID_VALUE - confirm the intended wording.
TEST_P(clCreateImageInvalidFlagsAndParentFlagsCombinations, GivenInvalidFlagsAndParentFlagsWhenCreatingImageThenInvalidMemObjectErrorIsReturned) {
imageFormat.image_channel_order = CL_NV12_INTEL; ImageFlags imageFlags = GetParam(); cl_mem_flags parentFlags = imageFlags.parentFlags; cl_mem_flags flags = imageFlags.flags; auto image = clCreateImage( pContext, parentFlags | CL_MEM_HOST_NO_ACCESS, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); imageFormat.image_channel_order = CL_RG; imageDesc.mem_object = image; auto imageFromImageObject = clCreateImage( pContext, flags, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(imageFromImageObject); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } struct ImageSizes { size_t width; size_t height; size_t depth; }; ImageSizes validImage2DSizes[] = {{64, 64, 1}, {3, 3, 1}, {8192, 1, 1}, {117, 39, 1}, {16384, 4, 1}, {4, 16384, 1}}; typedef clCreateImageTests<::testing::TestWithParam> clCreateImageValidSizesTest; INSTANTIATE_TEST_CASE_P(validImage2DSizes, clCreateImageValidSizesTest, ::testing::ValuesIn(validImage2DSizes)); TEST_P(clCreateImageValidSizesTest, GivenValidSizesWhenCreatingImageThenImageIsCreatedAndSuccessReturned) { ImageSizes sizes = GetParam(); imageDesc.image_width = sizes.width; imageDesc.image_height = sizes.height; imageDesc.image_depth = sizes.depth; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); } typedef clCreateImageTests<::testing::Test> clCreateImage2DTest; TEST_F(clCreateImage2DTest, GivenValidParametersWhenCreating2DImageThenImageIsCreatedAndSuccessReturned) { auto image = clCreateImage2D( pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 0, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } 
// Passing nullptr for errcode_ret must not prevent the image from being created.
TEST_F(clCreateImage2DTest, GivenNoPtrToReturnValueWhenCreating2DImageThenImageIsCreated) {
    auto image = clCreateImage2D(
        pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 0, 0, nullptr);
    EXPECT_NE(nullptr, image);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A null context is expected to produce CL_INVALID_CONTEXT and no image.
TEST_F(clCreateImage2DTest, GivenInvalidContextsWhenCreating2DImageThenInvalidContextErrorIsReturned) {
    auto image = clCreateImage2D(
        nullptr, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 0, 0, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, image);
}

// Fixture alias for the clCreateImage3D entry point tests.
typedef clCreateImageTests<::testing::Test> clCreateImage3DTest;

// Creates a 10x10x1 image through clCreateImage3D and expects success.
TEST_F(clCreateImage3DTest, GivenValidParametersWhenCreating3DImageThenImageIsCreatedAndSuccessReturned) {
    auto image = clCreateImage3D(
        pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 1, 0, 0, 0, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, image);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Passing nullptr for errcode_ret must not prevent the image from being created.
TEST_F(clCreateImage3DTest, GivenNoPtrToReturnValueWhenCreating3DImageThenImageIsCreated) {
    auto image = clCreateImage3D(
        pContext, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 1, 0, 0, 0, nullptr);
    EXPECT_NE(nullptr, image);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A null context is expected to produce CL_INVALID_CONTEXT and no image.
TEST_F(clCreateImage3DTest, GivenInvalidContextsWhenCreating3DImageThenInvalidContextErrorIsReturned) {
    auto image = clCreateImage3D(
        nullptr, CL_MEM_READ_WRITE, &imageFormat, 10, 10, 1, 0, 0, 0, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, image);
}

// The properties-based entry point reuses the clCreateImageTest fixture.
using clCreateImageWithPropertiesINTELTest = clCreateImageTest;

// With every argument null (including the context) the call is expected to fail
// with CL_INVALID_CONTEXT.
TEST_F(clCreateImageWithPropertiesINTELTest, GivenInvalidContextWhenCreatingImageWithPropertiesThenInvalidContextErrorIsReturned) {
    auto image = clCreateImageWithPropertiesINTEL(
        nullptr, nullptr, nullptr, nullptr, nullptr, &retVal);
    ASSERT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, image);
}

TEST_F(clCreateImageWithPropertiesINTELTest,
GivenValidParametersWhenCreatingImageWithPropertiesThenImageIsCreatedAndSuccessReturned) {
    // Zero-terminated property list: a single CL_MEM_FLAGS / CL_MEM_READ_WRITE pair.
    cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, CL_MEM_READ_WRITE, 0};

    auto image = clCreateImageWithPropertiesINTEL(
        pContext, properties, &imageFormat, &imageDesc, nullptr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, image);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A property key of (1 << 31) is expected to be rejected with CL_INVALID_VALUE.
TEST_F(clCreateImageWithPropertiesINTELTest, GivenInvalidPropertyKeyWhenCreatingImageWithPropertiesThenInvalidValueErrorIsReturned) {
    cl_mem_properties_intel properties[] = {(cl_mem_properties_intel(1) << 31), 0, 0};

    auto image = clCreateImageWithPropertiesINTEL(
        pContext, properties, &imageFormat, &imageDesc, nullptr, &retVal);
    EXPECT_EQ(nullptr, image);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Fixture alias for tests that create an image from another image object.
typedef clCreateImageTests<::testing::Test> clCreateImageFromImageTest;

// Creates a CL_BGRA parent image, reads its properties back into imageDesc and
// then creates a CL_sBGRA image from it using that same descriptor.
TEST_F(clCreateImageFromImageTest, GivenImage2dWhenCreatingImage2dFromImageWithTheSameDescriptorAndValidFormatThenImageIsCreatedAndSuccessReturned) {

    imageFormat.image_channel_order = CL_BGRA;

    auto image = clCreateImage(
        pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, image);

    // Populate the descriptor from the parent so that the child matches it exactly.
    retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_width), &imageDesc.image_width, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Fixed: the height must be queried with CL_IMAGE_HEIGHT. The original queried
    // CL_IMAGE_WIDTH here, which silently stored the width in image_height.
    retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(imageDesc.image_height), &imageDesc.image_height, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, sizeof(imageDesc.image_depth), &imageDesc.image_depth, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(imageDesc.image_row_pitch), &imageDesc.image_row_pitch, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, sizeof(imageDesc.image_slice_pitch), &imageDesc.image_slice_pitch, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal =
clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, sizeof(imageDesc.num_mip_levels), &imageDesc.num_mip_levels, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, sizeof(imageDesc.num_samples), &imageDesc.num_samples, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, sizeof(imageDesc.image_array_size), &imageDesc.image_array_size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = image; imageFormat.image_channel_order = CL_sBGRA; auto imageFromImageObject = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); if (pContext->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport < 20) { EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); EXPECT_EQ(nullptr, imageFromImageObject); } else { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, imageFromImageObject); retVal = clReleaseMemObject(imageFromImageObject); EXPECT_EQ(CL_SUCCESS, retVal); } retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageFromImageTest, GivenImage2dWhenCreatingImage2dFromImageWithDifferentDescriptorAndValidFormatThenInvalidImageFormatDescriptorErrorIsReturned) { imageFormat.image_channel_order = CL_BGRA; auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_width), &imageDesc.image_width, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_height), &imageDesc.image_height, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, sizeof(imageDesc.image_depth), &imageDesc.image_depth, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(imageDesc.image_row_pitch), &imageDesc.image_row_pitch, nullptr); 
EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, sizeof(imageDesc.image_slice_pitch), &imageDesc.image_slice_pitch, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, sizeof(imageDesc.num_mip_levels), &imageDesc.num_mip_levels, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, sizeof(imageDesc.num_samples), &imageDesc.num_samples, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, sizeof(imageDesc.image_array_size), &imageDesc.image_array_size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = image; imageDesc.image_width++; imageFormat.image_channel_order = CL_sBGRA; auto imageFromImageObject = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); if (pContext->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport >= 20) { EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } else { EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); } EXPECT_EQ(nullptr, imageFromImageObject); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateImageFromImageTest, GivenImage2dWhenCreatingImage2dFromImageWithTheSameDescriptorAndNotValidFormatThenInvalidImageFormatDescriptorErrorIsReturned) { imageFormat.image_channel_order = CL_BGRA; auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_width), &imageDesc.image_width, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(imageDesc.image_height), &imageDesc.image_height, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, sizeof(imageDesc.image_depth), &imageDesc.image_depth, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(imageDesc.image_row_pitch), &imageDesc.image_row_pitch, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, sizeof(imageDesc.image_slice_pitch), &imageDesc.image_slice_pitch, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, sizeof(imageDesc.num_mip_levels), &imageDesc.num_mip_levels, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, sizeof(imageDesc.num_samples), &imageDesc.num_samples, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, sizeof(imageDesc.image_array_size), &imageDesc.image_array_size, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    imageDesc.mem_object = image;
    // Same channel order as the parent image; the creation below is expected to
    // fail with CL_INVALID_IMAGE_FORMAT_DESCRIPTOR.
    imageFormat.image_channel_order = CL_BGRA;

    auto imageFromImageObject = clCreateImage(
        pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal);
    EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal);
    EXPECT_EQ(nullptr, imageFromImageObject);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Image types other than CL_MEM_OBJECT_IMAGE2D, used as test parameters below.
uint32_t non2dImageTypes[] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D};

// Parameterized fixture that creates a parent image in SetUp, stores it in
// imageDesc.mem_object, and releases it in TearDown.
// NOTE(review): the WithParamInterface template argument is not visible here -
// confirm against the original file.
struct clCreateNon2dImageFromImageTest : public clCreateImageFromImageTest, public ::testing::WithParamInterface {
    void SetUp() override {
        clCreateImageFromImageTest::SetUp();
        image = clCreateImage(
            pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_NE(nullptr, image);
        imageDesc.mem_object = image;
    }
    void TearDown() override {
        // The release status is stored in retVal but not asserted.
        retVal = clReleaseMemObject(image);
        clCreateImageFromImageTest::TearDown();
    }
    cl_mem image; // parent image created in SetUp
};

// Requests a non-2D image type while mem_object points at the parent image and
// expects CL_INVALID_IMAGE_DESCRIPTOR.
TEST_P(clCreateNon2dImageFromImageTest, GivenImage2dWhenCreatingImageFromNon2dImageThenInvalidImageDescriptorErrorIsReturned) {
    imageDesc.image_type = GetParam();
    auto imageFromImageObject = clCreateImage(
        pContext,
CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal);
    EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal);
    EXPECT_EQ(nullptr, imageFromImageObject);
}

INSTANTIATE_TEST_CASE_P(clCreateNon2dImageFromImageTests, clCreateNon2dImageFromImageTest, ::testing::ValuesIn(non2dImageTypes));
} // namespace ClCreateImageTests compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_kernel_tests.inl000066400000000000000000000116421363734646600276160ustar00rootroot00000000000000
/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/helpers/file_io.h"

#include "opencl/source/context/context.h"
#include "opencl/source/program/kernel_info.h"
#include "opencl/test/unit_test/helpers/test_files.h"
#include "opencl/test/unit_test/mocks/mock_program.h"

#include "cl_api_tests.h"

using namespace NEO;

// clCreateKernel tests run on the common API test fixture.
typedef api_tests clCreateKernelTests;

namespace ULT {

// Loads the CopyBuffer_simd16_ binary, creates and builds a program from it and
// expects clCreateKernel("CopyBuffer") to succeed.
TEST_F(clCreateKernelTests, GivenCorrectKernelInProgramWhenCreatingNewKernelThenKernelIsCreatedAndSuccessIsReturned) {
    cl_kernel kernel = nullptr;
    cl_program pProgram = nullptr;
    cl_int binaryStatus = CL_SUCCESS;
    size_t binarySize = 0;
    std::string testFile;
    retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin");

    auto pBinary = loadDataFromFile(
        testFile.c_str(), binarySize);

    ASSERT_NE(0u, binarySize);
    ASSERT_NE(nullptr, pBinary);

    // NOTE(review): the reinterpret_cast target type is not visible here - confirm
    // against the original file.
    const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())};
    pProgram = clCreateProgramWithBinary(
        pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal);

    // The loaded binary is dropped right after the program has been created.
    pBinary.reset();

    EXPECT_NE(nullptr, pProgram);
    ASSERT_EQ(CL_SUCCESS, retVal);

    retVal = clBuildProgram(
        pProgram, num_devices, devices, nullptr, nullptr, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    kernel = clCreateKernel(
        pProgram, "CopyBuffer", &retVal);

    EXPECT_NE(nullptr, kernel);
    ASSERT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseKernel(kernel);
    ASSERT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
// Builds the same CopyBuffer program but asks for a kernel named "WrongName";
// expects CL_INVALID_KERNEL_NAME and a null kernel.
TEST_F(clCreateKernelTests, GivenInvalidKernelNameWhenCreatingNewKernelThenInvalidKernelNameErrorIsReturned) {
    cl_kernel kernel = nullptr;
    cl_program pProgram = nullptr;
    cl_int binaryStatus = CL_SUCCESS;
    size_t binarySize = 0;
    std::string testFile;
    retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin");

    auto pBinary = loadDataFromFile(
        testFile.c_str(), binarySize);

    ASSERT_NE(0u, binarySize);
    ASSERT_NE(nullptr, pBinary);

    // NOTE(review): the reinterpret_cast target type is not visible here - confirm
    // against the original file.
    const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())};
    pProgram = clCreateProgramWithBinary(
        pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal);

    // The loaded binary is dropped right after the program has been created.
    pBinary.reset();

    EXPECT_NE(nullptr, pProgram);
    ASSERT_EQ(CL_SUCCESS, retVal);

    retVal = clBuildProgram(
        pProgram, num_devices, devices, nullptr, nullptr, nullptr);
    ASSERT_EQ(CL_SUCCESS, retVal);

    kernel = clCreateKernel(
        pProgram, "WrongName", &retVal);

    ASSERT_EQ(nullptr, kernel);
    ASSERT_EQ(CL_INVALID_KERNEL_NAME, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A null program handle is expected to produce CL_INVALID_PROGRAM.
TEST_F(clCreateKernelTests, GivenNullProgramWhenCreatingNewKernelThenInvalidProgramErrorIsReturned) {
    cl_kernel kernel = nullptr;
    kernel = clCreateKernel(
        nullptr, "CopyBuffer", &retVal);

    ASSERT_EQ(CL_INVALID_PROGRAM, retVal);
    ASSERT_EQ(nullptr, kernel);
}

// A null kernel name on a mock program holding one KernelInfo is expected to
// produce CL_INVALID_VALUE.
TEST_F(clCreateKernelTests, GivenNullKernelNameWhenCreatingNewKernelThenInvalidValueErrorIsReturned) {
    cl_kernel kernel = nullptr;
    // Handed to the mock program via addKernelInfo below; presumably the program
    // takes ownership, since it is not deleted here - TODO confirm.
    KernelInfo *pKernelInfo = new KernelInfo();

    // NOTE(review): template arguments of unique_ptr / make_unique are not visible
    // here - confirm against the original file.
    std::unique_ptr pMockProg = std::make_unique(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr);
    pMockProg->addKernelInfo(pKernelInfo);

    kernel = clCreateKernel(
        pMockProg.get(), nullptr, &retVal);

    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_EQ(nullptr, kernel);
}

// Passes the context pointer, cast to a program handle, as the program argument
// and expects CL_INVALID_PROGRAM.
TEST_F(clCreateKernelTests, GivenInvalidProgramWhenCreatingNewKernelThenInvalidProgramErrorIsReturned) {
    cl_kernel kernel = nullptr;

    kernel = clCreateKernel(
        reinterpret_cast(pContext), "CopyBuffer", &retVal);

    ASSERT_EQ(CL_INVALID_PROGRAM, retVal);
    ASSERT_EQ(nullptr,
kernel);
}

// A mock program whose build status is forced to CL_BUILD_ERROR is expected to
// make clCreateKernel fail with CL_INVALID_PROGRAM_EXECUTABLE.
TEST_F(clCreateKernelTests, GivenProgramWithBuildErrorWhenCreatingNewKernelThenInvalidProgramExecutableErrorIsReturned) {
    cl_kernel kernel = nullptr;
    // NOTE(review): template arguments of unique_ptr / make_unique are not visible
    // here - confirm against the original file.
    std::unique_ptr pMockProg = std::make_unique(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr);
    pMockProg->SetBuildStatus(CL_BUILD_ERROR);

    kernel = clCreateKernel(
        pMockProg.get(), "", &retVal);

    EXPECT_EQ(CL_INVALID_PROGRAM_EXECUTABLE, retVal);
    EXPECT_EQ(nullptr, kernel);
}

// Calls clCreateKernel with a null program and a null errcode_ret and checks that
// a null kernel is returned.
// NOTE(review): the test name ends in "ThenKernelIsCreated" but the assertion
// expects nullptr - confirm the intended wording.
TEST_F(clCreateKernelTests, GivenNullPtrForReturnWhenCreatingNewKernelThenKernelIsCreated) {
    cl_kernel kernel = nullptr;
    kernel = clCreateKernel(
        nullptr, "CopyBuffer", nullptr);

    ASSERT_EQ(nullptr, kernel);
}
} // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_kernels_in_program_tests.inl000066400000000000000000000061651363734646600322220ustar00rootroot00000000000000
/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/helpers/file_io.h"

#include "opencl/source/context/context.h"
#include "opencl/test/unit_test/helpers/test_files.h"

#include "cl_api_tests.h"

using namespace NEO;

// Fixture that creates and builds a program from the CopyBuffer_simd16_ binary in
// SetUp, and releases the kernel and program handles in TearDown.
struct clCreateKernelsInProgramTests : public api_tests {
    void SetUp() override {
        api_tests::SetUp();

        std::string testFile;
        retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin");

        size_t binarySize = 0;
        // Local variable; it shadows the fixture member of the same name.
        auto pBinary = loadDataFromFile(
            testFile.c_str(), binarySize);

        ASSERT_NE(0u, binarySize);
        ASSERT_NE(nullptr, pBinary);

        auto binaryStatus = CL_SUCCESS;
        // NOTE(review): the reinterpret_cast target type is not visible here -
        // confirm against the original file.
        const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())};
        program = clCreateProgramWithBinary(
            pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal);

        pBinary.reset();

        ASSERT_NE(nullptr, program);
        ASSERT_EQ(CL_SUCCESS, retVal);

        retVal = clBuildProgram(
            program, num_devices, devices, nullptr, nullptr, nullptr);
        ASSERT_EQ(CL_SUCCESS, retVal);
    }

    void TearDown() override {
        // Return values are ignored; kernel is still nullptr when a test did not
        // create one.
        clReleaseKernel(kernel);
        clReleaseProgram(program);

        api_tests::TearDown();
    }

    cl_program program =
nullptr;
    cl_kernel kernel = nullptr; // filled in by tests; released in TearDown
    // NOTE(review): not referenced by SetUp (which uses a local of the same name)
    // or by the tests below; the template argument is not visible here.
    std::unique_ptr pBinary = nullptr;
};

// Requests one kernel and expects CL_SUCCESS, a reported count of 1 and a
// non-null kernel handle.
TEST_F(clCreateKernelsInProgramTests, GivenValidParametersWhenCreatingKernelObjectsThenKernelsAndSuccessAreReturned) {
    cl_uint numKernelsRet = 0;
    retVal = clCreateKernelsInProgram(
        program, 1, &kernel, &numKernelsRet);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, numKernelsRet);
    EXPECT_NE(nullptr, kernel);
}

// With num_kernels == 0 and a null output array the call is expected to succeed
// and still report a kernel count of 1.
TEST_F(clCreateKernelsInProgramTests, GivenNullKernelArgWhenCreatingKernelObjectsThenSuccessIsReturned) {
    cl_uint numKernelsRet = 0;
    retVal = clCreateKernelsInProgram(
        program, 0, nullptr, &numKernelsRet);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, numKernelsRet);
}

// A null num_kernels_ret pointer is expected to be accepted.
TEST_F(clCreateKernelsInProgramTests, GivenNullPtrForNumKernelsReturnWhenCreatingKernelObjectsThenSuccessIsReturned) {
    retVal = clCreateKernelsInProgram(
        program, 1, &kernel, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, kernel);
}

// A null program is expected to produce CL_INVALID_PROGRAM and leave kernel null.
TEST_F(clCreateKernelsInProgramTests, GivenNullProgramWhenCreatingKernelObjectsThenInvalidProgramErrorIsReturn) {
    retVal = clCreateKernelsInProgram(
        nullptr, 1, &kernel, nullptr);
    EXPECT_EQ(CL_INVALID_PROGRAM, retVal);
    EXPECT_EQ(nullptr, kernel);
}

// num_kernels == 0 with a non-null output array is expected to produce
// CL_INVALID_VALUE and leave kernel null.
TEST_F(clCreateKernelsInProgramTests, GivenTooSmallOutputBufferWhenCreatingKernelObjectsThenInvalidValueErrorIsReturned) {
    retVal = clCreateKernelsInProgram(
        program, 0, &kernel, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
    EXPECT_EQ(nullptr, kernel);
}
cl_create_perf_counters_command_queue_tests.inl000066400000000000000000000164571363734646600340500ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/event/event.h"
#include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include
"opencl/test/unit_test/os_interface/mock_performance_counters.h"

#include "cl_api_tests.h"

using namespace NEO;

// Fixture combining the device-instrumentation and performance-counters fixtures.
// SetUp creates a context for the single device provided by the fixture.
struct clCreatePerfCountersCommandQueueINTELTests : public DeviceInstrumentationFixture,
                                                    public PerformanceCountersDeviceFixture,
                                                    ::testing::Test {
    void SetUp() override {
        PerformanceCountersDeviceFixture::SetUp();
        DeviceInstrumentationFixture::SetUp(true);

        deviceId = device.get();
        retVal = CL_SUCCESS;
        // NOTE(review): the unique_ptr template argument is not visible here -
        // confirm against the original file.
        context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal));
    }

    void TearDown() override {
        // Only the performance-counters fixture is torn down explicitly.
        PerformanceCountersDeviceFixture::TearDown();
    }

    std::unique_ptr context; // context created in SetUp
    cl_device_id deviceId;   // handle of the fixture's device
    cl_int retVal;           // status of the most recent API call
};

namespace ULT {

// With CL_QUEUE_PROFILING_ENABLE and configuration 0 the queue is expected to be
// created with performance counters enabled.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenCorrectParamatersWhenCreatingPerfCountersCmdQThenCmdQIsCreatedAndPerfCountersAreEnabled) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 0;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    ASSERT_NE(nullptr, cmdQ);
    ASSERT_EQ(CL_SUCCESS, retVal);

    // NOTE(review): the castToObject template argument is not visible here.
    auto commandQueueObject = castToObject(cmdQ);
    EXPECT_TRUE(commandQueueObject->isPerfCountersEnabled());

    retVal = clReleaseCommandQueue(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// With properties == 0 (profiling not requested) the call is expected to fail with
// CL_INVALID_QUEUE_PROPERTIES.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenNullPropertiesWhenCreatingPerfCountersCmdQThenInvalidQueuePropertiesErrorIsReturned) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = 0;
    cl_uint configuration = 0;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    ASSERT_EQ(nullptr, cmdQ);
    ASSERT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal);
}

// Adding CL_QUEUE_ON_DEVICE (and, in the second half, CL_QUEUE_ON_DEVICE_DEFAULT)
// to the properties is expected to fail with CL_INVALID_QUEUE_PROPERTIES.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenClQueueOnDevicePropertyWhenCreatingPerfCountersCmdQThenInvalidQueuePropertiesErrorIsReturned) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE;
    cl_uint configuration
= 0;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    ASSERT_EQ(nullptr, cmdQ);
    ASSERT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal);

    // Second variant: CL_QUEUE_ON_DEVICE_DEFAULT instead of CL_QUEUE_ON_DEVICE.
    properties = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE_DEFAULT;
    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    ASSERT_EQ(nullptr, cmdQ);
    ASSERT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal);
}

// A null context is expected to produce CL_INVALID_CONTEXT.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenNullContextWhenCreatingPerfCountersCmdQThenInvalidContextErrorIsReturned) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 0;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(nullptr, deviceId, properties, configuration, &retVal);

    ASSERT_EQ(nullptr, cmdQ);
    ASSERT_EQ(CL_INVALID_CONTEXT, retVal);
}

// A configuration value of 4 is expected to be rejected.
// NOTE(review): the test name says "OutOfResourcesError" but the assertion expects
// CL_INVALID_OPERATION - confirm the intended wording.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenMaximumGtdiConfigurationWhenCreatingPerfCountersCmdQThenOutOfResourcesErrorIsReturned) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 4;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    ASSERT_EQ(nullptr, cmdQ);
    ASSERT_EQ(CL_INVALID_OPERATION, retVal);
}

// An Event constructed on a perf-counters queue is expected to report performance
// counters as enabled.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenCorrectCmdQWhenEventIsCreatedThenPerfCountersAreEnabled) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 0;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    ASSERT_NE(nullptr, cmdQ);
    ASSERT_EQ(CL_SUCCESS, retVal);

    // NOTE(review): the castToObject template argument is not visible here.
    auto commandQueueObject = castToObject(cmdQ);
    EXPECT_TRUE(commandQueueObject->isPerfCountersEnabled());

    Event event(commandQueueObject, CL_COMMAND_NDRANGE_KERNEL, 1, 5);
    EXPECT_TRUE(event.isPerfCountersEnabled());

    retVal = clReleaseCommandQueue(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
// With instrumentationEnabled cleared in the capability table, queue creation is
// expected to fail with CL_INVALID_DEVICE.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenInstrumentationEnabledIsFalseWhenCreatingPerfCountersCmdQThenInvalidDeviceErrorIsReturned) {
    hwInfo->capabilityTable.instrumentationEnabled = false;
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 0;
    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    ASSERT_EQ(nullptr, cmdQ);
    EXPECT_EQ(CL_INVALID_DEVICE, retVal);
}

// Passes the context pointer, cast to cl_device_id, as the device argument and
// expects CL_INVALID_DEVICE.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenInvalidDeviceWhenCreatingPerfCountersCmdQThenInvalidDeviceErrorIsReturned) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 0;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), (cl_device_id)context.get(), properties, configuration, &retVal);

    ASSERT_EQ(nullptr, cmdQ);
    ASSERT_EQ(CL_INVALID_DEVICE, retVal);
}

// Creates a perf-counters queue, then marks the mock metrics library as failing to
// open; the already-created queue must still report counters as enabled.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, GivenInvalidMetricsLibraryWhenCreatingPerfCountersThenPerfCountersReturnError) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 0;

    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);
    // NOTE(review): template arguments of castToObject / static_cast below are not
    // visible here - confirm against the original file.
    auto commandQueueObject = castToObject(cmdQ);

    ASSERT_NE(nullptr, cmdQ);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto performanceCounters = commandQueueObject->getPerfCounters();
    auto metricsLibary = static_cast(performanceCounters->getMetricsLibraryInterface());
    // Fixed: check the pointer before it is dereferenced. The original wrote
    // through metricsLibary first and asserted non-null afterwards, so the
    // assertion could never catch a null pointer.
    ASSERT_NE(nullptr, metricsLibary);
    metricsLibary->validOpen = false;

    EXPECT_TRUE(commandQueueObject->isPerfCountersEnabled());

    retVal = clReleaseCommandQueue(cmdQ);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Marks the device's mock metrics library as failing to open before the queue is
// created.
TEST_F(clCreatePerfCountersCommandQueueINTELTests, givenInvalidMetricsLibraryWhenCreatingCommandQueueThenReturnError) {
    cl_command_queue cmdQ = nullptr;
    cl_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_uint configuration = 0;

    auto
performanceCounters = device->getPerformanceCounters();

    // NOTE(review): the static_cast target type is not visible here - confirm
    // against the original file.
    auto metricsLibary = static_cast(performanceCounters->getMetricsLibraryInterface());
    metricsLibary->validOpen = false;

    // With the metrics library unable to open, creation is expected to fail with
    // CL_OUT_OF_RESOURCES.
    cmdQ = clCreatePerfCountersCommandQueueINTEL(context.get(), deviceId, properties, configuration, &retVal);

    EXPECT_EQ(nullptr, cmdQ);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
}

} // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_pipe_tests.inl000066400000000000000000000137641363734646600273020ustar00rootroot00000000000000
/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/device/device.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"

#include "opencl/source/context/context.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"

#include "cl_api_tests.h"

using namespace NEO;

// Non-parameterized clCreatePipe tests run on the common API test fixture.
typedef api_tests clCreatePipeTests;

namespace ClCreatePipeTests {

// Parameterized fixture for flag values that clCreatePipe is expected to accept.
// NOTE(review): the TestWithParam template argument is not visible here - confirm
// against the original file.
class clCreatePipeWithParamTests : public ApiFixture<>, public testing::TestWithParam {
    void SetUp() override {
        ApiFixture::SetUp();
    }
    void TearDown() override {
        ApiFixture::TearDown();
    }
};

// Parameterized fixture for flag values that clCreatePipe is expected to reject.
class clCreatePipeWithParamNegativeTests : public ApiFixture<>, public testing::TestWithParam {
    void SetUp() override {
        ApiFixture::SetUp();
    }
    void TearDown() override {
        ApiFixture::TearDown();
    }
};

// Creates a pipe (packet size 1, max packets 20) with each valid flag value and
// expects CL_SUCCESS.
TEST_P(clCreatePipeWithParamTests, GivenValidFlagsWhenCreatingPipeThenPipeIsCreatedAndSuccessIsReturned) {
    cl_mem_flags flags = GetParam();

    auto pipe = clCreatePipe(pContext, flags, 1, 20, nullptr, &retVal);
    EXPECT_NE(nullptr, pipe);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseMemObject(pipe);
}

// Creates a pipe with each invalid flag value and expects CL_INVALID_VALUE.
// NOTE(review): "Inalid" in the test name looks like a typo for "Invalid"; left
// unchanged because the name is part of the test's public identity.
TEST_P(clCreatePipeWithParamNegativeTests, GivenInalidFlagsWhenCreatingPipeThenInvalidValueErrorIsReturned) {
    cl_mem_flags flags = GetParam();

    auto pipe = clCreatePipe(pContext, flags, 1, 20, nullptr, &retVal);
    EXPECT_EQ(nullptr, pipe);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);

    // The return value of this release on the null handle is not checked.
    clReleaseMemObject(pipe);
}

// Flag values for which pipe creation is expected to succeed.
static cl_mem_flags validFlags[] = {
    0,
CL_MEM_READ_WRITE,
    CL_MEM_HOST_NO_ACCESS,
    CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
};

// Flag values the negative parameterized test is instantiated with.
static cl_mem_flags invalidFlags[] = {
    CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
    CL_MEM_WRITE_ONLY,
    CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
    CL_MEM_HOST_READ_ONLY,
    CL_MEM_HOST_WRITE_ONLY,
    CL_MEM_USE_HOST_PTR,
    CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
    CL_MEM_COPY_HOST_PTR,
    CL_MEM_ALLOC_HOST_PTR,
    CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR,
    CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY,
    CL_MEM_READ_WRITE | CL_MEM_READ_ONLY,
    CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY,
    CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS,
    CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY,
    CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS,
    CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR,
    CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR,
};

INSTANTIATE_TEST_CASE_P(
    CreatePipeCheckFlags,
    clCreatePipeWithParamTests,
    testing::ValuesIn(validFlags));

INSTANTIATE_TEST_CASE_P(
    CreatePipeCheckFlagsNegative,
    clCreatePipeWithParamNegativeTests,
    testing::ValuesIn(invalidFlags));

// Passing a null errcode_ret pointer must still produce a pipe.
TEST_F(clCreatePipeTests, GivenValidFlagsAndNullReturnWhenCreatingPipeThenPipeIsCreated) {
    cl_mem_flags memFlags = CL_MEM_READ_WRITE;

    auto createdPipe = clCreatePipe(pContext, memFlags, 1, 20, nullptr, nullptr);
    EXPECT_NE(nullptr, createdPipe);

    clReleaseMemObject(createdPipe);
}

// A packet size of zero is reported as CL_INVALID_PIPE_SIZE.
TEST_F(clCreatePipeTests, GivenPipePacketSizeZeroWhenCreatingPipeThenInvalidPipeSizeErrorIsReturned) {
    cl_mem_flags memFlags = CL_MEM_READ_WRITE;

    auto createdPipe = clCreatePipe(pContext, memFlags, 0, 20, nullptr, &retVal);
    EXPECT_EQ(nullptr, createdPipe);
    EXPECT_EQ(CL_INVALID_PIPE_SIZE, retVal);

    clReleaseMemObject(createdPipe);
}

// A maximum packet count of zero is reported as CL_INVALID_PIPE_SIZE.
TEST_F(clCreatePipeTests, GivenPipeMaxSizeZeroWhenCreatingPipeThenInvalidPipeSizeErrorIsReturned) {
    cl_mem_flags memFlags = CL_MEM_READ_WRITE;

    auto createdPipe = clCreatePipe(pContext, memFlags, 1, 0, nullptr, &retVal);
    EXPECT_EQ(nullptr, createdPipe);
    EXPECT_EQ(CL_INVALID_PIPE_SIZE, retVal);

    clReleaseMemObject(createdPipe);
}

TEST_F(clCreatePipeTests,
GivenPipePropertiesNotNullWhenCreatingPipeThenInvalidValueErrorIsReturned) {
    // A non-null properties pointer is expected to be rejected with CL_INVALID_VALUE.
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_pipe_properties properties = {0};

    auto pipe = clCreatePipe(pContext, flags, 1, 20, &properties, &retVal);
    EXPECT_EQ(nullptr, pipe);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);

    clReleaseMemObject(pipe);
}

// The device's pipeMaxPacketSize is accepted; one byte more is rejected.
TEST_F(clCreatePipeTests, GivenPipePacketSizeGreaterThanAllowedWhenCreatingPipeThenInvalidPipeSizeErrorIsReturned) {
    cl_uint packetSize = pContext->getDevice(0)->getDeviceInfo().pipeMaxPacketSize;
    cl_mem_flags flags = CL_MEM_READ_WRITE;

    auto pipe = clCreatePipe(pContext, flags, packetSize, 20, nullptr, &retVal);
    EXPECT_NE(nullptr, pipe);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseMemObject(pipe);

    packetSize += 1;
    pipe = clCreatePipe(pContext, flags, packetSize, 20, nullptr, &retVal);
    EXPECT_EQ(nullptr, pipe);
    EXPECT_EQ(CL_INVALID_PIPE_SIZE, retVal);

    clReleaseMemObject(pipe);
}

// A null context is reported as CL_INVALID_CONTEXT.
TEST_F(clCreatePipeTests, GivenNullContextWhenCreatingPipeThenInvalidContextErrorIsReturned) {
    auto pipe = clCreatePipe(nullptr, 0, 1, 20, nullptr, &retVal);
    EXPECT_EQ(nullptr, pipe);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);

    clReleaseMemObject(pipe);
}

// Builds a device and a context by hand while the platform reports zero devices, then
// creates a pipe from that context.
TEST(clCreatePipeTest, givenPlatformWithoutDevicesWhenClCreatePipeIsCalledThenDeviceIsTakenFromContext) {
    auto executionEnvironment = platform()->peekExecutionEnvironment();
    executionEnvironment->initializeMemoryManager();
    executionEnvironment->prepareRootDeviceEnvironments(1);
    // NOTE(review): the template arguments on this line and on the context creation below
    // were missing from the text and have been reconstructed; ClDevice follows from the
    // cl_device_id conversion, RootDevice / Context should be confirmed against upstream.
    auto device = std::make_unique<ClDevice>(*Device::create<RootDevice>(executionEnvironment, 0u), platform());
    const ClDeviceInfo &devInfo = device->getDeviceInfo();
    if (devInfo.svmCapabilities == 0) {
        GTEST_SKIP();
    }
    cl_device_id clDevice = device.get();
    cl_int retVal;
    auto context = ReleaseableObjectPtr<Context>(Context::create<Context>(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal));
    // The context is dereferenced below, so stop here rather than crash if creation failed.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, context.get());

    EXPECT_EQ(0u, platform()->getNumDevices());
    cl_uint packetSize = context->getDevice(0)->getDeviceInfo().pipeMaxPacketSize;
    cl_mem_flags flags = CL_MEM_READ_WRITE;

    auto pipe =
clCreatePipe(context.get(), flags, packetSize, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(pipe); } } // namespace ClCreatePipeTests compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_program_with_binary_tests.inl000066400000000000000000000107721363734646600324070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateProgramWithBinaryTests; typedef api_tests clCreateProgramWithILTests; typedef api_tests clCreateProgramWithILKHRTests; namespace ULT { TEST_F(clCreateProgramWithBinaryTests, GivenCorrectParametersWhenCreatingProgramWithBinaryThenProgramIsCreatedAndSuccessIsReturned) { cl_program pProgram = nullptr; cl_int binaryStatus = CL_INVALID_VALUE; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); ASSERT_EQ(true, fileExists(testFile)); auto pBinary = loadDataFromFile( testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, pBinary); const unsigned char *binaries[1] = {reinterpret_cast(pBinary.get())}; pProgram = clCreateProgramWithBinary( pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, binaryStatus); pBinary.reset(); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pProgram = clCreateProgramWithBinary( nullptr, num_devices, devices, &binarySize, binaries, &binaryStatus, nullptr); EXPECT_EQ(nullptr, pProgram); } TEST_F(clCreateProgramWithILTests, GivenInvalidContextWhenCreatingProgramWithIlThenInvalidContextErrorIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int err = CL_SUCCESS; cl_program prog = 
clCreateProgramWithIL(nullptr, spirv, sizeof(spirv), &err);
    EXPECT_EQ(CL_INVALID_CONTEXT, err);
    EXPECT_EQ(nullptr, prog);
}

// A null IL pointer with zero length is reported as CL_INVALID_VALUE.
TEST_F(clCreateProgramWithILTests, GivenNullIlWhenCreatingProgramWithIlThenInvalidValueErrorIsReturned) {
    cl_int errorCode = CL_SUCCESS;

    cl_program createdProgram = clCreateProgramWithIL(pContext, nullptr, 0, &errorCode);

    EXPECT_EQ(CL_INVALID_VALUE, errorCode);
    EXPECT_EQ(nullptr, createdProgram);
}

// A valid IL pointer with zero length is reported as CL_INVALID_BINARY.
TEST_F(clCreateProgramWithILTests, GivenIncorrectIlSizeWhenCreatingProgramWithIlThenInvalidBinaryErrorIsReturned) {
    const uint32_t spirv[16] = {0x03022307};
    cl_int errorCode = CL_SUCCESS;

    cl_program createdProgram = clCreateProgramWithIL(pContext, spirv, 0, &errorCode);

    EXPECT_EQ(CL_INVALID_BINARY, errorCode);
    EXPECT_EQ(nullptr, createdProgram);
}

// A buffer whose first word is 0xDEADBEEF is reported as CL_INVALID_BINARY.
TEST_F(clCreateProgramWithILTests, GivenIncorrectIlWhenCreatingProgramWithIlThenInvalidBinaryErrorIsReturned) {
    const uint32_t notSpirv[16] = {0xDEADBEEF};
    cl_int errorCode = CL_SUCCESS;

    cl_program createdProgram = clCreateProgramWithIL(pContext, notSpirv, sizeof(notSpirv), &errorCode);

    EXPECT_EQ(CL_INVALID_BINARY, errorCode);
    EXPECT_EQ(nullptr, createdProgram);
}

// Same rejected input, but with a null errcode_ret pointer: only the null result is checked.
TEST_F(clCreateProgramWithILTests, GivenIncorrectIlAndNoErrorPointerWhenCreatingProgramWithIlThenInvalidBinaryErrorIsReturned) {
    const uint32_t notSpirv[16] = {0xDEADBEEF};

    cl_program createdProgram = clCreateProgramWithIL(pContext, notSpirv, sizeof(notSpirv), nullptr);

    EXPECT_EQ(nullptr, createdProgram);
}

// The KHR entry point accepts the 0x03022307-prefixed buffer and returns a program.
TEST_F(clCreateProgramWithILKHRTests, GivenCorrectParametersWhenCreatingProgramWithIlkhrThenProgramIsCreatedAndSuccessIsReturned) {
    const uint32_t spirv[16] = {0x03022307};
    cl_int errorCode = CL_INVALID_VALUE;

    cl_program createdProgram = clCreateProgramWithILKHR(pContext, spirv, sizeof(spirv), &errorCode);

    EXPECT_EQ(CL_SUCCESS, errorCode);
    EXPECT_NE(nullptr, createdProgram);

    retVal = clReleaseProgram(createdProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(clCreateProgramWithILKHRTests, GivenProgramCreatedWithILWhenBuildAfterBuildIsCalledThenReturnSuccess) {
    const uint32_t spirv[16] = {0x03022307};

    cl_int err = CL_INVALID_VALUE;
    cl_program program = clCreateProgramWithIL(pContext, spirv, sizeof(spirv), &err);
EXPECT_EQ(CL_SUCCESS, err); EXPECT_NE(nullptr, program); err = clBuildProgram(program, 0, nullptr, "", nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, err); err = clBuildProgram(program, 0, nullptr, "", nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, err); retVal = clReleaseProgram(program); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateProgramWithILKHRTests, GivenNullIlWhenCreatingProgramWithIlkhrThenNullProgramIsReturned) { cl_program program = clCreateProgramWithILKHR(pContext, nullptr, 0, nullptr); EXPECT_EQ(nullptr, program); } } // namespace ULT cl_create_program_with_built_in_kernels_tests.cpp000066400000000000000000000225731363734646600343760ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "opencl/source/built_ins/built_in_ops_vme.h" #include "opencl/source/built_ins/vme_builtin.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateProgramWithBuiltInKernelsTests; struct clCreateProgramWithBuiltInVmeKernelsTests : clCreateProgramWithBuiltInKernelsTests { void SetUp() override { clCreateProgramWithBuiltInKernelsTests::SetUp(); if (!castToObject(devices[testedRootDeviceIndex])->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } pDev = &pContext->getDevice(0)->getDevice(); } Device *pDev; }; namespace ULT { TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenInvalidContextWhenCreatingProgramWithBuiltInKernelsThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto program 
= clCreateProgramWithBuiltInKernels( nullptr, // context 1, // num_devices nullptr, // device_list nullptr, // kernel_names &retVal); EXPECT_EQ(nullptr, program); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenNoKernelsWhenCreatingProgramWithBuiltInKernelsThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list "", // kernel_names &retVal); EXPECT_EQ(nullptr, program); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenNoDeviceWhenCreatingProgramWithBuiltInKernelsThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto program = clCreateProgramWithBuiltInKernels( pContext, // context 0, // num_devices &testedClDevice, // device_list "", // kernel_names &retVal); EXPECT_EQ(nullptr, program); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateProgramWithBuiltInKernelsTests, GivenNoKernelsAndNoReturnWhenCreatingProgramWithBuiltInKernelsThenProgramIsNotCreated) { auto program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list "", // kernel_names nullptr); EXPECT_EQ(nullptr, program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenValidMediaKernelsWhenCreatingProgramWithBuiltInKernelsThenProgramIsSuccessfullyCreated) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName(pDev, "media_kernels_frontend"); const char *kernelNamesString = { "block_advanced_motion_estimate_bidirectional_check_intel;" "block_motion_estimate_intel;" "block_advanced_motion_estimate_check_intel;"}; const char *kernelNames[] = { "block_motion_estimate_intel", "block_advanced_motion_estimate_check_intel", "block_advanced_motion_estimate_bidirectional_check_intel", }; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list 
kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(pDev); EXPECT_NE(nullptr, program); EXPECT_EQ(CL_SUCCESS, retVal); for (auto &kernelName : kernelNames) { cl_kernel kernel = clCreateKernel( program, kernelName, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, kernel); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); } retVal = clReleaseProgram(program); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenValidMediaKernelsWithOptionsWhenCreatingProgramWithBuiltInKernelsThenProgramIsSuccessfullyCreatedWithThoseOptions) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName(pDev, "media_kernels_frontend"); const char *kernelNamesString = { "block_motion_estimate_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(pDev); auto neoProgram = castToObject(program); auto builtinOptions = neoProgram->getOptions(); auto it = builtinOptions.find("HW_NULL_CHECK"); EXPECT_EQ(std::string::npos, it); clReleaseProgram(program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockMotionEstimateKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName(pDev, "media_kernels_backend"); Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pDev); restoreBuiltInBinaryName(pDev); overwriteBuiltInBinaryName(pDev, "media_kernels_frontend"); const char *kernelNamesString = { "block_motion_estimate_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(pDev); cl_kernel kernel = clCreateKernel( program, "block_motion_estimate_intel", &retVal); auto kernNeo = 
castToObject(kernel); EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(6U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pDev); EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEstimateKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName(pDev, "media_kernels_backend"); Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, *pDev); restoreBuiltInBinaryName(pDev); overwriteBuiltInBinaryName(pDev, "media_kernels_frontend"); const char *kernelNamesString = { "block_advanced_motion_estimate_check_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(pDev); cl_kernel kernel = clCreateKernel( program, "block_advanced_motion_estimate_check_intel", &retVal); auto kernNeo = castToObject(kernel); EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(15U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, *pDev); EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); } TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEstimateBidirectionalCheckKernelWhenCreatingProgramWithBuiltInKernelsThenCorrectDispatchBuilderAndFrontendKernelIsCreated) { cl_int retVal = CL_SUCCESS; overwriteBuiltInBinaryName(pDev, "media_kernels_backend"); 
Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, *pDev); restoreBuiltInBinaryName(pDev); overwriteBuiltInBinaryName(pDev, "media_kernels_frontend"); const char *kernelNamesString = { "block_advanced_motion_estimate_bidirectional_check_intel;"}; cl_program program = clCreateProgramWithBuiltInKernels( pContext, // context 1, // num_devices &testedClDevice, // device_list kernelNamesString, // kernel_names &retVal); restoreBuiltInBinaryName(pDev); cl_kernel kernel = clCreateKernel( program, "block_advanced_motion_estimate_bidirectional_check_intel", &retVal); auto kernNeo = castToObject(kernel); EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); EXPECT_EQ(20U, kernNeo->getKernelArgsNumber()); auto ctxNeo = castToObject(pContext); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, ctxNeo->getDevice(0)->getDevice()); EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_sampler_tests.inl000066400000000000000000000025451363734646600300030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateSamplerTests; namespace ULT { TEST_F(clCreateSamplerTests, GivenCorrectParametersWhenCreatingSamplerThenSamplerIsCreatedAndSuccessReturned) { auto sampler = clCreateSampler( pContext, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, sampler); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateSamplerTests, GivenCorrectParametersAndNullReturnPointerWhenCreatingSamplerThenSamplerIsCreated) { auto sampler = clCreateSampler( pContext, 
CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, nullptr); EXPECT_NE(nullptr, sampler); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateSamplerTests, GivenInvalidContextWhenCreatingSamplerThenInvalidContextErrorIsReturned) { auto sampler = clCreateSampler( nullptr, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_LINEAR, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, sampler); delete sampler; } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_sampler_with_properties_tests.inl000066400000000000000000000157761363734646600333240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/source/sampler/sampler.h" #include "CL/cl_ext.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct SamplerWithPropertiesTest : public ApiFixture<>, public ::testing::WithParamInterface>, public ::testing::Test { SamplerWithPropertiesTest() { } void SetUp() override { std::tie(NormalizdProperties, AddressingProperties, FilterProperties) = GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_sampler_properties NormalizdProperties = 0; cl_sampler_properties AddressingProperties = 0; cl_sampler_properties FilterProperties = 0; }; typedef api_tests clCreateSamplerWithPropertiesTests; typedef SamplerWithPropertiesTest clCreateSamplerWithProperties_; TEST_F(clCreateSamplerWithPropertiesTests, GivenSamplerPropertiesAndNoReturnPointerWhenCreatingSamplerWithPropertiesThenSamplerIsCreated) { cl_sampler sampler = nullptr; cl_queue_properties properties[] = { CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, 0}; sampler = clCreateSamplerWithProperties( pContext, properties, nullptr); ASSERT_NE(nullptr, sampler); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); 
}

// A null context is reported as CL_INVALID_CONTEXT and no sampler is returned.
TEST_F(clCreateSamplerWithPropertiesTests, GivenNullContextWhenCreatingSamplerWithPropertiesThenInvalidContextErrorIsReturned) {
    cl_queue_properties samplerProps[] =
        {
            CL_SAMPLER_NORMALIZED_COORDS, 0,
            CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE,
            CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR,
            0};

    cl_sampler createdSampler = nullptr;
    createdSampler = clCreateSamplerWithProperties(
        nullptr,
        samplerProps,
        &retVal);
    ASSERT_EQ(nullptr, createdSampler);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// Rebuilds the property list from the parameter triple, emitting a name/value pair only
// for non-zero parameter values, and expects sampler creation to succeed.
TEST_P(clCreateSamplerWithProperties_, GivenCorrectParametersWhenCreatingSamplerWithPropertiesThenSamplerIsCreatedAndSuccessIsReturned) {
    cl_sampler createdSampler = nullptr;
    cl_queue_properties samplerProps[] =
        {
            CL_SAMPLER_NORMALIZED_COORDS, 0,
            CL_SAMPLER_ADDRESSING_MODE, 0,
            CL_SAMPLER_FILTER_MODE, 0,
            0};

    // Write cursor into samplerProps; the list is overwritten from the front.
    cl_queue_properties *cursor = &samplerProps[0];
    if (NormalizdProperties) {
        *cursor++ = CL_SAMPLER_NORMALIZED_COORDS;
        *cursor++ = static_cast<cl_queue_properties>(NormalizdProperties);
    }
    if (AddressingProperties) {
        *cursor++ = CL_SAMPLER_ADDRESSING_MODE;
        *cursor++ = static_cast<cl_queue_properties>(AddressingProperties);
    }
    if (FilterProperties) {
        *cursor++ = CL_SAMPLER_FILTER_MODE;
        *cursor++ = static_cast<cl_queue_properties>(FilterProperties);
    }
    // Terminate the list right after the last pair written.
    *cursor++ = 0;

    createdSampler = clCreateSamplerWithProperties(
        pContext,
        samplerProps,
        &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, createdSampler);

    retVal = clReleaseSampler(createdSampler);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Specifies the same property name twice and expects CL_INVALID_VALUE; repeated for
// each of the three property kinds whose parameter value is non-zero.
TEST_P(clCreateSamplerWithProperties_, GivenInvalidPropertiesWhenCreatingSamplerWithPropertiesThenInvalidValueErrorIsReturned) {
    cl_sampler sampler = nullptr;
    cl_queue_properties properties[] =
        {
            CL_SAMPLER_NORMALIZED_COORDS, 0,
            CL_SAMPLER_NORMALIZED_COORDS, 0,
            0};
    cl_queue_properties *pProp = &properties[0];

    if (NormalizdProperties) {
        *pProp++ = CL_SAMPLER_NORMALIZED_COORDS;
        *pProp++ = static_cast<cl_queue_properties>(NormalizdProperties);
        *pProp++ = CL_SAMPLER_NORMALIZED_COORDS;
        *pProp++ = static_cast<cl_queue_properties>(NormalizdProperties);
        *pProp++ = 0;

        sampler = clCreateSamplerWithProperties(
            pContext,
            properties,
            &retVal);
EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(nullptr, sampler); } pProp = &properties[0]; if (AddressingProperties) { *pProp++ = CL_SAMPLER_ADDRESSING_MODE; *pProp++ = (cl_queue_properties)AddressingProperties; *pProp++ = CL_SAMPLER_ADDRESSING_MODE; *pProp++ = (cl_queue_properties)AddressingProperties; *pProp++ = 0; sampler = clCreateSamplerWithProperties( pContext, properties, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(nullptr, sampler); } pProp = &properties[0]; if (FilterProperties) { *pProp++ = CL_SAMPLER_FILTER_MODE; *pProp++ = (cl_queue_properties)FilterProperties; *pProp++ = CL_SAMPLER_FILTER_MODE; *pProp++ = (cl_queue_properties)FilterProperties; *pProp++ = 0; sampler = clCreateSamplerWithProperties( pContext, properties, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(nullptr, sampler); } } static cl_sampler_properties NormalizdProperties[] = { CL_TRUE, CL_FALSE, }; static cl_sampler_properties AddressingProperties[] = { CL_ADDRESS_NONE, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, }; static cl_sampler_properties FilterProperties[] = { CL_FILTER_NEAREST, CL_FILTER_LINEAR, }; INSTANTIATE_TEST_CASE_P(api, clCreateSamplerWithProperties_, ::testing::Combine( ::testing::ValuesIn(NormalizdProperties), ::testing::ValuesIn(AddressingProperties), ::testing::ValuesIn(FilterProperties))); TEST_F(clCreateSamplerWithPropertiesTests, GivenMipMapDataWhenCreatingSamplerWithPropertiesThenSamplerIsCreatedAndCorrectlyPopulated) { SamplerLodProperty minLodProperty; SamplerLodProperty maxLodProperty; minLodProperty.lod = 2.0f; maxLodProperty.lod = 3.0f; cl_sampler_properties mipMapFilteringMode = CL_FILTER_LINEAR; cl_sampler_properties properties[] = { CL_SAMPLER_NORMALIZED_COORDS, 0, CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, CL_SAMPLER_MIP_FILTER_MODE, mipMapFilteringMode, CL_SAMPLER_LOD_MIN, minLodProperty.data, CL_SAMPLER_LOD_MAX, maxLodProperty.data, 0}; 
cl_sampler clSampler = clCreateSamplerWithProperties( pContext, properties, &retVal); auto sampler = castToObject(clSampler); ASSERT_NE(nullptr, sampler); EXPECT_EQ(mipMapFilteringMode, sampler->mipFilterMode); EXPECT_EQ(minLodProperty.lod, sampler->lodMin); EXPECT_EQ(maxLodProperty.lod, sampler->lodMax); clReleaseSampler(sampler); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_sub_buffer_tests.inl000066400000000000000000000161031363734646600304550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; namespace ClCreateSubbufferTests { template class clCreateSubBufferTemplateTests : public ApiFixture<>, public testing::TestWithParam { void SetUp() override { ApiFixture::SetUp(); cl_mem_flags flg = parentFlags; void *ptr = nullptr; if (hasHostPtr == true) { flg |= CL_MEM_USE_HOST_PTR; ptr = pHostPtr; } buffer = clCreateBuffer(pContext, flg, 64, ptr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { clReleaseMemObject(buffer); ApiFixture::TearDown(); } protected: cl_mem buffer; cl_uchar pHostPtr[64]; }; struct clCreateSubBufferValidFlagsNoHostPtrTests : public clCreateSubBufferTemplateTests { }; TEST_P(clCreateSubBufferValidFlagsNoHostPtrTests, GivenValidFlagsWhenCreatingSubBufferThenSubBufferIsCreatedAndSuccessIsReturned) { cl_buffer_region region = {0, 12}; cl_mem_flags flags = GetParam(); auto subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(subBuffer); }; static cl_mem_flags validFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS, }; INSTANTIATE_TEST_CASE_P( CreateSubBufferCheckFlags, clCreateSubBufferValidFlagsNoHostPtrTests, 
testing::ValuesIn(validFlags)); struct clCreateSubBufferInvalidFlagsHostPtrTests : public clCreateSubBufferTemplateTests { }; TEST_P(clCreateSubBufferInvalidFlagsHostPtrTests, GivenInvalidFlagsWhenCreatingSubBufferThenInvalidValueErrorIsReturned) { cl_buffer_region region = {4, 12}; cl_mem_flags flags = GetParam(); auto subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); }; cl_mem_flags invalidFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_USE_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR, 0xffcc, }; INSTANTIATE_TEST_CASE_P( CreateSubBufferCheckFlags, clCreateSubBufferInvalidFlagsHostPtrTests, testing::ValuesIn(invalidFlags)); class clCreateSubBufferTests : public api_tests { void SetUp() override { api_tests::SetUp(); cl_mem_flags flg = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS; buffer = clCreateBuffer(pContext, flg, 64, pHostPtr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { clReleaseMemObject(buffer); ApiFixture::TearDown(); } protected: cl_mem buffer; cl_uchar pHostPtr[64]; }; TEST_F(clCreateSubBufferTests, GivenInBoundsRegionWhenCreatingSubBufferThenSubBufferIsCreatedAndSuccessIsReturned) { cl_buffer_region region = {0, 12}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(subBuffer); } TEST_F(clCreateSubBufferTests, GivenOutOfBoundsRegionWhenCreatingSubBufferThenInvalidValueErrorIsReturned) { cl_buffer_region region = {4, 68}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateSubBufferTests, 
GivenSubBufferAsBufferWhenCreatingSubBufferThenInvalidMemObjectErrorIsReturned) { cl_buffer_region region0 = {0, 60}; cl_buffer_region region1 = {8, 20}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion0, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); auto subsubBuffer = clCreateSubBuffer(subBuffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion1, &retVal); EXPECT_EQ(nullptr, subsubBuffer); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); clReleaseMemObject(subBuffer); } TEST_F(clCreateSubBufferTests, GivenInvalidBufferObjectWhenCreatingSubBufferThenInvalidMemObjectErrorIsReturned) { cl_buffer_region region = {4, 60}; cl_int trash[] = {0x01, 0x08, 0x88, 0xcc, 0xab, 0x55}; auto subBuffer = clCreateSubBuffer(reinterpret_cast(trash), CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clCreateSubBufferTests, GivenInvalidOffsetWhenCreatingSubBufferThenMisalignedSubBufferOffsetErrorIsReturned) { cl_buffer_region region = {1, 60}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_MISALIGNED_SUB_BUFFER_OFFSET, retVal); } TEST_F(clCreateSubBufferTests, GivenNoRegionWhenCreatingSubBufferThenInvalidValueErrorIsReturned) { cl_buffer_region region = {4, 60}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, nullptr, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, 0, ®ion, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, 0, nullptr, &retVal); EXPECT_EQ(nullptr, subBuffer); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clCreateSubBufferTests, 
GivenBufferWithFlagsWhenCreatingSubBufferThenFlagsAreInherited) { cl_buffer_region region = {0, 60}; auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem_flags retFlag; size_t retSZ; retVal = clGetMemObjectInfo(subBuffer, CL_MEM_FLAGS, sizeof(cl_mem_flags), &retFlag, &retSZ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_mem_flags), retSZ); EXPECT_EQ(static_cast(CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS), retFlag); clReleaseMemObject(subBuffer); } } // namespace ClCreateSubbufferTests compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_sub_devices_tests.inl000066400000000000000000000306141363734646600306310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "test.h" #include using namespace NEO; namespace ULT { struct clCreateSubDevicesTests : ::testing::Test { DebugManagerStateRestore restorer; VariableBackup mockDeviceCreateSingleDeviceBackup{&MockDevice::createSingleDevice}; std::unique_ptr device; cl_device_partition_property properties[3] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0}; cl_uint outDevicesCount; cl_device_id outDevices[4]; void setup(int numberOfDevices) { DebugManager.flags.CreateMultipleSubDevices.set(numberOfDevices); mockDeviceCreateSingleDeviceBackup = (numberOfDevices == 1); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); outDevicesCount = numberOfDevices; } }; TEST_F(clCreateSubDevicesTests, GivenInvalidDeviceWhenCreatingSubDevicesThenInvalidDeviceErrorIsReturned) { auto retVal = clCreateSubDevices( nullptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, 
retVal);
}

// A device set up with a single sub-device cannot be partitioned.
TEST_F(clCreateSubDevicesTests, GivenDeviceWithoutSubDevicesWhenCreatingSubDevicesThenDevicePartitionFailedErrorIsReturned) {
    setup(1);

    auto status = clCreateSubDevices(device.get(), nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_DEVICE_PARTITION_FAILED, status);
}

// Walks through four rejected property lists: null, a zero partition type, a zero
// affinity domain, and a list whose third entry is not the zero terminator.
TEST_F(clCreateSubDevicesTests, GivenInvalidOrUnsupportedPropertiesWhenCreatingSubDevicesThenInvalidValueErrorIsReturned) {
    setup(2);

    auto status = clCreateSubDevices(device.get(), nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);

    properties[0] = 0;
    status = clCreateSubDevices(device.get(), properties, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);

    properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
    properties[1] = 0;
    status = clCreateSubDevices(device.get(), properties, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);

    properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NUMA;
    properties[2] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
    status = clCreateSubDevices(device.get(), properties, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);
}

// With a null output array the call only reports the number of sub-devices.
TEST_F(clCreateSubDevicesTests, GivenOutDevicesNullWhenCreatingSubDevicesThenSuccessIsReturned) {
    setup(2);

    cl_uint reportedCount = 0;
    auto status = clCreateSubDevices(device.get(), properties, 0, nullptr, &reportedCount);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(2u, reportedCount);
}

// An output array with room for one entry is rejected when two sub-devices exist.
TEST_F(clCreateSubDevicesTests, GivenOutDevicesTooSmallWhenCreatingSubDevicesThenInvalidValueErrorIsReturned) {
    setup(2);

    outDevicesCount = 1;
    auto status = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, status);
}

// The returned handles are the device's own sub-devices, in index order.
TEST_F(clCreateSubDevicesTests, GivenValidInputWhenCreatingSubDevicesThenSubDevicesAreReturned) {
    setup(2);

    auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(device->getDeviceById(0), outDevices[0]);
    EXPECT_EQ(device->getDeviceById(1), outDevices[1]);
properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; cl_device_id outDevices2[2]; retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices2, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(outDevices[0], outDevices2[0]); EXPECT_EQ(outDevices[1], outDevices2[1]); } TEST_F(clCreateSubDevicesTests, GivenValidInputWhenCreatingSubDevicesThenDeviceApiReferenceCountIsIncreasedEveryTime) { setup(2); EXPECT_EQ(0, device->getDeviceById(0)->getRefApiCount()); EXPECT_EQ(0, device->getDeviceById(1)->getRefApiCount()); auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1, device->getDeviceById(0)->getRefApiCount()); EXPECT_EQ(1, device->getDeviceById(1)->getRefApiCount()); retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, device->getDeviceById(0)->getRefApiCount()); EXPECT_EQ(2, device->getDeviceById(1)->getRefApiCount()); } struct clCreateSubDevicesDeviceInfoTests : clCreateSubDevicesTests { void setup(int numberOfDevices) { clCreateSubDevicesTests::setup(numberOfDevices); expectedSubDeviceParentDevice = device.get(); expectedRootDevicePartitionMaxSubDevices = numberOfDevices; } cl_device_id expectedRootDeviceParentDevice = nullptr; cl_device_affinity_domain expectedRootDevicePartitionAffinityDomain = CL_DEVICE_AFFINITY_DOMAIN_NUMA | CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; cl_uint expectedRootDevicePartitionMaxSubDevices; cl_device_partition_property expectedRootDevicePartitionProperties[2] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0}; cl_device_partition_property expectedRootDevicePartitionType[3] = {0}; cl_device_id expectedSubDeviceParentDevice; cl_device_affinity_domain expectedSubDevicePartitionAffinityDomain = 0; cl_uint expectedSubDevicePartitionMaxSubDevices = 0; cl_device_partition_property expectedSubDevicePartitionProperties[2] = {0}; 
cl_device_partition_property expectedSubDevicePartitionType[3] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0}; cl_device_id expectedRootDeviceWithoutSubDevicesParentDevice = nullptr; cl_device_affinity_domain expectedRootDeviceWithoutSubDevicesPartitionAffinityDomain = 0; cl_uint expectedRootDeviceWithoutSubDevicesPartitionMaxSubDevices = 0; cl_device_partition_property expectedRootDeviceWithoutSubDevicesPartitionProperties[2] = {0}; cl_device_partition_property expectedRootDeviceWithoutSubDevicesPartitionType[3] = {0}; cl_device_id parentDevice; cl_device_affinity_domain partitionAffinityDomain; cl_uint partitionMaxSubDevices; cl_device_partition_property partitionProperties[2]; cl_device_partition_property partitionType[3]; }; TEST_F(clCreateSubDevicesDeviceInfoTests, WhenGettingSubDeviceRelatedDeviceInfoThenCorrectValuesAreSet) { setup(4); auto &rootDeviceInfo = device->getDeviceInfo(); EXPECT_EQ(expectedRootDeviceParentDevice, rootDeviceInfo.parentDevice); EXPECT_EQ(expectedRootDevicePartitionAffinityDomain, rootDeviceInfo.partitionAffinityDomain); EXPECT_EQ(expectedRootDevicePartitionMaxSubDevices, rootDeviceInfo.partitionMaxSubDevices); EXPECT_EQ(expectedRootDevicePartitionProperties[0], rootDeviceInfo.partitionProperties[0]); EXPECT_EQ(expectedRootDevicePartitionProperties[1], rootDeviceInfo.partitionProperties[1]); EXPECT_EQ(expectedRootDevicePartitionType[0], rootDeviceInfo.partitionType[0]); auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto outDevice : outDevices) { auto &subDevice = *castToObject(outDevice); auto &subDeviceInfo = subDevice.getDeviceInfo(); EXPECT_EQ(expectedSubDeviceParentDevice, subDeviceInfo.parentDevice); EXPECT_EQ(expectedSubDevicePartitionAffinityDomain, subDeviceInfo.partitionAffinityDomain); EXPECT_EQ(expectedSubDevicePartitionMaxSubDevices, subDeviceInfo.partitionMaxSubDevices); 
EXPECT_EQ(expectedSubDevicePartitionProperties[0], subDeviceInfo.partitionProperties[0]); EXPECT_EQ(expectedSubDevicePartitionType[0], subDeviceInfo.partitionType[0]); EXPECT_EQ(expectedSubDevicePartitionType[1], subDeviceInfo.partitionType[1]); EXPECT_EQ(expectedSubDevicePartitionType[2], subDeviceInfo.partitionType[2]); } } TEST_F(clCreateSubDevicesDeviceInfoTests, GivenRootDeviceWithoutSubDevicesWhenGettingSubDeviceRelatedDeviceInfoThenCorrectValuesAreSet) { setup(1); auto &rootDeviceInfo = device->getDeviceInfo(); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesParentDevice, rootDeviceInfo.parentDevice); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionAffinityDomain, rootDeviceInfo.partitionAffinityDomain); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionMaxSubDevices, rootDeviceInfo.partitionMaxSubDevices); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionProperties[0], rootDeviceInfo.partitionProperties[0]); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionType[0], rootDeviceInfo.partitionType[0]); } TEST_F(clCreateSubDevicesDeviceInfoTests, WhenGettingSubDeviceRelatedDeviceInfoViaApiThenCorrectValuesAreSet) { setup(4); clGetDeviceInfo(device.get(), CL_DEVICE_PARENT_DEVICE, sizeof(parentDevice), &parentDevice, nullptr); EXPECT_EQ(expectedRootDeviceParentDevice, parentDevice); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(partitionAffinityDomain), &partitionAffinityDomain, nullptr); EXPECT_EQ(expectedRootDevicePartitionAffinityDomain, partitionAffinityDomain); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(partitionMaxSubDevices), &partitionMaxSubDevices, nullptr); EXPECT_EQ(expectedRootDevicePartitionMaxSubDevices, partitionMaxSubDevices); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_PROPERTIES, sizeof(partitionProperties), &partitionProperties, nullptr); EXPECT_EQ(expectedRootDevicePartitionProperties[0], partitionProperties[0]); EXPECT_EQ(expectedRootDevicePartitionProperties[1], 
partitionProperties[1]); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_TYPE, sizeof(partitionType), &partitionType, nullptr); EXPECT_EQ(expectedRootDevicePartitionType[0], partitionType[0]); auto retVal = clCreateSubDevices(device.get(), properties, outDevicesCount, outDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (auto subDevice : outDevices) { clGetDeviceInfo(subDevice, CL_DEVICE_PARENT_DEVICE, sizeof(parentDevice), &parentDevice, nullptr); EXPECT_EQ(expectedSubDeviceParentDevice, parentDevice); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(partitionAffinityDomain), &partitionAffinityDomain, nullptr); EXPECT_EQ(expectedSubDevicePartitionAffinityDomain, partitionAffinityDomain); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(partitionMaxSubDevices), &partitionMaxSubDevices, nullptr); EXPECT_EQ(expectedSubDevicePartitionMaxSubDevices, partitionMaxSubDevices); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_PROPERTIES, sizeof(partitionProperties), &partitionProperties, nullptr); EXPECT_EQ(expectedSubDevicePartitionProperties[0], partitionProperties[0]); clGetDeviceInfo(subDevice, CL_DEVICE_PARTITION_TYPE, sizeof(partitionType), &partitionType, nullptr); EXPECT_EQ(expectedSubDevicePartitionType[0], partitionType[0]); EXPECT_EQ(expectedSubDevicePartitionType[1], partitionType[1]); EXPECT_EQ(expectedSubDevicePartitionType[2], partitionType[2]); } } TEST_F(clCreateSubDevicesDeviceInfoTests, GivenRootDeviceWithoutSubDevicesWhenGettingSubDeviceRelatedDeviceInfoViaApiThenCorrectValuesAreSet) { setup(1); clGetDeviceInfo(device.get(), CL_DEVICE_PARENT_DEVICE, sizeof(parentDevice), &parentDevice, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesParentDevice, parentDevice); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(partitionAffinityDomain), &partitionAffinityDomain, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionAffinityDomain, partitionAffinityDomain); 
clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(partitionMaxSubDevices), &partitionMaxSubDevices, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionMaxSubDevices, partitionMaxSubDevices); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_PROPERTIES, sizeof(partitionProperties), &partitionProperties, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionProperties[0], partitionProperties[0]); clGetDeviceInfo(device.get(), CL_DEVICE_PARTITION_TYPE, sizeof(partitionType), &partitionType, nullptr); EXPECT_EQ(expectedRootDeviceWithoutSubDevicesPartitionType[0], partitionType[0]); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_user_event_tests.inl000066400000000000000000000101601363734646600305070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateUserEventTests; namespace ULT { TEST_F(clCreateUserEventTests, GivenValidContextWhenCreatingUserEventThenEventIsCreated) { auto userEvent = clCreateUserEvent( pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, userEvent); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenNullContextWhenCreatingUserEventThenInvalidContextErrorIsReturned) { auto userEvent = clCreateUserEvent( nullptr, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, userEvent); } TEST_F(clCreateUserEventTests, GivenCorrectUserEventWhenGetingEventInfoThenClCommandUserCmdTypeIsReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); size_t retSize; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_QUEUE, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_command_queue), retSize); auto cmdQueue = reinterpret_cast(static_cast(0xdeadbeaf)); retVal = clGetEventInfo(userEvent, 
CL_EVENT_COMMAND_QUEUE, retSize, &cmdQueue, 0); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, cmdQueue); retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_TYPE, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_event_info), retSize); auto cmd_type = CL_COMMAND_SVM_UNMAP; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_TYPE, retSize, &cmd_type, 0); EXPECT_EQ(CL_COMMAND_USER, cmd_type); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenUserEventStatusSetToCompleteWhenGettingEventInfoThenStatusIsSetToCompleteAndSuccessReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); size_t retSize; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_int), retSize); auto status = CL_SUBMITTED; retVal = clGetEventInfo(userEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, retSize, &status, 0); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(CL_COMPLETE, status); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenValidUserEventWhenGettingContextThenValidContextAndSuccessIsReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); size_t retSize; retVal = clGetEventInfo(userEvent, CL_EVENT_CONTEXT, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_context), retSize); cl_context oclContext; retVal = clGetEventInfo(userEvent, CL_EVENT_CONTEXT, retSize, &oclContext, 0); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(oclContext, pContext); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenCompleteUserEventWhenWaitingForUserEventThenReturnIsImmediate) { auto userEvent = clCreateUserEvent( pContext, &retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); 
retVal = clWaitForEvents(1, &userEvent); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateUserEventTests, GivenUserEventWithErrorStatusWhenWaitingForUserEventThenClExecStatusErrorForEventsInWaitListErrorIsReturned) { auto userEvent = clCreateUserEvent( pContext, &retVal); retVal = clSetUserEventStatus(userEvent, -1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clWaitForEvents(1, &userEvent); ASSERT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_create_user_event_tests_mt.cpp000066400000000000000000000023251363734646600312130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateUserEventMtTests; namespace ULT { TEST_F(clCreateUserEventMtTests, GivenClCompleteEventWhenWaitingForEventThenWaitForEventsIsCompleted) { auto userEvent = clCreateUserEvent( pContext, &retVal); std::atomic ThreadStarted(false); std::atomic WaitForEventsCompleted(false); int counter = 0; int Deadline = 2000; std::thread t([&]() { ThreadStarted = true; clWaitForEvents(1, &userEvent); WaitForEventsCompleted = true; }); //wait for the thread to start while (!ThreadStarted) ; //now wait a while. 
while (!WaitForEventsCompleted && counter++ < Deadline) ; ASSERT_EQ(WaitForEventsCompleted, false) << "WaitForEvents returned while user event is not signaled!"; //set event to CL_COMPLETE retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); t.join(); ASSERT_EQ(WaitForEventsCompleted, true); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_barrier_tests.inl000066400000000000000000000011751363734646600301700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueBarrierTests; TEST_F(clEnqueueBarrierTests, GivenNullCommandQueueWhenEnqueuingThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueBarrier( nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueBarrierTests, GivenValidCommandQueueWhenEnqueuingThenSuccessIsReturned) { auto retVal = clEnqueueBarrier( pCommandQueue); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_barrier_with_wait_list_tests.inl000066400000000000000000000014711363734646600333010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueBarrierWithWaitListTests; TEST_F(clEnqueueBarrierWithWaitListTests, GivenNullCommandQueueWhenEnqueuingBarrierWithWaitListThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueBarrierWithWaitList( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueBarrierWithWaitListTests, GivenValidCommandQueueWhenEnqueuingBarrierWithWaitListThenSuccessIsReturned) { auto retVal = clEnqueueBarrierWithWaitList( pCommandQueue, 0, 
nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_copy_buffer_rect_tests.inl000066400000000000000000000033701363734646600320610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "cl_api_tests.h" using namespace NEO; struct clEnqueueCopyBufferRectTests : public ApiFixture<0>, ::testing::Test { void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; namespace ULT { TEST_F(clEnqueueCopyBufferRectTests, GivenCorrectParametersWhenEnqueingCopyBufferRectThenSuccessIsReturned) { MockBuffer srcBuffer; MockBuffer dstBuffer; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyBufferRect( pCommandQueue, &srcBuffer, //srcBuffer &dstBuffer, //dstBuffer srcOrigin, dstOrigin, region, 10, //srcRowPitch 0, //srcSlicePitch 10, //dstRowPitch 0, //dstSlicePitch 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferRectTests, GivenNullCommandQueueWhenEnqueingCopyBufferRectThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueCopyBufferRect( nullptr, //command_queue nullptr, //srcBuffer nullptr, //dstBuffer nullptr, //srcOrigin nullptr, //dstOrigin nullptr, //retion 0, //srcRowPitch 0, //srcSlicePitch 0, //dstRowPitch 0, //dstSlicePitch 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_copy_buffer_to_image_tests.inl000066400000000000000000000133761363734646600327170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueCopyBufferToImageTests; namespace ULT { struct clEnqueueCopyBufferToImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueCopyBufferToImageTests, GivenInvalidCmdQueueWhenCopyingBufferToImageThenInvalidCommandQueueErrorIsReturned) { size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyBufferToImage( nullptr, //commandQueue nullptr, //srcBuffer nullptr, //dstBuffer 0u, //src_offset dstOrigin, region, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueCopyBufferToImageTests, GivenInvalidSrcBufferWhenCopyingBufferToImageThenInvalidMemObjectErrorIsReturned) { size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, nullptr, //srcBuffer nullptr, //dstBuffer 0u, //src_offset dstOrigin, region, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyBufferToImageTests, GivenValidParametersWhenCopyingBufferToImageThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto dstImage = clCreateImage( pContext, 
CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0u, //src_offset dstOrigin, region, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueCopyBufferToImageTests clEnqueueCopyBufferToImageYUV; TEST_F(clEnqueueCopyBufferToImageYUV, GivenValidYuvDstImageWhenCopyingBufferToImageThenSuccessIsReturned) { auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto dstImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferToImageYUV, GivenInvalidOriginAndYuvDstImageWhenCopyingBufferToImageThenInvalidValueErrorIsReturned) { auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto dstImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyBufferToImageYUV, 
GivenInvalidRegionAndValidYuvDstImageWhenCopyingBufferToImageThenInvalidValueErrorIsReturned) { auto srcBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto dstImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueCopyBufferToImage( pCommandQueue, srcBuffer.get(), dstImage, 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_copy_image_tests.inl000066400000000000000000000210611363734646600306520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/surface_formats.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueCopyImageTests; namespace ULT { struct clEnqueueCopyImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueCopyImageTests, 
GivenNullCommandQueueWhenCopyingImageThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueCopyImage( nullptr, buffer, buffer, nullptr, nullptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueCopyImageTests, GivenNullSrcBufferWhenCopyingImageThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueCopyImage( pCommandQueue, nullptr, buffer, nullptr, nullptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyImageTests, GivenNullDstBufferWhenCopyingImageThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueCopyImage( pCommandQueue, buffer, nullptr, nullptr, nullptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyImageTests, GivenDifferentSrcAndDstImageFormatsWhenCopyingImageThenImageFormatMismatchErrorIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto srcImage = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); imageFormat.image_channel_order = CL_BGRA; auto dstImage = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_IMAGE_FORMAT_MISMATCH, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageTests, GivenValidParametersWhenCopyingImageThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto srcImage = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); auto dstImage = 
clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueCopyImageTests clEnqueueCopyImageYUVTests; TEST_F(clEnqueueCopyImageYUVTests, GivenValidParametersWhenCopyingYuvImageThenSuccessIsReturned) { auto srcImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); auto dstImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageYUVTests, GivenInvalidSrcOriginWhenCopyingYuvImageThenInvalidValueErrorIsReturned) { auto srcImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); auto dstImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t srcOrigin[] = {1, 2, 0}; const size_t dstOrigin[] = {2, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, srcOrigin, dstOrigin, region, 0, nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageYUVTests, GivenInvalidDstOriginWhenCopyingYuvImageThenInvalidValueErrorIsReturned) { auto srcImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); auto dstImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t srcOrigin[] = {2, 2, 0}; const size_t dstOrigin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, srcOrigin, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageYUVTests, GivenInvalidDstOriginFor2dImageWhenCopyingYuvImageThenInvalidValueErrorIsReturned) { auto srcImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); auto dstImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); EXPECT_NE(nullptr, dstImage); const size_t origin[] = {2, 2, 1}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImage( pCommandQueue, srcImage, dstImage, origin, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(dstImage); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_copy_image_to_buffer_tests.inl000066400000000000000000000133241363734646600327100ustar00rootroot00000000000000/* * Copyright (C) 
2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueCopyImageToBufferTests; namespace ULT { struct clEnqueueCopyImageToBufferTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueCopyImageToBufferTests, GivenInvalidQueueWhenCopyingImageToBufferThenInvalidCommandQueueErrorIsReturned) { size_t srcOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyImageToBuffer( nullptr, nullptr, //srcBuffer nullptr, //dstBuffer srcOrigin, region, 0, //dstOffset 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueCopyImageToBufferTests, GivenInvalidBufferWhenCopyingImageToBufferThenInvalidMemObjectErrorIsReturned) { size_t srcOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, nullptr, //srcBuffer nullptr, //dstBuffer srcOrigin, region, 0, //dstOffset 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueCopyImageToBufferTests, GivenValidParametersWhenCopyingImageToBufferThenSuccessIsReturned) { imageFormat.image_channel_order = 
CL_RGBA; auto srcImage = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); size_t srcOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 1}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), srcOrigin, region, 0, //dstOffset 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueCopyImageToBufferTests clEnqueueCopyImageToBufferYUVTests; TEST_F(clEnqueueCopyImageToBufferYUVTests, GivenValidParametersWhenCopyingYuvImageToBufferThenSuccessIsReturned) { auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto srcImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageToBufferYUVTests, GivenInvalidOriginWhenCopyingYuvImageToBufferThenInvalidValueErrorIsReturned) { auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto srcImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueCopyImageToBufferYUVTests, 
GivenInvalidRegionWhenCopyingYuvImageToBufferThenInvalidValueErrorIsReturned) { auto dstBuffer = std::unique_ptr(BufferHelper>::create(pContext)); auto srcImage = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, srcImage); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueCopyImageToBuffer( pCommandQueue, srcImage, dstBuffer.get(), origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(srcImage); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_fill_buffer_tests.inl000066400000000000000000000021521363734646600310150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueFillBufferTests; namespace ULT { TEST_F(clEnqueueFillBufferTests, GivenNullCommandQueueWhenFillingBufferThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; cl_float pattern = 1.0f; retVal = clEnqueueFillBuffer( nullptr, buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueFillBufferTests, GivenNullBufferWhenFillingBufferThenInvalidMemObjectErrorIsReturned) { cl_float pattern = 1.0f; retVal = clEnqueueFillBuffer( pCommandQueue, nullptr, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_fill_image_tests.inl000066400000000000000000000036141363734646600306320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueFillImageTests; namespace ULT { TEST_F(clEnqueueFillImageTests, GivenNullCommandQueueWhenFillingImageThenInvalidCommandQueueErrorIsReturned) { auto image = std::unique_ptr(Image2dHelper>::create(pContext)); uint32_t fill_color[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; retVal = clEnqueueFillImage( nullptr, image.get(), fill_color, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueFillImageTests, GivenNullImageWhenFillingImageThenInvalidMemObjectErrorIsReturned) { uint32_t fill_color[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; retVal = clEnqueueFillImage( pCommandQueue, nullptr, fill_color, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueFillImageTests, GivenNullFillColorWhenFillingImageThenInvalidValueErrorIsReturned) { auto image = std::unique_ptr(Image2dHelper>::create(pContext)); size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; retVal = clEnqueueFillImage( pCommandQueue, image.get(), nullptr, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_image_tests.inl000066400000000000000000000201151363734646600276170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using 
namespace NEO; using ImageEnqueueCall = std::function; struct ValidateRegionAndOriginTests : public ::testing::TestWithParam { void SetUp() override { context.reset(new MockContext()); cmdQ.reset(new MockCommandQueue(context.get(), context->getDevice(0), 0)); } static void readImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { uint32_t tempPtr; retVal = clEnqueueReadImage(cmdQ, image, CL_TRUE, origin, region, 0, 0, &tempPtr, 0, nullptr, nullptr); } static void writeImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { uint32_t tempPtr; retVal = clEnqueueWriteImage(cmdQ, image, CL_TRUE, origin, region, 0, 0, &tempPtr, 0, nullptr, nullptr); } static void fillImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { uint32_t fill_color[4] = {0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}; retVal = clEnqueueFillImage(cmdQ, image, fill_color, origin, region, 0, nullptr, nullptr); } static void copyImageWithCorrectSrc(MockCommandQueue *cmdQ, Image *dstImage, size_t *dstOrigin, size_t *region, int32_t &retVal) { std::unique_ptr srcImage(ImageHelper::create(&cmdQ->getContext())); size_t srcOrigin[3] = {0, 0, 0}; retVal = clEnqueueCopyImage(cmdQ, srcImage.get(), dstImage, srcOrigin, dstOrigin, region, 0, nullptr, nullptr); } static void copyImageWithCorrectDst(MockCommandQueue *cmdQ, Image *srcImage, size_t *srcOrigin, size_t *region, int32_t &retVal) { std::unique_ptr dstImage(ImageHelper::create(&cmdQ->getContext())); size_t dstOrigin[3] = {0, 0, 0}; retVal = clEnqueueCopyImage(cmdQ, srcImage, dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); } static void copyImageToBuffer(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { MockBuffer buffer; retVal = clEnqueueCopyImageToBuffer(cmdQ, image, &buffer, origin, region, 0, 0, nullptr, nullptr); } static void copyBufferToImage(MockCommandQueue *cmdQ, 
Image *image, size_t *origin, size_t *region, int32_t &retVal) { MockBuffer buffer; retVal = clEnqueueCopyBufferToImage(cmdQ, &buffer, image, 0, origin, region, 0, nullptr, nullptr); } static void mapImage(MockCommandQueue *cmdQ, Image *image, size_t *origin, size_t *region, int32_t &retVal) { clEnqueueMapImage(cmdQ, image, CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, nullptr, &retVal); } std::unique_ptr context; std::unique_ptr cmdQ; cl_int retVal = CL_SUCCESS; }; TEST_P(ValidateRegionAndOriginTests, givenAnyZeroRegionParamWhenEnqueueCalledThenReturnError) { std::unique_ptr image(ImageHelper::create(context.get())); EXPECT_NE(nullptr, image.get()); size_t origin[3] = {0, 0, 0}; std::array region = {{0, 1, 1}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); region = {{1, 0, 1}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); region = {{1, 1, 0}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); region = {{0, 0, 0}}; GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenSecondOriginCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 1, 1}; size_t origin[3] = {0, 1, 0}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenThirdOriginCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 1, 1}; size_t origin[3] = 
{0, 0, 1}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenSecondRegionCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 2, 1}; size_t origin[3] = {0, 0, 0}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ValidateRegionAndOriginTests, givenThirdRegionnCoordinateAndNotAllowedImgTypeWhenEnqueueCalledThenReturnError) { size_t region[3] = {1, 1, 2}; size_t origin[3] = {0, 0, 0}; std::unique_ptr image(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); image.reset(ImageHelper::create(context.get())); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, 
retVal); auto image1dBufferDesc = Image1dDefaults::imageDesc; image1dBufferDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; image.reset(ImageHelper::create(context.get(), &image1dBufferDesc)); GetParam()(cmdQ.get(), image.get(), origin, ®ion[0], retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); } ImageEnqueueCall enqueueFunctions[8] = { &ValidateRegionAndOriginTests::readImage, &ValidateRegionAndOriginTests::writeImage, &ValidateRegionAndOriginTests::fillImage, &ValidateRegionAndOriginTests::copyImageWithCorrectSrc, &ValidateRegionAndOriginTests::copyImageWithCorrectDst, &ValidateRegionAndOriginTests::copyImageToBuffer, &ValidateRegionAndOriginTests::copyBufferToImage, &ValidateRegionAndOriginTests::mapImage, }; INSTANTIATE_TEST_CASE_P( ValidateRegionAndOriginTests, ValidateRegionAndOriginTests, ::testing::ValuesIn(enqueueFunctions)); compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl000066400000000000000000000131011363734646600306400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMapBufferTests; TEST_F(clEnqueueMapBufferTests, GivenNullCommandQueueWhenMappingBufferThenInvalidCommandQueueErrorIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( nullptr, buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] 
pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMapBufferTests, GivenValidParametersWhenMappingBufferThenSuccessIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( pCommandQueue, buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMapBufferTests, GivenMappedPointerWhenCreatingBufferFromThisPointerThenInvalidHostPtrErrorIsReturned) { unsigned int bufferSize = 16; cl_mem buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); void *hostPointer = clEnqueueMapBuffer(pCommandQueue, buffer, CL_TRUE, CL_MAP_READ, 0, bufferSize, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, hostPointer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(pCommandQueue, buffer, hostPointer, 0, NULL, NULL); EXPECT_EQ(CL_SUCCESS, retVal); auto bufferFromHostPtr = clCreateBuffer(pContext, CL_MEM_USE_HOST_PTR, bufferSize, hostPointer, &retVal); EXPECT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, bufferFromHostPtr); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } class EnqueueMapBufferFlagsTest : public ApiFixture<>, public testing::TestWithParam { public: EnqueueMapBufferFlagsTest() { } protected: void SetUp() override { ApiFixture::SetUp(); buffer_flags = GetParam(); unsigned int bufferSize = 16; pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, 
buffer_flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; cl_mem_flags buffer_flags = 0; unsigned char *pHostMem; cl_mem buffer; }; typedef EnqueueMapBufferFlagsTest EnqueueMapReadBufferTests; TEST_P(EnqueueMapReadBufferTests, GivenInvalidFlagsWhenMappingBufferForReadingThenInvalidOperationErrorIsReturned) { cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( pCommandQueue, buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags NoReadAccessFlags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueMapBufferFlagsTests_Create, EnqueueMapReadBufferTests, testing::ValuesIn(NoReadAccessFlags)); typedef EnqueueMapBufferFlagsTest EnqueueMapWriteBufferTests; TEST_P(EnqueueMapWriteBufferTests, GivenInvalidFlagsWhenMappingBufferForWritingThenInvalidOperationErrorIsReturned) { cl_event eventReturned = nullptr; auto ptrResult = clEnqueueMapBuffer( pCommandQueue, buffer, CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, &eventReturned, &retVal); EXPECT_EQ(nullptr, ptrResult); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags NoWriteAccessFlags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueMapBufferFlagsTests_Create, EnqueueMapWriteBufferTests, testing::ValuesIn(NoWriteAccessFlags)); compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_map_image_tests.inl000066400000000000000000000122011363734646600304510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct clEnqueueMapImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueMapImageTests, GivenValidParametersWhenMappingImageThenSuccessIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } struct clEnqueueMapImageYUVTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; 
imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueMapImageYUVTests, GivenValidYuvImageWhenMappingImageThenSuccessIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, 0, 0, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueMapImageYUVTests, GivenInvalidOriginWhenMappingYuvImageThenInvalidValueErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, 0, 0, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueMapImageYUVTests, GivenInvalidRegionWhenMappingYuvImageThenInvalidValueErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; clEnqueueMapImage( pCommandQueue, image, CL_TRUE, CL_MAP_READ, origin, region, 0, 0, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT 
compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_marker_tests.inl000066400000000000000000000026761363734646600300320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/enqueue_common.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMarkerTests; TEST_F(clEnqueueMarkerTests, GivenNullCommandQueueWhenEnqueingMarkerThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueMarker( nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueMarkerTests, GivenValidCommandQueueWhenEnqueingMarkerThenSuccessIsReturned) { auto retVal = clEnqueueMarker( pCommandQueue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } class CommandWithoutKernelTypesTests : public testing::TestWithParam { }; TEST_P(CommandWithoutKernelTypesTests, commandWithoutKernelTypes) { unsigned int commandType = GetParam(); EXPECT_TRUE(isCommandWithoutKernel(commandType)); }; TEST_P(CommandWithoutKernelTypesTests, commandZeroType) { EXPECT_FALSE(isCommandWithoutKernel(0)); }; static unsigned int commandWithoutKernelTypes[] = { CL_COMMAND_BARRIER, CL_COMMAND_MARKER, CL_COMMAND_MIGRATE_MEM_OBJECTS, CL_COMMAND_SVM_MAP, CL_COMMAND_SVM_UNMAP, CL_COMMAND_SVM_FREE}; INSTANTIATE_TEST_CASE_P( commandWithoutKernelTypes, CommandWithoutKernelTypesTests, testing::ValuesIn(commandWithoutKernelTypes)); compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_marker_with_wait_list_tests.inl000066400000000000000000000014601363734646600331320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMarkerWithWaitListTests; TEST_F(clEnqueueMarkerWithWaitListTests, 
GivenNullCommandQueueWhenEnqueingMarkerWithWaitListThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueMarkerWithWaitList( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueMarkerWithWaitListTests, GivenValidCommandQueueWhenEnqueingMarkerWithWaitListThenSuccessIsReturned) { auto retVal = clEnqueueMarkerWithWaitList( pCommandQueue, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_migrate_mem_objects_tests.inl000066400000000000000000000136201363734646600325370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueMigrateMemObjectsTests; TEST_F(clEnqueueMigrateMemObjectsTests, GivenNullCommandQueueWhenMigratingMemObjThenInvalidCommandQueueErrorIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto Result = clEnqueueMigrateMemObjects( nullptr, 1, &buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, Result); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenValidInputsWhenMigratingMemObjThenSuccessIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, 
&retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_event eventReturned = nullptr; auto Result = clEnqueueMigrateMemObjects( pCommandQueue, 1, &buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_SUCCESS, Result); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; clReleaseEvent(eventReturned); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenNullMemObjsWhenMigratingMemObjThenInvalidValueErrorIsReturned) { cl_event eventReturned = nullptr; auto Result = clEnqueueMigrateMemObjects( pCommandQueue, 1, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_VALUE, Result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenZeroMemObjectsWhenMigratingMemObjsThenInvalidValueErrorIsReturned) { cl_event eventReturned = nullptr; auto Result = clEnqueueMigrateMemObjects( pCommandQueue, 0, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_VALUE, Result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenNonZeroEventsAndNullWaitlistWhenMigratingMemObjThenInvalidWaitListErrorIsReturned) { cl_event eventReturned = nullptr; auto Result = clEnqueueMigrateMemObjects( pCommandQueue, 0, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 2, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, Result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenZeroEventsAndNonNullWaitlistWhenMigratingMemObjsThenInvalidWaitListErrorIsReturned) { cl_event eventReturned = nullptr; Event event(pCommandQueue, CL_COMMAND_MIGRATE_MEM_OBJECTS, 0, 0); auto Result = clEnqueueMigrateMemObjects( pCommandQueue, 0, nullptr, CL_MIGRATE_MEM_OBJECT_HOST, 0, (cl_event *)&event, &eventReturned); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, Result); } TEST_F(clEnqueueMigrateMemObjectsTests, GivenValidFlagsWhenMigratingMemObjsThenSuccessIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = 
CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_mem_migration_flags validFlags[] = {0, CL_MIGRATE_MEM_OBJECT_HOST, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED}; for (auto validFlag : validFlags) { cl_event eventReturned = nullptr; auto Result = clEnqueueMigrateMemObjects( pCommandQueue, 1, &buffer, validFlag, 0, nullptr, &eventReturned); EXPECT_EQ(CL_SUCCESS, Result); clReleaseEvent(eventReturned); } retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; } TEST_F(clEnqueueMigrateMemObjectsTests, GivenInvalidFlagsWhenMigratingMemObjsThenInvalidValueErrorIsReturned) { unsigned int bufferSize = 16; auto pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto buffer = clCreateBuffer( pContext, flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_mem_migration_flags invalidFlags[] = {(cl_mem_migration_flags)0xffffffff, CL_MIGRATE_MEM_OBJECT_HOST | (1 << 10), CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED | (1 << 10), (cl_mem_migration_flags)12345}; for (auto invalidFlag : invalidFlags) { cl_event eventReturned = nullptr; auto Result = clEnqueueMigrateMemObjects( pCommandQueue, 1, &buffer, invalidFlag, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_VALUE, Result); clReleaseEvent(eventReturned); } retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_native_kernel_tests.inl000066400000000000000000000013401363734646600313620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueNativeKernelTests; namespace ULT { 
TEST_F(clEnqueueNativeKernelTests, GivenAnyParametersWhenExecutingNativeKernelThenOutOfHostMemoryErrorIsReturned) { auto retVal = clEnqueueNativeKernel( nullptr, // commandQueue nullptr, // user_func nullptr, // args 0u, // cb_args 0, // num_mem_objects nullptr, // mem_list nullptr, // args_mem_loc 0, // num_events nullptr, //event_list nullptr // event ); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_nd_range_kernel_tests.inl000066400000000000000000000070521363734646600316570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueNDRangeKernelTests; namespace ULT { TEST_F(clEnqueueNDRangeKernelTests, GivenValidParametersWhenExecutingKernelThenSuccessIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = clEnqueueNDRangeKernel( pCommandQueue, pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenNullCommandQueueWhenExecutingKernelThenInvalidCommandQueueErrorIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel( nullptr, pKernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenNonZeroEventsAndEmptyEventWaitListWhenExecutingKernelThenInvalidEventWaitListErrorIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 
1}; cl_uint numEventsInWaitList = 1; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = clEnqueueNDRangeKernel( pCommandQueue, pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenConcurrentKernelWhenExecutingKernelThenInvalidKernelErrorIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; pKernel->executionType = KernelExecutionType::Concurrent; retVal = clEnqueueNDRangeKernel( pCommandQueue, pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clEnqueueNDRangeKernelTests, GivenKernelWithAllocateSyncBufferPatchWhenExecutingKernelThenInvalidKernelErrorIsReturned) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; SPatchAllocateSyncBuffer patchAllocateSyncBuffer; pProgram->mockKernelInfo.patchInfo.pAllocateSyncBuffer = &patchAllocateSyncBuffer; EXPECT_TRUE(pKernel->isUsingSyncBuffer()); retVal = clEnqueueNDRangeKernel( pCommandQueue, pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_read_buffer_rect_tests.inl000066400000000000000000000177571363734646600320400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include 
"opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReadBufferRectTest; namespace ULT { TEST_F(clEnqueueReadBufferRectTest, GivenInvalidBufferWhenReadingRectangularRegionThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueReadBufferRectTest, GivenNullCommandQueueWhenReadingRectangularRegionThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; auto retVal = clEnqueueReadBufferRect( nullptr, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueReadBufferRectTest, GivenNullHostPtrWhenReadingRectangularRegionThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch nullptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(buffer); 
EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadBufferRectTest, GivenValidParametersWhenReadingRectangularRegionThenSuccessIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); char ptr[10]; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(buffer); } TEST_F(clEnqueueReadBufferRectTest, GivenInvalidPitchWhenReadingRectangularRegionThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); char ptr[10]; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; size_t bufferRowPitch = 9; auto retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, bufferRowPitch, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t hostRowPitch = 9; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch hostRowPitch, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t bufferSlicePitch = 9; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch bufferSlicePitch, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); size_t hostSlicePitch = 9; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch hostSlicePitch, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(buffer); } class EnqueueReadBufferRectFlagsTest : public ApiFixture<>, public testing::TestWithParam { public: EnqueueReadBufferRectFlagsTest() { } protected: void SetUp() override { ApiFixture::SetUp(); buffer_flags = GetParam(); unsigned int bufferSize = 16; pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, buffer_flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; cl_mem_flags buffer_flags = 0; unsigned char *pHostMem; cl_mem buffer; }; typedef EnqueueReadBufferRectFlagsTest EnqueueReadReadBufferRectTests; TEST_P(EnqueueReadReadBufferRectTests, GivenNoReadFlagsWhenReadingRectangularRegionThenInvalidOperationErrorIsReturned) { size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; cl_event eventReturned = nullptr; retVal = clEnqueueReadBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags read_buffer_rect_flags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueReadBufferRectFlagsTests_Create, EnqueueReadReadBufferRectTests, 
testing::ValuesIn(read_buffer_rect_flags)); } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_read_buffer_tests.inl000066400000000000000000000074031363734646600310060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/cl_device/cl_device_info.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReadBufferTests; namespace ULT { TEST_F(clEnqueueReadBufferTests, GivenNullCommandQueueWhenReadingBufferThenInvalidCommandQueueErrorIsReturned) { auto data = 1; auto retVal = clEnqueueReadBuffer( nullptr, nullptr, false, 0, sizeof(data), &data, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } class EnqueueReadBufferFlagsTest : public ApiFixture<>, public testing::TestWithParam { protected: void SetUp() override { ApiFixture::SetUp(); buffer_flags = GetParam(); unsigned int bufferSize = 16; pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, buffer_flags, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; cl_mem_flags buffer_flags = 0; unsigned char *pHostMem; cl_mem buffer; }; typedef EnqueueReadBufferFlagsTest EnqueueReadReadBufferTests; TEST_P(EnqueueReadReadBufferTests, GivenNoReadFlagsWhenReadingBufferThenInvalidOperationErrorIsReturned) { auto data = 1; cl_event eventReturned = nullptr; retVal = clEnqueueReadBuffer( pCommandQueue, buffer, CL_TRUE, 0, sizeof(data), &data, 0, nullptr, &eventReturned); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(eventReturned); } static cl_mem_flags 
read_buffer_flags[] = { CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( EnqueueReadBufferFlagsTests_Create, EnqueueReadReadBufferTests, testing::ValuesIn(read_buffer_flags)); } // namespace ULT class EnqueueReadBufferTest : public api_tests { public: EnqueueReadBufferTest() {} protected: cl_mem buffer = nullptr; cl_int retVal = CL_SUCCESS; unsigned char *pHostMem = nullptr; unsigned int bufferSize = 0; void SetUp() override { api_tests::SetUp(); bufferSize = 16; pHostMem = new unsigned char[bufferSize]; memset(pHostMem, 0xaa, bufferSize); buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bufferSize, pHostMem, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } void TearDown() override { retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pHostMem; api_tests::TearDown(); } }; TEST_F(EnqueueReadBufferTest, GivenSvmPtrWhenReadingBufferThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(testedRootDeviceIndex)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto data = clSVMAlloc(pContext, CL_MEM_READ_WRITE, bufferSize, 64); auto retVal = clEnqueueReadBuffer(pCommandQueue, buffer, CL_TRUE, bufferSize, 0, data, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, data); } } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_read_image_tests.inl000066400000000000000000000116211363734646600306140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/surface_formats.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReadImageTests; namespace ULT { struct clEnqueueReadImageTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { 
ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_YUYV_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clEnqueueReadImageTests, GivenNullCommandQueueWhenReadingImageThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueReadImage( nullptr, nullptr, false, nullptr, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueReadImageTests, GivenNullImageWhenReadingImageThenInvalidMemObjectErrorIsReturned) { auto retVal = clEnqueueReadImage( pCommandQueue, nullptr, false, nullptr, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueReadImageTests, GivenValidParametersWhenReadinImagesThenSuccessIsReturned) { imageFormat.image_channel_order = CL_RGBA; auto image = clCreateImage( pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } typedef clEnqueueReadImageTests clEnqueueReadImageYuv; TEST_F(clEnqueueReadImageYuv, GivenValidYuvImageWhenReadingImageThenSuccessIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); 
EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadImageYuv, GivenInvalidOriginWhenReadingYuvImageThenInvalidValueErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueReadImageYuv, GivenInvalidRegionWhenReadingYuvImageThenInvalidValueErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueReadImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_svm_free_tests.inl000066400000000000000000000167641363734646600303620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include 
"cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMFreeTests; namespace ULT { TEST_F(clEnqueueSVMFreeTests, GivenInvalidCommandQueueWhenFreeingSVMThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMFree( nullptr, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMFreeTests, GivenNonZeroNumOfSVMPointersAndNullSVMPointersWhenFreeingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMFreeTests, GivenZeroNumOfSVMPointersAndNonNullSVMPointersWhenFreeingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint 
num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMFreeTests, GivenNonZeroNumOfEventsAndNullEventListWhenFreeingSVMThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMFreeTests, GivenZeroNumOfEventsAndNonNullEventListWhenFreeingSVMThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMFreeTests, GivenNonZeroNumOfSVMPointersAndNonNullSVMPointersWhenFreeingSVMThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) 
nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMFreeTests, GivenZeroNumOfSVMPointersAndNullSVMPointersWhenFreeingSVMThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMFree( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST_F(clEnqueueSVMFreeTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMFreeThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr); auto retVal = clEnqueueSVMFree( pCommandQueue.get(), // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT 
compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_svm_map_tests.inl000066400000000000000000000135231363734646600302040ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMMapTests; namespace ULT { TEST_F(clEnqueueSVMMapTests, GivenInvalidCommandQueueWhenMappingSVMThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMap( nullptr, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMapTests, GivenNullSVMPointerWhenMappingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMMapTests, GivenRegionSizeZeroWhenMappingSVMThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue 
CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMapTests, GivenNullEventWaitListAndNonZeroNumEventsWhenMappingSVMThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 0, // size_t size 1, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMapTests, GivenNonNullEventWaitListAndZeroNumEventsWhenMappingSVMThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list eventWaitList, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMapTests, GivenValidParametersWhenMappingSVMThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); 
} } TEST_F(clEnqueueSVMMapTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMapThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr); auto retVal = clEnqueueSVMMap( pCommandQueue.get(), // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags nullptr, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_svm_mem_fill_tests.inl000066400000000000000000000134751363734646600312210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMMemFillTests; namespace ULT { TEST_F(clEnqueueSVMMemFillTests, GivenInvalidCommandQueueWhenFillingSVMMemoryThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMemFill( nullptr, // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMemFillTests, 
GivenNullSVMPtrWhenFillingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMMemFillTests, GivenRegionSizeZeroWhenFillingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMemFillTests, GivenNullEventWaitListAndNonZeroEventsWhenFillingSVMMemoryThenInvalidEventWaitListIsReturned) { auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 0, // size_t size 1, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemFillTests, GivenNonNullEventWaitListAndZeroEventsWhenFillingSVMMemoryThenInvalidEventWaitListIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // 
size_t pattern_size 0, // size_t size 0, // cl_uint num_events_in_wait_list eventWaitList, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemFillTests, GivenValidParametersWhenFillingSVMMemoryThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMemFill( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMemFillTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMemFillThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr); auto retVal = clEnqueueSVMMemFill( pCommandQueue.get(), // cl_command_queue command_queue nullptr, // void *svm_ptr nullptr, // const void *pattern 0, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_svm_memcpy_tests.inl000066400000000000000000000162311363734646600307200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMMemcpyTests; namespace ULT { TEST_F(clEnqueueSVMMemcpyTests, GivenInvalidCommandQueueWhenCopyingSVMMemoryThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMemcpy( nullptr, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMemcpyTests, GivenNullDstPtrWhenCopyingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pSrcSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pSrcSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr pSrcSvm, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, pSrcSvm); } } TEST_F(clEnqueueSVMMemcpyTests, GivenNullSrcPtrWhenCopyingSVMMemoryThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pDstSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pDstSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy pDstSvm, // void *dst_ptr nullptr, // const void *src_ptr 256, // size_t size 0, 
// cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, pDstSvm); } } TEST_F(clEnqueueSVMMemcpyTests, GivenNonZeroEventsAndNullEventListWhenCopyingSVMMemoryThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemcpyTests, GivenZeroEventsAndNonNullEventListWhenCopyingSVMMemoryThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMemcpyTests, GivenNonZeroSizeWhenCopyingSVMMemoryThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pDstSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pDstSvm); void *pSrcSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pSrcSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy pDstSvm, // void *dst_ptr pSrcSvm, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, pDstSvm); clSVMFree(pContext, 
pSrcSvm); } } TEST_F(clEnqueueSVMMemcpyTests, GivenZeroSizeWhenCopyingSVMMemoryThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *pDstSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pDstSvm); void *pSrcSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, pSrcSvm); auto retVal = clEnqueueSVMMemcpy( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy pDstSvm, // void *dst_ptr pSrcSvm, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, pDstSvm); clSVMFree(pContext, pSrcSvm); } } TEST_F(clEnqueueSVMMemcpyTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMemcpyThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr); auto retVal = clEnqueueSVMMemcpy( pCommandQueue.get(), // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_copy nullptr, // void *dst_ptr nullptr, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_svm_migrate_mem_tests.cpp000066400000000000000000000333271363734646600317210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include 
"shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" #include using namespace NEO; typedef api_tests clEnqueueSVMMigrateMemTests; namespace ULT { TEST_F(clEnqueueSVMMigrateMemTests, GivenInvalidCommandQueueWhenMigratingSVMThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMMigrateMem( nullptr, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMMigrateMemTests, GivenNullSvmPointersWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNumSvmPointersIsZeroWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, 
// cl_command_queue command_queue 0, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenSvmPointerIsHostPtrWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } char *ptrHost = new char[10]; ASSERT_NE(nullptr, ptrHost); const void *svmPtrs[] = {ptrHost}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); delete[] ptrHost; } TEST_F(clEnqueueSVMMigrateMemTests, GivenNonZeroSizeIsNotContainedWithinAllocationWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSvm); ASSERT_NE(nullptr, svmData); auto svmAlloc = svmData->gpuAllocation; EXPECT_NE(nullptr, svmAlloc); size_t allocSize = svmAlloc->getUnderlyingBufferSize(); const void *svmPtrs[] = {ptrSvm}; const size_t sizes[] = {allocSize + 1}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers sizes, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // 
const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenUnsupportedFlagsWhenMigratingSvmThenInvalidValueErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0xAA55AA55AA55AA55, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNullEventWaitListAndNonZeroNumEventsWhenMigratingSvmThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMigrateMemTests, GivenNonNullEventWaitListAndZeroNumEventsWhenMigratingSvmThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 0, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); 
EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMMigrateMemTests, GivenDifferentContextCommandQueueAndEventsWhenMigratingSvmThenInvalidContextErrorIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); MockContext mockContext; UserEvent uEvent(&mockContext); cl_event eventWaitList[] = {&uEvent}; const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 1, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNullSizesWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenSizeZeroWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; const 
size_t sizes[] = {0}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers sizes, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenNonZeroSizeWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); const void *svmPtrs[] = {ptrSvm}; const size_t sizes[] = {256}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers sizes, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenSameContextCommandQueueAndEventsWhenMigratingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); ASSERT_NE(nullptr, ptrSvm); UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; const void *svmPtrs[] = {ptrSvm}; auto retVal = clEnqueueSVMMigrateMem( pCommandQueue, // cl_command_queue command_queue 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 1, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); 
clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMMigrateMemTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMMigrateMemThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr); auto retVal = clEnqueueSVMMigrateMem( pCommandQueue.get(), // cl_command_queue command_queue 1, // cl_uint num_svm_pointers nullptr, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_svm_unmap_tests.inl000066400000000000000000000110621363734646600305430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueSVMUnmapTests; namespace ULT { TEST_F(clEnqueueSVMUnmapTests, GivenInvalidCommandQueueWhenUnmappingSvmThenInvalidCommandQueueErrorIsReturned) { auto retVal = clEnqueueSVMUnmap( nullptr, // cl_command_queue command_queue nullptr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueSVMUnmapTests, 
GivenNullSvmPtrWhenUnmappingSvmThenInvalidValueErrorIsReturned) { auto retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueSVMUnmapTests, GivenNullEventListAndNonZeroEventsWhenUnmappingSvmThenInvalidEventWaitListErrorIsReturned) { auto retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr 1, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMUnmapTests, GivenNonNullEventListAndZeroEventsWhenUnmappingSvmThenInvalidEventWaitListErrorIsReturned) { UserEvent uEvent(pContext); cl_event eventWaitList[] = {&uEvent}; auto retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue nullptr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(clEnqueueSVMUnmapTests, GivenValidParametersWhenUnmappingSvmThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clEnqueueSVMMap( pCommandQueue, // cl_command_queue command_queue CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSvm, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueSVMUnmap( pCommandQueue, // cl_command_queue command_queue ptrSvm, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event 
*event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clEnqueueSVMUnmapTests, GivenDeviceNotSupportingSvmWhenEnqueuingSVMUnmapThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); cl_device_id deviceId = pDevice.get(); auto pContext = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); auto pCommandQueue = std::make_unique(pContext.get(), pDevice.get(), nullptr); retVal = clEnqueueSVMUnmap( pCommandQueue.get(), // cl_command_queue command_queue reinterpret_cast(0x1234), // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_task_tests.inl000066400000000000000000000032651363734646600275060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueTaskTests; struct EnqueueTaskWithRequiredWorkGroupSize : public HelloWorldTest { typedef HelloWorldTest Parent; void SetUp() override { Parent::kernelFilename = "required_work_group"; Parent::kernelName = "CopyBuffer2"; Parent::SetUp(); } void TearDown() override { Parent::TearDown(); } }; namespace ULT { TEST_F(clEnqueueTaskTests, GivenValidParametersWhenEnqueingTaskThenSuccessIsReturned) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = clEnqueueTask( pCommandQueue, pKernel, 
numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueTaskWithRequiredWorkGroupSize, GivenRequiredWorkGroupSizeWhenEnqueingTaskThenSuccessIsReturned) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; cl_command_queue command_queue = static_cast(pCmdQ); cl_kernel kernel = static_cast(pKernel); retVal = clEnqueueTask( command_queue, kernel, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_unmap_mem_object_tests.inl000066400000000000000000000043411363734646600320440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include using namespace NEO; typedef api_tests clEnqueueUnmapMemObjTests; TEST_F(clEnqueueUnmapMemObjTests, givenValidAddressWhenUnmappingThenReturnSuccess) { auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCommandQueue, buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueUnmapMemObjTests, givenInvalidAddressWhenUnmappingOnCpuThenReturnError) { auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCommandQueue, buffer.get(), ptrOffset(mappedPtr, buffer->getSize() + 1), 0, nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEnqueueUnmapMemObjTests, givenInvalidAddressWhenUnmappingOnGpuThenReturnError) { auto buffer = std::unique_ptr(BufferHelper>::create(pContext)); buffer->setSharingHandler(new SharingHandler()); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); cl_int retVal = CL_SUCCESS; auto mappedPtr = clEnqueueMapBuffer(pCommandQueue, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCommandQueue, buffer.get(), ptrOffset(mappedPtr, buffer->getSize() + 1), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_verify_memory.inl000066400000000000000000000053131363734646600302120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/aub_mem_dump/aub_services.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "test.h" using namespace NEO; TEST(CheckVerifyMemoryRelatedApiConstants, givenVerifyMemoryRelatedApiConstantsWhenVerifyingTheirValueThenCorrectValuesAreReturned) { EXPECT_EQ(AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual, CL_MEM_COMPARE_EQUAL); EXPECT_EQ(AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual, CL_MEM_COMPARE_NOT_EQUAL); } struct clEnqueueVerifyMemoryINTELSettings { const cl_uint comparisonMode = CL_MEM_COMPARE_EQUAL; const size_t bufferSize = 1; static constexpr size_t expectedSize = 1; int expected[expectedSize]{}; void *gpuAddress = expected; }; class clEnqueueVerifyMemoryINTELTests : public api_tests, public clEnqueueVerifyMemoryINTELSettings { }; TEST_F(clEnqueueVerifyMemoryINTELTests, givenSizeOfComparisonEqualZeroWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = 
clEnqueueVerifyMemoryINTEL(nullptr, nullptr, nullptr, 0, comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenNullExpectedDataWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(nullptr, nullptr, nullptr, expectedSize, comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenInvalidAllocationPointerWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(nullptr, nullptr, expected, expectedSize, comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenInvalidCommandQueueWhenCallingVerifyMemoryThenErrorIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(nullptr, gpuAddress, expected, expectedSize, comparisonMode); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenEqualMemoryWhenCallingVerifyMemoryThenSuccessIsReturned) { cl_int retval = clEnqueueVerifyMemoryINTEL(pCommandQueue, gpuAddress, expected, expectedSize, comparisonMode); EXPECT_EQ(CL_SUCCESS, retval); } TEST_F(clEnqueueVerifyMemoryINTELTests, givenNotEqualMemoryWhenCallingVerifyMemoryThenInvalidValueErrorIsReturned) { int differentMemory = expected[0] + 1; cl_int retval = clEnqueueVerifyMemoryINTEL(pCommandQueue, gpuAddress, &differentMemory, sizeof(differentMemory), comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl000066400000000000000000000055121363734646600317370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "cl_api_tests.h" using namespace NEO; using clEnqueueWaitForEventsTests = api_tests; 
TEST_F(clEnqueueWaitForEventsTests, GivenInvalidCommandQueueWhenClEnqueueWaitForEventsIsCalledThenReturnError) { auto retVal = CL_SUCCESS; auto userEvent = clCreateUserEvent( pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueWaitForEvents( nullptr, 1, &userEvent); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clReleaseEvent(userEvent); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWaitForEventsTests, GivenProperParamsWhenClEnqueueWaitForEventsIsCalledAndEventStatusIsCompleteThenWaitAndReturnSuccess) { struct MyEvent : public UserEvent { MyEvent(Context *context) : UserEvent(context) { } bool wait(bool blocking, bool quickKmdSleep) override { wasWaitCalled = true; return true; }; bool wasWaitCalled = false; }; auto retVal = CL_SUCCESS; auto event = std::make_unique(pContext); cl_event clEvent = static_cast(event.get()); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueWaitForEvents( pCommandQueue, 1, &clEvent); EXPECT_EQ(true, event->wasWaitCalled); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWaitForEventsTests, GivenProperParamsWhenClEnqueueWaitForEventsIsCalledAndEventStatusIsNotCompleteThenReturnError) { auto retVal = CL_SUCCESS; auto userEvent = clCreateUserEvent( pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(userEvent, -1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueWaitForEvents( pCommandQueue, 1, &userEvent); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); retVal = clReleaseEvent(userEvent); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWaitForEventsTests, GivenInvalidEventWhenClEnqueueWaitForEventsIsCalledThenReturnError) { auto retVal = CL_SUCCESS; auto validUserEvent = clCreateUserEvent( pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto ptr = std::make_unique(sizeof(Event)); cl_event invalidEvent = reinterpret_cast(ptr.get()); cl_event events[]{validUserEvent, invalidEvent, validUserEvent}; retVal = clEnqueueWaitForEvents( pCommandQueue, 3, events); 
EXPECT_EQ(CL_INVALID_EVENT, retVal); retVal = clReleaseEvent(validUserEvent); ASSERT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_write_buffer_rect_tests.inl000066400000000000000000000052461363734646600322450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueWriteBufferRectTests; namespace ULT { TEST_F(clEnqueueWriteBufferRectTests, GivenInvalidBufferWhenWritingRectangularRegionThenInvalidMemObjectErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; auto retVal = clEnqueueWriteBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clEnqueueWriteBufferRectTests, GivenNullCommandQueueWhenWritingRectangularRegionThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; char ptr[10]; auto retVal = clEnqueueWriteBufferRect( nullptr, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch ptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueWriteBufferRectTests, GivenNullHostPtrWhenWritingRectangularRegionThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer( pContext, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); 
size_t buffOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {10, 10, 0}; auto retVal = clEnqueueWriteBufferRect( pCommandQueue, buffer, CL_FALSE, buffOrigin, hostOrigin, region, 10, //bufferRowPitch 0, //bufferSlicePitch 10, //hostRowPitch 0, //hostSlicePitch nullptr, //hostPtr 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_write_buffer_tests.inl000066400000000000000000000022161363734646600312220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueWriteBufferTests; namespace ULT { TEST_F(clEnqueueWriteBufferTests, GivenNullCommandQueueWhenWritingBufferThenInvalidCommandQueueErrorIsReturned) { auto buffer = (cl_mem)ptrGarbage; retVal = clEnqueueWriteBuffer( nullptr, buffer, CL_FALSE, //blocking write 0, //offset 0, //sb nullptr, 0, //numEventsInWaitList nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(clEnqueueWriteBufferTests, GivenNullBufferWhenWritingBufferThenInvalidMemObjectErrorIsReturned) { void *ptr = nullptr; retVal = clEnqueueWriteBuffer( pCommandQueue, nullptr, CL_FALSE, //blocking write 0, //offset 0, //cb ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_enqueue_write_image_tests.inl000066400000000000000000000116461363734646600310420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/surface_formats.h" 
#include "cl_api_tests.h"

using namespace NEO;

// NOTE(review): inside namespace ULT this alias is hidden by the fixture struct of the same name
// declared below, so the TEST_F cases in that namespace bind to the struct, not to api_tests.
typedef api_tests clEnqueueWriteImageTests;

namespace ULT {

// Fixture for the clEnqueueWriteImage tests: sets up the API fixture and describes a
// 32x32 2D image in CL_YUYV_INTEL / CL_UNORM_INT8 format that individual tests may adjust.
struct clEnqueueWriteImageTests : public ApiFixture<>,
                                  public ::testing::Test {
    void SetUp() override {
        ApiFixture::SetUp();

        // clang-format off
        imageFormat.image_channel_order     = CL_YUYV_INTEL;
        imageFormat.image_channel_data_type = CL_UNORM_INT8;

        imageDesc.image_type        = CL_MEM_OBJECT_IMAGE2D;
        imageDesc.image_width       = 32;
        imageDesc.image_height      = 32;
        imageDesc.image_depth       = 1;
        imageDesc.image_array_size  = 1;
        imageDesc.image_row_pitch   = 0;
        imageDesc.image_slice_pitch = 0;
        imageDesc.num_mip_levels    = 0;
        imageDesc.num_samples       = 0;
        imageDesc.mem_object        = nullptr;
        // clang-format on
    }

    void TearDown() override {
        ApiFixture::TearDown();
    }

    // Value-initialized so no field is indeterminate before SetUp() fills it in.
    cl_image_format imageFormat{};
    cl_image_desc imageDesc{};
};

// A null command queue must be rejected with CL_INVALID_COMMAND_QUEUE.
TEST_F(clEnqueueWriteImageTests, GivenNullCommandQueueWhenWritingImageThenInvalidCommandQueueErrorIsReturned) {
    auto retVal = clEnqueueWriteImage(
        nullptr,
        nullptr,
        CL_FALSE,
        nullptr,
        nullptr,
        0,
        0,
        nullptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// A null image handle must be rejected with CL_INVALID_MEM_OBJECT.
TEST_F(clEnqueueWriteImageTests, GivenNullImageWhenWritingImageThenInvalidMemObjectErrorIsReturned) {
    auto retVal = clEnqueueWriteImage(
        pCommandQueue,
        nullptr,
        CL_FALSE,
        nullptr,
        nullptr,
        0,
        0,
        nullptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}

// Writing a 2x2 region at origin (2, 2) of a CL_RGBA image is expected to succeed.
TEST_F(clEnqueueWriteImageTests, GivenValidParametersWhenWritingImageThenSuccessIsReturned) {
    // Override the fixture's default YUYV channel order for this test.
    imageFormat.image_channel_order = CL_RGBA;

    auto image = clCreateImage(
        pContext,
        CL_MEM_READ_WRITE,
        &imageFormat,
        &imageDesc,
        nullptr,
        &retVal);
    // The image is used below, so stop here if it could not be created.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, image);

    const size_t origin[] = {2, 2, 0};
    const size_t region[] = {2, 2, 1};

    // Reuse the fixture's retVal (already passed to clCreateImage) instead of shadowing it with a local.
    retVal = clEnqueueWriteImage(
        pCommandQueue,
        image,
        CL_FALSE,
        origin,
        region,
        0,
        0,
        nullptr,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// The YUV tests reuse the same fixture, whose defaults already describe a YUYV image.
typedef clEnqueueWriteImageTests clEnqueueWriteImageYUV;
TEST_F(clEnqueueWriteImageYUV, GivenValidParametersWhenWritingYuvImageThenSuccessIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {2, 2, 1}; auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWriteImageYUV, GivenInvalidOriginWhenWritingYuvImageThenInvalidValueErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {1, 2, 0}; const size_t region[] = {2, 2, 0}; auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEnqueueWriteImageYUV, GivenInvalidRegionWhenWritingYuvImageThenInvalidValueErrorIsReturned) { auto image = clCreateImage( pContext, CL_MEM_READ_ONLY, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); const size_t origin[] = {2, 2, 0}; const size_t region[] = {1, 2, 0}; auto retVal = clEnqueueWriteImage( pCommandQueue, image, false, origin, region, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_finish_tests.inl000066400000000000000000000011601363734646600262650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using 
namespace NEO; typedef api_tests clFinishTests; namespace ULT { TEST_F(clFinishTests, GivenValidCommandQueueWhenWaitingForFinishThenSuccessIsReturned) { retVal = clFinish(pCommandQueue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clFinishTests, GivenNullCommandQueueWhenWaitingForFinishThenInvalidCommandQueueErrorIsReturned) { auto retVal = clFinish(nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_flush_tests.inl000066400000000000000000000011331363734646600261260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clFlushTests; namespace ULT { TEST_F(clFlushTests, GivenValidCommandQueueWhenFlushingThenSuccessIsReturned) { retVal = clFlush(pCommandQueue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clFlushTests, GivenNullCommandQueueWhenFlushingThenInvalidCommandQueueErrorIsReturned) { auto retVal = clFlush(nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_function_pointers_tests.inl000066400000000000000000000116341363734646600305640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_program.h" #include "cl_api_tests.h" using namespace NEO; using clGetDeviceGlobalVariablePointer = api_tests; using clGetDeviceFunctionPointer = api_tests; TEST_F(clGetDeviceGlobalVariablePointer, GivenNullMandatoryArgumentsThenReturnInvalidArgError) { this->pProgram->symbols["A"].gpuAddress = 7U; this->pProgram->symbols["A"].symbol.size = 64U; this->pProgram->symbols["A"].symbol.segment = NEO::SegmentType::GlobalVariables; void *globalRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", 
nullptr, &globalRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, reinterpret_cast(globalRet)); ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr, nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), nullptr, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_PROGRAM, ret); ret = clGetDeviceGlobalVariablePointerINTEL(nullptr, this->pProgram, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_DEVICE, ret); } TEST_F(clGetDeviceGlobalVariablePointer, GivenValidSymbolNameThenReturnProperAddressAndSize) { this->pProgram->symbols["A"].gpuAddress = 7U; this->pProgram->symbols["A"].symbol.size = 64U; this->pProgram->symbols["A"].symbol.segment = NEO::SegmentType::GlobalVariables; void *globalRet = 0; size_t sizeRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &sizeRet, &globalRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, reinterpret_cast(globalRet)); EXPECT_EQ(64U, sizeRet); } TEST_F(clGetDeviceGlobalVariablePointer, GivenFunctionSymbolNameThenReturnInvalidArgError) { this->pProgram->symbols["A"].gpuAddress = 7U; this->pProgram->symbols["A"].symbol.size = 64U; this->pProgram->symbols["A"].symbol.segment = NEO::SegmentType::Instructions; void *globalRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(clGetDeviceGlobalVariablePointer, GivenUnknownSymbolNameThenReturnInvalidArgError) { void *globalRet = 0; auto ret = clGetDeviceGlobalVariablePointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr, &globalRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(clGetDeviceFunctionPointer, GivenNullMandatoryArgumentsThenReturnInvalidArgError) { this->pProgram->symbols["A"].gpuAddress = 7U; this->pProgram->symbols["A"].symbol.size = 64U; 
this->pProgram->symbols["A"].symbol.segment = NEO::SegmentType::Instructions; cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, fptrRet); ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), nullptr, "A", &fptrRet); EXPECT_EQ(CL_INVALID_PROGRAM, ret); ret = clGetDeviceFunctionPointerINTEL(nullptr, this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_INVALID_DEVICE, ret); } TEST_F(clGetDeviceFunctionPointer, GivenValidSymbolNameThenReturnProperAddress) { this->pProgram->symbols["A"].gpuAddress = 7U; this->pProgram->symbols["A"].symbol.size = 64U; this->pProgram->symbols["A"].symbol.segment = NEO::SegmentType::Instructions; cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(7U, fptrRet); } TEST_F(clGetDeviceFunctionPointer, GivenGlobalSymbolNameThenReturnInvalidArgError) { this->pProgram->symbols["A"].gpuAddress = 7U; this->pProgram->symbols["A"].symbol.size = 64U; this->pProgram->symbols["A"].symbol.segment = NEO::SegmentType::GlobalVariables; this->pProgram->symbols["B"].gpuAddress = 7U; this->pProgram->symbols["B"].symbol.size = 64U; this->pProgram->symbols["B"].symbol.segment = NEO::SegmentType::GlobalConstants; cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), this->pProgram, "B", &fptrRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(clGetDeviceFunctionPointer, GivenUnknownSymbolNameThenReturnInvalidArgError) { cl_ulong fptrRet = 0; auto ret = clGetDeviceFunctionPointerINTEL(this->pContext->getDevice(0), 
this->pProgram, "A", &fptrRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_context_info_tests.inl000066400000000000000000000044061363734646600303510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetContextInfoTests; namespace ULT { TEST_F(clGetContextInfoTests, GivenContextNumDevicesParamWhenGettingContextInfoThenNumDevicesIsReturned) { cl_uint numDevices = 0; retVal = clGetContextInfo( pContext, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, nullptr); EXPECT_EQ(1u, numDevices); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetContextInfoTests, GivenContextWithSingleDeviceAndContextDevicesParamWhenGettingContextInfoThenListOfDevicesContainsOneDevice) { retVal = clGetContextInfo( pContext, CL_CONTEXT_DEVICES, 0, nullptr, &retSize); EXPECT_EQ(1 * sizeof(cl_device_id), retSize); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetContextInfoTests, GivenContextWithMultipleDevicesAndContextDevicesParamWhenGettingContextInfoThenListOfDevicesContainsAllDevices) { auto devicesReturned = new cl_device_id[this->num_devices - 1]; cl_uint numDevices = this->num_devices - 1; auto context = clCreateContext( nullptr, this->num_devices - 1, this->devices + 1, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, context); retVal = clGetContextInfo( context, CL_CONTEXT_DEVICES, numDevices * sizeof(cl_device_id), devicesReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (size_t deviceOrdinal = 0; deviceOrdinal < this->num_devices - 1; ++deviceOrdinal) { EXPECT_EQ(this->devices[deviceOrdinal + 1], devicesReturned[deviceOrdinal]); } clReleaseContext(context); delete[] devicesReturned; } TEST(clGetContextInfo, GivenNullContextWhenGettingContextInfoThenInvalidContextErrorIsReturned) { 
cl_device_id pDevices[1]; cl_uint numDevices = 1; auto retVal = clGetContextInfo( nullptr, CL_CONTEXT_DEVICES, numDevices * sizeof(cl_device_id), pDevices, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_device_and_host_timer.inl000066400000000000000000000110001363734646600307320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_ostime.h" #include "cl_api_tests.h" using namespace NEO; struct FailOSTime : public MockOSTime { public: bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override { return false; } bool getCpuTime(uint64_t *timeStamp) override { return false; }; }; typedef api_tests clGetDeviceAndHostTimerTest; typedef api_tests clGetHostTimerTest; namespace ULT { TEST_F(clGetDeviceAndHostTimerTest, GivenNullDeviceWhenGettingDeviceAndHostTimerThenInvalidDeviceErrorIsReturned) { cl_ulong device_timestamp = 0; cl_ulong host_timestamp = 0; retVal = clGetDeviceAndHostTimer( nullptr, &device_timestamp, &host_timestamp); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetDeviceAndHostTimerTest, GivenNullHostTimerWhenGettingDeviceAndHostTimerThenInvalidValueErrorIsReturned) { cl_ulong device_timestamp = 0; retVal = clGetDeviceAndHostTimer( devices[testedRootDeviceIndex], &device_timestamp, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetDeviceAndHostTimerTest, GivenNullDevicesTimerWhenGettingDeviceAndHostTimerThenInvalidValueErrorIsReturned) { cl_ulong host_timestamp = 0; retVal = clGetDeviceAndHostTimer( devices[testedRootDeviceIndex], nullptr, &host_timestamp); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetDeviceAndHostTimerTest, GivenValidOSTimeWhenGettingDeviceAndHostTimerThenSuccessIsReturned) { cl_ulong device_timestamp = 0; cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new 
MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new MockOSTime()); retVal = clGetDeviceAndHostTimer( mDev, &device_timestamp, &host_timestamp); EXPECT_GT(device_timestamp, zero_timestamp); EXPECT_GT(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_SUCCESS); delete mDev; } TEST_F(clGetDeviceAndHostTimerTest, GivenInvalidOSTimeWhenGettingDeviceAndHostTimerThenOutOfResourcesErrorIsReturned) { cl_ulong device_timestamp = 0; cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new FailOSTime()); retVal = clGetDeviceAndHostTimer( mDev, &device_timestamp, &host_timestamp); EXPECT_EQ(device_timestamp, zero_timestamp); EXPECT_EQ(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_OUT_OF_RESOURCES); delete mDev; } TEST_F(clGetHostTimerTest, GivenNullDeviceWhenGettingHostTimerThenInvalidDeviceErrorIsReturned) { cl_ulong host_timestamp = 0; retVal = clGetHostTimer( nullptr, &host_timestamp); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetHostTimerTest, GivenNullHostTimerWhenGettingHostTimerThenInvalidValueErrorIsReturned) { retVal = clGetHostTimer( devices[testedRootDeviceIndex], nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetHostTimerTest, GivenCorrectParametersWhenGettingHostTimerThenSuccessIsReturned) { cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; retVal = clGetHostTimer( devices[testedRootDeviceIndex], &host_timestamp); EXPECT_GE(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_SUCCESS); } TEST_F(clGetHostTimerTest, GivenValidOSTimeWhenGettingHostTimerThenSuccessIsReturned) { cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new MockOSTime()); retVal = clGetHostTimer( mDev, &host_timestamp); EXPECT_GE(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_SUCCESS); delete mDev; } 
TEST_F(clGetHostTimerTest, GivenInvalidOSTimeWhenGettingHostTimerThenOutOfResourcesErrorIsReturned) { cl_ulong host_timestamp = 0; cl_ulong zero_timestamp = 0; auto mDev = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; mDev->setOSTime(new FailOSTime()); retVal = clGetHostTimer( mDev, &host_timestamp); EXPECT_EQ(host_timestamp, zero_timestamp); EXPECT_EQ(retVal, CL_OUT_OF_RESOURCES); delete mDev; } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_device_ids_tests.inl000066400000000000000000000163451363734646600277550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "cl_api_tests.h" using namespace NEO; using clGetDeviceIDsTests = api_tests; namespace ULT { TEST_F(clGetDeviceIDsTests, GivenZeroNumEntriesWhenGettingDeviceIdsThenNumberOfDevicesIsGreaterThanZero) { cl_uint numDevices = 0; retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, 0, nullptr, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numDevices, (cl_uint)0); } TEST_F(clGetDeviceIDsTests, GivenNonNullDevicesWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, numEntries, pDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetDeviceIDsTests, GivenNullPlatformWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_GPU, numEntries, pDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetDeviceIDsTests, 
GivenInvalidDeviceTypeWhenGettingDeviceIdsThenInvalidDeivceTypeErrorIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; retVal = clGetDeviceIDs(pPlatform, 0x0f00, numEntries, pDevices, nullptr); EXPECT_EQ(CL_INVALID_DEVICE_TYPE, retVal); } TEST_F(clGetDeviceIDsTests, GivenZeroNumEntriesAndNonNullDevicesWhenGettingDeviceIdsThenInvalidValueErrorIsReturned) { cl_device_id pDevices[1]; retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, 0, pDevices, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetDeviceIDsTests, GivenInvalidPlatformWhenGettingDeviceIdsThenInvalidPlatformErrorIsReturned) { cl_uint numEntries = 1; cl_device_id pDevices[1]; uint32_t trash[6] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; cl_platform_id p = reinterpret_cast(trash); retVal = clGetDeviceIDs(p, CL_DEVICE_TYPE_GPU, numEntries, pDevices, nullptr); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(clGetDeviceIDsTests, GivenDeviceTypeAllWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numDevices = 0; cl_uint numEntries = 1; cl_device_id pDevices[1]; retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_ALL, numEntries, pDevices, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numDevices, (cl_uint)0); } TEST_F(clGetDeviceIDsTests, GivenDeviceTypeDefaultWhenGettingDeviceIdsThenDeviceIdIsReturned) { cl_uint numDevices = 0; cl_uint numEntries = 1; cl_device_id pDevices[1]; retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_DEFAULT, numEntries, pDevices, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numDevices, (cl_uint)0); } TEST_F(clGetDeviceIDsTests, GivenDeviceTypeCpuWhenGettingDeviceIdsThenDeviceNotFoundErrorIsReturned) { cl_uint numDevices = 0; retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_CPU, 0, nullptr, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(numDevices, (cl_uint)0); } TEST(clGetDeviceIDsTest, givenMultipleRootDevicesWhenGetDeviceIdsThenAllRootDevicesAreReturned) { platformsImpl.clear(); 
constexpr auto numRootDevices = 3u; VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); cl_uint numDevices = 0; cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(numEntries, numDevices); for (auto i = 0u; i < numRootDevices; i++) { EXPECT_EQ(devices[i], platform()->getClDevice(i)); } } TEST(clGetDeviceIDsTest, givenMultipleRootDevicesWhenGetDeviceIdsButNumEntriesIsLowerThanNumDevicesThenSubsetOfRootDevicesIsReturned) { platformsImpl.clear(); constexpr auto numRootDevices = 3u; VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); cl_uint maxNumDevices; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, 0, nullptr, &maxNumDevices); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(numRootDevices, maxNumDevices); cl_uint numDevices = 0; cl_uint numEntries = numRootDevices - 1; cl_device_id devices[numRootDevices]; const auto dummyDevice = reinterpret_cast(0x1357); for (auto i = 0u; i < numRootDevices; i++) { devices[i] = dummyDevice; } retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_LT(numDevices, maxNumDevices); EXPECT_EQ(numEntries, numDevices); for (auto i = 0u; i < numEntries; i++) { EXPECT_EQ(devices[i], platform()->getClDevice(i)); } EXPECT_EQ(devices[numEntries], dummyDevice); } TEST(clGetDeviceIDsTest, givenMultipleRootDevicesAndLimitedNumberOfReturnedDevicesWhenGetDeviceIdsThenLimitedNumberOfRootDevicesIsReturned) { platformsImpl.clear(); constexpr auto numRootDevices = 3u; VariableBackup backup(&ultHwConfig); 
ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); DebugManager.flags.LimitAmountOfReturnedDevices.set(numRootDevices - 1); cl_uint numDevices = 0; cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; const auto dummyDevice = reinterpret_cast(0x1357); for (auto i = 0u; i < numRootDevices; i++) { devices[i] = dummyDevice; } auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(numEntries - 1, numDevices); for (auto i = 0u; i < numDevices; i++) { EXPECT_EQ(devices[i], platform()->getClDevice(i)); } EXPECT_EQ(devices[numDevices], dummyDevice); } TEST(clGetDeviceIDsNegativeTests, whenFailToCreateDeviceThenclGetDeviceIDsReturnsNoDeviceError) { VariableBackup createFuncBackup{&DeviceFactory::createRootDeviceFunc}; DeviceFactory::createRootDeviceFunc = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr { return nullptr; }; platformsImpl.clear(); constexpr auto numRootDevices = 3u; cl_uint numDevices = 0; cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; auto retVal = clGetDeviceIDs(nullptr, CL_DEVICE_TYPE_ALL, numEntries, devices, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(numDevices, 0u); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_device_info_tests.inl000066400000000000000000000241261363734646600301250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "cl_api_tests.h" #include using namespace NEO; using clGetDeviceInfoTests = api_tests; namespace ULT { TEST_F(clGetDeviceInfoTests, givenNeoDeviceWhenAskedForSliceCountThenNumberOfSlicesIsReturned) { cl_device_info paramName = 0; size_t 
paramSize = 0; void *paramValue = nullptr; size_t paramRetSize = 0; size_t numSlices = 0; paramName = CL_DEVICE_SLICE_COUNT_INTEL; retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], paramName, 0, nullptr, ¶mRetSize); EXPECT_EQ(sizeof(size_t), paramRetSize); paramSize = paramRetSize; paramValue = &numSlices; retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ(defaultHwInfo->gtSystemInfo.SliceCount, numSlices); } TEST_F(clGetDeviceInfoTests, GivenGpuDeviceWhenGettingDeviceInfoThenDeviceTypeGpuIsReturned) { cl_device_info paramName = 0; size_t paramSize = 0; void *paramValue = nullptr; size_t paramRetSize = 0; cl_device_type deviceType = CL_DEVICE_TYPE_CPU; // set to wrong value paramName = CL_DEVICE_TYPE; paramSize = sizeof(cl_device_type); paramValue = &deviceType; retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_DEVICE_TYPE_GPU), deviceType); } TEST_F(clGetDeviceInfoTests, GivenNullDeviceWhenGettingDeviceInfoThenInvalidDeviceErrorIsReturned) { size_t paramRetSize = 0; retVal = clGetDeviceInfo( nullptr, CL_DEVICE_TYPE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(clGetDeviceInfoTests, givenOpenCLDeviceWhenAskedForSupportedSvmTypeCorrectValueIsReturned) { cl_device_svm_capabilities svmCaps; retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svmCaps, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); const HardwareInfo &hwInfo = pPlatform->getClDevice(testedRootDeviceIndex)->getHardwareInfo(); cl_device_svm_capabilities expectedCaps = 0; if (hwInfo.capabilityTable.ftrSvm != 0) { if (hwInfo.capabilityTable.ftrSupportsCoherency != 0) { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS; } else { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER; } } 
EXPECT_EQ(svmCaps, expectedCaps); } TEST(clGetDeviceFineGrainedTests, givenDebugFlagForFineGrainedOverrideWhenItIsUsedWithZeroThenNoFineGrainSupport) { DebugManagerStateRestore restorer; DebugManager.flags.ForceFineGrainedSVMSupport.set(0); cl_device_svm_capabilities svmCaps; auto hwInfo = *defaultHwInfo; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svmCaps, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_device_svm_capabilities expectedCaps = 0; if (hwInfo.capabilityTable.ftrSvm != 0) { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER; } EXPECT_EQ(svmCaps, expectedCaps); } TEST(clGetDeviceFineGrainedTests, givenDebugFlagForFineGrainedOverrideWhenItIsUsedWithOneThenThereIsFineGrainSupport) { DebugManagerStateRestore restorer; DebugManager.flags.ForceFineGrainedSVMSupport.set(1); cl_device_svm_capabilities svmCaps; auto hwInfo = *defaultHwInfo; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto retVal = clGetDeviceInfo( pDevice.get(), CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svmCaps, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_device_svm_capabilities expectedCaps = 0; if (hwInfo.capabilityTable.ftrSvm != 0) { expectedCaps = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS; } EXPECT_EQ(svmCaps, expectedCaps); } TEST_F(clGetDeviceInfoTests, givenNeoDeviceWhenAskedForDriverVersionThenNeoIsReturned) { cl_device_info paramName = 0; size_t paramSize = 0; void *paramValue = nullptr; size_t paramRetSize = 0; cl_uint driverVersion = 0; paramName = CL_DEVICE_DRIVER_VERSION_INTEL; retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], paramName, 0, nullptr, ¶mRetSize); EXPECT_EQ(sizeof(cl_uint), paramRetSize); paramSize = paramRetSize; paramValue = &driverVersion; retVal = clGetDeviceInfo( 
devices[testedRootDeviceIndex], paramName, paramSize, paramValue, ¶mRetSize); EXPECT_EQ((cl_uint)CL_DEVICE_DRIVER_VERSION_INTEL_NEO1, driverVersion); } TEST_F(clGetDeviceInfoTests, GivenClDeviceExtensionsParamWhenGettingDeviceInfoThenAllExtensionsAreListed) { size_t paramRetSize = 0; cl_int retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], CL_DEVICE_EXTENSIONS, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); auto paramValue = std::make_unique(paramRetSize); retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], CL_DEVICE_EXTENSIONS, paramRetSize, paramValue.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); std::string extensionString(paramValue.get()); std::string supportedExtensions[] = { "cl_khr_byte_addressable_store ", "cl_khr_fp16 ", "cl_khr_global_int32_base_atomics ", "cl_khr_global_int32_extended_atomics ", "cl_khr_icd ", "cl_khr_local_int32_base_atomics ", "cl_khr_local_int32_extended_atomics ", "cl_intel_subgroups ", "cl_intel_required_subgroup_size ", "cl_intel_subgroups_short ", "cl_khr_spir ", "cl_intel_accelerator ", "cl_intel_driver_diagnostics ", "cl_khr_priority_hints ", "cl_khr_throttle_hints ", "cl_khr_create_command_queue ", "cl_intel_subgroups_char ", "cl_intel_subgroups_long "}; for (auto element = 0u; element < sizeof(supportedExtensions) / sizeof(supportedExtensions[0]); element++) { auto foundOffset = extensionString.find(supportedExtensions[element]); EXPECT_TRUE(foundOffset != std::string::npos); } } TEST_F(clGetDeviceInfoTests, GivenClDeviceIlVersionParamAndOcl21WhenGettingDeviceInfoThenSpirv12IsReturned) { size_t paramRetSize = 0; ClDevice *pDevice = castToObject(devices[testedRootDeviceIndex]); if (pDevice->getSupportedClVersion() < 21) return; cl_int retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], CL_DEVICE_IL_VERSION, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); auto paramValue = std::make_unique(paramRetSize); retVal = clGetDeviceInfo( 
devices[testedRootDeviceIndex], CL_DEVICE_IL_VERSION, paramRetSize, paramValue.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("SPIR-V_1.2 ", paramValue.get()); } //------------------------------------------------------------------------------ struct GetDeviceInfoP : public ApiFixture<>, public ::testing::TestWithParam { void SetUp() override { param = GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_device_info param; }; typedef GetDeviceInfoP GetDeviceInfoStr; TEST_P(GetDeviceInfoStr, GivenStringTypeParamWhenGettingDeviceInfoThenSuccessIsReturned) { size_t paramRetSize = 0; cl_int retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], param, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); auto paramValue = std::make_unique(paramRetSize); retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], param, paramRetSize, paramValue.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } static cl_device_info deviceInfoStrParams[] = { CL_DEVICE_BUILT_IN_KERNELS, CL_DEVICE_NAME, CL_DEVICE_OPENCL_C_VERSION, CL_DEVICE_PROFILE, CL_DEVICE_VENDOR, CL_DEVICE_VERSION, CL_DRIVER_VERSION}; INSTANTIATE_TEST_CASE_P( api, GetDeviceInfoStr, testing::ValuesIn(deviceInfoStrParams)); typedef GetDeviceInfoP GetDeviceInfoVectorWidth; TEST_P(GetDeviceInfoVectorWidth, GivenParamTypeVectorWhenGettingDeviceInfoThenSizeIsGreaterThanZeroAndValueIsGreaterThanZero) { cl_uint paramValue = 0; size_t paramRetSize = 0; auto retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], param, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramRetSize); retVal = clGetDeviceInfo( devices[testedRootDeviceIndex], param, paramRetSize, ¶mValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(paramValue, 0u); } cl_device_info devicePreferredVector[] = { CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT}; INSTANTIATE_TEST_CASE_P( api, GetDeviceInfoVectorWidth, testing::ValuesIn(devicePreferredVector)); } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_event_profiling_info_tests.inl000066400000000000000000000273131363734646600320610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "test.h" using namespace NEO; template class EventFixture : public ApiFixture<>, public T { public: void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; typedef EventFixture<::testing::Test> clEventProfilingTests; cl_int ProfilingInfo[] = { CL_PROFILING_COMMAND_QUEUED, CL_PROFILING_COMMAND_SUBMIT, CL_PROFILING_COMMAND_START, CL_PROFILING_COMMAND_END, CL_PROFILING_COMMAND_COMPLETE}; TEST_F(clEventProfilingTests, GivenInvalidParamNameWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_int retVal = CL_PROFILING_INFO_NOT_AVAILABLE; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); retVal = clGetEventProfilingInfo(event, 0, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, retVal); delete pEvent; } TEST_F(clEventProfilingTests, 
GivenInvalidParamValueSizeWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_int retVal = CL_PROFILING_INFO_NOT_AVAILABLE; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); retVal = clGetEventProfilingInfo(event, ProfilingInfo[0], param_value_size - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, retVal); delete pEvent; } TEST_F(clEventProfilingTests, GivenValidParametersWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); for (auto infoId : ::ProfilingInfo) { cl_int retVal = clGetEventProfilingInfo(event, infoId, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); } delete pEvent; } TEST_F(clEventProfilingTests, GivenNullParamValueSizeRetWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); pEvent->setStatus(CL_COMPLETE); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; cl_event event = (cl_event)pEvent; pEvent->setProfilingEnabled(true); cl_int retVal = clGetEventProfilingInfo(event, ProfilingInfo[0], param_value_size, ¶m_value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pEvent; } TEST_F(clEventProfilingTests, GivenNullEventWhenGettingEventProfilingInfoThenInvalidEventErrorIsReturned) { auto retVal = clGetEventProfilingInfo(nullptr, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), 0u, nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST(clGetEventProfilingInfo, GivenNullParamValueAndZeroParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); size_t 
param_value_size = 0; pEvent->setStatus(CL_COMPLETE); pEvent->setProfilingEnabled(true); cl_event event = (cl_event)pEvent; cl_int retVal = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, param_value_size, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pEvent; } TEST(clGetEventProfilingInfo, GivenNullParamValueAndCorrectParamValueSizeWhenGettingEventProfilingInfoThenSuccessIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); size_t param_value_size = sizeof(cl_ulong); pEvent->setStatus(CL_COMPLETE); pEvent->setProfilingEnabled(true); cl_event event = (cl_event)pEvent; cl_int retVal = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, param_value_size, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pEvent; } TEST(clGetEventProfilingInfo, GivenUserEventWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) { UserEvent *ue = new UserEvent(); size_t param_value_size = sizeof(cl_ulong); cl_ulong param_value; size_t param_value_size_ret; cl_event event = (cl_event)ue; for (auto infoId : ::ProfilingInfo) { cl_int retVal = clGetEventProfilingInfo(event, infoId, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_PROFILING_INFO_NOT_AVAILABLE, retVal); } delete ue; } TEST(clGetEventProfilingInfo, GivenStartAndEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); cl_ulong startTime = 1; cl_ulong endTime = 2; cl_ulong delta = 0; delta = pEvent->getDelta(startTime, endTime); EXPECT_EQ(endTime - startTime, delta); delete pEvent; } TEST(clGetEventProfilingInfo, GivenStartTimeGreaterThenEndTimeWhenGettingDeltaThenCorrectDeltaIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); cl_ulong startTime = 2; cl_ulong endTime = 1; cl_ulong delta = 0; cl_ulong timeMax = 0xffffffffULL; delta = pEvent->getDelta(startTime, endTime); EXPECT_EQ((timeMax + (endTime - startTime)), delta); delete pEvent; } TEST(clGetEventProfilingInfo, 
givenTimestampThatOverlapWhenGetDeltaIsCalledThenProperDeltaIsComputed) { Event *pEvent = new Event(nullptr, 0, 0, 0); cl_ulong TimeMax = 0xffffffffULL; cl_ulong realDelta = 10; cl_ulong startTime = TimeMax - realDelta; cl_ulong endTime = 2; cl_ulong Delta = 0; Delta = pEvent->getDelta(startTime, endTime); EXPECT_EQ(realDelta + endTime, Delta); delete pEvent; } TEST(clGetEventProfilingInfo, GivenProfilingDisabledWhenCalculatingProfilingDataThenFalseIsReturned) { auto *pEvent = new MockEvent(nullptr, 0, 0, 0); EXPECT_FALSE(pEvent->calcProfilingData()); delete pEvent; } TEST(clGetEventProfilingInfo, GivenProfilingEnabledWhenCalculatingProfilingDataThenFalseIsNotReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); cl_bool Result = pEvent->isProfilingEnabled(); EXPECT_EQ(((cl_bool)CL_FALSE), Result); pEvent->setProfilingEnabled(true); Result = pEvent->isProfilingEnabled(); EXPECT_NE(((cl_bool)CL_FALSE), Result); delete pEvent; } TEST(clGetEventProfilingInfo, GivenProfilingEnabledAndUserEventsWhenCalculatingProfilingDataThenFalseIsReturned) { Event *pEvent = new UserEvent(); cl_bool Result = pEvent->isProfilingEnabled(); EXPECT_EQ(((cl_bool)CL_FALSE), Result); delete pEvent; } TEST(clGetEventProfilingInfo, GivenPerfCountersEnabledWhenCheckingPerfCountersThenTrueIsReturned) { Event *pEvent = new Event(nullptr, 0, 0, 0); bool Result = pEvent->isPerfCountersEnabled(); EXPECT_FALSE(Result); pEvent->setPerfCountersEnabled(true); Result = pEvent->isPerfCountersEnabled(); EXPECT_TRUE(Result); delete pEvent; } class clEventProfilingWithPerfCountersTests : public DeviceInstrumentationFixture, public PerformanceCountersDeviceFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersDeviceFixture::SetUp(); DeviceInstrumentationFixture::SetUp(true); cl_device_id deviceId = device.get(); cl_int retVal = CL_SUCCESS; context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal)); commandQueue = 
std::make_unique(context.get(), device.get(), nullptr); event = std::make_unique(commandQueue.get(), 0, 0, 0); event->setStatus(CL_COMPLETE); commandQueue->getPerfCounters()->getApiReport(0, nullptr, ¶m_value_size, true); event->setProfilingEnabled(true); eventCl = static_cast(event.get()); } void TearDown() override { PerformanceCountersDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr commandQueue; std::unique_ptr event; size_t param_value_size = 0; cl_event eventCl = nullptr; cl_ulong param_value = 0; size_t param_value_size_ret = 0; }; TEST_F(clEventProfilingWithPerfCountersTests, GivenDisabledPerfCountersWhenGettingEventProfilingInfoThenInvalidValueErrorIsReturned) { event->setPerfCountersEnabled(false); cl_int retVal = clGetEventProfilingInfo(eventCl, CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clEventProfilingWithPerfCountersTests, GivenEnabledPerfCountersWhenGettingEventProfilingInfoThenSuccessIsReturned) { event->setPerfCountersEnabled(true); cl_int retVal = clGetEventProfilingInfo(eventCl, CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, param_value_size, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEventProfilingWithPerfCountersTests, GivenEnabledPerfCountersAndIncorrectParamValueSizeWhenGettingEventProfilingInfoThenProfilingInfoNotAvailableErrorIsReturned) { event->setPerfCountersEnabled(true); cl_int retVal = clGetEventProfilingInfo(eventCl, CL_PROFILING_COMMAND_PERFCOUNTERS_INTEL, param_value_size - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_PROFILING_INFO_NOT_AVAILABLE, retVal); } cl_get_extension_function_address_for_platform_tests.inl000066400000000000000000000123201363734646600357650ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests 
clGetExtensionFunctionAddressForPlatformTests; namespace ULT { TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenNullPlatformWhenGettingExtensionFunctionThenNullIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(nullptr, "clCreateAcceleratorINTEL"); EXPECT_EQ(retVal, nullptr); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenNonExistentExtensionWhenGettingExtensionFunctionThenNullIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "__some__function__"); EXPECT_EQ(retVal, nullptr); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClCreateAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clCreateAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetAcceleratorInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetAcceleratorInfoINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetAcceleratorInfoINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClRetainAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clRetainAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clRetainAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClReleaseAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clReleaseAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clReleaseAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClCreateProgramWithILKHRWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, 
"clCreateProgramWithILKHR"); EXPECT_EQ(retVal, reinterpret_cast(clCreateProgramWithILKHR)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClCreateTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clCreateTracingHandleINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateTracingHandleINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClSetTracingPointINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clSetTracingPointINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clSetTracingPointINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClDestroyTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clDestroyTracingHandleINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clDestroyTracingHandleINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClEnableTracingINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clEnableTracingINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnableTracingINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClDisableTracingINTELLWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clDisableTracingINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clDisableTracingINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetTracingStateINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetTracingStateINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetTracingStateINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, 
GivenClGetKernelSuggestedLocalWorkSizeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetKernelSuggestedLocalWorkSizeINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetKernelSuggestedLocalWorkSizeINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClGetKernelMaxConcurrentWorkGroupCountINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clGetKernelMaxConcurrentWorkGroupCountINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetKernelMaxConcurrentWorkGroupCountINTEL)); } TEST_F(clGetExtensionFunctionAddressForPlatformTests, GivenClEnqueueNDCountKernelINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddressForPlatform(pPlatform, "clEnqueueNDCountKernelINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueNDCountKernelINTEL)); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_extension_function_address_tests.inl000066400000000000000000000243261363734646600333030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetExtensionFunctionAddressTests; namespace ULT { TEST_F(clGetExtensionFunctionAddressTests, GivenNonExistentExtensionWhenGettingExtensionFunctionThenNullIsReturned) { auto retVal = clGetExtensionFunctionAddress("__some__function__"); EXPECT_EQ(nullptr, retVal); } TEST_F(clGetExtensionFunctionAddressTests, GivenClIcdGetPlatformIDsKHRWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clIcdGetPlatformIDsKHR"); EXPECT_EQ(retVal, reinterpret_cast(clIcdGetPlatformIDsKHR)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = 
clGetExtensionFunctionAddress("clCreateAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClGetAcceleratorInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetAcceleratorInfoINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetAcceleratorInfoINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClRetainAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clRetainAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clRetainAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClReleaseAcceleratorINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clReleaseAcceleratorINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clReleaseAcceleratorINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClCreatePerfCountersCommandQueueINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clCreatePerfCountersCommandQueueINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreatePerfCountersCommandQueueINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClSetPerformanceConfigurationINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clSetPerformanceConfigurationINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clSetPerformanceConfigurationINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateBufferWithPropertiesINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto functionPointer = clGetExtensionFunctionAddress("clCreateBufferWithPropertiesINTEL"); EXPECT_EQ(functionPointer, reinterpret_cast(clCreateBufferWithPropertiesINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, 
GivenClCreateImageWithPropertiesINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto functionPointer = clGetExtensionFunctionAddress("clCreateImageWithPropertiesINTEL"); EXPECT_EQ(functionPointer, reinterpret_cast(clCreateImageWithPropertiesINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, givenClAddCommentToAubIntelAsInputWhenFunctionIsCalledThenProperPointerIsReturned) { auto functionPointer = clGetExtensionFunctionAddress("clAddCommentINTEL"); EXPECT_EQ(functionPointer, reinterpret_cast(clAddCommentINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClCreateTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clCreateTracingHandleINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateTracingHandleINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClSetTracingPointINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clSetTracingPointINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clSetTracingPointINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClDestroyTracingHandleINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clDestroyTracingHandleINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clDestroyTracingHandleINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnableTracingINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clEnableTracingINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnableTracingINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClDisableTracingINTELLWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clDisableTracingINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clDisableTracingINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, 
GivenClGetTracingStateINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetTracingStateINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetTracingStateINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClHostMemAllocINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clHostMemAllocINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clHostMemAllocINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClDeviceMemAllocINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clDeviceMemAllocINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clDeviceMemAllocINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClSharedMemAllocINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clSharedMemAllocINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clSharedMemAllocINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClMemFreeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clMemFreeINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clMemFreeINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClMemBlockingFreeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clMemBlockingFreeINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clMemBlockingFreeINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClGetMemAllocInfoINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetMemAllocInfoINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetMemAllocInfoINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClSetKernelArgMemPointerINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clSetKernelArgMemPointerINTEL"); EXPECT_EQ(retVal, 
reinterpret_cast(clSetKernelArgMemPointerINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemsetINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueMemsetINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueMemsetINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemFillINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueMemFillINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueMemFillINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemcpyINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueMemcpyINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueMemcpyINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMigrateMemINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueMigrateMemINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueMigrateMemINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueMemAdviseINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueMemAdviseINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueMemAdviseINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClGetDeviceGlobalVariablePointerINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetDeviceGlobalVariablePointerINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetDeviceGlobalVariablePointerINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClGetDeviceFunctionPointerINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetDeviceFunctionPointerINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetDeviceFunctionPointerINTEL)); } 
TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelSuggestedLocalWorkSizeINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetKernelSuggestedLocalWorkSizeINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetKernelSuggestedLocalWorkSizeINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClGetKernelMaxConcurrentWorkGroupCountINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clGetKernelMaxConcurrentWorkGroupCountINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetKernelMaxConcurrentWorkGroupCountINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenClEnqueueNDCountKernelINTELWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clEnqueueNDCountKernelINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueNDCountKernelINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, GivenCSlSetProgramSpecializationConstantWhenGettingExtensionFunctionThenCorrectAddressIsReturned) { auto retVal = clGetExtensionFunctionAddress("clSetProgramSpecializationConstant"); EXPECT_EQ(retVal, reinterpret_cast(clSetProgramSpecializationConstant)); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_image_info_tests.inl000066400000000000000000000335511363734646600277520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct clGetImageInfoTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; 
imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; image = clCreateImage(pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); } void TearDown() override { retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem image; }; TEST_F(clGetImageInfoTests, GivenBufferWhenGettingImageInfoThenInvalidMemObjectErrorIsReturned) { size_t paramRetSize = 0; auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); clReleaseMemObject(buffer); } TEST_F(clGetImageInfoTests, GivenNullWhenGettingImageInfoThenInvalidMemObjectErrorIsReturned) { size_t paramRetSize = 0; retVal = clGetImageInfo(nullptr, CL_IMAGE_ELEMENT_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clGetImageInfoTests, GivenInvalidParamNameWhenGettingImageInfoThenInvalidValueErrorIsReturned) { size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_MEM_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); ASSERT_EQ(0u, paramRetSize); } TEST_F(clGetImageInfoTests, GivenClImageFormatWhenGettingImageInfoThenImageFormatIsReturned) { cl_image_format imgFmtRet; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_FORMAT, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_image_format), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_FORMAT, paramRetSize, &imgFmtRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageFormat.image_channel_data_type, 
imgFmtRet.image_channel_data_type); ASSERT_EQ(this->imageFormat.image_channel_order, imgFmtRet.image_channel_order); } TEST_F(clGetImageInfoTests, GivenClImageElementSizeWhenGettingImageInfoThenSizeOfImageElementIsReturned) { size_t elemSize = 4; size_t sizeRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, paramRetSize, &sizeRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(elemSize, sizeRet); } TEST_F(clGetImageInfoTests, GivenClImageRowPitchWhenGettingImageInfoThenSizeOfRowIsReturned) { size_t rowPitchRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_ROW_PITCH, paramRetSize, &rowPitchRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, rowPitchRet); } TEST_F(clGetImageInfoTests, GivenClImageSlicePitchAnd2dImageWhenGettingImageInfoThenZeroIsReturned) { size_t slicePitchRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_SLICE_PITCH, paramRetSize, &slicePitchRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, slicePitchRet); } TEST_F(clGetImageInfoTests, GivenClImageWidthWhenGettingImageInfoThenWidthOfImageIsReturned) { size_t widthRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, paramRetSize, &widthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.image_width, widthRet); } TEST_F(clGetImageInfoTests, 
GivenImageWithMipMapsWhenGettingImageInfoThenWidthAndHeightOfImageAreShifted) { auto initialWidth = this->imageDesc.image_width; auto initialHeight = this->imageDesc.image_height; auto pImage = castToObject(image); size_t returnValue = 0; size_t paramRetSize = sizeof(size_t); for (int mipLevel = 0; mipLevel < 10; mipLevel++) { pImage->setBaseMipLevel(mipLevel); auto expectedWidth = initialWidth >> mipLevel; expectedWidth = expectedWidth == 0 ? 1 : expectedWidth; auto expectedHeight = initialHeight >> mipLevel; expectedHeight = expectedHeight == 0 ? 1 : expectedHeight; retVal = clGetImageInfo(image, CL_IMAGE_WIDTH, paramRetSize, &returnValue, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedWidth, returnValue); retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, paramRetSize, &returnValue, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedHeight, returnValue); } } TEST_F(clGetImageInfoTests, GivenClImageHeightWhenGettingImageInfoThenHeightOfImageIsReturned) { size_t heightRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, paramRetSize, &heightRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.image_height, heightRet); } TEST_F(clGetImageInfoTests, Given3dImageWithMipMapsWhenGettingImageInfoThenWidthAndHeightOfImageAreShifted) { size_t widthRet; size_t expectedWidth; size_t heightRet; size_t expectedHeight; size_t depthRet; size_t expectedDepth; cl_image_format imageFormat2; cl_image_desc imageDesc2; cl_mem image2; imageFormat2.image_channel_order = CL_RGBA; imageFormat2.image_channel_data_type = CL_UNORM_INT8; imageDesc2.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc2.image_width = 8; imageDesc2.image_height = 8; imageDesc2.image_depth = 4; imageDesc2.image_array_size = 1; imageDesc2.image_row_pitch = 0; imageDesc2.image_slice_pitch = 0; imageDesc2.num_mip_levels = 5; 
imageDesc2.num_samples = 0; imageDesc2.mem_object = nullptr; image2 = clCreateImage(pContext, CL_MEM_READ_WRITE, &imageFormat2, &imageDesc2, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image2); auto pImgObj = castToObject(image2); for (cl_uint n = 0; n <= imageDesc2.num_mip_levels; n++) { pImgObj->setBaseMipLevel(n); retVal = clGetImageInfo(image2, CL_IMAGE_WIDTH, sizeof(widthRet), &widthRet, NULL); EXPECT_EQ(CL_SUCCESS, retVal); expectedWidth = imageDesc2.image_width >> n; expectedWidth = (expectedWidth == 0) ? 1 : expectedWidth; ASSERT_EQ(expectedWidth, widthRet); retVal = clGetImageInfo(image2, CL_IMAGE_HEIGHT, sizeof(heightRet), &heightRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); expectedHeight = imageDesc2.image_height >> n; expectedHeight = (expectedHeight == 0) ? 1 : expectedHeight; ASSERT_EQ(expectedHeight, heightRet); retVal = clGetImageInfo(image2, CL_IMAGE_DEPTH, sizeof(depthRet), &depthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); expectedDepth = imageDesc2.image_depth >> n; expectedDepth = (expectedDepth == 0) ? 
1 : expectedDepth; ASSERT_EQ(expectedDepth, depthRet); } retVal = clReleaseMemObject(image2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetImageInfoTests, Given1dImageWithMipMapsWhenGettingImageInfoThenWidthAndHeightOfImageAreShifted) { size_t widthRet; size_t expectedWidth; size_t heightRet; size_t depthRet; cl_image_format imageFormat2; cl_image_desc imageDesc2; cl_mem image2; imageFormat2.image_channel_order = CL_RGBA; imageFormat2.image_channel_data_type = CL_UNORM_INT8; imageDesc2.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc2.image_width = 8; imageDesc2.image_height = 1; imageDesc2.image_depth = 1; imageDesc2.image_array_size = 1; imageDesc2.image_row_pitch = 0; imageDesc2.image_slice_pitch = 0; imageDesc2.num_mip_levels = 5; imageDesc2.num_samples = 0; imageDesc2.mem_object = nullptr; image2 = clCreateImage(pContext, CL_MEM_READ_WRITE, &imageFormat2, &imageDesc2, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image2); auto pImgObj = castToObject(image2); for (cl_uint n = 0; n <= imageDesc2.num_mip_levels; n++) { pImgObj->setBaseMipLevel(n); retVal = clGetImageInfo(image2, CL_IMAGE_WIDTH, sizeof(widthRet), &widthRet, NULL); EXPECT_EQ(CL_SUCCESS, retVal); expectedWidth = imageDesc2.image_width >> n; expectedWidth = (expectedWidth == 0) ? 
1 : expectedWidth; ASSERT_EQ(expectedWidth, widthRet); retVal = clGetImageInfo(image2, CL_IMAGE_HEIGHT, sizeof(heightRet), &heightRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, heightRet); retVal = clGetImageInfo(image2, CL_IMAGE_DEPTH, sizeof(depthRet), &depthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, depthRet); } retVal = clReleaseMemObject(image2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetImageInfoTests, GivenClImageDepthAnd2dImageWhenGettingImageInfoThenZeroIsReturned) { size_t depthRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_DEPTH, paramRetSize, &depthRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0U, depthRet); } TEST_F(clGetImageInfoTests, GivenClImageArraySizeAndNonArrayImageWhenGettingImageInfoThenZeroIsReturned) { size_t arraySizeRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(size_t), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_ARRAY_SIZE, paramRetSize, &arraySizeRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, arraySizeRet); } TEST_F(clGetImageInfoTests, GivenClImageBufferWhenGettingImageInfoThenBufferIsReturned) { cl_mem bufferRet = nullptr; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_BUFFER, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_mem), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_BUFFER, paramRetSize, &bufferRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.buffer, bufferRet); } TEST_F(clGetImageInfoTests, GivenClImageNumMipLevelsWhenGettingImageInfoThenCorrectMipMapLevelIsReturned) { cl_uint numMipLevelRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, 
retVal); ASSERT_EQ(sizeof(cl_uint), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_NUM_MIP_LEVELS, paramRetSize, &numMipLevelRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.num_mip_levels, numMipLevelRet); } TEST_F(clGetImageInfoTests, GivenClImageNumSamplesWhenGettingImageInfoThenCorrectNumberOfSamplesIsReturned) { cl_uint numSamplesRet = 0; size_t paramRetSize = 0; retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(sizeof(cl_uint), paramRetSize); retVal = clGetImageInfo(image, CL_IMAGE_NUM_SAMPLES, paramRetSize, &numSamplesRet, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(this->imageDesc.num_samples, numSamplesRet); } TEST_F(clGetImageInfoTests, givenMultisampleCountForMcsWhenAskingForRowPitchThenReturnNewValueIfGreaterThanOne) { McsSurfaceInfo mcsInfo = {1, 1, 0}; imageDesc.num_samples = 16; size_t receivedRowPitch = 0; clReleaseMemObject(image); image = clCreateImage(pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); auto imageObj = castToObject(image); auto formatInfo = imageObj->getSurfaceFormatInfo(); size_t multisampleRowPitch = imageDesc.image_width * formatInfo.surfaceFormat.ImageElementSizeInBytes * imageDesc.num_samples; EXPECT_NE(multisampleRowPitch, imageObj->getHostPtrRowPitch()); for (uint32_t multisampleCount = 0; multisampleCount <= 4; multisampleCount++) { mcsInfo.multisampleCount = multisampleCount; imageObj->setMcsSurfaceInfo(mcsInfo); clGetImageInfo(image, CL_IMAGE_ROW_PITCH, sizeof(size_t), &receivedRowPitch, nullptr); if (multisampleCount > 1) { EXPECT_EQ(multisampleRowPitch, receivedRowPitch); } else { EXPECT_EQ(imageObj->getHostPtrRowPitch(), receivedRowPitch); } } } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_image_params_tests.inl000066400000000000000000000071531363734646600303010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: 
MIT * */ #include "shared/source/helpers/hw_info.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { template struct clGetImageParams : public ApiFixture<>, public T { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; typedef clGetImageParams<::testing::Test> clGetImageParamsTest; TEST_F(clGetImageParamsTest, GivenValidParamsWhenGettingImageParamsThenSuccessIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; cl_int retVal = CL_INVALID_VALUE; retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(imageRowPitch, 0u); EXPECT_NE(imageSlicePitch, 0u); } TEST_F(clGetImageParamsTest, GivenNullContextWhenGettingImageParamsThenInvalidContextErrorIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; cl_int retVal = CL_SUCCESS; retVal = clGetImageParamsINTEL(nullptr, &imageFormat, &imageDesc, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); } TEST_F(clGetImageParamsTest, GivenNullParamsWhenGettingImageParamsThenInvalidValueErrorIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; cl_int retVal = CL_SUCCESS; retVal = clGetImageParamsINTEL(pContext, nullptr, &imageDesc, &imageRowPitch, &imageSlicePitch); 
ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); retVal = clGetImageParamsINTEL(pContext, &imageFormat, nullptr, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, nullptr, &imageSlicePitch); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, &imageRowPitch, nullptr); ASSERT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); } TEST_F(clGetImageParamsTest, GivenInvalidFormatWhenGettingImageParamsThenImageFormatNotSupportedErrorIsReturned) { size_t imageRowPitch = 0; size_t imageSlicePitch = 0; imageFormat.image_channel_order = CL_A; imageFormat.image_channel_data_type = CL_SIGNED_INT32; auto retVal = clGetImageParamsINTEL(pContext, &imageFormat, &imageDesc, &imageRowPitch, &imageSlicePitch); ASSERT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); EXPECT_EQ(imageRowPitch, 0u); EXPECT_EQ(imageSlicePitch, 0u); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_kernel_arg_info_tests.inl000066400000000000000000000040721363734646600307750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" #include "compiler_options.h" using namespace NEO; typedef api_tests clGetKernelArgInfoTests; namespace ULT { TEST_F(clGetKernelArgInfoTests, GivenValidParamsWhenGettingKernelArgInfoThenSuccessAndCorrectSizeAreReturned) { 
cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, num_devices, devices, CompilerOptions::argInfo, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_EQ(CL_SUCCESS, retVal); size_t returnSize = 0; cl_kernel_arg_type_qualifier typeQualifier = CL_KERNEL_ARG_TYPE_NONE; retVal = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(typeQualifier), &typeQualifier, &returnSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(returnSize, sizeof(cl_kernel_arg_type_qualifier)); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_kernel_info_tests.inl000066400000000000000000000041141363734646600301410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetKernelInfoTests; namespace ULT { TEST_F(clGetKernelInfoTests, GivenValidParamsWhenGettingKernelInfoThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize 
= 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram( pProgram, num_devices, devices, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_EQ(CL_SUCCESS, retVal); size_t paramValueSizeRet; retVal = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(paramValueSizeRet, 0u); retVal = clGetKernelInfo( kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(paramValueSizeRet, 0u); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl000066400000000000000000000077511363734646600370510ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; using clGetKernelMaxConcurrentWorkGroupCountTests = api_tests; namespace ULT { TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenInvalidInputWhenCallingGetKernelMaxConcurrentWorkGroupCountThenErrorIsReturned) { size_t globalWorkOffset[3]; size_t localWorkSize[3]; size_t suggestedWorkGroupCount; cl_uint workDim = 1; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(nullptr, pKernel, workDim, 
globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, nullptr, workDim, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = false; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = true; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, 0, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, 4, globalWorkOffset, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, nullptr, localWorkSize, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_GLOBAL_OFFSET, retVal); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, nullptr, &suggestedWorkGroupCount); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); } TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGettingMaxConcurrentWorkGroupCountThenCorrectValuesAreReturned) { cl_uint workDim = 3; size_t globalWorkOffset[] = {0, 0, 0}; size_t localWorkSize[] = {8, 8, 8}; size_t maxConcurrentWorkGroupCount = 0; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, localWorkSize, &maxConcurrentWorkGroupCount); EXPECT_EQ(CL_SUCCESS, retVal); size_t expectedMaxConcurrentWorkGroupCount = pKernel->getMaxWorkGroupCount(workDim, 
localWorkSize); EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount); std::unique_ptr pKernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram)); retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pKernelWithExecutionEnvironmentPatch.get(), workDim, globalWorkOffset, localWorkSize, &maxConcurrentWorkGroupCount); EXPECT_EQ(CL_SUCCESS, retVal); expectedMaxConcurrentWorkGroupCount = pKernelWithExecutionEnvironmentPatch->getMaxWorkGroupCount(workDim, localWorkSize); EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl000066400000000000000000000200451363734646600330730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" using namespace NEO; struct KernelSubGroupInfoKhrFixture : HelloWorldFixture { typedef HelloWorldFixture ParentClass; void SetUp() override { ParentClass::SetUp(); MaxSimdSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_GE(MaxSimdSize, 8u); MaxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(MaxWorkDim, 3u); } void TearDown() override { ParentClass::TearDown(); } size_t inputValue[3]; size_t paramValue; size_t paramValueSizeRet; size_t MaxSimdSize; size_t CalculatedWGS; size_t MaxWorkDim; }; namespace ULT { typedef Test KernelSubGroupInfoKhrTest; template struct KernelSubGroupInfoKhrParamFixture : KernelSubGroupInfoKhrFixture, ::testing::TestWithParam { void SetUp() override { KernelSubGroupInfoKhrFixture::SetUp(); } void TearDown() override { KernelSubGroupInfoKhrFixture::TearDown(); } }; struct TestParam { size_t gwsX; size_t gwsY; size_t gwsZ; } KernelSubGroupInfoKhrWGS[] = { {0, 0, 0}, {1, 1, 1}, {1, 5, 1}, {8, 1, 1}, {16, 1, 1}, {32, 1, 1}, {64, 1, 1}, {1, 
190, 1}, {1, 510, 1}, {512, 1, 1}}; typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrReturnSizeTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoKhrReturnSizeTest, ::testing::ValuesIn(KernelSubGroupInfoKhrWGS)); TEST_P(KernelSubGroupInfoKhrReturnSizeTest, GivenLwsParameterWhenGettingMaxSubGroupSizeThenCorrectValueIsReturned) { paramValueSizeRet = 0; inputValue[0] = GetParam().gwsX; inputValue[1] = GetParam().gwsY; inputValue[2] = GetParam().gwsZ; CalculatedWGS = inputValue[0] * inputValue[1] * inputValue[2]; retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(size_t) * 3, inputValue, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); if (CalculatedWGS < MaxSimdSize) { EXPECT_EQ(paramValue, CalculatedWGS); } else { EXPECT_EQ(paramValue, MaxSimdSize); } } typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrReturnCountTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoKhrReturnCountTest, ::testing::ValuesIn(KernelSubGroupInfoKhrWGS)); TEST_P(KernelSubGroupInfoKhrReturnCountTest, GivenLwsParameterWhenGettingSubGroupCountThenCorrectValueIsReturned) { paramValueSizeRet = 0; inputValue[0] = GetParam().gwsX; inputValue[1] = GetParam().gwsY; inputValue[2] = GetParam().gwsZ; CalculatedWGS = inputValue[0] * inputValue[1] * inputValue[2]; retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(size_t) * 3, inputValue, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); if (CalculatedWGS % MaxSimdSize == 0) { EXPECT_EQ(paramValue, CalculatedWGS / MaxSimdSize); } else { EXPECT_EQ(paramValue, (CalculatedWGS / MaxSimdSize) + 1); } } typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrReturnCompileSizeTest; TEST_F(KernelSubGroupInfoKhrReturnCompileSizeTest, 
GivenKernelWhenGettingRequiredSubGroupSizeThenCorrectValueIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); auto stop = pKernel->getKernelInfo().attributes.find(")", start); requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start)); } EXPECT_EQ(paramValue, requiredSubGroupSize); } TEST_F(KernelSubGroupInfoKhrTest, GivenNullKernelWhenGettingKernelSubGroupInfoThenInvalidKernelErrorIsReturned) { retVal = clGetKernelSubGroupInfoKHR( nullptr, pClDevice, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_KERNEL); } TEST_F(KernelSubGroupInfoKhrTest, GivenNullDeviceWhenGettingKernelSubGroupInfoThenInvalidDeviceErrorIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pKernel, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_DEVICE); } TEST_F(KernelSubGroupInfoKhrTest, GivenInvalidParamNameWhenGettingKernelSubGroupInfoThenInvalidValueErrorIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, 0, sizeof(size_t), inputValue, sizeof(size_t), ¶mValue, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); } uint32_t /*cl_kernel_sub_group_info_khr*/ KernelSubGroupInfoKhrInputParams[] = { CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR}; typedef KernelSubGroupInfoKhrParamFixture KernelSubGroupInfoKhrInputParamsTest; INSTANTIATE_TEST_CASE_P(KernelSubGroupInfoKhrInputParams, KernelSubGroupInfoKhrInputParamsTest, ::testing::ValuesIn(KernelSubGroupInfoKhrInputParams)); TEST_P(KernelSubGroupInfoKhrInputParamsTest, 
GivenInvalidInputWhenGettingKernelSubGroupInfoThenInvalidValueErrorIsReturned) { // work dim == 0 retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, GetParam(), 0, inputValue, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); // work dim % sizeof(size_t) != 0 retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, GetParam(), (sizeof(size_t) * MaxWorkDim) - 1, inputValue, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); // work dim > MaxWorkDim retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, GetParam(), sizeof(size_t) * (MaxWorkDim + 1), inputValue, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); // null input_value retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, GetParam(), sizeof(size_t) * (MaxWorkDim), nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_P(KernelSubGroupInfoKhrInputParamsTest, GivenInvalidParamSizeWhenGettingKernelSubGroupInfoThenInvalidValueErrorIsReturned) { //param_value_size < sizeof(size_t) retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, sizeof(size_t) - 1, ¶mValue, nullptr); EXPECT_EQ(retVal, CL_INVALID_VALUE); } TEST_P(KernelSubGroupInfoKhrInputParamsTest, GivenNoReturnPointerWhenGettingKernelSubGroupInfoThenSuccessIsReturned) { retVal = clGetKernelSubGroupInfoKHR( pKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl000066400000000000000000000475061363734646600322420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" using namespace NEO; struct KernelSubGroupInfoFixture : HelloWorldFixture { typedef HelloWorldFixture ParentClass; void SetUp() override { ParentClass::SetUp(); pKernel->maxKernelWorkGroupSize = 
static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); maxSimdSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); ASSERT_LE(8u, maxSimdSize); maxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(3u, maxWorkDim); maxWorkGroupSize = static_cast(pKernel->maxKernelWorkGroupSize); ASSERT_GE(1024u, maxWorkGroupSize); largestCompiledSIMDSize = static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize); ASSERT_EQ(32u, largestCompiledSIMDSize); auto requiredWorkGroupSizeX = static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeX); auto requiredWorkGroupSizeY = static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeY); auto requiredWorkGroupSizeZ = static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->RequiredWorkGroupSizeZ); calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ; if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast(pKernel->maxKernelWorkGroupSize))) { calculatedMaxWorkgroupSize = static_cast(pKernel->maxKernelWorkGroupSize); } } void TearDown() override { ParentClass::TearDown(); } size_t inputValue[3]; size_t paramValue[3]; size_t paramValueSizeRet; size_t maxSimdSize; size_t maxWorkDim; size_t maxWorkGroupSize; size_t largestCompiledSIMDSize; size_t calculatedMaxWorkgroupSize; }; namespace ULT { typedef Test KernelSubGroupInfoTest; template struct KernelSubGroupInfoParamFixture : KernelSubGroupInfoFixture, ::testing::TestWithParam { void SetUp() override { KernelSubGroupInfoFixture::SetUp(); } void TearDown() override { KernelSubGroupInfoFixture::TearDown(); } }; static size_t WorkDimensions[] = {1, 2, 3}; static struct WorkSizeParam { size_t x; size_t y; size_t z; } KernelSubGroupInfoWGS[] = { {0, 0, 0}, {1, 1, 1}, {1, 5, 1}, {8, 1, 1}, {16, 1, 1}, {32, 1, 1}, {64, 1, 1}, {1, 190, 1}, {1, 510, 1}, {512, 1, 1}}; 
typedef KernelSubGroupInfoParamFixture> KernelSubGroupInfoReturnSizeTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoReturnSizeTest, ::testing::Combine( ::testing::ValuesIn(KernelSubGroupInfoWGS), ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnSizeTest, GivenWorkGroupSizeWhenGettingMaxSubGroupSizeThenReturnIsCalculatedCorrectly) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { WorkSizeParam workSize; size_t workDim; std::tie(workSize, workDim) = GetParam(); memset(inputValue, 0, sizeof(inputValue)); inputValue[0] = workSize.x; if (workDim > 1) { inputValue[1] = workSize.y; } if (workDim > 2) { inputValue[2] = workSize.z; } paramValueSizeRet = 0; retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(size_t) * workDim, inputValue, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); auto calculatedWGS = inputValue[0]; if (workDim > 1) { calculatedWGS *= inputValue[1]; } if (workDim > 2) { calculatedWGS *= inputValue[2]; } if (calculatedWGS < maxSimdSize) { EXPECT_EQ(calculatedWGS, paramValue[0]); } else { EXPECT_EQ(maxSimdSize, paramValue[0]); } } } typedef KernelSubGroupInfoParamFixture> KernelSubGroupInfoReturnCountTest; INSTANTIATE_TEST_CASE_P(wgs, KernelSubGroupInfoReturnCountTest, ::testing::Combine( ::testing::ValuesIn(KernelSubGroupInfoWGS), ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnCountTest, GivenWorkGroupSizeWhenGettingSubGroupCountThenReturnIsCalculatedCorrectly) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { WorkSizeParam workSize; size_t workDim; std::tie(workSize, workDim) = GetParam(); memset(inputValue, 0, sizeof(inputValue)); inputValue[0] = workSize.x; if (workDim > 1) { inputValue[1] = workSize.y; } if (workDim > 2) { inputValue[2] = workSize.z; } paramValueSizeRet = 0; retVal = 
clGetKernelSubGroupInfo( pKernel, pClDevice, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(size_t) * workDim, inputValue, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t), paramValueSizeRet); auto calculatedWGS = workSize.x; if (workDim > 1) { calculatedWGS *= workSize.y; } if (workDim > 2) { calculatedWGS *= workSize.z; } if (calculatedWGS % maxSimdSize == 0) { EXPECT_EQ(calculatedWGS / maxSimdSize, paramValue[0]); } else { EXPECT_EQ((calculatedWGS / maxSimdSize) + 1, paramValue[0]); } } } static size_t SubGroupsNumbers[] = {0, 1, 10, 12, 21, 33, 67, 99}; typedef KernelSubGroupInfoParamFixture> KernelSubGroupInfoReturnLocalSizeTest; INSTANTIATE_TEST_CASE_P(sgn, KernelSubGroupInfoReturnLocalSizeTest, ::testing::Combine( ::testing::ValuesIn(SubGroupsNumbers), ::testing::ValuesIn(WorkDimensions))); TEST_P(KernelSubGroupInfoReturnLocalSizeTest, GivenWorkGroupSizeWhenGettingLocalSizeThenReturnIsCalculatedCorrectly) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { size_t subGroupsNum; size_t workDim; std::tie(subGroupsNum, workDim) = GetParam(); inputValue[0] = subGroupsNum; retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), inputValue, sizeof(size_t) * workDim, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t) * workDim, paramValueSizeRet); size_t workGroupSize = subGroupsNum * largestCompiledSIMDSize; if (workGroupSize > calculatedMaxWorkgroupSize) { workGroupSize = 0; } EXPECT_EQ(workGroupSize, paramValue[0]); if (workDim > 1) { EXPECT_EQ(workGroupSize ? 1u : 0u, paramValue[1]); } if (workDim > 2) { EXPECT_EQ(workGroupSize ? 
1u : 0u, paramValue[2]); } } } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnMaxNumberTest; TEST_F(KernelSubGroupInfoReturnMaxNumberTest, GivenWorkGroupSizeWhenGettingMaxNumSubGroupsThenReturnIsCalculatedCorrectly) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, CL_KERNEL_MAX_NUM_SUB_GROUPS, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); EXPECT_EQ(paramValue[0], Math::divideAndRoundUp(calculatedMaxWorkgroupSize, largestCompiledSIMDSize)); } } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileNumberTest; TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileNumSubGroupThenReturnIsCalculatedCorrectly) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber)); } } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileSizeTest; TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSubGroupSizeThenReturnIsCalculatedCorrectly) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL, 0, nullptr, sizeof(size_t), paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += 
strlen("intel_reqd_sub_group_size("); auto stop = pKernel->getKernelInfo().attributes.find(")", start); requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start)); } EXPECT_EQ(paramValue[0], requiredSubGroupSize); } } TEST_F(KernelSubGroupInfoTest, GivenNullKernelWhenGettingSubGroupInfoThenInvalidKernelErrorIsReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { retVal = clGetKernelSubGroupInfo( nullptr, pClDevice, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } } TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoThenInvalidDeviceErrorIsReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { retVal = clGetKernelSubGroupInfo( pKernel, nullptr, 0, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } } TEST_F(KernelSubGroupInfoTest, GivenInvalidParamNameWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, 0, sizeof(size_t), inputValue, sizeof(size_t), paramValue, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } uint32_t /*cl_kernel_sub_group_info*/ KernelSubGroupInfoInputParams[] = { CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, CL_KERNEL_MAX_NUM_SUB_GROUPS, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL}; typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoInputParamsTest; INSTANTIATE_TEST_CASE_P(KernelSubGroupInfoInputParams, KernelSubGroupInfoInputParamsTest, ::testing::ValuesIn(KernelSubGroupInfoInputParams)); TEST_P(KernelSubGroupInfoInputParamsTest, GivenOpenClVersionLowerThan21WhenGettingKenrelSubGroupInfoThenInvalidValueErrorIsReturned) { bool requireOpenCL21 = (GetParam() == 
CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) || (GetParam() == CL_KERNEL_MAX_NUM_SUB_GROUPS) || (GetParam() == CL_KERNEL_COMPILE_NUM_SUB_GROUPS); if (requireOpenCL21) { DebugManager.flags.ForceOCLVersion.set(20); pDevice->initializeCaps(); pClDevice->initializeCaps(); retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); DebugManager.flags.ForceOCLVersion.set(0); pDevice->initializeCaps(); pClDevice->initializeCaps(); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimZeroWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), 0, inputValue, 0, nullptr, nullptr); EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenIndivisibleWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), (sizeof(size_t) * workDim) - 1, inputValue, 0, nullptr, nullptr); EXPECT_EQ(requireInput ? 
CL_INVALID_VALUE : CL_SUCCESS, retVal); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenWorkDimGreaterThanMaxWorkDimWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), sizeof(size_t) * (workDim + 1), inputValue, 0, nullptr, nullptr); EXPECT_EQ(requireInput ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenInputValueIsNullWhenGettingSubGroupInfoThenSuccessOrErrorIsCorrectlyReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { bool requireInput = (GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE) || (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = ((GetParam() == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) || (GetParam() == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), sizeof(size_t) * (workDim), nullptr, 0, nullptr, nullptr); EXPECT_EQ(requireInput ? 
CL_INVALID_VALUE : CL_SUCCESS, retVal); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenParamValueSizeZeroWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, 0, paramValue, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenUnalignedParamValueSizeWhenGettingSubGroupInfoThenInvalidValueErrorIsReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1; retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, (sizeof(size_t) * workDim) - 1, paramValue, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenTooLargeParamValueSizeWhenGettingSubGroupInfoThenCorrectRetValIsReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); size_t workDim = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT) ? maxWorkDim : 1; // paramValue size / sizeof(size_t) > MaxWorkDim retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, sizeof(size_t) * (workDim + 1), paramValue, nullptr); EXPECT_EQ(requireOutputArray ? 
CL_INVALID_VALUE : CL_SUCCESS, retVal); } } TEST_P(KernelSubGroupInfoInputParamsTest, GivenNullPtrForReturnWhenGettingKernelSubGroupInfoThenSuccessIsReturned) { if (std::string(pClDevice->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { bool requireOutputArray = (GetParam() == CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT); retVal = clGetKernelSubGroupInfo( pKernel, pClDevice, GetParam(), sizeof(size_t), inputValue, 0, nullptr, nullptr); EXPECT_EQ(requireOutputArray ? CL_INVALID_VALUE : CL_SUCCESS, retVal); } } } // namespace ULT cl_get_kernel_suggested_local_work_size_intel_tests.inl000066400000000000000000000126561363734646600355740ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; using clGetKernelSuggestedLocalWorkSizeTests = api_tests; namespace ULT { TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenInvalidInputWhenCallingGetKernelSuggestedLocalWorkSizeThenErrorIsReturned) { size_t globalWorkOffset[3]; size_t globalWorkSize[3]; size_t suggestedLocalWorkSize[3]; cl_uint workDim = 1; retVal = clGetKernelSuggestedLocalWorkSizeINTEL(nullptr, pKernel, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, nullptr, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = false; retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize); EXPECT_EQ(CL_INVALID_KERNEL, retVal); pKernel->isPatchedOverride = true; retVal = 
clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, globalWorkSize, nullptr);
    // The call completed above passes a null output array.
    EXPECT_EQ(CL_INVALID_VALUE, retVal);

    // Work dimension 0 and 4 are both rejected.
    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 0, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
    EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal);

    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 4, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
    EXPECT_EQ(CL_INVALID_WORK_DIMENSION, retVal);

    // Null global work offset.
    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim, nullptr, globalWorkSize, suggestedLocalWorkSize);
    EXPECT_EQ(CL_INVALID_GLOBAL_OFFSET, retVal);

    // Null global work size.
    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, workDim, globalWorkOffset, nullptr, suggestedLocalWorkSize);
    EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal);
}

// Compares the API result with computeWorkgroupSize() for 1, 2 and 3 dimensions
// using a 128x128x128 global size and zero offset. Output elements beyond the
// requested dimension are expected to keep their initial value of 0.
// NOTE(review): `Vec3` appears without template arguments here; they look like
// they were stripped during extraction — confirm against the original file.
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
    size_t globalWorkOffset[] = {0, 0, 0};
    size_t globalWorkSize[] = {128, 128, 128};
    size_t suggestedLocalWorkSize[] = {0, 0, 0};

    Vec3 elws{0, 0, 0};
    Vec3 gws{128, 128, 128};
    Vec3 offset{0, 0, 0};
    DispatchInfo dispatchInfo{pKernel, 1, gws, elws, offset};
    auto expectedLws = computeWorkgroupSize(dispatchInfo);
    EXPECT_GT(expectedLws.x, 1u);

    // One dimension: only element 0 is written.
    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 1, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
    EXPECT_EQ(0u, suggestedLocalWorkSize[1]);
    EXPECT_EQ(0u, suggestedLocalWorkSize[2]);

    // Two dimensions: elements 0 and 1 are written.
    dispatchInfo.setDim(2);
    expectedLws = computeWorkgroupSize(dispatchInfo);
    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 2, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
    EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]);
    EXPECT_EQ(0u,
suggestedLocalWorkSize[2]);

    // Three dimensions: all three elements are written.
    dispatchInfo.setDim(3);
    expectedLws = computeWorkgroupSize(dispatchInfo);
    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, pKernel, 3, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
    EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]);
    EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]);
}

// Same comparison as above, but with a kernel obtained from MockKernel::create()
// and a 3-dimensional dispatch.
// NOTE(review): `std::unique_ptr` and `Vec3` appear without template arguments;
// they look stripped by extraction — confirm against the original file.
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
    std::unique_ptr kernelWithExecutionEnvironmentPatch(MockKernel::create(pCommandQueue->getDevice(), pProgram));

    size_t globalWorkOffset[] = {0, 0, 0};
    size_t globalWorkSize[] = {128, 128, 128};
    size_t suggestedLocalWorkSize[] = {0, 0, 0};
    cl_uint workDim = 3;

    Vec3 elws{0, 0, 0};
    Vec3 gws{128, 128, 128};
    Vec3 offset{0, 0, 0};
    const DispatchInfo dispatchInfo{kernelWithExecutionEnvironmentPatch.get(), workDim, gws, elws, offset};
    auto expectedLws = computeWorkgroupSize(dispatchInfo);
    // Guards against a trivial 1x1x1 expectation.
    EXPECT_GT(expectedLws.x * expectedLws.y * expectedLws.z, 1u);

    retVal = clGetKernelSuggestedLocalWorkSizeINTEL(pCommandQueue, kernelWithExecutionEnvironmentPatch.get(), workDim, globalWorkOffset, globalWorkSize, suggestedLocalWorkSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(expectedLws.x, suggestedLocalWorkSize[0]);
    EXPECT_EQ(expectedLws.y, suggestedLocalWorkSize[1]);
    EXPECT_EQ(expectedLws.z, suggestedLocalWorkSize[2]);
}
// NOTE(review): the text after "namespace ULT" on the next line looks like a tar
// member header ("ustar") fused with the next file's license comment; kept verbatim.
} // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl000066400000000000000000000125521363734646600324240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/file_io.h"

#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
#include
"opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" using namespace NEO; struct clGetKernelWorkGroupInfoTests : public ApiFixture<>, public ::testing::TestWithParam { typedef ApiFixture BaseClass; void SetUp() override { BaseClass::SetUp(); std::unique_ptr pSource = nullptr; size_t sourceSize = 0; std::string testFile; kbHelper = new KernelBinaryHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); ASSERT_EQ(true, fileExists(testFile)); pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clBuildProgram( pProgram, num_devices, devices, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); delete kbHelper; BaseClass::TearDown(); } cl_program pProgram = nullptr; cl_kernel kernel = nullptr; KernelBinaryHelper *kbHelper; }; namespace ULT { TEST_P(clGetKernelWorkGroupInfoTests, GivenValidParametersWhenGettingKernelWorkGroupInfoThenSuccessIsReturned) { size_t paramValueSizeRet; retVal = clGetKernelWorkGroupInfo( kernel, devices[testedRootDeviceIndex], GetParam(), 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, paramValueSizeRet); } TEST_F(clGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettingKernelWorkGroupInfoThenCorrectSpillMemSizeIsReturned) { size_t paramValueSizeRet; cl_ulong param_value; auto pDevice = castToObject(devices[testedRootDeviceIndex]); MockKernelWithInternals 
mockKernel(*pDevice); SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = 1024; //whatever greater than 0 mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; cl_ulong scratchSpaceSize = static_cast(mockKernel.mockKernel->getScratchSize()); EXPECT_EQ(scratchSpaceSize, 1024u); retVal = clGetKernelWorkGroupInfo( mockKernel, pDevice, CL_KERNEL_SPILL_MEM_SIZE_INTEL, sizeof(cl_ulong), ¶m_value, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong)); EXPECT_EQ(param_value, scratchSpaceSize); } TEST_F(clGetKernelWorkGroupInfoTests, givenKernelHavingPrivateMemoryAllocationWhenAskedForPrivateAllocationSizeThenProperSizeIsReturned) { size_t paramValueSizeRet; cl_ulong param_value; auto pDevice = castToObject(devices[testedRootDeviceIndex]); MockKernelWithInternals mockKernel(*pDevice); SPatchAllocateStatelessPrivateSurface privateAllocation; privateAllocation.PerThreadPrivateMemorySize = 1024; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrivateSurface = &privateAllocation; retVal = clGetKernelWorkGroupInfo( mockKernel, pDevice, CL_KERNEL_PRIVATE_MEM_SIZE, sizeof(cl_ulong), ¶m_value, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong)); EXPECT_EQ(param_value, privateAllocation.PerThreadPrivateMemorySize); } TEST_F(clGetKernelWorkGroupInfoTests, givenKernelNotHavingPrivateMemoryAllocationWhenAskedForPrivateAllocationSizeThenZeroIsReturned) { size_t paramValueSizeRet; cl_ulong param_value; auto pDevice = castToObject(devices[testedRootDeviceIndex]); MockKernelWithInternals mockKernel(*pDevice); retVal = clGetKernelWorkGroupInfo( mockKernel, pDevice, CL_KERNEL_PRIVATE_MEM_SIZE, sizeof(cl_ulong), ¶m_value, ¶mValueSizeRet); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong)); EXPECT_EQ(param_value, 0u); } static cl_kernel_work_group_info paramNames[] = { CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, 
CL_KERNEL_LOCAL_MEM_SIZE, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, CL_KERNEL_SPILL_MEM_SIZE_INTEL, CL_KERNEL_PRIVATE_MEM_SIZE}; INSTANTIATE_TEST_CASE_P( api, clGetKernelWorkGroupInfoTests, testing::ValuesIn(paramNames)); } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl000066400000000000000000000060301363734646600307640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetMemObjectInfoTests; namespace ULT { TEST_F(clGetMemObjectInfoTests, GivenValidBufferWhenGettingMemObjectInfoThenCorrectBufferSizeIsReturned) { size_t bufferSize = 16; cl_mem buffer = nullptr; buffer = clCreateBuffer( pContext, 0, bufferSize, NULL, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); size_t paramValue = 0; retVal = clGetMemObjectInfo(buffer, CL_MEM_SIZE, sizeof(paramValue), ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(bufferSize, paramValue); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectInfoThenCorrectMapCountIsReturned) { size_t bufferSize = 16; cl_mem buffer = nullptr; cl_queue_properties properties = 0; cl_command_queue cmdQ = clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], properties, &retVal); buffer = clCreateBuffer( pContext, 0, bufferSize, NULL, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); clEnqueueMapBuffer( cmdQ, buffer, CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, nullptr); cl_uint paramValue = 0; retVal = clGetMemObjectInfo(buffer, CL_MEM_MAP_COUNT, 
sizeof(paramValue), ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1u, paramValue); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(cmdQ); } TEST_F(clGetMemObjectInfoTests, GivenBufferCreatedFromSvmPointerWhenGettingMemObjectInfoThenClTrueIsReturned) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { size_t bufferSize = 64; cl_mem buffer = nullptr; auto ptr = clSVMAlloc(pContext, CL_MEM_READ_WRITE, bufferSize, 64); ASSERT_NE(nullptr, ptr); buffer = clCreateBuffer( pContext, CL_MEM_USE_HOST_PTR, bufferSize, ptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cl_bool paramValue = CL_FALSE; retVal = clGetMemObjectInfo(buffer, CL_MEM_USES_SVM_POINTER, sizeof(paramValue), ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(static_cast(CL_TRUE), paramValue); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptr); } } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_pipe_info_tests.inl000066400000000000000000000103311363734646600276140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/source/mem_obj/pipe.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetPipeInfoTests; namespace ULT { TEST_F(clGetPipeInfoTests, GivenValidPipeWithPacketSizeOneWhenGettingPipeInfoThenPacketSizeReturnedIsOne) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_PACKET_SIZE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValue, 1u); EXPECT_EQ(paramValueRetSize, sizeof(cl_uint)); clReleaseMemObject(pipe); } 
TEST_F(clGetPipeInfoTests, GivenValidPipeWithMaxPacketEqualTwentyWhenGettingPipeInfoThenMaxPacketReturnedIsTwenty) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValue, 20u); EXPECT_EQ(paramValueRetSize, sizeof(cl_uint)); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenInvalidParamNameWhenGettingPipeInfoThenClInvalidValueErrorIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_MEM_READ_WRITE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenInvalidMemoryObjectWhenGettingPipeInfoThenClInvalidMemObjectErrorIsReturned) { cl_uint paramValue = 0; size_t paramValueRetSize = 0; char fakeMemoryObj[sizeof(Pipe)]; retVal = clGetPipeInfo((cl_mem)&fakeMemoryObj[0], CL_MEM_READ_WRITE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(clGetPipeInfoTests, GivenNullParamValueWhenGettingPipeInfoThenClSuccessIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); size_t paramValueRetSize = 0; cl_uint paramValue = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(paramValue), nullptr, ¶mValueRetSize); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenNullParamValueSizeRetWhenGettingPipeInfoThenClSuccessIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); 
EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(paramValue), ¶mValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenParamValueSizeRetTooSmallWhenGettingPipeInfoThenClInvalidValueErrorIsReturned) { auto pipe = clCreatePipe(pContext, CL_MEM_READ_WRITE, 1, 20, nullptr, &retVal); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, retVal); uint16_t paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(pipe, CL_PIPE_PACKET_SIZE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); clReleaseMemObject(pipe); } TEST_F(clGetPipeInfoTests, GivenBufferInsteadOfPipeWhenGettingPipeInfoThenClInvalidMemObjectErrorIsReturned) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueRetSize = 0; retVal = clGetPipeInfo(buffer, CL_PIPE_PACKET_SIZE, sizeof(paramValue), ¶mValue, ¶mValueRetSize); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); clReleaseMemObject(buffer); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_platform_ids_tests.inl000066400000000000000000000103451363734646600303340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetPlatformIDsTests; namespace ULT { TEST_F(clGetPlatformIDsTests, GivenNullPlatformWhenGettingPlatformIdsThenNumberofPlatformsIsReturned) { cl_int retVal = CL_SUCCESS; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); 
EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_GT(numPlatforms, 0u);
}

// Requests one platform id and expects a non-null handle.
TEST_F(clGetPlatformIDsTests, GivenPlatformsWhenGettingPlatformIdsThenPlatformsIdIsReturned) {
    cl_int retVal = CL_SUCCESS;
    cl_platform_id platform = nullptr;

    retVal = clGetPlatformIDs(1, &platform, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, platform);
}

// num_entries == 0 combined with a non-null platform array is rejected.
TEST_F(clGetPlatformIDsTests, GivenNumEntriesZeroAndPlatformNotNullWhenGettingPlatformIdsThenClInvalidValueErrorIsReturned) {
    cl_int retVal = CL_SUCCESS;
    cl_platform_id platform = nullptr;

    retVal = clGetPlatformIDs(0, &platform, nullptr);

    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Clears platformsImpl and sets ultHwConfig.mockedPrepareDeviceEnvironmentsFuncResult
// to false, then expects clGetPlatformIDs to report CL_OUT_OF_HOST_MEMORY and to
// leave both outputs untouched. platformsImpl is cleared again afterwards.
// NOTE(review): `VariableBackup` appears without template arguments in this
// text; possibly stripped by extraction — confirm.
TEST(clGetPlatformIDsNegativeTests, GivenFailedInitializationWhenGettingPlatformIdsThenClOutOfHostMemoryErrorIsReturned) {
    platformsImpl.clear();

    VariableBackup backup{&ultHwConfig};
    ultHwConfig.mockedPrepareDeviceEnvironmentsFuncResult = false;

    cl_int retVal = CL_SUCCESS;
    cl_platform_id platformRet = nullptr;
    cl_uint numPlatforms = 0;

    retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms);

    EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
    EXPECT_EQ(0u, numPlatforms);
    EXPECT_EQ(nullptr, platformRet);

    platformsImpl.clear();
}

// Replaces DeviceFactory::createRootDeviceFunc with a lambda returning nullptr
// and expects CL_OUT_OF_HOST_MEMORY with both outputs untouched.
// NOTE(review): the lambda's `std::unique_ptr` return type has no template
// argument in this text; it looks stripped by extraction — confirm.
TEST(clGetPlatformIDsNegativeTests, whenFailToCreateDeviceThenClGetPlatfomsIdsReturnsOutOfHostMemoryError) {
    VariableBackup createFuncBackup{&DeviceFactory::createRootDeviceFunc};
    DeviceFactory::createRootDeviceFunc = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr {
        return nullptr;
    };
    platformsImpl.clear();

    cl_int retVal = CL_SUCCESS;
    cl_platform_id platformRet = nullptr;
    cl_uint numPlatforms = 0;

    retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms);

    EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
    EXPECT_EQ(0u, numPlatforms);
    EXPECT_EQ(nullptr, platformRet);

    platformsImpl.clear();
}

// Replaces Platform::createFunc with a lambda (its body continues on the next
// line of the file).
TEST(clGetPlatformIDsNegativeTests, whenFailToCreatePlatformThenClGetPlatfomsIdsReturnsOutOfHostMemoryError) {
    VariableBackup createFuncBackup{&Platform::createFunc};
    Platform::createFunc = [](ExecutionEnvironment
&executionEnvironment) -> std::unique_ptr {
        return nullptr;
    };
    platformsImpl.clear();

    cl_int retVal = CL_SUCCESS;
    cl_platform_id platformRet = nullptr;
    cl_uint numPlatforms = 0;

    retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms);

    // Platform creation returned nullptr above, so the call is expected to
    // fail and leave both outputs untouched.
    EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
    EXPECT_EQ(0u, numPlatforms);
    EXPECT_EQ(nullptr, platformRet);

    platformsImpl.clear();
}

// Installs a Platform subclass whose initialize() always returns false and
// expects clGetPlatformIDs to report CL_OUT_OF_HOST_MEMORY with both outputs
// untouched.
// NOTE(review): `std::vector> devices`, `std::unique_ptr` and `std::make_unique(`
// below are missing template arguments in this text; they look stripped by
// extraction — confirm against the original file.
TEST(clGetPlatformIDsNegativeTests, whenFailToInitializePlatformThenClGetPlatfomsIdsReturnsOutOfHostMemoryError) {
    VariableBackup createFuncBackup{&Platform::createFunc};
    struct FailingPlatform : public Platform {
        using Platform::Platform;
        bool initialize(std::vector> devices) override { return false; }
    };
    Platform::createFunc = [](ExecutionEnvironment &executionEnvironment) -> std::unique_ptr {
        return std::make_unique(executionEnvironment);
    };
    platformsImpl.clear();

    cl_int retVal = CL_SUCCESS;
    cl_platform_id platformRet = nullptr;
    cl_uint numPlatforms = 0;

    retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms);

    EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal);
    EXPECT_EQ(0u, numPlatforms);
    EXPECT_EQ(nullptr, platformRet);

    platformsImpl.clear();
}
// NOTE(review): the text after "namespace ULT" on the next line looks like a tar
// member header ("ustar") fused with the next file's license comment; kept verbatim.
} // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_platform_ids_tests_mt.cpp000066400000000000000000000013571363734646600310370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "opencl/source/context/context.h"
#include "opencl/source/platform/platform.h"

#include "cl_api_tests.h"

using namespace NEO;

typedef api_tests clGetPlatformIDsMtTests;

namespace ULT {

// Calls clGetPlatformIDs from the test thread and from a second thread and
// expects both to receive the same platform handle. The second thread is
// joined before the handles are compared.
TEST_F(clGetPlatformIDsMtTests, GivenSeparateThreadWhenGettingPlatformIdThenPlatformIdIsCorrect) {
    cl_int retVal = CL_SUCCESS;
    cl_platform_id platform = nullptr;
    cl_platform_id threadPlatform = nullptr;

    std::thread t1([&] { clGetPlatformIDs(1, &threadPlatform, nullptr); });

    retVal = clGetPlatformIDs(1, &platform, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);

    t1.join();

    EXPECT_EQ(threadPlatform, platform);
}
} //
namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_platform_info_tests.inl000066400000000000000000000160561363734646600305150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "opencl/source/platform/platform.h" #include "test.h" #include "CL/cl_ext.h" #include "cl_api_tests.h" using namespace NEO; struct clGetPlatformInfoTests : public api_tests { void SetUp() override { api_tests::SetUp(); } void TearDown() override { delete[] paramValue; api_tests::TearDown(); } char *getPlatformInfoString(Platform *pPlatform, cl_platform_info paramName) { size_t retSize; auto retVal = clGetPlatformInfo(pPlatform, paramName, 0, nullptr, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(retSize, 0u); auto value = new char[retSize]; retVal = clGetPlatformInfo(pPlatform, paramName, retSize, value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); return value; } char *paramValue = nullptr; }; namespace ULT { TEST_F(clGetPlatformInfoTests, GivenClPlatformProfileWhenGettingPlatformInfoStringThenFullProfileIsReturned) { paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_PROFILE); EXPECT_STREQ(paramValue, "FULL_PROFILE"); } class clGetPlatformInfoParameterizedTests : public clGetPlatformInfoTests, public ::testing::WithParamInterface { void SetUp() override { DebugManager.flags.ForceOCLVersion.set(GetParam()); clGetPlatformInfoTests::SetUp(); } void TearDown() override { clGetPlatformInfoTests::TearDown(); DebugManager.flags.ForceOCLVersion.set(0); } }; TEST_P(clGetPlatformInfoParameterizedTests, GivenClPlatformVersionWhenGettingPlatformInfoStringThenCorrectOpenClVersionIsReturned) { paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_VERSION); std::string deviceVer; switch (GetParam()) { case 21: deviceVer = "OpenCL 2.1 "; break; case 20: deviceVer = "OpenCL 2.0 "; break; case 12: 
default:
        deviceVer = "OpenCL 1.2 ";
        break;
    }

    EXPECT_STREQ(paramValue, deviceVer.c_str());
}

// Runs the version test with forced OpenCL versions 12, 20 and 21.
INSTANTIATE_TEST_CASE_P(OCLVersions,
                        clGetPlatformInfoParameterizedTests,
                        ::testing::Values(12, 20, 21));

// CL_PLATFORM_NAME is expected to be "Intel(R) OpenCL HD Graphics".
TEST_F(clGetPlatformInfoTests, GivenClPlatformNameWhenGettingPlatformInfoStringThenCorrectStringIsReturned) {
    paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_NAME);
    EXPECT_STREQ(paramValue, "Intel(R) OpenCL HD Graphics");
}

// CL_PLATFORM_VENDOR is expected to be "Intel(R) Corporation".
TEST_F(clGetPlatformInfoTests, GivenClPlatformVendorWhenGettingPlatformInfoStringThenCorrectStringIsReturned) {
    paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_VENDOR);
    EXPECT_STREQ(paramValue, "Intel(R) Corporation");
}

// CL_PLATFORM_EXTENSIONS must contain "cl_khr_icd " and "cl_khr_fp16 "
// (each followed by a space).
TEST_F(clGetPlatformInfoTests, GivenClPlatformExtensionsWhenGettingPlatformInfoStringThenExtensionStringIsReturned) {
    paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_EXTENSIONS);

    EXPECT_NE(nullptr, strstr(paramValue, "cl_khr_icd "));
    EXPECT_NE(nullptr, strstr(paramValue, "cl_khr_fp16 "));
}

// CL_PLATFORM_ICD_SUFFIX_KHR is expected to be "INTEL".
TEST_F(clGetPlatformInfoTests, GivenClPlatformIcdSuffixKhrWhenGettingPlatformInfoStringThenIntelIsReturned) {
    paramValue = getPlatformInfoString(pPlatform, CL_PLATFORM_ICD_SUFFIX_KHR);
    EXPECT_STREQ(paramValue, "INTEL");
}

// Queries CL_PLATFORM_HOST_TIMER_RESOLUTION (size first, then value) and
// compares the value with getPlatformHostTimerResolution() of device 0.
// NOTE(review): `static_cast(` below has no target type in this text; it looks
// stripped by extraction — confirm against the original file.
TEST_F(clGetPlatformInfoTests, GivenClPlatformHostTimerResolutionWhenGettingPlatformInfoStringThenCorrectResolutionIsReturned) {
    auto retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_HOST_TIMER_RESOLUTION, 0, nullptr, &retSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_GT(retSize, 0u);

    cl_ulong value = 0;
    retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_HOST_TIMER_RESOLUTION, retSize, &value, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto device = pPlatform->getClDevice(0);
    cl_ulong resolution = static_cast(device->getPlatformHostTimerResolution());
    EXPECT_EQ(resolution, value);
}

// A null platform handle is expected to yield CL_INVALID_PLATFORM.
TEST_F(clGetPlatformInfoTests, GivenNullPlatformWhenGettingPlatformInfoStringThenClInvalidPlatformErrorIsReturned) {
    char extensions[512];
    retVal = clGetPlatformInfo(
        nullptr, // invalid platform
        CL_PLATFORM_EXTENSIONS,
sizeof(extensions), extensions, &retSize); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(clGetPlatformInfoTests, GivenInvalidParamNameWhenGettingPlatformInfoStringThenClInvalidValueErrorIsReturned) { char extensions[512]; retVal = clGetPlatformInfo( pPlatform, 0, // invalid platform info enum sizeof(extensions), extensions, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetPlatformInfoTests, GivenInvalidParamSizeWhenGettingPlatformInfoStringThenClInvalidValueErrorIsReturned) { char extensions[512]; retVal = clGetPlatformInfo( pPlatform, CL_PLATFORM_EXTENSIONS, 0, // invalid size extensions, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clGetPlatformInfoTests, GivenDeviceWhenGettingIcdDispatchTableThenDeviceAndPlatformTablesMatch) { EXPECT_NE(pPlatform->dispatch.icdDispatch, nullptr); for (size_t deviceOrdinal = 0; deviceOrdinal < pPlatform->getNumDevices(); ++deviceOrdinal) { auto device = pPlatform->getClDevice(deviceOrdinal); ASSERT_NE(nullptr, device); EXPECT_EQ(pPlatform->dispatch.icdDispatch, device->dispatch.icdDispatch); } } class GetPlatformInfoTests : public PlatformFixture, public testing::TestWithParam { using PlatformFixture::SetUp; public: GetPlatformInfoTests() {} protected: void SetUp() override { platformInfo = GetParam(); PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } char *getPlatformInfoString(Platform *pPlatform, cl_platform_info paramName) { size_t retSize; auto retVal = clGetPlatformInfo(pPlatform, paramName, 0, nullptr, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(retSize, 0u); auto value = new char[retSize]; retVal = clGetPlatformInfo(pPlatform, paramName, retSize, value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); return value; } cl_int retVal = CL_SUCCESS; size_t retSize = 0; cl_platform_info platformInfo = 0; const HardwareInfo *pHwInfo = nullptr; }; TEST_P(GetPlatformInfoTests, GivenValidParamWhenGettingPlatformInfoStringThenNonEmptyStringIsReturned) { auto paramValue = 
getPlatformInfoString(pPlatform, platformInfo);

    // Every parameter in PlatformInfoTestValues must yield a non-empty string.
    EXPECT_STRNE(paramValue, "");

    // The string is allocated with new[] by getPlatformInfoString().
    delete[] paramValue;
}

// Platform info names exercised by GetPlatformInfoTests.
const cl_platform_info PlatformInfoTestValues[] =
    {
        CL_PLATFORM_PROFILE,
        CL_PLATFORM_VERSION,
        CL_PLATFORM_NAME,
        CL_PLATFORM_VENDOR,
        CL_PLATFORM_EXTENSIONS,
        CL_PLATFORM_ICD_SUFFIX_KHR,
};

INSTANTIATE_TEST_CASE_P(api,
                        GetPlatformInfoTests,
                        ::testing::ValuesIn(PlatformInfoTestValues));
// NOTE(review): the text after "namespace ULT" on the next line looks like a tar
// member header ("ustar") fused with the next file's license comment; kept verbatim.
} // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_program_build_info_tests.inl000066400000000000000000000105261363734646600315130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/compiler_interface/intermediate_representations.h"
#include "shared/source/device/device.h"
#include "shared/source/device_binary_format/elf/elf.h"
#include "shared/source/device_binary_format/elf/elf_encoder.h"
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/helpers/file_io.h"

#include "opencl/source/context/context.h"
#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
#include "opencl/test/unit_test/helpers/test_files.h"

#include "cl_api_tests.h"

using namespace NEO;

typedef api_tests clGetProgramBuildInfoTests;

namespace ULT {

// Follows CL_PROGRAM_BUILD_STATUS for a program created from source:
// CL_BUILD_NONE after creation, CL_BUILD_SUCCESS after compile and after build.
// NOTE(review): `std::unique_ptr pSource` has no template argument in this
// text; it looks stripped by extraction — confirm.
TEST_F(clGetProgramBuildInfoTests, givenSourceWhenclGetProgramBuildInfoIsCalledThenReturnClBuildNone) {
    cl_program pProgram = nullptr;
    std::unique_ptr pSource = nullptr;
    size_t sourceSize = 0;
    std::string testFile;

    KernelBinaryHelper kbHelper("CopyBuffer_simd16");

    testFile.append(clFiles);
    testFile.append("CopyBuffer_simd16.cl");

    pSource = loadDataFromFile(
        testFile.c_str(),
        sourceSize);

    ASSERT_NE(0u, sourceSize);
    ASSERT_NE(nullptr, pSource);

    const char *sources[1] = {pSource.get()};
    pProgram = clCreateProgramWithSource(
        pContext,
        1,
        sources,
        &sourceSize,
        &retVal);

    EXPECT_NE(nullptr, pProgram);
    ASSERT_EQ(CL_SUCCESS, retVal);

    cl_build_status buildStatus;
retVal = clGetProgramBuildInfo(pProgram, devices[testedRootDeviceIndex], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
    // Nothing has been compiled or built yet.
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(CL_BUILD_NONE, buildStatus);

    retVal = clCompileProgram(
        pProgram,
        num_devices,
        devices,
        nullptr,
        0,
        nullptr,
        nullptr,
        nullptr,
        nullptr);

    ASSERT_EQ(CL_SUCCESS, retVal);

    // Status after a successful compile.
    retVal = clGetProgramBuildInfo(pProgram, devices[testedRootDeviceIndex], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus);

    retVal = clBuildProgram(
        pProgram,
        num_devices,
        devices,
        nullptr,
        nullptr,
        nullptr);

    ASSERT_EQ(CL_SUCCESS, retVal);

    // Status after a successful build.
    retVal = clGetProgramBuildInfo(pProgram, devices[testedRootDeviceIndex], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(CL_BUILD_SUCCESS, buildStatus);

    // try to get program build info for invalid program object - should fail
    retVal = clGetProgramBuildInfo(nullptr, devices[testedRootDeviceIndex], CL_PROGRAM_BUILD_STATUS, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_PROGRAM, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Builds an ELF of type ET_OPENCL_LIBRARY with a build-options section and a
// SPIR-V section that holds only NEO::spirvMagic, creates a program from it and
// (on the following line of the file) checks the build status.
// NOTE(review): `ElfEncoder`, `ArrayRef::fromAny` and `reinterpret_cast(` below
// are missing template arguments in this text; they look stripped by
// extraction — confirm against the original file.
TEST_F(clGetProgramBuildInfoTests, givenElfBinaryWhenclGetProgramBuildInfoIsCalledThenReturnClBuildNone) {
    cl_program pProgram = nullptr;
    cl_int binaryStatus = CL_INVALID_VALUE;

    NEO::Elf::ElfEncoder elfEncoder;
    elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_LIBRARY;
    const uint8_t data[4] = {};
    elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_OPTIONS, NEO::Elf::SectionNamesOpenCl::buildOptions, data);
    elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV, NEO::Elf::SectionNamesOpenCl::spirvObject, ArrayRef::fromAny(NEO::spirvMagic.begin(), NEO::spirvMagic.size()));
    auto elfBinary = elfEncoder.encode();

    const size_t binarySize = elfBinary.size();
    const unsigned char *elfBinaryTemp = reinterpret_cast(elfBinary.data());
    pProgram = clCreateProgramWithBinary(
        pContext,
        num_devices,
        devices,
&binarySize,
        &elfBinaryTemp,
        &binaryStatus,
        &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, pProgram);
    EXPECT_EQ(CL_SUCCESS, binaryStatus);

    // The program was only created, not built, so CL_BUILD_NONE is expected.
    cl_build_status buildStatus;
    retVal = clGetProgramBuildInfo(pProgram, devices[testedRootDeviceIndex], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(CL_BUILD_NONE, buildStatus);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
// NOTE(review): the text after "namespace ULT" on the next line looks like a tar
// member header ("ustar") fused with the next file's license comment; kept verbatim.
} // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_program_info_tests.inl000066400000000000000000000106631363734646600303360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/file_io.h"

#include "opencl/source/context/context.h"
#include "opencl/test/unit_test/helpers/kernel_binary_helper.h"
#include "opencl/test/unit_test/helpers/test_files.h"

#include "cl_api_tests.h"

using namespace NEO;

typedef api_tests clGetProgramInfoTests;

namespace ULT {

// Builds a program from CopyBuffer_simd16.cl and then queries several
// clGetProgramInfo parameters (the queries continue on the following lines of
// the file).
// NOTE(review): `std::unique_ptr pSource` has no template argument in this
// text; it looks stripped by extraction — confirm.
TEST_F(clGetProgramInfoTests, SuccessfulProgramWithSource) {
    cl_program pProgram = nullptr;
    std::unique_ptr pSource = nullptr;
    size_t sourceSize = 0;
    std::string testFile;

    KernelBinaryHelper kbHelper("CopyBuffer_simd16", false);

    testFile.append(clFiles);
    testFile.append("CopyBuffer_simd16.cl");

    pSource = loadDataFromFile(
        testFile.c_str(),
        sourceSize);

    ASSERT_NE(0u, sourceSize);
    ASSERT_NE(nullptr, pSource);

    const char *sources[1] = {pSource.get()};
    pProgram = clCreateProgramWithSource(
        pContext,
        1,
        sources,
        &sourceSize,
        &retVal);

    EXPECT_NE(nullptr, pProgram);
    ASSERT_EQ(CL_SUCCESS, retVal);

    retVal = clBuildProgram(
        pProgram,
        num_devices,
        devices,
        nullptr,
        nullptr,
        nullptr);

    ASSERT_EQ(CL_SUCCESS, retVal);

    cl_uint numDevices;
    retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, numDevices);

    // The single program device must be the tested root device.
    cl_device_id programDevices;
    retVal = clGetProgramInfo(pProgram, CL_PROGRAM_DEVICES, sizeof(programDevices), &programDevices, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(devices[testedRootDeviceIndex], programDevices);

    // CL_PROGRAM_SOURCE: the reported length is the source size plus one byte.
    size_t length = 0;
    char buffer[10240];
    retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, 0, nullptr, &length);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(sourceSize + 1, length);
    // NOTE(review): the fixed 10240-byte buffer assumes the test source fits — confirm.
    retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, sizeof(buffer), buffer, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(strlen(pSource.get()), strlen(buffer));

    // try to get program info for invalid program object - should fail
    retVal = clGetProgramInfo(nullptr, CL_PROGRAM_SOURCE, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_PROGRAM, retVal);

    // set paramValueSizeRet to 0 for IL program queries on non-IL programs
    size_t sourceSizeRet = sourceSize;
    retVal = clGetProgramInfo(pProgram, CL_PROGRAM_IL, 0, nullptr, &sourceSizeRet);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, sourceSizeRet);

    retVal = clGetProgramInfo(pProgram, CL_PROGRAM_IL, sourceSizeRet, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Creates a program from a 16-word array whose first word is 0x03022307 and the
// rest zero, then checks that CL_PROGRAM_IL returns the same bytes and size.
TEST_F(clGetProgramInfoTests, SuccessfulProgramWithIL) {
    const size_t binarySize = 16;
    const uint32_t spirv[binarySize] = {0x03022307};

    cl_int err = CL_INVALID_VALUE;
    cl_program pProgram = clCreateProgramWithIL(pContext, spirv, sizeof(spirv), &err);
    EXPECT_EQ(CL_SUCCESS, err);
    EXPECT_NE(nullptr, pProgram);

    uint32_t output[binarySize] = {};
    size_t outputSize = 0;
    retVal = clGetProgramInfo(pProgram, CL_PROGRAM_IL, sizeof(output), output, &outputSize);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(sizeof(spirv), outputSize);
    EXPECT_EQ(0, memcmp(spirv, output, outputSize));

    retVal = clReleaseProgram(pProgram);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

TEST_F(clGetProgramInfoTests, GivenSPIRVProgramWhenGettingProgramSourceThenReturnNullString) {
    const
size_t binarySize = 16; const uint32_t spirv[binarySize] = {0x03022307}; cl_int err = CL_INVALID_VALUE; cl_program pProgram = clCreateProgramWithIL(pContext, spirv, sizeof(spirv), &err); EXPECT_EQ(CL_SUCCESS, err); EXPECT_NE(nullptr, pProgram); size_t outputSize = 0; uint32_t output[binarySize] = {}; const char reference[sizeof(output)] = {}; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, 0, nullptr, &outputSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, outputSize); retVal = clGetProgramInfo(pProgram, CL_PROGRAM_SOURCE, sizeof(output), output, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(output, reference, sizeof(output))); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_get_supported_image_formats_tests.inl000066400000000000000000000102751363734646600325750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clGetSupportedImageFormatsTests; TEST_F(clGetSupportedImageFormatsTests, GivenValidParamsWhenGettingSupportImageFormatsThenNumImageFormatsIsGreaterThanZero) { if (!pContext->getDevice(0)->getSharedDeviceInfo().imageSupport) { GTEST_SKIP(); } cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numImageFormats, 0u); } TEST_F(clGetSupportedImageFormatsTests, givenInvalidContextWhenGettingSupportImageFormatsThenClInvalidContextErrorIsReturned) { auto device = 
pContext->getDevice(0u); auto dummyContext = reinterpret_cast(device); cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( dummyContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clGetSupportedImageFormatsTest, givenPlatforNotSupportingImageWhenGettingSupportImageFormatsThenCLSuccessReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id clDevice = device.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( context.get(), CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, numImageFormats); } TEST(clGetSupportedImageFormatsTest, givenPlatforNotSupportingImageAndNullPointerToNumFormatsWhenGettingSupportImageFormatsThenCLSuccessReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id clDevice = device.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetSupportedImageFormats( context.get(), CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clGetSupportedImageFormatsTest, givenPlatformWithoutDevicesWhenClGetSupportedImageFormatIsCalledThenDeviceIsTakenFromContext) { auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->prepareRootDeviceEnvironments(1); auto device = 
std::make_unique(*Device::create(executionEnvironment, 0u), platform()); const DeviceInfo &devInfo = device->getSharedDeviceInfo(); if (!devInfo.imageSupport) { GTEST_SKIP(); } cl_device_id clDevice = device.get(); cl_int retVal; auto context = ReleaseableObjectPtr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, platform()->getNumDevices()); cl_uint numImageFormats = 0; retVal = clGetSupportedImageFormats( context.get(), CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numImageFormats, 0u); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_icd_get_platform_ids_khr_tests.inl000066400000000000000000000047761363734646600320320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #if defined(_WIN32) #include "shared/source/os_interface/windows/windows_wrapper.h" #endif #include "shared/source/device/device.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clIcdGetPlatformIDsKHRTests; namespace ULT { TEST_F(clIcdGetPlatformIDsKHRTests, checkDispatchLocation) { cl_platform_id platform = pPlatform; EXPECT_EQ((void *)platform, (void *)(&platform->dispatch)); } TEST_F(clIcdGetPlatformIDsKHRTests, getCount) { cl_int retVal = CL_SUCCESS; cl_uint numPlatforms = 0; retVal = clIcdGetPlatformIDsKHR(0, nullptr, &numPlatforms); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GT(numPlatforms, (cl_uint)0); } TEST_F(clIcdGetPlatformIDsKHRTests, checkExtensionFunctionAvailability) { void *funPtr = clGetExtensionFunctionAddress("clIcdGetPlatformIDsKHR"); decltype(&clIcdGetPlatformIDsKHR) expected = clIcdGetPlatformIDsKHR; EXPECT_NE(nullptr, funPtr); EXPECT_EQ(expected, reinterpret_cast(funPtr)); } TEST_F(clIcdGetPlatformIDsKHRTests, checkDeviceId) { cl_uint numPlatforms = 0; cl_uint 
numPlatformsIcd = 0; retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clIcdGetPlatformIDsKHR(0, nullptr, &numPlatformsIcd); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numPlatforms, numPlatformsIcd); std::unique_ptr platforms(reinterpret_cast(malloc(sizeof(cl_platform_id) * numPlatforms)), free); ASSERT_NE(nullptr, platforms); std::unique_ptr platformsIcd(reinterpret_cast(malloc(sizeof(cl_platform_id) * numPlatforms)), free); ASSERT_NE(nullptr, platforms); retVal = clGetPlatformIDs(numPlatforms, platforms.get(), nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clIcdGetPlatformIDsKHR(numPlatformsIcd, platformsIcd.get(), nullptr); ASSERT_EQ(CL_SUCCESS, retVal); for (cl_uint i = 0; i < std::min(numPlatforms, numPlatformsIcd); i++) { EXPECT_EQ(platforms.get()[i], platformsIcd.get()[i]); } } TEST_F(clIcdGetPlatformIDsKHRTests, checkExtensionString) { const ClDeviceInfo &caps = pPlatform->getClDevice(0)->getDeviceInfo(); EXPECT_NE(std::string::npos, std::string(caps.deviceExtensions).find("cl_khr_icd")); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_intel_accelerator_tests.inl000066400000000000000000000232261363734646600304730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelAcceleratorTest : public api_tests { public: IntelAcceleratorTest() {} void SetUp() override { api_tests::SetUp(); } void TearDown() override { api_tests::TearDown(); } protected: cl_accelerator_intel accelerator = nullptr; cl_motion_estimation_desc_intel desc; cl_int retVal = 0xEEEEEEEEu; cl_int result = -1; }; struct IntelAcceleratorTestWithValidDescriptor : IntelAcceleratorTest { 
IntelAcceleratorTestWithValidDescriptor() {} void SetUp() override { IntelAcceleratorTest::SetUp(); desc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_QPEL_INTEL; desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL; desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; } void TearDown() override { IntelAcceleratorTest::TearDown(); } }; TEST_F(IntelAcceleratorTestWithValidDescriptor, GivenInvalidAcceleratorTypeWhenCreatingAcceleratorThenClInvalidAcceleratorTypeIntelErrorIsReturned) { auto INVALID_ACCELERATOR_TYPE = static_cast(0xEEEEEEEE); accelerator = clCreateAcceleratorINTEL( pContext, INVALID_ACCELERATOR_TYPE, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(static_cast(nullptr), accelerator); EXPECT_EQ(CL_INVALID_ACCELERATOR_TYPE_INTEL, retVal); } TEST_F(IntelAcceleratorTestWithValidDescriptor, GivenInvalidContextWhenCreatingAcceleratorThenClInvalidContextErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( nullptr, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(static_cast(nullptr), accelerator); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenReleasingAcceleratorThenClInvalidAcceleratorIntelErrorIsReturned) { result = clReleaseAcceleratorINTEL(nullptr); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); } TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenRetainingAcceleratorThenClInvalidAcceleratorIntelErrorIsReturned) { result = clRetainAcceleratorINTEL(nullptr); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); } struct IntelAcceleratorGetInfoTest : IntelAcceleratorTestWithValidDescriptor { IntelAcceleratorGetInfoTest() {} void SetUp() override { IntelAcceleratorTestWithValidDescriptor::SetUp(); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, 
accelerator); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { result = clReleaseAcceleratorINTEL(accelerator); ASSERT_EQ(CL_SUCCESS, result); IntelAcceleratorTestWithValidDescriptor::TearDown(); } protected: size_t param_value_size_ret = 0; }; TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenGettingAcceleratorInfoThenClInvalidAcceleratorIntelErrorIsReturned) { result = clGetAcceleratorInfoINTEL( nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); } TEST_F(IntelAcceleratorTest, GivenNullAcceleratorWhenGettingAcceleratorInfoThenParamValueAndSizeArePreserved) { cl_uint paramValue = 0xEEEEEEE1u; size_t paramSize = 0xEEEEEEE3u; result = clGetAcceleratorInfoINTEL( nullptr, 0, sizeof(paramValue), ¶mValue, ¶mSize); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, result); // No changes to inputs EXPECT_EQ(static_cast(0xEEEEEEE1u), paramValue); EXPECT_EQ(0xEEEEEEE3u, paramSize); } TEST_F(IntelAcceleratorGetInfoTest, GivenInvalidParamNameWhenGettingAcceleratorInfoThenClInvalidValueErrorIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, 0xEEEEEEEE, sizeof(cl_uint), nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(IntelAcceleratorGetInfoTest, GivenClAcceleratorReferenceCountIntelWhenGettingAcceleratorInfoThenParamValueSizeRetHasCorrectSize) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_uint), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenClAcceleratorReferenceCountIntelWhenGettingAcceleratorInfoThenParamValueIsOne) { cl_uint param_value = static_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), ¶m_value, nullptr); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1u, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenLongForDescriptorSizeWhenGettingAcceleratorInfoThenCorrectValuesAreReturned) { 
cl_uint param_value = static_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint) + 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_uint), param_value_size_ret); EXPECT_EQ(1u, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenShortForDescriptorSizeWhenGettingAcceleratorInfoThenClInvalidValueErrorIsReturned) { cl_uint param_value = static_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint) - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(IntelAcceleratorGetInfoTest, GivenZeroForDescriptorSizeGivenLongForDescriptorSizeWhenGettingAcceleratorInfoThenCorrectValuesAreReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_uint), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenCallToRetainAcceleratorWhenGettingAcceleratorInfoThenParamValueIsTwo) { cl_uint param_value = static_cast(-1); result = clRetainAcceleratorINTEL(accelerator); ASSERT_EQ(CL_SUCCESS, result); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(2u, param_value); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_REFERENCE_COUNT_INTEL, sizeof(cl_uint), ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1u, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenNullPtrForParamValueWhenGettingAcceleratorInfoThenClSuccessIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context), nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), 
param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenLongForDescriptorSizeWhenGettingAcceleratorContextInfoThenCorrectValuesAreReturned) { cl_context param_value = reinterpret_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context) + 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenAcceleratorContextIntelWhenGettingAcceleratorInfoThenCorrectValuesAreReturned) { cl_context param_value = reinterpret_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context), ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); cl_context referenceContext = static_cast(pContext); EXPECT_EQ(referenceContext, param_value); } TEST_F(IntelAcceleratorGetInfoTest, GivenShortForDescriptorSizeWhenGettingAcceleratorContextInfoThenClInvalidValueErrorIsReturned) { cl_context param_value = reinterpret_cast(-1); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, sizeof(cl_context) - 1, ¶m_value, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); } TEST_F(IntelAcceleratorGetInfoTest, GivenZeroForDescriptorSizeGivenLongForDescriptorSizeWhenGettingAcceleratorContextInfoThenCorrectValuesAreReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_CONTEXT_INTEL, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_context), param_value_size_ret); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_intel_motion_estimation.cpp000066400000000000000000000421211363734646600305210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include 
"opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelMotionEstimationTest : public api_tests { public: IntelMotionEstimationTest() {} void SetUp() override { api_tests::SetUp(); desc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_QPEL_INTEL; desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL; desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; } void TearDown() override { api_tests::TearDown(); } protected: cl_accelerator_intel accelerator = nullptr; cl_motion_estimation_desc_intel desc; cl_int retVal = 0xEEEEEEEEu; cl_int result = -1; }; typedef IntelMotionEstimationTest IntelMotionEstimationNegativeTest; TEST_F(IntelMotionEstimationNegativeTest, GivenNullDescriptorWhenCreatinAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenDescriptorSizeLongerThanActualWhenCreatinAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel) + 1, &desc, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenDescriptorSizeShorterThanActualWhenCreatinAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel) - 1, &desc, &retVal); 
EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidMacroBlockTypeWhenCreatinAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.mb_block_type = 0xEEEEEEEE; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSubPixelModeWhenCreatinAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.subpixel_mode = 0xEEEEEEEE; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSadAdjustModeWhenCreatinAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.sad_adjust_mode = 0xEEEEEEEE; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSearchPathTypeWhenCreatinAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.search_path_type = 0xEEEEEEEE; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), nullptr, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); ASSERT_EQ(static_cast(nullptr), accelerator); } TEST_F(IntelMotionEstimationTest, GivenValidArgumentsWhenCreatinAcceleratorThenAcceleratorIsCreated) { 
accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); EXPECT_EQ(CL_SUCCESS, retVal); auto acc = static_cast(accelerator); delete acc; } TEST_F(IntelMotionEstimationTest, GivenNullReturnWhenCreatinAcceleratorThenAcceleratorIsCreated) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, nullptr); ASSERT_NE(nullptr, accelerator); auto acc = static_cast(accelerator); delete acc; } TEST_F(IntelMotionEstimationTest, GivenValidAcceleratorWhenReleasingAcceleratorThenSuccessIsReturned) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); ASSERT_EQ(CL_SUCCESS, retVal); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); } TEST_F(IntelMotionEstimationTest, GivenValidAcceleratorWhenRetainingAndReleasingAcceleratorThanReferenceCountIsAdjustedCorrectly) { accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); ASSERT_EQ(CL_SUCCESS, retVal); auto pAccelerator = static_cast(accelerator); ASSERT_EQ(1, pAccelerator->getReference()); result = clRetainAcceleratorINTEL(accelerator); ASSERT_EQ(CL_SUCCESS, result); ASSERT_EQ(2, pAccelerator->getReference()); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1, pAccelerator->getReference()); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); } struct IntelMotionEstimationGetInfoTest : public IntelMotionEstimationTest { public: IntelMotionEstimationGetInfoTest() : type_returned(static_cast(-1)), param_value_size_ret(static_cast(-1)) {} void SetUp() override { 
IntelMotionEstimationTest::SetUp(); descReturn.mb_block_type = static_cast(-1); descReturn.subpixel_mode = static_cast(-1); descReturn.sad_adjust_mode = static_cast(-1); descReturn.search_path_type = static_cast(-1); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); ASSERT_NE(nullptr, accelerator); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); IntelMotionEstimationTest::TearDown(); } protected: cl_motion_estimation_desc_intel descReturn; cl_accelerator_type_intel type_returned; size_t param_value_size_ret; }; TEST_F(IntelMotionEstimationGetInfoTest, GivenValidParamsWhenGettingAcceleratorInfoThanDescriptorContainsCorrectInformation) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, sizeof(cl_motion_estimation_desc_intel), // exact &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); EXPECT_EQ(static_cast(CL_ME_MB_TYPE_16x16_INTEL), descReturn.mb_block_type); EXPECT_EQ(static_cast(CL_ME_SUBPIXEL_MODE_QPEL_INTEL), descReturn.subpixel_mode); EXPECT_EQ(static_cast(CL_ME_SAD_ADJUST_MODE_HAAR_INTEL), descReturn.sad_adjust_mode); EXPECT_EQ(static_cast(CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL), descReturn.search_path_type); } TEST_F(IntelMotionEstimationGetInfoTest, GivenTooShortDescriptorLengthWhenGettingAcceleratorInfoThanClInvalidValueErrorIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, sizeof(cl_motion_estimation_desc_intel) - 1, // short &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenDescriptorLengthZeroWhenGettingAcceleratorInfoThanClInvalidValueErrorIsReturned) { result = 
clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, 0, &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenLongerDescriptorLengthWhenGettingAcceleratorInfoThanCorrectDescriptorLengthIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, sizeof(cl_motion_estimation_desc_intel) + 1, // long &descReturn, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenDescriptorLengthZeroAndDescriptorNullWhenGettingAcceleratorInfoThanCorrectDescriptorLengthIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_DESCRIPTOR_INTEL, 0, // query required size w/nullptr return nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_motion_estimation_desc_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, getInfoTypeExpectPass) { ASSERT_EQ(sizeof(cl_accelerator_type_intel), sizeof(cl_uint)); result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_TYPE_INTEL, sizeof(cl_uint), &type_returned, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret); EXPECT_EQ(static_cast(CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL), type_returned); } TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelWhenGettingAcceleratorInfoThanClAcceleratorTypeMotionEstimationIntelIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_TYPE_INTEL, sizeof(cl_uint), // exact &type_returned, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret); EXPECT_EQ(static_cast(CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL), type_returned); } TEST_F(IntelMotionEstimationGetInfoTest, 
GivenAcceleratorTypeIntelAndTooShortTypeLengthWhenGettingAcceleratorInfoThanClInvalidValueIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_TYPE_INTEL, sizeof(cl_uint) - 1, // short &type_returned, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelAndTypeLengthZeroWhenGettingAcceleratorInfoThanClInvalidValueIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_TYPE_INTEL, 0, // very short &type_returned, ¶m_value_size_ret); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelAndTooLongTypeLengthWhenGettingAcceleratorInfoThanCorrectLengthIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_TYPE_INTEL, sizeof(cl_uint) + 1, // long &type_returned, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationGetInfoTest, GivenAcceleratorTypeIntelAndNullTypeWhenGettingAcceleratorInfoThanCorrectLengthIsReturned) { result = clGetAcceleratorInfoINTEL( accelerator, CL_ACCELERATOR_TYPE_INTEL, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_accelerator_type_intel), param_value_size_ret); } TEST_F(IntelMotionEstimationTest, GivenDescriptor8x8IntegerNone2x2WhenCreatingAcceleratorThenSuccessIsReturned) { desc.mb_block_type = CL_ME_MB_TYPE_8x8_INTEL; desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL; desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL; desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); result = 
clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); } TEST_F(IntelMotionEstimationTest, GivenDescriptor4x4HpelHaar16x12WhenCreatingAcceleratorThenSuccessIsReturned) { desc.mb_block_type = CL_ME_MB_TYPE_4x4_INTEL; desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_HPEL_INTEL; desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL; desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); } TEST_F(IntelMotionEstimationTest, GivenDescriptor16x16HpelHaar4x4WhenCreatingAcceleratorThenSuccessIsReturned) { desc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; desc.subpixel_mode = CL_ME_SUBPIXEL_MODE_QPEL_INTEL; desc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_HAAR_INTEL; desc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL; accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); result = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, result); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidBlockTypeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.mb_block_type = static_cast(-1); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidSubpixelModeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.subpixel_mode = static_cast(-1); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, 
&retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidAdjustModeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.sad_adjust_mode = static_cast(-1); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); } TEST_F(IntelMotionEstimationNegativeTest, GivenInvalidPathTypeWhenCreatingAcceleratorThenClInvalidAcceleratorDescriptorIntelErrorIsReturned) { desc.search_path_type = static_cast(-1); accelerator = clCreateAcceleratorINTEL( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(cl_motion_estimation_desc_intel), &desc, &retVal); EXPECT_EQ(CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_intel_tracing_tests.inl000066400000000000000000000646531363734646600276470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelTracingTest : public api_tests { public: IntelTracingTest() {} void SetUp() override { api_tests::SetUp(); } void TearDown() override { api_tests::TearDown(); } protected: static void callback(cl_function_id fid, cl_callback_data *callbackData, void *userData) { ASSERT_NE(nullptr, userData); IntelTracingTest *base = (IntelTracingTest *)userData; base->vcallback(fid, callbackData, nullptr); } virtual void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) {} protected: cl_tracing_handle handle = nullptr; cl_int status = CL_SUCCESS; }; TEST_F(IntelTracingTest, GivenInvalidDeviceExpectFail) { status = 
clCreateTracingHandleINTEL(nullptr, callback, nullptr, &handle); EXPECT_EQ(static_cast(nullptr), handle); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenInvalidCallbackExpectFail) { status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], nullptr, nullptr, &handle); EXPECT_EQ(static_cast(nullptr), handle); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenInvalidHandlePointerExpectFail) { status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenInvalidHandleExpectFail) { status = clSetTracingPointINTEL(nullptr, CL_FUNCTION_clBuildProgram, CL_TRUE); EXPECT_EQ(CL_INVALID_VALUE, status); status = clDestroyTracingHandleINTEL(nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); status = clEnableTracingINTEL(nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); status = clDisableTracingINTEL(nullptr); EXPECT_EQ(CL_INVALID_VALUE, status); status = clSetTracingPointINTEL(nullptr, CL_FUNCTION_clBuildProgram, CL_FALSE); EXPECT_EQ(CL_INVALID_VALUE, status); cl_bool enabled = CL_FALSE; status = clGetTracingStateINTEL(nullptr, &enabled); EXPECT_EQ(CL_INVALID_VALUE, status); } TEST_F(IntelTracingTest, GivenInactiveHandleExpectFail) { status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, nullptr, &handle); EXPECT_EQ(CL_SUCCESS, status); status = clDisableTracingINTEL(handle); EXPECT_EQ(CL_INVALID_VALUE, status); status = clDestroyTracingHandleINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); } TEST_F(IntelTracingTest, GivenTooManyHandlesExpectFail) { cl_tracing_handle handle[HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1] = {nullptr}; for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1; ++i) { status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, nullptr, &(handle[i])); EXPECT_EQ(CL_SUCCESS, status); } for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) 
{ status = clEnableTracingINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } status = clEnableTracingINTEL(handle[HostSideTracing::TRACING_MAX_HANDLE_COUNT]); EXPECT_EQ(CL_OUT_OF_RESOURCES, status); for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT; ++i) { status = clDisableTracingINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } for (uint32_t i = 0; i < HostSideTracing::TRACING_MAX_HANDLE_COUNT + 1; ++i) { status = clDestroyTracingHandleINTEL(handle[i]); EXPECT_EQ(CL_SUCCESS, status); } } TEST_F(IntelTracingTest, EnableTracingExpectPass) { status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, this, &handle); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle, CL_FUNCTION_clBuildProgram, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); cl_bool enabled = CL_FALSE; status = clGetTracingStateINTEL(handle, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_FALSE), enabled); status = clEnableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); status = clGetTracingStateINTEL(handle, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_TRUE), enabled); status = clDisableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); status = clGetTracingStateINTEL(handle, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_FALSE), enabled); status = clSetTracingPointINTEL(handle, CL_FUNCTION_clBuildProgram, CL_FALSE); EXPECT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); } TEST_F(IntelTracingTest, EnableTwoHandlesExpectPass) { cl_tracing_handle handle1 = nullptr; status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, this, &handle1); EXPECT_EQ(CL_SUCCESS, status); cl_tracing_handle handle2 = nullptr; status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, this, &handle2); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle1, CL_FUNCTION_clBuildProgram, CL_TRUE); 
EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle2, CL_FUNCTION_clBuildProgram, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(handle1); EXPECT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(handle2); EXPECT_EQ(CL_SUCCESS, status); cl_bool enabled = CL_FALSE; status = clGetTracingStateINTEL(handle1, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_TRUE), enabled); status = clGetTracingStateINTEL(handle2, &enabled); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(static_cast(CL_TRUE), enabled); status = clDisableTracingINTEL(handle1); EXPECT_EQ(CL_SUCCESS, status); status = clDisableTracingINTEL(handle2); EXPECT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle1); EXPECT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle2); EXPECT_EQ(CL_SUCCESS, status); } struct IntelAllTracingTest : public IntelTracingTest { public: IntelAllTracingTest() {} void SetUp() override { IntelTracingTest::SetUp(); status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, this, &handle); ASSERT_NE(nullptr, handle); ASSERT_EQ(CL_SUCCESS, status); for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_TRUE); ASSERT_EQ(CL_SUCCESS, status); } status = clEnableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); } void TearDown() override { status = clDisableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); IntelTracingTest::TearDown(); } protected: void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override { if (fid == functionId) { if (callbackData->site == CL_CALLBACK_SITE_ENTER) { ++enterCount; } else if (callbackData->site == CL_CALLBACK_SITE_EXIT) { ++exitCount; } } } uint16_t callFunctions() { uint16_t count = 0; ++count; functionId = CL_FUNCTION_clBuildProgram; clBuildProgram(0, 0, 0, 0, 0, 0); ++count; 
functionId = CL_FUNCTION_clCloneKernel; clCloneKernel(0, 0); ++count; functionId = CL_FUNCTION_clCompileProgram; clCompileProgram(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateBuffer; clCreateBuffer(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateCommandQueue; clCreateCommandQueue(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateCommandQueueWithProperties; clCreateCommandQueueWithProperties(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateContext; clCreateContext(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateContextFromType; clCreateContextFromType(0, 0, 0, 0, 0); ++count; cl_image_desc imageDesc = {0}; functionId = CL_FUNCTION_clCreateImage; clCreateImage(0, 0, 0, &imageDesc, 0, 0); ++count; functionId = CL_FUNCTION_clCreateImage2D; clCreateImage2D(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateImage3D; clCreateImage3D(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateKernel; clCreateKernel(0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateKernelsInProgram; clCreateKernelsInProgram(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreatePipe; clCreatePipe(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithBinary; const size_t length = 32; unsigned char binary[length] = {0}; clCreateProgramWithBinary(0, 0, &(devices[testedRootDeviceIndex]), &length, reinterpret_cast(&binary), 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithBuiltInKernels; clCreateProgramWithBuiltInKernels(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithIL; clCreateProgramWithIL(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateProgramWithSource; clCreateProgramWithSource(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateSampler; clCreateSampler(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateSamplerWithProperties; clCreateSamplerWithProperties(0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateSubBuffer; 
clCreateSubBuffer(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateUserEvent; clCreateUserEvent(0, 0); ++count; functionId = CL_FUNCTION_clEnqueueBarrier; clEnqueueBarrier(0); ++count; functionId = CL_FUNCTION_clEnqueueBarrierWithWaitList; clEnqueueBarrierWithWaitList(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyBuffer; clEnqueueCopyBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyBufferRect; clEnqueueCopyBufferRect(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyBufferToImage; clEnqueueCopyBufferToImage(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyImage; clEnqueueCopyImage(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueCopyImageToBuffer; clEnqueueCopyImageToBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueFillBuffer; clEnqueueFillBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueFillImage; clEnqueueFillImage(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMapBuffer; clEnqueueMapBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMapImage; clEnqueueMapImage(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMarker; clEnqueueMarker(0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMarkerWithWaitList; clEnqueueMarkerWithWaitList(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueMigrateMemObjects; clEnqueueMigrateMemObjects(0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueNDRangeKernel; clEnqueueNDRangeKernel(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueNativeKernel; clEnqueueNativeKernel(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueReadBuffer; clEnqueueReadBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueReadBufferRect; clEnqueueReadBufferRect(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0); ++count; functionId = CL_FUNCTION_clEnqueueReadImage; clEnqueueReadImage(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMFree; clEnqueueSVMFree(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMap; clEnqueueSVMMap(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMemFill; clEnqueueSVMMemFill(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMemcpy; clEnqueueSVMMemcpy(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMMigrateMem; clEnqueueSVMMigrateMem(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueSVMUnmap; clEnqueueSVMUnmap(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueTask; clEnqueueTask(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueUnmapMemObject; clEnqueueUnmapMemObject(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWaitForEvents; clEnqueueWaitForEvents(0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWriteBuffer; clEnqueueWriteBuffer(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWriteBufferRect; clEnqueueWriteBufferRect(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueWriteImage; clEnqueueWriteImage(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clFinish; clFinish(0); ++count; functionId = CL_FUNCTION_clFlush; clFlush(0); ++count; functionId = CL_FUNCTION_clGetCommandQueueInfo; clGetCommandQueueInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetContextInfo; clGetContextInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetDeviceAndHostTimer; clGetDeviceAndHostTimer(0, 0, 0); ++count; functionId = CL_FUNCTION_clGetDeviceIDs; clGetDeviceIDs(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetDeviceInfo; clGetDeviceInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetEventInfo; clGetEventInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetEventProfilingInfo; 
clGetEventProfilingInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetExtensionFunctionAddress; clGetExtensionFunctionAddress("test"); ++count; functionId = CL_FUNCTION_clGetExtensionFunctionAddressForPlatform; clGetExtensionFunctionAddressForPlatform(0, "test"); ++count; functionId = CL_FUNCTION_clGetHostTimer; clGetHostTimer(0, 0); ++count; functionId = CL_FUNCTION_clGetImageInfo; clGetImageInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelArgInfo; clGetKernelArgInfo(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelInfo; clGetKernelInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelSubGroupInfo; clGetKernelSubGroupInfo(0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetKernelWorkGroupInfo; clGetKernelWorkGroupInfo(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetMemObjectInfo; clGetMemObjectInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetPipeInfo; clGetPipeInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetPlatformIDs; clGetPlatformIDs(0, 0, 0); ++count; functionId = CL_FUNCTION_clGetPlatformInfo; clGetPlatformInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetProgramBuildInfo; clGetProgramBuildInfo(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetProgramInfo; clGetProgramInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetSamplerInfo; clGetSamplerInfo(0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetSupportedImageFormats; clGetSupportedImageFormats(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clLinkProgram; clLinkProgram(0, 0, 0, 0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clReleaseCommandQueue; clReleaseCommandQueue(0); ++count; functionId = CL_FUNCTION_clReleaseContext; clReleaseContext(0); ++count; functionId = CL_FUNCTION_clReleaseDevice; clReleaseDevice(0); ++count; functionId = CL_FUNCTION_clReleaseEvent; clReleaseEvent(0); ++count; functionId = CL_FUNCTION_clReleaseKernel; clReleaseKernel(0); ++count; 
functionId = CL_FUNCTION_clReleaseMemObject; clReleaseMemObject(0); ++count; functionId = CL_FUNCTION_clReleaseProgram; clReleaseProgram(0); ++count; functionId = CL_FUNCTION_clReleaseSampler; clReleaseSampler(0); ++count; functionId = CL_FUNCTION_clRetainCommandQueue; clRetainCommandQueue(0); ++count; functionId = CL_FUNCTION_clRetainContext; clRetainContext(0); ++count; functionId = CL_FUNCTION_clRetainDevice; clRetainDevice(0); ++count; functionId = CL_FUNCTION_clRetainEvent; clRetainEvent(0); ++count; functionId = CL_FUNCTION_clRetainKernel; clRetainKernel(0); ++count; functionId = CL_FUNCTION_clRetainMemObject; clRetainMemObject(0); ++count; functionId = CL_FUNCTION_clRetainProgram; clRetainProgram(0); ++count; functionId = CL_FUNCTION_clRetainSampler; clRetainSampler(0); ++count; functionId = CL_FUNCTION_clSVMAlloc; clSVMAlloc(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSVMFree; clSVMFree(0, 0); ++count; functionId = CL_FUNCTION_clSetCommandQueueProperty; clSetCommandQueueProperty(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSetDefaultDeviceCommandQueue; clSetDefaultDeviceCommandQueue(0, 0, 0); ++count; functionId = CL_FUNCTION_clSetEventCallback; clSetEventCallback(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSetKernelArg; clSetKernelArg(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSetKernelArgSVMPointer; clSetKernelArgSVMPointer(0, 0, 0); ++count; functionId = CL_FUNCTION_clSetKernelExecInfo; clSetKernelExecInfo(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clSetMemObjectDestructorCallback; clSetMemObjectDestructorCallback(0, 0, 0); ++count; functionId = CL_FUNCTION_clSetUserEventStatus; clSetUserEventStatus(0, 0); ++count; functionId = CL_FUNCTION_clUnloadCompiler; clUnloadCompiler(); ++count; functionId = CL_FUNCTION_clUnloadPlatformCompiler; clUnloadPlatformCompiler(0); ++count; functionId = CL_FUNCTION_clWaitForEvents; clWaitForEvents(0, 0); return count; } protected: uint16_t enterCount = 0; uint16_t exitCount = 0; cl_function_id 
functionId = CL_FUNCTION_COUNT; }; TEST_F(IntelAllTracingTest, GivenAllFunctionsToTraceExpectPass) { uint16_t count = callFunctions(); EXPECT_EQ(count, enterCount); EXPECT_EQ(count, exitCount); } TEST_F(IntelAllTracingTest, GivenNoFunctionsToTraceExpectPass) { for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_FALSE); EXPECT_EQ(CL_SUCCESS, status); } callFunctions(); EXPECT_EQ(0, enterCount); EXPECT_EQ(0, exitCount); } struct IntelClGetDeviceInfoTracingCollectTest : public IntelAllTracingTest { public: IntelClGetDeviceInfoTracingCollectTest() {} protected: void call(cl_device_id target) { device = target; status = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, nullptr, ¶mValueSizeRet); } void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override { EXPECT_EQ(CL_FUNCTION_clGetDeviceInfo, fid); EXPECT_NE(nullptr, callbackData); if (callbackData->site == CL_CALLBACK_SITE_ENTER) { correlationId = callbackData->correlationId; EXPECT_NE(nullptr, callbackData->correlationData); callbackData->correlationData[0] = 777ull; } else { EXPECT_EQ(correlationId, callbackData->correlationId); EXPECT_NE(nullptr, callbackData->correlationData); EXPECT_EQ(777ull, callbackData->correlationData[0]); } EXPECT_NE(nullptr, callbackData->functionName); EXPECT_STREQ("clGetDeviceInfo", callbackData->functionName); EXPECT_NE(nullptr, callbackData->functionParams); if (callbackData->site == CL_CALLBACK_SITE_ENTER) { EXPECT_EQ(nullptr, callbackData->functionReturnValue); } else { EXPECT_NE(nullptr, callbackData->functionReturnValue); } cl_params_clGetDeviceInfo *params = (cl_params_clGetDeviceInfo *)callbackData->functionParams; EXPECT_NE(nullptr, *params->device); EXPECT_EQ(static_cast(CL_DEVICE_VENDOR), *params->paramName); EXPECT_EQ(0u, *params->paramValueSize); EXPECT_EQ(nullptr, *params->paramValue); if (callbackData->site == CL_CALLBACK_SITE_EXIT) { cl_int *retVal = (cl_int 
*)callbackData->functionReturnValue; EXPECT_EQ(CL_SUCCESS, *retVal); EXPECT_LT(0u, **params->paramValueSizeRet); } if (callbackData->site == CL_CALLBACK_SITE_ENTER) { ++enterCount; } else if (callbackData->site == CL_CALLBACK_SITE_EXIT) { ++exitCount; } } protected: cl_device_id device = nullptr; size_t paramValueSizeRet = 0; uint64_t correlationId = 0; }; TEST_F(IntelClGetDeviceInfoTracingCollectTest, GeneralTracingCollectionExpectPass) { call(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, status); EXPECT_LT(0u, paramValueSizeRet); EXPECT_EQ(1u, enterCount); EXPECT_EQ(1u, exitCount); } struct IntelClGetDeviceInfoTracingChangeParamsTest : public IntelAllTracingTest { public: IntelClGetDeviceInfoTracingChangeParamsTest() {} protected: void call(cl_device_id target) { device = target; status = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, nullptr, ¶mValueSizeRet); } void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override { if (callbackData->site == CL_CALLBACK_SITE_ENTER) { cl_params_clGetDeviceInfo *params = (cl_params_clGetDeviceInfo *)callbackData->functionParams; *params->paramValueSize = paramValueSize; *params->paramValue = paramValue; } } protected: cl_device_id device = nullptr; size_t paramValueSizeRet = 0; static const size_t paramValueSize = 256; char paramValue[paramValueSize]; }; TEST_F(IntelClGetDeviceInfoTracingChangeParamsTest, GeneralTracingWithParamsChangeExpectPass) { paramValue[0] = '\0'; call(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, status); EXPECT_LT(0u, paramValueSizeRet); EXPECT_STRNE("", paramValue); } struct IntelClGetDeviceInfoTracingChangeRetValTest : public IntelAllTracingTest { public: IntelClGetDeviceInfoTracingChangeRetValTest() {} protected: void call(cl_device_id target) { device = target; status = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, nullptr, ¶mValueSizeRet); } void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) override { if 
(callbackData->site == CL_CALLBACK_SITE_EXIT) { cl_int *retVal = reinterpret_cast(callbackData->functionReturnValue); *retVal = CL_INVALID_VALUE; } } protected: cl_device_id device = nullptr; size_t paramValueSizeRet = 0; }; TEST_F(IntelClGetDeviceInfoTracingChangeRetValTest, GeneralTracingWithRetValChangeExpectPass) { call(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_INVALID_VALUE, status); EXPECT_LT(0u, paramValueSizeRet); } struct IntelClGetDeviceInfoTwoHandlesTracingCollectTest : public IntelClGetDeviceInfoTracingCollectTest { public: IntelClGetDeviceInfoTwoHandlesTracingCollectTest() {} void SetUp() override { IntelClGetDeviceInfoTracingCollectTest::SetUp(); status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, this, &secondHandle); ASSERT_NE(nullptr, secondHandle); ASSERT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(secondHandle, CL_FUNCTION_clGetDeviceInfo, CL_TRUE); ASSERT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(secondHandle); ASSERT_EQ(CL_SUCCESS, status); } void TearDown() override { status = clDisableTracingINTEL(secondHandle); ASSERT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(secondHandle); ASSERT_EQ(CL_SUCCESS, status); IntelClGetDeviceInfoTracingCollectTest::TearDown(); } protected: cl_tracing_handle secondHandle = nullptr; }; TEST_F(IntelClGetDeviceInfoTwoHandlesTracingCollectTest, GeneralTracingCollectionWithTwoHandlesExpectPass) { call(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, status); EXPECT_LT(0u, paramValueSizeRet); EXPECT_EQ(2u, enterCount); EXPECT_EQ(2u, exitCount); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_intel_tracing_tests_mt.cpp000066400000000000000000000067651363734646600303470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include 
"opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelTracingMtTest : public api_tests { public: IntelTracingMtTest() : started(false), count(0) {} void SetUp() override { api_tests::SetUp(); } void TearDown() override { api_tests::TearDown(); } protected: static void threadBody(int iterationCount, IntelTracingMtTest *test) { test->vthreadBody(iterationCount); } virtual void vthreadBody(int iterationCount) { cl_int status = CL_SUCCESS; cl_platform_id platform = nullptr; const uint32_t maxStrSize = 1024; char buffer[maxStrSize] = {0}; while (!started) { } for (int i = 0; i < iterationCount; ++i) { HostSideTracing::AtomicBackoff backoff; status = clGetDeviceInfo(devices[testedRootDeviceIndex], CL_DEVICE_NAME, maxStrSize, buffer, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); status = clGetDeviceInfo(devices[testedRootDeviceIndex], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, maxStrSize, buffer, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, maxStrSize, buffer, nullptr); EXPECT_EQ(CL_SUCCESS, status); backoff.pause(); } } static void callback(cl_function_id fid, cl_callback_data *callbackData, void *userData) { ASSERT_NE(nullptr, userData); IntelTracingMtTest *base = (IntelTracingMtTest *)userData; base->vcallback(fid, callbackData, nullptr); } virtual void vcallback(cl_function_id fid, cl_callback_data *callbackData, void *userData) { if (fid == CL_FUNCTION_clGetDeviceInfo || fid == CL_FUNCTION_clGetPlatformInfo) { ++count; } } protected: cl_tracing_handle handle = nullptr; cl_int status = CL_SUCCESS; std::atomic started; std::atomic count; }; TEST_F(IntelTracingMtTest, SafeTracingFromMultipleThreads) { status = clCreateTracingHandleINTEL(devices[testedRootDeviceIndex], callback, this, &handle); EXPECT_EQ(CL_SUCCESS, status); 
status = clSetTracingPointINTEL(handle, CL_FUNCTION_clGetDeviceInfo, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); status = clSetTracingPointINTEL(handle, CL_FUNCTION_clGetPlatformInfo, CL_TRUE); EXPECT_EQ(CL_SUCCESS, status); status = clEnableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); int numThreads = 4; int iterationCount = 1024; std::vector threads; for (int i = 0; i < numThreads; ++i) { threads.push_back(std::thread(threadBody, iterationCount, this)); } started = true; for (auto &thread : threads) { thread.join(); } status = clDisableTracingINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle); EXPECT_EQ(CL_SUCCESS, status); int callsPerIteration = 4; int callbacksPerCall = 2; EXPECT_EQ(numThreads * iterationCount * callsPerIteration * callbacksPerCall, count); } } // namespace ULTcompute-runtime-20.13.16352/opencl/test/unit_test/api/cl_link_program_tests.inl000066400000000000000000000066071363734646600275040ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" #include "compiler_options.h" using namespace NEO; namespace ULT { typedef api_tests clLinkProgramTests; TEST_F(clLinkProgramTests, GivenValidParamsWhenLinkingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, num_devices, 
devices, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; cl_program oprog; oprog = clLinkProgram( pContext, num_devices, devices, nullptr, 1, &program, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(oprog); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clLinkProgramTests, GivenCreateLibraryOptionWhenLinkingProgramThenSuccessIsReturned) { cl_program pProgram = nullptr; size_t sourceSize = 0; std::string testFile; testFile.append(clFiles); testFile.append("copybuffer.cl"); auto pSource = loadDataFromFile( testFile.c_str(), sourceSize); ASSERT_NE(0u, sourceSize); ASSERT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( pContext, 1, sources, &sourceSize, &retVal); EXPECT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clCompileProgram( pProgram, num_devices, devices, nullptr, 0, nullptr, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; cl_program oprog; oprog = clLinkProgram( pContext, num_devices, devices, CompilerOptions::createLibrary, 1, &program, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(oprog); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clLinkProgramTests, GivenNullContextWhenLinkingProgramThenClInvalidContextErrorIsReturned) { cl_program program = {0}; cl_program oprog; oprog = clLinkProgram( nullptr, num_devices, devices, nullptr, 1, &program, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, oprog); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp000066400000000000000000000466151363734646600330610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/api/api.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "test.h" using namespace NEO; namespace clMemLocallyUncachedResourceTests { template uint32_t argMocs(Kernel &kernel, size_t argIndex) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(); auto surfaceStateHeapAddressOffset = kernel.getKernelInfo().kernelArgInfo[argIndex].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); return surfaceState->getMemoryObjectControlState(); } template uint32_t cmdQueueMocs(CommandQueue *pCmdQ) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto pCmdQHw = reinterpret_cast *>(pCmdQ); auto &csr = pCmdQHw->getGpgpuCommandStreamReceiver(); HardwareParse hwParse; hwParse.parseCommands(csr.getCS(0), 0); auto itorCmd = reverse_find(hwParse.cmdList.rbegin(), hwParse.cmdList.rend()); EXPECT_NE(hwParse.cmdList.rend(), itorCmd); auto sba = genCmdCast(*itorCmd); EXPECT_NE(nullptr, sba); return sba->getStatelessDataPortAccessMemoryObjectControlState(); } const size_t n = 512; const size_t globalWorkSize[3] = {n, 1, 1}; const size_t localWorkSize[3] = {256, 1, 1}; const cl_mem_properties_intel *propertiesCacheable = nullptr; const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, 
CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0}; using clMemLocallyUncachedResourceFixture = Test>; HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); mockKernel.kernelInfo.usesSsh = true; mockKernel.kernelInfo.requiresSshForBuffers = true; auto kernel = mockKernel.mockKernel; auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, 
nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); } HWTEST_F(clMemLocallyUncachedResourceFixture, 
givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedItIsCached) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto kernel = mockKernel.mockKernel; mockKernel.kernelInfo.usesSsh = true; mockKernel.kernelInfo.requiresSshForBuffers = true; EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, 
retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); } HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto kernel = mockKernel.mockKernel; mockKernel.kernelInfo.usesSsh = true; mockKernel.kernelInfo.requiresSshForBuffers = true; mockKernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = true; 
mockKernel.kernelInfo.kernelArgInfo[1].pureStatefulBufferAccess = true; EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); } HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto kernel = mockKernel.mockKernel; mockKernel.kernelInfo.usesSsh = true; mockKernel.kernelInfo.requiresSshForBuffers = true; EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); auto 
bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); auto pBufferUncacheable = clUniquePtr(castToObject(bufferUncacheable)); auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); kernel->unsetArg(0); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); kernel->unsetArg(0); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); } HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateAndAreNotUsedInStatelessFashionThenThoseResourcesAreNotRegistredAsResourcesForCacheFlush) { cl_int retVal = CL_SUCCESS; MockKernelWithInternals mockKernel(*this->pClDevice, context, true); auto kernel = mockKernel.mockKernel; mockKernel.kernelInfo.usesSsh = true; mockKernel.kernelInfo.requiresSshForBuffers = true; mockKernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = true; mockKernel.kernelInfo.kernelArgInfo[1].pureStatefulBufferAccess = true; EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto bufferUncacheableInSurfaceState = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr); auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheableInSurfaceState); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, kernel->kernelArgRequiresCacheFlush[0]); retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); clReleaseMemObject(bufferUncacheableInSurfaceState); clReleaseMemObject(bufferUncacheable); clReleaseMemObject(bufferCacheable); } } // namespace clMemLocallyUncachedResourceTests 
compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_release_command_queue_tests.inl000066400000000000000000000053751363734646600313430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" #include using namespace NEO; namespace DeviceHostQueue { typedef ::testing::Types QueueTypes; template class clReleaseCommandQueueTypeTests : public DeviceHostQueueFixture {}; TYPED_TEST_CASE(clReleaseCommandQueueTypeTests, QueueTypes); TYPED_TEST(clReleaseCommandQueueTypeTests, GivenValidCmdQueueWhenReleasingCmdQueueThenSucessIsReturned) { if (std::is_same::value && !castToObject(this->devices[this->testedRootDeviceIndex])->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { return; } using BaseType = typename TypeParam::BaseType; auto queue = this->createClQueue(); ASSERT_EQ(CL_SUCCESS, this->retVal); auto qObject = castToObject(static_cast(queue)); ASSERT_NE(qObject, nullptr); this->retVal = clReleaseCommandQueue(queue); EXPECT_EQ(CL_SUCCESS, this->retVal); } TEST(clReleaseCommandQueueTypeTests, GivenNullCmdQueueWhenReleasingCmdQueueThenClInvalidCommandQueueErrorIsReturned) { auto retVal = clReleaseCommandQueue(nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace DeviceHostQueue namespace ULT { typedef api_tests clReleaseCommandQueueTests; TEST_F(clReleaseCommandQueueTests, givenBlockedEnqueueWithOutputEventStoredAsVirtualEventWhenReleasingCmdQueueThenInternalRefCountIsDecrementedAndQueueDeleted) { cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; ClDevice *device = (ClDevice *)devices[testedRootDeviceIndex]; MockKernelWithInternals kernelInternals(*device, pContext); Kernel *kernel = kernelInternals.mockKernel; cmdQ = 
clCreateCommandQueue(pContext, devices[testedRootDeviceIndex], properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; cl_event event = clCreateUserEvent(pContext, &retVal); cl_event eventOut = nullptr; EXPECT_EQ(success, retVal); retVal = clEnqueueNDRangeKernel(cmdQ, kernel, 1, offset, gws, nullptr, 1, &event, &eventOut); EXPECT_EQ(success, retVal); EXPECT_NE(nullptr, eventOut); clSetUserEventStatus(event, CL_COMPLETE); clReleaseEvent(event); clReleaseEvent(eventOut); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(success, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_release_context_tests.inl000066400000000000000000000014261363734646600301760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseContextTests; namespace ULT { TEST_F(clReleaseContextTests, GivenValidContextWhenReleasingContextThenSuccessIsReturned) { auto context = clCreateContext( nullptr, 1u, &testedClDevice, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, context); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clReleaseContextTests, GivenNullContextWhenReleasingContextThenClInvalidContextIsReturned) { auto retVal = clReleaseContext(nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_release_event_tests.inl000066400000000000000000000113471363734646600276360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/source/event/event.h" #include "test.h" #include "cl_api_tests.h" using namespace NEO; namespace ClReleaseEventTests { template class 
EventFixture : public ApiFixture<>, public T { public: void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } }; typedef EventFixture<::testing::Test> clEventTests; TEST_F(clEventTests, GivenNullEventWhenReleasingEventThenClInvalidEventErrorIsReturned) { auto retVal = clReleaseEvent(nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST_F(clEventTests, GivenValidEventWhenReleasingEventTheSuccessIsReturned) { auto *pEvent = new Event(nullptr, 0, 0, 0); ASSERT_NE(nullptr, pEvent); cl_event event = (cl_event)pEvent; auto retVal = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, retVal); //no delete operation. clReleaseEvent should do this for us } TEST_F(clEventTests, GivenValidEventWhenRetainedAndReleasedThenReferenceCountIsUpdated) { auto *pEvent = new Event(nullptr, 0, 0, 0); ASSERT_NE(nullptr, pEvent); cl_event event = (cl_event)pEvent; auto retVal = clRetainEvent(event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->getReference(), 2); retVal = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->getReference(), 1); delete pEvent; } TEST_F(clEventTests, GivenValidEventWhenRetainedAndReleasedTwiceThenClSuccessIsReturned) { auto *pEvent = new Event(nullptr, 0, 0, 0); ASSERT_NE(nullptr, pEvent); cl_event event = (cl_event)pEvent; auto retVal = clRetainEvent(event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->getReference(), 2); retVal = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->getReference(), 1); retVal = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clEventTests, GivenNullEventWhenRetainingEventThenClInvalidEventErrorIsReturned) { auto retVal = clRetainEvent(nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST_F(clEventTests, GivenValidEventWhenGettingEventInfoThenSuccessIsReturned) { cl_command_queue cmdQ; auto *pEvent = new Event(nullptr, 0, 0, 0); cl_event event = (cl_event)pEvent; auto retVal = clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE, 
sizeof(cmdQ), &cmdQ, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pEvent; } TEST_F(clEventTests, GivenNullEventWhenGettingEventInfoThenClInvalidEventErrorIsReturned) { cl_command_queue cmdQ; auto retVal = clGetEventInfo(nullptr, CL_EVENT_COMMAND_QUEUE, sizeof(cmdQ), &cmdQ, nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST_F(clEventTests, GivenInvalidEventWhenWaitingForEventsThenClInvalidEventErrorIsReturned) { char *ptr = new char[sizeof(Event)]; cl_event event = (cl_event)ptr; auto retVal = clWaitForEvents(1, &event); EXPECT_EQ(CL_INVALID_EVENT, retVal); delete[] ptr; } TEST_F(clEventTests, GivenValidEventWhenSettingStatusMultipleTimesThenClInvalidOperationErrorIsReturned) { cl_int retVal = 0; auto event = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(event, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(event, CL_COMPLETE); EXPECT_EQ(CL_INVALID_OPERATION, retVal); clReleaseEvent(event); } typedef EventFixture<::testing::TestWithParam>> clEventStatusTests; TEST_P(clEventStatusTests, GivenExecutionStatusWhenSettingUserEventStatusThenSuccessOrCorrectErrorIsReturned) { cl_int retVal = 0; cl_event event = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto status = std::get<0>(GetParam()); auto expect = std::get<1>(GetParam()); retVal = clSetUserEventStatus(event, status); EXPECT_EQ(expect, retVal); clReleaseEvent(event); } cl_int validStatus[] = {CL_COMPLETE, -1}; cl_int expectValidStatus[] = {CL_SUCCESS}; cl_int invalidStatus[] = {CL_QUEUED, CL_SUBMITTED, 12}; cl_int expectInvalidStatus[] = {CL_INVALID_VALUE}; INSTANTIATE_TEST_CASE_P(SetValidStatus, clEventStatusTests, ::testing::Combine( ::testing::ValuesIn(validStatus), ::testing::ValuesIn(expectValidStatus))); INSTANTIATE_TEST_CASE_P(SetInvalidStatus, clEventStatusTests, ::testing::Combine( ::testing::ValuesIn(invalidStatus), ::testing::ValuesIn(expectInvalidStatus))); } // namespace ClReleaseEventTests 
compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_release_kernel_tests.inl000066400000000000000000000047671363734646600300050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseKernelTests; namespace ULT { TEST_F(clReleaseKernelTests, GivenNullKernelWhenReleasingKernelThenClInvalidKernelErrorIsReturned) { retVal = clReleaseKernel(nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clReleaseKernelTests, GivenRetainedKernelWhenReleasingKernelThenKernelIsCorrectlyReleased) { cl_kernel kernel = nullptr; cl_program program = nullptr; cl_int binaryStatus = CL_SUCCESS; size_t binarySize = 0; std::string testFile; retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".bin"); auto binary = loadDataFromFile(testFile.c_str(), binarySize); ASSERT_NE(0u, binarySize); ASSERT_NE(nullptr, binary); unsigned const char *binaries[1] = {reinterpret_cast(binary.get())}; program = clCreateProgramWithBinary(pContext, num_devices, devices, &binarySize, binaries, &binaryStatus, &retVal); binary.reset(); EXPECT_NE(nullptr, program); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clBuildProgram(program, num_devices, devices, nullptr, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); kernel = clCreateKernel(program, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); cl_uint theRef; retVal = clGetKernelInfo(kernel, CL_KERNEL_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clRetainKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetKernelInfo(kernel, CL_KERNEL_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(3u, theRef); retVal = clReleaseKernel(kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clGetKernelInfo(kernel, CL_KERNEL_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseKernel(kernel); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(program); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_release_mem_obj_tests.inl000066400000000000000000000015311363734646600301170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseMemObjectTests; namespace ULT { TEST_F(clReleaseMemObjectTests, GivenValidBufferWhenReleasingMemObjectThenSuccessIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; std::unique_ptr hostMem(new char[bufferSize]); memset(hostMem.get(), 0xaa, bufferSize); buffer = clCreateBuffer( pContext, flags, bufferSize, hostMem.get(), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_release_program_tests.inl000066400000000000000000000026461363734646600301660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clReleaseProgramTests; TEST_F(clReleaseProgramTests, GivenNullProgramWhenReleasingProgramThenClInvalidProgramIsReturned) { auto retVal = clReleaseProgram(nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } static const char fakeSrc[] = "__kernel void func(void) { }"; 
TEST_F(clReleaseProgramTests, GivenRetainedProgramWhenReleasingProgramThenProgramIsReleasedAndProgramReferenceCountDecrementedCorrectly) { size_t srcLen = sizeof(fakeSrc); const char *src = fakeSrc; cl_int retVal; cl_uint theRef; cl_program prog = clCreateProgramWithSource(pContext, 1, &src, &srcLen, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainProgram(prog); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetProgramInfo(prog, CL_PROGRAM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseProgram(prog); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetProgramInfo(prog, CL_PROGRAM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseProgram(prog); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_retain_mem_obj_tests.inl000066400000000000000000000026321363734646600277640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clRetainMemObjectTests; namespace ULT { TEST_F(clRetainMemObjectTests, GivenValidParamsWhenRetainingMemObjectThenRefCountIsIncremented) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; static const unsigned int bufferSize = 16; cl_mem buffer = nullptr; cl_int retVal; cl_uint theRef; std::unique_ptr hostMem(new char[bufferSize]); memset(hostMem.get(), 0xaa, bufferSize); buffer = clCreateBuffer( pContext, flags, bufferSize, hostMem.get(), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clRetainMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetMemObjectInfo(buffer, CL_MEM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clGetMemObjectInfo(buffer, CL_MEM_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_retain_release_command_queue_tests.inl000066400000000000000000000041021363734646600326700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" using namespace NEO; namespace DeviceHostQueue { typedef ::testing::Types QueueTypes; template class clRetainReleaseCommandQueueTests : public DeviceHostQueueFixture {}; TYPED_TEST_CASE(clRetainReleaseCommandQueueTests, QueueTypes); TYPED_TEST(clRetainReleaseCommandQueueTests, GivenValidCommandQueueWhenRetainingAndReleasingThenReferenceCountIsUpdatedCorrectly) { if (std::is_same::value && !castToObject(this->devices[this->testedRootDeviceIndex])->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { return; } using BaseType = typename TypeParam::BaseType; auto queue = this->createClQueue(); ASSERT_EQ(CL_SUCCESS, this->retVal); auto qObject = castToObject(static_cast(queue)); ASSERT_NE(qObject, nullptr); cl_uint refCount; this->retVal = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(cl_uint), &refCount, NULL); EXPECT_EQ(CL_SUCCESS, this->retVal); EXPECT_EQ(1u, refCount); this->retVal = clRetainCommandQueue(queue); EXPECT_EQ(CL_SUCCESS, this->retVal); this->retVal = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(cl_uint), &refCount, NULL); EXPECT_EQ(CL_SUCCESS, this->retVal); EXPECT_EQ(2u, refCount); this->retVal = clReleaseCommandQueue(queue); EXPECT_EQ(CL_SUCCESS, this->retVal); this->retVal = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(cl_uint), &refCount, NULL); 
EXPECT_EQ(CL_SUCCESS, this->retVal); EXPECT_EQ(1u, refCount); this->retVal = clReleaseCommandQueue(queue); EXPECT_EQ(CL_SUCCESS, this->retVal); } } // namespace DeviceHostQueue compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_retain_release_context_tests.inl000066400000000000000000000025531363734646600315420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/platform/platform.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clRetainReleaseContextTests; namespace ULT { TEST_F(clRetainReleaseContextTests, GivenValidContextWhenRetainingAndReleasingThenContextReferenceCountIsUpdatedCorrectly) { cl_context context = clCreateContext(nullptr, 1, &testedClDevice, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cl_uint theRef; retVal = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clRetainContext(context); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_retain_release_device_tests.inl000066400000000000000000000034151363734646600313130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/platform/platform.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clRetainReleaseDeviceTests; namespace ULT { TEST_F(clRetainReleaseDeviceTests, 
GivenRootDeviceWhenRetainingThenReferenceCountIsOne) { cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, numEntries, devices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainDevice(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clRetainDevice(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint theRef; retVal = clGetDeviceInfo(devices[testedRootDeviceIndex], CL_DEVICE_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); } TEST_F(clRetainReleaseDeviceTests, GivenRootDeviceWhenReleasingThenReferenceCountIsOne) { constexpr cl_uint numEntries = numRootDevices; cl_device_id devices[numRootDevices]; auto retVal = clGetDeviceIDs(pPlatform, CL_DEVICE_TYPE_GPU, numEntries, devices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseDevice(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseDevice(devices[testedRootDeviceIndex]); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint theRef; retVal = clGetDeviceInfo(devices[testedRootDeviceIndex], CL_DEVICE_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_retain_release_sampler_tests.inl000066400000000000000000000026351363734646600315220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clRetainReleaseSamplerTests; namespace ULT { TEST_F(clRetainReleaseSamplerTests, GivenValidSamplerWhenRetainingThenSamplerReferenceCountIsIncremented) { cl_int retVal = CL_SUCCESS; auto sampler = clCreateSampler(pContext, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, sampler); 
cl_uint theRef; retVal = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clRetainSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, theRef); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof(cl_uint), &theRef, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, theRef); retVal = clReleaseSampler(sampler); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_set_default_device_command_queue_tests.inl000066400000000000000000000127411363734646600335340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue.h" #include "test.h" #include "cl_api_tests.h" using namespace NEO; namespace ULT { struct clSetDefaultDeviceCommandQueueApiTest : public api_tests { clSetDefaultDeviceCommandQueueApiTest() { } void SetUp() override { api_tests::SetUp(); cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, 0, 0}; deviceQueue = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], properties, &retVal); if (!pContext->getDevice(0u)->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { ASSERT_EQ(nullptr, deviceQueue); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); GTEST_SKIP(); } else { ASSERT_NE(nullptr, deviceQueue); ASSERT_EQ(CL_SUCCESS, retVal); } } void TearDown() override { if (deviceQueue) { retVal = clReleaseCommandQueue(deviceQueue); EXPECT_EQ(CL_SUCCESS, retVal); } api_tests::TearDown(); } cl_command_queue deviceQueue 
= nullptr; }; HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenSettingDefaultDeviceQueueThenSuccessIsReturned) { retVal = clSetDefaultDeviceCommandQueue(pContext, devices[testedRootDeviceIndex], deviceQueue); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast<_device_queue *>(deviceQueue), static_cast<_device_queue *>(pContext->getDefaultDeviceQueue())); } HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenValidParamsWhenReplacingDefaultDeviceQueueThenSuccessIsReturned) { cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, 0, 0}; auto pDevice = castToObject(devices[testedRootDeviceIndex]); if (pDevice->getSharedDeviceInfo().maxOnDeviceQueues > 1) { auto newDeviceQueue = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], properties, &retVal); ASSERT_NE(nullptr, newDeviceQueue); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetDefaultDeviceCommandQueue(pContext, devices[testedRootDeviceIndex], newDeviceQueue); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast<_device_queue *>(newDeviceQueue), static_cast<_device_queue *>(pContext->getDefaultDeviceQueue())); clReleaseCommandQueue(newDeviceQueue); } } HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenNullContextWhenSettingDefaultDeviceQueueThenClInvalidContextErrorIsReturned) { retVal = clSetDefaultDeviceCommandQueue(nullptr, devices[testedRootDeviceIndex], deviceQueue); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); } HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceWhenSettingDefaultDeviceQueueThenClInvalidDeviceErrorIsReturned) { retVal = clSetDefaultDeviceCommandQueue(pContext, nullptr, deviceQueue); ASSERT_EQ(CL_INVALID_DEVICE, retVal); } HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenNullDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned) { retVal = 
clSetDefaultDeviceCommandQueue(pContext, devices[testedRootDeviceIndex], nullptr); ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenHostQueueAsDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned) { cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, 0, 0, 0}; cl_command_queue hostQueue = clCreateCommandQueueWithProperties(pContext, devices[testedRootDeviceIndex], properties, &retVal); ASSERT_NE(nullptr, hostQueue); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetDefaultDeviceCommandQueue(pContext, devices[testedRootDeviceIndex], hostQueue); ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clReleaseCommandQueue(hostQueue); EXPECT_EQ(CL_SUCCESS, retVal); } HWCMDTEST_F(IGFX_GEN8_CORE, clSetDefaultDeviceCommandQueueApiTest, GivenIncorrectDeviceQueueWhenSettingDefaultDeviceQueueThenClInvalidCommandQueueErrorIsReturned) { auto context2 = clCreateContext(nullptr, 1u, &testedClDevice, nullptr, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, 0, 0}; cl_command_queue deviceQueueCtx2 = clCreateCommandQueueWithProperties(context2, devices[testedRootDeviceIndex], properties, &retVal); ASSERT_NE(nullptr, deviceQueueCtx2); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetDefaultDeviceCommandQueue(pContext, devices[testedRootDeviceIndex], deviceQueueCtx2); ASSERT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); retVal = clReleaseCommandQueue(deviceQueueCtx2); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context2); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_set_event_callback_tests.inl000066400000000000000000000135641363734646600306300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/event/event.h" #include "cl_api_tests.h" using namespace NEO; namespace ClSetEventCallbackTests { static int cbInvoked = 0; static void *cbData = nullptr; void CL_CALLBACK eventCallBack(cl_event event, cl_int callbackType, void *userData) { cbInvoked++; cbData = userData; } class clSetEventCallbackTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.EnableAsyncEventsHandler.set(false); ApiFixture::SetUp(); cbInvoked = 0; cbData = nullptr; } void TearDown() override { ApiFixture::TearDown(); } std::unique_ptr dbgRestore; }; TEST_F(clSetEventCallbackTests, GivenValidEventWhenSettingEventCallbackThenSuccessIsReturned) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->decRefInternal(); } TEST_F(clSetEventCallbackTests, GivenInvalidEventWhenSettingEventCallbackThenInvalidEventErrorIsReturned) { std::unique_ptr event(new char[sizeof(Event)]); memset(event.get(), 0, sizeof(Event)); retVal = clSetEventCallback(reinterpret_cast(event.get()), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_INVALID_EVENT, retVal); } TEST_F(clSetEventCallbackTests, GivenValidCallbackTypeWhenSettingEventCallbackThenSuccessIsReturned) { std::unique_ptr event(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->decRefInternal(); event.reset(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); event->decRefInternal(); event.reset(new Event(nullptr, 0, 0, 0)); retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clSetEventCallbackTests, 
GivenInvalidCallbackTypeWhenSettingEventCallbackThenInvalidValueErrorIsReturned) {
    // Element type spelled out here and below: unique_ptr cannot deduce it from a raw pointer.
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    // The sum of the three accepted callback types is itself not one of them.
    retVal = clSetEventCallback(event.get(), CL_COMPLETE + CL_RUNNING + CL_SUBMITTED, eventCallBack, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// A null callback function pointer must be rejected with CL_INVALID_VALUE.
TEST_F(clSetEventCallbackTests, GivenNullCallbackWhenSettingEventCallbackThenInvalidValueErrorIsReturned) {
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    retVal = clSetEventCallback(event.get(), CL_COMPLETE, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Several callbacks, one per accepted callback type, can be registered on the same event.
TEST_F(clSetEventCallbackTests, GivenMultipleCallbacksWhenSettingEventCallbackThenSuccessIsReturned) {
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    event->decRefInternal();
    event->decRefInternal();
}

// cbInvoked is reset to 0 by the fixture, so it counts only the invocations made in this test.
TEST_F(clSetEventCallbackTests, GivenValidCallbackWhenStatusIsSetToCompleteThenCallbackWasInvokedOnce) {
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    event->setStatus(CL_COMPLETE);
    EXPECT_EQ(cbInvoked, 1);
}

// Setting the status straight to CL_COMPLETE is expected to fire all three registered callbacks.
TEST_F(clSetEventCallbackTests, GivenThreeCallbacksWhenStatusIsSetToCompleteThenCallbackWasInvokedThreeTimes) {
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    event->setStatus(CL_COMPLETE);
    EXPECT_EQ(cbInvoked, 3);
}

TEST_F(clSetEventCallbackTests,
GivenValidCallbackWhenStatusIsSetToCompleteMultipleTimesThenCallbackWasInvokedOnce) {
    // Element type spelled out here and below: unique_ptr cannot deduce it from a raw pointer.
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr);
    // Registration result is verified so that a failed registration is not misread as a callback-count problem.
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Repeating the same status must not fire the callback again.
    event->setStatus(CL_COMPLETE);
    event->setStatus(CL_COMPLETE);
    event->setStatus(CL_COMPLETE);
    EXPECT_EQ(cbInvoked, 1);
}

// Three callbacks (one per callback type) are each expected to fire exactly once while the
// status walks SUBMITTED -> RUNNING -> COMPLETE and COMPLETE is then repeated.
TEST_F(clSetEventCallbackTests, GivenThreeCallbacksWhenStatusIsSetToCompleteMultipleTimesThenCallbackWasInvokedThreeTimes) {
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clSetEventCallback(event.get(), CL_RUNNING, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clSetEventCallback(event.get(), CL_SUBMITTED, eventCallBack, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    event->setStatus(CL_SUBMITTED);
    event->setStatus(CL_RUNNING);
    event->setStatus(CL_COMPLETE);
    event->setStatus(CL_COMPLETE);
    event->setStatus(CL_COMPLETE);
    EXPECT_EQ(cbInvoked, 3);
}

// The user data pointer given at registration must be the one handed to the callback.
TEST_F(clSetEventCallbackTests, GivenUserDataWhenStatusIsSetToCompleteThenCallbackWasInvokedOnce) {
    std::unique_ptr<Event> event(new Event(nullptr, 0, 0, 0));
    int data = 1;
    retVal = clSetEventCallback(event.get(), CL_COMPLETE, eventCallBack, &data);
    EXPECT_EQ(CL_SUCCESS, retVal);
    event->setStatus(CL_COMPLETE);
    EXPECT_EQ(cbInvoked, 1);
    EXPECT_EQ(&data, cbData);
}
} // namespace ClSetEventCallbackTests compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl000066400000000000000000000176301363734646600324270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "cl_api_tests.h" using namespace NEO; class KernelArgSvmFixture : public ApiFixture<>, public DeviceFixture { public: KernelArgSvmFixture() :
pCrossThreadData{0} { } protected: void SetUp() override { ApiFixture::SetUp(); DeviceFixture::SetUp(); if (defaultHwInfo->capabilityTable.ftrSvm == false) { GTEST_SKIP(); } // define kernel info pKernelInfo = std::make_unique(); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; kernelHeader.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pKernelInfo->kernelArgInfo[0].metadata.addressQualifier = KernelArgMetadata::AddrGlobal; pMockKernel = new MockKernel(pProgram, *pKernelInfo, *this->pClDevice); ASSERT_EQ(CL_SUCCESS, pMockKernel->initialize()); pMockKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } void TearDown() override { if (pMockKernel) { delete pMockKernel; } DeviceFixture::TearDown(); ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockKernel *pMockKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char pSshLocal[64]; char pCrossThreadData[64]; }; typedef Test clSetKernelArgSVMPointerTests; namespace ULT { TEST_F(clSetKernelArgSVMPointerTests, GivenNullKernelWhenSettingKernelArgThenInvalidKernelErrorIsReturned) { auto retVal = clSetKernelArgSVMPointer( nullptr, // cl_kernel kernel 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenInvalidArgIndexWhenSettingKernelArgThenInvalidArgIndexErrorIsReturned) { auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel (cl_uint)-1, // cl_uint arg_index nullptr // const void 
*arg_value ); EXPECT_EQ(CL_INVALID_ARG_INDEX, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKernelArgSVMPointerThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto pMockKernel = std::make_unique(pProgram, *pKernelInfo, *pDevice); auto retVal = clSetKernelArgSVMPointer( pMockKernel.get(), // cl_kernel kernel (cl_uint)-1, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenLocalAddressAndNullArgValueWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { pKernelInfo->kernelArgInfo[0].metadata.addressQualifier = KernelArgMetadata::AddrLocal; auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenInvalidArgValueWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { pClDevice->deviceInfo.sharedSystemMemCapabilities = 0u; pClDevice->sharedDeviceInfo.sharedSystemAllocationsSupport = false; void *ptrHost = malloc(256); EXPECT_NE(nullptr, ptrHost); auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrHost // const void *arg_value ); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); free(ptrHost); } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndNullArgValueWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo 
= pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndConstantAddressWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); pKernelInfo->kernelArgInfo[0].metadata.addressQualifier = KernelArgMetadata::AddrConstant; auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel 0, // cl_uint arg_index ptrSvm // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithOffsetWhenSettingKernelArgThenSuccessIsReturned) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); size_t offset = 256 / 2; EXPECT_NE(nullptr, ptrSvm); auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel 0, // cl_uint arg_index (char *)ptrSvm + offset // const void *arg_value ); EXPECT_EQ(CL_SUCCESS, retVal); clSVMFree(pContext, ptrSvm); } } TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSettingKernelArgThenInvalidArgValueErrorIsReturned) { pClDevice->deviceInfo.sharedSystemMemCapabilities = 0u; pClDevice->sharedDeviceInfo.sharedSystemAllocationsSupport = false; const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSvm); ASSERT_NE(nullptr, svmData); auto svmAlloc = 
svmData->gpuAllocation; EXPECT_NE(nullptr, svmAlloc); size_t offset = svmAlloc->getUnderlyingBufferSize() + 1; auto retVal = clSetKernelArgSVMPointer( pMockKernel, // cl_kernel kernel 0, // cl_uint arg_index (char *)ptrSvm + offset // const void *arg_value ); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); clSVMFree(pContext, ptrSvm); } } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl000066400000000000000000000340571363734646600311720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "cl_api_tests.h" using namespace NEO; class KernelExecInfoFixture : public ApiFixture<> { protected: void SetUp() override { ApiFixture::SetUp(); if (defaultHwInfo->capabilityTable.ftrSvm == false) { GTEST_SKIP(); } pKernelInfo = std::make_unique(); pMockKernel = new MockKernel(pProgram, *pKernelInfo, *pPlatform->getClDevice(testedRootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pMockKernel->initialize()); svmCapabilities = pPlatform->getClDevice(testedRootDeviceIndex)->getDeviceInfo().svmCapabilities; if (svmCapabilities != 0) { ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); } } void TearDown() override { if (svmCapabilities != 0) { clSVMFree(pContext, ptrSvm); } if (pMockKernel) { delete pMockKernel; } ApiFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockKernel *pMockKernel = nullptr; std::unique_ptr pKernelInfo; void *ptrSvm = nullptr; cl_device_svm_capabilities svmCapabilities = 0; }; typedef Test clSetKernelExecInfoTests; namespace ULT { TEST_F(clSetKernelExecInfoTests, GivenNullKernelWhenSettingAdditionalKernelInfoThenInvalidKernelErrorIsReturned) { retVal = clSetKernelExecInfo( nullptr, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name 
0, // size_t param_value_size nullptr // const void *param_value ); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKernelExecInfoThenInvalidOperationErrorIsReturned) { auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.ftrSvm = false; auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto pMockKernel = std::make_unique(pProgram, *pKernelInfo, *pDevice); auto retVal = clSetKernelExecInfo( pMockKernel.get(), // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name 0, // size_t param_value_size nullptr // const void *param_value ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clSetKernelExecInfoTests, GivenNullParamValueWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { void **pSvmPtrList = nullptr; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenNullPointerInParamValueWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { void *pSvmPtrList[] = {nullptr}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenParamSizeZeroWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 0; retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t 
param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenInvalidParamSizeWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = (size_t)(-1); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenInvalidParamNameWhenSettingAdditionalKernelInfoThenInvalidValueErrorIsReturned) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel 0, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clSetKernelExecInfoTests, GivenInvalidOperationWhenSettingAdditionalKernelInfoThenInvalidOperationErrorIsReturned) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); } TEST_F(clSetKernelExecInfoTests, GivenValidPointerListWithOnePointerWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { if (svmCapabilities != 0) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes = 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, 
pMockKernel->kernelSvmGfxAllocations.size()); } } TEST_F(clSetKernelExecInfoTests, GivenValidPointerListWithMultiplePointersWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { if (svmCapabilities != 0) { void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm1); void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm2); void *pSvmPtrList[] = {ptrSvm, ptrSvm1, ptrSvm2}; size_t SvmPtrListSizeInBytes = 3 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3u, pMockKernel->kernelSvmGfxAllocations.size()); EXPECT_TRUE(pMockKernel->svmAllocationsRequireCacheFlush); clSVMFree(pContext, ptrSvm1); clSVMFree(pContext, ptrSvm2); } } TEST_F(clSetKernelExecInfoTests, givenReadOnlySvmPtrListWhenUsedAsKernelPointersThenCacheFlushIsNotRequired) { if (svmCapabilities != 0) { void *ptrSvm1 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4); EXPECT_NE(nullptr, ptrSvm1); void *ptrSvm2 = clSVMAlloc(pContext, CL_MEM_READ_ONLY, 256, 4); EXPECT_NE(nullptr, ptrSvm2); void *pSvmPtrList[] = {ptrSvm1, ptrSvm2}; size_t SvmPtrListSizeInBytes = 2 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, pMockKernel->kernelSvmGfxAllocations.size()); EXPECT_FALSE(pMockKernel->svmAllocationsRequireCacheFlush); clSVMFree(pContext, ptrSvm1); clSVMFree(pContext, ptrSvm2); } } TEST_F(clSetKernelExecInfoTests, GivenMultipleSettingKernelInfoOperationsWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { if (svmCapabilities != 0) { void *pSvmPtrList[] = {ptrSvm}; size_t SvmPtrListSizeInBytes 
= 1 * sizeof(void *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pMockKernel->kernelSvmGfxAllocations.size()); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name SvmPtrListSizeInBytes, // size_t param_value_size pSvmPtrList // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pMockKernel->kernelSvmGfxAllocations.size()); } } HWTEST_F(clSetKernelExecInfoTests, givenKernelExecInfoThreadArbitrationPolicyWhenSettingAdditionalKernelInfoThenSuccessIsReturned) { uint32_t newThreadArbitrationPolicy = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL; size_t ptrSizeInBytes = sizeof(uint32_t *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &newThreadArbitrationPolicy // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->threadArbitrationPolicy); EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->getThreadArbitrationPolicy()); } HWTEST_F(clSetKernelExecInfoTests, givenInvalidThreadArbitrationPolicyWhenSettingAdditionalKernelInfoThenClInvalidValueIsReturned) { uint32_t invalidThreadArbitrationPolicy = 0; size_t ptrSizeInBytes = 1 * sizeof(uint32_t *); retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &invalidThreadArbitrationPolicy // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(clSetKernelExecInfoTests, 
givenInvalidParamSizeWhenSettingKernelExecutionTypeThenClInvalidValueErrorIsReturned) { cl_execution_info_kernel_type_intel kernelExecutionType; retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL, // cl_kernel_exec_info param_name sizeof(cl_execution_info_kernel_type_intel) - 1, // size_t param_value_size &kernelExecutionType // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(clSetKernelExecInfoTests, givenInvalidParamValueWhenSettingKernelExecutionTypeThenClInvalidValueErrorIsReturned) { retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL, // cl_kernel_exec_info param_name sizeof(cl_execution_info_kernel_type_intel), // size_t param_value_size nullptr // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(clSetKernelExecInfoTests, givenDifferentExecutionTypesWhenSettingAdditionalKernelInfoThenCorrectValuesAreSet) { cl_kernel_exec_info paramName = CL_KERNEL_EXEC_INFO_KERNEL_TYPE_INTEL; size_t paramSize = sizeof(cl_execution_info_kernel_type_intel); cl_execution_info_kernel_type_intel kernelExecutionType = -1; retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel paramName, // cl_kernel_exec_info param_name paramSize, // size_t param_value_size &kernelExecutionType // const void *param_value ); EXPECT_EQ(CL_INVALID_VALUE, retVal); kernelExecutionType = CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL; retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel paramName, // cl_kernel_exec_info param_name paramSize, // size_t param_value_size &kernelExecutionType // const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(KernelExecutionType::Default, pMockKernel->executionType); kernelExecutionType = CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL; retVal = clSetKernelExecInfo( pMockKernel, // cl_kernel kernel paramName, // cl_kernel_exec_info param_name paramSize, // size_t param_value_size &kernelExecutionType 
// const void *param_value ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(KernelExecutionType::Concurrent, pMockKernel->executionType); } } // namespace ULT cl_set_mem_object_destructor_callback_tests.inl000066400000000000000000000071221363734646600340030ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateBufferTests; namespace ULT { static int cbInvoked = 0; void CL_CALLBACK destructorCallback(cl_mem memObj, void *userData) { cbInvoked++; } struct clSetMemObjectDestructorCallbackTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); // clang-format off imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 32; imageDesc.image_height = 32; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; // clang-format on cbInvoked = 0; } void TearDown() override { ApiFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; }; TEST_F(clSetMemObjectDestructorCallbackTests, GivenNullMemObjWhenSettingMemObjCallbackThenInvalidMemObjectErrorIsReturned) { retVal = clSetMemObjectDestructorCallback(nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); EXPECT_EQ(0, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenImageAndDestructorCallbackWhenSettingMemObjCallbackThenSuccessIsReturned) { auto image = clCreateImage(pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); retVal = clSetMemObjectDestructorCallback(image, destructorCallback, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenImageAndNullCallbackFunctionWhenSettingMemObjCallbackThenInvalidValueErrorIsReturned) { auto image = clCreateImage(pContext, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, &retVal); retVal = clSetMemObjectDestructorCallback(image, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(image); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenBufferAndDestructorCallbackFunctionWhenSettingMemObjCallbackThenSuccessIsReturned) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); retVal = clSetMemObjectDestructorCallback(buffer, destructorCallback, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1, cbInvoked); } TEST_F(clSetMemObjectDestructorCallbackTests, GivenBufferAndNullCallbackFunctionWhenSettingMemObjCallbackThenInvalidValueErrorIsReturned) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); cbInvoked = 0; retVal = clSetMemObjectDestructorCallback(buffer, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, cbInvoked); } } // namespace ULT cl_set_mem_object_destructor_callback_tests_mt.cpp000066400000000000000000000031071363734646600345020ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests clCreateBufferTests; namespace ULT { static int cbInvoked = 0; void CL_CALLBACK 
destructorCallBackMt(cl_mem memObj, void *userData) { cbInvoked++; } struct clSetMemObjectDestructorCallbackMtTests : public ApiFixture<>, public ::testing::Test { void SetUp() override { ApiFixture::SetUp(); cbInvoked = 0; } void TearDown() override { ApiFixture::TearDown(); } static void setMemCallbackThreadFunc(cl_mem buf) { auto ret = clSetMemObjectDestructorCallback(buf, destructorCallBackMt, nullptr); EXPECT_EQ(CL_SUCCESS, ret); } }; TEST_F(clSetMemObjectDestructorCallbackMtTests, GivenMultipleThreadsWhenSettingDestructorCallbackThenCallbackWasInvokedForEachThread) { auto buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, 42, nullptr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); std::thread t1(clSetMemObjectDestructorCallbackMtTests::setMemCallbackThreadFunc, buffer); std::thread t2(clSetMemObjectDestructorCallbackMtTests::setMemCallbackThreadFunc, buffer); retVal = clSetMemObjectDestructorCallback(buffer, destructorCallBackMt, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t1.join(); t2.join(); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3, cbInvoked); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_set_performance_configuration_tests.inl000066400000000000000000000021751363734646600331170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "cl_api_tests.h" using namespace NEO; struct clSetPerformanceConfigurationINTELTests : public DeviceInstrumentationFixture, public PerformanceCountersDeviceFixture, ::testing::Test { void SetUp() override { PerformanceCountersDeviceFixture::SetUp(); DeviceInstrumentationFixture::SetUp(true); } void TearDown() override { PerformanceCountersDeviceFixture::TearDown(); } }; namespace ULT { 
TEST_F(clSetPerformanceConfigurationINTELTests, GivenAnyArgumentsWhenSettingPerformanceConfigurationThenInvalidOperationErrorIsReturned) { cl_int ret = CL_OUT_OF_RESOURCES; cl_uint offsets[2]; cl_uint values[2]; ret = clSetPerformanceConfigurationINTEL(device.get(), 2, offsets, values); EXPECT_EQ(CL_INVALID_OPERATION, ret); } } // namespace ULT cl_set_program_specialization_constant_tests.inl000066400000000000000000000021461363734646600342640ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { TEST(clSetProgramSpecializationConstantTest, givenNullptrProgramWhenSetProgramSpecializationConstantThenErrorIsReturned) { auto retVal = clSetProgramSpecializationConstant(nullptr, 1, 1, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } using clSetProgramSpecializationConstantTests = api_tests; TEST_F(clSetProgramSpecializationConstantTests, givenNonSpirVProgramWhenSetProgramSpecializationConstantThenErrorIsReturned) { pProgram->isSpirV = false; int specValue = 1; auto retVal = clSetProgramSpecializationConstant(pProgram, 1, sizeof(int), &specValue); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } TEST_F(clSetProgramSpecializationConstantTests, givenProperProgramAndNullptrSpecValueWhenSetProgramSpecializationConstantThenErrorIsReturned) { pProgram->isSpirV = true; auto retVal = clSetProgramSpecializationConstant(pProgram, 1, 1, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_svm_alloc_tests.inl000066400000000000000000000172001363734646600267660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/context/context.h" 
#include "opencl/test/unit_test/mocks/mock_platform.h"

#include "cl_api_tests.h"

using namespace NEO;

using clSVMAllocTests = api_tests;

// NOTE(review): the text this section was recovered from had lost its line breaks
// and the contents of every non-empty template argument list. Template arguments
// below are reconstructed from how the values are used - confirm against upstream.

namespace ULT {

// Parameterized base fixture: every test is skipped when the tested device
// reports no SVM support (capabilityTable.ftrSvm is false).
class clSVMAllocTemplateTests : public ApiFixture<>,
                                public testing::TestWithParam<cl_mem_flags> {
  public:
    void SetUp() override {
        ApiFixture::SetUp();
        if (!pPlatform->getClDevice(testedRootDeviceIndex)->getHardwareInfo().capabilityTable.ftrSvm) {
            GTEST_SKIP();
        }
    }

    void TearDown() override {
        ApiFixture::TearDown();
    }
};

struct clSVMAllocValidFlagsTests : public clSVMAllocTemplateTests {
    cl_uchar pHostPtr[64];
};

// The platform holds no devices (getNumDevices() == 0), yet allocation succeeds
// through a context built around a separately created device.
TEST(clSVMAllocTest, givenPlatformWithoutDevicesWhenClSVMAllocIsCalledThenDeviceIsTakenFromContext) {
    auto executionEnvironment = platform()->peekExecutionEnvironment();
    executionEnvironment->initializeMemoryManager();
    executionEnvironment->prepareRootDeviceEnvironments(1);
    // NOTE(review): ClDevice follows from getDeviceInfo()/cl_device_id usage below;
    // RootDevice as the Device::create argument is presumed - confirm.
    auto clDevice = std::make_unique<ClDevice>(*Device::create<RootDevice>(executionEnvironment, 0u), platform());
    const ClDeviceInfo &devInfo = clDevice->getDeviceInfo();
    if (devInfo.svmCapabilities == 0) {
        GTEST_SKIP();
    }
    cl_device_id deviceId = clDevice.get();
    cl_int retVal;
    // NOTE(review): <Context> presumed for both template argument lists - confirm.
    auto context = ReleaseableObjectPtr<Context>(Context::create<Context>(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0u, platform()->getNumDevices());
    auto SVMPtr = clSVMAlloc(context.get(), 0u, 4096, 128);
    EXPECT_NE(nullptr, SVMPtr);

    clSVMFree(context.get(), SVMPtr);
}

// For each valid flag combination the expected outcome depends on the SVM
// capabilities reported by the device.
TEST_P(clSVMAllocValidFlagsTests, GivenSvmSupportWhenAllocatingSvmThenSvmIsAllocated) {
    cl_mem_flags flags = GetParam();
    const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo();
    //check for svm support
    if (devInfo.svmCapabilities != 0) {
        //fg svm flag
        if (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) {
            //fg svm flag, fg svm support - expected success
            if (devInfo.svmCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
                auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */);
                EXPECT_NE(nullptr, SVMPtr);

                clSVMFree(pContext, SVMPtr);
            }
            //fg svm flag no fg svm support
            else {
                auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */);
                EXPECT_EQ(nullptr, SVMPtr);
            }
        }
        //no fg svm flag, svm support - expected success
        else {
            auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */);
            EXPECT_NE(nullptr, SVMPtr);

            clSVMFree(pContext, SVMPtr);
        }
    } else {
        //no svm support -expected fail
        auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */);
        EXPECT_EQ(nullptr, SVMPtr);
    }
}

static cl_mem_flags SVMAllocValidFlags[] = {
    0,
    CL_MEM_READ_WRITE,
    CL_MEM_WRITE_ONLY,
    CL_MEM_READ_ONLY,
    CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS};

INSTANTIATE_TEST_CASE_P(
    SVMAllocCheckFlags,
    clSVMAllocValidFlagsTests,
    testing::ValuesIn(SVMAllocValidFlags));

using clSVMAllocFtrFlagsTests = clSVMAllocTemplateTests;

INSTANTIATE_TEST_CASE_P(
    SVMAllocCheckFlagsFtrFlags,
    clSVMAllocFtrFlagsTests,
    testing::ValuesIn(SVMAllocValidFlags));

// Toggles the ftrSvm / ftrSupportsCoherency bits of the mutable hardware info
// and checks the allocation result after each change.
TEST_P(clSVMAllocFtrFlagsTests, GivenCorrectFlagsWhenAllocatingSvmThenSvmIsAllocated) {
    HardwareInfo *pHwInfo = pPlatform->peekExecutionEnvironment()->rootDeviceEnvironments[testedRootDeviceIndex]->getMutableHardwareInfo();

    cl_mem_flags flags = GetParam();
    void *SVMPtr = nullptr;

    //1: no svm - no flags supported
    pHwInfo->capabilityTable.ftrSvm = false;
    pHwInfo->capabilityTable.ftrSupportsCoherency = false;

    SVMPtr = clSVMAlloc(pContext, flags, 4096, 128);
    EXPECT_EQ(nullptr, SVMPtr);

    //2: coarse svm - normal flags supported
    pHwInfo->capabilityTable.ftrSvm = true;
    SVMPtr = clSVMAlloc(pContext, flags, 4096, 128);
    if (flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) {
        //fg svm flags not supported
        EXPECT_EQ(nullptr, SVMPtr);
    } else {
        //no fg svm flags supported
        EXPECT_NE(nullptr, SVMPtr);
        clSVMFree(pContext, SVMPtr);
    }

    //3: fg svm - all flags supported
    pHwInfo->capabilityTable.ftrSupportsCoherency = true;
    SVMPtr = clSVMAlloc(pContext, flags, 4096, 128);
    EXPECT_NE(nullptr, SVMPtr);
    clSVMFree(pContext, SVMPtr);
}

struct clSVMAllocInvalidFlagsTests : public clSVMAllocTemplateTests {
};

TEST_P(clSVMAllocInvalidFlagsTests, GivenInvalidFlagsWhenAllocatingSvmThenSvmIsNotAllocated) {
    cl_mem_flags flags = GetParam();

    auto SVMPtr = clSVMAlloc(pContext, flags, 4096 /* Size*/, 128 /* alignment */);
    EXPECT_EQ(nullptr, SVMPtr);
}

// static, matching SVMAllocValidFlags: the table is only used by the
// instantiation below.
static cl_mem_flags SVMAllocInvalidFlags[] = {
    CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY,
    CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY,
    CL_MEM_SVM_ATOMICS,
    0xffcc};

INSTANTIATE_TEST_CASE_P(
    SVMAllocCheckFlags,
    clSVMAllocInvalidFlagsTests,
    testing::ValuesIn(SVMAllocInvalidFlags));

TEST_F(clSVMAllocTests, GivenNullContextWhenAllocatingSvmThenSvmIsNotAllocated) {
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    auto SVMPtr = clSVMAlloc(nullptr /* cl_context */, flags, 4096 /* Size*/, 128 /* alignment */);
    EXPECT_EQ(nullptr, SVMPtr);
}

TEST_F(clSVMAllocTests, GivenZeroSizeWhenAllocatingSvmThenSvmIsNotAllocated) {
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 0 /* Size*/, 128 /* alignment */);
    EXPECT_EQ(nullptr, SVMPtr);
}

TEST_F(clSVMAllocTests, GivenZeroAlignmentWhenAllocatingSvmThenSvmIsAllocated) {
    const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo();
    if (devInfo.svmCapabilities != 0) {
        cl_mem_flags flags = CL_MEM_READ_WRITE;
        auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 4096 /* Size*/, 0 /* alignment */);
        EXPECT_NE(nullptr, SVMPtr);
        clSVMFree(pContext, SVMPtr);
    }
}

TEST_F(clSVMAllocTests, GivenUnalignedSizeAndDefaultAlignmentWhenAllocatingSvmThenSvmIsAllocated) {
    const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo();
    if (devInfo.svmCapabilities != 0) {
        cl_mem_flags flags = CL_MEM_READ_WRITE;
        auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 4095 /* Size*/, 0 /* alignment */);
        EXPECT_NE(nullptr, SVMPtr);
        clSVMFree(pContext, SVMPtr);
    }
}

TEST_F(clSVMAllocTests, GivenAlignmentNotPowerOfTwoWhenAllocatingSvmThenSvmIsNotAllocated) {
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    auto SVMPtr = clSVMAlloc(pContext /* cl_context */, flags, 4096 /* Size*/, 129 /* alignment */);
    EXPECT_EQ(nullptr, SVMPtr);
}

TEST_F(clSVMAllocTests, GivenAlignmentTooLargeWhenAllocatingSvmThenSvmIsNotAllocated) {
    auto SVMPtr = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 4096 /* Size */, 4096 /* alignment */);
    EXPECT_EQ(nullptr, SVMPtr);
}
} // namespace ULT

// Archive entry header, kept verbatim:
// compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_svm_free_tests.inl000066400000000000000000000017061363734646600266210ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "cl_api_tests.h"

using namespace NEO;

using clSVMFreeTests = api_tests;

namespace ULT {

// The call only has to return; there is nothing to assert on.
TEST_F(clSVMFreeTests, GivenNullPtrWhenFreeingSvmThenNoAction) {
    clSVMFree(
        nullptr, // cl_context context
        nullptr  // void *svm_pointer
    );
}

// Freeing an arbitrary pointer on a context whose device has ftrSvm disabled
// only has to return.
TEST_F(clSVMFreeTests, GivenContextWithDeviceNotSupportingSvmWhenFreeingSvmThenNoAction) {
    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.ftrSvm = false;

    // NOTE(review): MockClDevice / MockDevice / Context template arguments are
    // presumed from the single-pointer construction and cl_device_id use - confirm.
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    cl_device_id deviceId = clDevice.get();
    auto context = clUniquePtr(Context::create<Context>(nullptr, ClDeviceVector(&deviceId, 1), nullptr, nullptr, retVal));
    EXPECT_EQ(retVal, CL_SUCCESS);

    clSVMFree(
        context.get(),
        reinterpret_cast<void *>(0x1234));
}
} // namespace ULT

// Archive entry header, kept verbatim:
// compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl000066400000000000000000001342251363734646600313570ustar00rootroot00000000000000
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/memory_manager/unified_memory_manager.h"
#include
"shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; TEST(clUnifiedSharedMemoryTests, whenClHostMemAllocINTELisCalledWithoutContextThenInvalidContextIsReturned) { cl_int retVal = CL_SUCCESS; auto ptr = clHostMemAllocINTEL(0, nullptr, 0, 0, &retVal); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClHostMemAllocIntelIsCalledThenItAllocatesHostUnifiedMemoryAllocation) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemoryHostAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryHostAllocation); EXPECT_EQ(graphicsAllocation->size, 4u); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::HOST_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocation->getGpuAddress(), castToUint64(unifiedMemoryHostAllocation)); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenMappedAllocationWhenClMemFreeIntelIscalledThenMappingIsRemoved) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unifiedMemorySharedAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); allocationsManager->insertSvmMapOperation(unifiedMemorySharedAllocation, 4u, unifiedMemorySharedAllocation, 0u, false); retVal = 
clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, allocationsManager->getSvmMapOperation(unifiedMemorySharedAllocation)); } TEST(clUnifiedSharedMemoryTests, whenClDeviceMemAllocINTELisCalledWithWrongContextThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto ptr = clDeviceMemAllocINTEL(0, 0, nullptr, 0, 0, &retVal); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClDeviceMemAllocIntelIsCalledThenItAllocatesDeviceUnifiedMemoryAllocation) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unfiedMemoryDeviceAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemoryDeviceAllocation); EXPECT_EQ(graphicsAllocation->size, 4u); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocation->getGpuAddress(), castToUint64(unfiedMemoryDeviceAllocation)); retVal = clMemFreeINTEL(&mockContext, unfiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenUnifiedSharedMemoryAllocationCallsAreCalledWithSizeGreaterThenMaxMemAllocSizeThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto maxMemAllocSize = mockContext.getDevice(0u)->getSharedDeviceInfo().maxMemAllocSize; size_t requestedSize = static_cast(maxMemAllocSize) + 1u; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, requestedSize, 0, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, unfiedMemoryDeviceAllocation); unfiedMemoryDeviceAllocation = 
clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, requestedSize, 0, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, unfiedMemoryDeviceAllocation); unfiedMemoryDeviceAllocation = clHostMemAllocINTEL(&mockContext, nullptr, requestedSize, 0, &retVal); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, retVal); EXPECT_EQ(nullptr, unfiedMemoryDeviceAllocation); } TEST(clUnifiedSharedMemoryTests, whenClSharedMemAllocINTELisCalledWithWrongContextThenInvalidContextErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto ptr = clSharedMemAllocINTEL(0, 0, nullptr, 0, 0, &retVal); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClSharedMemAllocIntelIsCalledThenItAllocatesSharedUnifiedMemoryAllocation) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unfiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unfiedMemorySharedAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemorySharedAllocation); EXPECT_EQ(graphicsAllocation->size, 4u); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocation->getGpuAddress(), castToUint64(unfiedMemorySharedAllocation)); retVal = clMemFreeINTEL(&mockContext, unfiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithIncorrectContextThenReturnError) { auto retVal = clMemFreeINTEL(0, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithNullPointerThenNoActionOccurs) { MockContext mockContext; auto retVal = clMemFreeINTEL(&mockContext, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, 
whenClMemBlockingFreeINTELisCalledWithNullPointerThenNoActionOccurs) { MockContext mockContext; auto retVal = clMemBlockingFreeINTEL(&mockContext, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithValidUmPointerThenMemoryIsFreed) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, allocationsManager->getNumAllocs()); } TEST(clUnifiedSharedMemoryTests, whenClMemFreeINTELisCalledWithInvalidUmPointerThenMemoryIsNotFreed) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); retVal = clMemFreeINTEL(&mockContext, ptrOffset(unifiedMemoryHostAllocation, 4)); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, allocationsManager->getNumAllocs()); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutContextThenInvalidContextIsReturned) { auto retVal = clGetMemAllocInfoINTEL(0, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutAllocationThenInvalidValueIsReturned) { MockContext mockContext; auto retVal = clGetMemAllocInfoINTEL(&mockContext, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, 
whenClGetMemAllocInfoINTELisCalledWithoutAllocationAndWithPropertiesThenProperValueIsReturned) { MockContext mockContext; cl_int retVal = CL_INVALID_VALUE; size_t paramValueSize = sizeof(void *); size_t paramValueSizeRet = 0; { void *paramValue = reinterpret_cast(0xfeedbac); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), CL_MEM_ALLOC_BASE_PTR_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(void *), paramValueSizeRet); EXPECT_EQ(static_cast(nullptr), paramValue); } { size_t paramValue = 1; paramValueSize = sizeof(size_t); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), CL_MEM_ALLOC_SIZE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(size_t), paramValueSizeRet); EXPECT_EQ(static_cast(0u), paramValue); } { cl_device_id paramValue = mockContext.getDevice(0); paramValueSize = sizeof(cl_device_id); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), CL_MEM_ALLOC_DEVICE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_device_id), paramValueSizeRet); EXPECT_EQ(static_cast(nullptr), paramValue); } { cl_mem_alloc_flags_intel paramValue = 1; paramValueSize = sizeof(cl_mem_properties_intel); retVal = clGetMemAllocInfoINTEL(&mockContext, mockContext.getDevice(0), CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_mem_properties_intel), paramValueSizeRet); EXPECT_EQ(static_cast(0u), paramValue); } } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutSVMAllocationThenInvalidValueIsReturned) { MockContext mockContext; delete mockContext.svmAllocsManager; mockContext.svmAllocsManager = nullptr; auto retVal = clGetMemAllocInfoINTEL(&mockContext, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, 
whenClGetMemAllocInfoINTELisCalledWithAllocationTypeParamNameAndWithoutUnifiedSharedMemoryAllocationThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; retVal = clGetMemAllocInfoINTEL(&mockContext, nullptr, CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_MEM_TYPE_UNKNOWN_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithValidUnifiedMemoryHostAllocationThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryHostAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::HOST_UNIFIED_MEMORY); EXPECT_EQ(CL_MEM_TYPE_HOST_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenHostMemAllocWithInvalidPropertiesTokenThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {0x1234, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, properties, 4, 0, &retVal); EXPECT_EQ(nullptr, unifiedMemoryHostAllocation); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, 
whenHostMemAllocWithInvalidWriteCombinedTokenThenSuccessIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, properties, 4, 0, &retVal); EXPECT_NE(nullptr, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenDeviceMemAllocWithInvalidPropertiesTokenThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {0x1234, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); EXPECT_EQ(nullptr, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenSharedMemAllocWithInvalidPropertiesTokenThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; const uint64_t invalidToken = 0x1234; cl_mem_properties_intel properties[] = {invalidToken, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); EXPECT_EQ(nullptr, unifiedMemorySharedAllocation); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenSharedMemAllocWithInvalidWriteCombinedTokenThenSuccessIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); EXPECT_NE(nullptr, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, 
unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenUnifiedMemoryAllocWithoutPropertiesWhenGetMemAllocFlagsThenDefaultValueIsReturned) { uint64_t defaultValue = CL_MEM_ALLOC_DEFAULT_INTEL; MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(defaultValue, paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocTypeIsCalledWithValidUnifiedMemoryHostAllocationThenProperTypeIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0}; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, properties, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(properties[1], paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocTypeIsCalledWithValidUnifiedMemoryDeviceAllocationThenProperTypeIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; cl_mem_properties_intel properties[] = 
{CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryDeviceAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(properties[1], paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocTypeIsCalledWithValidUnifiedMemorySharedAllocationThenProperTypeIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_mem_properties_intel); cl_mem_properties_intel paramValue = 0; size_t paramValueSizeRet = 0; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0}; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_FLAGS_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(properties[1], paramValue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithValidUnifiedMemoryDeviceAllocationThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryDeviceAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryDeviceAllocation, 
CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(CL_MEM_TYPE_DEVICE_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithValidUnifiedMemorySharedAllocationThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_int); cl_int paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_TYPE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(CL_MEM_TYPE_SHARED_INTEL, paramValue); EXPECT_EQ(sizeof(cl_int), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenDeviceAllocationWhenItIsQueriedForDeviceThenProperDeviceIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto device = mockContext.getDevice(0u); cl_device_id clDevice = device; auto unifiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, device, nullptr, 4, 0, &retVal); cl_device_id returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryDeviceAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, clDevice); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenSharedAllocationWhenItIsQueriedForDeviceThenProperDeviceIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto device = mockContext.getDevice(0u); cl_device_id clDevice = device; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, device, nullptr, 4, 0, &retVal); cl_device_id returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, clDevice); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenSharedAllocationWithoutDeviceWhenItIsQueriedForDeviceThenNullIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, nullptr, nullptr, 4, 0, &retVal); cl_device_id returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, nullptr); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenHostAllocationWhenItIsQueriedForDeviceThenProperDeviceIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); cl_device_id 
returnedDevice; retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemoryHostAllocation, CL_MEM_ALLOC_DEVICE_INTEL, sizeof(returnedDevice), &returnedDevice, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(returnedDevice)); EXPECT_EQ(returnedDevice, nullptr); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithAllocationBasePtrParamNameThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(uint64_t); uint64_t paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemorySharedAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, CL_MEM_ALLOC_BASE_PTR_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::SHARED_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocation->getGpuAddress(), paramValue); EXPECT_EQ(sizeof(uint64_t), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithAllocationSizeParamNameThenProperFieldsAreSet) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(size_t); size_t paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemoryHostAllocation = clHostMemAllocINTEL(&mockContext, nullptr, 4, 0, &retVal); auto allocationsManager = mockContext.getSVMAllocsManager(); auto graphicsAllocation = allocationsManager->getSVMAlloc(unifiedMemoryHostAllocation); retVal = clGetMemAllocInfoINTEL(&mockContext, 
unifiedMemoryHostAllocation, CL_MEM_ALLOC_SIZE_INTEL, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::HOST_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->size, paramValue); EXPECT_EQ(sizeof(size_t), paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemoryHostAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClGetMemAllocInfoINTELisCalledWithoutParamNameThenInvalidValueIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; size_t paramValueSize = sizeof(cl_uint); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), nullptr, 4, 0, &retVal); retVal = clGetMemAllocInfoINTEL(&mockContext, unifiedMemorySharedAllocation, 0, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClSetKernelArgMemPointerINTELisCalledWithInvalidKernelThenInvaliKernelErrorIsReturned) { auto retVal = clSetKernelArgMemPointerINTEL(0, 0, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST(clUnifiedSharedMemoryTests, whenDeviceSupportSharedMemoryAllocationsAndSystemPointerIsPassedItIsProperlySetInKernel) { DebugManagerStateRestore restorer; DebugManager.flags.EnableSharedSystemUsmSupport.set(1u); auto mockContext = std::make_unique(); if (mockContext->getDevice(0u)->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } MockKernelWithInternals mockKernel(*mockContext->getDevice(0u), mockContext.get(), true); auto systemPointer = reinterpret_cast(0xfeedbac); auto retVal = clSetKernelArgMemPointerINTEL(mockKernel.mockKernel, 0, systemPointer); EXPECT_EQ(retVal, CL_SUCCESS); //check if cross thread is updated auto crossThreadLocation = 
reinterpret_cast(ptrOffset(mockKernel.mockKernel->getCrossThreadData(), mockKernel.kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset)); auto systemAddress = reinterpret_cast(systemPointer); EXPECT_EQ(*crossThreadLocation, systemAddress); } TEST(clUnifiedSharedMemoryTests, whenClSetKernelArgMemPointerINTELisCalledWithValidUnifiedMemoryAllocationThenProperFieldsAreSet) { auto mockContext = std::make_unique(); if (mockContext->getDevice(0u)->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 4, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); MockKernelWithInternals mockKernel(*mockContext->getDevice(0u), mockContext.get(), true); retVal = clSetKernelArgMemPointerINTEL(mockKernel.mockKernel, 0, unfiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); auto svmAlloc = mockContext->getSVMAllocsManager()->getSVMAlloc(unfiedMemoryDeviceAllocation); EXPECT_EQ(mockKernel.mockKernel->kernelArguments[0].object, svmAlloc->gpuAllocation); retVal = clMemFreeINTEL(mockContext.get(), unfiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemsetINTELisCalledWithoutIncorrectCommandQueueThenInvaliQueueErrorIsReturned) { auto retVal = clEnqueueMemsetINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemsetINTELisCalledWithProperParametersThenParametersArePassedCorrectly) { auto mockContext = std::make_unique(); const ClDeviceInfo &devInfo = mockContext->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); struct MockedCommandQueue : public MockCommandQueue { using 
MockCommandQueue::MockCommandQueue; cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_EQ(12, *reinterpret_cast(pattern)); EXPECT_EQ(expectedDstPtr, svmPtr); EXPECT_EQ(400u, size); EXPECT_EQ(1u, patternSize); EXPECT_EQ(0u, numEventsInWaitList); EXPECT_EQ(nullptr, eventWaitList); EXPECT_EQ(nullptr, event); return CL_SUCCESS; } void *expectedDstPtr = nullptr; }; MockedCommandQueue queue{*mockContext}; queue.expectedDstPtr = unfiedMemoryDeviceAllocation; cl_int setValue = 12u; retVal = clEnqueueMemsetINTEL(&queue, unfiedMemoryDeviceAllocation, setValue, 400u, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clMemFreeINTEL(mockContext.get(), unfiedMemoryDeviceAllocation); } TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemFillINTELisCalledWithoutIncorrectCommandQueueThenInvaliQueueErrorIsReturned) { cl_int setValue = 12u; auto retVal = clEnqueueMemFillINTEL(0, nullptr, &setValue, 0u, 0u, 0u, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenclEnqueueMemFillINTELisCalledWithProperParametersThenParametersArePassedCorrectly) { auto mockContext = std::make_unique(); const ClDeviceInfo &devInfo = mockContext->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); struct MockedCommandQueue : public MockCommandQueue { using MockCommandQueue::MockCommandQueue; cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_EQ(12, *reinterpret_cast(pattern)); EXPECT_EQ(expectedDstPtr, svmPtr); EXPECT_EQ(400u, size); EXPECT_EQ(4u, patternSize); EXPECT_EQ(0u, numEventsInWaitList); 
EXPECT_EQ(nullptr, eventWaitList); EXPECT_EQ(nullptr, event); return CL_SUCCESS; } void *expectedDstPtr = nullptr; }; MockedCommandQueue queue{*mockContext}; queue.expectedDstPtr = unfiedMemoryDeviceAllocation; cl_int setValue = 12u; retVal = clEnqueueMemFillINTEL(&queue, unfiedMemoryDeviceAllocation, &setValue, sizeof(setValue), 400u, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); clMemFreeINTEL(mockContext.get(), unfiedMemoryDeviceAllocation); } TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemcpyINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) { auto retVal = clEnqueueMemcpyINTEL(0, 0, nullptr, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST(clUnifiedSharedMemoryTests, givenTwoUnifiedMemoryAllocationsWhenTheyAreCopiedThenProperParamtersArePassed) { auto mockContext = std::make_unique(); const ClDeviceInfo &devInfo = mockContext->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); auto unfiedMemorySharedAllocation = clSharedMemAllocINTEL(mockContext.get(), mockContext->getDevice(0u), nullptr, 400, 0, &retVal); struct MockedCommandQueue : public MockCommandQueue { using MockCommandQueue::MockCommandQueue; cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_EQ(0u, blockingCopy); EXPECT_EQ(expectedDstPtr, dstPtr); EXPECT_EQ(expectedSrcPtr, srcPtr); EXPECT_EQ(400u, size); EXPECT_EQ(0u, numEventsInWaitList); EXPECT_EQ(nullptr, eventWaitList); EXPECT_EQ(nullptr, event); return CL_SUCCESS; } void *expectedDstPtr = nullptr; const void *expectedSrcPtr = nullptr; }; MockedCommandQueue queue{*mockContext}; queue.expectedDstPtr = unfiedMemoryDeviceAllocation; queue.expectedSrcPtr = 
unfiedMemorySharedAllocation; retVal = clEnqueueMemcpyINTEL(&queue, 0, unfiedMemoryDeviceAllocation, unfiedMemorySharedAllocation, 400u, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); clMemFreeINTEL(mockContext.get(), unfiedMemoryDeviceAllocation); clMemFreeINTEL(mockContext.get(), unfiedMemorySharedAllocation); } TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) { auto retVal = clEnqueueMigrateMemINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithProperParametersThenSuccessIsReturned) { MockCommandQueue cmdQ; void *unifiedMemoryAlloc = reinterpret_cast(0x1234); auto retVal = clEnqueueMigrateMemINTEL(&cmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemAdviseINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) { auto retVal = clEnqueueMemAdviseINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemAdviseINTELisCalledWithProperParametersThenSuccessIsReturned) { MockCommandQueue cmdQ; void *unifiedMemoryAlloc = reinterpret_cast(0x1234); auto retVal = clEnqueueMemAdviseINTEL(&cmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } class clUnifiedSharedMemoryEventTests : public CommandQueueHwFixture, public ::testing::Test { public: void SetUp() override { this->pCmdQ = createCommandQueue(nullptr); } void TearDown() override { clReleaseEvent(event); CommandQueueHwFixture::TearDown(); } cl_event event = nullptr; }; TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMigrateMemINTELIsCalledWithEventThenProperCmdTypeIsSet) { void *unifiedMemoryAlloc = reinterpret_cast(0x1234); auto retVal = clEnqueueMigrateMemINTEL(this->pCmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, &event); 
EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_MIGRATEMEM_INTEL; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); } TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemAdviseINTELIsCalledWithEventThenProperCmdTypeIsSet) { void *unifiedMemoryAlloc = reinterpret_cast(0x1234); auto retVal = clEnqueueMemAdviseINTEL(this->pCmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_MEMADVISE_INTEL; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); } TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemcpyINTELIsCalledWithEventThenProperCmdTypeIsSet) { const ClDeviceInfo &devInfo = this->context->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unfiedMemoryDst = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal); auto unfiedMemorySrc = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, 0, unfiedMemoryDst, unfiedMemorySrc, 400u, 0, nullptr, &event); EXPECT_EQ(retVal, CL_SUCCESS); constexpr cl_command_type expectedCmd = CL_COMMAND_MEMCPY_INTEL; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clMemFreeINTEL(this->context, unfiedMemoryDst); clMemFreeINTEL(this->context, unfiedMemorySrc); } TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemsetINTELIsCalledWithEventThenProperCmdTypeIsSet) { const ClDeviceInfo &devInfo = this->context->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unfiedMemorySharedAllocation = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal); cl_int 
setValue = 12u; retVal = clEnqueueMemsetINTEL(this->pCmdQ, unfiedMemorySharedAllocation, setValue, 400u, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_MEMSET_INTEL; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clMemFreeINTEL(this->context, unfiedMemorySharedAllocation); } TEST_F(clUnifiedSharedMemoryEventTests, whenClEnqueueMemFillINTELIsCalledWithEventThenProperCmdTypeIsSet) { const ClDeviceInfo &devInfo = this->context->getDevice(0u)->getDeviceInfo(); if (devInfo.svmCapabilities == 0) { GTEST_SKIP(); } cl_int retVal = CL_SUCCESS; auto unfiedMemorySharedAllocation = clSharedMemAllocINTEL(this->context, this->context->getDevice(0u), nullptr, 400, 0, &retVal); cl_int setValue = 12u; retVal = clEnqueueMemFillINTEL(this->pCmdQ, unfiedMemorySharedAllocation, &setValue, sizeof(setValue), 400u, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_MEMFILL_INTEL; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clMemFreeINTEL(this->context, unfiedMemorySharedAllocation); } TEST(clUnifiedSharedMemoryTests, givenDefaulMemPropertiesWhenClDeviceMemAllocIntelIsCalledThenItAllocatesDeviceUnifiedMemoryAllocationWithProperAllocationTypeAndSize) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0}; auto allocationSize = 4000u; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unfiedMemoryDeviceAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemoryDeviceAllocation); 
EXPECT_EQ(graphicsAllocation->size, allocationSize); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, graphicsAllocation->gpuAllocation->getAllocationType()); EXPECT_EQ(graphicsAllocation->gpuAllocation->getGpuAddress(), castToUint64(unfiedMemoryDeviceAllocation)); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), graphicsAllocation->gpuAllocation->getUnderlyingBufferSize()); retVal = clMemFreeINTEL(&mockContext, unfiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, givenValidMemPropertiesWhenClDeviceMemAllocIntelIsCalledThenItAllocatesDeviceUnifiedMemoryAllocationWithProperAllocationTypeAndSize) { MockContext mockContext; cl_int retVal = CL_SUCCESS; auto allocationSize = 4000u; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unfiedMemoryDeviceAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unfiedMemoryDeviceAllocation); auto allocationsManager = mockContext.getSVMAllocsManager(); EXPECT_EQ(1u, allocationsManager->getNumAllocs()); auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemoryDeviceAllocation); EXPECT_EQ(graphicsAllocation->size, allocationSize); EXPECT_EQ(graphicsAllocation->memoryType, InternalMemoryType::DEVICE_UNIFIED_MEMORY); EXPECT_EQ(graphicsAllocation->gpuAllocation->getAllocationType(), GraphicsAllocation::AllocationType::WRITE_COMBINED); EXPECT_EQ(graphicsAllocation->gpuAllocation->getGpuAddress(), castToUint64(unfiedMemoryDeviceAllocation)); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), graphicsAllocation->gpuAllocation->getUnderlyingBufferSize()); retVal = clMemFreeINTEL(&mockContext, unfiedMemoryDeviceAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(clUnifiedSharedMemoryTests, 
givenInvalidMemPropertiesWhenClSharedMemAllocIntelIsCalledThenInvalidValueIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {CL_MEM_ALLOC_WRITE_COMBINED_INTEL, 0}; auto unfiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, 4, 0, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, unfiedMemorySharedAllocation); } TEST(clUnifiedSharedMemoryTests, givenUnifiedMemoryAllocationSizeGreaterThanMaxMemAllocSizeAndClMemAllowUnrestrictedSizeFlagWhenCreateAllocationThenSuccesIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {CL_MEM_FLAGS, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, 0}; auto bigSize = MemoryConstants::gigaByte * 10; auto allocationSize = static_cast(bigSize); auto memoryManager = static_cast(mockContext.getDevice(0u)->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); if (memoryManager->peekForce32BitAllocations() || is32bit) { GTEST_SKIP(); } { auto unfiedMemoryAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unfiedMemoryAllocation); retVal = clMemFreeINTEL(&mockContext, unfiedMemoryAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } { auto unfiedMemoryAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unfiedMemoryAllocation); retVal = clMemFreeINTEL(&mockContext, unfiedMemoryAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } { auto unfiedMemoryAllocation = clHostMemAllocINTEL(&mockContext, properties, allocationSize, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, unfiedMemoryAllocation); retVal = clMemFreeINTEL(&mockContext, unfiedMemoryAllocation); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST(clUnifiedSharedMemoryTests, 
givenUnifiedMemoryAllocationSizeGreaterThanMaxMemAllocSizeWhenCreateAllocationThenErrorIsReturned) { MockContext mockContext; cl_int retVal = CL_SUCCESS; cl_mem_properties_intel properties[] = {0}; auto bigSize = MemoryConstants::gigaByte * 10; auto allocationSize = static_cast(bigSize); auto memoryManager = static_cast(mockContext.getDevice(0u)->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); if (memoryManager->peekForce32BitAllocations() || is32bit) { GTEST_SKIP(); } { auto unfiedMemoryAllocation = clDeviceMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, unfiedMemoryAllocation); } { auto unfiedMemoryAllocation = clSharedMemAllocINTEL(&mockContext, mockContext.getDevice(0u), properties, allocationSize, 0, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, unfiedMemoryAllocation); } { auto unfiedMemoryAllocation = clHostMemAllocINTEL(&mockContext, properties, allocationSize, 0, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, unfiedMemoryAllocation); } } compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_unload_compiler_tests.inl000066400000000000000000000006111363734646600301610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "cl_api_tests.h" using namespace NEO; typedef api_tests clUnloadCompilerTests; namespace ULT { TEST_F(clUnloadCompilerTests, WhenUnloadingCompilerThenOutOfHostMemoryErrorIsReturned) { auto retVal = clUnloadCompiler(); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/cl_unload_platform_compiler_tests.inl000066400000000000000000000007541363734646600320750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "cl_api_tests.h" using namespace 
NEO; typedef api_tests clUnloadPlatformCompilerTests; namespace ULT { TEST_F(clUnloadPlatformCompilerTests, WhenUnloadingPlatformCompilerThenOutOfHostMemoryErrorIsReturned) { auto retVal = clUnloadPlatformCompiler(nullptr); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/000077500000000000000000000000001363734646600230055ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/CMakeLists.txt000066400000000000000000000020511363734646600255430ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_api_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_renderbuffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_texture2d_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_texture3d_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_gl_texture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_acquire_gl_objects_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_release_gl_objects_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_device_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_context_info_khr_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_object_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_gl_texture_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_gl_intel_tracing_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_api_gl}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_create_from_gl_buffer_tests.cpp000066400000000000000000000014051363734646600317120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLBuffer_; namespace ULT { TEST_F(clCreateFromGLBuffer_, 
givenNullConxtextWhenCreateFromGLIsCalledThenErrorIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLBuffer(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // cl_GLuint bufobj &errCode // cl_int * errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_create_from_gl_renderbuffer_tests.cpp000066400000000000000000000014441363734646600331150ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLRenderbuffer_; namespace ULT { TEST_F(clCreateFromGLRenderbuffer_, givenNullContextWhenCreateIsCalledThenErrorIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLRenderbuffer(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // GLuint renderbuffer &errCode // cl_int *errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_create_from_gl_texture2d_tests.cpp000066400000000000000000000017151363734646600323730ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLTexture2D_; namespace ULT { TEST_F(clCreateFromGLTexture2D_, givenNullConxtextWhenClCreateFromGlTexture2DIsCalledThenInvalidContextIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLTexture2D(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // GLenum texture_target 0, // GLint miplevel 0, // GLuint texture &errCode // cl_int *errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT 
compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_create_from_gl_texture3d_tests.cpp000066400000000000000000000017151363734646600323740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clCreateFromGLTexture3D_; namespace ULT { TEST_F(clCreateFromGLTexture3D_, givenNullConxtextWhenClCreateFromGlTexture2DIsCalledThenInvalidContextIsReturned) { int errCode = CL_SUCCESS; auto retVal = clCreateFromGLTexture3D(nullptr, // cl_context context CL_MEM_READ_WRITE, // cl_mem_flags flags 0, // GLenum texture_target 0, // GLint miplevel 0, // GLuint texture &errCode // cl_int *errcode_ret ); EXPECT_EQ(nullptr, retVal); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_create_from_gl_texture_tests.cpp000066400000000000000000000010561363734646600321430ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clCreateFromGLTexture_; namespace ULT { TEST_F(clCreateFromGLTexture_, givenNullContextWhenCreateIsCalledThenErrorIsReturned) { int errCode = CL_SUCCESS; auto image = clCreateFromGLTexture(nullptr, CL_MEM_READ_WRITE, GL_TEXTURE_1D, 0, 0, &errCode); EXPECT_EQ(nullptr, image); EXPECT_EQ(errCode, CL_INVALID_CONTEXT); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_enqueue_acquire_gl_objects_tests.cpp000066400000000000000000000016201363734646600327630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueAcquireGLObjects_; namespace ULT { TEST_F(clEnqueueAcquireGLObjects_, 
givenNullCommandQueueWhenAcquireIsCalledThenInvalidCommandQueueIsReturned) { auto retVal = clEnqueueAcquireGLObjects(nullptr, // cl_command_queue command_queue 0, // cl_uint num_objects nullptr, // const cl_mem *mem_objects 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_enqueue_release_gl_objects_tests.cpp000066400000000000000000000016311363734646600327540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clEnqueueReleaseGLObjects_; namespace ULT { TEST_F(clEnqueueReleaseGLObjects_, givenNullCommandQueueWhenReleaseGlObjectsIsCalledThenInvalidCommandQueueIsReturned) { auto retVal = clEnqueueReleaseGLObjects(nullptr, // cl_command_queue command_queue 0, // cl_uint num_objects nullptr, // const cl_mem *mem_objects 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_get_gl_context_info_khr_tests.cpp000066400000000000000000000115001363734646600322720ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/gl/gl_dll_helper.h" using namespace NEO; typedef api_tests clGetGLContextInfoKHR_; namespace ULT { TEST_F(clGetGLContextInfoKHR_, 
successWithDefaultPlatform) { auto expectedDevice = ::platform()->getClDevice(0); cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; retVal = clGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); retVal = clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); } using clGetGLContextInfoKHRNonDefaultPlatform = ::testing::Test; TEST_F(clGetGLContextInfoKHRNonDefaultPlatform, successWithNonDefaultPlatform) { platformsImpl.clear(); cl_int retVal = CL_SUCCESS; auto nonDefaultPlatform = std::make_unique(); nonDefaultPlatform->initializeWithNewDevices(); cl_platform_id nonDefaultPlatformCl = nonDefaultPlatform.get(); auto expectedDevice = nonDefaultPlatform->getClDevice(0); size_t retSize = 0; cl_device_id retDevice = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, CL_CONTEXT_PLATFORM, reinterpret_cast(nonDefaultPlatformCl), 0}; retVal = clGetGLContextInfoKHR(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); retVal = clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, retDevice); EXPECT_EQ(sizeof(cl_device_id), retSize); } TEST_F(clGetGLContextInfoKHR_, invalidParam) { cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; retVal = clGetGLContextInfoKHR(properties, 0, 
sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, retDevice); EXPECT_EQ(0u, retSize); } TEST_F(clGetGLContextInfoKHR_, givenContextFromNoIntelOpenGlDriverWhenCallClGetGLContextInfoKHRThenReturnClInvalidContext) { cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; glDllHelper setDllParam; setDllParam.glSetString("NoIntel", GL_VENDOR); retVal = clGetGLContextInfoKHR(properties, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, retDevice); EXPECT_EQ(0u, retSize); } TEST_F(clGetGLContextInfoKHR_, givenNullVersionFromIntelOpenGlDriverWhenCallClGetGLContextInfoKHRThenReturnClInvalidContext) { cl_device_id retDevice = 0; size_t retSize = 0; const cl_context_properties properties[] = {CL_GL_CONTEXT_KHR, 1, CL_WGL_HDC_KHR, 2, 0}; glDllHelper setDllParam; setDllParam.glSetString("", GL_VERSION); retVal = clGetGLContextInfoKHR(properties, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, retDevice); EXPECT_EQ(0u, retSize); } TEST_F(clGetGLContextInfoKHR_, GivenIncorrectPropertiesWhenCallclGetGLContextInfoKHRThenReturnClInvalidGlShareGroupRererencKhr) { cl_device_id retDevice = 0; size_t retSize = 0; retVal = clGetGLContextInfoKHR(nullptr, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, retVal); const cl_context_properties propertiesLackOfWglHdcKhr[] = {CL_GL_CONTEXT_KHR, 1, 0}; retVal = clGetGLContextInfoKHR(propertiesLackOfWglHdcKhr, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, retVal); const cl_context_properties propertiesLackOfCLGlContextKhr[] = {CL_WGL_HDC_KHR, 2, 0}; retVal = clGetGLContextInfoKHR(propertiesLackOfCLGlContextKhr, 0, sizeof(cl_device_id), &retDevice, &retSize); EXPECT_EQ(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, 
retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_get_gl_device_info_tests.cpp000066400000000000000000000055611363734646600312130ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; namespace ULT { //------------------------------------------------------------------------------ struct GetDeviceInfoP : public ApiFixture<>, public ::testing::TestWithParam { void SetUp() override { param = GetParam(); ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_device_info param; }; typedef GetDeviceInfoP GetDeviceGlInfoStr; TEST_P(GetDeviceGlInfoStr, StringType) { char *paramValue = nullptr; size_t paramRetSize = 0; cl_int retVal = clGetDeviceInfo(devices[0], param, 0, nullptr, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramRetSize); paramValue = new char[paramRetSize]; retVal = clGetDeviceInfo(devices[0], param, paramRetSize, paramValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GE(std::strlen(paramValue), 0u); // check for extensions if (param == CL_DEVICE_EXTENSIONS) { std::string extensionString(paramValue); size_t currentOffset = 0u; std::string supportedExtensions[] = { "cl_khr_byte_addressable_store ", "cl_khr_fp16 ", "cl_khr_global_int32_base_atomics ", "cl_khr_global_int32_extended_atomics ", "cl_khr_icd ", "cl_khr_local_int32_base_atomics ", "cl_khr_local_int32_extended_atomics ", "cl_intel_subgroups ", "cl_intel_required_subgroup_size ", "cl_intel_subgroups_short ", "cl_khr_spir ", "cl_intel_accelerator ", "cl_intel_driver_diagnostics ", "cl_khr_priority_hints ", "cl_khr_throttle_hints ", "cl_khr_create_command_queue ", "cl_khr_gl_depth_images", "cl_khr_gl_event", "cl_khr_gl_msaa_sharing", }; for (auto element = 0u; element < sizeof(supportedExtensions) / 
sizeof(supportedExtensions[0]); element++) { auto foundOffset = extensionString.find(supportedExtensions[element]); EXPECT_TRUE(foundOffset != std::string::npos); EXPECT_GE(foundOffset, currentOffset); currentOffset = foundOffset; } } delete[] paramValue; } // Define new command types to run the parameterized tests static cl_device_info deviceInfoStrParams[] = { CL_DEVICE_BUILT_IN_KERNELS, CL_DEVICE_EXTENSIONS, CL_DEVICE_NAME, CL_DEVICE_OPENCL_C_VERSION, CL_DEVICE_PROFILE, CL_DEVICE_VENDOR, CL_DEVICE_VERSION, CL_DRIVER_VERSION}; INSTANTIATE_TEST_CASE_P(api, GetDeviceGlInfoStr, testing::ValuesIn(deviceInfoStrParams)); } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_get_gl_object_info_tests.cpp000066400000000000000000000011521363734646600312120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetGLObjectInfo_; namespace ULT { TEST_F(clGetGLObjectInfo_, givenNullMemObjectWhenGetGlObjectInfoIsCalledThenInvalidMemObjectIsReturned) { auto retVal = clGetGLObjectInfo(nullptr, // cl_mem memobj nullptr, // cl_gl_object_type *gl_object_type nullptr // GLuint *gl_object_name ); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_get_gl_texture_info_tests.cpp000066400000000000000000000014761363734646600314550ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetGLTextureInfo_; namespace ULT { TEST_F(clGetGLTextureInfo_, givenNullMemObjectWhenGetGLTextureInfoIsCalledThenInvalidMemObjectIsReturned) { auto retVal = clGetGLTextureInfo(nullptr, // cl_mem memobj CL_GL_TEXTURE_TARGET, // cl_gl_texture_info param_name 0, // size_t param_value_size nullptr, // void *param_value 
nullptr // size_t *param_value_size_ret ); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/api/gl/cl_gl_intel_tracing_tests.cpp000066400000000000000000000071501363734646600307200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/tracing/tracing_api.h" #include "opencl/source/tracing/tracing_notify.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; namespace ULT { struct IntelGlTracingTest : public api_tests { public: IntelGlTracingTest() {} void SetUp() override { api_tests::SetUp(); status = clCreateTracingHandleINTEL(devices[0], callback, this, &handle); ASSERT_NE(nullptr, handle); ASSERT_EQ(CL_SUCCESS, status); for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_TRUE); ASSERT_EQ(CL_SUCCESS, status); } status = clEnableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); } void TearDown() override { status = clDisableTracingINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); status = clDestroyTracingHandleINTEL(handle); ASSERT_EQ(CL_SUCCESS, status); api_tests::TearDown(); } protected: static void callback(cl_function_id fid, cl_callback_data *callback_data, void *user_data) { ASSERT_NE(nullptr, user_data); IntelGlTracingTest *base = (IntelGlTracingTest *)user_data; base->vcallback(fid, callback_data, nullptr); } virtual void vcallback(cl_function_id fid, cl_callback_data *callback_data, void *user_data) { if (fid == functionId) { if (callback_data->site == CL_CALLBACK_SITE_ENTER) { ++enterCount; } else if (callback_data->site == CL_CALLBACK_SITE_EXIT) { ++exitCount; } } } uint16_t callFunctions() { uint16_t count = 0; ++count; functionId = CL_FUNCTION_clCreateFromGLBuffer; clCreateFromGLBuffer(0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateFromGLRenderbuffer; clCreateFromGLRenderbuffer(0, 0, 0, 0); ++count; functionId = 
CL_FUNCTION_clCreateFromGLTexture; clCreateFromGLTexture(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateFromGLTexture2D; clCreateFromGLTexture2D(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clCreateFromGLTexture3D; clCreateFromGLTexture3D(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueAcquireGLObjects; clEnqueueAcquireGLObjects(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clEnqueueReleaseGLObjects; clEnqueueReleaseGLObjects(0, 0, 0, 0, 0, 0); ++count; functionId = CL_FUNCTION_clGetGLObjectInfo; clGetGLObjectInfo(0, 0, 0); ++count; functionId = CL_FUNCTION_clGetGLTextureInfo; clGetGLTextureInfo(0, 0, 0, 0, 0); return count; } protected: cl_tracing_handle handle = nullptr; cl_int status = CL_SUCCESS; uint16_t enterCount = 0; uint16_t exitCount = 0; cl_function_id functionId = CL_FUNCTION_COUNT; }; TEST_F(IntelGlTracingTest, GivenAllFunctionsToTraceExpectPass) { uint16_t count = callFunctions(); EXPECT_EQ(count, enterCount); EXPECT_EQ(count, exitCount); } TEST_F(IntelGlTracingTest, GivenNoFunctionsToTraceExpectPass) { for (uint32_t i = 0; i < CL_FUNCTION_COUNT; ++i) { status = clSetTracingPointINTEL(handle, static_cast(i), CL_FALSE); EXPECT_EQ(CL_SUCCESS, status); } callFunctions(); EXPECT_EQ(0, enterCount); EXPECT_EQ(0, exitCount); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/aub/000077500000000000000000000000001363734646600224015ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub/CMakeLists.txt000066400000000000000000000011141363734646600251360ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_aub_helper_tests ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_center_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/aub_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_tests.inl ) if(NOT DEFINED AUB_STREAM_DIR) list(APPEND IGDRCL_SRCS_aub_helper_tests 
${CMAKE_CURRENT_SOURCE_DIR}/aub_center_using_aubstream_stubs_tests.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_aub_helper_tests}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/aub/aub_center_tests.cpp000066400000000000000000000162661363734646600264510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_manager.h" #include "gtest/gtest.h" #include "third_party/aub_stream/headers/aubstream.h" using namespace NEO; TEST(AubCenter, GivenUseAubStreamDebugVariableNotSetWhenAubCenterIsCreatedThenAubCenterDoesNotCreateAubManager) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(false); MockAubCenter aubCenter(defaultHwInfo.get(), false, "", CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(nullptr, aubCenter.aubManager.get()); } TEST(AubCenter, GivenUseAubStreamDebugVariableSetWhenAubCenterIsCreatedThenCreateAubManagerWithCorrectParameters) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(false); MockAubManager *mockAubManager = new MockAubManager(defaultHwInfo->platform.eProductFamily, 4, 8 * MB, true, aub_stream::mode::aubFile, defaultHwInfo->capabilityTable.gpuAddressSpace); MockAubCenter mockAubCenter(defaultHwInfo.get(), false, "", CommandStreamReceiverType::CSR_AUB); mockAubCenter.aubManager = std::unique_ptr(mockAubManager); EXPECT_EQ(defaultHwInfo->platform.eProductFamily, mockAubManager->mockAubManagerParams.productFamily); EXPECT_EQ(4, mockAubManager->mockAubManagerParams.devicesCount); EXPECT_EQ(8 * MB, 
mockAubManager->mockAubManagerParams.memoryBankSize); EXPECT_EQ(true, mockAubManager->mockAubManagerParams.localMemorySupported); EXPECT_EQ(aub_stream::mode::aubFile, mockAubManager->mockAubManagerParams.streamMode); } TEST(AubCenter, GivenDefaultSetCommandStreamReceiverFlagAndAubFileNameWhenGettingAubStreamModeThenModeAubFileIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); std::string aubFile("test.aub"); auto mode = AubCenter::getAubStreamMode(aubFile, CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(aub_stream::mode::aubFile, mode); } TEST(AubCenter, GivenCsrHwAndEmptyAubFileNameWhenGettingAubStreamModeThenModeAubFileIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); std::string aubFile(""); auto mode = AubCenter::getAubStreamMode(aubFile, CommandStreamReceiverType::CSR_HW); EXPECT_EQ(aub_stream::mode::aubFile, mode); } TEST(AubCenter, GivenCsrHwAndNotEmptyAubFileNameWhenGettingAubStreamModeThenModeAubFileIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); std::string aubFile("test.aub"); auto mode = AubCenter::getAubStreamMode(aubFile, CommandStreamReceiverType::CSR_HW); EXPECT_EQ(aub_stream::mode::aubFile, mode); } TEST(AubCenter, GivenCsrTypeWhenGettingAubStreamModeThenCorrectModeIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); std::string aubFile("test.aub"); auto mode = AubCenter::getAubStreamMode(aubFile, CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(aub_stream::mode::aubFile, mode); mode = AubCenter::getAubStreamMode(aubFile, CommandStreamReceiverType::CSR_TBX); EXPECT_EQ(aub_stream::mode::tbx, mode); mode = AubCenter::getAubStreamMode(aubFile, CommandStreamReceiverType::CSR_TBX_WITH_AUB); EXPECT_EQ(aub_stream::mode::aubFileAndTbx, mode); } TEST(AubCenter, GivenSetCommandStreamReceiverFlagEqualDefaultHwWhenAubManagerIsCreatedThenCsrTypeDefinesAubStreamMode) { DebugManagerStateRestore 
restorer; DebugManager.flags.UseAubStream.set(true); DebugManager.flags.SetCommandStreamReceiver.set(-1); std::vector aubTypes = {CommandStreamReceiverType::CSR_HW, CommandStreamReceiverType::CSR_HW_WITH_AUB, CommandStreamReceiverType::CSR_AUB}; for (auto type : aubTypes) { MockAubCenter aubCenter(defaultHwInfo.get(), true, "test", type); EXPECT_EQ(aub_stream::mode::aubFile, aubCenter.aubStreamMode); } MockAubCenter aubCenter2(defaultHwInfo.get(), true, "", CommandStreamReceiverType::CSR_TBX); EXPECT_EQ(aub_stream::mode::tbx, aubCenter2.aubStreamMode); MockAubCenter aubCenter3(defaultHwInfo.get(), true, "", CommandStreamReceiverType::CSR_TBX_WITH_AUB); EXPECT_EQ(aub_stream::mode::aubFileAndTbx, aubCenter3.aubStreamMode); } TEST(AubCenter, GivenSetCommandStreamReceiverFlagSetWhenAubManagerIsCreatedThenDebugFlagDefinesAubStreamMode) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_TBX); MockAubCenter aubCenter(defaultHwInfo.get(), true, "", CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(aub_stream::mode::tbx, aubCenter.aubStreamMode); DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_TBX_WITH_AUB); MockAubCenter aubCenter2(defaultHwInfo.get(), true, "", CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(aub_stream::mode::aubFileAndTbx, aubCenter2.aubStreamMode); } TEST(AubCenter, GivenAubCenterInSubCaptureModeWhenItIsCreatedWithoutDebugFilterSettingsThenItInitializesSubCaptureFiltersWithDefaults) { DebugManagerStateRestore restorer; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); MockAubCenter aubCenter(defaultHwInfo.get(), false, "", CommandStreamReceiverType::CSR_AUB); auto subCaptureCommon = aubCenter.getSubCaptureCommon(); EXPECT_NE(nullptr, subCaptureCommon); EXPECT_EQ(0u, subCaptureCommon->subCaptureFilter.dumpKernelStartIdx); EXPECT_EQ(static_cast(-1), 
subCaptureCommon->subCaptureFilter.dumpKernelEndIdx); EXPECT_STREQ("", subCaptureCommon->subCaptureFilter.dumpKernelName.c_str()); } TEST(AubCenter, GivenAubCenterInSubCaptureModeWhenItIsCreatedWithDebugFilterSettingsThenItInitializesSubCaptureFiltersWithDebugFilterSettings) { DebugManagerStateRestore restorer; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); DebugManager.flags.AUBDumpFilterKernelStartIdx.set(10); DebugManager.flags.AUBDumpFilterKernelEndIdx.set(100); DebugManager.flags.AUBDumpFilterKernelName.set("kernel_name"); MockAubCenter aubCenter(defaultHwInfo.get(), false, "", CommandStreamReceiverType::CSR_AUB); auto subCaptureCommon = aubCenter.getSubCaptureCommon(); EXPECT_NE(nullptr, subCaptureCommon); EXPECT_EQ(static_cast(DebugManager.flags.AUBDumpFilterKernelStartIdx.get()), subCaptureCommon->subCaptureFilter.dumpKernelStartIdx); EXPECT_EQ(static_cast(DebugManager.flags.AUBDumpFilterKernelEndIdx.get()), subCaptureCommon->subCaptureFilter.dumpKernelEndIdx); EXPECT_STREQ(DebugManager.flags.AUBDumpFilterKernelName.get().c_str(), subCaptureCommon->subCaptureFilter.dumpKernelName.c_str()); } compute-runtime-20.13.16352/opencl/test/unit_test/aub/aub_center_using_aubstream_stubs_tests.cpp000066400000000000000000000040731363734646600331320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/options.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "gtest/gtest.h" #include "third_party/aub_stream/headers/aubstream.h" using namespace NEO; namespace aub_stream_stubs { extern uint16_t tbxServerPort; extern 
std::string tbxServerIp; } // namespace aub_stream_stubs TEST(AubCenter, GivenUseAubStreamDebugVariableSetWhenAubCenterIsCreatedThenAubManagerIsNotCreated) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); MockAubCenter aubCenter(defaultHwInfo.get(), false, "test", CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(nullptr, aubCenter.aubManager.get()); } TEST(AubCenter, GivenUseAubStreamAndTbxServerIpDebugVariableSetWhenAubCenterIsCreatedThenServerIpIsModified) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); DebugManager.flags.TbxServer.set("10.10.10.10"); VariableBackup backup(&aub_stream_stubs::tbxServerIp); MockAubCenter aubCenter(defaultHwInfo.get(), false, "", CommandStreamReceiverType::CSR_TBX); EXPECT_STREQ("10.10.10.10", aub_stream_stubs::tbxServerIp.c_str()); } TEST(AubCenter, GivenUseAubStreamAndTbxServerPortDebugVariableSetWhenAubCenterIsCreatedThenServerIpIsModified) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(true); DebugManager.flags.TbxPort.set(1234); VariableBackup backup(&aub_stream_stubs::tbxServerPort); uint16_t port = 1234u; EXPECT_NE(port, aub_stream_stubs::tbxServerPort); MockAubCenter aubCenter(defaultHwInfo.get(), false, "", CommandStreamReceiverType::CSR_TBX); EXPECT_EQ(port, aub_stream_stubs::tbxServerPort); } compute-runtime-20.13.16352/opencl/test/unit_test/aub/aub_helper_tests.cpp000066400000000000000000000005101363734646600264310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub/aub_helper_tests.inl" TEST(AubHelper, GivenHwInfoWhenGetMemBankSizeIsCalledThenItReturnsCorrectValue) { EXPECT_EQ(2 * MemoryConstants::gigaByte, AubHelper::getMemBankSize(defaultHwInfo.get())); } compute-runtime-20.13.16352/opencl/test/unit_test/aub/aub_helper_tests.inl000066400000000000000000000136221363734646600264410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_helper.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/aub/aub_helper.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_lrca_helper.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; TEST(AubHelper, GivenZeroPdEntryBitsWhenGetMemTraceIsCalledThenTraceNonLocalIsReturned) { int hint = AubHelper::getMemTrace(0u); EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceNonlocal, hint); } TEST(AubHelper, WhenGetPtEntryBitsIsCalledThenEntryBitsAreNotMasked) { uint64_t entryBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::userSupervisorBit); uint64_t maskedEntryBits = AubHelper::getPTEntryBits(entryBits); EXPECT_EQ(entryBits, maskedEntryBits); } TEST(AubHelper, GivenMultipleSubDevicesWhenGettingDeviceCountThenCorrectValueIsReturned) { DebugManagerStateRestore stateRestore; FeatureTable featureTable = {}; WorkaroundTable workaroundTable = {}; RuntimeCapabilityTable capTable = {}; GT_SYSTEM_INFO sysInfo = {}; PLATFORM platform = {}; HardwareInfo hwInfo{&platform, &featureTable, &workaroundTable, &sysInfo, capTable}; DebugManager.flags.CreateMultipleSubDevices.set(2); uint32_t devicesCount = HwHelper::getSubDevicesCount(&hwInfo); EXPECT_EQ(devicesCount, 2u); DebugManager.flags.CreateMultipleSubDevices.set(0); devicesCount = HwHelper::getSubDevicesCount(&hwInfo); EXPECT_EQ(devicesCount, 1u); } typedef Test AubHelperHwTest; HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetDataHintForPml4EntryIsCalledThenTraceNotypeIsReturned) { AubHelperHw aubHelper(false); int dataHint = 
aubHelper.getDataHintForPml4Entry(); EXPECT_EQ(AubMemDump::DataTypeHintValues::TraceNotype, dataHint); } HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetDataHintForPdpEntryIsCalledThenTraceNotypeIsReturned) { AubHelperHw aubHelper(false); int dataHint = aubHelper.getDataHintForPdpEntry(); EXPECT_EQ(AubMemDump::DataTypeHintValues::TraceNotype, dataHint); } HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetDataHintForPdEntryIsCalledThenTraceNotypeIsReturned) { AubHelperHw aubHelper(false); int dataHint = aubHelper.getDataHintForPdEntry(); EXPECT_EQ(AubMemDump::DataTypeHintValues::TraceNotype, dataHint); } HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetDataHintForPtEntryIsCalledThenTraceNotypeIsReturned) { AubHelperHw aubHelper(false); int dataHint = aubHelper.getDataHintForPtEntry(); EXPECT_EQ(AubMemDump::DataTypeHintValues::TraceNotype, dataHint); } HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetMemTraceForPml4EntryIsCalledThenTracePml4EntryIsReturned) { AubHelperHw aubHelper(false); int addressSpace = aubHelper.getMemTraceForPml4Entry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TracePml4Entry, addressSpace); } HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetMemTraceForPdpEntryIsCalledThenTracePhysicalPdpEntryIsReturned) { AubHelperHw aubHelper(false); int addressSpace = aubHelper.getMemTraceForPdpEntry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TracePhysicalPdpEntry, addressSpace); } HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetMemTraceForPd4EntryIsCalledThenTracePpgttPdEntryIsReturned) { AubHelperHw aubHelper(false); int addressSpace = aubHelper.getMemTraceForPdEntry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TracePpgttPdEntry, addressSpace); } HWTEST_F(AubHelperHwTest, GivenDisabledLocalMemoryWhenGetMemTraceForPtEntryIsCalledThenTracePpgttEntryIsReturned) { AubHelperHw aubHelper(false); int addressSpace = aubHelper.getMemTraceForPtEntry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TracePpgttEntry, 
addressSpace); } HWTEST_F(AubHelperHwTest, GivenEnabledLocalMemoryWhenGetMemTraceForPml4EntryIsCalledThenTraceLocalIsReturned) { AubHelperHw aubHelper(true); int addressSpace = aubHelper.getMemTraceForPml4Entry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, addressSpace); } HWTEST_F(AubHelperHwTest, GivenEnabledLocalMemoryWhenGetMemTraceForPdpEntryIsCalledThenTraceLocalIsReturned) { AubHelperHw aubHelper(true); int addressSpace = aubHelper.getMemTraceForPdpEntry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, addressSpace); } HWTEST_F(AubHelperHwTest, GivenEnabledLocalMemoryWhenGetMemTraceForPd4EntryIsCalledThenTraceLocalIsReturned) { AubHelperHw aubHelper(true); int addressSpace = aubHelper.getMemTraceForPdEntry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, addressSpace); } HWTEST_F(AubHelperHwTest, GivenEnabledLocalMemoryWhenGetMemTraceForPtEntryIsCalledThenTraceLocalIsReturned) { AubHelperHw aubHelper(true); int addressSpace = aubHelper.getMemTraceForPtEntry(); EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, addressSpace); } HWTEST_F(AubHelperHwTest, givenLrcaHelperWhenContextIsInitializedThenContextFlagsAreSet) { const auto &csTraits = CommandStreamReceiverSimulatedCommonHw::getCsTraits(aub_stream::ENGINE_RCS); MockLrcaHelper lrcaHelper(csTraits.mmioBase); std::unique_ptr> lrcaBase(alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA), alignedFree); lrcaHelper.initialize(lrcaBase.get()); ASSERT_NE(0u, lrcaHelper.setContextSaveRestoreFlagsCalled); } compute-runtime-20.13.16352/opencl/test/unit_test/aub_mem_dump/000077500000000000000000000000001363734646600242645ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_mem_dump/CMakeLists.txt000066400000000000000000000005261363734646600270270ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_aub_mem_dump_tests ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/aub_alloc_dump_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lrca_helper_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_aub_mem_dump_tests}) compute-runtime-20.13.16352/opencl/test/unit_test/aub_mem_dump/aub_alloc_dump_tests.cpp000066400000000000000000000625321363734646600311700ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" using namespace NEO; typedef Test AubAllocDumpTests; struct AubFileStreamMock : public AubMemDump::AubFileStream { void write(const char *data, size_t size) override { buffer.resize(size); memcpy(buffer.data(), data, size); } char *getData() { return buffer.data(); } size_t getSize() { return buffer.size(); } std::vector buffer; }; HWTEST_F(AubAllocDumpTests, givenBufferOrImageWhenGraphicsAllocationIsKnownThenItsTypeCanBeCheckedIfItIsWritable) { auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); EXPECT_FALSE(gfxAllocation->isMemObjectsAllocationWithWritableFlags()); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); 
gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::MAP_ALLOCATION); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::MAP_ALLOCATION); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::IMAGE); 
gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableImage(*gfxAllocation)); gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::IMAGE); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableImage(*gfxAllocation)); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenImageResourceWhenGmmResourceInfoIsAvailableThenImageSurfaceTypeCanBeDeducedFromGmmResourceType) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D, AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_1D)); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_2D)); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_3D)); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, AubAllocDump::getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE::RESOURCE_INVALID)); } HWTEST_F(AubAllocDumpTests, givenGraphicsAllocationWhenDumpAllocationIsCalledInDefaultModeThenGraphicsAllocationShouldNotBeDumped) { auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenGraphicsAllocationWhenDumpAllocationIsCalledButDumpFormatIsUnspecifiedThenGraphicsAllocationShouldNotBeDumped) { auto memoryManager = 
pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenNonWritableBufferWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenBufferShouldNotBeDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenNonWritableImageWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenImageShouldNotBeDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BMP"); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = MockGmm::allocateImage2d(*memoryManager); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), 0); EXPECT_EQ(0u, mockAubFileStream->getSize()); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(AubAllocDumpTests, givenWritableBufferWhenDumpAllocationIsCalledAndAubDumpBufferFormatIsNotSetThenBufferShouldNotBeDumped) { 
MockContext context; size_t bufferSize = 10; auto retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); auto gfxAllocation = buffer->getGraphicsAllocation(); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenWritableImageWhenDumpAllocationIsCalledAndAubDumpImageFormatIsNotSetThenImageShouldNotBeDumped) { MockContext context; std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenWritableBufferWhenDumpAllocationIsCalledAndAubDumpBufferFormatIsSetToBinThenBufferShouldBeDumpedInBinFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockContext context; size_t bufferSize = 10; auto retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); auto gfxAllocation = buffer->getGraphicsAllocation(); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::AubCaptureBinaryDumpHD), mockAubFileStream->getSize()); 
AubMemDump::AubCaptureBinaryDumpHD cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ(0x7u, cmd.Header.Type); EXPECT_EQ(0x1u, cmd.Header.Opcode); EXPECT_EQ(0x15u, cmd.Header.SubOp); EXPECT_EQ(((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1, cmd.Header.DwordLength); EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getBaseAddr()); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.getWidth()); EXPECT_EQ(1u, cmd.getHeight()); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.getPitch()); EXPECT_EQ(1u, cmd.GttType); EXPECT_EQ(handle, cmd.DirectoryHandle); } HWTEST_F(AubAllocDumpTests, givenWritableBufferWhenDumpAllocationIsCalledAndAubDumpBufferFormatIsSetToTreThenBufferShouldBeDumpedInTreFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("TRE"); MockContext context; size_t bufferSize = 10; auto retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); auto gfxAllocation = buffer->getGraphicsAllocation(); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::CmdServicesMemTraceDumpCompress), mockAubFileStream->getSize()); AubMemDump::CmdServicesMemTraceDumpCompress cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ((sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4, cmd.dwordCount); EXPECT_EQ(0x7u, cmd.instructionType); EXPECT_EQ(0x10u, cmd.instructionSubOpcode); EXPECT_EQ(0x2eu, cmd.instructionOpcode); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; 
EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getSurfaceAddress()); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.surfaceWidth); EXPECT_EQ(1u, cmd.surfaceHeight); EXPECT_EQ(static_cast(gfxAllocation->getUnderlyingBufferSize()), cmd.surfacePitch); EXPECT_EQ(SURFACE_FORMAT::SURFACE_FORMAT_RAW, cmd.surfaceFormat); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre, cmd.dumpType); EXPECT_EQ(RENDER_SURFACE_STATE::TILE_MODE_LINEAR, cmd.surfaceTilingType); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, cmd.surfaceType); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed, cmd.algorithm); EXPECT_EQ(1u, cmd.gttType); EXPECT_EQ(handle, cmd.directoryHandle); } HWTEST_F(AubAllocDumpTests, givenWritableImageWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToBmpThenImageShouldBeDumpedInBmpFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("BMP"); MockContext context; std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::AubCmdDumpBmpHd), mockAubFileStream->getSize()); AubMemDump::AubCmdDumpBmpHd cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ(0x7u, cmd.Header.Type); EXPECT_EQ(0x1u, cmd.Header.Opcode); EXPECT_EQ(0x44u, cmd.Header.SubOp); EXPECT_EQ(((sizeof(cmd) - sizeof(cmd.Header)) / sizeof(uint32_t)) - 1, cmd.Header.DwordLength); EXPECT_EQ(0u, cmd.Xmin); EXPECT_EQ(0u, cmd.Ymin); auto gmm = gfxAllocation->getDefaultGmm(); EXPECT_EQ((8 * gmm->gmmResourceInfo->getRenderPitch()) / gmm->gmmResourceInfo->getBitsPerPixel(), cmd.BufferPitch); 
EXPECT_EQ(gmm->gmmResourceInfo->getBitsPerPixel(), cmd.BitsPerPixel); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getResourceFormatSurfaceState()), cmd.Format); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseWidth()), cmd.Xsize); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseHeight()), cmd.Ysize); EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getBaseAddr()); EXPECT_EQ(0u, cmd.Secure); EXPECT_EQ(0u, cmd.UseFence); auto flagInfo = gmm->gmmResourceInfo->getResourceFlags()->Info; EXPECT_EQ(static_cast(flagInfo.TiledW || flagInfo.TiledX || flagInfo.TiledY || flagInfo.TiledYf || flagInfo.TiledYs), cmd.TileOn); EXPECT_EQ(flagInfo.TiledY, cmd.WalkY); EXPECT_EQ(1u, cmd.UsePPGTT); EXPECT_EQ(1u, cmd.Use32BitDump); EXPECT_EQ(1u, cmd.UseFullFormat); EXPECT_EQ(handle, cmd.DirectoryHandle); } HWTEST_F(AubAllocDumpTests, givenWritableImageWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToTreThenImageShouldBeDumpedInTreFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("TRE"); MockContext context; std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(); std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); ASSERT_EQ(sizeof(AubMemDump::CmdServicesMemTraceDumpCompress), mockAubFileStream->getSize()); AubMemDump::CmdServicesMemTraceDumpCompress cmd; memcpy(&cmd, mockAubFileStream->getData(), mockAubFileStream->getSize()); EXPECT_EQ((sizeof(AubMemDump::CmdServicesMemTraceDumpCompress) - 1) / 4, cmd.dwordCount); EXPECT_EQ(0x7u, cmd.instructionType); EXPECT_EQ(0x10u, cmd.instructionSubOpcode); EXPECT_EQ(0x2eu, cmd.instructionOpcode); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename 
RENDER_SURFACE_STATE::SURFACE_FORMAT; EXPECT_EQ(gfxAllocation->getGpuAddress(), cmd.getSurfaceAddress()); auto gmm = gfxAllocation->getDefaultGmm(); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseWidth()), cmd.surfaceWidth); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseHeight()), cmd.surfaceHeight); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getRenderPitch()), cmd.surfacePitch); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getResourceFormatSurfaceState()), cmd.surfaceFormat); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::DumpTypeValues::Tre, cmd.dumpType); EXPECT_EQ(gmm->gmmResourceInfo->getTileModeSurfaceState(), cmd.surfaceTilingType); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, cmd.surfaceType); EXPECT_EQ(AubMemDump::CmdServicesMemTraceDumpCompress::AlgorithmValues::Uncompressed, cmd.algorithm); EXPECT_EQ(1u, cmd.gttType); EXPECT_EQ(handle, cmd.directoryHandle); } HWTEST_F(AubAllocDumpTests, givenCompressedImageWritableWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToTreThenImageShouldBeDumpedInTreFormat) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("TRE"); MockContext context; std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(); gfxAllocation->getDefaultGmm()->isRenderCompressed = true; std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenMultisampleImageWritableWhenDumpAllocationIsCalledAndAubDumpImageFormatIsSetToTreThenImageDumpIsNotSupported) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpImageFormat.set("TRE"); MockContext context; std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, 
image); auto gfxAllocation = image->getGraphicsAllocation(); auto mockGmmResourceInfo = reinterpret_cast(gfxAllocation->getDefaultGmm()->gmmResourceInfo.get()); mockGmmResourceInfo->mockResourceCreateParams.MSAA.NumSamples = 2; std::unique_ptr mockAubFileStream(new AubFileStreamMock()); auto handle = static_cast(reinterpret_cast(this)); auto format = AubAllocDump::getDumpFormat(*gfxAllocation); AubAllocDump::dumpAllocation(format, *gfxAllocation, mockAubFileStream.get(), handle); EXPECT_EQ(0u, mockAubFileStream->getSize()); } HWTEST_F(AubAllocDumpTests, givenMultisampleImageWritableWheGetDumpSurfaceIsCalledAndDumpFormatIsSpecifiedThenNullSurfaceInfoIsReturned) { MockContext context; std::unique_ptr image(ImageHelper::create(&context)); ASSERT_NE(nullptr, image); auto gfxAllocation = image->getGraphicsAllocation(); auto mockGmmResourceInfo = reinterpret_cast(gfxAllocation->getDefaultGmm()->gmmResourceInfo.get()); mockGmmResourceInfo->mockResourceCreateParams.MSAA.NumSamples = 2; EXPECT_EQ(nullptr, AubAllocDump::getDumpSurfaceInfo(*gfxAllocation, AubAllocDump::DumpFormat::IMAGE_BMP)); EXPECT_EQ(nullptr, AubAllocDump::getDumpSurfaceInfo(*gfxAllocation, AubAllocDump::DumpFormat::IMAGE_TRE)); } struct AubSurfaceDumpTests : public AubAllocDumpTests, public ::testing::WithParamInterface> { void SetUp() override { AubAllocDumpTests::SetUp(); isCompressed = std::get<0>(GetParam()); dumpFormat = std::get<1>(GetParam()); } void TearDown() override { AubAllocDumpTests::TearDown(); } bool isCompressed = false; AubAllocDump::DumpFormat dumpFormat = AubAllocDump::DumpFormat::NONE; }; HWTEST_P(AubSurfaceDumpTests, givenGraphicsAllocationWhenGetDumpSurfaceIsCalledAndDumpFormatIsSpecifiedThenSurfaceInfoIsReturned) { ExecutionEnvironment *executionEnvironment = pDevice->executionEnvironment; MockMemoryManager memoryManager(*executionEnvironment); if (AubAllocDump::isBufferDumpFormat(dumpFormat)) { auto bufferAllocation = 
memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, bufferAllocation); bufferAllocation->setAllocationType(isCompressed ? GraphicsAllocation::AllocationType::BUFFER_COMPRESSED : GraphicsAllocation::AllocationType::BUFFER); std::unique_ptr surfaceInfo(AubAllocDump::getDumpSurfaceInfo(*bufferAllocation, dumpFormat)); if (nullptr != surfaceInfo) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; EXPECT_EQ(GmmHelper::decanonize(bufferAllocation->getGpuAddress()), surfaceInfo->address); EXPECT_EQ(static_cast(bufferAllocation->getUnderlyingBufferSize()), surfaceInfo->width); EXPECT_EQ(1u, surfaceInfo->height); EXPECT_EQ(static_cast(bufferAllocation->getUnderlyingBufferSize()), surfaceInfo->pitch); EXPECT_EQ(SURFACE_FORMAT::SURFACE_FORMAT_RAW, surfaceInfo->format); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceInfo->surftype); EXPECT_EQ(RENDER_SURFACE_STATE::TILE_MODE_LINEAR, surfaceInfo->tilingType); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == bufferAllocation->getAllocationType(), surfaceInfo->compressed); EXPECT_EQ((AubAllocDump::DumpFormat::BUFFER_TRE == dumpFormat) ? 
aub_stream::dumpType::tre : aub_stream::dumpType::bin, surfaceInfo->dumpType); } memoryManager.freeGraphicsMemory(bufferAllocation); } if (AubAllocDump::isImageDumpFormat(dumpFormat)) { cl_image_desc imgDesc = {}; imgDesc.image_width = 512; imgDesc.image_height = 1; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo); MockMemoryManager::AllocationData allocationData; allocationData.imgInfo = &imgInfo; auto imageAllocation = memoryManager.allocateGraphicsMemoryForImage(allocationData); ASSERT_NE(nullptr, imageAllocation); auto gmm = imageAllocation->getDefaultGmm(); gmm->isRenderCompressed = isCompressed; std::unique_ptr surfaceInfo(AubAllocDump::getDumpSurfaceInfo(*imageAllocation, dumpFormat)); if (nullptr != surfaceInfo) { EXPECT_EQ(GmmHelper::decanonize(imageAllocation->getGpuAddress()), surfaceInfo->address); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseWidth()), surfaceInfo->width); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getBaseHeight()), surfaceInfo->height); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getRenderPitch()), surfaceInfo->pitch); EXPECT_EQ(static_cast(gmm->gmmResourceInfo->getResourceFormatSurfaceState()), surfaceInfo->format); EXPECT_EQ(AubAllocDump::getImageSurfaceTypeFromGmmResourceType(gmm->gmmResourceInfo->getResourceType()), surfaceInfo->surftype); EXPECT_EQ(gmm->gmmResourceInfo->getTileModeSurfaceState(), surfaceInfo->tilingType); EXPECT_EQ(gmm->isRenderCompressed, surfaceInfo->compressed); EXPECT_EQ((AubAllocDump::DumpFormat::IMAGE_TRE == dumpFormat) ? 
aub_stream::dumpType::tre : aub_stream::dumpType::bmp, surfaceInfo->dumpType); } memoryManager.freeGraphicsMemory(imageAllocation); } } INSTANTIATE_TEST_CASE_P(GetDumpSurfaceTest, AubSurfaceDumpTests, ::testing::Combine( ::testing::Bool(), // isCompressed ::testing::Values( // dumpFormat AubAllocDump::DumpFormat::NONE, AubAllocDump::DumpFormat::BUFFER_BIN, AubAllocDump::DumpFormat::BUFFER_TRE, AubAllocDump::DumpFormat::IMAGE_BMP, AubAllocDump::DumpFormat::IMAGE_TRE))); compute-runtime-20.13.16352/opencl/test/unit_test/aub_mem_dump/lrca_helper_tests.cpp000066400000000000000000000013651363734646600304770ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include "test.h" #include using namespace AubMemDump; TEST(LrcaHelper, WhenLrcaHelperIsInitializedThenLrcaIncludesDebugModeLri) { LrcaHelper helper(0x2000); auto lrcaBufferSize = helper.sizeLRCA / sizeof(uint32_t); auto lrca = std::unique_ptr(new uint32_t[lrcaBufferSize]); helper.initialize(lrca.get()); bool debugModeLriFound = false; for (uint32_t i = 0; i < lrcaBufferSize; i += 2) { if (lrca[i] == 0x20d8 && lrca[i + 1] == 0x00200020) { debugModeLriFound = true; break; } } EXPECT_TRUE(debugModeLriFound); } compute-runtime-20.13.16352/opencl/test/unit_test/aub_stream_mocks/000077500000000000000000000000001363734646600251505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_stream_mocks/aub_stream_interface_mock.cpp000066400000000000000000000007671363734646600330410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_aub_manager.h" namespace NEO { aub_stream::AubManager *createAubManager(uint32_t productFamily, uint32_t devicesCount, uint64_t memoryBankSize, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace) { return new 
MockAubManager(productFamily, devicesCount, memoryBankSize, localMemorySupported, streamMode, gpuAddressSpace); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/000077500000000000000000000000001363734646600236235ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/CMakeLists.txt000066400000000000000000000055301363734646600263660ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # project(igdrcl_aub_tests) set(OPENCL_AUB_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS ${CMAKE_CURRENT_SOURCE_DIR}/aub_tests_configuration.cpp ${NEO_SOURCE_DIR}/opencl/source/aub/aub_stream_interface.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/os_interface.cpp $ $ $ $ $ ) if(DEFINED AUB_STREAM_DIR) list(APPEND IGDRCL_AUB_TESTS__TARGET_OBJECTS $) endif() add_executable(igdrcl_aub_tests ${IGDRCL_AUB_TESTS__TARGET_OBJECTS} ) set_target_properties(igdrcl_aub_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) if(WIN32) set_target_properties(igdrcl_aub_tests PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY ${TargetDir} VS_DEBUGGER_COMMAND_ARGUMENTS " --disable_pagefaulting_tests" ) endif() add_custom_target(run_aub_tests) set_target_properties(run_aub_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) if(WIN32) target_include_directories(igdrcl_aub_tests PRIVATE ${NEO_SOURCE_DIR}/opencl/test/unit_test/mocks${BRANCH_DIR_SUFFIX} ) endif() target_include_directories(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_sources(igdrcl_aub_tests PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/unit_test/page_fault_manager/default_asan_options.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/resource_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_mode.h ) if(WIN32) target_sources(igdrcl_aub_tests PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/windows/gmm_interface_win.cpp ${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory.cpp 
${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/wddm_create.cpp ) else() target_sources(igdrcl_aub_tests PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/linux/gmm_interface_linux.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/drm_neo_create.cpp ) endif() copy_gmm_dll_for(igdrcl_aub_tests) add_subdirectories() target_link_libraries(igdrcl_aub_tests igdrcl_mocks) target_link_libraries(igdrcl_aub_tests ${NEO_STATICALLY_LINKED_LIBRARIES_MOCKABLE}) target_link_libraries(igdrcl_aub_tests igdrcl_mocks) target_link_libraries(igdrcl_aub_tests gmock-gtest ${IGDRCL_EXTRA_LIBS}) if(UNIX) target_link_libraries(igdrcl_aub_tests ${GMM_LINK_NAME}) else() add_dependencies(igdrcl_aub_tests ${GMM_TARGET_NAME}) target_sources(igdrcl_aub_tests PRIVATE ${NEO_SHARED_DIRECTORY}/os_interface/windows/gmm_interface_win.cpp ) endif() target_include_directories(igdrcl_aub_tests BEFORE PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros${BRANCH_DIR_SUFFIX}) create_project_source_tree(igdrcl_aub_tests) compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/aub_tests_configuration.cpp000066400000000000000000000005211363734646600312450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_mode.h" namespace NEO { // max time per single test iteration unsigned int ultIterationMaxTime = 180; bool useMockGmm = false; const char *executionDirectorySuffix = "_aub"; TestMode testMode = defaultTestMode; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/aub_tests_configuration.h000066400000000000000000000003461363734646600307170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once struct AubTestsConfig { bool testCanonicalAddress; }; template AubTestsConfig GetAubTestsConfig(); 
compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/aub_tests_configuration.inl000066400000000000000000000005401363734646600312460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/aub_tests/aub_tests_configuration.h" template AubTestsConfig GetAubTestsConfig() { AubTestsConfig aubTestsConfig; aubTestsConfig.testCanonicalAddress = true; return aubTestsConfig; }compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/cmake/000077500000000000000000000000001363734646600247035ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/cmake/run_aub_test_target.cmake000066400000000000000000000043661363734646600317560ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # string(REPLACE "/" ";" aub_test_config ${aub_test_config}) list(GET aub_test_config 0 product) list(GET aub_test_config 1 slices) list(GET aub_test_config 2 subslices) list(GET aub_test_config 3 eu_per_ss) add_custom_target(run_${product}_aub_tests ALL DEPENDS copy_test_files_${product} prepare_test_kernels) add_dependencies(run_aub_tests run_${product}_aub_tests) set_target_properties(run_${product}_aub_tests PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}") if(WIN32) add_dependencies(run_${product}_aub_tests mock_gdi) endif() set(aub_tests_options "") if (NOT ${AUB_DUMP_BUFFER_FORMAT} STREQUAL "") list(APPEND aub_tests_options --dump_buffer_format) list(APPEND aub_tests_options ${AUB_DUMP_BUFFER_FORMAT}) endif() if (NOT ${AUB_DUMP_IMAGE_FORMAT} STREQUAL "") list(APPEND aub_tests_options --dump_image_format) list(APPEND aub_tests_options ${AUB_DUMP_IMAGE_FORMAT}) endif() add_custom_command( TARGET run_${product}_aub_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo re-creating working directory for ${product} AUBs generation... 
COMMAND ${CMAKE_COMMAND} -E remove_directory ${TargetDir}/${product}_aub COMMAND ${CMAKE_COMMAND} -E make_directory ${TargetDir}/${product}_aub COMMAND ${CMAKE_COMMAND} -E make_directory ${TargetDir}/${product}_aub/aub_out COMMAND ${CMAKE_COMMAND} -E make_directory ${TargetDir}/${product}_aub/cl_cache ) if(WIN32 OR NOT DEFINED NEO__GMM_LIBRARY_PATH) set(aub_test_cmd_prefix $) else() set(aub_test_cmd_prefix LD_LIBRARY_PATH=${NEO__GMM_LIBRARY_PATH} IGDRCL_TEST_SELF_EXEC=off $) endif() add_custom_command( TARGET run_${product}_aub_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Running AUB generation for ${product} in ${TargetDir}/${product}_aub COMMAND ${aub_test_cmd_prefix} --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} --gtest_repeat=1 ${aub_tests_options} ${IGDRCL_TESTS_LISTENER_OPTION} ) if(DO_NOT_RUN_AUB_TESTS) set_target_properties(run_${product}_aub_tests PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE EXCLUDE_FROM_ALL TRUE ) endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/000077500000000000000000000000001363734646600264455ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/CMakeLists.txt000066400000000000000000000027471363734646600312170ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/command_enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer_aub_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_image_aub_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_verify_memory_image_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_copy_read_buffer_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_copy_read_buffer_aub_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image_aub_tests.cpp ) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h000066400000000000000000000020721363734646600335320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" namespace NEO { struct CommandEnqueueAUBFixture : public CommandEnqueueBaseFixture, public AUBCommandStreamFixture { using AUBCommandStreamFixture::SetUp; void SetUp() override { CommandEnqueueBaseFixture::SetUp(cl_command_queue_properties(0)); AUBCommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { AUBCommandStreamFixture::TearDown(); CommandEnqueueBaseFixture::TearDown(); } }; } // namespace NEO 
enqueue_copy_buffer_aub_tests.cpp000066400000000000000000000066761363734646600352140ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct CopyBufferHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef CopyBufferHw AUBCopyBuffer; HWTEST_P(AUBCopyBuffer, simple) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f}; cl_float dstMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; auto retVal = CL_INVALID_VALUE; auto srcBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(srcMemory), srcMemory, retVal); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(dstMemory), dstMemory, retVal); ASSERT_NE(nullptr, dstBuffer); auto pSrcMemory = &srcMemory[0]; size_t srcOffset = std::get<0>(GetParam()); size_t dstOffset = std::get<1>(GetParam()); size_t sizeCopied = sizeof(cl_float); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto pDstMemory = (cl_float *)(dstBuffer->getGraphicsAllocation()->getGpuAddress()); retVal = pCmdQ->enqueueCopyBuffer( srcBuffer, dstBuffer, srcOffset, dstOffset, sizeCopied, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); pSrcMemory = ptrOffset(pSrcMemory, srcOffset); pDstMemory = ptrOffset(pDstMemory, dstOffset); // 
Compute our memory expecations based on kernel execution size_t sizeUserMemory = sizeof(dstMemory); AUBCommandStreamFixture::expectMemory(pDstMemory, pSrcMemory, sizeCopied); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (dstOffset + sizeCopied < sizeUserMemory) { pDstMemory = ptrOffset(pDstMemory, sizeCopied); float *dstMemoryRef = ptrOffset(dstMemory, sizeCopied); size_t sizeRemaining = sizeUserMemory - sizeCopied - dstOffset; AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemoryRef, sizeRemaining); } delete srcBuffer; delete dstBuffer; } INSTANTIATE_TEST_CASE_P(AUBCopyBuffer_simple, AUBCopyBuffer, ::testing::Combine( ::testing::Values( // srcOffset 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float)), ::testing::Values( // dstOffset 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float)))); enqueue_copy_buffer_rect_aub_tests.cpp000066400000000000000000000105621363734646600362160ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using namespace NEO; struct CopyBufferRectHw : public CommandEnqueueAUBFixture, public ::testing::TestWithParam> { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); std::tie(srcOrigin0, srcOrigin1, srcOrigin2, dstOrigin0, dstOrigin1, dstOrigin2, copy3D) = GetParam(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } size_t srcOrigin0; size_t srcOrigin1; size_t srcOrigin2; size_t dstOrigin0; size_t dstOrigin1; size_t dstOrigin2; bool copy3D; }; typedef 
CopyBufferRectHw AUBCopyBufferRect; HWTEST_P(AUBCopyBufferRect, simple) { //3D UINT8 buffer 20x20x20 static const size_t rowPitch = 20; static const size_t slicePitch = rowPitch * rowPitch; static const size_t elementCount = slicePitch * rowPitch; MockContext context(this->pClDevice); cl_uchar *srcMemory = new uint8_t[elementCount + 8]; cl_uchar *dstMemory = new uint8_t[elementCount + 8]; for (size_t i = 0; i < elementCount; i++) { srcMemory[i] = static_cast(i + 1); dstMemory[i] = 0; } auto retVal = CL_INVALID_VALUE; auto srcBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, elementCount * sizeof(uint8_t), srcMemory, retVal); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, elementCount * sizeof(uint8_t), dstMemory, retVal); ASSERT_NE(nullptr, dstBuffer); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = (cl_uchar *)(dstBuffer->getGraphicsAllocation()->getGpuAddress()); size_t regionX = std::min(rowPitch / 2, rowPitch - std::max(srcOrigin0, dstOrigin0)); size_t regionY = std::min(rowPitch / 2, rowPitch - std::max(srcOrigin1, dstOrigin1)); size_t regionZ = copy3D ? 
std::min(rowPitch / 2, rowPitch - std::max(srcOrigin2, dstOrigin2)) : 1; size_t srcOrigin[] = {srcOrigin0, srcOrigin1, srcOrigin2}; size_t dstOrigin[] = {dstOrigin0, dstOrigin1, dstOrigin2}; size_t region[] = {regionX, regionY, regionZ}; retVal = pCmdQ->enqueueCopyBufferRect( srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // Verify Output, line by line uint8_t src[rowPitch * slicePitch]; memset(src, 0, sizeof(src)); auto tDst = pDestMemory; auto tSrc = ptrOffset(pSrcMemory, srcOrigin[0] + srcOrigin[1] * rowPitch + srcOrigin[2] * slicePitch); auto tRef = ptrOffset(src, dstOrigin[0] + dstOrigin[1] * rowPitch + dstOrigin[2] * slicePitch); for (unsigned int z = 0; z < regionZ; z++) { auto pDst = tDst; auto pSrc = tSrc; auto pRef = tRef; for (unsigned int y = 0; y < regionY; y++) { memcpy(pRef, pSrc, region[0]); pDst += rowPitch; pSrc += rowPitch; pRef += rowPitch; } tDst += slicePitch; tSrc += slicePitch; tRef += slicePitch; } AUBCommandStreamFixture::expectMemory(pDestMemory, src, rowPitch * slicePitch); delete srcBuffer; delete dstBuffer; delete[] srcMemory; delete[] dstMemory; } static size_t zero[] = {0}; INSTANTIATE_TEST_CASE_P(AUBCopyBufferRect, AUBCopyBufferRect, ::testing::Combine( ::testing::Values(0, 3), //srcOrigin ::testing::ValuesIn(zero), ::testing::Values(0, 7), ::testing::Values(0, 3), //dstPrigin ::testing::ValuesIn(zero), ::testing::Values(0, 7), ::testing::Values(true, false))); enqueue_copy_image_aub_tests.cpp000066400000000000000000000146721363734646600350200ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/image.h" #include 
"opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct AUBCopyImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::WithParamInterface>, public ::testing::Test { using AUBCommandStreamFixture::SetUp; typedef AUBCommandStreamFixture CommandStreamFixture; void SetUp() override { if (!defaultHwInfo->capabilityTable.supportsImages) { GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); context = std::make_unique(pClDevice); } void TearDown() override { srcImage.reset(); dstImage.reset(); context.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr srcImage; std::unique_ptr dstImage; }; HWTEST_P(AUBCopyImage, simple) { const size_t testImageDimensions = 4; cl_float srcMemory[testImageDimensions * testImageDimensions] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f}; cl_float origValue = -1.0f; cl_float dstMemory[testImageDimensions * testImageDimensions] = { origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue, origValue}; cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; // clang-format off imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_R; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 
clang-format on auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); auto retVal = CL_INVALID_VALUE; srcImage.reset(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal)); ASSERT_NE(nullptr, srcImage.get()); dstImage.reset(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, dstMemory, retVal)); ASSERT_NE(nullptr, dstImage.get()); size_t srcOffset = std::get<0>(GetParam()); size_t dstOffset = std::get<1>(GetParam()); size_t srcOrigin[3] = {srcOffset, srcOffset, 0}; size_t dstOrigin[3] = {dstOffset, dstOffset, 0}; // Only draw 1/4 of the original image const size_t region[3] = { testImageDimensions / 2, testImageDimensions / 2, 1}; retVal = pCmdQ->enqueueCopyImage( srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto dstOutMemory = new cl_float[dstImage->getSize()]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth}; retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, dstOutMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); // Offset the source memory auto pSrcMemory = ptrOffset(srcMemory, (srcOffset * testImageDimensions + srcOffset) * sizeof(origValue)); // Since the driver allocated his own memory, we need to use that for verification auto pDstMemory = static_cast(dstOutMemory); if (dstOffset > 0) { // Add expectations for rows that should be unmodified AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, dstOffset * testImageDimensions * sizeof(origValue)); pDstMemory = ptrOffset(pDstMemory, dstOffset * testImageDimensions * 
sizeof(origValue)); } for (size_t row = 0; row < region[1]; ++row) { if (dstOffset > 0) { size_t length = dstOffset * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); } size_t length = region[0] * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, pSrcMemory, length); pDstMemory = ptrOffset(pDstMemory, length); length = (testImageDimensions - region[0] - dstOffset) * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); pSrcMemory = ptrOffset(pSrcMemory, testImageDimensions * sizeof(origValue)); } size_t remainingRows = testImageDimensions - region[1] - dstOffset; while (remainingRows > 0) { size_t length = testImageDimensions * sizeof(origValue); AUBCommandStreamFixture::expectMemory(pDstMemory, dstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); --remainingRows; } delete[] dstOutMemory; } INSTANTIATE_TEST_CASE_P(AUBCopyImage_simple, AUBCopyImage, ::testing::Combine( ::testing::Values( // srcOffset 0u, 1u, 2u), ::testing::Values( // dstOffset 0u, 1u, 2u))); enqueue_fill_buffer_aub_tests.cpp000066400000000000000000000116051363734646600351540ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct FillBufferHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface, public ::testing::Test { void SetUp() override { 
CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef FillBufferHw AUBFillBuffer; HWTEST_P(AUBFillBuffer, simple) { cl_float destMemory[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; auto pDestMemory = &destMemory[0]; MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); auto retVal = CL_INVALID_VALUE; auto destBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(destMemory), pDestMemory, retVal); ASSERT_NE(nullptr, destBuffer); float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = GetParam(); size_t size = 2 * patternSize; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = pCmdQ->enqueueFillBuffer( destBuffer, pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); pDestMemory = reinterpret_cast((destBuffer->getGraphicsAllocation()->getGpuAddress())); // The memory under offset should be untouched if (offset) { cl_float *destMemoryRef = ptrOffset(&destMemory[0], offset); AUBCommandStreamFixture::expectMemory(pDestMemory, destMemoryRef, offset); pDestMemory = ptrOffset(pDestMemory, offset); } // Compute our memory expecations based on kernel execution auto pEndMemory = ptrOffset(pDestMemory, size); while (pDestMemory < pEndMemory) { AUBCommandStreamFixture::expectMemory(pDestMemory, pattern, patternSize); pDestMemory = ptrOffset(pDestMemory, patternSize); } // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory size_t sizeUserMemory = sizeof(destMemory); if (offset + size < sizeUserMemory) { size_t sizeRemaining = sizeUserMemory - size - offset; cl_float *destMemoryRef = ptrOffset(&destMemory[0], offset + size); AUBCommandStreamFixture::expectMemory(pDestMemory, destMemoryRef, sizeRemaining); } delete destBuffer; } HWTEST_F(AUBFillBuffer, 
givenFillBufferWhenSeveralSubmissionsWithoutPollForCompletionBetweenThenTestConcurrentCS) { DebugManagerStateRestore dbgRestorer; cl_float destMemory[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; auto pDestMemory = &destMemory[0]; MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); auto retVal = CL_INVALID_VALUE; std::unique_ptr destBuffer(Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(destMemory), pDestMemory, retVal)); ASSERT_NE(nullptr, destBuffer); float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = 0; size_t size = 2 * patternSize; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; uint32_t numWrites = 4; for (uint32_t id = 0; id < numWrites; id++) { offset = id * size; retVal = pCmdQ->enqueueFillBuffer( destBuffer.get(), pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); } AUBCommandStreamFixture::pollForCompletion(); pDestMemory = reinterpret_cast((destBuffer->getGraphicsAllocation()->getGpuAddress())); auto pEndMemory = ptrOffset(pDestMemory, numWrites * size); while (pDestMemory < pEndMemory) { AUBCommandStreamFixture::expectMemory(pDestMemory, pattern, patternSize); pDestMemory = ptrOffset(pDestMemory, patternSize); } } INSTANTIATE_TEST_CASE_P(AUBFillBuffer_simple, AUBFillBuffer, ::testing::Values( 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float))); enqueue_fill_image_aub_tests.cpp000066400000000000000000000223271363734646600347700ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/image.h" #include 
"opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using namespace NEO; // clang-format off struct FillImageParams { cl_mem_object_type imageType; size_t offsets[3]; } imageParams[] = { {CL_MEM_OBJECT_IMAGE1D, { 0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, { 1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, { 0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, { 1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, { 0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, { 1u, 2u, 3u}} }; static const uint32_t fillValues[4] = { 0x3f800000, 0x00000000, 0x3f555555, 0x3f2aaaaa }; static const uint16_t expectedHALF_FLOAT[4] = {0x3c00, 0x0000, 0x3aaa, 0x3955}; static const uint16_t expectedUNORM16[4] = {0xffff, 0x0000, 0xd554, 0xaaa9}; static const uint8_t expectedUNORM8[4] = { 0xff, 0x00, 0xd4, 0xa9}; //The distance between sRGB values and the expected values should not be greater than 0.6f //In this test, for simplicity purposes, we are checking if the distance is 0 static const uint8_t expectedUNORM8sRGB[4] = { 0xff, 0x00, 0xeb, 0xa9}; static const uint8_t expectedUNORM8sBGR[4] = { 0xeb, 0x00, 0xff, 0xa9}; static const uint16_t expectedSNORM16[4] = {0x7fff, 0x0000, 0x6AA9, 0x5554}; static const uint8_t expectedSNORM8[4] = { 0x7f, 0x00, 0x69, 0x54}; static auto expectedSINT32 = fillValues; static uint16_t expectedSINT16[4] = { 0x0000, 0x0000, 0x5555, 0xaaaa }; static uint8_t expectedSINT8[4] = { 0x00, 0x00, 0x55, 0xaa }; static auto expectedUINT32 = fillValues; static uint16_t expectedUINT16[4] = { 0x0000, 0x0000, 0x5555, 0xaaaa }; static uint8_t expectedUINT8[4] = { 0x00, 0x00, 0x55, 0xaa }; static auto expectedFLOAT = fillValues; // ChannelTypes/FillValues for test struct FillChannelType { cl_channel_type type; const void *expectedValues; } fillChannelTypes[] = { {CL_SNORM_INT8, expectedSNORM8}, {CL_SNORM_INT16, expectedSNORM16}, {CL_UNORM_INT8, 
expectedUNORM8}, {CL_UNORM_INT16, expectedUNORM16}, {CL_SIGNED_INT8, expectedSINT8}, {CL_SIGNED_INT16, expectedSINT16}, {CL_SIGNED_INT32, expectedSINT32}, {CL_UNSIGNED_INT8, expectedUINT8}, {CL_UNSIGNED_INT16, expectedUINT16}, {CL_UNSIGNED_INT32, expectedUINT32}, {CL_HALF_FLOAT, expectedHALF_FLOAT}, {CL_FLOAT, expectedFLOAT}}; // clang-format on struct AubFillImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::WithParamInterface>, public ::testing::Test { using AUBCommandStreamFixture::SetUp; typedef AUBCommandStreamFixture CommandStreamFixture; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } auto dataType = std::get<0>(GetParam()).type; auto channelOrder = std::get<1>(GetParam()); if (dataType != CL_UNORM_INT8 && (channelOrder == CL_sRGBA || channelOrder == CL_sBGRA)) { //sRGBA and sBGRA support only unorm int8 type GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); context = std::make_unique(pClDevice); if ((pClDevice->getHardwareInfo().capabilityTable.clVersionSupport < 20) && (channelOrder == CL_sRGBA || channelOrder == CL_sBGRA)) { GTEST_SKIP(); } } void TearDown() override { image.reset(); context.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr image; }; HWTEST_P(AubFillImage, simple) { const unsigned int testWidth = 5; const unsigned int testHeight = std::get<2>(GetParam()).imageType != CL_MEM_OBJECT_IMAGE1D ? 5 : 1; const unsigned int testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 
5 : 1; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>(GetParam()).type; imageFormat.image_channel_order = std::get<1>(GetParam()); imageDesc.image_type = std::get<2>(GetParam()).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0u; switch (imageFormat.image_channel_data_type) { default: case CL_SIGNED_INT8: case CL_SNORM_INT8: case CL_UNORM_INT8: case CL_UNSIGNED_INT8: perChannelDataSize = 1; break; case CL_HALF_FLOAT: case CL_SIGNED_INT16: case CL_SNORM_INT16: case CL_UNORM_INT16: case CL_UNSIGNED_INT16: perChannelDataSize = 2; break; case CL_SIGNED_INT32: case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { default: case CL_R: numChannels = 1; break; case CL_RG: numChannels = 2; break; case CL_RGBA: case CL_sRGBA: case CL_sBGRA: numChannels = 4; break; } size_t elementSize = perChannelDataSize * numChannels; auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); image.reset(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image.get()); auto sizeMemory = image->getSize(); ASSERT_GT(sizeMemory, 0u); auto srcMemory = new uint8_t[elementSize]; memset(srcMemory, 0xAB, elementSize); memset(image->getCpuAddress(), 0xAB, sizeMemory); auto origin = std::get<2>(GetParam()).offsets; const size_t region[3] = { std::max(testWidth / 2, 1u), 
std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; retVal = pCmdQ->enqueueFillImage( image.get(), fillValues, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {testWidth, testHeight, testDepth}; auto dstMemory = new uint8_t[sizeMemory]; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, dstMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); size_t slicePitch = image->getHostPtrSlicePitch(); size_t rowPitch = image->getHostPtrRowPitch(); auto expected = std::get<0>(GetParam()).expectedValues; if (imageFormat.image_channel_order == CL_sRGBA) { expected = expectedUNORM8sRGB; } if (imageFormat.image_channel_order == CL_sBGRA) { expected = expectedUNORM8sBGR; } auto pImageData = dstMemory; for (size_t z = 0; z < testDepth; ++z) { for (size_t y = 0; y < testHeight; ++y) { for (size_t x = 0; x < testWidth; ++x) { if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x >= origin[0] && x < (origin[0] + region[0])) { AUBCommandStreamFixture::expectMemory(&pImageData[x * elementSize], expected, elementSize); } else { AUBCommandStreamFixture::expectMemory(&pImageData[x * elementSize], srcMemory, elementSize); } } pImageData = ptrOffset(pImageData, rowPitch); } pImageData = ptrOffset(pImageData, slicePitch - (rowPitch * (testHeight > 0 ? 
testHeight : 1))); } delete[] dstMemory; delete[] srcMemory; } INSTANTIATE_TEST_CASE_P(AubFillImage_simple, AubFillImage, ::testing::Combine( ::testing::ValuesIn(fillChannelTypes), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA, CL_sRGBA, CL_sBGRA), ::testing::ValuesIn(imageParams))); enqueue_kernel_aub_tests.cpp000066400000000000000000000755631363734646600341720ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_fixture.h" #include "opencl/test/unit_test/fixtures/two_walker_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "test.h" using namespace NEO; extern const HardwareInfo *defaultHwInfo; struct TestParam { cl_uint globalWorkSizeX; cl_uint globalWorkSizeY; cl_uint globalWorkSizeZ; cl_uint localWorkSizeX; cl_uint localWorkSizeY; cl_uint localWorkSizeZ; }; static TestParam TestParamTable[] = { {1, 1, 1, 1, 1, 1}, {16, 1, 1, 16, 1, 1}, {32, 1, 1, 16, 1, 1}, {64, 1, 1, 1, 1, 1}, {64, 1, 1, 16, 1, 1}, {64, 1, 1, 64, 1, 1}}; cl_uint TestSimdTable[] = { 8, 16, 32}; namespace ULT { struct AUBHelloWorld : public HelloWorldFixture, public HardwareParse, public ::testing::Test { void SetUp() override { HelloWorldFixture::SetUp(); HardwareParse::SetUp(); } void TearDown() 
override { HardwareParse::TearDown(); HelloWorldFixture::TearDown(); } }; HWCMDTEST_F(IGFX_GEN8_CORE, AUBHelloWorld, simple) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; // Intentionally mis-align data as we're going to test driver properly aligns commands pDSH->getSpace(sizeof(uint32_t)); auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); parseCommands(*pCmdQ); auto *pWalker = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, pWalker); auto alignmentIDSA = 32 * sizeof(uint8_t); EXPECT_EQ(0u, pWalker->getIndirectDataStartAddress() % alignmentIDSA); // Check interface descriptor alignment auto pMIDL = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, pMIDL); uintptr_t addrIDD = pMIDL->getInterfaceDescriptorDataStartAddress(); auto alignmentIDD = 64 * sizeof(uint8_t); EXPECT_EQ(0u, addrIDD % alignmentIDD); // Check kernel start pointer matches hard-coded kernel. 
auto pExpectedISA = pKernel->getKernelHeap(); auto expectedSize = pKernel->getKernelHeapSize(); auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } struct AUBHelloWorldIntegrateTest : public HelloWorldFixture, public ::testing::TestWithParam> { typedef HelloWorldFixture ParentClass; void SetUp() override { std::tie(KernelFixture::simd, param) = GetParam(); if (KernelFixture::simd < HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily).getMinimalSIMDSize()) { GTEST_SKIP(); } ParentClass::SetUp(); } void TearDown() override { if (!IsSkipped()) { ParentClass::TearDown(); } } template void writeMemory(GraphicsAllocation *allocation) { AUBCommandStreamReceiverHw *aubCsr = nullptr; if (testMode == TestMode::AubTests) { aubCsr = static_cast *>(pCommandStreamReceiver); } else if (testMode == TestMode::AubTestsWithTbx) { auto tbxWithAubCsr = static_cast> *>(pCommandStreamReceiver); aubCsr = static_cast *>(tbxWithAubCsr->aubCSR.get()); tbxWithAubCsr->writeMemory(*allocation); } aubCsr->writeMemory(*allocation); } TestParam param; }; HWTEST_P(AUBHelloWorldIntegrateTest, simple) { if (this->simd < UnitTestHelper::smallestTestableSimdSize) { GTEST_SKIP(); } cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {param.globalWorkSizeX, param.globalWorkSizeY, param.globalWorkSizeZ}; size_t localWorkSize[3] = {param.localWorkSizeX, param.localWorkSizeY, param.localWorkSizeZ}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; writeMemory(destBuffer->getGraphicsAllocation()); writeMemory(srcBuffer->getGraphicsAllocation()); auto retVal = this->pCmdQ->enqueueKernel( this->pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // 
Compute our memory expecations based on kernel execution auto globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2]; auto sizeWritten = globalWorkItems * sizeof(float); auto pDestGpuAddress = reinterpret_cast((destBuffer->getGraphicsAllocation()->getGpuAddress())); AUBCommandStreamFixture::expectMemory(pDestGpuAddress, this->pSrcMemory, sizeWritten); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (sizeWritten < this->sizeUserMemory) { auto sizeRemaining = this->sizeUserMemory - sizeWritten; auto pDestUnwrittenMemory = ptrOffset(pDestGpuAddress, sizeWritten); auto pUnwrittenMemory = ptrOffset(this->pDestMemory, sizeWritten); AUBCommandStreamFixture::expectMemory(pDestUnwrittenMemory, pUnwrittenMemory, sizeRemaining); } } INSTANTIATE_TEST_CASE_P( AUB, AUBHelloWorldIntegrateTest, ::testing::Combine( ::testing::ValuesIn(TestSimdTable), ::testing::ValuesIn(TestParamTable))); struct AUBSimpleArg : public SimpleArgFixture, public HardwareParse, public ::testing::Test { using SimpleArgKernelFixture::SetUp; void SetUp() override { SimpleArgFixture::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); SimpleArgFixture::TearDown(); } }; HWCMDTEST_F(IGFX_GEN8_CORE, AUBSimpleArg, simple) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; // Intentionally mis-align data as we're going to test driver properly aligns commands pDSH->getSpace(sizeof(uint32_t)); auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, 
globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); pCmdQ->flush(); auto *pWalker = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, pWalker); auto alignmentIDSA = 32 * sizeof(uint8_t); EXPECT_EQ(0u, pWalker->getIndirectDataStartAddress() % alignmentIDSA); // Check interface descriptor alignment auto pMIDL = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, pMIDL); uintptr_t addrIDD = pMIDL->getInterfaceDescriptorDataStartAddress(); auto alignmentIDD = 64 * sizeof(uint8_t); EXPECT_EQ(0u, addrIDD % alignmentIDD); // Check kernel start pointer matches hard-coded kernel. auto pExpectedISA = pKernel->getKernelHeap(); auto expectedSize = pKernel->getKernelHeapSize(); auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } HWTEST_F(AUBSimpleArg, givenAubCommandStreamerReceiverWhenBatchBufferFlateningIsForcedThenDumpedAubIsStillValid) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; DebugManagerStateRestore dbgRestore; DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); pCmdQ->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::ImmediateDispatch); auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); } struct AUBSimpleArgIntegrateTest : public SimpleArgFixture, public ::testing::TestWithParam> { typedef SimpleArgFixture ParentClass; void SetUp() override { std::tie(simd, param) = GetParam(); if (simd < 
HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily).getMinimalSIMDSize()) { GTEST_SKIP(); } ParentClass::SetUp(); } void TearDown() override { if (!IsSkipped()) { ParentClass::TearDown(); } } cl_uint simd; TestParam param; }; HWTEST_P(AUBSimpleArgIntegrateTest, simple) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {param.globalWorkSizeX, param.globalWorkSizeY, param.globalWorkSizeZ}; size_t localWorkSize[3] = {param.localWorkSizeX, param.localWorkSizeY, param.localWorkSizeZ}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = this->pCmdQ->enqueueKernel( this->pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // Compute our memory expecations based on kernel execution size_t globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2]; size_t sizeWritten = globalWorkItems * sizeof(int); AUBCommandStreamFixture::expectMemory(this->pDestMemory, this->pExpectedMemory, sizeWritten); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (sizeWritten < this->sizeUserMemory) { auto sizeRemaining = this->sizeUserMemory - sizeWritten; auto pUnwrittenMemory = ptrOffset(this->pDestMemory, sizeWritten); AUBCommandStreamFixture::expectMemory(pUnwrittenMemory, pUnwrittenMemory, sizeRemaining); } } INSTANTIATE_TEST_CASE_P( AUB, AUBSimpleArgIntegrateTest, ::testing::Combine( ::testing::ValuesIn(TestSimdTable), ::testing::ValuesIn(TestParamTable))); } // namespace ULT struct AUBSimpleArgNonUniformFixture : public KernelAUBFixture { void SetUp() override { if (NEO::defaultHwInfo->capabilityTable.clVersionSupport < 20) { GTEST_SKIP(); } KernelAUBFixture::SetUp(); sizeUserMemory = alignUp(typeItems * typeSize, 64); destMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, destMemory); for (uint32_t i = 0; i < 
typeItems; i++) { *(static_cast(destMemory) + i) = 0xdeadbeef; } expectedMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, expectedMemory); memset(expectedMemory, 0x0, sizeUserMemory); } void initializeExpectedMemory(size_t globalX, size_t globalY, size_t globalZ) { uint32_t id = 0; size_t testGlobalMax = globalX * globalY * globalZ; ASSERT_GT(typeItems, testGlobalMax); int maxId = static_cast(testGlobalMax); argVal = maxId; kernel->setArg(0, sizeof(int), &argVal); int *expectedData = static_cast(expectedMemory); for (size_t z = 0; z < globalZ; z++) { for (size_t y = 0; y < globalY; y++) { for (size_t x = 0; x < globalX; x++) { *(expectedData + id) = id; ++id; } } } *(static_cast(destMemory) + maxId) = 0; *(expectedData + maxId) = maxId; outBuffer.reset(Buffer::create(context, CL_MEM_COPY_HOST_PTR, alignUp(sizeUserMemory, 4096), destMemory, retVal)); bufferGpuAddress = reinterpret_cast(outBuffer->getGraphicsAllocation()->getGpuAddress()); kernel->setArg(1, outBuffer.get()); sizeWrittenMemory = maxId * typeSize; //add single int size for atomic sum of all work-items sizeWrittenMemory += typeSize; sizeRemainderMemory = sizeUserMemory - sizeWrittenMemory; expectedRemainderMemory = alignedMalloc(sizeRemainderMemory, 4096); ASSERT_NE(nullptr, expectedRemainderMemory); int *expectedReminderData = static_cast(expectedRemainderMemory); size_t reminderElements = sizeRemainderMemory / typeSize; for (size_t i = 0; i < reminderElements; i++) { *(expectedReminderData + i) = 0xdeadbeef; } remainderBufferGpuAddress = ptrOffset(bufferGpuAddress, sizeWrittenMemory); } void TearDown() override { if (NEO::defaultHwInfo->capabilityTable.clVersionSupport < 20) { return; } if (destMemory) { alignedFree(destMemory); destMemory = nullptr; } if (expectedMemory) { alignedFree(expectedMemory); expectedMemory = nullptr; } if (expectedRemainderMemory) { alignedFree(expectedRemainderMemory); expectedRemainderMemory = nullptr; } KernelAUBFixture::TearDown(); } unsigned int 
deviceClVersionSupport; const size_t typeSize = sizeof(int); const size_t typeItems = 40 * 40 * 40; size_t sizeWrittenMemory = 0; size_t sizeUserMemory; size_t sizeRemainderMemory; int argVal = 0x22222222; void *destMemory = nullptr; void *expectedMemory = nullptr; void *expectedRemainderMemory = nullptr; void *remainderBufferGpuAddress = nullptr; void *bufferGpuAddress = nullptr; std::unique_ptr outBuffer; HardwareParse hwParser; }; using AUBSimpleKernelStatelessTest = Test>; HWTEST_F(AUBSimpleKernelStatelessTest, givenSimpleKernelWhenStatelessPathIsUsedThenExpectCorrectBuffer) { constexpr size_t bufferSize = MemoryConstants::pageSize; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {bufferSize, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; uint8_t bufferData[bufferSize] = {}; uint8_t bufferExpected[bufferSize]; memset(bufferExpected, 0xCD, bufferSize); auto pBuffer = std::unique_ptr(Buffer::create(context, CL_MEM_USE_HOST_PTR | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL, bufferSize, bufferData, retVal)); ASSERT_NE(nullptr, pBuffer); kernel->setArg(0, pBuffer.get()); retVal = this->pCmdQ->enqueueKernel( kernel.get(), workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_THAT(this->pProgram->getInternalOptions(), testing::HasSubstr(std::string(NEO::CompilerOptions::greaterThan4gbBuffersRequired))); if (this->device->getSharedDeviceInfo().force32BitAddressess) { EXPECT_THAT(this->pProgram->getInternalOptions(), testing::HasSubstr(std::string(NEO::CompilerOptions::arch32bit))); } EXPECT_FALSE(this->kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); EXPECT_TRUE(this->kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); this->pCmdQ->flush(); 
expectMemory(reinterpret_cast(pBuffer->getGraphicsAllocation()->getGpuAddress()), bufferExpected, bufferSize); } using AUBSimpleArgNonUniformTest = Test; HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork1DimNonUniformGroupThenExpectTwoWalkers) { using WALKER_TYPE = WALKER_TYPE; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 1, 1}; size_t localWorkSize[3] = {32, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXDimensionThenExpectTwoWalkers) { using WALKER_TYPE = WALKER_TYPE; cl_uint workDim = 2; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 32, 1}; size_t localWorkSize[3] = {16, 16, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); 
expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInYDimensionThenExpectTwoWalkers) { using WALKER_TYPE = WALKER_TYPE; cl_uint workDim = 2; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 39, 1}; size_t localWorkSize[3] = {16, 16, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork2DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) { using WALKER_TYPE = WALKER_TYPE; cl_uint workDim = 2; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 39, 1}; size_t localWorkSize[3] = {16, 16, 1}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(4u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, 
this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXDimensionThenExpectTwoWalkers) { using WALKER_TYPE = WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 32, 32}; size_t localWorkSize[3] = {8, 8, 2}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYDimensionThenExpectTwoWalkers) { using WALKER_TYPE = WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {32, 39, 32}; size_t localWorkSize[3] = {8, 8, 2}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(2u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } 
// NOTE(review): in the tests below `using WALKER_TYPE = WALKER_TYPE;` and the
// argument-less `getCommandCount()` look like they lost template arguments
// when this text was extracted - confirm against the repository sources.

// 3D enqueue: global size {32, 32, 39}, local size {8, 2, 8}. Expects the
// parser to count two commands, then checks the main and the remainder buffer.
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInZDimensionThenExpectTwoWalkers) {
    using WALKER_TYPE = WALKER_TYPE;

    size_t gwo[3] = {0, 0, 0};
    size_t gws[3] = {32, 32, 39};
    size_t lws[3] = {8, 2, 8};
    cl_uint dims = 3;
    cl_uint waitCount = 0;
    cl_event *waitList = nullptr;
    cl_event *outEvent = nullptr;

    initializeExpectedMemory(gws[0], gws[1], gws[2]);

    auto status = this->pCmdQ->enqueueKernel(this->kernel, dims, gwo, gws, lws,
                                             waitCount, waitList, outEvent);
    ASSERT_EQ(CL_SUCCESS, status);

    hwParser.parseCommands(*pCmdQ);
    uint32_t parsedCount = hwParser.getCommandCount();
    EXPECT_EQ(2u, parsedCount);

    pCmdQ->flush();
    expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
    expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
}

// 3D enqueue: global size {39, 39, 32}, local size {8, 8, 2}. Expects four commands.
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYDimensionThenExpectFourWalkers) {
    using WALKER_TYPE = WALKER_TYPE;

    size_t gwo[3] = {0, 0, 0};
    size_t gws[3] = {39, 39, 32};
    size_t lws[3] = {8, 8, 2};
    cl_uint dims = 3;
    cl_uint waitCount = 0;
    cl_event *waitList = nullptr;
    cl_event *outEvent = nullptr;

    initializeExpectedMemory(gws[0], gws[1], gws[2]);

    auto status = this->pCmdQ->enqueueKernel(this->kernel, dims, gwo, gws, lws,
                                             waitCount, waitList, outEvent);
    ASSERT_EQ(CL_SUCCESS, status);

    hwParser.parseCommands(*pCmdQ);
    uint32_t parsedCount = hwParser.getCommandCount();
    EXPECT_EQ(4u, parsedCount);

    pCmdQ->flush();
    expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
    expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
}

// 3D enqueue: global size {39, 32, 39}, local size {8, 2, 8}. Expects four commands.
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandZDimensionThenExpectFourWalkers) {
    using WALKER_TYPE = WALKER_TYPE;

    size_t gwo[3] = {0, 0, 0};
    size_t gws[3] = {39, 32, 39};
    size_t lws[3] = {8, 2, 8};
    cl_uint dims = 3;
    cl_uint waitCount = 0;
    cl_event *waitList = nullptr;
    cl_event *outEvent = nullptr;

    initializeExpectedMemory(gws[0], gws[1], gws[2]);

    auto status = this->pCmdQ->enqueueKernel(this->kernel, dims, gwo, gws, lws,
                                             waitCount, waitList, outEvent);
    ASSERT_EQ(CL_SUCCESS, status);

    hwParser.parseCommands(*pCmdQ);
    uint32_t parsedCount = hwParser.getCommandCount();
    EXPECT_EQ(4u, parsedCount);

    pCmdQ->flush();
    expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
    expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
}

// 3D enqueue: global size {32, 39, 39}, local size {2, 8, 8}. Expects four commands.
HWTEST_F(AUBSimpleArgNonUniformTest, givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInYandZDimensionThenExpectFourWalkers) {
    using WALKER_TYPE = WALKER_TYPE;

    size_t gwo[3] = {0, 0, 0};
    size_t gws[3] = {32, 39, 39};
    size_t lws[3] = {2, 8, 8};
    cl_uint dims = 3;
    cl_uint waitCount = 0;
    cl_event *waitList = nullptr;
    cl_event *outEvent = nullptr;

    initializeExpectedMemory(gws[0], gws[1], gws[2]);

    auto status = this->pCmdQ->enqueueKernel(this->kernel, dims, gwo, gws, lws,
                                             waitCount, waitList, outEvent);
    ASSERT_EQ(CL_SUCCESS, status);

    hwParser.parseCommands(*pCmdQ);
    uint32_t parsedCount = hwParser.getCommandCount();
    EXPECT_EQ(4u, parsedCount);

    pCmdQ->flush();
    expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory);
    expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory);
}

HWTEST_F(AUBSimpleArgNonUniformTest,
givenOpenCL20SupportWhenProvidingWork3DimNonUniformGroupInXandYandZDimensionThenExpectEightWalkers) { using WALKER_TYPE = WALKER_TYPE; cl_uint workDim = 3; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {39, 39, 39}; size_t localWorkSize[3] = {8, 8, 2}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; initializeExpectedMemory(globalWorkSize[0], globalWorkSize[1], globalWorkSize[2]); auto retVal = this->pCmdQ->enqueueKernel( this->kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); hwParser.parseCommands(*pCmdQ); uint32_t walkerCount = hwParser.getCommandCount(); EXPECT_EQ(8u, walkerCount); pCmdQ->flush(); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory); expectMemory(remainderBufferGpuAddress, this->expectedRemainderMemory, sizeRemainderMemory); } enqueue_map_buffer_aub_tests.cpp000066400000000000000000000042561363734646600350070ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct AUBMapBuffer : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; HWTEST_F(AUBMapBuffer, MapUpdateUnmapVerify) { MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); auto retVal = CL_INVALID_VALUE; size_t bufferSize = 10; std::unique_ptr buffer(Buffer::create( &context, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer); uint8_t pattern[] = {0xFF}; size_t patternSize = 
sizeof(pattern); retVal = pCmdQ->enqueueFillBuffer( buffer.get(), pattern, patternSize, 0, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); auto mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, 0, bufferSize, 0, nullptr, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); // write to mapped ptr auto mappedPtrStart = static_cast(mappedPtr); for (uint32_t i = 0; i < bufferSize; i++) { *(mappedPtrStart + i) = i; } pCmdQ->enqueueUnmapMemObject(buffer.get(), mappedPtr, 0, nullptr, nullptr); // verify unmap std::unique_ptr readMemory(new uint8_t[bufferSize]); buffer->forceDisallowCPUCopy = true; retVal = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, bufferSize, readMemory.get(), nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); ASSERT_EQ(CL_SUCCESS, retVal); for (size_t i = 0; i < bufferSize; i++) { AUBCommandStreamFixture::expectMemory(&readMemory[i], &i, sizeof(uint8_t)); } } enqueue_map_image_aub_tests.cpp000066400000000000000000000213531363734646600346150ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct MapImageParams { cl_mem_object_type imageType; size_t offsets[3]; } mapImageParams[] = { {CL_MEM_OBJECT_IMAGE1D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, {1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {0u, 0u, 0u}}, 
{CL_MEM_OBJECT_IMAGE3D, {1u, 2u, 3u}}, }; struct AUBMapImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::WithParamInterface>, public ::testing::Test { typedef AUBCommandStreamFixture CommandStreamFixture; using AUBCommandStreamFixture::SetUp; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); context = std::make_unique(pClDevice); } void TearDown() override { srcImage.reset(); context.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr srcImage; }; HWTEST_P(AUBMapImage, MapUpdateUnmapVerify) { const unsigned int testWidth = 5; const unsigned int testHeight = std::get<2>(GetParam()).imageType != CL_MEM_OBJECT_IMAGE1D ? 5 : 1; const unsigned int testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 5 : 1; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>( GetParam( ) ); imageFormat.image_channel_order = std::get<1>( GetParam( ) ); imageDesc.image_type = std::get<2>( GetParam( ) ).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0; switch (imageFormat.image_channel_data_type) { case CL_UNORM_INT8: perChannelDataSize = 1; break; case CL_SIGNED_INT16: case CL_HALF_FLOAT: perChannelDataSize = 2; break; case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { case CL_R: numChannels = 1; break; case CL_RG: numChannels = 2; break; case CL_RGBA: numChannels = 4; break; } 
size_t elementSize = perChannelDataSize * numChannels; auto sizeMemory = testWidth * alignUp(testHeight, 4) * testDepth * elementSize; auto srcMemory = new (std::nothrow) uint8_t[sizeMemory]; ASSERT_NE(nullptr, srcMemory); for (unsigned i = 0; i < sizeMemory; ++i) { uint8_t origValue = i; memcpy(srcMemory + i, &origValue, sizeof(origValue)); } auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); srcImage.reset(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal)); ASSERT_NE(nullptr, srcImage.get()); auto origin = std::get<2>(GetParam()).offsets; const size_t region[3] = {std::max(testWidth / 2, 1u), std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; size_t inputRowPitch = testWidth * elementSize; size_t inputSlicePitch = inputRowPitch * testHeight; size_t mappedRowPitch; size_t mappedSlicePitch; auto mappedPtr = pCmdQ->enqueueMapImage(srcImage.get(), CL_TRUE, CL_MAP_WRITE | CL_MAP_READ, origin, region, &mappedRowPitch, &mappedSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); uint8_t *mappedPtrStart; uint8_t *srcMemoryStart; bool isGpuCopy = srcImage->isTiledAllocation() || !MemoryPool::isSystemMemoryPool(srcImage->getGraphicsAllocation()->getMemoryPool()); if (isGpuCopy) { mappedPtrStart = static_cast(mappedPtr); srcMemoryStart = srcMemory; // validate mapped region srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch * origin[2]); srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch * origin[1]); srcMemoryStart = ptrOffset(srcMemoryStart, elementSize * origin[0]); for (size_t z = 0; z < region[2]; z++) { for (size_t y = 0; y < region[1]; y++) { AUBCommandStreamFixture::expectMemory(mappedPtrStart, srcMemoryStart, elementSize * region[0]); mappedPtrStart = 
ptrOffset(mappedPtrStart, mappedRowPitch); srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch); } mappedPtrStart = ptrOffset(mappedPtrStart, mappedSlicePitch - (mappedRowPitch * region[1])); srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch - (inputRowPitch * (region[1]))); } } // write to mapped ptr mappedPtrStart = static_cast(mappedPtr); for (size_t z = 0; z < region[2]; z++) { for (size_t y = 0; y < region[1]; y++) { memset(mappedPtrStart, 0xFF, elementSize * region[0]); mappedPtrStart = ptrOffset(mappedPtrStart, mappedRowPitch); } mappedPtrStart = ptrOffset(mappedPtrStart, mappedSlicePitch - (mappedRowPitch * region[1])); } pCmdQ->enqueueUnmapMemObject(srcImage.get(), mappedPtr, 0, nullptr, nullptr); // verify unmap uint8_t *readMemory = new uint8_t[srcImage->getSize()]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {testWidth, testHeight, testDepth}; retVal = pCmdQ->enqueueReadImage(srcImage.get(), CL_FALSE, imgOrigin, imgRegion, inputRowPitch, inputSlicePitch, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); srcMemoryStart = srcMemory; auto readMemoryStart = readMemory; uint8_t *expected = new uint8_t[elementSize]; memset(expected, 0xFF, elementSize); for (size_t z = 0; z < testDepth; ++z) { for (size_t y = 0; y < testHeight; ++y) { for (size_t x = 0; x < testWidth; ++x) { if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x >= origin[0] && x < (origin[0] + region[0])) { // this texel should be updated AUBCommandStreamFixture::expectMemory(&readMemoryStart[x * elementSize], expected, elementSize); } else { AUBCommandStreamFixture::expectMemory(&readMemoryStart[x * elementSize], &srcMemoryStart[x * elementSize], elementSize); } } readMemoryStart = ptrOffset(readMemoryStart, inputRowPitch); srcMemoryStart = ptrOffset(srcMemoryStart, inputRowPitch); } readMemoryStart = ptrOffset(readMemoryStart, 
inputSlicePitch - (inputRowPitch * (testHeight > 0 ? testHeight : 1))); srcMemoryStart = ptrOffset(srcMemoryStart, inputSlicePitch - (inputRowPitch * (testHeight > 0 ? testHeight : 1))); } delete[] readMemory; delete[] srcMemory; delete[] expected; } INSTANTIATE_TEST_CASE_P( AUBMapImage_simple, AUBMapImage, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(mapImageParams))); enqueue_read_buffer_aub_tests.cpp000066400000000000000000000177221363734646600351470ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/aub_tests_configuration.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" #include using namespace NEO; struct ReadBufferHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef ReadBufferHw AUBReadBuffer; HWTEST_P(AUBReadBuffer, simple) { MockContext context(this->pClDevice); cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f}; cl_float destMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(srcMemory), srcMemory, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); auto pSrcMemory = &srcMemory[0]; auto pDestMemory = &destMemory[0]; cl_bool 
blockingRead = CL_FALSE; size_t offset = GetParam(); size_t sizeWritten = sizeof(cl_float); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(pDestMemory, sizeof(destMemory)); srcBuffer->forceDisallowCPUCopy = true; retVal = pCmdQ->enqueueReadBuffer( srcBuffer.get(), blockingRead, offset, sizeWritten, pDestMemory, nullptr, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); allocation = pCommandStreamReceiver->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != pDestMemory) { allocation = allocation->next; } retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); pSrcMemory = ptrOffset(pSrcMemory, offset); cl_float *destGpuaddress = reinterpret_cast(allocation->getGpuAddress()); // Compute our memory expecations based on kernel execution size_t sizeUserMemory = sizeof(destMemory); AUBCommandStreamFixture::expectMemory(destGpuaddress, pSrcMemory, sizeWritten); // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (offset + sizeWritten < sizeUserMemory) { pDestMemory = ptrOffset(pDestMemory, sizeWritten); destGpuaddress = ptrOffset(destGpuaddress, sizeWritten); size_t sizeRemaining = sizeUserMemory - sizeWritten - offset; AUBCommandStreamFixture::expectMemory(destGpuaddress, pDestMemory, sizeRemaining); } } INSTANTIATE_TEST_CASE_P(AUBReadBuffer_simple, AUBReadBuffer, ::testing::Values( 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float))); HWTEST_F(AUBReadBuffer, reserveCanonicalGpuAddress) { if (!GetAubTestsConfig().testCanonicalAddress) { return; } MockContext context(this->pClDevice); cl_float srcMemory[] = {1.0f, 2.0f, 3.0f, 4.0f}; cl_float dstMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; GraphicsAllocation *srcAlocation = new MockGraphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, srcMemory, 
0xFFFF800400001000, 0xFFFF800400001000, sizeof(srcMemory), MemoryPool::MemoryNull); std::unique_ptr srcBuffer(Buffer::createBufferHw(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(srcMemory), srcAlocation->getUnderlyingBuffer(), srcMemory, srcAlocation, false, false, false)); ASSERT_NE(nullptr, srcBuffer); srcBuffer->forceDisallowCPUCopy = true; auto retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, sizeof(dstMemory), dstMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); GraphicsAllocation *dstAllocation = createResidentAllocationAndStoreItInCsr(dstMemory, sizeof(dstMemory)); cl_float *dstGpuAddress = reinterpret_cast(dstAllocation->getGpuAddress()); AUBCommandStreamFixture::expectMemory(dstGpuAddress, srcMemory, sizeof(dstMemory)); } struct AUBReadBufferUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testReadBufferUnaligned(size_t offset, size_t size) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const auto bufferSize = sizeof(srcMemory); char dstMemory[bufferSize] = {0}; auto retVal = CL_INVALID_VALUE; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, bufferSize, srcMemory, retVal)); ASSERT_NE(nullptr, buffer); buffer->forceDisallowCPUCopy = true; // Map destination memory to GPU GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize); auto dstMemoryGPUPtr = reinterpret_cast(allocation->getGpuAddress()); // Do unaligned read retVal = pCmdQ->enqueueReadBuffer( buffer.get(), CL_FALSE, offset, size, ptrOffset(dstMemory, offset), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); // Check the memory AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset), ptrOffset(srcMemory, offset), size); } }; HWTEST_F(AUBReadBufferUnaligned, all) { const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {4, 3, 2, 1}; for (auto offset : offsets) { for (auto size : sizes) { testReadBufferUnaligned(offset, size); } } } enqueue_read_buffer_rect_aub_tests.cpp000066400000000000000000000136661363734646600361670ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct ReadBufferRectHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef ReadBufferRectHw AUBReadBufferRect; static const size_t width = 10; HWTEST_P(AUBReadBufferRect, simple3D) { MockContext context(this->pClDevice); size_t rowPitch = width; size_t slicePitch = rowPitch * rowPitch; size_t bufferSizeBuff = rowPitch * rowPitch * rowPitch; size_t bufferSize = alignUp(bufferSizeBuff, 4096); size_t zHostOffs; size_t zBuffOffs; std::tie(zBuffOffs, zHostOffs) = GetParam(); ASSERT_LT(zBuffOffs, width); ASSERT_LT(zHostOffs, width); uint8_t *srcMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); uint8_t *destMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); for (unsigned int i = 0; i < bufferSize; i++) srcMemory[i] = i; memset(destMemory, 
0x00, bufferSize); auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, bufferSize, srcMemory, retVal)); ASSERT_NE(nullptr, srcBuffer); cl_bool blockingRead = CL_FALSE; createResidentAllocationAndStoreItInCsr(destMemory, bufferSize); size_t bufferOrigin[] = {0, 0, zBuffOffs}; size_t hostOrigin[] = {0, 0, zHostOffs}; size_t region[] = {rowPitch, rowPitch, 1}; retVal = pCmdQ->enqueueReadBufferRect( srcBuffer.get(), blockingRead, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, destMemory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); char *ptr = new char[slicePitch]; memset(ptr, 0, slicePitch); for (unsigned int i = 0; i < rowPitch; i++) { //one slice will be copied from src. all others should be zeros if (i == zHostOffs) { AUBCommandStreamFixture::expectMemory(destMemory + slicePitch * i, srcMemory + slicePitch * zBuffOffs, slicePitch); } else { AUBCommandStreamFixture::expectMemory(destMemory + slicePitch * i, ptr, slicePitch); } } delete[] ptr; ::alignedFree(srcMemory); ::alignedFree(destMemory); } INSTANTIATE_TEST_CASE_P(AUBReadBufferRect_simple, AUBReadBufferRect, ::testing::Combine( ::testing::Values(0, 1, 2, 3, 4), ::testing::Values(0, 1, 2, 3, 4))); struct AUBReadBufferRectUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testReadBufferUnaligned(size_t offset, size_t size) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const auto bufferSize = sizeof(srcMemory); void *dstMemory = alignedMalloc(bufferSize, MemoryConstants::pageSize); memset(dstMemory, 0, bufferSize); char referenceMemory[bufferSize] = {0}; auto retVal = CL_INVALID_VALUE; auto buffer = std::unique_ptr(Buffer::create( 
&context, CL_MEM_COPY_HOST_PTR, bufferSize, srcMemory, retVal)); ASSERT_NE(nullptr, buffer); buffer->forceDisallowCPUCopy = true; // Map destination memory to GPU GraphicsAllocation *allocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize); auto dstMemoryGPUPtr = reinterpret_cast(allocation->getGpuAddress()); cl_bool blockingRead = CL_FALSE; size_t rowPitch = bufferSize / 4; size_t slicePitch = 4 * rowPitch; size_t bufferOrigin[] = {0, 1, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {size, 1, 1}; retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), blockingRead, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptrOffset(dstMemory, offset), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); AUBCommandStreamFixture::expectMemory(dstMemoryGPUPtr, referenceMemory, offset); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset), &srcMemory[rowPitch * bufferOrigin[1]], size); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, size + offset), referenceMemory, bufferSize - offset - size); pCmdQ->finish(); alignedFree(dstMemory); } }; HWTEST_F(AUBReadBufferRectUnaligned, misalignedHostPtr) { const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {4, 3, 2, 1}; for (auto offset : offsets) { for (auto size : sizes) { testReadBufferUnaligned(offset, size); } } } enqueue_read_image_aub_tests.cpp000066400000000000000000000202171363734646600347510ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/image.h" #include 
"opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/aub_tests/command_queue/enqueue_read_write_image_aub_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct ReadImageParams { cl_mem_object_type imageType; size_t offsets[3]; } readImageParams[] = { {CL_MEM_OBJECT_IMAGE1D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, {1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {1u, 2u, 3u}}, }; struct AUBReadImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::WithParamInterface>, public ::testing::Test { typedef AUBCommandStreamFixture CommandStreamFixture; using AUBCommandStreamFixture::SetUp; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); context = std::make_unique(pClDevice); } void TearDown() override { srcImage.reset(); context.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr srcImage; }; HWTEST_P(AUBReadImage, simpleUnalignedMemory) { const unsigned int testWidth = 5; const unsigned int testHeight = std::get<2>(GetParam()).imageType != CL_MEM_OBJECT_IMAGE1D ? 5 : 1; const unsigned int testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 
5 : 1; auto numPixels = testWidth * testHeight * testDepth; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>(GetParam()); imageFormat.image_channel_order = std::get<1>(GetParam()); imageDesc.image_type = std::get<2>(GetParam()).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0; switch (imageFormat.image_channel_data_type) { case CL_UNORM_INT8: perChannelDataSize = 1; break; case CL_SIGNED_INT16: case CL_HALF_FLOAT: perChannelDataSize = 2; break; case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { case CL_R: numChannels = 1; break; case CL_RG: numChannels = 2; break; case CL_RGBA: numChannels = 4; break; } size_t elementSize = perChannelDataSize * numChannels; // Generate initial dst memory but make it unaligned to // stress test enqueueReadImage logic auto dstMemoryAligned = alignedMalloc(1 + elementSize * numPixels, 0x1000); auto dstMemoryUnaligned = ptrOffset(reinterpret_cast(dstMemoryAligned), 1); auto sizeMemory = testWidth * alignUp(testHeight, 4) * testDepth * elementSize; auto srcMemory = new (std::nothrow) uint8_t[sizeMemory]; ASSERT_NE(nullptr, srcMemory); for (unsigned i = 0; i < sizeMemory; ++i) { uint8_t origValue = i; memcpy(srcMemory + i, &origValue, sizeof(origValue)); } for (unsigned i = 0; i < numPixels * elementSize; ++i) { uint8_t origValue = 0xff; memcpy(dstMemoryUnaligned + i, &origValue, sizeof(origValue)); } cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, 
context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto retVal = CL_INVALID_VALUE; srcImage.reset(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal)); ASSERT_NE(nullptr, srcImage.get()); auto origin = std::get<2>(GetParam()).offsets; // Only draw 1/4 of the original image const size_t region[3] = {std::max(testWidth / 2, 1u), std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; size_t inputRowPitch = testWidth * elementSize; size_t inputSlicePitch = inputRowPitch * testHeight; retVal = pCmdQ->enqueueReadImage( srcImage.get(), CL_FALSE, origin, region, inputRowPitch, inputSlicePitch, dstMemoryUnaligned, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); auto imageMemory = srcMemory; bool isGpuCopy = srcImage->isTiledAllocation() || !MemoryPool::isSystemMemoryPool(srcImage->getGraphicsAllocation()->getMemoryPool()); if (!isGpuCopy) { imageMemory = (uint8_t *)(srcImage->getCpuAddress()); } auto pSrcMemory = ptrOffset(imageMemory, (origin[2] * testWidth * testHeight + origin[1] * testWidth + origin[0]) * elementSize); auto pDstMemory = dstMemoryUnaligned; for (auto depth = origin[2] + 1; depth < (origin[2] + region[2]); ++depth) { for (size_t row = 0; row < region[1]; ++row) { size_t length = region[0] * elementSize; AUBCommandStreamFixture::expectMemory(pDstMemory, pSrcMemory, length); pDstMemory = ptrOffset(pDstMemory, length); length = (testWidth - region[0]) * elementSize; AUBCommandStreamFixture::expectMemory(pDstMemory, pDstMemory, length); pDstMemory = ptrOffset(pDstMemory, length); pSrcMemory = ptrOffset(pSrcMemory, testWidth * elementSize); } size_t remainingRows = testHeight - region[1]; while (remainingRows > 0) { size_t length = testHeight * elementSize; AUBCommandStreamFixture::expectMemory(pDstMemory, pDstMemory, length); pDstMemory = 
ptrOffset(pDstMemory, length); --remainingRows; } pDstMemory = ptrOffset(dstMemoryUnaligned, testWidth * testHeight * elementSize); } retVal = pCmdQ->finish(); //FixMe - not all test cases verified with expects EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(dstMemoryAligned); delete[] srcMemory; } INSTANTIATE_TEST_CASE_P( AUBReadImage_simple, AUBReadImage, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(readImageParams))); using AUBReadImageUnaligned = AUBImageUnaligned; HWTEST_F(AUBReadImageUnaligned, misalignedHostPtr) { const std::vector pixelSizes = {1, 2, 4}; const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {3, 2, 1}; for (auto pixelSize : pixelSizes) { for (auto offset : offsets) { for (auto size : sizes) { testReadImageUnaligned(offset, size, pixelSize); } } } } enqueue_read_write_image_aub_fixture.h000066400000000000000000000204761363734646600361630ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct AUBImageUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testReadImageUnaligned(size_t offset, size_t size, size_t pixelSize) { MockContext 
context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnoprstuwxyz"; const auto bufferSize = sizeof(srcMemory) - 1; char *imageMemory = &srcMemory[1]; //ensure non cacheline-aligned hostPtr to create non-zerocopy image void *dstMemory = alignedMalloc(bufferSize, MemoryConstants::pageSize); memset(dstMemory, 0, bufferSize); char referenceMemory[bufferSize] = {0}; const size_t testWidth = bufferSize / 4 / pixelSize; const size_t testHeight = 4; const size_t testDepth = 1; cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; switch (pixelSize) { case 1: imageFormat.image_channel_order = CL_R; break; case 2: imageFormat.image_channel_order = CL_RG; break; case 3: ASSERT_TRUE(false); break; case 4: imageFormat.image_channel_order = CL_RGBA; break; } imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); auto retVal = CL_INVALID_VALUE; auto image = std::unique_ptr(Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, imageMemory, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isMemObjZeroCopy()); auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(dstMemory, bufferSize); auto dstMemoryGPUPtr = reinterpret_cast(graphicsAllocation->getGpuAddress()); const size_t origin[3] = {0, 1, 0}; const size_t region[3] = {size, 1, 1}; size_t inputRowPitch = testWidth; size_t inputSlicePitch = 
inputRowPitch * testHeight; retVal = pCmdQ->enqueueReadImage( image.get(), CL_FALSE, origin, region, inputRowPitch, inputSlicePitch, ptrOffset(dstMemory, offset), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); AUBCommandStreamFixture::expectMemory(dstMemoryGPUPtr, referenceMemory, offset); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, offset), &imageMemory[inputRowPitch * origin[1] * pixelSize], size * pixelSize); AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, size * pixelSize + offset), referenceMemory, bufferSize - offset - size * pixelSize); pCmdQ->finish(); alignedFree(dstMemory); } template void testWriteImageUnaligned(size_t offset, size_t size, size_t pixelSize) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnoprstuwxyz"; const auto bufferSize = sizeof(srcMemory); char dstMemory[bufferSize + 1] = {0}; char *imageMemory = &dstMemory[1]; //ensure non cacheline-aligned hostPtr to create non-zerocopy image char referenceMemory[bufferSize] = {0}; const size_t testWidth = bufferSize / 4 / pixelSize; const size_t testHeight = 4; const size_t testDepth = 1; cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; switch (pixelSize) { case 1: imageFormat.image_channel_order = CL_R; break; case 2: imageFormat.image_channel_order = CL_RG; break; case 3: ASSERT_TRUE(false); break; case 4: imageFormat.image_channel_order = CL_RGBA; break; } imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; 
imageDesc.mem_object = NULL; cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); auto retVal = CL_INVALID_VALUE; auto image = std::unique_ptr(Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, imageMemory, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isMemObjZeroCopy()); auto dstMemoryGPUPtr = reinterpret_cast(image->getGraphicsAllocation()->getGpuAddress()); const size_t origin[3] = {0, 1, 0}; // write first row const size_t region[3] = {size, 1, 1}; // write only "size" number of pixels size_t inputRowPitch = testWidth; size_t inputSlicePitch = inputRowPitch * testHeight; retVal = pCmdQ->enqueueWriteImage( image.get(), CL_TRUE, origin, region, inputRowPitch, inputSlicePitch, ptrOffset(srcMemory, offset), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); auto imageRowPitch = image->getImageDesc().image_row_pitch; AUBCommandStreamFixture::expectMemory(dstMemoryGPUPtr, referenceMemory, inputRowPitch * pixelSize); // validate zero row is not written AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, imageRowPitch), &srcMemory[offset], size * pixelSize); // validate first row is written, AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, imageRowPitch + size * pixelSize), referenceMemory, (inputRowPitch - size) * pixelSize); // only size number of pixels, with correct data for (uint32_t row = 2; row < testHeight; row++) { AUBCommandStreamFixture::expectMemory(ptrOffset(dstMemoryGPUPtr, row * imageRowPitch), referenceMemory, inputRowPitch * pixelSize); // next image rows shouldn;t be modified } } }; 
enqueue_verify_memory_buffer_aub_tests.cpp000066400000000000000000000107471363734646600371300ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct VerifyMemoryBufferHw : public CommandEnqueueAUBFixture, public ::testing::TestWithParam> { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; size_t testDataSizeTable[] = { 16, MemoryConstants::megaByte}; cl_mem_flags testFlagsTable[] = { 0, CL_MEM_COPY_HOST_PTR}; HWTEST_P(VerifyMemoryBufferHw, givenDifferentBuffersWhenValidatingMemoryThenSuccessIsReturned) { cl_uint testItem = 5; cl_uint testItemWrong1 = 4; cl_uint testItemWrong2 = 6; auto testItemSize = sizeof(testItem); const auto testDataSize = std::get<0>(GetParam()); EXPECT_FALSE(testDataSize < testItemSize); const auto flags = std::get<1>(GetParam()); const auto usesHostPointer = ((flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR)); DebugManagerStateRestore restore; DebugManager.flags.DisableZeroCopyForBuffers.set(true); std::unique_ptr bufferContent(new uint8_t[testDataSize]); std::unique_ptr validContent(new uint8_t[testDataSize]); std::unique_ptr invalidContent1(new uint8_t[testDataSize]); std::unique_ptr invalidContent2(new uint8_t[testDataSize]); auto pTestItem = reinterpret_cast(&testItem); for (size_t offset = 0; offset < testDataSize; offset += testItemSize) { for (size_t itemOffset = 
0; itemOffset < testItemSize; itemOffset++) { bufferContent.get()[offset + itemOffset] = pTestItem[itemOffset]; validContent.get()[offset + itemOffset] = pTestItem[itemOffset]; invalidContent1.get()[offset + itemOffset] = pTestItem[itemOffset]; invalidContent2.get()[offset + itemOffset] = pTestItem[itemOffset]; } } // set last item for invalid contents auto pTestItemWrong1 = reinterpret_cast(&testItemWrong1); auto pTestItemWrong2 = reinterpret_cast(&testItemWrong2); size_t offset = testDataSize - testItemSize; for (size_t itemOffset = 0; itemOffset < testItemSize; itemOffset++) { invalidContent1.get()[offset + itemOffset] = pTestItemWrong1[itemOffset]; invalidContent2.get()[offset + itemOffset] = pTestItemWrong2[itemOffset]; } MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); cl_int retVal = CL_INVALID_VALUE; std::unique_ptr buffer(Buffer::create( &context, flags, testDataSize, (usesHostPointer ? bufferContent.get() : nullptr), retVal)); EXPECT_NE(nullptr, buffer); if (!usesHostPointer) { retVal = pCmdQ->enqueueFillBuffer( buffer.get(), &testItem, testItemSize, 0, testDataSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } auto mappedAddress = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_READ, 0, testDataSize, 0, nullptr, nullptr, nullptr); clFlush(pCmdQ); retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, mappedAddress, validContent.get(), testDataSize, CL_MEM_COMPARE_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); if (UnitTestHelper::isExpectMemoryNotEqualSupported()) { retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, mappedAddress, invalidContent1.get(), testDataSize, CL_MEM_COMPARE_NOT_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, mappedAddress, invalidContent2.get(), testDataSize, CL_MEM_COMPARE_NOT_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); } clFinish(pCmdQ); } INSTANTIATE_TEST_CASE_P(VerifyMemoryBuffer, VerifyMemoryBufferHw, ::testing::Combine( ::testing::ValuesIn(testDataSizeTable), 
::testing::ValuesIn(testFlagsTable))); enqueue_verify_memory_image_aub_tests.cpp000066400000000000000000000117661363734646600367430ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct TestOffset { size_t offset[3]; }; struct VerifyMemoryImageHw : public CommandEnqueueAUBFixture, public ::testing::TestWithParam { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); if (!pDevice->getDeviceInfo().imageSupport) { GTEST_SKIP(); } } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; TestOffset testInput[] = { {{0, 0, 0}}, {{1, 2, 3}}, {{3, 2, 1}}, {{5, 5, 5}}}; HWTEST_P(VerifyMemoryImageHw, givenDifferentImagesWhenValidatingMemoryThenSuccessIsReturned) { cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = CL_UNSIGNED_INT32; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_width = 10; imageDesc.image_height = 19; imageDesc.image_depth = 7; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on // data per channel multplied by number of channels size_t elementSize = 16; cl_mem_flags flags = CL_MEM_READ_ONLY; auto 
surfaceFormat = Image:: getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); auto retVal = CL_INVALID_VALUE; std::unique_ptr image(Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_NE(nullptr, image); auto sizeMemory = image->getSize(); EXPECT_GT(sizeMemory, 0u); std::unique_ptr srcMemory(new uint8_t[elementSize]); memset(srcMemory.get(), 0xAB, elementSize); memset(image->getCpuAddress(), 0xAB, sizeMemory); const size_t *origin = GetParam().offset; const size_t region[] = { imageDesc.image_width - origin[0], imageDesc.image_height - origin[1], imageDesc.image_depth - origin[2]}; uint32_t fillValues[] = {0x3f800000, 0x00000000, 0x3f555555, 0x3f2aaaaa}; retVal = pCmdQ->enqueueFillImage( image.get(), fillValues, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth}; size_t mappedRowPitch; size_t mappedSlicePitch; auto mappedAddress = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, imgOrigin, imgRegion, &mappedRowPitch, &mappedSlicePitch, 0, nullptr, nullptr, &retVal); auto pImageData = reinterpret_cast(mappedAddress); for (size_t z = 0; z < imageDesc.image_depth; ++z) { for (size_t y = 0; y < imageDesc.image_height; ++y) { for (size_t x = 0; x < imageDesc.image_width; ++x) { void *validData = srcMemory.get(); void *invalidData = fillValues; if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x >= origin[0] && x < (origin[0] + region[0])) { std::swap(validData, invalidData); } retVal = clEnqueueVerifyMemoryINTEL(pCmdQ, &pImageData[x * elementSize], validData, elementSize, CL_MEM_COMPARE_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); if (UnitTestHelper::isExpectMemoryNotEqualSupported()) { retVal = 
clEnqueueVerifyMemoryINTEL(pCmdQ, &pImageData[x * elementSize], invalidData, elementSize, CL_MEM_COMPARE_NOT_EQUAL); EXPECT_EQ(CL_SUCCESS, retVal); } } pImageData = ptrOffset(pImageData, mappedRowPitch); } pImageData = ptrOffset(pImageData, mappedSlicePitch - (mappedRowPitch * imageDesc.image_height)); } } INSTANTIATE_TEST_CASE_P(VerifyMemoryImage, VerifyMemoryImageHw, ::testing::ValuesIn(testInput)); enqueue_write_buffer_aub_tests.cpp000066400000000000000000000120341363734646600353550ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct WriteBufferHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef WriteBufferHw AUBWriteBuffer; HWTEST_P(AUBWriteBuffer, simple) { MockContext context(this->pCmdQ->getDevice().getSpecializedDevice()); cl_float *srcMemory = new float[1024]; cl_float *destMemory = new float[1024]; cl_float *zeroMemory = new float[1024]; for (int i = 0; i < 1024; i++) { srcMemory[i] = (float)i + 1.0f; destMemory[i] = 0; zeroMemory[i] = 0; } auto retVal = CL_INVALID_VALUE; auto dstBuffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, 1024 * sizeof(float), destMemory, retVal); ASSERT_NE(nullptr, dstBuffer); auto pSrcMemory = &srcMemory[0]; cl_bool blockingWrite = CL_TRUE; size_t offset = GetParam(); size_t sizeWritten = sizeof(cl_float); cl_uint numEventsInWaitList = 0; 
cl_event *eventWaitList = nullptr; cl_event *event = nullptr; dstBuffer->forceDisallowCPUCopy = true; retVal = pCmdQ->enqueueWriteBuffer( dstBuffer, blockingWrite, offset, sizeWritten, pSrcMemory, nullptr, numEventsInWaitList, eventWaitList, event); auto pDestMemory = reinterpret_cast((dstBuffer->getGraphicsAllocation()->getGpuAddress())); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); // Compute our memory expecations based on kernel execution size_t sizeUserMemory = 1024 * sizeof(float); auto pVal = ptrOffset(pDestMemory, offset); AUBCommandStreamFixture::expectMemory(pVal, pSrcMemory, sizeWritten); // if offset provided, check the beginning if (offset > 0) { AUBCommandStreamFixture::expectMemory(pDestMemory, zeroMemory, offset); } // If the copykernel wasn't max sized, ensure we didn't overwrite existing memory if (offset + sizeWritten < sizeUserMemory) { pDestMemory = ptrOffset(pVal, sizeWritten); size_t sizeRemaining = sizeUserMemory - sizeWritten - offset; AUBCommandStreamFixture::expectMemory(pDestMemory, zeroMemory, sizeRemaining); } delete dstBuffer; delete[] srcMemory; delete[] destMemory; delete[] zeroMemory; } INSTANTIATE_TEST_CASE_P(AUBWriteBuffer_simple, AUBWriteBuffer, ::testing::Values( 0 * sizeof(cl_float), 1 * sizeof(cl_float), 2 * sizeof(cl_float), 3 * sizeof(cl_float))); struct AUBWriteBufferUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testWriteBufferUnaligned(size_t offset, size_t size) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const auto bufferSize = sizeof(srcMemory); char dstMemory[bufferSize] = {0}; auto retVal = CL_INVALID_VALUE; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, bufferSize, dstMemory, retVal)); ASSERT_NE(nullptr, buffer); 
buffer->forceDisallowCPUCopy = true; // Do unaligned write retVal = pCmdQ->enqueueWriteBuffer( buffer.get(), CL_TRUE, offset, size, ptrOffset(srcMemory, offset), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Check the memory auto bufferGPUPtr = reinterpret_cast((buffer->getGraphicsAllocation()->getGpuAddress())); AUBCommandStreamFixture::expectMemory(ptrOffset(bufferGPUPtr, offset), ptrOffset(srcMemory, offset), size); } }; HWTEST_F(AUBWriteBufferUnaligned, all) { const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {4, 3, 2, 1}; for (auto offset : offsets) { for (auto size : sizes) { testWriteBufferUnaligned(offset, size); } } } enqueue_write_buffer_rect_aub_tests.cpp000066400000000000000000000132031363734646600363710ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct WriteBufferRectHw : public CommandEnqueueAUBFixture, public ::testing::WithParamInterface>, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } }; typedef WriteBufferRectHw AUBWriteBufferRect; static const size_t width = 10; HWTEST_P(AUBWriteBufferRect, simple3D) { MockContext context(this->pClDevice); size_t rowPitch = width; size_t slicePitch = rowPitch * rowPitch; size_t bufferSizeBuff = rowPitch * rowPitch * rowPitch; size_t bufferSize = alignUp(bufferSizeBuff, 4096); size_t zHostOffs; size_t zBuffOffs; 
std::tie(zBuffOffs, zHostOffs) = GetParam(); ASSERT_LT(zBuffOffs, width); ASSERT_LT(zHostOffs, width); uint8_t *srcMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); uint8_t *destMemory = (uint8_t *)::alignedMalloc(bufferSize, 4096); for (unsigned int i = 0; i < bufferSize; i++) srcMemory[i] = i; memset(destMemory, 0x00, bufferSize); auto retVal = CL_INVALID_VALUE; auto dstBuffer = std::unique_ptr(Buffer::create( &context, CL_MEM_USE_HOST_PTR, bufferSize, destMemory, retVal)); ASSERT_NE(nullptr, dstBuffer); uint8_t *pDestMemory = (uint8_t *)dstBuffer->getGraphicsAllocation()->getGpuAddress(); cl_bool blockingWrite = CL_TRUE; size_t bufferOrigin[] = {0, 0, zBuffOffs}; size_t hostOrigin[] = {0, 0, zHostOffs}; size_t region[] = {rowPitch, rowPitch, 1}; retVal = pCmdQ->enqueueWriteBufferRect( dstBuffer.get(), blockingWrite, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, srcMemory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); char *ptr = new char[slicePitch]; memset(ptr, 0, slicePitch); for (unsigned int i = 0; i < rowPitch; i++) { //one slice will be copied from src. 
all others should be zeros if (i == zBuffOffs) { AUBCommandStreamFixture::expectMemory(pDestMemory + slicePitch * i, srcMemory + slicePitch * zHostOffs, slicePitch); } else { AUBCommandStreamFixture::expectMemory(pDestMemory + slicePitch * i, ptr, slicePitch); } } delete[] ptr; ::alignedFree(srcMemory); ::alignedFree(destMemory); } INSTANTIATE_TEST_CASE_P(AUBWriteBufferRect_simple, AUBWriteBufferRect, ::testing::Combine( ::testing::Values(0, 1, 2, 3, 4), ::testing::Values(0, 1, 2, 3, 4))); struct AUBWriteBufferRectUnaligned : public CommandEnqueueAUBFixture, public ::testing::Test { void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } template void testWriteBufferUnaligned(size_t offset, size_t size) { MockContext context(pCmdQ->getDevice().getSpecializedDevice()); char srcMemory[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; const auto bufferSize = sizeof(srcMemory); char dstMemory[bufferSize] = {0}; char referenceMemory[bufferSize] = {0}; auto retVal = CL_INVALID_VALUE; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_COPY_HOST_PTR, bufferSize, dstMemory, retVal)); ASSERT_NE(nullptr, buffer); buffer->forceDisallowCPUCopy = true; uint8_t *pDestMemory = (uint8_t *)buffer->getGraphicsAllocation()->getGpuAddress(); cl_bool blockingWrite = CL_TRUE; size_t rowPitch = bufferSize / 4; size_t slicePitch = 4 * rowPitch; size_t bufferOrigin[] = {0, 1, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {size, 1, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), blockingWrite, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptrOffset(srcMemory, offset), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->finish(); AUBCommandStreamFixture::expectMemory(pDestMemory, referenceMemory, rowPitch); AUBCommandStreamFixture::expectMemory(pDestMemory + rowPitch * bufferOrigin[1], ptrOffset(srcMemory, offset), size); 
AUBCommandStreamFixture::expectMemory(pDestMemory + rowPitch * bufferOrigin[1] + size, referenceMemory, bufferSize - size - rowPitch); } }; HWTEST_F(AUBWriteBufferRectUnaligned, misalignedHostPtr) { const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {4, 3, 2, 1}; for (auto offset : offsets) { for (auto size : sizes) { testWriteBufferUnaligned(offset, size); } } } enqueue_write_copy_read_buffer_aub_tests.cpp000066400000000000000000000074431363734646600374120ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/command_queue/enqueue_write_copy_read_buffer_aub_tests.h" #include "shared/source/memory_manager/allocations_list.h" #include "test.h" using namespace NEO; template void AubWriteCopyReadBuffer::runTest() { auto simulatedCsr = AUBFixture::getSimulatedCsr(); char srcMemoryInitial[] = {1, 2, 3, 4, 5, 6, 7, 8}; char dstMemoryInitial[] = {11, 12, 13, 14, 15, 16, 17, 18}; char srcMemoryToWrite[] = {1, 2, 3, 4, 5, 6, 7, 8}; char dstMemoryToWrite[] = {11, 12, 13, 14, 15, 16, 17, 18}; const size_t bufferSize = sizeof(srcMemoryInitial); static_assert(bufferSize == sizeof(dstMemoryInitial), ""); static_assert(bufferSize == sizeof(srcMemoryToWrite), ""); static_assert(bufferSize == sizeof(dstMemoryToWrite), ""); auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(Buffer::create( context, CL_MEM_COPY_HOST_PTR, bufferSize, srcMemoryInitial, retVal)); ASSERT_NE(nullptr, srcBuffer); auto dstBuffer = std::unique_ptr(Buffer::create( context, CL_MEM_COPY_HOST_PTR, bufferSize, dstMemoryInitial, retVal)); ASSERT_NE(nullptr, dstBuffer); simulatedCsr->writeMemory(*srcBuffer->getGraphicsAllocation()); simulatedCsr->writeMemory(*dstBuffer->getGraphicsAllocation()); expectMemory(AUBFixture::getGpuPointer(srcBuffer->getGraphicsAllocation()), srcMemoryInitial, bufferSize); 
expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation()), dstMemoryInitial, bufferSize); cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; retVal = pCmdQ->enqueueWriteBuffer( srcBuffer.get(), true, 0, bufferSize, srcMemoryToWrite, nullptr, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueWriteBuffer( dstBuffer.get(), true, 0, bufferSize, dstMemoryToWrite, nullptr, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(AUBFixture::getGpuPointer(srcBuffer->getGraphicsAllocation()), srcMemoryToWrite, bufferSize); expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation()), dstMemoryToWrite, bufferSize); retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, bufferSize, numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); // Destination buffer should have src buffer content expectMemory(AUBFixture::getGpuPointer(dstBuffer->getGraphicsAllocation()), srcMemoryToWrite, bufferSize); char hostPtrMemory[] = {0, 0, 0, 0, 0, 0, 0, 0}; ASSERT_EQ(bufferSize, sizeof(hostPtrMemory)); retVal = pCmdQ->enqueueReadBuffer( dstBuffer.get(), false, 0, bufferSize, hostPtrMemory, nullptr, numEventsInWaitList, eventWaitList, event); pCmdQ->flush(); GraphicsAllocation *allocation = csr->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != hostPtrMemory) { allocation = allocation->next; } expectMemory(AUBFixture::getGpuPointer(allocation), srcMemoryToWrite, bufferSize); } HWTEST_F(AubWriteCopyReadBuffer, givenTwoBuffersFilledWithPatternWhenSourceIsCopiedToDestinationThenDestinationDataValidates) { runTest(); } enqueue_write_copy_read_buffer_aub_tests.h000066400000000000000000000012011363734646600370410ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" using namespace NEO; struct AubWriteCopyReadBuffer : public AUBFixture, public ::testing::Test { void SetUp() override { AUBFixture::SetUp(nullptr); } void TearDown() override { AUBFixture::TearDown(); } template void runTest(); }; enqueue_write_image_aub_tests.cpp000066400000000000000000000211721363734646600351710ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/aub_tests/command_queue/enqueue_read_write_image_aub_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct WriteImageParams { cl_mem_object_type imageType; size_t offsets[3]; } writeImageParams[] = { {CL_MEM_OBJECT_IMAGE1D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE1D, {1u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE2D, {1u, 2u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {0u, 0u, 0u}}, {CL_MEM_OBJECT_IMAGE3D, {1u, 2u, 3u}}, }; struct AUBWriteImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::WithParamInterface>, public ::testing::Test { typedef AUBCommandStreamFixture CommandStreamFixture; using AUBCommandStreamFixture::SetUp; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } 
CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); context = std::make_unique(pClDevice); } void TearDown() override { dstImage.reset(); context.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr context; std::unique_ptr dstImage; }; HWTEST_P(AUBWriteImage, simpleUnalignedMemory) { const unsigned int testWidth = 5; const unsigned int testHeight = std::get<2>(GetParam()).imageType != CL_MEM_OBJECT_IMAGE1D ? 5 : 1; const unsigned int testDepth = std::get<2>(GetParam()).imageType == CL_MEM_OBJECT_IMAGE3D ? 5 : 1; auto numPixels = testWidth * testHeight * testDepth; cl_image_format imageFormat; cl_image_desc imageDesc; // clang-format off imageFormat.image_channel_data_type = std::get<0>(GetParam()); imageFormat.image_channel_order = std::get<1>(GetParam()); imageDesc.image_type = std::get<2>(GetParam()).imageType; imageDesc.image_width = testWidth; imageDesc.image_height = testHeight; imageDesc.image_depth = testDepth; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on auto perChannelDataSize = 0; switch (imageFormat.image_channel_data_type) { case CL_UNORM_INT8: perChannelDataSize = 1; break; case CL_SIGNED_INT16: case CL_HALF_FLOAT: perChannelDataSize = 2; break; case CL_UNSIGNED_INT32: case CL_FLOAT: perChannelDataSize = 4; break; } auto numChannels = 0u; switch (imageFormat.image_channel_order) { case CL_R: numChannels = 1; break; case CL_RG: numChannels = 2; break; case CL_RGBA: numChannels = 4; break; } size_t elementSize = perChannelDataSize * numChannels; // Generate initial src memory but make it unaligned to // stress test enqueueWriteImage logic auto srcMemoryAligned = alignedMalloc(1 + elementSize * numPixels, 0x1000); auto srcMemoryUnaligned = ptrOffset(reinterpret_cast(srcMemoryAligned), 1); for (unsigned i = 0; i < 
numPixels * elementSize; ++i) { uint8_t origValue = i; memcpy(srcMemoryUnaligned + i, &origValue, sizeof(origValue)); } // Initialize dest memory auto sizeMemory = testWidth * testHeight * testDepth * elementSize; auto dstMemory = new (std::nothrow) uint8_t[sizeMemory]; ASSERT_NE(nullptr, dstMemory); memset(dstMemory, 0xff, sizeMemory); auto retVal = CL_INVALID_VALUE; cl_mem_flags flags = 0; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); dstImage.reset(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, dstImage.get()); memset(dstImage->getCpuAddress(), 0xFF, dstImage->getSize()); // init image - avoid writeImage inside createImage (for tiled img) auto origin = std::get<2>(GetParam()).offsets; // Only draw 1/4 of the original image const size_t region[3] = {std::max(testWidth / 2, 1u), std::max(testHeight / 2, 1u), std::max(testDepth / 2, 1u)}; // Offset the source memory auto pSrcMemory = ptrOffset(srcMemoryUnaligned, (origin[1] * testWidth + origin[0]) * elementSize); size_t inputRowPitch = testWidth * elementSize; size_t inputSlicePitch = inputRowPitch * testHeight; retVal = pCmdQ->enqueueWriteImage( dstImage.get(), CL_TRUE, origin, region, inputRowPitch, inputSlicePitch, pSrcMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto readMemory = new uint8_t[dstImage->getSize()]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {testWidth, testHeight, testDepth}; retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); auto pDstMemory = readMemory; auto pSrc = pSrcMemory; auto rowPitch = dstImage->getHostPtrRowPitch(); auto slicePitch = 
dstImage->getHostPtrSlicePitch(); for (size_t z = 0; z < testDepth; ++z) { for (size_t y = 0; y < testHeight; ++y) { for (size_t x = 0; x < testWidth; ++x) { auto pos = x * elementSize; if (z >= origin[2] && z < (origin[2] + region[2]) && y >= origin[1] && y < (origin[1] + region[1]) && x >= origin[0] && x < (origin[0] + region[0])) { // this texel should be updated AUBCommandStreamFixture::expectMemory(&pDstMemory[pos], pSrc, elementSize); pSrc = ptrOffset(pSrc, elementSize); } else { AUBCommandStreamFixture::expectMemory(&pDstMemory[pos], dstMemory, elementSize); } } pDstMemory = ptrOffset(pDstMemory, rowPitch); if (y >= origin[1] && y < origin[1] + region[1] && z >= origin[2] && z < origin[2] + region[2]) { pSrc = ptrOffset(pSrc, inputRowPitch - (elementSize * region[0])); } } pDstMemory = ptrOffset(pDstMemory, slicePitch - (rowPitch * (testHeight > 0 ? testHeight : 1))); if (z >= origin[2] && z < origin[2] + region[2]) { pSrc = ptrOffset(pSrc, inputSlicePitch - (inputRowPitch * (region[1]))); } } alignedFree(srcMemoryAligned); delete[] dstMemory; delete[] readMemory; } INSTANTIATE_TEST_CASE_P(AUBWriteImage_simple, AUBWriteImage, ::testing::Combine(::testing::Values( // formats CL_UNORM_INT8, CL_SIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT), ::testing::Values( // channels CL_R, CL_RG, CL_RGBA), ::testing::ValuesIn(writeImageParams))); using AUBWriteImageUnaligned = AUBImageUnaligned; HWTEST_F(AUBWriteImageUnaligned, misalignedHostPtr) { const std::vector pixelSizes = {1, 2, 4}; const std::vector offsets = {0, 1, 2, 3}; const std::vector sizes = {3, 2, 1}; for (auto pixelSize : pixelSizes) { for (auto offset : offsets) { for (auto size : sizes) { testWriteImageUnaligned(offset, size, pixelSize); } } } } 
compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_stream/000077500000000000000000000000001363734646600266145ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_stream/CMakeLists.txt000066400000000000000000000007321363734646600313560ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests.cpp ) add_subdirectories() aub_command_stream_fixture.cpp000066400000000000000000000041341363734646600346310ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/memory_management.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/command_stream/tbx_command_stream_receiver.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gtest/gtest.h" namespace NEO { void AUBCommandStreamFixture::SetUp(CommandQueue *pCmdQ) { ASSERT_NE(pCmdQ, nullptr); auto &device = reinterpret_cast(pCmdQ->getDevice()); const auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); const 
::testing::TestInfo *const testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream strfilename; auto engineType = pCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getEngineType(); strfilename << testInfo->test_case_name() << "_" << testInfo->name() << "_" << hwHelper.getCsTraits(engineType).name; if (testMode == TestMode::AubTestsWithTbx) { pCommandStreamReceiver = TbxCommandStreamReceiver::create(strfilename.str(), true, *device.executionEnvironment, device.getRootDeviceIndex()); } else { pCommandStreamReceiver = AUBCommandStreamReceiver::create(strfilename.str(), true, *device.executionEnvironment, device.getRootDeviceIndex()); } ASSERT_NE(nullptr, pCommandStreamReceiver); device.resetCommandStreamReceiver(pCommandStreamReceiver); CommandStreamFixture::SetUp(pCmdQ); pTagMemory = pCommandStreamReceiver->getTagAddress(); this->commandQueue = pCmdQ; } void AUBCommandStreamFixture::TearDown() { CommandStreamFixture::TearDown(); } } // namespace NEO aub_command_stream_fixture.h000066400000000000000000000113101363734646600342700ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/memory_manager/memory_banks.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include 
namespace NEO { class CommandStreamReceiver; class AUBCommandStreamFixture : public CommandStreamFixture { public: virtual void SetUp(CommandQueue *pCommandQueue); void TearDown() override; template AUBCommandStreamReceiverHw *getAubCsr() const { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { csr = static_cast> *>(csr)->aubCSR.get(); } return static_cast *>(csr); } template void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { // Write our pseudo-op to the AUB file auto aubCsr = static_cast *>(csr); aubCsr->expectMMIO(mmioRegister, expectedValue); } } template void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = static_cast *>(pCommandStreamReceiver); tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length); csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { auto aubCsr = static_cast *>(csr); aubCsr->expectMemoryEqual(gfxAddress, srcAddress, length); } } template void expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiver *csr = pCommandStreamReceiver; if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = static_cast *>(pCommandStreamReceiver); tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length); csr = static_cast> *>(pCommandStreamReceiver)->aubCSR.get(); } if (csr) { auto aubCsr = static_cast *>(csr); aubCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length); } } template CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { return static_cast *>(pCommandStreamReceiver); } template void pollForCompletion() { getSimulatedCsr()->pollForCompletion(); } GraphicsAllocation 
*createResidentAllocationAndStoreItInCsr(const void *address, size_t size) { GraphicsAllocation *graphicsAllocation = pCommandStreamReceiver->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), false, size}, address); pCommandStreamReceiver->makeResidentHostPtrAllocation(graphicsAllocation); pCommandStreamReceiver->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(graphicsAllocation), TEMPORARY_ALLOCATION); return graphicsAllocation; } CommandStreamReceiver *pCommandStreamReceiver = nullptr; volatile uint32_t *pTagMemory = nullptr; private: CommandQueue *commandQueue = nullptr; }; } // namespace NEO aub_command_stream_tests.cpp000066400000000000000000000123751363734646600343130ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_stream/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "test.h" #include "aub_command_stream_fixture.h" #include using namespace NEO; struct AUBFixture : public AUBCommandStreamFixture, public CommandQueueFixture, public DeviceFixture { using AUBCommandStreamFixture::SetUp; using CommandQueueFixture::SetUp; void SetUp() { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(nullptr, pClDevice, 0); AUBCommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { AUBCommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } template void testNoopIdXcs(aub_stream::EngineType engineType) { 
pCommandStreamReceiver->getOsContext().getEngineType() = engineType; typedef typename FamilyType::MI_NOOP MI_NOOP; auto pCmd = (MI_NOOP *)pCS->getSpace(sizeof(MI_NOOP) * 4); uint32_t noopId = 0xbaadd; auto noop = FamilyType::cmdInitNoop; *pCmd++ = noop; *pCmd++ = noop; *pCmd++ = noop; noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *pCmd++ = noop; CommandStreamReceiverHw::addBatchBufferEnd(*pCS, nullptr); CommandStreamReceiverHw::alignToCacheLine(*pCS); BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, pCS->getUsed(), pCS, nullptr}; ResidencyContainer allocationsForResidency; pCommandStreamReceiver->flush(batchBuffer, allocationsForResidency); AUBCommandStreamFixture::getSimulatedCsr()->pollForCompletionImpl(); auto mmioBase = CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType).mmioBase; AUBCommandStreamFixture::expectMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2094), noopId); } }; typedef Test AUBcommandstreamTests; HWTEST_F(AUBcommandstreamTests, testFlushTwice) { CommandStreamReceiverHw::addBatchBufferEnd(*pCS, nullptr); CommandStreamReceiverHw::alignToCacheLine(*pCS); BatchBuffer batchBuffer{pCS->getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, pCS->getUsed(), pCS, nullptr}; ResidencyContainer allocationsForResidency; pCommandStreamReceiver->flush(batchBuffer, allocationsForResidency); BatchBuffer batchBuffer2{pCS->getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, pCS->getUsed(), pCS, nullptr}; ResidencyContainer allocationsForResidency2; pCommandStreamReceiver->flush(batchBuffer2, allocationsForResidency); AUBCommandStreamFixture::getSimulatedCsr()->pollForCompletion(); } HWTEST_F(AUBcommandstreamTests, testNoopIdRcs) { 
testNoopIdXcs(aub_stream::ENGINE_RCS); } HWTEST_F(AUBcommandstreamTests, testNoopIdBcs) { testNoopIdXcs(aub_stream::ENGINE_BCS); } HWTEST_F(AUBcommandstreamTests, testNoopIdVcs) { testNoopIdXcs(aub_stream::ENGINE_VCS); } HWTEST_F(AUBcommandstreamTests, testNoopIdVecs) { testNoopIdXcs(aub_stream::ENGINE_VECS); } TEST_F(AUBcommandstreamTests, makeResident) { uint8_t buffer[0x10000]; size_t size = sizeof(buffer); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(buffer, size); ResidencyContainer allocationsForResidency = {graphicsAllocation}; commandStreamReceiver.processResidency(allocationsForResidency, 0u); } HWTEST_F(AUBcommandstreamTests, expectMemorySingle) { uint32_t buffer = 0xdeadbeef; size_t size = sizeof(buffer); auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(&buffer, size); ResidencyContainer allocationsForResidency = {graphicsAllocation}; pCommandStreamReceiver->processResidency(allocationsForResidency, 0u); AUBCommandStreamFixture::expectMemory(&buffer, &buffer, size); } HWTEST_F(AUBcommandstreamTests, expectMemoryLarge) { size_t sizeBuffer = 0x100001; auto buffer = new uint8_t[sizeBuffer]; for (size_t index = 0; index < sizeBuffer; ++index) { buffer[index] = static_cast(index); } auto graphicsAllocation = createResidentAllocationAndStoreItInCsr(buffer, sizeBuffer); ResidencyContainer allocationsForResidency = {graphicsAllocation}; pCommandStreamReceiver->processResidency(allocationsForResidency, 0u); AUBCommandStreamFixture::expectMemory(buffer, buffer, sizeBuffer); delete[] buffer; } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.cpp000066400000000000000000000237121363734646600332010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_mem_dump_tests.h" #include "shared/source/helpers/hw_helper.h" #include 
"shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/aub/aub_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_aub_csr.h" using NEO::AUBCommandStreamReceiver; using NEO::AUBCommandStreamReceiverHw; using NEO::AUBFamilyMapper; using NEO::DeviceFixture; using NEO::folderAUB; std::string getAubFileName(const NEO::Device *pDevice, const std::string baseName) { const auto pGtSystemInfo = &pDevice->getHardwareInfo().gtSystemInfo; std::stringstream strfilename; uint32_t subSlicesPerSlice = pGtSystemInfo->SubSliceCount / pGtSystemInfo->SliceCount; strfilename << hardwarePrefix[pDevice->getHardwareInfo().platform.eProductFamily] << "_" << pGtSystemInfo->SliceCount << "x" << subSlicesPerSlice << "x" << pGtSystemInfo->MaxEuPerSubSlice << "_" << baseName; return strfilename.str(); } TEST(PageTableTraits, when48BitTraitsAreUsedThenPageTableAddressesAreCorrect) { EXPECT_EQ(BIT(32), AubMemDump::PageTableTraits<48>::ptBaseAddress); EXPECT_EQ(BIT(31), AubMemDump::PageTableTraits<48>::pdBaseAddress); EXPECT_EQ(BIT(30), AubMemDump::PageTableTraits<48>::pdpBaseAddress); EXPECT_EQ(BIT(29), AubMemDump::PageTableTraits<48>::pml4BaseAddress); } TEST(PageTableTraits, when32BitTraitsAreUsedThenPageTableAddressesAreCorrect) { EXPECT_EQ(BIT(38), AubMemDump::PageTableTraits<32>::ptBaseAddress); EXPECT_EQ(BIT(37), AubMemDump::PageTableTraits<32>::pdBaseAddress); EXPECT_EQ(BIT(36), AubMemDump::PageTableTraits<32>::pdpBaseAddress); } typedef Test AubMemDumpTests; HWTEST_F(AubMemDumpTests, givenAubFileStreamWhenOpenAndCloseIsCalledThenFileNameIsReportedCorrectly) { AUBCommandStreamReceiver::AubFileStream aubFile; std::string fileName = "file_name.aub"; aubFile.open(fileName.c_str()); EXPECT_STREQ(fileName.c_str(), aubFile.getFileName().c_str()); aubFile.close(); EXPECT_STREQ("", aubFile.getFileName().c_str()); } HWTEST_F(AubMemDumpTests, testHeader) { typedef typename AUBFamilyMapper::AUB AUB; 
std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, "header.aub")); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, reserveMaxAddress) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, "reserveMaxAddress.aub")); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto hwInfo = pDevice->getHardwareInfo(); auto deviceId = hwInfo.capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); auto gAddress = static_cast(-1) - 4096; auto pAddress = static_cast(gAddress) & 0xFFFFFFFF; auto enableLocalMemory = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getEnableLocalMemory(hwInfo); NEO::AubHelperHw aubHelperHw(enableLocalMemory); AUB::reserveAddressPPGTT(aubFile, gAddress, 4096, pAddress, 7, aubHelperHw); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, DISABLED_writeVerifyOneBytePPGTT) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, "writeVerifyOneBytePPGTT.aub")); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t byte = 0xbf; auto gAddress = reinterpret_cast(&byte); uint64_t physAddress = reinterpret_cast(&byte) & 0xFFFFFFFF; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gAddress, sizeof(byte), physAddress, 7, 
aubHelperHw); AUB::addMemoryWrite(aubFile, physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, writeVerifyOneByteGGTT) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, "writeVerifyOneByteGGTT.aub")); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t byte = 0xbf; uint64_t physAddress = reinterpret_cast(&byte) & 0xFFFFFFFF; AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, &byte, sizeof(byte), physAddress, data); AUB::addMemoryWrite(aubFile, physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, &byte, sizeof(byte), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, writeVerifySevenBytesPPGTT) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, "writeVerifySevenBytesPPGTT.aub")); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t bytes[] = {0, 1, 2, 3, 4, 5, 6}; auto gAddress = reinterpret_cast(bytes); auto physAddress = reinterpret_cast(bytes) & 0xFFFFFFFF; NEO::AubHelperHw 
aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gAddress, sizeof(bytes), physAddress, 7, aubHelperHw); AUB::addMemoryWrite(aubFile, physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, writeVerifySevenBytesGGTT) { typedef typename AUBFamilyMapper::AUB AUB; std::string filePath(folderAUB); filePath.append(Os::fileSeparator); filePath.append(getAubFileName(pDevice, "writeVerifySevenBytesGGTT.aub")); AUBCommandStreamReceiver::AubFileStream aubFile; aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); uint8_t bytes[] = {0, 1, 2, 3, 4, 5, 6}; uint64_t physAddress = reinterpret_cast(bytes) & 0xFFFFFFFF; AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, bytes, sizeof(bytes), physAddress, data); AUB::addMemoryWrite(aubFile, physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal); aubFile.expectMemory(physAddress, bytes, sizeof(bytes), AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); aubFile.fileHandle.close(); } HWTEST_F(AubMemDumpTests, simpleRCS) { setupAUB(pDevice, aub_stream::ENGINE_RCS); } HWTEST_F(AubMemDumpTests, simpleBCS) { setupAUB(pDevice, aub_stream::ENGINE_BCS); } HWTEST_F(AubMemDumpTests, simpleVCS) { setupAUB(pDevice, aub_stream::ENGINE_VCS); } HWTEST_F(AubMemDumpTests, simpleVECS) { setupAUB(pDevice, aub_stream::ENGINE_VECS); } TEST(AubMemDumpBasic, givenDebugOverrideMmioWhenMmioNotMatchThenDoNotAlterValue) { DebugManagerStateRestore dbgRestore; uint32_t dbgOffset = 0x1000; uint32_t dbgValue = 0xDEAD; 
DebugManager.flags.AubDumpOverrideMmioRegister.set(static_cast(dbgOffset)); DebugManager.flags.AubDumpOverrideMmioRegisterValue.set(static_cast(dbgValue)); uint32_t offset = 0x2000; uint32_t value = 0x3000; MMIOPair mmio = std::make_pair(offset, value); MockAubFileStreamMockMmioWrite mockAubStream; mockAubStream.writeMMIO(offset, value); EXPECT_EQ(1u, mockAubStream.mmioList.size()); EXPECT_TRUE(mockAubStream.isOnMmioList(mmio)); } TEST(AubMemDumpBasic, givenDebugOverrideMmioWhenMmioMatchThenAlterValue) { DebugManagerStateRestore dbgRestore; uint32_t dbgOffset = 0x2000; uint32_t dbgValue = 0xDEAD; MMIOPair dbgMmio = std::make_pair(dbgOffset, dbgValue); DebugManager.flags.AubDumpOverrideMmioRegister.set(static_cast(dbgOffset)); DebugManager.flags.AubDumpOverrideMmioRegisterValue.set(static_cast(dbgValue)); uint32_t offset = 0x2000; uint32_t value = 0x3000; MockAubFileStreamMockMmioWrite mockAubStream; mockAubStream.writeMMIO(offset, value); EXPECT_EQ(1u, mockAubStream.mmioList.size()); EXPECT_TRUE(mockAubStream.isOnMmioList(dbgMmio)); } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h000066400000000000000000000116261363734646600326470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/gen_common/aub_mapper.h" #include "test.h" namespace Os { extern const char *fileSeparator; } extern std::string getAubFileName(const NEO::Device *pDevice, const std::string baseName); template void setupAUB(const NEO::Device *pDevice, aub_stream::EngineType engineType) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = 
NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName("simple"); baseName.append(csTraits.name); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); aubFile.writeMMIO(mmioBase + 0x229c, 0xffff8280); const size_t sizeHWSP = 0x1000; const size_t sizeRing = 0x4 * 0x1000; const size_t sizeTotal = alignUp((sizeHWSP + sizeRing + csTraits.sizeLRCA), 0x1000); const size_t alignTotal = sizeTotal; auto totalBuffer = alignedMalloc(sizeTotal, alignTotal); size_t totalBufferOffset = 0; auto pGlobalHWStatusPage = totalBuffer; totalBufferOffset += sizeHWSP; uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(mmioBase + 0x2080, ggttGlobalHardwareStatusPage); size_t sizeCommands = 0; auto pRing = ptrOffset(totalBuffer, totalBufferOffset); totalBufferOffset += sizeRing; auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); uint32_t noopId = 0xbaadd; auto cur = (uint32_t *)pRing; using MI_NOOP = typename FamilyType::MI_NOOP; auto noop = FamilyType::cmdInitNoop; *cur++ = noop.TheStructure.RawData[0]; *cur++ = noop.TheStructure.RawData[0]; *cur++ = noop.TheStructure.RawData[0]; noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; 
noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = ptrOffset(totalBuffer, totalBufferOffset); totalBufferOffset += csTraits.sizeLRCA; csTraits.initialize(pLRCABase); csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; aubFile.writeMMIO(mmioBase + 0x2230, 0); aubFile.writeMMIO(mmioBase + 0x2230, 0); aubFile.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[1]); aubFile.writeMMIO(mmioBase + 0x2230, contextDescriptor.ulData[0]); alignedFree(totalBuffer); aubFile.fileHandle.close(); } 
compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/000077500000000000000000000000001363734646600254745ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/CMakeLists.txt000066400000000000000000000011221363734646600302300ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_parent_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/fixture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_fixture.h ) add_subdirectories()compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.cpp000066400000000000000000000020601363734646600305130ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" namespace NEO { GraphicsAllocation *AUBFixture::createHostPtrAllocationFromSvmPtr(void *svmPtr, size_t size) { GraphicsAllocation *allocation = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, svmPtr); csr->makeResidentHostPtrAllocation(allocation); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); allocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); 
allocation->setMemObjectsAllocationWithWritableFlags(true); return allocation; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h000066400000000000000000000115651363734646600301720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include namespace NEO { class AUBFixture : public CommandQueueHwFixture { public: void SetUp(const HardwareInfo *hardwareInfo) { const HardwareInfo &hwInfo = hardwareInfo ? 
*hardwareInfo : *defaultHwInfo; uint32_t deviceIndex = 0; auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto engineType = getChosenEngineType(hwInfo); const ::testing::TestInfo *const testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); std::stringstream strfilename; strfilename << testInfo->test_case_name() << "_" << testInfo->name() << "_" << hwHelper.getCsTraits(engineType).name; executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); if (testMode == TestMode::AubTestsWithTbx) { this->csr = TbxCommandStreamReceiver::create(strfilename.str(), true, *executionEnvironment, 0); } else { this->csr = AUBCommandStreamReceiver::create(strfilename.str(), true, *executionEnvironment, 0); } device = std::make_unique(MockDevice::create(executionEnvironment, deviceIndex)); device->resetCommandStreamReceiver(this->csr); CommandQueueHwFixture::SetUp(AUBFixture::device.get(), cl_command_queue_properties(0)); } void TearDown() override { CommandQueueHwFixture::TearDown(); } GraphicsAllocation *createHostPtrAllocationFromSvmPtr(void *svmPtr, size_t size); template CommandStreamReceiverSimulatedCommonHw *getSimulatedCsr() const { return static_cast *>(csr); } template void expectMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (testMode == TestMode::AubTestsWithTbx) { auto tbxCsr = csrSimulated; tbxCsr->expectMemoryEqual(gfxAddress, srcAddress, length); csrSimulated = static_cast *>( static_cast> *>(csr)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryEqual(gfxAddress, srcAddress, length); } } template void expectNotEqualMemory(void *gfxAddress, const void *srcAddress, size_t length) { CommandStreamReceiverSimulatedCommonHw *csrSimulated = getSimulatedCsr(); if (testMode == TestMode::AubTestsWithTbx) { auto 
tbxCsr = csrSimulated; tbxCsr->expectMemoryNotEqual(gfxAddress, srcAddress, length); csrSimulated = static_cast *>( static_cast> *>(csr)->aubCSR.get()); } if (csrSimulated) { csrSimulated->expectMemoryNotEqual(gfxAddress, srcAddress, length); } } static void *getGpuPointer(GraphicsAllocation *allocation) { return reinterpret_cast(allocation->getGpuAddress()); } CommandStreamReceiver *csr = nullptr; volatile uint32_t *pTagMemory = nullptr; std::unique_ptr device; ExecutionEnvironment *executionEnvironment; private: using CommandQueueHwFixture::SetUp; }; // namespace NEO template struct KernelAUBFixture : public AUBFixture, public KernelFixture { void SetUp() override { AUBFixture::SetUp(nullptr); KernelFixture::SetUp(device.get(), context); } void TearDown() override { KernelFixture::TearDown(); AUBFixture::TearDown(); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h000066400000000000000000000022031363734646600330700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" namespace NEO { static const char programFile[] = "simple_block_kernel"; static const char kernelName[] = "kernel_reflection"; class AUBParentKernelFixture : public CommandEnqueueAUBFixture, public HelloWorldKernelFixture, public testing::Test { public: using HelloWorldKernelFixture::SetUp; void SetUp() override { CommandEnqueueAUBFixture::SetUp(); ASSERT_NE(nullptr, pClDevice); if (pClDevice->getHardwareInfo().capabilityTable.clVersionSupport < 20) { GTEST_SKIP(); } HelloWorldKernelFixture::SetUp(pClDevice, programFile, kernelName, "-cl-std=CL2.0"); } void TearDown() override { if (IsSkipped()) { return; } HelloWorldKernelFixture::TearDown(); CommandEnqueueAUBFixture::TearDown(); } }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/fixture_tests.cpp000066400000000000000000000021771363734646600311170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct SimpleTest : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::Test { using AUBCommandStreamFixture::SetUp; void SetUp() override { CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); AUBCommandStreamFixture::SetUp(pCmdQ); context = new MockContext(pClDevice); } void TearDown() override { delete context; AUBCommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } MockContext *context; }; TEST_F(SimpleTest, VerifyBasicFixturesSetupAndTearDown) { EXPECT_EQ(1, 1); } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h000066400000000000000000000014251363734646600317270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/simple_arg_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_fixture.h" namespace NEO { //////////////////////////////////////////////////////////////////////////////// // Factory where all command stream traffic funnels to an AUB file //////////////////////////////////////////////////////////////////////////////// struct 
AUBHelloWorldFixtureFactory : public HelloWorldFixtureFactory { typedef AUBCommandStreamFixture CommandStreamFixture; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/run_kernel_fixture.h000066400000000000000000000053151363734646600315630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/file_io.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/test_files.h" namespace NEO { //////////////////////////////////////////////////////////////////////////////// // Factory where all command stream traffic funnels to an AUB file //////////////////////////////////////////////////////////////////////////////// struct AUBRunKernelFixtureFactory : public RunKernelFixtureFactory { typedef AUBCommandStreamFixture CommandStreamFixture; }; //////////////////////////////////////////////////////////////////////////////// // RunKernelFixture // Instantiates a fixture based on the supplied fixture factory. // Performs proper initialization/shutdown of various elements in factory. // Used by most tests for integration testing with command queues. 
//////////////////////////////////////////////////////////////////////////////// template class RunKernelFixture : public CommandEnqueueAUBFixture { public: RunKernelFixture() { } void SetUp() override { CommandEnqueueAUBFixture::SetUp(); } void TearDown() override { CommandEnqueueAUBFixture::TearDown(); } protected: Program *CreateProgramFromBinary( const std::string &binaryFileName) { cl_int retVal = CL_SUCCESS; EXPECT_EQ(true, fileExists(binaryFileName)); size_t sourceSize = 0; auto pSource = loadDataFromFile(binaryFileName.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); Program *pProgram = nullptr; const cl_device_id device = pClDevice; const unsigned char *binaries[1] = {reinterpret_cast(pSource.get())}; pProgram = Program::create( context, 1, &device, &sourceSize, binaries, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(pProgram, nullptr); return pProgram; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/simple_arg_fixture.h000066400000000000000000000102261363734646600315360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" namespace NEO { //////////////////////////////////////////////////////////////////////////////// // Factory where all command stream traffic funnels to an AUB file //////////////////////////////////////////////////////////////////////////////// struct AUBSimpleArgFixtureFactory : public 
SimpleArgFixtureFactory, public IndirectHeapFixture { typedef AUBCommandStreamFixture CommandStreamFixture; }; //////////////////////////////////////////////////////////////////////////////// // SimpleArgTest // Instantiates a fixture based on the supplied fixture factory. // Performs proper initialization/shutdown of various elements in factory. // Used by most tests for integration testing with command queues. //////////////////////////////////////////////////////////////////////////////// template struct SimpleArgFixture : public FixtureFactory::IndirectHeapFixture, public FixtureFactory::CommandStreamFixture, public FixtureFactory::CommandQueueFixture, public FixtureFactory::KernelFixture, public DeviceFixture { typedef typename FixtureFactory::IndirectHeapFixture IndirectHeapFixture; typedef typename FixtureFactory::CommandStreamFixture CommandStreamFixture; typedef typename FixtureFactory::CommandQueueFixture CommandQueueFixture; typedef typename FixtureFactory::KernelFixture KernelFixture; using AUBCommandStreamFixture::SetUp; using CommandQueueFixture::pCmdQ; using CommandStreamFixture::pCS; using IndirectHeapFixture::SetUp; using KernelFixture::pKernel; using KernelFixture::SetUp; public: void SetUp() override { DeviceFixture::SetUp(); ASSERT_NE(nullptr, pClDevice); CommandQueueFixture::SetUp(pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); CommandStreamFixture::SetUp(pCmdQ); ASSERT_NE(nullptr, pCS); IndirectHeapFixture::SetUp(pCmdQ); KernelFixture::SetUp(pClDevice); ASSERT_NE(nullptr, pKernel); argVal = static_cast(0x22222222); pDestMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, pDestMemory); pExpectedMemory = alignedMalloc(sizeUserMemory, 4096); ASSERT_NE(nullptr, pExpectedMemory); // Initialize user memory to known values memset(pDestMemory, 0x11, sizeUserMemory); memset(pExpectedMemory, 0x22, sizeUserMemory); pKernel->setArg(0, sizeof(int), &argVal); pKernel->setArgSvm(1, sizeUserMemory, pDestMemory, nullptr, 0u); outBuffer = 
AUBCommandStreamFixture::createResidentAllocationAndStoreItInCsr(pDestMemory, sizeUserMemory); ASSERT_NE(nullptr, outBuffer); outBuffer->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); outBuffer->setMemObjectsAllocationWithWritableFlags(true); } void TearDown() override { if (pExpectedMemory) { alignedFree(pExpectedMemory); pExpectedMemory = nullptr; } if (pDestMemory) { alignedFree(pDestMemory); pDestMemory = nullptr; } KernelFixture::TearDown(); IndirectHeapFixture::TearDown(); CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } int argVal = 0; void *pDestMemory = nullptr; void *pExpectedMemory = nullptr; size_t sizeUserMemory = 128 * sizeof(float); GraphicsAllocation *outBuffer = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/fixtures/unified_memory_fixture.h000066400000000000000000000054201363734646600324270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/api/api.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { namespace PagaFaultManagerTestConfig { extern bool disabled; } class UnifiedMemoryAubFixture : public AUBFixture { public: using AUBFixture::TearDown; cl_int retVal = CL_SUCCESS; const size_t dataSize = MemoryConstants::megaByte; bool skipped = false; void SetUp() override { if (PagaFaultManagerTestConfig::disabled) { skipped = true; GTEST_SKIP(); } AUBFixture::SetUp(nullptr); if (!platform()->peekExecutionEnvironment()->memoryManager->getPageFaultManager()) { skipped = true; GTEST_SKIP(); } } void *allocateUSM(InternalMemoryType type) { void *ptr = nullptr; if (!this->skipped) { switch (type) { case DEVICE_UNIFIED_MEMORY: ptr = clDeviceMemAllocINTEL(this->context, this->device.get(), nullptr, dataSize, 0, 
&retVal); break; case HOST_UNIFIED_MEMORY: ptr = clHostMemAllocINTEL(this->context, nullptr, dataSize, 0, &retVal); break; case SHARED_UNIFIED_MEMORY: ptr = clSharedMemAllocINTEL(this->context, this->device.get(), nullptr, dataSize, 0, &retVal); break; default: ptr = new char[dataSize]; break; } EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(ptr, nullptr); } return ptr; } void freeUSM(void *ptr, InternalMemoryType type) { if (!this->skipped) { switch (type) { case DEVICE_UNIFIED_MEMORY: case HOST_UNIFIED_MEMORY: case SHARED_UNIFIED_MEMORY: retVal = clMemFreeINTEL(this->context, ptr); break; default: delete[] static_cast(ptr); break; } EXPECT_EQ(retVal, CL_SUCCESS); } } void writeToUsmMemory(std::vector data, void *ptr, InternalMemoryType type) { if (!this->skipped) { switch (type) { case DEVICE_UNIFIED_MEMORY: retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, ptr, data.data(), dataSize, 0, nullptr, nullptr); break; default: std::copy(data.begin(), data.end(), static_cast(ptr)); break; } EXPECT_EQ(retVal, CL_SUCCESS); } } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/000077500000000000000000000000001363734646600245365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/CMakeLists.txt000066400000000000000000000005551363734646600273030ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN11) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_tests_configuration_gen11.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen11/unit_test_helper_gen11.cpp ) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/aub_tests_configuration_gen11.cpp000066400000000000000000000004351363734646600331570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/helpers/hw_cmds.h" #include "opencl/test/unit_test/aub_tests/aub_tests_configuration.inl" using namespace NEO; template AubTestsConfig GetAubTestsConfig(); compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/batch_buffer/000077500000000000000000000000001363734646600271505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/batch_buffer/CMakeLists.txt000066400000000000000000000004451363734646600317130ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen11.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen11.cpp ) aub_batch_buffer_tests_gen11.cpp000066400000000000000000000010621363734646600352520ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/batch_buffer/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_batch_buffer_tests_gen11.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" using Gen11AubBatchBufferTests = Test; static constexpr auto gpuBatchBufferAddr = 0x800400001000; // 48-bit GPU address GEN11TEST_F(Gen11AubBatchBufferTests, givenSimpleRCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_RCS, gpuBatchBufferAddr); } aub_batch_buffer_tests_gen11.h000066400000000000000000000151451363734646600347260ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/batch_buffer/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub/aub_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" template void setupAUBWithBatchBuffer(const NEO::Device *pDevice, aub_stream::EngineType 
engineType, uint64_t gpuBatchBufferAddr) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName("simple"); baseName.append(csTraits.name); baseName.append("WithBatchBuffer"); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header aubFile.init(AubMemDump::SteppingValues::A, AUB::Traits::device); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x229c), 0xffff8280); const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; auto pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2080), ggttGlobalHardwareStatusPage); using MI_NOOP = typename FamilyType::MI_NOOP; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; // create a user mode batch buffer auto physBatchBuffer = physAddress; const auto sizeBatchBuffer = 0x1000; auto gpuBatchBuffer = static_cast(gpuBatchBufferAddr); physAddress += sizeBatchBuffer; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gpuBatchBuffer, sizeBatchBuffer, physBatchBuffer, 7, aubHelperHw); uint8_t batchBuffer[sizeBatchBuffer]; auto noop = FamilyType::cmdInitNoop; uint32_t noopId = 0xbaadd; { auto pBatchBuffer = (void *)batchBuffer; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, 
sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId++; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_BATCH_BUFFER_END *)pBatchBuffer = FamilyType::cmdInitBatchBufferEnd; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_BATCH_BUFFER_END)); auto sizeBufferUsed = ptrDiff(pBatchBuffer, batchBuffer); AUB::addMemoryWrite(aubFile, physBatchBuffer, batchBuffer, sizeBufferUsed, AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::DataTypeHintValues::TraceBatchBuffer); } const size_t sizeRing = 0x4 * 0x1000; const size_t alignRing = 0x1000; size_t sizeCommands = 0; auto pRing = alignedMalloc(sizeRing, alignRing); auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); auto cur = (uint32_t *)pRing; auto bbs = FamilyType::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddressGraphicsaddress472(gpuBatchBuffer); bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)cur = bbs; cur = ptrOffset(cur, sizeof(MI_BATCH_BUFFER_START)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA); csTraits.initialize(pLRCABase); 
csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); // Load our new exec list aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); // Poll until HW complete using AubMemDump::CmdServicesMemTraceRegisterPoll; aubFile.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS 0x00008000, 0x00008000, false, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); alignedFree(pRing); alignedFree(pLRCABase); alignedFree(pGlobalHWStatusPage); aubFile.fileHandle.close(); } 
compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/execution_model/000077500000000000000000000000001363734646600277215ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/execution_model/CMakeLists.txt000066400000000000000000000003551363734646600324640ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_parent_kernel_tests_gen11.cpp ) enqueue_parent_kernel_tests_gen11.cpp000066400000000000000000000061211363734646600371430ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/execution_model/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "test.h" using namespace NEO; typedef AUBParentKernelFixture GEN11AUBParentKernelFixture; GEN11TEST_F(GEN11AUBParentKernelFixture, EnqueueParentKernel) { ASSERT_NE(nullptr, pKernel); ASSERT_TRUE(pKernel->isParentKernel); const cl_queue_properties properties[3] = {(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), 0, 0}; DeviceQueue *devQueue = DeviceQueue::create( &pCmdQ->getContext(), pClDevice, properties[0], retVal); SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel(); // Aub execution takes huge time for bigger GWS scheduler.setGws(24); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; size_t lws[3] = {1, 1, 1}; // clang-format off cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc desc = { 0 }; desc.image_array_size = 0; 
desc.image_depth = 1; desc.image_height = 4; desc.image_width = 4; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; // clang-format on auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); Image *image = Image::create( pContext, {}, 0, 0, surfaceFormat, &desc, nullptr, retVal); Buffer *buffer = BufferHelper>::create(pContext); cl_mem bufferMem = buffer; cl_mem imageMem = image; auto sampler = Sampler::create( pContext, CL_TRUE, CL_ADDRESS_NONE, CL_FILTER_LINEAR, retVal); uint64_t argScalar = 2; pKernel->setArg( 3, sizeof(uint64_t), &argScalar); pKernel->setArg( 2, sizeof(cl_mem), &bufferMem); pKernel->setArg( 1, sizeof(cl_mem), &imageMem); pKernel->setArg( 0, sizeof(cl_sampler), &sampler); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0); pCmdQ->finish(); uint32_t expectedNumberOfEnqueues = 1; uint64_t gpuAddress = devQueue->getQueueBuffer()->getGpuAddress() + offsetof(IGIL_CommandQueue, m_controls.m_TotalNumberOfQueues); AUBCommandStreamFixture::expectMemory((void *)(uintptr_t)gpuAddress, &expectedNumberOfEnqueues, sizeof(uint32_t)); AUBCommandStreamFixture::expectMemory((void *)(uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress(), &argScalar, sizeof(size_t)); delete devQueue; delete image; delete buffer; delete sampler; } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/icllp/000077500000000000000000000000001363734646600256415ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen11/icllp/CMakeLists.txt000066400000000000000000000003141363734646600303770ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_ICLLP) set(aub_test_config "icllp/1/8/8") include(${OPENCL_AUB_TEST_DIR}/cmake/run_aub_test_target.cmake) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/000077500000000000000000000000001363734646600250735ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/CMakeLists.txt000066400000000000000000000010071363734646600276310ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_tests_configuration_gen12lp.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen12lp/unit_test_helper_gen12lp.cpp ) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/aub_mem_dump_tests_gen12lp.cpp000066400000000000000000000006601363734646600330050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" namespace NEO { using Gen12LPAubMemDumpTests = Test; GEN12LPTEST_F(Gen12LPAubMemDumpTests, simpleCCS) { setupAUB(pDevice, aub_stream::ENGINE_CCS); } } // namespace NEO aub_tests_configuration_gen12lp.cpp000066400000000000000000000006211363734646600337670ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "opencl/test/unit_test/aub_tests/aub_tests_configuration.h" using namespace NEO; template <> AubTestsConfig GetAubTestsConfig() { AubTestsConfig aubTestsConfig; aubTestsConfig.testCanonicalAddress = false; return aubTestsConfig; 
}compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/000077500000000000000000000000001363734646600275055ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/CMakeLists.txt000066400000000000000000000004511363734646600322450ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen12lp.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests_gen12lp.cpp ) aub_batch_buffer_tests_gen12lp.cpp000066400000000000000000000064151363734646600361530ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_batch_buffer_tests_gen12lp.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" using Gen12LPAubBatchBufferTests = Test; using Gen12LPTimestampTests = Test>; static constexpr auto gpuBatchBufferAddr = 0x400400001000; // 47-bit GPU address GEN12LPTEST_F(Gen12LPAubBatchBufferTests, givenSimpleRCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { 
setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_RCS, gpuBatchBufferAddr); } GEN12LPTEST_F(Gen12LPAubBatchBufferTests, givenSimpleCCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_CCS, gpuBatchBufferAddr); } GEN12LPTEST_F(Gen12LPTimestampTests, DISABLED_GivenCommandQueueWithProfilingEnabledWhenKernelIsEnqueuedThenProfilingTimestampsAreNotZero) { cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; CommandQueueHw cmdQ(pContext, pClDevice, &properties[0], false); EXPECT_EQ(aub_stream::ENGINE_CCS, pDevice->getDefaultEngine().osContext->getEngineType()); const uint32_t bufferSize = 4; std::unique_ptr buffer(Buffer::create(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); memset(buffer->getGraphicsAllocation()->getUnderlyingBuffer(), 0, buffer->getGraphicsAllocation()->getUnderlyingBufferSize()); buffer->forceDisallowCPUCopy = true; uint8_t writeData[bufferSize] = {0x11, 0x22, 0x33, 0x44}; cl_event event; cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, &event); ASSERT_NE(event, nullptr); auto eventObject = castToObject(event); ASSERT_NE(eventObject, nullptr); expectMemory(buffer->getGraphicsAllocation()->getUnderlyingBuffer(), writeData, bufferSize); uint64_t expectedTimestampValues[2] = {0, 0}; TagNode &hwTimeStamps = *(eventObject->getHwTimeStampNode()); uint64_t timeStampStartAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextStartTS); uint64_t timeStampEndAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextEndTS); expectMemoryNotEqual(reinterpret_cast(timeStampStartAddress), &expectedTimestampValues[0], sizeof(uint64_t)); expectMemoryNotEqual(reinterpret_cast(timeStampEndAddress), &expectedTimestampValues[1], sizeof(uint64_t)); eventObject->release(); } 
aub_batch_buffer_tests_gen12lp.h000066400000000000000000000154241363734646600356200ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/batch_buffer/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub/aub_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" template void setupAUBWithBatchBuffer(const NEO::Device *pDevice, aub_stream::EngineType engineType, uint64_t gpuBatchBufferAddr) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName("simple"); baseName.append(csTraits.name); baseName.append("WithBatchBuffer"); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header aubFile.init(AubMemDump::SteppingValues::A, AUB::Traits::device); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x229c), 0xffff8280); // enable CCS if (engineType == aub_stream::ENGINE_CCS) { aubFile.writeMMIO(0x0000ce90, 0x00010001); aubFile.writeMMIO(0x00014800, 0x00010001); } const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; auto pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2080), ggttGlobalHardwareStatusPage); using MI_NOOP = typename FamilyType::MI_NOOP; using MI_BATCH_BUFFER_START = typename 
FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; // create a user mode batch buffer auto physBatchBuffer = physAddress; const auto sizeBatchBuffer = 0x1000; auto gpuBatchBuffer = static_cast(gpuBatchBufferAddr); physAddress += sizeBatchBuffer; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gpuBatchBuffer, sizeBatchBuffer, physBatchBuffer, 3, aubHelperHw); uint8_t batchBuffer[sizeBatchBuffer]; auto noop = FamilyType::cmdInitNoop; uint32_t noopId = 0xbaadd; { auto pBatchBuffer = (void *)batchBuffer; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId++; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_BATCH_BUFFER_END *)pBatchBuffer = FamilyType::cmdInitBatchBufferEnd; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_BATCH_BUFFER_END)); auto sizeBufferUsed = ptrDiff(pBatchBuffer, batchBuffer); AUB::addMemoryWrite(aubFile, physBatchBuffer, batchBuffer, sizeBufferUsed, AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::DataTypeHintValues::TraceBatchBuffer); } const size_t sizeRing = 0x4 * 0x1000; const size_t alignRing = 0x1000; size_t sizeCommands = 0; auto pRing = alignedMalloc(sizeRing, alignRing); auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); auto cur = (uint32_t *)pRing; auto bbs = FamilyType::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddressGraphicsaddress472(gpuBatchBuffer); 
bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)cur = bbs; cur = ptrOffset(cur, sizeof(MI_BATCH_BUFFER_START)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA); csTraits.initialize(pLRCABase); csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); // Load our new exec list 
aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); // Poll until HW complete using AubMemDump::CmdServicesMemTraceRegisterPoll; aubFile.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS 0x00008000, 0x00008000, false, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); alignedFree(pRing); alignedFree(pLRCABase); alignedFree(pGlobalHWStatusPage); aubFile.fileHandle.close(); } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/execution_model/000077500000000000000000000000001363734646600302565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/execution_model/CMakeLists.txt000066400000000000000000000003571363734646600330230ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_parent_kernel_tests_gen12lp.cpp ) enqueue_parent_kernel_tests_gen12lp.cpp000066400000000000000000000061271363734646600400430ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/execution_model/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "test.h" using namespace NEO; typedef AUBParentKernelFixture GEN12LPAUBParentKernelFixture; GEN12LPTEST_F(GEN12LPAUBParentKernelFixture, EnqueueParentKernel) { ASSERT_NE(nullptr, pKernel); ASSERT_TRUE(pKernel->isParentKernel); const cl_queue_properties properties[3] = {(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), 0, 0}; DeviceQueue *devQueue = DeviceQueue::create( &pCmdQ->getContext(), 
pClDevice, properties[0], retVal); SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel(); // Aub execution takes huge time for bigger GWS scheduler.setGws(24); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; size_t lws[3] = {1, 1, 1}; // clang-format off cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc desc = { 0 }; desc.image_array_size = 0; desc.image_depth = 1; desc.image_height = 4; desc.image_width = 4; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; // clang-format on auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); Image *image = Image::create( pContext, {}, 0, 0, surfaceFormat, &desc, nullptr, retVal); Buffer *buffer = BufferHelper>::create(pContext); cl_mem bufferMem = buffer; cl_mem imageMem = image; auto sampler = Sampler::create( pContext, CL_TRUE, CL_ADDRESS_NONE, CL_FILTER_LINEAR, retVal); uint64_t argScalar = 2; pKernel->setArg( 3, sizeof(uint64_t), &argScalar); pKernel->setArg( 2, sizeof(cl_mem), &bufferMem); pKernel->setArg( 1, sizeof(cl_mem), &imageMem); pKernel->setArg( 0, sizeof(cl_sampler), &sampler); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0); pCmdQ->finish(); uint32_t expectedNumberOfEnqueues = 1; uint64_t gpuAddress = devQueue->getQueueBuffer()->getGpuAddress() + offsetof(IGIL_CommandQueue, m_controls.m_TotalNumberOfQueues); AUBCommandStreamFixture::expectMemory((void *)(uintptr_t)gpuAddress, &expectedNumberOfEnqueues, sizeof(uint32_t)); AUBCommandStreamFixture::expectMemory((void *)(uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress(), &argScalar, sizeof(size_t)); delete devQueue; delete image; delete buffer; delete sampler; } 
compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/tgllp/000077500000000000000000000000001363734646600262155ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen12lp/tgllp/CMakeLists.txt000066400000000000000000000003151363734646600307540ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_TGLLP) set(aub_test_config "tgllp/1/6/16") include(${OPENCL_AUB_TEST_DIR}/cmake/run_aub_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/000077500000000000000000000000001363734646600244645ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/CMakeLists.txt000066400000000000000000000005511363734646600272250ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN8) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_tests_configuration_gen8.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen8/unit_test_helper_gen8.cpp ) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/aub_tests_configuration_gen8.cpp000066400000000000000000000004351363734646600330330ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "opencl/test/unit_test/aub_tests/aub_tests_configuration.inl" using namespace NEO; template AubTestsConfig GetAubTestsConfig(); compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/bdw/000077500000000000000000000000001363734646600252405ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/bdw/CMakeLists.txt000066400000000000000000000003101363734646600277720ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
if(TESTS_BDW) set(aub_test_config "bdw/1/3/8") include(${OPENCL_AUB_TEST_DIR}/cmake/run_aub_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/execution_model/000077500000000000000000000000001363734646600276475ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/execution_model/CMakeLists.txt000066400000000000000000000003541363734646600324110ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_parent_kernel_tests_gen8.cpp ) enqueue_parent_kernel_tests_gen8.cpp000066400000000000000000000060551363734646600370250ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen8/execution_model/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "test.h" #include using namespace NEO; typedef AUBParentKernelFixture GEN8AUBParentKernelFixture; GEN8TEST_F(GEN8AUBParentKernelFixture, EnqueueParentKernel) { ASSERT_NE(nullptr, pKernel); ASSERT_TRUE(pKernel->isParentKernel); const cl_queue_properties properties[3] = {(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), 0, 0}; std::unique_ptr devQueue(DeviceQueue::create( &pCmdQ->getContext(), pClDevice, properties[0], retVal)); SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel(); // Aub execution takes huge time for bigger GWS scheduler.setGws(24); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; size_t lws[3] = {1, 1, 1}; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; 
imageFormat.image_channel_order = CL_R; cl_image_desc desc = {0}; desc.image_array_size = 0; desc.image_depth = 1; desc.image_height = 4; desc.image_width = 4; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr image(Image::create( pContext, {}, 0, 0, surfaceFormat, &desc, nullptr, retVal)); std::unique_ptr buffer(BufferHelper>::create(pContext)); cl_mem bufferMem = buffer.get(); cl_mem imageMem = image.get(); std::unique_ptr sampler(Sampler::create( pContext, CL_TRUE, CL_ADDRESS_NONE, CL_FILTER_LINEAR, retVal)); uint64_t argScalar = 2; pKernel->setArg( 3, sizeof(uint64_t), &argScalar); pKernel->setArg( 2, sizeof(cl_mem), &bufferMem); pKernel->setArg( 1, sizeof(cl_mem), &imageMem); pKernel->setArg( 0, sizeof(cl_sampler), &sampler); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0); pCmdQ->finish(); uint32_t expectedNumberOfEnqueues = 1; uint64_t gpuAddress = devQueue->getQueueBuffer()->getGpuAddress() + offsetof(IGIL_CommandQueue, m_controls.m_TotalNumberOfQueues); AUBCommandStreamFixture::expectMemory((void *)(uintptr_t)gpuAddress, &expectedNumberOfEnqueues, sizeof(uint32_t)); AUBCommandStreamFixture::expectMemory((void *)(uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress(), &argScalar, sizeof(size_t)); } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/000077500000000000000000000000001363734646600244655ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/CMakeLists.txt000066400000000000000000000004721363734646600272300ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/aub_tests_configuration_gen9.cpp 
${NEO_SOURCE_DIR}/opencl/test/unit_test/gen9/unit_test_helper_gen9.cpp ) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/aub_tests_configuration_gen9.cpp000066400000000000000000000004351363734646600330350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "opencl/test/unit_test/aub_tests/aub_tests_configuration.inl" using namespace NEO; template AubTestsConfig GetAubTestsConfig(); compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/batch_buffer/000077500000000000000000000000001363734646600270775ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/batch_buffer/CMakeLists.txt000066400000000000000000000004311363734646600316350ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_batch_buffer_tests.cpp ) aub_batch_buffer_tests.cpp000066400000000000000000000010441363734646600342060ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/batch_buffer/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "aub_batch_buffer_tests.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" using AubBatchBufferTests = Test; static constexpr auto gpuBatchBufferAddr = 0x800400001000ull; // 48-bit GPU address GEN9TEST_F(AubBatchBufferTests, givenSimpleRCSWithBatchBufferWhenItHasMSBSetInGpuAddressThenAUBShouldBeSetupSuccessfully) { setupAUBWithBatchBuffer(pDevice, aub_stream::ENGINE_RCS, gpuBatchBufferAddr); } 
aub_batch_buffer_tests.h000066400000000000000000000153511363734646600336610ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/batch_buffer/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub/aub_helper.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_mem_dump_tests.h" template void setupAUBWithBatchBuffer(const NEO::Device *pDevice, aub_stream::EngineType engineType, uint64_t gpuBatchBufferAddr) { typedef typename NEO::AUBFamilyMapper::AUB AUB; const auto &csTraits = NEO::CommandStreamReceiverSimulatedCommonHw::getCsTraits(engineType); auto mmioBase = csTraits.mmioBase; uint64_t physAddress = 0x10000; NEO::AUBCommandStreamReceiver::AubFileStream aubFile; std::string filePath(NEO::folderAUB); filePath.append(Os::fileSeparator); std::string baseName("simple"); baseName.append(csTraits.name); baseName.append("WithBatchBuffer"); baseName.append(".aub"); filePath.append(getAubFileName(pDevice, baseName)); aubFile.fileHandle.open(filePath.c_str(), std::ofstream::binary); // Header auto deviceId = pDevice->getHardwareInfo().capabilityTable.aubDeviceId; aubFile.init(AubMemDump::SteppingValues::A, deviceId); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x229c), 0xffff8280); const size_t sizeHWSP = 0x1000; const size_t alignHWSP = 0x1000; auto pGlobalHWStatusPage = alignedMalloc(sizeHWSP, alignHWSP); uint32_t ggttGlobalHardwareStatusPage = (uint32_t)((uintptr_t)pGlobalHWStatusPage); AubGTTData data = {true, false}; AUB::reserveAddressGGTT(aubFile, ggttGlobalHardwareStatusPage, sizeHWSP, physAddress, data); physAddress += sizeHWSP; aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2080), ggttGlobalHardwareStatusPage); using MI_NOOP = typename FamilyType::MI_NOOP; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; // create a 
user mode batch buffer auto physBatchBuffer = physAddress; const auto sizeBatchBuffer = 0x1000; auto gpuBatchBuffer = static_cast(gpuBatchBufferAddr); physAddress += sizeBatchBuffer; NEO::AubHelperHw aubHelperHw(false); AUB::reserveAddressPPGTT(aubFile, gpuBatchBuffer, sizeBatchBuffer, physBatchBuffer, 7, aubHelperHw); uint8_t batchBuffer[sizeBatchBuffer]; auto noop = FamilyType::cmdInitNoop; uint32_t noopId = 0xbaadd; { auto pBatchBuffer = (void *)batchBuffer; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId++; *(MI_NOOP *)pBatchBuffer = noop; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_NOOP)); *(MI_BATCH_BUFFER_END *)pBatchBuffer = FamilyType::cmdInitBatchBufferEnd; pBatchBuffer = ptrOffset(pBatchBuffer, sizeof(MI_BATCH_BUFFER_END)); auto sizeBufferUsed = ptrDiff(pBatchBuffer, batchBuffer); AUB::addMemoryWrite(aubFile, physBatchBuffer, batchBuffer, sizeBufferUsed, AubMemDump::AddressSpaceValues::TraceNonlocal, AubMemDump::DataTypeHintValues::TraceBatchBuffer); } const size_t sizeRing = 0x4 * 0x1000; const size_t alignRing = 0x1000; size_t sizeCommands = 0; auto pRing = alignedMalloc(sizeRing, alignRing); auto ggttRing = (uint32_t)(uintptr_t)pRing; auto physRing = physAddress; physAddress += sizeRing; auto rRing = AUB::reserveAddressGGTT(aubFile, ggttRing, sizeRing, physRing, data); ASSERT_NE(static_cast(-1), rRing); EXPECT_EQ(rRing, physRing); auto cur = (uint32_t *)pRing; auto bbs = FamilyType::cmdInitBatchBufferStart; bbs.setBatchBufferStartAddressGraphicsaddress472(gpuBatchBuffer); bbs.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); *(MI_BATCH_BUFFER_START *)cur = bbs; cur = 
ptrOffset(cur, sizeof(MI_BATCH_BUFFER_START)); noop.TheStructure.Common.IdentificationNumberRegisterWriteEnable = true; noop.TheStructure.Common.IdentificationNumber = noopId; *cur++ = noop.TheStructure.RawData[0]; sizeCommands = ptrDiff(cur, pRing); AUB::addMemoryWrite(aubFile, physRing, pRing, sizeCommands, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintCommandBuffer); auto sizeLRCA = csTraits.sizeLRCA; auto pLRCABase = alignedMalloc(csTraits.sizeLRCA, csTraits.alignLRCA); csTraits.initialize(pLRCABase); csTraits.setRingHead(pLRCABase, 0x0000); csTraits.setRingTail(pLRCABase, static_cast(sizeCommands)); csTraits.setRingBase(pLRCABase, ggttRing); auto ringCtrl = static_cast((sizeRing - 0x1000) | 1); csTraits.setRingCtrl(pLRCABase, ringCtrl); auto ggttLRCA = static_cast(reinterpret_cast(pLRCABase)); auto physLRCA = physAddress; physAddress += sizeLRCA; AUB::reserveAddressGGTT(aubFile, ggttLRCA, sizeLRCA, physLRCA, data); AUB::addMemoryWrite(aubFile, physLRCA, pLRCABase, sizeLRCA, AubMemDump::AddressSpaceValues::TraceNonlocal, csTraits.aubHintLRCA); typename AUB::MiContextDescriptorReg contextDescriptor = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; contextDescriptor.sData.Valid = true; contextDescriptor.sData.ForcePageDirRestore = false; contextDescriptor.sData.ForceRestore = false; contextDescriptor.sData.Legacy = true; contextDescriptor.sData.FaultSupport = 0; contextDescriptor.sData.PrivilegeAccessOrPPGTT = true; contextDescriptor.sData.ADor64bitSupport = AUB::Traits::addressingBits > 32; contextDescriptor.sData.LogicalRingCtxAddress = (uintptr_t)pLRCABase / 4096; contextDescriptor.sData.ContextID = 0; // Submit our exec-list aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 0); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), contextDescriptor.ulData[1]); aubFile.writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2230), 
contextDescriptor.ulData[0]); // Poll until HW complete using AubMemDump::CmdServicesMemTraceRegisterPoll; aubFile.registerPoll( AubMemDump::computeRegisterOffset(mmioBase, 0x2234), //EXECLIST_STATUS 0x100, 0x100, false, CmdServicesMemTraceRegisterPoll::TimeoutActionValues::Abort); alignedFree(pRing); alignedFree(pLRCABase); alignedFree(pGlobalHWStatusPage); aubFile.fileHandle.close(); } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/bxt/000077500000000000000000000000001363734646600252625ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/bxt/CMakeLists.txt000066400000000000000000000003101363734646600300140ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BXT) set(aub_test_config "bxt/1/3/6") include(${OPENCL_AUB_TEST_DIR}/cmake/run_aub_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/execution_model/000077500000000000000000000000001363734646600276505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/execution_model/CMakeLists.txt000066400000000000000000000003541363734646600324120ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_parent_kernel_tests_gen9.cpp ) enqueue_parent_kernel_tests_gen9.cpp000066400000000000000000000060161363734646600370240ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/execution_model/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h" #include 
"opencl/test/unit_test/fixtures/buffer_fixture.h" #include "test.h" using namespace NEO; GEN9TEST_F(AUBParentKernelFixture, EnqueueParentKernel) { ASSERT_NE(nullptr, pKernel); ASSERT_TRUE(pKernel->isParentKernel); const cl_queue_properties properties[3] = {(CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), 0, 0}; DeviceQueue *devQueue = DeviceQueue::create( &pCmdQ->getContext(), pClDevice, properties[0], retVal); SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel(); // Aub execution takes huge time for bigger GWS scheduler.setGws(24); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; size_t lws[3] = {1, 1, 1}; // clang-format off cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc desc = { 0 }; desc.image_array_size = 0; desc.image_depth = 1; desc.image_height = 4; desc.image_width = 4; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; // clang-format on auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); Image *image = Image::create( pContext, {}, 0, 0, surfaceFormat, &desc, nullptr, retVal); Buffer *buffer = BufferHelper>::create(pContext); cl_mem bufferMem = buffer; cl_mem imageMem = image; auto sampler = Sampler::create( pContext, CL_TRUE, CL_ADDRESS_NONE, CL_FILTER_LINEAR, retVal); uint64_t argScalar = 2; pKernel->setArg( 3, sizeof(uint64_t), &argScalar); pKernel->setArg( 2, sizeof(cl_mem), &bufferMem); pKernel->setArg( 1, sizeof(cl_mem), &imageMem); pKernel->setArg( 0, sizeof(cl_sampler), &sampler); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0); pCmdQ->finish(); uint32_t expectedNumberOfEnqueues = 1; uint64_t gpuAddress = devQueue->getQueueBuffer()->getGpuAddress() + offsetof(IGIL_CommandQueue, m_controls.m_TotalNumberOfQueues); AUBCommandStreamFixture::expectMemory((void 
*)(uintptr_t)gpuAddress, &expectedNumberOfEnqueues, sizeof(uint32_t)); AUBCommandStreamFixture::expectMemory((void *)(uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress(), &argScalar, sizeof(size_t)); delete devQueue; delete image; delete buffer; delete sampler; } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/skl/000077500000000000000000000000001363734646600252565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/skl/CMakeLists.txt000066400000000000000000000003441363734646600300170ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_SKL) add_subdirectories() set(aub_test_config "skl/1/3/8") include(${OPENCL_AUB_TEST_DIR}/cmake/run_aub_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/000077500000000000000000000000001363734646600301005ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/CMakeLists.txt000066400000000000000000000003441363734646600326410ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_aub_tests_skl.cpp ) run_kernel_aub_tests_skl.cpp000066400000000000000000000415571363734646600356270ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/two_walker_fixture.h" using namespace NEO; namespace ULT { class AUBRunKernelIntegrateTest : public RunKernelFixture, public ::testing::Test { typedef RunKernelFixture ParentClass; protected: void SetUp() override { ParentClass::SetUp(); } 
void TearDown() override { ParentClass::TearDown(); } }; SKLTEST_F(AUBRunKernelIntegrateTest, ooqExecution) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {16, 1, 1}; size_t localWorkSize[3] = {16, 1, 1}; cl_uint numEventsInWaitList = 0; cl_event *event0 = nullptr; cl_event *event1 = nullptr; cl_event *event2 = nullptr; cl_int retVal = CL_FALSE; std::string kernelFilename; retrieveBinaryKernelFilename(kernelFilename, "simple_kernels_", ".bin"); Program *pProgram = CreateProgramFromBinary(kernelFilename); ASSERT_NE(nullptr, pProgram); cl_device_id device = pClDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); const KernelInfo *pKernelInfo0 = pProgram->getKernelInfo("simple_kernel_0"); ASSERT_NE(nullptr, pKernelInfo0); Kernel *pKernel0 = Kernel::create( pProgram, *pKernelInfo0, &retVal); ASSERT_NE(nullptr, pKernel0); const KernelInfo *pKernelInfo1 = pProgram->getKernelInfo("simple_kernel_1"); ASSERT_NE(nullptr, pKernelInfo1); Kernel *pKernel1 = Kernel::create( pProgram, *pKernelInfo1, &retVal); ASSERT_NE(nullptr, pKernel1); const KernelInfo *pKernelInfo2 = pProgram->getKernelInfo("simple_kernel_2"); ASSERT_NE(nullptr, pKernelInfo2); Kernel *pKernel2 = Kernel::create( pProgram, *pKernelInfo2, &retVal); ASSERT_NE(nullptr, pKernel2); const cl_int NUM_ELEMS = 64; const size_t BUFFER_SIZE = NUM_ELEMS * sizeof(cl_uint); cl_uint *destinationMemory1; cl_uint *destinationMemory2; cl_uint expectedMemory1[NUM_ELEMS]; cl_uint expectedMemory2[NUM_ELEMS]; cl_uint arg0 = 2; cl_float arg1 = 3.0f; cl_uint arg3 = 4; cl_uint arg5 = 0xBBBBBBBB; cl_uint bad_value = 0; // set to non-zero to force failure destinationMemory1 = (cl_uint *)::alignedMalloc(BUFFER_SIZE, 4096); ASSERT_NE(nullptr, destinationMemory1); destinationMemory2 = (cl_uint *)::alignedMalloc(BUFFER_SIZE, 4096); ASSERT_NE(nullptr, destinationMemory2); for (cl_int i = 0; i < NUM_ELEMS; i++) { destinationMemory1[i] = 
0xA1A1A1A1; destinationMemory2[i] = 0xA2A2A2A2; expectedMemory1[i] = (arg0 + static_cast(arg1) + arg3 + bad_value); expectedMemory2[i] = arg5 + bad_value; } auto pDestinationMemory1 = &destinationMemory1[0]; auto pDestinationMemory2 = &destinationMemory2[0]; auto pExpectedMemory1 = &expectedMemory1[0]; auto pExpectedMemory2 = &expectedMemory2[0]; auto intermediateBuffer = Buffer::create( context, CL_MEM_READ_WRITE, BUFFER_SIZE, nullptr, retVal); ASSERT_NE(nullptr, intermediateBuffer); auto destinationBuffer1 = Buffer::create( context, CL_MEM_USE_HOST_PTR, BUFFER_SIZE, pDestinationMemory1, retVal); ASSERT_NE(nullptr, destinationBuffer1); //buffer may not be zero copied pDestinationMemory1 = reinterpret_cast(destinationBuffer1->getGraphicsAllocation()->getGpuAddress()); auto destinationBuffer2 = Buffer::create( context, CL_MEM_USE_HOST_PTR, BUFFER_SIZE, pDestinationMemory2, retVal); ASSERT_NE(nullptr, destinationBuffer2); //buffer may not be zero copied pDestinationMemory2 = reinterpret_cast(destinationBuffer2->getGraphicsAllocation()->getGpuAddress()); cl_mem arg2 = intermediateBuffer; cl_mem arg4 = destinationBuffer1; cl_mem arg6 = destinationBuffer2; //__kernel void simple_kernel_0(const uint arg0, const float arg1, __global uint *dst) //{ dst = arg0 + arg1 } retVal = clSetKernelArg(pKernel0, 0, sizeof(cl_uint), &arg0); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel0, 1, sizeof(cl_float), &arg1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel0, 2, sizeof(cl_mem), &arg2); ASSERT_EQ(CL_SUCCESS, retVal); //__kernel void simple_kernel_1(__global uint *src, const uint arg1, __global uint *dst) //{ dst = src + arg1 } retVal = clSetKernelArg(pKernel1, 0, sizeof(cl_mem), &arg2); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel1, 1, sizeof(cl_uint), &arg3); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel1, 2, sizeof(cl_mem), &arg4); ASSERT_EQ(CL_SUCCESS, retVal); //__kernel void simple_kernel_2(const uint arg1, 
__global uint *dst) //{ dst = arg1 } retVal = clSetKernelArg(pKernel2, 0, sizeof(cl_mem), &arg5); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel2, 1, sizeof(cl_mem), &arg6); ASSERT_EQ(CL_SUCCESS, retVal); // Create a second command queue (beyond the default one) CommandQueue *pCmdQ2 = nullptr; pCmdQ2 = createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); ASSERT_NE(nullptr, pCmdQ2); auto &csr = pCmdQ2->getGpgpuCommandStreamReceiver(); csr.overrideDispatchPolicy(DispatchMode::ImmediateDispatch); retVal = pCmdQ2->enqueueKernel( pKernel0, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, nullptr, event0); ASSERT_EQ(CL_SUCCESS, retVal); // depends on kernel0 retVal = pCmdQ2->enqueueKernel( pKernel1, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, event0, event1); ASSERT_EQ(CL_SUCCESS, retVal); // independent from other kernels, can be run asynchronously retVal = pCmdQ2->enqueueKernel( pKernel2, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, nullptr, event2); ASSERT_EQ(CL_SUCCESS, retVal); HardwareParse::parseCommands(*pCmdQ2); // Compute our memory expecations based on kernel execution auto globalWorkItems = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2]; auto sizeWritten = globalWorkItems * sizeof(cl_uint); AUBCommandStreamFixture::expectMemory(pDestinationMemory1, pExpectedMemory1, sizeWritten); AUBCommandStreamFixture::expectMemory(pDestinationMemory2, pExpectedMemory2, sizeWritten); // ensure we didn't overwrite existing memory if (sizeWritten < BUFFER_SIZE) { auto sizeRemaining = BUFFER_SIZE - sizeWritten; auto pUnwrittenMemory1 = (pDestinationMemory1 + sizeWritten / sizeof(cl_uint)); auto pUnwrittenMemory2 = (pDestinationMemory2 + sizeWritten / sizeof(cl_uint)); auto pExpectedUnwrittenMemory1 = &destinationMemory1[globalWorkItems]; auto pExpectedUnwrittenMemory2 = &destinationMemory2[globalWorkItems]; 
AUBCommandStreamFixture::expectMemory(pUnwrittenMemory1, pExpectedUnwrittenMemory1, sizeRemaining); AUBCommandStreamFixture::expectMemory(pUnwrittenMemory2, pExpectedUnwrittenMemory2, sizeRemaining); } ::alignedFree(destinationMemory1); ::alignedFree(destinationMemory2); delete intermediateBuffer; delete destinationBuffer1; delete destinationBuffer2; delete pKernel0; delete pKernel1; delete pKernel2; delete pProgram; delete pCmdQ2; } SKLTEST_F(AUBRunKernelIntegrateTest, deviceSideVme) { const cl_int testWidth = 32; const cl_int testHeight = 16; const cl_uint workDim = 2; const size_t globalWorkSize[2] = {testWidth, testHeight}; const size_t *localWorkSize = nullptr; cl_uint numEventsInWaitList = 0; auto retVal = CL_INVALID_VALUE; // VME works on 16x16 macroblocks const cl_int mbWidth = testWidth / 16; const cl_int mbHeight = testHeight / 16; // 1 per macroblock (there is 1 macroblock in this test): const int PRED_BUFFER_SIZE = mbWidth * mbHeight; const int SHAPES_BUFFER_SIZE = mbWidth * mbHeight; // 4 per macroblock (valid for 8x8 mode only): const int MV_BUFFER_SIZE = testWidth * mbHeight / 4; const int RESIDUALS_BUFFER_SIZE = MV_BUFFER_SIZE; std::string kernelFilename; retrieveBinaryKernelFilename(kernelFilename, "vme_kernels_", ".bin"); Program *pProgram = CreateProgramFromBinary(kernelFilename); ASSERT_NE(nullptr, pProgram); cl_device_id device = pClDevice; retVal = pProgram->build( 1, &device, "", nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); const KernelInfo *pKernelInfo = pProgram->getKernelInfo("device_side_block_motion_estimate_intel"); EXPECT_NE(nullptr, pKernelInfo); Kernel *pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_NE(pKernel, nullptr); EXPECT_EQ(true, pKernel->isVmeKernel()); cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_order = CL_R; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testWidth; 
imageDesc.image_height = testHeight; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; const int INPUT_SIZE = testWidth * testHeight; ASSERT_GT(INPUT_SIZE, 0); auto srcMemory = (cl_uchar *)::alignedMalloc(INPUT_SIZE, 4096); ASSERT_NE(srcMemory, nullptr); memset(srcMemory, 0x00, INPUT_SIZE); auto refMemory = (cl_uchar *)::alignedMalloc(INPUT_SIZE, 4096); ASSERT_NE(refMemory, nullptr); memset(refMemory, 0x00, INPUT_SIZE); int xMovement = 7; int yMovement = 9; // pixel movement: 0xFF, 0xFF values moved from 0x0 to 7x9 for vme kernel to detect srcMemory[0] = 0xFF; // 1.0 srcMemory[1] = 0xFF; // 1.0 refMemory[xMovement + yMovement * testWidth] = 0xFF; refMemory[xMovement + yMovement * testWidth + 1] = 0xFF; cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto srcImage = Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, srcMemory, retVal); ASSERT_NE(nullptr, srcImage); auto refImage = Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, refMemory, retVal); ASSERT_NE(nullptr, refImage); cl_short2 *predMem = new cl_short2[PRED_BUFFER_SIZE]; for (int i = 0; i < PRED_BUFFER_SIZE; i++) { predMem[i].s[0] = 0; predMem[i].s[1] = 0; } auto predMvBuffer = Buffer::create( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, PRED_BUFFER_SIZE * sizeof(cl_short2), predMem, retVal); ASSERT_NE(nullptr, predMvBuffer); auto motionVectorBuffer = Buffer::create( context, CL_MEM_WRITE_ONLY, MV_BUFFER_SIZE * sizeof(cl_short2), nullptr, retVal); ASSERT_NE(nullptr, motionVectorBuffer); auto residualsBuffer = 
Buffer::create( context, CL_MEM_WRITE_ONLY, RESIDUALS_BUFFER_SIZE * sizeof(cl_short), nullptr, retVal); ASSERT_NE(nullptr, residualsBuffer); auto shapesBuffer = Buffer::create( context, CL_MEM_WRITE_ONLY, SHAPES_BUFFER_SIZE * sizeof(cl_char2), nullptr, retVal); ASSERT_NE(nullptr, shapesBuffer); // kernel decl: //void block_motion_estimate_intel_noacc( // __read_only image2d_t srcImg, // IN // __read_only image2d_t refImg, // IN // __global short2* prediMVbuffer, // IN // __global short2* motion_vector_buffer, // OUT // __global ushort* residuals_buffer, // OUT // __global uchar2* shapes_buffer, // OUT // int iterations, // IN // int partition_mask) // IN cl_mem arg0 = srcImage; cl_mem arg1 = refImage; cl_mem arg2 = predMvBuffer; cl_mem arg3 = motionVectorBuffer; cl_mem arg4 = residualsBuffer; cl_mem arg5 = shapesBuffer; cl_int arg6 = mbHeight; cl_int arg7 = CL_AVC_ME_PARTITION_MASK_8x8_INTEL; retVal = clSetKernelArg(pKernel, 0, sizeof(cl_mem), &arg0); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 1, sizeof(cl_mem), &arg1); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 2, sizeof(cl_mem), &arg2); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 3, sizeof(cl_mem), &arg3); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 4, sizeof(cl_mem), &arg4); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 5, sizeof(cl_mem), &arg5); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 6, sizeof(cl_int), &arg6); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 7, sizeof(cl_int), &arg7); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueKernel( pKernel, workDim, nullptr, globalWorkSize, localWorkSize, numEventsInWaitList, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); cl_short2 destinationMV[MV_BUFFER_SIZE]; cl_short destinationResiduals[RESIDUALS_BUFFER_SIZE]; cl_uchar2 destinationShapes[SHAPES_BUFFER_SIZE]; motionVectorBuffer->forceDisallowCPUCopy = true; 
residualsBuffer->forceDisallowCPUCopy = true; shapesBuffer->forceDisallowCPUCopy = true; retVal = pCmdQ->enqueueReadBuffer(motionVectorBuffer, true, 0, sizeof(destinationMV), destinationMV, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueReadBuffer(residualsBuffer, true, 0, sizeof(destinationResiduals), destinationResiduals, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->enqueueReadBuffer(shapesBuffer, true, 0, sizeof(destinationShapes), destinationShapes, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); // Check if our buffers matches expectations cl_short2 expectedMV[MV_BUFFER_SIZE]; cl_short expectedResiduals[RESIDUALS_BUFFER_SIZE]; cl_uchar2 expectedShapes[SHAPES_BUFFER_SIZE]; // This test uses 8x8 sub blocks (4 per macroblock) for (int i = 0; i < SHAPES_BUFFER_SIZE; i++) { expectedShapes[i].s0 = CL_AVC_ME_MAJOR_8x8_INTEL; expectedShapes[i].s1 = CL_AVC_ME_MINOR_8x8_INTEL; } for (int i = 0; i < MV_BUFFER_SIZE; i++) { expectedResiduals[i] = 0; // Second and fourth block not moved, set 0 as default. expectedMV[i].s0 = 0; expectedMV[i].s1 = 0; // First 8x8 subblock moved by 7x9 vecor as xMovement is 7 and // yMovement is 9 if (i == 0) { // times 4 since VME returns data in quarter pixels. 
expectedMV[i].s0 = 4 * xMovement; expectedMV[i].s1 = 4 * yMovement; } // In this test all other subblocks are empty, in 16x12 mode used in // this test vme should find match at -16 x -12 else { expectedMV[i].s0 = 4 * -16; expectedMV[i].s1 = 4 * -12; } } AUBCommandStreamFixture::expectMemory(destinationMV, expectedMV, sizeof(expectedMV)); AUBCommandStreamFixture::expectMemory(destinationResiduals, expectedResiduals, sizeof(expectedResiduals)); AUBCommandStreamFixture::expectMemory(destinationShapes, expectedShapes, sizeof(expectedShapes)); delete predMvBuffer; delete motionVectorBuffer; delete residualsBuffer; delete shapesBuffer; delete[] predMem; ::alignedFree(srcMemory); srcMemory = nullptr; delete srcImage; ::alignedFree(refMemory); refMemory = nullptr; delete refImage; delete pKernel; delete pProgram; } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/mem_obj/000077500000000000000000000000001363734646600252335ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/mem_obj/CMakeLists.txt000066400000000000000000000003421363734646600277720ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/create_image_aub_tests.cpp ) compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/mem_obj/create_image_aub_tests.cpp000066400000000000000000000412661363734646600324260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/aub_tests/command_stream/aub_command_stream_fixture.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" 
#include "opencl/test/unit_test/mocks/mock_gmm.h" #include "test.h" #include extern GFXCORE_FAMILY renderCoreFamily; using namespace NEO; static const unsigned int testImageDimensions = 17; auto const elementSize = 4; //sizeof CL_RGBA * CL_UNORM_INT8 static cl_mem_object_type ImgArrayTypes[] = { CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; struct AUBCreateImage : public CommandDeviceFixture, public AUBCommandStreamFixture, public ::testing::Test { typedef AUBCommandStreamFixture CommandStreamFixture; using AUBCommandStreamFixture::SetUp; void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } CommandDeviceFixture::SetUp(cl_command_queue_properties(0)); CommandStreamFixture::SetUp(pCmdQ); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 10; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; } void TearDown() override { image.reset(); CommandStreamFixture::TearDown(); CommandDeviceFixture::TearDown(); } std::unique_ptr image; cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; unsigned char pHostPtr[512 * testImageDimensions * elementSize * 4]; }; struct AUBCreateImageArray : public AUBCreateImage, public ::testing::WithParamInterface { void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } AUBCreateImage::SetUp(); } void TearDown() override { AUBCreateImage::TearDown(); } }; INSTANTIATE_TEST_CASE_P( CreateImgTest_Arrays, AUBCreateImageArray, testing::ValuesIn(ImgArrayTypes)); HWTEST_P(AUBCreateImageArray, CheckArrayImages) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); 
imageDesc.image_type = GetParam(); if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { imageDesc.image_height = 1; } cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto imgInfo = MockGmm::initImgInfo(imageDesc, 0, surfaceFormat); imgInfo.linearStorage = !hwHelper.tilingAllowed(false, Image::isImage1d(imageDesc), false); auto queryGmm = MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo); //allocate host_ptr auto pixelSize = 4; auto storageSize = imageDesc.image_array_size * pixelSize * imageDesc.image_width * imageDesc.image_height; std::unique_ptr hostPtr(new int[storageSize]); for (auto i = 0u; i < storageSize; i++) { hostPtr[i] = i; } image.reset(Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr.get(), retVal)); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); auto imageHeight = imageDesc.image_height; std::unique_ptr readMemory(new uint32_t[image->getSize() / sizeof(uint32_t)]); auto allocation = createResidentAllocationAndStoreItInCsr(readMemory.get(), image->getSize()); size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, 1, 1}; if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { imgRegion[1] = imageDesc.image_array_size; } else if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { imgRegion[1] = imageDesc.image_height; imgRegion[2] = imageDesc.image_array_size; } else { 
ASSERT_TRUE(false); } retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, imgInfo.rowPitch, imgInfo.slicePitch, readMemory.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); allocation = pCommandStreamReceiver->getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != readMemory.get()) { allocation = allocation->next; } auto destGpuAddress = reinterpret_cast(allocation->getGpuAddress()); pCmdQ->flush(); auto address = (int *)image->getCpuAddress(); auto currentCounter = 0; for (auto array = 0u; array < imageDesc.image_array_size; array++) { for (auto height = 0u; height < imageHeight; height++) { for (auto element = 0u; element < imageDesc.image_width; element++) { auto offset = (array * imgInfo.slicePitch + element * pixelSize + height * imgInfo.rowPitch) / 4; if (MemoryPool::isSystemMemoryPool(image->getGraphicsAllocation()->getMemoryPool()) == false) { AUBCommandStreamFixture::expectMemory(&destGpuAddress[offset], ¤tCounter, pixelSize); } else { EXPECT_EQ(currentCounter, address[offset]); } currentCounter++; } } } } struct AUBCreateImageHostPtr : public AUBCreateImage, public ::testing::WithParamInterface { void SetUp() override { if (!(defaultHwInfo->capabilityTable.supportsImages)) { GTEST_SKIP(); } flags = GetParam(); AUBCreateImage::SetUp(); } void TearDown() override { AUBCreateImage::TearDown(); } uint64_t flags; }; static cl_mem_flags useHostPtrFlags[] = { 0 | CL_MEM_USE_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR}; static cl_mem_flags copyHostPtrFlags[] = { 0 | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR}; using UseHostPtrTest = AUBCreateImageHostPtr; using CopyHostPtrTest = AUBCreateImageHostPtr; INSTANTIATE_TEST_CASE_P( CreateImgTest_UseHostPtr, UseHostPtrTest, testing::ValuesIn(useHostPtrFlags)); INSTANTIATE_TEST_CASE_P( CreateImgTest_CopyHostPtr, CopyHostPtrTest, testing::ValuesIn(copyHostPtrFlags)); HWTEST_P(CopyHostPtrTest, imageWithDoubledRowPitchThatIsCreatedWithCopyHostPtrFlagHasProperRowPitchSet) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto imgInfo = MockGmm::initImgInfo(imageDesc, 0, surfaceFormat); MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo); auto lineWidth = imageDesc.image_width * elementSize; auto passedRowPitch = imgInfo.rowPitch * 2; imageDesc.image_row_pitch = passedRowPitch; char counter = 0; char *data = (char *)pHostPtr; auto heightToCopy = imageDesc.image_height; while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { data[i] = counter++; } data += passedRowPitch; } image.reset(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, pHostPtr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_EQ(image->getHostPtrRowPitch(), (size_t)passedRowPitch); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); //now check if data is properly propagated to image heightToCopy = imageDesc.image_height; auto imageStorage = static_cast(image->getCpuAddress()); data = (char *)pHostPtr; uint8_t 
*readMemory = nullptr; bool isGpuCopy = image->isTiledAllocation() || !MemoryPool::isSystemMemoryPool(image->getGraphicsAllocation()->getMemoryPool()); if (isGpuCopy) { readMemory = new uint8_t[testImageDimensions * testImageDimensions * elementSize * 4]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth ? imageDesc.image_depth : 1}; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); imageStorage = readMemory; } while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { if (isGpuCopy) { AUBCommandStreamFixture::expectMemory(&imageStorage[i], &data[i], 1); } else { EXPECT_EQ(imageStorage[i], data[i]); } } data += passedRowPitch; imageStorage += lineWidth; } if (readMemory) delete readMemory; } HWTEST_P(UseHostPtrTest, imageWithRowPitchCreatedWithUseHostPtrFlagCopiedActuallyVerifyMapImageData) { imageDesc.image_width = 546; imageDesc.image_height = 1; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto imgInfo = MockGmm::initImgInfo(imageDesc, 0, surfaceFormat); MockGmm::queryImgParams(pDevice->getGmmClientContext(), imgInfo); auto passedRowPitch = imgInfo.rowPitch + 32; imageDesc.image_row_pitch = passedRowPitch; unsigned char *pUseHostPtr = new unsigned char[passedRowPitch * imageDesc.image_height * elementSize]; char counter = 0; char *data = (char *)pUseHostPtr; auto heightToCopy = imageDesc.image_height; while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { data[i] = counter++; } data += passedRowPitch; } image.reset(Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, 
pUseHostPtr, retVal)); ASSERT_EQ(CL_SUCCESS, retVal); //now check if data is properly propagated to image auto mapFlags = CL_MAP_READ; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {imageDesc.image_width, imageDesc.image_height, 1}; size_t imageRowPitch = 0; size_t imageSlicePitch = 0; auto ptr = pCmdQ->enqueueMapImage( image.get(), true, mapFlags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, retVal); if (image->isMemObjZeroCopy()) { EXPECT_EQ(image->getCpuAddress(), ptr); } else { EXPECT_NE(image->getCpuAddress(), ptr); } size_t imageRowPitchRef = 0; image->getImageInfo(CL_IMAGE_ROW_PITCH, sizeof(imageRowPitchRef), &imageRowPitchRef, nullptr); // Only ZeroCopy HOST_PTR image has the same row_pitch as the one from map, otherwise mapped ptr may have different row_pitch if (image->isMemObjZeroCopy()) { EXPECT_EQ(imageRowPitch, imageRowPitchRef); } size_t imageSlicePitchRef = 0; image->getImageInfo(CL_IMAGE_SLICE_PITCH, sizeof(imageSlicePitchRef), &imageSlicePitchRef, nullptr); // Only ZeroCopy HOST_PTR image has the same slice_pitch as the one from map, otherwise mapped ptr may have different slice_pitch if (image->isMemObjZeroCopy()) { EXPECT_EQ(imageSlicePitch, imageSlicePitchRef); } heightToCopy = imageDesc.image_height; char *imageStorage = (char *)ptr; data = (char *)pUseHostPtr; bool isGpuCopy = image->isTiledAllocation() || !MemoryPool::isSystemMemoryPool(image->getGraphicsAllocation()->getMemoryPool()); while (heightToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * elementSize; i++) { if (isGpuCopy) { AUBCommandStreamFixture::expectMemory(&imageStorage[i], &data[i], 1); } else { EXPECT_EQ(imageStorage[i], data[i]); } } data += passedRowPitch; imageStorage += imageRowPitch; } retVal = clEnqueueUnmapMemObject(pCmdQ, image.get(), ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete[] pUseHostPtr; } HWTEST_F(AUBCreateImage, image3DCreatedWithDoubledSlicePitchWhenQueriedForDataReturnsProperData) { 
imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_depth = testImageDimensions; auto imageSize = imageDesc.image_width * imageDesc.image_height * imageDesc.image_depth * 4 * 2; auto computedSlicePitch = imageDesc.image_width * alignUp(imageDesc.image_height, 4) * 4; auto inputSlicePitch = computedSlicePitch * 2; imageDesc.image_slice_pitch = inputSlicePitch; auto host_ptr = alignedMalloc(inputSlicePitch * imageDesc.image_depth, 4096); auto counter = 0; char *data = (char *)host_ptr; auto depthToCopy = imageDesc.image_depth; while (depthToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * 4 * imageDesc.image_height; i++) { data[i] = counter++; } data += inputSlicePitch; } cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); image.reset(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, host_ptr, retVal)); depthToCopy = imageDesc.image_depth; auto imageStorage = (uint8_t *)image->getCpuAddress(); data = (char *)host_ptr; uint8_t *readMemory = nullptr; bool isGpuCopy = image->isTiledAllocation() || !MemoryPool::isSystemMemoryPool(image->getGraphicsAllocation()->getMemoryPool()); if (isGpuCopy) { readMemory = new uint8_t[imageSize]; size_t imgOrigin[] = {0, 0, 0}; size_t imgRegion[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_depth}; retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, imgOrigin, imgRegion, 0, computedSlicePitch, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pCmdQ->flush(); EXPECT_EQ(CL_SUCCESS, retVal); imageStorage = readMemory; } while (depthToCopy--) { for (unsigned int i = 0; i < imageDesc.image_width * 4 * imageDesc.image_height; i++) { if (isGpuCopy) { AUBCommandStreamFixture::expectMemory(&imageStorage[i], &data[i], 1); } 
else { EXPECT_EQ(imageStorage[i], data[i]); } } data += inputSlicePitch; imageStorage += computedSlicePitch; } alignedFree(host_ptr); if (readMemory) { delete readMemory; } } compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/test_mode.h000066400000000000000000000003751363734646600257640ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/tests_configuration.h" namespace NEO { constexpr TestMode defaultTestMode = TestMode::AubTests; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/unified_memory/000077500000000000000000000000001363734646600266365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/unified_memory/CMakeLists.txt000066400000000000000000000004441363734646600314000ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_aub_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_copy_aub_tests.cpp ) unified_memory_aub_tests.cpp000066400000000000000000000047261363734646600343600ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/unified_memory/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/unified_memory_fixture.h" #include "test.h" namespace NEO { class UnifiedMemoryAubTest : public UnifiedMemoryAubFixture, public ::testing::Test { public: using UnifiedMemoryAubFixture::TearDown; std::vector values; void SetUp() override { UnifiedMemoryAubFixture::SetUp(); values = std::vector(dataSize, 11); }; }; HWTEST_F(UnifiedMemoryAubTest, givenDeviceMemoryAllocWhenWriteIntoItThenValuesMatch) { auto unifiedMemoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; auto unifiedMemoryPtr = 
allocateUSM(unifiedMemoryType); writeToUsmMemory(values, unifiedMemoryPtr, unifiedMemoryType); expectMemory(unifiedMemoryPtr, values.data(), dataSize); freeUSM(unifiedMemoryPtr, unifiedMemoryType); } HWTEST_F(UnifiedMemoryAubTest, givenSharedMemoryAllocWhenWriteIntoCPUPartThenValuesMatchAfterUsingAllocAsKernelParam) { auto unifiedMemoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; auto unifiedMemoryPtr = allocateUSM(unifiedMemoryType); writeToUsmMemory(values, unifiedMemoryPtr, unifiedMemoryType); expectNotEqualMemory(unifiedMemoryPtr, values.data(), dataSize); auto mockPtr = std::make_unique(dataSize); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, mockPtr.get(), unifiedMemoryPtr, dataSize, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); expectMemory(unifiedMemoryPtr, values.data(), dataSize); freeUSM(unifiedMemoryPtr, unifiedMemoryType); } HWTEST_F(UnifiedMemoryAubTest, givenSharedMemoryAllocWhenWriteIntoGPUPartThenValuesMatchAfterUsingAlloc) { auto unifiedMemoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; auto unifiedMemoryPtr = allocateUSM(unifiedMemoryType); std::vector input(dataSize, 11); retVal = clEnqueueMemcpyINTEL(this->pCmdQ, true, unifiedMemoryPtr, input.data(), dataSize, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); expectNotEqualMemory(unifiedMemoryPtr, unifiedMemoryPtr, dataSize); expectMemory(unifiedMemoryPtr, input.data(), dataSize); auto mockRead = reinterpret_cast(unifiedMemoryPtr)[0]; mockRead = 0; expectMemory(unifiedMemoryPtr, unifiedMemoryPtr, dataSize); freeUSM(unifiedMemoryPtr, unifiedMemoryType); } } // namespace NEO unified_memory_copy_aub_tests.cpp000066400000000000000000000040231363734646600354000ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/aub_tests/unified_memory/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/aub_tests/fixtures/unified_memory_fixture.h" #include "test.h" namespace NEO { class 
UnifiedMemoryCopyAubTest : public UnifiedMemoryAubFixture, public ::testing::TestWithParam> { public: void *srcPtr, *dstPtr; InternalMemoryType srcMemoryType, dstMemoryType; std::vector srcValues, dstValues; void SetUp() override { UnifiedMemoryAubFixture::SetUp(); srcMemoryType = std::get<0>(GetParam()); dstMemoryType = std::get<1>(GetParam()); srcPtr = this->allocateUSM(srcMemoryType); dstPtr = this->allocateUSM(dstMemoryType); srcValues = std::vector(dataSize, 11); dstValues = std::vector(dataSize, 22); this->writeToUsmMemory(srcValues, srcPtr, srcMemoryType); this->writeToUsmMemory(dstValues, dstPtr, dstMemoryType); } void TearDown() override { this->freeUSM(srcPtr, srcMemoryType); this->freeUSM(dstPtr, dstMemoryType); UnifiedMemoryAubFixture::TearDown(); } }; HWTEST_P(UnifiedMemoryCopyAubTest, givenTwoUnifiedMemoryAllocsWhenCopyingOneToAnotherThenValuesMatch) { clEnqueueMemcpyINTEL(this->pCmdQ, true, dstPtr, srcPtr, dataSize, 0, nullptr, nullptr); expectMemory(dstPtr, srcValues.data(), dataSize); } InternalMemoryType memoryTypes[] = {InternalMemoryType::HOST_UNIFIED_MEMORY, InternalMemoryType::DEVICE_UNIFIED_MEMORY, InternalMemoryType::SHARED_UNIFIED_MEMORY, InternalMemoryType::NOT_SPECIFIED}; INSTANTIATE_TEST_CASE_P(UnifiedMemoryCopyAubTest, UnifiedMemoryCopyAubTest, ::testing::Combine(::testing::ValuesIn(memoryTypes), ::testing::ValuesIn(memoryTypes))); } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/000077500000000000000000000000001363734646600236225ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/CMakeLists.txt000066400000000000000000000011121363734646600263550ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_built_in ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.h ${CMAKE_CURRENT_SOURCE_DIR}/built_ins_file_names.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/built_in_kernels_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/built_in_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/get_built_ins_file_names.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sip_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_built_in}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/built_in_kernels_tests.cpp000066400000000000000000000110611363734646600310770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include "gtest/gtest.h" namespace BuiltinKernelsSimulation { __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 pitch) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); int4 srcCoord = {x, y, z, 0}; srcCoord = srcCoord + srcOffset; uint DstOffset = dstOffset + (y * pitch.x) + (z * pitch.y); const uint4 c = read_imageui(input, srcCoord); if ((ulong)(dst + dstOffset) & 0x0000000f) { *((__global uchar *)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 
10)) = convert_uchar_sat((c.z >> 16) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8) & 0xff); *((__global uchar *)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff); } else { *(__global uint4 *)(dst + DstOffset + x * 16) = c; } } TEST(BuiltInKernelTests, WhenBuiltInCopiesImageThenDataIsWrittenIntoCorrectMemory) { uint width = 3; uint height = 3; uint depth = 3; uint bytesPerChannel = 4; uint channels = 4; uint bpp = bytesPerChannel * channels; globalID[0] = 0; globalID[1] = 0; globalID[2] = 0; localID[0] = 0; localID[1] = 0; localID[2] = 0; localSize[0] = width; localSize[1] = height; localSize[2] = depth; size_t size = width * height * depth * bytesPerChannel * channels; auto ptrSrc = std::make_unique(64 + size + 64); auto ptrDst = std::make_unique(64 + size + 64); auto ptrZero = std::make_unique(64); memset(ptrZero.get(), 0, 64); memset(ptrDst.get(), 0, 64 + size + 64); memset(ptrSrc.get(), 0, 64 + size + 64); char *temp = ptrSrc.get() + 64; for (uint i = 0; i < size; i++) { temp[i] = i; } image im; im.ptr = ptrSrc.get() + 64; im.bytesPerChannel = bytesPerChannel; im.channels = channels; im.width = width; im.height = height; im.depth = depth; uint2 Pitch(0, 0); Pitch.x = width * bpp; Pitch.y = width * height * bpp; for (uint dimZ = 0; dimZ < depth; dimZ++) { globalID[1] = 0; for (uint dimY = 0; dimY < height; dimY++) { globalID[0] = 0; for (uint dimX = 0; dimX < width; dimX++) { CopyImage3dToBuffer16Bytes(&im, (uchar *)ptrDst.get() + 64, {0, 0, 0, 0}, 0, Pitch); globalID[0]++; } globalID[1]++; } globalID[2]++; } EXPECT_EQ(0, memcmp(im.ptr, 
ptrDst.get() + 64, size)) << "Data not copied properly!\n"; EXPECT_EQ(0, memcmp(ptrDst.get(), ptrZero.get(), 64)) << "Data written before passed ptr!\n"; EXPECT_EQ(0, memcmp(ptrDst.get() + size + 64, ptrZero.get(), 64)) << "Data written after passed ptr!\n"; } } // namespace BuiltinKernelsSimulation compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/built_in_tests.cpp000066400000000000000000002762331363734646600273720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/string.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/built_ins/vme_builtin.h" #include "opencl/source/built_ins/vme_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/built_ins/built_ins_file_names.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include 
"opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "compiler_options.h" #include "gtest/gtest.h" #include "os_inc.h" #include using namespace NEO; class BuiltInTests : public BuiltInFixture, public DeviceFixture, public ContextFixture, public ::testing::Test { using BuiltInFixture::SetUp; using ContextFixture::SetUp; public: BuiltInTests() { // reserving space here to avoid the appearance of a memory management // leak being reported allBuiltIns.reserve(5000); } void SetUp() override { DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); BuiltInFixture::SetUp(pDevice); } void TearDown() override { allBuiltIns.clear(); BuiltInFixture::TearDown(); ContextFixture::TearDown(); DeviceFixture::TearDown(); } void AppendBuiltInStringFromFile(std::string builtInFile, size_t &size) { std::string src; auto pData = loadDataFromFile( builtInFile.c_str(), size); ASSERT_NE(nullptr, pData); src = (const char *)pData.get(); size_t start = src.find("R\"===("); size_t stop = src.find(")===\""); // assert that pattern was found ASSERT_NE(std::string::npos, start); ASSERT_NE(std::string::npos, stop); start += strlen("R\"===("); size = stop - start; allBuiltIns.append(src, start, size); } bool compareBuiltinOpParams(const BuiltinOpParams &left, const BuiltinOpParams &right) { return left.srcPtr == right.srcPtr && left.dstPtr == right.dstPtr && left.size == right.size && left.srcOffset == right.srcOffset && left.dstOffset == right.dstOffset && left.dstMemObj == right.dstMemObj && left.srcMemObj == right.srcMemObj; } DebugManagerStateRestore restore; std::string allBuiltIns; }; struct VmeBuiltInTests : BuiltInTests { void SetUp() override { BuiltInTests::SetUp(); if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } } }; TEST_F(BuiltInTests, SourceConsistency) { size_t size = 0; for (auto &fileName : 
getBuiltInFileNames()) { AppendBuiltInStringFromFile( fileName, size); ASSERT_NE(0u, size); } // convert /r/n to /n size_t start_pos = 0; while ((start_pos = allBuiltIns.find("\r\n", start_pos)) != std::string::npos) { allBuiltIns.replace(start_pos, 2, "\n"); } // convert /r to /n start_pos = 0; while ((start_pos = allBuiltIns.find("\r", start_pos)) != std::string::npos) { allBuiltIns.replace(start_pos, 1, "\n"); } uint64_t hash = Hash::hash(allBuiltIns.c_str(), allBuiltIns.length()); auto hashName = getBuiltInHashFileName(hash); //Fisrt fail, if we are inconsistent EXPECT_EQ(true, fileExists(hashName)) << "**********\nBuilt in kernels need to be regenerated for the mock compilers!\n**********"; //then write to file if needed #define GENERATE_NEW_HASH_FOR_BUILT_INS 0 #if GENERATE_NEW_HASH_FOR_BUILT_INS std::cout << "writing builtins to file: " << hashName << std::endl; const char *pData = allBuiltIns.c_str(); writeDataToFile(hashName.c_str(), pData, allBuiltIns.length()); #endif } TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBuffer) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pDevice); MockBuffer *srcPtr = new MockBuffer(); MockBuffer *dstPtr = new MockBuffer(); MockBuffer &src = *srcPtr; MockBuffer &dst = *dstPtr; MultiDispatchInfo multiDispatchInfo; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcMemObj = &src; builtinOpsParams.dstMemObj = &dst; builtinOpsParams.srcPtr = src.getCpuAddress(); builtinOpsParams.dstPtr = dst.getCpuAddress(); builtinOpsParams.size = {dst.getSize(), 0, 0}; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); size_t leftSize = reinterpret_cast(dst.getCpuAddress()) % MemoryConstants::cacheLineSize; if (leftSize > 0) { leftSize = MemoryConstants::cacheLineSize - leftSize; } size_t rightSize = (reinterpret_cast(dst.getCpuAddress()) + dst.getSize()) % MemoryConstants::cacheLineSize; size_t middleSize = 
(dst.getSize() - leftSize - rightSize) / (sizeof(uint32_t) * 4); int i = 0; int leftKernel = 0; int middleKernel = 0; int rightKernel = 0; if (leftSize > 0) { middleKernel++; rightKernel++; } else { leftKernel = -1; } if (middleSize > 0) { rightKernel++; } else { middleKernel = -1; } if (rightSize == 0) { rightKernel = -1; } for (auto &dispatchInfo : multiDispatchInfo) { EXPECT_EQ(1u, dispatchInfo.getDim()); if (i == leftKernel) { EXPECT_EQ(Vec3(leftSize, 1, 1), dispatchInfo.getGWS()); } else if (i == middleKernel) { EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo.getGWS()); } else if (i == rightKernel) { EXPECT_EQ(Vec3(rightSize, 1, 1), dispatchInfo.getGWS()); } i++; } EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); delete srcPtr; delete dstPtr; } HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pDevice); auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); std::vector builtinKernels; MockBuffer mockBuffer[3]; mockBuffer[0].getGraphicsAllocation()->setSize(0x1000); mockBuffer[1].getGraphicsAllocation()->setSize(0x20000); mockBuffer[2].getGraphicsAllocation()->setSize(0x30000); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; for (auto &buffer : mockBuffer) { memObjsForAuxTranslation.insert(&buffer); } EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(3u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { auto kernel = dispatchInfo.getKernel(); builtinKernels.push_back(kernel); MemObj *buffer = 
*memObjsForAuxTranslation.find(castToObject(kernel->getKernelArguments().at(0).object)); EXPECT_NE(nullptr, buffer); memObjsForAuxTranslation.erase(buffer); cl_mem clMem = buffer; EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object); EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(buffer->getSize(), 512) / 16; Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); // always pick different kernel EXPECT_EQ(3u, builtinKernels.size()); EXPECT_NE(builtinKernels[0], builtinKernels[1]); EXPECT_NE(builtinKernels[0], builtinKernels[2]); EXPECT_NE(builtinKernels[1], builtinKernels[2]); } HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pDevice); auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); std::vector builtinKernels; MockBuffer mockBuffer[3]; mockBuffer[0].getGraphicsAllocation()->setSize(0x1000); mockBuffer[1].getGraphicsAllocation()->setSize(0x20000); mockBuffer[2].getGraphicsAllocation()->setSize(0x30000); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; for (auto &buffer : mockBuffer) { memObjsForAuxTranslation.insert(&buffer); } EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(3u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { auto kernel = dispatchInfo.getKernel(); builtinKernels.push_back(kernel); MemObj *buffer = 
*memObjsForAuxTranslation.find(castToObject(kernel->getKernelArguments().at(1).object)); EXPECT_NE(nullptr, buffer); memObjsForAuxTranslation.erase(buffer); cl_mem clMem = buffer; EXPECT_EQ(clMem, kernel->getKernelArguments().at(0).object); EXPECT_EQ(clMem, kernel->getKernelArguments().at(1).object); EXPECT_EQ(1u, dispatchInfo.getDim()); size_t xGws = alignUp(buffer->getSize(), 4) / 4; Vec3 gws = {xGws, 1, 1}; EXPECT_EQ(gws, dispatchInfo.getGWS()); } EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); // always pick different kernel EXPECT_EQ(3u, builtinKernels.size()); EXPECT_NE(builtinKernels[0], builtinKernels[1]); EXPECT_NE(builtinKernels[0], builtinKernels[2]); EXPECT_NE(builtinKernels[1], builtinKernels[2]); } HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pDevice); auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MockBuffer mockBuffer[3]; std::vector builtinKernels; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpsParams; for (auto &buffer : mockBuffer) { memObjsForAuxTranslation.insert(&buffer); } builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(6u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { builtinKernels.push_back(dispatchInfo.getKernel()); } EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); // nonAux 
vs Aux instance EXPECT_EQ(6u, builtinKernels.size()); EXPECT_NE(builtinKernels[0], builtinKernels[3]); EXPECT_NE(builtinKernels[1], builtinKernels[4]); EXPECT_NE(builtinKernels[2], builtinKernels[5]); } HWTEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pDevice); auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MockBuffer mockBuffer; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpsParams; memObjsForAuxTranslation.insert(&mockBuffer); builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::None; EXPECT_THROW(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams), std::exception); } class MockAuxBuilInOp : public BuiltInOp { public: using BuiltinDispatchInfoBuilder::populate; using BaseClass = BuiltInOp; using BaseClass::baseKernel; using BaseClass::convertToAuxKernel; using BaseClass::convertToNonAuxKernel; using BaseClass::resizeKernelInstances; using BaseClass::usedKernels; MockAuxBuilInOp(BuiltIns &kernelsLib, Context &context, Device &device) : BaseClass(kernelsLib, device) {} }; TEST_F(BuiltInTests, whenAuxBuiltInIsConstructedThenResizeKernelInstancedTo5) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); } HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); MemObjsForAuxTranslation memObjsForAuxTranslation; BuiltinOpParams builtinOpsParams; 
MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); MockBuffer mockBuffer[7]; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; for (auto &buffer : mockBuffer) { memObjsForAuxTranslation.insert(&buffer); } EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(7u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(7u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); } TEST_F(BuiltInTests, givenkAuxBuiltInWhenResizeIsCalledThenCloneAllNewInstancesFromBaseKernel) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); size_t newSize = mockAuxBuiltInOp.convertToAuxKernel.size() + 3; mockAuxBuiltInOp.resizeKernelInstances(newSize); EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToAuxKernel.size()); for (auto &convertToAuxKernel : mockAuxBuiltInOp.convertToAuxKernel) { EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToAuxKernel->getKernelInfo()); } EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToNonAuxKernel.size()); for (auto &convertToNonAuxKernel : mockAuxBuiltInOp.convertToNonAuxKernel) { EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToNonAuxKernel->getKernelInfo()); } } HWTEST_F(BuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalledThenLockOnBuiltin) { BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pDevice); auto mockAuxBuiltInOp = new MockAuxBuilInOp(*pBuiltIns, *pContext, *pDevice); pBuiltIns->BuiltinOpsBuilders[static_cast(EBuiltInOps::AuxTranslation)].first.reset(mockAuxBuiltInOp); auto mockProgram = clUniquePtr(new MockProgram(*pDevice->getExecutionEnvironment())); auto mockBuiltinKernel = MockKernel::create(*pDevice, mockProgram.get()); mockAuxBuiltInOp->usedKernels.at(0).reset(mockBuiltinKernel); MockKernelWithInternals mockKernel(*pClDevice, pContext); MockCommandQueueHw cmdQ(pContext, pClDevice, nullptr); size_t 
gws[3] = {1, 0, 0}; MockBuffer buffer; cl_mem clMem = &buffer; buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); mockKernel.kernelInfo.kernelArgInfo.resize(1); mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1); mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; mockKernel.mockKernel->initialize(); mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); mockKernel.mockKernel->auxTranslationRequired = false; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, mockBuiltinKernel->takeOwnershipCalls); EXPECT_EQ(0u, mockBuiltinKernel->releaseOwnershipCalls); mockKernel.mockKernel->auxTranslationRequired = true; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, mockBuiltinKernel->takeOwnershipCalls); EXPECT_EQ(1u, mockBuiltinKernel->releaseOwnershipCalls); } HWTEST_F(BuiltInTests, givenAuxTranslationKernelWhenSettingKernelArgsThenSetValidMocs) { if (this->pDevice->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpParamsToAux; builtinOpParamsToAux.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; BuiltinOpParams builtinOpParamsToNonAux; builtinOpParamsToNonAux.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal)); memObjsForAuxTranslation.insert(buffer.get()); mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParamsToAux); 
mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParamsToNonAux); { // read args auto argNum = 0; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } { // write args auto argNum = 1; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } } HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode) { if (this->pDevice->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; MockAuxBuilInOp 
mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpParams; builtinOpParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal)); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); auto gmm = new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false); gmm->isRenderCompressed = true; buffer->getGraphicsAllocation()->setDefaultGmm(gmm); memObjsForAuxTranslation.insert(buffer.get()); mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParams); { // read arg auto argNum = 0; auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } { // write arg auto argNum = 1; auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } } HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSetValidAuxMode) { if (this->pDevice->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; 
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpParams; builtinOpParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(pContext, 0, MemoryConstants::pageSize, nullptr, retVal)); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); auto gmm = new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false); gmm->isRenderCompressed = true; buffer->getGraphicsAllocation()->setDefaultGmm(gmm); memObjsForAuxTranslation.insert(buffer.get()); mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpParams); { // read arg auto argNum = 0; auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } { // write arg auto argNum = 1; auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } } TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBufferAligned) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pDevice); AlignedBuffer src; AlignedBuffer dst; MultiDispatchInfo multiDispatchInfo; BuiltinOpParams builtinOpsParams; 
builtinOpsParams.srcMemObj = &src; builtinOpsParams.dstMemObj = &dst; builtinOpsParams.size = {src.getSize(), 0, 0}; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(1u, multiDispatchInfo.size()); const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); EXPECT_EQ(1u, dispatchInfo->getDim()); size_t leftSize = reinterpret_cast(dst.getCpuAddress()) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, leftSize); size_t rightSize = (reinterpret_cast(dst.getCpuAddress()) + dst.getSize()) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, rightSize); size_t middleElSize = sizeof(uint32_t) * 4; size_t middleSize = dst.getSize() / middleElSize; EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo->getGWS()); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); } TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToBufferStatelessIsUsedThenParamsAreCorrect) { if (is32bit) { GTEST_SKIP(); } BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBufferStateless, *pDevice); uint64_t bigSize = 10ull * MemoryConstants::gigaByte; uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; uint64_t size = 4ull * MemoryConstants::gigaByte; MockBuffer srcBuffer; srcBuffer.size = static_cast(bigSize); MockBuffer dstBuffer; dstBuffer.size = static_cast(bigSize); MultiDispatchInfo multiDispatchInfo; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcMemObj = &srcBuffer; builtinOpsParams.srcOffset = {static_cast(bigOffset), 0, 0}; builtinOpsParams.dstMemObj = &dstBuffer; builtinOpsParams.dstOffset = {0, 0, 0}; builtinOpsParams.size = {static_cast(size), 0, 0}; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(1u, multiDispatchInfo.size()); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); } TEST_F(BuiltInTests, 
givenBigOffsetAndSizeWhenBuilderCopyBufferToBufferRectStatelessIsUsedThenParamsAreCorrect) { if (is32bit) { GTEST_SKIP(); } BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRectStateless, *pDevice); uint64_t bigSize = 10ull * MemoryConstants::gigaByte; uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; uint64_t size = 4ull * MemoryConstants::gigaByte; MockBuffer srcBuffer; srcBuffer.size = static_cast(bigSize); MockBuffer dstBuffer; dstBuffer.size = static_cast(bigSize); BuiltinOpParams dc; dc.srcMemObj = &srcBuffer; dc.dstMemObj = &dstBuffer; dc.srcOffset = {static_cast(bigOffset), 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {static_cast(size), 1, 1}; dc.srcRowPitch = static_cast(size); dc.srcSlicePitch = 0; dc.dstRowPitch = static_cast(size); dc.dstSlicePitch = 0; MultiDispatchInfo multiDispatchInfo; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc)); EXPECT_EQ(1u, multiDispatchInfo.size()); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc)); } TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderFillBufferStatelessIsUsedThenParamsAreCorrect) { if (is32bit) { GTEST_SKIP(); } BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBufferStateless, *pDevice); uint64_t bigSize = 10ull * MemoryConstants::gigaByte; uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; uint64_t size = 4ull * MemoryConstants::gigaByte; MockBuffer srcBuffer; srcBuffer.size = static_cast(bigSize); MockBuffer dstBuffer; dstBuffer.size = static_cast(bigSize); BuiltinOpParams dc; dc.srcMemObj = &srcBuffer; dc.dstMemObj = &dstBuffer; dc.dstOffset = {static_cast(bigOffset), 0, 0}; dc.size = {static_cast(size), 0, 0}; MultiDispatchInfo multiDispatchInfo; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc)); EXPECT_EQ(1u, multiDispatchInfo.size()); 
EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
}

// Stateless buffer-to-image copy with a 4 GB source offset: expects one dispatch,
// unchanged params, and a kernel flagged as compiled for >4 GB buffers whose first
// argument is not a pure stateful buffer access. Skipped on 32-bit builds.
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStatelessIsUsedThenParamsAreCorrect) {
    if (is32bit) {
        GTEST_SKIP();
    }

    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
    uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;

    MockBuffer srcBuffer;
    srcBuffer.size = static_cast<size_t>(bigSize);
    std::unique_ptr<Image> pDstImage(Image2dHelper<>::create(pContext));
    ASSERT_NE(nullptr, pDstImage.get());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3dStateless, *pDevice);

    BuiltinOpParams dc;
    dc.srcPtr = &srcBuffer;
    dc.dstMemObj = pDstImage.get();
    dc.srcOffset = {static_cast<size_t>(bigOffset), 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {1, 1, 1};
    dc.dstRowPitch = 0;
    dc.dstSlicePitch = 0;

    MultiDispatchInfo multiDispatchInfo;
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc));
    EXPECT_EQ(1u, multiDispatchInfo.size());
    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);
    EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
    EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
}

// Stateless image-to-buffer copy with a 4 GB destination offset; same expectations
// as the buffer-to-image case above. Skipped on 32-bit builds.
TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect) {
    if (is32bit) {
        GTEST_SKIP();
    }

    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
    uint64_t bigOffset = 4ull * MemoryConstants::gigaByte;

    MockBuffer dstBuffer;
    dstBuffer.size = static_cast<size_t>(bigSize);
    std::unique_ptr<Image> pSrcImage(Image2dHelper<>::create(pContext));
    ASSERT_NE(nullptr, pSrcImage.get());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBufferStateless, *pDevice);

    BuiltinOpParams dc;
    dc.srcMemObj = pSrcImage.get();
    dc.dstMemObj = &dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset =
{static_cast(bigOffset), 0, 0}; dc.size = {1, 1, 1}; MultiDispatchInfo multiDispatchInfo; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, dc)); EXPECT_EQ(1u, multiDispatchInfo.size()); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc)); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBufferWithSourceOffsetUnalignedToFour) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pDevice); AlignedBuffer src; AlignedBuffer dst; MultiDispatchInfo multiDispatchInfo; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcMemObj = &src; builtinOpsParams.srcOffset.x = 1; builtinOpsParams.dstMemObj = &dst; builtinOpsParams.size = {src.getSize(), 0, 0}; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(1u, multiDispatchInfo.size()); const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); EXPECT_EQ(dispatchInfo->getKernel()->getKernelInfo().name, "CopyBufferToBufferLeftLeftover"); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); } TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderReadBufferAligned) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pDevice); AlignedBuffer srcMemObj; auto size = 10 * MemoryConstants::cacheLineSize; auto dstPtr = alignedMalloc(size, MemoryConstants::cacheLineSize); MultiDispatchInfo multiDispatchInfo; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcMemObj = &srcMemObj; builtinOpsParams.dstPtr = dstPtr; builtinOpsParams.size = {size, 0, 0}; 
ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(1u, multiDispatchInfo.size()); const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); EXPECT_EQ(1u, dispatchInfo->getDim()); size_t leftSize = reinterpret_cast(dstPtr) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, leftSize); size_t rightSize = (reinterpret_cast(dstPtr) + size) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, rightSize); size_t middleElSize = sizeof(uint32_t) * 4; size_t middleSize = size / middleElSize; EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo->getGWS()); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); alignedFree(dstPtr); } TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderWriteBufferAligned) { BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pDevice); auto size = 10 * MemoryConstants::cacheLineSize; auto srcPtr = alignedMalloc(size, MemoryConstants::cacheLineSize); AlignedBuffer dstMemObj; MultiDispatchInfo multiDispatchInfo; BuiltinOpParams builtinOpsParams; builtinOpsParams.srcPtr = srcPtr; builtinOpsParams.dstMemObj = &dstMemObj; builtinOpsParams.size = {size, 0, 0}; ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(1u, multiDispatchInfo.size()); const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); EXPECT_EQ(1u, dispatchInfo->getDim()); size_t leftSize = reinterpret_cast(srcPtr) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, leftSize); size_t rightSize = (reinterpret_cast(srcPtr) + size) % MemoryConstants::cacheLineSize; EXPECT_EQ(0u, rightSize); size_t middleElSize = sizeof(uint32_t) * 4; size_t middleSize = size / middleElSize; EXPECT_EQ(Vec3(middleSize, 1, 1), dispatchInfo->getGWS()); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); alignedFree(srcPtr); } TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderGetBuilderTwice) { 
BuiltinDispatchInfoBuilder &builder1 = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pDevice);
    BuiltinDispatchInfoBuilder &builder2 = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, *pDevice);

    // Requesting the same op twice must hand back the same builder object.
    EXPECT_EQ(&builder1, &builder2);
}

// Requesting a builder for the sentinel EBuiltInOps::COUNT must throw std::runtime_error.
TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderGetBuilderForUnknownBuiltInOp) {
    EXPECT_THROW(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::COUNT, *pDevice), std::runtime_error);
}

// On devices reporting OpenCL version >= 20 the context's scheduler kernel carries
// the well-known scheduler name.
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getSchedulerKernel) {
    if (pClDevice->getSupportedClVersion() >= 20) {
        SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
        std::string name = SchedulerKernel::schedulerName;
        EXPECT_EQ(name, schedulerKernel.getKernelInfo().name);
    }
}

// A second getSchedulerKernel() call returns the same kernel and the same program.
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getSchedulerKernelForSecondTimeDoesNotCreateNewKernel) {
    if (pClDevice->getSupportedClVersion() >= 20) {
        SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
        Program *program = schedulerKernel.getProgram();
        EXPECT_NE(nullptr, program);

        SchedulerKernel &schedulerKernelSecond = pContext->getSchedulerKernel();
        Program *program2 = schedulerKernelSecond.getProgram();

        EXPECT_EQ(&schedulerKernel, &schedulerKernelSecond);
        EXPECT_EQ(program, program2);
    }
}

// The base BuiltinDispatchInfoBuilder rejects both buildDispatchInfos overloads and
// leaves the MultiDispatchInfo empty.
TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderReturnFalseIfUnsupportedBuildType) {
    auto &bs = *pDevice->getBuiltIns();
    BuiltinDispatchInfoBuilder bdib{bs};
    MultiDispatchInfo multiDispatchInfo;
    BuiltinOpParams params;

    auto ret = bdib.buildDispatchInfos(multiDispatchInfo, params);
    EXPECT_FALSE(ret);
    ASSERT_EQ(0U, multiDispatchInfo.size());

    ret = bdib.buildDispatchInfos(multiDispatchInfo, nullptr, 0, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
    EXPECT_FALSE(ret);
    EXPECT_EQ(0U, multiDispatchInfo.size());
}

TEST_F(BuiltInTests,
GeivenDefaultBuiltinDispatchInfoBuilderWhendValidateDispatchIsCalledThenClSuccessIsReturned) {
    // The base builder's validateDispatch is expected to return CL_SUCCESS.
    auto &bs = *pDevice->getBuiltIns();
    BuiltinDispatchInfoBuilder bdib{bs};
    auto ret = bdib.validateDispatch(nullptr, 1, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
    EXPECT_EQ(CL_SUCCESS, ret);
}

// The base builder's setExplicitArg returns true for an argument it does not handle.
TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderReturnTrueIfExplicitKernelArgNotTakenCareOfInBuiltinDispatchBInfoBuilder) {
    auto &bs = *pDevice->getBuiltIns();
    BuiltinDispatchInfoBuilder bdib{bs};
    cl_int err;
    auto ret = bdib.setExplicitArg(1, 5, nullptr, err);
    EXPECT_TRUE(ret);
}

// Every VME op resolves to a builder while the built-in binary name is overridden.
TEST_F(VmeBuiltInTests, BuiltinDispatchInfoBuilderGetVMEBuilderReturnNonNull) {
    overwriteBuiltInBinaryName(pDevice, "media_kernels_backend");

    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::VmeBlockMotionEstimateIntel,
                                  EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel,
                                  EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pDevice);
        EXPECT_NE(nullptr, &builder);
    }

    restoreBuiltInBinaryName(pDevice);
}

// An op outside the VME set makes Vme::getBuiltinDispatchInfoBuilder throw.
TEST_F(VmeBuiltInTests, givenInvalidBuiltInOpWhenGetVmeBuilderInfoThenExceptionIsThrown) {
    EXPECT_THROW(Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::COUNT, *pDevice), std::exception);
}

// Building dispatch infos with a null kernel fails for every VME op and adds nothing.
TEST_F(VmeBuiltInTests, BuiltinDispatchInfoBuilderVMEBuilderNullKernel) {
    overwriteBuiltInBinaryName(pDevice, "media_kernels_backend");

    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::VmeBlockMotionEstimateIntel,
                                  EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel,
                                  EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pDevice);

        MultiDispatchInfo outMdi;
        Vec3<size_t> gws{352, 288, 0};
        Vec3<size_t> elws{0, 0, 0};
        Vec3<size_t> offset{0, 0, 0};
        auto ret = builder.buildDispatchInfos(outMdi, nullptr, 0, gws, elws, offset);
        EXPECT_FALSE(ret);
        EXPECT_EQ(0U,
outMdi.size());
    }

    restoreBuiltInBinaryName(pDevice);
}

// Builds a dispatch for the basic VME op from a mock SIMD16 kernel with a required
// work-group size of 16, then checks the resulting GWS/LWS/offset, the forwarded
// buffer argument, and the three implicit arguments patched into cross-thread data.
TEST_F(VmeBuiltInTests, BuiltinDispatchInfoBuilderVMEBuilder) {
    MockKernelWithInternals mockKernel{*pClDevice};
    ((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
    ((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
    mockKernel.kernelInfo.reqdWorkGroupSize[0] = 16;
    mockKernel.kernelInfo.reqdWorkGroupSize[1] = 0;
    mockKernel.kernelInfo.reqdWorkGroupSize[2] = 0;

    overwriteBuiltInBinaryName(pDevice, "media_kernels_backend");
    BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pDevice);
    restoreBuiltInBinaryName(pDevice);

    MultiDispatchInfo outMdi;
    Vec3<size_t> gws{352, 288, 0};
    Vec3<size_t> elws{0, 0, 0};
    Vec3<size_t> offset{16, 0, 0};

    MockBuffer mb;
    cl_mem bufferArg = static_cast<cl_mem>(&mb);

    cl_int err;
    constexpr uint32_t bufferArgNum = 3;
    bool ret = builder.setExplicitArg(bufferArgNum, sizeof(cl_mem), &bufferArg, err);
    EXPECT_FALSE(ret);
    EXPECT_EQ(CL_SUCCESS, err);

    ret = builder.buildDispatchInfos(outMdi, mockKernel.mockKernel, 0, gws, elws, offset);
    EXPECT_TRUE(ret);
    EXPECT_EQ(1U, outMdi.size());

    auto outDi = outMdi.begin();
    EXPECT_EQ(Vec3<size_t>(352, 1, 1), outDi->getGWS());
    EXPECT_EQ(Vec3<size_t>(16, 1, 1), outDi->getEnqueuedWorkgroupSize());
    EXPECT_EQ(Vec3<size_t>(16, 0, 0), outDi->getOffset());
    EXPECT_NE(mockKernel.mockKernel, outDi->getKernel());

    EXPECT_EQ(bufferArg, outDi->getKernel()->getKernelArg(bufferArgNum));

    constexpr uint32_t vmeImplicitArgsBase = 6;
    constexpr uint32_t vmeImplicitArgs = 3;
    ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelArgInfo.size());
    uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride
    for (uint32_t i = 0; i < vmeImplicitArgs; ++i) {
        auto &argInfo = outDi->getKernel()->getKernelInfo().kernelArgInfo[vmeImplicitArgsBase + i];
        ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size());
        auto off =
argInfo.kernelArgPatchInfoVector[0].crossthreadOffset;
        EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + off)));
    }
}

// Same scenario as the basic VME builder test, run for both advanced VME ops; the
// source image argument must also be forwarded to the built kernel.
TEST_F(VmeBuiltInTests, BuiltinDispatchInfoBuilderAdvancedVMEBuilder) {
    MockKernelWithInternals mockKernel{*pClDevice};
    ((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD32 = 0;
    ((SPatchExecutionEnvironment *)mockKernel.kernelInfo.patchInfo.executionEnvironment)->CompiledSIMD16 = 1;
    mockKernel.kernelInfo.reqdWorkGroupSize[0] = 16;
    mockKernel.kernelInfo.reqdWorkGroupSize[1] = 0;
    mockKernel.kernelInfo.reqdWorkGroupSize[2] = 0;

    Vec3<size_t> gws{352, 288, 0};
    Vec3<size_t> elws{0, 0, 0};
    Vec3<size_t> offset{0, 0, 0};

    cl_int err;

    constexpr uint32_t bufferArgNum = 7;
    MockBuffer mb;
    cl_mem bufferArg = static_cast<cl_mem>(&mb);

    constexpr uint32_t srcImageArgNum = 1;
    auto image = std::unique_ptr<Image>(Image2dHelper<>::create(pContext));
    cl_mem srcImageArg = static_cast<cl_mem>(image.get());

    EBuiltInOps::Type vmeOps[] = {EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel,
                                  EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel};
    for (auto op : vmeOps) {
        MultiDispatchInfo outMdi;
        overwriteBuiltInBinaryName(pDevice, "media_kernels_backend");
        BuiltinDispatchInfoBuilder &builder = Vme::getBuiltinDispatchInfoBuilder(op, *pDevice);
        restoreBuiltInBinaryName(pDevice);

        bool ret = builder.setExplicitArg(srcImageArgNum, sizeof(cl_mem), &srcImageArg, err);
        EXPECT_FALSE(ret);
        EXPECT_EQ(CL_SUCCESS, err);

        ret = builder.setExplicitArg(bufferArgNum, sizeof(cl_mem), &bufferArg, err);
        EXPECT_FALSE(ret);
        EXPECT_EQ(CL_SUCCESS, err);

        ret = builder.buildDispatchInfos(outMdi, mockKernel.mockKernel, 0, gws, elws, offset);
        EXPECT_TRUE(ret);
        EXPECT_EQ(1U, outMdi.size());

        auto outDi = outMdi.begin();
        EXPECT_EQ(Vec3<size_t>(352, 1, 1), outDi->getGWS());
        EXPECT_EQ(Vec3<size_t>(16, 1, 1), outDi->getEnqueuedWorkgroupSize());
        EXPECT_NE(mockKernel.mockKernel, outDi->getKernel());

        EXPECT_EQ(srcImageArg,
outDi->getKernel()->getKernelArg(srcImageArgNum)); uint32_t vmeImplicitArgsBase = outDi->getKernel()->getKernelInfo().getArgNumByName("intraSrcImg"); uint32_t vmeImplicitArgs = 4; ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelArgInfo.size()); EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(vmeImplicitArgsBase)); ++vmeImplicitArgsBase; --vmeImplicitArgs; uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride for (uint32_t i = 0; i < vmeImplicitArgs; ++i) { auto &argInfo = outDi->getKernel()->getKernelInfo().kernelArgInfo[vmeImplicitArgsBase + i]; ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size()); auto off = argInfo.kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData() + off))); } } } TEST_F(VmeBuiltInTests, getBuiltinAsString) { EXPECT_EQ(0, strcmp("aux_translation.builtin_kernel", getBuiltinAsString(EBuiltInOps::AuxTranslation))); EXPECT_EQ(0, strcmp("copy_buffer_to_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyBufferToBuffer))); EXPECT_EQ(0, strcmp("copy_buffer_rect.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyBufferRect))); EXPECT_EQ(0, strcmp("fill_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::FillBuffer))); EXPECT_EQ(0, strcmp("copy_buffer_to_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyBufferToImage3d))); EXPECT_EQ(0, strcmp("copy_image3d_to_buffer.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImage3dToBuffer))); EXPECT_EQ(0, strcmp("copy_image_to_image1d.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImageToImage1d))); EXPECT_EQ(0, strcmp("copy_image_to_image2d.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImageToImage2d))); EXPECT_EQ(0, strcmp("copy_image_to_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::CopyImageToImage3d))); EXPECT_EQ(0, strcmp("fill_image1d.builtin_kernel", 
getBuiltinAsString(EBuiltInOps::FillImage1d))); EXPECT_EQ(0, strcmp("fill_image2d.builtin_kernel", getBuiltinAsString(EBuiltInOps::FillImage2d))); EXPECT_EQ(0, strcmp("fill_image3d.builtin_kernel", getBuiltinAsString(EBuiltInOps::FillImage3d))); EXPECT_EQ(0, strcmp("vme_block_motion_estimate_intel.builtin_kernel", getBuiltinAsString(EBuiltInOps::VmeBlockMotionEstimateIntel))); EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_check_intel.builtin_kernel", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel))); EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_bidirectional_check_intel", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel))); EXPECT_EQ(0, strcmp("unknown", getBuiltinAsString(EBuiltInOps::COUNT))); } TEST_F(BuiltInTests, WhenUnknownOperationIsSpecifiedThenUnknownNameIsReturned) { EXPECT_EQ(0, strcmp("unknown", getUnknownBuiltinAsString(EBuiltInOps::CopyImage3dToBuffer))); EXPECT_EQ(0, strcmp("unknown", getUnknownBuiltinAsString(EBuiltInOps::COUNT))); } TEST_F(BuiltInTests, getExtension) { EXPECT_EQ(0, strcmp("", BuiltinCode::getExtension(BuiltinCode::ECodeType::Any))); EXPECT_EQ(0, strcmp(".bin", BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary))); EXPECT_EQ(0, strcmp(".bc", BuiltinCode::getExtension(BuiltinCode::ECodeType::Intermediate))); EXPECT_EQ(0, strcmp(".cl", BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))); EXPECT_EQ(0, strcmp("", BuiltinCode::getExtension(BuiltinCode::ECodeType::COUNT))); EXPECT_EQ(0, strcmp("", BuiltinCode::getExtension(BuiltinCode::ECodeType::INVALID))); } TEST_F(BuiltInTests, createBuiltinResource) { std::string resource = "__kernel"; auto br1 = createBuiltinResource(resource.data(), resource.size()); EXPECT_NE(0u, br1.size()); auto br2 = createBuiltinResource(br1); EXPECT_NE(0u, br2.size()); EXPECT_EQ(br1, br2); } TEST_F(BuiltInTests, createBuiltinResourceName) { EBuiltInOps::Type builtin = EBuiltInOps::CopyBufferToBuffer; const 
std::string extension = ".cl"; const std::string platformName = "skl"; const uint32_t deviceRevId = 9; std::string resourceNameGeneric = createBuiltinResourceName(builtin, extension); std::string resourceNameForPlatform = createBuiltinResourceName(builtin, extension, platformName); std::string resourceNameForPlatformAndStepping = createBuiltinResourceName(builtin, extension, platformName, deviceRevId); EXPECT_EQ(0, strcmp("copy_buffer_to_buffer.builtin_kernel.cl", resourceNameGeneric.c_str())); EXPECT_EQ(0, strcmp("skl_0_copy_buffer_to_buffer.builtin_kernel.cl", resourceNameForPlatform.c_str())); EXPECT_EQ(0, strcmp("skl_9_copy_buffer_to_buffer.builtin_kernel.cl", resourceNameForPlatformAndStepping.c_str())); } TEST_F(BuiltInTests, joinPath) { std::string resourceName = "copy_buffer_to_buffer.builtin_kernel.cl"; std::string resourcePath = "path"; EXPECT_EQ(0, strcmp(resourceName.c_str(), joinPath("", resourceName).c_str())); EXPECT_EQ(0, strcmp(resourcePath.c_str(), joinPath(resourcePath, "").c_str())); EXPECT_EQ(0, strcmp((resourcePath + PATH_SEPARATOR + resourceName).c_str(), joinPath(resourcePath + PATH_SEPARATOR, resourceName).c_str())); EXPECT_EQ(0, strcmp((resourcePath + PATH_SEPARATOR + resourceName).c_str(), joinPath(resourcePath, resourceName).c_str())); } TEST_F(BuiltInTests, EmbeddedStorageRegistry) { EmbeddedStorageRegistry storageRegistry; std::string resource = "__kernel"; storageRegistry.store("kernel.cl", createBuiltinResource(resource.data(), resource.size() + 1)); const BuiltinResourceT *br = storageRegistry.get("kernel.cl"); EXPECT_NE(nullptr, br); EXPECT_EQ(0, strcmp(resource.data(), br->data())); const BuiltinResourceT *bnr = storageRegistry.get("unknown.cl"); EXPECT_EQ(nullptr, bnr); } TEST_F(BuiltInTests, StorageRootPath) { class MockStorage : Storage { public: MockStorage(const std::string &rootPath) : Storage(rootPath){}; std::string &getRootPath() { return Storage::rootPath; } protected: BuiltinResourceT loadImpl(const std::string 
&fullResourceName) override {
            BuiltinResourceT ret;
            return ret;
        }
    };

    // The root path handed to the constructor is stored unchanged.
    const std::string rootPath("root");
    MockStorage mockStorage(rootPath);
    EXPECT_STREQ(rootPath.data(), mockStorage.getRootPath().data());
}

// EmbeddedStorage::loadImpl returns data for a known built-in and an empty resource
// for an unknown name.
TEST_F(BuiltInTests, EmbeddedStorageLoadImpl) {
    class MockEmbeddedStorage : EmbeddedStorage {
      public:
        MockEmbeddedStorage(const std::string &rootPath) : EmbeddedStorage(rootPath){};
        BuiltinResourceT loadImpl(const std::string &fullResourceName) override {
            return EmbeddedStorage::loadImpl(fullResourceName);
        }
    };
    MockEmbeddedStorage mockEmbeddedStorage("root");

    BuiltinResourceT br = mockEmbeddedStorage.loadImpl("copy_buffer_to_buffer.builtin_kernel.cl");
    EXPECT_NE(0u, br.size());

    BuiltinResourceT bnr = mockEmbeddedStorage.loadImpl("unknown.cl");
    EXPECT_EQ(0u, bnr.size());
}

// FileStorage::loadImpl returns data for an existing file and an empty resource for
// a missing one.
TEST_F(BuiltInTests, FileStorageLoadImpl) {
    class MockFileStorage : FileStorage {
      public:
        MockFileStorage(const std::string &rootPath) : FileStorage(rootPath){};
        BuiltinResourceT loadImpl(const std::string &fullResourceName) override {
            return FileStorage::loadImpl(fullResourceName);
        }
    };
    MockFileStorage mockFileStorage("root");

    BuiltinResourceT br = mockFileStorage.loadImpl("test_files/copybuffer.cl");
    EXPECT_NE(0u, br.size());

    BuiltinResourceT bnr = mockFileStorage.loadImpl("unknown.cl");
    EXPECT_EQ(0u, bnr.size());
}

// A default-constructed BuiltinsLib holds two storages.
TEST_F(BuiltInTests, builtinsLib) {
    class MockBuiltinsLib : BuiltinsLib {
      public:
        StoragesContainerT &getAllStorages() {
            return BuiltinsLib::allStorages;
        }
    };
    auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());
    EXPECT_EQ(2u, mockBuiltinsLib->getAllStorages().size());
}

// Requesting ECodeType::Any for CopyBufferToBuffer resolves to a non-empty binary
// targeting the requesting device.
TEST_F(BuiltInTests, getBuiltinCodeForTypeAny) {
    auto builtinsLib = std::unique_ptr<BuiltinsLib>(new BuiltinsLib());
    BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Any, *pDevice);
    EXPECT_EQ(BuiltinCode::ECodeType::Binary, code.type);
    EXPECT_NE(0u, code.resource.size());
    EXPECT_EQ(pDevice, code.targetDevice);
}

TEST_F(BuiltInTests,
getBuiltinCodeForTypeBinary) {
    // Binary code is available for CopyBufferToBuffer.
    auto builtinsLib = std::unique_ptr<BuiltinsLib>(new BuiltinsLib());
    BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice);
    EXPECT_EQ(BuiltinCode::ECodeType::Binary, code.type);
    EXPECT_NE(0u, code.resource.size());
    EXPECT_EQ(pDevice, code.targetDevice);
}

// No intermediate representation is expected: the type is echoed back with an empty resource.
TEST_F(BuiltInTests, getBuiltinCodeForTypeIntermediate) {
    auto builtinsLib = std::unique_ptr<BuiltinsLib>(new BuiltinsLib());
    BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Intermediate, *pDevice);
    EXPECT_EQ(BuiltinCode::ECodeType::Intermediate, code.type);
    EXPECT_EQ(0u, code.resource.size());
    EXPECT_EQ(pDevice, code.targetDevice);
}

// Source code is available for CopyBufferToBuffer.
TEST_F(BuiltInTests, getBuiltinCodeForTypeSource) {
    auto builtinsLib = std::unique_ptr<BuiltinsLib>(new BuiltinsLib());
    BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice);
    EXPECT_EQ(BuiltinCode::ECodeType::Source, code.type);
    EXPECT_NE(0u, code.resource.size());
    EXPECT_EQ(pDevice, code.targetDevice);
}

// An INVALID code type is echoed back with an empty resource.
TEST_F(BuiltInTests, getBuiltinCodeForTypeInvalid) {
    auto builtinsLib = std::unique_ptr<BuiltinsLib>(new BuiltinsLib());
    BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::INVALID, *pDevice);
    EXPECT_EQ(BuiltinCode::ECodeType::INVALID, code.type);
    EXPECT_EQ(0u, code.resource.size());
    EXPECT_EQ(pDevice, code.targetDevice);
}

// Every real built-in op has an embedded source resource; the COUNT sentinel has none.
TEST_F(BuiltInTests, getBuiltinResourcesForTypeSource) {
    class MockBuiltinsLib : BuiltinsLib {
      public:
        BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
            return BuiltinsLib::getBuiltinResource(builtin, requestedCodeType, device);
        }
    };

    auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());

    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::AuxTranslation, BuiltinCode::ECodeType::Source, *pDevice).size());
    EXPECT_NE(0u,
mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage1d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage2d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage1d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage2d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage3d, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Source, 
*pDevice).size());
}

// Binary resources exist for the copy/fill built-ins; the VME ops and the COUNT
// sentinel are expected to have none.
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getBuiltinResourcesForTypeBinary) {
    class MockBuiltinsLib : BuiltinsLib {
      public:
        BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) {
            return BuiltinsLib::getBuiltinResource(builtin, requestedCodeType, device);
        }
    };

    auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());

    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::AuxTranslation, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferRect, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyBufferToImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage1d, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage2d, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::CopyImageToImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage1d, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage2d, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::FillImage3d, BuiltinCode::ECodeType::Binary, *pDevice).size());
    EXPECT_EQ(0u,
mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size()); EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Binary, *pDevice).size()); } TEST_F(BuiltInTests, createProgramFromCodeForTypeAny) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Any, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); EXPECT_NE(nullptr, program.get()); } TEST_F(BuiltInTests, createProgramFromCodeForTypeSource) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); EXPECT_NE(nullptr, program.get()); } TEST_F(BuiltInTests, givenCreateProgramFromSourceWhenDeviceSupportSharedSystemAllocationThenInternalOptionsDisableStosoFlag) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; pDevice->deviceInfo.sharedSystemAllocationsSupport = true; const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); EXPECT_NE(0u, 
bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); EXPECT_NE(nullptr, program.get()); EXPECT_THAT(program->getInternalOptions(), testing::HasSubstr(std::string(CompilerOptions::greaterThan4gbBuffersRequired))); } TEST_F(BuiltInTests, createProgramFromCodeForTypeIntermediate) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Intermediate, *pDevice); EXPECT_EQ(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); EXPECT_EQ(nullptr, program.get()); } TEST_F(BuiltInTests, createProgramFromCodeForTypeBinary) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice); EXPECT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); EXPECT_NE(nullptr, program.get()); } TEST_F(BuiltInTests, createProgramFromCodeForTypeInvalid) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::INVALID, *pDevice); EXPECT_EQ(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); EXPECT_EQ(nullptr, program.get()); } TEST_F(BuiltInTests, createProgramFromCodeForTypeAnyButBuiltinInvalid) { auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Any, *pDevice); EXPECT_EQ(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); EXPECT_EQ(nullptr, program.get()); } TEST_F(BuiltInTests, createProgramFromCodeInternalOptionsFor32Bit) { bool force32BitAddressess = 
pDevice->getDeviceInfo().force32BitAddressess; const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; auto builtinsLib = std::unique_ptr(new BuiltinsLib()); const BuiltinCode bc = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Source, *pDevice); ASSERT_NE(0u, bc.resource.size()); auto program = std::unique_ptr(BuiltinsLib::createProgramFromCode(bc, *pDevice)); ASSERT_NE(nullptr, program.get()); auto builtinInternalOptions = program->getInternalOptions(); auto it = builtinInternalOptions.find(NEO::CompilerOptions::arch32bit); EXPECT_EQ(std::string::npos, it); it = builtinInternalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired); if (is32bit || pDevice->areSharedSystemAllocationsAllowed()) { EXPECT_NE(std::string::npos, it); } else { EXPECT_EQ(std::string::npos, it); } const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = force32BitAddressess; } TEST_F(BuiltInTests, whenQueriedProperVmeVersionIsReturned) { if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } cl_uint param; auto ret = pClDevice->getDeviceInfo(CL_DEVICE_ME_VERSION_INTEL, sizeof(param), ¶m, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(static_cast(CL_ME_VERSION_ADVANCED_VER_2_INTEL), param); } TEST_F(VmeBuiltInTests, vmeDispatchValidationHelpers) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); cl_int err; { int32_t bufArgNum = 7; cl_mem mem = 0; vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err); EXPECT_TRUE(vmeBuilder.validateBufferSize(-1, 16)); EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, 16)); MockBuffer mb; mem = &mb; vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err); EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize())); EXPECT_TRUE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize() / 2)); 
EXPECT_FALSE(vmeBuilder.validateBufferSize(bufArgNum, mb.getSize() * 2)); mem = 0; vmeBuilder.setExplicitArg(bufArgNum, sizeof(cl_mem), &mem, err); } { EXPECT_TRUE(vmeBuilder.validateEnumVal(1, 1, 2, 3, 4)); EXPECT_TRUE(vmeBuilder.validateEnumVal(1, 1)); EXPECT_TRUE(vmeBuilder.validateEnumVal(3, 1, 2, 3)); EXPECT_FALSE(vmeBuilder.validateEnumVal(1, 3, 4)); EXPECT_FALSE(vmeBuilder.validateEnumVal(1)); EXPECT_FALSE(vmeBuilder.validateEnumVal(1, 2)); int32_t valArgNum = 3; uint32_t val = 7; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); EXPECT_FALSE(vmeBuilder.validateEnumArg(valArgNum, 3)); EXPECT_TRUE(vmeBuilder.validateEnumArg(valArgNum, 7)); val = 0; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); } { int32_t valArgNum = 3; uint32_t val = 7; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); EXPECT_EQ(val, vmeBuilder.getKernelArgByValValue(valArgNum)); val = 11; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); EXPECT_EQ(val, vmeBuilder.getKernelArgByValValue(valArgNum)); val = 0; vmeBuilder.setExplicitArg(valArgNum, sizeof(val), &val, err); } } TEST_F(VmeBuiltInTests, vmeDispatchIsBidir) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp avmeBuilder(*this->pBuiltIns, *this->pDevice); BuiltInOp avmeBidirBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); EXPECT_FALSE(avmeBuilder.isBidirKernel()); EXPECT_TRUE(avmeBidirBuilder.isBidirKernel()); } struct ImageVmeValidFormat : Image2dDefaults { static const cl_image_format imageFormat; static const cl_image_desc iamgeDesc; }; const cl_image_format ImageVmeValidFormat::imageFormat = { CL_R, CL_UNORM_INT8}; const cl_image_desc ImageVmeValidFormat::iamgeDesc = { CL_MEM_OBJECT_IMAGE1D, 8192, 16, 1, 1, 0, 0, 0, 0, {nullptr}}; struct ImageVmeInvalidDataType : Image2dDefaults { static const cl_image_format imageFormat; }; const cl_image_format 
ImageVmeInvalidDataType::imageFormat = { CL_R, CL_FLOAT}; struct ImageVmeInvalidChannelOrder : Image2dDefaults { static const cl_image_format imageFormat; }; const cl_image_format ImageVmeInvalidChannelOrder::imageFormat = { CL_RGBA, CL_UNORM_INT8}; TEST_F(VmeBuiltInTests, vmeValidateImages) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; cl_int err; { // validate images are not null std::unique_ptr image1(ImageHelper::create(pContext)); cl_mem srcImgMem = 0; cl_mem refImgMem = 0; EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateImages(Vec3{3, 3, 0}, Vec3{0, 0, 0})); srcImgMem = image1.get(); refImgMem = 0; vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateImages(Vec3{3, 3, 0}, Vec3{0, 0, 0})); } { // validate image formats std::unique_ptr imageValid(ImageHelper::create(pContext)); std::unique_ptr imageInvalidDataType(ImageHelper::create(pContext)); std::unique_ptr imageChannelOrder(ImageHelper::create(pContext)); Image *images[] = {imageValid.get(), imageInvalidDataType.get(), imageChannelOrder.get()}; for (Image *srcImg : images) { for (Image *dstImg : images) { cl_mem srcImgMem = srcImg; cl_mem refImgMem = dstImg; vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(refImgMem), &refImgMem, err); bool shouldSucceed = (srcImg == imageValid.get()) && (dstImg == imageValid.get()); if (shouldSucceed) { EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } else { EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } } } } { // validate image tiling std::unique_ptr imageValid(ImageHelper::create(pContext)); pContext->isSharedContext 
= true; std::unique_ptr imageLinear(ImageHelper::create(pContext)); pContext->isSharedContext = false; Image *images[] = {imageValid.get(), imageLinear.get()}; for (Image *srcImg : images) { for (Image *dstImg : images) { cl_mem srcImgMem = srcImg; cl_mem refImgMem = dstImg; vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(refImgMem), &refImgMem, err); bool shouldSucceed = (srcImg == imageValid.get()) && (dstImg == imageValid.get()); if (shouldSucceed) { EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } else { EXPECT_EQ(CL_OUT_OF_RESOURCES, vmeBuilder.validateImages(Vec3{1, 1, 0}, Vec3{0, 0, 0})); } } } } { // validate region size std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem imgValidMem = imageValid.get(); vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(imgValidMem), &imgValidMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(imgValidMem), &imgValidMem, err); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, vmeBuilder.validateImages(Vec3{imageValid->getImageDesc().image_width + 1, 1, 0}, Vec3{0, 0, 0})); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, vmeBuilder.validateImages(Vec3{1, imageValid->getImageDesc().image_height + 1, 0}, Vec3{0, 0, 0})); } } TEST_F(VmeBuiltInTests, vmeValidateFlags) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); uint32_t defaultSkipBlockVal = 8192; uint32_t flagsArgNum = 3; std::tuple flagsToTest[] = { std::make_tuple(CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL, false, defaultSkipBlockVal), std::make_tuple(CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL, true, CL_ME_MB_TYPE_16x16_INTEL), std::make_tuple(CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL, true, CL_ME_MB_TYPE_8x8_INTEL), std::make_tuple(defaultSkipBlockVal, true, defaultSkipBlockVal), }; cl_int err; for (auto &conf : flagsToTest) { uint32_t skipBlock = 
defaultSkipBlockVal; vmeBuilder.setExplicitArg(flagsArgNum, sizeof(uint32_t), &std::get<0>(conf), err); bool validationResult = vmeBuilder.validateFlags(skipBlock); if (std::get<1>(conf)) { EXPECT_TRUE(validationResult); } else { EXPECT_FALSE(validationResult); } EXPECT_EQ(std::get<2>(conf), skipBlock); } } TEST_F(VmeBuiltInTests, vmeValidateSkipBlockType) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp avmeBidirectionalBuilder(*this->pBuiltIns, *this->pDevice); BuiltInOp avmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); cl_int err; uint32_t skipBlockTypeArgNum = 4; uint32_t skipBlockType = 8192; bool ret = avmeBidirectionalBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_TRUE(ret); EXPECT_EQ(8192U, skipBlockType); skipBlockType = 8192U; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err); ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_FALSE(ret); skipBlockType = CL_ME_MB_TYPE_16x16_INTEL; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err); skipBlockType = 8192U; ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_TRUE(ret); EXPECT_EQ(static_cast(CL_ME_MB_TYPE_16x16_INTEL), skipBlockType); skipBlockType = CL_ME_MB_TYPE_8x8_INTEL; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &skipBlockType, err); skipBlockType = 8192U; ret = avmeBuilder.validateSkipBlockTypeArg(skipBlockType); EXPECT_TRUE(ret); EXPECT_EQ(static_cast(CL_ME_MB_TYPE_8x8_INTEL), skipBlockType); } TEST_F(VmeBuiltInTests, setExplicitArgAccelerator) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); cl_int err; uint32_t aceleratorArgNum = 0; bool ret = vmeBuilder.setExplicitArg(aceleratorArgNum, 
sizeof(cl_accelerator_intel), nullptr, err); EXPECT_FALSE(ret); EXPECT_EQ(CL_INVALID_ACCELERATOR_INTEL, err); cl_motion_estimation_desc_intel acceleratorDesc; acceleratorDesc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL; acceleratorDesc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL; acceleratorDesc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); ret = vmeBuilder.setExplicitArg(aceleratorArgNum, sizeof(cl_accelerator_intel), &clAccel, err); EXPECT_FALSE(ret); EXPECT_EQ(CL_SUCCESS, err); } TEST_F(VmeBuiltInTests, vmeValidateDispatch) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); struct MockVmeBuilder : BuiltInOp { MockVmeBuilder(BuiltIns &kernelsLib, Context &context, Device &device) : BuiltInOp(kernelsLib, device) { } cl_int validateVmeDispatch(Vec3 inputRegion, Vec3 offset, size_t blkNum, size_t blkMul) const override { receivedInputRegion = inputRegion; receivedOffset = offset; receivedBlkNum = blkNum; receivedBlkMul = blkMul; wasValidateVmeDispatchCalled = true; return valueToReturn; } mutable bool wasValidateVmeDispatchCalled = false; mutable Vec3 receivedInputRegion = {0, 0, 0}; mutable Vec3 receivedOffset = {0, 0, 0}; mutable size_t receivedBlkNum = 0; mutable size_t receivedBlkMul = 0; mutable cl_int valueToReturn = CL_SUCCESS; }; uint32_t aaceleratorArgNum = 0; MockVmeBuilder vmeBuilder(*this->pBuiltIns, *pContext, *this->pDevice); restoreBuiltInBinaryName(pDevice); cl_int ret = vmeBuilder.validateDispatch(nullptr, 1, Vec3{16, 16, 0}, Vec3{16, 1, 0}, Vec3{0, 0, 0}); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, ret); ret = vmeBuilder.validateDispatch(nullptr, 3, Vec3{16, 
16, 0}, Vec3{16, 1, 0}, Vec3{0, 0, 0}); EXPECT_EQ(CL_INVALID_WORK_DIMENSION, ret); ret = vmeBuilder.validateDispatch(nullptr, 2, Vec3{16, 16, 0}, Vec3{16, 1, 0}, Vec3{0, 0, 0}); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, ret); // accelerator not set EXPECT_FALSE(vmeBuilder.wasValidateVmeDispatchCalled); cl_int err; cl_motion_estimation_desc_intel acceleratorDesc; acceleratorDesc.subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL; acceleratorDesc.sad_adjust_mode = CL_ME_SAD_ADJUST_MODE_NONE_INTEL; acceleratorDesc.search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL; Vec3 gws{16, 16, 0}; Vec3 lws{16, 1, 0}; Vec3 off{0, 0, 0}; size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; vmeBuilder.getBlkTraits(gws, gwWidthInBlk, gwHeightInBlk); { acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_16x16_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err); vmeBuilder.wasValidateVmeDispatchCalled = false; auto ret = vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled); EXPECT_EQ(gws, vmeBuilder.receivedInputRegion); EXPECT_EQ(off, vmeBuilder.receivedOffset); EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum); EXPECT_EQ(1U, vmeBuilder.receivedBlkMul); } { acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_4x4_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err); vmeBuilder.wasValidateVmeDispatchCalled = false; auto ret = 
vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled); EXPECT_EQ(gws, vmeBuilder.receivedInputRegion); EXPECT_EQ(off, vmeBuilder.receivedOffset); EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum); EXPECT_EQ(16U, vmeBuilder.receivedBlkMul); } { acceleratorDesc.mb_block_type = CL_ME_MB_TYPE_8x8_INTEL; auto neoAccelerator = std::unique_ptr(VmeAccelerator::create(pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(acceleratorDesc), &acceleratorDesc, err)); ASSERT_NE(nullptr, neoAccelerator.get()); cl_accelerator_intel clAccel = neoAccelerator.get(); vmeBuilder.setExplicitArg(aaceleratorArgNum, sizeof(clAccel), &clAccel, err); vmeBuilder.wasValidateVmeDispatchCalled = false; vmeBuilder.valueToReturn = 37; auto ret = vmeBuilder.validateDispatch(nullptr, 2, gws, lws, off); EXPECT_EQ(37, ret); EXPECT_TRUE(vmeBuilder.wasValidateVmeDispatchCalled); EXPECT_EQ(gws, vmeBuilder.receivedInputRegion); EXPECT_EQ(off, vmeBuilder.receivedOffset); EXPECT_EQ(gwWidthInBlk * gwHeightInBlk, vmeBuilder.receivedBlkNum); EXPECT_EQ(4U, vmeBuilder.receivedBlkMul); } } TEST_F(VmeBuiltInTests, vmeValidateVmeDispatch) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); cl_int err; // images not set EXPECT_EQ(CL_INVALID_KERNEL_ARGS, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem srcImgMem = imageValid.get(); vmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); vmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err); // null buffers are valid EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); // too small buffers 
should fail MockBuffer mb; cl_mem mem = &mb; uint32_t predictionMotionVectorBufferArgNum = 3; uint32_t motionVectorBufferArgNum = 4; uint32_t residualsBufferArgNum = 5; for (uint32_t argNum : {predictionMotionVectorBufferArgNum, motionVectorBufferArgNum, residualsBufferArgNum}) { EXPECT_EQ(CL_SUCCESS, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, mb.getSize() * 2, 1)); vmeBuilder.setExplicitArg(argNum, sizeof(cl_mem), &mem, err); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, vmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, mb.getSize() * 2, 1)); vmeBuilder.setExplicitArg(argNum, sizeof(cl_mem), nullptr, err); } } TEST_F(VmeBuiltInTests, advancedVmeValidateVmeDispatch) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp avmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); cl_int err; // images not set ASSERT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem srcImgMem = imageValid.get(); avmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); avmeBuilder.setExplicitArg(refImgArgNum, sizeof(srcImgMem), &srcImgMem, err); ASSERT_EQ(CL_SUCCESS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t flagsArgNum = 3; uint32_t val = CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL; avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL; avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err); uint32_t skipBlockTypeArgNum = 4; val = 8192; 
avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &val, err); EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_MB_TYPE_16x16_INTEL; avmeBuilder.setExplicitArg(skipBlockTypeArgNum, sizeof(uint32_t), &val, err); uint32_t searchCostPenaltyArgNum = 5; val = 8192; avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err); EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_COST_PENALTY_NONE_INTEL; avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err); uint32_t searchCostPrecisionArgNum = 6; val = 8192; avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err); EXPECT_EQ(CL_OUT_OF_RESOURCES, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_COST_PRECISION_QPEL_INTEL; avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err); // for non-bidirectional avme kernel, countMotionVectorBuffer must be set uint32_t countMotionVectorBufferArgNum = 7; EXPECT_EQ(CL_INVALID_BUFFER_SIZE, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); MockBuffer mb; cl_mem mem = &mb; avmeBuilder.setExplicitArg(countMotionVectorBufferArgNum, sizeof(cl_mem), &mem, err); EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 1, 1)); } TEST_F(VmeBuiltInTests, advancedBidirectionalVmeValidateVmeDispatch) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp avmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); cl_int err; uint32_t srcImgArgNum = 1; uint32_t refImgArgNum = 2; std::unique_ptr imageValid(ImageHelper::create(pContext)); cl_mem srcImgMem = imageValid.get(); avmeBuilder.setExplicitArg(srcImgArgNum, sizeof(srcImgMem), &srcImgMem, err); avmeBuilder.setExplicitArg(refImgArgNum, 
sizeof(srcImgMem), &srcImgMem, err); ASSERT_EQ(CL_SUCCESS, avmeBuilder.VmeBuiltinDispatchInfoBuilder::validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); uint32_t flagsArgNum = 6; uint32_t val = CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL; avmeBuilder.setExplicitArg(flagsArgNum, sizeof(val), &val, err); uint32_t searchCostPenaltyArgNum = 7; val = CL_ME_COST_PENALTY_NONE_INTEL; avmeBuilder.setExplicitArg(searchCostPenaltyArgNum, sizeof(uint32_t), &val, err); uint32_t searchCostPrecisionArgNum = 8; val = CL_ME_COST_PRECISION_QPEL_INTEL; avmeBuilder.setExplicitArg(searchCostPrecisionArgNum, sizeof(uint32_t), &val, err); uint32_t bidirWeightArgNum = 10; val = 255; avmeBuilder.setExplicitArg(bidirWeightArgNum, sizeof(uint8_t), &val, err); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); val = CL_ME_BIDIR_WEIGHT_QUARTER_INTEL; avmeBuilder.setExplicitArg(bidirWeightArgNum, sizeof(uint8_t), &val, err); EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 64, 1)); // test bufferSize checking uint32_t countMotionVectorBufferArgNum = 11; MockBuffer mb; cl_mem mem = &mb; avmeBuilder.setExplicitArg(countMotionVectorBufferArgNum, sizeof(cl_mem), &mem, err); EXPECT_EQ(CL_SUCCESS, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, 1, 1)); EXPECT_EQ(CL_INVALID_BUFFER_SIZE, avmeBuilder.validateVmeDispatch(Vec3{1, 1, 0}, Vec3{0, 0, 0}, mb.getSize() * 2, 1)); } TEST_F(VmeBuiltInTests, advancedVmeGetSkipResidualsBuffExpSizeDefaultValue) { this->pBuiltIns->setCacheingEnableState(false); overwriteBuiltInBinaryName(pDevice, "media_kernels_backend"); BuiltInOp vmeBuilder(*this->pBuiltIns, *this->pDevice); restoreBuiltInBinaryName(pDevice); auto size16x16 = vmeBuilder.getSkipResidualsBuffExpSize(CL_ME_MB_TYPE_16x16_INTEL, 4); auto sizeDefault = vmeBuilder.getSkipResidualsBuffExpSize(8192, 4); EXPECT_EQ(size16x16, sizeDefault); } TEST_F(BuiltInTests, createBuiltInProgramForInvalidBuiltinKernelName) { 
const char *kernelNames = "invalid_kernel"; cl_int retVal = CL_SUCCESS; cl_program program = Vme::createBuiltInProgram( *pContext, *pDevice, kernelNames, retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, program); } TEST_F(BuiltInTests, getSipKernelReturnsProgramCreatedOutOfIsaAcquiredFromCompilerInterface) { MockBuiltins mockBuiltins; auto mockCompilerInterface = new MockCompilerInterface(); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(mockCompilerInterface); mockCompilerInterface->sipKernelBinaryOverride = mockCompilerInterface->getDummyGenBinary(); cl_int errCode = CL_BUILD_PROGRAM_FAILURE; auto p = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, mockCompilerInterface->sipKernelBinaryOverride.data(), mockCompilerInterface->sipKernelBinaryOverride.size(), false, &errCode, nullptr); ASSERT_EQ(CL_SUCCESS, errCode); errCode = p->processGenBinary(); ASSERT_EQ(CL_SUCCESS, errCode); const SipKernel &sipKern = mockBuiltins.getSipKernel(SipKernelType::Csr, pContext->getDevice(0)->getDevice()); const auto &sipKernelInfo = p->getKernelInfo(static_cast(0)); auto compbinedKernelHeapSize = sipKernelInfo->heapInfo.pKernelHeader->KernelHeapSize; auto sipOffset = sipKernelInfo->systemKernelOffset; ASSERT_GT(compbinedKernelHeapSize, sipOffset); auto expectedMem = reinterpret_cast(sipKernelInfo->heapInfo.pKernelHeap) + sipOffset; ASSERT_EQ(compbinedKernelHeapSize - sipOffset, sipKern.getBinarySize()); EXPECT_EQ(0, memcmp(expectedMem, sipKern.getBinary(), sipKern.getBinarySize())); EXPECT_EQ(SipKernelType::Csr, mockCompilerInterface->requestedSipKernel); p->release(); } TEST_F(BuiltInTests, givenSipKernelWhenItIsCreatedThenItHasGraphicsAllocationForKernel) { const SipKernel &sipKern = pDevice->getBuiltIns()->getSipKernel(SipKernelType::Csr, pContext->getDevice(0)->getDevice()); auto sipAllocation = sipKern.getSipAllocation(); EXPECT_NE(nullptr, sipAllocation); } 
TEST_F(BuiltInTests, givenSameDeviceIsUsedWhenUsingStaticGetterThenExpectRetrieveSameAllocation) { const SipKernel &sipKern = pDevice->getBuiltIns()->getSipKernel(SipKernelType::Csr, pContext->getDevice(0)->getDevice()); auto sipAllocation = sipKern.getSipAllocation(); EXPECT_NE(nullptr, sipAllocation); auto staticSipAllocation = SipKernel::getSipKernelAllocation(*pDevice); EXPECT_NE(nullptr, staticSipAllocation); EXPECT_EQ(sipAllocation, staticSipAllocation); } TEST_F(BuiltInTests, givenDebugFlagForceUseSourceWhenArgIsBinaryThenReturnBuiltinCodeBinary) { DebugManager.flags.RebuildPrecompiledKernels.set(true); auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Binary, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Binary, code.type); EXPECT_NE(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } TEST_F(BuiltInTests, givenDebugFlagForceUseSourceWhenArgIsAnyThenReturnBuiltinCodeSource) { DebugManager.flags.RebuildPrecompiledKernels.set(true); auto builtinsLib = std::unique_ptr(new BuiltinsLib()); BuiltinCode code = builtinsLib->getBuiltinCode(EBuiltInOps::CopyBufferToBuffer, BuiltinCode::ECodeType::Any, *pDevice); EXPECT_EQ(BuiltinCode::ECodeType::Source, code.type); EXPECT_NE(0u, code.resource.size()); EXPECT_EQ(pDevice, code.targetDevice); } using BuiltInOwnershipWrapperTests = BuiltInTests; TEST_F(BuiltInOwnershipWrapperTests, givenBuiltinWhenConstructedThenLockAndUnlockOnDestruction) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MockContext mockContext; { BuiltInOwnershipWrapper lock(mockAuxBuiltInOp, &mockContext); EXPECT_TRUE(mockAuxBuiltInOp.baseKernel->hasOwnership()); EXPECT_EQ(&mockContext, &mockAuxBuiltInOp.baseKernel->getContext()); } EXPECT_FALSE(mockAuxBuiltInOp.baseKernel->hasOwnership()); } TEST_F(BuiltInOwnershipWrapperTests, givenLockWithoutParametersWhenConstructingThenLockOnlyWhenRequested) { 
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MockContext mockContext; { BuiltInOwnershipWrapper lock; lock.takeOwnership(mockAuxBuiltInOp, &mockContext); EXPECT_TRUE(mockAuxBuiltInOp.baseKernel->hasOwnership()); EXPECT_EQ(&mockContext, &mockAuxBuiltInOp.baseKernel->getContext()); } EXPECT_FALSE(mockAuxBuiltInOp.baseKernel->hasOwnership()); } TEST_F(BuiltInOwnershipWrapperTests, givenLockWithAcquiredOwnershipWhenTakeOwnershipCalledThenAbort) { MockAuxBuilInOp mockAuxBuiltInOp1(*pBuiltIns, *pContext, *pDevice); MockAuxBuilInOp mockAuxBuiltInOp2(*pBuiltIns, *pContext, *pDevice); BuiltInOwnershipWrapper lock(mockAuxBuiltInOp1, pContext); EXPECT_THROW(lock.takeOwnership(mockAuxBuiltInOp1, pContext), std::exception); EXPECT_THROW(lock.takeOwnership(mockAuxBuiltInOp2, pContext), std::exception); } HWTEST_F(BuiltInOwnershipWrapperTests, givenBuiltInOwnershipWrapperWhenAskedForTypeTraitsThenDisableCopyConstructorAndOperator) { EXPECT_FALSE(std::is_copy_constructible::value); EXPECT_FALSE(std::is_copy_assignable::value); } compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/built_ins_file_names.cpp000066400000000000000000000014471363734646600305060ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace NEO { std::vector buintInFileNames = { "test_files/aux_translation.builtin_kernel", "test_files/copy_buffer_to_buffer.builtin_kernel", "test_files/fill_buffer.builtin_kernel", "test_files/fill_image1d.builtin_kernel", "test_files/fill_image2d.builtin_kernel", "test_files/fill_image3d.builtin_kernel", "test_files/copy_image_to_image1d.builtin_kernel", "test_files/copy_image_to_image2d.builtin_kernel", "test_files/copy_image_to_image3d.builtin_kernel", "test_files/copy_buffer_rect.builtin_kernel", "test_files/copy_buffer_to_image3d.builtin_kernel", "test_files/copy_image3d_to_buffer.builtin_kernel"}; } 
compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/built_ins_file_names.h000066400000000000000000000004241363734646600301450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #pragma once namespace NEO { std::vector getBuiltInFileNames(); std::string getBuiltInHashFileName(uint64_t hash); } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/get_built_ins_file_names.cpp000066400000000000000000000007321363734646600313410ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/built_ins/built_ins_file_names.h" namespace NEO { extern std::vector buintInFileNames; std::vector getBuiltInFileNames() { return buintInFileNames; } std::string getBuiltInHashFileName(uint64_t hash) { std::string hashName = "test_files/" + std::to_string(hash) + ".cl"; return hashName; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/built_ins/sip_tests.cpp000066400000000000000000000134301363734646600263440ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; namespace SipKernelTests { std::string getDebugSipKernelNameWithBitnessAndProductSuffix(std::string &base, const char *product) { std::string fullName = base + std::string("_"); if (sizeof(uintptr_t) == 8) { fullName.append("64_"); } else { fullName.append("32_"); } fullName.append(product); return fullName; } TEST(Sip, WhenSipKernelIsInvalidThenEmptyCompilerInternalOptionsAreReturned) { const char 
*opt = getSipKernelCompilerInternalOptions(SipKernelType::COUNT); ASSERT_NE(nullptr, opt); EXPECT_EQ(0U, strlen(opt)); } TEST(Sip, WhenRequestingCsrSipKernelThenProperCompilerInternalOptionsAreReturned) { const char *opt = getSipKernelCompilerInternalOptions(SipKernelType::Csr); ASSERT_NE(nullptr, opt); EXPECT_STREQ("-cl-include-sip-csr", opt); } TEST(Sip, When32BitAddressesAreNotBeingForcedThenSipLlHasSameBitnessAsHostApplication) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); mockDevice->deviceInfo.force32BitAddressess = false; const char *src = getSipLlSrc(*mockDevice); ASSERT_NE(nullptr, src); if (sizeof(void *) == 8) { EXPECT_NE(nullptr, strstr(src, "target datalayout = \"e-p:64:64:64\"")); EXPECT_NE(nullptr, strstr(src, "target triple = \"spir64\"")); } else { EXPECT_NE(nullptr, strstr(src, "target datalayout = \"e-p:32:32:32\"")); EXPECT_NE(nullptr, strstr(src, "target triple = \"spir\"")); EXPECT_EQ(nullptr, strstr(src, "target triple = \"spir64\"")); } } TEST(Sip, When32BitAddressesAreBeingForcedThenSipLlHas32BitAddresses) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); mockDevice->deviceInfo.force32BitAddressess = true; const char *src = getSipLlSrc(*mockDevice); ASSERT_NE(nullptr, src); EXPECT_NE(nullptr, strstr(src, "target datalayout = \"e-p:32:32:32\"")); EXPECT_NE(nullptr, strstr(src, "target triple = \"spir\"")); EXPECT_EQ(nullptr, strstr(src, "target triple = \"spir64\"")); } TEST(Sip, GivenSipLlWhenGettingMetadataThenMetadataRequiredByCompilerIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); const char *src = getSipLlSrc(*mockDevice); ASSERT_NE(nullptr, src); EXPECT_NE(nullptr, strstr(src, "!opencl.compiler.options")); EXPECT_NE(nullptr, strstr(src, "!opencl.kernels")); } TEST(Sip, 
WhenGettingTypeThenCorrectTypeIsReturned) { SipKernel csr{SipKernelType::Csr, GlobalMockSipProgram::getSipProgramWithCustomBinary()}; EXPECT_EQ(SipKernelType::Csr, csr.getType()); SipKernel dbgCsr{SipKernelType::DbgCsr, GlobalMockSipProgram::getSipProgramWithCustomBinary()}; EXPECT_EQ(SipKernelType::DbgCsr, dbgCsr.getType()); SipKernel dbgCsrLocal{SipKernelType::DbgCsrLocal, GlobalMockSipProgram::getSipProgramWithCustomBinary()}; EXPECT_EQ(SipKernelType::DbgCsrLocal, dbgCsrLocal.getType()); SipKernel undefined{SipKernelType::COUNT, GlobalMockSipProgram::getSipProgramWithCustomBinary()}; EXPECT_EQ(SipKernelType::COUNT, undefined.getType()); } TEST(Sip, givenSipKernelClassWhenAskedForMaxDebugSurfaceSizeThenCorrectValueIsReturned) { EXPECT_EQ(0x49c000u, SipKernel::maxDbgSurfaceSize); } TEST(Sip, givenDebuggingInactiveWhenSipTypeIsQueriedThenCsrSipTypeIsReturned) { auto sipType = SipKernel::getSipKernelType(renderCoreFamily, false); EXPECT_EQ(SipKernelType::Csr, sipType); } TEST(DebugSip, givenDebuggingActiveWhenSipTypeIsQueriedThenDbgCsrSipTypeIsReturned) { auto sipType = SipKernel::getSipKernelType(renderCoreFamily, true); EXPECT_LE(SipKernelType::DbgCsr, sipType); } TEST(DebugSip, WhenRequestingDbgCsrSipKernelThenProperCompilerInternalOptionsAreReturned) { const char *opt = getSipKernelCompilerInternalOptions(SipKernelType::DbgCsr); ASSERT_NE(nullptr, opt); EXPECT_STREQ("-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0", opt); } TEST(DebugSip, WhenRequestingDbgCsrWithLocalMemorySipKernelThenProperCompilerInternalOptionsAreReturned) { const char *opt = getSipKernelCompilerInternalOptions(SipKernelType::DbgCsrLocal); ASSERT_NE(nullptr, opt); EXPECT_STREQ("-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0", opt); } TEST(DebugSip, DISABLED_givenBuiltInsWhenDbgCsrSipIsRequestedThanCorrectSipKernelIsReturned) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); 
MockCompilerDebugVars igcDebugVars; std::string name = "sip_dummy_kernel_debug"; std::string builtInFileRoot = testFiles + getDebugSipKernelNameWithBitnessAndProductSuffix(name, binaryNameSuffix.c_str()); std::string builtInGenFile = builtInFileRoot; builtInGenFile.append(".gen"); igcDebugVars.fileName = builtInGenFile; gEnvironment->igcPushDebugVars(igcDebugVars); auto &builtins = *mockDevice->getBuiltIns(); auto &sipKernel = builtins.getSipKernel(SipKernelType::DbgCsr, *mockDevice); EXPECT_NE(nullptr, &sipKernel); EXPECT_EQ(SipKernelType::DbgCsr, sipKernel.getType()); gEnvironment->igcPopDebugVars(); } } // namespace SipKernelTests compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/000077500000000000000000000000001363734646600244545ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/CMakeLists.txt000066400000000000000000000124361363734646600272220ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/buffer_operations_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/blit_enqueue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_walker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_barrier_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_command_without_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_rect_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_to_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_buffer_to_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_fixture.h 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_to_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_copy_image_to_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_debug_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_negative_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fill_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_global_offset_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_local_work_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_ioq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_ooq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_walker_ioq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_two_walker_ooq_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_map_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_marker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_migrate_mem_objects_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/enqueue_resource_barier_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_mem_copy_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_mem_fill_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_thread_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_unmap_memobject_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_waitlist_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/finish_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/flattened_id_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/flush_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_command_queue_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_size_required_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ioq_task_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ioq_task_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_id_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_work_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multi_dispatch_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multiple_map_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multiple_map_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/oom_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/oom_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/oom_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ooq_task_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ooq_task_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/read_write_buffer_cpu_copy.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/sync_buffer_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/work_group_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zero_size_enqueue_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_command_queue}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp000066400000000000000000001070551363734646600310730ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/vec.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "test.h" using namespace NEO; struct BlitAuxTranslationTests : public ::testing::Test { class BcsMockContext : public MockContext { public: BcsMockContext(ClDevice *device) : MockContext(device) { bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex())); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); } BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const override { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, hostPtr, 
memory->getGpuAddress(), 0, 0, 0, size, 0, 0, 0, 0); BlitPropertiesContainer container; container.push_back(blitProperties); bcsCsr->blitBuffer(container, true); return BlitOperationResult::Success; } std::unique_ptr bcsOsContext; std::unique_ptr bcsCsr; }; template void SetUpT() { auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); if (is32bit || !hwHelper.requiresAuxResolves()) { GTEST_SKIP(); } DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); DebugManager.flags.ForceAuxTranslationMode.set(1); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; bool createBcsEngine = !capabilityTable.blitterOperationsSupported; capabilityTable.blitterOperationsSupported = true; if (createBcsEngine) { auto &engine = device->getEngine(HwHelperHw::lowPriorityEngineType, true); bcsOsContext.reset(OsContext::create(nullptr, 1, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); engine.osContext = bcsOsContext.get(); engine.commandStreamReceiver->setupContext(*bcsOsContext); } bcsMockContext = std::make_unique(device.get()); auto mockCmdQueue = new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr); commandQueue.reset(mockCmdQueue); mockKernel = std::make_unique(*device, bcsMockContext.get()); mockKernel->mockKernel->auxTranslationRequired = true; auto mockProgram = mockKernel->mockProgram; mockProgram->setAllowNonUniform(true); gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver; bcsCsr = mockCmdQueue->bcsEngine->commandStreamReceiver; } template void TearDownT() {} template void setMockKernelArgs(std::array buffers) { if (mockKernel->kernelInfo.kernelArgInfo.size() < buffers.size()) { 
mockKernel->kernelInfo.kernelArgInfo.resize(buffers.size()); } mockKernel->mockKernel->initialize(); for (uint32_t i = 0; i < buffers.size(); i++) { cl_mem clMem = buffers[i]; mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1); mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false; mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem); } } ReleaseableObjectPtr createBuffer(size_t size, bool compressed) { auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal)); if (compressed) { buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); } else { buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); } return buffer; } template GenCmdList getCmdList(LinearStream &linearStream) { HardwareParse hwParser; hwParser.parseCommands(linearStream); return hwParser.cmdList; } template GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { using PIPE_CONTROL = typename Family::PIPE_CONTROL; PIPE_CONTROL *pipeControlCmd = nullptr; GenCmdList::iterator commandItor = itorStart; bool stallingWrite = false; do { commandItor = find(commandItor, itorEnd); if (itorEnd == commandItor) { EXPECT_TRUE(false); return itorEnd; } pipeControlCmd = genCmdCast(*commandItor); stallingWrite = pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA && pipeControlCmd->getCommandStreamerStallEnable(); ++commandItor; } while (!stallingWrite); return --commandItor; } template GenCmdList::iterator expectMiFlush(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { Family *miFlushCmd = nullptr; GenCmdList::iterator commandItor = itorStart; bool miFlushWithMemoryWrite = false; do { commandItor = find(commandItor, itorEnd); if (itorEnd == commandItor) { EXPECT_TRUE(false); return itorEnd; } 
miFlushCmd = genCmdCast(*commandItor); miFlushWithMemoryWrite = miFlushCmd->getDestinationAddress() != 0; ++commandItor; } while (!miFlushWithMemoryWrite); return --commandItor; } template GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { auto commandItor = find(itorStart, itorEnd); EXPECT_TRUE(commandItor != itorEnd); return commandItor; } template void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) { using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT; auto semaphoreCmd = genCmdCast(*semaphoreItor); EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); } DebugManagerStateRestore restore; std::unique_ptr bcsOsContext; std::unique_ptr device; std::unique_ptr bcsMockContext; std::unique_ptr commandQueue; std::unique_ptr mockKernel; CommandStreamReceiver *bcsCsr = nullptr; CommandStreamReceiver *gpgpuCsr = nullptr; size_t gws[3] = {63, 0, 0}; size_t lws[3] = {16, 0, 0}; uint32_t hostPtr = 0; cl_int retVal = CL_SUCCESS; }; HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingCommandBufferThenEnsureCorrectOrder) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); auto initialBcsTaskCount = mockCmdQ->bcsTaskCount; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); EXPECT_EQ(mockCmdQ->bcsTaskCount, initialBcsTaskCount + 1); // Gpgpu command buffer { auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto cmdListQueue = 
getCmdList(commandQueue->getCS(0)); // Barrier expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // task count expectPipeControl(++cmdFound, cmdListQueue.end()); } // BCS command buffer { auto cmdList = getCmdList(bcsCsr->getCS(0)); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Aux to NonAux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // wait for NDR cmdFound = expectCommand(++cmdFound, cmdList.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // taskCount expectCommand(++cmdFound, cmdList.end()); } } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingBlockedCommandBufferThenEnsureCorrectOrder) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); auto initialBcsTaskCount = 
mockCmdQ->bcsTaskCount; UserEvent userEvent; cl_event waitlist[] = {&userEvent}; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(mockCmdQ->bcsTaskCount, initialBcsTaskCount + 1); // Gpgpu command buffer { auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto ultCsr = static_cast *>(gpgpuCsr); auto cmdListQueue = getCmdList(*ultCsr->lastFlushedCommandStream); // Barrier expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // task count expectPipeControl(++cmdFound, cmdListQueue.end()); } // BCS command buffer { auto cmdList = getCmdList(bcsCsr->getCS(0)); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Aux to NonAux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // wait for NDR cmdFound = expectCommand(++cmdFound, cmdList.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // taskCount expectCommand(++cmdFound, cmdList.end()); } EXPECT_FALSE(mockCmdQ->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBarrier) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename 
FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto pipeControl = expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); auto pipeControlCmd = genCmdCast(*pipeControl); uint64_t low = pipeControlCmd->getAddress(); uint64_t high = pipeControlCmd->getAddressHigh(); uint64_t barrierGpuAddress = (high << 32) | low; auto cmdList = getCmdList(bcsCsr->getCS(0)); auto semaphore = expectCommand(cmdList.begin(), cmdList.end()); verifySemaphore(semaphore, barrierGpuAddress); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get()}}); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); uint64_t auxToNonAuxOutputAddress[2] = {}; uint64_t nonAuxToAuxOutputAddress[2] = {}; { auto cmdListBcs = getCmdList(bcsCsr->getCS(0)); auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); auto miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[0] = 
miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); } { auto cmdListQueue = getCmdList(commandQueue->getCS(0)); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[1]); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[1]); } } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeKernel) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto kernelNode = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto kernelNodeAddress = kernelNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // Aux to nonAux auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // semaphore before NonAux to Aux auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, kernelNodeAddress); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, 
givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeCacheFlush) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto cmdListBcs = getCmdList(bcsCsr->getCS(0)); auto cmdListQueue = getCmdList(mockCmdQ->getCS(0)); uint64_t cacheFlushWriteAddress = 0; { auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectPipeControl(++cmdFound, cmdListQueue.end()); auto pipeControlCmd = genCmdCast(*cmdFound); if (!pipeControlCmd->getDcFlushEnable()) { // skip pipe control with TimestampPacket write cmdFound = expectPipeControl(++cmdFound, cmdListQueue.end()); pipeControlCmd = genCmdCast(*cmdFound); } EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); uint64_t low = pipeControlCmd->getAddress(); uint64_t high = pipeControlCmd->getAddressHigh(); cacheFlushWriteAddress = (high << 32) | low; EXPECT_NE(0u, cacheFlushWriteAddress); } { // Aux to nonAux auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); // semaphore before NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); verifySemaphore(cmdFound, cacheFlushWriteAddress); } } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeEvents) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); 
setMockKernelArgs(std::array{{buffer.get()}}); auto event = make_releaseable(commandQueue.get(), CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); cl_event clEvent[] = {event.get()}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, clEvent, nullptr); auto eventDependencyAddress = eventDependency->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Event auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, eventDependencyAddress); cmdFound = expectCommand(++semaphore, cmdList.end()); expectCommand(++cmdFound, cmdList.end()); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenOutEventWhenDispatchingThenAssignNonAuxNodes) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); cl_event clEvent; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent); auto event = castToObject(clEvent); auto &eventNodes = event->getTimestampPacketNodes()->peekNodes(); EXPECT_EQ(3u, eventNodes.size()); auto cmdListQueue = getCmdList(commandQueue->getCS(0)); auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); auto eventNodeAddress = eventNodes[1]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); verifySemaphore(cmdFound, eventNodeAddress); cmdFound = 
expectCommand(++cmdFound, cmdListQueue.end()); eventNodeAddress = eventNodes[2]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); verifySemaphore(cmdFound, eventNodeAddress); clReleaseEvent(clEvent); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenDispatchingThenEstimateCmdBufferSize) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &hwInfo = device->getHardwareInfo(); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); MemObjsForAuxTranslation memObjects; memObjects.insert(buffer0.get()); memObjects.insert(buffer2.get()); size_t numBuffersToEstimate = 2; size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); mockCmdQ->storeMultiDispatchInfo = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); MultiDispatchInfo &multiDispatchInfo = mockCmdQ->storedMultiDispatchInfo; DispatchInfo *firstDispatchInfo = multiDispatchInfo.begin(); DispatchInfo *lastDispatchInfo = &(*multiDispatchInfo.rbegin()); EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); 
EXPECT_EQ(dependencySize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredCacheFlushWhenDispatchingThenEstimateCmdBufferSize) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &hwInfo = device->getHardwareInfo(); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); MemObjsForAuxTranslation memObjects; memObjects.insert(buffer0.get()); memObjects.insert(buffer2.get()); size_t numBuffersToEstimate = 2; size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); mockCmdQ->storeMultiDispatchInfo = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); MultiDispatchInfo &multiDispatchInfo = mockCmdQ->storedMultiDispatchInfo; DispatchInfo *firstDispatchInfo = multiDispatchInfo.begin(); DispatchInfo *lastDispatchInfo = &(*multiDispatchInfo.rbegin()); EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), 
hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(dependencySize + cacheFlushSize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeBarrier) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto pipeControl = expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); auto pipeControlCmd = genCmdCast(*pipeControl); uint64_t low = pipeControlCmd->getAddress(); uint64_t high = pipeControlCmd->getAddressHigh(); uint64_t barrierGpuAddress = (high << 32) | low; auto cmdList = getCmdList(bcsCsr->getCS(0)); auto semaphore = expectCommand(cmdList.begin(), cmdList.end()); verifySemaphore(semaphore, barrierGpuAddress); EXPECT_FALSE(commandQueue->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeEvents) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto event = make_releaseable(commandQueue.get(), CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); UserEvent userEvent; cl_event waitlist[] = {&userEvent, event.get()}; 
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto eventDependencyAddress = eventDependency->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Event auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, eventDependencyAddress); cmdFound = expectCommand(++semaphore, cmdList.end()); expectCommand(++cmdFound, cmdList.end()); EXPECT_FALSE(commandQueue->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeKernel) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto kernelNode = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto kernelNodeAddress = kernelNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // Aux to nonAux auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // semaphore before NonAux to Aux auto semaphore = expectCommand(++cmdFound, cmdList.end()); if (mockCmdQ->isCacheFlushForBcsRequired()) { semaphore = expectCommand(++semaphore, cmdList.end()); } verifySemaphore(semaphore, kernelNodeAddress); EXPECT_FALSE(commandQueue->isQueueBlocked()); } HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using 
MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get()}}); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); uint64_t auxToNonAuxOutputAddress[2] = {}; uint64_t nonAuxToAuxOutputAddress[2] = {}; { auto cmdListBcs = getCmdList(bcsCsr->getCS(0)); auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); auto miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); } { auto ultCsr = static_cast *>(gpgpuCsr); auto cmdListQueue = getCmdList(*ultCsr->lastFlushedCommandStream); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[1]); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[0]); cmdFound = 
expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[1]); } EXPECT_FALSE(commandQueue->isQueueBlocked()); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/buffer_operations_fixture.h000066400000000000000000000035761363734646600321220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueWriteBufferTypeTest : public CommandEnqueueFixture, public ::testing::Test { EnqueueWriteBufferTypeTest(void) : srcBuffer(nullptr) { } void SetUp() override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; zeroCopyBuffer.reset(BufferHelper<>::create()); srcBuffer.reset(BufferHelper>::create()); } void TearDown() override { srcBuffer.reset(nullptr); zeroCopyBuffer.reset(nullptr); delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueWriteBuffer(cl_bool blocking = EnqueueWriteBufferTraits::blocking) { auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, srcBuffer.get(), blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } template void enqueueWriteBuffer(bool blocking, void *inputData, int size) { auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, srcBuffer.get(), blocking, 0, size, inputData); EXPECT_EQ(CL_SUCCESS, retVal); } std::unique_ptr srcBuffer; std::unique_ptr zeroCopyBuffer; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/command_enqueue_fixture.h000066400000000000000000000115371363734646600315470ustar00rootroot00000000000000/* * 
Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" namespace NEO { struct CommandDeviceFixture : public DeviceFixture, public CommandQueueHwFixture { using CommandQueueHwFixture::SetUp; void SetUp(cl_command_queue_properties cmdQueueProperties = 0) { DeviceFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, cmdQueueProperties); } void TearDown() override { CommandQueueHwFixture::TearDown(); DeviceFixture::TearDown(); } }; struct CommandEnqueueBaseFixture : CommandDeviceFixture, public IndirectHeapFixture, public HardwareParse { using IndirectHeapFixture::SetUp; void SetUp(cl_command_queue_properties cmdQueueProperties = 0) { CommandDeviceFixture::SetUp(cmdQueueProperties); IndirectHeapFixture::SetUp(pCmdQ); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); IndirectHeapFixture::TearDown(); CommandDeviceFixture::TearDown(); } }; struct CommandEnqueueFixture : public CommandEnqueueBaseFixture, public CommandStreamFixture { void SetUp(cl_command_queue_properties cmdQueueProperties = 0) { CommandEnqueueBaseFixture::SetUp(cmdQueueProperties); CommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { CommandEnqueueBaseFixture::TearDown(); CommandStreamFixture::TearDown(); } }; struct NegativeFailAllocationCommandEnqueueBaseFixture : public CommandEnqueueBaseFixture { void SetUp() override { CommandEnqueueBaseFixture::SetUp(); 
failMemManager.reset(new FailMemoryManager(*pDevice->getExecutionEnvironment())); BufferDefaults::context = context; Image2dDefaults::context = context; buffer.reset(BufferHelper<>::create()); image.reset(ImageHelper::create()); ptr = static_cast(array); oldMemManager = pDevice->getExecutionEnvironment()->memoryManager.release(); pDevice->injectMemoryManager(failMemManager.release()); } void TearDown() override { pDevice->injectMemoryManager(oldMemManager); buffer.reset(nullptr); image.reset(nullptr); BufferDefaults::context = nullptr; Image2dDefaults::context = nullptr; CommandEnqueueBaseFixture::TearDown(); } std::unique_ptr buffer; std::unique_ptr image; std::unique_ptr failMemManager; char array[MemoryConstants::cacheLineSize]; void *ptr; MemoryManager *oldMemManager; }; template struct CommandQueueStateless : public CommandQueueHw { CommandQueueStateless(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } }; template struct CommandQueueStateful : public CommandQueueHw { CommandQueueStateful(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); auto &device = kernel->getDevice(); if (!device.areSharedSystemAllocationsAllowed()) { EXPECT_FALSE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) { EXPECT_TRUE(kernel->allBufferArgsStateful); } } else { 
EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/command_queue_fixture.cpp000066400000000000000000000053321363734646600315530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gtest/gtest.h" namespace NEO { // Global table of create functions extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; CommandQueue *CommandQueueHwFixture::createCommandQueue( ClDevice *pDevice, cl_command_queue_properties properties) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, properties, 0}; return createCommandQueue(pDevice, props); } CommandQueue *CommandQueueHwFixture::createCommandQueue( ClDevice *pDevice, const cl_command_queue_properties *properties) { if (pDevice == nullptr) { if (this->device == nullptr) { this->device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; } pDevice = this->device; } if (!context) context = new MockContext(pDevice); auto funcCreate = commandQueueFactory[pDevice->getRenderCoreFamily()]; assert(nullptr != funcCreate); return funcCreate(context, pDevice, properties, false); } void CommandQueueHwFixture::SetUp() { ASSERT_NE(nullptr, pCmdQ); context = new MockContext(); } void CommandQueueHwFixture::SetUp( ClDevice *pDevice, cl_command_queue_properties properties) { ASSERT_NE(nullptr, pDevice); context = new MockContext(pDevice); pCmdQ 
= createCommandQueue(pDevice, properties); ASSERT_NE(nullptr, pCmdQ); } void CommandQueueHwFixture::TearDown() { //resolve event dependencies if (pCmdQ) { auto blocked = pCmdQ->isQueueBlocked(); UNRECOVERABLE_IF(blocked); pCmdQ->release(); } if (context) { context->release(); } if (device) { delete device; } } CommandQueue *CommandQueueFixture::createCommandQueue( Context *context, ClDevice *device, cl_command_queue_properties properties) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, properties, 0}; return new MockCommandQueue( context, device, props); } void CommandQueueFixture::SetUp( Context *context, ClDevice *device, cl_command_queue_properties properties) { pCmdQ = createCommandQueue( context, device, properties); } void CommandQueueFixture::TearDown() { delete pCmdQ; pCmdQ = nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/command_queue_fixture.h000066400000000000000000000044671363734646600312300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "CL/cl.h" #include "gtest/gtest.h" namespace NEO { class Device; struct CommandQueueHwFixture { CommandQueue *createCommandQueue(ClDevice *device) { return createCommandQueue(device, cl_command_queue_properties{0}); } CommandQueue *createCommandQueue( ClDevice *device, cl_command_queue_properties properties); CommandQueue *createCommandQueue( ClDevice *device, const cl_command_queue_properties *properties); virtual void SetUp(); virtual void SetUp(ClDevice *pDevice, cl_command_queue_properties properties); virtual void TearDown(); CommandQueue *pCmdQ = nullptr; ClDevice *device = nullptr; MockContext *context = nullptr; }; struct OOQueueFixture : public CommandQueueHwFixture { typedef CommandQueueHwFixture BaseClass; void SetUp(ClDevice *pDevice, 
cl_command_queue_properties properties) override { ASSERT_NE(nullptr, pDevice); BaseClass::pCmdQ = BaseClass::createCommandQueue(pDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); ASSERT_NE(nullptr, BaseClass::pCmdQ); } }; struct CommandQueueFixture { virtual void SetUp( Context *context, ClDevice *device, cl_command_queue_properties properties); virtual void TearDown(); CommandQueue *createCommandQueue( Context *context, ClDevice *device, cl_command_queue_properties properties); CommandQueue *pCmdQ = nullptr; }; static const cl_command_queue_properties AllCommandQueueProperties[] = { 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE_DEFAULT, CL_QUEUE_PROFILING_ENABLE, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE_DEFAULT}; static const cl_command_queue_properties DefaultCommandQueueProperties[] = { 0, CL_QUEUE_PROFILING_ENABLE, }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp000066400000000000000000001471761363734646600317420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include 
"opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; struct CommandQueueHwTest : public DeviceFixture, public ContextFixture, public CommandQueueHwFixture, ::testing::Test { using ContextFixture::SetUp; void SetUp() override { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); CommandQueueHwFixture::SetUp(pClDevice, 0); } void TearDown() override { CommandQueueHwFixture::TearDown(); ContextFixture::TearDown(); DeviceFixture::TearDown(); } cl_command_queue_properties properties; const HardwareInfo *pHwInfo = nullptr; }; struct OOQueueHwTest : public DeviceFixture, public ContextFixture, public OOQueueFixture, ::testing::Test { using ContextFixture::SetUp; OOQueueHwTest() { } void SetUp() override { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); OOQueueFixture::SetUp(pClDevice, 0); } void SetUp(ClDevice *pDevice, cl_command_queue_properties properties) override { } void TearDown() override { OOQueueFixture::TearDown(); ContextFixture::TearDown(); DeviceFixture::TearDown(); } }; HWTEST_F(CommandQueueHwTest, WhenEnqueuingBlockedMapUnmapOperationThenVirtualEventIsCreated) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); MockBuffer buffer; pHwQ->virtualEvent = nullptr; MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, &buffer, size, offset, false, eventBuilder); ASSERT_NE(nullptr, pHwQ->virtualEvent); pHwQ->virtualEvent->decRefInternal(); 
pHwQ->virtualEvent = nullptr; } HWTEST_F(CommandQueueHwTest, givenCommandQueueWhenAskingForCacheFlushOnBcsThenReturnTrue) { auto pHwQ = static_cast *>(pCmdQ); EXPECT_TRUE(pHwQ->isCacheFlushForBcsRequired()); } HWTEST_F(CommandQueueHwTest, givenBlockedMapBufferCallWhenMemObjectIsPassedToCommandThenItsRefCountIsBeingIncreased) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); MockBuffer buffer; pHwQ->virtualEvent = nullptr; auto currentRefCount = buffer.getRefInternalCount(); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, &buffer, size, offset, false, eventBuilder); EXPECT_EQ(currentRefCount + 1, buffer.getRefInternalCount()); ASSERT_NE(nullptr, pHwQ->virtualEvent); pHwQ->virtualEvent->decRefInternal(); pHwQ->virtualEvent = nullptr; EXPECT_EQ(currentRefCount, buffer.getRefInternalCount()); } HWTEST_F(CommandQueueHwTest, givenNoReturnEventWhenCallingEnqueueBlockedMapUnmapOperationThenVirtualEventIncrementsCommandQueueInternalRefCount) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); MockBuffer buffer; pHwQ->virtualEvent = nullptr; auto initialRefCountInternal = pHwQ->getRefInternalCount(); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, &buffer, size, offset, false, eventBuilder); ASSERT_NE(nullptr, pHwQ->virtualEvent); auto refCountInternal = pHwQ->getRefInternalCount(); EXPECT_EQ(initialRefCountInternal + 1, refCountInternal); pHwQ->virtualEvent->decRefInternal(); pHwQ->virtualEvent = nullptr; } HWTEST_F(CommandQueueHwTest, WhenAddMapUnmapToWaitlistEventsThenDependenciesAreNotAddedIntoChild) { auto buffer = new MockBuffer; CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); auto returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); auto event = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); const cl_event eventWaitList = event; 
pHwQ->virtualEvent = nullptr; MockEventBuilder eventBuilder(returnEvent); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(&eventWaitList, 1, MAP, buffer, size, offset, false, eventBuilder); EXPECT_EQ(returnEvent, pHwQ->virtualEvent); ASSERT_EQ(nullptr, event->peekChildEvents()); // Release API refcount (i.e. from workload's perspective) returnEvent->release(); event->decRefInternal(); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedThenTaskCountIsNotBeingWaited) { auto buffer = new MockBuffer; MockCommandQueueHw mockCmdQueueHw(context, pClDevice, nullptr); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, eventBuilder); EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent); mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE); EXPECT_EQ(std::numeric_limits::max(), mockCmdQueueHw.latestTaskCountWaited); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedOnNonZeroCopyBufferThenTaskCountIsBeingWaited) { auto buffer = new MockBuffer; buffer->isZeroCopy = false; MockCommandQueueHw mockCmdQueueHw(context, pClDevice, nullptr); MockEventBuilder eventBuilder; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, eventBuilder); EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent); mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE); EXPECT_EQ(1u, mockCmdQueueHw.latestTaskCountWaited); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, GivenEventWhenEnqueuingBlockedMapUnmapOperationThenEventIsRetained) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); Event *returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); auto buffer = new MockBuffer; 
pHwQ->virtualEvent = nullptr; MockEventBuilder eventBuilder(returnEvent); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, eventBuilder); eventBuilder.finalizeAndRelease(); EXPECT_EQ(returnEvent, pHwQ->virtualEvent); EXPECT_NE(nullptr, returnEvent->peekCommand()); // CommandQueue has retained this event, release it returnEvent->release(); pHwQ->virtualEvent = nullptr; delete returnEvent; buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, GivenEventWhenEnqueuingBlockedMapUnmapOperationThenChildIsUnaffected) { auto buffer = new MockBuffer; CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); Event *returnEvent = new Event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); Event event(pHwQ, CL_COMMAND_MAP_BUFFER, 0, 0); pHwQ->virtualEvent = nullptr; pHwQ->virtualEvent = &event; //virtual event from regular event to stored in previousVirtualEvent pHwQ->virtualEvent->incRefInternal(); MockEventBuilder eventBuilder(returnEvent); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; pHwQ->enqueueBlockedMapUnmapOperation(nullptr, 0, MAP, buffer, size, offset, false, eventBuilder); EXPECT_EQ(returnEvent, pHwQ->virtualEvent); ASSERT_EQ(nullptr, event.peekChildEvents()); returnEvent->release(); buffer->decRefInternal(); } HWTEST_F(CommandQueueHwTest, GivenNonEmptyQueueOnBlockingMapBufferWillWaitForPrecedingCommandsToComplete) { struct MockCmdQ : CommandQueueHw { MockCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, 0, false) { finishWasCalled = false; } cl_int finish() override { finishWasCalled = true; return 0; } bool finishWasCalled; }; MockCmdQ cmdQ(context, pCmdQ->getDevice().getSpecializedDevice()); auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr); auto b2 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr); auto gatingEvent = clCreateUserEvent(context, nullptr); void *ptr1 = 
clEnqueueMapBuffer(&cmdQ, b1, CL_FALSE, CL_MAP_READ, 0, 8, 1, &gatingEvent, nullptr, nullptr); clEnqueueUnmapMemObject(&cmdQ, b1, ptr1, 0, nullptr, nullptr); ASSERT_FALSE(cmdQ.finishWasCalled); void *ptr2 = clEnqueueMapBuffer(&cmdQ, b2, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, nullptr); ASSERT_TRUE(cmdQ.finishWasCalled); clSetUserEventStatus(gatingEvent, CL_COMPLETE); clEnqueueUnmapMemObject(pCmdQ, b2, ptr2, 0, nullptr, nullptr); clReleaseMemObject(b1); clReleaseMemObject(b2); clReleaseEvent(gatingEvent); } HWTEST_F(CommandQueueHwTest, GivenEventsWaitlistOnBlockingMapBufferWillWaitForEvents) { struct MockEvent : UserEvent { MockEvent(Context *ctx, uint32_t updateCountBeforeCompleted) : UserEvent(ctx), updateCount(0), updateCountBeforeCompleted(updateCountBeforeCompleted) { this->updateTaskCount(0); this->taskLevel = 0; } void updateExecutionStatus() override { ++updateCount; if (updateCount == updateCountBeforeCompleted) { transitionExecutionStatus(CL_COMPLETE); } unblockEventsBlockedByThis(executionStatus); } uint32_t updateCount; uint32_t updateCountBeforeCompleted; }; MockEvent *me = new MockEvent(context, 1024); auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr); cl_event meAsClEv = me; void *ptr1 = clEnqueueMapBuffer(pCmdQ, b1, CL_TRUE, CL_MAP_READ, 0, 8, 1, &meAsClEv, nullptr, nullptr); ASSERT_TRUE(me->updateStatusAndCheckCompletion()); ASSERT_LE(me->updateCountBeforeCompleted, me->updateCount); clEnqueueUnmapMemObject(pCmdQ, b1, ptr1, 0, nullptr, nullptr); clReleaseMemObject(b1); me->release(); } HWTEST_F(CommandQueueHwTest, GivenNotCompleteUserEventPassedToEnqueueWhenEventIsUnblockedThenAllSurfacesForBlockedCommandsAreMadeResident) { int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCSR); auto userEvent = make_releaseable(context); KernelInfo kernelInfo; MockKernelWithInternals 
mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; auto mockProgram = mockKernelWithInternals.mockProgram; size_t offset = 0; size_t size = 1; GraphicsAllocation *constantSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockCSR->getRootDeviceIndex(), MemoryConstants::pageSize}); mockProgram->setConstantSurface(constantSurface); GraphicsAllocation *printfSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockCSR->getRootDeviceIndex(), MemoryConstants::pageSize}); GraphicsAllocation *privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockCSR->getRootDeviceIndex(), MemoryConstants::pageSize}); mockKernel->setPrivateSurface(privateSurface, 10); cl_event blockedEvent = userEvent.get(); pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); userEvent->setStatus(CL_COMPLETE); EXPECT_TRUE(mockCSR->isMadeResident(constantSurface)); EXPECT_TRUE(mockCSR->isMadeResident(privateSurface)); mockKernel->setPrivateSurface(nullptr, 0); mockProgram->setConstantSurface(nullptr); mockCSR->getMemoryManager()->freeGraphicsMemory(privateSurface); mockCSR->getMemoryManager()->freeGraphicsMemory(printfSurface); mockCSR->getMemoryManager()->freeGraphicsMemory(constantSurface); } typedef CommandQueueHwTest BlockedCommandQueueTest; HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); userEvent.setStatus(CL_COMPLETE); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u); auto &dsh = 
pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u); uint32_t defaultSshUse = UnitTestHelper::getDefaultSshUsage(); EXPECT_EQ(0u, ioh.getUsed()); EXPECT_EQ(0u, dsh.getUsed()); EXPECT_EQ(defaultSshUse, ssh.getUsed()); pCmdQ->isQueueBlocked(); } HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWithUsedHeapsWhenBlockedCommandIsBeingSubmittedThenQueueHeapsAreNotUsed) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u); auto spaceToUse = 4u; ioh.getSpace(spaceToUse); dsh.getSpace(spaceToUse); ssh.getSpace(spaceToUse); pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); userEvent.setStatus(CL_COMPLETE); uint32_t sshSpaceUse = spaceToUse + UnitTestHelper::getDefaultSshUsage(); EXPECT_EQ(spaceToUse, ioh.getUsed()); EXPECT_EQ(spaceToUse, dsh.getUsed()); EXPECT_EQ(sshSpaceUse, ssh.getUsed()); pCmdQ->isQueueBlocked(); } HWTEST_F(BlockedCommandQueueTest, givenCommandQueueWhichHasSomeUnusedHeapsWhenBlockedCommandIsBeingSubmittedThenThoseHeapsAreBeingUsed) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096u); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 4096u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u); auto iohBase = ioh.getCpuBase(); auto dshBase = dsh.getCpuBase(); auto sshBase = ssh.getCpuBase(); 
pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(iohBase, ioh.getCpuBase()); EXPECT_EQ(dshBase, dsh.getCpuBase()); EXPECT_EQ(sshBase, ssh.getCpuBase()); pCmdQ->isQueueBlocked(); } HWTEST_F(BlockedCommandQueueTest, givenEnqueueBlockedByUserEventWhenItIsEnqueuedThenKernelReferenceCountIsIncreased) { UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; auto currentRefCount = mockKernel->getRefInternalCount(); pCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); EXPECT_EQ(currentRefCount + 1, mockKernel->getRefInternalCount()); userEvent.setStatus(CL_COMPLETE); pCmdQ->isQueueBlocked(); EXPECT_EQ(currentRefCount, mockKernel->getRefInternalCount()); } typedef CommandQueueHwTest CommandQueueHwRefCountTest; HWTEST_F(CommandQueueHwRefCountTest, givenBlockedCmdQWhenNewBlockedEnqueueReplacesVirtualEventThenPreviousVirtualEventDecrementsCmdQRefCount) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event blockedEvent = &userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increments cmdQ refCount, virtualEvent increments refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // new virtual event increments refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); userEvent.setStatus(CL_COMPLETE); // UserEvent is set to complete and event tree is unblocked, queue has only 1 
refference to itself after this operation EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); //this call will release the queue releaseQueue(mockCmdQ, retVal); } HWTEST_F(CommandQueueHwRefCountTest, givenBlockedCmdQWithOutputEventAsVirtualEventWhenNewBlockedEnqueueReplacesVirtualEventCreatedFromOutputEventThenPreviousVirtualEventDoesntDecrementRefCount) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent userEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event eventOut = nullptr; cl_event blockedEvent = &userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut); //output event increments EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // previous virtualEvent which was outputEvent DOES NOT decrement refCount, // new virtual event increments refCount EXPECT_EQ(4, mockCmdQ->getRefInternalCount()); // unblocking deletes 2 virtualEvents userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); auto pEventOut = castToObject(eventOut); pEventOut->release(); // releasing output event decrements refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->isQueueBlocked(); releaseQueue(mockCmdQ, retVal); } HWTEST_F(CommandQueueHwRefCountTest, givenSeriesOfBlockedEnqueuesWhenEveryEventIsDeletedAndCmdQIsReleasedThenCmdQIsDeleted) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent *userEvent = new UserEvent(context); MockKernelWithInternals 
mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event eventOut = nullptr; cl_event blockedEvent = userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut); //output event increments refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // previous virtualEvent which was outputEvent DOES NOT decrement refCount, // new virtual event increments refCount EXPECT_EQ(4, mockCmdQ->getRefInternalCount()); // unblocking deletes 2 virtualEvents userEvent->setStatus(CL_COMPLETE); userEvent->release(); EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); auto pEventOut = castToObject(eventOut); pEventOut->release(); // releasing output event decrements refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->isQueueBlocked(); EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); releaseQueue(mockCmdQ, retVal); } HWTEST_F(CommandQueueHwRefCountTest, givenSeriesOfBlockedEnqueuesWhenCmdQIsReleasedBeforeOutputEventThenOutputEventDeletesCmdQ) { cl_int retVal = 0; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); UserEvent *userEvent = new UserEvent(context); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; cl_event eventOut = nullptr; cl_event blockedEvent = userEvent; EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // UserEvent on waitlist doesn't increment cmdQ refCount, virtualEvent increments 
refCount EXPECT_EQ(2, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, &eventOut); //output event increments refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); mockCmdQ->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); // previous virtualEvent which was outputEvent DOES NOT decrement refCount, // new virtual event increments refCount EXPECT_EQ(4, mockCmdQ->getRefInternalCount()); userEvent->setStatus(CL_COMPLETE); userEvent->release(); // releasing UserEvent doesn't change the queue refCount EXPECT_EQ(3, mockCmdQ->getRefInternalCount()); releaseQueue(mockCmdQ, retVal); // releasing cmdQ decrements refCount EXPECT_EQ(1, mockCmdQ->getRefInternalCount()); auto pEventOut = castToObject(eventOut); pEventOut->release(); } HWTEST_F(CommandQueueHwTest, GivenEventThatIsNotCompletedWhenFinishIsCalledAndItGetsCompletedThenItStatusIsUpdatedAfterFinishCall) { cl_int ret; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); struct ClbFuncTempStruct { static void CL_CALLBACK ClbFuncT(cl_event e, cl_int execStatus, void *valueForUpdate) { *((cl_int *)valueForUpdate) = 1; } }; auto Value = 0u; auto ev = new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, CompletionStamp::levelNotReady + 1); clSetEventCallback(ev, CL_COMPLETE, ClbFuncTempStruct::ClbFuncT, &Value); auto &csr = this->pCmdQ->getGpgpuCommandStreamReceiver(); EXPECT_GT(3u, csr.peekTaskCount()); *csr.getTagAddress() = CompletionStamp::levelNotReady + 1; ret = clFinish(this->pCmdQ); ASSERT_EQ(CL_SUCCESS, ret); ev->updateExecutionStatus(); EXPECT_EQ(1u, Value); ev->decRefInternal(); } void CloneMdi(MultiDispatchInfo &dst, const MultiDispatchInfo &src) { for (auto &srcDi : src) { dst.push(srcDi); } } struct MockBuilder : BuiltinDispatchInfoBuilder { MockBuilder(NEO::BuiltIns &builtins) : BuiltinDispatchInfoBuilder(builtins) { } bool buildDispatchInfos(MultiDispatchInfo &d, const 
BuiltinOpParams &conf) const override { wasBuildDispatchInfosWithBuiltinOpParamsCalled = true; paramsReceived.multiDispatchInfo.setBuiltinOpParams(conf); return true; } bool buildDispatchInfos(MultiDispatchInfo &d, Kernel *kernel, const uint32_t dim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const override { paramsReceived.kernel = kernel; paramsReceived.gws = gws; paramsReceived.elws = elws; paramsReceived.offset = offset; wasBuildDispatchInfosWithKernelParamsCalled = true; DispatchInfoBuilder dib; dib.setKernel(paramsToUse.kernel); dib.setDispatchGeometry(dim, paramsToUse.gws, paramsToUse.elws, paramsToUse.offset); dib.bake(d); CloneMdi(paramsReceived.multiDispatchInfo, d); return true; } mutable bool wasBuildDispatchInfosWithBuiltinOpParamsCalled = false; mutable bool wasBuildDispatchInfosWithKernelParamsCalled = false; struct Params { MultiDispatchInfo multiDispatchInfo; Kernel *kernel = nullptr; Vec3 gws = Vec3{0, 0, 0}; Vec3 elws = Vec3{0, 0, 0}; Vec3 offset = Vec3{0, 0, 0}; }; mutable Params paramsReceived; Params paramsToUse; }; struct BuiltinParamsCommandQueueHwTests : public CommandQueueHwTest { void SetUpImpl(EBuiltInOps::Type operation) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto swapBuilder = builtIns->setBuiltinDispatchInfoBuilder( operation, *pContext, *pDevice, std::unique_ptr(new MockBuilder(*builtIns))); mockBuilder = static_cast(&BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( operation, *pDevice)); } MockBuilder *mockBuilder; }; HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadWriteBufferCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyBufferToBuffer); BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = 
alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 0, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.dstPtr); EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x); EXPECT_EQ(offset, builtinParams.srcOffset); status = pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, 0, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.srcPtr); EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x); EXPECT_EQ(offset, builtinParams.dstOffset); } HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueWriteImageCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyBufferToImage3d); std::unique_ptr dstImage(ImageHelper>::create(context)); auto imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 0}; size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; cl_int status = pCmdQ->enqueueWriteImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.srcPtr); EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x); EXPECT_EQ(offset, 
builtinParams.dstOffset); } HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadImageCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyImage3dToBuffer); std::unique_ptr dstImage(ImageHelper>::create(context)); auto imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 0}; size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; cl_int status = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); auto builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.dstPtr); EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x); EXPECT_EQ(offset, builtinParams.srcOffset); } HWTEST_F(BuiltinParamsCommandQueueHwTests, givenEnqueueReadWriteBufferRectCallWhenBuiltinParamsArePassedThenCheckValuesCorectness) { SetUpImpl(EBuiltInOps::CopyBufferRect); BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); size_t bufferOrigin[3] = {0, 0, 0}; size_t hostOrigin[3] = {0, 0, 0}; size_t region[3] = {0, 0, 0}; char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; cl_int status = pCmdQ->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, 0, nullptr); void *alignedPtr = alignDown(ptr, 4); size_t ptrOffset = ptrDiff(ptr, alignedPtr); Vec3 offset = {0, 0, 0}; auto builtinParams = 
mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.dstPtr); EXPECT_EQ(ptrOffset, builtinParams.dstOffset.x); EXPECT_EQ(offset, builtinParams.srcOffset); status = pCmdQ->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, 0, nullptr); EXPECT_EQ(CL_SUCCESS, status); builtinParams = mockBuilder->paramsReceived.multiDispatchInfo.peekBuiltinOpParams(); EXPECT_EQ(alignedPtr, builtinParams.srcPtr); EXPECT_EQ(offset, builtinParams.dstOffset); EXPECT_EQ(ptrOffset, builtinParams.srcOffset.x); } HWTEST_F(CommandQueueHwTest, givenCommandQueueThatIsBlockedAndUsesCpuCopyWhenEventIsReturnedItIsNotReady) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockBuffer buffer; cl_event returnEvent = nullptr; auto retVal = CL_SUCCESS; cmdQHw->taskLevel = CompletionStamp::levelNotReady; size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_READ_BUFFER, 0, false, &offset, &size, nullptr, false); EventsRequest eventsRequest(0, nullptr, &returnEvent); cmdQHw->cpuDataTransferHandler(transferProperties, eventsRequest, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CompletionStamp::levelNotReady, castToObject(returnEvent)->peekTaskCount()); clReleaseEvent(returnEvent); } HWTEST_F(CommandQueueHwTest, givenEventWithRecordedCommandWhenSubmitCommandIsCalledThenTaskCountMustBeUpdatedFromOtherThread) { std::atomic_bool go{false}; struct mockEvent : public Event { using Event::Event; using Event::eventWithoutCommand; using Event::submitCommand; void synchronizeTaskCount() override { *atomicFence = true; Event::synchronizeTaskCount(); } uint32_t synchronizeCallCount = 0u; std::atomic_bool *atomicFence = nullptr; }; mockEvent neoEvent(this->pCmdQ, CL_COMMAND_MAP_BUFFER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); neoEvent.atomicFence = &go; EXPECT_TRUE(neoEvent.eventWithoutCommand); neoEvent.eventWithoutCommand = false; 
EXPECT_EQ(CompletionStamp::levelNotReady, neoEvent.peekTaskCount()); std::thread t([&]() { while (!go) ; neoEvent.updateTaskCount(77u); }); neoEvent.submitCommand(false); EXPECT_EQ(77u, neoEvent.peekTaskCount()); t.join(); } HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsProvidedThenThisBuilderIsUsedForCreatingDispatchInfo) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockKernelWithInternals mockKernelToUse(*pClDevice); MockBuilder builder(*pDevice->getBuiltIns()); builder.paramsToUse.gws.x = 11; builder.paramsToUse.elws.x = 13; builder.paramsToUse.offset.x = 17; builder.paramsToUse.kernel = mockKernelToUse.mockKernel; MockKernelWithInternals mockKernelToSend(*pClDevice); mockKernelToSend.kernelInfo.builtinDispatchBuilder = &builder; NullSurface s; Surface *surfaces[] = {&s}; size_t gws[3] = {3, 0, 0}; size_t lws[3] = {5, 0, 0}; size_t off[3] = {7, 0, 0}; EXPECT_FALSE(builder.wasBuildDispatchInfosWithBuiltinOpParamsCalled); EXPECT_FALSE(builder.wasBuildDispatchInfosWithKernelParamsCalled); cmdQHw->template enqueueHandler(surfaces, false, mockKernelToSend.mockKernel, 1, off, gws, lws, lws, 0, nullptr, nullptr); EXPECT_FALSE(builder.wasBuildDispatchInfosWithBuiltinOpParamsCalled); EXPECT_TRUE(builder.wasBuildDispatchInfosWithKernelParamsCalled); EXPECT_EQ(Vec3(gws[0], gws[1], gws[2]), builder.paramsReceived.gws); EXPECT_EQ(Vec3(lws[0], lws[1], lws[2]), builder.paramsReceived.elws); EXPECT_EQ(Vec3(off[0], off[1], off[2]), builder.paramsReceived.offset); EXPECT_EQ(mockKernelToSend.mockKernel, builder.paramsReceived.kernel); auto dispatchInfo = builder.paramsReceived.multiDispatchInfo.begin(); EXPECT_EQ(1U, builder.paramsReceived.multiDispatchInfo.size()); EXPECT_EQ(builder.paramsToUse.gws.x, dispatchInfo->getGWS().x); EXPECT_EQ(builder.paramsToUse.elws.x, dispatchInfo->getEnqueuedWorkgroupSize().x); EXPECT_EQ(builder.paramsToUse.offset.x, dispatchInfo->getOffset().x); EXPECT_EQ(builder.paramsToUse.kernel, 
dispatchInfo->getKernel()); } HWTEST_F(CommandQueueHwTest, givenNonBlockedEnqueueWhenEventIsPassedThenUpdateItsFlushStamp) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.flushStamp->setStamp(5); size_t offset = 0; size_t size = 1; cl_event event; auto retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 0, nullptr, &event); ASSERT_EQ(CL_SUCCESS, retVal); auto eventObj = castToObject(event); EXPECT_EQ(csr.flushStamp->peekStamp(), eventObj->flushStamp->peekStamp()); EXPECT_EQ(csr.flushStamp->peekStamp(), pCmdQ->flushStamp->peekStamp()); eventObj->release(); } HWTEST_F(CommandQueueHwTest, givenBlockedEnqueueWhenEventIsPassedThenDontUpdateItsFlushStamp) { UserEvent userEvent; cl_event event, clUserEvent; CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.flushStamp->setStamp(5); size_t offset = 0; size_t size = 1; clUserEvent = &userEvent; auto retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 1, &clUserEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(cmdQHw->isQueueBlocked()); retVal = cmdQHw->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, nullptr, 0, nullptr, &event); ASSERT_EQ(CL_SUCCESS, retVal); FlushStamp expectedFlushStamp = 0; auto eventObj = castToObject(event); EXPECT_EQ(expectedFlushStamp, eventObj->flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, pCmdQ->flushStamp->peekStamp()); eventObj->release(); } HWTEST_F(CommandQueueHwTest, givenBlockedInOrderCmdQueueAndAsynchronouslyCompletedEventWhenEnqueueCompletesVirtualEventThenUpdatedTaskLevelIsPassedToEnqueueAndFlushTask) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); int32_t executionStamp = 0; auto mockCSR = new 
MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCSR); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; auto event = new Event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, 10, 0); uint32_t virtualEventTaskLevel = 77; uint32_t virtualEventTaskCount = 80; auto virtualEvent = new Event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, virtualEventTaskLevel, virtualEventTaskCount); cl_event blockedEvent = event; // Put Queue in blocked state by assigning virtualEvent event->addChild(*virtualEvent); virtualEvent->incRefInternal(); cmdQHw->virtualEvent = virtualEvent; *mockCSR->getTagAddress() = 0u; cmdQHw->taskLevel = 23; cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); //new virtual event is created on enqueue, bind it to the created virtual event EXPECT_NE(cmdQHw->virtualEvent, virtualEvent); EXPECT_EQ(virtualEvent->peekExecutionStatus(), CL_QUEUED); event->setStatus(CL_SUBMITTED); EXPECT_EQ(virtualEvent->peekExecutionStatus(), CL_SUBMITTED); EXPECT_FALSE(cmdQHw->isQueueBlocked()); // +1 for next level after virtualEvent is unblocked // +1 as virtualEvent was a parent for event with actual command that is being submitted EXPECT_EQ(virtualEventTaskLevel + 2, cmdQHw->taskLevel); //command being submitted was dependant only on virtual event hence only +1 EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask); *mockCSR->getTagAddress() = initialHardwareTag; virtualEvent->decRefInternal(); event->decRefInternal(); } HWTEST_F(CommandQueueHwTest, givenBlockedOutOfOrderQueueWhenUserEventIsSubmittedThenNDREventIsSubmittedAsWell) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); auto &mockCsr = pDevice->getUltCommandStreamReceiver(); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 
0; size_t size = 1; cl_event userEvent = clCreateUserEvent(this->pContext, nullptr); cl_event blockedEvent = nullptr; *mockCsr.getTagAddress() = 0u; cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &userEvent, &blockedEvent); auto neoEvent = castToObject(blockedEvent); EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_QUEUED); neoEvent->updateExecutionStatus(); EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_QUEUED); EXPECT_EQ(neoEvent->peekTaskCount(), CompletionStamp::levelNotReady); clSetUserEventStatus(userEvent, 0u); EXPECT_EQ(neoEvent->peekExecutionStatus(), CL_SUBMITTED); EXPECT_EQ(neoEvent->peekTaskCount(), 1u); *mockCsr.getTagAddress() = initialHardwareTag; clReleaseEvent(blockedEvent); clReleaseEvent(userEvent); } HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompletedEventWhenEnqueueCompletesVirtualEventThenUpdatedTaskLevelIsPassedToEnqueueAndFlushTask) { CommandQueueHw *cmdQHw = static_cast *>(this->pCmdQ); int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCSR); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; size_t offset = 0; size_t size = 1; class MockEventWithSetCompleteOnUpdate : public Event { public: MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } void updateExecutionStatus() override { setStatus(CL_COMPLETE); } }; Event event(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, 10, 0); uint32_t virtualEventTaskLevel = 77; uint32_t virtualEventTaskCount = 80; MockEventWithSetCompleteOnUpdate virtualEvent(cmdQHw, CL_COMMAND_NDRANGE_KERNEL, virtualEventTaskLevel, virtualEventTaskCount); cl_event blockedEvent = &event; // Put Queue in blocked state by assigning virtualEvent virtualEvent.incRefInternal(); 
event.addChild(virtualEvent); cmdQHw->virtualEvent = &virtualEvent; cmdQHw->taskLevel = 23; cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); //new virtual event is created on enqueue, bind it to the created virtual event EXPECT_NE(cmdQHw->virtualEvent, &virtualEvent); event.setStatus(CL_SUBMITTED); virtualEvent.Event::updateExecutionStatus(); EXPECT_FALSE(cmdQHw->isQueueBlocked()); //+1 due to dependency between virtual event & new virtual event //new virtual event is actually responsible for command delivery EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel); EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask); } HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKernelMakeResidentCalledOnce) { KernelInfo kernelInfo; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; auto mockProgram = mockKernelWithInternals.mockProgram; mockProgram->setAllowNonUniform(true); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; size_t offset = 0; size_t gws = 63; size_t lws = 16; cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(1u, mockKernel->makeResidentCalls); } HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) { UserEvent userEvent(context); KernelInfo kernelInfo; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; auto mockProgram = mockKernelWithInternals.mockProgram; mockProgram->setAllowNonUniform(true); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; size_t offset = 0; size_t gws = 63; size_t lws = 16; cl_event blockedEvent = &userEvent; cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 1, &blockedEvent, nullptr); 
EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(1u, mockKernel->getResidencyCalls); userEvent.setStatus(CL_COMPLETE); pCmdQ->isQueueBlocked(); } HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) { UserEvent userEvent(context); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; csr.timestampPacketWriteEnabled = false; BufferDefaults::context = context; auto buffer = clUniquePtr(BufferHelper<>::create()); GraphicsAllocation *bufferAllocation = buffer->getGraphicsAllocation(); char array[3 * MemoryConstants::cacheLineSize]; char *ptr = &array[MemoryConstants::cacheLineSize]; ptr = alignUp(ptr, MemoryConstants::cacheLineSize); ptr -= 1; cl_event blockedEvent = &userEvent; cl_int status = pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 1, &blockedEvent, nullptr); EXPECT_EQ(CL_SUCCESS, status); userEvent.setStatus(CL_COMPLETE); std::map::iterator it = csr.makeResidentAllocations.begin(); for (; it != csr.makeResidentAllocations.end(); it++) { uint32_t expected = 1u; //Buffer surface will be added three times (for each kernel from split and as a base range of enqueueReadBuffer call) if (it->first == bufferAllocation) { expected = 3u; } EXPECT_EQ(expected, it->second); } pCmdQ->isQueueBlocked(); } HWTEST_F(CommandQueueHwTest, givenDefaultHwCommandQueueThenCacheFlushAfterWalkerIsNotNeeded) { EXPECT_FALSE(pCmdQ->getRequiresCacheFlushAfterWalker()); } HWTEST_F(CommandQueueHwTest, givenSizeWhenForceStatelessIsCalledThenCorrectValueIsReturned) { if (is32bit) { GTEST_SKIP(); } struct MockCommandQueueHw : public CommandQueueHw { using CommandQueueHw::forceStateless; }; MockCommandQueueHw *pCmdQHw = reinterpret_cast(pCmdQ); uint64_t bigSize = 4ull * MemoryConstants::gigaByte; EXPECT_TRUE(pCmdQHw->forceStateless(static_cast(bigSize))); uint64_t smallSize = bigSize - 1; EXPECT_FALSE(pCmdQHw->forceStateless(static_cast(smallSize))); 
} class MockCommandStreamReceiverWithFailingFlushBatchedSubmission : public MockCommandStreamReceiver { public: using MockCommandStreamReceiver::MockCommandStreamReceiver; bool flushBatchedSubmissions() override { return false; } }; template struct MockCommandQueueHwWithOverwrittenCsr : public CommandQueueHw { using CommandQueueHw::CommandQueueHw; MockCommandStreamReceiverWithFailingFlushBatchedSubmission *csr; CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; } }; HWTEST_F(CommandQueueHwTest, givenFlushWhenFlushBatchedSubmissionsFailsThenErrorIsRetured) { MockCommandQueueHwWithOverwrittenCsr cmdQueue(context, device, nullptr, false); MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->executionEnvironment, 0); cmdQueue.csr = &csr; cl_int errorCode = cmdQueue.flush(); EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode); } HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErrorIsRetured) { MockCommandQueueHwWithOverwrittenCsr cmdQueue(context, device, nullptr, false); MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->executionEnvironment, 0); cmdQueue.csr = &csr; cl_int errorCode = cmdQueue.finish(); EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode); } HWTEST_F(CommandQueueHwTest, givenEmptyDispatchGlobalsArgsWhenEnqueueInitDispatchGlobalsCalledThenErrorIsReturned) { EXPECT_EQ(CL_INVALID_VALUE, pCmdQ->enqueueInitDispatchGlobals(nullptr, 0, nullptr, nullptr)); } HWTEST_F(CommandQueueHwTest, WhenForcePerDssBackedBufferProgrammingSetThenDispatchFlagsAreSetAccordingly) { DebugManagerStateRestore restore; DebugManager.flags.ForcePerDssBackedBufferProgramming = true; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto mockKernel = mockKernelWithInternals.mockKernel; auto &csr = pDevice->getUltCommandStreamReceiver(); size_t offset = 0; size_t gws = 64; size_t lws = 16; cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr); 
EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(csr.recordedDispatchFlags.usePerDssBackedBuffer); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/command_queue_tests.cpp000066400000000000000000001345001363734646600312270ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include 
"opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using namespace NEO; struct CommandQueueMemoryDevice : public MemoryManagementFixture, public DeviceFixture { void SetUp() override { MemoryManagementFixture::SetUp(); DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); platformsImpl.clear(); MemoryManagementFixture::TearDown(); } }; struct CommandQueueTest : public CommandQueueMemoryDevice, public ContextFixture, public CommandQueueFixture, ::testing::TestWithParam { using CommandQueueFixture::SetUp; using ContextFixture::SetUp; CommandQueueTest() { } void SetUp() override { CommandQueueMemoryDevice::SetUp(); properties = GetParam(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); CommandQueueFixture::SetUp(pContext, pClDevice, properties); } void TearDown() override { CommandQueueFixture::TearDown(); ContextFixture::TearDown(); CommandQueueMemoryDevice::TearDown(); } cl_command_queue_properties properties; const HardwareInfo *pHwInfo = nullptr; }; TEST_P(CommandQueueTest, GivenNonFailingAllocationWhenCreatingCommandQueueThenCommandQueueIsCreated) { InjectedFunction method = [this](size_t failureIndex) { auto retVal = CL_INVALID_VALUE; auto pCmdQ = CommandQueue::create( pContext, pClDevice, nullptr, false, retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pCmdQ); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, pCmdQ); } delete pCmdQ; }; injectFailures(method); } INSTANTIATE_TEST_CASE_P(CommandQueue, CommandQueueTest, ::testing::ValuesIn(AllCommandQueueProperties)); TEST(CommandQueue, WhenConstructingCommandQueueThenTaskLevelAndTaskCountAreZero) { MockCommandQueue cmdQ(nullptr, nullptr, 0); EXPECT_EQ(0u, cmdQ.taskLevel); EXPECT_EQ(0u, cmdQ.taskCount); } struct GetTagTest : public DeviceFixture, public CommandQueueFixture, public 
CommandStreamFixture, public ::testing::Test { using CommandQueueFixture::SetUp; void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(nullptr, pClDevice, 0); CommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } }; TEST_F(GetTagTest, GivenSetHwTagWhenGettingHwTagThenCorrectTagIsReturned) { uint32_t tagValue = 0xdeadbeef; *pTagMemory = tagValue; EXPECT_EQ(tagValue, pCmdQ->getHwTag()); } TEST_F(GetTagTest, GivenInitialValueWhenGettingHwTagThenCorrectTagIsReturned) { MockContext context; MockCommandQueue commandQueue(&context, pClDevice, 0); EXPECT_EQ(initialHardwareTag, commandQueue.getHwTag()); } TEST(CommandQueue, GivenUpdatedCompletionStampWhenGettingCompletionStampThenUpdatedValueIsReturned) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); CompletionStamp cs = { cmdQ.taskCount + 100, cmdQ.taskLevel + 50, 5}; cmdQ.updateFromCompletionStamp(cs); EXPECT_EQ(cs.taskLevel, cmdQ.taskLevel); EXPECT_EQ(cs.taskCount, cmdQ.taskCount); EXPECT_EQ(cs.flushStamp, cmdQ.flushStamp->peekStamp()); } TEST(CommandQueue, givenTimeStampWithTaskCountNotReadyStatusWhenupdateFromCompletionStampIsBeingCalledThenQueueTaskCountIsNotUpdated) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); cmdQ.taskCount = 1u; CompletionStamp cs = { CompletionStamp::levelNotReady, 0, 0}; cmdQ.updateFromCompletionStamp(cs); EXPECT_EQ(1u, cmdQ.taskCount); } TEST(CommandQueue, GivenOOQwhenUpdateFromCompletionStampWithTrueIsCalledThenTaskLevelIsUpdated) { MockContext context; const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockCommandQueue cmdQ(&context, nullptr, props); auto oldTL = cmdQ.taskLevel; CompletionStamp cs = { cmdQ.taskCount + 100, cmdQ.taskLevel + 50, 5}; cmdQ.updateFromCompletionStamp(cs); EXPECT_NE(oldTL, cmdQ.taskLevel); EXPECT_EQ(oldTL + 50, cmdQ.taskLevel); 
EXPECT_EQ(cs.taskCount, cmdQ.taskCount); EXPECT_EQ(cs.flushStamp, cmdQ.flushStamp->peekStamp()); } TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngine) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0); auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver; EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); } TEST(CommandQueue, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.blitterOperationsSupported = false; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0); EXPECT_EQ(nullptr, cmdQ.getBcsCommandStreamReceiver()); } using CommandQueueWithSubDevicesTest = ::testing::Test; HWTEST_F(CommandQueueWithSubDevicesTest, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) { DebugManagerStateRestore restorer; VariableBackup mockDeviceFlagBackup{&MockDevice::createSingleDevice, false}; DebugManager.flags.CreateMultipleSubDevices.set(2); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); HardwareInfo hwInfo = *defaultHwInfo; bool createBcsEngine = !hwInfo.capabilityTable.blitterOperationsSupported; hwInfo.capabilityTable.blitterOperationsSupported = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); EXPECT_EQ(2u, device->getNumAvailableDevices()); std::unique_ptr bcsOsContext; auto subDevice = device->getDeviceById(0); if (createBcsEngine) { auto &engine = subDevice->getEngine(HwHelperHw::lowPriorityEngineType, true); bcsOsContext.reset(OsContext::create(nullptr, 1, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); engine.osContext = bcsOsContext.get(); 
engine.commandStreamReceiver->setupContext(*bcsOsContext); } auto bcsEngine = subDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, false); MockCommandQueue cmdQ(nullptr, device.get(), 0); EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver()); EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver()); EXPECT_EQ(bcsEngine.osContext, &cmdQ.getBcsCommandStreamReceiver()->getOsContext()); } TEST(CommandQueue, givenCmdQueueBlockedByReadyVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = new MockContext; auto cmdQ = new MockCommandQueue(context, mockDevice.get(), 0); auto userEvent = new Event(cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); userEvent->setStatus(CL_COMPLETE); userEvent->flushStamp->setStamp(5); userEvent->incRefInternal(); FlushStamp expectedFlushStamp = 0; EXPECT_EQ(expectedFlushStamp, cmdQ->flushStamp->peekStamp()); cmdQ->virtualEvent = userEvent; EXPECT_FALSE(cmdQ->isQueueBlocked()); EXPECT_EQ(userEvent->flushStamp->peekStamp(), cmdQ->flushStamp->peekStamp()); userEvent->decRefInternal(); cmdQ->decRefInternal(); context->decRefInternal(); } TEST(CommandQueue, givenCmdQueueBlockedByAbortedVirtualEventWhenUnblockingThenUpdateFlushTaskFromEvent) { auto context = new MockContext; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto cmdQ = new MockCommandQueue(context, mockDevice.get(), 0); auto userEvent = new Event(cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); userEvent->setStatus(-1); userEvent->flushStamp->setStamp(5); FlushStamp expectedFlushStamp = 0; EXPECT_EQ(expectedFlushStamp, cmdQ->flushStamp->peekStamp()); userEvent->incRefInternal(); cmdQ->virtualEvent = userEvent; EXPECT_FALSE(cmdQ->isQueueBlocked()); EXPECT_EQ(expectedFlushStamp, cmdQ->flushStamp->peekStamp()); userEvent->decRefInternal(); cmdQ->decRefInternal(); context->decRefInternal(); } struct 
CommandQueueCommandStreamTest : public CommandQueueMemoryDevice, public ::testing::Test { void SetUp() override { CommandQueueMemoryDevice::SetUp(); context.reset(new MockContext(pClDevice)); } void TearDown() override { context.reset(); CommandQueueMemoryDevice::TearDown(); } std::unique_ptr context; }; HWTEST_F(CommandQueueCommandStreamTest, givenCommandQueueThatWaitsOnAbortedUserEventWhenIsQueueBlockedIsCalledThenTaskLevelAlignsToCsr) { MockContext context; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockCommandQueue cmdQ(&context, mockDevice.get(), 0); auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver(); commandStreamReceiver.taskLevel = 100u; Event userEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); userEvent.setStatus(-1); userEvent.incRefInternal(); cmdQ.virtualEvent = &userEvent; EXPECT_FALSE(cmdQ.isQueueBlocked()); EXPECT_EQ(100u, cmdQ.taskLevel); } TEST_F(CommandQueueCommandStreamTest, GivenValidCommandQueueWhenGettingCommandStreamThenValidObjectIsReturned) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue commandQueue(context.get(), pClDevice, props); auto &cs = commandQueue.getCS(1024); EXPECT_NE(nullptr, &cs); } TEST_F(CommandQueueCommandStreamTest, GivenValidCommandStreamWhenGettingGraphicsAllocationThenMaxAvailableSpaceAndUnderlyingBufferSizeAreCorrect) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue commandQueue(context.get(), pClDevice, props); size_t minSizeRequested = 20; auto &cs = commandQueue.getCS(minSizeRequested); ASSERT_NE(nullptr, &cs); auto *allocation = cs.getGraphicsAllocation(); ASSERT_NE(nullptr, &allocation); size_t expectedCsSize = alignUp(minSizeRequested + CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k) - CSRequirements::minCommandQueueCommandStreamSize - CSRequirements::csOverfetchSize; EXPECT_EQ(expectedCsSize, 
cs.getMaxAvailableSpace()); size_t expectedTotalSize = alignUp(minSizeRequested + CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k); EXPECT_EQ(expectedTotalSize, allocation->getUnderlyingBufferSize()); } TEST_F(CommandQueueCommandStreamTest, GivenRequiredSizeWhenGettingCommandStreamThenMaxAvailableSpaceIsEqualOrGreaterThanRequiredSize) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue commandQueue(context.get(), pClDevice, props); size_t requiredSize = 16384; const auto &commandStream = commandQueue.getCS(requiredSize); ASSERT_NE(nullptr, &commandStream); EXPECT_GE(commandStream.getMaxAvailableSpace(), requiredSize); } TEST_F(CommandQueueCommandStreamTest, WhenGettingCommandStreamWithNewSizeThenMaxAvailableSpaceIsEqualOrGreaterThanNewSize) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue commandQueue(context.get(), pClDevice, props); auto &commandStreamInitial = commandQueue.getCS(1024); size_t requiredSize = commandStreamInitial.getMaxAvailableSpace() + 42; const auto &commandStream = commandQueue.getCS(requiredSize); ASSERT_NE(nullptr, &commandStream); EXPECT_GE(commandStream.getMaxAvailableSpace(), requiredSize); } TEST_F(CommandQueueCommandStreamTest, givenCommandStreamReceiverWithReusableAllocationsWhenAskedForCommandStreamThenReturnsAllocationFromReusablePool) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto memoryManager = pDevice->getMemoryManager(); size_t requiredSize = alignUp(100 + CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), requiredSize, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); 
commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_FALSE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekContains(*allocation)); const auto &indirectHeap = cmdQ.getCS(100); EXPECT_EQ(indirectHeap.getGraphicsAllocation(), allocation); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); } TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenItIsDestroyedThenCommandStreamIsPutOnTheReusabeList) { auto cmdQ = new MockCommandQueue(context.get(), pClDevice, 0); const auto &commandStream = cmdQ->getCS(100); auto graphicsAllocation = commandStream.getGraphicsAllocation(); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); //now destroy command queue, heap should go to reusable list delete cmdQ; EXPECT_FALSE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekContains(*graphicsAllocation)); } TEST_F(CommandQueueCommandStreamTest, WhenAskedForNewCommandStreamThenOldHeapIsStoredForReuse) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); const auto &indirectHeap = cmdQ.getCS(100); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); auto graphicsAllocation = indirectHeap.getGraphicsAllocation(); cmdQ.getCS(indirectHeap.getAvailableSpace() + 100); EXPECT_FALSE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekContains(*graphicsAllocation)); } 
TEST_F(CommandQueueCommandStreamTest, givenCommandQueueWhenGetCSIsCalledThenCommandStreamAllocationTypeShouldBeSetToCommandBuffer) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); const auto &commandStream = cmdQ.getCS(100); auto commandStreamAllocation = commandStream.getGraphicsAllocation(); ASSERT_NE(nullptr, commandStreamAllocation); EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, commandStreamAllocation->getAllocationType()); } HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsDisabledWhenEstimatingNodesCountThenItEqualsMultiDispatchInfoSize) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); MockCommandQueueHw cmdQ(context.get(), pClDevice, nullptr); pDevice->getUltCommandStreamReceiver().multiOsContextCapable = true; MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get()); mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(std::vector({mockKernelWithInternals.mockKernel})); size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo); EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size()); } HWTEST_F(CommandQueueCommandStreamTest, givenMultiDispatchInfoWithSingleKernelWithFlushAllocationsEnabledWhenEstimatingNodesCountThenItEqualsMultiDispatchInfoSizePlusOne) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); MockCommandQueueHw cmdQ(context.get(), pClDevice, nullptr); MockKernelWithInternals mockKernelWithInternals(*pClDevice, context.get()); mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; 
mockKernelWithInternals.mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(std::vector({mockKernelWithInternals.mockKernel})); size_t estimatedNodesCount = cmdQ.estimateTimestampPacketNodesCount(multiDispatchInfo); EXPECT_EQ(estimatedNodesCount, multiDispatchInfo.size() + 1); } struct CommandQueueIndirectHeapTest : public CommandQueueMemoryDevice, public ::testing::TestWithParam { void SetUp() override { CommandQueueMemoryDevice::SetUp(); context.reset(new MockContext(pClDevice)); } void TearDown() override { context.reset(); CommandQueueMemoryDevice::TearDown(); } std::unique_ptr context; }; TEST_P(CommandQueueIndirectHeapTest, WhenGettingIndirectHeapThenValidObjectIsReturned) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 8192); EXPECT_NE(nullptr, &indirectHeap); } TEST_P(CommandQueueIndirectHeapTest, givenIndirectObjectHeapWhenItIsQueriedForInternalAllocationThenTrueIsReturned) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 8192); if (this->GetParam() == IndirectHeap::INDIRECT_OBJECT) { EXPECT_TRUE(indirectHeap.getGraphicsAllocation()->is32BitAllocation()); } else { EXPECT_FALSE(indirectHeap.getGraphicsAllocation()->is32BitAllocation()); } } HWTEST_P(CommandQueueIndirectHeapTest, GivenIndirectHeapWhenGettingAvailableSpaceThenCorrectSizeIsReturned) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), sizeof(uint32_t)); if (this->GetParam() == IndirectHeap::SURFACE_STATE) { size_t expectedSshUse = cmdQ.getGpgpuCommandStreamReceiver().defaultSshSize - MemoryConstants::pageSize - 
UnitTestHelper::getDefaultSshUsage(); EXPECT_EQ(expectedSshUse, indirectHeap.getAvailableSpace()); } else { EXPECT_EQ(64 * KB, indirectHeap.getAvailableSpace()); } } TEST_P(CommandQueueIndirectHeapTest, GivenRequiredSizeWhenGettingIndirectHeapThenIndirectHeapHasRequiredSize) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); size_t requiredSize = 16384; const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), requiredSize); ASSERT_NE(nullptr, &indirectHeap); EXPECT_GE(indirectHeap.getMaxAvailableSpace(), requiredSize); } TEST_P(CommandQueueIndirectHeapTest, WhenGettingIndirectHeapWithNewSizeThenMaxAvailableSpaceIsEqualOrGreaterThanNewSize) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto &indirectHeapInitial = cmdQ.getIndirectHeap(this->GetParam(), 10); size_t requiredSize = indirectHeapInitial.getMaxAvailableSpace() + 42; const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), requiredSize); ASSERT_NE(nullptr, &indirectHeap); if (this->GetParam() == IndirectHeap::SURFACE_STATE) { //no matter what SSH is always capped EXPECT_EQ(cmdQ.getGpgpuCommandStreamReceiver().defaultSshSize - MemoryConstants::pageSize, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(requiredSize, indirectHeap.getMaxAvailableSpace()); } } TEST_P(CommandQueueIndirectHeapTest, WhenGettingIndirectHeapThenSizeIsAlignedToCacheLine) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); size_t minHeapSize = 64 * KB; auto &indirectHeapInitial = cmdQ.getIndirectHeap(this->GetParam(), 2 * minHeapSize + 1); EXPECT_TRUE(isAligned(indirectHeapInitial.getAvailableSpace())); indirectHeapInitial.getSpace(indirectHeapInitial.getAvailableSpace()); // use whole space to force obtain reusable const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 
minHeapSize + 1); ASSERT_NE(nullptr, &indirectHeap); EXPECT_TRUE(isAligned(indirectHeap.getAvailableSpace())); } TEST_P(CommandQueueIndirectHeapTest, givenCommandStreamReceiverWithReusableAllocationsWhenAskedForHeapAllocationThenAllocationFromReusablePoolIsReturned) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto memoryManager = pDevice->getMemoryManager(); auto allocationSize = defaultHeapSize * 2; GraphicsAllocation *allocation = nullptr; auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); auto allocationType = GraphicsAllocation::AllocationType::LINEAR_STREAM; if (this->GetParam() == IndirectHeap::INDIRECT_OBJECT) { allocationType = GraphicsAllocation::AllocationType::INTERNAL_HEAP; } allocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), allocationSize, allocationType}); if (this->GetParam() == IndirectHeap::SURFACE_STATE) { allocation->setSize(commandStreamReceiver.defaultSshSize * 2); } commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_FALSE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekContains(*allocation)); const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100); EXPECT_EQ(indirectHeap.getGraphicsAllocation(), allocation); // if we obtain heap from reusable pool, we need to keep the size of allocation // surface state heap is an exception, it is capped at (max_ssh_size_for_HW - page_size) if (this->GetParam() == IndirectHeap::SURFACE_STATE) { EXPECT_EQ(commandStreamReceiver.defaultSshSize - MemoryConstants::pageSize, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_EQ(allocationSize, indirectHeap.getMaxAvailableSpace()); } EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); } HWTEST_P(CommandQueueIndirectHeapTest, 
WhenAskedForNewHeapThenOldHeapIsStoredForReuse) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); *commandStreamReceiver.getTagAddress() = 1u; commandStreamReceiver.taskCount = 2u; const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100); auto heapSize = indirectHeap.getAvailableSpace(); auto graphicsAllocation = indirectHeap.getGraphicsAllocation(); // Request a larger heap than the first. cmdQ.getIndirectHeap(this->GetParam(), heapSize + 6000); EXPECT_FALSE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekContains(*graphicsAllocation)); *commandStreamReceiver.getTagAddress() = 2u; } TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithoutHeapAllocationWhenAskedForNewHeapThenNewAllocationIsAcquiredWithoutStoring) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto memoryManager = pDevice->getMemoryManager(); auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100); auto heapSize = indirectHeap.getAvailableSpace(); auto graphicsAllocation = indirectHeap.getGraphicsAllocation(); csr.indirectHeap[this->GetParam()]->replaceGraphicsAllocation(nullptr); csr.indirectHeap[this->GetParam()]->replaceBuffer(nullptr, 0); // Request a larger heap than the first. 
cmdQ.getIndirectHeap(this->GetParam(), heapSize + 6000); EXPECT_NE(graphicsAllocation, indirectHeap.getGraphicsAllocation()); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWithResourceCachingActiveWhenQueueISDestroyedThenIndirectHeapIsNotOnReuseList) { auto cmdQ = new MockCommandQueue(context.get(), pClDevice, 0); cmdQ->getIndirectHeap(this->GetParam(), 100); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); //now destroy command queue, heap should go to reusable list delete cmdQ; EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); } TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithHeapAllocatedWhenIndirectHeapIsReleasedThenHeapAllocationAndHeapBufferIsSetToNullptr) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); const auto &indirectHeap = cmdQ.getIndirectHeap(this->GetParam(), 100); auto heapSize = indirectHeap.getMaxAvailableSpace(); EXPECT_NE(0u, heapSize); auto graphicsAllocation = indirectHeap.getGraphicsAllocation(); EXPECT_NE(nullptr, graphicsAllocation); cmdQ.releaseIndirectHeap(this->GetParam()); auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(nullptr, csr.indirectHeap[this->GetParam()]->getGraphicsAllocation()); EXPECT_EQ(nullptr, indirectHeap.getCpuBase()); EXPECT_EQ(0u, indirectHeap.getMaxAvailableSpace()); } TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithoutHeapAllocatedWhenIndirectHeapIsReleasedThenIndirectHeapAllocationStaysNull) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); cmdQ.releaseIndirectHeap(this->GetParam()); auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(nullptr, 
csr.indirectHeap[this->GetParam()]); } TEST_P(CommandQueueIndirectHeapTest, GivenCommandQueueWithHeapWhenGraphicAllocationIsNullThenNothingOnReuseList) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto &ih = cmdQ.getIndirectHeap(this->GetParam(), 0u); auto allocation = ih.getGraphicsAllocation(); EXPECT_NE(nullptr, allocation); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.indirectHeap[this->GetParam()]->replaceGraphicsAllocation(nullptr); csr.indirectHeap[this->GetParam()]->replaceBuffer(nullptr, 0); cmdQ.releaseIndirectHeap(this->GetParam()); auto memoryManager = pDevice->getMemoryManager(); EXPECT_TRUE(pDevice->getDefaultEngine().commandStreamReceiver->getAllocationsForReuse().peekIsEmpty()); memoryManager->freeGraphicsMemory(allocation); } TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetIndirectHeapIsCalledThenIndirectHeapAllocationTypeShouldBeSetToInternalHeapForIohAndLinearStreamForOthers) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); auto heapType = this->GetParam(); bool requireInternalHeap = IndirectHeap::INDIRECT_OBJECT == heapType; const auto &indirectHeap = cmdQ.getIndirectHeap(heapType, 100); auto indirectHeapAllocation = indirectHeap.getGraphicsAllocation(); ASSERT_NE(nullptr, indirectHeapAllocation); auto expectedAllocationType = GraphicsAllocation::AllocationType::LINEAR_STREAM; if (requireInternalHeap) { expectedAllocationType = GraphicsAllocation::AllocationType::INTERNAL_HEAP; } EXPECT_EQ(expectedAllocationType, indirectHeapAllocation->getAllocationType()); } TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledThenHeapIsCreated) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); IndirectHeap *indirectHeap = nullptr; cmdQ.allocateHeapMemory(this->GetParam(), 100, 
indirectHeap); EXPECT_NE(nullptr, indirectHeap); EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation()); pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation()); delete indirectHeap; } TEST_P(CommandQueueIndirectHeapTest, givenCommandQueueWhenGetHeapMemoryIsCalledWithAlreadyAllocatedHeapThenGraphicsAllocationIsCreated) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; MockCommandQueue cmdQ(context.get(), pClDevice, props); IndirectHeap heap(nullptr, size_t{100}); IndirectHeap *indirectHeap = &heap; cmdQ.allocateHeapMemory(this->GetParam(), 100, indirectHeap); EXPECT_EQ(&heap, indirectHeap); EXPECT_NE(nullptr, indirectHeap->getGraphicsAllocation()); pDevice->getMemoryManager()->freeGraphicsMemory(indirectHeap->getGraphicsAllocation()); } INSTANTIATE_TEST_CASE_P( Device, CommandQueueIndirectHeapTest, testing::Values( IndirectHeap::DYNAMIC_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::SURFACE_STATE)); using CommandQueueTests = ::testing::Test; HWTEST_F(CommandQueueTests, givenMultipleCommandQueuesWhenMarkerIsEmittedThenGraphicsAllocationIsReused) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); std::unique_ptr commandQ(new MockCommandQueue(&context, device.get(), 0)); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = 0; commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); auto commandStreamGraphicsAllocation = commandQ->getCS(0).getGraphicsAllocation(); commandQ.reset(new MockCommandQueue(&context, device.get(), 0)); commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); auto commandStreamGraphicsAllocation2 = commandQ->getCS(0).getGraphicsAllocation(); EXPECT_EQ(commandStreamGraphicsAllocation, commandStreamGraphicsAllocation2); } struct WaitForQueueCompletionTests : public 
::testing::Test { template struct MyCmdQueue : public CommandQueueHw { MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { requestedUseQuickKmdSleep = useQuickKmdSleep; waitUntilCompleteCounter++; } bool isQueueBlocked() override { return false; } bool requestedUseQuickKmdSleep = false; uint32_t waitUntilCompleteCounter = 0; }; void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; }; HWTEST_F(WaitForQueueCompletionTests, givenBlockingCallAndUnblockedQueueWhenEnqueuedThenCallWaitWithoutQuickKmdSleepRequest) { std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); uint32_t tmpPtr = 0; auto buffer = std::unique_ptr(BufferHelper<>::create(context.get())); cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 1, &tmpPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, cmdQ->waitUntilCompleteCounter); EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep); } HWTEST_F(WaitForQueueCompletionTests, givenBlockingCallAndBlockedQueueWhenEnqueuedThenCallWaitWithoutQuickKmdSleepRequest) { std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); std::unique_ptr blockingEvent(new Event(cmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0)); cl_event clBlockingEvent = blockingEvent.get(); uint32_t tmpPtr = 0; auto buffer = std::unique_ptr(BufferHelper<>::create(context.get())); cmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 1, &tmpPtr, nullptr, 1, &clBlockingEvent, nullptr); EXPECT_EQ(1u, cmdQ->waitUntilCompleteCounter); EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep); } HWTEST_F(WaitForQueueCompletionTests, whenFinishIsCalledThenCallWaitWithoutQuickKmdSleepRequest) { std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); cmdQ->finish(); EXPECT_EQ(1u, 
cmdQ->waitUntilCompleteCounter); EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep); } TEST(CommandQueue, givenEnqueueAcquireSharedObjectsWhenNoObjectsThenReturnSuccess) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); cl_uint numObjects = 0; cl_mem *memObjects = nullptr; cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_SUCCESS); } class MockSharingHandler : public SharingHandler { public: void synchronizeObject(UpdateData &updateData) override { updateData.synchronizationStatus = ACQUIRE_SUCCESFUL; } }; TEST(CommandQueue, givenEnqueuesForSharedObjectsWithImageWhenUsingSharingHandlerThenReturnSuccess) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); MockSharingHandler *mockSharingHandler = new MockSharingHandler; auto image = std::unique_ptr(ImageHelper::create(&context)); image->setSharingHandler(mockSharingHandler); cl_mem memObject = image.get(); cl_uint numObjects = 1; cl_mem *memObjects = &memObject; cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_SUCCESS); result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_SUCCESS); } TEST(CommandQueue, givenEnqueuesForSharedObjectsWithImageWhenUsingSharingHandlerWithEventThenReturnSuccess) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context; MockCommandQueue cmdQ(&context, mockDevice.get(), 0); MockSharingHandler *mockSharingHandler = new MockSharingHandler; auto image = std::unique_ptr(ImageHelper::create(&context)); image->setSharingHandler(mockSharingHandler); cl_mem memObject = image.get(); cl_uint numObjects = 1; cl_mem *memObjects = &memObject; Event *eventAcquire = new Event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_event clEventAquire = eventAcquire; cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 
0, nullptr, &clEventAquire, 0); EXPECT_EQ(result, CL_SUCCESS); ASSERT_NE(clEventAquire, nullptr); eventAcquire->release(); Event *eventRelease = new Event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_event clEventRelease = eventRelease; result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, &clEventRelease, 0); EXPECT_EQ(result, CL_SUCCESS); ASSERT_NE(clEventRelease, nullptr); eventRelease->release(); } TEST(CommandQueue, givenEnqueueAcquireSharedObjectsWhenIncorrectArgumentsThenReturnProperError) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); cl_uint numObjects = 1; cl_mem *memObjects = nullptr; cl_int result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); numObjects = 0; memObjects = (cl_mem *)1; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); numObjects = 0; memObjects = (cl_mem *)1; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); cl_mem memObject = nullptr; numObjects = 1; memObjects = &memObject; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_MEM_OBJECT); auto buffer = std::unique_ptr(BufferHelper<>::create(&context)); memObject = buffer.get(); numObjects = 1; memObjects = &memObject; result = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_MEM_OBJECT); } TEST(CommandQueue, givenEnqueueReleaseSharedObjectsWhenNoObjectsThenReturnSuccess) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); cl_uint numObjects = 0; cl_mem *memObjects = nullptr; cl_int result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_SUCCESS); } TEST(CommandQueue, 
givenEnqueueReleaseSharedObjectsWhenIncorrectArgumentsThenReturnProperError) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); cl_uint numObjects = 1; cl_mem *memObjects = nullptr; cl_int result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); numObjects = 0; memObjects = (cl_mem *)1; result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); numObjects = 0; memObjects = (cl_mem *)1; result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_VALUE); cl_mem memObject = nullptr; numObjects = 1; memObjects = &memObject; result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_MEM_OBJECT); auto buffer = std::unique_ptr(BufferHelper<>::create(&context)); memObject = buffer.get(); numObjects = 1; memObjects = &memObject; result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0); EXPECT_EQ(result, CL_INVALID_MEM_OBJECT); } TEST(CommandQueue, givenEnqueueAcquireSharedObjectsCallWhenAcquireFailsThenCorrectErrorIsReturned) { class MockSharingHandler : public SharingHandler { int validateUpdateData(UpdateData &data) override { return CL_INVALID_MEM_OBJECT; } }; MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); auto buffer = std::unique_ptr(BufferHelper<>::create(&context)); MockSharingHandler *handler = new MockSharingHandler; buffer->setSharingHandler(handler); cl_mem memObject = buffer.get(); auto retVal = cmdQ.enqueueAcquireSharedObjects(1, &memObject, 0, nullptr, nullptr, 0); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); buffer->setSharingHandler(nullptr); } HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsCalledThenSurfaceStateIsCorrectlySet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; 
MockProgram program(*pDevice->getExecutionEnvironment()); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); MockCommandQueue cmdQ(context.get(), pClDevice, 0); kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset); kernel->getAllocatedKernelInfo()->usesSsh = true; auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); cmdQ.setupDebugSurface(kernel.get()); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); ASSERT_NE(nullptr, debugSurface); RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(); EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSetupDebugSurfaceIsCalledThenDebugSurfaceIsReused) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; MockProgram program(*pDevice->getExecutionEnvironment()); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); MockCommandQueue cmdQ(context.get(), pClDevice, 0); kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset); kernel->getAllocatedKernelInfo()->usesSsh = true; auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); ASSERT_NE(nullptr, debugSurface); cmdQ.setupDebugSurface(kernel.get()); EXPECT_EQ(debugSurface, commandStreamReceiver.getDebugSurfaceAllocation()); RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(); EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } struct MockTimestampPacketContainer : TimestampPacketContainer { 
MockTimestampPacketContainer(Context &context) : context(context) { } ~MockTimestampPacketContainer() override { EXPECT_EQ(1, context.getRefInternalCount()); } Context &context; }; TEST(CommandQueueDestructorTest, whenCommandQueueIsDestroyedThenDestroysTimestampPacketContainerBeforeReleasingContext) { auto context = new MockContext; EXPECT_EQ(1, context->getRefInternalCount()); MockCommandQueue queue(context, context->getDevice(0), nullptr); queue.timestampPacketContainer.reset(new MockTimestampPacketContainer(*context)); EXPECT_EQ(2, context->getRefInternalCount()); context->release(); EXPECT_EQ(1, context->getRefInternalCount()); } TEST(CommandQueuePropertiesTests, whenGetEngineIsCalledThenQueueEngineIsReturned) { MockCommandQueue queue; EngineControl engineControl; queue.gpgpuEngine = &engineControl; EXPECT_EQ(queue.gpgpuEngine, &queue.getGpgpuEngine()); } TEST(CommandQueue, GivenCommandQueueWhenEnqueueResourceBarrierCalledThenSuccessReturned) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); cl_int result = cmdQ.enqueueResourceBarrier( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, result); } TEST(CommandQueue, GivenCommandQueueWhenCheckingIfIsCacheFlushCommandCalledThenFalseReturned) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); bool isCommandCacheFlush = cmdQ.isCacheFlushCommand(0u); EXPECT_FALSE(isCommandCacheFlush); } TEST(CommandQueue, GivenCommandQueueWhenEnqueueInitDispatchGlobalsCalledThenSuccessReturned) { MockContext context; MockCommandQueue cmdQ(&context, nullptr, 0); cl_int result = cmdQ.enqueueInitDispatchGlobals( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, result); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp000066400000000000000000001612041363734646600315520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include 
"shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/built_ins/aux_translation_builtin.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/event/perf_counter.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" using namespace NEO; struct DispatchWalkerTest : public CommandQueueFixture, public DeviceFixture, public ::testing::Test { using CommandQueueFixture::SetUp; void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(0); DeviceFixture::SetUp(); context = std::make_unique(pClDevice); CommandQueueFixture::SetUp(context.get(), pClDevice, 0); program = std::make_unique(*pDevice->getExecutionEnvironment()); memset(&kernelHeader, 0, sizeof(kernelHeader)); kernelHeader.KernelHeapSize = sizeof(kernelIsa); memset(&dataParameterStream, 0, sizeof(dataParameterStream)); dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData); executionEnvironment = {}; memset(&executionEnvironment, 0, sizeof(executionEnvironment)); executionEnvironment.CompiledSIMD32 = 1; executionEnvironment.LargestCompiledSIMDSize = 32; memset(&threadPayload, 0, sizeof(threadPayload)); 
threadPayload.LocalIDXPresent = 1;
        threadPayload.LocalIDYPresent = 1;
        threadPayload.LocalIDZPresent = 1;

        // One sampler state entry; only kernelInfoWithSampler points at this array.
        samplerArray.BorderColorOffset = 0;
        samplerArray.Count = 1;
        samplerArray.Offset = 4;
        samplerArray.Size = 2;
        samplerArray.Token = 0;

        // Both KernelInfo objects share the same ISA buffer, header and patch tokens.
        kernelInfo.heapInfo.pKernelHeap = kernelIsa;
        kernelInfo.heapInfo.pKernelHeader = &kernelHeader;
        kernelInfo.patchInfo.dataParameterStream = &dataParameterStream;
        kernelInfo.patchInfo.executionEnvironment = &executionEnvironment;
        kernelInfo.patchInfo.threadPayload = &threadPayload;

        kernelInfoWithSampler.heapInfo.pKernelHeap = kernelIsa;
        kernelInfoWithSampler.heapInfo.pKernelHeader = &kernelHeader;
        kernelInfoWithSampler.patchInfo.dataParameterStream = &dataParameterStream;
        kernelInfoWithSampler.patchInfo.executionEnvironment = &executionEnvironment;
        kernelInfoWithSampler.patchInfo.threadPayload = &threadPayload;
        kernelInfoWithSampler.patchInfo.samplerStateArray = &samplerArray;
        kernelInfoWithSampler.heapInfo.pDsh = static_cast(dsh);
    }

    // Tears down in reverse order of SetUp: queue, then context, then device.
    void TearDown() override {
        CommandQueueFixture::TearDown();
        context.reset();
        DeviceFixture::TearDown();
    }

    // Creates a LinearStream, asks the queue's GPGPU CSR to back it with a command buffer allocation
    // and wraps it together with the CSR's internal allocation storage in the returned object.
    std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) {
        auto commandStream = new LinearStream();

        auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();
        gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1);

        return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage());
    }

    std::unique_ptr context;
    std::unique_ptr program;

    // Patch tokens referenced by kernelInfo / kernelInfoWithSampler.
    SKernelBinaryHeaderCommon kernelHeader = {};
    SPatchDataParameterStream dataParameterStream = {};
    SPatchExecutionEnvironment executionEnvironment = {};
    SPatchThreadPayload threadPayload = {};
    SPatchSamplerStateArray samplerArray = {};

    KernelInfo kernelInfo;
    KernelInfo kernelInfoWithSampler;

    // Backing storage; only the sizes (and addresses) are used by SetUp.
    uint32_t kernelIsa[32];
    uint32_t crossThreadData[32];
    uint32_t dsh[32];

    DebugManagerStateRestore dbgRestore;
};

// Calls computeDimensions on {100,1,1}, {100,100,1} and {100,100,100}, expecting 1, 2 and 3.
HWTEST_F(DispatchWalkerTest, WhenGettingComputeDimensionsThenCorrectNumberOfDimensionsIsReturned) {
    const size_t workItems1D[] = {100, 1,
1};
    EXPECT_EQ(1u, computeDimensions(workItems1D));

    const size_t workItems2D[] = {100, 100, 1};
    EXPECT_EQ(2u, computeDimensions(workItems2D));

    const size_t workItems3D[] = {100, 100, 100};
    EXPECT_EQ(3u, computeDimensions(workItems3D));
}

// Programs walker thread data with simd == 1 and a 32x1x1 local size, then expects the walker's
// SIMD size field to equal 32 >> 4 (i.e. 2).
// NOTE(review): static_cast(...) below has lost its template argument in this text; left as found.
HWTEST_F(DispatchWalkerTest, givenSimd1WhenSetGpgpuWalkerThreadDataThenSimdInWalkerIsSetTo32Value) {
    uint32_t pCmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation((void *)pCmdBuffer, sizeof(pCmdBuffer));
    LinearStream linearStream(&gfxAllocation);

    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    WALKER_TYPE *computeWalker = static_cast(linearStream.getSpace(sizeof(WALKER_TYPE)));
    *computeWalker = FamilyType::cmdInitGpgpuWalker;

    size_t globalOffsets[] = {0, 0, 0};
    size_t startWorkGroups[] = {0, 0, 0};
    size_t numWorkGroups[] = {1, 1, 1};
    size_t localWorkSizesIn[] = {32, 1, 1};
    uint32_t simd = 1;

    // Value-initialize: the payload is passed to the helper below, so it must not carry
    // indeterminate values (the fixture's own patch-token members use the same "= {}" form).
    iOpenCL::SPatchThreadPayload threadPayload = {};
    GpgpuWalkerHelper::setGpgpuWalkerThreadData(
        computeWalker, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizesIn, simd, 3, true, false, threadPayload, 5u);
    EXPECT_EQ(computeWalker->getSimdSize(), 32 >> 4);
}

// Pre-consumes the queue's command stream so that only the walker size plus the minimum required
// stream size is left, dispatches a 1x1x1 kernel and checks that the stream keeps its CPU base and
// grows by exactly sizeDispatchWalkerNeeds.
HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntChanged) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    auto &commandStream = pCmdQ->getCS(4096);

    // Consume all memory except what is needed for this enqueue
    auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) +
                                   HardwareCommandsHelper::getSizeRequiredCS(&kernel);

    //cs has a minimum required size
    auto sizeThatNeedsToBeSubstracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize;

    commandStream.getSpace(commandStream.getMaxAvailableSpace() - sizeThatNeedsToBeSubstracted);
    ASSERT_EQ(commandStream.getAvailableSpace(), sizeThatNeedsToBeSubstracted);

    auto commandStreamStart = commandStream.getUsed();
    auto commandStreamBuffer = commandStream.getCpuBase();
    ASSERT_NE(0u, commandStreamStart);

    size_t
globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; cl_uint dimensions = 1; DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase()); EXPECT_LT(commandStreamStart, commandStream.getUsed()); EXPECT_EQ(sizeDispatchWalkerNeeds, commandStream.getUsed() - commandStreamStart); } HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDispatched) { threadPayload.LocalIDXPresent = 0; threadPayload.LocalIDYPresent = 0; threadPayload.LocalIDZPresent = 0; threadPayload.UnusedPerThreadConstantPresent = 1; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &commandStream = pCmdQ->getCS(4096); // Consume all memory except what is needed for this enqueue auto sizeDispatchWalkerNeeds = sizeof(typename FamilyType::WALKER_TYPE) + HardwareCommandsHelper::getSizeRequiredCS(&kernel); //cs has a minimum required size auto sizeThatNeedsToBeSubstracted = sizeDispatchWalkerNeeds + CSRequirements::minCommandQueueCommandStreamSize; commandStream.getSpace(commandStream.getMaxAvailableSpace() - sizeThatNeedsToBeSubstracted); ASSERT_EQ(commandStream.getAvailableSpace(), sizeThatNeedsToBeSubstracted); auto commandStreamStart = commandStream.getUsed(); auto commandStreamBuffer = commandStream.getCpuBase(); ASSERT_NE(0u, commandStreamStart); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; cl_uint dimensions = 1; DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, 
nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    // The stream must not have been reallocated and must have grown by exactly the computed size.
    EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
    EXPECT_LT(commandStreamStart, commandStream.getUsed());
    EXPECT_EQ(sizeDispatchWalkerNeeds, commandStream.getUsed() - commandStreamStart);
}

// For dimension = 1..3: widens workItems[dimension - 1] to 256, dispatches without an explicit local
// size and expects *kernel.workDim to equal the dispatched dimension. Debug flags are left untouched.
HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;

        DispatchInfo dispatchInfo(const_cast(&kernel), dimension, workItems, nullptr, globalOffsets);
        MultiDispatchInfo multiDispatchInfo;
        multiDispatchInfo.push(dispatchInfo);
        HardwareInterface::dispatchWalker(
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            CL_COMMAND_NDRANGE_KERNEL);

        EXPECT_EQ(dimension, *kernel.workDim);
    }
}

// Same loop as above with EnableComputeWorkSizeND = false and EnableComputeWorkSizeSquared = true;
// the local DebugManagerStateRestore undoes the flag changes when the test ends.
HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(false);
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;
        DispatchInfo dispatchInfo(const_cast(&kernel), dimension, workItems, nullptr, globalOffsets);
        MultiDispatchInfo multiDispatchInfo;
        multiDispatchInfo.push(dispatchInfo);
        HardwareInterface::dispatchWalker(
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
}

// Next test (name continues below): the workDim loop with EnableComputeWorkSizeND = true.
HWTEST_F(DispatchWalkerTest,
GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) {
    // With EnableComputeWorkSizeND = true: for dimension = 1..3 dispatch without a local size and
    // expect *kernel.workDim to equal the dispatched dimension.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(true);
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;
        DispatchInfo dispatchInfo(const_cast(&kernel), dimension, workItems, nullptr, globalOffsets);
        MultiDispatchInfo multiDispatchInfo;
        multiDispatchInfo.push(dispatchInfo);
        HardwareInterface::dispatchWalker(
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
}

// Same workDim loop with both EnableComputeWorkSizeND and EnableComputeWorkSizeSquared set to false.
HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(false);
    DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;
        DispatchInfo dispatchInfo(const_cast(&kernel), dimension, workItems, nullptr, globalOffsets);
        MultiDispatchInfo multiDispatchInfo;
        multiDispatchInfo.push(dispatchInfo);
        HardwareInterface::dispatchWalker(
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
}

// Global size {2,5,10} with local size {1,1,1}: expects numWorkGroupsX/Y/Z of 2, 5 and 10 to be
// written through the kernel's numWorkGroups pointers (offsets 0, 4, 8).
HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkGroupsIsCorrectlySet) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.numWorkGroupsOffset[0] = 0;
kernelInfo.workloadInfo.numWorkGroupsOffset[1] = 4;
    kernelInfo.workloadInfo.numWorkGroupsOffset[2] = 8;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    size_t workGroupSize[3] = {1, 1, 1};
    cl_uint dimensions = 3;
    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    EXPECT_EQ(2u, *kernel.numWorkGroupsX);
    EXPECT_EQ(5u, *kernel.numWorkGroupsY);
    EXPECT_EQ(10u, *kernel.numWorkGroupsZ);
}

// No local size supplied, EnableComputeWorkSizeND = false, global size {2,5,10}:
// the test expects a chosen local work size of {2, 5, 1}.
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(false);
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(1u, *kernel.localWorkSizeZ);
}

// No local size supplied, EnableComputeWorkSizeND = true, global size {2,5,10}:
// the test expects a chosen local work size of {2, 5, 10}.
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThenLwsIsCorrect) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeND.set(true);
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(10u, *kernel.localWorkSizeZ);
}

// No local size supplied, EnableComputeWorkSizeSquared = true and EnableComputeWorkSizeND = false,
// global size {2,5,10}: the test expects a chosen local work size of {2, 5, 1}.
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(1u, *kernel.localWorkSizeZ);
}

// No local size supplied, both EnableComputeWorkSizeSquared and EnableComputeWorkSizeND false,
// global size {2,5,10}: the test expects a chosen local work size of {2, 5, 1}.
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffWhenDispatchingWalkerThenLwsIsCorrect) {
    DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
    DebugManager.flags.EnableComputeWorkSizeND.set(false);
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, nullptr, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(1u, *kernel.localWorkSizeZ);
}

// Dispatches with an explicit local size {1,2,3} and expects exactly those values in
// localWorkSizeX/Y/Z.
// NOTE(review): despite "GivenNoLocalWorkSize" in the name, a local work size IS passed here.
HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    size_t workGroupSize[3] = {1, 2, 3};
    cl_uint dimensions = 3;
    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(1u, *kernel.localWorkSizeX);
    EXPECT_EQ(2u, *kernel.localWorkSizeY);
    EXPECT_EQ(3u, *kernel.localWorkSizeZ);
}

// Configures both localWorkSizeOffsets (0/4/8) and localWorkSizeOffsets2 (12/16/20) and expects the
// explicit local size {1,2,3} to show up through both sets of pointers.
HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    kernelInfo.workloadInfo.localWorkSizeOffsets2[0] = 12;
    kernelInfo.workloadInfo.localWorkSizeOffsets2[1] = 16;
    kernelInfo.workloadInfo.localWorkSizeOffsets2[2] = 20;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    size_t workGroupSize[3] = {1, 2, 3};
    cl_uint dimensions = 3;
    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);
    // Both the primary and the secondary local-work-size slots must carry {1, 2, 3}.
    EXPECT_EQ(1u, *kernel.localWorkSizeX);
    EXPECT_EQ(2u, *kernel.localWorkSizeY);
    EXPECT_EQ(3u, *kernel.localWorkSizeZ);
    EXPECT_EQ(1u, *kernel.localWorkSizeX2);
    EXPECT_EQ(2u, *kernel.localWorkSizeY2);
    EXPECT_EQ(3u, *kernel.localWorkSizeZ2);
}

// Two different kernels (one built from kernelInfo, one from kernelInfoWithSampler) are dispatched
// together with local sizes {1,2,3} and {4,5,6}; each kernel is expected to see its own local size.
HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) {
    MockKernel kernel1(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
    MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice);
    kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[0] = 12;
    kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[1] = 16;
    kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[2] = 20;
    ASSERT_EQ(CL_SUCCESS, kernel2.initialize());

    DispatchInfo di1(&kernel1, 3, {10, 10, 10}, {1, 2, 3}, {0, 0, 0});
    DispatchInfo di2(&kernel2, 3, {10, 10, 10}, {4, 5, 6}, {0, 0, 0});

    MockMultiDispatchInfo multiDispatchInfo(std::vector({&di1, &di2}));

    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    // Walk the dispatch infos in order: the first must carry {1,2,3}, the second {4,5,6}.
    auto dispatchId = 0;
    for (auto &dispatchInfo : multiDispatchInfo) {
        auto &kernel = *dispatchInfo.getKernel();
        if (dispatchId == 0) {
            EXPECT_EQ(1u, *kernel.localWorkSizeX);
            EXPECT_EQ(2u, *kernel.localWorkSizeY);
            EXPECT_EQ(3u, *kernel.localWorkSizeZ);
        }
        if (dispatchId == 1) {
            EXPECT_EQ(4u, *kernel.localWorkSizeX);
            EXPECT_EQ(5u, *kernel.localWorkSizeY);
            EXPECT_EQ(6u, *kernel.localWorkSizeZ);
        }
        dispatchId++;
    }
}

// Two kernels built from the same kernelInfo; the MultiDispatchInfo is constructed with mainKernel.
// After dispatch, mainKernel is expected to show local size {4,5,6} in both slot sets and work-group
// counts {3,2,2}, while the other kernel shows {1,2,3} only in the secondary slots and zeros elsewhere.
HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorrect) {
    MockKernel kernel1(program.get(), kernelInfo, *pClDevice);
    MockKernel mainKernel(program.get(), kernelInfo, *pClDevice);
    kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0;
    kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4;
    kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8;
    kernelInfo.workloadInfo.localWorkSizeOffsets2[0] = 12;
    kernelInfo.workloadInfo.localWorkSizeOffsets2[1] = 16;
    kernelInfo.workloadInfo.localWorkSizeOffsets2[2] = 20;
    kernelInfo.workloadInfo.numWorkGroupsOffset[0] = 24;
    kernelInfo.workloadInfo.numWorkGroupsOffset[1] = 28;
    kernelInfo.workloadInfo.numWorkGroupsOffset[2] = 32;
    ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
    ASSERT_EQ(CL_SUCCESS, mainKernel.initialize());

    DispatchInfo di1(&kernel1, 3, {10, 10, 10}, {1, 2, 3}, {0, 0, 0});
    DispatchInfo di2(&mainKernel, 3, {10, 10, 10}, {4, 5, 6}, {0, 0, 0});

    MultiDispatchInfo multiDispatchInfo(&mainKernel);
    multiDispatchInfo.push(di1);
    multiDispatchInfo.push(di2);

    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    for (auto &dispatchInfo : multiDispatchInfo) {
        auto &kernel = *dispatchInfo.getKernel();
        if (&kernel == &mainKernel) {
            EXPECT_EQ(4u, *kernel.localWorkSizeX);
            EXPECT_EQ(5u, *kernel.localWorkSizeY);
            EXPECT_EQ(6u, *kernel.localWorkSizeZ);
            EXPECT_EQ(4u, *kernel.localWorkSizeX2);
            EXPECT_EQ(5u,
*kernel.localWorkSizeY2);
            EXPECT_EQ(6u, *kernel.localWorkSizeZ2);
            EXPECT_EQ(3u, *kernel.numWorkGroupsX);
            EXPECT_EQ(2u, *kernel.numWorkGroupsY);
            EXPECT_EQ(2u, *kernel.numWorkGroupsZ);
        } else {
            // Non-main kernel: only the secondary local-size slots are expected to be written.
            EXPECT_EQ(0u, *kernel.localWorkSizeX);
            EXPECT_EQ(0u, *kernel.localWorkSizeY);
            EXPECT_EQ(0u, *kernel.localWorkSizeZ);
            EXPECT_EQ(1u, *kernel.localWorkSizeX2);
            EXPECT_EQ(2u, *kernel.localWorkSizeY2);
            EXPECT_EQ(3u, *kernel.localWorkSizeZ2);
            EXPECT_EQ(0u, *kernel.numWorkGroupsX);
            EXPECT_EQ(0u, *kernel.numWorkGroupsY);
            EXPECT_EQ(0u, *kernel.numWorkGroupsZ);
        }
    }
}

// Dispatches with a blocked-commands object: the queue's own command stream must stay unused
// (getUsed() == 0) and the blocked data must own a command stream plus DSH, IOH and SSH heaps.
HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenCommandSteamIsNotConsumed) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    size_t workGroupSize[3] = {2, 5, 10};
    cl_uint dimensions = 1;

    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);

    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    auto &commandStream = pCmdQ->getCS(1024);
    EXPECT_EQ(0u, commandStream.getUsed());
    EXPECT_NE(nullptr, blockedCommandsData);
    EXPECT_NE(nullptr, blockedCommandsData->commandStream);
    EXPECT_NE(nullptr, blockedCommandsData->dsh);
    EXPECT_NE(nullptr, blockedCommandsData->ioh);
    EXPECT_NE(nullptr, blockedCommandsData->ssh);
}

// Blocked dispatch of a single kernel: each blocked heap must offer at least the size the
// HardwareCommandsHelper getSizeRequired{DSH,IOH,SSH} helpers report for that kernel.
HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeaSizesAreTakenFromKernel) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    size_t workGroupSize[3] = {2, 5, 10};
    cl_uint dimensions = 1;

    auto blockedCommandsData =
createBlockedCommandsData(*pCmdQ);

    DispatchInfo dispatchInfo(const_cast(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
    MultiDispatchInfo multiDispatchInfo(&kernel);
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    // IOH requirement is computed for the total element count of the local work-group size.
    Vec3 localWorkgroupSize(workGroupSize);

    auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(kernel);
    auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(kernel, Math::computeTotalElementsCount(localWorkgroupSize));
    auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(kernel);

    EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
}

// Calls obtainCommandStream on a MockCommandQueueHw and expects a stream backed by a 64 KiB
// allocation (pageSize64k) with pageSize64k - csOverfetchSize usable bytes, and expects
// blockedKernelData to be created and to own that very stream.
HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAllocateEnoughSpaceAndBlockedKernelData) {
    DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);

    std::unique_ptr blockedKernelData;
    MockCommandQueueHw mockCmdQ(nullptr, pClDevice, nullptr);

    auto expectedSizeCSAllocation = MemoryConstants::pageSize64k;
    auto expectedSizeCS = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;

    CsrDependencies csrDependencies;
    EventsRequest eventsRequest(0, nullptr, nullptr);
    auto cmdStream = mockCmdQ.template obtainCommandStream(csrDependencies, false, true,
                                                           multiDispatchInfo, eventsRequest, blockedKernelData,
                                                           nullptr, 0u);

    EXPECT_EQ(expectedSizeCS, cmdStream->getMaxAvailableSpace());
    EXPECT_EQ(expectedSizeCSAllocation, cmdStream->getGraphicsAllocation()->getUnderlyingBufferSize());
    EXPECT_NE(nullptr, blockedKernelData);
    EXPECT_EQ(cmdStream, blockedKernelData->commandStream.get());
}

// Next test (name continues below): blocked dispatch where the heap sizes are compared against the
// getTotalSizeRequired{DSH,IOH,SSH} values for the whole MultiDispatchInfo.
HWTEST_F(DispatchWalkerTest,
GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeapSizesAreTakenFromMdi) {
    // Blocked dispatch: each blocked heap must offer at least the total size the
    // HardwareCommandsHelper reports for the whole MultiDispatchInfo.
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    MockMultiDispatchInfo multiDispatchInfo(&kernel);

    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);

    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo);
    auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo);
    auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo);

    EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
}

// After a blocked dispatch the blocked command stream must have a graphics allocation with a
// non-zero GPU address.
HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenCommandStreamHasGpuAddress) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());
    MockMultiDispatchInfo multiDispatchInfo(&kernel);

    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);

    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    EXPECT_NE(nullptr, blockedCommandsData->commandStream->getGraphicsAllocation());
    EXPECT_NE(0ull, blockedCommandsData->commandStream->getGraphicsAllocation()->getGpuAddress());
}

// Seeds the CSR's reusable-allocation list with one COMMAND_BUFFER allocation and expects the
// blocked command stream to pick up exactly that allocation, leaving the reuse list empty.
HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerIsCalledThenCommandStreamObtainsReusableAllocation) {
    MockKernel kernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());
    MockMultiDispatchInfo multiDispatchInfo(&kernel);

    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto allocation =
csr.getMemoryManager()->allocateGraphicsMemoryWithProperties({csr.getRootDeviceIndex(), MemoryConstants::pageSize64k + CSRequirements::csOverfetchSize,
                                                                                   GraphicsAllocation::AllocationType::COMMAND_BUFFER});
    // Hand the allocation to the CSR's internal storage as a reusable one.
    csr.getInternalAllocationStorage()->storeAllocation(std::unique_ptr{allocation}, REUSABLE_ALLOCATION);
    ASSERT_FALSE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty());

    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);

    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    EXPECT_TRUE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty());
    EXPECT_EQ(allocation, blockedCommandsData->commandStream->getGraphicsAllocation());
}

// Dispatches two kernels in one MultiDispatchInfo and expects each kernel's workDim value to match
// the dimension reported by its DispatchInfo.
HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDimensionsAreCorrect) {
    MockKernel kernel1(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel1.initialize());
    MockKernel kernel2(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel2.initialize());

    MockMultiDispatchInfo multiDispatchInfo(std::vector({&kernel1, &kernel2}));

    HardwareInterface::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        CL_COMMAND_NDRANGE_KERNEL);

    for (auto &dispatchInfo : multiDispatchInfo) {
        auto &kernel = *dispatchInfo.getKernel();
        EXPECT_EQ(*kernel.workDim, dispatchInfo.getDim());
    }
}

// Gives each of two kernels its own KERNEL_ISA allocation, dispatches both, and then inspects the
// interface descriptors written to the dynamic state heap: distinct kernel start pointers matching
// the two allocations, and sampler state only on the second (sampler-carrying) kernel.
HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenInterfaceDescriptorsAreProgrammedCorrectly) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto memoryManager = this->pDevice->getMemoryManager();
    auto kernelIsaAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::KERNEL_ISA});
    auto kernelIsaWithSamplerAllocation =
memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::KERNEL_ISA}); kernelInfo.kernelAllocation = kernelIsaAllocation; kernelInfoWithSampler.kernelAllocation = kernelIsaWithSamplerAllocation; auto gpuAddress1 = kernelIsaAllocation->getGpuAddressToPatch(); auto gpuAddress2 = kernelIsaWithSamplerAllocation->getGpuAddressToPatch(); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(std::vector({&kernel1, &kernel2})); // create Indirect DSH heap auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); indirectHeap.align(HardwareCommandsHelper::alignInterfaceDescriptorData); auto dshBeforeMultiDisptach = indirectHeap.getUsed(); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto dshAfterMultiDisptach = indirectHeap.getUsed(); auto numberOfDispatches = multiDispatchInfo.size(); auto interfaceDesriptorTableSize = numberOfDispatches * sizeof(INTERFACE_DESCRIPTOR_DATA); EXPECT_LE(dshBeforeMultiDisptach + interfaceDesriptorTableSize, dshAfterMultiDisptach); INTERFACE_DESCRIPTOR_DATA *pID = reinterpret_cast(ptrOffset(indirectHeap.getCpuBase(), dshBeforeMultiDisptach)); for (uint32_t index = 0; index < multiDispatchInfo.size(); index++) { uint32_t addressLow = pID[index].getKernelStartPointer(); uint32_t addressHigh = pID[index].getKernelStartPointerHigh(); uint64_t fullAddress = ((uint64_t)addressHigh << 32) | addressLow; if (index > 0) { uint32_t addressLowOfPrevious = pID[index - 1].getKernelStartPointer(); uint32_t addressHighOfPrevious = pID[index - 1].getKernelStartPointerHigh(); uint64_t addressPrevious = ((uint64_t)addressHighOfPrevious << 32) | 
addressLowOfPrevious; uint64_t address = ((uint64_t)addressHigh << 32) | addressLow; EXPECT_NE(addressPrevious, address); } if (index == 0) { auto samplerPointer = pID[index].getSamplerStatePointer(); auto samplerCount = pID[index].getSamplerCount(); EXPECT_EQ(0u, samplerPointer); EXPECT_EQ(0u, samplerCount); EXPECT_EQ(fullAddress, gpuAddress1); } if (index == 1) { auto samplerPointer = pID[index].getSamplerStatePointer(); auto samplerCount = pID[index].getSamplerCount(); EXPECT_NE(0u, samplerPointer); EXPECT_EQ(1u, samplerCount); EXPECT_EQ(fullAddress, gpuAddress2); } } HardwareParse hwParser; auto &cmdStream = pCmdQ->getCS(0); hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto cmd = hwParser.getCommand(); EXPECT_NE(nullptr, cmd); auto IDStartAddress = cmd->getInterfaceDescriptorDataStartAddress(); auto IDSize = cmd->getInterfaceDescriptorTotalLength(); EXPECT_EQ(dshBeforeMultiDisptach, IDStartAddress); EXPECT_EQ(interfaceDesriptorTableSize, IDSize); memoryManager->freeGraphicsMemory(kernelIsaAllocation); memoryManager->freeGraphicsMemory(kernelIsaWithSamplerAllocation); } HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenGpgpuWalkerIdOffsetIsProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(std::vector({&kernel1, &kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto walkerItor = hwParser.itorWalker; ASSERT_NE(hwParser.cmdList.end(), walkerItor); 
for (uint32_t index = 0; index < multiDispatchInfo.size(); index++) { ASSERT_NE(hwParser.cmdList.end(), walkerItor); auto *gpgpuWalker = (GPGPU_WALKER *)*walkerItor; auto IDIndex = gpgpuWalker->getInterfaceDescriptorOffset(); EXPECT_EQ(index, IDIndex); // move walker iterator walkerItor++; walkerItor = find(walkerItor, hwParser.cmdList.end()); } } HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenThreadGroupIdStartingCoordinatesAreProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(std::vector({&kernel1, &kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto walkerItor = hwParser.itorWalker; ASSERT_NE(hwParser.cmdList.end(), walkerItor); for (uint32_t index = 0; index < multiDispatchInfo.size(); index++) { ASSERT_NE(hwParser.cmdList.end(), walkerItor); auto *gpgpuWalker = (GPGPU_WALKER *)*walkerItor; auto coordinateX = gpgpuWalker->getThreadGroupIdStartingX(); EXPECT_EQ(coordinateX, 0u); auto coordinateY = gpgpuWalker->getThreadGroupIdStartingY(); EXPECT_EQ(coordinateY, 0u); auto coordinateZ = gpgpuWalker->getThreadGroupIdStartingResumeZ(); EXPECT_EQ(coordinateZ, 0u); // move walker iterator walkerItor++; walkerItor = find(walkerItor, hwParser.cmdList.end()); } } HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleDispatchInfoAndSameKernelWhenDispatchingWalkerThenGpgpuWalkerThreadGroupIdStartingCoordinatesAreCorrectlyProgrammed) { using GPGPU_WALKER = typename 
FamilyType::GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); DispatchInfo di1(&kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}, {100, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 1, 1}, {0, 0, 0}); DispatchInfo di2(&kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}, {100, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 0, 0}); MockMultiDispatchInfo multiDispatchInfo(std::vector({&di1, &di2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto walkerItor = hwParser.itorWalker; ASSERT_NE(hwParser.cmdList.end(), walkerItor); for (uint32_t index = 0; index < multiDispatchInfo.size(); index++) { ASSERT_NE(hwParser.cmdList.end(), walkerItor); auto *gpgpuWalker = (GPGPU_WALKER *)*walkerItor; auto coordinateX = gpgpuWalker->getThreadGroupIdStartingX(); EXPECT_EQ(coordinateX, index * 10u); auto coordinateY = gpgpuWalker->getThreadGroupIdStartingY(); EXPECT_EQ(coordinateY, 0u); auto coordinateZ = gpgpuWalker->getThreadGroupIdStartingResumeZ(); EXPECT_EQ(coordinateZ, 0u); // move walker iterator walkerItor++; walkerItor = find(walkerItor, hwParser.cmdList.end()); } } HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlushCommandNotPresentAfterWalker) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo 
multiDispatchInfo(std::vector({&kernel1})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; hwParse.parseCommands(cmdStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); EXPECT_EQ(nullptr, pipeControl); } HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoKernelsThenFlushCommandPresentOnce) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); kernel2.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo(std::vector({&kernel1, &kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; hwParse.parseCommands(cmdStream); uint32_t pipeControlCount = hwParse.getCommandCount(); EXPECT_EQ(pipeControlCount, 1u); } HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQueueThenFlushCommandPresentTwice) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); MockKernel kernel2(program.get(), 
kernelInfoWithSampler, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); kernel2.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; kernel1.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; kernel2.kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; MockMultiDispatchInfo multiDispatchInfo1(std::vector({&kernel1})); MockMultiDispatchInfo multiDispatchInfo2(std::vector({&kernel2})); // create commandStream auto &cmdStream = pCmdQ->getCS(0); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo1, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo2, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParse; hwParse.parseCommands(cmdStream); uint32_t pipeControlCount = hwParse.getCommandCount(); EXPECT_EQ(pipeControlCount, 2u); } TEST(DispatchWalker, WhenCalculatingDispatchDimensionsThenCorrectValuesAreReturned) { Vec3 dim0{0, 0, 0}; Vec3 dim1{2, 1, 1}; Vec3 dim2{2, 2, 1}; Vec3 dim3{2, 2, 2}; Vec3 dispatches[] = {dim0, dim1, dim2, dim3}; uint32_t testDims[] = {0, 1, 2, 3}; for (const auto &lhs : testDims) { for (const auto &rhs : testDims) { uint32_t dimTest = calculateDispatchDim(dispatches[lhs], dispatches[rhs]); uint32_t dimRef = std::max(1U, std::max(lhs, rhs)); EXPECT_EQ(dimRef, dimTest); } } } HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pDevice); auto &builder = static_cast &>(baseBuilder); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &cmdStream = pCmdQ->getCS(0); void *buffer = 
cmdStream.getCpuBase(); kernel.auxTranslationRequired = true; MockBuffer mockBuffer[2]; MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); memObjsForAuxTranslation.insert(&mockBuffer[0]); memObjsForAuxTranslation.insert(&mockBuffer[1]); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto sizeUsed = cmdStream.getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, buffer, sizeUsed)); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, pipeControls.size()); auto beginPipeControl = genCmdCast(*(pipeControls[0])); EXPECT_TRUE(beginPipeControl->getDcFlushEnable()); EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); auto endPipeControl = genCmdCast(*(pipeControls[1])); bool dcFlushRequired = (pClDevice->getHardwareInfo().platform.eRenderCoreFamily == IGFX_GEN8_CORE); EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable()); EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable()); } HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredThenPipeControlWithStallAdded) { BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pDevice); auto &builder = static_cast &>(baseBuilder); MockKernel kernel(program.get(), kernelInfo, *pClDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &cmdStream = pCmdQ->getCS(0); void *buffer = cmdStream.getCpuBase(); kernel.auxTranslationRequired = true; MockBuffer mockBuffer[2]; MultiDispatchInfo multiDispatchInfo; 
MemObjsForAuxTranslation memObjsForAuxTranslation; multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); memObjsForAuxTranslation.insert(&mockBuffer[0]); memObjsForAuxTranslation.insert(&mockBuffer[1]); BuiltinOpParams builtinOpsParams; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto sizeUsed = cmdStream.getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, buffer, sizeUsed)); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); ASSERT_EQ(2u, pipeControls.size()); bool dcFlushRequired = (pClDevice->getHardwareInfo().platform.eRenderCoreFamily == IGFX_GEN8_CORE); auto beginPipeControl = genCmdCast(*(pipeControls[0])); EXPECT_TRUE(beginPipeControl->getDcFlushEnable()); EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); auto endPipeControl = genCmdCast(*(pipeControls[1])); EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable()); EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable()); } struct ProfilingCommandsTest : public DispatchWalkerTest, ::testing::WithParamInterface { void SetUp() override { DispatchWalkerTest::SetUp(); } void TearDown() override { DispatchWalkerTest::TearDown(); } }; HWTEST_P(ProfilingCommandsTest, givenKernelWhenProfilingCommandStartIsTakenThenTimeStampAddressIsProgrammedCorrectly) { using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; bool checkForStart = GetParam(); auto &cmdStream = pCmdQ->getCS(0); TagAllocator timeStampAllocator(pDevice->getRootDeviceIndex(), this->pDevice->getMemoryManager(), 10, MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false); auto hwTimeStamp1 = timeStampAllocator.getTag(); 
ASSERT_NE(nullptr, hwTimeStamp1); if (checkForStart) { GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamp1, &cmdStream, pDevice->getHardwareInfo()); } else { GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamp1, &cmdStream); } auto hwTimeStamp2 = timeStampAllocator.getTag(); ASSERT_NE(nullptr, hwTimeStamp2); if (checkForStart) { GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamp2, &cmdStream, pDevice->getHardwareInfo()); } else { GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamp2, &cmdStream); } GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); auto itorStoreReg = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorStoreReg); auto storeReg = genCmdCast(*itorStoreReg); ASSERT_NE(nullptr, storeReg); uint64_t gpuAddress = storeReg->getMemoryAddress(); auto contextTimestampFieldOffset = checkForStart ? offsetof(HwTimeStamps, ContextStartTS) : offsetof(HwTimeStamps, ContextEndTS); uint64_t expectedAddress = hwTimeStamp1->getGpuAddress() + contextTimestampFieldOffset; EXPECT_EQ(expectedAddress, gpuAddress); itorStoreReg++; itorStoreReg = find(itorStoreReg, cmdList.end()); ASSERT_NE(cmdList.end(), itorStoreReg); storeReg = genCmdCast(*itorStoreReg); ASSERT_NE(nullptr, storeReg); gpuAddress = storeReg->getMemoryAddress(); expectedAddress = hwTimeStamp2->getGpuAddress() + contextTimestampFieldOffset; EXPECT_EQ(expectedAddress, gpuAddress); if (checkForStart) { auto itorPipeCtrl = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPipeCtrl); if (HardwareCommandsHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } auto pipeControl = genCmdCast(*itorPipeCtrl); ASSERT_NE(nullptr, pipeControl); gpuAddress = static_cast(pipeControl->getAddress()) | (static_cast(pipeControl->getAddressHigh()) << 32); 
expectedAddress = hwTimeStamp1->getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); EXPECT_EQ(expectedAddress, gpuAddress); itorPipeCtrl++; itorPipeCtrl = find(itorPipeCtrl, cmdList.end()); if (HardwareCommandsHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } ASSERT_NE(cmdList.end(), itorPipeCtrl); pipeControl = genCmdCast(*itorPipeCtrl); ASSERT_NE(nullptr, pipeControl); gpuAddress = static_cast(pipeControl->getAddress()) | static_cast(pipeControl->getAddressHigh()) << 32; expectedAddress = hwTimeStamp2->getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); EXPECT_EQ(expectedAddress, gpuAddress); } } INSTANTIATE_TEST_CASE_P(StartEndFlag, ProfilingCommandsTest, ::testing::Bool()); compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp000066400000000000000000000237451363734646600315720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "test.h" using namespace NEO; using BarrierTest = Test; HWTEST_F(BarrierTest, givenCsrWithHigherLevelThenCommandQueueWhenEnqueueBarrierIsCalledThenCommandQueueAlignsToCsrWithoutSendingAnyCommands) { auto pCmdQ = this->pCmdQ; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. 
uint32_t originalCSRLevel = 2; commandStreamReceiver.taskLevel = originalCSRLevel; pCmdQ->taskLevel = originalCSRLevel; uint32_t originalTaskCount = 15; commandStreamReceiver.taskCount = originalTaskCount; auto &csrCommandStream = commandStreamReceiver.commandStream; auto csrUsed = csrCommandStream.getUsed(); cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto &commandStream = pCmdQ->getCS(0); auto used = commandStream.getUsed(); auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // csr is untouched as we do not submit anything, cmd queue task level goes up as this is barrier call EXPECT_EQ(2u, commandStreamReceiver.peekTaskLevel()); EXPECT_EQ(3u, pCmdQ->taskLevel); //make sure nothing was added to CommandStream or CSR-CommandStream and command queue still uses this stream EXPECT_EQ(used, commandStream.getUsed()); EXPECT_EQ(&commandStream, &pCmdQ->getCS(0)); EXPECT_EQ(csrUsed, csrCommandStream.getUsed()); EXPECT_EQ(&csrCommandStream, &commandStreamReceiver.commandStream); } HWTEST_F(BarrierTest, GivenCsrTaskLevelGreaterThenCmdqTaskLevelWhenEnqueingBarrierWithWaitListThenAddPipeControlIsNotAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto pCS = this->pCS; auto pCmdQ = this->pCmdQ; auto pCmdBuffer = this->pCmdBuffer; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.setMediaVFEStateDirty(false); // Set task levels to known values. commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // Should sync CSR & CmdQ levels. 
EXPECT_GE(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel); auto sizeUsed = pCS->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, pCmdBuffer, sizeUsed)); // If CSR > CQ then a PC isn't required. auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorCmd); } HWTEST_F(BarrierTest, GivenEventWhenEnqueingBarrierWithWaitListThenEventIsSetupCorrectly) { auto pCmdQ = this->pCmdQ; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, event); // Check CL_EVENT_COMMAND_TYPE { auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_BARRIER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } } HWTEST_F(BarrierTest, WhenEnqueingBarrierWithWaitListThenReturnedEventShouldHaveEqualDepth) { auto pCmdQ = this->pCmdQ; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; // Should sync all 3 (CSR, CmdQ, Event) levels. 
EXPECT_GE(commandStreamReceiver.peekTaskLevel(), pEvent->taskLevel); EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); delete pEvent; } HWTEST_F(BarrierTest, WhenEnqueingBarrierWithWaitListThenDependenciesShouldSync) { auto pCmdQ = this->pCmdQ; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // In N:1, CSR is always highest task level. commandStreamReceiver.taskLevel = 7; // In N:1, pCmdQ.level <= CSR.level pCmdQ->taskLevel = 7; // In N:1, event.level <= pCmdQ.level Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 17); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = castToObject(event); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); // in this case only cmdQ raises the taskLevel why csr stay intact EXPECT_EQ(8u, pCmdQ->taskLevel); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(pCmdQ->taskLevel + 1, commandStreamReceiver.peekTaskLevel()); } else { EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel()); } EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); EXPECT_EQ(8u, pEvent->taskLevel); delete pEvent; } HWTEST_F(BarrierTest, givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistReturningEventWhenCallIsMadeThenDontWaitUntilEventIsSignaled) { MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); // In N:1, event.level <= pCmdQ.level Event event1(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 1, 17); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / 
sizeof(eventWaitList[0]); cl_event event = nullptr; auto latestTaskCountWaitedBeforeEnqueue = mockCmdQueue.latestTaskCountWaited.load(); auto retVal = mockCmdQueue.enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, mockCmdQueue.latestTaskCountWaited); auto pEvent = castToObject(event); EXPECT_NE(nullptr, pEvent); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(csr.peekTaskCount(), pEvent->peekTaskCount()); } else { EXPECT_EQ(17u, pEvent->peekTaskCount()); } EXPECT_TRUE(pEvent->updateStatusAndCheckCompletion()); delete pEvent; } HWTEST_F(BarrierTest, givenBlockedCommandQueueAndEnqueueBarrierWithWaitlistReturningEventWhenCallIsMadeThenReturnEventIsNotSignaled) { UserEvent event2(&pCmdQ->getContext()); cl_event eventWaitList[] = { &event2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); auto pEvent = (Event *)event; EXPECT_EQ(pEvent->peekTaskCount(), CompletionStamp::levelNotReady); event2.setStatus(CL_COMPLETE); clReleaseEvent(event); } HWTEST_F(BarrierTest, givenEmptyCommandStreamAndBlockedBarrierCommandWhenUserEventIsSignaledThenNewCommandStreamIsNotAcquired) { UserEvent event2(&pCmdQ->getContext()); cl_event eventWaitList[] = { &event2, }; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto &commandStream = pCmdQ->getCS(0); auto commandStreamStart = commandStream.getUsed(); auto commandStreamBuffer = commandStream.getCpuBase(); auto retVal = pCmdQ->enqueueBarrierWithWaitList( numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); // Consume all memory except what is needed for this enqueue size_t barrierCmdStreamSize = 
NEO::EnqueueOperation::getSizeRequiredCS(CL_COMMAND_BARRIER, false, false, *pCmdQ, nullptr); commandStream.getSpace(commandStream.getMaxAvailableSpace() - barrierCmdStreamSize); //now trigger event event2.setStatus(CL_COMPLETE); auto commandStreamStart2 = commandStream.getUsed(); auto commandStreamBuffer2 = commandStream.getCpuBase(); EXPECT_EQ(0u, commandStreamStart); EXPECT_GT(commandStreamStart2, 0u); EXPECT_EQ(commandStreamBuffer2, commandStreamBuffer); EXPECT_GE(commandStream.getMaxAvailableSpace(), commandStream.getMaxAvailableSpace()); clReleaseEvent(event); } enqueue_command_without_kernel_tests.cpp000066400000000000000000000350111363734646600346130ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/enqueue_properties.h" #include "opencl/test/unit_test/fixtures/dispatch_flags_fixture.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "test.h" namespace NEO { template class MockCommandQueueWithCacheFlush : public MockCommandQueueHw { using MockCommandQueueHw::MockCommandQueueHw; public: bool isCacheFlushCommand(uint32_t commandType) const override { return commandRequireCacheFlush; } bool commandRequireCacheFlush = false; }; HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedThenTaskCountIncreased) { 
std::unique_ptr> mockCmdQ(new MockCommandQueueHw(context, pClDevice, 0)); char buffer[64]; std::unique_ptr allocation(new MockGraphicsAllocation(buffer, sizeof(buffer))); std::unique_ptr surface(new GeneralSurface(allocation.get())); EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; Surface *surfaces[] = {surface.get()}; auto blocking = true; TimestampPacketDependencies timestampPacketDependencies; EnqueueProperties enqueueProperties(false, false, false, true, nullptr); mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0); EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u); } HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontRegisterBlitProperties) { std::unique_ptr> mockCmdQ(new MockCommandQueueHw(context, pClDevice, 0)); auto &csr = mockCmdQ->getGpgpuCommandStreamReceiver(); auto commandStream = new LinearStream(); csr.ensureCommandBufferAllocation(*commandStream, 1, 1); auto blockedCommandsDataForDependencyFlush = new KernelOperation(commandStream, *csr.getInternalAllocationStorage()); TimestampPacketDependencies timestampPacketDependencies; MultiDispatchInfo multiDispatchInfo; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; const EnqueueProperties enqueuePropertiesForDependencyFlush(false, false, false, true, nullptr); auto blockedCommandsData = std::unique_ptr(blockedCommandsDataForDependencyFlush); Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest, eventBuilder, std::unique_ptr(nullptr)); EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue); } HWTEST_F(EnqueueHandlerTest, 
givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBlitProperties) { std::unique_ptr> mockCmdQ(new MockCommandQueueHw(context, pClDevice, 0)); auto &csr = mockCmdQ->getGpgpuCommandStreamReceiver(); auto commandStream = new LinearStream(); csr.ensureCommandBufferAllocation(*commandStream, 1, 1); auto blockedCommandsDataForBlitEnqueue = new KernelOperation(commandStream, *csr.getInternalAllocationStorage()); TimestampPacketDependencies timestampPacketDependencies; MultiDispatchInfo multiDispatchInfo; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; BlitProperties blitProperties; blitProperties.srcAllocation = reinterpret_cast(0x12345); blitProperties.dstAllocation = reinterpret_cast(0x56789); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); const EnqueueProperties enqueuePropertiesForBlitEnqueue(true, false, false, false, &blitPropertiesContainer); auto blockedCommandsData = std::unique_ptr(blockedCommandsDataForBlitEnqueue); Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest, eventBuilder, std::unique_ptr(nullptr)); EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue); EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation); EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation); } HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispatchFlags) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); auto blocking = true; TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); 
EventBuilder eventBuilder; EnqueueProperties enqueueProperties(false, false, false, true, nullptr); mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0); EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); } HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectThrottleHint) { using CsrType = MockCsrHw2; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); mockCmdQ->throttle = QueueThrottle::HIGH; auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; EnqueueProperties enqueueProperties(false, false, false, true, nullptr); bool blocking = true; mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0); EXPECT_EQ(mockCmdQ->throttle, mockCsr->passedDispatchFlags.throttle); } HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) { using CsrType = MockCsrHw2; DebugManager.flags.EnableTimestampPacket.set(1); SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); auto blocking = true; 
TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; BuiltinOpParams builtinOpParams; builtinOpParams.srcMemObj = buffer.get(); builtinOpParams.dstPtr = reinterpret_cast(0x1234); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setBuiltinOpParams(builtinOpParams); mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true); timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, eventsRequest, mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); EnqueueProperties enqueueProperties(true, false, false, false, &blitPropertiesContainer); mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0); EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); } HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowOutOfOrderExecution) { using CsrType = MockCsrHw2; DebugManager.flags.EnableTimestampPacket.set(1); SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->skipBlitCalls = true; mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(context.get(), 0, 1, nullptr, retVal)); TimestampPacketDependencies timestampPacketDependencies; EventsRequest eventsRequest(0, nullptr, nullptr); EventBuilder eventBuilder; bool blocked = false; BuiltinOpParams builtinOpParams; 
builtinOpParams.srcMemObj = buffer.get(); builtinOpParams.dstPtr = reinterpret_cast(0x1234); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.setBuiltinOpParams(builtinOpParams); mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true); timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, timestampPacketDependencies, eventsRequest, mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); EnqueueProperties enqueueProperties(true, false, false, false, &blitPropertiesContainer); mockCsr->nTo1SubmissionModelEnabled = false; mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0); EXPECT_FALSE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); mockCsr->nTo1SubmissionModelEnabled = true; mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0); EXPECT_TRUE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); } HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenEnqueuedHandlerThenProgramPipeControl) { std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); mockCmdQ->commandRequireCacheFlush = true; MultiDispatchInfo multiDispatch; mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, nullptr); auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( pDevice->getHardwareInfo()), MemoryConstants::cacheLineSize); EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), requiredCmdStreamSize); } HWTEST_F(EnqueueHandlerTest, 
givenTimestampPacketWriteEnabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenObtainNewStamp) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(csr.rootDeviceIndex, pDevice->getMemoryManager()); csr.timestampPacketAllocator.reset(mockTagAllocator); std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); mockCmdQ->commandRequireCacheFlush = true; cl_event event; MultiDispatchInfo multiDispatch; mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, &event); auto node1 = mockCmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(nullptr, node1); clReleaseEvent(event); } HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteDisabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenTimeStampContainerIsNotCreated) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = false; auto mockTagAllocator = new MockTagAllocator<>(pDevice->getRootDeviceIndex(), pDevice->getMemoryManager()); csr.timestampPacketAllocator.reset(mockTagAllocator); std::unique_ptr> mockCmdQ(new MockCommandQueueWithCacheFlush(context, pClDevice, 0)); mockCmdQ->commandRequireCacheFlush = true; cl_event event; MultiDispatchInfo multiDispatch; mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, &event); auto container = mockCmdQ->timestampPacketContainer.get(); EXPECT_EQ(nullptr, container); clReleaseEvent(event); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_copy_buffer_event_tests.cpp000066400000000000000000000053471363734646600336460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include 
"opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; typedef HelloWorldTest EnqueueCopyBuffer; TEST_F(EnqueueCopyBuffer, WhenEnqueingCopyBufferThenEventHasCorrectCommandType) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, srcBuffer->getSize(), numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EnqueueCopyBuffer, GivenMultipleEventsWhenEnqueingCopyBufferThenReturnedEventShouldBeMaxOfInputEventsAndCmdQPlus1) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = CL_INVALID_VALUE; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); retVal = pCmdQ->enqueueCopyBuffer( srcBuffer.get(), dstBuffer.get(), 0, 0, srcBuffer->getSize(), numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, 
retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_copy_buffer_fixture.h000066400000000000000000000042751363734646600324350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyBufferHelper { cl_int enqueueCopyBuffer( CommandQueue *pCmdQ, Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList = 0, cl_event *eventWaitList = nullptr, cl_event *event = nullptr) { cl_int retVal = pCmdQ->enqueueCopyBuffer( srcBuffer, dstBuffer, srcOffset, dstOffset, size, numEventsInWaitList, eventWaitList, event); return retVal; } }; struct EnqueueCopyBufferTest : public CommandEnqueueFixture, public EnqueueCopyBufferHelper, public ::testing::Test { void SetUp(void) override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; srcBuffer = BufferHelper<>::create(); dstBuffer = BufferHelper<>::create(); } void TearDown(void) override { delete srcBuffer; delete dstBuffer; delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: void enqueueCopyBuffer() { auto retVal = EnqueueCopyBufferHelper::enqueueCopyBuffer( pCmdQ, srcBuffer, dstBuffer, 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } template void enqueueCopyBufferAndParse() { enqueueCopyBuffer(); parseCommands(*pCmdQ); } MockContext context; Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_fixture.h000066400000000000000000000072271363734646600334520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyBufferRectHelper { cl_int enqueueCopyBufferRect( CommandQueue *pCmdQ, Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { cl_int retVal = pCmdQ->enqueueCopyBufferRect( srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, eventWaitList, event); return retVal; } }; struct EnqueueCopyBufferRectTest : public CommandEnqueueFixture, public EnqueueCopyBufferRectHelper, public ::testing::Test { struct BufferRect : public BufferDefaults { static const size_t sizeInBytes; }; void SetUp(void) override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; srcBuffer = BufferHelper::create(); dstBuffer = BufferHelper::create(); } void TearDown(void) override { delete srcBuffer; delete dstBuffer; delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyBufferRect2D() { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; auto retVal = 
EnqueueCopyBufferRectHelper::enqueueCopyBufferRect( pCmdQ, srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } template void enqueueCopyBufferRect3D() { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 50}; auto retVal = EnqueueCopyBufferRectHelper::enqueueCopyBufferRect( pCmdQ, srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; static const size_t rowPitch = 100; static const size_t slicePitch = 100 * 100; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp000066400000000000000000000437021363734646600334570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; const size_t EnqueueCopyBufferRectTest::BufferRect::sizeInBytes = 100 * 100 * 100 * sizeof(cl_char); HWTEST_F(EnqueueCopyBufferRectTest, GivenNullSrcMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 
0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueCopyBufferRect( pCmdQ, nullptr, dstBuffer, srcOrigin, dstOrigin, region, 10, 0, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueCopyBufferRectTest, GivenNullDstMemObjWhenCopyingBufferRectThenClInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueCopyBufferRect( pCmdQ, srcBuffer, nullptr, srcOrigin, dstOrigin, region, 10, 0, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueCopyBufferRectTest, GivenValidParametersWhenCopyingBufferRectThenSuccessIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueCopyBufferRect( pCmdQ, srcBuffer, dstBuffer, srcOrigin, dstOrigin, region, 10, 0, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueCopyBufferRect2D(); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferRect2D(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, 
cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyBufferRect2D(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyBufferRect2D(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueCopyBufferRect2D(); // Extract the kernel used MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {50, 50, 1}; dc.srcRowPitch = rowPitch; dc.srcSlicePitch = slicePitch; dc.dstRowPitch = rowPitch; dc.dstSlicePitch = slicePitch; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, 
pIOH->getUsed()); if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessKernelIsUsed) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRectStateless, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {50, 50, 1}; dc.srcRowPitch = rowPitch; dc.srcSlicePitch = slicePitch; dc.dstRowPitch = rowPitch; dc.dstSlicePitch = slicePitch; MultiDispatchInfo multiDispatchInfo; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) { enqueueCopyBufferRect2D(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferRect2D(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect2D(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; ASSERT_NE(nullptr, cmd); // Verify we 
have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect2D(); auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferRect2D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferRect2D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferRect3D(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_LT(1u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyBufferRect3D(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyBufferRect3D(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenIndirectDataIsAdded) { auto usedIndirectHeapBefore = pDSH->getUsed(); enqueueCopyBufferRect3D(); EXPECT_NE(usedIndirectHeapBefore, pDSH->getUsed()); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenL3ProgrammingIsCorrect) { enqueueCopyBufferRect3D(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, When3DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferRect3D(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect3D(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, 
cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferRect3D(); auto *cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferRect3D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferRectTest, WhenCopyingBufferRect3DThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferRect3D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } struct EnqueueCopyBufferRectHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); dstBuffer = std::unique_ptr(BufferHelper::create(context.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; std::unique_ptr dstBuffer; const size_t rowPitch = 100; const size_t slicePitch = 100 * 100; std::array srcOrigin = {{0, 0, 0}}; std::array dstOrigin = {{0, 0, 0}}; std::array region = {{50, 50, 1}}; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; protected: template cl_int enqueueCopyBufferRectHw(CommandQueueHw *cmdQ) { auto retVal = CL_SUCCESS; retVal = clEnqueueCopyBufferRect( cmdQ, &srcBuffer, dstBuffer.get(), srcOrigin.data(), dstOrigin.data(), region.data(), rowPitch, slicePitch, rowPitch, slicePitch, 0, nullptr, nullptr); return retVal; } }; using EnqueueCopyBufferRectStateless = EnqueueCopyBufferRectHw; HWTEST_F(EnqueueCopyBufferRectStateless, 
GivenValidParametersWhenCopyingBufferRectStatelessThenSuccessIsReturned) { std::unique_ptr> cmdQ(new CommandQueueStateless(context.get(), device.get())); srcBuffer.size = static_cast(bigSize); auto retVal = enqueueCopyBufferRectHw(cmdQ.get()); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueCopyBufferRectStateful = EnqueueCopyBufferRectHw; HWTEST_F(EnqueueCopyBufferRectStateful, GivenValidParametersWhenCopyingBufferRectStatefulThenSuccessIsReturned) { std::unique_ptr> cmdQ(new CommandQueueStateful(context.get(), device.get())); srcBuffer.size = static_cast(smallSize); auto retVal = enqueueCopyBufferRectHw(cmdQ.get()); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp000066400000000000000000000333121363734646600324360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "test.h" #include "reg_configs_common.h" #include using namespace NEO; HWTEST_F(EnqueueCopyBufferTest, GivenNullSrcMemObjWhenCopyingBufferThenClInvalidMemObjectErrorIsReturned) { auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = clEnqueueCopyBuffer( pCmdQ, nullptr, dstBuffer.get(), 0, 0, sizeof(float), 0, nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueCopyBufferTest, GivenNullDstMemObjWhenCopyingBufferThenClInvalidMemObjectErrorIsReturned) { auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = clEnqueueCopyBuffer( pCmdQ, srcBuffer.get(), nullptr, 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueCopyBufferTest, GivenInvalidMemoryLocationWhenCopyingBufferThenClInvalidValueErrorIsReturned) { auto retVal = clEnqueueCopyBuffer( pCmdQ, srcBuffer, dstBuffer, 0, 8, 128, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueCopyBuffer(); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenCopyingBufferThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferAndParse(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyBuffer(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyBuffer(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueCopyBuffer(); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsUsed) { auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBufferStateless, 
pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer.get(); dc.dstMemObj = dstBuffer.get(); dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) { enqueueCopyBufferAndParse(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferAndParse(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenCopyingBufferThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferAndParse(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, 
cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenCopyingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferAndParse(); auto cmdIDD = (INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)cmdIDD->getKernelStartPointerHigh() << 32) + cmdIDD->getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, cmdIDD->getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, cmdIDD->getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, cmdIDD->getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferAndParse(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferTest, WhenCopyingBufferThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferAndParse(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentZeroMatchesSourceAddress) { enqueueCopyBufferAndParse(); // Extract the kernel used MultiDispatchInfo multiDispatchInfo; auto &builder = 
BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void **)getStatelessArgumentPointer(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0)); EXPECT_EQ((void *)((uintptr_t)srcBuffer->getGraphicsAllocation()->getGpuAddress()), *pArgument); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentOneMatchesDestinationAddress) { enqueueCopyBufferAndParse(); // Extract the kernel used MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.dstMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void **)getStatelessArgumentPointer(*kernel, 1u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0)); EXPECT_EQ((void *)((uintptr_t)dstBuffer->getGraphicsAllocation()->getGpuAddress()), *pArgument); } struct EnqueueCopyBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); dstBuffer = std::unique_ptr(BufferHelper<>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr dstBuffer; MockBuffer srcBuffer; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqueueCopyBufferStatelessTest = EnqueueCopyBufferHw; HWTEST_F(EnqueueCopyBufferStatelessTest, givenBuffersWhenCopyingBufferStatelessThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(bigSize); auto retVal = cmdQ->enqueueCopyBuffer( &srcBuffer, dstBuffer.get(), 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueCopyBufferStatefulTest = EnqueueCopyBufferHw; HWTEST_F(EnqueueCopyBufferStatefulTest, givenBuffersWhenCopyingBufferStatefulThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(smallSize); auto retVal = cmdQ->enqueueCopyBuffer( &srcBuffer, dstBuffer.get(), 0, 0, sizeof(float), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } enqueue_copy_buffer_to_image_fixture.h000066400000000000000000000042731363734646600342200ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyBufferToImageTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(); 
BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); srcBuffer = BufferHelper<>::create(context); dstImage = Image2dHelper<>::create(context); } void TearDown() override { delete dstImage; delete srcBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyBufferToImage() { auto retVal = EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage( pCmdQ, srcBuffer, dstImage); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Buffer *srcBuffer = nullptr; Image *dstImage = nullptr; }; struct EnqueueCopyBufferToImageMipMapTest : public CommandEnqueueFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp(void) override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); srcBuffer = BufferHelper<>::create(context); } void TearDown(void) override { delete srcBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } MockContext *context = nullptr; Buffer *srcBuffer = nullptr; }; } // namespace NEO enqueue_copy_buffer_to_image_tests.cpp000066400000000000000000000355141363734646600342310ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_buffer_to_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" 
#include "test.h" #include "reg_configs_common.h" #include using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyBufferToImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyBufferToImageTest, 
WhenCopyingBufferToImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueCopyBufferToImageHelper<>::enqueueCopyBufferToImage(pCmdQ, srcBuffer, dstImage); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenL3ProgrammingIsCorrect) { enqueueCopyBufferToImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyBufferToImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferToImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() 
% alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyBufferToImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min( maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. 
EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueCopyBufferToImage(); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); const auto &imageDesc = dstImage->getImageDesc(); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); auto surfaceFormat = surfaceState.getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(dstImage->getGraphicsAllocation()->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenNumberOfPipelineSelectsIsOne) { enqueueCopyBufferToImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyBufferToImageTest, WhenCopyingBufferToImageThenMediaVfeStateIsSetCorrectly) { enqueueCopyBufferToImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } typedef 
EnqueueCopyBufferToImageMipMapTest MipMapCopyBufferToImageTest; HWTEST_P(MipMapCopyBufferToImageTest, GivenImageWithMipLevelNonZeroWhenCopyBufferToImageIsCalledThenProperMipLevelIsSet) { auto image_type = (cl_mem_object_type)GetParam(); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getDevice()); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); std::unique_ptr ptr = std::unique_ptr(new uint32_t[3]); 
retVal = pCmdQ->enqueueCopyBufferToImage(srcBuffer, image.get(), 0, origin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->dstMipLevel); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapCopyBufferToImageTest_GivenImageWithMipLevelNonZeroWhenCopyBufferToImageIsCalledThenProperMipLevelIsSet, MipMapCopyBufferToImageTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); struct EnqueueCopyBufferToImageHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context = std::make_unique(device.get()); dstImage = std::unique_ptr(Image2dHelper<>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr dstImage; MockBuffer srcBuffer; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; const size_t dstOrigin[3] = {0, 0, 0}; const size_t region[3] = {4, 1, 1}; }; using EnqueueCopyBufferToImageStatelessTest = EnqueueCopyBufferToImageHw; HWTEST_F(EnqueueCopyBufferToImageStatelessTest, givenBigBufferWhenCopyingBufferToImageStatelessThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(bigSize); auto retVal = cmdQ->enqueueCopyBufferToImage( &srcBuffer, dstImage.get(), static_cast(bigOffset), 
dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueCopyBufferToImageStatefulTest = EnqueueCopyBufferToImageHw; HWTEST_F(EnqueueCopyBufferToImageStatefulTest, givenBigBufferWhenCopyingBufferToImageStatefulThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); srcBuffer.size = static_cast(smallSize); auto retVal = cmdQ->enqueueCopyBufferToImage( &srcBuffer, dstImage.get(), 0, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_copy_image_fixture.h000066400000000000000000000035311363734646600322400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyImageTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp(void) override { CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); dstImage = Image2dHelper<>::create(context); } void TearDown(void) override { delete dstImage; delete srcImage; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyImage() { auto retVal = EnqueueCopyImageHelper<>::enqueueCopyImage( pCmdQ, srcImage, dstImage); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Image *srcImage = nullptr; Image *dstImage = nullptr; }; struct EnqueueCopyImageMipMapTest : public CommandEnqueueFixture, public ::testing::Test, public ::testing::WithParamInterface> { void SetUp(void) override { CommandEnqueueFixture::SetUp(); context = 
new MockContext(pClDevice); } void TearDown(void) override { delete context; CommandEnqueueFixture::TearDown(); } MockContext *context = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp000066400000000000000000000351531363734646600322540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "test.h" #include "reg_configs_common.h" #include using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } //auto numWorkItems = ( ( cmd->getThreadWidthCounterMaximum() - 1 ) * simd + lanesPerThreadX ) * cmd->getThreadGroupIdXDimension(); //EXPECT_EQ( expectedWorkItems, numWorkItems ); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ, srcImage, dstImage); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenL3ProgrammingIsCorrect) { enqueueCopyImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, 
WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. 
Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min(maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueCopyImage(); for (uint32_t i = 0; i < 2; ++i) { const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), i); const auto &imageDesc = dstImage->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); auto surfaceFormat = surfaceState.getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); 
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); } const auto &srcSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), srcSurfaceState.getSurfaceBaseAddress()); const auto &dstSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); EXPECT_EQ(dstImage->getGraphicsAllocation()->getGpuAddress(), dstSurfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne) { enqueueCopyImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageTest, WhenCopyingImageThenMediaVfeStateIsSetCorrectly) { enqueueCopyImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } typedef EnqueueCopyImageMipMapTest MipMapCopyImageTest; HWTEST_P(MipMapCopyImageTest, GivenImagesWithNonZeroMipLevelsWhenCopyImageIsCalledThenProperMipLevelsAreSet) { cl_mem_object_type srcImageType, dstImageType; std::tie(srcImageType, dstImageType) = GetParam(); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImageToImage3d, pCmdQ->getDevice()); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImageToImage3d, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc srcImageDesc = {}; uint32_t 
expectedSrcMipLevel = 3; uint32_t expectedDstMipLevel = 4; srcImageDesc.image_type = srcImageType; srcImageDesc.num_mip_levels = 10; srcImageDesc.image_width = 4; srcImageDesc.image_height = 1; srcImageDesc.image_depth = 1; cl_image_desc dstImageDesc = srcImageDesc; dstImageDesc.image_type = dstImageType; size_t srcOrigin[] = {0, 0, 0, 0}; size_t dstOrigin[] = {0, 0, 0, 0}; size_t region[] = {srcImageDesc.image_width, 1, 1}; std::unique_ptr srcImage; std::unique_ptr dstImage; switch (srcImageType) { case CL_MEM_OBJECT_IMAGE1D: srcOrigin[1] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: srcImageDesc.image_array_size = 2; srcOrigin[2] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: srcOrigin[2] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: srcImageDesc.image_array_size = 2; srcOrigin[3] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: srcOrigin[3] = expectedSrcMipLevel; srcImage = std::unique_ptr(ImageHelper::create(context, &srcImageDesc)); break; } EXPECT_NE(nullptr, srcImage.get()); switch (dstImageType) { case CL_MEM_OBJECT_IMAGE1D: dstOrigin[1] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: dstImageDesc.image_array_size = 2; dstOrigin[2] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: dstOrigin[2] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: dstImageDesc.image_array_size = 2; dstOrigin[3] = expectedDstMipLevel; dstImage = 
std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: dstOrigin[3] = expectedDstMipLevel; dstImage = std::unique_ptr(ImageHelper::create(context, &dstImageDesc)); break; } EXPECT_NE(nullptr, dstImage.get()); retVal = pCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, pCmdQ->getDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedSrcMipLevel, params->srcMipLevel); EXPECT_EQ(expectedDstMipLevel, params->dstMipLevel); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImageToImage3d, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } uint32_t types[] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; INSTANTIATE_TEST_CASE_P(MipMapCopyImageTest_GivenImagesWithNonZeroMipLevelsWhenCopyImageIsCalledThenProperMipLevelsAreSet, MipMapCopyImageTest, ::testing::Combine( ::testing::ValuesIn(types), ::testing::ValuesIn(types))); enqueue_copy_image_to_buffer_fixture.h000066400000000000000000000043011363734646600342100ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueCopyImageToBufferTest : public CommandEnqueueFixture, 
public ::testing::Test { void SetUp(void) override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); dstBuffer = BufferHelper<>::create(context); } void TearDown(void) override { delete srcImage; delete dstBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueCopyImageToBuffer() { auto retVal = EnqueueCopyImageToBufferHelper<>::enqueueCopyImageToBuffer( pCmdQ, srcImage, dstBuffer); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Image *srcImage = nullptr; Buffer *dstBuffer = nullptr; }; struct EnqueueCopyImageToBufferMipMapTest : public CommandEnqueueFixture, public ::testing::Test, public ::testing::WithParamInterface { void SetUp(void) override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext(pClDevice); context = new MockContext(pClDevice); dstBuffer = BufferHelper<>::create(context); } void TearDown(void) override { delete dstBuffer; delete BufferDefaults::context; delete context; CommandEnqueueFixture::TearDown(); } MockContext *context = nullptr; Buffer *dstBuffer = nullptr; }; } // namespace NEO enqueue_copy_image_to_buffer_tests.cpp000066400000000000000000000352361363734646600342320ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_copy_image_to_buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include 
"opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "test.h" #include "reg_configs_common.h" #include using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueCopyImageToBuffer(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_EQ(0u, cmd->getIndirectDataLength() % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueCopyImageToBuffer(); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueCopyImageToBuffer(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueCopyImageToBuffer(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueCopyImageToBuffer(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenL3ProgrammingIsCorrect) { enqueueCopyImageToBuffer(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueCopyImageToBuffer(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, 
cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImageToBuffer(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueCopyImageToBuffer(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min( maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueCopyImageToBuffer(); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); const auto &imageDesc = srcImage->getImageDesc(); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); auto surfaceFormat = surfaceState.getSurfaceFormat(); bool isRedescribedFormat = surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32B32A32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32G32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R16_UINT || surfaceFormat == RENDER_SURFACE_STATE::SURFACE_FORMAT_R8_UINT; EXPECT_TRUE(isRedescribedFormat); 
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenNumberOfPipelineSelectsIsOne) { enqueueCopyImageToBuffer(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueCopyImageToBufferTest, WhenCopyingImageToBufferThenMediaVfeStateIsSetCorrectly) { enqueueCopyImageToBuffer(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } typedef EnqueueCopyImageToBufferMipMapTest MipMapCopyImageToBufferTest; HWTEST_P(MipMapCopyImageToBufferTest, GivenImageWithMipLevelNonZeroWhenCopyImageToBufferIsCalledThenProperMipLevelIsSet) { auto image_type = (cl_mem_object_type)GetParam(); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getDevice()); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case 
CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); retVal = pCmdQ->enqueueCopyImageToBuffer(image.get(), dstBuffer, origin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->srcMipLevel); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapCopyImageToBufferTest_GivenImageWithMipLevelNonZeroWhenCopyImageToBufferIsCalledThenProperMipLevelIsSet, MipMapCopyImageToBufferTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); struct EnqueueCopyImageToBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context = std::make_unique(device.get()); srcImage = 
std::unique_ptr(Image2dHelper<>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr srcImage; MockBuffer dstBuffer; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; uint64_t bigOffset = 4ull * MemoryConstants::gigaByte; const size_t srcOrigin[3] = {0, 0, 0}; const size_t region[3] = {4, 1, 1}; }; using EnqueueCopyImageToBufferHwStatelessTest = EnqueueCopyImageToBufferHw; HWTEST_F(EnqueueCopyImageToBufferHwStatelessTest, givenBigBufferWhenCopyingImageToBufferStatelessThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(bigSize); auto retVal = cmdQ->enqueueCopyImageToBuffer( srcImage.get(), &dstBuffer, srcOrigin, region, static_cast(bigOffset), 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqueueCopyImageToBufferStatefulTest = EnqueueCopyImageToBufferHw; HWTEST_F(EnqueueCopyImageToBufferStatefulTest, givenBufferWhenCopyingImageToBufferStatefulThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(smallSize); auto retVal = cmdQ->enqueueCopyImageToBuffer( srcImage.get(), &dstBuffer, srcOrigin, region, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp000066400000000000000000000141721363734646600325640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/kernel_filename_helper.h" #include 
"opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "test.h" #include "compiler_options.h" #include "gmock/gmock.h" using namespace NEO; using namespace ::testing; typedef EnqueueHandlerTest EnqueueDebugKernelSimpleTest; class EnqueueDebugKernelTest : public ProgramSimpleFixture, public ::testing::Test { public: void SetUp() override { ProgramSimpleFixture::SetUp(); device = pClDevice; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(new SourceLevelDebugger(nullptr)); if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { pDevice->deviceInfo.debuggerActive = true; std::string filename; std::string kernelOption(CompilerOptions::debugKernelEnable); KernelFilenameHelper::getKernelFilenameFromInternalOption(kernelOption, filename); kbHelper = new KernelBinaryHelper(filename, false); CreateProgramWithSource( pContext, &device, "copybuffer.cl"); pProgram->enableKernelDebug(); cl_int retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel debugKernel = Kernel::create( pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, debugKernel); cl_mem src = &bufferSrc; cl_mem dst = &bufferDst; retVal = debugKernel->setArg( 0, sizeof(cl_mem), &src); retVal = debugKernel->setArg( 1, sizeof(cl_mem), &dst); } } void TearDown() override { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { delete kbHelper; debugKernel->release(); } ProgramSimpleFixture::TearDown(); } cl_device_id device; Kernel *debugKernel = nullptr; KernelBinaryHelper *kbHelper = nullptr; MockContext context; MockBuffer bufferSrc; MockBuffer bufferDst; }; HWTEST_F(EnqueueDebugKernelTest, 
givenDebugKernelWhenEnqueuedThenSSHAndBtiAreCorrectlySet) { if (pDevice->isDebuggerActive()) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; std::unique_ptr> mockCmdQ(new MockCommandQueueHw(&context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; auto &ssh = mockCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096u); void *surfaceStates = ssh.getSpace(0); mockCmdQ->enqueueKernel(debugKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto *dstBtiTableBase = reinterpret_cast(ptrOffset(surfaceStates, debugKernel->getBindingTableOffset())); uint32_t surfaceStateOffset = dstBtiTableBase[0].getSurfaceStatePointer(); auto debugSurfaceState = reinterpret_cast(ptrOffset(ssh.getCpuBase(), surfaceStateOffset)); auto &commandStreamReceiver = mockCmdQ->getGpgpuCommandStreamReceiver(); auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); EXPECT_EQ(1u, debugSurface->getTaskCount(commandStreamReceiver.getOsContext().getContextId())); EXPECT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress()); } } template class GMockCommandQueueHw : public CommandQueueHw { typedef CommandQueueHw BaseClass; public: GMockCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false) { } MOCK_METHOD1(setupDebugSurface, bool(Kernel *kernel)); }; HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithDebugEnabledWhenEnqueuedThenDebugSurfaceIsSetup) { MockProgram program(*pDevice->getExecutionEnvironment()); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); kernel->setContext(context); std::unique_ptr> mockCmdQ(new GMockCommandQueueHw(context, pClDevice, 0)); EXPECT_CALL(*mockCmdQ.get(), setupDebugSurface(kernel.get())).Times(1).RetiresOnSaturation(); size_t gws[] = {1, 1, 1}; mockCmdQ->enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 0, 
nullptr, nullptr); ::testing::Mock::VerifyAndClearExpectations(mockCmdQ.get()); } HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithoutDebugEnabledWhenEnqueuedThenDebugSurfaceIsNotSetup) { MockProgram program(*pDevice->getExecutionEnvironment()); std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); kernel->setContext(context); std::unique_ptr>> mockCmdQ(new NiceMock>(context, pClDevice, nullptr)); EXPECT_CALL(*mockCmdQ.get(), setupDebugSurface(kernel.get())).Times(0); size_t gws[] = {1, 1, 1}; mockCmdQ->enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); ::testing::Mock::VerifyAndClearExpectations(mockCmdQ.get()); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_fill_buffer_event_tests.cpp000066400000000000000000000054501363734646600336150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h" #include "test.h" using namespace NEO; namespace ULT { struct FillBufferEventTests : public EnqueueFillBufferFixture, public ::testing::Test { typedef EnqueueFillBufferFixture BaseClass; void SetUp() override { BaseClass::SetUp(); } void TearDown() override { BaseClass::TearDown(); } }; HWTEST_F(FillBufferEventTests, WhenEnqueingFillBufferThenEventHasCorrectCommandType) { float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = 0; size_t size = 2 * patternSize; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueFillBuffer( buffer, pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { 
cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_FILL_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } HWTEST_F(FillBufferEventTests, GivenMultipleEventsWhenEnqueingFillBufferThenReturnedEventShouldBeMaxOfInputEventsAndCmdQPlus1) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16); float pattern[] = {1.0f}; size_t patternSize = sizeof(pattern); size_t offset = 0; size_t size = 2 * patternSize; cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueFillBuffer( buffer, pattern, patternSize, offset, size, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h000066400000000000000000000021101363734646600323730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { struct EnqueueFillBufferFixture : public CommandEnqueueFixture { void SetUp() override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; buffer = 
BufferHelper<>::create(); } void TearDown() override { delete buffer; delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } template void enqueueFillBuffer() { auto retVal = EnqueueFillBufferHelper<>::enqueueFillBuffer( pCmdQ, buffer); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext context; Buffer *buffer = nullptr; }; } // namespace NEO enqueue_fill_buffer_negative_tests.cpp000066400000000000000000000065731363734646600342260ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h" #include "gtest/gtest.h" using namespace NEO; namespace ULT { struct EnqueueFillBuffer : public EnqueueFillBufferFixture, public ::testing::Test { typedef EnqueueFillBufferFixture BaseClass; void SetUp() override { BaseClass::SetUp(); } void TearDown() override { BaseClass::TearDown(); } }; TEST_F(EnqueueFillBuffer, GivenNullBufferWhenFillingBufferThenInvalidMemObjectErrorIsReturned) { cl_float pattern = 1.0f; auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, nullptr, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(EnqueueFillBuffer, GivenNullPatternWhenFillingBufferThenInvalidValueErrorIsReturned) { cl_float pattern = 1.0f; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, buffer, nullptr, sizeof(pattern), 0, sizeof(pattern), numEventsInWaitList, eventWaitList, event); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(EnqueueFillBuffer, GivenNullEventListAndNumEventsNonZeroWhenFillingBufferThenInvalidEventWaitListErrorIsReturned) { cl_float pattern = 1.0f; auto retVal = clEnqueueFillBuffer( 
BaseClass::pCmdQ, buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(EnqueueFillBuffer, GivenEventListAndNumEventsZeroWhenFillingBufferThenInvalidEventWaitListErrorIsReturned) { cl_event eventList = (cl_event)ptrGarbage; cl_float pattern = 1.0f; auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, buffer, &pattern, sizeof(pattern), 0, sizeof(pattern), 0, &eventList, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } } // namespace ULT namespace ULT { struct InvalidPatternSize : public EnqueueFillBufferFixture, public ::testing::TestWithParam { typedef EnqueueFillBufferFixture BaseClass; InvalidPatternSize() { } void SetUp() override { BaseClass::SetUp(); patternSize = GetParam(); pattern = new char[patternSize]; } void TearDown() override { delete[] pattern; BaseClass::TearDown(); } size_t patternSize = 0; char *pattern = nullptr; }; TEST_P(InvalidPatternSize, GivenInvalidPatternSizeWhenFillingBufferThenInvalidValueErrorIsReturned) { auto retVal = clEnqueueFillBuffer( BaseClass::pCmdQ, buffer, &pattern, patternSize, 0, patternSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } INSTANTIATE_TEST_CASE_P(EnqueueFillBuffer, InvalidPatternSize, ::testing::Values(0, 3, 5, 256, 512, 1024)); } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp000066400000000000000000000630231363734646600324140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include 
"opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_buffer_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; typedef Test EnqueueFillBufferCmdTests; HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueFillBuffer(); auto *cmd = (GPGPU_WALKER *)cmdWalker; ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); 
EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenIndirectDataGetsAdded) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel)); 
EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, FillBufferRightLeftover) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); MultiDispatchInfo mdi; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {0, 0, 0}; dc.size = {EnqueueFillBufferTraits::patternSize, 0, 0}; builder.buildDispatchInfos(mdi, dc); EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); EXPECT_STREQ("FillBufferRightLeftover", kernel->getKernelInfo().name.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, FillBufferMiddle) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); MultiDispatchInfo mdi; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), 
patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {0, 0, 0}; dc.size = {MemoryConstants::cacheLineSize, 0, 0}; builder.buildDispatchInfos(mdi, dc); EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().name.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, FillBufferLeftLeftover) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); MultiDispatchInfo mdi; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::patternSize, 0, 0}; dc.size = {EnqueueFillBufferTraits::patternSize, 0, 0}; builder.buildDispatchInfos(mdi, dc); EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); EXPECT_STREQ("FillBufferLeftLeftover", kernel->getKernelInfo().name.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenL3ProgrammingIsCorrect) { enqueueFillBuffer(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueFillBuffer(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); 
validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueFillBuffer(); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)cmdMediaInterfaceDescriptorLoad; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; enqueueFillBuffer(); // Extract the IDD auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)(cmdInterfaceDescriptorData); // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenNumberOfPipelineSelectsIsOne) { enqueueFillBuffer(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillBufferCmdTests, WhenFillingBufferThenMediaVfeStateIsSetCorrectly) { enqueueFillBuffer(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentZeroShouldMatchDestAddress) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); enqueueFillBuffer(); // Extract the kernel used MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void 
**)getStatelessArgumentPointer(*kernel, 0u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0)); EXPECT_EQ((void *)((uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress()), *pArgument); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } // This test case should be re-enabled once getStatelessArgumentPointer gets support for SVM pointers. // This could happen if KernelInfo.kernelArgInfo was accessible given a Kernel. Just need an offset // into CrossThreadData. HWTEST_F(EnqueueFillBufferCmdTests, DISABLED_WhenFillingBufferThenArgumentOneShouldMatchOffset) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); enqueueFillBuffer(); // Extract the kernel used MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (uint32_t *)getStatelessArgumentPointer(*kernel, 1u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0)); ASSERT_NE(nullptr, pArgument); EXPECT_EQ(0u, *pArgument); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchPatternPtr) { auto 
patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); enqueueFillBuffer(); // Extract the kernel used MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); // Determine where the argument is auto pArgument = (void **)getStatelessArgumentPointer(*kernel, 2u, pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0)); EXPECT_NE(nullptr, *pArgument); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKernelIsUsed) { auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); // Extract the kernel used auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBufferStateless, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = 
&patternMemObj; dc.dstMemObj = buffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; MultiDispatchInfo multiDispatchInfo; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeCopied) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead(); while (allocation != nullptr) { if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) && (allocation->getUnderlyingBuffer() != nullptr) && (*(static_cast(allocation->getUnderlyingBuffer())) == EnqueueFillBufferHelper<>::Traits::pattern[0]) && (pCmdQ->taskCount == allocation->getTaskCount(csr.getOsContext().getContextId()))) { break; } allocation = allocation->next; } ASSERT_NE(nullptr, allocation); EXPECT_NE(&EnqueueFillBufferHelper<>::Traits::pattern[0], allocation->getUnderlyingBuffer()); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternShouldBeAligned) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); EnqueueFillBufferHelper<>::enqueueFillBuffer(pCmdQ, buffer); ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead(); while (allocation != nullptr) { if ((allocation->getUnderlyingBufferSize() >= sizeof(float)) && 
(allocation->getUnderlyingBuffer() != nullptr) && (*(static_cast(allocation->getUnderlyingBuffer())) == EnqueueFillBufferHelper<>::Traits::pattern[0]) && (pCmdQ->taskCount == allocation->getTaskCount(csr.getOsContext().getContextId()))) { break; } allocation = allocation->next; } ASSERT_NE(nullptr, allocation); EXPECT_EQ(alignUp(allocation->getUnderlyingBuffer(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBuffer()); EXPECT_EQ(alignUp(allocation->getUnderlyingBufferSize(), MemoryConstants::cacheLineSize), allocation->getUnderlyingBufferSize()); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeOneByteShouldGetPreparedForMiddleKernel) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); const uint8_t pattern[1] = {0x55}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; const size_t size = 4 * patternSize; const uint8_t output[4] = {0x55, 0x55, 0x55, 0x55}; auto retVal = clEnqueueFillBuffer( pCmdQ, dstBuffer.get(), pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, allocation); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size)); } HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenPatternOfSizeTwoBytesShouldGetPreparedForMiddleKernel) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); const uint8_t pattern[2] = {0x55, 0xAA}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; 
const size_t size = 2 * patternSize; const uint8_t output[4] = {0x55, 0xAA, 0x55, 0xAA}; auto retVal = clEnqueueFillBuffer( pCmdQ, dstBuffer.get(), pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *allocation = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, allocation); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), output, size)); } HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationIsObtainedThenItsTypeShouldBeSetToFillPattern) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); const uint8_t pattern[1] = {0x55}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; const size_t size = patternSize; auto retVal = clEnqueueFillBuffer( pCmdQ, dstBuffer.get(), pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *patternAllocation = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, patternAllocation); EXPECT_EQ(GraphicsAllocation::AllocationType::FILL_PATTERN, patternAllocation->getAllocationType()); } struct EnqueueFillBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; const uint8_t pattern[1] = {0x55}; const size_t patternSize = sizeof(pattern); const size_t offset = 0; const size_t size = patternSize; MockBuffer dstBuffer; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using 
EnqeueFillBufferStatelessTest = EnqueueFillBufferHw; HWTEST_F(EnqeueFillBufferStatelessTest, givenBuffersWhenFillingBufferStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueFillBuffer( &dstBuffer, pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); } using EnqeueFillBufferStatefullTest = EnqueueFillBufferHw; HWTEST_F(EnqeueFillBufferStatefullTest, givenBuffersWhenFillingBufferStatefullThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); dstBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueFillBuffer( &dstBuffer, pattern, patternSize, offset, size, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_fill_image_fixture.h000066400000000000000000000021341363734646600322120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueFillImageTestFixture : public CommandEnqueueFixture { void SetUp(void) override { CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); image = Image2dHelper<>::create(context); } void TearDown(void) override { delete image; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueFillImage() { auto retVal = EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } MockContext *context = nullptr; Image *image = nullptr; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_fill_image_tests.cpp000066400000000000000000000263121363734646600322250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/convert_color.h" #include "opencl/test/unit_test/command_queue/enqueue_fill_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "test.h" #include "reg_configs_common.h" #include using namespace NEO; class EnqueueFillImageTest : public EnqueueFillImageTestFixture, public ::testing::Test { public: void SetUp(void) override { EnqueueFillImageTestFixture::SetUp(); } void TearDown(void) override { EnqueueFillImageTestFixture::TearDown(); } }; HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenTaskCountIsAlignedWithCsr) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenGpgpuWalkerIsCorrect) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueFillImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the 
SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenIndirectDataGetsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueFillImageHelper<>::enqueueFillImage(pCmdQ, image); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenL3ProgrammingIsCorrect) { enqueueFillImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueFillImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenMediaInterfaceDescriptorLoadIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename 
FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueFillImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueFillImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); size_t maxLocalSize = 256u; auto localWorkSize = std::min(maxLocalSize, Image2dDefaults::imageDesc.image_width * Image2dDefaults::imageDesc.image_height); auto simd = 32u; auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueFillImage(); const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); const auto &imageDesc = image->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); const auto &srcSurfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); EXPECT_EQ(image->getGraphicsAllocation()->getGpuAddress(), srcSurfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueFillImageTest, WhenFillingImageThenNumberOfPipelineSelectsIsOne) { enqueueFillImage(); int numCommands = 
getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueFillImageTest, WhenFillingImageThenMediaVfeStateIsSetCorrectly) { enqueueFillImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } TEST_F(EnqueueFillImageTest, givenSrgbFormatWhenConvertingThenUseNormalizingFactor) { float *fillColor; int iFillColor[4] = {0}; float LessThanZeroArray[4] = {-1.0f, -1.0f, -1.0f, 1.0f}; float MoreThanOneArray[4] = {2.0f, 2.0f, 2.0f, 1.0f}; float NaNArray[4] = {NAN, NAN, NAN, 1.0f}; float distance; cl_image_format oldImageFormat = {CL_sRGBA, CL_UNORM_INT8}; cl_image_format newImageFormat = {CL_RGBA, CL_UNSIGNED_INT8}; fillColor = LessThanZeroArray; convertFillColor(static_cast(fillColor), iFillColor, oldImageFormat, newImageFormat); for (int i = 0; i < 3; i++) { distance = std::fabs(0.0f - static_cast(iFillColor[i])); EXPECT_GE(0.6f, distance); } EXPECT_EQ(255, iFillColor[3]); fillColor = MoreThanOneArray; convertFillColor(static_cast(fillColor), iFillColor, oldImageFormat, newImageFormat); for (int i = 0; i < 3; i++) { distance = std::fabs(255.0f - static_cast(iFillColor[i])); EXPECT_GE(0.6f, distance); } EXPECT_EQ(255, iFillColor[3]); fillColor = NaNArray; convertFillColor(static_cast(fillColor), iFillColor, oldImageFormat, newImageFormat); for (int i = 0; i < 3; i++) { distance = std::fabs(0.0f - static_cast(iFillColor[i])); EXPECT_GE(0.6f, distance); } EXPECT_EQ(255, iFillColor[3]); } TEST(ColorConvertTest, givenSnorm8FormatWhenConvertingThenUseNormalizingFactor) { float fFillColor[4] = {0.3f, -0.3f, 0.0f, 1.0f}; int32_t iFillColor[4] = {}; int32_t expectedIFillColor[4] = {}; cl_image_format oldFormat = {CL_R, CL_SNORM_INT8}; cl_image_format newFormat = {CL_R, CL_UNSIGNED_INT8}; auto normalizingFactor = selectNormalizingFactor(oldFormat.image_channel_data_type); for (size_t i = 0; i < 4; i++) { expectedIFillColor[i] = static_cast(normalizingFactor * 
fFillColor[i]); expectedIFillColor[i] = expectedIFillColor[i] & 0xFF; } convertFillColor(static_cast(fFillColor), iFillColor, oldFormat, newFormat); EXPECT_TRUE(memcmp(expectedIFillColor, iFillColor, 4 * sizeof(int32_t)) == 0); } TEST(ColorConvertTest, givenSnorm16FormatWhenConvertingThenUseNormalizingFactor) { float fFillColor[4] = {0.3f, -0.3f, 0.0f, 1.0f}; int32_t iFillColor[4] = {}; int32_t expectedIFillColor[4] = {}; cl_image_format oldFormat = {CL_R, CL_SNORM_INT16}; cl_image_format newFormat = {CL_R, CL_UNSIGNED_INT16}; auto normalizingFactor = selectNormalizingFactor(oldFormat.image_channel_data_type); for (size_t i = 0; i < 4; i++) { expectedIFillColor[i] = static_cast(normalizingFactor * fFillColor[i]); expectedIFillColor[i] = expectedIFillColor[i] & 0xFFFF; } convertFillColor(static_cast(fFillColor), iFillColor, oldFormat, newFormat); EXPECT_TRUE(memcmp(expectedIFillColor, iFillColor, 4 * sizeof(int32_t)) == 0); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_fixture.cpp000066400000000000000000000141041363734646600303750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "shared/source/helpers/ptr_math.h" // clang-format off // EnqueueTraits using namespace NEO; cl_uint EnqueueTraits::numEventsInWaitList = 0; const cl_event *EnqueueTraits::eventWaitList = nullptr; cl_event *EnqueueTraits::event = nullptr; static const auto negOne = static_cast(-1); static int ptrOutputContent[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; static auto ptrOutput = (void *)ptrOutputContent; // EnqueueCopyBufferTraits const size_t EnqueueCopyBufferTraits::srcOffset = 0; const size_t EnqueueCopyBufferTraits::dstOffset = 0; const size_t EnqueueCopyBufferTraits::size = negOne; cl_command_type EnqueueCopyBufferTraits::cmdType = CL_COMMAND_COPY_BUFFER; // EnqueueCopyBufferToImageTraits const 
size_t EnqueueCopyBufferToImageTraits::srcOffset = 0; const size_t EnqueueCopyBufferToImageTraits::dstOrigin[3] = { 0, 0, 0}; const size_t EnqueueCopyBufferToImageTraits::region[3] = {negOne, negOne, negOne}; cl_command_type EnqueueCopyBufferToImageTraits::cmdType = CL_COMMAND_COPY_BUFFER_TO_IMAGE; // EnqueueCopyImageToBufferTraits const size_t EnqueueCopyImageToBufferTraits::srcOrigin[3] = { 0, 0, 0}; const size_t EnqueueCopyImageToBufferTraits::region[3] = {negOne, negOne, negOne}; const size_t EnqueueCopyImageToBufferTraits::dstOffset = 0; cl_command_type EnqueueCopyImageToBufferTraits::cmdType = CL_COMMAND_COPY_IMAGE_TO_BUFFER; // EnqueueCopyImageTraits const size_t EnqueueCopyImageTraits::region[3] = {negOne, negOne, negOne}; const size_t EnqueueCopyImageTraits::srcOrigin[3] = { 0, 0, 0}; const size_t EnqueueCopyImageTraits::dstOrigin[3] = { 0, 0, 0}; cl_command_type EnqueueCopyImageTraits::cmdType = CL_COMMAND_COPY_IMAGE; // EnqueueFillBufferTraits const float EnqueueFillBufferTraits::pattern[1] = {1.2345f}; const size_t EnqueueFillBufferTraits::patternSize = sizeof(pattern); const size_t EnqueueFillBufferTraits::offset = 0; const size_t EnqueueFillBufferTraits::size = 2 * patternSize; cl_command_type EnqueueFillBufferTraits::cmdType = CL_COMMAND_FILL_BUFFER; // EnqueueFillImageTraits const float EnqueueFillImageTraits::fillColor[4] = { 1.0f, 2.0f, 3.0f, 4.0f}; const size_t EnqueueFillImageTraits::origin[3] = { 0, 0, 0}; const size_t EnqueueFillImageTraits::region[3] = {negOne, negOne, negOne}; cl_command_type EnqueueFillImageTraits::cmdType = CL_COMMAND_COPY_IMAGE; // EnqueueKernelTraits const cl_uint EnqueueKernelTraits::workDim = 1; const size_t EnqueueKernelTraits::globalWorkOffset[3] = {0, 0, 0}; const size_t EnqueueKernelTraits::globalWorkSize[3] = {1, 1, 1}; const size_t *EnqueueKernelTraits::localWorkSize = nullptr; cl_command_type EnqueueKernelTraits::cmdType = CL_COMMAND_NDRANGE_KERNEL; // EnqueueMapBufferTraits const cl_bool 
EnqueueMapBufferTraits::blocking = CL_TRUE; const cl_mem_flags EnqueueMapBufferTraits::flags = CL_MAP_WRITE; const size_t EnqueueMapBufferTraits::offset = 0; const size_t EnqueueMapBufferTraits::sizeInBytes = negOne; cl_int *EnqueueMapBufferTraits::errcodeRet = nullptr; cl_command_type EnqueueMapBufferTraits::cmdType = CL_COMMAND_MAP_BUFFER; // EnqueueReadBufferTraits const cl_bool EnqueueReadBufferTraits::blocking = CL_TRUE; const size_t EnqueueReadBufferTraits::offset = 0; const size_t EnqueueReadBufferTraits::sizeInBytes = negOne; void *EnqueueReadBufferTraits::hostPtr = ptrOutput; cl_command_type EnqueueReadBufferTraits::cmdType = CL_COMMAND_READ_BUFFER; GraphicsAllocation *EnqueueReadBufferTraits::mapAllocation = nullptr; // EnqueueReadImageTraits const cl_bool EnqueueReadImageTraits::blocking = CL_TRUE; const size_t EnqueueReadImageTraits::origin[3] = {0, 0, 0}; const size_t EnqueueReadImageTraits::region[3] = {2, 2, 1}; const size_t EnqueueReadImageTraits::rowPitch = 0; const size_t EnqueueReadImageTraits::slicePitch = 0; void *EnqueueReadImageTraits::hostPtr = ptrOutput; cl_command_type EnqueueReadImageTraits::cmdType = CL_COMMAND_READ_IMAGE; GraphicsAllocation *EnqueueReadImageTraits::mapAllocation = nullptr; // EnqueueWriteBufferTraits const bool EnqueueWriteBufferTraits::zeroCopy = true; const cl_bool EnqueueWriteBufferTraits::blocking = CL_TRUE; const size_t EnqueueWriteBufferTraits::offset = 0; const size_t EnqueueWriteBufferTraits::sizeInBytes = negOne; void *EnqueueWriteBufferTraits::hostPtr = ptrGarbage; cl_command_type EnqueueWriteBufferTraits::cmdType = CL_COMMAND_WRITE_BUFFER; GraphicsAllocation *EnqueueWriteBufferTraits::mapAllocation = nullptr; // EnqueueWriteBufferRectTraits const bool EnqueueWriteBufferRectTraits::zeroCopy = true; const cl_bool EnqueueWriteBufferRectTraits::blocking = CL_TRUE; const size_t EnqueueWriteBufferRectTraits::bufferOrigin[3] = { 0, 0, 0 }; const size_t EnqueueWriteBufferRectTraits::hostOrigin[3] = { 0, 0, 0 }; const 
size_t EnqueueWriteBufferRectTraits::region[3] = { 2, 2, 1 }; size_t EnqueueWriteBufferRectTraits::bufferRowPitch = 0; size_t EnqueueWriteBufferRectTraits::bufferSlicePitch = 0; size_t EnqueueWriteBufferRectTraits::hostRowPitch = 0; size_t EnqueueWriteBufferRectTraits::hostSlicePitch = 0; void *EnqueueWriteBufferRectTraits::hostPtr = ptrGarbage; cl_command_type EnqueueWriteBufferRectTraits::cmdType = CL_COMMAND_WRITE_BUFFER_RECT; // EnqueueWriteImageTraits const cl_bool EnqueueWriteImageTraits::blocking = CL_TRUE; const size_t EnqueueWriteImageTraits::origin[3] = {0, 0, 0}; const size_t EnqueueWriteImageTraits::region[3] = {2, 2, 1}; const size_t EnqueueWriteImageTraits::rowPitch = 0; const size_t EnqueueWriteImageTraits::slicePitch = 0; void *EnqueueWriteImageTraits::hostPtr = ptrGarbage; cl_command_type EnqueueWriteImageTraits::cmdType = CL_COMMAND_WRITE_IMAGE; GraphicsAllocation *EnqueueWriteImageTraits::mapAllocation = nullptr; // clang-format on compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_fixture.h000066400000000000000000000716671363734646600300630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "CL/cl.h" #include struct EnqueueTraits { static cl_uint numEventsInWaitList; static const cl_event *eventWaitList; static cl_event *event; }; struct EnqueueCopyBufferTraits : public EnqueueTraits { static const size_t srcOffset; static const size_t dstOffset; static const size_t size; static cl_command_type cmdType; }; template struct EnqueueCopyBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = 
NEO::CommandQueue; static cl_int enqueueCopyBuffer(CommandQueue *pCmdQ, Buffer *srcBuffer = std::unique_ptr(BufferHelper<>::create()).get(), Buffer *dstBuffer = std::unique_ptr(BufferHelper<>::create()).get(), size_t srcOffset = Traits::srcOffset, size_t dstOffset = Traits::dstOffset, size_t size = Traits::size, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { cl_int retVal = pCmdQ->enqueueCopyBuffer( srcBuffer, dstBuffer, srcOffset, dstOffset, size, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyBuffer(pCmdQ); } }; struct EnqueueCopyBufferToImageTraits : public EnqueueTraits { static const size_t srcOffset; static const size_t dstOrigin[3]; static const size_t region[3]; static cl_command_type cmdType; }; template struct EnqueueCopyBufferToImageHelper { typedef T Traits; using Buffer = NEO::Buffer; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueCopyBufferToImage(CommandQueue *pCmdQ, Buffer *srcBuffer = std::unique_ptr(BufferHelper<>::create()).get(), Image *dstImage = nullptr, const size_t srcOffset = Traits::srcOffset, const size_t dstOrigin[3] = Traits::dstOrigin, const size_t region[3] = Traits::region, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr dstImageDelete(dstImage ? nullptr : Image2dHelper<>::create(&context)); dstImage = dstImage ? dstImage : dstImageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? dstImage->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? dstImage->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (dstImage->getImageDesc().image_depth > 0 ? 
dstImage->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueCopyBufferToImage(srcBuffer, dstImage, srcOffset, dstOrigin, regionOut, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyBufferToImage(pCmdQ); } }; struct EnqueueCopyImageToBufferTraits : public EnqueueTraits { static const size_t srcOrigin[3]; static const size_t region[3]; static const size_t dstOffset; static cl_command_type cmdType; }; template struct EnqueueCopyImageToBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueCopyImageToBuffer(CommandQueue *pCmdQ, Image *srcImage = nullptr, Buffer *dstBuffer = std::unique_ptr(BufferHelper<>::create()).get(), const size_t srcOrigin[3] = Traits::srcOrigin, const size_t region[3] = Traits::region, const size_t dstOffset = Traits::dstOffset, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr srcImageDelete(srcImage ? nullptr : Image2dHelper<>::create(&context)); srcImage = srcImage ? srcImage : srcImageDelete.get(); size_t regionIn[3] = { region[0] == static_cast(-1) ? srcImage->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? srcImage->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (srcImage->getImageDesc().image_depth > 0 ? 
srcImage->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueCopyImageToBuffer(srcImage, dstBuffer, srcOrigin, regionIn, dstOffset, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyImageToBuffer(pCmdQ); } }; struct EnqueueCopyImageTraits : public EnqueueTraits { static const size_t srcOrigin[3]; static const size_t dstOrigin[3]; static const size_t region[3]; static cl_command_type cmdType; }; template struct EnqueueCopyImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueCopyImage(CommandQueue *pCmdQ, Image *srcImage = nullptr, Image *dstImage = nullptr, const size_t srcOrigin[3] = Traits::srcOrigin, const size_t dstOrigin[3] = Traits::dstOrigin, const size_t region[3] = Traits::region, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr srcImageDelete(srcImage ? nullptr : Image2dHelper<>::create(&context)); std::unique_ptr dstImageDelete(dstImage ? nullptr : Image2dHelper<>::create(&context)); srcImage = srcImage ? srcImage : srcImageDelete.get(); dstImage = dstImage ? dstImage : dstImageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? srcImage->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? srcImage->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (srcImage->getImageDesc().image_depth > 0 ? 
srcImage->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueCopyImage(srcImage, dstImage, srcOrigin, dstOrigin, regionOut, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueCopyImage(pCmdQ); } }; struct EnqueueFillBufferTraits : public EnqueueTraits { static const float pattern[1]; static const size_t patternSize; static const size_t offset; static const size_t size; static cl_command_type cmdType; }; template struct EnqueueFillBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueFillBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { return pCmdQ->enqueueFillBuffer(buffer, Traits::pattern, Traits::patternSize, Traits::offset, Traits::size, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueFillBuffer(pCmdQ); } }; struct EnqueueFillImageTraits : public EnqueueTraits { static const float fillColor[4]; static const size_t origin[3]; static const size_t region[3]; static cl_command_type cmdType; }; template struct EnqueueFillImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueFillImage(CommandQueue *pCmdQ, Image *image = nullptr, const void *fillColor = Traits::fillColor, const size_t *origin = Traits::origin, const size_t *region = Traits::region, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr imageDelete(image ? nullptr : Image2dHelper<>::create(&context)); image = image ? 
image : imageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? image->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? image->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (image->getImageDesc().image_depth > 0 ? image->getImageDesc().image_depth : 1) : region[2], }; cl_int retVal = pCmdQ->enqueueFillImage(image, fillColor, origin, regionOut, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueFillImage(pCmdQ); } }; struct EnqueueKernelTraits : public EnqueueTraits { static const cl_uint workDim; static const size_t globalWorkOffset[3]; static const size_t globalWorkSize[3]; static const size_t *localWorkSize; static cl_command_type cmdType; }; template struct EnqueueKernelHelper { typedef T Traits; using CommandQueue = NEO::CommandQueue; using Kernel = NEO::Kernel; static cl_int enqueueKernel(CommandQueue *pCmdQ, Kernel *kernel, cl_uint workDim = Traits::workDim, const size_t *globalWorkOffset = Traits::globalWorkOffset, const size_t *globalWorkSize = Traits::globalWorkSize, const size_t *localWorkSize = Traits::localWorkSize, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { return pCmdQ->enqueueKernel(kernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); } }; struct EnqueueMapBufferTraits : public EnqueueTraits { static const cl_bool blocking; static const cl_mem_flags flags; static const size_t offset; static const size_t sizeInBytes; static cl_int *errcodeRet; static cl_command_type cmdType; }; template struct EnqueueMapBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static void *enqueueMapBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool 
blockingMap = Traits::blocking, cl_mem_flags flags = Traits::flags, size_t offset = Traits::offset, size_t size = Traits::sizeInBytes, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event, cl_int *errcodeRet = Traits::errcodeRet) { size = size == static_cast(-1) ? buffer->getSize() : size; auto retCode = CL_SUCCESS; auto retPtr = pCmdQ->enqueueMapBuffer(buffer, blockingMap, flags, offset, size, numEventsInWaitList, eventWaitList, event, retCode); if (errcodeRet) { *errcodeRet = retCode; } return retPtr; } static cl_int enqueue(CommandQueue *pCmdQ, Buffer *buffer = nullptr) { auto retVal = CL_SUCCESS; enqueueMapBuffer(pCmdQ, buffer ? buffer : std::unique_ptr(BufferHelper<>::create()).get(), Traits::blocking, Traits::flags, Traits::offset, Traits::sizeInBytes, Traits::numEventsInWaitList, Traits::eventWaitList, Traits::event, &retVal); return retVal; } }; struct EnqueueReadBufferTraits : public EnqueueTraits { static const cl_bool blocking; static const size_t offset; static const size_t sizeInBytes; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueReadBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueReadBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool blockingRead = Traits::blocking, size_t offset = Traits::offset, size_t size = Traits::sizeInBytes, void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { size = size == static_cast(-1) ? 
buffer->getSize() : size; cl_int retVal = pCmdQ->enqueueReadBuffer(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ) { return enqueueReadBuffer(pCmdQ); } static cl_int enqueue(CommandQueue *pCmdQ, Buffer *buffer) { return enqueueReadBuffer(pCmdQ, buffer); } }; struct EnqueueReadImageTraits : public EnqueueTraits { static const cl_bool blocking; static const size_t origin[3]; static const size_t region[3]; static const size_t rowPitch; static const size_t slicePitch; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueReadImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueReadImage(CommandQueue *pCmdQ, Image *image = nullptr, cl_bool blockingRead = Traits::blocking, const size_t *origin = Traits::origin, const size_t *region = Traits::region, size_t rowPitch = Traits::rowPitch, size_t slicePitch = Traits::slicePitch, void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr imageDelete(image ? nullptr : Image2dHelper<>::create(&context)); image = image ? image : imageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? image->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? image->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (image->getImageDesc().image_depth > 0 ? 
image->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueReadImage(image, blockingRead, origin, regionOut, rowPitch, slicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueReadImage(pCmdQ); } }; struct EnqueueWriteBufferTraits : public EnqueueTraits { static const bool zeroCopy; static const cl_bool blocking; static const size_t offset; static const size_t sizeInBytes; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueWriteBufferHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueWriteBuffer(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool blockingWrite = Traits::blocking, size_t offset = Traits::offset, size_t size = Traits::sizeInBytes, void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { size = size == static_cast(-1) ? 
buffer->getSize() : size; cl_int retVal = pCmdQ->enqueueWriteBuffer(buffer, blockingWrite, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ) { return enqueueWriteBuffer(pCmdQ); } static cl_int enqueue(CommandQueue *pCmdQ, Buffer *buffer) { return enqueueWriteBuffer(pCmdQ, buffer); } }; struct EnqueueWriteBufferRectTraits : public EnqueueTraits { static const bool zeroCopy; static const cl_bool blocking; static const size_t bufferOrigin[3]; static const size_t hostOrigin[3]; static const size_t region[3]; static size_t bufferRowPitch; static size_t bufferSlicePitch; static size_t hostRowPitch; static size_t hostSlicePitch; static void *hostPtr; static cl_command_type cmdType; }; template struct EnqueueWriteBufferRectHelper { typedef T Traits; using Buffer = NEO::Buffer; using CommandQueue = NEO::CommandQueue; static cl_int enqueueWriteBufferRect(CommandQueue *pCmdQ, Buffer *buffer = std::unique_ptr(BufferHelper<>::create()).get(), cl_bool blockingWrite = Traits::blocking, const size_t *bufferOrigin = Traits::bufferOrigin, const size_t *hostOrigin = Traits::hostOrigin, const size_t *region = Traits::region, size_t bufferRowPitch = Traits::bufferRowPitch, size_t bufferSlicePitch = Traits::bufferSlicePitch, size_t hostRowPitch = Traits::hostRowPitch, size_t hostSlicePitch = Traits::hostSlicePitch, void *hostPtr = Traits::hostPtr, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { cl_int retVal = pCmdQ->enqueueWriteBufferRect(buffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, hostPtr, numEventsInWaitList, eventWaitList, event); return retVal; } static cl_int enqueue(CommandQueue *pCmdQ) { return enqueueWriteBufferRect(pCmdQ); } }; struct EnqueueWriteImageTraits : public EnqueueTraits { static const cl_bool blocking; 
static const size_t origin[3]; static const size_t region[3]; static const size_t rowPitch; static const size_t slicePitch; static void *hostPtr; static cl_command_type cmdType; static NEO::GraphicsAllocation *mapAllocation; }; template struct EnqueueWriteImageHelper { typedef T Traits; using Image = NEO::Image; using CommandQueue = NEO::CommandQueue; static cl_int enqueueWriteImage(CommandQueue *pCmdQ, Image *image = nullptr, cl_bool blockingRead = Traits::blocking, const size_t *origin = Traits::origin, const size_t *region = Traits::region, size_t rowPitch = Traits::rowPitch, size_t slicePitch = Traits::slicePitch, const void *ptr = Traits::hostPtr, NEO::GraphicsAllocation *mapAllocation = Traits::mapAllocation, cl_uint numEventsInWaitList = Traits::numEventsInWaitList, const cl_event *eventWaitList = Traits::eventWaitList, cl_event *event = Traits::event) { auto &context = pCmdQ->getContext(); std::unique_ptr imageDelete(image ? nullptr : Image2dHelper<>::create(&context)); image = image ? image : imageDelete.get(); size_t regionOut[3] = { region[0] == static_cast(-1) ? image->getImageDesc().image_width : region[0], region[1] == static_cast(-1) ? image->getImageDesc().image_height : region[1], region[2] == static_cast(-1) ? (image->getImageDesc().image_depth > 0 ? 
image->getImageDesc().image_depth : 1) : region[2], }; return pCmdQ->enqueueWriteImage(image, blockingRead, origin, regionOut, rowPitch, slicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } static cl_int enqueue(CommandQueue *pCmdQ, void *placeholder = nullptr) { return enqueueWriteImage(pCmdQ); } }; compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp000066400000000000000000000666671363734646600315730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_stream/aub_subcapture.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_aub_csr.h" #include "opencl/test/unit_test/mocks/mock_aub_subcapture_manager.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_internal_allocation_storage.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; HWTEST_F(EnqueueHandlerTest, WhenEnqueingHandlerWithKernelThenProcessEvictionOnCsrIsCalled) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; 
// With an AUB command stream receiver installed on the device, enqueueing a kernel must
// add exactly one AUB comment, and that comment must be the kernel's name.
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWithKernelWhenAubCsrIsActiveThenAddCommentWithKernelName) {
    int32_t tag = 0;
    auto aubCsr = new MockCsrAub<FamilyType>(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    pDevice->resetCommandStreamReceiver(aubCsr);

    MockKernelWithInternals mockKernel(*pClDevice);
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));

    size_t gws[] = {1, 1, 1};
    mockKernel.kernelInfo.name = "kernel_name";
    mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_TRUE(aubCsr->addAubCommentCalled);

    // ASSERT (not EXPECT): the next line indexes element 0, which would be out of bounds
    // if no comment had been recorded.
    ASSERT_EQ(1u, aubCsr->aubCommentMessages.size());
    EXPECT_STREQ("kernel_name", aubCsr->aubCommentMessages[0].c_str());
}
pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(aubCsr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {0, 0, 0}; mockKernel.kernelInfo.name = "kernel_name"; mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(aubCsr->addAubCommentCalled); } struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { template class MockCmdQWithAubSubCapture : public CommandQueueHw { public: MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { waitUntilCompleteCalled = true; CommandQueueHw::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); } void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) override { timestampPacketDependenciesCleared = clearAllDependencies; CommandQueueHw::obtainNewTimestampPacketNodes(numberOfNodes, previousNodes, clearAllDependencies); } bool waitUntilCompleteCalled = false; bool timestampPacketDependenciesCleared = false; }; }; HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCaptureWhenSubCaptureIsNotActiveThenEnqueueIsMadeBlocking) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(1); auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(aubCsr); AubSubCaptureCommon subCaptureCommon; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = "invalid_kernel_name"; auto subCaptureManagerMock = new AubSubCaptureManagerMock("file_name.aub", subCaptureCommon); 
aubCsr->subCaptureManager.reset(subCaptureManagerMock); MockCmdQWithAubSubCapture cmdQ(context, pClDevice); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(cmdQ.waitUntilCompleteCalled); } HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCaptureWhenSubCaptureGetsActivatedThenTimestampPacketDependenciesAreClearedAndNextRemainUncleared) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(1); DebugManager.flags.EnableTimestampPacket.set(true); auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(aubCsr); AubSubCaptureCommon subCaptureCommon; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = ""; subCaptureCommon.subCaptureFilter.dumpKernelStartIdx = 0; subCaptureCommon.subCaptureFilter.dumpKernelEndIdx = 1; auto subCaptureManagerMock = new AubSubCaptureManagerMock("file_name.aub", subCaptureCommon); aubCsr->subCaptureManager.reset(subCaptureManagerMock); MockCmdQWithAubSubCapture cmdQ(context, pClDevice); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; // activate subcapture cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(cmdQ.timestampPacketDependenciesCleared); // keep subcapture active cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(cmdQ.timestampPacketDependenciesCleared); // deactivate subcapture cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(cmdQ.timestampPacketDependenciesCleared); } template class MyCommandQueueHw : public CommandQueueHw { typedef CommandQueueHw BaseClass; public: MyCommandQueueHw(Context *context, 
ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false){}; Vec3 lws = {1, 1, 1}; Vec3 elws = {1, 1, 1}; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &multiDispatchInfo) override { elws = multiDispatchInfo.begin()->getEnqueuedWorkgroupSize(); lws = multiDispatchInfo.begin()->getActualWorkgroupSize(); } }; HWTEST_F(EnqueueHandlerTest, givenLocalWorkgroupSizeGreaterThenGlobalWorkgroupSizeWhenEnqueueKernelThenLwsIsClamped) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockProgram = mockKernel.mockProgram; mockProgram->setAllowNonUniform(true); MyCommandQueueHw myCmdQ(context, pClDevice, 0); size_t lws1d[] = {4, 1, 1}; size_t gws1d[] = {2, 1, 1}; myCmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws1d, lws1d, 0, nullptr, nullptr); EXPECT_EQ(myCmdQ.elws.x, lws1d[0]); EXPECT_EQ(myCmdQ.lws.x, gws1d[0]); size_t lws2d[] = {3, 3, 1}; size_t gws2d[] = {2, 1, 1}; myCmdQ.enqueueKernel(mockKernel.mockKernel, 2, nullptr, gws2d, lws2d, 0, nullptr, nullptr); EXPECT_EQ(myCmdQ.elws.x, lws2d[0]); EXPECT_EQ(myCmdQ.elws.y, lws2d[1]); EXPECT_EQ(myCmdQ.lws.x, gws2d[0]); EXPECT_EQ(myCmdQ.lws.y, gws2d[1]); size_t lws3d[] = {5, 4, 3}; size_t gws3d[] = {2, 2, 2}; myCmdQ.enqueueKernel(mockKernel.mockKernel, 3, nullptr, gws3d, lws3d, 0, nullptr, nullptr); EXPECT_EQ(myCmdQ.elws.x, lws3d[0]); EXPECT_EQ(myCmdQ.elws.y, lws3d[1]); EXPECT_EQ(myCmdQ.elws.z, lws3d[2]); EXPECT_EQ(myCmdQ.lws.x, gws3d[0]); EXPECT_EQ(myCmdQ.lws.y, gws3d[1]); EXPECT_EQ(myCmdQ.lws.z, gws3d[2]); } HWTEST_F(EnqueueHandlerTest, givenLocalWorkgroupSizeGreaterThenGlobalWorkgroupSizeAndNonUniformWorkGroupWhenEnqueueKernelThenClIvalidWorkGroupSizeIsReturned) { int32_t tag; auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); 
// Enqueueing a marker with an empty wait list must not touch residency on the command
// stream receiver: processEviction is not called and both the made-resident and the
// made-non-resident allocation lists stay empty.
// NOTE(review): the explicit template arguments (after MockCsrBase, std::unique_ptr and
// MockCommandQueueHw) appear to be missing in this copy of the file — restore them from
// the upstream source before building.
HWTEST_F(EnqueueHandlerTest, WhenEnqueuingHandlerCallOnEnqueueMarkerThenCallProcessEvictionOnCsrIsNotCalled) {
    // NOTE(review): tag is passed to the mock uninitialized; presumably only its address
    // is used — confirm.
    int32_t tag;
    auto csr = new MockCsrBase(tag, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    pDevice->resetCommandStreamReceiver(csr);

    auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0));

    // No events to wait for and no output event requested.
    mockCmdQ->enqueueMarkerWithWaitList(
        0,
        nullptr,
        nullptr);

    EXPECT_FALSE(csr->processEvictionCalled);
    EXPECT_EQ(0u, csr->madeResidentGfxAllocations.size());
    EXPECT_EQ(0u, csr->madeNonResidentGfxAllocations.size());
}
// When the queue is in the blocked state (taskLevel == levelNotReady) and the caller
// passes no output event, enqueueHandler must still create a virtual event on the queue,
// and the queue's internal reference count must grow by exactly one.
// NOTE(review): the explicit template arguments (after MockCommandQueueHw and
// enqueueHandler) appear to be missing in this copy of the file — restore them from the
// upstream source before building.
HWTEST_F(EnqueueHandlerTest, WhenEnqueuingBlockedWithoutReturnEventThenVirtualEventIsCreatedAndCommandQueueInternalRefCountIsIncremeted) {

    MockKernelWithInternals kernelInternals(*pClDevice, context);

    Kernel *kernel = kernelInternals.mockKernel;

    MockMultiDispatchInfo multiDispatchInfo(kernel);

    // Raw pointer on purpose: the queue is dropped with release() at the end of the test.
    auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0);

    // put queue into initial blocked state
    mockCmdQ->taskLevel = CompletionStamp::levelNotReady;

    auto initialRefCountInternal = mockCmdQ->getRefInternalCount();

    bool blocking = false;
    mockCmdQ->template enqueueHandler(nullptr,
                                      0,
                                      blocking,
                                      multiDispatchInfo,
                                      0,
                                      nullptr,
                                      nullptr);

    // NOTE(review): virtualEvent is dereferenced below, so a failure here would be
    // followed by a null dereference; the sibling test uses ASSERT_NE for this check.
    EXPECT_NE(nullptr, mockCmdQ->virtualEvent);

    auto refCountInternal = mockCmdQ->getRefInternalCount();
    EXPECT_EQ(initialRefCountInternal + 1, refCountInternal);

    // Complete the virtual event and poll the queue before releasing it.
    // NOTE(review): presumably isQueueBlocked() is what drops the completed virtual
    // event and its queue reference — confirm.
    mockCmdQ->virtualEvent->setStatus(CL_COMPLETE);
    mockCmdQ->isQueueBlocked();
    mockCmdQ->release();
}
mockCmdQ->template enqueueHandler(nullptr, 0, blocking, multiDispatchInfo, 0, nullptr, &outputEvent); ASSERT_NE(nullptr, outputEvent); Event *event = castToObjectOrAbort(outputEvent); ASSERT_NE(nullptr, event); event->release(); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenPatchInfoDataIsNotTransferredToCSR) { auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); csr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; mockKernel.mockKernel->getPatchInfoDataList().push_back(patchInfoData); EXPECT_CALL(*mockHelper, setPatchInfoData(::testing::_)).Times(0); mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDumpIsSetThenPatchInfoDataIsTransferredToCSR) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); csr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(csr); MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); size_t gws[] = {1, 1, 1}; PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, 
PatchInfoAllocationType::IndirectObjectHeap}; mockKernel.mockKernel->getPatchInfoDataList().push_back(patchInfoData); EXPECT_CALL(*mockHelper, setPatchInfoData(::testing::_)).Times(8); EXPECT_CALL(*mockHelper, registerCommandChunk(::testing::_)).Times(1); EXPECT_CALL(*mockHelper, registerBatchBufferStartAddress(::testing::_, ::testing::_)).Times(1); mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); } HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) { struct ExternallySynchEvent : VirtualEvent { ExternallySynchEvent(CommandQueue *cmdQueue) { setStatus(CL_COMPLETE); this->updateTaskCount(7); } bool isExternallySynchronized() const override { return true; } }; auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); ExternallySynchEvent synchEvent(mockCmdQ); cl_event inEv = &synchEvent; cl_event outEv = nullptr; bool blocking = false; MultiDispatchInfo emptyDispatchInfo; mockCmdQ->template enqueueHandler(nullptr, 0, blocking, emptyDispatchInfo, 1U, &inEv, &outEv); Event *ouputEvent = castToObject(outEv); ASSERT_NE(nullptr, ouputEvent); EXPECT_EQ(0U, ouputEvent->peekTaskCount()); ouputEvent->release(); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenSubCaptureIsOffThenActivateSubCaptureIsNotCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Off)); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_FALSE(pDevice->getUltCommandStreamReceiver().checkAndActivateAubSubCaptureCalled); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, 
givenEnqueueHandlerWhenSubCaptureIsOnThenActivateSubCaptureIsCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_TRUE(pDevice->getUltCommandStreamReceiver().checkAndActivateAubSubCaptureCalled); mockCmdQ->release(); } HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenClSetKernelExecInfoAlreadysetKernelThreadArbitrationPolicyThenRequiredThreadArbitrationPolicyIsSetProperly) { if (pClDevice->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); uint32_t euThreadSetting = CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL; size_t ptrSizeInBytes = 1 * sizeof(uint32_t *); clSetKernelExecInfo( kernel, // cl_kernel kernel CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_INTEL, // cl_kernel_exec_info param_name ptrSizeInBytes, // size_t param_value_size &euThreadSetting // const void *param_value ); auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(UnitTestHelper::getAppropriateThreadArbitrationPolicy(getNewKernelArbitrationPolicy(euThreadSetting)), pDevice->getUltCommandStreamReceiver().requiredThreadArbitrationPolicy); mockCmdQ->release(); } struct EnqueueHandlerTestBasic : public ::testing::Test { template 
std::unique_ptr> setupFixtureAndCreateMockCommandQueue() { auto executionEnvironment = platform()->peekExecutionEnvironment(); device = std::make_unique(MockDevice::createWithExecutionEnvironment(nullptr, executionEnvironment, 0u)); context = std::make_unique(device.get()); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto &ultCsr = static_cast &>(mockCmdQ->getGpgpuCommandStreamReceiver()); ultCsr.taskCount = initialTaskCount; mockInternalAllocationStorage = new MockInternalAllocationStorage(ultCsr); ultCsr.internalAllocationStorage.reset(mockInternalAllocationStorage); return mockCmdQ; } MockInternalAllocationStorage *mockInternalAllocationStorage = nullptr; const uint32_t initialTaskCount = 100; std::unique_ptr device; std::unique_ptr context; }; HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) { auto mockCmdQ = setupFixtureAndCreateMockCommandQueue(); MockKernelWithInternals kernelInternals(*device, context.get()); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); mockCmdQ->template enqueueHandler(nullptr, 0, true, multiDispatchInfo, 0, nullptr, nullptr); EXPECT_EQ(initialTaskCount + 1, mockInternalAllocationStorage->lastCleanAllocationsTaskCount); } HWTEST_F(EnqueueHandlerTestBasic, givenBlockedEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) { auto mockCmdQ = setupFixtureAndCreateMockCommandQueue(); MockKernelWithInternals kernelInternals(*device, context.get()); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; std::thread t0([&mockCmdQ, &userEvent]() { while (!mockCmdQ->isQueueBlocked()) { } userEvent.setStatus(CL_COMPLETE); }); mockCmdQ->template enqueueHandler(nullptr, 0, true, 
multiDispatchInfo, 1, waitlist, nullptr); EXPECT_EQ(initialTaskCount + 1, mockInternalAllocationStorage->lastCleanAllocationsTaskCount); t0.join(); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp000066400000000000000000001567761363734646600316570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" using namespace NEO; typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; TEST_F(EnqueueKernelTest, GivenNullKernelWhenEnqueuingKernelThenInvalidKernelErrorIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, nullptr, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDRangeKernelReturnsSuccess) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, 
localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDRangeKernelReturnsError) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b1); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeastFailsThenClEnqueueNDRangeKernelReturnsError) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 1, 2 * sizeof(cl_mem), &b1); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } 
TEST_F(EnqueueKernelTest, GivenInvalidEventListCountWhenEnqueuingKernelThenInvalidEventWaitListErrorIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, pKernel, 1, nullptr, globalWorkSize, nullptr, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } TEST_F(EnqueueKernelTest, GivenInvalidWorkGroupSizeWhenEnqueuingKernelThenInvalidWorkGroupSizeErrorIsReturned) { size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 12, 12}; auto retVal = clEnqueueNDRangeKernel( pCmdQ, pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); } TEST_F(EnqueueKernelTest, GivenNullKernelWhenEnqueuingNDCountKernelINTELThenInvalidKernelErrorIsReturned) { size_t workgroupCount[3] = {1, 1, 1}; auto retVal = clEnqueueNDCountKernelINTEL( pCmdQ, nullptr, 1, nullptr, workgroupCount, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernelINTELReturnsSuccess) { const size_t n = 512; size_t workgroupCount[3] = {2, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalledTwiceThenClEnqueueNDCountKernelINTELReturnsError) { const size_t n = 512; size_t workgroupCount[3] = {2, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = 
clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeastFailsThenClEnqueueNDCountKernelINTELReturnsError) { const size_t n = 512; size_t workgroupCount[3] = {2, 1, 1}; size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); auto b1 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &b0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clSetKernelArg(kernel.get(), 1, 2 * sizeof(cl_mem), &b1); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); retVal = clEnqueueNDCountKernelINTEL(pCmdQ2, kernel.get(), 1, nullptr, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); retVal = clReleaseMemObject(b0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(b1); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseCommandQueue(pCmdQ2); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueKernelTest, GivenInvalidEventListCountWhenEnqueuingNDCountKernelINTELThenInvalidEventWaitListErrorIsReturned) { size_t workgroupCount[3] = {1, 1, 1}; auto retVal = clEnqueueNDCountKernelINTEL( pCmdQ, pKernel, 1, nullptr, workgroupCount, nullptr, 1, nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal); } HWTEST_F(EnqueueKernelTest, bumpsTaskLevel) { auto taskLevelBefore = pCmdQ->taskLevel; callOneWorkItemNDRKernel(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueKernelTest, alignsToCSR) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; callOneWorkItemNDRKernel(); EXPECT_EQ(pCmdQ->taskCount, csr.peekTaskCount()); EXPECT_EQ(pCmdQ->taskLevel + 1, csr.peekTaskLevel()); } HWTEST_F(EnqueueKernelTest, addsCommands) { auto usedCmdBufferBefore = pCS->getUsed(); callOneWorkItemNDRKernel(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueKernelTest, addsIndirectData) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); callOneWorkItemNDRKernel(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), pKernel)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } TEST_F(EnqueueKernelTest, GivenKernelWithBuiltinDispatchInfoBuilderWhenBeingDispatchedThenBuiltinDispatcherIsUsedForDispatchValidation) { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { MockBuiltinDispatchBuilder(BuiltIns &builtins) : BuiltinDispatchInfoBuilder(builtins) { } cl_int validateDispatch(Kernel *kernel, uint32_t inworkDim, const Vec3 &gws, const Vec3 &elws, const Vec3 &offset) const override { receivedKernel = kernel; receivedWorkDim = inworkDim; receivedGws = gws; receivedElws = elws; receivedOffset = offset; wasValidateDispatchCalled = true; return valueToReturn; } cl_int valueToReturn = CL_SUCCESS; mutable Kernel *receivedKernel = nullptr; mutable uint32_t receivedWorkDim = 0; mutable Vec3 receivedGws = {0, 0, 0}; mutable Vec3 receivedElws = {0, 0, 0}; mutable Vec3 receivedOffset = {0, 
0, 0}; mutable bool wasValidateDispatchCalled = false; }; MockBuiltinDispatchBuilder mockNuiltinDispatchBuilder(*pCmdQ->getDevice().getBuiltIns()); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.builtinDispatchBuilder = &mockNuiltinDispatchBuilder; EXPECT_FALSE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled); mockNuiltinDispatchBuilder.valueToReturn = CL_SUCCESS; size_t gws[2] = {10, 1}; size_t lws[2] = {5, 1}; size_t off[2] = {7, 0}; uint32_t dim = 1; auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, dim, off, gws, lws, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled); EXPECT_EQ(mockKernel.mockKernel, mockNuiltinDispatchBuilder.receivedKernel); EXPECT_EQ(gws[0], mockNuiltinDispatchBuilder.receivedGws.x); EXPECT_EQ(lws[0], mockNuiltinDispatchBuilder.receivedElws.x); EXPECT_EQ(off[0], mockNuiltinDispatchBuilder.receivedOffset.x); EXPECT_EQ(dim, mockNuiltinDispatchBuilder.receivedWorkDim); mockNuiltinDispatchBuilder.wasValidateDispatchCalled = false; gws[0] = 26; lws[0] = 13; off[0] = 17; dim = 2; cl_int forcedErr = 37; mockNuiltinDispatchBuilder.valueToReturn = forcedErr; ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, dim, off, gws, lws, 0, nullptr, nullptr); EXPECT_EQ(forcedErr, ret); EXPECT_TRUE(mockNuiltinDispatchBuilder.wasValidateDispatchCalled); EXPECT_EQ(mockKernel.mockKernel, mockNuiltinDispatchBuilder.receivedKernel); EXPECT_EQ(gws[0], mockNuiltinDispatchBuilder.receivedGws.x); EXPECT_EQ(lws[0], mockNuiltinDispatchBuilder.receivedElws.x); EXPECT_EQ(off[0], mockNuiltinDispatchBuilder.receivedOffset.x); EXPECT_EQ(dim, mockNuiltinDispatchBuilder.receivedWorkDim); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScratchRequirementWhenPreemptionIsEnabledThenDontProgramMVSAgain) { typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = 
pDevice->getGpgpuCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); HardwareParse hwParser; size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; SPatchMediaVFEState mediaVFEstate; uint32_t scratchSize = 4096u; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; auto sizeToProgram = Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); // All state should be programmed before walker auto itorCmd = find(hwParser.itorPipelineSelect, hwParser.itorWalker); ASSERT_NE(hwParser.itorWalker, itorCmd); auto *cmd = (MEDIA_VFE_STATE *)*itorCmd; EXPECT_EQ(sizeToProgram, cmd->getPerThreadScratchSpace()); EXPECT_EQ(sizeToProgram, cmd->getStackSize()); auto scratchAlloc = csr.getScratchAllocation(); auto itorfirstBBEnd = find(hwParser.itorWalker, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorfirstBBEnd); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); itorCmd = find(itorfirstBBEnd, hwParser.cmdList.end()); ASSERT_EQ(hwParser.cmdList.end(), itorCmd); EXPECT_EQ(csr.getScratchAllocation(), scratchAlloc); } HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPrivateScratchIsSetInCommandStreamReceviver) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; SPatchMediaVFEState mediaVFEstate; uint32_t privateScratchSize = 4096u; mediaVFEstate.PerThreadScratchSpace = privateScratchSize; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstate; pCmdQ->enqueueKernel(mockKernel.mockKernel, 
1, off, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(privateScratchSize, csr.requiredPrivateScratchSize); } HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) { auto &csr = pDevice->getUltCommandStreamReceiver(); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockKernel->containsStatelessWrites = false; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On); auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getGmmHelper(); auto expectedMocsIndex = helper.getMocsIndex(*gmmHelper, true, true); EXPECT_EQ(expectedMocsIndex, csr.latestSentStatelessMocsConfig); } HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteOnBlockedCodePathWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) { auto &csr = pDevice->getUltCommandStreamReceiver(); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; auto userEvent = clCreateUserEvent(this->context, nullptr); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockKernel->containsStatelessWrites = false; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 1, &userEvent, nullptr); clSetUserEventStatus(userEvent, 0u); EXPECT_EQ(csr.recordedDispatchFlags.l3CacheSettings, L3CachingSettings::l3AndL1On); auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getGmmHelper(); auto expectedMocsIndex = helper.getMocsIndex(*gmmHelper, true, true); EXPECT_EQ(expectedMocsIndex, csr.latestSentStatelessMocsConfig); clReleaseEvent(userEvent); } HWTEST_F(EnqueueKernelTest, givenEnqueueWithGlobalWorkSizeWhenZeroValueIsPassedInDimensionThenTheKernelCommandWillTriviallySucceed) { size_t gws[3] = {0, 0, 0}; MockKernelWithInternals mockKernel(*pClDevice); auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, 
nullptr, 0, nullptr, nullptr); auto expected = (pClDevice->getEnabledClVersion() < 21 ? CL_INVALID_GLOBAL_WORK_SIZE : CL_SUCCESS); EXPECT_EQ(expected, ret); } HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueKernelIsCalledThenKernelIsRecorded) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; auto ret = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); //Two more surfaces from preemptionAllocation and SipKernel size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; size_t timestampPacketSurfacesCount = mockCsr->peekTimestampPacketWriteEnabled() ? 
1 : 0; EXPECT_EQ(0, mockCsr->flushCalledCount); EXPECT_EQ(5u + csrSurfaceCount + timestampPacketSurfacesCount, cmdBuffer->surfaces.size()); }

// Device limited to a 36-bit GPU address space + resident host-ptr allocation
// flagged as requiring an L3 flush: the dispatch flags passed to the CSR must
// carry dcFlush.
HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) {
    std::unique_ptr<MockClDevice> device;
    std::unique_ptr<CommandQueue> cmdQ;

    auto hwInfoToModify = *defaultHwInfo;
    hwInfoToModify.capabilityTable.gpuAddressSpace = MemoryConstants::max36BitAddress;
    device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfoToModify)});

    auto mockCsr = new MockCsrHw2<FamilyType>(*device->executionEnvironment, device->getRootDeviceIndex());
    device->resetCommandStreamReceiver(mockCsr);
    auto memoryManager = mockCsr->getMemoryManager();
    uint32_t hostPtr[10]{};

    AllocationProperties properties{device->getRootDeviceIndex(), false, 1, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    // The allocation is dereferenced below; stop here instead of crashing if it failed.
    ASSERT_NE(nullptr, allocation);

    MockKernelWithInternals mockKernel(*device, context);
    size_t gws[3] = {1, 0, 0};
    mockCsr->makeResident(*allocation);
    cmdQ.reset(createCommandQueue(device.get()));

    auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_TRUE(mockCsr->passedDispatchFlags.dcFlush);

    memoryManager->freeGraphicsMemory(allocation);
}

// Same 36-bit setup, but the allocation does not request an L3 flush:
// dcFlush must not be set in the dispatch flags.
HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushUnrequiredWhenEnqueueKernelIsCalledThenFlushIsNotForcedByGraphicsAllocation) {
    std::unique_ptr<MockClDevice> device;
    std::unique_ptr<CommandQueue> cmdQ;

    auto hwInfoToModify = *defaultHwInfo;
    hwInfoToModify.capabilityTable.gpuAddressSpace = MemoryConstants::max36BitAddress;
    device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfoToModify)});

    auto mockCsr = new MockCsrHw2<FamilyType>(*device->executionEnvironment, device->getRootDeviceIndex());
    device->resetCommandStreamReceiver(mockCsr);
    auto memoryManager = mockCsr->getMemoryManager();
    uint32_t hostPtr[10]{};

    AllocationProperties properties{device->getRootDeviceIndex(), false, 1, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = false;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    ASSERT_NE(nullptr, allocation);

    MockKernelWithInternals mockKernel(*device, context);
    size_t gws[3] = {1, 0, 0};
    mockCsr->makeResident(*allocation);
    cmdQ.reset(createCommandQueue(device.get()));

    auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);

    memoryManager->freeGraphicsMemory(allocation);
}

// Device with a 48-bit GPU address space: dcFlush must not be set whether or not
// the resident host-ptr allocation requests an L3 flush. Both flag values are
// exercised (flush requested first, then not requested).
HWTEST_F(EnqueueKernelTest, givenFullAddressSpaceGraphicsAllocationWhenEnqueueKernelIsCalledThenFlushIsNotForcedByGraphicsAllocation) {
    HardwareInfo hwInfoToModify;
    std::unique_ptr<MockClDevice> device;
    std::unique_ptr<CommandQueue> cmdQ;

    hwInfoToModify = *defaultHwInfo;
    hwInfoToModify.capabilityTable.gpuAddressSpace = MemoryConstants::max48BitAddress;
    device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfoToModify)});

    auto mockCsr = new MockCsrHw2<FamilyType>(*device->executionEnvironment, device->getRootDeviceIndex());
    device->resetCommandStreamReceiver(mockCsr);
    auto memoryManager = mockCsr->getMemoryManager();
    uint32_t hostPtr[10]{};

    AllocationProperties properties{device->getRootDeviceIndex(), false, 1, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};

    // Case 1: the allocation asks for an L3 flush.
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    ASSERT_NE(nullptr, allocation);

    MockKernelWithInternals mockKernel(*device, context);
    size_t gws[3] = {1, 0, 0};
    mockCsr->makeResident(*allocation);
    cmdQ.reset(createCommandQueue(device.get()));

    auto ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);
    memoryManager->freeGraphicsMemory(allocation);

    // Case 2: the allocation does not ask for an L3 flush.
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = false;
    allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
    ASSERT_NE(nullptr, allocation);

    mockCsr->makeResident(*allocation);
    cmdQ.reset(createCommandQueue(device.get()));

    ret = cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush);
    memoryManager->freeGraphicsMemory(allocation);
}

// clFlush on the fixture queue after one enqueue returns CL_SUCCESS.
HWTEST_F(EnqueueKernelTest, givenDefaultCommandStreamReceiverWhenClFlushIsCalledThenSuccessIsReturned) {
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    auto ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);
}

// Batched dispatch: the enqueue is recorded in the submissions aggregator and
// clFlush empties the recorded list with exactly one CSR flush.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKernelWhenFlushIsCalledThenKernelIsSubmitted) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());

    auto ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);
    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}

// Batched dispatch: a second clFlush with nothing new recorded does not trigger
// another CSR flush.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeAndBatchedKernelWhenFlushIsCalledTwiceThenNothingChanges) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    auto ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);
    ret = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, ret);

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}

// Batched dispatch: two enqueues produce two distinct recorded command buffers
// that share the same command buffer allocation, the second at a higher offset.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenKernelIsEnqueuedTwiceThenTwoSubmissionsAreRecorded) {
    auto &mockCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr.submissionAggregator.reset(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0, 0};

    //make sure csr emits something
    mockCsr.mediaVfeStateDirty = true;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    mockCsr.mediaVfeStateDirty = true;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());

    auto &cmdBufferList = mockedSubmissionsAggregator->peekCmdBufferList();
    // Head and tail are dereferenced below, so a missing entry must abort the test.
    ASSERT_NE(nullptr, cmdBufferList.peekHead());
    ASSERT_NE(nullptr, cmdBufferList.peekTail());
    EXPECT_NE(cmdBufferList.peekTail(), cmdBufferList.peekHead());

    auto cmdBuffer1 = cmdBufferList.peekHead();
    auto cmdBuffer2 = cmdBufferList.peekTail();

    EXPECT_EQ(cmdBuffer1->surfaces.size(), cmdBuffer2->surfaces.size());
    EXPECT_EQ(cmdBuffer1->batchBuffer.commandBufferAllocation, cmdBuffer2->batchBuffer.commandBufferAllocation);
    EXPECT_GT(cmdBuffer2->batchBuffer.startOffset, cmdBuffer1->batchBuffer.startOffset);
}

// Batched dispatch: flushing the queue after two enqueues empties the recorded
// list with a single CSR flush.
HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenFlushIsCalledOnTwoBatchedKernelsThenTheyAreExecutedInOrder) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    pCmdQ->flush();

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}

// Batched dispatch: finish() after two enqueues empties the recorded list with
// a single CSR flush.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    pCmdQ->finish();

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}
// Batched dispatch: finish() after three enqueues empties the recorded list
// with a single CSR flush.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenThressEnqueueKernelsAreCalledThenBatchesSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    pCmdQ->finish();

    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);
}

// Batched dispatch: clWaitForEvents on the event of the second enqueue empties
// the recorded list with a single CSR flush.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledThenBatchedSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);

    auto status = clWaitForEvents(1, &event);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Batched dispatch: flush stamps of CSR, event and queue are 0 before the wait
// and 1 after it; a following clFinish leaves the queue stamp at 1.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandIsFlushedThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
    auto neoEvent = castToObject<Event>(event);

    EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(0u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp());

    // The API results were previously stored and never inspected; verify them.
    auto status = clWaitForEvents(1, &event);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_EQ(1, neoEvent->getRefInternalCount());
    EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());

    status = clFinish(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Batched dispatch: a non-blocking map of a zero-copy buffer enqueued after a
// kernel gets flush stamp 1 on its event once the queue is flushed.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenNonBlockingMapFollowsNdrCallThenFlushStampIsUpdatedProperly) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    EXPECT_TRUE(this->destBuffer->isMemObjZeroCopy());

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    cl_event event;
    pCmdQ->enqueueMapBuffer(this->destBuffer, false, CL_MAP_READ, 0u, 1u, 0, nullptr, &event, this->retVal);

    pCmdQ->flush();

    auto neoEvent = castToObject<Event>(event);
    EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp());

    auto status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Batched dispatch: same flush-stamp expectations as above, but the event
// belongs to the first enqueue and is followed by an enqueue without an event.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenCommandWithEventIsFollowedByCommandWithoutEventThenFlushStampIsUpdatedInCommandQueueCsrAndEvent) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};

    cl_event event;
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    auto neoEvent = castToObject<Event>(event);

    EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(0u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp());

    // The API results were previously stored and never inspected; verify them.
    auto status = clWaitForEvents(1, &event);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_EQ(1, neoEvent->getRefInternalCount());
    EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp());
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());

    status = clFinish(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Batched dispatch: clFlush moves the CSR and queue flush stamps from 0 to 1.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenClFlushIsCalledThenQueueFlushStampIsUpdated) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(0u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(0u, pCmdQ->flushStamp->peekStamp());

    auto status = clFlush(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_EQ(1u, mockCsr->flushStamp->peekStamp());
    EXPECT_EQ(1u, pCmdQ->flushStamp->peekStamp());
}
// Batched dispatch: waiting on a marker event enqueued after a kernel empties
// the recorded list with a single CSR flush.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenWaitForEventsIsCalledWithUnflushedTaskCountThenBatchedSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    cl_event event;

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event);

    auto status = clWaitForEvents(1, &event);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Batched dispatch: clFinish after a kernel and a marker empties the recorded
// list with a single CSR flush.
HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledWithUnflushedTaskCountThenBatchedSubmissionsAreFlushed) {
    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice);
    size_t gws[3] = {1, 0, 0};
    cl_event event;

    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event);

    auto status = clFinish(pCmdQ);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
    EXPECT_EQ(1, mockCsr->flushCalledCount);

    status = clReleaseEvent(event);
    EXPECT_EQ(CL_SUCCESS, status);
}

// Out-of-order queue, batched dispatch: the recorded command buffer stores a
// non-null pipeControlThatMayBeErasedLocation.
HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    auto ooq = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr);

    auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    pDevice->resetCommandStreamReceiver(mockCsr);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0, 0};

    clEnqueueNDRangeKernel(ooq, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());

    auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
    // Report a missing command buffer as a failure rather than dereferencing null;
    // a plain guard (not ASSERT) keeps the queue release below reachable.
    EXPECT_NE(nullptr, cmdBuffer);
    if (cmdBuffer != nullptr) {
        EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation);
    }

    clReleaseCommandQueue(ooq);
}

// In-order queue, batched dispatch on the device's ULT CSR: the recorded command
// buffer stores a non-null pipeControlThatMayBeErasedLocation.
HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelIsMadeThenPipeControlPositionIsRecorded) {
    const cl_queue_properties props[] = {0};
    auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr);

    auto &mockCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    mockCsr.submissionAggregator.reset(mockedSubmissionsAggregator);

    MockKernelWithInternals mockKernel(*pClDevice, context);
    size_t gws[3] = {1, 0, 0};

    clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());

    auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
    // Report a missing command buffer as a failure rather than dereferencing null;
    // a plain guard (not ASSERT) keeps the queue release below reachable.
    EXPECT_NE(nullptr, cmdBuffer);
    if (cmdBuffer != nullptr) {
        EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation);
    }

    clReleaseCommandQueue(inOrderQueue);
}
HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelThatHasSharedObjectsAsArgIsMadeThenPipeControlPositionIsRecorded) { const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; mockKernel.mockKernel->setUsingSharedArgs(true); clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); clReleaseCommandQueue(inOrderQueue); } HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelThatHasSharedObjectsAsArgIsMadeThenPipeControlDoesntHaveDcFlush) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; mockKernel.mockKernel->setUsingSharedArgs(true); clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush); } HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsNotRecorded) { const cl_queue_properties props[] = {0}; auto inOrderQueue = 
clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->timestampPacketWriteEnabled = false; pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_EQ(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeAndCommandStreamReceiverIsInNTo1ModeThenPipeControlPositionIsRecorded) { const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->enableNTo1SubmissionModel(); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = 
mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsRecorded) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; cl_event event; clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead(); EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_EQ(cmdBuffer->epiloguePipeControlLocation, cmdBuffer->pipeControlThatMayBeErasedLocation); clReleaseCommandQueue(inOrderQueue); clReleaseEvent(event); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenBlockingCallIsMadeThenEventAssociatedWithCommandHasProperFlushStamp) { DebugManagerStateRestore stateRestore; DebugManager.flags.MakeEachEnqueueBlocking.set(true); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 
0}; cl_event event; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); auto neoEvent = castToObject(event); EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp()); EXPECT_EQ(1, mockCsr->flushCalledCount); auto status = clReleaseEvent(event); EXPECT_EQ(CL_SUCCESS, status); } HWTEST_F(EnqueueKernelTest, givenKernelWhenItIsEnqueuedThenAllResourceGraphicsAllocationsAreUpdatedWithCsrTaskCount) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1, mockCsr->flushCalledCount); auto csrTaskCount = mockCsr->peekTaskCount(); auto &passedAllocationPack = mockCsr->copyOfAllocations; for (auto &allocation : passedAllocationPack) { EXPECT_EQ(csrTaskCount, allocation->getTaskCount(mockCsr->getOsContext().getContextId())); } } HWTEST_F(EnqueueKernelTest, givenKernelWhenItIsSubmittedFromTwoDifferentCommandQueuesThenCsrDoesntReloadAnyCommands) { auto &csr = this->pDevice->getUltCommandStreamReceiver(); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto currentUsed = csr.commandStream.getUsed(); const cl_queue_properties props[] = {0}; auto inOrderQueue = clCreateCommandQueueWithProperties(context, pClDevice, props, nullptr); clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto usedAfterSubmission = csr.commandStream.getUsed(); EXPECT_EQ(usedAfterSubmission, currentUsed); clReleaseCommandQueue(inOrderQueue); } TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqueueNDRangeKernelReturnsInvalidKernelArgsAndSetEventToNull) { const size_t n = 512; size_t globalWorkSize[3] = {n, 1, 1}; 
size_t localWorkSize[3] = {256, 1, 1}; cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); cl_event event; retVal = clEnqueueNDRangeKernel(pCmdQ2, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, &event); EXPECT_EQ(CL_INVALID_KERNEL_ARGS, retVal); clFlush(pCmdQ2); clReleaseCommandQueue(pCmdQ2); } TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) { MockKernelWithInternals mockKernel(*pClDevice); mockKernel.mockKernel->maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); auto maxKernelWorkgroupSize = mockKernel.mockKernel->maxKernelWorkGroupSize; size_t globalWorkSize[3] = {maxKernelWorkgroupSize + 1, 1, 1}; size_t localWorkSize[3] = {maxKernelWorkgroupSize + 1, 1, 1}; auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status); } TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLocalWorkgroupSizeContainsZeroWhenEnqueueNDRangeKernelIsCalledThenClInvalidWorkGroupSizeIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 0, 1}; MockKernelWithInternals mockKernel(*pClDevice); auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status); } HWTEST_F(EnqueueKernelTest, givenVMEKernelWhenEnqueueKernelThenDispatchFlagsHaveMediaSamplerRequired) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] 
= {1, 0, 0}; mockKernel.kernelInfo.isVmeWorkload = true; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); } HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); MockKernelWithInternals mockKernel(*pClDevice, context); size_t gws[3] = {1, 0, 0}; mockKernel.kernelInfo.isVmeWorkload = false; clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); } HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithEngineHintsThenEpilogRequiredIsSet) { auto &csr = pDevice->getUltCommandStreamReceiver(); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockKernelWithInternals mockKernel(*pClDevice); pCmdQ->dispatchHints = 1; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(csr.recordedDispatchFlags.epilogueRequired, true); EXPECT_EQ(csr.recordedDispatchFlags.engineHints, 1u); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp000066400000000000000000001151421363734646600316360ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include 
"opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "reg_configs_common.h" using namespace NEO; struct TestParam2 { cl_uint ScratchSize; } TestParamTable2[] = {{1024}, {2048}, {4096}, {8192}, {16384}}; struct TestParam { cl_uint globalWorkSizeX; cl_uint globalWorkSizeY; cl_uint globalWorkSizeZ; cl_uint localWorkSizeX; cl_uint localWorkSizeY; cl_uint localWorkSizeZ; } TestParamTable[] = { {1, 1, 1, 1, 1, 1}, {16, 1, 1, 1, 1, 1}, {16, 1, 1, 16, 1, 1}, {32, 1, 1, 1, 1, 1}, {32, 1, 1, 16, 1, 1}, {32, 1, 1, 32, 1, 1}, {64, 1, 1, 1, 1, 1}, {64, 1, 1, 16, 1, 1}, {64, 1, 1, 32, 1, 1}, {64, 1, 1, 64, 1, 1}, {190, 1, 1, 95, 1, 1}, {510, 1, 1, 255, 1, 1}, {512, 1, 1, 256, 1, 1}}, OneEntryTestParamTable[] = { {1, 1, 1, 1, 1, 1}, }; template struct EnqueueKernelTypeTest : public HelloWorldFixture, public HardwareParse, ::testing::TestWithParam { typedef HelloWorldFixture ParentClass; using ParentClass::pCmdBuffer; using ParentClass::pCS; EnqueueKernelTypeTest() { } void FillValues() { globalWorkSize[0] = 1; globalWorkSize[1] = 1; globalWorkSize[2] = 1; localWorkSize[0] = 1; localWorkSize[1] = 1; localWorkSize[2] = 1; }; template typename std::enable_if::type enqueueKernel(Kernel *inputKernel = nullptr) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; FillValues(); // Compute # of expected work items expectedWorkItems = 1; for (auto i = 0u; i < workDim; i++) { expectedWorkItems *= globalWorkSize[i]; } auto usedKernel = inputKernel ? 
inputKernel : pKernel; auto retVal = pCmdQ->enqueueKernel( usedKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); } template typename std::enable_if::type enqueueKernel(Kernel *inputKernel = nullptr) { enqueueKernel(inputKernel); parseCommands(*pCmdQ); } template void enqueueKernel(Kernel *inputKernel = nullptr) { enqueueKernel(inputKernel); } void SetUp() override { ParentClass::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); ParentClass::TearDown(); } size_t globalWorkSize[3]; size_t localWorkSize[3]; size_t expectedWorkItems = 0; }; template <> void EnqueueKernelTypeTest::FillValues() { const TestParam ¶m = GetParam(); globalWorkSize[0] = param.globalWorkSizeX; globalWorkSize[1] = param.globalWorkSizeY; globalWorkSize[2] = param.globalWorkSizeZ; localWorkSize[0] = param.localWorkSizeX; localWorkSize[1] = param.localWorkSizeY; localWorkSize[2] = param.localWorkSizeZ; } typedef EnqueueKernelTypeTest EnqueueWorkItemTests; typedef EnqueueKernelTypeTest EnqueueWorkItemTestsWithLimitedParamSet; HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTests, GPGPUWalker) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::GPGPU_WALKER GPGPU_WALKER; enqueueKernel(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } auto numWorkItems = ((cmd->getThreadWidthCounterMaximum() - 1) * simd + lanesPerThreadX) * cmd->getThreadGroupIdXDimension(); EXPECT_EQ(expectedWorkItems, numWorkItems); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, LoadRegisterImmediateL3CNTLREG) { enqueueKernel(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueKernel(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, context->getMemoryManager()->peekForce32BitAllocations() ? 
context->getMemoryManager()->getExternalHeapBaseAddress(ultCsr.rootDeviceIndex) : 0llu); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, MediaInterfaceDescriptorLoad) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename PARSE::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueKernel(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = genCmdCast(*itorCmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, InterfaceDescriptorData) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename PARSE::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueKernel(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = 
cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, givenDebugVariableToOverrideMOCSWhenStateBaseAddressIsBeingProgrammedThenItContainsDesiredIndex) { DebugManagerStateRestore restore; DebugManager.flags.OverrideStatelessMocsIndex.set(1); typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; enqueueKernel(); // Extract the SBA command auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; auto mocsProgrammed = cmdSBA->getStatelessDataPortAccessMemoryObjectControlStateIndexToMocsTables() >> 1; EXPECT_EQ(1u, mocsProgrammed); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, PipelineSelect) { enqueueKernel(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueWorkItemTestsWithLimitedParamSet, MediaVFEState) { enqueueKernel(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } INSTANTIATE_TEST_CASE_P(EnqueueKernel, 
EnqueueWorkItemTests, ::testing::ValuesIn(TestParamTable)); INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueWorkItemTestsWithLimitedParamSet, ::testing::ValuesIn(OneEntryTestParamTable)); typedef EnqueueKernelTypeTest EnqueueScratchSpaceTests; HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratchWhenItIsEnqueuedWithDifferentScratchSizesThenMediaVFEStateAndStateBaseAddressAreProperlyProgrammed) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.getMemoryManager()->setForce32BitAllocations(false); EXPECT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); SPatchMediaVFEState mediaVFEstate; auto scratchSize = GetParam().ScratchSize; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; auto sizeToProgram = (scratchSize / MemoryConstants::kiloByte); auto bitValue = 0u; while (sizeToProgram >>= 1) { bitValue++; } auto valueToProgram = Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize); EXPECT_EQ(bitValue, valueToProgram); enqueueKernel(mockKernel); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); auto itorCmdForStateBase = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); ASSERT_NE(itorWalker, itorCmdForStateBase); auto *cmd = (MEDIA_VFE_STATE *)*itorCmd; auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; const HardwareInfo &hwInfo = *defaultHwInfo; uint32_t threadPerEU = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount) + hwInfo.capabilityTable.extraQuantityThreadsPerEU; uint32_t maxNumberOfThreads = hwInfo.gtSystemInfo.EUCount * threadPerEU; // Verify we have a valid length EXPECT_EQ(maxNumberOfThreads, cmd->getMaximumNumberOfThreads()); EXPECT_NE(0u, 
cmd->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd->getUrbEntryAllocationSize()); EXPECT_EQ(bitValue, cmd->getPerThreadScratchSpace()); EXPECT_EQ(bitValue, cmd->getStackSize()); auto graphicsAllocation = csr.getScratchAllocation(); auto GSHaddress = sba->getGeneralStateBaseAddress(); if (is32bit) { EXPECT_NE(0u, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(0u, GSHaddress); } else { EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(GSHaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, graphicsAllocation->getGpuAddress()); } auto allocationSize = scratchSize * pDevice->getDeviceInfo().computeUnitsUsedForScratch; EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), allocationSize); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); scratchSize *= 2; //skip if size to big 4MB, no point in stressing memory allocator. if (allocationSize > 4194304) { return; } mediaVFEstate.PerThreadScratchSpace = scratchSize; auto itorfirstBBEnd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorfirstBBEnd); enqueueKernel(mockKernel); bitValue++; itorCmd = find(itorfirstBBEnd, cmdList.end()); itorCmdForStateBase = find(itorWalker, cmdList.end()); ASSERT_NE(itorWalker, itorCmd); if (is64bit) { ASSERT_NE(itorCmdForStateBase, itorCmd); } else { //no SBA not dirty ASSERT_EQ(itorCmdForStateBase, cmdList.end()); } auto *cmd2 = (MEDIA_VFE_STATE *)*itorCmd; // Verify we have a valid length EXPECT_EQ(maxNumberOfThreads, cmd2->getMaximumNumberOfThreads()); EXPECT_NE(0u, cmd2->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd2->getUrbEntryAllocationSize()); EXPECT_EQ(bitValue, cmd2->getPerThreadScratchSpace()); EXPECT_EQ(bitValue, cmd2->getStackSize()); auto graphicsAllocation2 = csr.getScratchAllocation(); if (is32bit) { auto scratchBase = cmd2->getScratchSpaceBasePointer(); EXPECT_NE(0u, scratchBase); auto graphicsAddress = graphicsAllocation2->getGpuAddress(); 
EXPECT_EQ(graphicsAddress, scratchBase); } else { auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress2 = sba2->getGeneralStateBaseAddress(); EXPECT_NE(0u, GSHaddress2); EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, cmd2->getScratchSpaceBasePointer()); EXPECT_NE(GSHaddress2, GSHaddress); } EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), allocationSize); EXPECT_NE(graphicsAllocation2, graphicsAllocation); // Generically validate this command PARSE::template validateCommand(cmdList.begin(), itorCmd); // Trigger SBA generation IndirectHeap dirtyDsh(nullptr); csr.dshState.updateAndCheck(&dirtyDsh); enqueueKernel(mockKernel); auto finalItorToSBA = find(itorCmd, cmdList.end()); ASSERT_NE(finalItorToSBA, cmdList.end()); auto *finalSba2 = (STATE_BASE_ADDRESS *)*finalItorToSBA; auto GSBaddress = finalSba2->getGeneralStateBaseAddress(); if (is32bit) { EXPECT_EQ(0u, GSBaddress); } else if (is64bit) { EXPECT_EQ(graphicsAllocation2->getGpuAddress(), GSBaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit); } EXPECT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueScratchSpaceTests, ::testing::ValuesIn(TestParamTable2)); typedef EnqueueKernelTypeTest EnqueueKernelWithScratch; HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWithDifferentScratchSizesThenPreviousScratchAllocationIsMadeNonResidentPriorStoringOnResueList) { auto mockCsr = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); SPatchMediaVFEState mediaVFEstate; auto scratchSize = 1024; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; auto sizeToProgram = (scratchSize / MemoryConstants::kiloByte); auto bitValue = 0u; while (sizeToProgram >>= 1) { bitValue++; } auto valueToProgram = 
Kernel::getScratchSizeValueToProgramMediaVfeState(scratchSize); EXPECT_EQ(bitValue, valueToProgram); enqueueKernel(mockKernel); auto graphicsAllocation = mockCsr->getScratchAllocation(); EXPECT_TRUE(mockCsr->isMadeResident(graphicsAllocation)); // Enqueue With ScratchSize bigger than previous scratchSize = 8196; mediaVFEstate.PerThreadScratchSpace = scratchSize; enqueueKernel(mockKernel); EXPECT_TRUE(mockCsr->isMadeNonResident(graphicsAllocation)); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAllocationsWhenKernelWithScratchIsEnqueuedThenGeneralStateHeapBaseAddressIsCorrectlyProgrammedAndMediaVFEStateContainsProgramming) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; if (is64bit) { CommandStreamReceiver *csr = &pDevice->getGpgpuCommandStreamReceiver(); auto memoryManager = csr->getMemoryManager(); memoryManager->setForce32BitAllocations(true); SPatchMediaVFEState mediaVFEstate; auto scratchSize = 1024; mediaVFEstate.PerThreadScratchSpace = scratchSize; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; enqueueKernel(mockKernel); auto graphicsAllocation = csr->getScratchAllocation(); EXPECT_TRUE(graphicsAllocation->is32BitAllocation()); auto graphicsAddress = (uint64_t)graphicsAllocation->getGpuAddress(); auto baseAddress = graphicsAllocation->getGpuBaseAddress(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); auto itorCmdForStateBase = find(itorPipelineSelect, itorWalker); auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorCmd; auto scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); auto scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); uint64_t scratchBaseAddr = scratchBaseHighPart << 32 | scratchBaseLowPart; EXPECT_EQ(graphicsAddress - baseAddress, 
scratchBaseAddr); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = sba->getGeneralStateBaseAddress(); EXPECT_EQ(memoryManager->getExternalHeapBaseAddress(graphicsAllocation->getRootDeviceIndex()), GSHaddress); //now re-try to see if SBA is not programmed scratchSize *= 2; mediaVFEstate.PerThreadScratchSpace = scratchSize; enqueueKernel(mockKernel); itorCmdForStateBase = find(itorWalker, cmdList.end()); EXPECT_EQ(itorCmdForStateBase, cmdList.end()); } } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueKernelWithScratch, testing::Values(1)); TestParam TestParamPrintf[] = { {1, 1, 1, 1, 1, 1}}; typedef EnqueueKernelTypeTest EnqueueKernelPrintfTest; HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfThenPatchCrossTHreadData) { typedef typename FamilyType::PARSE PARSE; SPatchAllocateStatelessPrintfSurface patchData; patchData.SurfaceStateHeapOffset = 0; patchData.Size = 256; patchData.DataParamOffset = 64; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; enqueueKernel(mockKernel); EXPECT_EQ(mockKernel.crossThreadData[64], 0); } HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWhenBeingDispatchedThenL3CacheIsFlushed) { typedef typename FamilyType::PARSE PARSE; SPatchAllocateStatelessPrintfSurface patchData; patchData.Size = 256; patchData.DataParamOffset = 64; MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver(); auto latestSentTaskCount = csr.peekTaskCount(); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event *event = nullptr; FillValues(); // Compute # of expected work items 
expectedWorkItems = 1; for (auto i = 0u; i < workDim; i++) { expectedWorkItems *= globalWorkSize[i]; } auto retVal = mockCmdQueue.enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); auto newLatestSentTaskCount = csr.peekTaskCount(); EXPECT_GT(newLatestSentTaskCount, latestSentTaskCount); EXPECT_EQ(mockCmdQueue.latestTaskCountWaited, newLatestSentTaskCount); } HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenL3CacheIsFlushed) { typedef typename FamilyType::PARSE PARSE; UserEvent userEvent(context); SPatchAllocateStatelessPrintfSurface patchData; patchData.Size = 256; patchData.DataParamOffset = 64; MockCommandQueueHw mockCommandQueue(context, pClDevice, nullptr); MockKernelWithInternals mockKernel(*pClDevice); mockKernel.crossThreadData[64] = 0; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; auto &csr = mockCommandQueue.getGpgpuCommandStreamReceiver(); auto latestSentDcFlushTaskCount = csr.peekTaskCount(); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; FillValues(); cl_event blockedEvent = &userEvent; auto retVal = mockCommandQueue.enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(mockCommandQueue); auto newLatestSentDCFlushTaskCount = csr.peekTaskCount(); EXPECT_GT(newLatestSentDCFlushTaskCount, latestSentDcFlushTaskCount); EXPECT_EQ(mockCommandQueue.latestTaskCountWaited, newLatestSentDCFlushTaskCount); } HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenOutputPrinted) { typedef typename FamilyType::PARSE PARSE; // In scenarios with 32bit allocator and 64 bit tests this code won't work // due to inability to retrieve original buffer pointer as it is done in this test. 
auto memoryManager = pDevice->getMemoryManager(); if (!memoryManager->peekForce32BitAllocations() && !memoryManager->isLimitedRange(0)) { testing::internal::CaptureStdout(); auto userEvent = make_releaseable(context); SPatchAllocateStatelessPrintfSurface patchData; patchData.Size = 256; patchData.DataParamSize = 8; patchData.DataParamOffset = 0; MockKernelWithInternals mockKernel(*pClDevice); mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData; auto crossThreadData = reinterpret_cast(mockKernel.mockKernel->getCrossThreadData()); std::string testString = "test"; mockKernel.kernelInfo.patchInfo.stringDataMap.insert(std::make_pair(0, testString)); cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; FillValues(); cl_event blockedEvent = userEvent.get(); auto retVal = pCmdQ->enqueueKernel( mockKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); auto printfAllocation = reinterpret_cast(*crossThreadData); printfAllocation[0] = 8; printfAllocation[1] = 0; userEvent->setStatus(CL_COMPLETE); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); } } INSTANTIATE_TEST_CASE_P(EnqueueKernel, EnqueueKernelPrintfTest, ::testing::ValuesIn(TestParamPrintf)); typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; struct EnqueueAuxKernelTests : public EnqueueKernelTest { template class MyCmdQ : public CommandQueueHw { public: using CommandQueueHw::commandStream; using CommandQueueHw::gpgpuEngine; using CommandQueueHw::bcsEngine; MyCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) override { CommandQueueHw::dispatchAuxTranslationBuiltin(multiDispatchInfo, auxTranslationDirection); auxTranslationDirections.push_back(auxTranslationDirection); Kernel 
*lastKernel = nullptr; for (const auto &dispatchInfo : multiDispatchInfo) { lastKernel = dispatchInfo.getKernel(); dispatchInfos.emplace_back(dispatchInfo); } dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), *multiDispatchInfo.getMemObjsForAuxTranslation(), auxTranslationDirection); } void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { waitCalled++; CommandQueueHw::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); } std::vector auxTranslationDirections; std::vector dispatchInfos; std::vector> dispatchAuxTranslationInputs; uint32_t waitCalled = 0; }; void SetUp() override { DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); EnqueueKernelTest::SetUp(); } DebugManagerStateRestore dbgRestore; }; HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationAndWithoutArgumentsWhenEnqueuedThenNoGuardKernelWithAuxTranslations) { MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; mockKernel.mockKernel->auxTranslationRequired = true; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, cmdQ.dispatchAuxTranslationInputs.size()); } HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) { if (!HwHelper::get(this->pDevice->getHardwareInfo().platform.eRenderCoreFamily).requiresAuxResolves()) { GTEST_SKIP(); } MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; MockBuffer buffer0, buffer1, buffer2, buffer3; cl_mem clMem0 = &buffer0; cl_mem clMem1 = &buffer1; cl_mem clMem2 = &buffer2; cl_mem clMem3 = &buffer3; buffer0.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); buffer1.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); 
buffer2.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); buffer3.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); MockKernelWithInternals mockKernel(*pClDevice, context); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.kernelInfo.kernelArgInfo.resize(6); for (auto &kernelInfo : mockKernel.kernelInfo.kernelArgInfo) { kernelInfo.kernelArgPatchInfoVector.resize(1); } mockKernel.mockKernel->initialize(); mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true; mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false; mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true; mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false; mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false; mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless on regular buffer - dont insert mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful on regular buffer - dont insert mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless on BUFFER_COMPRESSED - insert mockKernel.mockKernel->setArgBuffer(3, sizeof(cl_mem *), &clMem3); // stateful on BUFFER_COMPRESSED - dont insert mockKernel.mockKernel->setArgBuffer(4, sizeof(cl_mem *), nullptr); // nullptr - dont insert mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - dont insert cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel EXPECT_EQ(&buffer2, 
*std::get(cmdQ.dispatchAuxTranslationInputs.at(0)).begin()); EXPECT_EQ(&buffer2, *std::get(cmdQ.dispatchAuxTranslationInputs.at(1)).begin()); auto cmdStream = cmdQ.commandStream; auto sizeUsed = cmdStream->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), sizeUsed)); auto pipeControls = findAll(cmdList.begin(), cmdList.end()); auto additionalPcCount = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( pDevice->getHardwareInfo()) / sizeof(typename FamilyType::PIPE_CONTROL); // |AuxToNonAux|NDR|NonAuxToAux| ASSERT_EQ(4u + additionalPcCount, pipeControls.size()); ASSERT_EQ(2u, cmdQ.auxTranslationDirections.size()); EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, cmdQ.auxTranslationDirections[0]); EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, cmdQ.auxTranslationDirections[1]); } HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenDispatchAuxTranslationBuiltin) { MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; MockBuffer buffer; cl_mem clMem = &buffer; buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); mockKernel.kernelInfo.kernelArgInfo.resize(1); mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1); mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); // before kernel EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(0))); // aux before NDR auto kernelBefore = std::get(cmdQ.dispatchAuxTranslationInputs.at(0)); EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo().name); 
EXPECT_TRUE(kernelBefore->isBuiltIn); // after kernel EXPECT_EQ(3u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1))); // aux + NDR + aux auto kernelAfter = std::get(cmdQ.dispatchAuxTranslationInputs.at(1)); EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo().name); EXPECT_TRUE(kernelAfter->isBuiltIn); } HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhenDispatchingKernelWithRequiredAuxTranslationThenDontDispatch) { DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); cmdQ.bcsEngine = cmdQ.gpgpuEngine; size_t gws[3] = {1, 0, 0}; MockBuffer buffer; cl_mem clMem = &buffer; buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); mockKernel.kernelInfo.kernelArgInfo.resize(1); mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1); mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); // aux builtin not dispatched before NDR EXPECT_EQ(0u, std::get(cmdQ.dispatchAuxTranslationInputs.at(0))); // only NDR is dispatched EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1))); auto kernel = std::get(cmdQ.dispatchAuxTranslationInputs.at(1)); EXPECT_FALSE(kernel->isBuiltIn); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; 
DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); MockKernelWithInternals mockKernel(*pClDevice, context); CommandQueueHw cmdQ(context, pClDevice, nullptr, false); size_t gws[3] = {1, 0, 0}; mockKernel.mockKernel->svmAllocationsRequireCacheFlush = true; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParse; hwParse.parseCommands(cmdQ.getCS(0), 0); auto itorCmd = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); ASSERT_NE(hwParse.cmdList.end(), itorCmd); itorCmd = find(itorCmd, hwParse.cmdList.end()); auto pipeControl = genCmdCast(*itorCmd); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelWhenAuxTranslationIsRequiredThenMakeEnqueueBlocking) { if (pClDevice->getSupportedClVersion() >= 20) { MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; cl_queue_properties queueProperties = {}; auto mockDevQueue = std::make_unique>(context, pClDevice, queueProperties); context->setDefaultDeviceQueue(mockDevQueue.get()); std::unique_ptr parentKernel(MockParentKernel::create(*context, false, false, false, false, false)); parentKernel->initialize(); parentKernel->auxTranslationRequired = false; cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, cmdQ.waitCalled); mockDevQueue->getIgilQueue()->m_controls.m_CriticalSection = 0; parentKernel->auxTranslationRequired = true; cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, cmdQ.waitCalled); } } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelButNoDeviceQueueWhenEnqueueIsCalledItReturnsInvalidOperation) { if (pClDevice->getSupportedClVersion() >= 20) { MyCmdQ cmdQ(context, pClDevice); size_t gws[3] = {1, 0, 0}; std::unique_ptr 
parentKernel(MockParentKernel::create(*context, false, false, false, false, false)); parentKernel->initialize(); auto status = cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, status); } } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp000066400000000000000000000161661363734646600326240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" using namespace NEO; typedef HelloWorldTest EventTests; TEST_F(EventTests, WhenEnqueingKernelThenCorrectEventIsReturned) { cl_event event = nullptr; auto retVal = callOneWorkItemNDRKernel(nullptr, 0, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EventTests, WhenEnqueingKernelThenEventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); 
ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } TEST_F(EventTests, WhenWaitingForEventThenPipeControlIsNotInserted) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); retVal = Event::waitForEvents(1, &event); EXPECT_EQ(CL_SUCCESS, retVal); //we expect event is completed uint32_t taskCountOfEvent = pEvent->peekTaskCount(); EXPECT_LE(taskCountOfEvent, pCmdQ->getHwTag()); // no more tasks after WFE, no need to write PC EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel()); pCmdQ->finish(); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EventTests, GivenTwoEnqueuesWhenWaitingForBothEventsThenTaskLevelIsCorrect) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event[2] = {}; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event[0]); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event[0]); auto pEvent0 = castToObject(event[0]); EXPECT_EQ(pCmdQ->taskLevel, pEvent0->taskLevel); retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event[1]); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event[1]); auto pEvent1 = castToObject(event[1]); EXPECT_EQ(pCmdQ->taskLevel, pEvent1->taskLevel); EXPECT_GT(pEvent1->taskLevel, 
pEvent0->taskLevel); retVal = Event::waitForEvents(2, event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent1->taskLevel + 1, csr.peekTaskLevel()); pCmdQ->finish(); EXPECT_EQ(pEvent1->taskLevel + 1, csr.peekTaskLevel()); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent0, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent0; delete pEvent1; } TEST_F(EventTests, GivenNoEventsWhenEnqueuingKernelThenTaskLevelIsIncremented) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); retVal = Event::waitForEvents(1, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel()); retVal = callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pEvent->taskLevel + 2, csr.peekTaskLevel()); pCmdQ->finish(); EXPECT_EQ(pEvent->taskLevel + 2, csr.peekTaskLevel()); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_NDRANGE_KERNEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EventTests, WhenEnqueuingMarkerThenPassedEventHasTheSameLevelAsPreviousCommand) { cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto retVal 
= callOneWorkItemNDRKernel(eventWaitList, numEventsInWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pEvent->taskLevel + 1, csr.peekTaskLevel()); cl_event event2 = nullptr; retVal = clEnqueueMarkerWithWaitList(pCmdQ, 1, &event, &event2); auto pEvent2 = castToObject(event2); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel + 1); } else { EXPECT_EQ(pEvent2->taskLevel, pEvent->taskLevel); } ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event2); retVal = clWaitForEvents(1, &event2); ASSERT_EQ(CL_SUCCESS, retVal); if (csr.peekTimestampPacketWriteEnabled()) { EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } else { EXPECT_EQ(csr.peekTaskLevel(), pEvent->taskLevel + 1); } clReleaseEvent(event); clReleaseEvent(event2); } enqueue_kernel_global_offset_tests.cpp000066400000000000000000000013211363734646600342150ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" using namespace NEO; using GlobalWorkOffset = HelloWorldTest; TEST_F(GlobalWorkOffset, GivenNullGlobalWorkOffsetWhenEnqueuingKernelThenSuccessIsReturned) { size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; auto retVal = pCmdQ->enqueueKernel( pKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } enqueue_kernel_local_work_size_tests.cpp000066400000000000000000000074151363734646600346070ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include 
// The kernel carries a reqd_work_group_size() attribute, but the caller passes
// no local work size. The enqueue is expected to succeed and both the local
// and the enqueued local work size values are expected to read 8 x 4 x 4.
TEST_F(EnqueueKernelRequiredWorkSize, GivenUnspecifiedWorkGroupSizeWhenEnqeueingKernelThenLwsIsSetCorrectly) {
    const auto expectedX = 8u;
    const auto expectedY = 4u;
    const auto expectedZ = 4u;

    size_t *unspecifiedLws = nullptr;
    size_t gws[3] = {32, 32, 32};

    auto status = pCmdQ->enqueueKernel(pKernel, 3, nullptr, gws, unspecifiedLws, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    // Local work size picked for the dispatch.
    EXPECT_EQ(*pKernel->localWorkSizeX, expectedX);
    EXPECT_EQ(*pKernel->localWorkSizeY, expectedY);
    EXPECT_EQ(*pKernel->localWorkSizeZ, expectedZ);

    // Enqueued local work size recorded alongside it.
    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, expectedX);
    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, expectedY);
    EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, expectedZ);
}
// Only one dimension is specified. A 1-D enqueue with a local size of 8 is
// expected to be accepted (CL_SUCCESS) for the required-work-group kernel.
// NOTE(review): presumably 8 is accepted because it matches the X component of
// the 8 x 4 x 4 size used by the sibling tests - confirm against the kernel.
TEST_F(EnqueueKernelRequiredWorkSize, givenKernelRequiringLocalWorkgroupSizeWhen1DimensionIsPassedThatIsCorrectThenNdRangeIsSuccesful) {
    size_t lws[1] = {8};
    size_t gws[1] = {32};
    size_t offset[1] = {0};

    const auto status = pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, status);
}
// Fixture that enqueues the same kernel on two command queues, the second
// enqueue waiting on the event of the first, and records iterators to the two
// walker commands found in the parsed command list.
// NOTE(review): explicit template argument lists (on HelloWorldTest, the
// `template` header of parseWalkers, castToObject and find) appear to be
// missing in this copy of the file - confirm against the upstream source.
struct TwoIOQsTwoDependentWalkers : public HelloWorldTest,
                                    public HardwareParse {
    typedef HelloWorldTest Parent;
    using Parent::createCommandQueue;
    using Parent::pCmdQ;
    using Parent::pDevice;
    using Parent::pKernel;

    TwoIOQsTwoDependentWalkers() {
    }

    // Sets up the hello-world fixture first, then the command parser.
    void SetUp() override {
        Parent::SetUp();
        HardwareParse::SetUp();
    }

    // Deletes the second queue (created in parseWalkers, nullptr otherwise)
    // and tears the two bases down in the reverse order of SetUp.
    void TearDown() override {
        delete pCmdQ2;
        HardwareParse::TearDown();
        Parent::TearDown();
    }

    // Enqueues pKernel on pCmdQ and then on a newly created second queue that
    // waits on the first enqueue's event, parses the commands of both queues
    // into cmdList, and stores the positions of the first two matches returned
    // by find() in itorWalker1 / itorWalker2.
    // The ASSERT_* macros return from this helper on failure.
    template void parseWalkers() {
        cl_uint workDim = 1;
        size_t globalWorkOffset[3] = {0, 0, 0};
        size_t globalWorkSize[3] = {1, 1, 1};
        size_t localWorkSize[3] = {1, 1, 1};
        cl_event event1 = nullptr;
        cl_event event2 = nullptr;

        auto retVal = pCmdQ->enqueueKernel(
            pKernel,
            workDim,
            globalWorkOffset,
            globalWorkSize,
            localWorkSize,
            0,
            nullptr,
            &event1);

        ASSERT_EQ(CL_SUCCESS, retVal);
        HardwareParse::parseCommands(*pCmdQ);

        // Create a second command queue (beyond the default one)
        pCmdQ2 = createCommandQueue(pClDevice);
        ASSERT_NE(nullptr, pCmdQ2);

        // The second enqueue lists event1 in its wait list, making it
        // dependent on the first kernel.
        retVal = pCmdQ2->enqueueKernel(
            pKernel,
            workDim,
            globalWorkOffset,
            globalWorkSize,
            localWorkSize,
            1,
            &event1,
            &event2);

        ASSERT_EQ(CL_SUCCESS, retVal);
        HardwareParse::parseCommands(*pCmdQ2);

        // The events are only needed to express the dependency; they are
        // destroyed before the command list is inspected.
        Event *E1 = castToObject(event1);
        ASSERT_NE(nullptr, E1);
        Event *E2 = castToObject(event2);
        ASSERT_NE(nullptr, E2);
        delete E1;
        delete E2;

        // NOTE(review): presumably find() searches for GPGPU_WALKER commands,
        // given this otherwise unused typedef - confirm.
        typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER;
        itorWalker1 = find(cmdList.begin(), cmdList.end());
        ASSERT_NE(cmdList.end(), itorWalker1);

        // Continue the search just past the first match.
        itorWalker2 = itorWalker1;
        ++itorWalker2;
        itorWalker2 = find(itorWalker2, cmdList.end());
        ASSERT_NE(cmdList.end(), itorWalker2);
    }

    // Positions of the first and second match in cmdList (set by parseWalkers).
    GenCmdList::iterator itorWalker1;
    GenCmdList::iterator itorWalker2;
    // Second command queue; created in parseWalkers, deleted in TearDown.
    CommandQueue *pCmdQ2 = nullptr;
};
// Checks that a PIPE_CONTROL command is present between the two walker
// commands located by parseWalkers().
// Note: the check passes when at least one PIPE_CONTROL is found in that
// range; it does not count them.
HWTEST_F(TwoIOQsTwoDependentWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;

    // FamilyType is used only inside parseWalkers' body, so it cannot be
    // deduced and has to be named explicitly. A fatal failure inside the
    // helper only returns from the helper, so stop here before the walker
    // iterators are used.
    ASSERT_NO_FATAL_FAILURE(parseWalkers<FamilyType>());

    // Search only the commands between the first and the second walker.
    auto itorCmd = find<PIPE_CONTROL *>(itorWalker1, itorWalker2);

    // Should find a PC.
    EXPECT_NE(itorWalker2, itorCmd);
}
// Prints `size` bytes starting at `ptr` to std::cout as space-separated
// hexadecimal values ("0x.. 0x.. "), followed by a newline.
// The stream's formatting flags are saved on entry and restored before
// returning, so the std::hex set here does not leak into the caller's output.
void hexDump(void *ptr, size_t size) {
    const uint8_t *bytes = reinterpret_cast<const uint8_t *>(ptr);
    const std::ios_base::fmtflags savedFlags = std::cout.flags();

    std::cout << std::hex;
    // The index has to be as wide as `size`: a uint8_t counter wraps at 256
    // and would never reach the end of a larger buffer.
    for (size_t index = 0; index < size; ++index) {
        // Widen to an integer so the byte prints as a number, not a character.
        std::cout << "0x" << static_cast<uint32_t>(bytes[index]) << " ";
    }
    std::cout << std::endl;

    std::cout.flags(savedFlags);
}
// Disabled check that a command is found between the two walkers for the
// out-of-order fixture. Timestamp packet writes are switched off on the ULT
// command stream receiver before the kernels are enqueued and parsed.
// NOTE(review): explicit template argument lists (on
// getUltCommandStreamReceiver, parseWalkers and find) appear to be missing in
// this copy of the file - confirm against the upstream source.
HWTEST_F(TwoOOQsTwoDependentWalkers, DISABLED_GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;

    pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false;

    parseWalkers();
    // Search only the range between the first and the second walker.
    auto itorCmd = find(itorWalker1, itorWalker2);

    // Should find a PC.
    EXPECT_NE(itorWalker2, itorCmd);
}
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); uint64_t addressPC = ((uint64_t)pipeControl->getAddressHigh() << 32) | pipeControl->getAddress(); // The PC address should match the CS tag address EXPECT_EQ(commandStreamReceiver.getTagAllocation()->getGpuAddress(), addressPC); EXPECT_EQ(1u, pipeControl->getImmediateData()); } enqueue_kernel_two_walker_ooq_tests.cpp000066400000000000000000000027551363734646600344570ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/two_walker_fixture.h" #include "test.h" using namespace NEO; struct OOQFixtureFactory : public HelloWorldFixtureFactory { typedef OOQueueFixture CommandQueueFixture; }; typedef TwoWalkerTest OOQWithTwoWalkers; HWTEST_F(OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenTwoDifferentWalkersAreCreated) { enqueueTwoKernels(); EXPECT_NE(itorWalker1, itorWalker2); } HWTEST_F(OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipelineSelectExists) { enqueueTwoKernels(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenThereIsOneVfeState) { enqueueTwoKernels(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } HWTEST_F(OOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeControlIsInsertedBetweenWalkers) { enqueueTwoKernels(); auto itorCmd = find(itorWalker1, itorWalker2); // Workaround for DRM i915 coherency patch // EXPECT_EQ(itorWalker2, itorCmd); EXPECT_NE(itorWalker2, itorCmd); } 
// Test fixture for map-buffer command tests: a CommandEnqueueFixture plus a
// source buffer that is mapped by enqueueMapBuffer().
// NOTE(review): the template parameter list of enqueueMapBuffer and the
// explicit argument of parseCommands appear to be missing in this copy of the
// file - confirm against the upstream source.
struct EnqueueMapBufferTypeTest : public CommandEnqueueFixture,
                                  public ::testing::Test {
    // Sets up the enqueue fixture, installs a new MockContext as
    // BufferDefaults::context and creates srcBuffer via BufferHelper<>::create().
    void SetUp() override {
        CommandEnqueueFixture::SetUp();
        BufferDefaults::context = new MockContext;
        srcBuffer = BufferHelper<>::create();
    }

    // Destroys the buffer before the context it was created with, then tears
    // the enqueue fixture down.
    // NOTE(review): BufferDefaults::context keeps pointing at the deleted
    // context afterwards - confirm nothing reads it before the next SetUp.
    void TearDown() override {
        delete srcBuffer;
        delete BufferDefaults::context;
        CommandEnqueueFixture::TearDown();
    }

  protected:
    // Maps srcBuffer on pCmdQ (blocking by default), expects success and a
    // non-null mapped pointer, then parses the queue's commands.
    template void enqueueMapBuffer(cl_bool blocking = CL_TRUE) {
        cl_int retVal;
        // Route the helper's error code into the local retVal for this call.
        EnqueueMapBufferHelper<>::Traits::errcodeRet = &retVal;
        auto mappedPointer = EnqueueMapBufferHelper<>::enqueueMapBuffer(
            pCmdQ,
            srcBuffer,
            blocking);
        EXPECT_EQ(CL_SUCCESS, *EnqueueMapBufferHelper<>::Traits::errcodeRet);
        EXPECT_NE(nullptr, mappedPointer);
        // Reset the static pointer so it does not outlive the local retVal.
        EnqueueMapBufferHelper<>::Traits::errcodeRet = nullptr;

        parseCommands(*pCmdQ);
    }

    // Buffer mapped by the tests; created in SetUp, deleted in TearDown.
    Buffer *srcBuffer = nullptr;
};
"opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; struct EnqueueMapBufferTest : public DeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueMapBufferTest() { } void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); BufferDefaults::context = new MockContext; buffer = BufferHelper>::create(); } void TearDown() override { delete buffer; delete BufferDefaults::context; CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Buffer *buffer = nullptr; char srcMemory[128]; }; TEST_F(EnqueueMapBufferTest, GivenBufferAddressesWhenMappingBufferThenCpuAndGpuAddressAreEqualWhenZeroCopyIsUsed) { auto mapFlags = CL_MAP_READ; auto size = 0; auto offset = 0; cl_int retVal; auto ptr = pCmdQ->enqueueMapBuffer( buffer, true, mapFlags, offset, size, 0, nullptr, nullptr, retVal); if (buffer->isMemObjZeroCopy()) { EXPECT_EQ(buffer->getCpuAddress(), ptr); } else { EXPECT_NE(buffer->getCpuAddress(), ptr); } } TEST_F(EnqueueMapBufferTest, givenBufferWithUseHostPtrFlagWhenMappedThenReturnHostPtr) { auto hostPtr = buffer->getHostPtr(); EXPECT_NE(nullptr, hostPtr); auto mapFlags = CL_MAP_READ; auto size = 2; auto offset = 2; cl_int retVal; auto ptr = pCmdQ->enqueueMapBuffer(buffer, true, mapFlags, offset, size, 0, nullptr, nullptr, retVal); EXPECT_EQ(ptr, ptrOffset(hostPtr, offset)); } TEST_F(EnqueueMapBufferTest, GivenCmdqAndValidArgsWhenMappingBufferThenSuccessIsReturned) { auto mapFlags = CL_MAP_READ; auto size = 0; auto offset = 0; auto retVal = CL_INVALID_VALUE; auto ptr = pCmdQ->enqueueMapBuffer( buffer, 
// Creates a CL_MEM_USE_HOST_PTR buffer over a 20-byte host allocation, maps
// it for writing, modifies the host memory, unmaps it, maps it again and
// checks that the host memory still holds the modified values.
TEST_F(EnqueueMapBufferTest, GivenChangesInHostBufferWhenMappingBufferThenChangesArePropagatedToDeviceMemory) {
    //size not aligned to cacheline size
    int bufferSize = 20;
    void *ptrHost = malloc(bufferSize);
    // Stop early on allocation failure instead of writing through a null pointer.
    ASSERT_NE(nullptr, ptrHost);
    char *charHostPtr = static_cast<char *>(ptrHost);

    //first fill with data
    for (int i = 0; i < bufferSize; i++) {
        charHostPtr[i] = 1;
    }

    auto buffer = clCreateBuffer(
        BufferDefaults::context,
        CL_MEM_USE_HOST_PTR,
        bufferSize,
        charHostPtr,
        &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, buffer);

    auto ptrResult = clEnqueueMapBuffer(
        pCmdQ,
        buffer,
        CL_TRUE,
        CL_MAP_WRITE,
        0,
        8,
        0,
        nullptr,
        nullptr,
        &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(ptrResult, charHostPtr) << "Map Buffer should return host_pointer used during creation with CL_MEM_USE_HOST_PTR";

    //check data
    for (int i = 0; i < bufferSize; i++) {
        EXPECT_EQ(charHostPtr[i], 1);
        //change the data
        charHostPtr[i] = 2;
    }

    retVal = clEnqueueUnmapMemObject(
        pCmdQ,
        buffer,
        ptrResult,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    //now map again and see if data propagated
    clEnqueueMapBuffer(
        pCmdQ,
        buffer,
        CL_TRUE,
        CL_MAP_WRITE,
        0,
        8,
        0,
        nullptr,
        nullptr,
        &retVal);
    // The second map must succeed as well, otherwise the data check below
    // would say nothing about propagation.
    EXPECT_EQ(CL_SUCCESS, retVal);

    //check data
    for (int i = 0; i < bufferSize; i++) {
        EXPECT_EQ(charHostPtr[i], 2);
    }

    retVal = clReleaseMemObject(buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    free(ptrHost);
}
// Enqueues a kernel and then a non-blocking read-only map on a mock queue and
// checks that the map event is not complete until clWaitForEvents is called,
// that neither the map, the wait nor the unmap raises the CSR task count
// above 1, and that the CL_COMPLETE callback registered on the map event is
// invoked.
// NOTE(review): explicit template argument lists (on static_cast,
// MockCommandQueueHw and castToObject) appear to be missing in this copy of
// the file - confirm against the upstream source.
HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWhenItIsCalledThenSynchronizationIsNotMadeUntilWaitForEvents) {
    // Async event handling is disabled for the duration of the test; the
    // restore object is declared first, presumably to undo the change when it
    // goes out of scope.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    cl_event mapEventReturned = nullptr;
    cl_event unmapEventReturned = nullptr;
    // Start with a tag value of 0 so the enqueued work does not appear complete.
    *pTagMemory = 0;
    MockKernelWithInternals kernel(*pClDevice);
    size_t GWS = 1;

    // Event callback that sets the uint32_t passed as user data to 1.
    struct E2Clb {
        static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) {
            uint32_t *callbackCalled = static_cast(data);
            *callbackCalled = 1;
        }
    };

    auto buffer = clCreateBuffer(
        BufferDefaults::context,
        CL_MEM_READ_WRITE,
        20,
        nullptr,
        &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, buffer);

    MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr);

    auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
    uint32_t taskCount = commandStreamReceiver.peekTaskCount();
    EXPECT_EQ(0u, taskCount);

    // enqueue something that can be finished...
    retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(retVal, CL_SUCCESS);

    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());

    auto ptrResult = clEnqueueMapBuffer(
        &mockCmdQueue,
        buffer,
        CL_FALSE,
        CL_MAP_READ,
        0,
        8,
        0,
        nullptr,
        &mapEventReturned,
        &retVal);
    EXPECT_NE(nullptr, ptrResult);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // No DC flush is required at this point: the task count is unchanged.
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());

    taskCount = commandStreamReceiver.peekTaskCount();
    EXPECT_EQ(1u, taskCount);

    auto neoEvent = castToObject(mapEventReturned);
    // If the task count of the CSR is higher than the event's task count with
    // proper DC flushing, then we are fine.
    EXPECT_EQ(1u, neoEvent->getCompletionStamp());
    // This can't be completed, as the task count has not been reached yet.
    EXPECT_FALSE(neoEvent->updateStatusAndCheckCompletion());
    EXPECT_TRUE(CL_COMMAND_MAP_BUFFER == neoEvent->getCommandType());

    auto callbackCalled = 0u;

    // Advance the tag past the event's completion stamp so the wait can finish.
    *pTagMemory += 4;

    clSetEventCallback(mapEventReturned, CL_COMPLETE, E2Clb::SignalEv2, (void *)&callbackCalled);

    // Waiting for events needs to flush DC, as the event requires this.
    retVal = clWaitForEvents(1, &mapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Waiting for the event does not send a flushTask.
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
    EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited);

    EXPECT_TRUE(neoEvent->updateStatusAndCheckCompletion());

    EXPECT_EQ(1u, callbackCalled);

    retVal = clEnqueueUnmapMemObject(
        &mockCmdQueue,
        buffer,
        ptrResult,
        0,
        nullptr,
        &unmapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // The unmap is expected to leave the task count unchanged as well.
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());

    auto unmapEvent = castToObject(unmapEventReturned);
    EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType());
    retVal = clWaitForEvents(1, &unmapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseEvent(mapEventReturned);
    clReleaseEvent(unmapEventReturned);
}
// Maps a buffer that has a sharing handler attached with CL_MAP_READ and
// checks the command types reported by the map and unmap events. The map is
// expected to raise the CSR task count to 1 and the unmap to leave it there.
TEST_F(EnqueueMapBufferTest, givenReadOnlyBufferWhenMappedOnGpuThenSetValidEventCmds) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    cl_event mapEventReturned = nullptr;
    cl_event unmapEventReturned = nullptr;
    *pTagMemory = 5;

    std::unique_ptr<Buffer> buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, retVal));
    // Validate the creation result before the buffer is dereferenced.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, buffer.get());
    buffer->setSharingHandler(new SharingHandler());

    auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
    EXPECT_EQ(0u, commandStreamReceiver.peekTaskCount());

    auto ptrResult = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, &mapEventReturned, &retVal);
    EXPECT_NE(nullptr, ptrResult);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());

    auto mapEvent = castToObject<Event>(mapEventReturned);
    // A missing event object would otherwise be dereferenced below.
    ASSERT_NE(nullptr, mapEvent);
    EXPECT_TRUE(CL_COMMAND_MAP_BUFFER == mapEvent->getCommandType());

    retVal = clWaitForEvents(1, &mapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clEnqueueUnmapMemObject(pCmdQ, buffer.get(), ptrResult, 0, nullptr, &unmapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Unmapping after a read-only map is expected not to submit another task.
    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());

    auto unmapEvent = castToObject<Event>(unmapEventReturned);
    ASSERT_NE(nullptr, unmapEvent);
    EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType());

    retVal = clWaitForEvents(1, &unmapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseEvent(mapEventReturned);
    clReleaseEvent(unmapEventReturned);
}
// Creates a CL_MEM_USE_HOST_PTR buffer over a host address that is not
// cache-line aligned (so the buffer is not zero-copy), enqueues a kernel and
// then a non-blocking read map on a mock queue, and checks the latest sent
// task count and the task count the queue waited for.
// NOTE(review): the test name mentions data transfer, but no buffer contents
// are compared here - confirm whether that is intended.
// NOTE(review): explicit template argument lists (on castToObject and
// MockCommandQueueHw) appear to be missing in this copy of the file.
HWTEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCalledThenFinishIsCalledAndDataTransferred) {
    const auto bufferSize = 100;
    auto localSize = bufferSize;
    char misaligned[bufferSize] = {1};
    MockKernelWithInternals kernel(*pClDevice);
    size_t GWS = 1;

    uintptr_t address = (uintptr_t)&misaligned[0];

    // If the array happens to start on a cache-line boundary, move one byte
    // forward and shrink the size so the region stays inside the array.
    if (!(address & (MemoryConstants::cacheLineSize - 1))) {
        address++;
        localSize--;
    }

    auto buffer = clCreateBuffer(
        BufferDefaults::context,
        CL_MEM_USE_HOST_PTR,
        localSize,
        (void *)address,
        &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, buffer);

    // Precondition of the scenario: the buffer must not be zero-copy.
    auto pBuffer = castToObject(buffer);
    ASSERT_FALSE(pBuffer->isMemObjZeroCopy());

    MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr);

    // enqueue something that can be finished
    retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(retVal, CL_SUCCESS);

    auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
    uint32_t taskCount = commandStreamReceiver.peekTaskCount();
    EXPECT_EQ(1u, taskCount);

    auto ptrResult = clEnqueueMapBuffer(
        &mockCmdQueue,
        buffer,
        CL_FALSE,
        CL_MAP_READ,
        0,
        localSize,
        0,
        nullptr,
        nullptr,
        &retVal);
    EXPECT_NE(nullptr, ptrResult);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // NOTE(review): the result of this call is discarded - looks like a
    // leftover; confirm before removing.
    commandStreamReceiver.peekTaskCount();

    EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
    EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited);

    retVal = clReleaseMemObject(buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}
ptrResult = clEnqueueMapBuffer(
        pCmdQ,
        buffer,
        CL_FALSE,
        CL_MAP_READ,
        0,
        8,
        0,
        nullptr,
        &eventReturned,
        &retVal);
    EXPECT_NE(nullptr, ptrResult);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, eventReturned);

    // NOTE(review): castToObject has no template argument in this copy of the file; it looks
    // like it was lost in extraction - confirm against upstream.
    auto eventObject = castToObject(eventReturned);
    // The returned event is expected to carry task count 0 and to report completion.
    EXPECT_EQ(0u, eventObject->peekTaskCount());
    EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion());

    retVal = clEnqueueUnmapMemObject(
        pCmdQ,
        buffer,
        ptrResult,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseEvent(eventReturned);
    clReleaseMemObject(buffer);
}

// Maps 8 bytes of a CL_MEM_READ_WRITE buffer (non-blocking, no wait list, no event) and
// expects the CSR's latest sent task count to still be 0 afterwards.
TEST_F(EnqueueMapBufferTest, GivenZeroCopyBufferWhenMapBufferWithoutEventsThenCommandStreamReceiverUpdatesRequiredDCFlushCount) {
    auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();

    auto buffer = clCreateBuffer(
        BufferDefaults::context,
        CL_MEM_READ_WRITE,
        20,
        nullptr,
        &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, buffer);

    auto ptrResult = clEnqueueMapBuffer(
        pCmdQ,
        buffer,
        CL_FALSE,
        CL_MAP_READ,
        0,
        8,
        0,
        nullptr,
        nullptr,
        &retVal);
    EXPECT_NE(nullptr, ptrResult);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount());

    clReleaseMemObject(buffer);
}

// Maps bytes [2, 2 + 3) of a 10-byte buffer created without a host pointer, then looks up the
// MapInfo recorded for the returned pointer: offset/size must sit in component 0 with the other
// components 0, and the pointer must equal getCpuAddressForMapping() advanced by mapOffset.
TEST_F(EnqueueMapBufferTest, givenBufferWithoutUseHostPtrFlagWhenMappedOnCpuThenSetAllMapParams) {
    // NOTE(review): std::unique_ptr has no element type in this copy of the file - confirm
    // against upstream.
    std::unique_ptr buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 10, nullptr, retVal));
    EXPECT_NE(nullptr, buffer);
    EXPECT_TRUE(buffer->mappingOnCpuAllowed());

    size_t mapSize = 3;
    size_t mapOffset = 2;

    auto mappedPtr = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_READ, mapOffset, mapSize, 0, nullptr, nullptr, &retVal);
    EXPECT_NE(nullptr, mappedPtr);

    MapInfo mappedInfo;
    auto success = buffer->findMappedPtr(mappedPtr, mappedInfo);
    EXPECT_TRUE(success);
    EXPECT_NE(nullptr, mappedInfo.ptr);

    EXPECT_EQ(mapOffset, mappedInfo.offset[0]);
    EXPECT_EQ(0u, mappedInfo.offset[1]);
    EXPECT_EQ(0u, mappedInfo.offset[2]);

    EXPECT_EQ(mapSize, mappedInfo.size[0]);
    EXPECT_EQ(0u, mappedInfo.size[1]);
    EXPECT_EQ(0u,
mappedInfo.size[2]);

    auto expectedPtr = ptrOffset(buffer->getCpuAddressForMapping(), mapOffset);

    EXPECT_EQ(mappedPtr, expectedPtr);
}

// Same check as for a buffer without a host pointer, but the buffer is created with
// CL_MEM_USE_HOST_PTR over a 10-byte stack array: the MapInfo recorded for the mapped pointer
// must hold offset 2 / size 3 in component 0, and the pointer must equal
// getCpuAddressForMapping() advanced by mapOffset.
TEST_F(EnqueueMapBufferTest, givenBufferWithUseHostPtrFlagWhenMappedOnCpuThenSetAllMapParams) {
    uint8_t hostPtr[10] = {};
    // NOTE(review): std::unique_ptr has no element type in this copy of the file - confirm
    // against upstream.
    std::unique_ptr buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 10, hostPtr, retVal));
    EXPECT_NE(nullptr, buffer);
    EXPECT_TRUE(buffer->mappingOnCpuAllowed());

    size_t mapSize = 3;
    size_t mapOffset = 2;

    auto mappedPtr = clEnqueueMapBuffer(pCmdQ, buffer.get(), CL_FALSE, CL_MAP_READ, mapOffset, mapSize, 0, nullptr, nullptr, &retVal);
    EXPECT_NE(nullptr, mappedPtr);

    MapInfo mappedInfo;
    auto success = buffer->findMappedPtr(mappedPtr, mappedInfo);
    EXPECT_TRUE(success);
    EXPECT_NE(nullptr, mappedInfo.ptr);

    EXPECT_EQ(mapOffset, mappedInfo.offset[0]);
    EXPECT_EQ(0u, mappedInfo.offset[1]);
    EXPECT_EQ(0u, mappedInfo.offset[2]);

    EXPECT_EQ(mapSize, mappedInfo.size[0]);
    EXPECT_EQ(0u, mappedInfo.size[1]);
    EXPECT_EQ(0u, mappedInfo.size[2]);

    auto expectedPtr = ptrOffset(buffer->getCpuAddressForMapping(), mapOffset);

    EXPECT_EQ(mappedPtr, expectedPtr);
}

// NOTE(review): the next line starts with what looks like tar archive member-header residue
// (member path, numeric header fields, "ustar") fused onto the license comment of
// enqueue_map_image_tests.cpp. It is not C++ and is reproduced unchanged.
compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp000066400000000000000000001130741363734646600320560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"

#include "opencl/source/event/user_event.h"
#include "opencl/source/helpers/memory_properties_flags_helpers.h"
#include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/device_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/helpers/unit_test_helper.h"
#include "opencl/test/unit_test/libult/ult_command_stream_receiver.h"
#include "opencl/test/unit_test/mocks/mock_allocation_properties.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "test.h"

using namespace NEO;

// Fixture for the enqueueMapImage tests. SetUp() brings up the device and command-queue
// fixtures, then creates a MockContext and one image through ImageHelper; TearDown() deletes
// the image, releases the context and tears the two fixtures down in reverse order.
struct EnqueueMapImageTest : public DeviceFixture,
                             public CommandQueueHwFixture,
                             public ::testing::Test {
    typedef CommandQueueHwFixture CommandQueueFixture;

    EnqueueMapImageTest() {
    }

    void SetUp() override {
        DeviceFixture::SetUp();
        CommandQueueFixture::SetUp(pClDevice, 0);
        context = new MockContext(pClDevice);
        // NOTE(review): the ImageHelper template arguments are missing in this copy of the file
        // ("ImageHelper>::create"), so the kind of image created here cannot be read from it.
        image = ImageHelper>::create(context);
    }

    void TearDown() override {
        delete image;
        context->release();
        CommandQueueFixture::TearDown();
        DeviceFixture::TearDown();
    }

    MockContext *context;
    // Starts as CL_INVALID_VALUE - presumably so a test only observes CL_SUCCESS when the call
    // under test actually wrote it.
    cl_int retVal = CL_INVALID_VALUE;
    Image *image = nullptr;
    char srcMemory[128];
};

// Value-parameterized flavour of the fixture (used below through the MipMapMapImageParamsTest
// typedef with cl_mem_object_type values).
struct EnqueueMapImageParamsTest : public EnqueueMapImageTest,
                                   public ::testing::WithParamInterface {
};

// Maps the fixture image twice (blocking, identical origin and region) and expects both calls
// to return the same pointer; a map allocation must exist before and after the first map.
TEST_F(EnqueueMapImageTest, GivenTiledImageWhenMappingImageThenPointerIsReused) {
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};

    auto mapAllocation = image->getMapAllocation();
    EXPECT_NE(nullptr, mapAllocation);

    auto ptr1 = pCmdQ->enqueueMapImage(
        image, true, mapFlags, origin,
        region, nullptr, nullptr, 0,
        nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, image->getHostPtr());

    mapAllocation = image->getMapAllocation();
    EXPECT_NE(nullptr, mapAllocation);

    auto ptr2 = pCmdQ->enqueueMapImage(
        image, true, mapFlags, origin,
        region, nullptr, nullptr, 0,
        nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(ptr1, ptr2);

    retVal = pCmdQ->enqueueUnmapMemObject(image, ptr1, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Skipped when UnitTestHelper reports that tiled images are not supported. Maps one image at
// origins {0, 0, 0} and {2, 2, 0} and expects the second pointer to be the first one advanced by
// elementSize * origin2[0] + hostPtrRowPitch * origin2[1].
HWTEST_F(EnqueueMapImageTest, givenAllocatedMapPtrAndMapWithDifferentOriginIsCalledThenReturnDifferentPointers) {
    if (!UnitTestHelper::tiledImagesSupported) {
GTEST_SKIP();
    }
    // NOTE(review): std::unique_ptr and Image2dHelper have no template arguments in this copy of
    // the file; they look like they were lost in extraction - confirm against upstream.
    std::unique_ptr img(Image2dHelper::create(context));

    auto mapFlags = CL_MAP_READ;
    const size_t origin1[3] = {0, 0, 0};
    const size_t origin2[3] = {2, 2, 0};
    const size_t region[3] = {1, 1, 1};

    auto ptr1 = pCmdQ->enqueueMapImage(img.get(), true, mapFlags, origin1, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto ptr2 = pCmdQ->enqueueMapImage(img.get(), true, mapFlags, origin2, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_NE(ptr1, ptr2);
    EXPECT_NE(nullptr, img->getAllocatedMapPtr());

    // Expected distance between the two mapped pointers: x offset in bytes plus y offset in rows.
    size_t mapOffset = img->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin2[0] +
                       img->getHostPtrRowPitch() * origin2[1];
    EXPECT_EQ(ptr2, ptrOffset(ptr1, mapOffset));
}

typedef EnqueueMapImageParamsTest MipMapMapImageParamsTest;

// Parameterized over image type. Builds a 4x1x1 image description with 10 mip levels, maps it
// once with an all-zero origin and once with a single origin component set to 1, and expects
// the two pointers to differ by exactly mapOffset (16) bytes. Which component is set depends on
// the image type - presumably it is the mip-level slot of the origin for that type.
TEST_P(MipMapMapImageParamsTest, givenAllocatedMapPtrWhenMapsWithDifferentMipMapsAreCalledThenReturnDifferentPointers) {
    auto image_type = (cl_mem_object_type)GetParam();
    cl_int retVal = CL_SUCCESS;
    cl_image_desc imageDesc = {};
    imageDesc.image_type = image_type;
    imageDesc.num_mip_levels = 10;
    imageDesc.image_width = 4;
    imageDesc.image_height = 1;
    imageDesc.image_depth = 1;
    const size_t origin1[4] = {0, 0, 0, 0};
    size_t origin2[4] = {0, 0, 0, 0};
    // NOTE(review): std::unique_ptr and ImageHelper have no template arguments anywhere in this
    // test in this copy of the file - confirm against upstream.
    std::unique_ptr image;
    size_t mapOffset = 16u;
    switch (image_type) {
    case CL_MEM_OBJECT_IMAGE1D:
        origin2[1] = 1;
        image = std::unique_ptr(ImageHelper::create(context, &imageDesc));
        break;
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        origin2[2] = 1;
        imageDesc.image_array_size = 2;
        image = std::unique_ptr(ImageHelper::create(context, &imageDesc));
        break;
    case CL_MEM_OBJECT_IMAGE2D:
        origin2[2] = 1;
        image = std::unique_ptr(ImageHelper::create(context, &imageDesc));
        break;
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        origin2[3] = 1;
        imageDesc.image_array_size = 2;
        image = std::unique_ptr(ImageHelper::create(context, &imageDesc));
        break;
    case CL_MEM_OBJECT_IMAGE3D:
        origin2[3] = 1;
        image =
std::unique_ptr(ImageHelper::create(context, &imageDesc));
        break;
    }
    EXPECT_NE(nullptr, image.get());

    auto mapFlags = CL_MAP_READ;
    const size_t region[3] = {1, 1, 1};

    auto ptr1 = pCmdQ->enqueueMapImage(image.get(), true, mapFlags, origin1, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto ptr2 = pCmdQ->enqueueMapImage(image.get(), true, mapFlags, origin2, region, nullptr, nullptr, 0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_NE(ptr1, ptr2);
    // An allocated map pointer is only required when the image cannot be mapped on the CPU.
    if (image->mappingOnCpuAllowed() == false) {
        EXPECT_NE(nullptr, image->getAllocatedMapPtr());
    }

    EXPECT_EQ(ptr2, ptrOffset(ptr1, mapOffset));
}

// Runs the mip-map mapping test once for each of the five image object types listed.
INSTANTIATE_TEST_CASE_P(MipMapMapImageParamsTest_givenAllocatedMapPtrAndMapWithDifferentMipMapsIsCalledThenReturnDifferentPointers,
                        MipMapMapImageParamsTest,
                        ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D));

// ImageHw subclass that records, at the moment setAllocatedMapPtr() is called, whether
// hasOwnership() returned true, and then forwards to MemObj::setAllocatedMapPtr().
// NOTE(review): the template parameter list and the ImageHw template arguments are missing in
// this copy of the file ("template struct mockedImage") - confirm against upstream.
template struct mockedImage : public ImageHw {
    using ImageHw::ImageHw;
    void setAllocatedMapPtr(void *allocatedMapPtr) override {
        ownershipTaken = this->hasOwnership();
        MemObj::setAllocatedMapPtr(allocatedMapPtr);
    }
    bool ownershipTaken = false;
};

// Skipped when UnitTestHelper reports that tiled images are not supported. Builds a mockedImage
// from the fixture image's format, descriptor, graphics allocation and surface format info, and
// checks that it starts without a map allocation or host pointer. The assertions after the map
// call expect: ownership was held when the map pointer was set; the returned pointer equals the
// allocated map pointer and the map allocation's underlying buffer; and the map allocation's
// task count for the queue's OS context equals the CSR task count.
HWTEST_F(EnqueueMapImageTest, givenTiledImageWhenMapImageIsCalledThenStorageIsSetWithImageMutexTaken) {
    if (!UnitTestHelper::tiledImagesSupported) {
        GTEST_SKIP();
    }
    auto imageFormat = image->getImageFormat();
    auto imageDesc = image->getImageDesc();
    auto graphicsAllocation = image->getGraphicsAllocation();
    auto surfaceFormatInfo = image->getSurfaceFormatInfo();

    mockedImage mockImage(context,
                          {},
                          0,
                          0,
                          4096u,
                          nullptr,
                          imageFormat,
                          imageDesc,
                          false,
                          graphicsAllocation,
                          true,
                          0,
                          0,
                          surfaceFormatInfo,
                          nullptr);

    mockImage.createFunction = image->createFunction;

    auto mapAllocation = mockImage.getMapAllocation();
    EXPECT_EQ(nullptr, mapAllocation);
    EXPECT_EQ(nullptr, mockImage.getHostPtr());

    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
auto apiMapPtr = pCmdQ->enqueueMapImage(
        &mockImage, true, mapFlags, origin,
        region, nullptr, nullptr, 0,
        nullptr, nullptr, retVal);
    // Set by mockedImage::setAllocatedMapPtr(): ownership was held when the map pointer was stored.
    EXPECT_TRUE(mockImage.ownershipTaken);

    auto mapPtr = mockImage.getAllocatedMapPtr();
    EXPECT_EQ(apiMapPtr, mapPtr);
    mapAllocation = mockImage.getMapAllocation();
    EXPECT_NE(nullptr, mapAllocation);
    EXPECT_EQ(apiMapPtr, mapAllocation->getUnderlyingBuffer());

    // The map allocation's task count for the queue's OS context must equal the CSR task count.
    auto osContextId = pCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId();
    auto expectedTaskCount = pCmdQ->getGpgpuCommandStreamReceiver().peekTaskCount();
    auto actualMapAllocationTaskCount = mapAllocation->getTaskCount(osContextId);
    EXPECT_EQ(expectedTaskCount, actualMapAllocationTaskCount);

    // The test frees the map allocation and releases the map pointer itself before mockImage
    // goes out of scope.
    pDevice->getMemoryManager()->freeGraphicsMemory(mockImage.getMapAllocation());
    mockImage.releaseAllocatedMapPtr();
}

// Blocking map of the fixture image. For a zero-copy image the returned pointer must equal
// getCpuAddress(), otherwise it must differ. The row and slice pitch written by the map call
// must match what getImageInfo() reports for CL_IMAGE_ROW_PITCH / CL_IMAGE_SLICE_PITCH.
TEST_F(EnqueueMapImageTest, WhenMappingImageThenCpuAndGpuAddressAreEqualWhenZeroCopyIsUsed) {
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t imageRowPitch = 0;
    size_t imageSlicePitch = 0;
    auto ptr = pCmdQ->enqueueMapImage(
        image,
        true,
        mapFlags,
        origin,
        region,
        &imageRowPitch,
        &imageSlicePitch,
        0,
        nullptr,
        nullptr,
        retVal);
    if (image->isMemObjZeroCopy()) {
        EXPECT_EQ(image->getCpuAddress(), ptr);
    } else {
        EXPECT_NE(image->getCpuAddress(), ptr);
    }
    size_t imageRowPitchRef = 0;
    image->getImageInfo(CL_IMAGE_ROW_PITCH, sizeof(imageRowPitchRef), &imageRowPitchRef, nullptr);
    EXPECT_EQ(imageRowPitch, imageRowPitchRef);

    size_t imageSlicePitchRef = 0;
    image->getImageInfo(CL_IMAGE_SLICE_PITCH, sizeof(imageSlicePitchRef), &imageSlicePitchRef, nullptr);
    EXPECT_EQ(imageSlicePitch, imageSlicePitchRef);
}

// Blocking map of the fixture image with valid arguments: expects a non-null pointer,
// CL_SUCCESS, and row/slice pitch values equal to those reported by getImageInfo().
TEST_F(EnqueueMapImageTest, GivenCmdqAndValidArgsWhenMappingImageThenSuccessIsReturned) {
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t imageRowPitch = 0;
    size_t imageSlicePitch = 0;
    auto ptr = pCmdQ->enqueueMapImage(
        image,
        true,
        mapFlags,
        origin,
        region,
&imageRowPitch,
        &imageSlicePitch,
        0,
        nullptr,
        nullptr,
        retVal);
    EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    size_t imageRowPitchRef = 0;
    image->getImageInfo(CL_IMAGE_ROW_PITCH, sizeof(imageRowPitchRef), &imageRowPitchRef, nullptr);
    EXPECT_EQ(imageRowPitch, imageRowPitchRef);

    size_t imageSlicePitchRef = 0;
    image->getImageInfo(CL_IMAGE_SLICE_PITCH, sizeof(imageSlicePitchRef), &imageSlicePitchRef, nullptr);
    EXPECT_EQ(imageSlicePitch, imageSlicePitchRef);
}

// Skipped when UnitTestHelper reports that tiled images are not supported. With the async
// events handler disabled, the test enqueues a kernel, then a non-blocking CL_MAP_WRITE map
// that returns an event, and later an unmap that returns an event. It follows the CSR task
// count through these steps while driving the tag memory by hand; a CL_COMPLETE callback on the
// map event writes 4 into the tag memory.
HWTEST_F(EnqueueMapImageTest, givenNonReadOnlyMapWithOutEventWhenMappedThenSetEventAndIncraseTaskCountFromWriteImage) {
    if (!UnitTestHelper::tiledImagesSupported) {
        GTEST_SKIP();
    }
    // Restores debug flags when the test ends; the async events handler is switched off here.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    cl_event mapEventReturned = nullptr;
    cl_event unmapEventReturned = nullptr;
    uint32_t tagHW = 0;
    auto mapFlags = CL_MAP_WRITE;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t imageRowPitch = 0;
    size_t imageSlicePitch = 0;
    size_t GWS = 1;

    MockKernelWithInternals kernel(*pClDevice);
    *pTagMemory = tagHW;
    auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
    // The fixture's pTagMemory must be the very tag address the CSR reads.
    auto tag_address = commandStreamReceiver.getTagAddress();
    EXPECT_TRUE(pTagMemory == tag_address);

    // Event callback holder: SignalEv2 stores 4 through the user-data pointer, which the test
    // later points at the tag memory.
    // NOTE(review): static_cast has no target type in this copy of the file - confirm against
    // upstream.
    struct E2Clb {
        static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) {
            uint32_t *pTagMem = static_cast(data);
            *pTagMem = 4;
        }
    };

    uint32_t taskCount = commandStreamReceiver.peekTaskCount();
    EXPECT_EQ(1u, taskCount);

    // enqueue something that can be finished...
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(retVal, CL_SUCCESS);

    // Advance the hand-driven tag value by 3 and publish it to the tag memory.
    *pTagMemory = tagHW += 3;
    auto ptr = pCmdQ->enqueueMapImage(
        image,
        false,
        mapFlags,
        origin,
        region,
        &imageRowPitch,
        &imageSlicePitch,
        0,
        nullptr,
        &mapEventReturned,
        retVal);

    EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // NOTE(review): castToObject has no template argument in this copy of the file - confirm
    // against upstream.
    auto mapEvent = castToObject(mapEventReturned);
    EXPECT_TRUE(CL_COMMAND_MAP_IMAGE == mapEvent->getCommandType());

    // The CSR task count is expected to be 3 after the kernel and the map.
    taskCount = commandStreamReceiver.peekTaskCount();
    EXPECT_EQ(3u, taskCount);
    clSetEventCallback(mapEventReturned, CL_COMPLETE, E2Clb::SignalEv2, (void *)pTagMemory);

    retVal = clWaitForEvents(1, &mapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // The callback wrote 4 into the tag memory; waiting did not change the task count.
    EXPECT_EQ(4u, *pTagMemory);
    taskCount = commandStreamReceiver.peekTaskCount();
    EXPECT_EQ(3u, taskCount);

    (*pTagMemory)++;
    retVal = clEnqueueUnmapMemObject(
        pCmdQ,
        image,
        ptr,
        0,
        nullptr,
        &unmapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto unmapEvent = castToObject(unmapEventReturned);
    EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType());

    // NOTE(review): the result of this clWaitForEvents() call is assigned but never checked.
    retVal = clWaitForEvents(1, &unmapEventReturned);

    // Unmapping the writable mapping is expected to raise the CSR task count to 4.
    taskCount = commandStreamReceiver.peekTaskCount();
    EXPECT_EQ(4u, taskCount);

    clReleaseEvent(mapEventReturned);
    clReleaseEvent(unmapEventReturned);
}

// Skipped when UnitTestHelper reports that tiled images are not supported. Read-only
// counterpart of the test above: with the tag memory preset to 5, a non-blocking CL_MAP_READ
// map is expected to take the CSR task count from 1 to 2, and the later unmap is expected to
// leave it at 2.
HWTEST_F(EnqueueMapImageTest, givenReadOnlyMapWithOutEventWhenMappedThenSetEventAndDontIncraseTaskCountFromWriteImage) {
    if (!UnitTestHelper::tiledImagesSupported) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);
    cl_event mapEventReturned = nullptr;
    cl_event unmapEventReturned = nullptr;
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    *pTagMemory = 5;

    auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();

    EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());

    auto ptr = pCmdQ->enqueueMapImage(image, false, mapFlags, origin, region, nullptr, nullptr, 0, nullptr, &mapEventReturned, retVal);
EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(2u, commandStreamReceiver.peekTaskCount());

    // NOTE(review): castToObject has no template argument in this copy of the file - confirm
    // against upstream.
    auto mapEvent = castToObject(mapEventReturned);
    EXPECT_TRUE(CL_COMMAND_MAP_IMAGE == mapEvent->getCommandType());

    retVal = clWaitForEvents(1, &mapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clEnqueueUnmapMemObject(pCmdQ, image, ptr, 0, nullptr, &unmapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Unmapping the read-only mapping leaves the CSR task count at 2.
    EXPECT_EQ(2u, commandStreamReceiver.peekTaskCount());

    auto unmapEvent = castToObject(unmapEventReturned);
    EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType());

    retVal = clWaitForEvents(1, &unmapEventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseEvent(mapEventReturned);
    clReleaseEvent(unmapEventReturned);
}

// Skipped when UnitTestHelper reports that tiled images are not supported. Forces the CSR task
// count to 100, maps the fixture image non-blocking with an event out-parameter, and expects a
// non-null event whose task count is forceTaskCount + 1 and which reports completion.
HWTEST_F(EnqueueMapImageTest, GivenPtrToReturnEventWhenMappingImageThenEventIsNotNull) {
    if (!UnitTestHelper::tiledImagesSupported) {
        GTEST_SKIP();
    }
    cl_event eventReturned = nullptr;
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t imageRowPitch = 0;
    size_t imageSlicePitch = 0;
    uint32_t forceTaskCount = 100;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.taskCount = forceTaskCount;

    auto ptr = clEnqueueMapImage(
        pCmdQ,
        image,
        CL_FALSE,
        mapFlags,
        origin,
        region,
        &imageRowPitch,
        &imageSlicePitch,
        0,
        nullptr,
        &eventReturned,
        &retVal);
    EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, eventReturned);

    auto eventObject = castToObject(eventReturned);
    EXPECT_EQ(forceTaskCount + 1, eventObject->peekTaskCount());
    EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion());

    retVal = clEnqueueUnmapMemObject(
        pCmdQ,
        image,
        ptr,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseEvent(eventReturned);
}

// Zero-copy variant: the CSR task count is forced to 100 and the queue's task count to 40. The
// returned event is expected to carry the queue's task count and to report completion, and the
// mapped pointer must equal the image's getCpuAddressForMemoryTransfer().
HWTEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenItIsMappedAndReturnsEventThenEventHasCorrectProperties) {
    cl_event eventReturned = nullptr;
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0,
0};
    const size_t region[3] = {1, 1, 1};
    size_t imageRowPitch = 0;
    size_t imageSlicePitch = 0;
    uint32_t forceTaskCount = 100;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.taskCount = forceTaskCount;

    // NOTE(review): std::unique_ptr, ImageHelper and castToObject have no usable template
    // arguments in this copy of the file - confirm against upstream.
    std::unique_ptr zero_copy_image(ImageHelper>::create(context));

    // The rest of the test is only meaningful for a zero-copy image.
    ASSERT_TRUE(zero_copy_image->isMemObjZeroCopy());
    pCmdQ->taskCount = 40u;

    auto ptr = clEnqueueMapImage(
        pCmdQ,
        zero_copy_image.get(),
        CL_FALSE,
        mapFlags,
        origin,
        region,
        &imageRowPitch,
        &imageSlicePitch,
        0,
        nullptr,
        &eventReturned,
        &retVal);
    EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, eventReturned);

    EXPECT_EQ(ptr, zero_copy_image->getCpuAddressForMemoryTransfer());

    // The event carries the queue's task count (40), not the CSR's forced value (100).
    auto eventObject = castToObject(eventReturned);
    EXPECT_EQ(pCmdQ->taskCount, eventObject->peekTaskCount());
    EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion());

    retVal = clEnqueueUnmapMemObject(
        pCmdQ,
        zero_copy_image.get(),
        ptr,
        0,
        nullptr,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseEvent(eventReturned);
}

// Blocking CL_MAP_WRITE map of a non-zero-copy image at origin {1, 0, 0}. The remainder of the
// test compares the returned pointer with Image1dDefaults::hostPtr advanced by one float, then
// unmaps and deletes the image.
TEST_F(EnqueueMapImageTest, GivenNonZeroCopyImageWhenMappedWithOffsetThenCorrectPointerIsReturned) {
    auto mapFlags = CL_MAP_WRITE;
    const size_t origin[3] = {1, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t imageRowPitch = 0;
    size_t imageSlicePitch = 0;

    Image *nonZeroCopyImage = ImageHelper>::create(context);

    EXPECT_FALSE(nonZeroCopyImage->isMemObjZeroCopy());

    auto ptr = clEnqueueMapImage(
        pCmdQ,
        nonZeroCopyImage,
        CL_TRUE,
        mapFlags,
        origin,
        region,
        &imageRowPitch,
        &imageSlicePitch,
        0,
        nullptr,
        nullptr,
        &retVal);

    // NOTE(review): line breaks were lost in the span below. It holds two `//` comments and it
    // cannot be told from this copy where the first one ends (in particular whether
    // EXPECT_NE(nullptr, ptr) is live code or commented out), so the span is left exactly as
    // found - restore it from upstream.
    // NOTE(review): the tiling check in that span reads the fixture's `image`, not
    // `nonZeroCopyImage` - confirm that is intended.
    float *HostPtrOffseted = (float *)Image1dDefaults::hostPtr + 1; // EXPECT_NE(nullptr, ptr); if (!image->isTiledAllocation()) { EXPECT_EQ(HostPtrOffseted, ptr); // Returned pointer should be offseted } EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject( pCmdQ, nonZeroCopyImage, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete nonZeroCopyImage; } HWTEST_F(EnqueueMapImageTest,
givenSharingHandlerWhenNonReadOnlyMapAndUnmapOnNonTiledImageIsCalledThenMakeGpuCopy) {
    // Attaches a SharingHandler to a non-tiled image, resets task count/level to 1 on both the
    // CSR and the queue, then does a blocking CL_MAP_WRITE map followed by an unmap. The queue's
    // task count and level are expected to be 2 after the map and 3 after the unmap.
    // NOTE(review): std::unique_ptr, ImageHelper and getUltCommandStreamReceiver have no usable
    // template arguments in this copy of the file - confirm against upstream.
    std::unique_ptr image(ImageHelper>::create(context));
    ASSERT_NE(nullptr, image);
    image->setSharingHandler(new SharingHandler());
    EXPECT_FALSE(image->isTiledAllocation());

    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.taskCount = 1;
    csr.taskLevel = 1;
    pCmdQ->taskCount = 1;
    pCmdQ->taskLevel = 1;

    size_t origin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};
    void *data = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_WRITE, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal);
    EXPECT_NE(nullptr, data);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(2u, pCmdQ->taskCount);
    EXPECT_EQ(2u, pCmdQ->taskLevel);

    // NOTE(review): the result of the unmap call is assigned but never checked.
    retVal = clEnqueueUnmapMemObject(pCmdQ, image.get(), data, 0, NULL, NULL);
    EXPECT_EQ(3u, pCmdQ->taskCount);
    EXPECT_EQ(3u, pCmdQ->taskLevel);
}

// Read-only counterpart of the test above: after a blocking CL_MAP_READ map the queue's task
// count and level are expected to be 2, and the unmap is expected to leave them at 2.
HWTEST_F(EnqueueMapImageTest, givenSharingHandlerWhenReadOnlyMapAndUnmapOnNonTiledImageIsCalledThenMakeGpuCopy) {
    std::unique_ptr image(ImageHelper>::create(context));
    ASSERT_NE(nullptr, image);
    image->setSharingHandler(new SharingHandler());
    EXPECT_FALSE(image->isTiledAllocation());

    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.taskCount = 1;
    csr.taskLevel = 1;
    pCmdQ->taskCount = 1;
    pCmdQ->taskLevel = 1;

    size_t origin[] = {0, 0, 0};
    size_t region[] = {1, 1, 1};
    void *data = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal);
    EXPECT_NE(nullptr, data);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(2u, pCmdQ->taskCount);
    EXPECT_EQ(2u, pCmdQ->taskLevel);

    // NOTE(review): the result of the unmap call is assigned but never checked.
    retVal = clEnqueueUnmapMemObject(pCmdQ, image.get(), data, 0, NULL, NULL);
    EXPECT_EQ(2u, pCmdQ->taskCount);
    EXPECT_EQ(2u, pCmdQ->taskLevel);
}

// Maps origin {2, 0, 0} / region {2, 1, 1} of an image that allows CPU mapping and checks the
// MapInfo recorded for the returned pointer: offset and size must equal the requested origin
// and region, and the pointer must equal getCpuAddressForMapping() advanced by
// calculateOffsetForMapping(offset).
HWTEST_F(EnqueueMapImageTest, givenImageWithouUsetHostPtrFlagWhenMappedOnCpuThenSetAllMapProperties) {
    std::unique_ptr image(ImageHelper::create(context));
    ASSERT_NE(nullptr, image);
    EXPECT_TRUE(image->mappingOnCpuAllowed());

    size_t origin[] = {2, 0, 0};
    size_t region[] =
{2, 1, 1};
    void *mappedPtr = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal);
    EXPECT_NE(nullptr, mappedPtr);

    MapInfo mappedInfo;
    auto success = image->findMappedPtr(mappedPtr, mappedInfo);
    EXPECT_TRUE(success);
    EXPECT_NE(nullptr, mappedInfo.ptr);

    EXPECT_EQ(origin[0], mappedInfo.offset[0]);
    EXPECT_EQ(origin[1], mappedInfo.offset[1]);
    EXPECT_EQ(origin[2], mappedInfo.offset[2]);

    EXPECT_EQ(region[0], mappedInfo.size[0]);
    EXPECT_EQ(region[1], mappedInfo.size[1]);
    EXPECT_EQ(region[2], mappedInfo.size[2]);

    auto expectedPtr = ptrOffset(image->getCpuAddressForMapping(), image->calculateOffsetForMapping(mappedInfo.offset));

    EXPECT_EQ(mappedPtr, expectedPtr);
}

// Maps origin {2, 0, 0} / region {2, 1, 1} of an image that allows CPU mapping and checks the
// MapInfo recorded for the returned pointer: offset and size must equal the requested origin
// and region, and the pointer must equal getCpuAddressForMapping() advanced by
// calculateOffsetForMapping(offset).
// NOTE(review): std::unique_ptr and ImageHelper have no usable template arguments in this copy
// of the file, so the use-host-ptr aspect named by the test cannot be seen here - confirm
// against upstream.
HWTEST_F(EnqueueMapImageTest, givenImageWithUseHostPtrFlagWhenMappedOnCpuThenSetAllMapProperties) {
    std::unique_ptr image(ImageHelper>::create(context));
    ASSERT_NE(nullptr, image);
    EXPECT_TRUE(image->mappingOnCpuAllowed());

    size_t origin[] = {2, 0, 0};
    size_t region[] = {2, 1, 1};
    void *mappedPtr = clEnqueueMapImage(pCmdQ, image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, NULL, NULL, &retVal);
    EXPECT_NE(nullptr, mappedPtr);

    MapInfo mappedInfo;
    auto success = image->findMappedPtr(mappedPtr, mappedInfo);
    EXPECT_TRUE(success);
    EXPECT_NE(nullptr, mappedInfo.ptr);

    EXPECT_EQ(origin[0], mappedInfo.offset[0]);
    EXPECT_EQ(origin[1], mappedInfo.offset[1]);
    EXPECT_EQ(origin[2], mappedInfo.offset[2]);

    EXPECT_EQ(region[0], mappedInfo.size[0]);
    EXPECT_EQ(region[1], mappedInfo.size[1]);
    EXPECT_EQ(region[2], mappedInfo.size[2]);

    auto expectedPtr = ptrOffset(image->getCpuAddressForMapping(), image->calculateOffsetForMapping(mappedInfo.offset));

    EXPECT_EQ(mappedPtr, expectedPtr);
}

// Blocking map of the fixture image that waits on one event. The event is a local Event
// subclass whose updateExecutionStatus() sets the status to CL_COMPLETE. The map must return a
// non-null pointer with CL_SUCCESS and leave the queue's internal reference count unchanged.
TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith2DImageIsEnqueuedAndEventAsynchrounouslyCompletedThenEnqueueFinishesWithoutStall) {
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t
imageRowPitch = 0;
    size_t imageSlicePitch = 0;

    // Event whose status flips to CL_COMPLETE as soon as its execution status is updated.
    class MockEventWithSetCompleteOnUpdate : public Event {
      public:
        MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType,
                                         uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
        }
        void updateExecutionStatus() override {
            setStatus(CL_COMPLETE);
        }
    };
    MockEventWithSetCompleteOnUpdate blockingEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 1);

    cl_event blockingClEvent = &blockingEvent;

    // Snapshot taken before the map so the reference count can be compared afterwards.
    int32_t initialRefCountCmdQ = pCmdQ->getRefInternalCount();

    auto ptr = pCmdQ->enqueueMapImage(
        image,
        true,
        mapFlags,
        origin,
        region,
        &imageRowPitch,
        &imageSlicePitch,
        1,
        &blockingClEvent,
        nullptr,
        retVal);
    EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(initialRefCountCmdQ, pCmdQ->getRefInternalCount());
}

// Same scenario as the test above, run on an image created locally through ImageHelper and
// deleted at the end instead of the fixture image.
// NOTE(review): the ImageHelper template arguments are missing in this copy of the file, so the
// 1D image type named by the test cannot be seen here - confirm against upstream.
TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith1DImageIsEnqueuedAndEventAsynchrounouslyCompletedThenEnqueueFinishesWithoutStall) {
    auto mapFlags = CL_MAP_READ;
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t imageRowPitch = 0;
    size_t imageSlicePitch = 0;

    Image *image1D = ImageHelper>::create(context);

    ASSERT_NE(nullptr, image1D);
    // Event whose status flips to CL_COMPLETE as soon as its execution status is updated.
    class MockEventWithSetCompleteOnUpdate : public Event {
      public:
        MockEventWithSetCompleteOnUpdate(CommandQueue *cmdQueue, cl_command_type cmdType,
                                         uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {
        }
        void updateExecutionStatus() override {
            setStatus(CL_COMPLETE);
        }
    };
    MockEventWithSetCompleteOnUpdate blockingEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 1);

    cl_event blockingClEvent = &blockingEvent;

    int32_t initialRefCountCmdQ = pCmdQ->getRefInternalCount();

    auto ptr = pCmdQ->enqueueMapImage(
        image1D,
        true,
        mapFlags,
        origin,
        region,
        &imageRowPitch,
        &imageSlicePitch,
        1,
        &blockingClEvent,
        nullptr,
        retVal);
    EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(initialRefCountCmdQ, pCmdQ->getRefInternalCount());
    delete image1D;
}
// Blocking maps that wait on a user event whose updateExecutionStatus() sets CL_COMPLETE. For
// the first image both returned pitches must be non-zero. For the second image the row pitch
// must be non-zero and the slice pitch must be 0.
// NOTE(review): std::unique_ptr and ImageHelper have no template arguments in this copy of the
// file, so the two image types cannot be read from it - confirm against upstream.
TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingCpuMapIsCalledThenReturnRowPitchAndSlicePitch) {
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t retImageRowPitch = 0;
    size_t retImageSlicePitch = 0;

    struct MyMockUserEvent : public UserEvent {
        MyMockUserEvent() : UserEvent(nullptr) {}
        void updateExecutionStatus() override {
            setStatus(CL_COMPLETE);
        }
    };

    std::unique_ptr image(ImageHelper::create(context));
    EXPECT_TRUE(image->mappingOnCpuAllowed());

    MyMockUserEvent blockingEvent;
    cl_event blockingClEvent = &blockingEvent;

    pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region,
                           &retImageRowPitch, &retImageSlicePitch,
                           1, &blockingClEvent, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(0u, retImageRowPitch);
    EXPECT_NE(0u, retImageSlicePitch);

    // Second image: the same out-parameters are reused, so the slice pitch must be rewritten
    // to 0 by the call.
    image.reset(ImageHelper::create(context));
    pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region,
                           &retImageRowPitch, &retImageSlicePitch,
                           1, &blockingClEvent, nullptr, retVal);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(0u, retImageRowPitch);
    EXPECT_EQ(0u, retImageSlicePitch);
}

// Zero-copy image that allows CPU mapping: the returned pitches must equal image_row_pitch and
// image_slice_pitch from the image descriptor.
TEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenMappedOnCpuThenReturnImageRowAndSlicePitch) {
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t retImageRowPitch = 0;
    size_t retImageSlicePitch = 0;

    std::unique_ptr image(ImageHelper::create(context));
    EXPECT_TRUE(image->mappingOnCpuAllowed());
    EXPECT_TRUE(image->isMemObjZeroCopy());

    pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region,
                           &retImageRowPitch, &retImageSlicePitch,
                           0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(image->getImageDesc().image_row_pitch, retImageRowPitch);
    EXPECT_EQ(image->getImageDesc().image_slice_pitch, retImageSlicePitch);
}

// Non-zero-copy image that allows CPU mapping: the returned pitches must equal the image's
// host-pointer row and slice pitch.
TEST_F(EnqueueMapImageTest, givenNonZeroCopyImageWhenMappedOnCpuThenReturnHostRowAndSlicePitch) {
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t retImageRowPitch = 0;
    size_t
retImageSlicePitch = 0;

    // NOTE(review): std::unique_ptr and ImageHelper have no usable template arguments anywhere
    // on this stretch of the file - confirm against upstream.
    std::unique_ptr image(ImageHelper>::create(context));
    EXPECT_TRUE(image->mappingOnCpuAllowed());
    EXPECT_FALSE(image->isMemObjZeroCopy());

    pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region,
                           &retImageRowPitch, &retImageSlicePitch,
                           0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch);
    EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch);
}

// Zero-copy image with a SharingHandler attached, after which CPU mapping is not allowed: the
// returned pitches must equal the image's host-pointer row and slice pitch.
TEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenMappedOnGpuThenReturnHostRowAndSlicePitch) {
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t retImageRowPitch = 0;
    size_t retImageSlicePitch = 0;

    std::unique_ptr image(ImageHelper::create(context));
    image->setSharingHandler(new SharingHandler());
    EXPECT_FALSE(image->mappingOnCpuAllowed());
    EXPECT_TRUE(image->isMemObjZeroCopy());

    pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region,
                           &retImageRowPitch, &retImageSlicePitch,
                           0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch);
    EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch);
}

// Non-zero-copy image with a SharingHandler attached, after which CPU mapping is not allowed:
// the returned pitches must equal the image's host-pointer row and slice pitch.
TEST_F(EnqueueMapImageTest, givenNonZeroCopyImageWhenMappedOnGpuThenReturnHostRowAndSlicePitch) {
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {1, 1, 1};
    size_t retImageRowPitch = 0;
    size_t retImageSlicePitch = 0;

    std::unique_ptr image(ImageHelper>::create(context));
    image->setSharingHandler(new SharingHandler());
    EXPECT_FALSE(image->mappingOnCpuAllowed());
    EXPECT_FALSE(image->isMemObjZeroCopy());

    pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region,
                           &retImageRowPitch, &retImageSlicePitch,
                           0, nullptr, nullptr, retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch);
    EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch);
}

// 4x4x4 CL_MEM_OBJECT_IMAGE3D description with 10 mip levels and a SharingHandler attached,
// mapped with a four-component origin whose last component is 1: the returned pitches must
// equal the image's host-pointer row and slice pitch.
TEST_F(EnqueueMapImageTest, givenMipMapImageWhenMappedThenReturnHostRowAndSlicePitch) {
    const size_t origin[4] = {0, 0, 0, 1};
const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 4; imageDesc.image_depth = 4; std::unique_ptr image(ImageHelper::create(context, &imageDesc)); image->setSharingHandler(new SharingHandler()); EXPECT_FALSE(image->mappingOnCpuAllowed()); pCmdQ->enqueueMapImage(image.get(), true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image->getHostPtrRowPitch(), retImageRowPitch); EXPECT_EQ(image->getHostPtrSlicePitch(), retImageSlicePitch); } TEST_F(EnqueueMapImageTest, givenImage1DArrayWhenEnqueueMapImageIsCalledThenReturnRowAndSlicePitchAreEqual) { class MockImage : public Image { public: MockImage(Context *context, cl_mem_flags flags, GraphicsAllocation *allocation, const ClSurfaceFormatInfo &surfaceFormat, const cl_image_format &imageFormat, const cl_image_desc &imageDesc) : Image(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, 0, nullptr, imageFormat, imageDesc, true, allocation, false, 0, 0, surfaceFormat, nullptr) { } void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel) override {} void setMediaImageArg(void *memory) override {} void setMediaSurfaceRotation(void *memory) override {} void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override {} void transformImage2dArrayTo3d(void *memory) override {} void transformImage3dTo2dArray(void *memory) override {} }; const size_t origin[3] = {0, 0, 0}; const size_t region[3] = {1, 1, 1}; size_t retImageRowPitch = 0; size_t retImageSlicePitch = 0; cl_mem_flags flags = CL_MEM_READ_ONLY; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_width = 329; imageDesc.image_array_size = 48; 
imageDesc.image_row_pitch = 2688; imageDesc.image_slice_pitch = 10752; imageDesc.num_mip_levels = 0; size_t imgSize = imageDesc.image_slice_pitch * imageDesc.image_array_size; cl_image_format imageFormat = {}; imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNSIGNED_INT16; const ClSurfaceFormatInfo *surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto allocation = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context->getDevice(0)->getRootDeviceIndex(), imgSize}); ASSERT_NE(allocation, nullptr); MockImage image(context, flags, allocation, *surfaceFormat, imageFormat, imageDesc); EXPECT_TRUE(image.mappingOnCpuAllowed()); EXPECT_TRUE(image.isMemObjZeroCopy()); pCmdQ->enqueueMapImage(&image, true, CL_MAP_READ, origin, region, &retImageRowPitch, &retImageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(retImageRowPitch, retImageSlicePitch); } struct EnqueueMapImageTypeTest : public CommandEnqueueFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; using CommandQueueHwFixture::pCmdQ; EnqueueMapImageTypeTest(void) { } void SetUp() override { CommandEnqueueFixture::SetUp(); image = ImageHelper>::create(&context); } void TearDown() override { delete image; CommandEnqueueFixture::TearDown(); } protected: template void enqueueMapImage(cl_bool blocking = CL_TRUE) { typedef ImageUseHostPtr Traits; size_t imageRowPitch; size_t imageSlicePitch; size_t origin[3] = {0, 0, 0}; size_t region[3] = {Traits::imageDesc.image_width, Traits::imageDesc.image_height, Traits::imageDesc.image_depth}; cl_int retVal = 0; auto mappedPtr = pCmdQ->enqueueMapImage( image, blocking, Traits::flags, origin, region, &imageRowPitch, &imageSlicePitch, 0, nullptr, nullptr, retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); 
} MockContext context; Image *image = nullptr; }; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueMapImageTypeTest, GiveRequirementForPipeControlWorkaroundWhenMappingImageThenAdditionalPipeControlIsProgrammed) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; // Set taskCount to 1 to call finish on map operation pCmdQ->taskCount = 1; bool blocking = true; enqueueMapImage(blocking); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); auto *cmd = (PIPE_CONTROL *)*itorCmd; EXPECT_NE(cmdList.end(), itorCmd); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmd->getDcFlushEnable()); // Move to next PPC auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmd = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmd->getDcFlushEnable()); } else { // single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmd->getDcFlushEnable()); } } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp000066400000000000000000000216741363734646600314240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; using MarkerTest = Test; HWTEST_F(MarkerTest, GivenCsrAndCmdqWithSameTaskLevelWhenEnqueingMarkerThenPipeControlIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = 
pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. uint32_t originalCSRLevel = 2; commandStreamReceiver.taskLevel = originalCSRLevel; pCmdQ->taskLevel = originalCSRLevel; uint32_t originalTaskCount = 15; commandStreamReceiver.taskCount = originalTaskCount; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // Should sync CSR & CmdQ levels. EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel); parseCommands(*pCmdQ); // If CSR == CQ then a PC is required. auto itorCmd = reverse_find(cmdList.rbegin(), cmdList.rend()); EXPECT_EQ(cmdList.rend(), itorCmd); } HWTEST_F(MarkerTest, GivenCsrAndCmdqWithDifferentTaskLevelsWhenEnqueingMarkerThenPipeControlIsNotAdded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event *event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, event); ASSERT_EQ(CL_SUCCESS, retVal); // Should sync CSR & CmdQ levels. EXPECT_EQ(1u, pCmdQ->taskLevel); EXPECT_EQ(2u, commandStreamReceiver.peekTaskLevel()); auto sizeUsed = pCS->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, pCmdBuffer, sizeUsed)); // If CSR > CQ then a PC isn't required. 
auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorCmd); } TEST_F(MarkerTest, WhenEnqueingMarkerThenEventIsReturned) { cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, event); // Check CL_EVENT_COMMAND_TYPE { std::unique_ptr pEvent((Event *)(event)); cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent.get(), CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } } HWTEST_F(MarkerTest, WhenEnqueingMarkerThenReturnedEventShouldHaveEqualDepthToLastCommandPacketInCommandQueue) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. commandStreamReceiver.taskLevel = 2; pCmdQ->taskLevel = 1; cl_uint numEventsInWaitList = 0; const cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); std::unique_ptr pEvent((Event *)(event)); // Shouldn't sync to CSR // should sync to command queue last packet EXPECT_EQ(1u, pEvent->taskLevel); EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); } HWTEST_F(MarkerTest, GivenEventWithWaitDependenciesWhenEnqueingMarkerThenCsrLevelAndCmdqLevelShouldSync) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); uint32_t initialTaskLevel = 7; // In N:1, CSR is always highest task level. 
commandStreamReceiver.taskLevel = initialTaskLevel; // In N:1, pCmdQ.level <= CSR.level pCmdQ->taskLevel = initialTaskLevel; // In N:1, event.level <= pCmdQ.level Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 6); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); std::unique_ptr pEvent((Event *)(event)); // Should sync CSR & CmdQ levels. if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(initialTaskLevel, pCmdQ->taskLevel); EXPECT_EQ(initialTaskLevel + 1, commandStreamReceiver.peekTaskLevel()); } else { EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel); } EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); EXPECT_EQ(7u, pEvent->taskLevel); } TEST_F(MarkerTest, givenMultipleEventWhenTheyArePassedToMarkerThenOutputEventHasHighestTaskCount) { // combine events with different task counts, max is 16 Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 6); cl_event eventWaitList[] = { &event1, &event2, &event3}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto initialTaskCount = pCmdQ->taskCount; pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); std::unique_ptr pEvent((Event *)(event)); if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount); EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount()); } else { EXPECT_EQ(16u, pCmdQ->taskCount); EXPECT_EQ(16u, pEvent->peekTaskCount()); } 
} TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMarkerThenOutputEventHasHighestTaskCount) { // combine events with different task counts, max is 16 Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 6); UserEvent userEvent(&pCmdQ->getContext()); userEvent.setStatus(CL_COMPLETE); cl_event eventWaitList[] = { &event1, &event2, &event3, &userEvent}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto initialTaskCount = pCmdQ->taskCount; pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, eventWaitList, &event); std::unique_ptr pEvent((Event *)(event)); if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(initialTaskCount + 1, pCmdQ->taskCount); EXPECT_EQ(initialTaskCount + 1, pEvent->peekTaskCount()); } else { EXPECT_EQ(16u, pCmdQ->taskCount); EXPECT_EQ(16u, pEvent->peekTaskCount()); } } HWTEST_F(MarkerTest, givenMarkerCallFollowingNdrangeCallInBatchedModeWhenWaitForEventsIsCalledThenFlushStampIsProperlyUpdated) { MockKernelWithInternals mockKernel(*this->pClDevice, this->context); auto &ultCommandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); ultCommandStreamReceiver.overrideDispatchPolicy(DispatchMode::BatchedDispatch); cl_event eventFromNdr = nullptr; size_t gws[] = {1}; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &eventFromNdr); cl_event eventFromMarker = nullptr; pCmdQ->enqueueMarkerWithWaitList(1u, &eventFromNdr, &eventFromMarker); ultCommandStreamReceiver.flushStamp->setStamp(1u); clEnqueueWaitForEvents(pCmdQ, 1u, &eventFromMarker); auto neoEvent = castToObject(eventFromMarker); EXPECT_EQ(1u, neoEvent->flushStamp->peekStamp()); clReleaseEvent(eventFromMarker); clReleaseEvent(eventFromNdr); } 
/* NOTE(review): the text between this note and the licence comment is not C++. It looks like a tar (ustar) member header -- member path, a run of numeric fields and the "ustar" marker -- that ended up inline when the archive was flattened to text. It is kept verbatim. */
compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_media_kernel.cpp000066400000000000000000000017161363734646600313330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "opencl/test/unit_test/fixtures/media_kernel_fixture.h"

#include "test.h"

using namespace NEO;

/* Suite alias: the three tests below use MediaKernelFixture itself as their fixture type. */
typedef MediaKernelFixture MediaKernelTest;

/* Checks that isVmeKernel() is not true for pKernel and is true for pVmeKernel.
 * Neither pointer is declared in this file; presumably both are members of MediaKernelFixture -- confirm. */
TEST_F(MediaKernelTest, GivenKernelWhenCheckingIsVmeKernelThenOnlyVmeKernelReportsTrue) {
    /* ASSERT_* rather than EXPECT_*: a failed check ends the test body immediately. */
    ASSERT_NE(true, pKernel->isVmeKernel());
    ASSERT_EQ(true, pVmeKernel->isVmeKernel());
}

/* Calls enqueueVmeKernel() (not defined in this file) and expects the container returned by getCommandsList() to hold exactly one element.
 * NOTE(review): going by the test name the counted command is a pipeline select, but no command type is visible at the getCommandsList() call here -- confirm against the fixture. */
HWTEST_F(MediaKernelTest, GivenVmeKernelWhenEnqueuingKernelThenSinglePipelineSelectIsProgrammed) {
    enqueueVmeKernel();
    auto numCommands = getCommandsList().size();
    EXPECT_EQ(1u, numCommands);
}

/* Same check as the VME case, but the work is submitted through enqueueRegularKernel(): getCommandsList() must again hold exactly one element.
 * NOTE(review): as in the VME case, the command type being counted is not visible in this text -- confirm. */
HWTEST_F(MediaKernelTest, GivenNonVmeKernelWhenEnqueuingKernelThenSinglePipelineSelectIsProgrammed) {
    enqueueRegularKernel();
    auto numCommands = getCommandsList().size();
    EXPECT_EQ(1u, numCommands);
}

/* NOTE(review): the next line is again archive residue rather than source: a file name, numeric header fields, the "ustar" marker and a directory prefix that runs straight into the opening of the licence comment. It appears to mark the start of enqueue_migrate_mem_objects_tests.cpp. It is kept verbatim. */
enqueue_migrate_mem_objects_tests.cpp000066400000000000000000000041011363734646600340450ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */

#include "shared/source/command_stream/command_stream_receiver.h"

#include "opencl/source/event/user_event.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/command_stream/command_stream_fixture.h"
#include "opencl/test/unit_test/fixtures/device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "test.h"

using namespace NEO;

/* Fixture that inherits both DeviceFixture and CommandQueueHwFixture and chains their SetUp()/TearDown() calls. */
class MigrateMemObjectsFixture : public DeviceFixture, public CommandQueueHwFixture {
  public:
    /* Runs DeviceFixture::SetUp() first, then CommandQueueHwFixture::SetUp() with pClDevice, and records a fatal failure if pCmdQ is still null afterwards.
     * NOTE(review): the meaning of the second argument (0) is not visible here; presumably queue properties -- confirm. */
    void SetUp() override {
        DeviceFixture::SetUp();
        CommandQueueHwFixture::SetUp(pClDevice, 0);
        ASSERT_NE(nullptr, pCmdQ);
    }

    /* Tears down in the reverse order of SetUp(): the command-queue fixture first, then the device fixture. */
    void TearDown() override {
        CommandQueueHwFixture::TearDown();
        DeviceFixture::TearDown();
    }
};

/* NOTE(review): `Test` appears here without template arguments, so as written this does not bind the alias to the fixture above. Presumably this read Test<MigrateMemObjectsFixture> before angle-bracketed text was lost -- confirm against the upstream file. */
typedef Test MigrateMemObjectsTest;
TEST_F(MigrateMemObjectsTest, GivenNullEventWhenMigratingEventsThenSuccessIsReturned) { MockBuffer buffer; auto retVal = pCmdQ->enqueueMigrateMemObjects( 1, (cl_mem *)&buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MigrateMemObjectsTest, GivenValidEventListWhenMigratingEventsThenSuccessIsReturned) { MockBuffer buffer; UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; auto retVal = pCmdQ->enqueueMigrateMemObjects( 1, (cl_mem *)&buffer, CL_MIGRATE_MEM_OBJECT_HOST, 1, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MigrateMemObjectsTest, GivenEventPointerWhenMigratingEventsThenEventIsReturned) { MockBuffer buffer; cl_event event = nullptr; auto retVal = pCmdQ->enqueueMigrateMemObjects( 1, (cl_mem *)&buffer, CL_MIGRATE_MEM_OBJECT_HOST, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, event); Event *eventObject = (Event *)event; delete eventObject; } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_read_buffer_event_tests.cpp000066400000000000000000000310151363734646600335760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; typedef HelloWorldTest EnqueueReadBuffer; TEST_F(EnqueueReadBuffer, GivenPointerToEventListWhenReadingBufferThenEventIsReturned) { cl_bool blockingRead = CL_TRUE; size_t offset = 0; size_t size = sizeof(cl_float); cl_float pDestMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; cl_uint numEventsInWaitList = 0; cl_event *eventWaitList = nullptr; cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = 
pCmdQ->enqueueReadBuffer( srcBuffer.get(), blockingRead, offset, size, pDestMemory, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_READ_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EnqueueReadBuffer, WhenReadingBufferThenEventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) { uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t offset = 0; size_t size = sizeof(cl_float); cl_float pDestMemory[] = {0.0f, 0.0f, 0.0f, 0.0f}; cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); auto retVal = pCmdQ->enqueueReadBuffer( srcBuffer.get(), blockingRead, offset, size, pDestMemory, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u + 1u, pEvent->taskLevel); delete pEvent; } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t 
taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(17u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdOOQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndEventNotReadyWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, 4); cl_bool blockingRead = CL_FALSE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = {&event1}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto dstBuffer = std::unique_ptr(BufferHelper<>::create()); cl_float mem[4]; retVal = pCmdQ->enqueueReadBuffer(dstBuffer.get(), blockingRead, 0, size, mem, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(CompletionStamp::levelNotReady, pEvent->taskLevel); 
EXPECT_EQ(CompletionStamp::levelNotReady, pCmdQ->taskLevel); event1.taskLevel = 20; event1.setStatus(CL_COMPLETE); pEvent->updateExecutionStatus(); pCmdQ->isQueueBlocked(); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), 
CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = castToObject(event); if (pCmdOOQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel); EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel); } else { EXPECT_EQ(19u, pCmdOOQ->taskLevel); EXPECT_EQ(19u, pEvent->taskLevel); } pEvent->release(); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h000066400000000000000000000027241363734646600323730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueReadBufferTypeTest : public CommandEnqueueFixture, public ::testing::Test { EnqueueReadBufferTypeTest(void) : srcBuffer(nullptr) { } void SetUp() override { CommandEnqueueFixture::SetUp(); BufferDefaults::context = new MockContext; srcBuffer.reset(BufferHelper<>::create()); nonZeroCopyBuffer.reset(BufferHelper>::create()); } void TearDown() override { srcBuffer.reset(nullptr); nonZeroCopyBuffer.reset(nullptr); delete BufferDefaults::context; CommandEnqueueFixture::TearDown(); } protected: template void 
enqueueReadBuffer(cl_bool blocking = CL_TRUE) { auto retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer( pCmdQ, srcBuffer.get(), blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } std::unique_ptr srcBuffer; std::unique_ptr nonZeroCopyBuffer; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_fixture.h000066400000000000000000000047351363734646600334140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueReadBufferRectTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(); context.reset(new MockContext(pCmdQ->getDevice().getSpecializedDevice())); BufferDefaults::context = context.get(); //For 3D hostPtr = ::alignedMalloc(slicePitch * rowPitch, 4096); auto retVal = CL_INVALID_VALUE; buffer.reset(Buffer::create( context.get(), CL_MEM_READ_WRITE, slicePitch * rowPitch, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); nonZeroCopyBuffer.reset(BufferHelper>::create()); } void TearDown() override { nonZeroCopyBuffer.reset(); buffer.reset(); ::alignedFree(hostPtr); context.reset(); CommandEnqueueFixture::TearDown(); } protected: template void enqueueReadBufferRect2D(cl_bool blocking = CL_FALSE) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; auto retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), 
blocking, //non-blocking bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } std::unique_ptr context; std::unique_ptr buffer; std::unique_ptr nonZeroCopyBuffer; void *hostPtr = nullptr; static const size_t rowPitch = 100; static const size_t slicePitch = 100 * 100; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp000066400000000000000000000622521363734646600334210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; HWTEST_F(EnqueueReadBufferRectTest, GivenNullBufferWhenReadingBufferThenInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueReadBufferRect( pCmdQ, nullptr, CL_FALSE, bufferOrigin, hostOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueReadBufferRectTest, GivenNullHostPtrWhenReadingBufferThenInvalidValueErrorIsReturned) { auto retVal = CL_SUCCESS; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 0}; retVal = clEnqueueReadBufferRect( pCmdQ, buffer.get(), CL_FALSE, bufferOrigin, 
hostOrigin, region, 10, 0, 10, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(EnqueueReadBufferRectTest, GivenValidParamsWhenReadingBufferThenSuccessIsReturned) { auto retVal = CL_SUCCESS; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueReadBufferRect( pCmdQ, buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueReadBufferRectTest, GivenBlockingEnqueueWhenReadingBufferThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); enqueueReadBufferRect2D(CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueReadBufferRectTest, GivenNonBlockingEnqueueWhenReadingBufferThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueReadBufferRect2D(CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, Given2dRegionWhenReadingBufferThenCommandsAreProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueReadBufferRect2D(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, 
cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueReadBufferRectTest, WhenReadingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueReadBufferRect2D(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueReadBufferRectTest, WhenReadingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueReadBufferRect2D(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueReadBufferRect2D(); // Extract the kernel used MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = buffer.get(); dc.dstPtr = hostPtr; dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {50, 50, 1}; dc.srcRowPitch = rowPitch; dc.srcSlicePitch = slicePitch; dc.dstRowPitch = rowPitch; dc.dstSlicePitch = slicePitch; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueReadBufferRectTest, 
WhenReadingBufferThenL3ProgrammingIsCorrect) { enqueueReadBufferRect2D(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueReadBufferRect2D(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueReadBufferRect2D(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA 
INTERFACE_DESCRIPTOR_DATA; enqueueReadBufferRect2D(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueReadBufferRectTest, WhenReadingBufferThenOnePipelineSelectIsProgrammed) { enqueueReadBufferRect2D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, WhenReadingBufferThenMediaVfeStateIsCorrect) { enqueueReadBufferRect2D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferRectTest, GivenBlockingEnqueueWhenReadingBufferThenPipeControlIsProgrammedAfterWalkerWithDcFlushSet) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto blocking = CL_TRUE; enqueueReadBufferRect2D(blocking); 
auto itorWalker = find(cmdList.begin(), cmdList.end()); // All state should be programmed after walker auto itorCmd = find(itorWalker, cmdList.end()); auto *cmd = (PIPE_CONTROL *)*itorCmd; EXPECT_NE(cmdList.end(), itorCmd); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmd->getDcFlushEnable()); // Move to next PPC auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmd = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmd->getDcFlushEnable()); } else { // single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmd->getDcFlushEnable()); } } HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; void *ptr = buffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, numEventsInWaitList, eventWaitList, &event); ; EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdQ->taskLevel); EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType()); pEvent->release(); } 
// Out-of-order queue: a rect read whose host pointer is the buffer's own CPU address,
// submitted with two wait-list events. The returned event and the queue are both
// expected at task level 19 (the value of taskLevelEvent2).
// NOTE(review): the template argument of std::unique_ptr below appears to have been
// lost in extraction - confirm against the upstream file.
HWTEST_F(EnqueueReadBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    uint32_t taskLevelCmdQ = 17;
    pCmdOOQ->taskLevel = taskLevelCmdQ;

    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);

    cl_event eventWaitList[] = {
        &event1,
        &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;

    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    retVal = pCmdOOQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        numEventsInWaitList,
        eventWaitList,
        &event);

    EXPECT_EQ(CL_SUCCESS, retVal);
    // The event is dereferenced below, so a missing event must stop the test.
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(19u, pEvent->taskLevel);
    EXPECT_EQ(19u, pCmdOOQ->taskLevel);
    EXPECT_EQ(CL_COMMAND_READ_BUFFER_RECT, (const int)pEvent->getCommandType());

    pEvent->release();
}

// In-order queue, row pitches passed as 0, host pointer equal to the buffer's CPU
// address: the enqueue must succeed and leave the queue task level at 0.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndRowPitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};
    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        0,
        slicePitch,
        0,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}

// Same scenario as above, but with the slice pitches passed as 0 instead of the
// row pitches.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndSlicePitchEqualZeroAndDstPtrEqualSrcPtrWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {50, 50, 1};
    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        0,
        rowPitch,
        0,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}

// Buffer and host origins differ, but the host pointer is advanced by the byte
// distance between the two origins; the queue task level is expected to stay at 0.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointTheSameStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {50, 50, 0};
    size_t hostOrigin[] = {20, 20, 0};
    size_t region[] = {50, 50, 1};
    // Byte offset of (bufferOrigin - hostOrigin) expressed through the row/slice pitches.
    size_t hostOffset = (bufferOrigin[2] - hostOrigin[2]) * slicePitch + (bufferOrigin[1] - hostOrigin[1]) * rowPitch + (bufferOrigin[0] - hostOrigin[0]);
    auto hostStorage = ptrOffset(ptr, hostOffset);
    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        hostStorage,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}

// Host origin differs from the buffer origin while the host pointer is left at the
// buffer's CPU address; the queue task level is expected to advance to 1.
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointDiffrentStorageWithHostWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    cl_int retVal = CL_SUCCESS;
    void *ptr = buffer->getCpuAddressForMemoryTransfer();
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {1, 1, 0};
    size_t region[] = {1, 1, 1};
    retVal = pCmdQ->enqueueReadBufferRect(
        buffer.get(),
        CL_FALSE,
        bufferOrigin,
        hostOrigin,
        region,
        rowPitch,
        slicePitch,
        rowPitch,
        slicePitch,
        ptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, pCmdQ->taskLevel);
}
HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = pCmdQ->enqueueReadBufferRect( nonZeroCopyBuffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtrWhenEnqueueReadBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) { hwInfo->capabilityTable.blitterOperationsSupported = false; initializeFixture(); if (device->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); buffer->forceDisallowCPUCopy = true; Vec3 hostOffset(hostOrigin); auto misalignedDstPtr = ptrOffset(reinterpret_cast(memory), hostOffset.z * hostSlicePitch); auto retVal = cmdQ->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, memory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; cmdQ->finish(); parseCommands(*cmdQ); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); 
EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); } else if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); } } if (kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedDstPtr, alignDown(misalignedDstPtr, 4)), *dstOffset); } else { // DstOrigin arg should be 16 bytes in size, if that changes, above if path should be modified EXPECT_TRUE(false); } } using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; constexpr size_t rowPitch = 100; constexpr size_t slicePitch = 100 * 100; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueReadBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueReadBufferRectHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; size_t bufferOrigin[3] 
= {0, 0, 0}; size_t hostOrigin[3] = {0, 0, 0}; size_t region[3] = {1, 1, 1}; size_t bufferRowPitch = 10; size_t bufferSlicePitch = 0; size_t hostRowPitch = 10; size_t hostSlicePitch = 10; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqeueReadBufferRectStatelessTest = EnqueueReadBufferRectHw; HWTEST_F(EnqeueReadBufferRectStatelessTest, WhenReadingBufferRectStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueReadBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqeueReadBufferRectStatefulTest = EnqueueReadBufferRectHw; HWTEST_F(EnqeueReadBufferRectStatefulTest, WhenReadingBufferRectStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueReadBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp000066400000000000000000001004701363734646600323770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" 
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h"
#include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h"
#include "opencl/test/unit_test/helpers/unit_test_helper.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
#include "test.h"

#include "reg_configs_common.h"

using namespace NEO;

// NOTE(review): throughout this file several template argument lists (find,
// std::unique_ptr, std::make_unique, reinterpret_cast, static_cast, UnitTestHelper,
// validateCommand) appear to have been lost in extraction - confirm against the
// upstream file.

// A null mem object must be rejected with CL_INVALID_MEM_OBJECT.
HWTEST_F(EnqueueReadBufferTypeTest, null_mem_object) {
    auto data = 1;
    auto retVal = clEnqueueReadBuffer(
        pCmdQ,
        nullptr,
        false,
        0,
        sizeof(data),
        &data,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal);
}

// A null destination pointer must be rejected with CL_INVALID_VALUE.
HWTEST_F(EnqueueReadBufferTypeTest, null_user_pointer) {
    auto data = 1;

    auto retVal = clEnqueueReadBuffer(
        pCmdQ,
        srcBuffer.get(),
        false,
        0,
        sizeof(data),
        nullptr,
        0,
        nullptr,
        nullptr);

    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// Checks the basic fields of the GPGPU_WALKER emitted for a read-buffer enqueue.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, GPGPUWalker) {
    typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER;

    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer();

    ASSERT_NE(cmdList.end(), itorWalker);
    auto *cmd = (GPGPU_WALKER *)*itorWalker;

    // Verify GPGPU_WALKER parameters
    EXPECT_NE(0u, cmd->getThreadGroupIdXDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdYDimension());
    EXPECT_NE(0u, cmd->getThreadGroupIdZDimension());
    EXPECT_NE(0u, cmd->getRightExecutionMask());
    EXPECT_NE(0u, cmd->getBottomExecutionMask());
    EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize());
    EXPECT_NE(0u, cmd->getIndirectDataLength());
    EXPECT_FALSE(cmd->getIndirectParameterEnable());

    // Compute the SIMD lane mask
    size_t simd =
        cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8;
    uint64_t simdMask = maxNBitValue(simd);

    // Mask off lanes based on the execution masks
    // NOTE(review): lanesPerThreadX is computed but never asserted on.
    auto laneMaskRight = cmd->getRightExecutionMask() & simdMask;
    auto lanesPerThreadX = 0;
    while (laneMaskRight) {
        lanesPerThreadX += laneMaskRight & 1;
        laneMaskRight >>= 1;
    }
}

// A blocking GPU read must advance the queue task level.
HWTEST_F(EnqueueReadBufferTypeTest, bumpsTaskLevel) {
    auto taskLevelBefore = pCmdQ->taskLevel;

    srcBuffer->forceDisallowCPUCopy = true;
    EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE);
    EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore);
}

// Blocking read: queue task count matches the CSR and the queue task level equals
// the CSR task level captured before the enqueue.
HWTEST_F(EnqueueReadBufferTypeTest, alignsToCSR_Blocking) {
    //this test case assumes IOQ
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.taskCount = pCmdQ->taskCount + 100;
    csr.taskLevel = pCmdQ->taskLevel + 50;
    auto oldCsrTaskLevel = csr.peekTaskLevel();

    srcBuffer->forceDisallowCPUCopy = true;
    EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE);
    EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
    EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel);
}

// Non-blocking read: queue task count matches the CSR and the CSR task level is one
// above the queue task level.
HWTEST_F(EnqueueReadBufferTypeTest, alignsToCSR_NonBlocking) {
    //this test case assumes IOQ
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.taskCount = pCmdQ->taskCount + 100;
    csr.taskLevel = pCmdQ->taskLevel + 50;

    EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_FALSE);
    EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount);
    EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1);
}

// The enqueue must consume space in the command stream.
HWTEST_F(EnqueueReadBufferTypeTest, addsCommands) {
    auto usedCmdBufferBefore = pCS->getUsed();

    srcBuffer->forceDisallowCPUCopy = true;
    EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE);
    EXPECT_NE(usedCmdBufferBefore, pCS->getUsed());
}

// The enqueue must consume indirect heap space; the expected heaps are derived from
// the kernel the CopyBufferToBuffer builder produces for equivalent parameters.
HWTEST_F(EnqueueReadBufferTypeTest, addsIndirectData) {
    auto dshBefore = pDSH->getUsed();
    auto iohBefore = pIOH->getUsed();
    auto sshBefore = pSSH->getUsed();

    srcBuffer->forceDisallowCPUCopy = true;
    EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, srcBuffer.get(), CL_TRUE);

    MultiDispatchInfo multiDispatchInfo;
    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
                                                                            pCmdQ->getDevice());
    ASSERT_NE(nullptr, &builder);

    BuiltinOpParams dc;
    dc.dstPtr = EnqueueReadBufferTraits::hostPtr;
    dc.srcMemObj = srcBuffer.get();
    dc.srcOffset = {EnqueueReadBufferTraits::offset, 0, 0};
    dc.size = {srcBuffer->getSize(), 0, 0};
    builder.buildDispatchInfos(multiDispatchInfo, dc);
    // Fatal: begin() is dereferenced on the next line.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    // Fatal: kernel is dereferenced below.
    ASSERT_NE(nullptr, kernel);

    EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel));
    EXPECT_NE(iohBefore, pIOH->getUsed());
    if (kernel->requiresSshForBuffers()) {
        EXPECT_NE(sshBefore, pSSH->getUsed());
    }
}

// Delegates L3 programming validation to the shared helper.
HWTEST_F(EnqueueReadBufferTypeTest, LoadRegisterImmediateL3CNTLREG) {
    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer();
    validateL3Programming(cmdList, itorWalker);
}

// Delegates STATE_BASE_ADDRESS validation to the shared helper.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) {
    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer();
    auto &ultCsr = this->pDevice->getUltCommandStreamReceiver();

    validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex),
                             pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu);
}

// Checks alignment and length of the MEDIA_INTERFACE_DESCRIPTOR_LOAD command.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, MediaInterfaceDescriptorLoad) {
    typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;

    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer();

    // All state should be programmed before walker
    auto itorCmd = find(itorPipelineSelect, itorWalker);
    ASSERT_NE(itorWalker, itorCmd);

    auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd;

    // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA));

    // Validate the start address
    size_t alignmentStartAddress = 64 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress);

    // Validate the length
    EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength());
    size_t alignmentTotalLength = 32 * sizeof(uint8_t);
    EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength);

    // Generically validate this command
    FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd);
}

// Checks that the interface descriptor lies within the dynamic state buffer size and
// that its basic fields are populated.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, InterfaceDescriptorData) {
    typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD;
    typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;
    typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;

    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer();

    // Extract the MIDL command
    auto itorCmd = find(itorPipelineSelect, itorWalker);
    ASSERT_NE(itorWalker, itorCmd);
    auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd;

    // Extract the SBA command
    itorCmd = find(cmdList.begin(), itorWalker);
    ASSERT_NE(itorWalker, itorCmd);
    auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd;

    // Extract the DSH
    auto DSH = cmdSBA->getDynamicStateBaseAddress();
    ASSERT_NE(0u, DSH);

    // IDD should be located within DSH
    auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress();
    auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength();
    ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize);

    auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;

    // Validate the kernel start pointer.  Technically, a kernel can start at address 0 but let's force a value.
    auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer();
    EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize);

    EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup());
    EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength());
    EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength());
}

// Exactly one pipeline-select-enabling PIPELINE_SELECT is expected.
HWTEST_F(EnqueueReadBufferTypeTest, PipelineSelect) {
    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer();
    int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect();
    EXPECT_EQ(1, numCommands);
}

// Delegates MEDIA_VFE_STATE validation to the shared helper.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, MediaVFEState) {
    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer();
    validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState);
}

// A blocking read must be followed by a PIPE_CONTROL with DcFlush set. Each iterator
// is checked against cmdList.end() before it is dereferenced.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadBufferTypeTest, blockingRequiresPipeControlAfterWalkerWithDCFlushSet) {
    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;

    srcBuffer->forceDisallowCPUCopy = true;
    enqueueReadBuffer(CL_TRUE);

    // All state should be programmed after walker
    auto itorWalker = find(cmdList.begin(), cmdList.end());
    auto itorCmd = find(itorWalker, cmdList.end());
    ASSERT_NE(cmdList.end(), itorCmd);
    auto *cmd = (PIPE_CONTROL *)*itorCmd;

    if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) {
        // WA path: the first PIPE_CONTROL found after the walker is expected without
        // DcFlush; the next PIPE_CONTROL is expected to have DcFlush set.
        EXPECT_FALSE(cmd->getDcFlushEnable());
        // Move to next PPC
        auto itorCmdP = ++((GenCmdList::iterator)itorCmd);
        EXPECT_NE(cmdList.end(), itorCmdP);
        auto itorCmd2 = find(itorCmdP, cmdList.end());
        ASSERT_NE(cmdList.end(), itorCmd2);
        cmd = (PIPE_CONTROL *)*itorCmd2;
        EXPECT_TRUE(cmd->getDcFlushEnable());
    } else {
        // BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag
        EXPECT_TRUE(cmd->getDcFlushEnable());
    }
}

// Host pointer 0x1040 with a cache-line-sized read: the MOCS index recorded by the
// CSR must be the one for OCL_BUFFER or OCL_BUFFER_CONST.
HWTEST_F(EnqueueReadBufferTypeTest, givenAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenRecordedL3IndexIsL3OrL1ON) {
    void *ptr = (void *)0x1040;

    cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             nullptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    auto &csr = pDevice->getUltCommandStreamReceiver();
    auto gmmHelper = pDevice->getGmmHelper();
    auto mocsIndexL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
    auto mocsIndexL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1;

    EXPECT_TRUE(mocsIndexL3on == csr.latestSentStatelessMocsConfig || mocsIndexL1on == csr.latestSentStatelessMocsConfig);
}

// Host pointer 0x1039: the recorded MOCS index must be the CACHELINE_MISALIGNED one.
// A following read to 0x1040 must switch it back to OCL_BUFFER or OCL_BUFFER_CONST.
HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndAlignedSizeWhenReadBufferIsCalledThenRecordedL3IndexIsL3Off) {
    void *ptr = (void *)0x1039;

    cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             nullptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    auto &csr = pDevice->getUltCommandStreamReceiver();
    auto gmmHelper = pDevice->getGmmHelper();
    auto mocsIndexL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;
    auto mocsIndexL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1;
    auto mocsIndexL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1;

    EXPECT_EQ(mocsIndexL3off, csr.latestSentStatelessMocsConfig);

    void *ptr2 = (void *)0x1040;

    retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
                                      CL_FALSE,
                                      0,
                                      MemoryConstants::cacheLineSize,
                                      ptr2,
                                      nullptr,
                                      0,
                                      nullptr,
                                      nullptr);

    // The result of the second enqueue is checked as well.
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(mocsIndexL3on == csr.latestSentStatelessMocsConfig || mocsIndexL1on == csr.latestSentStatelessMocsConfig);
}

// Read to host pointer 0x1039 gated on a user event: after the event status is set
// to 0, the recorded MOCS index must be the CACHELINE_MISALIGNED one.
HWTEST_F(EnqueueReadBufferTypeTest, givenNotAlignedPointerAndSizeWhenBlockedReadBufferIsCalledThenRecordedL3IndexIsL3Off) {
    auto ptr = reinterpret_cast(0x1039);

    auto userEvent = clCreateUserEvent(pCmdQ->getContextPtr(), nullptr);

    cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             nullptr,
                                             1,
                                             &userEvent,
                                             nullptr);

    clSetUserEventStatus(userEvent, 0u);

    EXPECT_EQ(CL_SUCCESS, retVal);
    auto &csr = pDevice->getUltCommandStreamReceiver();
    auto gmmHelper = pDevice->getGmmHelper();
    auto mocsIndexL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1;

    EXPECT_EQ(mocsIndexL3off, csr.latestSentStatelessMocsConfig);
    clReleaseEvent(userEvent);
}

// Out-of-order queue, DoCpuCopyOnReadBuffer=1, host pointer equal to srcBuffer's CPU
// address: task level is expected to stay at 0.
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(1);
    std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
    cl_int retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(),
                                               CL_FALSE,
                                               0,
                                               MemoryConstants::cacheLineSize,
                                               ptr,
                                               nullptr,
                                               0,
                                               nullptr,
                                               nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdOOQ->taskLevel);
}

// Same as above with DoCpuCopyOnReadBuffer=0.
HWTEST_F(EnqueueReadBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelNotIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE));
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
    cl_int retVal = pCmdOOQ->enqueueReadBuffer(srcBuffer.get(),
                                               CL_FALSE,
                                               0,
                                               MemoryConstants::cacheLineSize,
                                               ptr,
                                               nullptr,
                                               0,
                                               nullptr,
                                               nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdOOQ->taskLevel);
}

// In-order queue, DoCpuCopyOnReadBuffer=1, host pointer equal to srcBuffer's CPU
// address: task level is expected to stay at 0.
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(1);
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
    cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             nullptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}

// Same as above with DoCpuCopyOnReadBuffer=0.
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    void *ptr = srcBuffer->getCpuAddressForMemoryTransfer();
    cl_int retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(),
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             nullptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, pCmdQ->taskLevel);
}

// In-order queue, DoCpuCopyOnReadBuffer=0, non-zero-copy buffer read into its own
// CPU address: task level is expected to advance to 1.
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();
    cl_int retVal = pCmdQ->enqueueReadBuffer(nonZeroCopyBuffer.get(),
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             nullptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, pCmdQ->taskLevel);
}

// Same as above with DoCpuCopyOnReadBuffer=1.
HWTEST_F(EnqueueReadBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndNonZeroCopyWhenReadBufferIsExecutedThenTaskLevelShouldBeIncreased) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(1);
    void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer();
    cl_int retVal = pCmdQ->enqueueReadBuffer(nonZeroCopyBuffer.get(),
                                             CL_FALSE,
                                             0,
                                             MemoryConstants::cacheLineSize,
                                             ptr,
                                             nullptr,
                                             0,
                                             nullptr,
                                             nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, pCmdQ->taskLevel);
}
HWTEST_F(EnqueueReadBufferTypeTest, givenCommandQueueWhenEnqueueReadBufferIsCalledThenItCallsNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); auto retVal = mockCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockCmdQ->notifyEnqueueReadBufferCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenCommandQueueWhenEnqueueReadBufferWithMapAllocationIsCalledThenItDoesntCallNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); GraphicsAllocation mapAllocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; auto retVal = mockCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, &mapAllocation, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(mockCmdQ->notifyEnqueueReadBufferCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsAvailableThenItIsNotUnlocked) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, 
memoryManager.unlockResourceCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenForcedCpuCopyWhenEnqueueReadCompressedBufferThenDontCopyOnCpu) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->getGraphicsAllocation()->isLocked()); EXPECT_FALSE(mockCmdQ->cpuDataTransferHandlerCalled); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->getGraphicsAllocation()->isLocked()); EXPECT_TRUE(mockCmdQ->cpuDataTransferHandlerCalled); } HWTEST_F(EnqueueReadBufferTypeTest, gicenEnqueueReadBufferCalledWhenLockedPtrInTransferPropertisIsNotAvailableThenItIsNotUnlocked) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnReadBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr 
buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::System4KBPages); void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueReadBuffer(buffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); } HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenBufferShouldBeSetDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); ASSERT_FALSE(srcBuffer->getGraphicsAllocation()->isAllocDumpable()); cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(srcBuffer->getGraphicsAllocation()->isAllocDumpable()); EXPECT_TRUE(srcBuffer->forceDisallowCPUCopy); } HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferNonBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenBufferShouldntBeSetDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); ASSERT_FALSE(srcBuffer->getGraphicsAllocation()->isAllocDumpable()); cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(srcBuffer->getGraphicsAllocation()->isAllocDumpable()); EXPECT_FALSE(srcBuffer->forceDisallowCPUCopy); } using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadBufferWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; retVal = 
pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueReadBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqeueReadBufferStatelessTest = EnqueueReadBufferHw; HWTEST_F(EnqeueReadBufferStatelessTest, WhenReadingBufferStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueReadBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqeueReadBufferStatefulTest = EnqueueReadBufferHw; HWTEST_F(EnqeueReadBufferStatefulTest, WhenReadingBufferStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueReadBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_read_image_fixture.h000066400000000000000000000034571363734646600322100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include 
"opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueReadImageTest : public CommandEnqueueFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; using CommandQueueHwFixture::pCmdQ; void SetUp(void) override { CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); srcImage = Image2dHelper<>::create(context); const auto &imageDesc = srcImage->getImageDesc(); dstPtr = new float[imageDesc.image_width * imageDesc.image_height]; } void TearDown(void) override { delete srcImage; delete[] dstPtr; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueReadImage(cl_bool blocking = EnqueueReadImageTraits::blocking) { auto retVal = EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } float *dstPtr = nullptr; Image *srcImage = nullptr; MockContext *context = nullptr; }; struct EnqueueReadImageMipMapTest : public EnqueueReadImageTest, public ::testing::WithParamInterface { }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp000066400000000000000000000653161363734646600322210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_read_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include 
"opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenGpgpuWalkerIsProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueReadImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueReadImageTest, GivenBlockingEnqueueWhenReadingImageThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueReadImageTest, GivenNonBlockingEnqueueWhenReadingImageThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, EnqueueReadImageTraits::blocking); 
EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenL3ProgrammingIsCorrect) { enqueueReadImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueReadImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueReadImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenInterfaceDescriptorData) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename 
FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueReadImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); auto localWorkSize = 4u; auto simd = 32u; auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. 
EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenSurfaceStateIsCorrect) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueReadImage(); // BufferToImage kernel uses BTI=1 for destSurface uint32_t bindingTableIndex = 0; const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), bindingTableIndex); // EnqueueReadImage uses multi-byte copies depending on per-pixel-size-in-bytes const auto &imageDesc = srcImage->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueReadImageTest, WhenReadingImageThenPipelineSelectIsProgrammed) { enqueueReadImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, WhenReadingImageThenMediaVfeStateIsCorrect) { enqueueReadImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueReadImageTest, GivenBlockingEnqueueWhenReadingImageThenPipeControlWithDcFlushIsSetAfterWalker) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; bool blocking = true; enqueueReadImage(blocking); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); auto *cmd = 
(PIPE_CONTROL *)*itorCmd; EXPECT_NE(cmdList.end(), itorCmd); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmd->getDcFlushEnable()); // Move to next PPC auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmd = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmd->getDcFlushEnable()); } else { // BDW: single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmd->getDcFlushEnable()); } } HWTEST_F(EnqueueReadImageTest, GivenImage1DarrayWhenReadImageIsCalledThenHostPtrSizeIsCalculatedProperly) { auto srcImage = Image1dArrayHelper<>::create(context); auto imageDesc = srcImage->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_array_size, 1}; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation); EXPECT_EQ(temporaryAllocation->getUnderlyingBufferSize(), imageSize); delete srcImage; } HWTEST_F(EnqueueReadImageTest, GivenImage1DarrayWhenReadImageIsCalledThenRowPitchIsSetToSlicePitch) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); EBuiltInOps::Type copyBuiltIn = EBuiltInOps::CopyImage3dToBuffer; auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getDevice()); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new 
MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); auto srcImage = Image1dArrayHelper<>::create(context); auto imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_array_size, 1}; size_t rowPitch = 64; size_t slicePitch = 128; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_TRUE, origin, region, rowPitch, slicePitch); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(copyBuiltIn, pCmdQ->getDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(params->srcRowPitch, slicePitch); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); delete srcImage; } HWTEST_F(EnqueueReadImageTest, GivenImage2DarrayWhenReadImageIsCalledThenHostPtrSizeIsCalculatedProperly) { auto srcImage = Image2dArrayHelper<>::create(context); auto imageDesc = srcImage->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_height * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage, CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation); EXPECT_EQ(temporaryAllocation->getUnderlyingBufferSize(), imageSize); delete srcImage; } HWTEST_F(EnqueueReadImageTest, GivenImage1DAndImageShareTheSameStorageWithHostPtrWhenReadReadImageIsCalledThenImageIsNotRead) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage2(Image1dHelper<>::create(context)); auto imageDesc = dstImage2->getImageDesc(); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); 
size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; void *ptr = dstImage2->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage2->getHostPtrRowPitch(); size_t slicePitch = dstImage2->getHostPtrSlicePitch(); retVal = pCmdOOQ->enqueueReadImage(dstImage2.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueReadImageTest, GivenImage1DArrayAndImageShareTheSameStorageWithHostPtrWhenReadReadImageIsCalledThenImageIsNotRead) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage2(Image1dArrayHelper<>::create(context)); auto imageDesc = dstImage2->getImageDesc(); size_t origin[] = {imageDesc.image_width / 2, imageDesc.image_array_size / 2, 0}; size_t region[] = {imageDesc.image_width - (imageDesc.image_width / 2), imageDesc.image_array_size - (imageDesc.image_array_size / 2), 1}; void *ptr = dstImage2->getCpuAddressForMemoryTransfer(); auto bytesPerPixel = 4; size_t rowPitch = dstImage2->getHostPtrRowPitch(); size_t slicePitch = dstImage2->getHostPtrSlicePitch(); auto pOffset = origin[2] * rowPitch + origin[1] * slicePitch + origin[0] * bytesPerPixel; void *ptrStorage = ptrOffset(ptr, pOffset); retVal = pCmdQ->enqueueReadImage(dstImage2.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptrStorage, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadImageTest, GivenSharedContextZeroCopy2DImageWhenEnqueueReadImageWithMappedPointerIsCalledThenImageIsNotRead) { cl_int retVal = CL_SUCCESS; context->isSharedContext = true; std::unique_ptr dstImage(ImageHelper>::create(context)); EXPECT_TRUE(dstImage->isMemObjZeroCopy()); auto imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = 
dstImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueReadImageTest, GivenImage1DThatIsZeroCopyWhenReadImageWithTheSamePointerAndOutputEventIsPassedThenEventHasCorrectCommandTypeSet) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage(Image1dHelper<>::create(context)); auto imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = dstImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); cl_uint numEventsInWaitList = 0; cl_event event = nullptr; retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, numEventsInWaitList, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = static_cast(event); EXPECT_EQ(static_cast(CL_COMMAND_READ_IMAGE), pEvent->getCommandType()); pEvent->release(); } HWTEST_F(EnqueueReadImageTest, givenCommandQueueWhenEnqueueReadImageIsCalledThenItCallsNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); auto imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; EnqueueReadImageHelper<>::enqueueReadImage(mockCmdQ.get(), srcImage.get(), CL_TRUE, origin, region); EXPECT_TRUE(mockCmdQ->notifyEnqueueReadImageCalled); } HWTEST_F(EnqueueReadImageTest, 
givenCommandQueueWhenEnqueueReadImageWithMapAllocationIsCalledThenItDoesntCallNotifyFunction) { auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); auto imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; size_t rowPitch = srcImage->getHostPtrRowPitch(); size_t slicePitch = srcImage->getHostPtrSlicePitch(); GraphicsAllocation mapAllocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; EnqueueReadImageHelper<>::enqueueReadImage(mockCmdQ.get(), srcImage.get(), CL_TRUE, origin, region, rowPitch, slicePitch, dstPtr, &mapAllocation); EXPECT_FALSE(mockCmdQ->notifyEnqueueReadImageCalled); } HWTEST_F(EnqueueReadImageTest, givenEnqueueReadImageBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenImageShouldBeSetDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); auto imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; ASSERT_FALSE(srcImage->getGraphicsAllocation()->isAllocDumpable()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage.get(), CL_TRUE, origin, region); EXPECT_TRUE(srcImage->getGraphicsAllocation()->isAllocDumpable()); } HWTEST_F(EnqueueReadImageTest, givenEnqueueReadImageNonBlockingWhenAUBDumpAllocsOnEnqueueReadOnlyIsOnThenImageShouldntBeSetDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); std::unique_ptr srcImage(Image2dArrayHelper<>::create(context)); auto imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; 
ASSERT_FALSE(srcImage->getGraphicsAllocation()->isAllocDumpable()); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, srcImage.get(), CL_FALSE, origin, region); EXPECT_FALSE(srcImage->getGraphicsAllocation()->isAllocDumpable()); } typedef EnqueueReadImageMipMapTest MipMapReadImageTest; HWTEST_P(MipMapReadImageTest, GivenImageWithMipLevelNonZeroWhenReadImageIsCalledThenProperMipLevelIsSet) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto image_type = (cl_mem_object_type)GetParam(); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getDevice()); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case 
CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); std::unique_ptr ptr = std::unique_ptr(new uint32_t[3]); retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, region, 0, 0, ptr.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->srcMipLevel); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapReadImageTest_GivenImageWithMipLevelNonZeroWhenWriteImageIsCalledThenProperMipLevelIsSet, MipMapReadImageTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteImageWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; auto imageDesc = image->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; size_t rowPitch = image->getHostPtrRowPitch(); size_t slicePitch = image->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp000066400000000000000000000010241363734646600333010ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: 
MIT * */ #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "test.h" using namespace NEO; using ResourceBarrierTest = Test; HWTEST_F(ResourceBarrierTest, givenNullArgsAndHWCommandQueueWhenEnqueueResourceBarrierCalledThenCorrectStatusReturned) { auto retVal = pCmdQ->enqueueResourceBarrier( nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); }compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp000066400000000000000000000352701363734646600326350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "test.h" using namespace NEO; struct EnqueueSvmMemCopyTest : public DeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueSvmMemCopyTest() { } void SetUp() override { DeviceFixture::SetUp(); if (!pDevice->isFullRangeSvm()) { return; } CommandQueueFixture::SetUp(pClDevice, 0); srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); ASSERT_NE(nullptr, srcSvmPtr); dstSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); ASSERT_NE(nullptr, dstSvmPtr); auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr); ASSERT_NE(nullptr, srcSvmData); srcSvmAlloc = 
srcSvmData->gpuAllocation; ASSERT_NE(nullptr, srcSvmAlloc); auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstSvmPtr); ASSERT_NE(nullptr, dstSvmData); dstSvmAlloc = dstSvmData->gpuAllocation; ASSERT_NE(nullptr, dstSvmAlloc); } void TearDown() override { if (pDevice->isFullRangeSvm()) { context->getSVMAllocsManager()->freeSVMAlloc(srcSvmPtr); context->getSVMAllocsManager()->freeSVMAlloc(dstSvmPtr); CommandQueueFixture::TearDown(); } DeviceFixture::TearDown(); } void *srcSvmPtr = nullptr; void *dstSvmPtr = nullptr; GraphicsAllocation *srcSvmAlloc = nullptr; GraphicsAllocation *dstSvmAlloc = nullptr; }; HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { if (!pDevice->isFullRangeSvm()) { return; } pDevice->mockMemoryManager.reset(new MockMemoryManager()); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy dstSvmPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( 
EBuiltInOps::CopyBufferToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(srcSvmPtr, params->srcPtr); EXPECT_EQ(dstSvmPtr, params->dstPtr); EXPECT_EQ(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), params->size); // validate builder's output - multi dispatch info auto mdi = mockBuilder->getMultiDispatchInfo(); EXPECT_EQ(1u, mdi->size()); auto di = mdi->begin(); size_t middleElSize = 4 * sizeof(uint32_t); EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = mdi->begin()->getKernel(); EXPECT_EQ("CopyBufferToBufferMiddle", kernel->getKernelInfo().name); } HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndSrcHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { if (!pDevice->isFullRangeSvm()) { return; } auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); void *srcHostPtr = alignedMalloc(512, 64); size_t hostPtrOffset = 2; // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute original builder with mock builder auto oldBuilder 
= builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy dstSvmPtr, // void *dst_ptr ptrOffset(srcHostPtr, hostPtrOffset), // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); auto &ultCsr = pDevice->getUltCommandStreamReceiver(); GraphicsAllocation *srcSvmAlloc = nullptr; auto head = ultCsr.getTemporaryAllocations().peekHead(); while (head) { if (ptrOffset(srcHostPtr, hostPtrOffset) == head->getUnderlyingBuffer()) { srcSvmAlloc = head; break; } head = head->next; } EXPECT_NE(nullptr, srcSvmAlloc); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(alignDown(srcSvmAlloc->getGpuAddress(), 4), castToUint64(params->srcPtr)); EXPECT_EQ(dstSvmPtr, params->dstPtr); EXPECT_EQ(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(2, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), 
params->size); alignedFree(srcHostPtr); } HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndDstHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { if (!pDevice->isFullRangeSvm()) { return; } auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto dstHostPtr = alignedMalloc(512, 64); size_t hostPtrOffset = 2; // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy ptrOffset(dstHostPtr, hostPtrOffset), // void *dst_ptr srcSvmPtr, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); auto &ultCsr = pDevice->getUltCommandStreamReceiver(); GraphicsAllocation *dstSvmAlloc = nullptr; auto head = ultCsr.getTemporaryAllocations().peekHead(); while (head) { if (ptrOffset(dstHostPtr, 
hostPtrOffset) == head->getUnderlyingBuffer()) { dstSvmAlloc = head; break; } head = head->next; } EXPECT_NE(nullptr, dstSvmAlloc); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(srcSvmPtr, params->srcPtr); EXPECT_EQ(alignDown(dstSvmAlloc->getGpuAddress(), 4), castToUint64(params->dstPtr)); EXPECT_EQ(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(srcSvmAlloc, params->srcSvmAlloc); EXPECT_EQ(dstSvmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(2, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), params->size); alignedFree(dstHostPtr); } struct EnqueueSvmMemCopyHw : public ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); if (is32bit || !device->isFullRangeSvm()) { GTEST_SKIP(); } context = std::make_unique(device.get()); srcSvmPtr = context->getSVMAllocsManager()->createSVMAlloc(device->getRootDeviceIndex(), 256, {}); ASSERT_NE(nullptr, srcSvmPtr); dstHostPtr = alignedMalloc(256, 64); } void TearDown() override { if (is32bit || !device->isFullRangeSvm()) { return; } context->getSVMAllocsManager()->freeSVMAlloc(srcSvmPtr); alignedFree(dstHostPtr); } std::unique_ptr device; std::unique_ptr context; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; void *srcSvmPtr = nullptr; void *dstHostPtr = nullptr; }; using EnqueueSvmMemCopyHwTest = EnqueueSvmMemCopyHw; HWTEST_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatelessBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcSvmPtr); srcSvmData->size = static_cast(bigSize); auto retVal = cmdQ->enqueueSVMMemcpy( 
false, // cl_bool blocking_copy dstHostPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr static_cast(bigSize), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmMemCopyHwTest, givenEnqueueSVMMemCopyWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto retVal = cmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy dstHostPtr, // void *dst_ptr srcSvmPtr, // const void *src_ptr static_cast(smallSize), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp000066400000000000000000000213321363734646600326030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "test.h" using namespace NEO; struct EnqueueSvmMemFillTest : public DeviceFixture, public CommandQueueHwFixture, public ::testing::TestWithParam { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueSvmMemFillTest() { } void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { GTEST_SKIP(); } patternSize = (size_t)GetParam(); ASSERT_TRUE((0 < patternSize) && 
(patternSize <= 128)); SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = true; svmPtr = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, svmProperties); ASSERT_NE(nullptr, svmPtr); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); ASSERT_NE(nullptr, svmData); svmAlloc = svmData->gpuAllocation; ASSERT_NE(nullptr, svmAlloc); } void TearDown() override { if (svmPtr) { context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } const uint64_t pattern[16] = {0x0011223344556677, 0x8899AABBCCDDEEFF, 0xFFEEDDCCBBAA9988, 0x7766554433221100, 0xFFEEDDCCBBAA9988, 0x7766554433221100, 0x0011223344556677, 0x8899AABBCCDDEEFF}; size_t patternSize = 0; void *svmPtr = nullptr; GraphicsAllocation *svmAlloc = nullptr; }; HWTEST_P(EnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilderThenItIsConfiguredWithBuitinOpParamsAndProducesDispatchInfo) { struct MockFillBufferBuilder : MockBuiltinDispatchInfoBuilder { MockFillBufferBuilder(BuiltIns &kernelLib, BuiltinDispatchInfoBuilder *origBuilder, const void *pattern, size_t patternSize) : MockBuiltinDispatchInfoBuilder(kernelLib, origBuilder), pattern(pattern), patternSize(patternSize) { } void validateInput(const BuiltinOpParams &conf) const override { auto patternAllocation = conf.srcMemObj->getGraphicsAllocation(); EXPECT_EQ(patternSize, patternAllocation->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(pattern, patternAllocation->getUnderlyingBuffer(), patternSize)); }; const void *pattern; size_t patternSize; }; auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); // retrieve original builder auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &origBuilder); // substitute 
original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::FillBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockFillBufferBuilder(*builtIns, &origBuilder, pattern, patternSize))); EXPECT_EQ(&origBuilder, oldBuilder.get()); // call enqueue on mock builder auto retVal = pCmdQ->enqueueSVMMemFill( svmPtr, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::FillBuffer, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); // check if original builder is restored correctly auto &restoredBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::FillBuffer, pCmdQ->getDevice()); EXPECT_EQ(&origBuilder, &restoredBuilder); // use mock builder to validate builder's input / output auto mockBuilder = static_cast(newBuilder.get()); // validate builder's input - builtin ops auto params = mockBuilder->getBuiltinOpParams(); EXPECT_EQ(nullptr, params->srcPtr); EXPECT_EQ(svmPtr, params->dstPtr); EXPECT_NE(nullptr, params->srcMemObj); EXPECT_EQ(nullptr, params->dstMemObj); EXPECT_EQ(nullptr, params->srcSvmAlloc); EXPECT_EQ(svmAlloc, params->dstSvmAlloc); EXPECT_EQ(Vec3(0, 0, 0), params->srcOffset); EXPECT_EQ(Vec3(0, 0, 0), params->dstOffset); EXPECT_EQ(Vec3(256, 0, 0), params->size); // validate builder's output - multi dispatch info auto mdi = mockBuilder->getMultiDispatchInfo(); EXPECT_EQ(1u, mdi->size()); auto di = mdi->begin(); size_t middleElSize = sizeof(uint32_t); EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = di->getKernel(); EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().name.c_str()); } 
INSTANTIATE_TEST_CASE_P(size_t, EnqueueSvmMemFillTest, ::testing::Values(1, 2, 4, 8, 16, 32, 64, 128)); struct EnqueueSvmMemFillHw : public ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); if (is32bit || !device->isFullRangeSvm()) { GTEST_SKIP(); } context = std::make_unique(device.get()); svmPtr = context->getSVMAllocsManager()->createSVMAlloc(device->getRootDeviceIndex(), 256, {}); ASSERT_NE(nullptr, svmPtr); } void TearDown() override { if (is32bit || !device->isFullRangeSvm()) { return; } context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } std::unique_ptr device; std::unique_ptr context; uint64_t bigSize = 5ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; void *svmPtr = nullptr; const uint64_t pattern[4] = {0x0011223344556677, 0x8899AABBCCDDEEFF, 0xFFEEDDCCBBAA9988, 0x7766554433221100}; size_t patternSize = 0; }; using EnqueueSvmMemFillHwTest = EnqueueSvmMemFillHw; HWTEST_F(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToBufferStatelessBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtr); svmData->size = static_cast(bigSize); auto retVal = cmdQ->enqueueSVMMemFill( svmPtr, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size static_cast(bigSize), // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmMemFillHwTest, givenEnqueueSVMMemFillWhenUsingCopyBufferToBufferStatefulBuilderThenSuccessIsReturned) { auto cmdQ = std::make_unique>(context.get(), device.get()); auto retVal = cmdQ->enqueueSVMMemFill( svmPtr, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size static_cast(smallSize), // size_t size 0, // cl_uint 
num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp000066400000000000000000001606171363734646600307510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_map_buffer_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_svm_manager.h" #include "test.h" using namespace NEO; struct EnqueueSvmTest : public DeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueSvmTest() { } void SetUp() override { if (defaultHwInfo->capabilityTable.ftrSvm == false) { GTEST_SKIP(); } DeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); ptrSVM = 
context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); } void TearDown() override { if (defaultHwInfo->capabilityTable.ftrSvm == false) { return; } context->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; void *ptrSVM = nullptr; }; TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenMappingSvmThenInvalidValueErrorIsReturned) { void *svmPtr = nullptr; retVal = this->pCmdQ->enqueueSVMMap( CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags svmPtr, // void *svm_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmThenSuccessIsReturned) { retVal = this->pCmdQ->enqueueSVMMap( CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSVM, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithBlockingThenSuccessIsReturned) { retVal = this->pCmdQ->enqueueSVMMap( CL_TRUE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSVM, // void *svm_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenMappingSvmWithEventsThenSuccessIsReturned) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; retVal = this->pCmdQ->enqueueSVMMap( CL_FALSE, // cl_bool blocking_map CL_MAP_READ, // cl_map_flags map_flags ptrSVM, // void *svm_ptr 256, // size_t size 1, // cl_uint num_events_in_wait_list eventWaitList, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, 
retVal); } TEST_F(EnqueueSvmTest, GivenInvalidSvmPtrWhenUnmappingSvmThenInvalidValueErrorIsReturned) { void *svmPtr = nullptr; retVal = this->pCmdQ->enqueueSVMUnmap( svmPtr, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmThenSuccessIsReturned) { retVal = this->pCmdQ->enqueueSVMUnmap( ptrSVM, // void *svm_ptr 0, // cl_uint num_events_in_wait_list nullptr, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenUnmappingSvmWithEventsThenSuccessIsReturned) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; retVal = this->pCmdQ->enqueueSVMUnmap( ptrSVM, // void *svm_ptr 1, // cl_uint num_events_in_wait_list eventWaitList, // const cL_event *event_wait_list nullptr, // cl_event *event false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); ASSERT_EQ(1U, this->context->getSVMAllocsManager()->getNumAllocs()); void *svmPtrs[] = {ptrSVM}; retVal = this->pCmdQ->enqueueSVMFree( 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0U, this->context->getSVMAllocsManager()->getNumAllocs()); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); void *svmPtrs[] = {ptrSVM}; bool callbackWasCalled = false; struct ClbHelper { 
ClbHelper(bool &callbackWasCalled) : callbackWasCalled(callbackWasCalled) {} static void CL_CALLBACK Clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) { ClbHelper *data = (ClbHelper *)usrData; data->callbackWasCalled = true; } bool &callbackWasCalled; } userData(callbackWasCalled); retVal = this->pCmdQ->enqueueSVMFree( 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] ClbHelper::Clb, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) &userData, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(callbackWasCalled); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithCallbackAndEventThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); void *svmPtrs[] = {ptrSVM}; bool callbackWasCalled = false; struct ClbHelper { ClbHelper(bool &callbackWasCalled) : callbackWasCalled(callbackWasCalled) {} static void CL_CALLBACK Clb(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *usrData) { ClbHelper *data = (ClbHelper *)usrData; data->callbackWasCalled = true; } bool &callbackWasCalled; } userData(callbackWasCalled); cl_event event = nullptr; retVal = this->pCmdQ->enqueueSVMFree( 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] ClbHelper::Clb, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) &userData, // void *user_data 0, // cl_uint num_events_in_wait_list nullptr, // const cl_event *event_wait_list &event // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(callbackWasCalled); auto pEvent = (Event *)event; delete pEvent; } TEST_F(EnqueueSvmTest, GivenValidParamsWhenFreeingSvmWithBlockingThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.EnableAsyncEventsHandler.set(false); void *svmPtrs[] = {ptrSVM}; UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; retVal = this->pCmdQ->enqueueSVMFree( 1, // cl_uint num_svm_pointers svmPtrs, // void *svm_pointers[] nullptr, // (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[]) nullptr, // void *user_data 1, // cl_uint num_events_in_wait_list eventWaitList, // const cl_event *event_wait_list nullptr // cl_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, GivenNullDstPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); void *pDstSVM = nullptr; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } TEST_F(EnqueueSvmTest, GivenNullSrcPtrWhenCopyingMemoryThenInvalidVaueErrorIsReturned) { void *pDstSVM = ptrSVM; void *pSrcSVM = nullptr; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(EnqueueSvmTest, givenSrcHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) { char srcHostPtr[260]; void *pDstSVM = ptrSVM; void *pSrcSVM = srcHostPtr; cl_event event = nullptr; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint 
num_events_in_wait_list nullptr, // cl_evebt *event_wait_list &event // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } TEST_F(EnqueueSvmTest, givenSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) { char srcHostPtr[260]; void *pDstSVM = ptrSVM; void *pSrcSVM = srcHostPtr; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmTest, givenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalled) { char srcHostPtr[260]; void *pSrcSVM = srcHostPtr; void *pDstSVM = ptrSVM; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_WRITE_BUFFER)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM, reinterpret_cast(tempAlloc->getGpuAddress())); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); } HWTEST_F(EnqueueSvmTest, 
givenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalled) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = ptrSVM; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_READ_BUFFER)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pDstSVM, reinterpret_cast(tempAlloc->getGpuAddress())); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); } TEST_F(EnqueueSvmTest, givenDstHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = ptrSVM; cl_event event = nullptr; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list &event // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } TEST_F(EnqueueSvmTest, givenDstHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; void 
*pSrcSVM = ptrSVM; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueNonBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) { char dstHostPtr[] = {0, 0, 0}; char srcHostPtr[] = {1, 2, 3}; void *pDstSVM = dstHostPtr; void *pSrcSVM = srcHostPtr; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 3, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_WRITE_BUFFER)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(1u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM, reinterpret_cast(tempAlloc->getGpuAddress())); EXPECT_EQ(pDstSVM, reinterpret_cast(tempAlloc->next->getGpuAddress())); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); } HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenEnqueueBlockingSVMMemcpyThenEnqueuWriteBufferIsCalled) { char dstHostPtr[] = {0, 0, 0}; char srcHostPtr[] = {1, 2, 3}; void *pDstSVM = dstHostPtr; void *pSrcSVM = srcHostPtr; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( true, // cl_bool blocking_copy pDstSVM, // void 
*dst_ptr pSrcSVM, // const void *src_ptr 3, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_WRITE_BUFFER)); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); } TEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) { char dstHostPtr[260]; char srcHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = srcHostPtr; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSVMMemcpyThenSvmMemcpyCommandIsEnqueued) { void *pDstSVM = ptrSVM; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_SVM_MEMCPY)); auto tempAlloc = myCmdQ.getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); EXPECT_EQ(nullptr, tempAlloc); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = 
myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } TEST_F(EnqueueSvmTest, givenSvmToSvmCopyTypeWhenEnqueueBlockingSVMMemcpyThenSuccessIsReturned) { void *pDstSVM = ptrSVM; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); retVal = this->pCmdQ->enqueueSVMMemcpy( true, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenCopyingMemoryWithBlockingThenSuccessisReturned) { void *pDstSVM = ptrSVM; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); auto uEvent = make_releaseable(); cl_event eventWaitList[] = {uEvent.get()}; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 1, // cl_uint num_events_in_wait_list eventWaitList, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); uEvent->setStatus(-1); } TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryThenSuccessIsReturned) { void *pDstSVM = ptrSVM; SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = true; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, svmProperties); retVal = this->pCmdQ->enqueueSVMMemcpy( false, // 
cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } TEST_F(EnqueueSvmTest, GivenCoherencyWhenCopyingMemoryWithBlockingThenSuccessIsReturned) { void *pDstSVM = ptrSVM; SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = true; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, svmProperties); auto uEvent = make_releaseable(); cl_event eventWaitList[] = {uEvent.get()}; retVal = this->pCmdQ->enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 1, // cl_uint num_events_in_wait_list eventWaitList, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); context->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); uEvent->setStatus(-1); } HWTEST_F(EnqueueSvmTest, givenUnalignedAddressWhenEnqueueMemcpyThenDispatchInfoHasAlignedAddressAndProperOffset) { void *pDstSVM = reinterpret_cast(0x17); void *pSrcSVM = ptrSVM; MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 0, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); auto srcAddress = myCmdQ.kernelParams.srcPtr; auto srcOffset = myCmdQ.kernelParams.srcOffset.x; auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(pSrcSVM, 4), srcAddress); EXPECT_EQ(ptrDiff(pSrcSVM, alignDown(pSrcSVM, 4)), srcOffset); EXPECT_EQ(alignDown(pDstSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(pDstSVM, alignDown(pDstSVM, 4)), dstOffset); } 
TEST_F(EnqueueSvmTest, GivenNullSvmPtrWhenFillingMemoryThenInvalidValueErrorIsReturned) { void *svmPtr = nullptr; const float pattern[1] = {1.2345f}; const size_t patternSize = sizeof(pattern); retVal = this->pCmdQ->enqueueSVMMemFill( svmPtr, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(EnqueueSvmTest, givenSvmAllocWhenEnqueueSvmFillThenSuccesIsReturnedAndAddressIsProperlyAligned) { const float pattern[1] = {1.2345f}; const size_t patternSize = sizeof(pattern); MockCommandQueueHw myCmdQ(context, pClDevice, 0); retVal = myCmdQ.enqueueSVMMemFill( ptrSVM, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); auto dstAddress = myCmdQ.kernelParams.dstPtr; auto dstOffset = myCmdQ.kernelParams.dstOffset.x; EXPECT_EQ(alignDown(ptrSVM, 4), dstAddress); EXPECT_EQ(ptrDiff(ptrSVM, alignDown(ptrSVM, 4)), dstOffset); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenFillingMemoryWithBlockingThenSuccessIsReturned) { const float pattern[1] = {1.2345f}; const size_t patternSize = sizeof(pattern); auto uEvent = make_releaseable(); cl_event eventWaitList[] = {uEvent.get()}; retVal = this->pCmdQ->enqueueSVMMemFill( ptrSVM, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 1, // cl_uint num_events_in_wait_list eventWaitList, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); uEvent->setStatus(-1); } TEST_F(EnqueueSvmTest, GivenRepeatCallsWhenFillingMemoryThenSuccessIsReturnedForEachCall) { const float pattern[1] = {1.2345f}; const size_t patternSize = sizeof(pattern); retVal = 
this->pCmdQ->enqueueSVMMemFill( ptrSVM, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = this->pCmdQ->enqueueSVMMemFill( ptrSVM, // void *svm_ptr pattern, // const void *pattern patternSize, // size_t pattern_size 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueSvmTest, givenEnqueueSVMMemFillWhenPatternAllocationIsObtainedThenItsTypeShouldBeSetToFillPattern) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); ASSERT_TRUE(csr.getAllocationsForReuse().peekIsEmpty()); const float pattern[1] = {1.2345f}; const size_t patternSize = sizeof(pattern); const size_t size = patternSize; retVal = this->pCmdQ->enqueueSVMMemFill( ptrSVM, pattern, patternSize, size, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead(); ASSERT_NE(nullptr, patternAllocation); EXPECT_EQ(GraphicsAllocation::AllocationType::FILL_PATTERN, patternAllocation->getAllocationType()); } TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); GraphicsAllocation *pSvmAlloc = svmData->gpuAllocation; EXPECT_NE(nullptr, ptrSVM); std::unique_ptr program(Program::create("FillBufferBytes", context, *pClDevice, true, &retVal)); cl_device_id device = pClDevice; program->build(1, &device, nullptr, nullptr, nullptr, false); std::unique_ptr kernel(Kernel::create(program.get(), *program->getKernelInfo("FillBufferBytes"), &retVal)); kernel->setSvmKernelExecInfo(pSvmAlloc); size_t offset = 0; size_t size = 1; retVal = 
this->pCmdQ->enqueueKernel( kernel.get(), 1, &offset, &size, &size, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size()); } TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSurfacesAreMadeResident) { auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); GraphicsAllocation *pSvmAlloc = svmData->gpuAllocation; EXPECT_NE(nullptr, ptrSVM); auto program = clUniquePtr(Program::create("FillBufferBytes", context, *pClDevice, true, &retVal)); cl_device_id device = pClDevice; program->build(1, &device, nullptr, nullptr, nullptr, false); auto kernel = clUniquePtr(Kernel::create(program.get(), *program->getKernelInfo("FillBufferBytes"), &retVal)); std::vector allSurfaces; kernel->getResidency(allSurfaces); EXPECT_EQ(1u, allSurfaces.size()); kernel->setSvmKernelExecInfo(pSvmAlloc); auto uEvent = make_releaseable(); cl_event eventWaitList[] = {uEvent.get()}; size_t offset = 0; size_t size = 1; retVal = this->pCmdQ->enqueueKernel( kernel.get(), 1, &offset, &size, &size, 1, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); kernel->getResidency(allSurfaces); EXPECT_EQ(3u, allSurfaces.size()); for (auto &surface : allSurfaces) delete surface; EXPECT_EQ(1u, kernel->kernelSvmGfxAllocations.size()); uEvent->setStatus(-1); } TEST_F(EnqueueSvmTest, GivenMultipleThreasWhenAllocatingSvmThenOnlyOneAllocationIsCreated) { std::atomic flag(0); std::atomic ready(0); void *svmPtrs[15] = {}; auto allocSvm = [&](uint32_t from, uint32_t to) { for (uint32_t i = from; i <= to; i++) { svmPtrs[i] = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 1, {}); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(svmPtrs[i]); ASSERT_NE(nullptr, svmData); auto ga = svmData->gpuAllocation; EXPECT_NE(nullptr, ga); EXPECT_EQ(ga->getUnderlyingBuffer(), svmPtrs[i]); } }; auto freeSvm = [&](uint32_t from, uint32_t to) { for (uint32_t i = from; i <= to; 
i++) { context->getSVMAllocsManager()->freeSVMAlloc(svmPtrs[i]); } }; auto asyncFcn = [&](bool alloc, uint32_t from, uint32_t to) { flag++; while (flag < 3) ; if (alloc) { allocSvm(from, to); } freeSvm(from, to); ready++; }; EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs()); allocSvm(10, 14); auto t1 = std::unique_ptr(new std::thread(asyncFcn, true, 0, 4)); auto t2 = std::unique_ptr(new std::thread(asyncFcn, true, 5, 9)); auto t3 = std::unique_ptr(new std::thread(asyncFcn, false, 10, 14)); while (ready < 3) { std::this_thread::yield(); } EXPECT_EQ(1u, context->getSVMAllocsManager()->getNumAllocs()); t1->join(); t2->join(); t3->join(); } TEST_F(EnqueueSvmTest, GivenValidParamsWhenMigratingMemoryThenSuccessIsReturned) { const void *svmPtrs[] = {ptrSVM}; retVal = this->pCmdQ->enqueueSVMMigrateMem( 1, // cl_uint num_svm_pointers svmPtrs, // const void **svm_pointers nullptr, // const size_t *sizes 0, // const cl_mem_migration_flags flags 0, // cl_uint num_events_in_wait_list nullptr, // cl_event *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(CreateSvmAllocTests, givenVariousSvmAllocationPropertiesWhenAllocatingSvmThenSvmIsCorrectlyAllocated) { if (!defaultHwInfo->capabilityTable.ftrSvm) { return; } DebugManagerStateRestore dbgRestore; SVMAllocsManager::SvmAllocationProperties svmAllocationProperties; for (auto isLocalMemorySupported : ::testing::Bool()) { DebugManager.flags.EnableLocalMemory.set(isLocalMemorySupported); auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto mockContext = std::make_unique(mockDevice.get()); for (auto isReadOnly : ::testing::Bool()) { for (auto isHostPtrReadOnly : ::testing::Bool()) { svmAllocationProperties.readOnly = isReadOnly; svmAllocationProperties.hostPtrReadOnly = isHostPtrReadOnly; auto ptrSVM = mockContext->getSVMAllocsManager()->createSVMAlloc(mockDevice->getRootDeviceIndex(), 256, svmAllocationProperties); 
EXPECT_NE(nullptr, ptrSVM); mockContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); } } } } struct EnqueueSvmTestLocalMemory : public DeviceFixture, public ::testing::Test { void SetUp() override { if (defaultHwInfo->capabilityTable.ftrSvm == false) { GTEST_SKIP(); } dbgRestore = std::make_unique(); DebugManager.flags.EnableLocalMemory.set(1); DeviceFixture::SetUp(); context = std::make_unique(pClDevice, true); size = 256; svmPtr = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), size, {}); ASSERT_NE(nullptr, svmPtr); mockSvmManager = reinterpret_cast(context->getSVMAllocsManager()); } void TearDown() override { if (defaultHwInfo->capabilityTable.ftrSvm == false) { return; } context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); context.reset(nullptr); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; void *svmPtr = nullptr; size_t size; MockSVMAllocsManager *mockSvmManager; std::unique_ptr dbgRestore; std::unique_ptr context; HardwareParse hwParse; }; HWTEST_F(EnqueueSvmTestLocalMemory, givenWriteInvalidateRegionFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenMapWriteFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsFalse) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = 
mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenMapReadFlagWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, CL_MAP_READ, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_TRUE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenSvmAllocWithoutFlagsWhenMappingSvmThenMapIsSuccessfulAndReadOnlyFlagIsTrue) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_TRUE, 0, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); EXPECT_FALSE(svmMap->readOnlyMap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqeueMapValidSvmPtrThenExpectSingleWalker) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event event = nullptr; uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_READ, regionSvmPtr, regionSize, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr); EXPECT_EQ(svmPtr, svmMap->baseSvmPtr); EXPECT_EQ(regionSize, svmMap->regionSize); EXPECT_EQ(offset, svmMap->offset); EXPECT_TRUE(svmMap->readOnlyMap); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = 
hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenEnqeueMapSvmPtrTwiceThenExpectSingleWalker) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); uintptr_t offset = 64; void *regionSvmPtr = ptrOffset(svmPtr, offset); size_t regionSize = 64; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto svmMap = mockSvmManager->svmMapOperations.get(regionSvmPtr); ASSERT_NE(nullptr, svmMap); EXPECT_EQ(regionSvmPtr, svmMap->regionSvmPtr); EXPECT_EQ(svmPtr, svmMap->baseSvmPtr); EXPECT_EQ(regionSize, svmMap->regionSize); EXPECT_EQ(offset, svmMap->offset); EXPECT_FALSE(svmMap->readOnlyMap); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); cl_event event = nullptr; retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, regionSvmPtr, regionSize, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenNoMappedSvmPtrThenExpectNoUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event event = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 
0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(0u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsReadOnlyThenExpectNoUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); queue.flush(); size_t offset = stream.getUsed(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(1u, walkerCount); hwParse.TearDown(); cl_event event = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &event, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream, offset); walkerCount = hwParse.getCommandCount(); EXPECT_EQ(0u, walkerCount); constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); EXPECT_EQ(expectedCmd, actualCmd); clReleaseEvent(event); } HWTEST_F(EnqueueSvmTestLocalMemory, givenNonReadOnlyMapWhenUnmappingThenSetAubTbxWritableBeforeUnmapEnqueue) { class MyQueue : public MockCommandQueueHw { public: using MockCommandQueueHw::MockCommandQueueHw; void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { waitUntilCompleteCalled++; if (allocationToVerify) { 
EXPECT_TRUE(allocationToVerify->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(allocationToVerify->isTbxWritable(GraphicsAllocation::defaultBank)); } } uint32_t waitUntilCompleteCalled = 0; GraphicsAllocation *allocationToVerify = nullptr; }; MyQueue myQueue(context.get(), pClDevice, nullptr); retVal = myQueue.enqueueSVMMap(CL_TRUE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocation; myQueue.allocationToVerify = gpuAllocation; gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); EXPECT_EQ(1u, myQueue.waitUntilCompleteCalled); retVal = myQueue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, myQueue.waitUntilCompleteCalled); } HWTEST_F(EnqueueSvmTestLocalMemory, givenReadOnlyMapWhenUnmappingThenDontResetAubTbxWritable) { MockCommandQueueHw queue(context.get(), pClDevice, nullptr); retVal = queue.enqueueSVMMap(CL_TRUE, CL_MAP_READ, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto gpuAllocation = mockSvmManager->getSVMAlloc(svmPtr)->gpuAllocation; gpuAllocation->setAubWritable(false, GraphicsAllocation::defaultBank); gpuAllocation->setTbxWritable(false, GraphicsAllocation::defaultBank); retVal = queue.enqueueSVMUnmap(svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(gpuAllocation->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_FALSE(gpuAllocation->isTbxWritable(GraphicsAllocation::defaultBank)); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionIsWritableThenExpectMapAndUnmapCopyKernel) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); cl_event eventMap = nullptr; retVal = queue.enqueueSVMMap( CL_FALSE, 
CL_MAP_WRITE, svmPtr, size, 0, nullptr, &eventMap, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); cl_event eventUnmap = nullptr; retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, &eventUnmap, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(2u, walkerCount); constexpr cl_command_type expectedMapCmd = CL_COMMAND_SVM_MAP; cl_command_type actualMapCmd = castToObjectOrAbort(eventMap)->getCommandType(); EXPECT_EQ(expectedMapCmd, actualMapCmd); constexpr cl_command_type expectedUnmapCmd = CL_COMMAND_SVM_UNMAP; cl_command_type actualUnmapCmd = castToObjectOrAbort(eventUnmap)->getCommandType(); EXPECT_EQ(expectedUnmapCmd, actualUnmapCmd); clReleaseEvent(eventMap); clReleaseEvent(eventUnmap); } HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAndNoEventIsUsedIsWritableThenExpectMapAndUnmapCopyKernelAnNo) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; MockCommandQueueHw queue(context.get(), pClDevice, nullptr); LinearStream &stream = queue.getCS(0x1000); retVal = queue.enqueueSVMMap( CL_FALSE, CL_MAP_WRITE, svmPtr, size, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockSvmManager->svmMapOperations.getNumMapOperations()); auto svmMap = mockSvmManager->svmMapOperations.get(svmPtr); ASSERT_NE(nullptr, svmMap); retVal = queue.enqueueSVMUnmap( svmPtr, 0, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockSvmManager->svmMapOperations.getNumMapOperations()); queue.flush(); hwParse.parseCommands(stream); auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(2u, walkerCount); } template struct FailCsr : public CommandStreamReceiverHw { using 
CommandStreamReceiverHw::CommandStreamReceiverHw; bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { return CL_FALSE; } }; HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreMadeResidentThenOnlyNonSvmAllocationsAreAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &residentAllocations = commandStreamReceiver.getResidencyAllocations(); EXPECT_EQ(0u, residentAllocations.size()); svmManager->makeInternalAllocationsResident(commandStreamReceiver, InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is made resident EXPECT_EQ(1u, residentAllocations.size()); EXPECT_EQ(residentAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(EnqueueSvmTest, whenInternalAllocationsAreAddedToResidencyContainerThenOnlyExpectedAllocationsAreAdded) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; auto allocationSize = 4096u; auto svmManager = this->context->getSVMAllocsManager(); EXPECT_NE(0u, svmManager->getNumAllocs()); auto unifiedMemoryPtr = svmManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), allocationSize, unifiedMemoryProperties); EXPECT_NE(nullptr, unifiedMemoryPtr); EXPECT_EQ(2u, svmManager->getNumAllocs()); ResidencyContainer residencyContainer; EXPECT_EQ(0u, residencyContainer.size()); 
svmManager->addInternalAllocationsToResidencyContainer(residencyContainer, InternalMemoryType::DEVICE_UNIFIED_MEMORY); //only unified memory allocation is added to residency container EXPECT_EQ(1u, residencyContainer.size()); EXPECT_EQ(residencyContainer[0]->getGpuAddress(), castToUint64(unifiedMemoryPtr)); svmManager->freeSVMAlloc(unifiedMemoryPtr); } HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { char dstHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = ptrSVM; MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); retVal = cmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { char srcHostPtr[260]; void *pDstSVM = ptrSVM; void *pSrcSVM = srcHostPtr; MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); retVal = cmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); 
cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } HWTEST_F(EnqueueSvmTest, givenDstHostPtrAndSrcHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { char dstHostPtr[260]; char srcHostPtr[260]; void *pDstSVM = dstHostPtr; void *pSrcSVM = srcHostPtr; MockCommandQueueHw cmdQ(context, pClDevice, nullptr); auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.gpgpuEngine->commandStreamReceiver; cmdQ.gpgpuEngine->commandStreamReceiver = failCsr.get(); retVal = cmdQ.enqueueSVMMemcpy( false, // cl_bool blocking_copy pDstSVM, // void *dst_ptr pSrcSVM, // const void *src_ptr 256, // size_t size 0, // cl_uint num_events_in_wait_list nullptr, // cl_evebt *event_wait_list nullptr // cL_event *event ); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); cmdQ.gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemcpyThenAllocIsDecommitted) { auto mockMemoryManager = std::make_unique(); mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager()); auto memoryManager = context->getMemoryManager(); context->memoryManager = mockMemoryManager.get(); auto srcSvm = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); mockMemoryManager->getPageFaultManager()->insertAllocation(srcSvm, 256, context->getSVMAllocsManager(), context->getSpecialQueue()); mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue()); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 2); this->pCmdQ->enqueueSVMMemcpy(false, ptrSVM, srcSvm, 256, 0, nullptr, nullptr); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 0); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 2); 
EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 2); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 2); context->getSVMAllocsManager()->freeSVMAlloc(srcSvm); context->memoryManager = memoryManager; } TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemFillThenAllocIsDecommitted) { char pattern[256]; auto mockMemoryManager = std::make_unique(); mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager()); auto memoryManager = context->getMemoryManager(); context->memoryManager = mockMemoryManager.get(); mockMemoryManager->getPageFaultManager()->insertAllocation(ptrSVM, 256, context->getSVMAllocsManager(), context->getSpecialQueue()); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 1); pCmdQ->enqueueSVMMemFill(ptrSVM, &pattern, 256, 256, 0, nullptr, nullptr); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->allowMemoryAccessCalled, 0); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->protectMemoryCalled, 1); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToCpuCalled, 1); EXPECT_EQ(static_cast(mockMemoryManager->getPageFaultManager())->transferToGpuCalled, 1); context->memoryManager = memoryManager; } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp000066400000000000000000000366111363734646600314070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" 
#include "opencl/test/unit_test/fixtures/device_fixture.h"
#include "opencl/test/unit_test/libult/ult_command_stream_receiver.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "test.h"

using namespace NEO;

namespace ULT {

// CSR mock for the locking tests in this file.
//  - flush() records every submitted command-buffer allocation, detaches it from the
//    stream, and checks that the CSR ownership mutex can be taken at that moment.
//  - The destructor checks that the ClDevice reports no ownership and that the number
//    of recorded flushes matches expectedToFreeCount (or is non-zero when that field
//    is left at its (size_t)-1 default), then frees the recorded allocations.
template class CommandStreamReceiverMock : public UltCommandStreamReceiver {
  private:
    std::vector toFree; // pointers to be freed on destruction
    Device *pDevice;
    ClDevice *pClDevice;

  public:
    // (size_t)-1 means "at least one flush expected"; any other value is an exact count.
    size_t expectedToFreeCount = (size_t)-1;

    CommandStreamReceiverMock(Device *pDevice) : UltCommandStreamReceiver(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()) {
        this->pDevice = pDevice;
        this->pClDevice = pDevice->getSpecializedDevice();
    }

    bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
        EXPECT_NE(nullptr, batchBuffer.commandBufferAllocation->getUnderlyingBuffer());

        // Take over the command buffer: remember it for release in the destructor
        // and detach it from the stream.
        toFree.push_back(batchBuffer.commandBufferAllocation);
        batchBuffer.stream->replaceBuffer(nullptr, 0);
        batchBuffer.stream->replaceGraphicsAllocation(nullptr);

        // The mutex must be obtainable here. Only unlock when the try succeeded:
        // unlocking a mutex this thread does not hold is undefined behaviour and
        // would hide the very failure this expectation is meant to report.
        const bool lockAcquired = this->ownershipMutex.try_lock();
        EXPECT_TRUE(lockAcquired);
        if (lockAcquired) {
            this->ownershipMutex.unlock();
        }
        return true;
    }

    ~CommandStreamReceiverMock() override {
        EXPECT_FALSE(pClDevice->hasOwnership());
        if (expectedToFreeCount == (size_t)-1) {
            EXPECT_GT(toFree.size(), 0u); //make sure flush was called
        } else {
            EXPECT_EQ(toFree.size(), expectedToFreeCount);
        }
        auto memoryManager = this->getMemoryManager();
        //Now free memory.
if CQ/CSR did the same, we will hit double-free for (auto p : toFree) memoryManager->freeGraphicsMemory(p); } }; struct EnqueueThreadingFixture : public DeviceFixture { void SetUp() { DeviceFixture::SetUp(); context = new MockContext(pClDevice); pCmdQ = nullptr; } void TearDown() { delete pCmdQ; context->release(); DeviceFixture::TearDown(); } template class MyCommandQueue : public CommandQueueHw { public: MyCommandQueue(Context *context, ClDevice *device, const cl_queue_properties *props) : CommandQueueHw(context, device, props, false), kernel(nullptr) { } static CommandQueue *create(Context *context, ClDevice *device, cl_command_queue_properties props) { const cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, props, 0}; return new MyCommandQueue(context, device, properties); } protected: ~MyCommandQueue() override { if (kernel) { EXPECT_FALSE(kernel->hasOwnership()); } } void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &multiDispatchInfo) override { for (auto &dispatchInfo : multiDispatchInfo) { auto &kernel = *dispatchInfo.getKernel(); EXPECT_TRUE(kernel.hasOwnership()); } } Kernel *kernel; }; CommandQueue *pCmdQ; MockContext *context; template void createCQ() { pCmdQ = MyCommandQueue::create(context, pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); auto pCommandStreamReceiver = new CommandStreamReceiverMock(pDevice); pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); } }; typedef Test EnqueueThreading; HWTEST_F(EnqueueThreading, enqueueReadBuffer) { createCQ(); cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1024u, ptr, nullptr, 0, nullptr, nullptr); alignedFree(ptr); } HWTEST_F(EnqueueThreading, enqueueWriteBuffer) { createCQ(); cl_int retVal; std::unique_ptr 
buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1024u, ptr, nullptr, 0, nullptr, nullptr); alignedFree(ptr); } HWTEST_F(EnqueueThreading, enqueueCopyBuffer) { createCQ(); cl_int retVal; std::unique_ptr srcBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); std::unique_ptr dstBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, dstBuffer.get()); pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(), 0, 0, 1024u, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, enqueueCopyBufferRect) { createCQ(); cl_int retVal; std::unique_ptr srcBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); std::unique_ptr dstBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, dstBuffer.get()); size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyBufferRect(srcBuffer.get(), dstBuffer.get(), srcOrigin, dstOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, enqueueCopyBufferToImage) { createCQ(); cl_int retVal; std::unique_ptr srcBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, srcBuffer.get()); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, 
context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr dstImage(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, dstImage.get()); size_t dstOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, region, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, enqueueCopyImage) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr srcImage(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstImage(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, srcImage.get()); size_t srcOrigin[3] = {1024u, 1, 0}; size_t dstOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, region, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, enqueueCopyImageToBuffer) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; 
cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr srcImage(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstBuffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, dstBuffer.get()); size_t srcOrigin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, region, 0, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, enqueueFillBuffer) { createCQ(); cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); cl_int pattern = 0xDEADBEEF; pCmdQ->enqueueFillBuffer(buffer.get(), &pattern, sizeof(pattern), 0, 1024u, 0, nullptr, nullptr); } HWTEST_F(EnqueueThreading, enqueueFillImage) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr image(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image.get()); size_t origin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; int32_t fillColor[4] = {0xCC, 0xCC, 0xCC, 0xCC}; pCmdQ->enqueueFillImage(image.get(), &fillColor, origin, region, 0, nullptr, nullptr); } 
// Blocking rectangular read from a 1 KiB buffer into page-aligned host memory.
HWTEST_F(EnqueueThreading, enqueueReadBufferRect) {
    createCQ();
    cl_int retVal;

    std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal));
    ASSERT_NE(nullptr, buffer.get());

    void *hostPtr = ::alignedMalloc(1024u, 4096);
    ASSERT_NE(nullptr, hostPtr);

    size_t bufferOrigin[3] = {1024u, 1, 0};
    size_t hostOrigin[3] = {1024u, 1, 0};
    size_t region[3] = {1024u, 1, 1};

    pCmdQ->enqueueReadBufferRect(buffer.get(), CL_TRUE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, hostPtr, 0, nullptr, nullptr);

    ::alignedFree(hostPtr);
}

// Blocking read from a 1D CL_R / CL_UNORM_INT8 image into page-aligned host memory.
HWTEST_F(EnqueueThreading, enqueueReadImage) {
    createCQ();
    cl_int retVal;

    cl_image_format imageFormat;
    imageFormat.image_channel_order = CL_R;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc imageDesc;
    memset(&imageDesc, 0, sizeof(imageDesc));
    imageDesc.image_width = 1024u;
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);

    std::unique_ptr image(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    ASSERT_NE(nullptr, image.get());

    void *hostPtr = ::alignedMalloc(1024u, 4096);
    ASSERT_NE(nullptr, hostPtr);

    size_t origin[3] = {1024u, 1, 0};
    size_t region[3] = {1024u, 1, 1};

    pCmdQ->enqueueReadImage(image.get(), CL_TRUE, origin, region, 0, 0, hostPtr, nullptr, 0, nullptr, nullptr);

    ::alignedFree(hostPtr);
}

HWTEST_F(EnqueueThreading, enqueueWriteBufferRect) {
    createCQ();
    cl_int retVal;
    std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal));
    ASSERT_NE(nullptr, buffer.get());
    size_t bufferOrigin[3] = {1024u, 1, 0};
    size_t hostOrigin[3] = {1024u, 1, 0};
    size_t region[3] = {1024u, 1, 1};
    auto hostPtrSize = Buffer::calculateHostPtrSize(hostOrigin, region, 0, 0);
    void *ptr = ::alignedMalloc(hostPtrSize, MemoryConstants::pageSize);
ASSERT_NE(nullptr, ptr); pCmdQ->enqueueWriteBufferRect(buffer.get(), CL_TRUE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr); ::alignedFree(ptr); } HWTEST_F(EnqueueThreading, enqueueWriteImage) { createCQ(); cl_int retVal; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc; memset(&imageDesc, 0, sizeof(imageDesc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1024u; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr image(Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image.get()); void *ptr = ::alignedMalloc(1024u, 4096); ASSERT_NE(nullptr, ptr); size_t origin[3] = {1024u, 1, 0}; size_t region[3] = {1024u, 1, 1}; pCmdQ->enqueueWriteImage(image.get(), CL_TRUE, origin, region, 0, 0, ptr, nullptr, 0, nullptr, nullptr); ::alignedFree(ptr); } HWTEST_F(EnqueueThreading, finish) { createCQ(); // set something to finish pCmdQ->taskCount = 1; pCmdQ->taskLevel = 1; auto csr = (CommandStreamReceiverMock *)&this->pCmdQ->getGpgpuCommandStreamReceiver(); csr->expectedToFreeCount = 0u; pCmdQ->finish(); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_unmap_memobject_tests.cpp000066400000000000000000000244161363734646600333050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include 
"opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "test.h" #include using namespace NEO; struct EnqueueUnmapMemObjTest : public DeviceFixture, public CommandQueueHwFixture, public ::testing::Test { typedef CommandQueueHwFixture CommandQueueFixture; EnqueueUnmapMemObjTest() { } void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); BufferDefaults::context = new MockContext; buffer = BufferHelper>::create(); mappedPtr = pCmdQ->enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, 8, 0, nullptr, nullptr, retVal); } void TearDown() override { delete buffer; delete BufferDefaults::context; CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Buffer *buffer = nullptr; void *mappedPtr; }; TEST_F(EnqueueUnmapMemObjTest, GivenValidParamsWhenUnmappingMemoryObjectThenSuccessIsReturned) { auto retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EnqueueUnmapMemObjTest, GivenPointerToEventThenUnmappingMemoryObjectThenEventIsReturned) { cl_event event = nullptr; auto retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, 0, nullptr, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); // Check CL_EVENT_COMMAND_TYPE { cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_UNMAP_MEM_OBJECT), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); } delete pEvent; } TEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenReturnedEventHasGreaterThanOrEqualTaskLevelThanParentEvent) { uint32_t 
taskLevelCmdQ = 17; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; auto taskLevelMax = std::max({taskLevelCmdQ, taskLevelEvent1, taskLevelEvent2}); pCmdQ->taskLevel = taskLevelCmdQ; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, numEventsInWaitList, eventWaitList, &event); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_GE(pEvent->taskLevel, taskLevelMax); delete pEvent; } HWTEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenEventIsUpdated) { cl_event eventReturned = NULL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.taskCount = 100; retVal = pCmdQ->enqueueUnmapMemObject( buffer, mappedPtr, 0, nullptr, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, eventReturned); auto eventObject = castToObject(eventReturned); EXPECT_EQ(0u, eventObject->peekTaskCount()); EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion()); clReleaseEvent(eventReturned); } TEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenWaitEventIsUpdated) { cl_event waitEvent = nullptr; cl_event retEvent = nullptr; auto buffer = clCreateBuffer( BufferDefaults::context, CL_MEM_READ_WRITE, 20, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); auto ptrResult = clEnqueueMapBuffer( pCmdQ, buffer, CL_FALSE, CL_MAP_READ, 0, 8, 0, nullptr, &waitEvent, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, ptrResult); EXPECT_NE(nullptr, waitEvent); retVal = clEnqueueUnmapMemObject( pCmdQ, buffer, ptrResult, 1, &waitEvent, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, retEvent); Event *wEvent = castToObject(waitEvent); 
EXPECT_EQ(CL_COMPLETE, wEvent->peekExecutionStatus()); Event *rEvent = castToObject(retEvent); EXPECT_EQ(CL_COMPLETE, rEvent->peekExecutionStatus()); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); clReleaseEvent(waitEvent); clReleaseEvent(retEvent); } HWTEST_F(EnqueueUnmapMemObjTest, givenEnqueueUnmapMemObjectWhenNonAubWritableBufferObjectMappedToHostPtrForWritingThenItShouldBeResetToAubAndTbxWritable) { auto buffer = std::unique_ptr(BufferHelper<>::create()); ASSERT_NE(nullptr, buffer); buffer->getGraphicsAllocation()->setAubWritable(false, GraphicsAllocation::defaultBank); buffer->getGraphicsAllocation()->setTbxWritable(false, GraphicsAllocation::defaultBank); auto mappedForWritingPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, retVal); ASSERT_NE(nullptr, mappedForWritingPtr); retVal = pCmdQ->enqueueUnmapMemObject( buffer.get(), mappedForWritingPtr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->getGraphicsAllocation()->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(buffer->getGraphicsAllocation()->isTbxWritable(GraphicsAllocation::defaultBank)); } HWTEST_F(EnqueueUnmapMemObjTest, givenWriteBufferIsServicedOnCPUWhenBufferIsNonAubTbxWriteableThanFlagsChange) { DebugManagerStateRestore restorer; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); auto buffer = std::unique_ptr(BufferHelper<>::create()); ASSERT_NE(nullptr, buffer); buffer->getGraphicsAllocation()->setAubWritable(false, GraphicsAllocation::defaultBank); buffer->getGraphicsAllocation()->setTbxWritable(false, GraphicsAllocation::defaultBank); EXPECT_FALSE(buffer->getGraphicsAllocation()->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_FALSE(buffer->getGraphicsAllocation()->isTbxWritable(GraphicsAllocation::defaultBank)); auto ptr = allocateAlignedMemory(buffer->getSize(), MemoryConstants::cacheLineSize); retVal = 
pCmdQ->enqueueWriteBuffer(buffer.get(), true, 0u, buffer->getSize(), ptr.get(), nullptr, 0u, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(ptr.get(), buffer->getGraphicsAllocation()->getUnderlyingBuffer(), buffer->getSize())); EXPECT_TRUE(buffer->getGraphicsAllocation()->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(buffer->getGraphicsAllocation()->isTbxWritable(GraphicsAllocation::defaultBank)); } HWTEST_F(EnqueueUnmapMemObjTest, givenMemObjWhenUnmappingThenSetAubWritableBeforeEnqueueWrite) { DebugManagerStateRestore restore; DebugManager.flags.DisableZeroCopyForBuffers.set(true); auto buffer = std::unique_ptr(BufferHelper<>::create()); auto image = std::unique_ptr(Image2dHelper<>::create(BufferDefaults::context)); class MyMockCommandQueue : public MockCommandQueue { public: using MockCommandQueue::MockCommandQueue; cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_TRUE(buffer->getMapAllocation()->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(buffer->getMapAllocation()->isTbxWritable(GraphicsAllocation::defaultBank)); return CL_SUCCESS; } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EXPECT_TRUE(dstImage->getMapAllocation()->isAubWritable(GraphicsAllocation::defaultBank)); EXPECT_TRUE(dstImage->getMapAllocation()->isTbxWritable(GraphicsAllocation::defaultBank)); return CL_SUCCESS; } }; MyMockCommandQueue myMockCmdQ(BufferDefaults::context, pClDevice, nullptr); { auto mapPtr = myMockCmdQ.enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 8, 0, nullptr, nullptr, retVal); 
buffer->getMapAllocation()->setAubWritable(false, GraphicsAllocation::defaultBank); buffer->getMapAllocation()->setTbxWritable(false, GraphicsAllocation::defaultBank); myMockCmdQ.enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr); } { size_t region[] = {1, 0, 0}; size_t origin[] = {0, 0, 0}; auto mapPtr = myMockCmdQ.enqueueMapImage(image.get(), CL_TRUE, CL_MAP_WRITE, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); image->getMapAllocation()->setAubWritable(false, GraphicsAllocation::defaultBank); image->getMapAllocation()->setTbxWritable(false, GraphicsAllocation::defaultBank); myMockCmdQ.enqueueUnmapMemObject(image.get(), mapPtr, 0, nullptr, nullptr); } } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_waitlist_tests.cpp000066400000000000000000000240621363734646600317750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" class clEventWrapper { public: clEventWrapper() { mMem = NULL; } clEventWrapper(cl_event mem) { mMem = mem; } clEventWrapper(const clEventWrapper &rhs) : mMem(rhs.mMem) { if (mMem != NULL) clRetainEvent(mMem); } ~clEventWrapper() { if (mMem != NULL) clReleaseEvent(mMem); } clEventWrapper &operator=(const cl_event &rhs) { mMem = rhs; return *this; } clEventWrapper &operator=(clEventWrapper rhs) { std::swap(mMem, rhs.mMem); return *this; } operator cl_event() const { return mMem; } cl_event *operator&() { return &mMem; } bool operator==(const cl_event &rhs) { return mMem == rhs; } protected: cl_event mMem; }; using namespace NEO; namespace ULT { struct EnqueueWaitlistTest; typedef HelloWorldTestWithParam EnqueueWaitlistFixture; typedef void (*ExecuteEnqueue)(EnqueueWaitlistTest *, uint32_t /*cl_uint*/, cl_event *, cl_event *, bool); struct EnqueueWaitlistTest : 
public EnqueueWaitlistFixture,
                             public ::testing::TestWithParam {
  public:
    typedef CommandQueueHwFixture CommandQueueFixture;
    using CommandQueueHwFixture::pCmdQ;

    EnqueueWaitlistTest(void) {
        buffer = nullptr;
    }

    // Creates the memory objects used by the parameterised enqueue helpers below.
    void SetUp() override {
        EnqueueWaitlistFixture::SetUp();
        buffer = BufferHelper<>::create();
        bufferNonZeroCopy = new UnalignedBuffer;
        image = Image1dHelper<>::create(BufferDefaults::context);
        imageNonZeroCopy = ImageHelper>::create(BufferDefaults::context);
    }

    // Drops the internal reference on each object created in SetUp().
    void TearDown() override {
        buffer->decRefInternal();
        bufferNonZeroCopy->decRefInternal();
        image->decRefInternal();
        imageNonZeroCopy->decRefInternal();
        EnqueueWaitlistFixture::TearDown();
    }

    cl_int retVal = CL_SUCCESS;
    cl_int error = CL_SUCCESS;

    Buffer *buffer;
    Buffer *bufferNonZeroCopy;
    Image *image;
    Image *imageNonZeroCopy;

    // Records a non-fatal failure tagged with `str` when `error` is not CL_SUCCESS.
    void test_error(cl_int error, std::string str) {
        EXPECT_EQ(CL_SUCCESS, error) << str << std::endl;
    }

    // Enqueues the fixture kernel over a 1D range of 10 work-items (global size equal
    // to local size), waiting on `waits` and reporting completion through `outEvent`.
    static void EnqueueNDRange(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) {
        size_t threadNum = 10;
        size_t threads[1] = {threadNum};
        cl_int status = clEnqueueNDRangeKernel(test->pCmdQ, test->pKernel, 1, NULL, threads, threads, numWaits, waits, outEvent);
        test->test_error(status, "Unable to execute kernel");
    }

    static void EnqueueMapBuffer(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) {
        cl_int error;
        void *mappedPtr = clEnqueueMapBuffer(test->pCmdQ, test->buffer, blocking ?
CL_TRUE : CL_FALSE, CL_MAP_READ, 0, test->buffer->getSize(), numWaits, waits, outEvent, &error); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->buffer, mappedPtr, 0, nullptr, nullptr); return; } static void TwoEnqueueMapBuffer(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; void *mappedPtr = clEnqueueMapBuffer(test->pCmdQ, test->buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, 0, test->buffer->getSize(), numWaits, waits, outEvent, &error); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); void *mappedPtr2 = clEnqueueMapBuffer(test->pCmdQ, test->bufferNonZeroCopy, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, 0, test->bufferNonZeroCopy->getSize(), 0, nullptr, nullptr, &error); EXPECT_NE(nullptr, mappedPtr2); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->buffer, mappedPtr, 0, nullptr, nullptr); error = clEnqueueUnmapMemObject(test->pCmdQ, test->bufferNonZeroCopy, mappedPtr2, 0, nullptr, nullptr); return; } static void EnqueueUnMapBuffer(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; void *mappedPtr = clEnqueueMapBuffer(test->pCmdQ, test->buffer, CL_TRUE, CL_MAP_READ, 0, test->buffer->getSize(), 0, nullptr, nullptr, &error); EXPECT_NE(nullptr, mappedPtr); ASSERT_NE(test->buffer, nullptr); error = clEnqueueUnmapMemObject(test->pCmdQ, test->buffer, mappedPtr, numWaits, waits, outEvent); test->test_error(error, "Unable to unmap buffer"); return; } static void EnqueueMapImage(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; cl_image_desc desc = test->image->getImageDesc(); size_t origin[3] = {0, 0, 0}, region[3] = {desc.image_width, desc.image_height, 1}; size_t 
outPitch; void *mappedPtr = clEnqueueMapImage(test->pCmdQ, test->image, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error); test->test_error(error, "Unable to enqueue image map"); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->image, mappedPtr, 0, nullptr, nullptr); return; } static void TwoEnqueueMapImage(EnqueueWaitlistTest *test, cl_uint numWaits, cl_event *waits, cl_event *outEvent, bool blocking = false) { cl_int error; cl_image_desc desc = test->image->getImageDesc(); size_t origin[3] = {0, 0, 0}, region[3] = {desc.image_width, desc.image_height, 1}; size_t outPitch; size_t origin2[3] = {0, 0, 0}, region2[3] = {desc.image_width, desc.image_height, 1}; size_t outPitch2; void *mappedPtr = clEnqueueMapImage(test->pCmdQ, test->image, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error); test->test_error(error, "Unable to enqueue image map"); EXPECT_NE(nullptr, mappedPtr); test->test_error(error, "Unable to enqueue buffer map"); void *mappedPtr2 = clEnqueueMapImage(test->pCmdQ, test->imageNonZeroCopy, blocking ? 
CL_TRUE : CL_FALSE, CL_MAP_READ, origin2, region2, &outPitch2, NULL, 0, nullptr, nullptr, &error); test->test_error(error, "Unable to enqueue image map"); EXPECT_NE(nullptr, mappedPtr2); test->test_error(error, "Unable to enqueue buffer map"); error = clEnqueueUnmapMemObject(test->pCmdQ, test->image, mappedPtr, 0, nullptr, nullptr); error = clEnqueueUnmapMemObject(test->pCmdQ, test->imageNonZeroCopy, mappedPtr2, 0, nullptr, nullptr); return; } }; TEST_P(EnqueueWaitlistTest, BlockingWaitlist) { // Set up a user event, which we use as a gate for the second event clEventWrapper gateEvent = clCreateUserEvent(context, &error); test_error(error, "Unable to set up user gate event"); // Set up the execution of the action with its actual event clEventWrapper actualEvent; // call the function to execute GetParam()(this, 1, &gateEvent, &actualEvent, false); // Now release the user event, which will allow our actual action to run error = clSetUserEventStatus(gateEvent, CL_COMPLETE); test_error(error, "Unable to trigger gate event"); // Now we wait for completion. Note that we can actually wait on the event itself, at least at first error = clWaitForEvents(1, &actualEvent); test_error(error, "Unable to wait for actual test event"); } typedef EnqueueWaitlistTest EnqueueWaitlistTestTwoMapEnqueues; TEST_P(EnqueueWaitlistTestTwoMapEnqueues, TestPreviousVirtualEvent) { // Set up a user event, which we use as a gate for the second event clEventWrapper gateEvent = clCreateUserEvent(context, &error); test_error(error, "Unable to set up user gate event"); // Set up the execution of the action with its actual event clEventWrapper actualEvent; // call the function to execute GetParam()(this, 1, &gateEvent, &actualEvent, false); // Now release the user event, which will allow our actual action to run error = clSetUserEventStatus(gateEvent, CL_COMPLETE); // Now we wait for completion. 
Note that we can actually wait on the event itself, at least at first error = clWaitForEvents(1, &actualEvent); test_error(error, "Unable to wait for actual test event"); } TEST_P(EnqueueWaitlistTest, BlockingWaitlistNoOutEvent) { // Set up a user event, which we use as a gate for the second event clEventWrapper gateEvent = clCreateUserEvent(context, &error); test_error(error, "Unable to set up user gate event"); // call the function to execute GetParam()(this, 1, &gateEvent, nullptr, false); // Now release the user event, which will allow our actual action to run error = clSetUserEventStatus(gateEvent, CL_COMPLETE); test_error(error, "Unable to trigger gate event"); // Now we wait for completion. Note that we can actually wait on the event itself, at least at first error = clFinish(pCmdQ); test_error(error, "Finish FAILED"); } ExecuteEnqueue Enqueues[] = { &EnqueueWaitlistTest::EnqueueNDRange, &EnqueueWaitlistTest::EnqueueMapBuffer, &EnqueueWaitlistTest::EnqueueUnMapBuffer, &EnqueueWaitlistTest::EnqueueMapImage}; ExecuteEnqueue TwoEnqueueMap[] = { &EnqueueWaitlistTest::TwoEnqueueMapBuffer, &EnqueueWaitlistTest::TwoEnqueueMapImage}; INSTANTIATE_TEST_CASE_P( UnblockedEvent, EnqueueWaitlistTest, ::testing::ValuesIn(Enqueues)); INSTANTIATE_TEST_CASE_P( TwoEnqueueMap, EnqueueWaitlistTestTwoMapEnqueues, ::testing::ValuesIn(TwoEnqueueMap)); } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_write_buffer_event_tests.cpp000066400000000000000000000310771363734646600340250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/buffer_operations_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; 
// A blocking write with an output event returns an event that carries the queue's
// task level and reports CL_COMMAND_WRITE_BUFFER as its command type.
TEST_F(EnqueueWriteBufferTypeTest, eventShouldBeReturned) {
    cl_float hostData[] = {0.0f, 0.0f, 0.0f, 0.0f};
    const cl_bool blockingWrite = CL_TRUE;
    const size_t offset = 0;
    const size_t size = sizeof(cl_float);
    const cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event event = nullptr;

    auto retVal = CL_INVALID_VALUE;
    auto srcBuffer = std::unique_ptr(BufferHelper<>::create());

    retVal = pCmdQ->enqueueWriteBuffer(
        srcBuffer.get(),
        blockingWrite,
        offset,
        size,
        hostData,
        nullptr,
        numEventsInWaitList,
        eventWaitList,
        &event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);

    auto pEvent = (Event *)event;
    EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel);

    // Check CL_EVENT_COMMAND_TYPE
    {
        cl_command_type cmdType = 0;
        size_t sizeReturned = 0;
        auto infoStatus = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
        ASSERT_EQ(CL_SUCCESS, infoStatus);
        EXPECT_EQ(static_cast(CL_COMMAND_WRITE_BUFFER), cmdType);
        EXPECT_EQ(sizeof(cl_command_type), sizeReturned);
    }

    delete pEvent;
}

TEST_F(EnqueueWriteBufferTypeTest, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {
    uint32_t taskLevelCmdQ = 17;
    pCmdQ->taskLevel = taskLevelCmdQ;
    uint32_t taskLevelEvent1 = 8;
    uint32_t taskLevelEvent2 = 19;
    Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
    Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
    cl_bool blockingWrite = CL_TRUE;
    size_t offset = 0;
    size_t size = sizeof(cl_float);
    cl_float pDestMemory[] = {0.0f, 0.0f, 0.0f, 0.0f};
    cl_event eventWaitList[] =
        {
            &event1,
            &event2};
    cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
    cl_event event = nullptr;
    auto srcBuffer = std::unique_ptr(BufferHelper<>::create());
    auto retVal = pCmdQ->enqueueWriteBuffer(
        srcBuffer.get(),
        blockingWrite,
        offset,
        size,
        pDestMemory,
        nullptr,
        numEventsInWaitList,
        eventWaitList,
        &event);
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    EXPECT_LE(19u, pEvent->taskLevel);
    delete
pEvent; } TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndEventNotReadyWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, 4); cl_bool blockingWrite = CL_FALSE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = {&event1}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); cl_float mem[4]; retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), blockingWrite, 0, size, 
mem, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(CompletionStamp::levelNotReady, pEvent->taskLevel); EXPECT_EQ(CompletionStamp::levelNotReady, pCmdQ->taskLevel); event1.taskLevel = 20; event1.setStatus(CL_COMPLETE); pEvent->updateExecutionStatus(); pCmdQ->isQueueBlocked(); pEvent->release(); } TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, 0, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(17u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { 
&event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(17u, pCmdOOQ->taskLevel); pEvent->release(); } TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdQ->taskLevel); pEvent->release(); } TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_bool blockingRead = CL_TRUE; size_t size = sizeof(cl_float); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; auto srcBuffer = std::unique_ptr(BufferHelper<>::create()); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueWriteBuffer(srcBuffer.get(), blockingRead, 0, size, ptr, nullptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = castToObject(event); if (pCmdOOQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { EXPECT_EQ(taskLevelEvent2 + 1, pCmdOOQ->taskLevel); EXPECT_EQ(taskLevelEvent2 + 1, pEvent->taskLevel); } else { EXPECT_EQ(19u, pCmdOOQ->taskLevel); EXPECT_EQ(19u, pEvent->taskLevel); } pEvent->release(); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_fixture.h000066400000000000000000000047231363734646600336300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace 
NEO { struct EnqueueWriteBufferRectTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(); context.reset(new MockContext(pClDevice)); BufferDefaults::context = context.get(); //For 3D hostPtr = ::alignedMalloc(slicePitch * rowPitch, 4096); auto retVal = CL_INVALID_VALUE; buffer.reset(Buffer::create( context.get(), CL_MEM_READ_WRITE, slicePitch * rowPitch, nullptr, retVal)); nonZeroCopyBuffer.reset(BufferHelper>::create()); ASSERT_NE(nullptr, buffer.get()); } void TearDown() override { buffer.reset(); nonZeroCopyBuffer.reset(); ::alignedFree(hostPtr); context.reset(); CommandEnqueueFixture::TearDown(); } protected: template void enqueueWriteBufferRect2D(cl_bool blocking = CL_FALSE) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; auto retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), blocking, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } std::unique_ptr context; std::unique_ptr buffer; std::unique_ptr nonZeroCopyBuffer; void *hostPtr = nullptr; static const size_t rowPitch = 100; static const size_t slicePitch = 100 * 100; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp000066400000000000000000000616331363734646600336420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_fixture.h" #include 
"opencl/test/unit_test/fixtures/buffer_enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; HWTEST_F(EnqueueWriteBufferRectTest, GivenNullBufferWhenWritingBufferThenInvalidMemObjectErrorIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueWriteBufferRect( pCmdQ, nullptr, CL_FALSE, srcOrigin, dstOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueWriteBufferRectTest, GivenValidParamsWhenWritingBufferThenSuccessIsReturned) { auto retVal = CL_SUCCESS; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = clEnqueueWriteBufferRect( pCmdQ, buffer.get(), CL_TRUE, srcOrigin, dstOrigin, region, 10, 0, 10, 0, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EnqueueWriteBufferRectTest, GivenBlockingEnqueueWhenWritingBufferThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); enqueueWriteBufferRect2D(CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueWriteBufferRectTest, GivenNonBlockingEnqueueWhenWritingBufferThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; enqueueWriteBufferRect2D(CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, 
EnqueueWriteBufferRectTest, Given2dRegionWhenWritingBufferThenCommandsAreProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueWriteBufferRect2D(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_EQ(1u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueWriteBufferRectTest, WhenWritingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; enqueueWriteBufferRect2D(); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueWriteBufferRectTest, WhenWritingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); enqueueWriteBufferRect2D(); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); enqueueWriteBufferRect2D(); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = hostPtr; dc.dstMemObj = 
buffer.get(); dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {50, 50, 1}; dc.srcRowPitch = rowPitch; dc.srcSlicePitch = slicePitch; dc.dstRowPitch = rowPitch; dc.dstSlicePitch = slicePitch; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); EXPECT_NE(dshBefore, pDSH->getUsed()); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueWriteBufferRectTest, WhenWritingBufferThenL3ProgrammingIsCorrect) { enqueueWriteBufferRect2D(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, When2DEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueWriteBufferRect2D(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueWriteBufferRect2D(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, 
cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueWriteBufferRect2D(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueWriteBufferRectTest, WhenWritingBufferThenOnePipelineSelectIsProgrammed) { enqueueWriteBufferRect2D(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferRectTest, WhenWritingBufferThenMediaVfeStateIsCorrect) { enqueueWriteBufferRect2D(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdOOQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, 
nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; uint32_t taskLevelCmdQ = 17; pCmdQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; void *ptr = buffer->getCpuAddressForMemoryTransfer(); retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, numEventsInWaitList, eventWaitList, &event); ; EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdQ->taskLevel); EXPECT_EQ(CL_COMMAND_WRITE_BUFFER_RECT, (const int)pEvent->getCommandType()); pEvent->release(); } HWTEST_F(EnqueueWriteBufferRectTest, givenOutOfOrderQueueAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); uint32_t taskLevelCmdQ = 17; pCmdOOQ->taskLevel = taskLevelCmdQ; uint32_t taskLevelEvent1 = 8; uint32_t taskLevelEvent2 = 19; Event event1(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4); Event event2(pCmdOOQ.get(), CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10); cl_event eventWaitList[] = { &event1, &event2}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = 
nullptr; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; void *ptr = buffer->getCpuAddressForMemoryTransfer(); retVal = pCmdOOQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, numEventsInWaitList, eventWaitList, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = (Event *)event; EXPECT_EQ(19u, pEvent->taskLevel); EXPECT_EQ(19u, pCmdOOQ->taskLevel); EXPECT_EQ(CL_COMMAND_WRITE_BUFFER_RECT, (const int)pEvent->getCommandType()); pEvent->release(); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndRowPitchEqualZeroAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, slicePitch, 0, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndSlicePitchEqualZeroAndDstPtrEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, 0, rowPitch, 0, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointTheSameStorageWithHostWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { cl_int retVal = 
CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {50, 50, 0}; size_t hostOrigin[] = {20, 20, 0}; size_t region[] = {50, 50, 1}; size_t hostOffset = (bufferOrigin[2] - hostOrigin[2]) * slicePitch + (bufferOrigin[1] - hostOrigin[1]) * rowPitch + (bufferOrigin[0] - hostOrigin[0]); auto hostStorage = ptrOffset(ptr, hostOffset); retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, hostStorage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndMemObjWithOffsetPointDiffrentStorageWithHostWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = buffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {1, 1, 0}; size_t region[] = {1, 1, 1}; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueWriteBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZeroCopyBufferWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { cl_int retVal = CL_SUCCESS; void *ptr = nonZeroCopyBuffer->getCpuAddressForMemoryTransfer(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; retVal = pCmdQ->enqueueWriteBufferRect( nonZeroCopyBuffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueReadWriteBufferRectDispatch, 
givenOffsetResultingInMisalignedPtrWhenEnqueueWriteBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) { initializeFixture(); if (device->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); buffer->forceDisallowCPUCopy = true; Vec3 hostOffset(hostOrigin); auto misalignedHostPtr = ptrOffset(reinterpret_cast(memory), hostOffset.z * hostSlicePitch); auto retVal = cmdQ->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, memory, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; cmdQ->finish(); parseCommands(*cmdQ); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); if (kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress()); } else if (kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceState.getSurfaceBaseAddress()); } } if (kernel->getKernelInfo().kernelArgInfo[2].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin auto dstOffset = (uint32_t 
*)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedHostPtr, alignDown(misalignedHostPtr, 4)), *dstOffset); } else { // SrcOrigin arg should be 16 bytes in size, if that changes, above if path should be modified EXPECT_TRUE(false); } } using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferRectWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {50, 50, 1}; constexpr size_t rowPitch = 100; constexpr size_t slicePitch = 100 * 100; retVal = pCmdQ->enqueueWriteBufferRect( buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, rowPitch, slicePitch, rowPitch, slicePitch, ptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueWriteBufferRectHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; size_t bufferOrigin[3] = {0, 0, 0}; size_t hostOrigin[3] = {0, 0, 0}; size_t region[3] = {1, 1, 1}; size_t bufferRowPitch = 10; size_t bufferSlicePitch = 0; size_t hostRowPitch = 10; size_t hostSlicePitch = 10; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqeueWriteBufferRectStatelessTest = EnqueueWriteBufferRectHw; HWTEST_F(EnqeueWriteBufferRectStatelessTest, WhenWritingBufferRectStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueWriteBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, 
bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqeueWriteBufferRectStatefulTest = EnqueueWriteBufferRectHw; HWTEST_F(EnqeueWriteBufferRectStatefulTest, WhenWritingBufferRectStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueWriteBufferRect(&srcBuffer, CL_FALSE, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, missAlignedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp000066400000000000000000000576401363734646600326300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/buffer_operations_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; HWTEST_F(EnqueueWriteBufferTypeTest, GivenNullBufferWhenWrtingBufferThenInvalidMemObjectErrorIsReturned) { auto data = 1; auto retVal = clEnqueueWriteBuffer( pCmdQ, nullptr, false, 0, sizeof(data), &data, 0, nullptr, nullptr); 
EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } HWTEST_F(EnqueueWriteBufferTypeTest, GivenNullUserPointerWhenWritingBufferThenInvalidValueErrorIsReturned) { auto data = 1; auto retVal = clEnqueueWriteBuffer( pCmdQ, srcBuffer.get(), false, 0, sizeof(data), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(EnqueueWriteBufferTypeTest, GivenBlockingEnqueueWhenWritingBufferThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); srcBuffer->forceDisallowCPUCopy = true; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueWriteBufferTypeTest, GivenNonBlockingEnqueueWhenWritingBufferThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenWritingBufferThenCommandsAreProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); ASSERT_NE(cmdList.end(), itorWalker); auto *cmd = (GPGPU_WALKER *)*itorWalker; // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); 
EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; srcBuffer->forceDisallowCPUCopy = true; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), EnqueueWriteBufferTraits::blocking); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); srcBuffer->forceDisallowCPUCopy = true; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), EnqueueWriteBufferTraits::blocking); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); srcBuffer->forceDisallowCPUCopy = true; EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, srcBuffer.get(), EnqueueWriteBufferTraits::blocking); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = srcBuffer.get(); dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {srcBuffer->getSize(), 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); 
EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), kernel)); EXPECT_NE(iohBefore, pIOH->getUsed()); if (kernel->requiresSshForBuffers()) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenL3ProgrammingIsCorrect) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenWritingBufferThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); // All state should be programmed before walker auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), 
itorCmd); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenWritingBufferThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); // Extract the MIDL command auto itorCmd = find(itorPipelineSelect, itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorCmd; // Extract the SBA command itorCmd = find(cmdList.begin(), itorWalker); ASSERT_NE(itorWalker, itorCmd); auto *cmdSBA = (STATE_BASE_ADDRESS *)*itorCmd; // Extrach the DSH auto DSH = cmdSBA->getDynamicStateBaseAddress(); ASSERT_NE(0u, DSH); // IDD should be located within DSH auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto IDDEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(IDDEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); auto &IDD = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. 
auto kernelStartPointer = ((uint64_t)IDD.getKernelStartPointerHigh() << 32) + IDD.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, IDD.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, IDD.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, IDD.getConstantIndirectUrbEntryReadLength()); } HWTEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenOnePipelineSelectIsProgrammed) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteBufferTypeTest, WhenWritingBufferThenMediaVfeStateIsCorrect) { srcBuffer->forceDisallowCPUCopy = true; enqueueWriteBuffer(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithEnabledSupportCpuCopiesAndDstPtrEqualSrcPtrAndZeroCopyBufferTrueWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); void *ptr = zeroCopyBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdOOQ->enqueueWriteBuffer(zeroCopyBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); pCmdOOQ->flush(); } HWTEST_F(EnqueueWriteBufferTypeTest, givenOOQWithDisabledSupportCpuCopiesAndDstPtrEqualSrcPtrZeroCopyBufferWhenWriteBufferIsExecutedThenTaskLevelNotIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); cl_int retVal = CL_SUCCESS; std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); void *ptr = 
srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdOOQ->enqueueWriteBuffer(zeroCopyBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); pCmdOOQ->flush(); } HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; void *ptr = zeroCopyBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueWriteBuffer(zeroCopyBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); cl_int retVal = CL_SUCCESS; void *ptr = zeroCopyBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueWriteBuffer(zeroCopyBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndDisabledSupportCpuCopiesAndDstPtrZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); cl_int retVal = CL_SUCCESS; void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndEnabledSupportCpuCopiesAndDstPtrNonZeroCopyBufferEqualSrcPtrWhenWriteBufferIsExecutedThenTaskLevelShouldBeIncreased) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); cl_int retVal = CL_SUCCESS; void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); EXPECT_EQ(retVal, CL_SUCCESS); retVal = pCmdQ->enqueueWriteBuffer(srcBuffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 1u); } HWTEST_F(EnqueueWriteBufferTypeTest, givenEnqueueWriteBufferCalledWhenLockedPtrInTransferPropertisIsAvailableThenItIsNotUnlocked) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); } HWTEST_F(EnqueueWriteBufferTypeTest, givenForcedCpuCopyWhenEnqueueWriteCompressedBufferThenDontCopyOnCpu) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = 
std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->getGraphicsAllocation()->isLocked()); EXPECT_FALSE(mockCmdQ->cpuDataTransferHandlerCalled); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->getGraphicsAllocation()->isLocked()); EXPECT_TRUE(mockCmdQ->cpuDataTransferHandlerCalled); } HWTEST_F(EnqueueWriteBufferTypeTest, givenEnqueueWriteBufferCalledWhenLockedPtrInTransferPropertisIsNotAvailableThenItIsNotUnlocked) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); MockContext ctx; cl_int retVal; ctx.memoryManager = &memoryManager; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::System4KBPages); void *ptr = srcBuffer->getCpuAddressForMemoryTransfer(); retVal = mockCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); } using NegativeFailAllocationTest = Test; 
HWTEST_F(NegativeFailAllocationTest, givenEnqueueWriteBufferWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; retVal = pCmdQ->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } struct EnqueueWriteBufferHw : public ::testing::Test { void SetUp() override { if (is32bit) { GTEST_SKIP(); } device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } std::unique_ptr device; std::unique_ptr context; MockBuffer srcBuffer; uint64_t bigSize = 4ull * MemoryConstants::gigaByte; uint64_t smallSize = 4ull * MemoryConstants::gigaByte - 1; }; using EnqeueReadWriteStatelessTest = EnqueueWriteBufferHw; HWTEST_F(EnqeueReadWriteStatelessTest, WhenWritingBufferStatelessThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(bigSize); auto retVal = pCmdQ->enqueueWriteBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } using EnqeueWriteBufferStatefulTest = EnqueueWriteBufferHw; HWTEST_F(EnqeueWriteBufferStatefulTest, WhenWritingBufferStatefulThenSuccessIsReturned) { auto pCmdQ = std::make_unique>(context.get(), device.get()); void *missAlignedPtr = reinterpret_cast(0x1041); srcBuffer.size = static_cast(smallSize); auto retVal = pCmdQ->enqueueWriteBuffer(&srcBuffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, missAlignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_write_image_fixture.h000066400000000000000000000031751363734646600324240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/source/helpers/ptr_math.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { struct EnqueueWriteImageTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp(void) override { CommandEnqueueFixture::SetUp(); context = new MockContext(pClDevice); dstImage = Image2dHelper<>::create(context); const auto &imageDesc = dstImage->getImageDesc(); srcPtr = new float[imageDesc.image_width * imageDesc.image_height]; } void TearDown(void) override { delete dstImage; delete[] srcPtr; delete context; CommandEnqueueFixture::TearDown(); } protected: template void enqueueWriteImage(cl_bool blocking = EnqueueWriteImageTraits::blocking) { auto retVal = EnqueueWriteImageHelper<>::enqueueWriteImage( pCmdQ, dstImage, blocking); EXPECT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); } float *srcPtr = nullptr; Image *dstImage = nullptr; MockContext *context = nullptr; }; struct EnqueueWriteImageMipMapTest : public EnqueueWriteImageTest, public ::testing::WithParamInterface { }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp000066400000000000000000000571731363734646600324420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/test/unit_test/command_queue/enqueue_write_image_fixture.h" #include "opencl/test/unit_test/gen_common/gen_commands_common_validation.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include 
"opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenCommandsAreProgrammedCorrectly) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; enqueueWriteImage(); auto *cmd = reinterpret_cast(cmdWalker); ASSERT_NE(nullptr, cmd); // Verify GPGPU_WALKER parameters EXPECT_NE(0u, cmd->getThreadGroupIdXDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdYDimension()); EXPECT_NE(0u, cmd->getThreadGroupIdZDimension()); EXPECT_NE(0u, cmd->getRightExecutionMask()); EXPECT_NE(0u, cmd->getBottomExecutionMask()); EXPECT_EQ(GPGPU_WALKER::SIMD_SIZE_SIMD32, cmd->getSimdSize()); EXPECT_NE(0u, cmd->getIndirectDataLength()); EXPECT_FALSE(cmd->getIndirectParameterEnable()); // Compute the SIMD lane mask size_t simd = cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : cmd->getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = cmd->getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } } HWTEST_F(EnqueueWriteImageTest, GivenBlockingEnqueueWhenWritingImageThenTaskLevelIsNotIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; auto oldCsrTaskLevel = csr.peekTaskLevel(); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, CL_TRUE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(oldCsrTaskLevel, pCmdQ->taskLevel); } HWTEST_F(EnqueueWriteImageTest, GivenNonBlockingEnqueueWhenWritingImageThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); csr.taskCount = pCmdQ->taskCount + 100; csr.taskLevel = pCmdQ->taskLevel + 50; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, CL_FALSE); EXPECT_EQ(csr.peekTaskCount(), pCmdQ->taskCount); EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenTaskLevelIsIncremented) { auto taskLevelBefore = pCmdQ->taskLevel; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking); EXPECT_GT(pCmdQ->taskLevel, taskLevelBefore); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenCommandsAreAdded) { auto usedCmdBufferBefore = pCS->getUsed(); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking); EXPECT_NE(usedCmdBufferBefore, pCS->getUsed()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenIndirectDataIsAdded) { auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, EnqueueWriteImageTraits::blocking); 
EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), nullptr)); EXPECT_NE(iohBefore, pIOH->getUsed()); EXPECT_NE(sshBefore, pSSH->getUsed()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenL3ProgrammingIsCorrect) { enqueueWriteImage(); validateL3Programming(cmdList, itorWalker); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenEnqueueIsDoneThenStateBaseAddressIsProperlyProgrammed) { enqueueWriteImage(); auto &ultCsr = this->pDevice->getUltCommandStreamReceiver(); validateStateBaseAddress(ultCsr.getMemoryManager()->getInternalHeapBaseAddress(ultCsr.rootDeviceIndex), pDSH, pIOH, pSSH, itorPipelineSelect, itorWalker, cmdList, 0llu); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenMediaInterfaceDescriptorIsCorrect) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueWriteImage(); // All state should be programmed before walker auto cmd = reinterpret_cast(cmdMediaInterfaceDescriptorLoad); ASSERT_NE(nullptr, cmd); // Verify we have a valid length -- multiple of INTERFACE_DESCRIPTOR_DATAs EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % sizeof(INTERFACE_DESCRIPTOR_DATA)); // Validate the start address size_t alignmentStartAddress = 64 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorDataStartAddress() % alignmentStartAddress); // Validate the length EXPECT_NE(0u, cmd->getInterfaceDescriptorTotalLength()); size_t alignmentTotalLength = 32 * sizeof(uint8_t); EXPECT_EQ(0u, cmd->getInterfaceDescriptorTotalLength() % alignmentTotalLength); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaInterfaceDescriptorLoad); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenInterfaceDescriptorDataIsCorrect) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename 
FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; enqueueWriteImage(); // Extract the interfaceDescriptorData auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData; // Validate the kernel start pointer. Technically, a kernel can start at address 0 but let's force a value. auto kernelStartPointer = ((uint64_t)interfaceDescriptorData.getKernelStartPointerHigh() << 32) + interfaceDescriptorData.getKernelStartPointer(); EXPECT_LE(kernelStartPointer, cmdSBA->getInstructionBufferSize() * MemoryConstants::pageSize); // EnqueueWriteImage uses a byte copy. Need to convert to bytes. auto localWorkSize = 2 * 2 * sizeof(float); auto simd = 32; auto threadsPerThreadGroup = Math::divideAndRoundUp(localWorkSize, simd); EXPECT_EQ(threadsPerThreadGroup, interfaceDescriptorData.getNumberOfThreadsInGpgpuThreadGroup()); EXPECT_NE(0u, interfaceDescriptorData.getCrossThreadConstantDataReadLength()); EXPECT_NE(0u, interfaceDescriptorData.getConstantIndirectUrbEntryReadLength()); // We shouldn't have these pointers the same. 
EXPECT_NE(kernelStartPointer, interfaceDescriptorData.getBindingTablePointer()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenSurfaceStateIsProgrammedCorrectly) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; enqueueWriteImage(); // BufferToImage kernel uses BTI=1 for destSurface uint32_t bindingTableIndex = 1; const auto &surfaceState = getSurfaceState(&pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), bindingTableIndex); // EnqueueWriteImage uses multi-byte copies depending on per-pixel-size-in-bytes const auto &imageDesc = dstImage->getImageDesc(); EXPECT_EQ(imageDesc.image_width, surfaceState.getWidth()); EXPECT_EQ(imageDesc.image_height, surfaceState.getHeight()); EXPECT_NE(0u, surfaceState.getSurfacePitch()); EXPECT_NE(0u, surfaceState.getSurfaceType()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_FORMAT_R32_UINT, surfaceState.getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4, surfaceState.getSurfaceHorizontalAlignment()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4, surfaceState.getSurfaceVerticalAlignment()); EXPECT_EQ(dstImage->getGraphicsAllocation()->getGpuAddress(), surfaceState.getSurfaceBaseAddress()); } HWTEST_F(EnqueueWriteImageTest, WhenWritingImageThenOnePipelineSelectIsProgrammed) { enqueueWriteImage(); int numCommands = getNumberOfPipelineSelectsThatEnablePipelineSelect(); EXPECT_EQ(1, numCommands); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenMediaVfeStateIsCorrect) { enqueueWriteImage(); validateMediaVFEState(&pDevice->getHardwareInfo(), cmdMediaVfeState, cmdList, itorMediaVfeState); } HWTEST_F(EnqueueWriteImageTest, GivenImage1DarrayWhenReadWriteImageIsCalledThenHostPtrSizeIsCalculatedProperly) { auto dstImage2 = Image1dArrayHelper<>::create(context); auto imageDesc = dstImage2->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = 
{imageDesc.image_width, imageDesc.image_array_size, 1}; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage2, CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation1); EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, dstImage2, CL_FALSE, origin, region); auto temporaryAllocation2 = temporaryAllocation1->next; ASSERT_NE(nullptr, temporaryAllocation2); EXPECT_EQ(temporaryAllocation2->getUnderlyingBufferSize(), imageSize); delete dstImage2; } HWTEST_F(EnqueueWriteImageTest, GivenImage1DarrayWhenWriteImageIsCalledThenRowPitchIsSetToSlicePitch) { auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); EBuiltInOps::Type copyBuiltIn = EBuiltInOps::CopyBufferToImage3d; auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getDevice()); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); std::unique_ptr image; auto destImage = Image1dArrayHelper<>::create(context); auto imageDesc = destImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_array_size, 1}; size_t rowPitch = 64; size_t slicePitch = 128; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, destImage, CL_FALSE, origin, region, rowPitch, slicePitch); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(copyBuiltIn, pCmdQ->getDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(params->dstRowPitch, slicePitch); // restore original builder and retrieve mock 
builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( copyBuiltIn, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); delete destImage; } HWTEST_F(EnqueueWriteImageTest, GivenImage2DarrayWhenReadWriteImageIsCalledThenHostPtrSizeIsCalculatedProperly) { auto dstImage2 = Image2dArrayHelper<>::create(context); auto imageDesc = dstImage2->getImageDesc(); auto imageSize = imageDesc.image_width * imageDesc.image_height * imageDesc.image_array_size * 4; size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, imageDesc.image_array_size}; EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage2, CL_FALSE, origin, region); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto temporaryAllocation1 = csr.getTemporaryAllocations().peekHead(); ASSERT_NE(nullptr, temporaryAllocation1); EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, dstImage, CL_FALSE, origin, region); auto temporaryAllocation2 = temporaryAllocation1->next; ASSERT_NE(nullptr, temporaryAllocation2); EXPECT_EQ(temporaryAllocation1->getUnderlyingBufferSize(), imageSize); delete dstImage2; } HWTEST_F(EnqueueWriteImageTest, GivenImage1DAndImageShareTheSameStorageWithHostPtrWhenReadWriteImageIsCalledThenImageIsNotWritten) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage2(Image1dHelper<>::create(context)); auto imageDesc = dstImage2->getImageDesc(); std::unique_ptr pCmdOOQ(createCommandQueue(pClDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, 1, 1}; void *ptr = dstImage2->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage2->getHostPtrRowPitch(); size_t slicePitch = dstImage2->getHostPtrSlicePitch(); retVal = pCmdOOQ->enqueueWriteImage(dstImage2.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, 
retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdOOQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteImageTest, GivenImage1DArrayAndImageShareTheSameStorageWithHostPtrWhenReadWriteImageIsCalledThenImageIsNotWritten) { cl_int retVal = CL_SUCCESS; std::unique_ptr dstImage2(Image1dArrayHelper<>::create(context)); auto imageDesc = dstImage2->getImageDesc(); size_t origin[] = {imageDesc.image_width / 2, imageDesc.image_array_size / 2, 0}; size_t region[] = {imageDesc.image_width - (imageDesc.image_width / 2), imageDesc.image_array_size - (imageDesc.image_array_size / 2), 1}; void *ptr = dstImage2->getCpuAddressForMemoryTransfer(); auto bytesPerPixel = 4; size_t rowPitch = dstImage2->getHostPtrRowPitch(); size_t slicePitch = dstImage2->getHostPtrSlicePitch(); auto pOffset = origin[2] * rowPitch + origin[1] * slicePitch + origin[0] * bytesPerPixel; void *ptrStorage = ptrOffset(ptr, pOffset); retVal = pCmdQ->enqueueWriteImage(dstImage2.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptrStorage, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteImageTest, GivenSharedContextZeroCopy2DImageWhenEnqueueWriteImageWithMappedPointerIsCalledThenImageIsNotWritten) { cl_int retVal = CL_SUCCESS; context->isSharedContext = true; std::unique_ptr dstImage(ImageHelper>::create(context)); EXPECT_TRUE(dstImage->isMemObjZeroCopy()); auto imageDesc = dstImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = dstImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = dstImage->getHostPtrRowPitch(); size_t slicePitch = dstImage->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pCmdQ->taskLevel, 0u); } HWTEST_F(EnqueueWriteImageTest, 
GivenImage1DThatIsZeroCopyWhenWriteImageWithTheSamePointerAndOutputEventIsPassedThenEventHasCorrectCommandTypeSet) { cl_int retVal = CL_SUCCESS; std::unique_ptr srcImage(Image1dHelper<>::create(context)); auto imageDesc = srcImage->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; void *ptr = srcImage->getCpuAddressForMemoryTransfer(); size_t rowPitch = srcImage->getHostPtrRowPitch(); size_t slicePitch = srcImage->getHostPtrSlicePitch(); cl_uint numEventsInWaitList = 0; cl_event event = nullptr; retVal = pCmdQ->enqueueWriteImage(srcImage.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, numEventsInWaitList, nullptr, &event); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, event); auto pEvent = static_cast(event); EXPECT_EQ(static_cast(CL_COMMAND_WRITE_IMAGE), pEvent->getCommandType()); pEvent->release(); } typedef EnqueueWriteImageMipMapTest MipMapWriteImageTest; HWTEST_P(MipMapWriteImageTest, GivenImageWithMipLevelNonZeroWhenReadImageIsCalledThenProperMipLevelIsSet) { auto image_type = (cl_mem_object_type)GetParam(); auto builtIns = new MockBuiltins(); pCmdQ->getDevice().getExecutionEnvironment()->rootDeviceEnvironments[pCmdQ->getDevice().getRootDeviceIndex()]->builtins.reset(builtIns); auto &origBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getDevice()); // substitute original builder with mock builder auto oldBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getContext(), pCmdQ->getDevice(), std::unique_ptr(new MockBuiltinDispatchInfoBuilder(*builtIns, &origBuilder))); cl_int retVal = CL_SUCCESS; cl_image_desc imageDesc = {}; uint32_t expectedMipLevel = 3; imageDesc.image_type = image_type; imageDesc.num_mip_levels = 10; imageDesc.image_width = 4; imageDesc.image_height = 1; imageDesc.image_depth = 1; size_t origin[] = {0, 0, 0, 0}; size_t 
region[] = {imageDesc.image_width, 1, 1}; std::unique_ptr image; switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: origin[1] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageDesc.image_array_size = 2; origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D: origin[2] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageDesc.image_array_size = 2; origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; case CL_MEM_OBJECT_IMAGE3D: origin[3] = expectedMipLevel; image = std::unique_ptr(ImageHelper::create(context, &imageDesc)); break; } EXPECT_NE(nullptr, image.get()); auto hostPtrSize = Image::calculateHostPtrSize(region, image->getHostPtrRowPitch(), image->getHostPtrSlicePitch(), image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes, image_type); std::unique_ptr ptr = std::unique_ptr(new uint32_t[hostPtrSize]); retVal = pCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, region, 0, 0, ptr.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto &mockBuilder = static_cast(BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getDevice())); auto params = mockBuilder.getBuiltinOpParams(); EXPECT_EQ(expectedMipLevel, params->dstMipLevel); // restore original builder and retrieve mock builder auto newBuilder = builtIns->setBuiltinDispatchInfoBuilder( EBuiltInOps::CopyBufferToImage3d, pCmdQ->getContext(), pCmdQ->getDevice(), std::move(oldBuilder)); EXPECT_NE(nullptr, newBuilder); } INSTANTIATE_TEST_CASE_P(MipMapWriteImageTest_GivenImageWithMipLevelNonZeroWhenReadImageIsCalledThenProperMipLevelIsSet, MipMapWriteImageTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, 
CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); using NegativeFailAllocationTest = Test; HWTEST_F(NegativeFailAllocationTest, givenEnqueueReadImageWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { cl_int retVal = CL_SUCCESS; auto imageDesc = image->getImageDesc(); size_t origin[] = {0, 0, 0}; size_t region[] = {imageDesc.image_width, imageDesc.image_height, 1}; size_t rowPitch = image->getHostPtrRowPitch(); size_t slicePitch = image->getHostPtrSlicePitch(); retVal = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, region, rowPitch, slicePitch, ptr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/finish_tests.cpp000066400000000000000000000064241363734646600276700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; struct FinishFixture : public DeviceFixture, public CommandQueueHwFixture, public CommandStreamFixture, public HardwareParse { void SetUp() override { DeviceFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); CommandStreamFixture::SetUp(pCmdQ); ASSERT_NE(nullptr, pCS); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); CommandStreamFixture::TearDown(); CommandQueueHwFixture::TearDown(); DeviceFixture::TearDown(); } }; typedef Test FinishTest; HWTEST_F(FinishTest, GivenCsGreaterThanCqWhenFinishIsCalledThenPipeControlIsNotAdded) 
{ typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // HW = 1, CQ = 1, CS = 2 (last PC was 1) // This means there is no work in CQ that needs a PC uint32_t originalHwTag = 1; uint32_t originalCSRLevel = 2; uint32_t originalCQLevel = 1; *commandStreamReceiver.getTagAddress() = originalHwTag; commandStreamReceiver.taskLevel = originalCSRLevel; // Must be greater than or equal to HW pCmdQ->taskLevel = originalCQLevel; auto retVal = pCmdQ->finish(); ASSERT_EQ(CL_SUCCESS, retVal); // Don't need to artificially execute PIPE_CONTROL. // Nothing should have been sent //*pCS->getTagAddress() = originalCSRLevel; EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), originalCSRLevel); EXPECT_EQ(pCmdQ->taskLevel, originalCQLevel); EXPECT_GE(pCmdQ->getHwTag(), pCmdQ->taskLevel); auto sizeUsed = pCS->getUsed(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, pCmdBuffer, sizeUsed)); auto itorCmd = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorCmd); } HWTEST_F(FinishTest, WhenFinishIsCalledThenPipeControlIsNotAddedToCqCommandStream) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto retVal = pCmdQ->finish(); ASSERT_EQ(CL_SUCCESS, retVal); // Check for PIPE_CONTROL parseCommands(pCmdQ->getCS(1024)); auto itorCmd = reverse_find(cmdList.rbegin(), cmdList.rend()); EXPECT_EQ(cmdList.rend(), itorCmd); } HWTEST_F(FinishTest, givenFreshQueueWhenFinishIsCalledThenCommandStreamIsNotAllocated) { MockContext contextWithMockCmdQ(pClDevice, true); MockCommandQueueHw cmdQ(&contextWithMockCmdQ, pClDevice, 0); auto retVal = cmdQ.finish(); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, cmdQ.peekCommandStream()); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/flattened_id_tests.cpp000066400000000000000000000267171363734646600310410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/string.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "test.h" #include #include using namespace NEO; union GRF { float fRegs[8]; uint32_t dwRegs[8]; uint16_t wRegs[16]; }; // clang-format off uint16_t gFlattenedIDsSimD8[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 
0x8e, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; uint16_t gFlattenedIDsSimD1632[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 
0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, }; // clang-format on size_t getSizeFlattenedIDs(uint32_t simd, uint32_t lwsX, uint32_t lwsY, uint32_t lwsZ) { auto lws = lwsX * lwsY * lwsZ; lws = std::min(lws, 256u); auto iDsPerGrf = std::min(simd, 16u); auto numGRFs = Math::divideAndRoundUp(lws, iDsPerGrf); return numGRFs * sizeof(GRF); } void generateFlattenedIDs(void *buffer, uint32_t simd, uint32_t lwsX, uint32_t lwsY, uint32_t lwsZ) { auto pSrc = simd == 8 ? 
gFlattenedIDsSimD8 : gFlattenedIDsSimD1632; auto copySize = getSizeFlattenedIDs(simd, lwsX, lwsY, lwsZ); memcpy_s(buffer, copySize, pSrc, copySize); } struct FlattenedIDFixture : ::testing::TestWithParam> { void SetUp() override { simd = std::get<0>(GetParam()); localWorkSizeX = std::get<1>(GetParam()); localWorkSizeY = std::get<2>(GetParam()); localWorkSizeZ = std::get<3>(GetParam()); localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ; if (localWorkSize > 256) { localWorkSizeY = std::min(256 / localWorkSizeX, localWorkSizeY); localWorkSizeZ = std::min(256 / (localWorkSizeX * localWorkSizeY), localWorkSizeZ); localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ; } memset(buffer, 0xff, sizeof(buffer)); } void validateIDWithinLimits(uint32_t simd, uint32_t lwsX, uint32_t lwsY, uint32_t lwsZ, bool useFullRow) { auto numWorkItems = lwsX * lwsY * lwsZ; auto idsPerThread = simd; // As per BackEnd HLD, SIMD32 has 32 flattenedIDs per channel. SIMD8/16 has up to 16 flattenedIDs. auto skipPerThread = (simd == 32 || useFullRow) ? 32 : 16; auto pBuffer = buffer; size_t itemIndex = 0; auto numWorkItemsRemaining = numWorkItems; while (numWorkItemsRemaining > 0) { EXPECT_LT(pBuffer[itemIndex], numWorkItems); ++itemIndex; if (idsPerThread == itemIndex) { pBuffer += skipPerThread; itemIndex = 0; } --numWorkItemsRemaining; } } void validateAllWorkItemsCovered(uint32_t simd, uint32_t lwsX, uint32_t lwsY, uint32_t lwsZ, bool useFullRow) { auto numWorkItems = lwsX * lwsY * lwsZ; auto idsPerThread = simd; // As per BackEnd HLD, SIMD32 has 32 localIDs per channel. SIMD8/16 has up to 16 localIDs. auto skipPerThread = (simd == 32 || useFullRow) ? 
32 : 16; // Initialize local ID hit table uint32_t localIDHitTable[8]; memset(localIDHitTable, 0, sizeof(localIDHitTable)); auto pBuffer = buffer; size_t itemIndex = 0; auto numWorkItemsRemaining = numWorkItems; while (numWorkItemsRemaining > 0) { // Flatten out the IDs auto workItem = pBuffer[itemIndex]; ASSERT_LT(workItem, 256u); // Look up in the hit table auto &hitItem = localIDHitTable[workItem / 32]; auto hitBit = 1 << (workItem % 32); // No double-hits EXPECT_EQ(0u, hitItem & hitBit); // Set that work item as hit hitItem |= hitBit; ++itemIndex; if (idsPerThread == itemIndex) { pBuffer += skipPerThread; itemIndex = 0; } --numWorkItemsRemaining; } // All entries in hit table should be in form of n^2 - 1 for (unsigned int i : localIDHitTable) { EXPECT_EQ(0u, i & (i + 1)); } } // Test parameters uint32_t localWorkSize; uint32_t localWorkSizeX; uint32_t localWorkSizeY; uint32_t localWorkSizeZ; uint32_t simd; // Provide support for a max LWS of 256 // 32 threads @ SIMD8 // 16 lanes per thread (SIMD8 - only 8 used) uint16_t buffer[32 * 16]; }; HWTEST_P(FlattenedIDFixture, checkIDWithinLimits) { generateFlattenedIDs(buffer, simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); validateIDWithinLimits(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } HWTEST_P(FlattenedIDFixture, checkAllWorkItemsCovered) { generateFlattenedIDs(buffer, simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } TEST_P(FlattenedIDFixture, sizeCalculationLocalIDs) { auto sizePerThreadData = getSizeFlattenedIDs( simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ); // Should be multiple of GRFs auto sizeGRF = sizeof(GRF); EXPECT_EQ(0u, sizePerThreadData % sizeGRF); auto workItems = localWorkSizeX * localWorkSizeY * localWorkSizeZ; auto idsPerGRF = (simd == 8) ? 
8 : 16; auto numGRFsExpected = Math::divideAndRoundUp(workItems, idsPerGRF); EXPECT_EQ(numGRFsExpected, sizePerThreadData / sizeGRF); } #define SIMDParams ::testing::Values(8, 16, 32) #if HEAVY_DUTY_TESTING #define LWSXParams ::testing::Values(1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 64, 128, 256) #define LWSYParams ::testing::Values(1, 2, 3, 4, 5, 6, 7, 8) #define LWSZParams ::testing::Values(1, 2, 3, 4) #else #define LWSXParams ::testing::Values(1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 64, 128, 256) #define LWSYParams ::testing::Values(1, 2, 4, 8) #define LWSZParams ::testing::Values(1) #endif INSTANTIATE_TEST_CASE_P(AllCombinations, FlattenedIDFixture, ::testing::Combine(SIMDParams, LWSXParams, LWSYParams, LWSZParams)); // To debug a specific configuration replace the list of Values with specific values. // NOTE: You'll need a unique test prefix INSTANTIATE_TEST_CASE_P(SingleTest, FlattenedIDFixture, ::testing::Combine( ::testing::Values(32), //SIMD ::testing::Values(5), //LWSX ::testing::Values(6), //LWSY ::testing::Values(7))); //LWSZ compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/flush_tests.cpp000066400000000000000000000020171363734646600275230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "gtest/gtest.h" using namespace NEO; struct FlushTest : public DeviceFixture, public CommandQueueFixture, public CommandStreamFixture, public ::testing::Test { using CommandQueueFixture::SetUp; void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(nullptr, pClDevice, 0); CommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); 
DeviceFixture::TearDown(); } }; TEST_F(FlushTest, WhenFlushingThenSuccessIsReturned) { auto retVal = pCmdQ->flush(); EXPECT_EQ(retVal, CL_SUCCESS); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/get_command_queue_info_tests.cpp000066400000000000000000000067101363734646600331020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "gtest/gtest.h" using namespace NEO; struct GetCommandQueueInfoTest : public DeviceFixture, public ContextFixture, public CommandQueueFixture, ::testing::TestWithParam { using CommandQueueFixture::SetUp; using ContextFixture::SetUp; GetCommandQueueInfoTest() { } void SetUp() override { properties = GetParam(); DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); CommandQueueFixture::SetUp(pContext, pClDevice, properties); } void TearDown() override { CommandQueueFixture::TearDown(); ContextFixture::TearDown(); DeviceFixture::TearDown(); } const HardwareInfo *pHwInfo = nullptr; cl_command_queue_properties properties; }; TEST_P(GetCommandQueueInfoTest, CONTEXT) { cl_context contextReturned = nullptr; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_CONTEXT, sizeof(contextReturned), &contextReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ((cl_context)pContext, contextReturned); } TEST_P(GetCommandQueueInfoTest, DEVICE) { cl_device_id device_expected = pClDevice; cl_device_id device_id_returned = nullptr; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_DEVICE, sizeof(device_id_returned), &device_id_returned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(device_expected, device_id_returned); } TEST_P(GetCommandQueueInfoTest, QUEUE_PROPERTIES) { cl_command_queue_properties command_queue_properties_returned = 0; 
auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_PROPERTIES, sizeof(command_queue_properties_returned), &command_queue_properties_returned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(properties, command_queue_properties_returned); } TEST_P(GetCommandQueueInfoTest, QUEUE_SIZE) { cl_uint queueSize = 0; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_SIZE, sizeof(queueSize), &queueSize, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(GetCommandQueueInfoTest, QUEUE_DEVICE_DEFAULT) { cl_command_queue commandQueueReturned = nullptr; auto retVal = pCmdQ->getCommandQueueInfo( CL_QUEUE_DEVICE_DEFAULT, sizeof(commandQueueReturned), &commandQueueReturned, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // host queue can't be default device queue EXPECT_NE(pCmdQ, commandQueueReturned); } TEST_P(GetCommandQueueInfoTest, GivenInvalidParameterWhenGettingCommandQueueInfoThenInvalidValueIsReturned) { cl_uint parameterReturned = 0; cl_command_queue_info invalidParameter = 0xdeadbeef; auto retVal = pCmdQ->getCommandQueueInfo( invalidParameter, sizeof(parameterReturned), ¶meterReturned, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } INSTANTIATE_TEST_CASE_P( GetCommandQueueInfoTest, GetCommandQueueInfoTest, ::testing::ValuesIn(DefaultCommandQueueProperties)); compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp000066400000000000000000000561111363734646600334500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/enqueue_fill_buffer.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/enqueue_read_buffer.h" #include "opencl/source/command_queue/enqueue_write_buffer.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/hardware_commands_helper.h" 
#include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "test.h" using namespace NEO; struct GetSizeRequiredBufferTest : public CommandEnqueueFixture, public SimpleArgKernelFixture, public HelloWorldKernelFixture, public ::testing::Test { using HelloWorldKernelFixture::SetUp; using SimpleArgKernelFixture::SetUp; GetSizeRequiredBufferTest() { } void SetUp() override { CommandEnqueueFixture::SetUp(); SimpleArgKernelFixture::SetUp(pClDevice); HelloWorldKernelFixture::SetUp(pClDevice, "CopyBuffer_simd", "CopyBuffer"); BufferDefaults::context = new MockContext; srcBuffer = BufferHelper<>::create(); dstBuffer = BufferHelper<>::create(); patternAllocation = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize}); pDevice->setPreemptionMode(PreemptionMode::Disabled); } void TearDown() override { context->getMemoryManager()->freeGraphicsMemory(patternAllocation); delete dstBuffer; delete srcBuffer; delete BufferDefaults::context; HelloWorldKernelFixture::TearDown(); SimpleArgKernelFixture::TearDown(); CommandEnqueueFixture::TearDown(); } Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; GraphicsAllocation *patternAllocation = nullptr; }; HWTEST_F(GetSizeRequiredBufferTest, WhenFillingBufferThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = 
pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::FillBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; MemObj patternMemObj(this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(), patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true); dc.srcMemObj = &patternMemObj; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0}; dc.size = {EnqueueFillBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_FILL_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueCopyBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcBuffer; dc.srcMemObj = dstBuffer; dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0}; dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0}; dc.size = {EnqueueCopyBufferTraits::size, 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = 
HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); EnqueueReadBufferHelper<>::enqueueReadBuffer( pCmdQ, srcBuffer, CL_FALSE); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.dstPtr = EnqueueReadBufferTraits::hostPtr; dc.srcMemObj = srcBuffer; dc.srcOffset = {EnqueueReadBufferTraits::offset, 0, 0}; dc.size = {srcBuffer->getSize(), 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = 
ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferBlockingThenThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); srcBuffer->forceDisallowCPUCopy = true; EnqueueReadBufferHelper<>::enqueueReadBuffer( pCmdQ, srcBuffer, CL_TRUE); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.dstPtr = EnqueueReadBufferTraits::hostPtr; dc.srcMemObj = srcBuffer; 
dc.srcOffset = {EnqueueReadBufferTraits::offset, 0, 0}; dc.size = {srcBuffer->getSize(), 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, dstBuffer, 
CL_FALSE); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {dstBuffer->getSize(), 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); dstBuffer->forceDisallowCPUCopy = true; auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer( pCmdQ, dstBuffer, CL_TRUE); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {dstBuffer->getSize(), 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, false, *pCmdQ, multiDispatchInfo); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = 
HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned) { MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteBufferTraits::hostPtr; dc.dstMemObj = dstBuffer; dc.dstOffset = {EnqueueWriteBufferTraits::offset, 0, 0}; dc.size = {dstBuffer->getSize(), 0, 0}; builder.buildDispatchInfos(multiDispatchInfo, dc); builder.buildDispatchInfos(multiDispatchInfo, dc); builder.buildDispatchInfos(multiDispatchInfo, dc); builder.buildDispatchInfos(multiDispatchInfo, dc); auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize(); sizeSSH += sizeSSH ? 
FamilyType::BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; sizeSSH = alignUp(sizeSSH, MemoryConstants::cacheLineSize); sizeSSH *= 4u; sizeSSH = alignUp(sizeSSH, MemoryConstants::pageSize); EXPECT_EQ(4u, multiDispatchInfo.size()); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); EXPECT_EQ(sizeSSH, expectedSizeSSH); } HWTEST_F(GetSizeRequiredBufferTest, GivenHelloWorldKernelWhenEnqueingKernelThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef HelloWorldKernelFixture KernelFixture; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); size_t workSize[] = {256}; auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel, 1, nullptr, workSize, workSize); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto dshAfter = pDSH->getUsed(); auto iohAfter = pIOH->getUsed(); auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*KernelFixture::pKernel, workSize[0]); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, dshAfter - dshBefore); EXPECT_GE(expectedSizeIOH, iohAfter - iohBefore); EXPECT_GE(expectedSizeSSH, sshAfter - sshBefore); } HWTEST_F(GetSizeRequiredBufferTest, GivenKernelWithSimpleArgWhenEnqueingKernelThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef SimpleArgKernelFixture KernelFixture; auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto dshBefore = pDSH->getUsed(); auto iohBefore = pIOH->getUsed(); auto sshBefore = pSSH->getUsed(); size_t workSize[] = {256}; auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel, 1, nullptr, workSize, workSize); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto dshAfter = pDSH->getUsed(); auto iohAfter = pIOH->getUsed(); auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*KernelFixture::pKernel, workSize[0]); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, dshAfter - dshBefore); EXPECT_GE(expectedSizeIOH, iohAfter - iohBefore); EXPECT_GE(expectedSizeSSH, sshAfter - sshBefore); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp000066400000000000000000000416461363734646600332700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_copy_image.h" #include "opencl/source/command_queue/enqueue_fill_image.h" #include "opencl/source/command_queue/enqueue_read_image.h" #include "opencl/source/command_queue/enqueue_write_image.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/event.h" #include "opencl/source/event/perf_counter.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_write_image_fixture.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; struct GetSizeRequiredImageTest : public CommandEnqueueFixture, public ::testing::Test { GetSizeRequiredImageTest() { } void SetUp() override { CommandEnqueueFixture::SetUp(); srcImage = Image2dHelper<>::create(context); dstImage = Image2dHelper<>::create(context); 
pDevice->setPreemptionMode(PreemptionMode::Disabled); } void TearDown() override { delete dstImage; delete srcImage; CommandEnqueueFixture::TearDown(); } Image *srcImage = nullptr; Image *dstImage = nullptr; }; HWTEST_F(GetSizeRequiredImageTest, WhenCopyingImageThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueCopyImageHelper<>::enqueueCopyImage(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstMemObj = dstImage; dc.srcOffset = EnqueueCopyImageTraits::srcOrigin; dc.dstOffset = EnqueueCopyImageTraits::dstOrigin; dc.size = {1, 1, 1}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); std::unique_ptr program(Program::create("CopyImageToImage3d", context, *pClDevice, true, nullptr)); cl_device_id device = pClDevice; program->build(1, &device, nullptr, nullptr, nullptr, false); std::unique_ptr kernel(Kernel::create(program.get(), *program->getKernelInfo("CopyImageToImage3d"), nullptr)); EXPECT_NE(nullptr, kernel); // This kernel does not operate on OpenCL 2.0 Read and Write images EXPECT_EQ(kernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) false); // Simulate that the kernel actually operates on OpenCL 2.0 Read and Write images. 
// Such kernel may require special WA DisableLSQCROPERFforOCL during construction of Command Buffer struct SPatchExecutionEnvironment *pExecEnv = (struct SPatchExecutionEnvironment *)kernel->getKernelInfo().patchInfo.executionEnvironment; pExecEnv->UsesFencesForReadWriteImages = (uint32_t) true; EXPECT_EQ(kernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) true); // Enqueue kernel that may require special WA DisableLSQCROPERFforOCL auto retVal = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, kernel.get()); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get()); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel.get()); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel.get()); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel.get()); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); pExecEnv->UsesFencesForReadWriteImages = (uint32_t) false; EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueReadImageHelper<>::enqueueReadImage( pCmdQ, srcImage, CL_FALSE); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstPtr = EnqueueReadImageTraits::hostPtr; dc.srcOffset = EnqueueReadImageTraits::origin; dc.size = EnqueueReadImageTraits::region; dc.srcRowPitch = EnqueueReadImageTraits::rowPitch; dc.srcSlicePitch = EnqueueReadImageTraits::slicePitch; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel); auto 
expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueReadImageHelper<>::enqueueReadImage( pCmdQ, srcImage, CL_TRUE); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage; dc.dstPtr = EnqueueReadImageTraits::hostPtr; dc.srcOffset = EnqueueReadImageTraits::origin; dc.size = EnqueueReadImageTraits::region; dc.srcRowPitch = EnqueueReadImageTraits::rowPitch; dc.srcSlicePitch = EnqueueReadImageTraits::slicePitch; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = 
commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageNonBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueWriteImageHelper<>::enqueueWriteImage( pCmdQ, dstImage, CL_FALSE); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteImageTraits::hostPtr; dc.dstMemObj = dstImage; dc.dstOffset = EnqueueWriteImageTraits::origin; dc.size = EnqueueWriteImageTraits::region; dc.dstRowPitch = EnqueueWriteImageTraits::rowPitch; 
dc.dstSlicePitch = EnqueueWriteImageTraits::slicePitch; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageBlockingThenHeapsAndCommandBufferConsumedMinimumRequiredSize) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto &dsh = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); auto retVal = EnqueueWriteImageHelper<>::enqueueWriteImage( pCmdQ, dstImage, CL_TRUE); EXPECT_EQ(CL_SUCCESS, retVal); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, pCmdQ->getDevice()); ASSERT_NE(nullptr, 
&builder); BuiltinOpParams dc; dc.srcPtr = EnqueueWriteImageTraits::hostPtr; dc.dstMemObj = dstImage; dc.dstOffset = EnqueueWriteImageTraits::origin; dc.size = EnqueueWriteImageTraits::region; dc.dstRowPitch = EnqueueWriteImageTraits::rowPitch; dc.dstSlicePitch = EnqueueWriteImageTraits::slicePitch; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. 
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); EXPECT_GE(expectedSizeDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(expectedSizeIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/get_size_required_tests.cpp000066400000000000000000000074351363734646600321240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_barrier.h" #include "opencl/source/command_queue/enqueue_marker.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct GetSizeRequiredTest : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(); dsh = &pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u); ioh = &pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); ssh = &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); usedBeforeDSH = dsh->getUsed(); usedBeforeIOH = ioh->getUsed(); usedBeforeSSH = ssh->getUsed(); } void TearDown() override { CommandEnqueueFixture::TearDown(); } IndirectHeap *dsh; IndirectHeap *ioh; IndirectHeap *ssh; size_t usedBeforeDSH; size_t usedBeforeIOH; size_t usedBeforeSSH; }; HWTEST_F(GetSizeRequiredTest, WhenFinishingThenHeapsAndCommandBufferAreNotConsumed) { auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); auto retVal = pCmdQ->finish(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, commandStream.getUsed() - usedBeforeCS); EXPECT_EQ(0u, dsh->getUsed() - 
usedBeforeDSH);
    EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH);
    EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH);
}

// Enqueues a marker that waits on one event and checks that the indirect heaps
// stay untouched, and that the command stream grows only by the cache-line
// aligned pipe control expected when timestamp packet writes are enabled.
HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAndCommandBufferAreNotConsumed) {
    auto &commandStream = pCmdQ->getCS(1024);
    auto usedBeforeCS = commandStream.getUsed();

    Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    cl_event eventBeingWaitedOn = &event1;
    cl_event eventReturned = nullptr;
    auto retVal = pCmdQ->enqueueMarkerWithWaitList(
        1,
        &eventBeingWaitedOn,
        &eventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Nothing is expected in the stream unless timestamp packets are written.
    size_t expectedStreamSize = 0;
    if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
        expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(
                                         pDevice->getHardwareInfo()),
                                     MemoryConstants::cacheLineSize);
    }

    EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS);
    EXPECT_EQ(0u, dsh->getUsed() - usedBeforeDSH);
    EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH);
    EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH);

    clReleaseEvent(eventReturned);
}

// Enqueues a barrier that waits on one event and applies the same checks as
// the marker test: expected command-stream growth and untouched heaps.
HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAndCommandBufferAreNotConsumed) {
    auto &commandStream = pCmdQ->getCS(1024);
    auto usedBeforeCS = commandStream.getUsed();

    Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    cl_event eventBeingWaitedOn = &event1;
    cl_event eventReturned = nullptr;
    auto retVal = pCmdQ->enqueueBarrierWithWaitList(
        1,
        &eventBeingWaitedOn,
        &eventReturned);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Nothing is expected in the stream unless timestamp packets are written.
    size_t expectedStreamSize = 0;
    if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
        expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(
                                         pDevice->getHardwareInfo()),
                                     MemoryConstants::cacheLineSize);
    }

    EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS);

    // The test name promises the heaps are not consumed; verify it explicitly
    // against the values captured by the fixture's SetUp().
    EXPECT_EQ(0u, dsh->getUsed() - usedBeforeDSH);
    EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH);
    EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH);

    clReleaseEvent(eventReturned);
}
compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/gl/000077500000000000000000000000001363734646600250565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/gl/CMakeLists.txt000066400000000000000000000001471363734646600276200ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/gl/windows/000077500000000000000000000000001363734646600265505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/gl/windows/CMakeLists.txt000066400000000000000000000005341363734646600313120ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_command_queue_gl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_gl_tests_windows.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_command_queue_gl_windows}) endif() enqueue_kernel_gl_tests_windows.cpp000066400000000000000000000054101363734646600356620ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/gl/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include 
"opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" using namespace NEO; typedef HelloWorldFixture EnqueueKernelFixture; typedef Test EnqueueKernelTest; TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenResetPatchAddress) { auto nonSharedBuffer = new MockBuffer; MockGlSharing glSharing; glSharing.uploadDataToBufferInfo(1, 0); pContext->setSharingFunctions(glSharing.sharingFunctions.release()); auto retVal = CL_SUCCESS; auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1, &retVal); auto sharedMem = static_cast(sharedBuffer); auto nonSharedMem = static_cast(nonSharedBuffer); pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); pKernel->setArg(1, sizeof(cl_mem *), &nonSharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); auto &kernelInfo = pKernel->getKernelInfo(); auto pKernelArg = (uint32_t *)(pKernel->getCrossThreadData() + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto address1 = static_cast(*pKernelArg); auto sharedBufferGpuAddress = pKernel->isBuiltIn ? sharedBuffer->getGraphicsAllocation()->getGpuAddress() : sharedBuffer->getGraphicsAllocation()->getGpuAddressToPatch(); EXPECT_EQ(sharedBufferGpuAddress, address1); // update address glSharing.uploadDataToBufferInfo(1, 1); pCmdQ->enqueueAcquireSharedObjects(1, &sharedMem, 0, nullptr, nullptr, CL_COMMAND_ACQUIRE_GL_OBJECTS); callOneWorkItemNDRKernel(); auto address2 = static_cast(*pKernelArg); EXPECT_NE(address1, address2); sharedBufferGpuAddress = pKernel->isBuiltIn ? 
sharedBuffer->getGraphicsAllocation()->getGpuAddress() : sharedBuffer->getGraphicsAllocation()->getGpuAddressToPatch(); EXPECT_EQ(sharedBufferGpuAddress, address2); delete sharedBuffer; delete nonSharedBuffer; } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/ioq_task_tests.cpp000066400000000000000000000101331363734646600302120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" using namespace NEO; typedef HelloWorldTest IOQ; TEST_F(IOQ, WhenEnqueueingKernelThenTaskLevelIsIncremented) { auto previousTaskLevel = pCmdQ->taskLevel; EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); } TEST_F(IOQ, WhenFillingBufferThenTaskLevelIsIncremented) { auto previousTaskLevel = pCmdQ->taskLevel; EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); } TEST_F(IOQ, WhenReadingBufferThenTaskLevelIsIncremented) { auto previousTaskLevel = pCmdQ->taskLevel; auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // task level is not increased if doing cpu copy EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get()); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); } TEST_F(IOQ, WhenEnqueueingKernelThenTaskCountIsIncremented) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); EnqueueKernelHelper<>::enqueueKernel(pCmdQ, pKernel); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } TEST_F(IOQ, WhenFillingBufferThenTaskCountIsIncremented) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); 
EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_LE(pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } TEST_F(IOQ, WhenReadingBufferThenTaskCountIsIncremented) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // task level is not increased if doing cpu copy EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get()); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_LE(pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } TEST_F(IOQ, GivenUserEventWhenReadingBufferThenTaskCountAndTaskLevelAreIncremented) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); auto previousTaskCount = pCmdQ->taskCount; auto previousTaskLevel = pCmdQ->taskLevel; auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); buffer->forceDisallowCPUCopy = true; // task level is not increased if doing cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/ioq_task_tests_mt.cpp000066400000000000000000000200021363734646600307060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" using namespace NEO; typedef HelloWorldTest IOQTaskTestsMt; TEST_F(IOQTaskTestsMt, GivenBlockingAndBlockedOnUserEventWhenReadingBufferThenTaskCountAndTaskLevelAreIncremented) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto previousTaskLevel = pCmdQ->taskLevel; auto previousTaskCount = pCmdQ->taskCount; std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); buffer->forceDisallowCPUCopy = true; // no task level incrasing when cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_LT(previousTaskLevel, pCmdQ->taskLevel); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); } TEST_F(IOQTaskTestsMt, GivenBlockedOnUserEventWhenEnqueingMarkerThenSuccessIsReturned) { auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); retVal = pCmdQ->enqueueMarkerWithWaitList( 
1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(IOQTaskTestsMt, GivenMultipleThreadsWhenMappingBufferThenEventsAreCompleted) { AlignedBuffer alignedBuffer; auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_event outputEvent = nullptr; void *mappedPtr = pCmdQ->enqueueMapBuffer(&alignedBuffer, false, CL_MAP_READ, 0, alignedBuffer.getSize(), 1, &userEvent, &outputEvent, retVal); EXPECT_EQ(CL_SUCCESS, retVal); const int32_t numThreads = 20; std::thread threads[numThreads]; std::thread threadUnblocking; cl_event ouputEventsFromThreads[numThreads]; void *mappedPtrs[numThreads]; for (int32_t i = 0; i < numThreads; i++) { threads[i] = std::thread([&](int32_t index) { cl_int errCode = CL_SUCCESS; cl_int success = CL_SUCCESS; mappedPtrs[index] = pCmdQ->enqueueMapBuffer(&alignedBuffer, false, CL_MAP_READ, 0, alignedBuffer.getSize(), 0, nullptr, &ouputEventsFromThreads[index], errCode); EXPECT_EQ(success, errCode); }, i); if (i == numThreads / 2) { threadUnblocking = std::thread([=]() { auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, ret); }); } } cl_int errCode = clWaitForEvents(1, &outputEvent); EXPECT_EQ(CL_SUCCESS, errCode); cl_int eventStatus = 0; errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); for (int32_t i = 0; i < numThreads; i++) { threads[i].join(); cl_int errCode = clWaitForEvents(1, &ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); } threadUnblocking.join(); retVal = clReleaseEvent(userEvent); for (int32_t i = 0; i < numThreads; i++) { pCmdQ->enqueueUnmapMemObject(&alignedBuffer, 
mappedPtrs[i], 0, nullptr, nullptr); retVal = clReleaseEvent(ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, retVal); } pCmdQ->enqueueUnmapMemObject(&alignedBuffer, mappedPtr, 0, nullptr, nullptr); retVal = clReleaseEvent(outputEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(IOQTaskTestsMt, GivenMultipleThreadsWhenMappingImageThenEventsAreCompleted) { auto image = std::unique_ptr(ImageHelper::create(context)); auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_event outputEvent = nullptr; const size_t origin[] = {0, 0, 0}; const size_t region[] = {1, 1, 1}; void *mappedPtr = pCmdQ->enqueueMapImage(image.get(), false, CL_MAP_READ, origin, region, nullptr, nullptr, 1, &userEvent, &outputEvent, retVal); EXPECT_EQ(CL_SUCCESS, retVal); const int32_t numThreads = 20; std::thread threads[numThreads]; std::thread threadUnblocking; cl_event ouputEventsFromThreads[numThreads]; void *mappedPtrs[numThreads]; for (int32_t i = 0; i < numThreads; i++) { threads[i] = std::thread([&](int32_t index) { cl_int errCode = CL_SUCCESS; cl_int success = CL_SUCCESS; mappedPtrs[index] = pCmdQ->enqueueMapImage(image.get(), false, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, &ouputEventsFromThreads[index], errCode); EXPECT_EQ(success, errCode); }, i); if (i == numThreads / 2) { threadUnblocking = std::thread([=]() { auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, ret); }); } } cl_int errCode = clWaitForEvents(1, &outputEvent); EXPECT_EQ(CL_SUCCESS, errCode); cl_int eventStatus = 0; errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); for (int32_t i = 0; i < numThreads; i++) { threads[i].join(); cl_int errCode = clWaitForEvents(1, &ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clGetEventInfo(outputEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), 
&eventStatus, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(CL_COMPLETE, eventStatus); } threadUnblocking.join(); retVal = clReleaseEvent(userEvent); for (int32_t i = 0; i < numThreads; i++) { pCmdQ->enqueueUnmapMemObject(image.get(), mappedPtrs[i], 0, nullptr, nullptr); retVal = clReleaseEvent(ouputEventsFromThreads[i]); EXPECT_EQ(CL_SUCCESS, retVal); } pCmdQ->enqueueUnmapMemObject(image.get(), mappedPtr, 0, nullptr, nullptr); retVal = clReleaseEvent(outputEvent); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/local_id_tests.cpp000066400000000000000000000516111363734646600301540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/command_queue/local_id_gen.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "test.h" #include #include using namespace NEO; using LocalIdTests = ::testing::Test; HWTEST_F(LocalIdTests, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) { uint32_t simd = 8; EXPECT_EQ(1u, getGRFsPerThread(simd, 32)); } HWTEST_F(LocalIdTests, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) { uint32_t simd = 16; EXPECT_EQ(1u, getGRFsPerThread(simd, 32)); } HWTEST_F(LocalIdTests, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) { uint32_t simd = 32; EXPECT_EQ(2u, getGRFsPerThread(simd, 32)); } HWTEST_F(LocalIdTests, GivenSimd32AndNon32GrfSizeWhenGettingGrfsPerThreadThenTwoIsReturned) { uint32_t simd = 32; EXPECT_EQ(1u, getGRFsPerThread(simd, 33)); } TEST(LocalID, GivenSimd32AndLws33WhenGettingThreadsPerWorkgroupThenTwoIsReturned) { size_t lws = 33; uint32_t simd = 32; EXPECT_EQ(2u, getThreadsPerWG(simd, lws)); } TEST(LocalID, GivenSimd8WhenGettingPerThreadSizeLocalIdsThenValueIsThreeTimesGrfSize) { uint32_t simd = 8; uint32_t grfSize = 32; // 3 
channels (x,y,z) * 1 GRFs per thread (@SIMD8) EXPECT_EQ(3 * grfSize, getPerThreadSizeLocalIDs(simd, grfSize)); } TEST(LocalID, GivenSimd16WhenGettingPerThreadSizeLocalIdsThenValueIsThreeTimesGrfSize) { uint32_t simd = 16; uint32_t grfSize = 32; // 3 channels (x,y,z) * 1 GRFs per thread (@SIMD16) EXPECT_EQ(3 * grfSize, getPerThreadSizeLocalIDs(simd, grfSize)); } TEST(LocalID, GivenSimd8WhenGettingPerThreadSizeLocalIdsThenValueIsSixTimesGrfSize) { uint32_t simd = 32; uint32_t grfSize = 32; // 3 channels (x,y,z) * 2 GRFs per thread (@SIMD32) EXPECT_EQ(6 * grfSize, getPerThreadSizeLocalIDs(simd, grfSize)); } TEST(LocalID, GivenSimd1WhenGettingPerThreadSizeLocalIdsThenValueIsEqualGrfSize) { uint32_t simd = 1; uint32_t grfSize = 32; EXPECT_EQ(grfSize, getPerThreadSizeLocalIDs(simd, grfSize)); } TEST(LocalID, givenVariadicGrfSizeWhenLocalSizesAreEmittedTheyUseFullRowSize) { auto localIdsPtr = allocateAlignedMemory(3 * 64u, MemoryConstants::cacheLineSize); uint16_t *localIdsView = reinterpret_cast(localIdsPtr.get()); std::array localSizes = {{2u, 2u, 1u}}; std::array dimensionsOrder = {{0u, 1u, 2u}}; generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u); EXPECT_EQ(localIdsView[0], 0u); EXPECT_EQ(localIdsView[1], 1u); EXPECT_EQ(localIdsView[2], 0u); EXPECT_EQ(localIdsView[3], 1u); EXPECT_EQ(localIdsView[32], 0u); EXPECT_EQ(localIdsView[33], 0u); EXPECT_EQ(localIdsView[34], 1u); EXPECT_EQ(localIdsView[35], 1u); EXPECT_EQ(localIdsView[64], 0u); EXPECT_EQ(localIdsView[65], 0u); EXPECT_EQ(localIdsView[66], 0u); EXPECT_EQ(localIdsView[67], 0u); } struct LocalIDFixture : ::testing::TestWithParam> { void SetUp() override { simd = std::get<0>(GetParam()); grfSize = std::get<1>(GetParam()); localWorkSizeX = std::get<2>(GetParam()); localWorkSizeY = std::get<3>(GetParam()); localWorkSizeZ = std::get<4>(GetParam()); localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ; if (localWorkSize > 256) { localWorkSizeY = std::min(256 / localWorkSizeX, 
localWorkSizeY); localWorkSizeZ = std::min(256 / (localWorkSizeX * localWorkSizeY), localWorkSizeZ); localWorkSize = localWorkSizeX * localWorkSizeY * localWorkSizeZ; } const auto bufferSize = 32 * 3 * 16 * sizeof(uint16_t); buffer = reinterpret_cast(alignedMalloc(bufferSize, 32)); memset(buffer, 0xff, bufferSize); } void TearDown() override { alignedFree(buffer); } void validateIDWithinLimits(uint32_t simd, uint32_t lwsX, uint32_t lwsY, uint32_t lwsZ, bool useFullRowSize) { auto idsPerThread = simd; // As per BackEnd HLD, SIMD32 has 32 localIDs per channel. SIMD8/16 has up to 16 localIDs. auto skipPerThread = (simd == 32 || useFullRowSize) ? 32 : 16; auto pBufferX = buffer; auto pBufferY = pBufferX + skipPerThread; auto pBufferZ = pBufferY + skipPerThread; auto numWorkItems = lwsX * lwsY * lwsZ; size_t itemIndex = 0; while (numWorkItems > 0) { EXPECT_LT(pBufferX[itemIndex], lwsX) << simd << " " << lwsX << " " << lwsY << " " << lwsZ; EXPECT_LT(pBufferY[itemIndex], lwsY) << simd << " " << lwsX << " " << lwsY << " " << lwsZ; EXPECT_LT(pBufferZ[itemIndex], lwsZ) << simd << " " << lwsX << " " << lwsY << " " << lwsZ; ++itemIndex; if (idsPerThread == itemIndex) { pBufferX += skipPerThread * 3; pBufferY += skipPerThread * 3; pBufferZ += skipPerThread * 3; itemIndex = 0; } --numWorkItems; } } void validateAllWorkItemsCovered(uint32_t simd, uint32_t lwsX, uint32_t lwsY, uint32_t lwsZ, bool useFullRow) { auto idsPerThread = simd; // As per BackEnd HLD, SIMD32 has 32 localIDs per channel. SIMD8/16 has up to 16 localIDs. auto skipPerThread = (simd == 32 || useFullRow) ? 
32 : 16; auto pBufferX = buffer; auto pBufferY = pBufferX + skipPerThread; auto pBufferZ = pBufferY + skipPerThread; auto numWorkItems = lwsX * lwsY * lwsZ; // Initialize local ID hit table uint32_t localIDHitTable[8]; memset(localIDHitTable, 0, sizeof(localIDHitTable)); size_t itemIndex = 0; while (numWorkItems > 0) { // Flatten out the IDs auto workItem = pBufferX[itemIndex] + pBufferY[itemIndex] * lwsX + pBufferZ[itemIndex] * lwsX * lwsY; ASSERT_LT(workItem, 256u); // Look up in the hit table auto &hitItem = localIDHitTable[workItem / 32]; auto hitBit = 1 << (workItem % 32); // No double-hits EXPECT_EQ(0u, hitItem & hitBit); // Set that work item as hit hitItem |= hitBit; ++itemIndex; if (idsPerThread == itemIndex) { pBufferX += skipPerThread * 3; pBufferY += skipPerThread * 3; pBufferZ += skipPerThread * 3; itemIndex = 0; } --numWorkItems; } // All entries in hit table should be in form of n^2 - 1 for (uint32_t i : localIDHitTable) { EXPECT_EQ(0u, i & (i + 1)); } } void validateWalkOrder(uint32_t simd, uint32_t localWorkgroupSizeX, uint32_t localWorkgroupSizeY, uint32_t localWorkgroupSizeZ, const std::array &dimensionsOrder) { std::array walkOrder = {}; for (uint32_t i = 0; i < 3; ++i) { // inverts the walk order mapping (from DIM_ID->ORDER_ID to ORDER_ID->DIM_ID) walkOrder[dimensionsOrder[i]] = i; } auto skipPerThread = simd == 32 ? 
32 : 16; auto pBufferX = buffer; auto pBufferY = pBufferX + skipPerThread; auto pBufferZ = pBufferY + skipPerThread; decltype(pBufferX) ids[] = {pBufferX, pBufferY, pBufferZ}; uint32_t sizes[] = {localWorkgroupSizeX, localWorkgroupSizeY, localWorkgroupSizeZ}; uint32_t flattenedId = 0; for (uint32_t id2 = 0; id2 < sizes[walkOrder[2]]; ++id2) { for (uint32_t id1 = 0; id1 < sizes[walkOrder[1]]; ++id1) { for (uint32_t id0 = 0; id0 < sizes[walkOrder[0]]; ++id0) { uint32_t threadId = flattenedId / simd; uint32_t channelId = flattenedId % simd; uint16_t foundId0 = ids[walkOrder[0]][channelId + threadId * skipPerThread * 3]; uint16_t foundId1 = ids[walkOrder[1]][channelId + threadId * skipPerThread * 3]; uint16_t foundId2 = ids[walkOrder[2]][channelId + threadId * skipPerThread * 3]; if ((id0 != foundId0) || (id1 != foundId1) || (id2 != foundId2)) { EXPECT_EQ(id0, foundId0) << simd << " X @ (" << id0 << ", " << id1 << ", " << id2 << ") - flat " << flattenedId; EXPECT_EQ(id1, foundId1) << simd << " Y @ (" << id0 << ", " << id1 << ", " << id2 << ") - flat " << flattenedId; EXPECT_EQ(id2, foundId2) << simd << " Z @ (" << id0 << ", " << id1 << ", " << id2 << ") - flat " << flattenedId; } ++flattenedId; } } } } void dumpBuffer(uint32_t simd, uint32_t lwsX, uint32_t lwsY, uint32_t lwsZ) { auto workSize = lwsX * lwsY * lwsZ; auto threads = Math::divideAndRoundUp(workSize, simd); auto pBuffer = buffer; // As per BackEnd HLD, SIMD32 has 32 localIDs per channel. SIMD8/16 has up to 16 localIDs. auto skipPerThread = simd == 32 ? 
32 : 16; while (threads-- > 0) { auto lanes = std::min(workSize, simd); for (auto dimension = 0u; dimension < 3u; ++dimension) { for (auto lane = 0u; lane < lanes; ++lane) { printf("%04d ", (unsigned int)pBuffer[lane]); } pBuffer += skipPerThread; printf("\n"); } workSize -= simd; } } // Test parameters uint32_t localWorkSizeX; uint32_t localWorkSizeY; uint32_t localWorkSizeZ; uint32_t localWorkSize; uint32_t simd; uint32_t grfSize; // Provide support for a max LWS of 256 // 32 threads @ SIMD8 // 3 channels (x/y/z) // 16 lanes per thread (SIMD8 - only 8 used) uint16_t *buffer; }; HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenIdsAreWithinLimits) { generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, std::array{{0, 1, 2}}, false, grfSize); validateIDWithinLimits(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenAllWorkItemsCovered) { generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, std::array{{0, 1, 2}}, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); } HWTEST_P(LocalIDFixture, WhenWalkOrderIsXyzThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{0, 1, 2}}; generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, dimensionsOrder, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } HWTEST_P(LocalIDFixture, WhenWalkOrderIsYxzThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{1, 0, 2}}; 
generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, dimensionsOrder, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } HWTEST_P(LocalIDFixture, WhenWalkOrderIsZyxThenProperLocalIdsAreGenerated) { auto dimensionsOrder = std::array{{2, 1, 0}}; generateLocalIDs(buffer, simd, std::array{{static_cast(localWorkSizeX), static_cast(localWorkSizeY), static_cast(localWorkSizeZ)}}, dimensionsOrder, false, grfSize); validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper::useFullRowForLocalIdsGeneration); validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder); } TEST_P(LocalIDFixture, WhenThreadsPerWgAreGeneratedThenSizeCalculationAreCorrect) { auto workItems = localWorkSizeX * localWorkSizeY * localWorkSizeZ; auto sizeTotalPerThreadData = getThreadsPerWG(simd, workItems) * getPerThreadSizeLocalIDs(simd, grfSize); // Should be multiple of GRFs EXPECT_EQ(0u, sizeTotalPerThreadData % grfSize); auto numGRFsPerThread = (simd == 32) ? 2 : 1; auto numThreadsExpected = Math::divideAndRoundUp(workItems, simd); auto numGRFsExpected = 3 * numGRFsPerThread * numThreadsExpected; EXPECT_EQ(numGRFsExpected * grfSize, sizeTotalPerThreadData); } struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam> { void SetUp() override { simd = std::get<0>(GetParam()); grfSize = std::get<1>(GetParam()); localWorkSize = {{std::get<2>(GetParam()), std::get<3>(GetParam()), 1u}}; rowWidth = simd == 32u ? 32u : 16u; xDelta = simd == 8u ? 
2u : 4u; } void generateLocalIds() { auto numGrfs = (localWorkSize.at(0) * localWorkSize.at(1) + (simd - 1)) / simd; elemsInBuffer = 3u * simd * numGrfs; if (simd == 8u) { elemsInBuffer *= 2; } size = elemsInBuffer * sizeof(uint16_t); memory = allocateAlignedMemory(size, 32); memset(memory.get(), 0xff, size); buffer = reinterpret_cast(memory.get()); EXPECT_TRUE(isCompatibleWithLayoutForImages(localWorkSize, dimensionsOrder, simd)); generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true, grfSize); } void validateGRF() { uint32_t totalLocalIds = localWorkSize.at(0) * localWorkSize.at(1); auto numRows = elemsInBuffer / rowWidth; auto numGrfs = numRows / 3u; for (auto i = 0u; i < numGrfs; i++) { // validate X row uint16_t baseX = buffer[i * 3 * rowWidth]; uint16_t baseY = buffer[i * 3 * rowWidth + rowWidth]; uint16_t currentX = baseX; for (int j = 1; j < simd; j++) { if (simd * i + j == totalLocalIds) break; if (simd == 32u && baseY + 8u > localWorkSize.at(1) && j == 16u) { baseX += xDelta; if (baseX == localWorkSize.at(0)) { baseX = 0; } } currentX = baseX + ((currentX + 1) & (xDelta - 1)); EXPECT_EQ(buffer[i * 3 * rowWidth + j], currentX); } // validate Y row for (int j = 0; j < simd; j++) { if (simd * i + j == totalLocalIds) break; uint16_t expectedY = baseY + ((j / xDelta) & 0b111); if (expectedY >= localWorkSize.at(1)) { expectedY -= (localWorkSize.at(1) - baseY); } EXPECT_EQ(buffer[i * 3 * rowWidth + rowWidth + j], expectedY); } // validate Z row for (int j = 0; j < simd; j++) { if (simd * i + j == totalLocalIds) break; EXPECT_EQ(buffer[i * 3 * rowWidth + 2 * rowWidth + j], 0u); } } } uint16_t simd; uint16_t grfSize; uint8_t rowWidth; uint16_t xDelta; std::array localWorkSize; std::array dimensionsOrder = {{0u, 1u, 2u}}; uint32_t elemsInBuffer; uint32_t size; std::unique_ptr> memory; uint16_t *buffer; }; TEST(LocalIdsLayoutForImagesTest, 
givenLocalWorkSizeCompatibleWithLayoutForImagesWithDefaultDimensionsOrderWhenCheckLayoutForImagesCompatibilityThenReturnTrue) { std::array localWorkSize{{4u, 4u, 1u}}; std::array dimensionsOrder = {{0u, 1u, 2u}}; EXPECT_TRUE(isCompatibleWithLayoutForImages(localWorkSize, dimensionsOrder, 16u)); EXPECT_TRUE(isCompatibleWithLayoutForImages({{4u, 12u, 1u}}, dimensionsOrder, 32u)); } TEST(LocalIdsLayoutForImagesTest, givenLocalWorkSizeNotCompatibleWithLayoutForImagesWithDefaultDimensionsOrderWhenCheckLayoutForImagesCompatibilityThenReturnFalse) { std::array dimensionsOrder = {{0u, 1u, 2u}}; EXPECT_FALSE(isCompatibleWithLayoutForImages({{4u, 4u, 2u}}, dimensionsOrder, 8u)); EXPECT_FALSE(isCompatibleWithLayoutForImages({{2u, 5u, 1u}}, dimensionsOrder, 8u)); EXPECT_FALSE(isCompatibleWithLayoutForImages({{1u, 4u, 1u}}, dimensionsOrder, 8u)); } TEST(LocalIdsLayoutForImagesTest, given4x4x1LocalWorkSizeWithNonDefaultDimensionsOrderWhenCheckLayoutForImagesCompatibilityThenReturnFalse) { std::array localWorkSize{{2u, 4u, 1u}}; EXPECT_FALSE(isCompatibleWithLayoutForImages(localWorkSize, {{0, 2, 1}}, 8u)); EXPECT_FALSE(isCompatibleWithLayoutForImages(localWorkSize, {{1, 0, 2}}, 8u)); EXPECT_FALSE(isCompatibleWithLayoutForImages(localWorkSize, {{1, 2, 0}}, 8u)); EXPECT_FALSE(isCompatibleWithLayoutForImages(localWorkSize, {{2, 0, 1}}, 8u)); EXPECT_FALSE(isCompatibleWithLayoutForImages(localWorkSize, {{2, 1, 0}}, 8u)); } using LocalIdsLayoutTest = ::testing::TestWithParam; TEST_P(LocalIdsLayoutTest, givenLocalWorkgroupSize4x4x1WhenGenerateLocalIdsThenHasKernelImagesOnlyFlagDoesntMatter) { uint16_t simd = GetParam(); uint8_t rowWidth = simd == 32 ? 32 : 16; uint16_t xDelta = simd == 8u ? 
2u : 4u; std::array localWorkSize{{xDelta, 4u, 1u}}; uint16_t totalLocalWorkSize = 4u * xDelta; auto dimensionsOrder = std::array{{0u, 1u, 2u}}; uint32_t grfSize = 32; auto elemsInBuffer = rowWidth * 3u; auto size = elemsInBuffer * sizeof(uint16_t); auto alignedMemory1 = allocateAlignedMemory(size, 32); auto buffer1 = reinterpret_cast(alignedMemory1.get()); memset(buffer1, 0xff, size); auto alignedMemory2 = allocateAlignedMemory(size, 32); auto buffer2 = reinterpret_cast(alignedMemory2.get()); memset(buffer2, 0xff, size); generateLocalIDs(buffer1, simd, localWorkSize, dimensionsOrder, false, grfSize); generateLocalIDs(buffer2, simd, localWorkSize, dimensionsOrder, true, grfSize); for (auto i = 0u; i < elemsInBuffer / rowWidth; i++) { for (auto j = 0u; j < rowWidth; j++) { if (j < totalLocalWorkSize) { auto offset = (i * rowWidth + j) * sizeof(uint16_t); auto cmpValue = memcmp(ptrOffset(buffer1, offset), ptrOffset(buffer2, offset), sizeof(uint16_t)); EXPECT_EQ(0, cmpValue); } } } } TEST_P(LocalIdsLayoutForImagesTest, givenLocalWorkgroupSizeCompatibleWithLayoutForImagesWhenGenerateLocalIdsWithKernelWithOnlyImagesThenAppliesLayoutForImages) { generateLocalIds(); validateGRF(); } #define SIMDParams ::testing::Values(8, 16, 32) #if HEAVY_DUTY_TESTING #define LWSXParams ::testing::Values(1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 64, 128, 256) #define LWSYParams ::testing::Values(1, 2, 3, 4, 5, 6, 7, 8) #define LWSZParams ::testing::Values(1, 2, 3, 4) #else #define LWSXParams ::testing::Values(1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 64, 128, 256) #define LWSYParams ::testing::Values(1, 2, 4, 8) #define LWSZParams ::testing::Values(1) #endif #define GRFSizeParams ::testing::Values(32) INSTANTIATE_TEST_CASE_P(AllCombinations, LocalIDFixture, ::testing::Combine(SIMDParams, GRFSizeParams, LWSXParams, LWSYParams, LWSZParams)); INSTANTIATE_TEST_CASE_P(LayoutTests, LocalIdsLayoutTest, SIMDParams); INSTANTIATE_TEST_CASE_P(LayoutForImagesTests, LocalIdsLayoutForImagesTest, 
::testing::Combine(SIMDParams, GRFSizeParams, ::testing::Values(4, 8, 12, 20), ::testing::Values(4, 8, 12, 20))); // To debug a specific configuration replace the list of Values with specific values. // NOTE: You'll need a unique test prefix INSTANTIATE_TEST_CASE_P(SingleTest, LocalIDFixture, ::testing::Combine( ::testing::Values(32), //SIMD ::testing::Values(32), //GRF ::testing::Values(5), //LWSX ::testing::Values(6), //LWSY ::testing::Values(7))); //LWSZ compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/local_work_size_tests.cpp000066400000000000000000000617751363734646600316100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { //wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, coreFamily, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t workDim = 1; size_t workGroup[3] = {6144, 1, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1536; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); 
EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 333; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 9u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t workDim = 1; size_t workGroup[3] = {6144, 1, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 48u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 56u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); ; EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 12; workGroup[1] = 512; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, 
given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 12; workGroup[1] = 512; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1; workGroup[1] = 384; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 1u); EXPECT_EQ(workGroupSize[1], 128u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 56u, 0u, false, false); uint32_t workDim = 3; size_t workGroup[3] = {384, 384, 384}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 96; workGroup[1] = 4; workGroup[2] = 4; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 2u); workGroup[0] = 12; workGroup[1] = 512; workGroup[2] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); 
EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 2; workGroup[1] = 2; workGroup[2] = 3; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 3u); } TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { NEO::WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t workDim = 3; size_t workGroup[3] = {384, 384, 384}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 96; workGroup[1] = 6; workGroup[2] = 4; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 4u); workGroup[0] = 12; workGroup[1] = 512; workGroup[2] = 48; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 4u); EXPECT_EQ(workGroupSize[1], 64u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 6; workGroup[1] = 4; workGroup[2] = 64; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 32u); workGroup[0] = 113; workGroup[1] = 113; workGroup[2] = 113; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 113u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given2DimWorkGroupAndSquaredAlgorithmWhenComputeCalledThenLocalGroupComputed) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t 
workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given1DimWorkGroupAndSquaredAlgorithmOnWhenComputeCalledThenSquaredAlgorithmIsNotExecuted) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t workDim = 1; size_t workGroup[3] = {1024, 1, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given2DdispatchWithImagesAndSquaredAlgorithmOnWhenLwsIsComputedThenSquaredAlgorithmIsNotExecuted) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, true, false); uint32_t workDim = 2; size_t workGroup[3] = {256, 96, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelWithTileYImagesAndBarrierWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { WorkSizeInfo wsInfo(256, true, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 2048; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1920; workGroup[1] = 1080; NEO::computeWorkgroupSizeND(wsInfo, 
workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { WorkSizeInfo wsInfo(256, false, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 2048; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1920; workGroup[1] = 1080; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenSimd16KernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { WorkSizeInfo wsInfo(256, false, 16, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 2048; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1920; workGroup[1] = 1080; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelWithTwoDimensionalGlobalSizesWhenLwsIsComputedThenItHasMaxWorkgroupSize) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 1024; workGroup[1] = 1024; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, 
workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelWithBarriersAndTiledImagesWithYdimensionHigherThenXDimensionWhenLwsIsComputedThenItMimicsTiling) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 256; workGroup[1] = 1024; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 480; workGroup[1] = 1080; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 196; workGroup[1] = 30; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 49u); EXPECT_EQ(workGroupSize[1], 5u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenHighOneDimensionalGwsWhenLwsIsComputedThenMaxWorkgoupSizeIsUsed) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 65536; workGroup[1] = 1; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 524288; workGroup[1] = 1; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); 
} TEST(localWorkSizeTest, givenVeriousGwsSizesWithImagesWhenLwsIsComputedThenProperSizesAreReturned) { WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 256; workGroup[1] = 1024; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 64u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 208; workGroup[1] = 2; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 6; workGroup[1] = 128; wsInfo.simdSize = 8; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 4u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 3; workGroup[1] = 128; wsInfo.simdSize = 8; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 1u); EXPECT_EQ(workGroupSize[1], 128u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize16WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { WorkSizeInfo wsInfo(256u, 0u, 16, 0u, defaultHwInfo->platform.eRenderCoreFamily, 56u, 0, false, false); size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 1048576; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, 1); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize8WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { WorkSizeInfo wsInfo(256u, 0u, 8, 0u, 
defaultHwInfo->platform.eRenderCoreFamily, 32u, 0, false, false); size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 1048576; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, 1); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenKernelUtilizingImagesAndSlmWhenLwsIsBeingComputedThenItMimicsGlobalWorkgroupSizes) { WorkSizeInfo wsInfo(256u, 1u, 32, 4096u, defaultHwInfo->platform.eRenderCoreFamily, 56u, 65536u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; workGroup[0] = 2048; workGroup[1] = 2048; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1920; workGroup[1] = 1080; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, GivenUseStrictRatioWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { WorkSizeInfo wsInfo(256u, 0u, 32u, 0u, defaultHwInfo->platform.eRenderCoreFamily, 0u, 0u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {194, 234, 1}; size_t workGroupSize[3]; workGroup[0] = 194; workGroup[1] = 234; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 117u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 100; workGroup[1] = 100; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 20u); EXPECT_EQ(workGroupSize[1], 5u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 54; workGroup[1] = 154; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 27u); EXPECT_EQ(workGroupSize[1], 7u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, 
GivenUseBarriersWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { WorkSizeInfo wsInfo(256u, 1u, 32u, 0u, defaultHwInfo->platform.eRenderCoreFamily, 56u, 0u, true, true); uint32_t workDim = 2; size_t workGroup[3] = {194, 234, 1}; size_t workGroupSize[3]; workGroup[0] = 194; workGroup[1] = 234; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 97u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); wsInfo.yTiledSurfaces = false; wsInfo.imgUsed = false; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 78u); EXPECT_EQ(workGroupSize[2], 1u); NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 2u); EXPECT_EQ(workGroupSize[1], 78u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, given2DimWorkWhenComputeSquaredCalledThenLocalGroupComputed) { WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo->platform.eRenderCoreFamily, 6u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {2048, 272, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 1024; workGroup[1] = 1024; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 16u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 512; workGroup[1] = 104; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 104; workGroup[1] = 512; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, 
wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 8u); EXPECT_EQ(workGroupSize[1], 32u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 184; workGroup[1] = 368; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 8u); EXPECT_EQ(workGroupSize[1], 16u); EXPECT_EQ(workGroupSize[2], 1u); workGroup[0] = 113; workGroup[1] = 2; NEO::computeWorkgroupSizeSquared(wsInfo.maxWorkGroupSize, workGroupSize, workGroup, wsInfo.simdSize, workDim); EXPECT_EQ(workGroupSize[0], 113u); EXPECT_EQ(workGroupSize[1], 2u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDeviceSupportingLws1024AndKernelCompiledInSimd8WhenGwsIs1024ThenLwsIsComputedAsMaxOptimalMultipliedBySimd) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(1024, 0u, 8, 0u, defaultHwInfo->platform.eRenderCoreFamily, 56u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {32, 32, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 8u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDeviceWith36ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf8HwThreads) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo->platform.eRenderCoreFamily, 36u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1024, 1024, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 128u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDeviceWith56ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf16HwThreads) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.EnableComputeWorkSizeSquared.set(false); WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo->platform.eRenderCoreFamily, 56u, 0u, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1024, 1024, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 256u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorrectNumberOfThreads) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); DispatchInfo dispatchInfo; dispatchInfo.setKernel(kernel.mockKernel); auto threadsPerEu = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.EUCount; auto euPerSubSlice = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.MaxEuPerSubSlice; auto &deviceInfo = device.sharedDeviceInfo; deviceInfo.maxNumEUsPerSubSlice = euPerSubSlice; deviceInfo.numThreadsPerEU = threadsPerEu; WorkSizeInfo workSizeInfo(dispatchInfo); EXPECT_EQ(workSizeInfo.numThreadsPerSubSlice, threadsPerEu * euPerSubSlice); } TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarriersIsCorrectlySet) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); DispatchInfo dispatchInfo; dispatchInfo.setKernel(kernel.mockKernel); kernel.kernelInfo.patchInfo.executionEnvironment = nullptr; EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers); kernel.executionEnvironment.HasBarriers = 0; kernel.kernelInfo.patchInfo.executionEnvironment = &kernel.executionEnvironment; EXPECT_FALSE(WorkSizeInfo{dispatchInfo}.hasBarriers); kernel.executionEnvironment.HasBarriers = 1; EXPECT_TRUE(WorkSizeInfo{dispatchInfo}.hasBarriers); } TEST(localWorkSizeTest, givenMaxWorkgroupSizeEqualToSimdSizeWhenLwsIsCalculatedThenItIsDownsizedToMaxWorkgroupSize) { WorkSizeInfo wsInfo(32, 0u, 32, 0u, defaultHwInfo->platform.eRenderCoreFamily, 32u, 0u, false, false); 
uint32_t workDim = 2; size_t workGroup[3] = {32, 32, 1}; size_t workGroupSize[3]; NEO::computeWorkgroupSizeND(wsInfo, workGroupSize, workGroup, workDim); EXPECT_EQ(workGroupSize[0], 32u); EXPECT_EQ(workGroupSize[1], 1u); EXPECT_EQ(workGroupSize[2], 1u); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/multi_dispatch_info_tests.cpp000066400000000000000000000012431363734646600324260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "test.h" using namespace NEO; struct MultiDispatchInfoTest : public ::testing::Test { void SetUp() override { } void TearDown() override { } }; TEST_F(MultiDispatchInfoTest, GivenNullKernelWhenCreatingMultiDispatchInfoThenExpectationsAreMet) { MockMultiDispatchInfo multiDispatchInfo(nullptr); EXPECT_FALSE(multiDispatchInfo.begin()->usesSlm()); EXPECT_FALSE(multiDispatchInfo.begin()->usesStatelessPrintfSurface()); EXPECT_EQ(0u, multiDispatchInfo.begin()->getRequiredScratchSize()); }compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/multiple_map_buffer_tests.cpp000066400000000000000000000400731363734646600324270ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct MultipleMapBufferTest : public DeviceFixture, public ::testing::Test { template struct MockBuffer : public BufferHw { using Buffer::mapOperationsHandler; template MockBuffer(Params... params) : BufferHw(params...) 
{ this->createFunction = BufferHw::create; }; void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { this->copySize = copySize[0]; this->copyOffset = copyOffset[0]; transferToHostPtrCalled++; }; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { this->copySize = copySize[0]; this->copyOffset = copyOffset[0]; transferFromHostPtrCalled++; }; size_t copySize = 0; size_t copyOffset = 0; uint32_t transferToHostPtrCalled = 0; uint32_t transferFromHostPtrCalled = 0; }; template struct MockCmdQ : public CommandQueueHw { MockCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, 0, false) {} cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueSize = size; enqueueOffset = offset; readBufferCalled++; if (failOnReadBuffer) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueReadBuffer(buffer, blockingRead, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t cb, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueSize = cb; enqueueOffset = offset; unmapPtr = ptr; writeBufferCalled++; if (failOnWriteBuffer) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueWriteBuffer(buffer, blockingWrite, offset, cb, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueMarkerCalled++; return CommandQueueHw::enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); } uint32_t writeBufferCalled = 0; uint32_t readBufferCalled = 0; uint32_t 
enqueueMarkerCalled = 0; bool failOnReadBuffer = false; bool failOnWriteBuffer = false; size_t enqueueSize = 0; size_t enqueueOffset = 0; const void *unmapPtr = nullptr; }; template std::unique_ptr> createMockBuffer(bool mapOnGpu) { MemoryPropertiesFlags memoryProperties; auto mockAlloc = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto buffer = new MockBuffer(context, memoryProperties, 0, 0, 1024, mockAlloc->getUnderlyingBuffer(), mockAlloc->getUnderlyingBuffer(), mockAlloc, false, false, false); if (mapOnGpu) { buffer->setSharingHandler(new SharingHandler()); } return std::unique_ptr>(buffer); } template std::unique_ptr> createMockCmdQ() { return std::unique_ptr>(new MockCmdQ(context, pClDevice)); } void SetUp() override { DeviceFixture::SetUp(); context = new MockContext(pClDevice); } void TearDown() override { delete context; DeviceFixture::TearDown(); } MockContext *context = nullptr; cl_int retVal = CL_INVALID_VALUE; }; HWTEST_F(MultipleMapBufferTest, givenValidReadAndWriteBufferWhenMappedOnGpuThenAddMappedPtrAndRemoveOnUnmap) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, nullptr); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->readBufferCalled, 1u); EXPECT_EQ(cmdQ->enqueueSize, size); EXPECT_EQ(cmdQ->enqueueOffset, offset); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->writeBufferCalled, 1u); EXPECT_EQ(cmdQ->enqueueSize, size); EXPECT_EQ(cmdQ->enqueueOffset, offset); EXPECT_EQ(cmdQ->unmapPtr, mappedPtr); } HWTEST_F(MultipleMapBufferTest, 
givenReadOnlyMapWhenUnmappedOnGpuThenEnqueueMarker) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, nullptr); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->readBufferCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->writeBufferCalled, 0u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); } HWTEST_F(MultipleMapBufferTest, givenNotMappedPtrWhenUnmapedOnGpuThenReturnError) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), buffer->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapBufferTest, givenErrorFromReadBufferWhenMappedOnGpuThenDontAddMappedPtr) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); cmdQ->failOnReadBuffer = true; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); } HWTEST_F(MultipleMapBufferTest, givenErrorFromWriteBufferWhenUnmappedOnGpuThenDontRemoveMappedPtr) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); cmdQ->failOnWriteBuffer = true; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), 
CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(1u, cmdQ->writeBufferCalled); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); } HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); EXPECT_EQ(1u, buffer->transferToHostPtrCalled); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); EXPECT_EQ(1u, buffer->transferFromHostPtrCalled); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); } HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenReadOnlyMappedOnCpuThenDontMakeCpuCopy) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); EXPECT_EQ(1u, buffer->transferToHostPtrCalled); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); EXPECT_EQ(0u, buffer->transferFromHostPtrCalled); } HWTEST_F(MultipleMapBufferTest, 
givenBlockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 1, &clMapEvent, nullptr, &retVal); mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); EXPECT_EQ(1u, buffer->transferToHostPtrCalled); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); EXPECT_EQ(buffer->copySize, size); EXPECT_EQ(buffer->copyOffset, offset); EXPECT_EQ(1u, buffer->transferFromHostPtrCalled); } HWTEST_F(MultipleMapBufferTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDontMakeCpuCopy) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 1, &clMapEvent, nullptr, &retVal); mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, buffer->transferToHostPtrCalled); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); EXPECT_EQ(0u, buffer->transferFromHostPtrCalled); } HWTEST_F(MultipleMapBufferTest, givenInvalidPtrWhenUnmappedOnCpuThenReturnError) { auto buffer = 
createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), buffer->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapBufferTest, givenMultimpleMapsWhenUnmappingThenRemoveCorrectPointers) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); MapInfo mappedPtrs[3] = { {nullptr, 1, {{1, 0, 0}}, {{1, 0, 0}}, 0}, {nullptr, 1, {{2, 0, 0}}, {{2, 0, 0}}, 0}, {nullptr, 1, {{5, 0, 0}}, {{5, 0, 0}}, 0}, }; for (size_t i = 0; i < 3; i++) { mappedPtrs[i].ptr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, mappedPtrs[i].offset[0], mappedPtrs[i].size[0], 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtrs[i].ptr); EXPECT_EQ(i + 1, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[i].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[i].offset[0]); } // reordered unmap clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtrs[1].ptr, 0, nullptr, nullptr); EXPECT_EQ(2u, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[1].ptr); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[1].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[1].offset[0]); clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtrs[2].ptr, 0, nullptr, nullptr); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[2].ptr); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[2].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[2].offset[0]); clEnqueueUnmapMemObject(cmdQ.get(), buffer.get(), mappedPtrs[0].ptr, 0, nullptr, nullptr); EXPECT_EQ(0u, buffer->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[0].ptr); EXPECT_EQ(cmdQ->enqueueSize, mappedPtrs[0].size[0]); EXPECT_EQ(cmdQ->enqueueOffset, mappedPtrs[0].offset[0]); } HWTEST_F(MultipleMapBufferTest, 
givenOverlapingPtrWhenMappingOnGpuForWriteThenReturnError) { auto buffer = createMockBuffer(true); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); offset++; void *mappedPtr2 = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); } HWTEST_F(MultipleMapBufferTest, givenOverlapingPtrWhenMappingOnCpuForWriteThenReturnError) { auto buffer = createMockBuffer(false); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(buffer->mappingOnCpuAllowed()); size_t offset = 1; size_t size = 3; void *mappedPtr = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_READ, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); offset++; void *mappedPtr2 = clEnqueueMapBuffer(cmdQ.get(), buffer.get(), CL_FALSE, CL_MAP_WRITE, offset, size, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, buffer->mapOperationsHandler.size()); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/multiple_map_image_tests.cpp000066400000000000000000000466641363734646600322540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include 
"opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" namespace NEO { extern ImageFuncs imageFactory[IGFX_MAX_CORE]; struct MultipleMapImageTest : public DeviceFixture, public ::testing::Test { template struct MockImage : public ImageHw { using Image::mapOperationsHandler; using ImageHw::isZeroCopy; using ImageHw::ImageHw; static Image *createMockImage(Context *context, const MemoryPropertiesFlags &memoryProperties, uint64_t flags, uint64_t flagsIntel, size_t size, void *hostPtr, const cl_image_format &imageFormat, const cl_image_desc &imageDesc, bool zeroCopy, GraphicsAllocation *graphicsAllocation, bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo, const SurfaceOffsets *surfaceOffsets) { return new MockImage(context, memoryProperties, flags, flagsIntel, size, hostPtr, imageFormat, imageDesc, zeroCopy, graphicsAllocation, isObjectRedescribed, baseMipLevel, mipCount, *surfaceFormatInfo, surfaceOffsets); }; void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { copyRegion = copySize; copyOrigin = copyOffset; transferToHostPtrCalled++; }; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override { copyRegion = copySize; copyOrigin = copyOffset; transferFromHostPtrCalled++; }; MemObjSizeArray copyRegion = {{0, 0, 0}}; MemObjOffsetArray copyOrigin = {{0, 0, 0}}; uint32_t transferToHostPtrCalled = 0; uint32_t transferFromHostPtrCalled = 0; }; template struct MockCmdQ : public CommandQueueHw { MockCmdQ(Context *context, ClDevice *device) : CommandQueueHw(context, device, 0, false) {} cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event 
*eventWaitList, cl_event *event) override { enqueueRegion = {{region[0], region[1], region[2]}}; enqueueOrigin = {{origin[0], origin[1], origin[2]}}; readImageCalled++; if (failOnReadImage) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueReadImage(srcImage, blockingRead, origin, region, rowPitch, slicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueRegion = {{region[0], region[1], region[2]}}; enqueueOrigin = {{origin[0], origin[1], origin[2]}}; unmapPtr = ptr; writeImageCalled++; if (failOnWriteImage) { return CL_OUT_OF_RESOURCES; } return CommandQueueHw::enqueueWriteImage(dstImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { enqueueMarkerCalled++; return CommandQueueHw::enqueueMarkerWithWaitList(numEventsInWaitList, eventWaitList, event); } uint32_t writeImageCalled = 0; uint32_t readImageCalled = 0; uint32_t enqueueMarkerCalled = 0; bool failOnReadImage = false; bool failOnWriteImage = false; MemObjSizeArray enqueueRegion = {{0, 0, 0}}; MemObjOffsetArray enqueueOrigin = {{0, 0, 0}}; const void *unmapPtr = nullptr; }; template std::unique_ptr> createMockImage() { auto eRenderCoreFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; VariableBackup backup(&imageFactory[eRenderCoreFamily].createImageFunction); imageFactory[eRenderCoreFamily].createImageFunction = MockImage::createMockImage; auto surfaceFormat = Image::getSurfaceFormatFromTable(Traits::flags, &Traits::imageFormat, 
context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); cl_int retVal = CL_SUCCESS; auto img = Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(Traits::flags, 0, 0), Traits::flags, 0, surfaceFormat, &Traits::imageDesc, Traits::hostPtr, retVal); auto mockImage = static_cast *>(img); return std::unique_ptr>(mockImage); } template std::unique_ptr> createMockCmdQ() { return std::unique_ptr>(new MockCmdQ(context, pClDevice)); } void SetUp() override { DeviceFixture::SetUp(); context = new MockContext(pClDevice); } void TearDown() override { delete context; DeviceFixture::TearDown(); } MockContext *context = nullptr; cl_int retVal = CL_INVALID_VALUE; }; HWTEST_F(MultipleMapImageTest, givenValidReadAndWriteImageWhenMappedOnGpuThenAddMappedPtrAndRemoveOnUnmap) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 2, 1}}; MemObjSizeArray region = {{3, 4, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->enqueueRegion, region); EXPECT_EQ(cmdQ->enqueueOrigin, origin); EXPECT_EQ(cmdQ->readImageCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->enqueueRegion, region); EXPECT_EQ(cmdQ->enqueueOrigin, origin); EXPECT_EQ(cmdQ->unmapPtr, mappedPtr); EXPECT_EQ(cmdQ->writeImageCalled, 1u); } HWTEST_F(MultipleMapImageTest, givenReadOnlyMapWhenUnmappedOnGpuThenEnqueueMarker) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 
2, 1}}; MemObjSizeArray region = {{3, 4, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->enqueueRegion, region); EXPECT_EQ(cmdQ->enqueueOrigin, origin); EXPECT_EQ(cmdQ->readImageCalled, 1u); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->writeImageCalled, 0u); EXPECT_EQ(cmdQ->enqueueMarkerCalled, 1u); } HWTEST_F(MultipleMapImageTest, givenNotMappedPtrWhenUnmapedThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->mappingOnCpuAllowed()); EXPECT_EQ(0u, image->mapOperationsHandler.size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), image->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapImageTest, givenErrorFromReadImageWhenMappedOnGpuThenDontAddMappedPtr) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); cmdQ->failOnReadImage = true; size_t origin[] = {2, 1, 1}; size_t region[] = {2, 1, 1}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(0u, image->mapOperationsHandler.size()); } HWTEST_F(MultipleMapImageTest, givenErrorFromWriteImageWhenUnmappedOnGpuThenDontRemoveMappedPtr) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_FALSE(image->mappingOnCpuAllowed()); cmdQ->failOnWriteImage = true; 
size_t origin[] = {2, 1, 1}; size_t region[] = {2, 1, 1}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, origin, region, nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->mapOperationsHandler.size()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(cmdQ->writeImageCalled, 1u); EXPECT_EQ(1u, image->mapOperationsHandler.size()); } HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->mapOperationsHandler.size()); EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->mapOperationsHandler.size()); EXPECT_EQ(1u, image->transferFromHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); } HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenReadOnlyMappedOnCpuThenDontMakeCpuCopy) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->mapOperationsHandler.size()); 
EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->mapOperationsHandler.size()); EXPECT_EQ(0u, image->transferFromHostPtrCalled); } HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_FALSE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 1, &clMapEvent, nullptr, &retVal); mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(1u, image->mapOperationsHandler.size()); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, image->mapOperationsHandler.size()); EXPECT_EQ(1u, image->transferFromHostPtrCalled); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); } HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDontMakeCpuCopy) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); image->isZeroCopy = false; EXPECT_TRUE(image->mappingOnCpuAllowed()); UserEvent mapEvent, unmapEvent; cl_event clMapEvent = &mapEvent; cl_event clUnmapEvent = &unmapEvent; MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_FALSE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 1, &clMapEvent, nullptr, &retVal); 
mapEvent.setStatus(CL_COMPLETE); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(1u, image->transferToHostPtrCalled); EXPECT_EQ(1u, image->mapOperationsHandler.size()); EXPECT_EQ(image->copyRegion, region); EXPECT_EQ(image->copyOrigin, origin); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtr, 1, &clUnmapEvent, nullptr); unmapEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, image->mapOperationsHandler.size()); EXPECT_EQ(0u, image->transferFromHostPtrCalled); } HWTEST_F(MultipleMapImageTest, givenInvalidPtrWhenUnmappedOnCpuThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(image->mappingOnCpuAllowed()); retVal = clEnqueueUnmapMemObject(cmdQ.get(), image.get(), image->getBasePtrForMap(cmdQ->getDevice().getRootDeviceIndex()), 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_F(MultipleMapImageTest, givenMultimpleMapsWhenUnmappingThenRemoveCorrectPointers) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto image = createMockImage(); auto cmdQ = createMockCmdQ(); MapInfo mappedPtrs[3] = {{nullptr, 1, {{1, 1, 1}}, {{1, 1, 1}}, 0}, {nullptr, 1, {{2, 2, 2}}, {{2, 2, 2}}, 0}, {nullptr, 1, {{3, 5, 7}}, {{4, 4, 4}}, 0}}; for (size_t i = 0; i < 3; i++) { mappedPtrs[i].ptr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &mappedPtrs[i].offset[0], &mappedPtrs[i].size[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtrs[i].ptr); EXPECT_EQ(i + 1, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[i].size); EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[i].offset); } // reordered unmap clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtrs[1].ptr, 0, nullptr, nullptr); EXPECT_EQ(2u, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[1].ptr); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[1].size); EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[1].offset); clEnqueueUnmapMemObject(cmdQ.get(), image.get(), 
mappedPtrs[2].ptr, 0, nullptr, nullptr); EXPECT_EQ(1u, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[2].ptr); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[2].size); EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[2].offset); clEnqueueUnmapMemObject(cmdQ.get(), image.get(), mappedPtrs[0].ptr, 0, nullptr, nullptr); EXPECT_EQ(0u, image->mapOperationsHandler.size()); EXPECT_EQ(cmdQ->unmapPtr, mappedPtrs[0].ptr); EXPECT_EQ(cmdQ->enqueueRegion, mappedPtrs[0].size); EXPECT_EQ(cmdQ->enqueueOrigin, mappedPtrs[0].offset); } HWTEST_F(MultipleMapImageTest, givenOverlapingPtrWhenMappingForWriteThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 2, 1}}; MemObjSizeArray region = {{3, 4, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, image->mapOperationsHandler.size()); origin[0]++; void *mappedPtr2 = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, image->mapOperationsHandler.size()); } HWTEST_F(MultipleMapImageTest, givenOverlapingPtrWhenMappingOnCpuForWriteThenReturnError) { auto image = createMockImage(); auto cmdQ = createMockCmdQ(); EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{1, 0, 0}}; MemObjSizeArray region = {{3, 1, 1}}; void *mappedPtr = clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_READ, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_NE(nullptr, mappedPtr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, image->mapOperationsHandler.size()); origin[0]++; void *mappedPtr2 = 
clEnqueueMapImage(cmdQ.get(), image.get(), CL_TRUE, CL_MAP_WRITE, &origin[0], ®ion[0], nullptr, nullptr, 0, nullptr, nullptr, &retVal); EXPECT_EQ(nullptr, mappedPtr2); EXPECT_EQ(CL_INVALID_OPERATION, retVal); EXPECT_EQ(1u, image->mapOperationsHandler.size()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/oom_buffer_tests.cpp000066400000000000000000000236501363734646600305330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" #include "test.h" using namespace NEO; struct OOMSetting { bool oomCS; bool oomISH; }; static OOMSetting oomSettings[] = { {true, false}, {false, true}, {true, true}}; struct OOMCommandQueueBufferTest : public MemoryManagementFixture, public DeviceFixture, public CommandQueueFixture, public SimpleArgKernelFixture, public HelloWorldKernelFixture, public ::testing::TestWithParam { using CommandQueueFixture::SetUp; using HelloWorldKernelFixture::SetUp; using SimpleArgKernelFixture::SetUp; OOMCommandQueueBufferTest() { } void SetUp() override { MemoryManagement::breakOnAllocationEvent = 77; MemoryManagementFixture::SetUp(); DeviceFixture::SetUp(); context = new MockContext(pClDevice); BufferDefaults::context = context; CommandQueueFixture::SetUp(context, pClDevice, 0); SimpleArgKernelFixture::SetUp(pClDevice); 
HelloWorldKernelFixture::SetUp(pClDevice, "CopyBuffer_simd", "CopyBuffer"); srcBuffer = BufferHelper<>::create(); dstBuffer = BufferHelper<>::create(); const auto &oomSetting = GetParam(); auto oomSize = 10u; if (oomSetting.oomCS) { auto &cs = pCmdQ->getCS(oomSize); // CommandStream may be larger than requested so grab what wasnt requested cs.getSpace(cs.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, cs.getAvailableSpace()); } if (oomSetting.oomISH) { auto &ish = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, oomSize); // IndirectHeap may be larger than requested so grab what wasnt requested ish.getSpace(ish.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, ish.getAvailableSpace()); } } void TearDown() override { delete dstBuffer; delete srcBuffer; context->release(); HelloWorldKernelFixture::TearDown(); SimpleArgKernelFixture::TearDown(); CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); MemoryManagementFixture::TearDown(); } MockContext *context; Buffer *srcBuffer = nullptr; Buffer *dstBuffer = nullptr; }; HWTEST_P(OOMCommandQueueBufferTest, enqueueCopyBuffer) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueCopyBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueCopyBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, enqueueFillBuffer) { CommandQueueHw cmdQ(context, pClDevice, 0, 
false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueFillBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueFillBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, enqueueReadBuffer) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueReadBufferHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueReadBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, enqueueWriteBuffer) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueWriteBufferHelper<>::enqueue(pCmdQ); 
EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueWriteBufferHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, enqueueWriteBufferRect) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueWriteBufferRectHelper<>::enqueue(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal1); auto retVal2 = EnqueueWriteBufferRectHelper<>::enqueue(&cmdQ); EXPECT_EQ(CL_SUCCESS, retVal2); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } } HWTEST_P(OOMCommandQueueBufferTest, enqueueKernelHelloWorld) { typedef HelloWorldKernelFixture KernelFixture; CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel); auto retVal2 = EnqueueKernelHelper<>::enqueueKernel( &cmdQ, KernelFixture::pKernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - 
usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueBufferTest, enqueueKernelSimpleArg) { typedef SimpleArgKernelFixture KernelFixture; CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, KernelFixture::pKernel); auto retVal2 = EnqueueKernelHelper<>::enqueueKernel( &cmdQ, KernelFixture::pKernel); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } INSTANTIATE_TEST_CASE_P( OOM, OOMCommandQueueBufferTest, testing::ValuesIn(oomSettings)); compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/oom_image_tests.cpp000066400000000000000000000136331363734646600303440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; struct 
OOMSetting { bool oomCS; bool oomISH; }; static OOMSetting oomSettings[] = { {true, false}, {false, true}, {true, true}}; struct OOMCommandQueueImageTest : public DeviceFixture, public CommandQueueFixture, public ::testing::TestWithParam { using CommandQueueFixture::SetUp; OOMCommandQueueImageTest() { } void SetUp() override { DeviceFixture::SetUp(); context = new MockContext(pClDevice); CommandQueueFixture::SetUp(context, pClDevice, 0); srcImage = Image2dHelper<>::create(context); dstImage = Image2dHelper<>::create(context); const auto &oomSetting = GetParam(); auto oomSize = 10u; if (oomSetting.oomCS) { auto &cs = pCmdQ->getCS(oomSize); // CommandStream may be larger than requested so grab what wasnt requested cs.getSpace(cs.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, cs.getAvailableSpace()); } if (oomSetting.oomISH) { auto &ish = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, oomSize); // IndirectHeap may be larger than requested so grab what wasnt requested ish.getSpace(ish.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, ish.getAvailableSpace()); } } void TearDown() override { delete dstImage; delete srcImage; context->release(); CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } MockContext *context; Image *srcImage = nullptr; Image *dstImage = nullptr; }; HWTEST_P(OOMCommandQueueImageTest, enqueueCopyImage) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueCopyImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueCopyImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, 
indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueImageTest, enqueueFillImage) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueFillImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueFillImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueImageTest, enqueueReadImage) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueReadImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueReadImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } HWTEST_P(OOMCommandQueueImageTest, enqueueWriteImage) { CommandQueueHw cmdQ(context, pClDevice, 0, false); auto &commandStream 
= pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal1 = EnqueueWriteImageHelper<>::enqueue(pCmdQ); auto retVal2 = EnqueueWriteImageHelper<>::enqueue(&cmdQ); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); if (usedAfterISH > usedBeforeISH) { EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); } else { EXPECT_LE(usedAfterISH, indirectHeap.getMaxAvailableSpace()); } EXPECT_EQ(CL_SUCCESS, retVal1); EXPECT_EQ(CL_SUCCESS, retVal2); } INSTANTIATE_TEST_CASE_P( OOM, OOMCommandQueueImageTest, testing::ValuesIn(oomSettings)); compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/oom_tests.cpp000066400000000000000000000103761363734646600272030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; struct OOMSetting { bool oomCS; bool oomISH; }; static OOMSetting oomSettings[] = { {true, false}, {false, true}, {true, true}}; struct OOMCommandQueueTest : public DeviceFixture, public CommandQueueFixture, public ::testing::TestWithParam { using CommandQueueFixture::SetUp; OOMCommandQueueTest() { } void SetUp() override { DeviceFixture::SetUp(); context = new MockContext(pClDevice); CommandQueueFixture::SetUp(context, pClDevice, 0); const auto &oomSetting = GetParam(); auto oomSize = 10u; if (oomSetting.oomCS) { auto &cs = pCmdQ->getCS(oomSize); // CommandStream 
may be larger than requested so grab what wasnt requested cs.getSpace(cs.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, cs.getAvailableSpace()); } if (oomSetting.oomISH) { auto &ish = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, oomSize); // IndirectHeap may be larger than requested so grab what wasnt requested ish.getSpace(ish.getAvailableSpace() - oomSize); ASSERT_EQ(oomSize, ish.getAvailableSpace()); } } void TearDown() override { CommandQueueFixture::TearDown(); context->release(); DeviceFixture::TearDown(); } MockContext *context; }; HWTEST_P(OOMCommandQueueTest, finish) { auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); auto retVal = pCmdQ->finish(); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_P(OOMCommandQueueTest, enqueueMarker) { auto &commandStream = pCmdQ->getCS(1024); auto &indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; auto retVal = pCmdQ->enqueueMarkerWithWaitList( 1, &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); delete (Event *)eventReturned; } HWTEST_P(OOMCommandQueueTest, enqueueBarrier) { auto &commandStream = pCmdQ->getCS(1024); auto 
&indirectHeap = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; auto retVal = pCmdQ->enqueueBarrierWithWaitList( 1, &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); EXPECT_LE(usedAfterCS - usedBeforeCS, commandStream.getMaxAvailableSpace()); EXPECT_LE(usedAfterISH - usedBeforeISH, indirectHeap.getMaxAvailableSpace()); delete (Event *)eventReturned; } INSTANTIATE_TEST_CASE_P( OOM, OOMCommandQueueTest, testing::ValuesIn(oomSettings)); compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/ooq_task_tests.cpp000066400000000000000000000277621363734646600302400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/mock_csr.h" using namespace NEO; struct OOQFixtureFactory : public HelloWorldFixtureFactory { typedef OOQueueFixture CommandQueueFixture; }; template struct OOQTaskTypedTests : public HelloWorldTest { }; TYPED_TEST_CASE_P(OOQTaskTypedTests); bool isBlockingCall(unsigned int cmdType) { if (cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER || cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE) { return true; } else { return false; } } TYPED_TEST_P(OOQTaskTypedTests, givenNonBlockingCallWhenDoneOnOutOfOrderQueueThenTaskLevelDoesntChange) { auto &commandStreamReceiver = this->pCmdQ->getGpgpuCommandStreamReceiver(); auto tagAddress = commandStreamReceiver.getTagAddress(); auto blockingCall = isBlockingCall(TypeParam::Traits::cmdType); auto taskLevelClosed = 
blockingCall ? 1u : 0u; // for blocking commands task level will be closed //for non blocking calls make sure that resources are added to defer free list instaed of being destructed in place if (!blockingCall) { *tagAddress = 0; } auto previousTaskLevel = this->pCmdQ->taskLevel; if (TypeParam::Traits::cmdType == CL_COMMAND_WRITE_BUFFER || TypeParam::Traits::cmdType == CL_COMMAND_READ_BUFFER) { auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // no task level logic when cpu copy TypeParam::enqueue(this->pCmdQ, buffer.get()); this->pCmdQ->flush(); } else { TypeParam::enqueue(this->pCmdQ, nullptr); } EXPECT_EQ(previousTaskLevel + taskLevelClosed, this->pCmdQ->taskLevel); *tagAddress = initialHardwareTag; } TYPED_TEST_P(OOQTaskTypedTests, givenTaskWhenEnqueuedOnOutOfOrderQueueThenTaskCountIsUpdated) { auto &commandStreamReceiver = this->pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); auto tagAddress = commandStreamReceiver.getTagAddress(); auto blockingCall = isBlockingCall(TypeParam::Traits::cmdType); //for non blocking calls make sure that resources are added to defer free list instaed of being destructed in place if (!blockingCall) { *tagAddress = 0; } if (TypeParam::Traits::cmdType == CL_COMMAND_WRITE_BUFFER || TypeParam::Traits::cmdType == CL_COMMAND_READ_BUFFER) { auto buffer = std::unique_ptr(BufferHelper<>::create()); buffer->forceDisallowCPUCopy = true; // no task level logic when cpu copy TypeParam::enqueue(this->pCmdQ, buffer.get()); this->pCmdQ->flush(); } else { TypeParam::enqueue(this->pCmdQ, nullptr); } EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_LE(this->pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); *tagAddress = initialHardwareTag; } typedef ::testing::Types< EnqueueCopyBufferHelper<>, EnqueueCopyImageHelper<>, EnqueueFillBufferHelper<>, EnqueueFillImageHelper<>, EnqueueReadBufferHelper<>, 
EnqueueReadImageHelper<>, EnqueueWriteBufferHelper<>, EnqueueWriteImageHelper<>> EnqueueParams; REGISTER_TYPED_TEST_CASE_P(OOQTaskTypedTests, givenNonBlockingCallWhenDoneOnOutOfOrderQueueThenTaskLevelDoesntChange, givenTaskWhenEnqueuedOnOutOfOrderQueueThenTaskCountIsUpdated); // Instantiate all of these parameterized tests INSTANTIATE_TYPED_TEST_CASE_P(OOQ, OOQTaskTypedTests, EnqueueParams); typedef OOQTaskTypedTests> OOQTaskTests; TEST_F(OOQTaskTests, enqueueKernel_changesTaskCount) { auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); auto previousTaskCount = commandStreamReceiver.peekTaskCount(); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_LT(previousTaskCount, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(this->pCmdQ->taskCount, commandStreamReceiver.peekTaskCount()); } HWTEST_F(OOQTaskTests, givenCommandQueueWithLowerTaskLevelThenCsrWhenItIsSubmittedThenCommandQueueObtainsTaskLevelFromCsrWithoutSendingPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(100u, this->pCmdQ->taskLevel); } HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenMultipleEnqueueAreDoneThenTaskLevelDoesntChange) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EXPECT_EQ(100u, mockCsr->peekTaskLevel()); } HWTEST_F(OOQTaskTests, 
givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowedByNewCommandsThenTheyHaveHigherTaskLevel) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EXPECT_EQ(100u, mockCsr->peekTaskLevel()); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); } HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowedByNewCommandsAndBarrierThenCsrTaskLevelIncreases) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); this->pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr); EXPECT_EQ(102u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(102u, this->pCmdQ->taskLevel); EXPECT_EQ(102u, mockCsr->peekTaskLevel()); } HWTEST_F(OOQTaskTests, givenCommandQueueAtTaskLevel100WhenItIsFlushedAndFollowedByNewCommandsAndMarkerThenCsrTaskLevelIsNotIncreasing) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); 
mockCsr->taskLevel = 100; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); EXPECT_EQ(100u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); this->pCmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); EXPECT_EQ(101u, this->pCmdQ->taskLevel); EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(101u, this->pCmdQ->taskLevel); EXPECT_EQ(101u, mockCsr->peekTaskLevel()); } HWTEST_F(OOQTaskTests, givenTwoEnqueueCommandSynchronizedByEventsWhenTheyAreEnqueueThenSecondHasHigherTaskLevelThenFirst) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto currentTaskLevel = this->pCmdQ->taskLevel; cl_event retEvent; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel, EnqueueKernelTraits::workDim, EnqueueKernelTraits::globalWorkOffset, EnqueueKernelTraits::globalWorkSize, EnqueueKernelTraits::localWorkSize, 0, nullptr, &retEvent); auto neoEvent = castToObject(retEvent); EXPECT_EQ(currentTaskLevel, neoEvent->taskLevel); cl_event retEvent2; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel, EnqueueKernelTraits::workDim, EnqueueKernelTraits::globalWorkOffset, EnqueueKernelTraits::globalWorkSize, EnqueueKernelTraits::localWorkSize, 1, &retEvent, &retEvent2); auto neoEvent2 = castToObject(retEvent2); EXPECT_EQ(neoEvent2->taskLevel, neoEvent->taskLevel + 1); clReleaseEvent(retEvent2); clReleaseEvent(retEvent); } TEST_F(OOQTaskTests, WhenEnqueingKernelThenTaskLevelIsNotIncremented) { auto previousTaskLevel = this->pCmdQ->taskLevel; EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); EXPECT_EQ(previousTaskLevel, this->pCmdQ->taskLevel); } TEST_F(OOQTaskTests, 
GivenBlockingAndNonBlockedOnUserEventWhenReadingBufferThenTaskCountIsIncrementedAndTaskLevelIsUnchanged) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); pCmdQ->taskLevel = 1; auto previousTaskCount = pCmdQ->taskCount; auto previousTaskLevel = pCmdQ->taskLevel; auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); buffer->forceDisallowCPUCopy = true; // no task level incrasing when cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_FALSE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_EQ(previousTaskLevel, pCmdQ->taskLevel); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); pCmdQ->flush(); alignedFree(alignedReadPtr); } TEST_F(OOQTaskTests, givenOutOfOrderCommandQueueWhenBarrierIsCalledThenTaskLevelIsUpdated) { EnqueueKernelHelper<>::enqueueKernel(this->pCmdQ, pKernel); auto currentTaskLevel = this->pCmdQ->taskLevel; clEnqueueBarrierWithWaitList(this->pCmdQ, 0, nullptr, nullptr); auto newTaskLevel = this->pCmdQ->taskLevel; EXPECT_GT(newTaskLevel, currentTaskLevel); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/ooq_task_tests_mt.cpp000066400000000000000000000061521363734646600307260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" using namespace NEO; struct OOQFixtureFactory : public HelloWorldFixtureFactory { typedef OOQueueFixture CommandQueueFixture; }; template struct OOQTaskTypedTestsMt : public HelloWorldTest { }; typedef 
OOQTaskTypedTestsMt> OOQTaskTestsMt; TEST_F(OOQTaskTestsMt, GivenBlockingAndBlockedOnUserEventWhenReadingBufferThenTaskCountIsIncrementedAndTaskLevelIsUnchanged) { auto buffer = std::unique_ptr(BufferHelper<>::create()); auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize); ASSERT_NE(nullptr, alignedReadPtr); auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto previousTaskCount = pCmdQ->taskCount; auto previousTaskLevel = pCmdQ->taskLevel; std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); buffer->forceDisallowCPUCopy = true; // no task level incrasing when cpu copy retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, 0, BufferDefaults::sizeInBytes, alignedReadPtr, nullptr, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(previousTaskCount, pCmdQ->taskCount); EXPECT_EQ(previousTaskLevel, pCmdQ->taskLevel); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(alignedReadPtr); } TEST_F(OOQTaskTestsMt, GivenBlockedOnUserEventWhenEnqueingMarkerThenSuccessIsReturned) { auto userEvent = clCreateUserEvent(pContext, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); std::thread t([=]() { Event *ev = castToObject(userEvent); while (ev->peekHasChildEvents() == false) { // active wait for VirtualEvent (which is added after queue is blocked) } auto ret = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, ret); }); retVal = pCmdQ->enqueueMarkerWithWaitList( 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); 
}compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/read_write_buffer_cpu_copy.cpp000066400000000000000000000370011363734646600325400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/basic_math.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "test.h" using namespace NEO; typedef EnqueueReadBufferTypeTest ReadWriteBufferCpuCopyTest; HWTEST_F(ReadWriteBufferCpuCopyTest, givenRenderCompressedGmmWhenAskingForCpuOperationThenDisallow) { cl_int retVal; std::unique_ptr buffer(Buffer::create(context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto gmm = new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false); gmm->isRenderCompressed = false; buffer->getGraphicsAllocation()->setDefaultGmm(gmm); auto alignedPtr = alignedMalloc(2, MemoryConstants::cacheLineSize); auto unalignedPtr = ptrOffset(alignedPtr, 1); EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed()); EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedPtr, 1)); gmm->isRenderCompressed = true; EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed()); EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedPtr, 1)); alignedFree(alignedPtr); } HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenMemoryIsReadCorrectly) { cl_int retVal; size_t offset = 1; size_t size = 4; auto alignedReadPtr = alignedMalloc(size + 1, MemoryConstants::cacheLineSize); memset(alignedReadPtr, 0x00, size + 1); auto unalignedReadPtr = ptrOffset(alignedReadPtr, 1); std::unique_ptr bufferPtr(new uint8_t[size]); for (uint8_t i = 0; i < size; i++) { bufferPtr[i] = i + 1; } std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, bufferPtr.get(), retVal)); EXPECT_EQ(retVal, CL_SUCCESS); bool aligned 
= (reinterpret_cast(unalignedReadPtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed()); ASSERT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedReadPtr, size)); retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), CL_TRUE, offset, size - offset, unalignedReadPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); auto pBufferPtr = ptrOffset(bufferPtr.get(), offset); EXPECT_EQ(memcmp(unalignedReadPtr, pBufferPtr, size - offset), 0); alignedFree(alignedReadPtr); } HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedSrcPtrWhenWritingBufferThenMemoryIsWrittenCorrectly) { cl_int retVal; size_t offset = 1; size_t size = 4; auto alignedWritePtr = alignedMalloc(size + 1, MemoryConstants::cacheLineSize); auto unalignedWritePtr = static_cast(ptrOffset(alignedWritePtr, 1)); auto bufferPtrBase = new uint8_t[size]; auto bufferPtr = new uint8_t[size]; for (uint8_t i = 0; i < size; i++) { unalignedWritePtr[i] = i + 5; bufferPtrBase[i] = i + 1; bufferPtr[i] = i + 1; } std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, bufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); bool aligned = (reinterpret_cast(unalignedWritePtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed()); ASSERT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedWritePtr, size)); retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, buffer.get(), CL_TRUE, offset, size - offset, unalignedWritePtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); auto pBufferPtr = buffer->getCpuAddress(); EXPECT_EQ(memcmp(pBufferPtr, bufferPtrBase, offset), 0); // untouched pBufferPtr = ptrOffset(pBufferPtr, offset); EXPECT_EQ(memcmp(pBufferPtr, unalignedWritePtr, size - offset), 0); // updated alignedFree(alignedWritePtr); delete[] bufferPtr; delete[] bufferPtrBase; } 
HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWritingMemoryThenCpuReadWriteIsAllowed) { cl_int retVal; size_t size = MemoryConstants::cacheLineSize; auto alignedBufferPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedBufferPtr = ptrOffset(alignedBufferPtr, 1); auto alignedHostPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedHostPtr = ptrOffset(alignedHostPtr, 1); auto smallBufferPtr = alignedMalloc(1 * MB, MemoryConstants::cacheLineSize); size_t largeBufferSize = 11u * MemoryConstants::megaByte; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto mockContext = std::unique_ptr(new MockContext(mockDevice.get())); auto memoryManager = static_cast(mockDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, alignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_TRUE(buffer->isMemObjZeroCopy()); // zeroCopy == true && aligned/unaligned hostPtr EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(alignedHostPtr, MemoryConstants::cacheLineSize + 1)); EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedHostPtr, MemoryConstants::cacheLineSize)); buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && unaligned hostPtr EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedHostPtr, MemoryConstants::cacheLineSize)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_USE_HOST_PTR, 1 * MB, smallBufferPtr, retVal)); // platform LP == true && size <= 10 MB mockDevice->deviceInfo.platformLP = true; EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(smallBufferPtr, 1 * MB)); // platform LP == false && size <= 10 MB mockDevice->deviceInfo.platformLP = false; 
EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(smallBufferPtr, 1 * MB)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == false && size > 10 MB mockDevice->deviceInfo.platformLP = false; EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(buffer->getCpuAddress(), largeBufferSize)); alignedFree(smallBufferPtr); alignedFree(alignedHostPtr); alignedFree(alignedBufferPtr); } HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWritingMemoryThenCpuReadWriteIsNotAllowed) { cl_int retVal; size_t size = MemoryConstants::cacheLineSize; auto alignedBufferPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedBufferPtr = ptrOffset(alignedBufferPtr, 1); auto alignedHostPtr = alignedMalloc(MemoryConstants::cacheLineSize + 1, MemoryConstants::cacheLineSize); auto unalignedHostPtr = ptrOffset(alignedHostPtr, 1); size_t largeBufferSize = 11u * MemoryConstants::megaByte; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto mockContext = std::unique_ptr(new MockContext(mockDevice.get())); auto mockCommandQueue = std::unique_ptr(new MockCommandQueue); auto memoryManager = static_cast(mockDevice->getMemoryManager()); memoryManager->turnOnFakingBigAllocations(); std::unique_ptr buffer(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, alignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_TRUE(buffer->isMemObjZeroCopy()); // non blocking EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_NDRANGE_KERNEL, false, size, unalignedHostPtr, 0u, nullptr)); buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && aligned hostPtr EXPECT_FALSE(buffer->isReadWriteOnCpuPreffered(alignedHostPtr, MemoryConstants::cacheLineSize + 1)); buffer.reset(Buffer::create(mockContext.get(), 
CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == true && size > 10 MB mockDevice->deviceInfo.platformLP = true; EXPECT_FALSE(buffer->isReadWriteOnCpuPreffered(buffer->getCpuAddress(), largeBufferSize)); alignedFree(alignedHostPtr); alignedFree(alignedBufferPtr); } HWTEST_F(ReadWriteBufferCpuCopyTest, givenDebugVariableToDisableCpuCopiesWhenBufferCpuCopyAllowedIsCalledThenItReturnsFalse) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(false); cl_int retVal; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto mockContext = std::unique_ptr(new MockContext(mockDevice.get())); auto mockCommandQueue = std::unique_ptr(new MockCommandQueue); std::unique_ptr buffer(Buffer::create(context, CL_MEM_ALLOC_HOST_PTR, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_WRITE_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_WRITE_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast(0x1000), 0u, nullptr)); EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_WRITE_BUFFER, true, MemoryConstants::pageSize, 
reinterpret_cast(0x1000), 0u, nullptr)); } TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInNonSystemMemoryPoolThenIsReadWriteOnCpuAllowedReturnsFalse) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto memoryManager = new MockMemoryManager(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed()); EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(reinterpret_cast(0x1000), MemoryConstants::pageSize)); reinterpret_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); //read write on CPU is allowed, but not preffered. We can access this memory via Lock. EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed()); EXPECT_FALSE(buffer->isReadWriteOnCpuPreffered(reinterpret_cast(0x1000), MemoryConstants::pageSize)); } TEST(ReadWriteBufferOnCpu, givenPointerThatRequiresCpuCopyWhenCpuCopyIsEvaluatedThenTrueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto memoryManager = new MockMemoryManager(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); auto mockCommandQueue = std::unique_ptr(new MockCommandQueue); EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, false, MemoryConstants::pageSize, nullptr, 0u, nullptr)); memoryManager->cpuCopyRequired = true; EXPECT_TRUE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, false, 
MemoryConstants::pageSize, nullptr, 0u, nullptr)); } TEST(ReadWriteBufferOnCpu, givenPointerThatRequiresCpuCopyButItIsNotPossibleWhenCpuCopyIsEvaluatedThenFalseIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto memoryManager = new MockMemoryManager(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); auto mockCommandQueue = std::unique_ptr(new MockCommandQueue); buffer->forceDisallowCPUCopy = true; EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, nullptr, 0u, nullptr)); memoryManager->cpuCopyRequired = true; EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_READ_BUFFER, true, MemoryConstants::pageSize, nullptr, 0u, nullptr)); } TEST(ReadWriteBufferOnCpu, whenLocalMemoryPoolAllocationIsAskedForPreferenceThenCpuIsNotChoosen) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; std::unique_ptr buffer(Buffer::create(&ctx, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); reinterpret_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::LocalMemory); EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed()); EXPECT_FALSE(buffer->isReadWriteOnCpuPreffered(reinterpret_cast(0x1000), MemoryConstants::pageSize)); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp000066400000000000000000000171151363734646600324110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/program/sync_buffer_handler.h" #include 
"opencl/source/api/api.h" #include "opencl/test/unit_test/fixtures/enqueue_handler_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; class MockSyncBufferHandler : public SyncBufferHandler { public: using SyncBufferHandler::bufferSize; using SyncBufferHandler::graphicsAllocation; using SyncBufferHandler::usedBufferSize; }; class SyncBufferHandlerTest : public EnqueueHandlerTest { public: void SetUp() override {} void TearDown() override {} template void SetUpT() { EnqueueHandlerTest::SetUp(); kernelInternals = std::make_unique(*pClDevice, context); kernel = kernelInternals->mockKernel; kernel->executionType = KernelExecutionType::Concurrent; commandQueue = reinterpret_cast(new MockCommandQueueHw(context, pClDevice, 0)); } template void TearDownT() { commandQueue->release(); kernelInternals.reset(); EnqueueHandlerTest::TearDown(); } void patchAllocateSyncBuffer() { sPatchAllocateSyncBuffer.SurfaceStateHeapOffset = 0; sPatchAllocateSyncBuffer.DataParamOffset = 0; sPatchAllocateSyncBuffer.DataParamSize = sizeof(uint8_t); kernelInternals->kernelInfo.patchInfo.pAllocateSyncBuffer = &sPatchAllocateSyncBuffer; } MockSyncBufferHandler *getSyncBufferHandler() { return reinterpret_cast(pClDevice->syncBufferHandler.get()); } cl_int enqueueNDCount() { return clEnqueueNDCountKernelINTEL(commandQueue, kernel, workDim, gwOffset, workgroupCount, lws, 0, nullptr, nullptr); } const cl_uint workDim = 1; const size_t gwOffset[3] = {0, 0, 0}; const size_t lws[3] = {10, 1, 1}; size_t workgroupCount[3] = {10, 1, 1}; size_t globalWorkSize[3] = {100, 1, 1}; size_t workItemsCount = 10; std::unique_ptr kernelInternals; MockKernel *kernel; MockCommandQueue *commandQueue; SPatchAllocateSyncBuffer sPatchAllocateSyncBuffer; }; HWTEST_TEMPLATED_F(SyncBufferHandlerTest, 
GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) { patchAllocateSyncBuffer(); enqueueNDCount(); auto syncBufferHandler = getSyncBufferHandler(); EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize); commandQueue->flush(); EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount( pDevice->getUltCommandStreamReceiver().getOsContext().getContextId()), pDevice->getUltCommandStreamReceiver().latestSentTaskCount); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) { auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, getSyncBufferHandler()); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenDefaultKernelUsingSyncBufferWhenEnqueuingKernelThenErrorIsReturnedAndSyncBufferIsNotCreated) { patchAllocateSyncBuffer(); kernel->executionType = KernelExecutionType::Default; auto retVal = enqueueNDCount(); EXPECT_EQ(CL_INVALID_KERNEL, retVal); EXPECT_EQ(nullptr, getSyncBufferHandler()); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated) { patchAllocateSyncBuffer(); auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, getSyncBufferHandler()); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) { auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws); workgroupCount[0] = maxWorkGroupCount; globalWorkSize[0] = maxWorkGroupCount * lws[0]; auto retVal = enqueueNDCount(); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) { size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws); workgroupCount[0] = maxWorkGroupCount + 1; globalWorkSize[0] = maxWorkGroupCount * lws[0] + 1; auto retVal = 
enqueueNDCount(); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated) { patchAllocateSyncBuffer(); enqueueNDCount(); auto syncBufferHandler = getSyncBufferHandler(); syncBufferHandler->usedBufferSize = syncBufferHandler->bufferSize; enqueueNDCount(); EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBufferThenSshIsProperlyPatched) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; kernelInternals->kernelInfo.usesSsh = true; kernelInternals->kernelInfo.requiresSshForBuffers = true; patchAllocateSyncBuffer(); pClDevice->allocateSyncBufferHandler(); auto syncBufferHandler = getSyncBufferHandler(); auto surfaceState = reinterpret_cast(ptrOffset(kernel->getSurfaceStateHeap(), sPatchAllocateSyncBuffer.SurfaceStateHeapOffset)); auto bufferAddress = syncBufferHandler->graphicsAllocation->getGpuAddress(); surfaceState->setSurfaceBaseAddress(bufferAddress + 1); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_NE(bufferAddress, surfaceAddress); kernel->patchSyncBuffer(commandQueue->getDevice(), syncBufferHandler->graphicsAllocation, syncBufferHandler->usedBufferSize); surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(bufferAddress, surfaceAddress); } TEST(SyncBufferHandlerDeviceTest, GivenRootDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) { const size_t testUsedBufferSize = 100; MockClDevice rootDevice{new MockDevice}; rootDevice.allocateSyncBufferHandler(); auto syncBufferHandler = reinterpret_cast(rootDevice.syncBufferHandler.get()); ASSERT_NE(syncBufferHandler->usedBufferSize, testUsedBufferSize); syncBufferHandler->usedBufferSize = testUsedBufferSize; rootDevice.allocateSyncBufferHandler(); syncBufferHandler = reinterpret_cast(rootDevice.syncBufferHandler.get()); EXPECT_EQ(testUsedBufferSize, 
syncBufferHandler->usedBufferSize); } TEST(SyncBufferHandlerDeviceTest, GivenSubDeviceWhenAllocateSyncBufferIsCalledTwiceThenTheObjectIsCreatedOnlyOnce) { const size_t testUsedBufferSize = 100; MockClDevice rootDevice{new MockDevice}; ClDevice subDevice{*rootDevice.createSubDevice(0), platform()}; subDevice.allocateSyncBufferHandler(); auto syncBufferHandler = reinterpret_cast(subDevice.syncBufferHandler.get()); ASSERT_NE(syncBufferHandler->usedBufferSize, testUsedBufferSize); syncBufferHandler->usedBufferSize = testUsedBufferSize; subDevice.allocateSyncBufferHandler(); syncBufferHandler = reinterpret_cast(subDevice.syncBufferHandler.get()); EXPECT_EQ(testUsedBufferSize, syncBufferHandler->usedBufferSize); } compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/work_group_size_tests.cpp000066400000000000000000000314551363734646600316420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" #include "patch_shared.h" using namespace NEO; struct WorkGroupSizeBase { template size_t computeWalkerWorkItems(typename FamilyType::GPGPU_WALKER &pCmd) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; // Compute the SIMD lane mask size_t simd = pCmd.getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD32 ? 32 : pCmd.getSimdSize() == GPGPU_WALKER::SIMD_SIZE_SIMD16 ? 
16 : 8; uint64_t simdMask = maxNBitValue(simd); // Mask off lanes based on the execution masks auto laneMaskRight = pCmd.getRightExecutionMask() & simdMask; auto lanesPerThreadX = 0; while (laneMaskRight) { lanesPerThreadX += laneMaskRight & 1; laneMaskRight >>= 1; } auto numWorkItems = ((pCmd.getThreadWidthCounterMaximum() - 1) * simd + lanesPerThreadX) * pCmd.getThreadGroupIdXDimension(); numWorkItems *= pCmd.getThreadGroupIdYDimension(); numWorkItems *= pCmd.getThreadGroupIdZDimension(); return numWorkItems; } template void verify(uint32_t simdSize, size_t dimX, size_t dimY, size_t dimZ) { size_t globalOffsets[] = {0, 0, 0}; size_t workItems[] = { dimX, dimY, dimZ}; int dims = (dimX > 1 ? 1 : 0) + (dimY > 1 ? 1 : 0) + (dimZ > 1 ? 1 : 0); size_t workGroupSize[3]; auto maxWorkGroupSize = 256u; if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, simdSize, 0u, IGFX_GEN9_CORE, 32u, 0u, false, false); computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dims); } else { if (dims == 1) computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simdSize); else computeWorkgroupSize2D(maxWorkGroupSize, workGroupSize, workItems, simdSize); } auto totalWorkItems = workItems[0] * workItems[1] * workItems[2]; auto localWorkItems = workGroupSize[0] * workGroupSize[1] * workGroupSize[2]; EXPECT_GT(localWorkItems, 0u); EXPECT_LE(localWorkItems, 256u); auto xRemainder = workItems[0] % workGroupSize[0]; auto yRemainder = workItems[1] % workGroupSize[1]; auto zRemainder = workItems[2] % workGroupSize[2]; //No remainders EXPECT_EQ(0u, xRemainder); EXPECT_EQ(0u, yRemainder); EXPECT_EQ(0u, zRemainder); //Now setup GPGPU Walker typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; GPGPU_WALKER pCmd = FamilyType::cmdInitGpgpuWalker; const size_t workGroupsStart[3] = {0, 0, 0}; const size_t workGroupsNum[3] = { Math::divideAndRoundUp(workItems[0], workGroupSize[0]), Math::divideAndRoundUp(workItems[1], workGroupSize[1]), 
Math::divideAndRoundUp(workItems[2], workGroupSize[2])}; const iOpenCL::SPatchThreadPayload threadPayload = {}; GpgpuWalkerHelper::setGpgpuWalkerThreadData(&pCmd, globalOffsets, workGroupsStart, workGroupsNum, workGroupSize, simdSize, dims, true, false, threadPayload, 0u); //And check if it is programmed correctly auto numWorkItems = computeWalkerWorkItems(pCmd); EXPECT_EQ(totalWorkItems, numWorkItems); if (xRemainder | yRemainder | zRemainder | (totalWorkItems != numWorkItems)) { std::stringstream regionString; regionString << "workItems = <" << workItems[0] << ", " << workItems[1] << ", " << workItems[2] << ">; "; regionString << "LWS = <" << workGroupSize[0] << ", " << workGroupSize[1] << ", " << workGroupSize[2] << ">; "; regionString << "thread = <" << pCmd.getThreadGroupIdXDimension() << ", " << pCmd.getThreadGroupIdYDimension() << ", " << pCmd.getThreadGroupIdZDimension() << ">; "; regionString << "threadWidth = " << std::dec << pCmd.getThreadWidthCounterMaximum() << std::dec << "; "; regionString << "rightMask = " << std::hex << pCmd.getRightExecutionMask() << std::dec << "; "; EXPECT_FALSE(true) << regionString.str(); } } }; struct WorkGroupSizeChannels : public WorkGroupSizeBase, public ::testing::TestWithParam> { }; HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeNDDefault) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeNDEnabled) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeNDDisabled) 
{ bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeSquaredDefault) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeSquaredEnabled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, allChannelsWithEnableComputeWorkSizeSquaredDisabled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, workDim, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justXWithEnableComputeWorkSizeNDDefault) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, 1, 1); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justXWithEnableComputeWorkSizeNDEnabled) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, 1, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, 
justXWithEnableComputeWorkSizeNDDisabled) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, workDim, 1, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justYWithEnableComputeWorkSizeNDDefault) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, workDim, 1); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justYWithEnableComputeWorkSizeNDEnalbed) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, workDim, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justYWithEnableComputeWorkSizeNDDisabled) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, workDim, 1); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justZWithEnableComputeWorkSizeNDDefault) { uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, 1, workDim); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justZWithEnableComputeWorkSizeNDEnabled) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, 1, workDim); 
DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeChannels, justZWithEnableComputeWorkSizeNDDisabled) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); uint32_t simdSize; size_t workDim; std::tie(simdSize, workDim) = GetParam(); verify(simdSize, 1, 1, workDim); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } static uint32_t simdSizes[] = { 8, 16, 32}; static size_t workItemCases1D[] = { 1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 189, 190, 191, 255, 256, 257, 399, 400, 401, 511, 512, 513, 1007, 1008, 1009, 1023, 1024, 1025, 1400, 1401, 1402}; INSTANTIATE_TEST_CASE_P(wgs, WorkGroupSizeChannels, ::testing::Combine( ::testing::ValuesIn(simdSizes), ::testing::ValuesIn(workItemCases1D))); // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== struct WorkGroupSize2D : public WorkGroupSizeBase, public ::testing::TestWithParam> { }; HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSize2D, XY) { uint32_t simdSize; size_t dimX, dimY; std::tie(simdSize, dimX, dimY) = GetParam(); verify(simdSize, dimX, dimY, 1); } static size_t workItemCases2D[] = {1, 2, 3, 7, 15, 31, 63, 127, 255, 511, 1007, 1023, 2047}; INSTANTIATE_TEST_CASE_P(wgs, WorkGroupSize2D, ::testing::Combine( ::testing::ValuesIn(simdSizes), ::testing::ValuesIn(workItemCases2D), ::testing::ValuesIn(workItemCases2D))); // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== // ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== ==== struct Region { size_t r[3]; }; struct WorkGroupSizeRegion : public WorkGroupSizeBase, public ::testing::TestWithParam> { }; HWCMDTEST_P(IGFX_GEN8_CORE, WorkGroupSizeRegion, allChannels) { uint32_t simdSize; Region region; 
std::tie(simdSize, region) = GetParam(); verify(simdSize, region.r[0], region.r[1], region.r[2]); } Region regionCases[] = { {{1, 1, 1}}, // Trivial case {{9, 9, 10}} // This test case was hit by some AUBCopyBufferRect regressions }; INSTANTIATE_TEST_CASE_P(wgs, WorkGroupSizeRegion, ::testing::Combine( ::testing::ValuesIn(simdSizes), ::testing::ValuesIn(regionCases))); compute-runtime-20.13.16352/opencl/test/unit_test/command_queue/zero_size_enqueue_tests.cpp000066400000000000000000001306251363734646600321510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; class ZeroSizeEnqueueHandlerTest : public Test { public: MockContext context; cl_int retVal; }; class ZeroSizeEnqueueHandlerTestZeroGws : public ZeroSizeEnqueueHandlerTest { public: void SetUp() override { ZeroSizeEnqueueHandlerTest::SetUp(); testGwsInputs[0] = std::make_tuple(1, nullptr); testGwsInputs[1] = std::make_tuple(2, nullptr); testGwsInputs[2] = std::make_tuple(3, nullptr); testGwsInputs[3] = std::make_tuple(1, zeroGWS0); testGwsInputs[4] = std::make_tuple(2, zeroGWS00); testGwsInputs[5] = std::make_tuple(2, zeroGWS01); testGwsInputs[6] = std::make_tuple(2, zeroGWS10); testGwsInputs[7] = std::make_tuple(3, zeroGWS000); testGwsInputs[8] = std::make_tuple(3, zeroGWS011); testGwsInputs[9] = std::make_tuple(3, zeroGWS101); testGwsInputs[10] = std::make_tuple(3, zeroGWS110); testGwsInputs[11] = std::make_tuple(3, zeroGWS001); testGwsInputs[12] = std::make_tuple(3, 
zeroGWS010); testGwsInputs[13] = std::make_tuple(3, zeroGWS100); } size_t zeroGWS0[1] = {0}; size_t zeroGWS00[2] = {0, 0}; size_t zeroGWS01[2] = {0, 1}; size_t zeroGWS10[2] = {1, 0}; size_t zeroGWS000[3] = {0, 0, 0}; size_t zeroGWS011[3] = {0, 1, 1}; size_t zeroGWS101[3] = {1, 0, 1}; size_t zeroGWS110[3] = {1, 1, 0}; size_t zeroGWS001[3] = {0, 0, 1}; size_t zeroGWS010[3] = {0, 1, 0}; size_t zeroGWS100[3] = {1, 0, 0}; std::tuple testGwsInputs[14]; }; HWTEST_F(ZeroSizeEnqueueHandlerTestZeroGws, GivenZeroSizeEnqueueIsDetectedAndOpenClAtLeast21WhenEnqueingKernelThenCommandMarkerShouldBeEnqueued) { pClDevice->enabledClVersion = 21; auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockKernelWithInternals mockKernel(*pClDevice); for (auto testInput : testGwsInputs) { auto workDim = std::get<0>(testInput); auto gws = std::get<1>(testInput); mockCmdQ->lastCommandType = static_cast(CL_COMMAND_COPY_BUFFER); retVal = mockCmdQ->enqueueKernel(mockKernel.mockKernel, workDim, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } } HWTEST_F(ZeroSizeEnqueueHandlerTestZeroGws, GivenZeroSizeEnqueueIsDetectedAndOpenClIs20OrOlderWhenEnqueingKernelThenErrorIsReturned) { int oclVersionsToTest[] = {12, 20}; for (auto oclVersion : oclVersionsToTest) { pClDevice->enabledClVersion = oclVersion; auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); MockKernelWithInternals mockKernel(*pClDevice); for (auto testInput : testGwsInputs) { auto workDim = std::get<0>(testInput); auto gws = std::get<1>(testInput); mockCmdQ->lastCommandType = static_cast(CL_COMMAND_COPY_BUFFER); retVal = mockCmdQ->enqueueKernel(mockKernel.mockKernel, workDim, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_GLOBAL_WORK_SIZE, retVal); EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER), mockCmdQ->lastCommandType); } } } 
// A zero global work size combined with an explicit local work size must not throw;
// the expected status depends on the enabled OpenCL version.
HWTEST_F(ZeroSizeEnqueueHandlerTestZeroGws, GivenZeroSizeEnqueueIsDetectedAndLocalWorkSizeIsSetWhenEnqueingKernelThenNoExceptionIsThrown) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockKernelWithInternals mockKernel(*pClDevice);
    mockKernel.mockProgram->setAllowNonUniform(true);

    auto workDim = 1;
    auto gws = zeroGWS0;
    size_t lws[1] = {1};

    EXPECT_NO_THROW(retVal = mockCmdQ->enqueueKernel(mockKernel.mockKernel, workDim, nullptr, gws, lws, 0, nullptr, nullptr));
    auto expected = (pClDevice->getEnabledClVersion() < 21 ? CL_INVALID_GLOBAL_WORK_SIZE : CL_SUCCESS);
    EXPECT_EQ(expected, retVal);
}

// The queue records a marker, but the returned event must still report CL_COMMAND_NDRANGE_KERNEL.
// Only meaningful from OpenCL 2.1 on, hence the early return for older versions.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenEnqueingKernelThenEventCommandTypeShoudBeUnchanged) {
    if (pClDevice->getEnabledClVersion() < 21) {
        return;
    }
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockKernelWithInternals mockKernel(*pClDevice);
    size_t zeroGWS[] = {0, 0, 0};
    mockCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, zeroGWS, nullptr, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    // Do not cast/delete an event handle the enqueue never produced
    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_NDRANGE_KERNEL), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockBuffer buffer;
    size_t memory[1];
    size_t zeroSize = 0;
    mockCmdQ->enqueueReadBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockBuffer buffer;
    size_t memory[1];
    size_t zeroSize = 0;
    mockCmdQ->enqueueReadBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

// Every region with at least one zero extent is tried. lastCommandType is reset to a
// sentinel before each enqueue so that each region is verified on its own.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferRectThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockBuffer buffer;
    size_t memory[1];
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};

    size_t zeroRegions[][3] = {{0, 0, 0}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0}};
    for (auto &zeroRegion : zeroRegions) {
        mockCmdQ->lastCommandType = static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER);
        mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion, 0, 0, 0, 0, memory, 0, nullptr, nullptr);
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
    }
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingBufferRectThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockBuffer buffer;
    size_t memory[1];
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t zeroRegion[] = {0, 0, 0};
    mockCmdQ->enqueueReadBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion, 0, 0, 0, 0, memory, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER_RECT), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockBuffer buffer;
    size_t memory[1];
    size_t zeroSize = 0;
    mockCmdQ->enqueueWriteBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockBuffer buffer;
    size_t memory[1];
    size_t zeroSize = 0;
    mockCmdQ->enqueueWriteBuffer(&buffer, CL_FALSE, 0, zeroSize, memory, nullptr, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

// See the ReadBufferRect marker test: sentinel reset before every region.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferRectThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockBuffer buffer;
    size_t memory[1];
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};

    size_t zeroRegions[][3] = {{0, 0, 0}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0}};
    for (auto &zeroRegion : zeroRegions) {
        mockCmdQ->lastCommandType = static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER);
        mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion, 0, 0, 0, 0, memory, 0, nullptr, nullptr);
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
    }
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingBufferRectThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockBuffer buffer;
    size_t memory[1];
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t zeroRegion[] = {0, 0, 0};
    mockCmdQ->enqueueWriteBufferRect(&buffer, CL_FALSE, bufferOrigin, hostOrigin, zeroRegion, 0, 0, 0, 0, memory, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER_RECT), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    size_t zeroSize = 0;
    mockCmdQ->enqueueCopyBuffer(&srcBuffer, &dstBuffer, 0, 0, zeroSize, 0, nullptr, nullptr);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    size_t zeroSize = 0;
    mockCmdQ->enqueueCopyBuffer(&srcBuffer, &dstBuffer, 0, 0, zeroSize, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

// See the ReadBufferRect marker test. The sentinel here is CL_COMMAND_COPY_BUFFER_RECT,
// which differs from the expected CL_COMMAND_MARKER.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferRectThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    size_t srcOrigin[3] = {1024u, 1, 0};
    size_t dstOrigin[3] = {1024u, 1, 0};

    size_t zeroRegions[][3] = {{0, 0, 0}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0}};
    for (auto &zeroRegion : zeroRegions) {
        mockCmdQ->lastCommandType = static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER_RECT);
        mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion, 0, 0, 0, 0, 0, nullptr, nullptr);
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
    }
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, WhenCopyingBufferZeroSizeEnqueueIsDetectedWhenCopyingBufferRectThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockBuffer srcBuffer;
    MockBuffer dstBuffer;
    size_t srcOrigin[3] = {1024u, 1, 0};
    size_t dstOrigin[3] = {1024u, 1, 0};
    size_t zeroRegion[3] = {0, 0, 0};
    mockCmdQ->enqueueCopyBufferRect(&srcBuffer, &dstBuffer, srcOrigin, dstOrigin, zeroRegion, 0, 0, 0, 0, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER_RECT), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingBufferThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    MockBuffer buffer;
    cl_int pattern = 0xDEADBEEF;
    size_t zeroSize = 0;
    mockCmdQ->enqueueFillBuffer(&buffer, &pattern, sizeof(pattern), 0, zeroSize, 0, nullptr, nullptr);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
}

// The queue records a marker, but the returned event must still report CL_COMMAND_FILL_BUFFER.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingBufferTheEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    MockBuffer buffer;
    cl_int pattern = 0xDEADBEEF;
    size_t zeroSize = 0;
    mockCmdQ->enqueueFillBuffer(&buffer, &pattern, sizeof(pattern), 0, zeroSize, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    // Do not cast/delete an event handle the enqueue never produced
    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_FILL_BUFFER), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

// Every region with at least one zero extent is tried. lastCommandType is reset to a
// sentinel before each enqueue so that each region is verified on its own.
// NOTE(review): the smart-pointer element type was missing in this copy; Image is assumed
// to be what Image2dHelper<>::create() returns - confirm.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingImageThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    std::unique_ptr<Image> image(Image2dHelper<>::create(&context));
    size_t memory[1];
    size_t origin[3] = {1024u, 1, 0};

    size_t zeroRegions[][3] = {{0, 0, 0}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0}};
    for (auto &zeroRegion : zeroRegions) {
        mockCmdQ->lastCommandType = static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER);
        mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion, 0, 0, memory, nullptr, 0, nullptr, nullptr);
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
    }
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenReadingImageThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    std::unique_ptr<Image> image(Image2dHelper<>::create(&context));
    size_t memory[1];
    size_t origin[3] = {1024u, 1, 0};
    size_t zeroRegion[3] = {0, 0, 0};
    mockCmdQ->enqueueReadImage(image.get(), CL_FALSE, origin, zeroRegion, 0, 0, memory, nullptr, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_IMAGE), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

// See the ReadImage marker test: sentinel reset before every region.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingImageThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    std::unique_ptr<Image> image(Image2dHelper<>::create(&context));
    size_t memory[1];
    size_t origin[3] = {1024u, 1, 0};

    size_t zeroRegions[][3] = {{0, 0, 0}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0}};
    for (auto &zeroRegion : zeroRegions) {
        mockCmdQ->lastCommandType = static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER);
        mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion, 0, 0, memory, nullptr, 0, nullptr, nullptr);
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
    }
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenWritingImageThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    std::unique_ptr<Image> image(Image2dHelper<>::create(&context));
    size_t memory[1];
    size_t origin[3] = {1024u, 1, 0};
    size_t zeroRegion[3] = {0, 0, 0};
    mockCmdQ->enqueueWriteImage(image.get(), CL_FALSE, origin, zeroRegion, 0, 0, memory, nullptr, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_IMAGE), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

// See the ReadImage marker test: sentinel reset before every region.
HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingImageThenCommandMarkerShouldBeEnqueued) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    std::unique_ptr<Image> srcImage(Image2dHelper<>::create(&context));
    std::unique_ptr<Image> dstImage(Image2dHelper<>::create(&context));
    size_t srcOrigin[3] = {1024u, 1, 0};
    size_t dstOrigin[3] = {1024u, 1, 0};

    size_t zeroRegions[][3] = {{0, 0, 0}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0}};
    for (auto &zeroRegion : zeroRegions) {
        mockCmdQ->lastCommandType = static_cast<cl_command_type>(CL_COMMAND_COPY_BUFFER);
        mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion, 0, nullptr, nullptr);
        EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);
    }
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingImageThenEventCommandTypeShouldBeUnchanged) {
    auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(&context, pClDevice, 0));

    cl_event event = nullptr;
    std::unique_ptr<Image> srcImage(Image2dHelper<>::create(&context));
    std::unique_ptr<Image> dstImage(Image2dHelper<>::create(&context));
    size_t srcOrigin[3] = {1024u, 1, 0};
    size_t dstOrigin[3] = {1024u, 1, 0};
    size_t zeroRegion[3] = {0, 0, 0};
    mockCmdQ->enqueueCopyImage(srcImage.get(), dstImage.get(), srcOrigin, dstOrigin, zeroRegion, 0, nullptr, &event);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_MARKER), mockCmdQ->lastCommandType);

    ASSERT_NE(nullptr, event);
    auto pEvent = (Event *)event;
    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;
    auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_COPY_IMAGE), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    delete pEvent;
}

HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingImageToBufferThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = 
std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr srcImage(Image2dHelper<>::create(&context)); std::unique_ptr dstBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); size_t srcOrigin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion000, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion011, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion101, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion110, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion001, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion010, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion100, 0, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingImageToBufferThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = 
std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr srcImage(Image2dHelper<>::create(&context)); std::unique_ptr dstBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); size_t srcOrigin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueCopyImageToBuffer(srcImage.get(), dstBuffer.get(), srcOrigin, zeroRegion, 0, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_IMAGE_TO_BUFFER), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferToImageThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr srcBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); std::unique_ptr dstImage(Image2dHelper<>::create(&context)); size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion000, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion011, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion101, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; 
mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion110, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion001, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion010, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion100, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingBufferToImageThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr srcBuffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1024u, nullptr, retVal)); std::unique_ptr dstImage(Image2dHelper<>::create(&context)); size_t dstOrigin[3] = {1024u, 1, 0}; size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueCopyBufferToImage(srcBuffer.get(), dstImage.get(), 0, dstOrigin, zeroRegion, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_COPY_BUFFER_TO_IMAGE), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingImageThenCommandMarkerShouldBeEnqueued) { auto mockCmdQ = 
std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); std::unique_ptr image(Image2dHelper<>::create(&context)); size_t origin[3] = {1024u, 1, 1}; int32_t fillColor[4] = {0xCC, 0xCC, 0xCC, 0xCC}; size_t zeroRegion000[3] = {0, 0, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion000, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion011[3] = {0, 1, 1}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion011, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion101[3] = {1, 0, 1}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion101, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion110[3] = {1, 1, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion110, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion001[3] = {0, 0, 1}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion001, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion010[3] = {0, 1, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion010, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); size_t zeroRegion100[3] = {1, 0, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion100, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingImageThenEventCommandTypeShouldBeUnchanged) { auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; std::unique_ptr image(Image2dHelper<>::create(&context)); size_t origin[3] = {1024u, 1, 1}; int32_t fillColor[4] = {0xCC, 0xCC, 0xCC, 0xCC}; 
size_t zeroRegion[3] = {0, 0, 0}; mockCmdQ->enqueueFillImage(image.get(), &fillColor, origin, zeroRegion, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_FILL_IMAGE), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingSvmMemThenCommandMarkerShouldBeEnqueued) { if (pDevice->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); size_t zeroSize = 0; mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); context.getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); context.getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenCopyingSvmMemThenEventCommandTypeShouldBeUnchanged) { if (pDevice->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; void *pSrcSVM = context.getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); void *pDstSVM = context.getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); size_t zeroSize = 0; mockCmdQ->enqueueSVMMemcpy(false, pSrcSVM, pDstSVM, zeroSize, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); 
auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMCPY), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; context.getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); context.getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingSvmMemThenCommandMarkerShouldBeEnqueued) { if (pDevice->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); const float pattern[1] = {1.2345f}; size_t zeroSize = 0; mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, nullptr); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); context.getSVMAllocsManager()->freeSVMAlloc(pSVM); } HWTEST_F(ZeroSizeEnqueueHandlerTest, GivenZeroSizeEnqueueIsDetectedWhenFillingSvmMemThenEventCommandTypeShouldBeUnchanged) { if (pDevice->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(&context, pClDevice, 0)); cl_event event; void *pSVM = context.getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); const float pattern[1] = {1.2345f}; size_t zeroSize = 0; mockCmdQ->enqueueSVMMemFill(pSVM, &pattern, sizeof(pattern), zeroSize, 0, nullptr, &event); EXPECT_EQ(static_cast(CL_COMMAND_MARKER), mockCmdQ->lastCommandType); auto pEvent = (Event *)event; cl_command_type cmdType = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(static_cast(CL_COMMAND_SVM_MEMFILL), 
cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); delete pEvent; context.getSVMAllocsManager()->freeSVMAlloc(pSVM); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/000077500000000000000000000000001363734646600246235ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/CMakeLists.txt000066400000000000000000000042011363734646600273600ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_command_stream ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_file_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_3_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_gmock_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_receiver_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_devices_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linear_stream_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/linear_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_fixture.h 
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/thread_arbitration_policy_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/compute_mode_tests.h ) get_property(NEO_CORE_SRCS_tests_preemption GLOBAL PROPERTY NEO_CORE_SRCS_tests_preemption) list(APPEND IGDRCL_SRCS_tests_command_stream ${NEO_CORE_SRCS_tests_preemption} ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_command_stream}) add_subdirectories() aub_command_stream_receiver_1_tests.cpp000066400000000000000000002177411363734646600344320ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/helpers/hardware_context_controller.h" #include "opencl/test/unit_test/fixtures/aub_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/libult/ult_aub_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_csr.h" #include "opencl/test/unit_test/mocks/mock_aub_manager.h" #include "opencl/test/unit_test/mocks/mock_aub_subcapture_manager.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" 
#include "opencl/test/unit_test/mocks/mock_os_context.h" #include "test.h" using namespace NEO; using AubCommandStreamReceiverTests = Test; template struct MockAubCsrToTestDumpAubNonWritable : public AUBCommandStreamReceiverHw { using AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; using AUBCommandStreamReceiverHw::dumpAubNonWritable; bool writeMemory(GraphicsAllocation &gfxAllocation) override { return true; } }; TEST_F(AubCommandStreamReceiverTests, givenStructureWhenMisalignedUint64ThenUseSetterGetterFunctionsToSetGetValue) { const uint64_t value = 0x0123456789ABCDEFu; AubMemDump::AubCaptureBinaryDumpHD aubCaptureBinaryDumpHD{}; aubCaptureBinaryDumpHD.setBaseAddr(value); EXPECT_EQ(value, aubCaptureBinaryDumpHD.getBaseAddr()); aubCaptureBinaryDumpHD.setWidth(value); EXPECT_EQ(value, aubCaptureBinaryDumpHD.getWidth()); aubCaptureBinaryDumpHD.setHeight(value); EXPECT_EQ(value, aubCaptureBinaryDumpHD.getHeight()); aubCaptureBinaryDumpHD.setPitch(value); EXPECT_EQ(value, aubCaptureBinaryDumpHD.getPitch()); AubMemDump::AubCmdDumpBmpHd aubCmdDumpBmpHd{}; aubCmdDumpBmpHd.setBaseAddr(value); EXPECT_EQ(value, aubCmdDumpBmpHd.getBaseAddr()); AubMemDump::CmdServicesMemTraceDumpCompress cmdServicesMemTraceDumpCompress{}; cmdServicesMemTraceDumpCompress.setSurfaceAddress(value); EXPECT_EQ(value, cmdServicesMemTraceDumpCompress.getSurfaceAddress()); } TEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenItIsCreatedWithWrongGfxCoreFamilyThenNullPointerShouldBeReturned) { HardwareInfo *hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.eRenderCoreFamily = GFXCORE_FAMILY_FORCE_ULONG; // wrong gfx core family CommandStreamReceiver *aubCsr = AUBCommandStreamReceiver::create("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(nullptr, aubCsr); } TEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenTypeIsCheckedThenAubCsrIsReturned) { std::unique_ptr 
aubCsr(AUBCommandStreamReceiver::create("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); EXPECT_NE(nullptr, aubCsr); EXPECT_EQ(CommandStreamReceiverType::CSR_AUB, aubCsr->getType()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenItIsCreatedThenAubManagerAndHardwareContextAreNull) { DebugManagerStateRestore restorer; DebugManager.flags.UseAubStream.set(false); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), false, 1); executionEnvironment.initializeMemoryManager(); auto aubCsr = std::make_unique>("", true, executionEnvironment, 0); ASSERT_NE(nullptr, aubCsr); EXPECT_EQ(nullptr, aubCsr->aubManager); EXPECT_EQ(nullptr, aubCsr->hardwareContextController); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenItIsCreatedWithDefaultSettingsThenItHasBatchedDispatchModeEnabled) { DebugManagerStateRestore stateRestore; DebugManager.flags.CsrDispatchMode.set(0); std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); EXPECT_EQ(DispatchMode::BatchedDispatch, aubCsr->peekDispatchMode()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenItIsCreatedWithDebugSettingsThenItHasProperDispatchModeEnabled) { DebugManagerStateRestore stateRestore; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); EXPECT_EQ(DispatchMode::ImmediateDispatch, aubCsr->peekDispatchMode()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenItIsCreatedThenMemoryManagerIsNotNull) { std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); std::unique_ptr memoryManager(new OsAgnosticMemoryManager(*pDevice->executionEnvironment)); EXPECT_NE(nullptr, memoryManager.get()); } HWTEST_F(AubCommandStreamReceiverTests, 
givenAubCommandStreamReceiverWhenMultipleInstancesAreCreatedThenTheyOperateOnSingleFileStream) { auto aubCsr1 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(aubCsr1->stream, aubCsr2->stream); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMultipleInstancesAreCreatedThenTheyUseTheSameFileStream) { auto aubCsr1 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto streamProvider1 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getStreamProvider(); EXPECT_NE(nullptr, streamProvider1); auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto streamProvider2 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getStreamProvider(); EXPECT_NE(nullptr, streamProvider2); EXPECT_EQ(streamProvider1, streamProvider2); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMultipleInstancesAreCreatedThenTheyUseTheSamePhysicalAddressAllocator) { auto aubCsr1 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto physicalAddressAlocator1 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getPhysicalAddressAllocator(); EXPECT_NE(nullptr, physicalAddressAlocator1); auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto physicalAddressAlocator2 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getPhysicalAddressAllocator(); EXPECT_NE(nullptr, physicalAddressAlocator2); EXPECT_EQ(physicalAddressAlocator1, physicalAddressAlocator2); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMultipleInstancesAreCreatedThenTheyUseTheSameAddressMapper) { auto aubCsr1 = 
std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto addressMapper1 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getAddressMapper(); EXPECT_NE(nullptr, addressMapper1); auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto addressMapper2 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getAddressMapper(); EXPECT_NE(nullptr, addressMapper2); EXPECT_EQ(addressMapper1, addressMapper2); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMultipleInstancesAreCreatedThenTheyUseTheSameSubCaptureCommon) { auto aubCsr1 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto subCaptureCommon1 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getSubCaptureCommon(); EXPECT_NE(nullptr, subCaptureCommon1); auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto subCaptureCommon2 = pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getSubCaptureCommon(); EXPECT_NE(nullptr, subCaptureCommon2); EXPECT_EQ(subCaptureCommon1, subCaptureCommon2); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWithAubManagerWhenItIsCreatedThenFileIsCreated) { std::string fileName = "file_name.aub"; MockAubManager *mockManager = new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, fileName, CommandStreamReceiverType::CSR_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); std::unique_ptr> aubCsr(static_cast *>(AUBCommandStreamReceiver::create(fileName, true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()))); ASSERT_NE(nullptr, aubCsr); EXPECT_TRUE(aubCsr->isFileOpen()); } 
// Checks that calling setupContext() on an AUB command stream receiver created on top of a
// mocked AubCenter/AubManager populates hardwareContextController, and that the first hardware
// context reports the expected device index.
// NOTE(review): template argument lists (on std::unique_ptr / static_cast) are absent in this
// copy of the file and are reproduced exactly as found — restore them from the upstream source.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenOsContextIsSetThenCreateHardwareContext) {
    uint32_t deviceIndex = 3;

    // presumably the second argument is a device bitfield, so 8 (bit 3) corresponds to
    // deviceIndex 3 — TODO confirm against MockOsContext
    MockOsContext osContext(0, 8, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false);
    std::string fileName = "file_name.aub";
    MockAubManager *mockManager = new MockAubManager();
    MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, fileName, CommandStreamReceiverType::CSR_AUB);
    // The raw mock pointers are handed over to owning smart pointers: the manager to the
    // mock center, and the mock center to root device environment 0.
    mockAubCenter->aubManager = std::unique_ptr(mockManager);
    pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter);

    std::unique_ptr> aubCsr(static_cast *>(AUBCommandStreamReceiver::create(fileName, true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())));
    // Fatal check: every statement below dereferences aubCsr.
    ASSERT_NE(nullptr, aubCsr);
    EXPECT_EQ(nullptr, aubCsr->hardwareContextController.get());

    aubCsr->setupContext(osContext);
    // Fatal check: the controller is dereferenced on the next line, so a non-fatal EXPECT
    // would let a failing run crash instead of reporting the failure.
    ASSERT_NE(nullptr, aubCsr->hardwareContextController.get());
    auto mockHardwareContext = static_cast(aubCsr->hardwareContextController->hardwareContexts[0].get());
    EXPECT_EQ(deviceIndex, mockHardwareContext->deviceIndex);
}

// Checks that setupContext() leaves hardwareContextController empty for the OS context built
// below; going by the test name, the `true` flag marks it as low priority — TODO confirm
// against the MockOsContext constructor.
// NOTE(review): template argument lists (on std::unique_ptr / static_cast) are absent in this
// copy of the file and are reproduced exactly as found — restore them from the upstream source.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenLowPriorityOsContextIsSetThenDontCreateHardwareContext) {
    MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, true, false, false);
    std::string fileName = "file_name.aub";
    MockAubManager *mockManager = new MockAubManager();
    MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, fileName, CommandStreamReceiverType::CSR_AUB);
    // The raw mock pointers are handed over to owning smart pointers: the manager to the
    // mock center, and the mock center to root device environment 0.
    mockAubCenter->aubManager = std::unique_ptr(mockManager);
    pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter);

    std::unique_ptr> aubCsr(static_cast *>(AUBCommandStreamReceiver::create(fileName, true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())));
    // Fatal check: every statement below dereferences aubCsr.
    ASSERT_NE(nullptr, aubCsr);
    EXPECT_EQ(nullptr, aubCsr->hardwareContextController.get());

    aubCsr->setupContext(osContext);
    EXPECT_EQ(nullptr, aubCsr->hardwareContextController.get());
}
// NOTE(review): this text appears to have lost its template argument lists
// (e.g. `static_cast(...)`, `std::unique_ptr> aubCsr`, `getEnvironment>(...)`,
// `reinterpret_cast(0x1000)`). They are reproduced exactly as found and presumably
// need to be restored from the upstream file before this compiles - confirm.
// NOTE(review): the third `getEnvironment` argument is `true` in the *StandaloneMode*
// tests and `false` in the *NonStandaloneMode* tests below, so it presumably selects
// standalone mode - confirm against the fixture.

// With AUBDumpSubCaptureMode set to Filter, the factory must return a CSR that has no file open.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenItIsCreatedThenFileIsNotCreated) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter));
    std::string fileName = "file_name.aub";
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.initializeMemoryManager();
    std::unique_ptr> aubCsr(static_cast *>(AUBCommandStreamReceiver::create(fileName, true, executionEnvironment, 0)));
    // Fatal assertion: aubCsr is dereferenced on the next line.
    ASSERT_NE(nullptr, aubCsr);
    EXPECT_FALSE(aubCsr->isFileOpen());
}

// Repeating makeResident / makeNonResident on the same allocation must leave exactly
// one entry in the residency and eviction containers respectively.
HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationWhenMakeResidentCalledMultipleTimesAffectsResidencyOnce) {
    std::unique_ptr memoryManager(nullptr);
    std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()));
    memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment));
    aubCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal assertion: the allocation is dereferenced throughout the rest of the test.
    ASSERT_NE(nullptr, gfxAllocation);
    auto osContextId = aubCsr->getOsContext().getContextId();

    // First makeResident marks the allocation resident
    aubCsr->makeResident(*gfxAllocation);
    EXPECT_TRUE(gfxAllocation->isResident(osContextId));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, gfxAllocation->getTaskCount(osContextId));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, gfxAllocation->getResidencyTaskCount(osContextId));
    EXPECT_EQ(1u, aubCsr->getResidencyAllocations().size());

    // Second makeResident should have no impact
    aubCsr->makeResident(*gfxAllocation);
    EXPECT_TRUE(gfxAllocation->isResident(osContextId));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, gfxAllocation->getTaskCount(osContextId));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, gfxAllocation->getResidencyTaskCount(osContextId));
    EXPECT_EQ(1u, aubCsr->getResidencyAllocations().size());

    // First makeNonResident marks the allocation as nonresident
    aubCsr->makeNonResident(*gfxAllocation);
    EXPECT_FALSE(gfxAllocation->isResident(osContextId));
    EXPECT_EQ(1u, aubCsr->getEvictionAllocations().size());

    // Second makeNonResident should have no impact
    aubCsr->makeNonResident(*gfxAllocation);
    EXPECT_FALSE(gfxAllocation->isResident(osContextId));
    EXPECT_EQ(1u, aubCsr->getEvictionAllocations().size());

    memoryManager->freeGraphicsMemory(gfxAllocation);
}

// Two CSRs initialized against the same OS context must end up with non-zero and
// pairwise different ggttLRCA / ggttHWSP / ggttRingBuffer values.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMultipleInstancesInitializeTheirEnginesThenUniqueGlobalGttAdressesAreGenerated) {
    pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter.reset(new AubCenter());

    auto &hwInfo = pDevice->getHardwareInfo();
    auto engineInstance = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0];
    MockOsContext osContext(0, 1, engineInstance, PreemptionMode::Disabled, false, false, false);

    auto aubCsr1 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    auto aubCsr2 = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());

    aubCsr1->setupContext(osContext);
    aubCsr1->initializeEngine();
    EXPECT_NE(0u, aubCsr1->engineInfo.ggttLRCA);
    EXPECT_NE(0u, aubCsr1->engineInfo.ggttHWSP);
    EXPECT_NE(0u, aubCsr1->engineInfo.ggttRingBuffer);

    aubCsr2->setupContext(osContext);
    aubCsr2->initializeEngine();
    EXPECT_NE(aubCsr1->engineInfo.ggttLRCA, aubCsr2->engineInfo.ggttLRCA);
    EXPECT_NE(aubCsr1->engineInfo.ggttHWSP, aubCsr2->engineInfo.ggttHWSP);
    EXPECT_NE(aubCsr1->engineInfo.ggttRingBuffer, aubCsr2->engineInfo.ggttRingBuffer);
}

// After the hardware context controller is dropped, flush must populate the engineInfo pointers.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenItShouldInitializeEngineInfo) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    aubCsr->hardwareContextController.reset(nullptr);

    LinearStream cs(aubExecutionEnvironment->commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_NE(nullptr, aubCsr->engineInfo.pLRCA);
    EXPECT_NE(nullptr, aubCsr->engineInfo.pGlobalHWStatusPage);
    EXPECT_NE(nullptr, aubCsr->engineInfo.pRingBuffer);
}

// With sub-capture disabled, processResidency must not set writeMemoryCalled.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenProcessResidencyIsCalledButSubCaptureIsDisabledThenItShouldntWriteMemory) {
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->disableSubCapture();
    // The raw mock pointer is handed over to the CSR's unique_ptr member here.
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000);
    ResidencyContainer allocationsForResidency = {&allocation};
    aubCsr->processResidency(allocationsForResidency, 0u);

    EXPECT_FALSE(aubCsr->writeMemoryCalled);
}

// With sub-capture enabled but a zero-sized allocation, processResidency must not set writeMemoryCalled.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenProcessResidencyIsCalledButAllocationSizeIsZeroThenItShouldntWriteMemory) {
    DebugManagerStateRestore stateRestore;
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();

    const DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
    aubSubCaptureManagerMock->checkAndActivateSubCapture(multiDispatchInfo);
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0);
    ResidencyContainer allocationsForResidency = {&allocation};
    aubCsr->processResidency(allocationsForResidency, 0u);

    EXPECT_FALSE(aubCsr->writeMemoryCalled);
}

// With sub-capture disabled, flush must leave the engineInfo pointers null.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenFlushIsCalledButSubCaptureIsDisabledThenItShouldntInitializeEngineInfo) {
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->disableSubCapture();
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(nullptr, aubCsr->engineInfo.pLRCA);
    EXPECT_EQ(nullptr, aubCsr->engineInfo.pGlobalHWStatusPage);
    EXPECT_EQ(nullptr, aubCsr->engineInfo.pRingBuffer);
}

// After each of two consecutive flushes, tailRingBuffer must be a multiple of sizeof(uint64_t).
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenItShouldLeaveProperRingTailAlignment) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    auto allocationsForResidency = aubCsr->getResidencyAllocations();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    auto ringTailAlignment = sizeof(uint64_t);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    // First flush typically includes a preamble and chain to command buffer
    aubCsr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
    aubCsr->flush(batchBuffer, allocationsForResidency);
    EXPECT_EQ(0ull, aubCsr->engineInfo.tailRingBuffer % ringTailAlignment);

    // Second flush should just submit command buffer
    cs.getSpace(sizeof(uint64_t));
    aubCsr->flush(batchBuffer, allocationsForResidency);
    EXPECT_EQ(0ull, aubCsr->engineInfo.tailRingBuffer % ringTailAlignment);
}

// Non-standalone: flush must leave the tag at initialHardwareTag rather than the latest sent task count.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInNonStandaloneModeWhenFlushIsCalledThenItShouldNotUpdateHwTagWithLatestSentTaskCount) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, false);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    EXPECT_EQ(initialHardwareTag, *aubCsr->getTagAddress());

    aubCsr->setLatestSentTaskCount(aubCsr->peekTaskCount() + 1);
    EXPECT_NE(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_NE(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());
    EXPECT_EQ(initialHardwareTag, *aubCsr->getTagAddress());
}

// Standalone: after flush the tag must equal the latest sent task count.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneModeWhenFlushIsCalledThenItShouldUpdateHwTagWithLatestSentTaskCount) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    EXPECT_EQ(initialHardwareTag, *aubCsr->getTagAddress());

    aubCsr->setLatestSentTaskCount(aubCsr->peekTaskCount() + 1);
    EXPECT_NE(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());
}

// Standalone: flush must append the command buffer allocation to the caller's residency container.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneModeWhenFlushIsCalledThenItShouldUpdateAllocationsForResidencyWithCommandBufferAllocation) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->flush(batchBuffer, allocationsForResidency);

    // Fatal assertion: element 0 is read on the next line.
    ASSERT_EQ(1u, allocationsForResidency.size());
    EXPECT_EQ(cs.getGraphicsAllocation(), allocationsForResidency[0]);
}

// Non-standalone: flush must leave the caller's residency container empty.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInNonStandaloneModeWhenFlushIsCalledThenItShouldNotUpdateAllocationsForResidencyWithCommandBufferAllocation) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, false);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(0u, allocationsForResidency.size());
}

// Standalone with sub-capture disabled: flush must still bring the tag up to the latest sent task count.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneAndSubCaptureModeWhenFlushIsCalledButSubCaptureIsDisabledThenItShouldUpdateHwTagWithLatestSentTaskCount) {
    DebugManagerStateRestore stateRestore;
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->disableSubCapture();
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->setLatestSentTaskCount(aubCsr->peekTaskCount() + 1);
    EXPECT_NE(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());
}

// Non-standalone with sub-capture disabled: flush must leave the tag at initialHardwareTag.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInNonStandaloneAndSubCaptureModeWhenFlushIsCalledButSubCaptureIsDisabledThenItShouldNotUpdateHwTagWithLatestSentTaskCount) {
    DebugManagerStateRestore stateRestore;
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, false);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->disableSubCapture();
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->setLatestSentTaskCount(aubCsr->peekTaskCount() + 1);
    EXPECT_NE(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_NE(aubCsr->peekLatestSentTaskCount(), *aubCsr->getTagAddress());
    EXPECT_EQ(initialHardwareTag, *aubCsr->getTagAddress());
}

// With sub-capture enabled before the call, flush must leave sub-capture disabled.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenFlushIsCalledAndSubCaptureIsEnabledThenItShouldDeactivateSubCapture) {
    DebugManagerStateRestore stateRestore;
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, false);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    const DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
    aubSubCaptureManagerMock->checkAndActivateSubCapture(multiDispatchInfo);
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled());
}
// NOTE(review): this text appears to have lost its template argument lists
// (e.g. `std::unique_ptr> aubCsr`, `getEnvironment>(...)`, `std::make_unique>(...)`).
// They are reproduced exactly as found and presumably need to be restored from the
// upstream file before this compiles - confirm.
// NOTE(review): the third `getEnvironment` argument is `true` in the *StandaloneMode*
// tests and `false` in the *NonStandaloneMode* tests below, so it presumably selects
// standalone mode - confirm against the fixture.

// With sub-capture enabled, flush must set pollForCompletionCalled.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenFlushIsCalledAndSubCaptureIsEnabledThenItShouldCallPollForCompletion) {
    DebugManagerStateRestore stateRestore;
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, false);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    const DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
    aubSubCaptureManagerMock->checkAndActivateSubCapture(multiDispatchInfo);
    // The raw mock pointer is handed over to the CSR's unique_ptr member here.
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {};

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_TRUE(aubCsr->pollForCompletionCalled);
}

// Standalone, immediate dispatch: flush makes the command buffer resident with
// residency task count peekTaskCount() + 1; makeSurfacePackNonResident undoes it.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneModeWhenFlushIsCalledThenItShouldCallMakeResidentOnCommandBufferAllocation) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    auto allocationsForResidency = aubCsr->getResidencyAllocations();
    auto commandBuffer = aubExecutionEnvironment->commandBuffer;
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    EXPECT_FALSE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    aubCsr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_TRUE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, commandBuffer->getResidencyTaskCount(aubCsr->getOsContext().getContextId()));

    aubCsr->makeSurfacePackNonResident(allocationsForResidency);

    EXPECT_FALSE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));
}

// Non-standalone: flush must still make the command buffer resident.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInNonStandaloneModeWhenFlushIsCalledThenItShouldCallMakeResidentOnCommandBufferAllocation) {
    auto aubExecutionEnvironment = getEnvironment>(false, true, false);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    auto allocationsForResidency = aubCsr->getResidencyAllocations();
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    EXPECT_FALSE(aubExecutionEnvironment->commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_TRUE(aubExecutionEnvironment->commandBuffer->isResident(aubCsr->getOsContext().getContextId()));
}

// Standalone, batched dispatch: flush makes both the passed allocation and the command
// buffer resident; makeSurfacePackNonResident makes both non-resident again.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneModeWhenFlushIsCalledThenItShouldCallMakeResidentOnResidencyAllocations) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    auto memoryManager = aubExecutionEnvironment->executionEnvironment->memoryManager.get();
    auto commandBuffer = aubExecutionEnvironment->commandBuffer;
    LinearStream cs(commandBuffer);

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, gfxAllocation);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {gfxAllocation};

    EXPECT_FALSE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_FALSE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    aubCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_TRUE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, gfxAllocation->getResidencyTaskCount(aubCsr->getOsContext().getContextId()));

    EXPECT_TRUE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, commandBuffer->getResidencyTaskCount(aubCsr->getOsContext().getContextId()));

    aubCsr->makeSurfacePackNonResident(allocationsForResidency);

    EXPECT_FALSE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_FALSE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    memoryManager->freeGraphicsMemory(gfxAllocation);
}

// Non-standalone: flush makes both the passed allocation and the command buffer resident.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInNonStandaloneModeWhenFlushIsCalledThenItShouldCallMakeResidentOnResidencyAllocations) {
    auto aubExecutionEnvironment = getEnvironment>(true, true, false);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    auto memoryManager = aubExecutionEnvironment->executionEnvironment->memoryManager.get();
    auto commandBuffer = aubExecutionEnvironment->commandBuffer;
    LinearStream cs(commandBuffer);

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal assertion: the allocation is dereferenced below (matches the standalone variant of this test).
    ASSERT_NE(nullptr, gfxAllocation);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {gfxAllocation};

    EXPECT_FALSE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_FALSE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_TRUE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, gfxAllocation->getResidencyTaskCount(aubCsr->getOsContext().getContextId()));

    EXPECT_TRUE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    memoryManager->freeGraphicsMemory(gfxAllocation);
}

// Standalone with sub-capture enabled, batched dispatch: same residency expectations
// as the plain standalone residency test.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneAndSubCaptureModeWhenFlushIsCalledAndSubCaptureIsEnabledThenItShouldCallMakeResidentOnCommandBufferAndResidencyAllocations) {
    DebugManagerStateRestore stateRestore;
    AubSubCaptureCommon aubSubCaptureCommon;
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon);
    auto aubExecutionEnvironment = getEnvironment>(true, true, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    auto memoryManager = aubExecutionEnvironment->executionEnvironment->memoryManager.get();
    auto commandBuffer = aubExecutionEnvironment->commandBuffer;
    LinearStream cs(commandBuffer);

    const DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
    aubSubCaptureManagerMock->checkAndActivateSubCapture(multiDispatchInfo);
    aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock);
    ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, gfxAllocation);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    ResidencyContainer allocationsForResidency = {gfxAllocation};

    EXPECT_FALSE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_FALSE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    aubCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
    aubCsr->flush(batchBuffer, allocationsForResidency);

    EXPECT_TRUE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, gfxAllocation->getResidencyTaskCount(aubCsr->getOsContext().getContextId()));

    EXPECT_TRUE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_EQ(aubCsr->peekTaskCount() + 1, commandBuffer->getResidencyTaskCount(aubCsr->getOsContext().getContextId()));

    aubCsr->makeSurfacePackNonResident(allocationsForResidency);

    EXPECT_FALSE(gfxAllocation->isResident(aubCsr->getOsContext().getContextId()));
    EXPECT_FALSE(commandBuffer->isResident(aubCsr->getOsContext().getContextId()));

    memoryManager->freeGraphicsMemory(gfxAllocation);
}

// A freshly created allocation must report itself AUB-writable for the default bank.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGraphicsAllocationIsCreatedThenItDoesntHaveTypeNonAubWritable) {
    auto aubExecutionEnvironment = getEnvironment>(false, false, true);
    auto memoryManager = aubExecutionEnvironment->executionEnvironment->memoryManager.get();

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal assertion: the allocation is dereferenced on the next line.
    ASSERT_NE(nullptr, gfxAllocation);

    EXPECT_TRUE(gfxAllocation->isAubWritable(GraphicsAllocation::defaultBank));

    memoryManager->freeGraphicsMemory(gfxAllocation);
}

// processResidency on an allocation created with MockAllocationProperties must keep it AUB-writable.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenProcessResidencyIsCalledOnDefaultAllocationThenAllocationTypeShouldNotBeMadeNonAubWritable) {
    auto aubExecutionEnvironment = getEnvironment>(false, false, true);
    auto aubCsr = aubExecutionEnvironment->template getCsr>();
    auto memoryManager = aubExecutionEnvironment->executionEnvironment->memoryManager.get();

    auto gfxDefaultAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal assertion: the allocation is dereferenced below.
    ASSERT_NE(nullptr, gfxDefaultAllocation);

    ResidencyContainer allocationsForResidency = {gfxDefaultAllocation};
    aubCsr->processResidency(allocationsForResidency, 0u);

    EXPECT_TRUE(aubCsr->isAubWritable(*gfxDefaultAllocation));

    memoryManager->freeGraphicsMemory(gfxDefaultAllocation);
}

// For every allocation type in the list, writeMemory must clear the AUB-writable state.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenWriteMemoryIsCalledOnBufferAndImageTypeAllocationsThenAllocationsHaveAubWritableSetToFalse) {
    std::unique_ptr memoryManager(nullptr);
    std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()));
    aubCsr->setupContext(*pDevice->getDefaultEngine().osContext);
    memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment));

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal assertion: the allocation is dereferenced inside the loop.
    ASSERT_NE(nullptr, gfxAllocation);

    const GraphicsAllocation::AllocationType onlyOneTimeAubWritableTypes[] = {
        GraphicsAllocation::AllocationType::PIPE,
        GraphicsAllocation::AllocationType::CONSTANT_SURFACE,
        GraphicsAllocation::AllocationType::GLOBAL_SURFACE,
        GraphicsAllocation::AllocationType::KERNEL_ISA,
        GraphicsAllocation::AllocationType::PRIVATE_SURFACE,
        GraphicsAllocation::AllocationType::SCRATCH_SURFACE,
        GraphicsAllocation::AllocationType::BUFFER,
        GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY,
        GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
        GraphicsAllocation::AllocationType::IMAGE,
        GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER,
        GraphicsAllocation::AllocationType::MAP_ALLOCATION,
        GraphicsAllocation::AllocationType::SVM_GPU,
        GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR};

    for (auto allocationType : onlyOneTimeAubWritableTypes) {
        // Re-arm the writable state so each type is checked independently.
        gfxAllocation->setAubWritable(true, GraphicsAllocation::defaultBank);
        gfxAllocation->setAllocationType(allocationType);
        aubCsr->writeMemory(*gfxAllocation);
        EXPECT_FALSE(aubCsr->isAubWritable(*gfxAllocation));
    }

    memoryManager->freeGraphicsMemory(gfxAllocation);
}

// processResidency on a BUFFER allocation and a 2D image allocation must leave both non-AUB-writable.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenProcessResidencyIsCalledOnBufferAndImageAllocationsThenAllocationsTypesShouldBeMadeNonAubWritable) {
    std::unique_ptr memoryManager(nullptr);
    std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()));
    memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment));
    aubCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    auto gfxBufferAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER});
    ASSERT_NE(nullptr, gfxBufferAllocation);

    auto gfxImageAllocation = MockGmm::allocateImage2d(*memoryManager);
    ASSERT_NE(nullptr, gfxImageAllocation);

    ResidencyContainer allocationsForResidency = {gfxBufferAllocation, gfxImageAllocation};
    aubCsr->processResidency(allocationsForResidency, 0u);

    EXPECT_FALSE(aubCsr->isAubWritable(*gfxBufferAllocation));
    EXPECT_FALSE(aubCsr->isAubWritable(*gfxImageAllocation));

    memoryManager->freeGraphicsMemory(gfxBufferAllocation);
    memoryManager->freeGraphicsMemory(gfxImageAllocation);
}

// With dumpAubNonWritable set, processResidency must flip non-writable allocations back to AUB-writable.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModWhenProcessResidencyIsCalledWithDumpAubNonWritableFlagThenAllocationsTypesShouldBeMadeAubWritable) {
    DebugManagerStateRestore stateRestore;
    std::unique_ptr memoryManager(nullptr);
    std::unique_ptr> aubCsr(new MockAubCsrToTestDumpAubNonWritable("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()));
    memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment));
    aubCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    auto gfxBufferAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER});
    ASSERT_NE(nullptr, gfxBufferAllocation);
    aubCsr->setAubWritable(false, *gfxBufferAllocation);

    auto gfxImageAllocation = MockGmm::allocateImage2d(*memoryManager);
    ASSERT_NE(nullptr, gfxImageAllocation);
    aubCsr->setAubWritable(false, *gfxImageAllocation);

    aubCsr->dumpAubNonWritable = true;

    ResidencyContainer allocationsForResidency = {gfxBufferAllocation, gfxImageAllocation};
    aubCsr->processResidency(allocationsForResidency, 0u);

    EXPECT_TRUE(aubCsr->isAubWritable(*gfxBufferAllocation));
    EXPECT_TRUE(aubCsr->isAubWritable(*gfxImageAllocation));

    memoryManager->freeGraphicsMemory(gfxBufferAllocation);
    memoryManager->freeGraphicsMemory(gfxImageAllocation);
}

// With dumpAubNonWritable cleared, processResidency must keep non-writable allocations non-writable.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenProcessResidencyIsCalledWithoutDumpAubWritableFlagThenAllocationsTypesShouldBeKeptNonAubWritable) {
    DebugManagerStateRestore stateRestore;
    std::unique_ptr memoryManager(nullptr);
    std::unique_ptr> aubCsr(new MockAubCsrToTestDumpAubNonWritable("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()));
    memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment));
    aubCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    auto gfxBufferAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER});
    ASSERT_NE(nullptr, gfxBufferAllocation);
    aubCsr->setAubWritable(false, *gfxBufferAllocation);

    auto gfxImageAllocation = MockGmm::allocateImage2d(*memoryManager);
    ASSERT_NE(nullptr, gfxImageAllocation);
    aubCsr->setAubWritable(false, *gfxImageAllocation);

    aubCsr->dumpAubNonWritable = false;

    ResidencyContainer allocationsForResidency = {gfxBufferAllocation, gfxImageAllocation};
    aubCsr->processResidency(allocationsForResidency, 0u);

    EXPECT_FALSE(aubCsr->isAubWritable(*gfxBufferAllocation));
    EXPECT_FALSE(aubCsr->isAubWritable(*gfxImageAllocation));

    memoryManager->freeGraphicsMemory(gfxBufferAllocation);
    memoryManager->freeGraphicsMemory(gfxImageAllocation);
}

// An OS context constructed with 0b11 as its second argument must yield two hardware contexts.
HWTEST_F(AubCommandStreamReceiverTests, givenOsContextWithMultipleDevicesSupportedWhenSetupIsCalledThenCreateMultipleHardwareContexts) {
    MockOsContext osContext(1, 0b11, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
    auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    aubCsr->setupContext(osContext);

    EXPECT_EQ(2u, aubCsr->hardwareContextController->hardwareContexts.size());
}

// writeMemory must return true for an allocation that is still AUB-writable.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGraphicsAllocationTypeIsntNonAubWritableThenWriteMemoryIsAllowed) {
    std::unique_ptr memoryManager(nullptr);
    std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()));
    aubCsr->setupContext(*pDevice->getDefaultEngine().osContext);
    memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment));

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal assertion: the allocation is dereferenced on the next line.
    ASSERT_NE(nullptr, gfxAllocation);

    EXPECT_TRUE(aubCsr->writeMemory(*gfxAllocation));

    memoryManager->freeGraphicsMemory(gfxAllocation);
}

// writeMemory must return false once the allocation has been marked non-AUB-writable.
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGraphicsAllocationTypeIsNonAubWritableThenWriteMemoryIsNotAllowed) {
    std::unique_ptr memoryManager(nullptr);
    std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()));
    memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment));
    aubCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal assertion: the allocation is dereferenced below.
    ASSERT_NE(nullptr, gfxAllocation);

    aubCsr->setAubWritable(false, *gfxAllocation);
    EXPECT_FALSE(aubCsr->writeMemory(*gfxAllocation));

    memoryManager->freeGraphicsMemory(gfxAllocation);
}
HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGraphicsAllocationSizeIsZeroThenWriteMemoryIsNotAllowed) { std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); MockGraphicsAllocation gfxAllocation((void *)0x1234, 0); aubCsr->setupContext(*pDevice->getDefaultEngine().osContext); EXPECT_FALSE(aubCsr->writeMemory(gfxAllocation)); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAllocationDataIsPassedInAllocationViewThenWriteMemoryIsAllowed) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); aubCsr->setupContext(*pDevice->getDefaultEngine().osContext); size_t size = 100; auto ptr = std::make_unique(size); auto addr = reinterpret_cast(ptr.get()); AllocationView allocationView(addr, size); EXPECT_TRUE(aubCsr->writeMemory(allocationView)); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAllocationSizeInAllocationViewIsZeroThenWriteMemoryIsNotAllowed) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); AllocationView allocationView(0x1234, 0); aubCsr->setupContext(*pDevice->getDefaultEngine().osContext); EXPECT_FALSE(aubCsr->writeMemory(allocationView)); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAUBDumpCaptureFileNameHasBeenSpecifiedThenItShouldBeUsedToOpenTheFileWithAubCapture) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpCaptureFileName.set("file_name.aub"); std::unique_ptr> aubCsr(static_cast *>(AUBCommandStreamReceiver::create("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()))); EXPECT_NE(nullptr, aubCsr); EXPECT_TRUE(aubCsr->isFileOpen()); EXPECT_STREQ(DebugManager.flags.AUBDumpCaptureFileName.get().c_str(), aubCsr->getFileName().c_str()); } HWTEST_F(AubCommandStreamReceiverTests, 
givenAubCommandStreamReceiverInSubCaptureModeWhenAubSubCaptureIsActivatedThenFileIsOpened) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); ASSERT_FALSE(aubCsr->isFileOpen()); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(aubCsr->isFileOpen()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenAubSubCaptureRemainsActivedThenTheSameFileShouldBeKeptOpened) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); std::string fileName = aubCsr->subCaptureManager->getSubCaptureFileName(multiDispatchInfo); aubCsr->initFile(fileName); ASSERT_TRUE(aubCsr->isFileOpen()); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(aubCsr->isFileOpen()); 
EXPECT_STREQ(fileName.c_str(), aubCsr->getFileName().c_str()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenAubSubCaptureIsActivatedWithNewFileNameThenNewFileShouldBeReOpened) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); std::string newFileName = "new_file_name.aub"; AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); subCaptureManagerMock->setToggleFileName(newFileName); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); std::string fileName = "file_name.aub"; aubCsr->initFile(fileName); ASSERT_TRUE(aubCsr->isFileOpen()); ASSERT_STREQ(fileName.c_str(), aubCsr->getFileName().c_str()); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(aubCsr->isFileOpen()); EXPECT_STRNE(fileName.c_str(), aubCsr->getFileName().c_str()); EXPECT_STREQ(newFileName.c_str(), aubCsr->getFileName().c_str()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenAubSubCaptureIsActivatedForNewFileThenOldEngineInfoShouldBeFreed) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); std::string newFileName = "new_file_name.aub"; AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; 
subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); subCaptureManagerMock->setToggleFileName(newFileName); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); std::string fileName = "file_name.aub"; aubCsr->initFile(fileName); ASSERT_STREQ(fileName.c_str(), aubCsr->getFileName().c_str()); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); ASSERT_STREQ(newFileName.c_str(), aubCsr->getFileName().c_str()); EXPECT_EQ(nullptr, aubCsr->engineInfo.pLRCA); EXPECT_EQ(nullptr, aubCsr->engineInfo.pGlobalHWStatusPage); EXPECT_EQ(nullptr, aubCsr->engineInfo.pRingBuffer); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenAubSubCaptureIsActivatedThenForceDumpingAllocationsAubNonWritable) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsrToTestDumpAubNonWritable("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(aubCsr->dumpAubNonWritable); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenAubSubCaptureRemainsActivatedThenDontForceDumpingAllocationsAubNonWritable) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new 
MockAubCsrToTestDumpAubNonWritable("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); aubCsr->initFile(aubCsr->subCaptureManager->getSubCaptureFileName(multiDispatchInfo)); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_FALSE(aubCsr->dumpAubNonWritable); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenSubCaptureModeRemainsDeactivatedThenSubCaptureIsDisabled) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(false); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); const DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptureModeWhenSubCaptureIsToggledOnThenSubCaptureGetsEnabled) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", false, 
*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneAndSubCaptureModeWhenSubCaptureRemainsDeactivatedThenNeitherProgrammingFlagsAreInitializedNorCsrIsFlushed) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(false); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_FALSE(aubCsr->flushBatchedSubmissionsCalled); EXPECT_FALSE(aubCsr->initProgrammingFlagsCalled); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneAndSubCaptureModeWhenSubCaptureRemainsActivatedThenNeitherProgrammingFlagsAreInitializedNorCsrIsFlushed) { DebugManagerStateRestore stateRestore; 
std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(true); subCaptureManagerMock->setSubCaptureToggleActive(true); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_FALSE(aubCsr->flushBatchedSubmissionsCalled); EXPECT_FALSE(aubCsr->initProgrammingFlagsCalled); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneAndSubCaptureModeWhenSubCaptureGetsActivatedThenProgrammingFlagsAreInitialized) { DebugManagerStateRestore stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(false); subCaptureManagerMock->setSubCaptureToggleActive(true); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_FALSE(aubCsr->flushBatchedSubmissionsCalled); EXPECT_TRUE(aubCsr->initProgrammingFlagsCalled); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandaloneAndSubCaptureModeWhenSubCaptureGetsDeactivatedThenCsrIsFlushed) { DebugManagerStateRestore 
stateRestore; std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubSubCaptureCommon aubSubCaptureCommon; auto subCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureManagerMock->setSubCaptureIsActive(true); subCaptureManagerMock->setSubCaptureToggleActive(false); aubCsr->subCaptureManager = std::unique_ptr(subCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); aubCsr->checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(aubCsr->flushBatchedSubmissionsCalled); EXPECT_FALSE(aubCsr->initProgrammingFlagsCalled); } HWTEST_F(AubCommandStreamReceiverTests, WhenBlitBufferIsCalledThenCounterIsCorrectlyIncremented) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->osContext->getEngineType() = aub_stream::ENGINE_BCS; EXPECT_EQ(0u, aubCsr->blitBufferCalled); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0); BlitProperties blitProperties = BlitProperties::constructPropertiesForCopyBuffer(&allocation, &allocation, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); aubCsr->blitBuffer(blitPropertiesContainer, true); EXPECT_EQ(1u, aubCsr->blitBufferCalled); } using HardwareContextContainerTests = ::testing::Test; TEST_F(HardwareContextContainerTests, givenOsContextWithMultipleDevicesSupportedThenInitialzeHwContextsWithValidIndexes) { MockAubManager aubManager; MockOsContext osContext(1, 0b11, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); HardwareContextController hwContextControler(aubManager, osContext, 0); EXPECT_EQ(2u, hwContextControler.hardwareContexts.size()); EXPECT_EQ(2u, 
osContext.getNumSupportedDevices()); auto mockHwContext0 = static_cast(hwContextControler.hardwareContexts[0].get()); auto mockHwContext1 = static_cast(hwContextControler.hardwareContexts[1].get()); EXPECT_EQ(0u, mockHwContext0->deviceIndex); EXPECT_EQ(1u, mockHwContext1->deviceIndex); } TEST_F(HardwareContextContainerTests, givenSingleHwContextWhenSubmitMethodIsCalledOnHwContextControllerThenSubmitIsCalled) { MockAubManager aubManager; MockOsContext osContext(1, 0b1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); HardwareContextController hwContextContainer(aubManager, osContext, 0); EXPECT_EQ(1u, hwContextContainer.hardwareContexts.size()); auto mockHwContext0 = static_cast(hwContextContainer.hardwareContexts[0].get()); EXPECT_FALSE(mockHwContext0->writeAndSubmitCalled); EXPECT_FALSE(mockHwContext0->writeMemoryCalled); hwContextContainer.submit(1, reinterpret_cast(0x123), 2, 0, 1, false); EXPECT_TRUE(mockHwContext0->submitCalled); EXPECT_FALSE(mockHwContext0->writeAndSubmitCalled); EXPECT_FALSE(mockHwContext0->writeMemoryCalled); } TEST_F(HardwareContextContainerTests, givenSingleHwContextWhenWriteMemoryIsCalledThenWholeMemoryBanksArePassed) { MockAubManager aubManager; MockOsContext osContext(1, 0b1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); HardwareContextController hwContextContainer(aubManager, osContext, 0); EXPECT_EQ(1u, hwContextContainer.hardwareContexts.size()); auto mockHwContext0 = static_cast(hwContextContainer.hardwareContexts[0].get()); hwContextContainer.writeMemory(1, reinterpret_cast(0x123), 2, 3u, 4, 5); EXPECT_TRUE(mockHwContext0->writeMemoryCalled); EXPECT_EQ(3u, mockHwContext0->memoryBanksPassed); } TEST_F(HardwareContextContainerTests, givenMultipleHwContextWhenSingleMethodIsCalledThenUseAllContexts) { MockAubManager aubManager; MockOsContext osContext(1, 0b11, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); HardwareContextController hwContextContainer(aubManager, 
osContext, 0); EXPECT_EQ(2u, hwContextContainer.hardwareContexts.size()); auto mockHwContext0 = static_cast(hwContextContainer.hardwareContexts[0].get()); auto mockHwContext1 = static_cast(hwContextContainer.hardwareContexts[1].get()); EXPECT_FALSE(mockHwContext0->initializeCalled); EXPECT_FALSE(mockHwContext1->initializeCalled); EXPECT_FALSE(mockHwContext0->pollForCompletionCalled); EXPECT_FALSE(mockHwContext1->pollForCompletionCalled); EXPECT_FALSE(mockHwContext0->expectMemoryCalled); EXPECT_FALSE(mockHwContext1->expectMemoryCalled); EXPECT_FALSE(mockHwContext0->submitCalled); EXPECT_FALSE(mockHwContext1->submitCalled); EXPECT_FALSE(mockHwContext0->writeMemoryCalled); EXPECT_FALSE(mockHwContext1->writeMemoryCalled); hwContextContainer.initialize(); hwContextContainer.pollForCompletion(); hwContextContainer.expectMemory(1, reinterpret_cast(0x123), 2, 0); hwContextContainer.submit(1, reinterpret_cast(0x123), 2, 0, 1, false); hwContextContainer.writeMemory(1, reinterpret_cast(0x123), 2, 3u, 4, 5); EXPECT_TRUE(mockHwContext0->initializeCalled); EXPECT_TRUE(mockHwContext1->initializeCalled); EXPECT_TRUE(mockHwContext0->pollForCompletionCalled); EXPECT_TRUE(mockHwContext1->pollForCompletionCalled); EXPECT_TRUE(mockHwContext0->expectMemoryCalled); EXPECT_TRUE(mockHwContext1->expectMemoryCalled); EXPECT_TRUE(mockHwContext0->submitCalled); EXPECT_TRUE(mockHwContext1->submitCalled); EXPECT_TRUE(mockHwContext0->writeMemoryCalled); EXPECT_TRUE(mockHwContext1->writeMemoryCalled); EXPECT_EQ(1u, mockHwContext0->memoryBanksPassed); EXPECT_EQ(2u, mockHwContext1->memoryBanksPassed); } TEST_F(HardwareContextContainerTests, givenMultipleHwContextWhenSingleMethodIsCalledThenUseFirstContext) { MockAubManager aubManager; MockOsContext osContext(1, 0b11, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); HardwareContextController hwContextContainer(aubManager, osContext, 0); EXPECT_EQ(2u, hwContextContainer.hardwareContexts.size()); auto mockHwContext0 = 
static_cast(hwContextContainer.hardwareContexts[0].get()); auto mockHwContext1 = static_cast(hwContextContainer.hardwareContexts[1].get()); EXPECT_FALSE(mockHwContext0->dumpBufferBINCalled); EXPECT_FALSE(mockHwContext1->dumpBufferBINCalled); EXPECT_FALSE(mockHwContext0->dumpSurfaceCalled); EXPECT_FALSE(mockHwContext1->dumpSurfaceCalled); EXPECT_FALSE(mockHwContext0->readMemoryCalled); EXPECT_FALSE(mockHwContext1->readMemoryCalled); hwContextContainer.dumpBufferBIN(1, 2); hwContextContainer.dumpSurface({1, 2, 3, 4, 5, 6, 7, false, 0}); hwContextContainer.readMemory(1, reinterpret_cast(0x123), 1, 2, 0); EXPECT_TRUE(mockHwContext0->dumpBufferBINCalled); EXPECT_FALSE(mockHwContext1->dumpBufferBINCalled); EXPECT_TRUE(mockHwContext0->dumpSurfaceCalled); EXPECT_FALSE(mockHwContext1->dumpSurfaceCalled); EXPECT_TRUE(mockHwContext0->readMemoryCalled); EXPECT_FALSE(mockHwContext1->readMemoryCalled); } aub_command_stream_receiver_2_tests.cpp000066400000000000000000001654261363734646600344350ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/aub_mem_dump/aub_alloc_dump.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/helpers/hardware_context_controller.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/aub_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_csr.h" #include "opencl/test/unit_test/mocks/mock_aub_file_stream.h" #include 
"opencl/test/unit_test/mocks/mock_aub_manager.h" #include "opencl/test/unit_test/mocks/mock_aub_subcapture_manager.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "third_party/aub_stream/headers/aubstream.h" using namespace NEO; using AubCommandStreamReceiverTests = Test; HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenForcedBatchBufferFlatteningInImmediateDispatchModeThenNewCombinedBatchBufferIsCreated) { std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); std::unique_ptr memoryManager(new OsAgnosticMemoryManager(*pDevice->executionEnvironment)); auto flatBatchBufferHelper = new FlatBatchBufferHelperHw(*pDevice->executionEnvironment); aubCsr->overwriteFlatBatchBufferHelper(flatBatchBufferHelper); auto chainedBatchBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 128u}); auto otherAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 128u}); ASSERT_NE(nullptr, chainedBatchBuffer); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 128u}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, chainedBatchBuffer, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; size_t sizeBatchBuffer = 0xffffu; 
std::unique_ptr> flatBatchBuffer( aubCsr->getFlatBatchBufferHelper().flattenBatchBuffer(aubCsr->getRootDeviceIndex(), batchBuffer, sizeBatchBuffer, DispatchMode::ImmediateDispatch), [&](GraphicsAllocation *ptr) { memoryManager->freeGraphicsMemory(ptr); }); EXPECT_NE(nullptr, flatBatchBuffer->getUnderlyingBuffer()); EXPECT_EQ(alignUp(128u + 128u, MemoryConstants::pageSize), sizeBatchBuffer); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(chainedBatchBuffer); memoryManager->freeGraphicsMemory(otherAllocation); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenForcedBatchBufferInImmediateDispatchModeAndNoChainedBatchBufferThenCombinedBatchBufferIsNotCreated) { std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); std::unique_ptr memoryManager(new OsAgnosticMemoryManager(*pDevice->executionEnvironment)); auto flatBatchBufferHelper = new FlatBatchBufferHelperHw(*pDevice->executionEnvironment); aubCsr->overwriteFlatBatchBufferHelper(flatBatchBufferHelper); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; size_t sizeBatchBuffer = 0xffffu; std::unique_ptr> flatBatchBuffer( aubCsr->getFlatBatchBufferHelper().flattenBatchBuffer(aubCsr->getRootDeviceIndex(), batchBuffer, sizeBatchBuffer, DispatchMode::ImmediateDispatch), [&](GraphicsAllocation *ptr) { memoryManager->freeGraphicsMemory(ptr); }); EXPECT_EQ(nullptr, flatBatchBuffer.get()); EXPECT_EQ(0xffffu, sizeBatchBuffer); memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(AubCommandStreamReceiverTests, 
givenAubCommandStreamReceiverWhenForcedBatchBufferAndNotImmediateOrBatchedDispatchModeThenCombinedBatchBufferIsNotCreated) { std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); std::unique_ptr memoryManager(new OsAgnosticMemoryManager(*pDevice->executionEnvironment)); auto flatBatchBufferHelper = new FlatBatchBufferHelperHw(*pDevice->executionEnvironment); aubCsr->overwriteFlatBatchBufferHelper(flatBatchBufferHelper); auto chainedBatchBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto otherAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, chainedBatchBuffer); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, chainedBatchBuffer, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; size_t sizeBatchBuffer = 0xffffu; std::unique_ptr> flatBatchBuffer( aubCsr->getFlatBatchBufferHelper().flattenBatchBuffer(aubCsr->getRootDeviceIndex(), batchBuffer, sizeBatchBuffer, DispatchMode::AdaptiveDispatch), [&](GraphicsAllocation *ptr) { memoryManager->freeGraphicsMemory(ptr); }); EXPECT_EQ(nullptr, flatBatchBuffer.get()); EXPECT_EQ(0xffffu, sizeBatchBuffer); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(chainedBatchBuffer); memoryManager->freeGraphicsMemory(otherAllocation); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenRegisterCommandChunkIsCalledThenNewChunkIsAddedToTheList) { typedef typename FamilyType::MI_BATCH_BUFFER_START 
MI_BATCH_BUFFER_START; auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->getFlatBatchBufferHelper().registerCommandChunk(batchBuffer, sizeof(MI_BATCH_BUFFER_START)); ASSERT_EQ(1u, aubCsr->getFlatBatchBufferHelper().getCommandChunkList().size()); EXPECT_EQ(128u + sizeof(MI_BATCH_BUFFER_START), aubCsr->getFlatBatchBufferHelper().getCommandChunkList()[0].endOffset); CommandChunk chunk; chunk.endOffset = 0x123; aubCsr->getFlatBatchBufferHelper().registerCommandChunk(chunk); ASSERT_EQ(2u, aubCsr->getFlatBatchBufferHelper().getCommandChunkList().size()); EXPECT_EQ(0x123u, aubCsr->getFlatBatchBufferHelper().getCommandChunkList()[1].endOffset); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenRemovePatchInfoDataIsCalledThenElementIsRemovedFromPatchInfoList) { auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); PatchInfoData patchInfoData(0xA000, 0x0, PatchInfoAllocationType::KernelArg, 0xB000, 0x0, PatchInfoAllocationType::Default); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData); EXPECT_EQ(1u, aubCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); EXPECT_TRUE(aubCsr->getFlatBatchBufferHelper().removePatchInfoData(0xC000)); EXPECT_EQ(1u, aubCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); EXPECT_TRUE(aubCsr->getFlatBatchBufferHelper().removePatchInfoData(0xB000)); EXPECT_EQ(0u, aubCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAddGucStartMessageIsCalledThenBatchBufferAddressIsStoredInPatchInfoCollection) { 
DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); auto aubExecutionEnvironment = getEnvironment>(false, false, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); std::unique_ptr batchBuffer(new char[1024]); aubCsr->addGUCStartMessage(static_cast(reinterpret_cast(batchBuffer.get()))); auto &patchInfoCollection = aubCsr->getFlatBatchBufferHelper().getPatchInfoCollection(); ASSERT_EQ(1u, patchInfoCollection.size()); EXPECT_EQ(patchInfoCollection[0].sourceAllocation, reinterpret_cast(batchBuffer.get())); EXPECT_EQ(patchInfoCollection[0].targetType, PatchInfoAllocationType::GUCStartMessage); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenForcedBatchBufferFlatteningInBatchedDispatchModeThenNewCombinedBatchBufferIsCreated) { DebugManagerStateRestore dbgRestore; DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::BatchedDispatch)); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); auto memoryManager = aubExecutionEnvironment->executionEnvironment->memoryManager.get(); LinearStream cs(aubExecutionEnvironment->commandBuffer); CommandChunk chunk1; CommandChunk chunk2; CommandChunk chunk3; std::unique_ptr commands1(new char[0x100u]); commands1.get()[0] = 0x1; chunk1.baseAddressCpu = chunk1.baseAddressGpu = reinterpret_cast(commands1.get()); chunk1.startOffset = 0u; chunk1.endOffset = 0x50u; std::unique_ptr commands2(new char[0x100u]); commands2.get()[0] = 0x2; chunk2.baseAddressCpu = chunk2.baseAddressGpu = reinterpret_cast(commands2.get()); chunk2.startOffset = 0u; chunk2.endOffset = 0x50u; aubCsr->getFlatBatchBufferHelper().registerBatchBufferStartAddress(reinterpret_cast(commands2.get() + 0x40), 
reinterpret_cast(commands1.get())); std::unique_ptr commands3(new char[0x100u]); commands3.get()[0] = 0x3; chunk3.baseAddressCpu = chunk3.baseAddressGpu = reinterpret_cast(commands3.get()); chunk3.startOffset = 0u; chunk3.endOffset = 0x50u; aubCsr->getFlatBatchBufferHelper().registerBatchBufferStartAddress(reinterpret_cast(commands3.get() + 0x40), reinterpret_cast(commands2.get())); aubCsr->getFlatBatchBufferHelper().registerCommandChunk(chunk1); aubCsr->getFlatBatchBufferHelper().registerCommandChunk(chunk2); aubCsr->getFlatBatchBufferHelper().registerCommandChunk(chunk3); ASSERT_EQ(3u, aubCsr->getFlatBatchBufferHelper().getCommandChunkList().size()); PatchInfoData patchInfoData1(0xAAAu, 0xAu, PatchInfoAllocationType::IndirectObjectHeap, chunk1.baseAddressGpu, 0x10, PatchInfoAllocationType::Default); PatchInfoData patchInfoData2(0xBBBu, 0xAu, PatchInfoAllocationType::IndirectObjectHeap, chunk1.baseAddressGpu, 0x60, PatchInfoAllocationType::Default); PatchInfoData patchInfoData3(0xCCCu, 0xAu, PatchInfoAllocationType::IndirectObjectHeap, 0x0, 0x10, PatchInfoAllocationType::Default); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData1); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData2); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData3); ASSERT_EQ(3u, aubCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; size_t sizeBatchBuffer = 0u; std::unique_ptr> flatBatchBuffer( aubCsr->getFlatBatchBufferHelper().flattenBatchBuffer(aubCsr->getRootDeviceIndex(), batchBuffer, sizeBatchBuffer, DispatchMode::BatchedDispatch), [&](GraphicsAllocation *ptr) { memoryManager->freeGraphicsMemory(ptr); }); EXPECT_NE(nullptr, flatBatchBuffer.get()); EXPECT_EQ(alignUp(0x50u + 0x40u + 0x40u + CSRequirements::csOverfetchSize, 0x1000u), sizeBatchBuffer); 
ASSERT_EQ(1u, aubCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); EXPECT_EQ(0xAAAu, aubCsr->getFlatBatchBufferHelper().getPatchInfoCollection()[0].sourceAllocation); EXPECT_EQ(0u, aubCsr->getFlatBatchBufferHelper().getCommandChunkList().size()); EXPECT_EQ(0x3, static_cast(flatBatchBuffer->getUnderlyingBuffer())[0]); EXPECT_EQ(0x2, static_cast(flatBatchBuffer->getUnderlyingBuffer())[0x40]); EXPECT_EQ(0x1, static_cast(flatBatchBuffer->getUnderlyingBuffer())[0x40 + 0x40]); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenDefaultDebugConfigThenExpectFlattenBatchBufferIsNotCalled) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); auto mockHelper = new MockFlatBatchBufferHelper(*aubExecutionEnvironment->executionEnvironment); aubCsr->overwriteFlatBatchBufferHelper(mockHelper); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; EXPECT_CALL(*mockHelper, flattenBatchBuffer(aubCsr->getRootDeviceIndex(), ::testing::_, ::testing::_, ::testing::_)).Times(0); aubCsr->flush(batchBuffer, allocationsForResidency); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenForcedFlattenBatchBufferAndImmediateDispatchModeThenExpectFlattenBatchBufferIsCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); auto allocationsForResidency = aubCsr->getResidencyAllocations(); LinearStream cs(aubExecutionEnvironment->commandBuffer); auto mockHelper = new 
MockFlatBatchBufferHelper(*aubExecutionEnvironment->executionEnvironment); aubCsr->overwriteFlatBatchBufferHelper(mockHelper); auto chainedBatchBuffer = aubExecutionEnvironment->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, chainedBatchBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, chainedBatchBuffer, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->makeResident(*chainedBatchBuffer); std::unique_ptr> ptr( aubExecutionEnvironment->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}), [&](GraphicsAllocation *ptr) { aubExecutionEnvironment->executionEnvironment->memoryManager->freeGraphicsMemory(ptr); }); auto expectedAllocation = ptr.get(); EXPECT_CALL(*mockHelper, flattenBatchBuffer(aubCsr->getRootDeviceIndex(), ::testing::_, ::testing::_, ::testing::_)).WillOnce(::testing::Return(ptr.release())); aubCsr->flush(batchBuffer, allocationsForResidency); EXPECT_EQ(batchBuffer.commandBufferAllocation, expectedAllocation); aubExecutionEnvironment->executionEnvironment->memoryManager->freeGraphicsMemory(chainedBatchBuffer); } HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenForcedFlattenBatchBufferAndImmediateDispatchModeAndThereIsNoChainedBatchBufferThenExpectFlattenBatchBufferIsCalledAnyway) { DebugManagerStateRestore dbgRestore; DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); auto allocationsForResidency = aubCsr->getResidencyAllocations(); LinearStream cs(aubExecutionEnvironment->commandBuffer); auto mockHelper = 
new MockFlatBatchBufferHelper(*aubExecutionEnvironment->executionEnvironment); aubCsr->overwriteFlatBatchBufferHelper(mockHelper); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; EXPECT_CALL(*mockHelper, flattenBatchBuffer(aubCsr->getRootDeviceIndex(), ::testing::_, ::testing::_, ::testing::_)).Times(1); aubCsr->flush(batchBuffer, allocationsForResidency); } /* With FlattenBatchBufferForAUBDump set and CsrDispatchMode forced to BatchedDispatch, flush() must call flattenBatchBuffer exactly once. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenForcedFlattenBatchBufferAndBatchedDispatchModeThenExpectFlattenBatchBufferIsCalledAnyway) { DebugManagerStateRestore dbgRestore; DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::BatchedDispatch)); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); auto mockHelper = new MockFlatBatchBufferHelper(*aubExecutionEnvironment->executionEnvironment); aubCsr->overwriteFlatBatchBufferHelper(mockHelper); ResidencyContainer allocationsForResidency; BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; EXPECT_CALL(*mockHelper, flattenBatchBuffer(aubCsr->getRootDeviceIndex(), ::testing::_, ::testing::_, ::testing::_)).Times(1); aubCsr->flush(batchBuffer, allocationsForResidency); } /* When AddPatchInfoCommentsForAUBDump is enabled, flush() is expected to call addPatchInfoComments() exactly once. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoCommentsIsCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer 
batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency; EXPECT_CALL(*aubCsr, addPatchInfoComments()).Times(1); aubCsr->flush(batchBuffer, allocationsForResidency); } /* The AddPatchInfoCommentsForAUBDump flag is not set by this test (presumably off by default); flush() must then never call addPatchInfoComments(). */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoCommentsIsNotCalled) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency; EXPECT_CALL(*aubCsr, addPatchInfoComments()).Times(0); aubCsr->flush(batchBuffer, allocationsForResidency); } /* getIndirectPatchCommands() called without any patch info registered by the test must report a zero command size and leave the output vector empty. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGetIndirectPatchCommandsIsCalledForEmptyPatchInfoListThenIndirectPatchCommandBufferIsNotCreated) { auto aubExecutionEnvironment = getEnvironment>(false, false, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); size_t indirectPatchCommandsSize = 0u; std::vector indirectPatchInfo; std::unique_ptr commandBuffer(aubCsr->getFlatBatchBufferHelper().getIndirectPatchCommands(indirectPatchCommandsSize, indirectPatchInfo)); EXPECT_EQ(0u, indirectPatchCommandsSize); EXPECT_EQ(0u, indirectPatchInfo.size()); } /* Registers four PatchInfoData entries and checks that all four are returned while the reported command size equals two MI_STORE_DATA_IMM commands. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGetIndirectPatchCommandsIsCalledForNonEmptyPatchInfoListThenIndirectPatchCommandBufferIsCreated) { typedef typename FamilyType::MI_STORE_DATA_IMM MI_STORE_DATA_IMM; std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); PatchInfoData patchInfo1(0xA000, 0u, PatchInfoAllocationType::KernelArg, 
0x6000, 0x100, PatchInfoAllocationType::IndirectObjectHeap); PatchInfoData patchInfo2(0xB000, 0u, PatchInfoAllocationType::KernelArg, 0x6000, 0x200, PatchInfoAllocationType::IndirectObjectHeap); PatchInfoData patchInfo3(0xC000, 0u, PatchInfoAllocationType::IndirectObjectHeap, 0x1000, 0x100, PatchInfoAllocationType::Default); PatchInfoData patchInfo4(0xC000, 0u, PatchInfoAllocationType::Default, 0x2000, 0x100, PatchInfoAllocationType::GUCStartMessage); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfo1); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfo2); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfo3); aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfo4); size_t indirectPatchCommandsSize = 0u; std::vector indirectPatchInfo; std::unique_ptr commandBuffer(aubCsr->getFlatBatchBufferHelper().getIndirectPatchCommands(indirectPatchCommandsSize, indirectPatchInfo)); EXPECT_EQ(4u, indirectPatchInfo.size()); EXPECT_EQ(2u * sizeof(MI_STORE_DATA_IMM), indirectPatchCommandsSize); } /* With batch buffer flattening enabled, addBatchBufferStart() must record exactly one entry that maps the address of the MI_BATCH_BUFFER_START command to the target address 0xA000. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenAddBatchBufferStartCalledAndBatchBUfferFlatteningEnabledThenBatchBufferStartAddressIsRegistered) { typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; DebugManagerStateRestore dbgRestore; DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); MI_BATCH_BUFFER_START bbStart; aubCsr->addBatchBufferStart(&bbStart, 0xA000u, false); std::map &batchBufferStartAddressSequence = aubCsr->getFlatBatchBufferHelper().getBatchBufferStartAddressSequence(); ASSERT_EQ(1u, batchBufferStartAddressSequence.size()); std::pair addr = *batchBufferStartAddressSequence.begin(); EXPECT_EQ(reinterpret_cast(&bbStart), addr.first); EXPECT_EQ(0xA000u, addr.second); } /* Test memory manager: hides the CPU pointer of image allocations and records lock/unlock calls, backing each lock with a temporary page-aligned buffer. */ class OsAgnosticMemoryManagerForImagesWithNoHostPtr : public OsAgnosticMemoryManager { 
public: OsAgnosticMemoryManagerForImagesWithNoHostPtr(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(executionEnvironment) {} /* Remembers the real CPU pointer in cpuPtr, then clears the CPU pointer on the returned allocation (GPU address unchanged). Only one pointer is stored, so this assumes a single live image allocation - TODO confirm. */ GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData) override { auto imageAllocation = OsAgnosticMemoryManager::allocateGraphicsMemoryForImage(allocationData); cpuPtr = imageAllocation->getUnderlyingBuffer(); imageAllocation->setCpuPtrAndGpuAddress(nullptr, imageAllocation->getGpuAddress()); return imageAllocation; }; /* Restores the saved CPU pointer before delegating to the base class free. */ void freeGraphicsMemoryImpl(GraphicsAllocation *imageAllocation) override { imageAllocation->setCpuPtrAndGpuAddress(cpuPtr, imageAllocation->getGpuAddress()); OsAgnosticMemoryManager::freeGraphicsMemoryImpl(imageAllocation); }; /* Records the call and its argument, and returns a freshly allocated page-aligned buffer sized to the allocation. */ void *lockResourceImpl(GraphicsAllocation &imageAllocation) override { lockResourceParam.wasCalled = true; lockResourceParam.inImageAllocation = &imageAllocation; lockCpuPtr = alignedMalloc(imageAllocation.getUnderlyingBufferSize(), MemoryConstants::pageSize); lockResourceParam.retCpuPtr = lockCpuPtr; return lockResourceParam.retCpuPtr; }; /* Records the call and its argument, and frees the buffer handed out by lockResourceImpl. */ void unlockResourceImpl(GraphicsAllocation &imageAllocation) override { unlockResourceParam.wasCalled = true; unlockResourceParam.inImageAllocation = &imageAllocation; alignedFree(lockCpuPtr); }; /* Observations captured by lockResourceImpl. */ struct LockResourceParam { bool wasCalled = false; GraphicsAllocation *inImageAllocation = nullptr; void *retCpuPtr = nullptr; } lockResourceParam; /* Observations captured by unlockResourceImpl. */ struct UnlockResourceParam { bool wasCalled = false; GraphicsAllocation *inImageAllocation = nullptr; } unlockResourceParam; protected: void *cpuPtr = nullptr; void *lockCpuPtr = nullptr; }; using AubCommandStreamReceiverNoHostPtrTests = ::testing::Test; /* writeMemory() on an image allocation whose CPU pointer is hidden must lock the resource to obtain a CPU address and unlock it again afterwards. */ HWTEST_F(AubCommandStreamReceiverNoHostPtrTests, givenAubCommandStreamReceiverWhenWriteMemoryIsCalledOnImageWithNoHostPtrThenResourceShouldBeLockedToGetCpuAddress) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto memoryManager = new 
OsAgnosticMemoryManagerForImagesWithNoHostPtr(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); auto engineInstance = HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0]; MockOsContext osContext(0, 1, engineInstance, PreemptionMode::Disabled, false, false, false); std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *executionEnvironment, 0)); aubCsr->setupContext(osContext); cl_image_desc imgDesc = {}; imgDesc.image_width = 512; imgDesc.image_height = 1; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(aubCsr->getRootDeviceIndex(), imgInfo, true, {}, *hwInfo); auto imageAllocation = memoryManager->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); ASSERT_NE(nullptr, imageAllocation); EXPECT_TRUE(aubCsr->writeMemory(*imageAllocation)); EXPECT_TRUE(memoryManager->lockResourceParam.wasCalled); EXPECT_EQ(imageAllocation, memoryManager->lockResourceParam.inImageAllocation); EXPECT_NE(nullptr, memoryManager->lockResourceParam.retCpuPtr); EXPECT_TRUE(memoryManager->unlockResourceParam.wasCalled); EXPECT_EQ(imageAllocation, memoryManager->unlockResourceParam.inImageAllocation); memoryManager->freeGraphicsMemory(imageAllocation); } /* An allocation that the caller has already locked must still be written successfully, and writeMemory() must not unlock it. */ HWTEST_F(AubCommandStreamReceiverNoHostPtrTests, givenAubCommandStreamReceiverWhenWriteMemoryIsCalledOnLockedResourceThenResourceShouldNotBeUnlocked) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto memoryManager = new OsAgnosticMemoryManagerForImagesWithNoHostPtr(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *executionEnvironment, 0)); auto osContext = 
memoryManager->createAndRegisterOsContext(aubCsr.get(), getChosenEngineType(*defaultHwInfo), 0, PreemptionMode::Disabled, false, false, false); aubCsr->setupContext(*osContext); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{aubCsr->getRootDeviceIndex(), MemoryConstants::pageSize}); memoryManager->lockResource(gfxAllocation); EXPECT_TRUE(aubCsr->writeMemory(*gfxAllocation)); EXPECT_FALSE(memoryManager->unlockResourceParam.wasCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } /* Without the debug override, the CSR's aubDeviceId must equal the value in the hardware info capability table. */ HWTEST_F(AubCommandStreamReceiverTests, givenNoDbgDeviceIdFlagWhenAubCsrIsCreatedThenUseDefaultDeviceId) { std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); EXPECT_EQ(pDevice->getHardwareInfo().capabilityTable.aubDeviceId, aubCsr->aubDeviceId); } /* The OverrideAubDeviceId debug flag must replace the device id used by the CSR. */ HWTEST_F(AubCommandStreamReceiverTests, givenDbgDeviceIdFlagIsSetWhenAubCsrIsCreatedThenUseDebugDeviceId) { DebugManagerStateRestore stateRestore; DebugManager.flags.OverrideAubDeviceId.set(9); //this is Hsw, not used std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); EXPECT_EQ(9u, aubCsr->aubDeviceId); } /* getGTTData() must mark the entry present and mirror the CSR's localMemoryEnabled setting in data.localMemory. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGetGTTDataIsCalledThenLocalMemoryIsSetAccordingToCsrFeature) { std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); AubGTTData data = {}; aubCsr->getGTTData(nullptr, data); EXPECT_TRUE(data.present); if (aubCsr->localMemoryEnabled) { EXPECT_TRUE(data.localMemory); } else { EXPECT_FALSE(data.localMemory); } } /* setGttEntry() must store address / 4096 as the physical address field and carry the present / local-memory flags into the entry. */ HWTEST_F(AubCommandStreamReceiverTests, givenPhysicalAddressWhenSetGttEntryIsCalledThenGttEntrysBitFieldsShouldBePopulated) { typedef typename AUBFamilyMapper::AUB AUB; AubMemDump::MiGttEntry entry = {}; uint64_t address = 0x0123456789; AubGTTData data = {true, false}; AUB::setGttEntry(entry, address, data); 
EXPECT_EQ(entry.pageConfig.PhysicalAddress, address / 4096); EXPECT_TRUE(entry.pageConfig.Present); EXPECT_FALSE(entry.pageConfig.LocalMemory); } /* With localMemoryEnabled forced to false, getMemoryBankForGtt() must return MemoryBanks::MainBank. */ HWTEST_F(AubCommandStreamReceiverTests, whenGetMemoryBankForGttIsCalledThenCorrectBankIsReturned) { std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); aubCsr->localMemoryEnabled = false; aubCsr->setupContext(*pDevice->getDefaultEngine().osContext); auto bank = aubCsr->getMemoryBankForGtt(); EXPECT_EQ(MemoryBanks::MainBank, bank); } /* PTE bits presentBit | writableBit must map to the TraceNonlocal address space. */ HWTEST_F(AubCommandStreamReceiverTests, givenEntryBitsPresentAndWritableWhenGetAddressSpaceFromPTEBitsIsCalledThenTraceNonLocalIsReturned) { std::unique_ptr> aubCsr(new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); auto space = aubCsr->getAddressSpaceFromPTEBits(PageTableEntry::presentBit | PageTableEntry::writableBit); EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceNonlocal, space); } /* CSR test double: exposes externalAllocations and replaces writeMemory(AllocationView &) with a stub that records its argument and reports success only when the view's second member (the size in the tests that use it) is non-zero. */ template struct MockAubCsrToTestExternalAllocations : public AUBCommandStreamReceiverHw { using AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; using AUBCommandStreamReceiverHw::externalAllocations; bool writeMemory(AllocationView &allocationView) override { writeMemoryParametrization.wasCalled = true; writeMemoryParametrization.receivedAllocationView = allocationView; writeMemoryParametrization.statusToReturn = (0 != allocationView.second) ? 
true : false; return writeMemoryParametrization.statusToReturn; } /* Call record for the writeMemory stub. */ struct WriteMemoryParametrization { bool wasCalled = false; AllocationView receivedAllocationView = {}; bool statusToReturn = false; } writeMemoryParametrization; }; /* makeResidentExternal() must append the given (address, size) view to externalAllocations. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMakeResidentExternalIsCalledThenGivenAllocationViewShouldBeAddedToExternalAllocations) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); size_t size = 100; auto ptr = std::make_unique(size); auto addr = reinterpret_cast(ptr.get()); AllocationView externalAllocation(addr, size); ASSERT_EQ(0u, aubCsr->externalAllocations.size()); aubCsr->makeResidentExternal(externalAllocation); EXPECT_EQ(1u, aubCsr->externalAllocations.size()); EXPECT_EQ(addr, aubCsr->externalAllocations[0].first); EXPECT_EQ(size, aubCsr->externalAllocations[0].second); } /* makeNonResidentExternal() called with the registered address must remove that view. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMakeNonResidentExternalIsCalledThenMatchingAllocationViewShouldBeRemovedFromExternalAllocations) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); size_t size = 100; auto ptr = std::make_unique(size); auto addr = reinterpret_cast(ptr.get()); AllocationView externalAllocation(addr, size); aubCsr->makeResidentExternal(externalAllocation); ASSERT_EQ(1u, aubCsr->externalAllocations.size()); aubCsr->makeNonResidentExternal(addr); EXPECT_EQ(0u, aubCsr->externalAllocations.size()); } /* makeNonResidentExternal() called with an address that was never registered (0) must leave externalAllocations unchanged. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenMakeNonResidentExternalIsCalledThenNonMatchingAllocationViewShouldNotBeRemovedFromExternalAllocations) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); size_t size = 100; auto ptr = std::make_unique(size); auto addr = reinterpret_cast(ptr.get()); AllocationView externalAllocation(addr, size); aubCsr->makeResidentExternal(externalAllocation); 
ASSERT_EQ(1u, aubCsr->externalAllocations.size()); aubCsr->makeNonResidentExternal(0); EXPECT_EQ(1u, aubCsr->externalAllocations.size()); } /* processResidency() must hand the registered external allocation view to writeMemory(), which reports success for a non-zero size. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenProcessResidencyIsCalledThenExternalAllocationsShouldBeMadeResident) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); size_t size = 100; auto ptr = std::make_unique(size); auto addr = reinterpret_cast(ptr.get()); AllocationView externalAllocation(addr, size); aubCsr->makeResidentExternal(externalAllocation); ASSERT_EQ(1u, aubCsr->externalAllocations.size()); ResidencyContainer allocationsForResidency; aubCsr->processResidency(allocationsForResidency, 0u); EXPECT_TRUE(aubCsr->writeMemoryParametrization.wasCalled); EXPECT_EQ(addr, aubCsr->writeMemoryParametrization.receivedAllocationView.first); EXPECT_EQ(size, aubCsr->writeMemoryParametrization.receivedAllocationView.second); EXPECT_TRUE(aubCsr->writeMemoryParametrization.statusToReturn); } /* A zero-sized external view is still handed to writeMemory(); the stub reports failure for it. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenProcessResidencyIsCalledThenExternalAllocationWithZeroSizeShouldNotBeMadeResident) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); AllocationView externalAllocation(0, 0); aubCsr->makeResidentExternal(externalAllocation); ASSERT_EQ(1u, aubCsr->externalAllocations.size()); ResidencyContainer allocationsForResidency; aubCsr->processResidency(allocationsForResidency, 0u); EXPECT_TRUE(aubCsr->writeMemoryParametrization.wasCalled); EXPECT_EQ(0u, aubCsr->writeMemoryParametrization.receivedAllocationView.first); EXPECT_EQ(0u, aubCsr->writeMemoryParametrization.receivedAllocationView.second); EXPECT_FALSE(aubCsr->writeMemoryParametrization.statusToReturn); } /* writeMemory() must walk the page table with the GMM allocation size for render-compressed allocations and with the underlying buffer size otherwise. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenWriteMemoryIsCalledThenGraphicsAllocationSizeIsReadCorrectly) { 
pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter.reset(new AubCenter()); auto aubCsr = std::make_unique>("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); aubCsr->setupContext(*pDevice->getDefaultEngine().osContext); std::unique_ptr memoryManager(new OsAgnosticMemoryManager(*pDevice->executionEnvironment)); PhysicalAddressAllocator allocator; /* Page table stand-in whose pageWalk() only records the size it was asked to walk. */ struct PpgttMock : std::conditional::type { PpgttMock(PhysicalAddressAllocator *allocator) : std::conditional::type(allocator) {} void pageWalk(uintptr_t vm, size_t size, size_t offset, uint64_t entryBits, PageWalker &pageWalker, uint32_t memoryBank) override { receivedSize = size; } size_t receivedSize = 0; }; auto ppgttMock = new PpgttMock(&allocator); aubCsr->ppgtt.reset(ppgttMock); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); aubCsr->setAubWritable(true, *gfxAllocation); auto gmm = new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false); gfxAllocation->setDefaultGmm(gmm); /* Run once uncompressed and once render-compressed to cover both size sources. */ for (bool compressed : {false, true}) { gmm->isRenderCompressed = compressed; aubCsr->writeMemory(*gfxAllocation); if (compressed) { EXPECT_EQ(gfxAllocation->getDefaultGmm()->gmmResourceInfo->getSizeAllocation(), ppgttMock->receivedSize); } else { EXPECT_EQ(gfxAllocation->getUnderlyingBufferSize(), ppgttMock->receivedSize); } } memoryManager->freeGraphicsMemory(gfxAllocation); } /* A freshly created CSR must own PPGTT and GGTT objects whose map() returns a non-zero physical address. */ HWTEST_F(AubCommandStreamReceiverTests, whenAubCommandStreamReceiverIsCreatedThenPPGTTAndGGTTCreatedHavePhysicalAddressAllocatorSet) { auto aubCsr = std::make_unique>("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, aubCsr->ppgtt.get()); ASSERT_NE(nullptr, aubCsr->ggtt.get()); uintptr_t address = 0x20000; auto physicalAddress = aubCsr->ppgtt->map(address, MemoryConstants::pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(0u, physicalAddress); physicalAddress = 
aubCsr->ggtt->map(address, MemoryConstants::pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(0u, physicalAddress); } /* With no hardware context controller and aubManager cleared, initializeEngine() must produce a non-zero dump handle. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenEngineIsInitializedThenDumpHandleIsGenerated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); auto engineInstance = HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0]; MockOsContext osContext(0, 1, engineInstance, PreemptionMode::Disabled, false, false, false); executionEnvironment.initializeMemoryManager(); auto aubCsr = std::make_unique>("", true, executionEnvironment, 0); EXPECT_NE(nullptr, aubCsr); EXPECT_EQ(nullptr, aubCsr->hardwareContextController.get()); aubCsr->aubManager = nullptr; aubCsr->setupContext(osContext); aubCsr->initializeEngine(); EXPECT_NE(0u, aubCsr->handle); } using InjectMmmioTest = Test; /* An empty AubDumpAddMmioRegistersList must not add any MMIO writes. */ HWTEST_F(InjectMmmioTest, givenAddMmioKeySetToZeroWhenInitAdditionalMmioCalledThenDoNotWriteMmio) { DebugManagerStateRestore stateRestore; DebugManager.flags.AubDumpAddMmioRegistersList.set(""); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, aubCsr); auto stream = std::make_unique(); aubCsr->stream = stream.get(); EXPECT_EQ(0u, stream->mmioList.size()); aubCsr->initAdditionalMMIO(); EXPECT_EQ(0u, stream->mmioList.size()); } /* Only the leading well-formed address;value pair is written; the trailing non-numeric text produces no further entry. */ HWTEST_F(InjectMmmioTest, givenAddMmioRegistersListSetWhenInitAdditionalMmioCalledThenWriteGivenMmio) { std::string registers("0xdead;0xbeef;and another very long string"); MMIOPair mmioPair(0xdead, 0xbeef); DebugManagerStateRestore stateRestore; DebugManager.flags.AubDumpAddMmioRegistersList.set(registers); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, aubCsr); auto stream = std::make_unique(); aubCsr->stream = stream.get(); EXPECT_EQ(0u, 
stream->mmioList.size()); aubCsr->initAdditionalMMIO(); EXPECT_EQ(1u, stream->mmioList.size()); EXPECT_TRUE(stream->isOnMmioList(mmioPair)); }; /* Three address;value pairs in the flag must produce three MMIO writes. */ HWTEST_F(InjectMmmioTest, givenLongSequenceOfAddMmioRegistersListSetWhenInitAdditionalMmioCalledThenWriteGivenMmio) { std::string registers("1;1;2;2;3;3"); DebugManagerStateRestore stateRestore; DebugManager.flags.AubDumpAddMmioRegistersList.set(registers); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, aubCsr); auto stream = std::make_unique(); aubCsr->stream = stream.get(); EXPECT_EQ(0u, stream->mmioList.size()); aubCsr->initAdditionalMMIO(); EXPECT_EQ(3u, stream->mmioList.size()); } /* A trailing address without a value must be dropped: only the complete pair ends up on the MMIO list. */ HWTEST_F(InjectMmmioTest, givenSequenceWithIncompletePairOfAddMmioRegistersListSetWhenInitAdditionalMmioCalledThenWriteGivenMmio) { std::string registers("0x1;0x1;0x2"); MMIOPair mmioPair0(0x1, 0x1); MMIOPair mmioPair1(0x2, 0x2); DebugManagerStateRestore stateRestore; DebugManager.flags.AubDumpAddMmioRegistersList.set(registers); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, aubCsr); auto stream = std::make_unique(); aubCsr->stream = stream.get(); EXPECT_EQ(0u, stream->mmioList.size()); aubCsr->initAdditionalMMIO(); EXPECT_EQ(1u, stream->mmioList.size()); EXPECT_TRUE(stream->isOnMmioList(mmioPair0)); EXPECT_FALSE(stream->isOnMmioList(mmioPair1)); } /* A trailing semicolon after a complete pair must not create an extra entry. */ HWTEST_F(InjectMmmioTest, givenAddMmioRegistersListSetWithSemicolonAtTheEndWhenInitAdditionalMmioCalledThenWriteGivenMmio) { std::string registers("0xdead;0xbeef;"); MMIOPair mmioPair(0xdead, 0xbeef); DebugManagerStateRestore stateRestore; DebugManager.flags.AubDumpAddMmioRegistersList.set(registers); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, aubCsr); auto stream = std::make_unique(); aubCsr->stream = stream.get(); EXPECT_EQ(0u, stream->mmioList.size()); 
aubCsr->initAdditionalMMIO(); EXPECT_EQ(1u, stream->mmioList.size()); EXPECT_TRUE(stream->isOnMmioList(mmioPair)); } /* A pair whose value is not numeric must not be written at all. */ HWTEST_F(InjectMmmioTest, givenAddMmioRegistersListSetWithInvalidValueWhenInitAdditionalMmioCalledThenMmioIsNotWritten) { std::string registers("0xdead;invalid"); DebugManagerStateRestore stateRestore; DebugManager.flags.AubDumpAddMmioRegistersList.set(registers); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, aubCsr); auto stream = std::make_unique(); aubCsr->stream = stream.get(); EXPECT_EQ(0u, stream->mmioList.size()); aubCsr->initAdditionalMMIO(); EXPECT_EQ(0u, stream->mmioList.size()); } /* expectMemoryNotEqual() / expectMemoryEqual() must pass CompareNotEqual / CompareEqual to expectMemory(), and the same value must reach the stream. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCsrWhenAskedForMemoryExpectationThenPassValidCompareOperationType) { /* Local CSR subclass that records the compare operation before forwarding to the real expectMemory(). */ class MyMockAubCsr : public AUBCommandStreamReceiverHw { public: using AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) override { inputCompareOperation = compareOperation; return AUBCommandStreamReceiverHw::expectMemory(gfxAddress, srcAddress, length, compareOperation); } uint32_t inputCompareOperation = 0; }; void *mockAddress = reinterpret_cast(1); uint32_t compareNotEqual = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual; uint32_t compareEqual = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual; auto mockStream = std::make_unique(); MyMockAubCsr myMockCsr(std::string(), true, *pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()); myMockCsr.setupContext(pDevice->commandStreamReceivers[0]->getOsContext()); myMockCsr.stream = mockStream.get(); myMockCsr.expectMemoryNotEqual(mockAddress, mockAddress, 1); EXPECT_EQ(compareNotEqual, myMockCsr.inputCompareOperation); EXPECT_EQ(compareNotEqual, mockStream->compareOperationFromExpectMemory); 
myMockCsr.expectMemoryEqual(mockAddress, mockAddress, 1); EXPECT_EQ(compareEqual, myMockCsr.inputCompareOperation); EXPECT_EQ(compareEqual, mockStream->compareOperationFromExpectMemory); } /* The AUB CSR must report a preferred tag pool size of 1. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenObtainingPreferredTagPoolSizeThenReturnOne) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(1u, aubCsr->getPreferredTagPoolSize()); } /* defaultSshSize must equal 64 * KB. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenSshSizeIsObtainedItEqualsTo64KB) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(64 * KB, aubCsr->defaultSshSize); } /* createPhysicalAddressAllocator() must return a non-null allocator; the test takes ownership through unique_ptr. */ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenPhysicalAddressAllocatorIsCreatedThenItIsNotNull) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::unique_ptr allocator(aubCsr.createPhysicalAddressAllocator(&hardwareInfo)); ASSERT_NE(nullptr, allocator); } /* With AUBDumpBufferFormat set to BIN, dumpAllocation() on a writable BUFFER allocation must reach dumpSurface on the hardware context. */ HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationWritableWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenGraphicsAllocationShouldBeDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); 
aubCsr.dumpAllocation(*gfxAllocation); EXPECT_TRUE(mockHardwareContext->dumpSurfaceCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } /* On a context created for ENGINE_BCS, dumpAllocation() must not dump the surface even for a writable buffer with a dump format set; the allocation stays writable. */ HWTEST_F(AubCommandStreamReceiverTests, givenBcsEngineWhenDumpAllocationCalledThenIgnore) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); aubCsr.dumpAllocation(*gfxAllocation); EXPECT_FALSE(mockHardwareContext->dumpSurfaceCalled); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); memoryManager->freeGraphicsMemory(gfxAllocation); } /* With AUBDumpBufferFormat set to TRE and a mock AubCenter installed, a writable BUFFER_COMPRESSED allocation must be dumped. */ HWTEST_F(AubCommandStreamReceiverTests, givenCompressedGraphicsAllocationWritableWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenGraphicsAllocationShouldBeDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("TRE"); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "aubfile", CommandStreamReceiverType::CSR_AUB); mockAubCenter->aubManager = std::make_unique(); pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = 
static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER_COMPRESSED}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); aubCsr.dumpAllocation(*gfxAllocation); EXPECT_TRUE(mockHardwareContext->dumpSurfaceCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } /* When the test sets no dump format, dumpAllocation() must not dump a writable buffer. */ HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationWritableWhenDumpAllocationIsCalledButDumpFormatIsNotSpecifiedThenGraphicsAllocationShouldNotBeDumped) { DebugManagerStateRestore dbgRestore; MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); aubCsr.dumpAllocation(*gfxAllocation); EXPECT_FALSE(mockHardwareContext->dumpSurfaceCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } /* A buffer flagged non-writable must not be dumped even when a dump format is set. */ HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationNonWritableWhenDumpAllocationIsCalledAndFormatIsSpecifiedThenGraphicsAllocationShouldNotBeDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, 
aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(false); EXPECT_FALSE(AubAllocDump::isWritableBuffer(*gfxAllocation)); aubCsr.dumpAllocation(*gfxAllocation); EXPECT_FALSE(mockHardwareContext->dumpSurfaceCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } /* With AUBDumpAllocsOnEnqueueReadOnly set, an allocation marked non-dumpable must not be dumped and must stay non-dumpable. */ HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationNotDumpableWhenDumpAllocationIsCalledAndAUBDumpAllocsOnEnqueueReadOnlyIsSetThenGraphicsAllocationShouldNotBeDumpedAndRemainNonDumpable) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); gfxAllocation->setAllocDumpable(false); aubCsr.dumpAllocation(*gfxAllocation); EXPECT_FALSE(gfxAllocation->isAllocDumpable()); EXPECT_FALSE(mockHardwareContext->dumpSurfaceCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } /* With AUBDumpAllocsOnEnqueueReadOnly set, a dumpable allocation must be dumped and then marked non-dumpable. */ HWTEST_F(AubCommandStreamReceiverTests, 
givenGraphicsAllocationDumpableWhenDumpAllocationIsCalledAndAUBDumpAllocsOnEnqueueReadOnlyIsOnThenGraphicsAllocationShouldBeDumpedAndMarkedNonDumpable) { /* AUBDumpAllocsOnEnqueueReadOnly is enabled with format "BIN"; an allocation marked dumpable is expected to be dumped and to report isAllocDumpable() == false afterwards. */ DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpAllocsOnEnqueueReadOnly.set(true); DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); gfxAllocation->setAllocDumpable(true); aubCsr.dumpAllocation(*gfxAllocation); EXPECT_FALSE(gfxAllocation->isAllocDumpable()); EXPECT_TRUE(mockHardwareContext->dumpSurfaceCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } /* latestSentTaskCount is set to 1 before dumping (presumably so the CSR sees work that has not been polled yet - confirm); the test then checks that pollForCompletion was called on the mock hardware context. */ HWTEST_F(AubCommandStreamReceiverTests, givenGraphicsAllocationWritableWhenDumpAllocationIsCalledAndDumpFormatIsSpecifiedThenPollForCompletionShouldBeCalledBeforeGraphicsAllocationIsDumped) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpBufferFormat.set("BIN"); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); aubCsr.latestSentTaskCount = 1; auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto memoryManager = pDevice->getMemoryManager(); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, 
GraphicsAllocation::AllocationType::BUFFER}); gfxAllocation->setMemObjectsAllocationWithWritableFlags(true); EXPECT_TRUE(AubAllocDump::isWritableBuffer(*gfxAllocation)); aubCsr.dumpAllocation(*gfxAllocation); EXPECT_TRUE(mockHardwareContext->pollForCompletionCalled); memoryManager->freeGraphicsMemory(gfxAllocation); } using SimulatedCsrTest = ::testing::Test; /* Builds a CSR for root device index 10 of a mock execution environment and checks that construction calls initAubCenter on that root device environment: aubCenter is null before and non-null after. */ HWTEST_F(SimulatedCsrTest, givenAubCsrTypeWhenCreateCommandStreamReceiverThenProperAubCenterIsInitalized) { uint32_t expectedRootDeviceIndex = 10; MockExecutionEnvironment executionEnvironment; executionEnvironment.initializeMemoryManager(); executionEnvironment.prepareRootDeviceEnvironments(expectedRootDeviceIndex + 2); auto rootDeviceEnvironment = new MockRootDeviceEnvironment(executionEnvironment); executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex].reset(rootDeviceEnvironment); rootDeviceEnvironment->setHwInfo(defaultHwInfo.get()); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); auto csr = std::make_unique>("", true, executionEnvironment, expectedRootDeviceIndex); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_NE(nullptr, executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/aub_file_stream_tests.cpp000066400000000000000000001375451363734646600317110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/helpers/hardware_context_controller.h" #include "opencl/source/helpers/neo_driver_version.h" #include 
"opencl/test/unit_test/fixtures/aub_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_csr.h" #include "opencl/test/unit_test/mocks/mock_aub_file_stream.h" #include "opencl/test/unit_test/mocks/mock_aub_manager.h" #include "opencl/test/unit_test/mocks/mock_aub_subcapture_manager.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "test.h" #include "driver_version.h" #include #include using namespace NEO; using ::testing::_; using ::testing::Invoke; using ::testing::Return; using AubFileStreamTests = Test; /* initFile() with an empty file name is expected to throw std::exception. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenInitFileIsCalledWithInvalidFileNameThenFileIsNotOpened) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string invalidFileName = ""; EXPECT_THROW(aubCsr->initFile(invalidFileName), std::exception); } /* With aubManager reset to nullptr on the CSR, initFile() with an empty file name is still expected to throw std::exception. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithoutAubManagerWhenInitFileIsCalledWithInvalidFileNameThenFileIsNotOpened) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); auto aubCsr = std::make_unique>("", true, executionEnvironment, 0); std::string invalidFileName = ""; aubCsr->aubManager = nullptr; EXPECT_THROW(aubCsr->initFile(invalidFileName), std::exception); } /* initFile() should open the file and expose its name; closeFile() should then close it and clear the stored name. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenInitFileIsCalledThenFileIsOpenedAndFileNameIsStored) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; aubCsr->initFile(fileName); EXPECT_TRUE(aubCsr->isFileOpen()); EXPECT_STREQ(fileName.c_str(), 
aubCsr->getFileName().c_str()); aubCsr->closeFile(); EXPECT_FALSE(aubCsr->isFileOpen()); EXPECT_TRUE(aubCsr->getFileName().empty()); } /* reopenFile() is called twice with different names; after each call the file must be open under the name that was just passed. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenReopenFileIsCalledThenFileWithSpecifiedNameIsReopened) { auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; std::string newFileName = "new_file_name.aub"; aubCsr->reopenFile(fileName); EXPECT_TRUE(aubCsr->isFileOpen()); EXPECT_STREQ(fileName.c_str(), aubCsr->getFileName().c_str()); aubCsr->reopenFile(newFileName); EXPECT_TRUE(aubCsr->isFileOpen()); EXPECT_STREQ(newFileName.c_str(), aubCsr->getFileName().c_str()); } /* With aubManager == nullptr and a mock file stream installed, two initFile() calls are expected to open and init the stream only once. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithoutAubManagerWhenInitFileIsCalledThenFileShouldBeInitializedWithHeaderOnce) { auto mockAubFileStream = std::make_unique(); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; aubCsr->aubManager = nullptr; aubCsr->stream = mockAubFileStream.get(); aubCsr->initFile(fileName); aubCsr->initFile(fileName); EXPECT_EQ(1u, mockAubFileStream->openCalledCnt); EXPECT_EQ(1u, mockAubFileStream->initCalledCnt); } /* With a mock AUB manager installed, two initFile() calls are expected to produce a single open call on the manager. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithAubManagerWhenInitFileIsCalledThenFileShouldBeInitializedOnce) { auto mockAubManager = std::make_unique(); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; aubCsr->aubManager = mockAubManager.get(); aubCsr->initFile(fileName); aubCsr->initFile(fileName); EXPECT_EQ(1u, mockAubManager->openCalledCnt); } /* With aubManager == nullptr, initFile/isFileOpen/getFileName/closeFile are expected to be forwarded to the mock file stream. NOTE(review): the test name says "AubManagerFunctions" although the assertions target the file stream - confirm the name is intentional. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithoutAubManagerWhenFileFunctionsAreCalledThenTheyShouldCallTheExpectedAubManagerFunctions) { auto mockAubFileStream = std::make_unique(); auto aubCsr = std::make_unique>("", true, 
*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; aubCsr->aubManager = nullptr; aubCsr->stream = mockAubFileStream.get(); aubCsr->initFile(fileName); EXPECT_EQ(1u, mockAubFileStream->initCalledCnt); EXPECT_TRUE(aubCsr->isFileOpen()); EXPECT_TRUE(mockAubFileStream->isOpenCalled); EXPECT_STREQ(fileName.c_str(), aubCsr->getFileName().c_str()); EXPECT_TRUE(mockAubFileStream->getFileNameCalled); aubCsr->closeFile(); EXPECT_FALSE(aubCsr->isFileOpen()); EXPECT_TRUE(aubCsr->getFileName().empty()); EXPECT_TRUE(mockAubFileStream->closeCalled); } /* With a mock AUB manager installed, initFile/isFileOpen/getFileName/closeFile are expected to be forwarded to that manager. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithAubManagerWhenFileFunctionsAreCalledThenTheyShouldCallTheExpectedAubManagerFunctions) { auto mockAubManager = std::make_unique(); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; aubCsr->aubManager = mockAubManager.get(); aubCsr->initFile(fileName); EXPECT_EQ(1u, mockAubManager->openCalledCnt); EXPECT_TRUE(aubCsr->isFileOpen()); EXPECT_TRUE(mockAubManager->isOpenCalled); EXPECT_STREQ(fileName.c_str(), aubCsr->getFileName().c_str()); EXPECT_TRUE(mockAubManager->getFileNameCalled); aubCsr->closeFile(); EXPECT_FALSE(aubCsr->isFileOpen()); EXPECT_TRUE(aubCsr->getFileName().empty()); EXPECT_TRUE(mockAubManager->closeCalled); } /* openFile() is expected to take the stream lock, observed through lockStreamCalled on the mock stream. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenOpenFileIsCalledThenFileStreamShouldBeLocked) { auto mockAubFileStream = std::make_unique(); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; aubCsr->stream = mockAubFileStream.get(); aubCsr->openFile(fileName); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* reopenFile() is expected to take the stream lock, observed through lockStreamCalled on the mock stream. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenReopenFileIsCalledThenFileStreamShouldBeLocked) { auto mockAubFileStream = std::make_unique(); auto aubCsr = 
std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); std::string fileName = "file_name.aub"; aubCsr->stream = mockAubFileStream.get(); aubCsr->reopenFile(fileName); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* initializeEngine() is expected to take the stream lock on the mock file stream. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenInitializeEngineIsCalledThenFileStreamShouldBeLocked) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = static_cast(mockAubFileStream.get()); aubCsr->initializeEngine(); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* lockStreamCalled is cleared after initializeEngine() so that the final check only reflects a lock taken by submitBatchBuffer() itself. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenSubmitBatchBufferIsCalledThenFileStreamShouldBeLocked) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->stream = static_cast(mockAubFileStream.get()); auto pBatchBuffer = ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset); auto batchBufferGpuAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset); auto currentOffset = batchBuffer.usedSize; auto sizeBatchBuffer = currentOffset - batchBuffer.startOffset; aubCsr->initializeEngine(); mockAubFileStream->lockStreamCalled = false; aubCsr->submitBatchBuffer(batchBufferGpuAddress, pBatchBuffer, sizeBatchBuffer, aubCsr->getMemoryBank(batchBuffer.commandBufferAllocation), aubCsr->getPPGTTAdditionalBits(batchBuffer.commandBufferAllocation)); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } HWTEST_F(AubFileStreamTests, 
givenAubCommandStreamReceiverWhenWriteMemoryIsCalledThenFileStreamShouldBeLocked) { /* writeMemory() on a mock allocation is expected to take the stream lock on the mock file stream. */ auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = static_cast(mockAubFileStream.get()); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); aubCsr->writeMemory(allocation); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* latestSentTaskCount (1) differs from pollForCompletionTaskCount (0) before the call; pollForCompletion() is then expected to take the stream lock. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenPollForCompletionIsCalledThenFileStreamShouldBeLocked) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = static_cast(mockAubFileStream.get()); aubCsr->latestSentTaskCount = 1; aubCsr->pollForCompletionTaskCount = 0; aubCsr->pollForCompletion(); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* expectMemoryEqual() is expected to take the stream lock on the mock file stream. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenExpectMemoryEqualIsCalledThenFileStreamShouldBeLocked) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = static_cast(mockAubFileStream.get()); aubCsr->expectMemoryEqual(reinterpret_cast(0x1000), reinterpret_cast(0x1000), 0x1000); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* addAubComment() is expected to take the stream lock on the mock file stream. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenAddAubCommentIsCalledThenFileStreamShouldBeLocked) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = static_cast(mockAubFileStream.get()); aubCsr->addAubComment("comment"); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* dumpAllocation() on an UNKNOWN-type allocation is expected to take the stream lock on the mock file stream. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenDumpAllocationIsCalledThenFileStreamShouldBeLocked) { auto 
mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; aubCsr->stream = static_cast(mockAubFileStream.get()); aubCsr->dumpAllocation(allocation); EXPECT_TRUE(mockAubFileStream->lockStreamCalled); } /* flush() with an empty residency container is expected to initialize the engine, write memory and submit the batch buffer, but not to poll for completion. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenItShouldCallTheExpectedFunctions) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; aubCsr->flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(aubCsr->initializeEngineCalled); EXPECT_TRUE(aubCsr->writeMemoryCalled); EXPECT_TRUE(aubCsr->submitBatchBufferCalled); EXPECT_FALSE(aubCsr->pollForCompletionCalled); } /* addAubComment() on the CSR is expected to forward the exact comment text to the file stream's addComment. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenCallingAddAubCommentThenCallAddCommentOnAubFileStream) { auto aubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubFileStream.get(); const char *comment = "message"; aubCsr->addAubComment(comment); EXPECT_TRUE(aubCsr->addAubCommentCalled); EXPECT_TRUE(aubFileStream->addCommentCalled); EXPECT_STREQ(comment, aubFileStream->receivedComment.c_str()); } /* Uses the aubManager already attached to the CSR (asserted non-null) and expects addAubComment() to forward the exact comment text to it. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithAubManagerWhenCallingAddAubCommentThenCallAddCommentOnAubManager) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, 
PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockAubManager = static_cast(aubCsr.aubManager); ASSERT_NE(nullptr, mockAubManager); const char *comment = "message"; aubCsr.addAubComment(comment); EXPECT_TRUE(aubCsr.addAubCommentCalled); EXPECT_TRUE(mockAubManager->addCommentCalled); EXPECT_STREQ(comment, mockAubManager->receivedComment.c_str()); } /* waitForTaskCountWithKmdNotifyFallback() is expected to call pollForCompletion(). NOTE(review): the test name mentions InsertAubWaitInstruction, which is not the function invoked here - confirm the name is intentional. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenCallingInsertAubWaitInstructionThenCallPollForCompletion) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); ASSERT_FALSE(aubCsr->pollForCompletionCalled); aubCsr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false); EXPECT_TRUE(aubCsr->pollForCompletionCalled); } /* latestSentTaskCount (50) is ahead of pollForCompletionTaskCount (49): pollForCompletion() should register a poll on the stream and advance pollForCompletionTaskCount to 50. */ HWTEST_F(AubFileStreamTests, givenNewTaskSinceLastPollWhenCallingPollForCompletionThenCallRegisterPoll) { auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 49; ASSERT_FALSE(aubStream->registerPollCalled); aubCsr->pollForCompletion(); EXPECT_TRUE(aubStream->registerPollCalled); EXPECT_EQ(50u, aubCsr->pollForCompletionTaskCount); } /* Both task counters are equal (50): pollForCompletion() must not register a poll and must leave pollForCompletionTaskCount at 50. */ HWTEST_F(AubFileStreamTests, givenNoNewTasksSinceLastPollWhenCallingPollForCompletionThenDontCallRegisterPoll) { auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 50; ASSERT_FALSE(aubStream->registerPollCalled); aubCsr->pollForCompletion(); EXPECT_FALSE(aubStream->registerPollCalled); EXPECT_EQ(50u, aubCsr->pollForCompletionTaskCount); } /* Resetting commandStreamReceiver while latestSentTaskCount is ahead of pollForCompletionTaskCount is expected to register a poll on the stream. */ HWTEST_F(AubFileStreamTests, givenNewTaskSinceLastPollWhenDeletingAubCsrThenCallRegisterPoll) { 
auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 49; ASSERT_FALSE(aubStream->registerPollCalled); aubExecutionEnvironment->commandStreamReceiver.reset(); EXPECT_TRUE(aubStream->registerPollCalled); } /* Resetting commandStreamReceiver while both task counters are equal (50) must not register a poll on the stream. */ HWTEST_F(AubFileStreamTests, givenNoNewTaskSinceLastPollWhenDeletingAubCsrThenDontCallRegisterPoll) { auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 50; ASSERT_FALSE(aubStream->registerPollCalled); aubExecutionEnvironment->commandStreamReceiver.reset(); EXPECT_FALSE(aubStream->registerPollCalled); } /* With a hardware context set up and latestSentTaskCount (50) ahead of pollForCompletionTaskCount (49), pollForCompletion() is expected to be forwarded to the hardware context. */ HWTEST_F(AubFileStreamTests, givenNewTasksAndHardwareContextPresentWhenCallingPollForCompletionThenCallPollForCompletion) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto hardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); aubCsr.latestSentTaskCount = 50; aubCsr.pollForCompletionTaskCount = 49; ASSERT_FALSE(hardwareContext->pollForCompletionCalled); aubCsr.pollForCompletion(); EXPECT_TRUE(hardwareContext->pollForCompletionCalled); } /* With a hardware context set up and both task counters equal (50), pollForCompletion() must not reach the hardware context. */ HWTEST_F(AubFileStreamTests, givenNoNewTasksAndHardwareContextPresentWhenCallingPollForCompletionThenDontCallPollForCompletion) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto hardwareContext = 
static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); aubCsr.latestSentTaskCount = 50; aubCsr.pollForCompletionTaskCount = 50; ASSERT_FALSE(hardwareContext->pollForCompletionCalled); aubCsr.pollForCompletion(); EXPECT_FALSE(hardwareContext->pollForCompletionCalled); } /* With both task counters equal (50), expectMemoryNotEqual() must not register a poll on the stream. NOTE(review): the local MockGraphicsAllocation is never used by this test. */ HWTEST_F(AubFileStreamTests, givenNoNewTasksSinceLastPollWhenCallingExpectMemoryThenDontCallRegisterPoll) { auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 50; ASSERT_FALSE(aubStream->registerPollCalled); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); aubCsr->expectMemoryNotEqual(reinterpret_cast(0x1000), reinterpret_cast(0x1000), 0x1000); EXPECT_FALSE(aubStream->registerPollCalled); } /* With latestSentTaskCount (50) ahead of pollForCompletionTaskCount (49), expectMemoryNotEqual() is expected to register a poll on the stream. NOTE(review): the local MockGraphicsAllocation is never used by this test. */ HWTEST_F(AubFileStreamTests, givenNewTasksSinceLastPollWhenCallingExpectMemoryThenCallRegisterPoll) { auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 49; ASSERT_FALSE(aubStream->registerPollCalled); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); aubCsr->expectMemoryNotEqual(reinterpret_cast(0x1000), reinterpret_cast(0x1000), 0x1000); EXPECT_TRUE(aubStream->registerPollCalled); } /* Sub-capture is put in Toggle mode and activated before the mock manager is handed to the CSR (which takes ownership through unique_ptr); with un-polled work pending, pollForCompletion() is expected to register a poll. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverInSubCaptureModeWhenPollForCompletionIsCalledAndSubCaptureIsEnabledThenItShouldCallRegisterPoll) { DebugManagerStateRestore stateRestore; AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = 
aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); const DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManagerMock->setSubCaptureToggleActive(true); aubSubCaptureManagerMock->checkAndActivateSubCapture(multiDispatchInfo); aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled()); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 49; ASSERT_FALSE(aubStream->registerPollCalled); aubCsr->pollForCompletion(); EXPECT_TRUE(aubStream->registerPollCalled); } /* Sub-capture is in Toggle mode but explicitly disabled; even with un-polled work pending, pollForCompletion() must not register a poll on the stream. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverInSubCaptureModeWhenPollForCompletionIsCalledButSubCaptureIsDisabledThenItShouldntCallRegisterPoll) { DebugManagerStateRestore stateRestore; AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); auto aubStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = aubStream.get(); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManagerMock->disableSubCapture(); aubCsr->subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); ASSERT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled()); aubCsr->latestSentTaskCount = 50; aubCsr->pollForCompletionTaskCount = 49; ASSERT_FALSE(aubStream->registerPollCalled); aubCsr->pollForCompletion(); EXPECT_FALSE(aubStream->registerPollCalled); } /* Hardware-context variant: sub-capture is in Toggle mode and activated, so with un-polled work pending pollForCompletion() is expected to reach the hardware context. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithHardwareContextInSubCaptureModeWhenPollForCompletionIsCalledAndSubCaptureIsEnabledThenItShouldCallPollForCompletionOnHwContext) { DebugManagerStateRestore stateRestore; AubSubCaptureCommon aubSubCaptureCommon; auto 
aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto hardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); const DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManagerMock->setSubCaptureToggleActive(true); aubSubCaptureManagerMock->checkAndActivateSubCapture(multiDispatchInfo); aubCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); ASSERT_TRUE(aubCsr.subCaptureManager->isSubCaptureEnabled()); aubCsr.latestSentTaskCount = 50; aubCsr.pollForCompletionTaskCount = 49; ASSERT_FALSE(hardwareContext->pollForCompletionCalled); aubCsr.pollForCompletion(); EXPECT_TRUE(hardwareContext->pollForCompletionCalled); } /* Hardware-context variant with sub-capture in Toggle mode but explicitly disabled: even with un-polled work pending, pollForCompletion() must not reach the hardware context. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithHardwareContextInSubCaptureModeWhenPollForCompletionIsCalledButSubCaptureIsDisabledThenItShouldntCallPollForCompletionOnHwContext) { DebugManagerStateRestore stateRestore; AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto hardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManagerMock->disableSubCapture(); aubCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); 
ASSERT_FALSE(aubCsr.subCaptureManager->isSubCaptureEnabled()); aubCsr.latestSentTaskCount = 50; aubCsr.pollForCompletionTaskCount = 49; ASSERT_FALSE(hardwareContext->pollForCompletionCalled); aubCsr.pollForCompletion(); EXPECT_FALSE(hardwareContext->pollForCompletionCalled); } /* processResidency() over a container holding one mock allocation is expected to set writeMemoryCalled on the CSR. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenMakeResidentIsCalledThenItShouldCallTheExpectedFunctions) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); ResidencyContainer allocationsForResidency = {&allocation}; aubCsr->processResidency(allocationsForResidency, 0u); EXPECT_TRUE(aubCsr->writeMemoryCalled); } /* expectMemoryEqual() is expected to set expectMemoryEqualCalled on the CSR. NOTE(review): the local MockGraphicsAllocation is never used by this test. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenExpectMemoryEqualIsCalledThenItShouldCallTheExpectedFunctions) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); aubCsr->expectMemoryEqual(reinterpret_cast(0x1000), reinterpret_cast(0x1000), 0x1000); EXPECT_TRUE(aubCsr->expectMemoryEqualCalled); } /* expectMemoryNotEqual() is expected to set expectMemoryNotEqualCalled on the CSR. NOTE(review): the local MockGraphicsAllocation is never used by this test. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenExpectMemoryNotEqualIsCalledThenItShouldCallTheExpectedFunctions) { auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); aubCsr->expectMemoryNotEqual(reinterpret_cast(0x1000), reinterpret_cast(0x1000), 0x1000); EXPECT_TRUE(aubCsr->expectMemoryNotEqualCalled); } /* With a hardware context set up, flush() is expected to initialize it, submit to it and write memory through the AUB manager path, without writeAndSubmit or pollForCompletion. NOTE(review): the BatchBuffer's second initializer is 1 while nothing has been written to the stream - presumably a start offset chosen so the submitted size is non-zero; confirm. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenItShouldCallTheExpectedHwContextFunctions) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); 
aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto tagAllocation = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); aubCsr.setTagAllocation(tagAllocation); /* NOTE(review): tagAllocation is handed to the CSR and not freed in this test - presumably released by the CSR; confirm. */ LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency; aubCsr.flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(mockHardwareContext->initializeCalled); EXPECT_TRUE(mockHardwareContext->submitCalled); EXPECT_FALSE(mockHardwareContext->writeAndSubmitCalled); EXPECT_FALSE(mockHardwareContext->pollForCompletionCalled); EXPECT_TRUE(aubCsr.writeMemoryWithAubManagerCalled); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } /* The BatchBuffer is built over an empty stream with 0 as its second initializer; flush() is then expected to call neither submit nor writeAndSubmit on the hardware context. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenFlushIsCalledWithZeroSizedBufferThenSubmitIsNotCalledOnHwContext) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto tagAllocation = 
pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); aubCsr.setTagAllocation(tagAllocation); /* NOTE(review): tagAllocation is handed to the CSR and not freed in this test - presumably released by the CSR; confirm. */ LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency; aubCsr.flush(batchBuffer, allocationsForResidency); EXPECT_FALSE(mockHardwareContext->writeAndSubmitCalled); EXPECT_FALSE(mockHardwareContext->submitCalled); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } /* With a hardware context set up, processResidency() over one mock allocation is expected to go through the AUB-manager write path (writeMemoryWithAubManagerCalled). */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenMakeResidentIsCalledThenItShouldCallTheExpectedHwContextFunctions) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); ResidencyContainer allocationsForResidency = {&allocation}; aubCsr.processResidency(allocationsForResidency, 0u); EXPECT_TRUE(aubCsr.writeMemoryWithAubManagerCalled); } /* With a hardware context set up, expectMemoryEqual() is expected to call expectMemory on that context. NOTE(review): the local MockGraphicsAllocation is never used by this test. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenExpectMemoryEqualIsCalledThenItShouldCallTheExpectedHwContextFunctions) { MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); aubCsr.expectMemoryEqual(reinterpret_cast(0x1000), reinterpret_cast(0x1000), 0x1000); EXPECT_TRUE(mockHardwareContext->expectMemoryCalled); } HWTEST_F(AubFileStreamTests, 
givenAubCommandStreamReceiverWhenExpectMemoryNotEqualIsCalledThenItShouldCallTheExpectedHwContextFunctions) { /* With a hardware context set up, expectMemoryNotEqual() is expected to call expectMemory on that context. NOTE(review): the local MockGraphicsAllocation is never used by this test. */ MockAubCsr aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); aubCsr.setupContext(osContext); auto mockHardwareContext = static_cast(aubCsr.hardwareContextController->hardwareContexts[0].get()); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); aubCsr.expectMemoryNotEqual(reinterpret_cast(0x1000), reinterpret_cast(0x1000), 0x1000); EXPECT_TRUE(mockHardwareContext->expectMemoryCalled); } /* flush() with an empty residency container is expected to flush the mock file stream (flushCalled). */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenFileStreamShouldBeFlushed) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(true, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); aubCsr->stream = mockAubFileStream.get(); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; aubCsr->flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(mockAubFileStream->flushCalled); } /* Maps one page in the CSR's ppgtt, then checks that expectMemoryEqual() hands the stream the mapped physical address, the source pointer, the page size and the TraceNonlocal address space. */ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenExpectMemoryIsCalledThenPageWalkIsCallingStreamsExpectMemory) { auto mockAubFileStream = std::make_unique(); auto aubCsr = std::make_unique>("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); aubCsr->setupContext(pDevice->commandStreamReceivers[0]->getOsContext()); aubCsr->stream = mockAubFileStream.get(); uintptr_t gpuAddress = 0x30000; void *sourceAddress = reinterpret_cast(0x50000); auto physicalAddress = aubCsr->ppgtt->map(gpuAddress, MemoryConstants::pageSize, PageTableEntry::presentBit, MemoryBanks::MainBank); 
aubCsr->expectMemoryEqual(reinterpret_cast(gpuAddress), sourceAddress, MemoryConstants::pageSize); EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceNonlocal, mockAubFileStream->addressSpaceCapturedFromExpectMemory); EXPECT_EQ(reinterpret_cast(sourceAddress), mockAubFileStream->memoryCapturedFromExpectMemory); EXPECT_EQ(physicalAddress, mockAubFileStream->physAddressCapturedFromExpectMemory); EXPECT_EQ(MemoryConstants::pageSize, mockAubFileStream->sizeCapturedFromExpectMemory); } HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithoutAubManagerWhenExpectMMIOIsCalledThenTheCorrectFunctionIsCalledFromAubFileStream) { std::string fileName = "file_name.aub"; auto mockAubFileStream = std::make_unique(); auto aubCsr = std::make_unique>(fileName.c_str(), true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); aubCsr->aubManager = nullptr; aubCsr->stream = mockAubFileStream.get(); aubCsr->setupContext(pDevice->commandStreamReceivers[0]->getOsContext()); aubCsr->initFile(fileName); aubCsr->expectMMIO(5, 10); EXPECT_EQ(5u, mockAubFileStream->mmioRegisterFromExpectMMIO); EXPECT_EQ(10u, mockAubFileStream->expectedValueFromExpectMMIO); } HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithAubManagerWhenExpectMMIOIsCalledThenNoFunctionIsCalledFromAubFileStream) { std::string fileName = "file_name.aub"; auto mockAubManager = std::make_unique(); auto mockAubFileStream = std::make_unique(); auto aubCsr = std::make_unique>(fileName.c_str(), true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); aubCsr->stream = mockAubFileStream.get(); aubCsr->setupContext(pDevice->commandStreamReceivers[0]->getOsContext()); aubCsr->initFile(fileName); aubCsr->expectMMIO(5, 10); EXPECT_NE(5u, mockAubFileStream->mmioRegisterFromExpectMMIO); EXPECT_NE(10u, mockAubFileStream->expectedValueFromExpectMMIO); } HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenInitializeEngineIsCalledThenMemTraceCommentWithDriverVersionIsPutIntoAubStream) { auto 
mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->stream = mockAubFileStream.get(); std::vector comments; EXPECT_CALL(*mockAubFileStream, addComment(_)).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool { comments.push_back(std::string(str)); return true; })); aubCsr->initializeEngine(); std::string commentWithDriverVersion = "driver version: " + std::string(driverVersion); EXPECT_EQ(commentWithDriverVersion, comments[0]); } HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithAubManagerWhenInitFileIsCalledThenMemTraceCommentWithDriverVersionIsPutIntoAubStream) { auto mockAubManager = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); aubCsr->aubManager = mockAubManager.get(); std::string fileName = "file_name.aub"; aubCsr->initFile(fileName); std::string commentWithDriverVersion = "driver version: " + std::string(driverVersion); EXPECT_EQ(mockAubManager->receivedComment, commentWithDriverVersion); } HWTEST_F(AubFileStreamTests, givenAddPatchInfoCommentsCalledWhenNoPatchInfoDataObjectsThenCommentsAreEmpty) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->stream = mockAubFileStream.get(); std::vector comments; EXPECT_CALL(*mockAubFileStream, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool { comments.push_back(std::string(str)); return true; })); bool result = aubCsr->addPatchInfoComments(); EXPECT_TRUE(result); ASSERT_EQ(2u, 
comments.size()); EXPECT_EQ("PatchInfoData\n", comments[0]); EXPECT_EQ("AllocationsList\n", comments[1]); } HWTEST_F(AubFileStreamTests, givenAddPatchInfoCommentsCalledWhenFirstAddCommentsFailsThenFunctionReturnsFalse) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->stream = mockAubFileStream.get(); EXPECT_CALL(*mockAubFileStream, addComment(_)).Times(1).WillOnce(Return(false)); bool result = aubCsr->addPatchInfoComments(); EXPECT_FALSE(result); } HWTEST_F(AubFileStreamTests, givenAddPatchInfoCommentsCalledWhenSecondAddCommentsFailsThenFunctionReturnsFalse) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->stream = mockAubFileStream.get(); EXPECT_CALL(*mockAubFileStream, addComment(_)).Times(2).WillOnce(Return(true)).WillOnce(Return(false)); bool result = aubCsr->addPatchInfoComments(); EXPECT_FALSE(result); } HWTEST_F(AubFileStreamTests, givenAddPatchInfoCommentsCalledWhenPatchInfoDataObjectsAddedThenCommentsAreNotEmpty) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, 
QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->stream = mockAubFileStream.get(); PatchInfoData patchInfoData[2] = {{0xAAAAAAAA, 128u, PatchInfoAllocationType::Default, 0xBBBBBBBB, 256u, PatchInfoAllocationType::Default}, {0xBBBBBBBB, 128u, PatchInfoAllocationType::Default, 0xDDDDDDDD, 256u, PatchInfoAllocationType::Default}}; EXPECT_TRUE(aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData[0])); EXPECT_TRUE(aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData[1])); std::vector comments; EXPECT_CALL(*mockAubFileStream, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool { comments.push_back(std::string(str)); return true; })); bool result = aubCsr->addPatchInfoComments(); EXPECT_TRUE(result); ASSERT_EQ(2u, comments.size()); EXPECT_EQ("PatchInfoData", comments[0].substr(0, 13)); EXPECT_EQ("AllocationsList", comments[1].substr(0, 15)); std::string line; std::istringstream input1; input1.str(comments[0]); uint32_t lineNo = 0; while (std::getline(input1, line)) { if (line.substr(0, 13) == "PatchInfoData") { continue; } std::ostringstream ss; ss << std::hex << patchInfoData[lineNo].sourceAllocation << ";" << patchInfoData[lineNo].sourceAllocationOffset << ";" << patchInfoData[lineNo].sourceType << ";"; ss << patchInfoData[lineNo].targetAllocation << ";" << patchInfoData[lineNo].targetAllocationOffset << ";" << patchInfoData[lineNo].targetType << ";"; EXPECT_EQ(ss.str(), line); lineNo++; } std::vector expectedAddresses = {"aaaaaaaa", "bbbbbbbb", "cccccccc", "dddddddd"}; lineNo = 0; std::istringstream input2; input2.str(comments[1]); while (std::getline(input2, line)) { if (line.substr(0, 15) == "AllocationsList") { continue; } bool foundAddr = false; for (auto &addr : expectedAddresses) { if (line.substr(0, 8) == addr) { foundAddr = true; break; } } EXPECT_TRUE(foundAddr); EXPECT_TRUE(line.size() > 9); lineNo++; } } HWTEST_F(AubFileStreamTests, 
givenAddPatchInfoCommentsCalledWhenSourceAllocationIsNullThenDoNotAddToAllocationsList) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->stream = mockAubFileStream.get(); PatchInfoData patchInfoData = {0x0, 0u, PatchInfoAllocationType::Default, 0xBBBBBBBB, 0u, PatchInfoAllocationType::Default}; EXPECT_TRUE(aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData)); std::vector comments; EXPECT_CALL(*mockAubFileStream, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool { comments.push_back(std::string(str)); return true; })); bool result = aubCsr->addPatchInfoComments(); EXPECT_TRUE(result); ASSERT_EQ(2u, comments.size()); ASSERT_EQ("PatchInfoData", comments[0].substr(0, 13)); ASSERT_EQ("AllocationsList", comments[1].substr(0, 15)); std::string line; std::istringstream input; input.str(comments[1]); uint32_t lineNo = 0; std::vector expectedAddresses = {"bbbbbbbb"}; while (std::getline(input, line)) { if (line.substr(0, 15) == "AllocationsList") { continue; } bool foundAddr = false; for (auto &addr : expectedAddresses) { if (line.substr(0, 8) == addr) { foundAddr = true; break; } } EXPECT_TRUE(foundAddr); EXPECT_TRUE(line.size() > 9); lineNo++; } } HWTEST_F(AubFileStreamTests, givenAddPatchInfoCommentsCalledWhenTargetAllocationIsNullThenDoNotAddToAllocationsList) { auto mockAubFileStream = std::make_unique(); auto aubExecutionEnvironment = getEnvironment>(false, true, true); auto aubCsr = aubExecutionEnvironment->template getCsr>(); LinearStream cs(aubExecutionEnvironment->commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 128u, nullptr, false, 
false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; aubCsr->stream = mockAubFileStream.get(); PatchInfoData patchInfoData = {0xAAAAAAAA, 0u, PatchInfoAllocationType::Default, 0x0, 0u, PatchInfoAllocationType::Default}; EXPECT_TRUE(aubCsr->getFlatBatchBufferHelper().setPatchInfoData(patchInfoData)); std::vector comments; EXPECT_CALL(*mockAubFileStream, addComment(_)).Times(2).WillRepeatedly(::testing::Invoke([&](const char *str) -> bool { comments.push_back(std::string(str)); return true; })); bool result = aubCsr->addPatchInfoComments(); EXPECT_TRUE(result); ASSERT_EQ(2u, comments.size()); ASSERT_EQ("PatchInfoData", comments[0].substr(0, 13)); ASSERT_EQ("AllocationsList", comments[1].substr(0, 15)); std::string line; std::istringstream input; input.str(comments[1]); uint32_t lineNo = 0; std::vector expectedAddresses = {"aaaaaaaa"}; while (std::getline(input, line)) { if (line.substr(0, 15) == "AllocationsList") { continue; } bool foundAddr = false; for (auto &addr : expectedAddresses) { if (line.substr(0, 8) == addr) { foundAddr = true; break; } } EXPECT_TRUE(foundAddr); EXPECT_TRUE(line.size() > 9); lineNo++; } } HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenCreateFullFilePathIsCalledForMultipleDevicesThenFileNameIsExtendedWithSuffixToIndicateMultipleDevices) { DebugManagerStateRestore stateRestore; DebugManager.flags.CreateMultipleSubDevices.set(1); auto fullName = AUBCommandStreamReceiver::createFullFilePath(*defaultHwInfo, "aubfile"); EXPECT_EQ(std::string::npos, fullName.find("tx")); DebugManager.flags.CreateMultipleSubDevices.set(2); fullName = AUBCommandStreamReceiver::createFullFilePath(*defaultHwInfo, "aubfile"); EXPECT_NE(std::string::npos, fullName.find("2tx")); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/aub_subcapture_tests.cpp000066400000000000000000000721341363734646600315640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_stream/aub_subcapture.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_aub_subcapture_manager.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" using namespace NEO; struct AubSubCaptureTest : public DeviceFixture, public ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); program = std::make_unique(*pDevice->getExecutionEnvironment()); kernelInfo.name = "kernel_name"; dbgRestore = new DebugManagerStateRestore(); } void TearDown() override { DeviceFixture::TearDown(); delete dbgRestore; } std::unique_ptr program; KernelInfo kernelInfo; DebugManagerStateRestore *dbgRestore; AubSubCaptureCommon subCaptureCommon; }; TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureToggleCaptureOnOffIsUnspecifiedThenSubCaptureIsToggledOffByDefault) { struct AubSubCaptureManagerWithToggleActiveMock : public AubSubCaptureManager { using AubSubCaptureManager::AubSubCaptureManager; using AubSubCaptureManager::isSubCaptureToggleActive; } aubSubCaptureManagerWithToggleActiveMock("", subCaptureCommon); EXPECT_FALSE(aubSubCaptureManagerWithToggleActiveMock.isSubCaptureToggleActive()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureToggleFileNameIsUnspecifiedThenEmptyToggleFileNameIsReturnedByDefault) { struct AubSubCaptureManagerWithToggleFileNameMock : public AubSubCaptureManager { using AubSubCaptureManager::AubSubCaptureManager; using AubSubCaptureManager::getToggleFileName; } aubSubCaptureManagerWithToggleFileNameMock("", subCaptureCommon); EXPECT_STREQ("", aubSubCaptureManagerWithToggleFileNameMock.getToggleFileName().c_str()); } TEST_F(AubSubCaptureTest, 
givenSubCaptureManagerWhenAubCaptureFileNameIsUnspecifiedThenEmptyFileNameIsReturnedByDefault) { AubSubCaptureManagerMock aubSubCaptureManager("file_name.aub", subCaptureCommon); EXPECT_STREQ("", aubSubCaptureManager.getAubCaptureFileName().c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenItIsCreatedThenItIsInitializedWithDefaults) { std::string initialFileName = "initial_file_name.aub"; AubSubCaptureManagerMock aubSubCaptureManager(initialFileName, subCaptureCommon); EXPECT_EQ(AubSubCaptureCommon::SubCaptureMode::Off, subCaptureCommon.subCaptureMode); EXPECT_STREQ("", subCaptureCommon.subCaptureFilter.dumpKernelName.c_str()); EXPECT_EQ(0u, subCaptureCommon.subCaptureFilter.dumpKernelStartIdx); EXPECT_EQ(static_cast(-1), subCaptureCommon.subCaptureFilter.dumpKernelEndIdx); EXPECT_FALSE(aubSubCaptureManager.isSubCaptureMode()); EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive()); EXPECT_FALSE(aubSubCaptureManager.wasSubCaptureActiveInPreviousEnqueue()); EXPECT_EQ(0u, aubSubCaptureManager.getKernelCurrentIndex()); EXPECT_TRUE(aubSubCaptureManager.getUseToggleFileName()); EXPECT_STREQ(initialFileName.c_str(), aubSubCaptureManager.getInitialFileName().c_str()); EXPECT_STREQ("", aubSubCaptureManager.getCurrentFileName().c_str()); EXPECT_NE(nullptr, aubSubCaptureManager.getSettingsReader()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenCheckAndActivateSubCaptureIsCalledWithEmptyDispatchInfoThenSubCaptureIsInactive) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); MultiDispatchInfo dispatchInfo; auto status = aubSubCaptureManager.checkAndActivateSubCapture(dispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenCheckAndActivateSubCaptureIsCalledWithNonEmptyDispatchInfoThenKernelCurrentIndexIsIncremented) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo 
multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); uint32_t kernelCurrentIndex = aubSubCaptureManager.getKernelCurrentIndex(); ASSERT_EQ(0u, kernelCurrentIndex); aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_EQ(kernelCurrentIndex + 0, aubSubCaptureManager.getKernelCurrentIndex()); aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_EQ(kernelCurrentIndex + 1, aubSubCaptureManager.getKernelCurrentIndex()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenCheckAndActivateSubCaptureIsCalledButSubCaptureModeIsOffThenSubCaptureIsInactive) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Off; auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenCheckAndActivateSubCaptureIsCalledAndSubCaptureIsToggledOnThenSubCaptureGetsAndRemainsActivated) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManager.setSubCaptureToggleActive(true); auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_TRUE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenCheckAndActivateSubCaptureIsCalledButSubCaptureIsToggledOffThenSubCaptureRemainsDeactivated) { AubSubCaptureManagerMock 
aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManager.setSubCaptureToggleActive(false); auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenCheckAndActivateSubCaptureIsCalledButSubCaptureIsToggledOnAndOffThenSubCaptureGetsActivatedAndDeactivated) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManager.setSubCaptureToggleActive(true); auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); aubSubCaptureManager.setSubCaptureToggleActive(false); status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_TRUE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenCheckAndActivateSubCaptureIsCalledAndSubCaptureFilterIsDefaultThenSubCaptureIsActive) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, *pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; auto status = 
aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenCheckAndActivateSubCaptureIsCalledAndSubCaptureFilterWithValidKernelStartIndexIsSpecifiedThenSubCaptureGetsActivated) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, *pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelStartIdx = 0; auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenCheckAndActivateSubCaptureIsCalledAndSubCaptureFilterWithInvalidKernelStartIndexIsSpecifiedThenSubCaptureRemainsDeactivated) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, *pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelStartIdx = 1; auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenCheckAndActivateSubCaptureIsCalledAndSubCaptureFilterWithInvalidKernelEndIndexIsSpecifiedThenSubCaptureRemainsDeactivated) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, 
*pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelEndIdx = 0; subCaptureCommon.getKernelCurrentIndexAndIncrement(); auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenCheckAndActivateSubCaptureIsCalledAndSubCaptureFilterWithValidKernelNameIsSpecifiedThenSubCaptureGetsActivated) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, *pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = "kernel_name"; auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenCheckAndActivateSubCaptureIsCalledAndSubCaptureFilterWithInvalidKernelNameIsSpecifiedThenSubCaptureRemainsDeactivated) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, *pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = "invalid_kernel_name"; auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } 
TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenDeactivateSubCaptureIsCalledThenSubCaptureActiveStatesAreCleared) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); aubSubCaptureManager.setSubCaptureIsActive(true); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(true); aubSubCaptureManager.disableSubCapture(); EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive()); EXPECT_FALSE(aubSubCaptureManager.wasSubCaptureActiveInPreviousEnqueue()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureActiveStatesAreDeterminedThenIsSubCaptureFunctionReturnCorrectValues) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(false); aubSubCaptureManager.setSubCaptureIsActive(false); EXPECT_FALSE(aubSubCaptureManager.isSubCaptureEnabled()); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(false); aubSubCaptureManager.setSubCaptureIsActive(true); EXPECT_TRUE(aubSubCaptureManager.isSubCaptureEnabled()); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(true); aubSubCaptureManager.setSubCaptureIsActive(false); EXPECT_TRUE(aubSubCaptureManager.isSubCaptureEnabled()); aubSubCaptureManager.setSubCaptureIsActive(true); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(true); EXPECT_TRUE(aubSubCaptureManager.isSubCaptureEnabled()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureActiveStatesAreDeterminedThenGetSubCaptureStatusReturnsCorrectValues) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); AubSubCaptureStatus aubSubCaptureStatus{}; aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(false); aubSubCaptureManager.setSubCaptureIsActive(false); aubSubCaptureStatus = aubSubCaptureManager.getSubCaptureStatus(); EXPECT_FALSE(aubSubCaptureStatus.wasActiveInPreviousEnqueue); EXPECT_FALSE(aubSubCaptureStatus.isActive); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(false); 
aubSubCaptureManager.setSubCaptureIsActive(true); aubSubCaptureStatus = aubSubCaptureManager.getSubCaptureStatus(); EXPECT_FALSE(aubSubCaptureStatus.wasActiveInPreviousEnqueue); EXPECT_TRUE(aubSubCaptureStatus.isActive); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(true); aubSubCaptureManager.setSubCaptureIsActive(false); aubSubCaptureStatus = aubSubCaptureManager.getSubCaptureStatus(); EXPECT_TRUE(aubSubCaptureStatus.wasActiveInPreviousEnqueue); EXPECT_FALSE(aubSubCaptureStatus.isActive); aubSubCaptureManager.setSubCaptureIsActive(true); aubSubCaptureManager.setSubCaptureWasActiveInPreviousEnqueue(true); aubSubCaptureStatus = aubSubCaptureManager.getSubCaptureStatus(); EXPECT_TRUE(aubSubCaptureStatus.wasActiveInPreviousEnqueue); EXPECT_TRUE(aubSubCaptureStatus.isActive); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenGetSubCaptureFileNameIsCalledAndAubCaptureFileNameIsSpecifiedThenItReturnsTheSpecifiedFileName) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpCaptureFileName.set("aubcapture_file_name.aub"); AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); MultiDispatchInfo multiDispatchInfo; EXPECT_STREQ("aubcapture_file_name.aub", aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInOffModeWhenGetSubCaptureFileNameIsCalledThenItReturnsEmptyFileName) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_STREQ("", aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFileNameIsCalledAndToggleFileNameIsSpecifiedThenItReturnsItsName) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); std::string 
toggleFileName = "toggle_file_name.aub"; aubSubCaptureManager.setToggleFileName(toggleFileName); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; EXPECT_STREQ(toggleFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledAndToggleFileNameIsSpecifiedThenItReturnsItsName) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); std::string toggleFileName = "toggle_file_name.aub"; aubSubCaptureManager.setToggleFileName(toggleFileName); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; EXPECT_STREQ(toggleFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledAndBothToggleAndAubCaptureFileNamesAreSpecifiedThenToggleNameTakesPrecedence) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpCaptureFileName.set("aubcapture_file_name.aub"); AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); MultiDispatchInfo multiDispatchInfo; std::string toggleFileName = "toggle_file_name.aub"; aubSubCaptureManager.setToggleFileName(toggleFileName); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; EXPECT_STREQ(toggleFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFileNameIsCalledAndToggleFileNameIsNotSpecifiedThenItGeneratesFilterFileName) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); aubSubCaptureManager.setToggleFileName(""); 
subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; std::string filterFileName = aubSubCaptureManager.generateFilterFileName(); EXPECT_STREQ(filterFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledAndToggleFileNameIsNotSpecifiedThenItGeneratesToggleFileName) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, *pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); aubSubCaptureManager.setToggleFileName(""); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; std::string toggleFileName = aubSubCaptureManager.generateToggleFileName(multiDispatchInfo); EXPECT_STREQ(toggleFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledForEmptyDispatchInfoThenGenerateToggleFileNameWithoutKernelName) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); MultiDispatchInfo dispatchInfo; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; auto toggleFileName = aubSubCaptureManager.generateToggleFileName(dispatchInfo); EXPECT_STREQ(toggleFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFileNameIsCalledManyTimesAndToggleFileNameIsNotSpecifiedThenItGeneratesFilterFileNameOnceOnly) { struct AubSubCaptureManagerMockWithFilterFileNameGenerationCount : AubSubCaptureManager { using AubSubCaptureManager::AubSubCaptureManager; std::string generateFilterFileName() const override { generateFilterFileNameCount++; return "aubfile_filter.aub"; } 
mutable uint32_t generateFilterFileNameCount = 0; } aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo); aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo); aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo); EXPECT_EQ(1u, aubSubCaptureManager.generateFilterFileNameCount); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledManyTimesAndToggleFileNameIsNotSpecifiedThenItGeneratesToggleFileNameOnceOnly) { struct AubSubCaptureManagerMockWithToggleFileNameGenerationCount : AubSubCaptureManager { using AubSubCaptureManager::AubSubCaptureManager; std::string generateToggleFileName(const MultiDispatchInfo &dispatchInfo) const override { generateToggleFileNameCount++; return "aubfile_toggle.aub"; } mutable uint32_t generateToggleFileNameCount = 0; } aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo); aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo); aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo); EXPECT_EQ(1u, aubSubCaptureManager.generateToggleFileNameCount); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGenerateFilterFileNameIsCalledThenItGeneratesFileNameWithStartAndEndIndexes) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelStartIdx = 123; subCaptureCommon.subCaptureFilter.dumpKernelEndIdx = 456; std::string filterFileName = 
aubSubCaptureManager.generateFilterFileName(); EXPECT_NE(std::string::npos, filterFileName.find("from_123_to_456")); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGenerateFilterFileNameIsCalledAndKernelNameIsSpecifiedInFilterThenItGeneratesFileNameWithNameOfKernel) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); std::string kernelName = "kernel_name"; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = kernelName; std::string filterFileName = aubSubCaptureManager.generateFilterFileName(); EXPECT_NE(std::string::npos, filterFileName.find(kernelName)); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGenerateFilterFileNameIsCalledAndKernelNameIsSpecifiedInFilterThenItGeneratesFileNameWithStartAndEndIndexesOfKernel) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); std::string kernelName = "kernel_name"; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpKernelName = kernelName; subCaptureCommon.subCaptureFilter.dumpNamedKernelStartIdx = 12; subCaptureCommon.subCaptureFilter.dumpNamedKernelEndIdx = 17; std::string filterFileName = aubSubCaptureManager.generateFilterFileName(); EXPECT_NE(std::string::npos, filterFileName.find("from_12_to_17")); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGenerateToggleFileNameIsCalledThenItGeneratesFileNameWithKernelCurrentIndex) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); std::string kernelCurrentIndex = "from_" + std::to_string(aubSubCaptureManager.getKernelCurrentIndex()); MultiDispatchInfo dispatchInfo; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; std::string filterFileName = aubSubCaptureManager.generateToggleFileName(dispatchInfo); EXPECT_NE(std::string::npos, 
filterFileName.find(kernelCurrentIndex)); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGenerateToggleFileNameIsCalledAndDispatchInfoIsEmptyThenItGeneratesFileNameWithoutNameOfKernel) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); std::string kernelName = "kernel_name"; MultiDispatchInfo dispatchInfo; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; subCaptureCommon.subCaptureFilter.dumpKernelName = kernelName; std::string toggleFileName = aubSubCaptureManager.generateToggleFileName(dispatchInfo); EXPECT_EQ(std::string::npos, toggleFileName.find(kernelName)); } TEST_F(AubSubCaptureTest, givenMultiDispatchInfoWithMultipleKernelsWhenGenerateToggleFileNameThenPickMainKernel) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); KernelInfo mainKernelInfo = {}; mainKernelInfo.name = "main_kernel"; MockKernel mainKernel(program.get(), mainKernelInfo, *pClDevice); MockKernel kernel1(program.get(), kernelInfo, *pClDevice); MockKernel kernel2(program.get(), kernelInfo, *pClDevice); DispatchInfo mainDispatchInfo(&mainKernel, 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo dispatchInfo1(&kernel1, 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo dispatchInfo2(&kernel2, 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); MultiDispatchInfo multiDispatchInfo(&mainKernel); multiDispatchInfo.push(dispatchInfo1); multiDispatchInfo.push(mainDispatchInfo); multiDispatchInfo.push(dispatchInfo2); aubSubCaptureManager.setToggleFileName(""); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; std::string toggleFileName = aubSubCaptureManager.generateToggleFileName(multiDispatchInfo); EXPECT_NE(std::string::npos, toggleFileName.find(mainKernelInfo.name)); EXPECT_STREQ(toggleFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str()); } TEST_F(AubSubCaptureTest, 
givenSubCaptureManagerInFilterModeWhenKernelNameIsSpecifiedThenNamedKernelIndexesShouldApplyToTheSpecifiedKernel) { AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub", subCaptureCommon); std::string kernelName = "kernel_name"; subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; subCaptureCommon.subCaptureFilter.dumpNamedKernelStartIdx = 1; subCaptureCommon.subCaptureFilter.dumpNamedKernelEndIdx = 1; subCaptureCommon.subCaptureFilter.dumpKernelName = kernelName; DispatchInfo dispatchInfo; MockKernel kernel(program.get(), kernelInfo, *pClDevice); dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); subCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter; auto status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive()); status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive()); status = aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive()); } TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenPublicInterfaceIsCalledThenLockShouldBeAcquired) { AubSubCaptureManagerMock aubSubCaptureManager("", subCaptureCommon); DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); aubSubCaptureManager.isLocked = false; aubSubCaptureManager.isSubCaptureEnabled(); EXPECT_TRUE(aubSubCaptureManager.isLocked); aubSubCaptureManager.isLocked = false; aubSubCaptureManager.disableSubCapture(); EXPECT_TRUE(aubSubCaptureManager.isLocked); aubSubCaptureManager.isLocked = false; aubSubCaptureManager.checkAndActivateSubCapture(multiDispatchInfo); EXPECT_TRUE(aubSubCaptureManager.isLocked); aubSubCaptureManager.isLocked = false; 
aubSubCaptureManager.getSubCaptureStatus(); EXPECT_TRUE(aubSubCaptureManager.isLocked); aubSubCaptureManager.isLocked = false; aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo); EXPECT_TRUE(aubSubCaptureManager.isLocked); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/cmd_parse_tests.cpp000066400000000000000000000034401363734646600305070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "test.h" using namespace NEO; struct CommandParse : public DeviceFixture, public ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); } }; HWTEST_F(CommandParse, WhenGeneratingCommandBufferThenIsNotNull) { typedef typename FamilyType::PARSE PARSE; GenCmdList cmds; EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, nullptr, sizeof(void *))); } HWTEST_F(CommandParse, WhenGeneratingCommandBufferThenDoesNotContainGarbage) { typedef typename FamilyType::PARSE PARSE; uint32_t buffer[30] = {0xbaadf00d}; GenCmdList cmds; EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, buffer, sizeof(uint32_t))); } HWTEST_F(CommandParse, GivenGarbageWhenGeneratingCommandBufferThenLengthIsZero) { typedef typename FamilyType::PARSE PARSE; uint32_t buffer[30] = {0xbaadf00d}; EXPECT_EQ(0u, PARSE::getCommandLength(buffer)); } HWTEST_F(CommandParse, WhenGeneratingCommandBufferThenBufferIsCorrect) { typedef typename FamilyType::PARSE PARSE; typedef typename FamilyType::WALKER_TYPE WALKER_TYPE; GenCmdList cmds; WALKER_TYPE buffer = FamilyType::cmdInitGpgpuWalker; EXPECT_TRUE(PARSE::parseCommandBuffer(cmds, &buffer, 0)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, &buffer, 1)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, &buffer, 2)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, &buffer, 3)); EXPECT_FALSE(PARSE::parseCommandBuffer(cmds, 
&buffer, 4)); EXPECT_TRUE(PARSE::parseCommandBuffer(cmds, &buffer, sizeof(buffer))); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/command_stream_fixture.h000066400000000000000000000007271363734646600315410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue.h" #include namespace NEO { struct CommandStreamFixture { void SetUp(CommandQueue *pCmdQ) { pCS = &pCmdQ->getCS(1024); pCmdBuffer = pCS->getCpuBase(); } virtual void TearDown() { } LinearStream *pCS = nullptr; void *pCmdBuffer = nullptr; }; } // namespace NEO command_stream_receiver_flush_task_1_tests.cpp000066400000000000000000001354571363734646600360310ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_helper.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" #include "test.h" using namespace NEO; typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskTests; HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldSeeCommandsOnFirstFlush) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenForceCsrReprogrammingDebugVariableSetWhenFlushingThenInitProgrammingFlagsShouldBeCalled) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DebugManager.flags.ForceCsrReprogramming.set(true); flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.initProgrammingFlagsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenForceCsrFlushingDebugVariableSetWhenFlushingThenFlushBatchedSubmissionsShouldBeCalled) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DebugManager.flags.ForceCsrFlushing.set(true); flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.flushBatchedSubmissionsCalled); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenOverrideThreadArbitrationPolicyDebugVariableSetWhenFlushingThenRequestRequiredMode) { DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; commandStreamReceiver.lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; DebugManager.flags.OverrideThreadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); flushTask(commandStreamReceiver); EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, commandStreamReceiver.lastSentThreadArbitrationPolicy); } HWTEST_F(CommandStreamReceiverFlushTaskTests, taskCountShouldBeUpdated) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenconfigureCSRtoNonDirtyStateWhenFlushTaskIsCalledThenNoCommandsAreAdded) { configureCSRtoNonDirtyState(); auto &commandStreamReceiver = 
pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); EXPECT_EQ(0u, commandStreamReceiver.commandStream.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMultiOsContextCommandStreamReceiverWhenFlushTaskIsCalledThenCommandStreamReceiverStreamIsUsed) { configureCSRtoNonDirtyState(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.multiOsContextCapable = true; commandStream.getSpace(4); flushTask(commandStreamReceiver); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.commandStream.getUsed()); auto batchBufferStart = genCmdCast(commandStreamReceiver.commandStream.getCpuBase()); EXPECT_NE(nullptr, batchBufferStart); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr.timestampPacketWriteEnabled = false; configureCSRtoNonDirtyState(); mockCsr.getCS(1024u); auto &csrCommandStream = mockCsr.commandStream; //we do level change that will emit PPC, fill all the space so only BB end fits. 
taskLevel++; auto ppcSize = MemorySynchronizationCommands::getSizeForSinglePipeControl(); auto fillSize = MemoryConstants::cacheLineSize - ppcSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END); csrCommandStream.getSpace(fillSize); auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize; flushTask(mockCsr); EXPECT_EQ(expectedUsedSize, mockCsr.commandStream.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCommandStreamThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); configureCSRtoNonDirtyState(); auto fillSize = MemoryConstants::cacheLineSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END); commandStream.getSpace(fillSize); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize; EXPECT_EQ(expectedUsedSize, commandStream.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenflushTaskThenDshAndIohNotEvictable) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(dsh.getGraphicsAllocation()->peekEvictable(), true); EXPECT_EQ(ssh.getGraphicsAllocation()->peekEvictable(), true); EXPECT_EQ(ioh.getGraphicsAllocation()->peekEvictable(), true); dsh.getGraphicsAllocation()->setEvictable(false); EXPECT_EQ(dsh.getGraphicsAllocation()->peekEvictable(), false); dsh.getGraphicsAllocation()->setEvictable(true); 
EXPECT_EQ(dsh.getGraphicsAllocation()->peekEvictable(), true); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndMidThreadPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) { auto &mockCsr = pDevice->getUltCommandStreamReceiver(); mockCsr.overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr.submissionAggregator.reset(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionMode::MidThread; mockCsr.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto sipAllocation = pDevice->getBuiltIns()->getSipKernel(SipKernelType::Csr, *pDevice).getSipAllocation(); bool found = false; for (auto allocation : cmdBuffer->surfaces) { if (allocation == sipAllocation) { found = true; break; } } EXPECT_TRUE(found); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeAndMidThreadPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionMode::MidThread; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto sipAllocation = pDevice->getBuiltIns()->getSipKernel(SipKernelType::Csr, *pDevice).getSipAllocation(); bool found = false; for (auto allocation : mockCsr->copyOfAllocations) { if (allocation == sipAllocation) { found = true; break; } } EXPECT_TRUE(found); } HWTEST_F(CommandStreamReceiverFlushTaskTests, sameTaskLevelShouldntSendAPipeControl) { auto 
&commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Configure the CSR to not need to submit any state or commands. configureCSRtoNonDirtyState(); flushTask(commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver.taskLevel); auto sizeUsed = commandStreamReceiver.commandStream.getUsed(); EXPECT_EQ(sizeUsed, 0u); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDeviceWithThreadGroupPreemptionSupportThenDontSendMediaVfeStateIfNotDirty) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::ThreadGroup)); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); pDevice->resetCommandStreamReceiver(commandStreamReceiver); // Configure the CSR to not need to submit any state or commands. configureCSRtoNonDirtyState(); flushTask(*commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver->peekTaskLevel()); auto sizeUsed = commandStreamReceiver->commandStream.getUsed(); EXPECT_EQ(0u, sizeUsed); } HWTEST_F(CommandStreamReceiverFlushTaskTests, higherTaskLevelShouldSendAPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.timestampPacketWriteEnabled = false; configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel / 2; flushTask(commandStreamReceiver); EXPECT_EQ(taskLevel, commandStreamReceiver.peekTaskLevel()); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWithInstructionCacheRequestWhenFlushTaskIsCalledThenPipeControlWithInstructionCacheIsEmitted) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
configureCSRtoNonDirtyState(); commandStreamReceiver.registerInstructionCacheFlush(); EXPECT_EQ(1u, commandStreamReceiver.recursiveLockCounter); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto pipeControlCmd = reinterpret_cast(*itorPC); EXPECT_TRUE(pipeControlCmd->getInstructionCacheInvalidateEnable()); EXPECT_FALSE(commandStreamReceiver.requiresInstructionCacheFlush); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenHigherTaskLevelWhenTimestampPacketWriteIsEnabledThenDontAddPipeControl) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = true; commandStreamReceiver.isPreambleSent = true; configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel; taskLevel++; // submit with higher taskLevel flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushNotRequiredThenDontSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired); configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel; waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; flushTask(commandStreamReceiver); EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, 
commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeThenSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel; NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC; EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeAndWaSamplerCacheFlushBetweenRedescribedSurfaceReadsDasabledThenDontSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel; NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = false; flushTask(commandStreamReceiver); 
EXPECT_EQ(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushAfterThenSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter); configureCSRtoNonDirtyState(); commandStreamReceiver.taskLevel = taskLevel; NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired, commandStreamReceiver.samplerCacheFlushRequired); parseCommands(commandStreamReceiver.commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*itorPC; EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStampValid) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); //simulate our CQ is stale for 10 TL's commandStreamReceiver.taskLevel = taskLevel + 10; auto completionStamp = flushTask(commandStreamReceiver); EXPECT_EQ(completionStamp.taskLevel, commandStreamReceiver.peekTaskLevel()); EXPECT_EQ(completionStamp.taskCount, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(completionStamp.flushStamp, commandStreamReceiver.flushStamp->peekStamp()); } 
HWTEST_F(CommandStreamReceiverFlushTaskTests, completionStamp) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto completionStamp = flushTask(commandStreamReceiver); EXPECT_EQ(1u, completionStamp.taskCount); EXPECT_EQ(taskLevel, completionStamp.taskLevel); EXPECT_EQ(commandStreamReceiver.flushStamp->peekStamp(), completionStamp.flushStamp); } HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressTracking) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); EXPECT_FALSE(commandStreamReceiver.dshState.updateAndCheck(&dsh)); EXPECT_FALSE(commandStreamReceiver.iohState.updateAndCheck(&ioh)); EXPECT_FALSE(commandStreamReceiver.sshState.updateAndCheck(&ssh)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressProgrammingShouldMatchTracking) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto gmmHelper = pDevice->getGmmHelper(); auto stateHeapMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER); auto l3CacheOnMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); flushTask(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver.commandStream; HardwareParse::parseCommands(commandStreamCSR, 0); HardwareParse::findHardwareCommands(); ASSERT_NE(nullptr, cmdStateBaseAddress); auto &cmd = *reinterpret_cast(cmdStateBaseAddress); EXPECT_EQ(dsh.getCpuBase(), reinterpret_cast(cmd.getDynamicStateBaseAddress())); EXPECT_EQ(commandStreamReceiver.getMemoryManager()->getInternalHeapBaseAddress(commandStreamReceiver.rootDeviceIndex), cmd.getInstructionBaseAddress()); EXPECT_EQ(ioh.getCpuBase(), reinterpret_cast(cmd.getIndirectObjectBaseAddress())); EXPECT_EQ(ssh.getCpuBase(), reinterpret_cast(cmd.getSurfaceStateBaseAddress())); EXPECT_EQ(l3CacheOnMocs, cmd.getStatelessDataPortAccessMemoryObjectControlState()); EXPECT_EQ(stateHeapMocs, 
cmd.getInstructionMemoryObjectControlState()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlush) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor); EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*pipeControlItor; EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, preambleShouldBeSentIfNeverSent) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = false; flushTask(commandStreamReceiver); EXPECT_TRUE(commandStreamReceiver.isPreambleSent); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenFlushTaskWhenInitProgrammingFlagsIsCalledThenBindingTableBaseAddressRequiredIsSetCorrecty) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.initProgrammingFlags(); EXPECT_TRUE(commandStreamReceiver.bindingTableBaseAddressRequired); flushTask(commandStreamReceiver); EXPECT_FALSE(commandStreamReceiver.bindingTableBaseAddressRequired); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenFlushTaskWhenInitProgrammingFlagsIsNotCalledThenBindingTableBaseAddressRequiredIsSetCorrectly) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
EXPECT_FALSE(commandStreamReceiver.bindingTableBaseAddressRequired); flushTask(commandStreamReceiver); EXPECT_FALSE(commandStreamReceiver.bindingTableBaseAddressRequired); } HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementChanged) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = false; commandStreamReceiver.lastMediaSamplerConfig = -1; flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); EXPECT_NE(nullptr, getCommand()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfNeverSentPreambleAndMediaSamplerRequirementNotChanged) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = false; commandStreamReceiver.lastMediaSamplerConfig = 0; flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); EXPECT_NE(nullptr, getCommand()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldNotBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastMediaSamplerConfig = 0; flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.is3DPipelineSelectWARequired(pDevice->getHardwareInfo())) { EXPECT_NE(nullptr, getCommand()); } else { EXPECT_EQ(nullptr, getCommand()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, pipelineSelectShouldBeSentIfSentPreambleAndMediaSamplerRequirementDoesntChanged) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastMediaSamplerConfig = 1; flushTask(commandStreamReceiver); 
parseCommands(commandStreamReceiver.commandStream, 0); EXPECT_NE(nullptr, getCommand()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfNeverSent) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setMediaVFEStateDirty(false); flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldBeSentIfSizeChanged) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto dshSize = dsh.getMaxAvailableSpace(); auto iohSize = ioh.getMaxAvailableSpace(); auto sshSize = ssh.getMaxAvailableSpace(); dsh.replaceBuffer(dsh.getCpuBase(), 0); ioh.replaceBuffer(ioh.getCpuBase(), 0); ssh.replaceBuffer(ssh.getCpuBase(), 0); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setMediaVFEStateDirty(false); configureCSRHeapStatesToNonDirty(); dsh.replaceBuffer(dsh.getCpuBase(), dshSize); ioh.replaceBuffer(ioh.getCpuBase(), iohSize); ssh.replaceBuffer(ssh.getCpuBase(), sshSize); flushTask(commandStreamReceiver); EXPECT_GT(commandStreamReceiver.commandStream.getUsed(), 0u); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(); dsh.replaceBuffer(nullptr, 0); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), 
stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenSshHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(); ssh.replaceBuffer(nullptr, 0); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenIohHeapChangeWhenFlushTaskIsCalledThenSbaIsReloaded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(); ioh.replaceBuffer(nullptr, 0); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), stateBaseAddressItor); } HWTEST_F(CommandStreamReceiverFlushTaskTests, stateBaseAddressShouldNotBeSentIfTheSame) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; configureCSRHeapStatesToNonDirty(); flushTask(commandStreamReceiver); auto base = commandStreamReceiver.commandStream.getCpuBase(); auto stateBaseAddress = base ? 
genCmdCast(base) : nullptr; EXPECT_EQ(nullptr, stateBaseAddress); } HWTEST_F(CommandStreamReceiverFlushTaskTests, shouldntAddAnyCommandsToCQCSIfEmpty) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto usedBefore = commandStream.getUsed(); flushTask(commandStreamReceiver); EXPECT_EQ(usedBefore, commandStream.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingflushTaskAddsPCToClient) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto blocking = true; flushTask(commandStreamReceiver, blocking); parseCommands(commandStream, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, blockingFlushWithNoPreviousDependencies) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); taskLevel = 5; commandStreamReceiver.taskLevel = 6; auto blocking = true; flushTask(commandStreamReceiver, blocking); EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel()); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, nonblockingFlushWithNoPreviousDependencies) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); taskLevel = 5; commandStreamReceiver.taskLevel = 6; auto blocking = false; flushTask(commandStreamReceiver, blocking); EXPECT_EQ(6u, commandStreamReceiver.peekTaskLevel()); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreamble) { typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; // 
Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.lastSentCoherencyRequest = 0; auto l3Config = PreambleHelper::getL3Config(pDevice->getHardwareInfo(), false); commandStreamReceiver.lastSentL3Config = l3Config; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeededForPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); sizeNeeded -= sizeof(MI_BATCH_BUFFER_START); // no task to submit sizeNeeded += sizeof(MI_BATCH_BUFFER_END); // no task to submit, add BBE to CSR stream sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble); flushTask(commandStreamReceiver); EXPECT_EQ(sizeNeeded, csrCS.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreambleAndSba) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.lastSentCoherencyRequest = 0; auto l3Config = PreambleHelper::getL3Config(pDevice->getHardwareInfo(), false); commandStreamReceiver.lastSentL3Config = l3Config; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeededForPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); size_t sizeNeededForStateBaseAddress = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); 
sizeNeeded -= sizeof(MI_BATCH_BUFFER_START); // no task to submit sizeNeeded += sizeof(MI_BATCH_BUFFER_END); // no task to submit, add BBE to CSR stream sizeNeeded = alignUp(sizeNeeded, MemoryConstants::cacheLineSize); csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeededForPreamble - sizeNeededForStateBaseAddress); flushTask(commandStreamReceiver); EXPECT_EQ(sizeNeeded, csrCS.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithOnlyEnoughMemoryForPreambleSbaAndPc) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; commandStream.getSpace(sizeof(PIPE_CONTROL)); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.lastSentCoherencyRequest = 0; auto l3Config = PreambleHelper::getL3Config(pDevice->getHardwareInfo(), false); commandStreamReceiver.lastSentL3Config = l3Config; auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSizeAligned(flushTaskFlags, *pDevice); csrCS.getSpace(csrCS.getAvailableSpace() - sizeNeeded); auto expectedBase = csrCS.getCpuBase(); // This case handles when we have *just* enough space auto expectedUsed = csrCS.getUsed() + sizeNeeded; flushTask(commandStreamReceiver, flushTaskFlags.blocking, 0, flushTaskFlags.requiresCoherency, flushTaskFlags.lowPriority); // Verify that we didn't grab a new CS buffer EXPECT_EQ(expectedUsed, csrCS.getUsed()); EXPECT_EQ(expectedBase, csrCS.getCpuBase()); } template struct CommandStreamReceiverHwLog : public UltCommandStreamReceiver { CommandStreamReceiverHwLog(ExecutionEnvironment &executionEnvironment, uint32_t 
rootDeviceIndex) : UltCommandStreamReceiver(executionEnvironment, rootDeviceIndex), flushCount(0) { } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { ++flushCount; return true; } int flushCount; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithBothCSCallsFlushOnce) { CommandStreamReceiverHwLog commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); commandStreamReceiver.initializeTagAllocation(); commandStreamReceiver.createPreemptionAllocation(); commandStream.getSpace(sizeof(typename FamilyType::MI_NOOP)); commandStreamReceiver.setupContext(*pDevice->getDefaultEngine().osContext); flushTask(commandStreamReceiver); EXPECT_EQ(1, commandStreamReceiver.flushCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, flushTaskWithBothCSCallsChainsWithBatchBufferStart) { typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_NOOP MI_NOOP; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Reserve space for 16 NOOPs commandStream.getSpace(16 * sizeof(MI_NOOP)); // Submit starting at 8 NOOPs size_t startOffset = 8 * sizeof(MI_NOOP); flushTask(commandStreamReceiver, false, startOffset); // Locate the MI_BATCH_BUFFER_START parseCommands(commandStreamReceiver.commandStream, 0); auto itorBBS = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBBS); auto bbs = genCmdCast(*itorBBS); ASSERT_NE(nullptr, bbs); // Expect to see address based on startOffset of task auto expectedAddress = static_cast(reinterpret_cast(ptrOffset(commandStream.getCpuBase(), startOffset))); EXPECT_EQ(expectedAddress, bbs->getBatchBufferStartAddressGraphicsaddress472()); // MI_BATCH_BUFFER_START from UMD must be PPGTT for security reasons EXPECT_EQ(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT, bbs->getAddressSpaceIndicator()); } typedef Test CommandStreamReceiverCQFlushTaskTests; 
HWTEST_F(CommandStreamReceiverCQFlushTaskTests, getCSShouldReturnACSWithEnoughSizeCSRTraffic) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); // NOTE: This test attempts to reserve the maximum amount // of memory such that if a client gets everything he wants // we don't overflow/corrupt memory when CSR appends its // work. size_t sizeCQReserves = CSRequirements::minCommandQueueCommandStreamSize; size_t sizeRequested = MemoryConstants::pageSize64k - sizeCQReserves; auto &commandStream = commandQueue.getCS(sizeRequested); auto expect = alignUp(sizeRequested + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k); ASSERT_GE(expect, commandStream.getMaxAvailableSpace()); EXPECT_GE(commandStream.getAvailableSpace(), sizeRequested); commandStream.getSpace(sizeRequested - sizeCQReserves); MockGraphicsAllocation allocation((void *)MemoryConstants::pageSize64k, 1); IndirectHeap linear(&allocation); auto blocking = true; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.blocking = blocking; commandStreamReceiver.flushTask( commandStream, 0, linear, linear, linear, 1, dispatchFlags, *pDevice); auto expectedSize = MemoryConstants::pageSize64k - sizeCQReserves; if (::renderCoreFamily == IGFX_GEN8_CORE) { expectedSize -= sizeof(typename FamilyType::PIPE_CONTROL); } expectedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize); auto currentUsed = commandStream.getUsed(); EXPECT_EQ(0u, currentUsed % MemoryConstants::cacheLineSize); //depending on the size of commands we may need whole additional cacheline for alignment if (currentUsed != expectedSize) { EXPECT_EQ(expectedSize - MemoryConstants::cacheLineSize, currentUsed); } else { EXPECT_EQ(expectedSize, currentUsed); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
blockingFlushTaskWithOnlyPipeControl) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); // Configure the CSR to not need to submit any state or commands configureCSRtoNonDirtyState(); // Force a PIPE_CONTROL through a blocking flag auto blocking = true; auto &commandStreamTask = commandQueue.getCS(1024); auto &commandStreamCSR = commandStreamReceiver->getCS(); commandStreamReceiver->lastSentCoherencyRequest = 0; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.blocking = blocking; dispatchFlags.guardCommandBufferWithPipeControl = true; commandStreamReceiver->flushTask( commandStreamTask, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); // Verify that taskCS got modified, while csrCS remained intact EXPECT_GT(commandStreamTask.getUsed(), 0u); EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Parse command list to verify that PC got added to taskCS cmdList.clear(); parseCommands(commandStreamTask, 0); auto itorTaskCS = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorTaskCS); // Parse command list to verify that PC wasn't added to csrCS cmdList.clear(); parseCommands(commandStreamCSR, 0); auto numberOfPC = getCommandsList().size(); EXPECT_EQ(0u, numberOfPC); } HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskBlockingHasPipeControlWithDCFlush) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); configureCSRtoNonDirtyState(); auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); size_t pipeControlCount = 
UltMemorySynchronizationCommands::getExpectedPipeControlCount(pDevice->getHardwareInfo()); auto &commandStreamTask = commandQueue.getCS(1024); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.blocking = true; dispatchFlags.dcFlush = true; dispatchFlags.guardCommandBufferWithPipeControl = true; commandStreamReceiver.flushTask( commandStreamTask, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_FALSE(pCmdWA->getDcFlushEnable()); if (pipeControlCount > 1) { // Search taskCS for PC to analyze itorPC = find(++itorPC, cmdList.end()); auto pipeControlTask = genCmdCast(*itorPC); ASSERT_NE(nullptr, pipeControlTask); // Verify that the dcFlushEnabled bit is not set in PC auto pCmd = reinterpret_cast(pipeControlTask); EXPECT_TRUE(pCmd->getDcFlushEnable()); } } else { auto pCmd = reinterpret_cast(*itorPC); EXPECT_TRUE(pCmd->getDcFlushEnable()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlushWhenUnblockedThenDCFlushIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 1, 2}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = 
commandStreamReceiver.getCS(); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 1, &blockingEvent, 0); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream; cmdList.clear(); // Parse command list parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPC++; itorPC = find(itorPC, cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_TRUE(pCmdWA->getDcFlushEnable()); buffer->release(); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWhenCallFlushTaskThenThreadArbitrationPolicyIsSetProperly) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); uint32_t beforeFlushRequiredThreadArbitrationPolicy = mockCsr->requiredThreadArbitrationPolicy; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(UnitTestHelper::getAppropriateThreadArbitrationPolicy(beforeFlushRequiredThreadArbitrationPolicy), mockCsr->requiredThreadArbitrationPolicy); dispatchFlags.threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; mockCsr->requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(UnitTestHelper::getAppropriateThreadArbitrationPolicy(dispatchFlags.threadArbitrationPolicy), 
mockCsr->requiredThreadArbitrationPolicy); } command_stream_receiver_flush_task_2_tests.cpp000066400000000000000000001616531363734646600360270ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" #include "test.h" #include "reg_configs_common.h" using namespace NEO; typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskTests; HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFlushWhenUnblockedThenDCFlushIsNotAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto 
&commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.timestampPacketWriteEnabled = false; cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 1, 2}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); auto &commandStreamCSR = commandStreamReceiver.getCS(); commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 1, &blockingEvent, 0); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream; cmdList.clear(); // Parse command list parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { itorPC++; itorPC = find(itorPC, cmdList.end()); EXPECT_NE(cmdList.end(), itorPC); } // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); EXPECT_TRUE(pCmdWA->getDcFlushEnable()); buffer->release(); } HWTEST_F(CommandStreamReceiverFlushTaskTests, FlushTaskWithTaskCSPassedAsCommandStreamParam) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &commandStreamTask = commandQueue.getCS(1024); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); // Pass taskCS as command stream parameter auto cs = commandStreamReceiver.flushTask( commandStreamTask, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); // Verify that flushTask returned a valid completion stamp EXPECT_EQ(commandStreamReceiver.peekTaskCount(), cs.taskCount); EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), cs.taskLevel); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
TrackSentTagsWhenEmptyQueue) { MockContext ctx(pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); uint32_t taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); mockCmdQueue.finish(); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); mockCmdQueue.finish(); //nothings sent to the HW, no need to bump tags EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(0u, mockCmdQueue.latestTaskCountWaited); } HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenNonDcFlushWithInitialTaskCountZero) { MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t GWS = 1; uint32_t taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // finish after enqueued kernel(cmdq task count = 1) mockCmdQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); // finish again - dont call flush task mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenDcFlush) { MockContext ctx(pClDevice); 
MockKernelWithInternals kernel(*pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t GWS = 1; size_t tempBuffer[] = {0, 1, 2}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); uint32_t taskCount = 0; taskLevel = taskCount; mockCmdQueue.taskCount = taskCount; mockCmdQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // finish(dcFlush=true) from blocking MapBuffer after enqueued kernel mockCmdQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); auto ptr = mockCmdQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // cmdQ task count = 2, finish again mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // finish again - dont flush task again mockCmdQueue.finish(); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // finish(dcFlush=true) from MapBuffer again - dont call FinishTask n finished queue retVal = mockCmdQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); ptr = mockCmdQueue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); //cleanup retVal = mockCmdQueue.enqueueUnmapMemObject(buffer, ptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_SUCCESS); retVal = clReleaseMemObject(buffer); EXPECT_EQ(retVal, CL_SUCCESS); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, 
givenPowerOfTwoGlobalWorkSizeAndNullLocalWorkgroupSizeWhenEnqueueKernelIsCalledThenGpGpuWalkerHasOptimalSIMDmask) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); size_t GWS = 1024; CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); auto &commandStreamTask = commandQueue.getCS(1024); parseCommands(commandStreamTask, 0); auto itorCmd = find(cmdList.begin(), cmdList.end()); ASSERT_NE(itorCmd, cmdList.end()); auto cmdGpGpuWalker = genCmdCast(*itorCmd); //execution masks should be all active EXPECT_EQ(0xffffffffu, cmdGpGpuWalker->getBottomExecutionMask()); EXPECT_EQ(0xffffffffu, cmdGpGpuWalker->getRightExecutionMask()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, TrackSentTagsWhenEventIsQueried) { MockContext ctx(pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); cl_event event = nullptr; Event *pEvent; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {5, 5, 5}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, &event); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); pEvent = (Event *)event; retVal = Event::waitForEvents(1, &event); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); retVal = clReleaseEvent(pEvent); retVal = clReleaseMemObject(buffer); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
GivenNonBlockingMapWhenFinishIsCalledThenNothingIsSubmittedToTheHardware) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); size_t tempBuffer[] = {0, 1, 2}; cl_int retVal; AlignedBuffer mockBuffer; uint32_t taskCount = 0; taskLevel = taskCount; commandQueue.taskCount = taskCount; commandQueue.taskLevel = taskCount; commandStreamReceiver.taskLevel = taskCount; commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); auto ptr = commandQueue.enqueueMapBuffer(&mockBuffer, CL_FALSE, CL_MAP_READ, 0, sizeof(tempBuffer), 0, nullptr, nullptr, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(nullptr, ptr); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); commandQueue.finish(); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); auto &commandStreamTask = commandQueue.getCS(1024); parseCommands(commandStreamTask, 0); auto itorPC = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorPC); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenFlushedCallRequiringDCFlushWhenBlockingEnqueueIsCalledThenPipeControlWithDCFlushIsAdded) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; MockContext ctx(pClDevice); MockCommandQueueHw mockCmdQueue(&ctx, pClDevice, nullptr); cl_event event = nullptr; auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); auto &commandStreamTask = mockCmdQueue.getCS(1024); size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {5, 5, 5}; cl_int retVal; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, commandStreamReceiver.peekLatestSentTaskCount()); // Call requiring DCFlush, nonblocking buffer->forceDisallowCPUCopy = true; mockCmdQueue.enqueueReadBuffer(buffer, CL_FALSE, 
0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, 0); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); mockCmdQueue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, &event); EXPECT_EQ(2u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(2u, mockCmdQueue.latestTaskCountWaited); // Parse command list to verify that PC was added to taskCS cmdList.clear(); parseCommands(commandStreamTask, 0); auto itorWalker = find(cmdList.begin(), cmdList.end()); auto itorCmd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorCmd); auto cmdPC = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdPC); if (UnitTestHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { // SKL: two PIPE_CONTROLs following GPGPU_WALKER: first has DcFlush and second has Write HwTag EXPECT_FALSE(cmdPC->getDcFlushEnable()); auto itorCmdP = ++((GenCmdList::iterator)itorCmd); EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmdPC = (PIPE_CONTROL *)*itorCmd2; EXPECT_TRUE(cmdPC->getDcFlushEnable()); } else { // single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag EXPECT_TRUE(cmdPC->getDcFlushEnable()); } retVal = clReleaseEvent(event); retVal = clReleaseMemObject(buffer); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(PreambleHelper::getDefaultThreadArbitrationPolicy(), commandStreamReceiver.peekThreadArbitrationPolicy()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenKernelWithSlmWhenPreviousSLML3WasSentThenDontProgramL3) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, 
pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, true); // Mark Pramble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = L3Config; commandStreamReceiver->lastSentThreadArbitrationPolicy = kernel.mockKernel->getThreadArbitrationPolicy(); ((MockKernel *)kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list to verify that PC was added to taskCS parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); EXPECT_EQ(cmdList.end(), itorCmd); } HWTEST_F(CommandStreamReceiverFlushTaskTests, CreateCommandStreamReceiverHw) { DebugManagerStateRestore dbgRestorer; auto csrHw = CommandStreamReceiverHw::create(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, csrHw); GmmPageTableMngr *ptm = csrHw->createPageTableManager(); EXPECT_EQ(nullptr, ptm); delete csrHw; DebugManager.flags.SetCommandStreamReceiver.set(0); int32_t GetCsr = DebugManager.flags.SetCommandStreamReceiver.get(); EXPECT_EQ(0, GetCsr); auto csr = NEO::createCommandStream(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, csr); delete csr; DebugManager.flags.SetCommandStreamReceiver.set(0); } HWTEST_F(CommandStreamReceiverFlushTaskTests, handleTagAndScratchAllocationsResidencyOnEachFlush) { auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(1024, 0); // whatever > 0 flushTask(*commandStreamReceiver); auto tagAllocation = commandStreamReceiver->getTagAllocation(); auto 
scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(tagAllocation, nullptr); ASSERT_NE(scratchAllocation, nullptr); EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); // call makeResident on tag and scratch allocations per each flush // DONT skip residency calls when scratch allocation is the same(new required size <= previous size) commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); flushTask(*commandStreamReceiver); // 2nd flush auto NewScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_EQ(scratchAllocation, NewScratchAllocation); // Allocation unchanged. Dont skip residency handling EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndFlushTaskIsCalledThenFenceAlocationIsMadeResident) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto globalFenceAllocation = commandStreamReceiver->globalFenceAllocation; ASSERT_NE(globalFenceAllocation, nullptr); EXPECT_FALSE(commandStreamReceiver->isMadeResident(globalFenceAllocation)); EXPECT_FALSE(commandStreamReceiver->isMadeNonResident(globalFenceAllocation)); flushTask(*commandStreamReceiver); 
EXPECT_TRUE(commandStreamReceiver->isMadeResident(globalFenceAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(globalFenceAllocation)); } struct MockScratchController : public ScratchSpaceController { using ScratchSpaceController::privateScratchAllocation; using ScratchSpaceController::scratchAllocation; using ScratchSpaceController::ScratchSpaceController; void setRequiredScratchSpace(void *sshBaseAddress, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { if (requiredPerThreadScratchSize > scratchSizeBytes) { scratchSizeBytes = requiredPerThreadScratchSize; scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadScratchSize}); } if (requiredPerThreadPrivateScratchSize > privateScratchSizeBytes) { privateScratchSizeBytes = requiredPerThreadPrivateScratchSize; privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadPrivateScratchSize}); } } uint64_t calculateNewGSH() override { return 0u; }; uint64_t getScratchPatchAddress() override { return 0u; }; void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override{}; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, whenScratchIsRequiredForFirstFlushAndPrivateScratchForSecondFlushThenHandleResidencyProperly) { auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto scratchController = new MockScratchController(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *commandStreamReceiver->getInternalAllocationStorage()); commandStreamReceiver->scratchSpaceController.reset(scratchController); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(1024, 0); 
flushTask(*commandStreamReceiver); EXPECT_NE(nullptr, scratchController->scratchAllocation); EXPECT_EQ(nullptr, scratchController->privateScratchAllocation); auto scratchAllocation = scratchController->scratchAllocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); commandStreamReceiver->setRequiredScratchSizes(0, 1024); flushTask(*commandStreamReceiver); // 2nd flush EXPECT_NE(nullptr, scratchController->scratchAllocation); EXPECT_NE(nullptr, scratchController->privateScratchAllocation); auto privateScratchAllocation = scratchController->privateScratchAllocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPrivateScratchIsRequiredForFirstFlushAndCommonScratchForSecondFlushThenHandleResidencyProperly) { auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto scratchController = new MockScratchController(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *commandStreamReceiver->getInternalAllocationStorage()); commandStreamReceiver->scratchSpaceController.reset(scratchController); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(0, 1024); flushTask(*commandStreamReceiver); EXPECT_EQ(nullptr, scratchController->scratchAllocation); EXPECT_NE(nullptr, scratchController->privateScratchAllocation); auto privateScratchAllocation = scratchController->privateScratchAllocation; 
EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); commandStreamReceiver->setRequiredScratchSizes(1024, 0); flushTask(*commandStreamReceiver); // 2nd flush EXPECT_NE(nullptr, scratchController->scratchAllocation); EXPECT_NE(nullptr, scratchController->privateScratchAllocation); auto scratchAllocation = scratchController->scratchAllocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecutiveNDRangeKernelsStateBaseAddressIsProgrammedOnceAndScratchAddressInMediaVFEStateIsProgrammedTwiceBothWithCorrectAddress) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); size_t GWS = 1; uint32_t scratchSize = 1024; SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = scratchSize; kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); auto deviceInfo = pClDevice->getDeviceInfo(); auto sharedDeviceInfo = pDevice->getDeviceInfo(); if (sharedDeviceInfo.force32BitAddressess) { 
EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } cmdList.clear(); // Parse command list parseCommands(commandQueue); // All state should be programmed before walker auto itorCmdForStateBase = itorStateBaseAddress; auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState; auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); uint64_t graphicsAddress = 0; // Get address ( offset in 32 bit addressing ) of sratch graphicsAddress = (uint64_t)graphicsAllocationScratch->getGpuAddressToPatch(); if (sharedDeviceInfo.force32BitAddressess && is64bit) { EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation()); EXPECT_EQ(GmmHelper::decanonize(graphicsAllocationScratch->getGpuAddress()) - GSHaddress, graphicsAddress); } else if (!deviceInfo.svmCapabilities && is64bit) { EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, mediaVfeState->getScratchSpaceBasePointer()); EXPECT_EQ(GSHaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, graphicsAllocationScratch->getGpuAddressToPatch()); } else { EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress); } uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff); uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff); uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); if (is64bit && !sharedDeviceInfo.force32BitAddressess) { uint64_t expectedAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; 
EXPECT_EQ(expectedAddress, scratchBaseLowPart); EXPECT_EQ(0u, scratchBaseHighPart); } else { EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart); EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart); } if (sharedDeviceInfo.force32BitAddressess) { EXPECT_EQ(pDevice->getMemoryManager()->getExternalHeapBaseAddress(graphicsAllocationScratch->getRootDeviceIndex()), GSHaddress); } else { if (is64bit) { EXPECT_EQ(graphicsAddress - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, GSHaddress); } else { EXPECT_EQ(0u, GSHaddress); } } //now re-try to see if SBA is not programmed scratchSize *= 2; mediaVFEstate.PerThreadScratchSpace = scratchSize; commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list parseCommands(commandQueue); itorCmdForStateBase = find(itorWalker, cmdList.end()); // In 32 Bit addressing sba shouldn't be reprogrammed if (sharedDeviceInfo.force32BitAddressess == true) { EXPECT_EQ(itorCmdForStateBase, cmdList.end()); } auto itorMediaVfeStateSecond = find(itorWalker, cmdList.end()); auto *cmdMediaVfeStateSecond = (MEDIA_VFE_STATE *)*itorMediaVfeStateSecond; EXPECT_NE(mediaVfeState, cmdMediaVfeStateSecond); uint64_t oldScratchAddr = ((uint64_t)scratchBaseHighPart << 32u) | scratchBaseLowPart; uint64_t newScratchAddr = ((uint64_t)cmdMediaVfeStateSecond->getScratchSpaceBasePointerHigh() << 32u) | cmdMediaVfeStateSecond->getScratchSpaceBasePointer(); if (sharedDeviceInfo.force32BitAddressess == true) { EXPECT_NE(oldScratchAddr, newScratchAddr); } } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNDRangeKernelAndReadBufferStateBaseAddressAndScratchAddressInMediaVFEStateIsProgrammedForNDRangeAndReprogramedForReadBufferAndGSBAFlagIsResetToFalse) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename PARSE::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); 
CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); size_t GWS = 1; uint32_t scratchSize = 1024; SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = scratchSize; kernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); auto deviceInfo = pClDevice->getDeviceInfo(); auto sharedDeviceInfo = pDevice->getDeviceInfo(); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_TRUE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } cmdList.clear(); // Parse command list parseCommands(commandQueue); // All state should be programmed before walker auto itorCmdForStateBase = itorStateBaseAddress; auto *mediaVfeState = (MEDIA_VFE_STATE *)*itorMediaVfeState; auto graphicsAllocationScratch = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(itorCmdForStateBase, itorWalker); auto *sba = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress = (uintptr_t)sba->getGeneralStateBaseAddress(); uint64_t graphicsAddress = 0; // Get address ( offset in 32 bit addressing ) of sratch graphicsAddress = (uint64_t)graphicsAllocationScratch->getGpuAddressToPatch(); if (sharedDeviceInfo.force32BitAddressess && is64bit) { EXPECT_TRUE(graphicsAllocationScratch->is32BitAllocation()); EXPECT_EQ(GmmHelper::decanonize(graphicsAllocationScratch->getGpuAddress()) - GSHaddress, graphicsAddress); } else if (!deviceInfo.svmCapabilities && is64bit) { EXPECT_EQ(ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, mediaVfeState->getScratchSpaceBasePointer()); EXPECT_EQ(GSHaddress + ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, 
graphicsAllocationScratch->getGpuAddressToPatch()); } else { EXPECT_EQ((uint64_t)graphicsAllocationScratch->getUnderlyingBuffer(), graphicsAddress); } uint64_t lowPartGraphicsAddress = (uint64_t)(graphicsAddress & 0xffffffff); uint64_t highPartGraphicsAddress = (uint64_t)((graphicsAddress >> 32) & 0xffffffff); uint64_t scratchBaseLowPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointer(); uint64_t scratchBaseHighPart = (uint64_t)mediaVfeState->getScratchSpaceBasePointerHigh(); if (is64bit && !sharedDeviceInfo.force32BitAddressess) { lowPartGraphicsAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; highPartGraphicsAddress = 0u; } EXPECT_EQ(lowPartGraphicsAddress, scratchBaseLowPart); EXPECT_EQ(highPartGraphicsAddress, scratchBaseHighPart); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_EQ(pDevice->getMemoryManager()->getExternalHeapBaseAddress(graphicsAllocationScratch->getRootDeviceIndex()), GSHaddress); } else { if (is64bit) { EXPECT_EQ(graphicsAddress - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit, GSHaddress); } else { EXPECT_EQ(0u, GSHaddress); } } size_t tempBuffer[] = {0, 1, 2}; size_t dstBuffer[] = {0, 0, 0}; cl_int retVal = 0; auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal); commandQueue.enqueueReadBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0, 0, 0); // Parse command list parseCommands(commandQueue); itorCmdForStateBase = find(itorWalker, cmdList.end()); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_NE(itorWalker, itorCmdForStateBase); if (itorCmdForStateBase != cmdList.end()) { auto *sba2 = (STATE_BASE_ADDRESS *)*itorCmdForStateBase; auto GSHaddress2 = (uintptr_t)sba2->getGeneralStateBaseAddress(); EXPECT_NE(sba, sba2); EXPECT_EQ(0u, GSHaddress2); if (sharedDeviceInfo.force32BitAddressess) { EXPECT_FALSE(commandStreamReceiver->getGSBAFor32BitProgrammed()); } } } delete buffer; if (sharedDeviceInfo.force32BitAddressess) { // Asserts placed after restoring 
old CSR to avoid heap corruption ASSERT_NE(itorCmdForStateBase, cmdList.end()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeDoNotStore32bitScratchAllocationOnReusableAllocationList) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->getMemoryManager()->setForce32BitAllocations(true); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size) flushTask(*commandStreamReceiver); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(scratchAllocation, nullptr); commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size flushTask(*commandStreamReceiver); // 2nd flush auto newScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_NE(scratchAllocation, newScratchAllocation); // Allocation changed std::unique_ptr allocationReusable = commandStreamReceiver->getInternalAllocationStorage()->obtainReusableAllocation(4096, GraphicsAllocation::AllocationType::LINEAR_STREAM); if (allocationReusable.get() != nullptr) { if (is64bit) { EXPECT_NE(scratchAllocation, allocationReusable.get()); } pDevice->getMemoryManager()->freeGraphicsMemory(allocationReusable.release()); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, InForced32BitAllocationsModeStore32bitScratchAllocationOnTemporaryAllocationList) { if (is64bit) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->getMemoryManager()->setForce32BitAllocations(true); pDevice->resetCommandStreamReceiver(commandStreamReceiver); commandStreamReceiver->setRequiredScratchSizes(4096, 0); // whatever > 0 (in page size) 
flushTask(*commandStreamReceiver); auto scratchAllocation = commandStreamReceiver->getScratchAllocation(); ASSERT_NE(scratchAllocation, nullptr); commandStreamReceiver->setRequiredScratchSizes(8196, 0); // whatever > first size flushTask(*commandStreamReceiver); // 2nd flush auto newScratchAllocation = commandStreamReceiver->getScratchAllocation(); EXPECT_NE(scratchAllocation, newScratchAllocation); // Allocation changed std::unique_ptr allocationTemporary = commandStreamReceiver->getTemporaryAllocations().detachAllocation(0, *commandStreamReceiver, GraphicsAllocation::AllocationType::SCRATCH_SURFACE); EXPECT_EQ(scratchAllocation, allocationTemporary.get()); pDevice->getMemoryManager()->freeGraphicsMemory(allocationTemporary.release()); } } HWTEST_F(UltCommandStreamReceiverTest, addPipeControlWithFlushAllCaches) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; DebugManagerStateRestore dbgRestorer; DebugManager.flags.FlushAllCaches.set(true); char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); MemorySynchronizationCommands::addPipeControl(stream, false); parseCommands(stream, 0); PIPE_CONTROL *pipeControl = getCommand(); ASSERT_NE(nullptr, pipeControl); // WA pipeControl added if (cmdList.size() == 2) { pipeControl++; } EXPECT_TRUE(pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable()); EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable()); EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getPipeControlFlushEnable()); EXPECT_TRUE(pipeControl->getVfCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenEnabledPreemptionWhenFlushTaskCalledThenDontProgramMediaVfeStateAgain) { pDevice->setPreemptionMode(PreemptionMode::ThreadGroup); auto &csr = 
pDevice->getUltCommandStreamReceiver(); HardwareParse hwParser; flushTask(csr, false, 0); hwParser.parseCommands(csr.commandStream, 0); auto cmd = hwParser.getCommand(); EXPECT_NE(nullptr, cmd); // program again csr.setMediaVFEStateDirty(false); auto offset = csr.commandStream.getUsed(); flushTask(csr, false, commandStream.getUsed()); hwParser.cmdList.clear(); hwParser.parseCommands(csr.commandStream, offset); cmd = hwParser.getCommand(); EXPECT_EQ(nullptr, cmd); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, flushTaskWithPCWhenPreambleSentAndL3ConfigChanged) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; CsrSizeRequestFlags csrSizeRequest = {}; commandStream.getSpace(sizeof(PIPE_CONTROL)); flushTaskFlags.useSLM = true; flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.lastSentCoherencyRequest = false; commandStreamReceiver.lastSentThreadArbitrationPolicy = commandStreamReceiver.requiredThreadArbitrationPolicy; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto &csrCS = commandStreamReceiver.getCS(); size_t sizeNeeded = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); auto expectedUsed = csrCS.getUsed() + sizeNeeded; expectedUsed = alignUp(expectedUsed, 
MemoryConstants::cacheLineSize); commandStreamReceiver.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, flushTaskFlags, *pDevice); // Verify that we didn't grab a new CS buffer EXPECT_EQ(expectedUsed, csrCS.getUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequiredCsrSizeDependsOnL3ConfigChanged) { UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; commandStreamReceiver.isPreambleSent = true; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); auto expectedDifference = commandStreamReceiver.getCmdSizeForL3Config(); csrSizeRequest.l3ConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); auto difference = l3ConfigChangedSize - l3ConfigNotChangedSize; EXPECT_EQ(expectedDifference, difference); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleNotSentThenRequiredCsrSizeDoesntDependOnL3ConfigChanged) { UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; commandStreamReceiver.isPreambleSent = false; csrSizeRequest.l3ConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); csrSizeRequest.l3ConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto l3ConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice); EXPECT_EQ(l3ConfigNotChangedSize, l3ConfigChangedSize); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCsrWhenPreambleNotSentThenRequiredCsrSizeDoesntDependOnmediaSamplerConfigChanged) { UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); commandStreamReceiver.isPreambleSent = false; csrSizeRequest.mediaSamplerConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); csrSizeRequest.mediaSamplerConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); EXPECT_EQ(mediaSamplerConfigChangedSize, mediaSamplerConfigNotChangedSize); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenSpecialPipelineSelectModeChangedWhenGetCmdSizeForPielineSelectIsCalledThenCorrectSizeIsReturned) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); csrSizeRequest.specialPipelineSelectModeChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); size_t size = commandStreamReceiver.getCmdSizeForPipelineSelect(); size_t expectedSize = sizeof(PIPELINE_SELECT); if (HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { expectedSize += sizeof(PIPE_CONTROL); } EXPECT_EQ(expectedSize, size); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequiredCsrSizeDependsOnmediaSamplerConfigChanged) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using PIPE_CONTROL 
= typename FamilyType::PIPE_CONTROL; UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); commandStreamReceiver.isPreambleSent = true; csrSizeRequest.mediaSamplerConfigChanged = false; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigNotChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); csrSizeRequest.mediaSamplerConfigChanged = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); auto mediaSamplerConfigChangedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); EXPECT_NE(mediaSamplerConfigChangedSize, mediaSamplerConfigNotChangedSize); auto difference = mediaSamplerConfigChangedSize - mediaSamplerConfigNotChangedSize; size_t expectedDifference = sizeof(PIPELINE_SELECT); if (HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { expectedDifference += sizeof(PIPE_CONTROL); } EXPECT_EQ(expectedDifference, difference); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenSamplerCacheFlushSentThenRequiredCsrSizeContainsPipecontrolSize) { typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT; UltCommandStreamReceiver &commandStreamReceiver = (UltCommandStreamReceiver &)pDevice->getGpgpuCommandStreamReceiver(); CsrSizeRequestFlags csrSizeRequest = {}; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.overrideCsrSizeReqFlags(csrSizeRequest); commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushNotRequired); auto samplerCacheNotFlushedSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); 
commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); auto samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); EXPECT_EQ(samplerCacheNotFlushedSize, samplerCacheFlushBeforeSize); NEO::WorkaroundTable *waTable = &pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable; waTable->waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; samplerCacheFlushBeforeSize = commandStreamReceiver.getRequiredCmdStreamSize(flags, *pDevice); auto difference = samplerCacheFlushBeforeSize - samplerCacheNotFlushedSize; EXPECT_EQ(sizeof(typename FamilyType::PIPE_CONTROL), difference); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateWhenflushTaskIsCalledThenNoFlushIsCalled) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInNonDirtyStateAndBatchingModeWhenflushTaskIsCalledWithDisabledPreemptionThenSubmissionIsNotRecorded) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); 
mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); //surfaces are non resident auto &surfacesForResidency = mockCsr->getResidencyAllocations(); EXPECT_EQ(0u, surfacesForResidency.size()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenGeneralStateBaseAddressIsProgrammedThenDecanonizedAddressIsWritten) { uint64_t generalStateBaseAddress = 0xffff800400010000ull; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); StateBaseAddressHelper::programStateBaseAddress(commandStream, &dsh, &ioh, &ssh, generalStateBaseAddress, true, 0, generalStateBaseAddress, true, pDevice->getGmmHelper(), false); HardwareParse hwParser; hwParser.parseCommands(commandStream); auto cmd = hwParser.getCommand(); EXPECT_NE(generalStateBaseAddress, cmd->getGeneralStateBaseAddress()); EXPECT_EQ(GmmHelper::decanonize(generalStateBaseAddress), cmd->getGeneralStateBaseAddress()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroGeneralStateBaseAddressWhenProgrammingIsDisabledThenExpectCommandValueZero) { uint64_t generalStateBaseAddress = 0x80010000ull; StateBaseAddressHelper::programStateBaseAddress(commandStream, &dsh, &ioh, &ssh, generalStateBaseAddress, false, 0, generalStateBaseAddress, true, pDevice->getGmmHelper(), false); HardwareParse hwParser; hwParser.parseCommands(commandStream); auto cmd = hwParser.getCommand(); EXPECT_EQ(0ull, cmd->getGeneralStateBaseAddress()); EXPECT_EQ(0u, cmd->getGeneralStateBufferSize()); EXPECT_FALSE(cmd->getGeneralStateBaseAddressModifyEnable()); 
EXPECT_FALSE(cmd->getGeneralStateBufferSizeModifyEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroInternalHeapBaseAddressWhenProgrammingIsDisabledThenExpectCommandValueZero) { uint64_t internalHeapBaseAddress = 0x80010000ull; StateBaseAddressHelper::programStateBaseAddress(commandStream, &dsh, &ioh, &ssh, internalHeapBaseAddress, true, 0, internalHeapBaseAddress, false, pDevice->getGmmHelper(), false); HardwareParse hwParser; hwParser.parseCommands(commandStream); auto cmd = hwParser.getCommand(); EXPECT_FALSE(cmd->getInstructionBaseAddressModifyEnable()); EXPECT_EQ(0ull, cmd->getInstructionBaseAddress()); EXPECT_FALSE(cmd->getInstructionBufferSizeModifyEnable()); EXPECT_EQ(0u, cmd->getInstructionBufferSize()); EXPECT_EQ(0u, cmd->getInstructionMemoryObjectControlState()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenSbaProgrammingWhenHeapsAreNotProvidedThenDontProgram) { DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); uint64_t internalHeapBase = 0x10000; uint64_t generalStateBase = 0x30000; StateBaseAddressHelper::programStateBaseAddress(commandStream, nullptr, nullptr, nullptr, generalStateBase, true, 0, internalHeapBase, true, pDevice->getGmmHelper(), false); HardwareParse hwParser; hwParser.parseCommands(commandStream); auto cmd = hwParser.getCommand(); EXPECT_FALSE(cmd->getDynamicStateBaseAddressModifyEnable()); EXPECT_FALSE(cmd->getDynamicStateBufferSizeModifyEnable()); EXPECT_EQ(0u, cmd->getDynamicStateBaseAddress()); EXPECT_EQ(0u, cmd->getDynamicStateBufferSize()); EXPECT_FALSE(cmd->getIndirectObjectBaseAddressModifyEnable()); EXPECT_FALSE(cmd->getIndirectObjectBufferSizeModifyEnable()); EXPECT_EQ(0u, cmd->getIndirectObjectBaseAddress()); EXPECT_EQ(0u, cmd->getIndirectObjectBufferSize()); EXPECT_FALSE(cmd->getSurfaceStateBaseAddressModifyEnable()); EXPECT_EQ(0u, cmd->getSurfaceStateBaseAddress()); EXPECT_TRUE(cmd->getInstructionBaseAddressModifyEnable()); 
EXPECT_EQ(internalHeapBase, cmd->getInstructionBaseAddress()); EXPECT_TRUE(cmd->getInstructionBufferSizeModifyEnable()); EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, cmd->getInstructionBufferSize()); EXPECT_TRUE(cmd->getGeneralStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getGeneralStateBufferSizeModifyEnable()); EXPECT_EQ(GmmHelper::decanonize(generalStateBase), cmd->getGeneralStateBaseAddress()); EXPECT_EQ(0xfffffu, cmd->getGeneralStateBufferSize()); } command_stream_receiver_flush_task_3_tests.cpp000066400000000000000000002264471363734646600360330ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" #include "test.h" using namespace NEO; typedef 
UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskTests; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledThenNothingIsSubmittedToTheHwAndSubmissionIsRecorded) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_EQ(cmdBufferList.peekHead(), cmdBufferList.peekTail()); auto cmdBuffer = cmdBufferList.peekHead(); //two more because of preemption allocation and sipKernel in Mid Thread preemption mode size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 
2 : 0; //we should have 3 heaps, tag allocation and csr command stream + cq EXPECT_EQ(5u + csrSurfaceCount, cmdBuffer->surfaces.size()); EXPECT_EQ(0, mockCsr->flushCalledCount); //we should be submitting via csr EXPECT_EQ(cmdBuffer->batchBuffer.commandBufferAllocation, mockCsr->commandStream.getGraphicsAllocation()); EXPECT_EQ(cmdBuffer->batchBuffer.startOffset, 0u); EXPECT_FALSE(cmdBuffer->batchBuffer.requiresCoherency); EXPECT_FALSE(cmdBuffer->batchBuffer.low_priority); //find BB END parseCommands(commandStream, 0); auto itBBend = find(cmdList.begin(), cmdList.end()); void *bbEndAddress = *itBBend; EXPECT_EQ(bbEndAddress, cmdBuffer->batchBufferEndLocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndTwoRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto secondBatchBuffer = primaryBatch->next; auto bbEndLocation = primaryBatch->batchBufferEndLocation; auto secondBatchBufferAddress = 
(uint64_t)ptrOffset(secondBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), secondBatchBuffer->batchBuffer.startOffset); mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(secondBatchBufferAddress, batchBufferStart->getBatchBufferStartAddressGraphicsaddress472()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersWhenFlushTaskIsCalledThenBatchBuffersAreCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto lastBatchBuffer = primaryBatch->next->next; auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; auto lastBatchBufferAddress = (uint64_t)ptrOffset(lastBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), lastBatchBuffer->batchBuffer.startOffset); 
mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(lastBatchBufferAddress, batchBufferStart->getBatchBufferStartAddressGraphicsaddress472()); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndThreeRecordedCommandBuffersThatUsesAllResourceWhenFlushTaskIsCalledThenBatchBuffersAreNotCombined) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); auto memorySize = (size_t)pDevice->getDeviceInfo().globalMemSize; MockGraphicsAllocation largeAllocation(nullptr, memorySize); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->makeResident(largeAllocation); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->makeResident(largeAllocation); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_EQ(nullptr, 
batchBufferStart); auto bbEnd = genCmdCast(bbEndLocation); EXPECT_NE(nullptr, bbEnd); EXPECT_EQ(3, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTaskIsCalledTwiceThenNothingIsSubmittedToTheHwAndTwoSubmissionAreRecorded) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto initialBase = commandStream.getCpuBase(); auto initialUsed = commandStream.getUsed(); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); //ensure command stream still used EXPECT_EQ(initialBase, commandStream.getCpuBase()); auto baseAfterFirstFlushTask = commandStream.getCpuBase(); auto usedAfterFirstFlushTask = commandStream.getUsed(); dispatchFlags.requiresCoherency = true; dispatchFlags.lowPriority = true; mockCsr->flushTask(commandStream, commandStream.getUsed(), dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto baseAfterSecondFlushTask = commandStream.getCpuBase(); auto usedAfterSecondFlushTask = commandStream.getUsed(); EXPECT_EQ(initialBase, commandStream.getCpuBase()); EXPECT_EQ(baseAfterSecondFlushTask, baseAfterFirstFlushTask); EXPECT_EQ(baseAfterFirstFlushTask, initialBase); EXPECT_GT(usedAfterFirstFlushTask, initialUsed); EXPECT_GT(usedAfterSecondFlushTask, usedAfterFirstFlushTask); 
auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_NE(cmdBufferList.peekHead(), cmdBufferList.peekTail()); EXPECT_NE(nullptr, cmdBufferList.peekTail()); EXPECT_NE(nullptr, cmdBufferList.peekHead()); auto cmdBuffer1 = cmdBufferList.peekHead(); auto cmdBuffer2 = cmdBufferList.peekTail(); EXPECT_GT(cmdBuffer2->batchBufferEndLocation, cmdBuffer1->batchBufferEndLocation); EXPECT_FALSE(cmdBuffer1->batchBuffer.requiresCoherency); EXPECT_TRUE(cmdBuffer2->batchBuffer.requiresCoherency); EXPECT_FALSE(cmdBuffer1->batchBuffer.low_priority); EXPECT_TRUE(cmdBuffer2->batchBuffer.low_priority); EXPECT_GT(cmdBuffer2->batchBuffer.startOffset, cmdBuffer1->batchBuffer.startOffset); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenRecordedBatchBufferIsBeingSubmittedThenFlushIsCalledWithRecordedCommandBuffer) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.requiresCoherency = true; mockCsr->lastSentCoherencyRequest = 1; commandStream.getSpace(4); mockCsr->flushTask(commandStream, 4, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0, mockCsr->flushCalledCount); auto &surfacesForResidency = mockCsr->getResidencyAllocations(); EXPECT_EQ(0u, surfacesForResidency.size()); auto &cmdBufferList = 
mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); auto cmdBuffer = cmdBufferList.peekHead(); //preemption allocation + sip kernel size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size()); //copy those surfaces std::vector residentSurfaces = cmdBuffer->surfaces; for (auto &graphicsAllocation : residentSurfaces) { EXPECT_TRUE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId())); EXPECT_EQ(1u, graphicsAllocation->getResidencyTaskCount(mockCsr->getOsContext().getContextId())); } mockCsr->flushBatchedSubmissions(); EXPECT_FALSE(mockCsr->recordedCommandBuffer->batchBuffer.low_priority); EXPECT_TRUE(mockCsr->recordedCommandBuffer->batchBuffer.requiresCoherency); EXPECT_EQ(mockCsr->recordedCommandBuffer->batchBuffer.commandBufferAllocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(4u, mockCsr->recordedCommandBuffer->batchBuffer.startOffset); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty()); EXPECT_EQ(0u, surfacesForResidency.size()); for (auto &graphicsAllocation : residentSurfaces) { EXPECT_FALSE(graphicsAllocation->isResident(mockCsr->getOsContext().getContextId())); } } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrCreatedWithDedicatedDebugFlagWhenItIsCreatedThenItHasProperDispatchMode) { DebugManagerStateRestore stateRestore; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::AdaptiveDispatch)); std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); EXPECT_EQ(DispatchMode::AdaptiveDispatch, mockCsr->dispatchMode); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenBlockingCommandIsSendThenItIsFlushedAndNotBatched) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new 
MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); configureCSRtoNonDirtyState(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.blocking = true; dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1, mockCsr->flushCalledCount); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenBufferToFlushWhenFlushTaskCalledThenUpdateFlushStamp) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); commandStream.getSpace(1); EXPECT_EQ(0, mockCsr->flushCalledCount); auto previousFlushStamp = mockCsr->flushStamp->peekStamp(); auto cmplStamp = flushTask(*mockCsr); EXPECT_GT(mockCsr->flushStamp->peekStamp(), previousFlushStamp); EXPECT_EQ(mockCsr->flushStamp->peekStamp(), cmplStamp.flushStamp); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNothingToFlushWhenFlushTaskCalledThenDontFlushStamp) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); configureCSRtoNonDirtyState(); EXPECT_EQ(0, mockCsr->flushCalledCount); auto previousFlushStamp = mockCsr->flushStamp->peekStamp(); auto cmplStamp = flushTask(*mockCsr); EXPECT_EQ(mockCsr->flushStamp->peekStamp(), previousFlushStamp); EXPECT_EQ(previousFlushStamp, cmplStamp.flushStamp); EXPECT_EQ(0, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskTests, 
givenCsrInBatchingModeWhenFlushTaskIsCalledThenFlushedTaskCountIsNotModifed) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); mockCsr->flushBatchedSubmissions(); EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; auto &csr = commandQueue.getGpgpuCommandStreamReceiver(); csr.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, csr.peekLatestSentTaskCount()); EXPECT_EQ(1u, csr.peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForTaskCountIsCalledWithTaskCountThatWasNotYetFlushedThenBatchedCommandBuffersAreSubmitted) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new 
MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); EXPECT_FALSE(cmdBufferList.peekIsEmpty()); EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount()); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(1u, cmdBuffer->taskCount); mockCsr->waitForCompletionWithTimeout(false, 1, 1); EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount()); EXPECT_TRUE(cmdBufferList.peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenEnqueueIsMadeThenCurrentMemoryUsedIsTracked) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = 
mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } EXPECT_EQ(expectedUsed, mockCsr->peekTotalMemoryUsed()); //after flush it goes to 0 mockCsr->flushBatchedSubmissions(); EXPECT_EQ(0u, mockCsr->peekTotalMemoryUsed()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenSusbsequentEnqueueIsMadeThenOnlyNewResourcesAreTrackedForMemoryUsage) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } auto additionalSize = 1234; MockGraphicsAllocation graphicsAllocation(nullptr, additionalSize); mockCsr->makeResident(graphicsAllocation); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(expectedUsed + additionalSize, mockCsr->peekTotalMemoryUsed()); mockCsr->flushBatchedSubmissions(); } struct MockedMemoryManager : public OsAgnosticMemoryManager { MockedMemoryManager(ExecutionEnvironment &executionEnvironment) : 
OsAgnosticMemoryManager(executionEnvironment) {} bool isMemoryBudgetExhausted() const override { return budgetExhausted; } bool budgetExhausted = false; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalResourceUsedExhaustsTheBudgetThenDoImplicitFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto mockedMemoryManager = new MockedMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(mockedMemoryManager); auto mockCsr = std::make_unique>(*executionEnvironment, 0); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { mockCsr->createPreemptionAllocation(); } mockCsr->initializeTagAllocation(); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); mockCsr->setupContext(*pDevice->getDefaultEngine().osContext); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockedMemoryManager->budgetExhausted = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); uint64_t expectedUsed = 0; for (const auto &resource : cmdBuffer->surfaces) { expectedUsed += resource->getUnderlyingBufferSize(); } EXPECT_EQ(expectedUsed, mockCsr->peekTotalMemoryUsed()); auto budgetSize = (size_t)pDevice->getDeviceInfo().globalMemSize; MockGraphicsAllocation hugeAllocation(nullptr, budgetSize / 4); mockCsr->makeResident(hugeAllocation); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, 
dispatchFlags, *pDevice); //expect 2 flushes, since we cannot batch those submissions EXPECT_EQ(2u, mockCsr->peekLatestFlushedTaskCount()); EXPECT_EQ(0u, mockCsr->peekTotalMemoryUsed()); EXPECT_TRUE(mockedSubmissionsAggregator->peekCommandBuffers().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTwoTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_NE(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_NE(nullptr, secondCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(firstCmdBuffer->pipeControlThatMayBeErasedLocation, secondCmdBuffer->pipeControlThatMayBeErasedLocation); auto ppc = 
genCmdCast(firstCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, ppc); auto ppc2 = genCmdCast(secondCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, ppc2); //flush needs to bump the taskLevel mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); //make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); //first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); auto noop2 = genCmdCast(ppc2); EXPECT_NE(nullptr, noop1); EXPECT_EQ(nullptr, noop2); auto ppcAfterChange = genCmdCast(ppc2); EXPECT_NE(nullptr, ppcAfterChange); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushIsNotRequiredThenItIsNotSet) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStream); auto itorPipeControl = 
find(cmdList.begin(), cmdList.end()); auto pipeControl = genCmdCast(*itorPipeControl); EXPECT_FALSE(pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenCommandAreSubmittedThenDcFlushIsAdded) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); auto pipeControl = genCmdCast(*itorPipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrderModeFisabledWhenCommandAreSubmittedThenDcFlushIsAdded) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); 
parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); auto pipeControl = genCmdCast(*itorPipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenDcFlushIsRequiredThenPipeControlIsNotRegistredForNooping) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.dcFlush = true; dispatchFlags.outOfOrderExecutionAllowed = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_EQ(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlThenDcFlushIsEnabled) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = 
DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); ASSERT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); auto pipeControl = genCmdCast(cmdBuffer->epiloguePipeControlLocation); ASSERT_NE(nullptr, pipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlWhendDcFlushDisabledByDebugFlagThenDcFlushIsDisabled) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore debugRestorer; DebugManager.flags.DisableDcFlushInEpilogue.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto cmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); ASSERT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation); auto pipeControl = genCmdCast(cmdBuffer->epiloguePipeControlLocation); 
ASSERT_NE(nullptr, pipeControl); mockCsr->flushBatchedSubmissions(); EXPECT_FALSE(pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndOoqFlagSetToFalseWhenTwoTasksArePassedWithTheSameLevelThenThereIsPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->timestampPacketWriteEnabled = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_EQ(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_EQ(nullptr, secondCmdBuffer->pipeControlThatMayBeErasedLocation); mockCsr->flushBatchedSubmissions(); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorPipeControl = find(itorBatchBufferStartFirst, 
itorBatchBufferStartSecond); EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeAndOoqFlagSetToFalseWhenTimestampPacketWriteIsEnabledThenNoopPipeControl) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = false; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->timestampPacketWriteEnabled = false; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_EQ(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto secondCmdBuffer = firstCmdBuffer->next; EXPECT_EQ(nullptr, secondCmdBuffer->pipeControlThatMayBeErasedLocation); mockCsr->flushBatchedSubmissions(); mockCsr->timestampPacketWriteEnabled = true; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); EXPECT_NE(nullptr, firstCmdBuffer->pipeControlThatMayBeErasedLocation); secondCmdBuffer = firstCmdBuffer->next; EXPECT_NE(nullptr, 
secondCmdBuffer->pipeControlThatMayBeErasedLocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenPipeControlForNoopAddressIsNullThenPipeControlIsNotNooped) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto ppc1Location = firstCmdBuffer->pipeControlThatMayBeErasedLocation; firstCmdBuffer->pipeControlThatMayBeErasedLocation = nullptr; auto ppc = genCmdCast(ppc1Location); EXPECT_NE(nullptr, ppc); //call flush, both pipe controls must remain untouched mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorPipeControl = 
find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorPipeControl, itorBatchBufferStartSecond); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenThreeTasksArePassedWithTheSameLevelThenThereIsNoPipeControlBetweenThemAfterFlush) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; auto taskLevelPriorToSubmission = mockCsr->peekTaskLevel(); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); //now emit with the same taskLevel mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevelPriorToSubmission, dispatchFlags, *pDevice); EXPECT_EQ(taskLevelPriorToSubmission, mockCsr->peekTaskLevel()); //validate if we recorded ppc positions auto firstCmdBuffer = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto secondCmdBuffer = firstCmdBuffer->next; auto thirdCmdBuffer = firstCmdBuffer->next->next; EXPECT_NE(nullptr, thirdCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(firstCmdBuffer->pipeControlThatMayBeErasedLocation, thirdCmdBuffer->pipeControlThatMayBeErasedLocation); auto ppc = genCmdCast(firstCmdBuffer->pipeControlThatMayBeErasedLocation); auto ppc2 = 
genCmdCast(secondCmdBuffer->pipeControlThatMayBeErasedLocation); auto ppc3 = genCmdCast(thirdCmdBuffer->pipeControlThatMayBeErasedLocation); EXPECT_NE(nullptr, ppc2); EXPECT_NE(nullptr, ppc3); //flush needs to bump the taskLevel mockCsr->flushBatchedSubmissions(); EXPECT_EQ(taskLevelPriorToSubmission + 1, mockCsr->peekTaskLevel()); //decode commands to confirm no pipe controls between Walkers parseCommands(commandQueue); auto itorBatchBufferStartFirst = find(cmdList.begin(), cmdList.end()); auto itorBatchBufferStartSecond = find(++itorBatchBufferStartFirst, cmdList.end()); auto itorBatchBufferStartThird = find(++itorBatchBufferStartSecond, cmdList.end()); //make sure they are not the same EXPECT_NE(cmdList.end(), itorBatchBufferStartFirst); EXPECT_NE(cmdList.end(), itorBatchBufferStartSecond); EXPECT_NE(cmdList.end(), itorBatchBufferStartThird); EXPECT_NE(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_NE(itorBatchBufferStartThird, itorBatchBufferStartSecond); auto itorPipeControl = find(itorBatchBufferStartFirst, itorBatchBufferStartSecond); EXPECT_EQ(itorPipeControl, itorBatchBufferStartSecond); itorPipeControl = find(itorBatchBufferStartSecond, itorBatchBufferStartThird); EXPECT_EQ(itorPipeControl, itorBatchBufferStartThird); //first pipe control is nooped, second pipe control is untouched auto noop1 = genCmdCast(ppc); auto noop2 = genCmdCast(ppc2); auto noop3 = genCmdCast(ppc3); EXPECT_NE(nullptr, noop1); EXPECT_NE(nullptr, noop2); EXPECT_EQ(nullptr, noop3); auto ppcAfterChange = genCmdCast(ppc3); EXPECT_NE(nullptr, ppcAfterChange); } typedef UltCommandStreamReceiverTest CommandStreamReceiverCleanupTests; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenTemporaryAndReusableAllocationsArePresentThenCleanupResourcesOnlyCleansThoseAboveLatestFlushTaskLevel) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto memoryManager = pDevice->getMemoryManager(); auto temporaryToClean = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto temporaryToHold = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto reusableToClean = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); auto reusableToHold = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporaryToClean), TEMPORARY_ALLOCATION); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporaryToHold), TEMPORARY_ALLOCATION); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(reusableToClean), REUSABLE_ALLOCATION); commandStreamReceiver.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(reusableToHold), REUSABLE_ALLOCATION); auto osContextId = commandStreamReceiver.getOsContext().getContextId(); temporaryToClean->updateTaskCount(1, osContextId); reusableToClean->updateTaskCount(1, osContextId); temporaryToHold->updateTaskCount(10, osContextId); reusableToHold->updateTaskCount(10, osContextId); commandStreamReceiver.latestFlushedTaskCount = 9; commandStreamReceiver.cleanupResources(); EXPECT_EQ(reusableToHold, commandStreamReceiver.getAllocationsForReuse().peekHead()); EXPECT_EQ(reusableToHold, commandStreamReceiver.getAllocationsForReuse().peekTail()); EXPECT_EQ(temporaryToHold, commandStreamReceiver.getTemporaryAllocations().peekHead()); EXPECT_EQ(temporaryToHold, commandStreamReceiver.getTemporaryAllocations().peekTail()); commandStreamReceiver.latestFlushedTaskCount = 11; commandStreamReceiver.cleanupResources(); 
EXPECT_TRUE(commandStreamReceiver.getAllocationsForReuse().peekIsEmpty()); EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToLowWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.throttle = QueueThrottle::LOW; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::LOW); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToMediumWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = 
DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.throttle = QueueThrottle::MEDIUM; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::MEDIUM); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandQueueWithThrottleHintWhenFlushingThenPassThrottleHintToCsr) { MockContext context(pClDevice); cl_queue_properties properties[] = {CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_LOW_KHR, 0}; CommandQueueHw commandQueue(&context, pClDevice, properties, false); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(&context, 0, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; uint32_t outPtr; commandQueue.enqueueReadBuffer(buffer.get(), CL_TRUE, 0, 1, &outPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(QueueThrottle::LOW, mockCsr->passedDispatchFlags.throttle); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithThrottleSetToHighWhenFlushTaskIsCalledThenThrottleIsSetInBatchBuffer) { typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); 
dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); dispatchFlags.throttle = QueueThrottle::HIGH; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.throttle, QueueThrottle::HIGH); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpilogueRequiredFlagWhenTaskIsSubmittedDirectlyThenItPointsBackToCsr) { configureCSRtoNonDirtyState(); auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); EXPECT_EQ(0u, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags)); dispatchFlags.epilogueRequired = true; dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiver.getCmdSizeForEpilogue(dispatchFlags)); auto data = commandStream.getSpace(MemoryConstants::cacheLineSize); memset(data, 0, MemoryConstants::cacheLineSize); commandStreamReceiver.storeMakeResidentAllocations = true; commandStreamReceiver.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto &commandStreamReceiverStream = commandStreamReceiver.getCS(0u); EXPECT_EQ(MemoryConstants::cacheLineSize * 2, commandStream.getUsed()); EXPECT_EQ(MemoryConstants::cacheLineSize, commandStreamReceiverStream.getUsed()); parseCommands(commandStream, 0); auto itBBend = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(itBBend, cmdList.end()); auto itBatchBufferStart = find(cmdList.begin(), cmdList.end()); EXPECT_NE(itBatchBufferStart, cmdList.end()); auto batchBufferStart = genCmdCast(*itBatchBufferStart); EXPECT_EQ(batchBufferStart->getBatchBufferStartAddressGraphicsaddress472(), commandStreamReceiverStream.getGraphicsAllocation()->getGpuAddress()); 
parseCommands(commandStreamReceiverStream, 0); itBBend = find(cmdList.begin(), cmdList.end()); void *bbEndAddress = *itBBend; EXPECT_EQ(commandStreamReceiverStream.getCpuBase(), bbEndAddress); EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiverStream.getGraphicsAllocation())); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWithNewSliceCountWhenFlushTaskThenNewSliceCountIsSet) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); uint64_t newSliceCount = 1; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = newSliceCount; mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto &cmdBufferList = mockedSubmissionsAggregator->peekCommandBuffers(); auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(cmdBuffer->batchBuffer.sliceCount, newSliceCount); } template class UltCommandStreamReceiverForDispatchFlags : public UltCommandStreamReceiver { using BaseClass = UltCommandStreamReceiver; public: UltCommandStreamReceiverForDispatchFlags(ExecutionEnvironment &executionEnvironment) : BaseClass(executionEnvironment, 0) {} CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { savedDispatchFlags = dispatchFlags; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } DispatchFlags savedDispatchFlags = 
DispatchFlagsHelper::createDefaultDispatchFlags(); }; HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblockedThenDispatchFlagsAreSetCorrectly) { MockContext mockContext; auto csr = new UltCommandStreamReceiverForDispatchFlags(*pDevice->executionEnvironment); pDevice->resetCommandStreamReceiver(csr); uint32_t numGrfRequired = 666u; auto pCmdQ = std::make_unique(&mockContext, pClDevice, nullptr); auto mockProgram = std::make_unique(*pDevice->getExecutionEnvironment(), &mockContext, false, pDevice); std::unique_ptr pKernel(MockKernel::create(*pDevice, mockProgram.get(), numGrfRequired)); auto event = std::make_unique>(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); std::vector surfaces; event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel.get(), 1)); event->submitCommand(false); EXPECT_EQ(numGrfRequired, csr->savedDispatchFlags.numGrfRequired); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsTrueWhenCallingAddPipeControlThenDcFlushIsEnabled) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream commandStream(buffer.get(), 128); auto pipeControl = MemorySynchronizationCommands::addPipeControl(commandStream, true); EXPECT_TRUE(pipeControl->getDcFlushEnable()); 
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsFalseWhenCallingAddPipeControlThenDcFlushIsEnabledOnlyOnGen8) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream commandStream(buffer.get(), 128); auto pipeControl = MemorySynchronizationCommands::addPipeControl(commandStream, false); const bool expectedDcFlush = ::renderCoreFamily == IGFX_GEN8_CORE; EXPECT_EQ(expectedDcFlush, pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferIsAllocatedItIsClearedInCleanupResources) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); ASSERT_NE(nullptr, pDevice); commandStreamReceiver.createPerDssBackedBuffer(*pDevice); EXPECT_NE(nullptr, commandStreamReceiver.perDssBackedBuffer); commandStreamReceiver.cleanupResources(); EXPECT_EQ(nullptr, commandStreamReceiver.perDssBackedBuffer); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledThenAllocationIsCreated) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.usePerDssBackedBuffer = true; commandStreamReceiver.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(1u, commandStreamReceiver.createPerDssBackedBufferCalled); EXPECT_NE(nullptr, commandStreamReceiver.perDssBackedBuffer); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPerDssBackBufferProgrammingEnabledAndPerDssBackedBufferAlreadyPresentThenNewAllocationIsNotCreated) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto memoryManager = pDevice->getMemoryManager(); commandStreamReceiver.perDssBackedBuffer = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.usePerDssBackedBuffer = true; commandStreamReceiver.flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(0u, commandStreamReceiver.createPerDssBackedBufferCalled); } template class MockCsrWithFailingFlush : public CommandStreamReceiverHw { public: using CommandStreamReceiverHw::latestSentTaskCount; using CommandStreamReceiverHw::submissionAggregator; MockCsrWithFailingFlush(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex) { this->dispatchMode = DispatchMode::BatchedDispatch; this->tagAddress = &tag; } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return false; } uint32_t tag = 0; }; HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutIsCalledWhenFlushBatchedSubmissionsReturnsFailureThenItIsPropagated) { MockCsrWithFailingFlush mockCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockOsContext osContext(0, 8, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); mockCsr.setupContext(osContext); mockCsr.latestSentTaskCount = 0; auto cmdBuffer = std::make_unique(*pDevice); mockCsr.submissionAggregator->recordCommandBuffer(cmdBuffer.release()); EXPECT_FALSE(mockCsr.waitForCompletionWithTimeout(false, 0, 1)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFlushTaskIsCalledThenInitializePageTableManagerRegister) { auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto csr2 = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(csr); MockGmmPageTableMngr *pageTableManager = new 
MockGmmPageTableMngr(); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->pageTableManager.reset(pageTableManager); EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr, ::testing::_)).Times(1); EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr2, ::testing::_)).Times(0); auto memoryManager = pDevice->getMemoryManager(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap cs(graphicsAllocation); EXPECT_FALSE(csr->pageTableManagerInitialized); EXPECT_FALSE(csr2->pageTableManagerInitialized); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); EXPECT_TRUE(csr->pageTableManagerInitialized); EXPECT_FALSE(csr2->pageTableManagerInitialized); csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr2, ::testing::_)).Times(1); pDevice->resetCommandStreamReceiver(csr2); csr2->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); EXPECT_TRUE(csr2->pageTableManagerInitialized); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenInitializingPageTableManagerRegisterFailsThenPageTableManagerIsNotInitialized) { auto csr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(csr); MockGmmPageTableMngr *pageTableManager = new MockGmmPageTableMngr(); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->pageTableManager.reset(pageTableManager); EXPECT_CALL(*pageTableManager, initContextAuxTableRegister(csr, ::testing::_)).Times(2).WillRepeatedly(::testing::Return(GMM_ERROR)); auto memoryManager = pDevice->getMemoryManager(); auto graphicsAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap cs(graphicsAllocation); EXPECT_FALSE(csr->pageTableManagerInitialized); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); EXPECT_FALSE(csr->pageTableManagerInitialized); csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *pDevice); EXPECT_FALSE(csr->pageTableManagerInitialized); memoryManager->freeGraphicsMemory(graphicsAllocation); } command_stream_receiver_flush_task_gmock_tests.cpp000066400000000000000000000357341363734646600367660ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/linux/debug_env_reader.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_helper.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include 
"opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" #include "test.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; using ::testing::_; using ::testing::Invoke; using CommandStreamReceiverFlushTaskGmockTests = UltCommandStreamReceiverTest; HWTEST_F(CommandStreamReceiverFlushTaskGmockTests, givenCsrInBatchingModeThreeRecordedCommandBufferEnabledBatchBufferFlatteningAndPatchInfoCollectionWhenFlushBatchedSubmissionsIsCalledThenBatchBuffersAndPatchInfoAreCollected) { DebugManagerStateRestore stateRestore; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::BatchedDispatch)); DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); DebugManager.flags.FlattenBatchBufferForAUBDump.set(true); typedef typename FamilyType::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); 
pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.guardCommandBufferWithPipeControl = true; dispatchFlags.outOfOrderExecutionAllowed = true; EXPECT_CALL(*mockHelper, setPatchInfoData(::testing::_)).Times(10); size_t removePatchInfoDataCount = 4 * UltMemorySynchronizationCommands::getExpectedPipeControlCount(pDevice->getHardwareInfo()); EXPECT_CALL(*mockHelper, removePatchInfoData(::testing::_)).Times(static_cast(removePatchInfoDataCount)); EXPECT_CALL(*mockHelper, registerCommandChunk(::testing::_)).Times(4); EXPECT_CALL(*mockHelper, registerBatchBufferStartAddress(::testing::_, ::testing::_)).Times(3); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); auto primaryBatch = mockedSubmissionsAggregator->peekCommandBuffers().peekHead(); auto lastBatchBuffer = primaryBatch->next->next; auto bbEndLocation = primaryBatch->next->batchBufferEndLocation; auto lastBatchBufferAddress = (uint64_t)ptrOffset(lastBatchBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(), lastBatchBuffer->batchBuffer.startOffset); mockCsr->flushBatchedSubmissions(); auto batchBufferStart = genCmdCast(bbEndLocation); ASSERT_NE(nullptr, batchBufferStart); EXPECT_EQ(lastBatchBufferAddress, batchBufferStart->getBatchBufferStartAddressGraphicsaddress472()); EXPECT_EQ(1, mockCsr->flushCalledCount); } HWTEST_F(CommandStreamReceiverFlushTaskGmockTests, givenMockCommandStreamerWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataIsNotCollected) { CommandQueueHw commandQueue(nullptr, 
pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(mockCsr); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.throttle = QueueThrottle::MEDIUM; EXPECT_CALL(*mockHelper, setPatchInfoData(_)).Times(0); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); } HWTEST_F(CommandStreamReceiverFlushTaskGmockTests, givenMockCommandStreamerWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoDataIsCollected) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); pDevice->resetCommandStreamReceiver(mockCsr); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); std::vector patchInfoDataVector; EXPECT_CALL(*mockHelper, setPatchInfoData(_)) .Times(4) .WillRepeatedly(Invoke([&](const PatchInfoData &data) { patchInfoDataVector.push_back(data); return true; })); mockCsr->flushTask(commandStream, 0, dsh, ioh, ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(4u, patchInfoDataVector.size()); for (auto &patchInfoData : patchInfoDataVector) { uint64_t expectedAddress = 0u; switch (patchInfoData.sourceType) { case PatchInfoAllocationType::DynamicStateHeap: expectedAddress = dsh.getGraphicsAllocation()->getGpuAddress(); break; case PatchInfoAllocationType::SurfaceStateHeap: expectedAddress = 
ssh.getGraphicsAllocation()->getGpuAddress(); break; case PatchInfoAllocationType::IndirectObjectHeap: expectedAddress = ioh.getGraphicsAllocation()->getGpuAddress(); break; default: expectedAddress = 0u; } EXPECT_EQ(expectedAddress, patchInfoData.sourceAllocation); EXPECT_EQ(0u, patchInfoData.sourceAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::Default, patchInfoData.targetType); } } HWTEST_F(CommandStreamReceiverFlushTaskGmockTests, givenMockCsrWhenCollectStateBaseAddresPatchInfoIsCalledThenAppropriateAddressesAreTaken) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); auto mockHelper = new MockFlatBatchBufferHelper(*pDevice->executionEnvironment); mockCsr->overwriteFlatBatchBufferHelper(mockHelper); std::vector patchInfoDataVector; EXPECT_CALL(*mockHelper, setPatchInfoData(_)) .Times(4) .WillRepeatedly(Invoke([&](const PatchInfoData &data) { patchInfoDataVector.push_back(data); return true; })); EXPECT_CALL(*mockHelper, registerCommandChunk(_)) .Times(0); uint64_t baseAddress = 0xabcdef; uint64_t commandOffset = 0xa; uint64_t generalStateBase = 0xff; mockCsr->collectStateBaseAddresPatchInfo(baseAddress, commandOffset, dsh, ioh, ssh, generalStateBase); ASSERT_EQ(patchInfoDataVector.size(), 4u); PatchInfoData dshPatch = patchInfoDataVector[0]; PatchInfoData gshPatch = patchInfoDataVector[1]; PatchInfoData sshPatch = patchInfoDataVector[2]; PatchInfoData iohPatch = patchInfoDataVector[3]; for (auto &patch : patchInfoDataVector) { EXPECT_EQ(patch.targetAllocation, baseAddress); EXPECT_EQ(patch.sourceAllocationOffset, 0u); } //DSH EXPECT_EQ(dshPatch.sourceAllocation, dsh.getGraphicsAllocation()->getGpuAddress()); EXPECT_EQ(dshPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET); //IOH EXPECT_EQ(iohPatch.sourceAllocation, ioh.getGraphicsAllocation()->getGpuAddress()); 
EXPECT_EQ(iohPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET); //SSH EXPECT_EQ(sshPatch.sourceAllocation, ssh.getGraphicsAllocation()->getGpuAddress()); EXPECT_EQ(sshPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET); //GSH EXPECT_EQ(gshPatch.sourceAllocation, generalStateBase); EXPECT_EQ(gshPatch.targetAllocationOffset, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionEnabledWhenScratchSpaceIsProgrammedThenPatchInfoIsCollected) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); bool stateBaseAddressDirty; bool vfeStateDirty; mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr->requiredScratchSize = 0x200000; MockOsContext osContext(0, 8, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false); mockCsr->setupContext(osContext); mockCsr->programVFEState(commandStream, flags, 10); ASSERT_EQ(1u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); EXPECT_EQ(mockCsr->getScratchSpaceController()->getScratchPatchAddress(), mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().at(0).sourceAllocation); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, 
givenPatchInfoCollectionDisabledWhenScratchSpaceIsProgrammedThenPatchInfoIsNotCollected) { CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); bool stateBaseAddressDirty; bool vfeStateDirty; mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr->requiredScratchSize = 0x200000; MockOsContext osContext(0, 8, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false); mockCsr->setupContext(osContext); mockCsr->programVFEState(commandStream, flags, 10); EXPECT_EQ(0u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatchInfoCollectionEnabledWhenMediaVfeStateIsProgrammedWithEmptyScratchThenPatchInfoIsNotCollected) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); std::unique_ptr> mockCsr(new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); mockCsr->requiredScratchSize = 0x200000; MockOsContext osContext(0, 8, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false); mockCsr->setupContext(osContext); mockCsr->programVFEState(commandStream, flags, 10); EXPECT_EQ(0u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); } 
command_stream_receiver_hw_tests.cpp000066400000000000000000002451031363734646600340520ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/linux/debug_env_reader.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/cl_blit_properties.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include 
"opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_internal_allocation_storage.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" #include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "test.h" #include "reg_configs_common.h" #include using namespace NEO; HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyNotChangedWhenEstimatingPreambleCmdSizeThenReturnItsValue) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; auto expectedCmdSize = sizeof(typename FamilyType::PIPE_CONTROL) + sizeof(typename FamilyType::MEDIA_VFE_STATE); EXPECT_EQ(expectedCmdSize, commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice)); } HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenNotSentStateSipWhenFirstTaskIsFlushedThenStateSipCmdIsAddedAndIsStateSipSentSetToTrue) { using STATE_SIP = typename FamilyType::STATE_SIP; auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); if (mockDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { mockDevice->setPreemptionMode(PreemptionMode::MidThread); auto &csr = 
mockDevice->getUltCommandStreamReceiver(); csr.isPreambleSent = true; CommandQueueHw commandQueue(nullptr, mockDevice.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionMode::MidThread; MockGraphicsAllocation allocation(nullptr, 0); IndirectHeap heap(&allocation); csr.flushTask(commandStream, 0, heap, heap, heap, 0, dispatchFlags, mockDevice->getDevice()); EXPECT_TRUE(csr.isStateSipSent); HardwareParse hwParser; hwParser.parseCommands(csr.getCS(0)); auto stateSipItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), stateSipItor); } } HWTEST_F(UltCommandStreamReceiverTest, givenCsrWhenProgramStateSipIsCalledThenIsStateSipCalledIsSetToTrue) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto requiredSize = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice); StackVec buffer(requiredSize); LinearStream cmdStream(buffer.begin(), buffer.size()); commandStreamReceiver.programStateSip(cmdStream, *pDevice); EXPECT_TRUE(commandStreamReceiver.isStateSipSent); } HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsNotIncluded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); commandStreamReceiver.isStateSipSent = false; auto sizeWithStateSipIsNotSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); commandStreamReceiver.isStateSipSent = true; auto sizeWhenSipIsSent = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice); EXPECT_EQ(sizeForStateSip, sizeWithStateSipIsNotSent - sizeWhenSipIsSent); } HWTEST_F(UltCommandStreamReceiverTest, 
givenSentStateSipFlagSetAndSourceLevelDebuggerIsActiveWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsIncluded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); commandStreamReceiver.isStateSipSent = true; auto sizeWithoutSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); pDevice->setDebuggerActive(true); commandStreamReceiver.isStateSipSent = true; auto sizeWithSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice); EXPECT_EQ(sizeForStateSip, sizeWithSourceKernelDebugging - sizeWithoutSourceKernelDebugging - PreambleHelper::getKernelDebuggingCommandsSize(true)); pDevice->setDebuggerActive(false); } HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingPreambleCmdSizeThenResultDependsOnPolicyProgrammingCmdSize) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; auto policyNotChanged = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy + 1; auto policyChanged = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); auto actualDifference = policyChanged - policyNotChanged; auto expectedDifference = PreambleHelper::getThreadArbitrationCommandsSize(); EXPECT_EQ(expectedDifference, actualDifference); } HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingPreambleCmdSizeThenResultDependsOnPolicyProgrammingAndAdditionalCmdsSize) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; commandStreamReceiver.isPreambleSent = false; auto preambleNotSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); commandStreamReceiver.isPreambleSent = true; auto preambleSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); auto actualDifference = preambleNotSent - preambleSent; auto expectedDifference = PreambleHelper::getThreadArbitrationCommandsSize() + PreambleHelper::getAdditionalCommandsSize(*pDevice); EXPECT_EQ(expectedDifference, actualDifference); } HWTEST_F(UltCommandStreamReceiverTest, givenPerDssBackBufferProgrammingEnabledWhenEstimatingPreambleCmdSizeThenResultIncludesPerDssBackBufferProgramingCommandsSize) { DebugManagerStateRestore restore; DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy; commandStreamReceiver.isPreambleSent = false; auto preambleNotSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); commandStreamReceiver.isPreambleSent = true; auto preambleSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); auto actualDifference = preambleNotSent - preambleSent; auto expectedDifference = PreambleHelper::getThreadArbitrationCommandsSize() + PreambleHelper::getAdditionalCommandsSize(*pDevice) + PreambleHelper::getPerDssBackedBufferCommandsSize(pDevice->getHardwareInfo()); EXPECT_EQ(expectedDifference, actualDifference); } HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenMediaVfeStateDirtyEstimatingPreambleCmdSizeThenResultDependsVfeStateProgrammingCmdSize) { typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); 
commandStreamReceiver.setMediaVFEStateDirty(false); auto notDirty = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); commandStreamReceiver.setMediaVFEStateDirty(true); auto dirty = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); auto actualDifference = dirty - notDirty; auto expectedDifference = sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE); EXPECT_EQ(expectedDifference, actualDifference); } HWTEST_F(UltCommandStreamReceiverTest, givenCommandStreamReceiverInInitialStateWhenHeapsAreAskedForDirtyStatusThenTrueIsReturned) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(0u, commandStreamReceiver.peekTaskCount()); EXPECT_EQ(0u, commandStreamReceiver.peekTaskLevel()); EXPECT_TRUE(commandStreamReceiver.dshState.updateAndCheck(&dsh)); EXPECT_TRUE(commandStreamReceiver.iohState.updateAndCheck(&ioh)); EXPECT_TRUE(commandStreamReceiver.sshState.updateAndCheck(&ssh)); } typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTests; HWTEST_F(CommandStreamReceiverFlushTests, WhenAddingBatchBufferEndThenBatchBufferEndIsAppendedCorrectly) { auto usedPrevious = commandStream.getUsed(); CommandStreamReceiverHw::addBatchBufferEnd(commandStream, nullptr); EXPECT_EQ(commandStream.getUsed(), usedPrevious + sizeof(typename FamilyType::MI_BATCH_BUFFER_END)); auto batchBufferEnd = genCmdCast( ptrOffset(commandStream.getCpuBase(), usedPrevious)); EXPECT_NE(nullptr, batchBufferEnd); } HWTEST_F(CommandStreamReceiverFlushTests, WhenAligningCommandStreamReceiverToCacheLineSizeThenItIsAlignedCorrectly) { commandStream.getSpace(sizeof(uint32_t)); CommandStreamReceiverHw::alignToCacheLine(commandStream); EXPECT_EQ(0u, commandStream.getUsed() % MemoryConstants::cacheLineSize); } typedef Test CommandStreamReceiverHwTest; HWTEST_F(CommandStreamReceiverHwTest, givenCsrHwWhenTypeIsCheckedThenCsrHwIsReturned) { auto csr = std::unique_ptr(CommandStreamReceiverHw::create(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); 
EXPECT_EQ(CommandStreamReceiverType::CSR_HW, csr->getType()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverHwTest, WhenCommandStreamReceiverHwIsCreatedThenDefaultSshSizeIs64KB) { auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); EXPECT_EQ(64 * KB, commandStreamReceiver.defaultSshSize); } HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenScratchAllocationIsNotCreated) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto scratchController = commandStreamReceiver->getScratchSpaceController(); bool stateBaseAddressDirty = false; bool cfeStateDirty = false; scratchController->setRequiredScratchSpace(reinterpret_cast(0x2000), 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_FALSE(cfeStateDirty); EXPECT_FALSE(stateBaseAddressDirty); EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); EXPECT_EQ(nullptr, scratchController->getPrivateScratchSpaceAllocation()); } HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsRequiredThenCorrectAddressIsReturned) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto scratchController = commandStreamReceiver->getScratchSpaceController(); bool cfeStateDirty = false; bool stateBaseAddressDirty = false; std::unique_ptr> surfaceHeap(alignedMalloc(0x1000, 0x1000), alignedFree); scratchController->setRequiredScratchSpace(surfaceHeap.get(), 0x1000u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); uint64_t expectedScratchAddress = 0xAAABBBCCCDDD000ull; auto scratchAllocation = scratchController->getScratchSpaceAllocation(); scratchAllocation->setCpuPtrAndGpuAddress(scratchAllocation->getUnderlyingBuffer(), expectedScratchAddress); EXPECT_TRUE(UnitTestHelper::evaluateGshAddressForScratchSpace((scratchAllocation->getGpuAddress() - MemoryConstants::pageSize), 
scratchController->calculateNewGSH())); } HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenGshAddressZeroIsReturned) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto scratchController = commandStreamReceiver->getScratchSpaceController(); EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); EXPECT_EQ(0u, scratchController->calculateNewGSH()); } HWTEST_F(CommandStreamReceiverHwTest, givenKernelExecInfothreadArbitfationPoliciesWhenCallGetThreadArbitationPolicyThenRetunProperValueEuSchedulingMode) { auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); uint32_t retVal = UnitTestHelper::getAppropriateThreadArbitrationPolicy(ThreadArbitrationPolicy::RoundRobin); EXPECT_EQ(static_cast(ThreadArbitrationPolicy::RoundRobin), retVal); retVal = UnitTestHelper::getAppropriateThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased); EXPECT_EQ(static_cast(ThreadArbitrationPolicy::AgeBased), retVal); retVal = UnitTestHelper::getAppropriateThreadArbitrationPolicy(ThreadArbitrationPolicy::RoundRobinAfterDependency); EXPECT_EQ(static_cast(ThreadArbitrationPolicy::RoundRobinAfterDependency), retVal); } struct BcsTests : public CommandStreamReceiverHwTest { void SetUp() override { CommandStreamReceiverHwTest::SetUp(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); auto engine = csr.getMemoryManager()->getRegisteredEngineForCsr(&csr); auto contextId = engine->osContext->getContextId(); delete engine->osContext; engine->osContext = OsContext::create(nullptr, contextId, 0, aub_stream::EngineType::ENGINE_BCS, PreemptionMode::Disabled, false, false, false); engine->osContext->incRefInternal(); csr.setupContext(*engine->osContext); context = std::make_unique(pClDevice); } void TearDown() override { context.reset(); CommandStreamReceiverHwTest::TearDown(); } uint32_t blitBuffer(CommandStreamReceiver *bcsCsr, const 
BlitProperties &blitProperties, bool blocking) { BlitPropertiesContainer container; container.push_back(blitProperties); return bcsCsr->blitBuffer(container, blocking); } TimestampPacketContainer timestampPacketContainer; CsrDependencies csrDependencies; std::unique_ptr context; }; HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; size_t notAlignedBltSize = (3 * max2DBlitSize) + 1; size_t alignedBltSize = (3 * max2DBlitSize); uint32_t alignedNumberOfBlts = 3; uint32_t notAlignedNumberOfBlts = 4; auto expectedAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts; auto expectedNotAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts; auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize({alignedBltSize, 1, 1}, csrDependencies, false); auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize({notAlignedBltSize, 1, 1}, csrDependencies, false); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); } HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; Vec3 notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2}; Vec3 alignedBltSize = {(3 * max2DBlitSize), 4, 2}; size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z; size_t notAlignedNumberOfBlts = 4 * notAlignedBltSize.y * notAlignedBltSize.z; auto expectedAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts; auto expectedNotAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts; auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize(alignedBltSize, csrDependencies, false); auto notAlignedEstimatedSize = 
BlitCommandsHelper::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies, false); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); } HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorrectValue) { size_t waSize = EncodeMiFlushDW::getMiFlushDwWaSize(); size_t totalSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); constexpr size_t miFlushDwSize = sizeof(typename FamilyType::MI_FLUSH_DW); size_t additionalSize = UnitTestHelper::additionalMiFlushDwRequired ? miFlushDwSize : 0; EXPECT_EQ(additionalSize, waSize); EXPECT_EQ(miFlushDwSize + additionalSize, totalSize); } HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCalculateForAllAttachedProperites) { const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const uint32_t numberOfBlts = 3; const size_t bltSize = (3 * max2DBlitSize); const uint32_t numberOfBlitOperations = 4; auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); auto expectedBlitInstructionsSize = sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts; auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); BlitPropertiesContainer blitPropertiesContainer; for (uint32_t i = 0; i < numberOfBlitOperations; i++) { BlitProperties blitProperties; blitProperties.copySize = {bltSize, 1, 1}; blitPropertiesContainer.push_back(blitProperties); expectedAlignedSize += expectedBlitInstructionsSize; } expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } HWTEST_F(BcsTests, 
givenBlitPropertiesContainerWhenExstimatingCommandsSizeForReadBufferRectThenCalculateForAllAttachedProperites) { const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); auto expectedBlitInstructionsSize = sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts; auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); BlitPropertiesContainer blitPropertiesContainer; for (uint32_t i = 0; i < numberOfBlitOperations; i++) { BlitProperties blitProperties; blitProperties.copySize = bltSize; blitPropertiesContainer.push_back(blitProperties); expectedAlignedSize += expectedBlitInstructionsSize; } expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); } HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommandsThenAddMiFlushDw) { size_t expectedBaseSize = sizeof(typename FamilyType::XY_COPY_BLT); auto expectedSizeWithTimestampPacketWrite = expectedBaseSize + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); auto expectedSizeWithoutTimestampPacketWrite = expectedBaseSize; auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, true); auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false); EXPECT_EQ(expectedSizeWithTimestampPacketWrite, estimatedSizeWithTimestampPacketWrite); 
EXPECT_EQ(expectedSizeWithoutTimestampPacketWrite, estimatedSizeWithoutTimestampPacketWrite); } HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAddAllRequiredCommands) { uint32_t numberOfBlts = 1; size_t numberNodesPerContainer = 5; auto &csr = pDevice->getUltCommandStreamReceiver(); MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); csrDependencies.push_back(×tamp0); csrDependencies.push_back(×tamp1); size_t expectedSize = (sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts) + TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies); auto estimatedSize = BlitCommandsHelper::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false); EXPECT_EQ(expectedSize, estimatedSize); } HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommands) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; size_t bltSize = (2 * max2DBlitSize) + bltLeftover; uint32_t numberOfBlts = 3; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(bltSize), nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); uint32_t newTaskCount = 19; csr.taskCount = newTaskCount - 1; EXPECT_EQ(0u, csr.recursiveLockCounter.load()); auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {bltSize, 1, 1}, 0, 0, 0, 0); 
blitBuffer(&csr, blitProperties, true); EXPECT_EQ(newTaskCount, csr.taskCount); EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount); EXPECT_EQ(newTaskCount, csr.latestSentTaskCount); EXPECT_EQ(newTaskCount, csr.latestSentTaskCountValueDuringFlush); EXPECT_EQ(1u, csr.recursiveLockCounter.load()); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; auto cmdIterator = cmdList.begin(); for (uint32_t i = 0; i < numberOfBlts; i++) { auto bltCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, bltCmd); uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); if (i == (numberOfBlts - 1)) { expectedWidth = bltLeftover; expectedHeight = 1; } EXPECT_EQ(expectedWidth, bltCmd->getTransferWidth()); EXPECT_EQ(expectedHeight, bltCmd->getTransferHeight()); EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch()); EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch()); } if (UnitTestHelper::isSynchronizationWArequired(pDevice->getHardwareInfo())) { auto miSemaphoreWaitCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miSemaphoreWaitCmd); EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd)); } auto miFlushCmd = genCmdCast(*(cmdIterator++)); if (UnitTestHelper::additionalMiFlushDwRequired) { uint64_t gpuAddress = 0x0; uint64_t immData = 0; EXPECT_NE(nullptr, miFlushCmd); EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE, miFlushCmd->getPostSyncOperation()); EXPECT_EQ(gpuAddress, miFlushCmd->getDestinationAddress()); EXPECT_EQ(immData, miFlushCmd->getImmediateData()); miFlushCmd = genCmdCast(*(cmdIterator++)); } EXPECT_NE(cmdIterator, cmdList.end()); EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation()); EXPECT_EQ(csr.getTagAllocation()->getGpuAddress(), miFlushCmd->getDestinationAddress()); EXPECT_EQ(newTaskCount, miFlushCmd->getImmediateData()); if 
(UnitTestHelper::isSynchronizationWArequired(pDevice->getHardwareInfo())) { auto miSemaphoreWaitCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, miSemaphoreWaitCmd); EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd)); } EXPECT_NE(nullptr, genCmdCast(*(cmdIterator++))); // padding while (cmdIterator != cmdList.end()) { EXPECT_NE(nullptr, genCmdCast(*(cmdIterator++))); } } struct BcsTestParam { Vec3 copySize; Vec3 hostPtrOffset; Vec3 copyOffset; size_t dstRowPitch; size_t dstSlicePitch; size_t srcRowPitch; size_t srcSlicePitch; } BlitterProperties[] = { {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 1, 1}, {0, 1, 1}, {BlitterConstants::maxBlitWidth, 1, 1}, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 2, 1}, {BlitterConstants::maxBlitWidth, 2, 2}, {BlitterConstants::maxBlitWidth, 1, 1}, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 1, 3}, {BlitterConstants::maxBlitWidth, 2, 2}, {BlitterConstants::maxBlitWidth, 1, 1}, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2, 0, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 2}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 4, 2}, {0, 0, 0}, {0, 0, 0}, (2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 4, (2 * 
BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 4}, {{(2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17, 3, 2}, {BlitterConstants::maxBlitWidth, 2, 2}, {BlitterConstants::maxBlitWidth, 1, 1}, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) + 2, (((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 3) + 2, ((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) + 2, (((2 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 17) * 3) + 2}}; template struct BcsDetaliedTests : public BcsTests, public ::testing::WithParamInterface { void SetUp() override { BcsTests::SetUp(); } void TearDown() override { BcsTests::TearDown(); } }; using BcsDetaliedTestsWithParams = BcsDetaliedTests; HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenProgramAddresseForReadBufferRect) { auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; Vec3 bltSize = GetParam().copySize; size_t numberOfBltsForSingleBltSizeProgramm = 3; size_t totalNumberOfBits = numberOfBltsForSingleBltSizeProgramm * bltSize.y * bltSize.z; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(8 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight), nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); Vec3 hostPtrOffset = GetParam().hostPtrOffset; Vec3 copyOffset = GetParam().copyOffset; size_t dstRowPitch = GetParam().dstRowPitch; size_t dstSlicePitch = GetParam().dstSlicePitch; size_t srcRowPitch = GetParam().srcRowPitch; size_t srcSlicePitch = GetParam().srcSlicePitch; auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, 
//blitDirection csr, buffer->getGraphicsAllocation(), //commandStreamReceiver nullptr, //memObjAllocation hostPtr, //preallocatedHostAllocation buffer->getGraphicsAllocation()->getGpuAddress(), //memObjGpuVa 0, //hostAllocGpuVa hostPtrOffset, //hostPtrOffset copyOffset, //copyOffset bltSize, //copySize dstRowPitch, //hostRowPitch dstSlicePitch, //hostSlicePitch srcRowPitch, //gpuRowPitch srcSlicePitch //gpuSlicePitch ); blitBuffer(&csr, blitProperties, true); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; auto cmdIterator = cmdList.begin(); uint64_t offset = 0; for (uint32_t i = 0; i < totalNumberOfBits; i++) { auto bltCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, bltCmd); uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); if (i % numberOfBltsForSingleBltSizeProgramm == numberOfBltsForSingleBltSizeProgramm - 1) { expectedWidth = bltLeftover; expectedHeight = 1; } if (i % numberOfBltsForSingleBltSizeProgramm == 0) { offset = 0; } auto rowIndex = (i / numberOfBltsForSingleBltSizeProgramm) % blitProperties.copySize.y; auto sliceIndex = i / (numberOfBltsForSingleBltSizeProgramm * blitProperties.copySize.y); auto expectedDstAddr = blitProperties.dstGpuAddress + blitProperties.dstOffset.x + offset + blitProperties.dstOffset.y * blitProperties.dstRowPitch + blitProperties.dstOffset.z * blitProperties.dstSlicePitch + rowIndex * blitProperties.dstRowPitch + sliceIndex * blitProperties.dstSlicePitch; auto expectedSrcAddr = blitProperties.srcGpuAddress + blitProperties.srcOffset.x + offset + blitProperties.srcOffset.y * blitProperties.srcRowPitch + blitProperties.srcOffset.z * blitProperties.srcSlicePitch + rowIndex * blitProperties.srcRowPitch + sliceIndex * blitProperties.srcSlicePitch; auto dstAddr = NEO::BlitCommandsHelper::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, rowIndex, sliceIndex); auto 
srcAddr = NEO::BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(blitProperties, offset, rowIndex, sliceIndex); EXPECT_EQ(dstAddr, expectedDstAddr); EXPECT_EQ(srcAddr, expectedSrcAddr); offset += (expectedWidth * expectedHeight); } } HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommandsForReadBufferRect) { auto &csr = pDevice->getUltCommandStreamReceiver(); static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; Vec3 bltSize = GetParam().copySize; size_t numberOfBltsForSingleBltSizeProgramm = 3; size_t totalNumberOfBits = numberOfBltsForSingleBltSizeProgramm * bltSize.y * bltSize.z; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(8 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight), nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); Vec3 hostPtrOffset = GetParam().hostPtrOffset; Vec3 copyOffset = GetParam().copyOffset; size_t dstRowPitch = GetParam().dstRowPitch; size_t dstSlicePitch = GetParam().dstSlicePitch; size_t srcRowPitch = GetParam().srcRowPitch; size_t srcSlicePitch = GetParam().srcSlicePitch; auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, //blitDirection csr, buffer->getGraphicsAllocation(), //commandStreamReceiver nullptr, //memObjAllocation hostPtr, //preallocatedHostAllocation buffer->getGraphicsAllocation()->getGpuAddress(), //memObjGpuVa 0, //hostAllocGpuVa hostPtrOffset, //hostPtrOffset copyOffset, //copyOffset bltSize, //copySize dstRowPitch, //hostRowPitch dstSlicePitch, //hostSlicePitch srcRowPitch, //gpuRowPitch srcSlicePitch //gpuSlicePitch ); blitBuffer(&csr, blitProperties, true); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; auto cmdIterator = cmdList.begin(); uint64_t offset = 0; for (uint32_t i = 0; i < 
totalNumberOfBits; i++) { auto bltCmd = genCmdCast(*(cmdIterator++)); EXPECT_NE(nullptr, bltCmd); uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); if (i % numberOfBltsForSingleBltSizeProgramm == numberOfBltsForSingleBltSizeProgramm - 1) { expectedWidth = bltLeftover; expectedHeight = 1; } if (i % numberOfBltsForSingleBltSizeProgramm == 0) { offset = 0; } EXPECT_EQ(expectedWidth, bltCmd->getTransferWidth()); EXPECT_EQ(expectedHeight, bltCmd->getTransferHeight()); EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch()); EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch()); auto rowIndex = (i / numberOfBltsForSingleBltSizeProgramm) % blitProperties.copySize.y; auto sliceIndex = i / (numberOfBltsForSingleBltSizeProgramm * blitProperties.copySize.y); auto dstAddr = NEO::BlitCommandsHelper::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, rowIndex, sliceIndex); auto srcAddr = NEO::BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(blitProperties, offset, rowIndex, sliceIndex); EXPECT_EQ(dstAddr, bltCmd->getDestinationBaseAddress()); EXPECT_EQ(srcAddr, bltCmd->getSourceBaseAddress()); offset += (expectedWidth * expectedHeight); } } INSTANTIATE_TEST_CASE_P(BcsDetaliedTest, BcsDetaliedTestsWithParams, ::testing::ValuesIn(BlitterProperties)); HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphoreAndAtomic) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); uint32_t numberOfDependencyContainers = 2; size_t numberNodesPerContainer = 5; auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 
0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); blitProperties.csrDependencies.push_back(×tamp0); blitProperties.csrDependencies.push_back(×tamp1); blitBuffer(&csr, blitProperties, true); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; bool xyCopyBltCmdFound = false; bool dependenciesFound = false; for (auto cmdIterator = cmdList.begin(); cmdIterator != cmdList.end(); cmdIterator++) { if (genCmdCast(*cmdIterator)) { xyCopyBltCmdFound = true; continue; } auto miSemaphore = genCmdCast(*cmdIterator); if (miSemaphore) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphore)) { continue; } dependenciesFound = true; EXPECT_FALSE(xyCopyBltCmdFound); auto miAtomic = genCmdCast(*(++cmdIterator)); EXPECT_NE(nullptr, miAtomic); for (uint32_t i = 1; i < numberOfDependencyContainers * numberNodesPerContainer; i++) { EXPECT_NE(nullptr, genCmdCast(*(++cmdIterator))); EXPECT_NE(nullptr, genCmdCast(*(++cmdIterator))); } } } EXPECT_TRUE(xyCopyBltCmdFound); EXPECT_TRUE(dependenciesFound); } HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommandsInCorrectOrder) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr1 = reinterpret_cast(0x12340000); void *hostPtr2 = reinterpret_cast(0x12340000); auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = 
BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2, buffer2->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*csr.getTimestampPacketAllocator(), 1); blitProperties1.csrDependencies.push_back(×tamp1); blitProperties2.csrDependencies.push_back(×tamp2); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); csr.blitBuffer(blitPropertiesContainer, true); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); auto &cmdList = hwParser.cmdList; uint32_t xyCopyBltCmdFound = 0; uint32_t dependenciesFound = 0; for (auto cmdIterator = cmdList.begin(); cmdIterator != cmdList.end(); cmdIterator++) { if (genCmdCast(*cmdIterator)) { xyCopyBltCmdFound++; EXPECT_EQ(xyCopyBltCmdFound, dependenciesFound); continue; } auto miSemaphore = genCmdCast(*cmdIterator); if (miSemaphore) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphore)) { continue; } dependenciesFound++; EXPECT_EQ(xyCopyBltCmdFound, dependenciesFound - 1); } } EXPECT_EQ(2u, xyCopyBltCmdFound); EXPECT_EQ(2u, dependenciesFound); } HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocationsResident) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr1 = reinterpret_cast(0x12340000); void *hostPtr2 = reinterpret_cast(0x43210000); EXPECT_EQ(0u, csr.makeSurfacePackNonResidentCalled); auto blitProperties1 = 
BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2, buffer2->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); csr.blitBuffer(blitPropertiesContainer, false); EXPECT_TRUE(csr.isMadeResident(buffer1->getGraphicsAllocation())); EXPECT_TRUE(csr.isMadeResident(buffer2->getGraphicsAllocation())); EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation())); EXPECT_EQ(1u, csr.makeSurfacePackNonResidentCalled); EXPECT_EQ(5u, csr.makeResidentAllocations.size()); } HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAllocationsResident) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; auto bcsOsContext = std::unique_ptr(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); auto bcsCsr = std::make_unique>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); bcsCsr->createGlobalFenceAllocation(); bcsCsr->storeMakeResidentAllocations = true; cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr1 = reinterpret_cast(0x12340000); void *hostPtr2 = reinterpret_cast(0x43210000); EXPECT_EQ(0u, bcsCsr->makeSurfacePackNonResidentCalled); auto 
blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2, buffer2->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); bcsCsr->blitBuffer(blitPropertiesContainer, false); EXPECT_TRUE(bcsCsr->isMadeResident(buffer1->getGraphicsAllocation())); EXPECT_TRUE(bcsCsr->isMadeResident(buffer2->getGraphicsAllocation())); EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->getTagAllocation())); EXPECT_TRUE(bcsCsr->isMadeResident(bcsCsr->globalFenceAllocation)); EXPECT_EQ(1u, bcsCsr->makeSurfacePackNonResidentCalled); EXPECT_EQ(6u, bcsCsr->makeResidentAllocations.size()); } HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.recordFlusheBatchBuffer = true; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); auto &commandStream = csr.getCS(MemoryConstants::pageSize); size_t commandStreamOffset = 4; commandStream.getSpace(commandStreamOffset); uint32_t newTaskCount = 17; csr.taskCount = newTaskCount - 1; auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); 
EXPECT_EQ(commandStream.getGraphicsAllocation(), csr.latestFlushedBatchBuffer.commandBufferAllocation); EXPECT_EQ(commandStreamOffset, csr.latestFlushedBatchBuffer.startOffset); EXPECT_EQ(0u, csr.latestFlushedBatchBuffer.chainedBatchBufferStartOffset); EXPECT_EQ(nullptr, csr.latestFlushedBatchBuffer.chainedBatchBuffer); EXPECT_FALSE(csr.latestFlushedBatchBuffer.requiresCoherency); EXPECT_FALSE(csr.latestFlushedBatchBuffer.low_priority); EXPECT_EQ(QueueThrottle::MEDIUM, csr.latestFlushedBatchBuffer.throttle); EXPECT_EQ(commandStream.getUsed(), csr.latestFlushedBatchBuffer.usedSize); EXPECT_EQ(&commandStream, csr.latestFlushedBatchBuffer.stream); EXPECT_EQ(newTaskCount, csr.latestWaitForCompletionWithTimeoutTaskCount.load()); } HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) { class MyMockCsr : public UltCommandStreamReceiver { public: using UltCommandStreamReceiver::UltCommandStreamReceiver; void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override { waitForTaskCountWithKmdNotifyFallbackCalled++; taskCountToWaitPassed = taskCountToWait; flushStampToWaitPassed = flushStampToWait; useQuickKmdSleepPassed = useQuickKmdSleep; forcePowerSavingModePassed = forcePowerSavingMode; } uint32_t taskCountToWaitPassed = 0; FlushStamp flushStampToWaitPassed = 0; bool useQuickKmdSleepPassed = false; bool forcePowerSavingModePassed = false; uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0; }; auto myMockCsr = std::make_unique<::testing::NiceMock>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex()); auto &bcsOsContext = pDevice->getUltCommandStreamReceiver().getOsContext(); myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(bcsOsContext); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); auto blitProperties = 
// NOTE(review): remainder of the previous test body (it begins on an earlier line); left as-is.
BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *myMockCsr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(myMockCsr.get(), blitProperties, false); EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled); blitBuffer(myMockCsr.get(), blitProperties, true); EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed); EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed); EXPECT_FALSE(myMockCsr->useQuickKmdSleepPassed); EXPECT_FALSE(myMockCsr->forcePowerSavingModePassed); }

// Checks when the CSR's internal allocation storage is asked to clean up after a blit:
// a blit issued with the last argument `false` must not trigger any cleanup, while the
// same blit issued with `true` must trigger exactly one, using the CSR's current task
// count and the TEMPORARY_ALLOCATION usage.
// NOTE(review): the template arguments below (<FamilyType>, <Buffer>, <void *>) were
// reconstructed from the declared result types - confirm against the upstream file.
HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) {
    auto &bcsCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // The mock is handed to the CSR via reset(); the raw pointer is kept only to read its counters.
    auto mockInternalAllocationsStorage = new MockInternalAllocationStorage(bcsCsr);
    bcsCsr.internalAllocationStorage.reset(mockInternalAllocationsStorage);

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    bcsCsr.taskCount = 17;
    EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled);

    auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, bcsCsr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0);

    // Last argument false: no cleanup expected.
    blitBuffer(&bcsCsr, blitProperties, false);
    EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled);

    // Last argument true: exactly one cleanup, for the task count reached by this blit.
    blitBuffer(&bcsCsr, blitProperties, true);
    EXPECT_EQ(1u, mockInternalAllocationsStorage->cleanAllocationsCalled);
    EXPECT_EQ(bcsCsr.taskCount, mockInternalAllocationsStorage->lastCleanAllocationsTaskCount);
    // EXPECT_EQ (rather than EXPECT_TRUE on ==) so a failure reports both values.
    EXPECT_EQ(TEMPORARY_ALLOCATION, mockInternalAllocationsStorage->lastCleanAllocationUsage);
}
// Checks the source and destination base addresses programmed into the first command
// parsed after each blit, for four cases:
//   1. host pointer -> sub-buffer
//   2. sub-buffer -> host pointer
//   3. buffer -> buffer (constructPropertiesForCopyBuffer)
//   4. sub-buffer -> sub-buffer (ClBlitProperties::constructProperties)
// The host-pointer side is only compared when pDevice->isFullRangeSvm() is true.
// Every scope after the first records commandStream.getUsed() before blitting and
// parses from that offset, so the command it inspects comes from its own blit.
// NOTE(review): the template arguments below (<FamilyType>, <Buffer>, <void *>,
// <uint64_t>, <typename FamilyType::XY_COPY_BLT *>) were reconstructed from the
// declared result types and the accessors used - confirm against the upstream file.
HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddresses) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    cl_int retVal = CL_SUCCESS;
    auto buffer1 = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal));
    auto buffer2 = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal));
    void *hostPtr = reinterpret_cast<void *>(0x12340000);
    const size_t hostPtrOffset = 0x1234;

    const size_t subBuffer1Offset = 0x23;
    cl_buffer_region subBufferRegion1 = {subBuffer1Offset, 1};
    auto subBuffer1 = clUniquePtr<Buffer>(buffer1->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion1, retVal));

    {
        // from hostPtr
        HardwareParse hwParser;
        auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, subBuffer1->getGraphicsAllocation(), nullptr, hostPtr, subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1->getOffset(), 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);

        blitBuffer(&csr, blitProperties, true);

        hwParser.parseCommands(csr.commandStream);

        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*hwParser.cmdList.begin());
        // Fatal on purpose: the checks below dereference bltCmd.
        ASSERT_NE(nullptr, bltCmd);
        if (pDevice->isFullRangeSvm()) {
            EXPECT_EQ(reinterpret_cast<uint64_t>(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress());
        }
        EXPECT_EQ(subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1Offset, bltCmd->getDestinationBaseAddress());
    }
    {
        // to hostPtr
        HardwareParse hwParser;
        auto offset = csr.commandStream.getUsed();
        auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, csr, subBuffer1->getGraphicsAllocation(), nullptr, hostPtr, subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1->getOffset(), 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);

        blitBuffer(&csr, blitProperties, true);

        hwParser.parseCommands(csr.commandStream, offset);

        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*hwParser.cmdList.begin());
        ASSERT_NE(nullptr, bltCmd);
        if (pDevice->isFullRangeSvm()) {
            EXPECT_EQ(reinterpret_cast<uint64_t>(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress());
        }
        EXPECT_EQ(subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress());
    }
    {
        // Buffer to Buffer
        HardwareParse hwParser;
        auto offset = csr.commandStream.getUsed();
        auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(buffer1->getGraphicsAllocation(), buffer2->getGraphicsAllocation(), 0, 0, 1);

        blitBuffer(&csr, blitProperties, true);

        hwParser.parseCommands(csr.commandStream, offset);

        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*hwParser.cmdList.begin());
        ASSERT_NE(nullptr, bltCmd);
        // The first allocation passed in is expected as destination, the second as source.
        EXPECT_EQ(buffer1->getGraphicsAllocation()->getGpuAddress(), bltCmd->getDestinationBaseAddress());
        EXPECT_EQ(buffer2->getGraphicsAllocation()->getGpuAddress(), bltCmd->getSourceBaseAddress());
    }
    {
        // Buffer to Buffer - with object offset
        const size_t subBuffer2Offset = 0x20;
        cl_buffer_region subBufferRegion2 = {subBuffer2Offset, 1};
        auto subBuffer2 = clUniquePtr<Buffer>(buffer2->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion2, retVal));

        BuiltinOpParams builtinOpParams = {};
        builtinOpParams.dstMemObj = subBuffer2.get();
        builtinOpParams.srcMemObj = subBuffer1.get();
        builtinOpParams.size.x = 1;

        auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, csr, builtinOpParams);

        auto offset = csr.commandStream.getUsed();
        blitBuffer(&csr, blitProperties, true);

        HardwareParse hwParser;
        hwParser.parseCommands(csr.commandStream, offset);

        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*hwParser.cmdList.begin());
        ASSERT_NE(nullptr, bltCmd);
        // Both sides must include the sub-buffer offset on top of the parent allocation address.
        EXPECT_EQ(buffer2->getGraphicsAllocation()->getGpuAddress() + subBuffer2Offset, bltCmd->getDestinationBaseAddress());
        EXPECT_EQ(buffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress());
    }
}

// NOTE(review): opening of the next test (its body continues on the following line); left as-is.
HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidGpuAddress) { auto &csr = 
pDevice->getUltCommandStreamReceiver(); auto memoryManager = csr.getMemoryManager(); AllocationProperties properties{csr.getRootDeviceIndex(), false, 1234, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false}; GraphicsAllocation *mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, reinterpret_cast(0x12340000)); auto mapAllocationOffset = 0x1234; auto mapPtr = reinterpret_cast(mapAllocation->getGpuAddress() + mapAllocationOffset); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal)); const size_t hostPtrOffset = 0x1234; { // from hostPtr HardwareParse hwParser; auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(), mapAllocation, mapPtr, buffer->getGraphicsAllocation()->getGpuAddress(), castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); hwParser.parseCommands(csr.commandStream); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddress(), bltCmd->getDestinationBaseAddress()); } { // to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, csr, buffer->getGraphicsAllocation(), mapAllocation, mapPtr, buffer->getGraphicsAllocation()->getGpuAddress(), castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); hwParser.parseCommands(csr.commandStream, offset); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { 
EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddress(), bltCmd->getSourceBaseAddress()); } { // bufferRect to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, csr, buffer->getGraphicsAllocation(), mapAllocation, mapPtr, buffer->getGraphicsAllocation()->getGpuAddress(), castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); hwParser.parseCommands(csr.commandStream, offset); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddress(), bltCmd->getSourceBaseAddress()); } memoryManager->freeGraphicsMemory(mapAllocation); } HWTEST_F(BcsTests, givenMapAllocationInBuiltinOpParamsWhenConstructingThenUseItAsSourceOrDstAllocation) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto memoryManager = csr.getMemoryManager(); AllocationProperties properties{csr.getRootDeviceIndex(), false, 1234, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false}; GraphicsAllocation *mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, reinterpret_cast(0x12340000)); auto mapAllocationOffset = 0x1234; auto mapPtr = reinterpret_cast(mapAllocation->getGpuAddress() + mapAllocationOffset); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 100, nullptr, retVal)); { // from hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.dstMemObj = buffer.get(); builtinOpParams.srcPtr = mapPtr; builtinOpParams.size = {1, 1, 1}; builtinOpParams.transferAllocation = mapAllocation; auto 
blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); EXPECT_EQ(mapAllocation, blitProperties.srcAllocation); } { // to hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.srcMemObj = buffer.get(); builtinOpParams.dstPtr = mapPtr; builtinOpParams.size = {1, 1, 1}; builtinOpParams.transferAllocation = mapAllocation; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); EXPECT_EQ(mapAllocation, blitProperties.dstAllocation); } memoryManager->freeGraphicsMemory(mapAllocation); } HWTEST_F(BcsTests, givenNonZeroCopySvmAllocationWhenConstructingBlitPropertiesForReadWriteBufferCallThenSetValidAllocations) { auto &csr = pDevice->getUltCommandStreamReceiver(); MockMemoryManager mockMemoryManager(true, true); SVMAllocsManager svmAllocsManager(&mockMemoryManager); auto svmAllocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_WRITE); auto svmAlloc = svmAllocsManager.createSVMAlloc(csr.getRootDeviceIndex(), 1, svmAllocationProperties); auto svmData = svmAllocsManager.getSVMAlloc(svmAlloc); EXPECT_NE(nullptr, svmData->gpuAllocation); EXPECT_NE(nullptr, svmData->cpuAllocation); EXPECT_NE(svmData->gpuAllocation, svmData->cpuAllocation); { // from hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.dstSvmAlloc = svmData->gpuAllocation; builtinOpParams.srcSvmAlloc = svmData->cpuAllocation; builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress()); builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); EXPECT_EQ(svmData->cpuAllocation, blitProperties.srcAllocation); EXPECT_EQ(svmData->gpuAllocation, blitProperties.dstAllocation); } { // to hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.srcSvmAlloc = svmData->gpuAllocation; 
builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress()); builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); EXPECT_EQ(svmData->gpuAllocation, blitProperties.srcAllocation); } svmAllocsManager.freeSVMAlloc(svmAlloc); } HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) { auto &csr = pDevice->getUltCommandStreamReceiver(); MockMemoryManager mockMemoryManager(true, true); SVMAllocsManager svmAllocsManager(&mockMemoryManager); auto svmAllocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_WRITE); auto svmAlloc = svmAllocsManager.createSVMAlloc(csr.getRootDeviceIndex(), 1, svmAllocationProperties); auto svmData = svmAllocsManager.getSVMAlloc(svmAlloc); EXPECT_NE(nullptr, svmData->gpuAllocation); EXPECT_NE(nullptr, svmData->cpuAllocation); EXPECT_NE(svmData->gpuAllocation, svmData->cpuAllocation); uint64_t srcOffset = 2; uint64_t dstOffset = 3; { // from hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.srcSvmAlloc = svmData->gpuAllocation; builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + srcOffset); builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + dstOffset); builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); EXPECT_EQ(svmData->gpuAllocation, blitProperties.srcAllocation); EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); blitBuffer(&csr, blitProperties, true); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); 
EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); } { // to hostPtr BuiltinOpParams builtinOpParams = {}; builtinOpParams.srcSvmAlloc = svmData->gpuAllocation; builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation + dstOffset); builtinOpParams.srcPtr = reinterpret_cast(svmData->gpuAllocation + srcOffset); builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); auto offset = csr.commandStream.getUsed(); blitBuffer(&csr, blitProperties, true); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, offset); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); } svmAllocsManager.freeSVMAlloc(svmAlloc); } HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrectGpuAddresses) { auto &csr = pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); size_t addressOffsets[] = {0, 1, 1234}; for (auto buffer1Offset : addressOffsets) { { // from hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); 
hwParser.parseCommands(csr.commandStream, offset); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getSourceBaseAddress()); } EXPECT_EQ(ptrOffset(buffer1->getGraphicsAllocation()->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); } { // to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); hwParser.parseCommands(csr.commandStream, offset); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); EXPECT_NE(nullptr, bltCmd); if (pDevice->isFullRangeSvm()) { EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getDestinationBaseAddress()); } EXPECT_EQ(ptrOffset(buffer1->getGraphicsAllocation()->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); } for (auto buffer2Offset : addressOffsets) { // Buffer to Buffer HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(buffer1->getGraphicsAllocation(), buffer2->getGraphicsAllocation(), buffer1Offset, buffer2Offset, 1); blitBuffer(&csr, blitProperties, true); hwParser.parseCommands(csr.commandStream, offset); auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); EXPECT_NE(nullptr, bltCmd); EXPECT_EQ(ptrOffset(buffer1->getGraphicsAllocation()->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(ptrOffset(buffer2->getGraphicsAllocation()->getGpuAddress(), buffer2Offset), bltCmd->getSourceBaseAddress()); } } } HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCorrectly) { auto &csr = 
pDevice->getUltCommandStreamReceiver(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 123, nullptr, retVal)); auto allocationGpuAddress = buffer->getGraphicsAllocation()->getGpuAddress(); auto allocationSize = buffer->getGraphicsAllocation()->getUnderlyingBufferSize(); AuxTranslationDirection translationDirection[] = {AuxTranslationDirection::AuxToNonAux, AuxTranslationDirection::NonAuxToAux}; for (int i = 0; i < 2; i++) { auto blitProperties = BlitProperties::constructPropertiesForAuxTranslation(translationDirection[i], buffer->getGraphicsAllocation()); auto offset = csr.commandStream.getUsed(); blitBuffer(&csr, blitProperties, false); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, offset); uint32_t xyCopyBltCmdFound = 0; for (auto &cmd : hwParser.cmdList) { if (auto bltCmd = genCmdCast(cmd)) { xyCopyBltCmdFound++; EXPECT_EQ(static_cast(allocationSize), bltCmd->getTransferWidth()); EXPECT_EQ(1u, bltCmd->getTransferHeight()); EXPECT_EQ(allocationGpuAddress, bltCmd->getDestinationBaseAddress()); EXPECT_EQ(allocationGpuAddress, bltCmd->getSourceBaseAddress()); } } EXPECT_EQ(1u, xyCopyBltCmdFound); } } HWTEST_F(BcsTests, givenInvalidBlitDirectionWhenConstructPropertiesThenExceptionIsThrow) { auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_THROW(ClBlitProperties::constructProperties(static_cast(7), csr, {}), std::exception); } struct MockScratchSpaceController : ScratchSpaceControllerBase { using ScratchSpaceControllerBase::privateScratchAllocation; using ScratchSpaceControllerBase::ScratchSpaceControllerBase; }; using ScratchSpaceControllerTest = Test; TEST_F(ScratchSpaceControllerTest, whenScratchSpaceControllerIsDestroyedThenItReleasePrivateScratchSpaceAllocation) { MockScratchSpaceController scratchSpaceController(pDevice->getRootDeviceIndex(), *pDevice->getExecutionEnvironment(), *pDevice->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); 
scratchSpaceController.privateScratchAllocation = pDevice->getExecutionEnvironment()->memoryManager->allocateGraphicsMemoryInPreferredPool(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}, nullptr); EXPECT_NE(nullptr, scratchSpaceController.privateScratchAllocation); //no memory leak is expected } TEST(BcsConstantsTests, givenBlitConstantsThenTheyHaveDesiredValues) { EXPECT_EQ(BlitterConstants::maxBlitWidth, 0x7FC0u); EXPECT_EQ(BlitterConstants::maxBlitHeight, 0x3FC0u); } command_stream_receiver_hw_tests.inl000066400000000000000000000104561363734646600340530ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ using namespace NEO; template struct CommandStreamReceiverHwTest : public DeviceFixture, public HardwareParse, public ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); DeviceFixture::TearDown(); } void givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl(); void givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl(); }; template void CommandStreamReceiverHwTest::givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl() { typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); // Mark Preamble as sent, override L3Config to invalid to programL3 commandStreamReceiver->isPreambleSent = true; 
commandStreamReceiver->lastSentL3Config = 0; static_cast(kernel)->setTotalSLMSize(1024); cmdList.clear(); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); // Parse command list to verify that PC was added to taskCS parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_NE(cmdList.end(), itorCmd); auto cmdMILoad = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdMILoad); // MI_LOAD_REGISTER should be preceded by PC EXPECT_NE(cmdList.begin(), itorCmd); --itorCmd; auto cmdPC = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdPC); uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, true); EXPECT_EQ(L3Config, static_cast(cmdMILoad->getDataDword())); } template void CommandStreamReceiverHwTest::givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl() { typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); cl_event blockingEvent; MockEvent mockEvent(&ctx); blockingEvent = &mockEvent; auto &commandStreamCSR = commandStreamReceiver->getCS(); uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, false); // Mark Pramble as sent, override L3Config to SLM config commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; static_cast(kernel)->setTotalSLMSize(1024); commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 1, &blockingEvent, nullptr); // Expect nothing was sent EXPECT_EQ(0u, commandStreamCSR.getUsed()); // Unblock Event mockEvent.setStatus(CL_COMPLETE); cmdList.clear(); // Parse command list parseCommands(commandStreamCSR, 0); // Expect 
L3 was programmed auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_NE(cmdList.end(), itorCmd); auto cmdMILoad = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmdMILoad); L3Config = PreambleHelper::getL3Config(*defaultHwInfo, true); EXPECT_EQ(L3Config, static_cast(cmdMILoad->getDataDword())); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp000066400000000000000000001233771363734646600334430ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/gen_common/matchers.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "command_stream_receiver_simulated_hw.h" #include "gmock/gmock.h" using namespace NEO; struct CommandStreamReceiverTest : public DeviceFixture, public ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); commandStreamReceiver = &pDevice->getGpgpuCommandStreamReceiver(); ASSERT_NE(nullptr, commandStreamReceiver); memoryManager = commandStreamReceiver->getMemoryManager(); internalAllocationStorage = commandStreamReceiver->getInternalAllocationStorage(); } void TearDown() override { DeviceFixture::TearDown(); } CommandStreamReceiver *commandStreamReceiver; MemoryManager *memoryManager; InternalAllocationStorage *internalAllocationStorage; }; HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenDefaultValuesAreSet) { auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(0u, csr.peekTaskLevel()); EXPECT_EQ(0u, csr.peekTaskCount()); EXPECT_FALSE(csr.isPreambleSent); } HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.initProgrammingFlags(); EXPECT_FALSE(csr.isPreambleSent); EXPECT_FALSE(csr.GSBAFor32BitProgrammed); EXPECT_TRUE(csr.mediaVfeStateDirty); EXPECT_FALSE(csr.lastVmeSubslicesConfig); EXPECT_EQ(0u, csr.lastSentL3Config); EXPECT_EQ(-1, csr.lastSentCoherencyRequest); EXPECT_EQ(-1, csr.lastMediaSamplerConfig); EXPECT_EQ(PreemptionMode::Initial, csr.lastPreemptionMode); EXPECT_EQ(0u, csr.latestSentStatelessMocsConfig); } TEST_F(CommandStreamReceiverTest, 
WhenMakingResidentThenBufferResidencyFlagIsSet) { MockContext context; float srcMemory[] = {1.0f}; auto retVal = CL_INVALID_VALUE; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, sizeof(srcMemory), srcMemory, retVal); ASSERT_NE(nullptr, buffer); EXPECT_FALSE(buffer->getGraphicsAllocation()->isResident(commandStreamReceiver->getOsContext().getContextId())); commandStreamReceiver->makeResident(*buffer->getGraphicsAllocation()); EXPECT_TRUE(buffer->getGraphicsAllocation()->isResident(commandStreamReceiver->getOsContext().getContextId())); delete buffer; } TEST_F(CommandStreamReceiverTest, givenBaseDownloadAllocationCalledThenDoesNotChangeAnything) { auto *memoryManager = commandStreamReceiver->getMemoryManager(); GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{commandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); auto numEvictionAllocsBefore = commandStreamReceiver->getEvictionAllocations().size(); commandStreamReceiver->CommandStreamReceiver::downloadAllocation(*graphicsAllocation); auto numEvictionAllocsAfter = commandStreamReceiver->getEvictionAllocations().size(); EXPECT_EQ(numEvictionAllocsBefore, numEvictionAllocsAfter); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(CommandStreamReceiverTest, WhenCommandStreamReceiverIsCreatedThenItHasATagValue) { EXPECT_NE(nullptr, const_cast(commandStreamReceiver->getTagAddress())); } TEST_F(CommandStreamReceiverTest, WhenGettingCommandStreamerThenValidPointerIsReturned) { auto &cs = commandStreamReceiver->getCS(); EXPECT_NE(nullptr, &cs); } TEST_F(CommandStreamReceiverTest, WhenCommandStreamReceiverIsCreatedThenAvailableMemoryIsGreaterOrEqualRequiredSize) { size_t requiredSize = 16384; const auto &commandStream = commandStreamReceiver->getCS(requiredSize); ASSERT_NE(nullptr, &commandStream); EXPECT_GE(commandStream.getAvailableSpace(), requiredSize); } 
TEST_F(CommandStreamReceiverTest, WhenCommandStreamReceiverIsCreatedThenCsOverfetchSizeIsIncludedInGraphicsAllocation) { size_t sizeRequested = 560; const auto &commandStream = commandStreamReceiver->getCS(sizeRequested); ASSERT_NE(nullptr, &commandStream); auto *allocation = commandStream.getGraphicsAllocation(); ASSERT_NE(nullptr, allocation); size_t expectedTotalSize = alignUp(sizeRequested + MemoryConstants::cacheLineSize + CSRequirements::csOverfetchSize, MemoryConstants::pageSize64k); EXPECT_LT(commandStream.getAvailableSpace(), expectedTotalSize); EXPECT_LE(commandStream.getAvailableSpace(), expectedTotalSize - CSRequirements::csOverfetchSize); EXPECT_EQ(expectedTotalSize, allocation->getUnderlyingBufferSize()); } TEST_F(CommandStreamReceiverTest, WhenRequestingAdditionalSpaceThenCsrGetsAdditionalSpace) { auto &commandStreamInitial = commandStreamReceiver->getCS(); size_t requiredSize = commandStreamInitial.getMaxAvailableSpace() + 42; const auto &commandStream = commandStreamReceiver->getCS(requiredSize); ASSERT_NE(nullptr, &commandStream); EXPECT_GE(commandStream.getMaxAvailableSpace(), requiredSize); } TEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenGetCSIsCalledThenCommandStreamAllocationTypeShouldBeSetToLinearStream) { const auto &commandStream = commandStreamReceiver->getCS(); auto commandStreamAllocation = commandStream.getGraphicsAllocation(); ASSERT_NE(nullptr, commandStreamAllocation); EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, commandStreamAllocation->getAllocationType()); } HWTEST_F(CommandStreamReceiverTest, whenStoreAllocationThenStoredAllocationHasTaskCountFromCsr) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto *memoryManager = csr.getMemoryManager(); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_FALSE(allocation->isUsed()); csr.taskCount = 2u; 
csr.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_EQ(csr.peekTaskCount(), allocation->getTaskCount(csr.getOsContext().getContextId())); } HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenCheckedForInitialStatusOfStatelessMocsIndexThenUnknownMocsIsReturend) { auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(CacheSettings::unknownMocs, csr.latestSentStatelessMocsConfig); } TEST_F(CommandStreamReceiverTest, WhenMakingResidentThenAllocationIsPushedToMemoryManagerResidencyList) { auto *memoryManager = commandStreamReceiver->getMemoryManager(); GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{commandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); commandStreamReceiver->makeResident(*graphicsAllocation); auto &residencyAllocations = commandStreamReceiver->getResidencyAllocations(); ASSERT_EQ(1u, residencyAllocations.size()); EXPECT_EQ(graphicsAllocation, residencyAllocations[0]); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(CommandStreamReceiverTest, GivenNoParamatersWhenMakingResidentThenResidencyDoesNotOccur) { commandStreamReceiver->processResidency(commandStreamReceiver->getResidencyAllocations(), 0u); auto &residencyAllocations = commandStreamReceiver->getResidencyAllocations(); EXPECT_EQ(0u, residencyAllocations.size()); } TEST_F(CommandStreamReceiverTest, givenForced32BitAddressingWhenDebugSurfaceIsAllocatedThenRegularAllocationIsReturned) { auto *memoryManager = commandStreamReceiver->getMemoryManager(); memoryManager->setForce32BitAllocations(true); auto allocation = commandStreamReceiver->allocateDebugSurface(1024); EXPECT_FALSE(allocation->is32BitAllocation()); } HWTEST_F(CommandStreamReceiverTest, givenDefaultCommandStreamReceiverThenDefaultDispatchingPolicyIsImmediateSubmission) { auto &csr = 
pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(DispatchMode::ImmediateDispatch, csr.dispatchMode); } HWTEST_F(CommandStreamReceiverTest, givenCsrWhenGetIndirectHeapIsCalledThenHeapIsReturned) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto &heap = csr.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10u); EXPECT_NE(nullptr, heap.getGraphicsAllocation()); EXPECT_NE(nullptr, csr.indirectHeap[IndirectHeap::DYNAMIC_STATE]); EXPECT_EQ(&heap, csr.indirectHeap[IndirectHeap::DYNAMIC_STATE]); } HWTEST_F(CommandStreamReceiverTest, givenCsrWhenReleaseIndirectHeapIsCalledThenHeapAllocationIsNull) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto &heap = csr.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 10u); csr.releaseIndirectHeap(IndirectHeap::DYNAMIC_STATE); EXPECT_EQ(nullptr, heap.getGraphicsAllocation()); EXPECT_EQ(0u, heap.getMaxAvailableSpace()); } HWTEST_F(CommandStreamReceiverTest, givenCsrWhenAllocateHeapMemoryIsCalledThenHeapMemoryIsAllocated) { auto &csr = pDevice->getUltCommandStreamReceiver(); IndirectHeap *dsh = nullptr; csr.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh); EXPECT_NE(nullptr, dsh); ASSERT_NE(nullptr, dsh->getGraphicsAllocation()); csr.getMemoryManager()->freeGraphicsMemory(dsh->getGraphicsAllocation()); delete dsh; } TEST(CommandStreamReceiverSimpleTest, givenCsrWithoutTagAllocationWhenGetTagAllocationIsCalledThenNullptrIsReturned) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); MockCommandStreamReceiver csr(executionEnvironment, 0); EXPECT_EQ(nullptr, csr.getTagAllocation()); } TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); MockCommandStreamReceiver csr(executionEnvironment, 0); 
GraphicsAllocation *commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer residencyList; auto expectedTaskCount = csr.peekTaskCount() + 1; csr.submitBatchBuffer(batchBuffer, residencyList); EXPECT_EQ(expectedTaskCount, csr.peekTaskCount()); EXPECT_EQ(expectedTaskCount, csr.peekLatestFlushedTaskCount()); EXPECT_EQ(expectedTaskCount, csr.peekLatestSentTaskCount()); executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer); } HWTEST_F(CommandStreamReceiverTest, givenDebugVariableEnabledWhenCreatingCsrThenEnableTimestampPacketWriteMode) { DebugManagerStateRestore restore; DebugManager.flags.EnableTimestampPacket.set(true); CommandStreamReceiverHw csr1(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_TRUE(csr1.peekTimestampPacketWriteEnabled()); DebugManager.flags.EnableTimestampPacket.set(false); CommandStreamReceiverHw csr2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_FALSE(csr2.peekTimestampPacketWriteEnabled()); } HWTEST_F(CommandStreamReceiverTest, whenCsrIsCreatedThenUseTimestampPacketWriteIfPossible) { CommandStreamReceiverHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(UnitTestHelper::isTimestampPacketWriteSupported(), csr.peekTimestampPacketWriteEnabled()); } TEST_F(CommandStreamReceiverTest, whenGetEventTsAllocatorIsCalledItReturnsSameTagAllocator) { TagAllocator *allocator = commandStreamReceiver->getEventTsAllocator(); EXPECT_NE(nullptr, allocator); TagAllocator *allocator2 = commandStreamReceiver->getEventTsAllocator(); EXPECT_EQ(allocator2, allocator); } TEST_F(CommandStreamReceiverTest, 
whenGetEventPerfCountAllocatorIsCalledItReturnsSameTagAllocator) { const uint32_t gpuReportSize = 100; TagAllocator *allocator = commandStreamReceiver->getEventPerfCountAllocator(gpuReportSize); EXPECT_NE(nullptr, allocator); TagAllocator *allocator2 = commandStreamReceiver->getEventPerfCountAllocator(gpuReportSize); EXPECT_EQ(allocator2, allocator); } HWTEST_F(CommandStreamReceiverTest, givenTimestampPacketAllocatorWhenAskingForTagThenReturnValidObject) { auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(nullptr, csr.timestampPacketAllocator.get()); TagAllocator *allocator = csr.getTimestampPacketAllocator(); EXPECT_NE(nullptr, csr.timestampPacketAllocator.get()); EXPECT_EQ(allocator, csr.timestampPacketAllocator.get()); TagAllocator *allocator2 = csr.getTimestampPacketAllocator(); EXPECT_EQ(allocator, allocator2); auto node1 = allocator->getTag(); auto node2 = allocator->getTag(); EXPECT_NE(nullptr, node1); EXPECT_NE(nullptr, node2); EXPECT_NE(node1, node2); } HWTEST_F(CommandStreamReceiverTest, givenUltCommandStreamReceiverWhenAddAubCommentIsCalledThenCallAddAubCommentOnCsr) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.addAubComment("message"); EXPECT_TRUE(csr.addAubCommentCalled); } TEST(CommandStreamReceiverSimpleTest, givenCsrWhenDownloadAllocationCalledVerifyCallOccurs) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); MockCommandStreamReceiver csr(executionEnvironment, 0); MockGraphicsAllocation graphicsAllocation; csr.downloadAllocation(graphicsAllocation); EXPECT_TRUE(csr.downloadAllocationCalled); } HWTEST_F(CommandStreamReceiverTest, givenUltCommandStreamReceiverWhenDownloadAllocationIsCalledThenVerifyCallOccurs) { auto &csr = pDevice->getUltCommandStreamReceiver(); auto *memoryManager = commandStreamReceiver->getMemoryManager(); GraphicsAllocation *graphicsAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); csr.downloadAllocation(*graphicsAllocation); EXPECT_TRUE(csr.downloadAllocationCalled); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenItIsDestroyedThenItDestroysTagAllocation) { struct MockGraphicsAllocationWithDestructorTracing : public MockGraphicsAllocation { using MockGraphicsAllocation::MockGraphicsAllocation; ~MockGraphicsAllocationWithDestructorTracing() override { *destructorCalled = true; } bool *destructorCalled = nullptr; }; bool destructorCalled = false; auto mockGraphicsAllocation = new MockGraphicsAllocationWithDestructorTracing(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0llu, 0llu, 1u, MemoryPool::MemoryNull); mockGraphicsAllocation->destructorCalled = &destructorCalled; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto csr = std::make_unique(executionEnvironment, 0); executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); csr->setTagAllocation(mockGraphicsAllocation); EXPECT_FALSE(destructorCalled); csr.reset(nullptr); EXPECT_TRUE(destructorCalled); } TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTagAllocationIsCalledThenTagAllocationIsBeingAllocated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto csr = std::make_unique(executionEnvironment, 0); executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_EQ(nullptr, csr->getTagAllocation()); EXPECT_TRUE(csr->getTagAddress() == nullptr); csr->initializeTagAllocation(); EXPECT_NE(nullptr, csr->getTagAllocation()); EXPECT_EQ(GraphicsAllocation::AllocationType::TAG_BUFFER, csr->getTagAllocation()->getAllocationType()); EXPECT_TRUE(csr->getTagAddress() != nullptr); 
EXPECT_EQ(*csr->getTagAddress(), initialHardwareTag); } HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndCreateGlobalFenceAllocationIsCalledThenFenceAllocationIsAllocated) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; MockCsrHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); EXPECT_EQ(nullptr, csr.globalFenceAllocation); EXPECT_TRUE(csr.createGlobalFenceAllocation()); ASSERT_NE(nullptr, csr.globalFenceAllocation); EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, csr.globalFenceAllocation->getAllocationType()); } TEST(CommandStreamReceiverSimpleTest, givenNullHardwareDebugModeWhenInitializeTagAllocationIsCalledThenTagAllocationIsBeingAllocatedAndinitialValueIsMinusOne) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableNullHardware.set(true); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto csr = std::make_unique(executionEnvironment, 0); executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_EQ(nullptr, csr->getTagAllocation()); EXPECT_TRUE(csr->getTagAddress() == nullptr); csr->initializeTagAllocation(); EXPECT_NE(nullptr, csr->getTagAllocation()); EXPECT_TRUE(csr->getTagAddress() != nullptr); EXPECT_EQ(*csr->getTagAddress(), static_cast(-1)); } TEST(CommandStreamReceiverSimpleTest, givenVariousDataSetsWhenVerifyingMemoryThenCorrectValueIsReturned) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); MockCommandStreamReceiver csr(executionEnvironment, 0); constexpr size_t setSize = 6; uint8_t setA1[setSize] = {4, 3, 2, 1, 2, 10}; uint8_t setA2[setSize] = {4, 3, 2, 1, 2, 10}; uint8_t setB1[setSize] = {40, 15, 3, 11, 17, 4}; uint8_t setB2[setSize] = {40, 15, 3, 11, 17, 4}; constexpr auto compareEqual = 
AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual; constexpr auto compareNotEqual = AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareNotEqual; EXPECT_TRUE(csr.expectMemory(setA1, setA2, setSize, compareEqual)); EXPECT_TRUE(csr.expectMemory(setB1, setB2, setSize, compareEqual)); EXPECT_FALSE(csr.expectMemory(setA1, setA2, setSize, compareNotEqual)); EXPECT_FALSE(csr.expectMemory(setB1, setB2, setSize, compareNotEqual)); EXPECT_FALSE(csr.expectMemory(setA1, setB1, setSize, compareEqual)); EXPECT_FALSE(csr.expectMemory(setA2, setB2, setSize, compareEqual)); EXPECT_TRUE(csr.expectMemory(setA1, setB1, setSize, compareNotEqual)); EXPECT_TRUE(csr.expectMemory(setA2, setB2, setSize, compareNotEqual)); } TEST(CommandStreamReceiverMultiContextTests, givenMultipleCsrsWhenSameResourcesAreUsedThenResidencyIsProperlyHandled) { auto executionEnvironment = platform()->peekExecutionEnvironment(); std::unique_ptr device(Device::create(executionEnvironment, 0u)); auto &commandStreamReceiver0 = *device->commandStreamReceivers[0]; auto &commandStreamReceiver1 = *device->commandStreamReceivers[1]; auto csr0ContextId = commandStreamReceiver0.getOsContext().getContextId(); auto csr1ContextId = commandStreamReceiver1.getOsContext().getContextId(); MockGraphicsAllocation graphicsAllocation; commandStreamReceiver0.makeResident(graphicsAllocation); EXPECT_EQ(1u, commandStreamReceiver0.getResidencyAllocations().size()); EXPECT_EQ(0u, commandStreamReceiver1.getResidencyAllocations().size()); commandStreamReceiver1.makeResident(graphicsAllocation); EXPECT_EQ(1u, commandStreamReceiver0.getResidencyAllocations().size()); EXPECT_EQ(1u, commandStreamReceiver1.getResidencyAllocations().size()); EXPECT_EQ(1u, graphicsAllocation.getResidencyTaskCount(csr0ContextId)); EXPECT_EQ(1u, graphicsAllocation.getResidencyTaskCount(csr1ContextId)); commandStreamReceiver0.makeNonResident(graphicsAllocation); 
EXPECT_FALSE(graphicsAllocation.isResident(csr0ContextId)); EXPECT_TRUE(graphicsAllocation.isResident(csr1ContextId)); commandStreamReceiver1.makeNonResident(graphicsAllocation); EXPECT_FALSE(graphicsAllocation.isResident(csr0ContextId)); EXPECT_FALSE(graphicsAllocation.isResident(csr1ContextId)); EXPECT_EQ(1u, commandStreamReceiver0.getEvictionAllocations().size()); EXPECT_EQ(1u, commandStreamReceiver1.getEvictionAllocations().size()); } struct CreateAllocationForHostSurfaceTest : public ::testing::Test { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); gmockMemoryManager = new ::testing::NiceMock(*executionEnvironment); executionEnvironment->memoryManager.reset(gmockMemoryManager); device.reset(MockDevice::create(executionEnvironment, 0u)); commandStreamReceiver = &device->getGpgpuCommandStreamReceiver(); } HardwareInfo hwInfo = *defaultHwInfo; ExecutionEnvironment *executionEnvironment = nullptr; GMockMemoryManager *gmockMemoryManager = nullptr; std::unique_ptr device; CommandStreamReceiver *commandStreamReceiver = nullptr; }; TEST_F(CreateAllocationForHostSurfaceTest, givenReadOnlyHostPointerWhenAllocationForHostSurfaceWithPtrCopyAllowedIsCreatedThenCopyAllocationIsCreatedAndMemoryCopied) { const char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8}; size_t size = sizeof(memory); HostPtrSurface surface(const_cast(memory), size, true); if (!gmockMemoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, device->getRootDeviceIndex())) { EXPECT_CALL(*gmockMemoryManager, populateOsHandles(::testing::_, device->getRootDeviceIndex())) .Times(1) .WillOnce(::testing::Return(MemoryManager::AllocationStatus::InvalidHostPointer)); } else { EXPECT_CALL(*gmockMemoryManager, allocateGraphicsMemoryForNonSvmHostPtr(::testing::_)) .Times(1) .WillOnce(::testing::Return(nullptr)); } bool result = 
commandStreamReceiver->createAllocationForHostSurface(surface, false); EXPECT_TRUE(result); auto allocation = surface.getAllocation(); ASSERT_NE(nullptr, allocation); EXPECT_NE(memory, allocation->getUnderlyingBuffer()); EXPECT_THAT(allocation->getUnderlyingBuffer(), MemCompare(memory, size)); allocation->updateTaskCount(commandStreamReceiver->peekLatestFlushedTaskCount(), commandStreamReceiver->getOsContext().getContextId()); } TEST_F(CreateAllocationForHostSurfaceTest, givenReadOnlyHostPointerWhenAllocationForHostSurfaceWithPtrCopyNotAllowedIsCreatedThenCopyAllocationIsNotCreated) { const char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8}; size_t size = sizeof(memory); HostPtrSurface surface(const_cast(memory), size, false); if (!gmockMemoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, device->getRootDeviceIndex())) { EXPECT_CALL(*gmockMemoryManager, populateOsHandles(::testing::_, device->getRootDeviceIndex())) .Times(1) .WillOnce(::testing::Return(MemoryManager::AllocationStatus::InvalidHostPointer)); } else { EXPECT_CALL(*gmockMemoryManager, allocateGraphicsMemoryForNonSvmHostPtr(::testing::_)) .Times(1) .WillOnce(::testing::Return(nullptr)); } bool result = commandStreamReceiver->createAllocationForHostSurface(surface, false); EXPECT_FALSE(result); auto allocation = surface.getAllocation(); EXPECT_EQ(nullptr, allocation); } struct ReducedAddrSpaceCommandStreamReceiverTest : public CreateAllocationForHostSurfaceTest { void SetUp() override { hwInfo.capabilityTable.gpuAddressSpace = MemoryConstants::max32BitAddress; CreateAllocationForHostSurfaceTest::SetUp(); } }; TEST_F(ReducedAddrSpaceCommandStreamReceiverTest, givenReducedGpuAddressSpaceWhenAllocationForHostSurfaceIsCreatedThenAllocateGraphicsMemoryForNonSvmHostPtrIsCalled) { if (is32bit) { GTEST_SKIP(); } char memory[8] = {}; HostPtrSurface surface(memory, sizeof(memory), false); EXPECT_CALL(*gmockMemoryManager, allocateGraphicsMemoryForNonSvmHostPtr(::testing::_)) .Times(1) 
.WillOnce(::testing::Return(nullptr)); bool result = commandStreamReceiver->createAllocationForHostSurface(surface, false); EXPECT_FALSE(result); } TEST_F(CommandStreamReceiverTest, givenMinimumSizeDoesNotExceedCurrentWhenCallingEnsureCommandBufferAllocationThenDoNotReallocate) { GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties({commandStreamReceiver->getRootDeviceIndex(), 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); LinearStream commandStream{allocation}; commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 100u, 0u); EXPECT_EQ(allocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(128u, commandStream.getMaxAvailableSpace()); commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 128u, 0u); EXPECT_EQ(allocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(128u, commandStream.getMaxAvailableSpace()); memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } TEST_F(CommandStreamReceiverTest, givenMinimumSizeExceedsCurrentWhenCallingEnsureCommandBufferAllocationThenReallocate) { GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties({commandStreamReceiver->getRootDeviceIndex(), 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); LinearStream commandStream{allocation}; commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 129u, 0u); EXPECT_NE(allocation, commandStream.getGraphicsAllocation()); memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } TEST_F(CommandStreamReceiverTest, givenMinimumSizeExceedsCurrentWhenCallingEnsureCommandBufferAllocationThenReallocateAndAlignSizeTo64kb) { GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties({commandStreamReceiver->getRootDeviceIndex(), 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); LinearStream commandStream{allocation}; commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 129u, 0u); 
EXPECT_EQ(MemoryConstants::pageSize64k, commandStream.getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(MemoryConstants::pageSize64k, commandStream.getMaxAvailableSpace()); commandStreamReceiver->ensureCommandBufferAllocation(commandStream, MemoryConstants::pageSize64k + 1u, 0u); EXPECT_EQ(2 * MemoryConstants::pageSize64k, commandStream.getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(2 * MemoryConstants::pageSize64k, commandStream.getMaxAvailableSpace()); memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } TEST_F(CommandStreamReceiverTest, givenAdditionalAllocationSizeWhenCallingEnsureCommandBufferAllocationThenSizesOfAllocationAndCommandBufferAreCorrect) { GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties({commandStreamReceiver->getRootDeviceIndex(), 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); LinearStream commandStream{allocation}; commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 129u, 350u); EXPECT_NE(allocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(MemoryConstants::pageSize64k, commandStream.getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(MemoryConstants::pageSize64k - 350u, commandStream.getMaxAvailableSpace()); memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } TEST_F(CommandStreamReceiverTest, givenMinimumSizeExceedsCurrentAndNoAllocationsForReuseWhenCallingEnsureCommandBufferAllocationThenAllocateMemoryFromMemoryManager) { LinearStream commandStream; EXPECT_TRUE(internalAllocationStorage->getAllocationsForReuse().peekIsEmpty()); commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1u, 0u); EXPECT_NE(nullptr, commandStream.getGraphicsAllocation()); memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } TEST_F(CommandStreamReceiverTest, 
givenMinimumSizeExceedsCurrentAndAllocationsForReuseWhenCallingEnsureCommandBufferAllocationThenObtainAllocationFromInternalAllocationStorage) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({commandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize64k, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); internalAllocationStorage->storeAllocation(std::unique_ptr{allocation}, REUSABLE_ALLOCATION); LinearStream commandStream; EXPECT_FALSE(internalAllocationStorage->getAllocationsForReuse().peekIsEmpty()); commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1u, 0u); EXPECT_EQ(allocation, commandStream.getGraphicsAllocation()); EXPECT_TRUE(internalAllocationStorage->getAllocationsForReuse().peekIsEmpty()); memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } TEST_F(CommandStreamReceiverTest, givenMinimumSizeExceedsCurrentAndNoSuitableReusableAllocationWhenCallingEnsureCommandBufferAllocationThenObtainAllocationMemoryManager) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({commandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize64k, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); internalAllocationStorage->storeAllocation(std::unique_ptr{allocation}, REUSABLE_ALLOCATION); LinearStream commandStream; EXPECT_FALSE(internalAllocationStorage->getAllocationsForReuse().peekIsEmpty()); commandStreamReceiver->ensureCommandBufferAllocation(commandStream, MemoryConstants::pageSize64k + 1, 0u); EXPECT_NE(allocation, commandStream.getGraphicsAllocation()); EXPECT_NE(nullptr, commandStream.getGraphicsAllocation()); EXPECT_FALSE(internalAllocationStorage->getAllocationsForReuse().peekIsEmpty()); memoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); } class CommandStreamReceiverWithAubSubCaptureTest : public CommandStreamReceiverTest, public ::testing::WithParamInterface> {}; HWTEST_P(CommandStreamReceiverWithAubSubCaptureTest, 
givenCommandStreamReceiverWhenProgramForAubSubCaptureIsCalledThenProgramCsrDependsOnAubSubCaptureStatus) { class MyMockCsr : public MockCommandStreamReceiver { public: using MockCommandStreamReceiver::MockCommandStreamReceiver; void initProgrammingFlags() override { initProgrammingFlagsCalled = true; } bool flushBatchedSubmissions() override { flushBatchedSubmissionsCalled = true; return true; } bool initProgrammingFlagsCalled = false; bool flushBatchedSubmissionsCalled = false; }; auto status = GetParam(); bool wasActiveInPreviousEnqueue = status.first; bool isActive = status.second; MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); MyMockCsr mockCsr(executionEnvironment, 0); mockCsr.programForAubSubCapture(wasActiveInPreviousEnqueue, isActive); EXPECT_EQ(!wasActiveInPreviousEnqueue && isActive, mockCsr.initProgrammingFlagsCalled); EXPECT_EQ(wasActiveInPreviousEnqueue && !isActive, mockCsr.flushBatchedSubmissionsCalled); } std::pair aubSubCaptureStatus[] = { {false, false}, {false, true}, {true, false}, {true, true}, }; INSTANTIATE_TEST_CASE_P( CommandStreamReceiverWithAubSubCaptureTest_program, CommandStreamReceiverWithAubSubCaptureTest, testing::ValuesIn(aubSubCaptureStatus)); using SimulatedCommandStreamReceiverTest = ::testing::Test; template struct MockSimulatedCsrHw : public CommandStreamReceiverSimulatedHw { using CommandStreamReceiverSimulatedHw::CommandStreamReceiverSimulatedHw; using CommandStreamReceiverSimulatedHw::getDeviceIndex; void pollForCompletion() override {} bool writeMemory(GraphicsAllocation &gfxAllocation) override { return true; } void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override {} }; HWTEST_F(SimulatedCommandStreamReceiverTest, givenCsrWithOsContextWhenGetDeviceIndexThenGetHighestEnabledBitInDeviceBitfield) { MockExecutionEnvironment executionEnvironment; 
executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); MockSimulatedCsrHw csr(executionEnvironment, 0); auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(&csr, aub_stream::EngineType::ENGINE_RCS, 0b11, PreemptionMode::Disabled, false, false, false); csr.setupContext(*osContext); EXPECT_EQ(1u, csr.getDeviceIndex()); } HWTEST_F(SimulatedCommandStreamReceiverTest, givenOsContextWithNoDeviceBitfieldWhenGettingDeviceIndexThenZeroIsReturned) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.initializeMemoryManager(); MockSimulatedCsrHw csr(executionEnvironment, 0); auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(&csr, aub_stream::EngineType::ENGINE_RCS, 0b00, PreemptionMode::Disabled, false, false, false); csr.setupContext(*osContext); EXPECT_EQ(0u, csr.getDeviceIndex()); } using CommandStreamReceiverMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(CommandStreamReceiverMultiRootDeviceTest, WhenCreatingCommandStreamGraphicsAllocationsThenTheyHaveCorrectRootDeviceIndex) { auto commandStreamReceiver = &device->getGpgpuCommandStreamReceiver(); ASSERT_NE(nullptr, commandStreamReceiver); EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getRootDeviceIndex()); // Linear stream / Command buffer GraphicsAllocation *allocation = mockMemoryManager->allocateGraphicsMemoryWithProperties({expectedRootDeviceIndex, 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); LinearStream commandStream{allocation}; commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 100u, 0u); EXPECT_EQ(allocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(128u, commandStream.getMaxAvailableSpace()); EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1024u, 0u); 
EXPECT_NE(allocation, commandStream.getGraphicsAllocation()); EXPECT_EQ(0u, commandStream.getMaxAvailableSpace() % MemoryConstants::pageSize64k); EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); mockMemoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); // Debug surface auto debugSurface = commandStreamReceiver->allocateDebugSurface(MemoryConstants::pageSize); ASSERT_NE(nullptr, debugSurface); EXPECT_EQ(expectedRootDeviceIndex, debugSurface->getRootDeviceIndex()); // Indirect heaps IndirectHeap::Type heapTypes[]{IndirectHeap::DYNAMIC_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::SURFACE_STATE}; for (auto heapType : heapTypes) { IndirectHeap *heap = nullptr; commandStreamReceiver->allocateHeapMemory(heapType, MemoryConstants::pageSize, heap); ASSERT_NE(nullptr, heap); ASSERT_NE(nullptr, heap->getGraphicsAllocation()); EXPECT_EQ(expectedRootDeviceIndex, heap->getGraphicsAllocation()->getRootDeviceIndex()); mockMemoryManager->freeGraphicsMemory(heap->getGraphicsAllocation()); delete heap; } // Tag allocation ASSERT_NE(nullptr, commandStreamReceiver->getTagAllocation()); EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getTagAllocation()->getRootDeviceIndex()); // Preemption allocation if (nullptr == commandStreamReceiver->getPreemptionAllocation()) { commandStreamReceiver->createPreemptionAllocation(); } EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getPreemptionAllocation()->getRootDeviceIndex()); // HostPtr surface char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8}; HostPtrSurface surface(memory, sizeof(memory), true); EXPECT_TRUE(commandStreamReceiver->createAllocationForHostSurface(surface, false)); ASSERT_NE(nullptr, surface.getAllocation()); EXPECT_EQ(expectedRootDeviceIndex, surface.getAllocation()->getRootDeviceIndex()); } using CommandStreamReceiverPageTableManagerTest = ::testing::Test; TEST_F(CommandStreamReceiverPageTableManagerTest, 
givenNonDefaultEngineTypeWhenNeedsPageTableManagerIsCalledThenFalseIsReturned) { MockExecutionEnvironment executionEnvironment; executionEnvironment.initializeMemoryManager(); MockCommandStreamReceiver commandStreamReceiver(executionEnvironment, 0u); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); auto defaultEngineType = getChosenEngineType(*hwInfo); auto engineType = aub_stream::EngineType::ENGINE_BCS; EXPECT_NE(defaultEngineType, engineType); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[0]->pageTableManager.get()); EXPECT_FALSE(commandStreamReceiver.needsPageTableManager(engineType)); } TEST_F(CommandStreamReceiverPageTableManagerTest, givenDefaultEngineTypeAndExistingPageTableManagerWhenNeedsPageTableManagerIsCalledThenFalseIsReturned) { MockExecutionEnvironment executionEnvironment; executionEnvironment.initializeMemoryManager(); MockCommandStreamReceiver commandStreamReceiver(executionEnvironment, 0u); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); auto defaultEngineType = getChosenEngineType(*hwInfo); GmmPageTableMngr *dummyPageTableManager = reinterpret_cast(0x1234); executionEnvironment.rootDeviceEnvironments[0]->pageTableManager.reset(dummyPageTableManager); EXPECT_FALSE(commandStreamReceiver.needsPageTableManager(defaultEngineType)); executionEnvironment.rootDeviceEnvironments[0]->pageTableManager.release(); } TEST_F(CommandStreamReceiverPageTableManagerTest, givenDefaultEngineTypeAndNonExisitingPageTableManagerWhenNeedsPageTableManagerIsCalledThenSupportOfPageTableManagerIsReturned) { MockExecutionEnvironment executionEnvironment; executionEnvironment.initializeMemoryManager(); MockCommandStreamReceiver commandStreamReceiver(executionEnvironment, 0u); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); auto defaultEngineType = getChosenEngineType(*hwInfo); bool supportsPageTableManager = 
HwHelper::get(hwInfo->platform.eRenderCoreFamily).isPageTableManagerSupported(*hwInfo); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[0]->pageTableManager.get()); EXPECT_EQ(supportsPageTableManager, commandStreamReceiver.needsPageTableManager(defaultEngineType)); } command_stream_receiver_with_aub_dump_tests.cpp000066400000000000000000000660311363734646600362640ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_csr.h" #include "opencl/test/unit_test/mocks/mock_aub_manager.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; struct MyMockCsr : UltCommandStreamReceiver { 
MyMockCsr(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : UltCommandStreamReceiver(executionEnvironment, rootDeviceIndex) { } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { flushParametrization.wasCalled = true; flushParametrization.receivedBatchBuffer = &batchBuffer; flushParametrization.receivedEngine = osContext->getEngineType(); flushParametrization.receivedAllocationsForResidency = &allocationsForResidency; processResidency(allocationsForResidency, 0u); flushStamp->setStamp(flushParametrization.flushStampToReturn); return true; } void makeResident(GraphicsAllocation &gfxAllocation) override { makeResidentParameterization.wasCalled = true; makeResidentParameterization.receivedGfxAllocation = &gfxAllocation; gfxAllocation.updateResidencyTaskCount(1, osContext->getContextId()); } void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override { processResidencyParameterization.wasCalled = true; processResidencyParameterization.receivedAllocationsForResidency = &allocationsForResidency; } void makeNonResident(GraphicsAllocation &gfxAllocation) override { if (gfxAllocation.isResident(this->osContext->getContextId())) { makeNonResidentParameterization.wasCalled = true; makeNonResidentParameterization.receivedGfxAllocation = &gfxAllocation; gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId()); } } AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) override { checkAndActivateAubSubCaptureParameterization.wasCalled = true; checkAndActivateAubSubCaptureParameterization.receivedDispatchInfo = &dispatchInfo; return {false, false}; } struct FlushParameterization { bool wasCalled = false; FlushStamp flushStampToReturn = 1; BatchBuffer *receivedBatchBuffer = nullptr; aub_stream::EngineType receivedEngine = aub_stream::ENGINE_RCS; ResidencyContainer *receivedAllocationsForResidency = nullptr; } 
flushParametrization; struct MakeResidentParameterization { bool wasCalled = false; GraphicsAllocation *receivedGfxAllocation = nullptr; } makeResidentParameterization; struct ProcessResidencyParameterization { bool wasCalled = false; const ResidencyContainer *receivedAllocationsForResidency = nullptr; } processResidencyParameterization; struct MakeNonResidentParameterization { bool wasCalled = false; GraphicsAllocation *receivedGfxAllocation = nullptr; } makeNonResidentParameterization; struct CheckAndActivateAubSubCaptureParameterization { bool wasCalled = false; const MultiDispatchInfo *receivedDispatchInfo = nullptr; } checkAndActivateAubSubCaptureParameterization; }; template struct MyMockCsrWithAubDump : CommandStreamReceiverWithAUBDump { MyMockCsrWithAubDump(bool createAubCSR, ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverWithAUBDump("aubfile", executionEnvironment, 0) { this->aubCSR.reset(createAubCSR ? new MyMockCsr(executionEnvironment, 0) : nullptr); } MyMockCsr &getAubMockCsr() const { return static_cast(*this->aubCSR); } }; struct CommandStreamReceiverWithAubDumpTest : public ::testing::TestWithParam, MockAubCenterFixture { void SetUp() override { MockAubCenterFixture::SetUp(); executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); memoryManager = executionEnvironment->memoryManager.get(); ASSERT_NE(nullptr, memoryManager); createAubCSR = GetParam(); csrWithAubDump = new MyMockCsrWithAubDump(createAubCSR, *executionEnvironment); ASSERT_NE(nullptr, csrWithAubDump); auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csrWithAubDump, getChosenEngineType(DEFAULT_TEST_PLATFORM::hwInfo), 1, PreemptionHelper::getDefaultPreemptionMode(DEFAULT_TEST_PLATFORM::hwInfo), false, false, false); csrWithAubDump->setupContext(*osContext); } void TearDown() override { MockAubCenterFixture::TearDown(); delete csrWithAubDump; } ExecutionEnvironment 
*executionEnvironment; MyMockCsrWithAubDump *csrWithAubDump; MemoryManager *memoryManager; bool createAubCSR; }; using CommandStreamReceiverWithAubDumpSimpleTest = Test; HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenSettingOsContextThenReplicateItToAubCsr) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); MockOsContext osContext(0, 1, HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false); csrWithAubDump.setupContext(osContext); EXPECT_EQ(&osContext, &csrWithAubDump.getOsContext()); EXPECT_EQ(&osContext, &csrWithAubDump.aubCSR->getOsContext()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubManagerAvailableWhenTbxCsrWithAubDumpIsCreatedThenAubCsrIsNotCreated) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); std::string fileName = "file_name.aub"; MockAubManager *mockManager = new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, fileName, CommandStreamReceiverType::CSR_TBX_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0); ASSERT_EQ(nullptr, csrWithAubDump.aubCSR); EXPECT_EQ(CommandStreamReceiverType::CSR_TBX_WITH_AUB, csrWithAubDump.getType()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubManagerAvailableWhenHwCsrWithAubDumpIsCreatedThenAubCsrIsCreated) { std::string fileName = "file_name.aub"; MockAubManager *mockManager = 
new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, fileName, CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0); ASSERT_NE(nullptr, csrWithAubDump.aubCSR); EXPECT_EQ(CommandStreamReceiverType::CSR_HW_WITH_AUB, csrWithAubDump.getType()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenWaitingForTaskCountThenAddPollForCompletion) { MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "file_name.aub", CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(new MockAubManager()); auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0); csrWithAubDump.initializeTagAllocation(); auto mockAubCsr = new MockAubCsr("file_name.aub", false, *executionEnvironment, 0); mockAubCsr->initializeTagAllocation(); csrWithAubDump.aubCSR.reset(mockAubCsr); EXPECT_FALSE(mockAubCsr->pollForCompletionCalled); csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, false); EXPECT_TRUE(mockAubCsr->pollForCompletionCalled); csrWithAubDump.aubCSR.reset(nullptr); csrWithAubDump.waitForTaskCountWithKmdNotifyFallback(1, 0, false, false); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenCreatingAubCsrThenInitializeTagAllocation) { MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "file_name.aub", 
CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(new MockAubManager()); auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0); EXPECT_NE(nullptr, csrWithAubDump.aubCSR->getTagAllocation()); EXPECT_NE(nullptr, csrWithAubDump.aubCSR->getTagAddress()); EXPECT_EQ(std::numeric_limits::max(), *csrWithAubDump.aubCSR->getTagAddress()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubCsrWithHwWhenAddingCommentThenAddCommentToAubManager) { MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "file_name.aub", CommandStreamReceiverType::CSR_HW_WITH_AUB); auto mockAubManager = new MockAubManager(); mockAubCenter->aubManager.reset(mockAubManager); auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); EXPECT_FALSE(mockAubManager->addCommentCalled); CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0); csrWithAubDump.addAubComment("test"); EXPECT_TRUE(mockAubManager->addCommentCalled); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubCsrWithTbxWhenAddingCommentThenDontAddCommentToAubManager) { MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "file_name.aub", CommandStreamReceiverType::CSR_TBX_WITH_AUB); auto mockAubManager = new MockAubManager(); mockAubCenter->aubManager.reset(mockAubManager); auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); 
CommandStreamReceiverWithAUBDump> csrWithAubDump("file_name.aub", *executionEnvironment, 0); csrWithAubDump.addAubComment("test"); EXPECT_FALSE(mockAubManager->addCommentCalled); } struct CommandStreamReceiverTagTests : public ::testing::Test { template using AubWithHw = CommandStreamReceiverWithAUBDump>; template using AubWithTbx = CommandStreamReceiverWithAUBDump>; template bool isTimestampPacketNodeReleasable(Args &&... args) { CsrT csr(std::forward(args)...); auto allocator = csr.getTimestampPacketAllocator(); auto tag = allocator->getTag(); for (auto &packet : tag->tagForCpuAccess->packets) { packet.contextStart = 0; packet.globalStart = 0; packet.contextEnd = 0; packet.globalEnd = 0; } EXPECT_TRUE(tag->tagForCpuAccess->isCompleted()); bool canBeReleased = tag->canBeReleased(); allocator->returnTag(tag); return canBeReleased; }; template size_t getPreferredTagPoolSize(Args &&... args) { CsrT csr(std::forward(args)...); return csr.getPreferredTagPoolSize(); }; void SetUp() override { MockAubManager *mockManager = new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, fileName, CommandStreamReceiverType::CSR_HW_WITH_AUB); mockAubCenter->aubManager = std::unique_ptr(mockManager); executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); } const std::string fileName = "file_name.aub"; ExecutionEnvironment *executionEnvironment = nullptr; }; HWTEST_F(CommandStreamReceiverTagTests, givenCsrTypeWhenCreatingTimestampPacketAllocatorThenSetDefaultCompletionCheckType) { EXPECT_TRUE(isTimestampPacketNodeReleasable>(*executionEnvironment, 0)); EXPECT_FALSE(isTimestampPacketNodeReleasable>(fileName, false, *executionEnvironment, 0)); EXPECT_FALSE(isTimestampPacketNodeReleasable>(fileName, *executionEnvironment, 0)); EXPECT_FALSE(isTimestampPacketNodeReleasable>(fileName, 
*executionEnvironment, 0)); } HWTEST_F(CommandStreamReceiverTagTests, givenCsrTypeWhenAskingForTagPoolSizeThenReturnOneForAubTbxMode) { EXPECT_EQ(512u, getPreferredTagPoolSize>(*executionEnvironment, 0)); EXPECT_EQ(1u, getPreferredTagPoolSize>(fileName, false, *executionEnvironment, 0)); EXPECT_EQ(1u, getPreferredTagPoolSize>(fileName, *executionEnvironment, 0)); EXPECT_EQ(1u, getPreferredTagPoolSize>(fileName, *executionEnvironment, 0)); } using SimulatedCsrTest = ::testing::Test; HWTEST_F(SimulatedCsrTest, givenHwWithAubDumpCsrTypeWhenCreateCommandStreamReceiverThenProperAubCenterIsInitialized) { uint32_t expectedRootDeviceIndex = 10; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, expectedRootDeviceIndex + 2); executionEnvironment.initializeMemoryManager(); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex].get()); rootDeviceEnvironment->setHwInfo(defaultHwInfo.get()); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); auto csr = std::make_unique>>("", executionEnvironment, expectedRootDeviceIndex); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_NE(nullptr, rootDeviceEnvironment->aubCenter.get()); } HWTEST_F(SimulatedCsrTest, givenTbxWithAubDumpCsrTypeWhenCreateCommandStreamReceiverThenProperAubCenterIsInitialized) { uint32_t expectedRootDeviceIndex = 10; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, expectedRootDeviceIndex + 2); executionEnvironment.initializeMemoryManager(); auto rootDeviceEnvironment = new MockRootDeviceEnvironment(executionEnvironment); executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex].reset(rootDeviceEnvironment); rootDeviceEnvironment->setHwInfo(defaultHwInfo.get()); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); 
EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); auto csr = std::make_unique>>("", executionEnvironment, expectedRootDeviceIndex); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_NE(nullptr, rootDeviceEnvironment->aubCenter.get()); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenNullAubManagerAvailableWhenTbxCsrWithAubDumpIsCreatedThenAubCsrIsCreated) { MockAubCenter *mockAubCenter = new MockAubCenter(); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", *executionEnvironment, 0); EXPECT_NE(nullptr, csrWithAubDump.aubCSR); } HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenAubManagerNotAvailableWhenHwCsrWithAubDumpIsCreatedThenAubCsrIsCreated) { std::string fileName = "file_name.aub"; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); CommandStreamReceiverWithAUBDump> csrWithAubDump("aubfile", executionEnvironment, 0); ASSERT_NE(nullptr, csrWithAubDump.aubCSR); } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenCtorIsCalledThenAubCsrIsInitialized) { if (createAubCSR) { EXPECT_NE(nullptr, csrWithAubDump->aubCSR); } else { EXPECT_EQ(nullptr, csrWithAubDump->aubCSR); } } HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenFlushIsCalledThenBaseCsrFlushStampIsReturned) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, 
cs.getUsed(), &cs, nullptr};
    auto engineType = csrWithAubDump->getOsContext().getEngineType();

    ResidencyContainer allocationsForResidency;
    csrWithAubDump->flush(batchBuffer, allocationsForResidency);
    EXPECT_EQ(csrWithAubDump->obtainCurrentFlushStamp(), csrWithAubDump->flushParametrization.flushStampToReturn);

    // The base CSR must have seen exactly the objects passed to flush().
    EXPECT_TRUE(csrWithAubDump->flushParametrization.wasCalled);
    EXPECT_EQ(&batchBuffer, csrWithAubDump->flushParametrization.receivedBatchBuffer);
    EXPECT_EQ(engineType, csrWithAubDump->flushParametrization.receivedEngine);
    EXPECT_EQ(&allocationsForResidency, csrWithAubDump->flushParametrization.receivedAllocationsForResidency);

    // When an AUB CSR exists it must have received the same call.
    if (createAubCSR) {
        EXPECT_TRUE(csrWithAubDump->getAubMockCsr().flushParametrization.wasCalled);
        EXPECT_EQ(&batchBuffer, csrWithAubDump->getAubMockCsr().flushParametrization.receivedBatchBuffer);
        EXPECT_EQ(engineType, csrWithAubDump->getAubMockCsr().flushParametrization.receivedEngine);
        EXPECT_EQ(&allocationsForResidency, csrWithAubDump->getAubMockCsr().flushParametrization.receivedAllocationsForResidency);
    }

    memoryManager->freeGraphicsMemoryImpl(commandBuffer);
}

// makeResident() must be recorded by the base CSR; the AUB CSR, when present, is expected NOT
// to have recorded a makeResident call at this point.
HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenMakeResidentIsCalledThenBaseCsrMakeResidentIsCalled) {
    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, gfxAllocation);

    csrWithAubDump->makeResident(*gfxAllocation);

    EXPECT_TRUE(csrWithAubDump->makeResidentParameterization.wasCalled);
    EXPECT_EQ(gfxAllocation, csrWithAubDump->makeResidentParameterization.receivedGfxAllocation);

    if (createAubCSR) {
        EXPECT_FALSE(csrWithAubDump->getAubMockCsr().makeResidentParameterization.wasCalled);
        EXPECT_EQ(nullptr, csrWithAubDump->getAubMockCsr().makeResidentParameterization.receivedGfxAllocation);
    }

    memoryManager->freeGraphicsMemoryImpl(gfxAllocation);
}

// flush() with a non-empty residency container must reach processResidency on the base CSR
// and, when present, on the AUB CSR, with the same container.
HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenFlushIsCalledThenBothBaseAndAubCsrProcessResidencyIsCalled) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, gfxAllocation);
    ResidencyContainer allocationsForResidency = {gfxAllocation};

    csrWithAubDump->flush(batchBuffer, allocationsForResidency);
    EXPECT_EQ(csrWithAubDump->obtainCurrentFlushStamp(), csrWithAubDump->flushParametrization.flushStampToReturn);

    EXPECT_TRUE(csrWithAubDump->processResidencyParameterization.wasCalled);
    EXPECT_EQ(&allocationsForResidency, csrWithAubDump->processResidencyParameterization.receivedAllocationsForResidency);

    if (createAubCSR) {
        EXPECT_TRUE(csrWithAubDump->getAubMockCsr().processResidencyParameterization.wasCalled);
        EXPECT_EQ(&allocationsForResidency, csrWithAubDump->getAubMockCsr().processResidencyParameterization.receivedAllocationsForResidency);
    }

    memoryManager->freeGraphicsMemoryImpl(gfxAllocation);
    memoryManager->freeGraphicsMemoryImpl(commandBuffer);
}

// After the latest sent task count is set to 1 on the wrapper and flush() is called, the AUB
// CSR (when present) must report the same latest sent task count as the wrapper.
HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenFlushIsCalledThenLatestSentTaskCountShouldBeUpdatedForAubCsr) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    ResidencyContainer allocationsForResidency;

    // Both counters start at zero.
    EXPECT_EQ(0u, csrWithAubDump->peekLatestSentTaskCount());
    if (createAubCSR) {
        EXPECT_EQ(0u, csrWithAubDump->getAubMockCsr().peekLatestSentTaskCount());
    }

    csrWithAubDump->setLatestSentTaskCount(1u);
    csrWithAubDump->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(1u, csrWithAubDump->peekLatestSentTaskCount());
    if (createAubCSR) {
        EXPECT_EQ(csrWithAubDump->peekLatestSentTaskCount(), csrWithAubDump->getAubMockCsr().peekLatestSentTaskCount());
    }

    memoryManager->freeGraphicsMemoryImpl(commandBuffer);
}

// makeNonResident() must be recorded by the base CSR and, when present, by the AUB CSR, and
// the allocation must no longer be resident for the wrapper's OS context.
HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenMakeNonResidentIsCalledThenBothBaseAndAubCsrMakeNonResidentIsCalled) {
    auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csrWithAubDump->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, gfxAllocation);

    csrWithAubDump->makeResident(*gfxAllocation);

    csrWithAubDump->makeNonResident(*gfxAllocation);

    EXPECT_TRUE(csrWithAubDump->makeNonResidentParameterization.wasCalled);
    EXPECT_EQ(gfxAllocation, csrWithAubDump->makeNonResidentParameterization.receivedGfxAllocation);
    EXPECT_FALSE(gfxAllocation->isResident(csrWithAubDump->getOsContext().getContextId()));

    if (createAubCSR) {
        EXPECT_TRUE(csrWithAubDump->getAubMockCsr().makeNonResidentParameterization.wasCalled);
        EXPECT_EQ(gfxAllocation, csrWithAubDump->getAubMockCsr().makeNonResidentParameterization.receivedGfxAllocation);
    }

    memoryManager->freeGraphicsMemoryImpl(gfxAllocation);
}

// checkAndActivateAubSubCapture() must be recorded, with the same MultiDispatchInfo, by the
// base CSR and, when present, by the AUB CSR.
HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenCheckAndActivateAubSubCaptureIsCalledThenBaseCsrCommandStreamReceiverIsCalled) {
    const DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    csrWithAubDump->checkAndActivateAubSubCapture(multiDispatchInfo);
EXPECT_TRUE(csrWithAubDump->checkAndActivateAubSubCaptureParameterization.wasCalled);
    EXPECT_EQ(&multiDispatchInfo, csrWithAubDump->checkAndActivateAubSubCaptureParameterization.receivedDispatchInfo);

    if (createAubCSR) {
        EXPECT_TRUE(csrWithAubDump->getAubMockCsr().checkAndActivateAubSubCaptureParameterization.wasCalled);
        EXPECT_EQ(&multiDispatchInfo, csrWithAubDump->getAubMockCsr().checkAndActivateAubSubCaptureParameterization.receivedDispatchInfo);
    }
}

// The wrapper and, when present, its AUB CSR must both report the fixture's memory manager.
HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenCreateMemoryManagerIsCalledThenItIsUsedByBothBaseAndAubCsr) {
    EXPECT_EQ(memoryManager, csrWithAubDump->getMemoryManager());
    if (createAubCSR) {
        EXPECT_EQ(memoryManager, csrWithAubDump->aubCSR->getMemoryManager());
    }
}

// Parameter values for CommandStreamReceiverWithAubDumpTest: run once without and once with
// an AUB CSR.
static bool createAubCSR[] = {
    false,
    true};

INSTANTIATE_TEST_CASE_P(
    CommandStreamReceiverWithAubDumpTest_Create,
    CommandStreamReceiverWithAubDumpTest,
    testing::ValuesIn(createAubCSR));

// NOTE(review): the next line holds what looks like an archive member header for
// compute_mode_tests.h, followed by that file's license comment and include directives, all
// collapsed onto one physical line. It is left untouched.
compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/compute_mode_tests.h000066400000000000000000000071371363734646600307060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h"

using namespace NEO;

// Fixture for compute-mode tests. It owns a mock device, a test CSR (myCsr) and one shared
// graphics allocation.
// NOTE(review): template parameter lists appear to have been stripped from this text
// (`template struct myCsr`, `UltCommandStreamReceiver`); kept exactly as found.
struct ComputeModeRequirements : public ::testing::Test {
    // CSR subclass that re-exports commandStream and the thread arbitration policy members and
    // exposes a pointer to csrSizeRequestFlags so tests can modify them.
    template struct myCsr : public UltCommandStreamReceiver {
        using CommandStreamReceiver::commandStream;
        using CommandStreamReceiverHw::lastSentThreadArbitrationPolicy;
        using CommandStreamReceiverHw::requiredThreadArbitrationPolicy;
        // Always constructed for root device index 0.
        myCsr(ExecutionEnvironment &executionEnvironment) : UltCommandStreamReceiver(executionEnvironment, 0){};
        CsrSizeRequestFlags *getCsrRequestFlags() {
            return
&this->csrSizeRequestFlags;
        }
    };

    // Appends the fixture's shared allocation to the CSR's residency allocations.
    void makeResidentSharedAlloc() {
        csr->getResidencyAllocations().push_back(alloc);
    }

    // Convenience overload: forwards to the five-argument overload with numGrfRequiredChanged =
    // false and numGrfRequired = 128. When modifyThreadArbitrationPolicy is set, it additionally
    // copies requiredThreadArbitrationPolicy into lastSentThreadArbitrationPolicy on the CSR.
    // NOTE(review): `reqestChanged` looks like a typo for `requestChanged`.
    template void overrideComputeModeRequest(bool reqestChanged,
                                             bool requireCoherency,
                                             bool hasSharedHandles,
                                             bool modifyThreadArbitrationPolicy = false) {
        overrideComputeModeRequest(reqestChanged, requireCoherency, hasSharedHandles, false, 128u);
        if (modifyThreadArbitrationPolicy) {
            getCsrHw()->lastSentThreadArbitrationPolicy = getCsrHw()->requiredThreadArbitrationPolicy;
        }
    }

    // Writes the three "changed / has shared handles" values into the CSR's request flags and
    // the coherency / GRF values into the fixture's DispatchFlags. When hasSharedHandles is set,
    // the shared allocation is also added to the CSR's residency allocations.
    template void overrideComputeModeRequest(bool coherencyRequestChanged,
                                             bool requireCoherency,
                                             bool hasSharedHandles,
                                             bool numGrfRequiredChanged,
                                             uint32_t numGrfRequired) {
        auto csrHw = getCsrHw();
        csrHw->getCsrRequestFlags()->coherencyRequestChanged = coherencyRequestChanged;
        csrHw->getCsrRequestFlags()->hasSharedHandles = hasSharedHandles;
        csrHw->getCsrRequestFlags()->numGrfRequiredChanged = numGrfRequiredChanged;
        flags.requiresCoherency = requireCoherency;
        flags.numGrfRequired = numGrfRequired;
        if (hasSharedHandles) {
            makeResidentSharedAlloc();
        }
    }

    // Returns the fixture's CSR downcast to myCsr; csr is assigned a myCsr in SetUpImpl().
    template myCsr *getCsrHw() {
        return static_cast *>(csr);
    }

    // Creates a mock device on a new execution environment, installs a new myCsr on it via
    // resetCommandStreamReceiver(), and creates the shared allocation from handle 123.
    template void SetUpImpl() {
        device.reset(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
        csr = new myCsr(*device->executionEnvironment);
        // presumably the device takes ownership of csr here; TODO confirm
        device->resetCommandStreamReceiver(csr);
        AllocationProperties properties(device->getRootDeviceIndex(), false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);

        alloc = device->getMemoryManager()->createGraphicsAllocationFromSharedHandle((osHandle)123, properties, false);
    }

    // Frees the shared allocation created in SetUpImpl().
    void TearDown() override {
        device->getMemoryManager()->freeGraphicsMemory(alloc);
    }

    CommandStreamReceiver *csr = nullptr;
    std::unique_ptr device;
    DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false,
false, false, false, false};
    GraphicsAllocation *alloc = nullptr;
};

// NOTE(review): the next line holds what looks like an archive member header for
// create_command_stream_receiver_tests.cpp, followed by that file's license comment and include
// directives, all collapsed onto one physical line. It is left untouched.
create_command_stream_receiver_tests.cpp000066400000000000000000000041761363734646600347020ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/libult/create_command_stream.h" #include "test.h"

using namespace NEO;

// Parameterized over the values of commandStreamReceiverTypes below.
// NOTE(review): the template argument of TestWithParam appears to have been stripped.
struct CreateCommandStreamReceiverTest : public ::testing::TestWithParam {};

// With the SetCommandStreamReceiver debug flag set to the parameter, createCommandStream() must
// return a CSR for every value below CSR_TYPES_NUM and nullptr otherwise. The execution
// environment is expected to have a memory manager in both cases.
HWTEST_P(CreateCommandStreamReceiverTest, givenCreateCommandStreamWhenCsrIsSetToValidTypeThenTheFuntionReturnsCommandStreamReceiver) {
    DebugManagerStateRestore stateRestorer;
    HardwareInfo *hwInfo = nullptr;
    ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1);
    MockAubCenterFixture::setMockAubCenter(*executionEnvironment->rootDeviceEnvironments[0]);

    CommandStreamReceiverType csrType = GetParam();

    // presumably restores ultHwConfig when the test ends; TODO confirm
    VariableBackup backup(&ultHwConfig);
    ultHwConfig.useHwCsr = true;
    DebugManager.flags.SetCommandStreamReceiver.set(csrType);

    auto csr = std::unique_ptr(createCommandStream(*executionEnvironment, 0));

    if (csrType < CommandStreamReceiverType::CSR_TYPES_NUM) {
        EXPECT_NE(nullptr, csr.get());
    } else {
        EXPECT_EQ(nullptr, csr.get());
    }
    EXPECT_NE(nullptr, executionEnvironment->memoryManager.get());
}

// Every CSR type plus CSR_TYPES_NUM itself, which exercises the nullptr branch above.
static CommandStreamReceiverType commandStreamReceiverTypes[] = {
    CSR_HW,
    CSR_AUB,
    CSR_TBX,
    CSR_HW_WITH_AUB,
    CSR_TBX_WITH_AUB,
    CSR_TYPES_NUM};

INSTANTIATE_TEST_CASE_P(
    CreateCommandStreamReceiverTest_Create,
    CreateCommandStreamReceiverTest,
    testing::ValuesIn(commandStreamReceiverTypes));

// NOTE(review): the next line holds what looks like an archive member header for
// experimental_command_buffer_tests.cpp, followed by that file's license comment and include
// directives, all collapsed onto one physical line. It is left untouched.
experimental_command_buffer_tests.cpp000066400000000000000000000501551363734646600342240ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_experimental_command_buffer.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" #include "gtest/gtest.h"

using namespace NEO;

// Fixture that sets the EnableExperimentalCommandBuffer debug flag to 1 before running the base
// fixture's SetUp(). dbgRestore is created first and holds a DebugManagerStateRestore.
struct ExperimentalCommandBufferTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        dbgRestore.reset(new DebugManagerStateRestore());
        DebugManager.flags.EnableExperimentalCommandBuffer.set(1);
        UltCommandStreamReceiverTest::SetUp();
    }

    std::unique_ptr dbgRestore;
};

// Fixture that runs the base SetUp() and then installs a MockExperimentalCommandBuffer on the
// device's GPGPU command stream receiver.
struct MockExperimentalCommandBufferTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        UltCommandStreamReceiverTest::SetUp();
        pDevice->getGpgpuCommandStreamReceiver().setExperimentalCmdBuffer(
            std::unique_ptr(new MockExperimentalCommandBuffer(&pDevice->getGpgpuCommandStreamReceiver())));
    }
};

// After a single flush, the CSR stream must start with a second-level batch buffer start that
// points at the experimental command buffer, and that buffer must hold the expected commands.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsFlushedThenExpectProperlyFilledExperimentalCmdBuffer) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast(commandStreamReceiver.experimentalCmdBuffer.get()); flushTask(commandStreamReceiver); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get()); ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation()); uint64_t exCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress(); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation())); ASSERT_NE(nullptr, mockExCmdBuffer->experimentalAllocation); uint64_t exAllocationGpuAddr = mockExCmdBuffer->experimentalAllocation->getGpuAddress(); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation)); ASSERT_NE(nullptr, mockExCmdBuffer->timestamps); EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps)); constexpr uint32_t expectedTsOffset = 2 * sizeof(uint64_t); EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset); constexpr uint32_t expectedExOffset = 0; EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset); constexpr uint32_t expectedSemaphoreVal = 1; uintptr_t actualSemaphoreAddr = reinterpret_cast(mockExCmdBuffer->experimentalAllocation->getUnderlyingBuffer()) + mockExCmdBuffer->experimentalAllocationOffset; uint32_t *actualSemaphoreVal = reinterpret_cast(actualSemaphoreAddr); EXPECT_EQ(expectedSemaphoreVal, *actualSemaphoreVal); HardwareParse hwParserCsr; hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); GenCmdList bbList = hwParserCsr.getCommandsList(); MI_BATCH_BUFFER_START *bbStart = nullptr; GenCmdList::iterator it = bbList.begin(); 
ASSERT_NE(bbList.end(), it); bbStart = reinterpret_cast(*it); ASSERT_NE(nullptr, bbStart); EXPECT_EQ(exCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); MI_BATCH_BUFFER_END *bbEnd = nullptr; PIPE_CONTROL *pipeControl = nullptr; MI_SEMAPHORE_WAIT *semaphoreCmd = nullptr; HardwareParse hwParserExCmdBuffer; hwParserExCmdBuffer.parseCommands(*mockExCmdBuffer->currentStream, 0); it = hwParserExCmdBuffer.cmdList.begin(); GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end(); if (HardwareCommandsHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { //1st PIPE_CONTROL with CS Stall ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); it++; } if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) { it++; } //2nd PIPE_CONTROL with ts addr uint64_t timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress(); uint32_t expectedTsAddress = static_cast(timeStampAddress & 0x0000FFFFFFFFULL); uint32_t expectedTsAddressHigh = static_cast(timeStampAddress >> 32); ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); EXPECT_EQ(expectedTsAddress, pipeControl->getAddress()); EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh()); if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) { it++; } //MI_SEMAPHORE_WAIT it++; ASSERT_NE(end, it); semaphoreCmd = genCmdCast(*it); ASSERT_NE(nullptr, semaphoreCmd); EXPECT_EQ(expectedSemaphoreVal, semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(exAllocationGpuAddr, semaphoreCmd->getSemaphoreGraphicsAddress()); 
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); if (HardwareCommandsHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) { //3rd PIPE_CONTROL with CS stall it++; ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); } if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) { it++; } //4th PIPE_CONTROL with ts addr timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + sizeof(uint64_t); expectedTsAddress = static_cast(timeStampAddress & 0x0000FFFFFFFFULL); expectedTsAddressHigh = static_cast(timeStampAddress >> 32); it++; ASSERT_NE(end, it); pipeControl = genCmdCast(*it); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); EXPECT_EQ(expectedTsAddress, pipeControl->getAddress()); EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh()); if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) { it++; } //BB_END it++; ASSERT_NE(end, it); bbEnd = genCmdCast(*it); ASSERT_NE(nullptr, bbEnd); } HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsNotFlushedThenExperimentalBufferLinearStreamIsNotCreatedAndCmdBufferCommandsHaveProperlyOffsetedAddresses) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast(commandStreamReceiver.experimentalCmdBuffer.get()); EXPECT_EQ(nullptr, mockExCmdBuffer->currentStream.get()); EXPECT_NE(nullptr, mockExCmdBuffer->experimentalAllocation); EXPECT_FALSE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation)); EXPECT_NE(nullptr, mockExCmdBuffer->timestamps); 
EXPECT_FALSE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps));

    constexpr uint32_t expectedTsOffset = 0;
    EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset);
    constexpr uint32_t expectedExOffset = 0;
    EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset);
}

// NOTE(review): this text had lost its line breaks, so every `//` comment swallowed the
// statements after it, and its `<...>` lists, so casts and std::unique_ptr had no type. Line
// structure is restored, and the types are restored from the declared type of each
// destination. Calls into project templates (getUltCommandStreamReceiver, parseCommands,
// getCommandsList, HardwareCommandsHelper, UnitTestHelper) may also be missing explicit
// template arguments; those are left as found. Confirm against upstream.

// After two flushes the second batch buffer start must point past the commands of the first
// flush, and the timestamp writes of the second flush must target the second pair of slots.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsFlushedTwiceThenExpectProperlyFilledExperimentalCmdBufferAndTimestampOffset) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    // First flush: remember how much of both streams it consumed.
    flushTask(commandStreamReceiver);
    size_t csrCmdBufferOffset = commandStreamReceiver.commandStream.getUsed();

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    uint64_t exCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress();
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation()));

    ASSERT_NE(nullptr, mockExCmdBuffer->experimentalAllocation);
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation));

    ASSERT_NE(nullptr, mockExCmdBuffer->timestamps);
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps));

    size_t cmbBufferOffset = mockExCmdBuffer->currentStream->getUsed();

    // Second flush.
    flushTask(commandStreamReceiver);

    //two pairs of TS
    constexpr uint32_t expectedTsOffset = 4 * sizeof(uint64_t);
    EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset);
    constexpr uint32_t expectedExOffset = 0;
    EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset);

    // Parse only what the second flush appended to the CSR stream.
    HardwareParse hwParserCsr;
    hwParserCsr.parseCommands(commandStreamReceiver.commandStream, csrCmdBufferOffset);
    GenCmdList bbList = hwParserCsr.getCommandsList();
    MI_BATCH_BUFFER_START *bbStart = nullptr;
    // The second jump targets the experimental buffer just past the first flush's commands.
    exCmdBufferGpuAddr += cmbBufferOffset;

    GenCmdList::iterator it = bbList.begin();
    ASSERT_NE(bbList.end(), it);
    bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*it);
    ASSERT_NE(nullptr, bbStart);
    EXPECT_EQ(exCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());

    PIPE_CONTROL *pipeControl = nullptr;
    HardwareParse hwParserExCmdBuffer;
    hwParserExCmdBuffer.parseCommands(*mockExCmdBuffer->currentStream, cmbBufferOffset);
    it = hwParserExCmdBuffer.cmdList.begin();
    GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end();

    if (HardwareCommandsHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) {
        it++;
    }
    if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) {
        it++;
    }

    //2nd PIPE_CONTROL
    uint64_t timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + 2 * sizeof(uint64_t);
    uint32_t expectedTsAddress = static_cast<uint32_t>(timeStampAddress & 0x0000FFFFFFFFULL);
    uint32_t expectedTsAddressHigh = static_cast<uint32_t>(timeStampAddress >> 32);
    ASSERT_NE(end, it);
    pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
    EXPECT_EQ(expectedTsAddress, pipeControl->getAddress());
    EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh());

    //omit SEMAPHORE_WAIT and 3rd PIPE_CONTROL
    if (HardwareCommandsHelper::isPipeControlWArequired(pDevice->getHardwareInfo())) {
        it++;
    }
    if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) {
        it++;
    }
    it++;

    //get 4th PIPE_CONTROL
    timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + 3 * sizeof(uint64_t);
    expectedTsAddress = static_cast<uint32_t>(timeStampAddress & 0x0000FFFFFFFFULL);
    expectedTsAddressHigh = static_cast<uint32_t>(timeStampAddress >> 32);
    it++;
    if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(pDevice->getHardwareInfo())) {
        it++;
    }
    ASSERT_NE(end, it);
    pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
    EXPECT_EQ(expectedTsAddress, pipeControl->getAddress());
    EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh());
}

// When reusable command-buffer allocations are already stored, the experimental stream must be
// backed by the second stored allocation rather than a new one.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenMemoryManagerAlreadyStoresAllocationThenUseItForLinearSteam) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    auto storage = commandStreamReceiver.getInternalAllocationStorage();
    commandStreamReceiver.storeMakeResidentAllocations = true;
    MemoryManager *memoryManager = commandStreamReceiver.getMemoryManager();

    //Make two allocations, since CSR will try to reuse it also
    auto rootDeviceIndex = pDevice->getRootDeviceIndex();
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, 3 * MemoryConstants::pageSize64k, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
    storage->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), REUSABLE_ALLOCATION);
    // `allocation` keeps pointing at the second allocation, which is compared against below.
    allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, 3 * MemoryConstants::pageSize64k, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
    storage->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), REUSABLE_ALLOCATION);

    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    flushTask(commandStreamReceiver);

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
EXPECT_EQ(allocation->getUnderlyingBuffer(), mockExCmdBuffer->currentStream->getGraphicsAllocation()->getUnderlyingBuffer());
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation()));
}

// NOTE(review): this text had lost its line breaks, so every `//` comment swallowed the
// statements after it (one comment was even split across two physical lines), and its `<...>`
// lists, so casts had no target type. Line structure is restored, and cast targets are restored
// from the declared type of each destination. Other stripped template arguments
// (getUltCommandStreamReceiver, parseCommands, getCommandsList, std::unique_ptr(nullptr),
// std::make_unique) cannot be derived from here and are left as found. Confirm against upstream.

// When the experimental stream has almost no space left, the next flush must switch it to a
// different allocation, and the new batch buffer start must point at that allocation.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenLinearStreamIsExhaustedThenStoreOldAllocationForReuseAndObtainNewAllocationForLinearStream) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    flushTask(commandStreamReceiver);
    size_t csrCmdBufferOffset = commandStreamReceiver.commandStream.getUsed();

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    // Remember both the allocation object and its GPU address for the comparison below.
    uintptr_t oldCmdBufferAddress = reinterpret_cast<uintptr_t>(mockExCmdBuffer->currentStream->getGraphicsAllocation());
    uint64_t oldExCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress();

    //leave space for single DWORD
    mockExCmdBuffer->currentStream->getSpace(mockExCmdBuffer->currentStream->getAvailableSpace() - sizeof(uint32_t));

    HardwareParse hwParserCsr;
    hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0);
    GenCmdList bbList = hwParserCsr.getCommandsList();
    MI_BATCH_BUFFER_START *bbStart = nullptr;
    GenCmdList::iterator it = bbList.begin();
    ASSERT_NE(bbList.end(), it);
    bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*it);
    ASSERT_NE(nullptr, bbStart);
    EXPECT_EQ(oldExCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());

    // Second flush: the stream no longer fits, so a different allocation must back it.
    flushTask(commandStreamReceiver);

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation()));
    uintptr_t newCmdBufferAddress = reinterpret_cast<uintptr_t>(mockExCmdBuffer->currentStream->getGraphicsAllocation());
    uint64_t newExCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress();
    EXPECT_NE(oldCmdBufferAddress, newCmdBufferAddress);
    EXPECT_NE(oldExCmdBufferGpuAddr, newExCmdBufferGpuAddr);

    // Re-parse only what the second flush appended to the CSR stream.
    hwParserCsr.TearDown();
    hwParserCsr.parseCommands(commandStreamReceiver.commandStream, csrCmdBufferOffset);
    bbList = hwParserCsr.getCommandsList();
    bbStart = nullptr;
    it = bbList.begin();
    ASSERT_NE(bbList.end(), it);
    bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*it);
    ASSERT_NE(nullptr, bbStart);
    EXPECT_EQ(newExCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}

// With the debug flag enabled by the fixture, the CSR must own an experimental command buffer.
HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsCreatedThenExperimentalCmdBufferIsNotNull) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    EXPECT_NE(nullptr, commandStreamReceiver.experimentalCmdBuffer.get());
}

// After a flush, replacing the experimental command buffer with an empty pointer must produce
// some output on stdout.
HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsFlushedThenExpectPrintAfterDtor) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    flushTask(commandStreamReceiver);

    //forced dtor to get printed timestamps
    testing::internal::CaptureStdout();
    commandStreamReceiver.setExperimentalCmdBuffer(std::move(std::unique_ptr(nullptr)));
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STRNE(output.c_str(), "");
}

// Without a flush, replacing the experimental command buffer must print nothing to stdout.
HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsNotFlushedThenExpectNoPrintAfterDtor) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();

    //forced dtor to try to get printed timestamps
    testing::internal::CaptureStdout();
    commandStreamReceiver.setExperimentalCmdBuffer(std::move(std::unique_ptr(nullptr)));
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(output.c_str(), "");
}

using ExperimentalCommandBufferRootDeviceIndexTest = MultiRootDeviceFixture;

// Both allocations owned by the experimental command buffer must carry the fixture's expected
// root device index.
TEST_F(ExperimentalCommandBufferRootDeviceIndexTest, experimentalCommandBufferGraphicsAllocationsHaveCorrectRootDeviceIndex) {
    auto experimentalCommandBuffer = std::make_unique(&device->getGpgpuCommandStreamReceiver());

    ASSERT_NE(nullptr, experimentalCommandBuffer);
    EXPECT_EQ(expectedRootDeviceIndex, experimentalCommandBuffer->experimentalAllocation->getRootDeviceIndex());
    EXPECT_EQ(expectedRootDeviceIndex, experimentalCommandBuffer->timestamps->getRootDeviceIndex());
}

// NOTE(review): the next line holds what looks like an archive member header for
// get_devices_tests.cpp, followed by that file's license comment and include directives, all
// collapsed onto one physical line. It is left untouched.
compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/get_devices_tests.cpp000066400000000000000000000212401363734646600310310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/libult/create_command_stream.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h"

namespace NEO {
// Bytewise equality of two HardwareInfo objects: the platform, feature table, workaround table
// and capability table are each compared with memcmp and the results are AND-ed together.
bool operator==(const HardwareInfo &hwInfoIn, const HardwareInfo &hwInfoOut) {
    bool result = (0 == memcmp(&hwInfoIn.platform, &hwInfoOut.platform, sizeof(PLATFORM)));
    result &= (0 == memcmp(&hwInfoIn.featureTable, &hwInfoOut.featureTable,
sizeof(FeatureTable))); result &= (0 == memcmp(&hwInfoIn.workaroundTable, &hwInfoOut.workaroundTable, sizeof(WorkaroundTable))); result &= (0 == memcmp(&hwInfoIn.capabilityTable, &hwInfoOut.capabilityTable, sizeof(RuntimeCapabilityTable))); return result; } struct PrepareDeviceEnvironmentsTest : ::testing::Test { void SetUp() override { ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; } void TearDown() override { } int i = 0; const HardwareInfo *hwInfo = nullptr; VariableBackup backup{&ultHwConfig}; DebugManagerStateRestore stateRestorer; }; HWTEST_F(PrepareDeviceEnvironmentsTest, givenPrepareDeviceEnvironmentsWhenCsrIsSetToVariousTypesThenTheFunctionReturnsTheExpectedValueOfHardwareInfo) { uint32_t expectedDevices = 1; DebugManager.flags.CreateMultipleRootDevices.set(expectedDevices); for (int productFamilyIndex = 0; productFamilyIndex < IGFX_MAX_PRODUCT; productFamilyIndex++) { const char *hwPrefix = hardwarePrefix[productFamilyIndex]; if (hwPrefix == nullptr) { continue; } const std::string productFamily(hwPrefix); for (int csrTypes = -1; csrTypes <= CSR_TYPES_NUM; csrTypes++) { CommandStreamReceiverType csrType; if (csrTypes != -1) { csrType = static_cast(csrTypes); DebugManager.flags.SetCommandStreamReceiver.set(csrType); } else { csrType = CSR_HW; DebugManager.flags.SetCommandStreamReceiver.set(-1); } DebugManager.flags.ProductFamilyOverride.set(productFamily); platformsImpl.clear(); ExecutionEnvironment *exeEnv = constructPlatform()->peekExecutionEnvironment(); const auto ret = prepareDeviceEnvironments(*exeEnv); EXPECT_EQ(expectedDevices, exeEnv->rootDeviceEnvironments.size()); for (auto i = 0u; i < expectedDevices; i++) { hwInfo = exeEnv->rootDeviceEnvironments[i]->getHardwareInfo(); switch (csrType) { case CSR_HW: case CSR_HW_WITH_AUB: EXPECT_TRUE(ret); EXPECT_NE(nullptr, hwInfo); break; case CSR_AUB: case CSR_TBX: case CSR_TBX_WITH_AUB: { EXPECT_TRUE(ret); EXPECT_NE(nullptr, hwInfo); for (i = 0; i < IGFX_MAX_PRODUCT; i++) { auto hardwareInfo = 
hardwareInfoTable[i]; if (hardwareInfo == nullptr) continue; if (hardwareInfoTable[i]->platform.eProductFamily == hwInfo->platform.eProductFamily) break; } EXPECT_TRUE(i < IGFX_MAX_PRODUCT); ASSERT_NE(nullptr, hardwarePrefix[i]); HardwareInfo hwInfoFromTable = *hardwareInfoTable[i]; hwInfoFromTable.featureTable = {}; hwInfoFromTable.workaroundTable = {}; hwInfoFromTable.gtSystemInfo = {}; hardwareInfoSetup[hwInfoFromTable.platform.eProductFamily](&hwInfoFromTable, true, 0x0); HwInfoConfig *hwConfig = HwInfoConfig::get(hwInfoFromTable.platform.eProductFamily); hwConfig->configureHardwareCustom(&hwInfoFromTable, nullptr); EXPECT_EQ(0, memcmp(&hwInfoFromTable.platform, &hwInfo->platform, sizeof(PLATFORM))); EXPECT_EQ(0, memcmp(&hwInfoFromTable.capabilityTable, &hwInfo->capabilityTable, sizeof(RuntimeCapabilityTable))); EXPECT_STREQ(hardwarePrefix[i], productFamily.c_str()); break; } default: break; } } } } } HWTEST_F(PrepareDeviceEnvironmentsTest, givenUpperCaseProductFamilyOverrideFlagSetWhenCreatingDevicesThenFindExpectedPlatform) { std::string hwPrefix; std::string hwPrefixUpperCase; PRODUCT_FAMILY productFamily; for (int productFamilyIndex = 0; productFamilyIndex < IGFX_MAX_PRODUCT; productFamilyIndex++) { if (hardwarePrefix[productFamilyIndex]) { hwPrefix = hardwarePrefix[productFamilyIndex]; productFamily = static_cast(productFamilyIndex); break; } } EXPECT_NE(0u, hwPrefix.length()); hwPrefixUpperCase.resize(hwPrefix.length()); std::transform(hwPrefix.begin(), hwPrefix.end(), hwPrefixUpperCase.begin(), ::toupper); EXPECT_NE(hwPrefix, hwPrefixUpperCase); DebugManager.flags.ProductFamilyOverride.set(hwPrefixUpperCase); DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); ExecutionEnvironment *exeEnv = platform()->peekExecutionEnvironment(); bool ret = prepareDeviceEnvironments(*exeEnv); EXPECT_TRUE(ret); EXPECT_EQ(productFamily, exeEnv->rootDeviceEnvironments[0]->getHardwareInfo()->platform.eProductFamily); } 
// Runs prepareDeviceEnvironments() once per command stream receiver type while
// ProductFamilyOverride is set to "unk", and checks the hardware info stored in
// each root device environment.
//
// For the CSR_AUB / CSR_TBX / CSR_TBX_WITH_AUB types the resulting platform and
// capability table are compared against DEFAULT_PLATFORM::hwInfo after it has
// been put through hardwareInfoSetup and HwInfoConfig::configureHardwareCustom.
// For CSR_HW / CSR_HW_WITH_AUB only the return value is checked.
HWTEST_F(PrepareDeviceEnvironmentsTest, givenPrepareDeviceEnvironmentsAndUnknownProductFamilyWhenCsrIsSetToValidTypeThenTheFunctionReturnsTheExpectedValueOfHardwareInfo) {
    uint32_t expectedDevices = 1;
    DebugManager.flags.CreateMultipleRootDevices.set(expectedDevices);

    // NOTE(review): the upper bound is inclusive, so the CSR_TYPES_NUM value itself
    // is also passed to SetCommandStreamReceiver; presumably it matches none of the
    // case labels below and ends up in the default branch - confirm this is intended.
    for (int csrTypes = 0; csrTypes <= CSR_TYPES_NUM; csrTypes++) {
        CommandStreamReceiverType csrType = static_cast<CommandStreamReceiverType>(csrTypes);
        std::string productFamily("unk");

        DebugManager.flags.SetCommandStreamReceiver.set(csrType);
        DebugManager.flags.ProductFamilyOverride.set(productFamily);

        // Rebuild the platform so that every iteration gets a fresh execution environment.
        platformsImpl.clear();
        ExecutionEnvironment *exeEnv = constructPlatform()->peekExecutionEnvironment();

        const auto ret = prepareDeviceEnvironments(*exeEnv);

        EXPECT_EQ(expectedDevices, exeEnv->rootDeviceEnvironments.size());
        for (auto i = 0u; i < expectedDevices; i++) {
            hwInfo = exeEnv->rootDeviceEnvironments[i]->getHardwareInfo();

            switch (csrType) {
            case CSR_HW:
            case CSR_HW_WITH_AUB:
                EXPECT_TRUE(ret);
                break;
            case CSR_AUB:
            case CSR_TBX:
            case CSR_TBX_WITH_AUB: {
                EXPECT_TRUE(ret);
                // Fatal check: hwInfo is dereferenced right below.
                ASSERT_NE(nullptr, hwInfo);

                // Find the table entry for the reported product family. A dedicated
                // index is used so the device loop counter above is left untouched.
                uint32_t productIndex = 0;
                for (; productIndex < IGFX_MAX_PRODUCT; productIndex++) {
                    auto hardwareInfo = hardwareInfoTable[productIndex];
                    if (hardwareInfo == nullptr) {
                        continue;
                    }
                    if (hardwareInfo->platform.eProductFamily == hwInfo->platform.eProductFamily) {
                        break;
                    }
                }
                // Fatal check: productIndex is used to index hardwarePrefix next.
                ASSERT_TRUE(productIndex < IGFX_MAX_PRODUCT);
                ASSERT_NE(nullptr, hardwarePrefix[productIndex]);

                // Build the expected hardware info from the default platform.
                HardwareInfo defaultHwInfo = DEFAULT_PLATFORM::hwInfo;
                defaultHwInfo.featureTable = {};
                defaultHwInfo.workaroundTable = {};
                defaultHwInfo.gtSystemInfo = {};
                hardwareInfoSetup[defaultHwInfo.platform.eProductFamily](&defaultHwInfo, true, 0x0);
                HwInfoConfig *hwConfig = HwInfoConfig::get(defaultHwInfo.platform.eProductFamily);
                hwConfig->configureHardwareCustom(&defaultHwInfo, nullptr);

                EXPECT_EQ(0, memcmp(&defaultHwInfo.platform, &hwInfo->platform, sizeof(PLATFORM)));
                EXPECT_EQ(0, memcmp(&defaultHwInfo.capabilityTable, &hwInfo->capabilityTable, sizeof(RuntimeCapabilityTable)));
                break;
            }
            default:
                break;
            }
        }
    }
}

} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/linear_stream_fixture.h000066400000000000000000000012761363734646600313750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" #include namespace NEO { struct LinearStreamFixture { LinearStreamFixture(void) : gfxAllocation((void *)pCmdBuffer, sizeof(pCmdBuffer)), linearStream(&gfxAllocation) { } virtual void SetUp(void) { } virtual void TearDown(void) { } MockGraphicsAllocation gfxAllocation; LinearStream linearStream; uint32_t pCmdBuffer[1024]; }; typedef Test LinearStreamTest; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/linear_stream_tests.cpp000066400000000000000000000104171363734646600314010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/test/unit_test/command_stream/linear_stream_fixture.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" using namespace NEO; TEST(LinearStreamCtorTest, WhenConstructingLinearStreamThenInitialValuesValuesAreSet) { LinearStream linearStream; EXPECT_EQ(nullptr, linearStream.getCpuBase()); EXPECT_EQ(0u, linearStream.getMaxAvailableSpace()); } TEST(LinearStreamCtorTest, whenProvidedAllArgumentsThenExpectSameValuesSet) { GraphicsAllocation *gfxAllocation = reinterpret_cast(0x1234); void *buffer = reinterpret_cast(0x2000); size_t bufferSize = 0x1000u; LinearStream linearStream(gfxAllocation, buffer, bufferSize); EXPECT_EQ(buffer, linearStream.getCpuBase()); EXPECT_EQ(bufferSize, linearStream.getMaxAvailableSpace()); EXPECT_EQ(gfxAllocation, linearStream.getGraphicsAllocation()); } TEST_F(LinearStreamTest, 
GivenSizeZeroWhenGettingSpaceUsedThenNonNullPointerIsReturned) { EXPECT_NE(nullptr, linearStream.getSpace(0)); } TEST_F(LinearStreamTest, GivenSizeUint32WhenGettingSpaceUsedThenNonNullPointerIsReturned) { EXPECT_NE(nullptr, linearStream.getSpace(sizeof(uint32_t))); } TEST_F(LinearStreamTest, WhenAllocatingMultipleTimesThenPointersIncrementedCorrectly) { size_t allocSize = 1; auto ptr1 = linearStream.getSpace(allocSize); ASSERT_NE(nullptr, ptr1); auto ptr2 = linearStream.getSpace(2); ASSERT_NE(nullptr, ptr2); EXPECT_EQ(allocSize, (uintptr_t)ptr2 - (uintptr_t)ptr1); } TEST_F(LinearStreamTest, WhenGettingSpaceThenPointerIsWriteable) { uint32_t cmd = 0xbaddf00d; auto pCmd = linearStream.getSpace(sizeof(cmd)); ASSERT_NE(nullptr, pCmd); *(uint32_t *)pCmd = cmd; } TEST_F(LinearStreamTest, WhenRequestingMutltipleAllocationsThenDifferentPointersAreReturnedForEachRequest) { auto pCmd = linearStream.getSpace(sizeof(uint32_t)); ASSERT_NE(nullptr, pCmd); auto pCmd2 = linearStream.getSpace(sizeof(uint32_t)); ASSERT_NE(pCmd2, pCmd); } TEST_F(LinearStreamTest, GivenNoAllocationsWhenGettingSpaceThenAvailableSpaceIsEqualMaximumSpace) { ASSERT_EQ(linearStream.getMaxAvailableSpace(), linearStream.getAvailableSpace()); } TEST_F(LinearStreamTest, GivenNoAllocationsWhenGettingSpaceThenAvailableSpaceIsGreaterThanZero) { EXPECT_NE(0u, linearStream.getAvailableSpace()); } TEST_F(LinearStreamTest, GivenAllocationWhenGettingSpaceThenAvailableSpaceIsReduced) { auto originalAvailable = linearStream.getAvailableSpace(); linearStream.getSpace(sizeof(uint32_t)); EXPECT_LT(linearStream.getAvailableSpace(), originalAvailable); } TEST_F(LinearStreamTest, GivenOneAllocationsWhenGettingSpaceThenSpaceUsedIsEqualToAllocationSize) { size_t sizeToAllocate = 2 * sizeof(uint32_t); ASSERT_NE(nullptr, linearStream.getSpace(sizeToAllocate)); EXPECT_EQ(sizeToAllocate, linearStream.getUsed()); } TEST_F(LinearStreamTest, givenLinearStreamWhenGetCpuBaseIsCalledThenCpuBaseAddressIsReturned) { ASSERT_EQ(pCmdBuffer, 
linearStream.getCpuBase()); } TEST_F(LinearStreamTest, givenNotEnoughSpaceWhenGetSpaceIsCalledThenThrowException) { linearStream.getSpace(linearStream.getMaxAvailableSpace()); EXPECT_THROW(linearStream.getSpace(1), std::exception); } TEST_F(LinearStreamTest, WhenReplacingBufferThenAvailableSizeIsEqualToBufferSizeAndAllSpaceIsAvailable) { char buffer[256]; linearStream.replaceBuffer(buffer, sizeof(buffer)); EXPECT_EQ(buffer, linearStream.getCpuBase()); EXPECT_EQ(sizeof(buffer), linearStream.getAvailableSpace()); EXPECT_EQ(0u, linearStream.getUsed()); } TEST_F(LinearStreamTest, givenNewGraphicsAllocationWhenReplaceIsCalledThenLinearStreamContainsNewGraphicsAllocation) { auto graphicsAllocation = linearStream.getGraphicsAllocation(); EXPECT_NE(nullptr, graphicsAllocation); auto address = (void *)0x100000; MockGraphicsAllocation newGraphicsAllocation(address, 4096); EXPECT_NE(&newGraphicsAllocation, graphicsAllocation); linearStream.replaceGraphicsAllocation(&newGraphicsAllocation); EXPECT_EQ(&newGraphicsAllocation, linearStream.getGraphicsAllocation()); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/submissions_aggregator_tests.cpp000066400000000000000000000730351363734646600333410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/helpers/flush_stamp.h" #include "opencl/source/event/event.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; struct MockSubmissionAggregator : public SubmissionAggregator { CommandBufferList 
&peekCommandBuffersList() { return this->cmdBuffers; } }; TEST(SubmissionsAggregator, givenDefaultSubmissionsAggregatorWhenItIsCreatedThenCreationIsSuccesful) { MockSubmissionAggregator submissionsAggregator; EXPECT_TRUE(submissionsAggregator.peekCommandBuffersList().peekIsEmpty()); } TEST(SubmissionsAggregator, givenCommandBufferWhenItIsPassedToSubmissionsAggregatorThenItIsRecorded) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); submissionsAggregator.recordCommandBuffer(cmdBuffer); EXPECT_FALSE(submissionsAggregator.peekCommandBuffersList().peekIsEmpty()); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekHead()); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(cmdBuffer->surfaces.size(), 0u); //idlist holds the ownership } TEST(SubmissionsAggregator, givenTwoCommandBuffersWhenMergeResourcesIsCalledThenDuplicatesAreEliminated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); size_t totalUsedSize = 0; size_t totalMemoryBudget = -1; ResourcePackage resourcePackage; 
submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(0u, totalUsedSize); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(15u, totalUsedSize); totalUsedSize = 0; resourcePackage.clear(); submissionsAggregator.recordCommandBuffer(cmdBuffer2); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekHead()); EXPECT_EQ(cmdBuffer2, submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_NE(submissionsAggregator.peekCommandBuffersList().peekHead(), submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(5u, cmdBuffer->surfaces.size()); EXPECT_EQ(4u, cmdBuffer2->surfaces.size()); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 2 is aggregated to command buffer 1 auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId; EXPECT_EQ(primaryBatchInstepctionId, submissionsAggregator.peekCommandBuffersList().peekHead()->next->inspectionId); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(6u, resourcePackage.size()); EXPECT_EQ(21u, totalUsedSize); } TEST(SubmissionsAggregator, givenSubmissionAggregatorWhenThreeCommandBuffersAreSubmittedThenTheyAreAggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); CommandBuffer *cmdBuffer3 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); 
MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); cmdBuffer3->surfaces.push_back(&alloc7); cmdBuffer3->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = -1; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.recordCommandBuffer(cmdBuffer3); EXPECT_EQ(cmdBuffer, submissionsAggregator.peekCommandBuffersList().peekHead()); EXPECT_EQ(cmdBuffer3, submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(cmdBuffer3->prev, cmdBuffer2); EXPECT_EQ(cmdBuffer2->next, cmdBuffer3); EXPECT_EQ(cmdBuffer->next, cmdBuffer2); EXPECT_EQ(cmdBuffer2->prev, cmdBuffer); EXPECT_NE(submissionsAggregator.peekCommandBuffersList().peekHead(), submissionsAggregator.peekCommandBuffersList().peekTail()); EXPECT_EQ(5u, cmdBuffer->surfaces.size()); EXPECT_EQ(4u, cmdBuffer2->surfaces.size()); EXPECT_EQ(2u, cmdBuffer3->surfaces.size()); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 3 and 2 is aggregated to command buffer 1 auto primaryBatchInstepctionId = submissionsAggregator.peekCommandBuffersList().peekHead()->inspectionId; EXPECT_EQ(primaryBatchInstepctionId, submissionsAggregator.peekCommandBuffersList().peekHead()->next->inspectionId); EXPECT_EQ(primaryBatchInstepctionId, submissionsAggregator.peekCommandBuffersList().peekHead()->next->next->inspectionId); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(7u, resourcePackage.size()); EXPECT_EQ(28u, totalUsedSize); } 
TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenTheyAreAggreagateWithCertainMemoryLimitThenOnlyThatFitAreAggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); CommandBuffer *cmdBuffer3 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); MockGraphicsAllocation alloc7(nullptr, 7); //14 bytes consumed cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); //12 bytes total , only 7 new cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); //12 bytes total, only 7 new cmdBuffer3->surfaces.push_back(&alloc7); cmdBuffer3->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = 22; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.recordCommandBuffer(cmdBuffer3); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffer 2 is aggregated to command buffer 1, comand buffer 3 becomes command buffer 2 EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekTail(), cmdBuffer3); EXPECT_EQ(cmdBuffer->next, cmdBuffer2); EXPECT_EQ(cmdBuffer3->prev, cmdBuffer2); EXPECT_EQ(cmdBuffer2->inspectionId, 
cmdBuffer->inspectionId); EXPECT_NE(cmdBuffer3->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(0u, cmdBuffer3->inspectionId); EXPECT_EQ(6u, resourcePackage.size()); EXPECT_EQ(21u, totalUsedSize); } TEST(SubmissionsAggregator, givenMultipleCommandBuffersWhenAggregateIsCalledMultipleTimesThenFurtherInspectionAreHandledCorrectly) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); CommandBuffer *cmdBuffer3 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc3(nullptr, 3); MockGraphicsAllocation alloc4(nullptr, 4); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc6(nullptr, 6); MockGraphicsAllocation alloc7(nullptr, 7); //14 bytes consumed cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc6); cmdBuffer->surfaces.push_back(&alloc5); cmdBuffer->surfaces.push_back(&alloc3); cmdBuffer->surfaces.push_back(&alloc6); //12 bytes total , only 7 new cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->surfaces.push_back(&alloc5); cmdBuffer2->surfaces.push_back(&alloc4); //12 bytes total, only 7 new cmdBuffer3->surfaces.push_back(&alloc7); cmdBuffer3->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = 14; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.recordCommandBuffer(cmdBuffer3); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //command buffers not aggregated due to too low limit EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(cmdBuffer->next, cmdBuffer2); 
EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekTail(), cmdBuffer3); //budget is now larger we can fit everything totalMemoryBudget = 28; resourcePackage.clear(); totalUsedSize = 0; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //all cmd buffers are merged to 1 EXPECT_EQ(cmdBuffer3->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekTail(), cmdBuffer3); EXPECT_EQ(submissionsAggregator.peekCommandBuffersList().peekHead(), cmdBuffer); EXPECT_EQ(totalMemoryBudget, totalUsedSize); EXPECT_EQ(7u, resourcePackage.size()); } TEST(SubmissionsAggregator, givenMultipleCommandBuffersWithDifferentGraphicsAllocationsWhenAggregateIsCalledThenResourcePackContainSecondBatchBuffer) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 2); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc7(nullptr, 7); //5 bytes consumed cmdBuffer->surfaces.push_back(&alloc5); //10 bytes total cmdBuffer2->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc2); cmdBuffer2->batchBuffer.commandBufferAllocation = &alloc7; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(4u, resourcePackage.size()); EXPECT_EQ(15u, totalUsedSize); } TEST(SubmissionsAggregator, 
givenTwoCommandBufferWhereSecondContainsFirstOnResourceListWhenItIsAggregatedThenResourcePackDoesntContainPrimaryBatch) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation cmdBufferAllocation1(nullptr, 1); MockGraphicsAllocation cmdBufferAllocation2(nullptr, 2); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.commandBufferAllocation = &cmdBufferAllocation1; cmdBuffer2->batchBuffer.commandBufferAllocation = &cmdBufferAllocation2; //cmdBuffer2 has commandBufferAllocation on the surface list cmdBuffer2->surfaces.push_back(&cmdBufferAllocation1); cmdBuffer2->surfaces.push_back(&alloc7); cmdBuffer->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //resource pack shuold have 3 surfaces EXPECT_EQ(3u, resourcePackage.size()); EXPECT_EQ(14u, totalUsedSize); } TEST(SubmissionsAggregator, givenTwoCommandBufferWhereSecondContainsTheFirstCommandBufferGraphicsAllocaitonWhenItIsAggregatedThenResourcePackDoesntContainPrimaryBatch) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation cmdBufferAllocation1(nullptr, 1); MockGraphicsAllocation alloc5(nullptr, 5); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.commandBufferAllocation = &cmdBufferAllocation1; cmdBuffer2->batchBuffer.commandBufferAllocation = 
&cmdBufferAllocation1; //cmdBuffer2 has commandBufferAllocation on the surface list cmdBuffer2->surfaces.push_back(&alloc7); cmdBuffer->surfaces.push_back(&alloc5); size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; ResourcePackage resourcePackage; submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); //resource pack shuold have 3 surfaces EXPECT_EQ(2u, resourcePackage.size()); EXPECT_EQ(12u, totalUsedSize); } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentCoherencySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.requiresCoherency = true; cmdBuffer2->batchBuffer.requiresCoherency = false; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentThrottleSettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new 
CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.throttle = QueueThrottle::LOW; cmdBuffer2->batchBuffer.throttle = QueueThrottle::MEDIUM; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentPrioritySettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.low_priority = true; cmdBuffer2->batchBuffer.low_priority = false; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } TEST(SubmissionsAggregator, 
WhenAggregatorIsCreatedThenFlushStampIsNotAllocated) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer cmdBuffer(*device); EXPECT_EQ(nullptr, cmdBuffer.flushStamp->getStampReference()); } TEST(SubmissionsAggregator, givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenUseInspectionIdCorrespondingWithOsContextId) { SubmissionAggregator submissionsAggregator; ResourcePackage resourcePackage; const auto totalMemoryBudget = 3u; size_t totalUsedSize = 0; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer0 = new CommandBuffer(*device); CommandBuffer *cmdBuffer1 = new CommandBuffer(*device); MockGraphicsAllocation alloc0(nullptr, 1); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc2(nullptr, 1); MockGraphicsAllocation alloc3(nullptr, 1); cmdBuffer0->surfaces.push_back(&alloc0); cmdBuffer0->surfaces.push_back(&alloc1); cmdBuffer1->surfaces.push_back(&alloc2); cmdBuffer1->surfaces.push_back(&alloc3); submissionsAggregator.recordCommandBuffer(cmdBuffer0); submissionsAggregator.recordCommandBuffer(cmdBuffer1); EXPECT_EQ(0u, alloc0.getInspectionId(1u)); EXPECT_EQ(0u, alloc1.getInspectionId(1u)); EXPECT_EQ(0u, alloc2.getInspectionId(1u)); EXPECT_EQ(0u, alloc3.getInspectionId(1u)); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 1u); EXPECT_EQ(1u, alloc0.getInspectionId(1u)); EXPECT_EQ(1u, alloc1.getInspectionId(1u)); EXPECT_EQ(1u, alloc2.getInspectionId(1u)); EXPECT_EQ(1u, alloc3.getInspectionId(1u)); } TEST(SubmissionsAggregator, givenMultipleOsContextsWhenAggregatingGraphicsAllocationsThenDoNotUpdateInspectionIdsOfOtherContexts) { SubmissionAggregator submissionsAggregator; ResourcePackage resourcePackage; const auto totalMemoryBudget = 2u; size_t totalUsedSize = 0; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer0 = new CommandBuffer(*device); 
CommandBuffer *cmdBuffer1 = new CommandBuffer(*device); MockGraphicsAllocation alloc0(nullptr, 1); MockGraphicsAllocation alloc1(nullptr, 1); cmdBuffer0->surfaces.push_back(&alloc0); cmdBuffer0->surfaces.push_back(&alloc1); submissionsAggregator.recordCommandBuffer(cmdBuffer0); submissionsAggregator.recordCommandBuffer(cmdBuffer1); submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 1u); for (auto osContextId = 0u; osContextId < alloc1.usageInfos.size(); osContextId++) { if (osContextId != 1u) { EXPECT_EQ(0u, alloc0.getInspectionId(osContextId)); } } for (auto osContextId = 0u; osContextId < alloc0.usageInfos.size(); osContextId++) { if (osContextId != 1u) { EXPECT_EQ(0u, alloc0.getInspectionId(osContextId)); } } } TEST(SubmissionsAggregator, givenCommandBuffersRequiringDifferentSliceCountSettingWhenAggregateIsCalledThenTheyAreNotAgggregated) { MockSubmissionAggregator submissionsAggregator; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); CommandBuffer *cmdBuffer = new CommandBuffer(*device); CommandBuffer *cmdBuffer2 = new CommandBuffer(*device); MockGraphicsAllocation alloc1(nullptr, 1); MockGraphicsAllocation alloc7(nullptr, 7); cmdBuffer->batchBuffer.sliceCount = 1; cmdBuffer2->batchBuffer.sliceCount = 2; cmdBuffer->surfaces.push_back(&alloc1); cmdBuffer2->surfaces.push_back(&alloc7); submissionsAggregator.recordCommandBuffer(cmdBuffer); submissionsAggregator.recordCommandBuffer(cmdBuffer2); ResourcePackage resourcePackage; size_t totalUsedSize = 0; size_t totalMemoryBudget = 200; submissionsAggregator.aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, 0u); EXPECT_EQ(1u, totalUsedSize); EXPECT_EQ(1u, resourcePackage.size()); EXPECT_NE(cmdBuffer->inspectionId, cmdBuffer2->inspectionId); EXPECT_EQ(1u, cmdBuffer->inspectionId); } struct SubmissionsAggregatorTests : public ::testing::Test { void SetUp() override { device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } void overrideCsr(CommandStreamReceiver *newCsr) { device->resetCommandStreamReceiver(newCsr); newCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); } std::unique_ptr device; std::unique_ptr context; }; HWTEST_F(SubmissionsAggregatorTests, givenMultipleQueuesWhenCmdBuffersAreRecordedThenAssignFlushStampObjFromCmdQueue) { MockKernelWithInternals kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); CommandQueueHw cmdQ2(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex()); size_t GWS = 1; overrideCsr(mockCsr); auto expectRefCounts = [&](int32_t cmdQRef1, int32_t cmdQRef2) { EXPECT_EQ(cmdQRef1, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); EXPECT_EQ(cmdQRef2, cmdQ2.flushStamp->getStampReference()->getRefInternalCount()); }; expectRefCounts(1, 1); cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); expectRefCounts(2, 1); cmdQ2.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); expectRefCounts(2, 2); { auto cmdBuffer = mockCsr->peekSubmissionAggregator()->peekCmdBufferList().removeFrontOne(); EXPECT_EQ(cmdQ1.flushStamp->getStampReference(), cmdBuffer->flushStamp->getStampReference()); } expectRefCounts(1, 2); { auto cmdBuffer = mockCsr->peekSubmissionAggregator()->peekCmdBufferList().removeFrontOne(); EXPECT_EQ(cmdQ2.flushStamp->getStampReference(), cmdBuffer->flushStamp->getStampReference()); } expectRefCounts(1, 1); } HWTEST_F(SubmissionsAggregatorTests, givenCmdQueueWhenCmdBufferWithEventIsRecordedThenAssignFlushStampObjForEveryone) { MockKernelWithInternals kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex()); size_t GWS = 1; 
overrideCsr(mockCsr); cl_event event1; EXPECT_EQ(1, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event1); EXPECT_EQ(3, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); EXPECT_EQ(castToObject(event1)->flushStamp->getStampReference(), cmdQ1.flushStamp->getStampReference()); { auto cmdBuffer = mockCsr->peekSubmissionAggregator()->peekCmdBufferList().removeFrontOne(); EXPECT_EQ(cmdQ1.flushStamp->getStampReference(), cmdBuffer->flushStamp->getStampReference()); } EXPECT_EQ(2, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); castToObject(event1)->release(); EXPECT_EQ(1, cmdQ1.flushStamp->getStampReference()->getRefInternalCount()); } HWTEST_F(SubmissionsAggregatorTests, givenMultipleCmdBuffersWhenFlushThenUpdateAllRelatedFlushStamps) { MockKernelWithInternals kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); CommandQueueHw cmdQ2(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex()); size_t GWS = 1; overrideCsr(mockCsr); mockCsr->taskCount = 5; mockCsr->flushStamp->setStamp(5); cl_event event1, event2; cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event1); cmdQ2.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event2); mockCsr->flushBatchedSubmissions(); auto expectedFlushStamp = mockCsr->flushStamp->peekStamp(); EXPECT_EQ(expectedFlushStamp, cmdQ1.flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, cmdQ2.flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, castToObject(event1)->flushStamp->peekStamp()); EXPECT_EQ(expectedFlushStamp, castToObject(event2)->flushStamp->peekStamp()); castToObject(event1)->release(); castToObject(event2)->release(); } HWTEST_F(SubmissionsAggregatorTests, givenMultipleCmdBuffersWhenNotAggregatedDuringFlushThenUpdateAllRelatedFlushStamps) { MockKernelWithInternals 
kernel(*device.get()); CommandQueueHw cmdQ1(context.get(), device.get(), 0, false); CommandQueueHw cmdQ2(context.get(), device.get(), 0, false); auto mockCsr = new MockCsrHw2(*device->executionEnvironment, device->getRootDeviceIndex()); size_t GWS = 1; overrideCsr(mockCsr); mockCsr->taskCount = 5; mockCsr->flushStamp->setStamp(5); cl_event event1, event2; cmdQ1.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event1); cmdQ2.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, &event2); // dont aggregate mockCsr->peekSubmissionAggregator()->peekCmdBufferList().peekHead()->batchBuffer.low_priority = true; mockCsr->peekSubmissionAggregator()->peekCmdBufferList().peekTail()->batchBuffer.low_priority = false; mockCsr->flushBatchedSubmissions(); EXPECT_EQ(6u, cmdQ1.flushStamp->peekStamp()); EXPECT_EQ(6u, castToObject(event1)->flushStamp->peekStamp()); EXPECT_EQ(7u, cmdQ2.flushStamp->peekStamp()); EXPECT_EQ(7u, castToObject(event2)->flushStamp->peekStamp()); castToObject(event1)->release(); castToObject(event2)->release(); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/tbx_command_stream_fixture.cpp000066400000000000000000000020611363734646600327420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_stream/tbx_command_stream_fixture.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gtest/gtest.h" namespace NEO { void TbxCommandStreamFixture::SetUp(MockDevice *pDevice) { // Create our TBX command stream receiver based on HW type pCommandStreamReceiver = TbxCommandStreamReceiver::create("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, 
pCommandStreamReceiver); memoryManager = new OsAgnosticMemoryManager(*pDevice->executionEnvironment); pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); } void TbxCommandStreamFixture::TearDown() { delete memoryManager; CommandStreamFixture::TearDown(); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/tbx_command_stream_fixture.h000066400000000000000000000011501363734646600324050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include namespace NEO { class CommandStreamReceiver; class MockDevice; class TbxCommandStreamFixture : public CommandStreamFixture { public: virtual void SetUp(MockDevice *pDevice); void TearDown(void) override; CommandStreamReceiver *pCommandStreamReceiver = nullptr; MemoryManager *memoryManager = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp000066400000000000000000001443431363734646600324300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/source/helpers/hardware_context_controller.h" #include "opencl/source/mem_obj/mem_obj.h" #include 
"opencl/source/memory_manager/memory_banks.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_manager.h" #include "opencl/test/unit_test/mocks/mock_aub_subcapture_manager.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_tbx_csr.h" #include "test.h" #include "tbx_command_stream_fixture.h" #include using namespace NEO; namespace NEO { extern TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE]; } // namespace NEO namespace Os { extern const char *tbxLibName; } struct TbxFixture : public TbxCommandStreamFixture, public DeviceFixture, public MockAubCenterFixture { using TbxCommandStreamFixture::SetUp; TbxFixture() : MockAubCenterFixture(CommandStreamReceiverType::CSR_TBX) {} void SetUp() { DeviceFixture::SetUp(); setMockAubCenter(*pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]); TbxCommandStreamFixture::SetUp(pDevice); MockAubCenterFixture::SetUp(); } void TearDown() override { MockAubCenterFixture::TearDown(); TbxCommandStreamFixture::TearDown(); DeviceFixture::TearDown(); } }; using TbxCommandStreamTests = Test; using TbxCommandSteamSimpleTest = 
TbxCommandStreamTests; template struct MockTbxCsrToTestDumpTbxNonWritable : public TbxCommandStreamReceiverHw { using TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw; using TbxCommandStreamReceiverHw::dumpTbxNonWritable; bool writeMemory(GraphicsAllocation &gfxAllocation) override { return true; } }; TEST_F(TbxCommandStreamTests, DISABLED_makeResident) { uint8_t buffer[0x10000]; size_t size = sizeof(buffer); GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), false, size}, buffer); pCommandStreamReceiver->makeResident(*graphicsAllocation); pCommandStreamReceiver->makeNonResident(*graphicsAllocation); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(TbxCommandStreamTests, DISABLED_makeResidentOnZeroSizedBufferShouldDoNothing) { MockGraphicsAllocation graphicsAllocation(nullptr, 0); pCommandStreamReceiver->makeResident(graphicsAllocation); pCommandStreamReceiver->makeNonResident(graphicsAllocation); } TEST_F(TbxCommandStreamTests, DISABLED_flush) { char buffer[4096]; memset(buffer, 0, 4096); LinearStream cs(buffer, 4096); size_t startOffset = 0; BatchBuffer batchBuffer{cs.getGraphicsAllocation(), startOffset, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; pCommandStreamReceiver->flush(batchBuffer, pCommandStreamReceiver->getResidencyAllocations()); } HWTEST_F(TbxCommandStreamTests, DISABLED_flushUntilTailRingBufferLargerThanSizeRingBuffer) { char buffer[4096]; memset(buffer, 0, 4096); LinearStream cs(buffer, 4096); size_t startOffset = 0; TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; BatchBuffer batchBuffer{cs.getGraphicsAllocation(), startOffset, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; pCommandStreamReceiver->flush(batchBuffer, 
pCommandStreamReceiver->getResidencyAllocations()); auto size = tbxCsr->engineInfo.sizeRingBuffer; tbxCsr->engineInfo.sizeRingBuffer = 64; pCommandStreamReceiver->flush(batchBuffer, pCommandStreamReceiver->getResidencyAllocations()); pCommandStreamReceiver->flush(batchBuffer, pCommandStreamReceiver->getResidencyAllocations()); pCommandStreamReceiver->flush(batchBuffer, pCommandStreamReceiver->getResidencyAllocations()); tbxCsr->engineInfo.sizeRingBuffer = size; } HWTEST_F(TbxCommandStreamTests, DISABLED_getCsTraits) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; tbxCsr->getCsTraits(aub_stream::ENGINE_RCS); tbxCsr->getCsTraits(aub_stream::ENGINE_BCS); tbxCsr->getCsTraits(aub_stream::ENGINE_VCS); tbxCsr->getCsTraits(aub_stream::ENGINE_VECS); } TEST(TbxCommandStreamReceiverTest, givenNullFactoryEntryWhenTbxCsrIsCreatedThenNullptrIsReturned) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); GFXCORE_FAMILY family = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->platform.eRenderCoreFamily; VariableBackup tbxCsrFactoryBackup(&tbxCommandStreamReceiverFactory[family]); tbxCommandStreamReceiverFactory[family] = nullptr; CommandStreamReceiver *csr = TbxCommandStreamReceiver::create("", false, *executionEnvironment, 0); EXPECT_EQ(nullptr, csr); } TEST(TbxCommandStreamReceiverTest, givenTbxCommandStreamReceiverWhenItIsCreatedWithWrongGfxCoreFamilyThenNullPointerShouldBeReturned) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->platform.eRenderCoreFamily = GFXCORE_FAMILY_FORCE_ULONG; // wrong gfx core family CommandStreamReceiver *csr = TbxCommandStreamReceiver::create("", false, *executionEnvironment, 0); EXPECT_EQ(nullptr, csr); } TEST(TbxCommandStreamReceiverTest, 
givenTbxCommandStreamReceiverWhenTypeIsCheckedThenTbxCsrIsReturned) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); std::unique_ptr csr(TbxCommandStreamReceiver::create("", false, *executionEnvironment, 0)); EXPECT_NE(nullptr, csr); EXPECT_EQ(CommandStreamReceiverType::CSR_TBX, csr->getType()); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenMakeResidentIsCalledForGraphicsAllocationThenItShouldPushAllocationForResidencyToCsr) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(0u, tbxCsr->getResidencyAllocations().size()); tbxCsr->makeResident(*graphicsAllocation); EXPECT_EQ(1u, tbxCsr->getResidencyAllocations().size()); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenItIsCreatedWithAubDumpAndAubCaptureFileNameHasBeenSpecifiedThenItShouldBeUsedToOpenTheFileWithAubCapture) { DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpCaptureFileName.set("aubcapture_file_name.aub"); using TbxCsrWithAubDump = CommandStreamReceiverWithAUBDump>; std::unique_ptr tbxCsrWithAubDump(static_cast( TbxCommandStreamReceiverHw::create("aubfile", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()))); EXPECT_TRUE(tbxCsrWithAubDump->aubManager->isOpen()); EXPECT_STREQ("aubcapture_file_name.aub", tbxCsrWithAubDump->aubManager->getFileName().c_str()); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenMakeResidentHasAlreadyBeenCalledForGraphicsAllocationThenItShouldNotPushAllocationForResidencyAgainToCsr) { 
TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(0u, tbxCsr->getResidencyAllocations().size()); tbxCsr->makeResident(*graphicsAllocation); EXPECT_EQ(1u, tbxCsr->getResidencyAllocations().size()); tbxCsr->makeResident(*graphicsAllocation); EXPECT_EQ(1u, tbxCsr->getResidencyAllocations().size()); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenWriteMemoryIsCalledForGraphicsAllocationWithNonZeroSizeThenItShouldReturnTrue) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_TRUE(tbxCsr->writeMemory(*graphicsAllocation)); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenWriteMemoryIsCalledWithGraphicsAllocationThatIsOnlyOneTimeWriteableThenGraphicsAllocationIsUpdated) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); ASSERT_NE(nullptr, graphicsAllocation); 
EXPECT_TRUE(tbxCsr->isTbxWritable(*graphicsAllocation)); EXPECT_TRUE(tbxCsr->writeMemory(*graphicsAllocation)); EXPECT_FALSE(tbxCsr->isTbxWritable(*graphicsAllocation)); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenWriteMemoryIsCalledWithGraphicsAllocationThatIsOnlyOneTimeWriteableButAlreadyWrittenThenGraphicsAllocationIsNotUpdated) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); ASSERT_NE(nullptr, graphicsAllocation); tbxCsr->setTbxWritable(false, *graphicsAllocation); EXPECT_FALSE(tbxCsr->writeMemory(*graphicsAllocation)); EXPECT_FALSE(tbxCsr->isTbxWritable(*graphicsAllocation)); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenWriteMemoryIsCalledForGraphicsAllocationWithZeroSizeThenItShouldReturnFalse) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MockGraphicsAllocation graphicsAllocation((void *)0x1234, 0); EXPECT_FALSE(tbxCsr->writeMemory(graphicsAllocation)); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenProcessResidencyIsCalledWithoutAllocationsForResidencyThenItShouldProcessAllocationsFromMemoryManager) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); 
EXPECT_FALSE(graphicsAllocation->isResident(tbxCsr->getOsContext().getContextId())); ResidencyContainer allocationsForResidency = {graphicsAllocation}; tbxCsr->processResidency(allocationsForResidency, 0u); EXPECT_TRUE(graphicsAllocation->isResident(tbxCsr->getOsContext().getContextId())); EXPECT_EQ(tbxCsr->peekTaskCount() + 1, graphicsAllocation->getResidencyTaskCount(tbxCsr->getOsContext().getContextId())); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenProcessResidencyIsCalledWithAllocationsForResidencyThenItShouldProcessGivenAllocations) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_FALSE(graphicsAllocation->isResident(tbxCsr->getOsContext().getContextId())); ResidencyContainer allocationsForResidency = {graphicsAllocation}; tbxCsr->processResidency(allocationsForResidency, 0u); EXPECT_TRUE(graphicsAllocation->isResident(tbxCsr->getOsContext().getContextId())); EXPECT_EQ(tbxCsr->peekTaskCount() + 1, graphicsAllocation->getResidencyTaskCount(tbxCsr->getOsContext().getContextId())); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenFlushIsCalledThenItShouldProcessAllocationsForResidency) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); 
ASSERT_NE(nullptr, graphicsAllocation); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {graphicsAllocation}; EXPECT_FALSE(graphicsAllocation->isResident(tbxCsr->getOsContext().getContextId())); tbxCsr->flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(graphicsAllocation->isResident(tbxCsr->getOsContext().getContextId())); EXPECT_EQ(tbxCsr->peekTaskCount() + 1, graphicsAllocation->getResidencyTaskCount(tbxCsr->getOsContext().getContextId())); memoryManager->freeGraphicsMemory(commandBuffer); memoryManager->freeGraphicsMemory(graphicsAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenFlushIsCalledThenItMakesCommandBufferAllocationsProperlyResident) { TbxCommandStreamReceiverHw *tbxCsr = (TbxCommandStreamReceiverHw *)pCommandStreamReceiver; MemoryManager *memoryManager = tbxCsr->getMemoryManager(); ASSERT_NE(nullptr, memoryManager); GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pCommandStreamReceiver->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; EXPECT_FALSE(commandBuffer->isResident(tbxCsr->getOsContext().getContextId())); tbxCsr->flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(commandBuffer->isResident(tbxCsr->getOsContext().getContextId())); 
EXPECT_EQ(tbxCsr->peekTaskCount() + 1, commandBuffer->getTaskCount(tbxCsr->getOsContext().getContextId())); EXPECT_EQ(tbxCsr->peekTaskCount() + 1, commandBuffer->getResidencyTaskCount(tbxCsr->getOsContext().getContextId())); ASSERT_EQ(1u, allocationsForResidency.size()); EXPECT_EQ(commandBuffer, allocationsForResidency[0]); memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenNoDbgDeviceIdFlagWhenTbxCsrIsCreatedThenUseDefaultDeviceId) { const HardwareInfo &hwInfo = *defaultHwInfo; TbxCommandStreamReceiverHw *tbxCsr = reinterpret_cast *>(pCommandStreamReceiver); EXPECT_EQ(hwInfo.capabilityTable.aubDeviceId, tbxCsr->aubDeviceId); } HWTEST_F(TbxCommandStreamTests, givenDbgDeviceIdFlagIsSetWhenTbxCsrIsCreatedThenUseDebugDeviceId) { DebugManagerStateRestore stateRestore; DebugManager.flags.OverrideAubDeviceId.set(9); //this is Hsw, not used std::unique_ptr> tbxCsr(reinterpret_cast *>(TbxCommandStreamReceiver::create("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()))); EXPECT_EQ(9u, tbxCsr->aubDeviceId); } HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingMakeSurfacePackNonResidentThenOnlyResidentAllocationsAddedAllocationsForDownload) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size()); MockGraphicsAllocation allocation1, allocation2, allocation3; allocation1.usageInfos[0].residencyTaskCount = 1; allocation3.usageInfos[0].residencyTaskCount = 1; ASSERT_TRUE(allocation1.isResident(0u)); ASSERT_FALSE(allocation2.isResident(0u)); ASSERT_TRUE(allocation3.isResident(0u)); ResidencyContainer allocationsForResidency{&allocation1, &allocation2, &allocation3}; tbxCsr.makeSurfacePackNonResident(allocationsForResidency); std::set expectedAllocationsForDownload = {&allocation1, &allocation3}; 
EXPECT_EQ(expectedAllocationsForDownload, tbxCsr.allocationsForDownload); } HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForTaskCountWithKmdNotifyFallbackThenTagAllocationAndScheduledAllocationsAreDownloaded) { struct MockTbxCsr : TbxCommandStreamReceiverHw { using CommandStreamReceiver::latestFlushedTaskCount; using TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw; void downloadAllocation(GraphicsAllocation &gfxAllocation) override { *reinterpret_cast(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount; downloadedAllocations.insert(&gfxAllocation); } std::set downloadedAllocations; }; MockTbxCsr tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); uint32_t tag = 0u; tbxCsr.setupContext(osContext); tbxCsr.setTagAllocation(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), false, sizeof(tag)}, &tag)); tbxCsr.latestFlushedTaskCount = 1u; MockGraphicsAllocation allocation1, allocation2, allocation3; allocation1.usageInfos[0].residencyTaskCount = 1; allocation2.usageInfos[0].residencyTaskCount = 1; allocation3.usageInfos[0].residencyTaskCount = 1; ASSERT_TRUE(allocation1.isResident(0u)); ASSERT_TRUE(allocation2.isResident(0u)); ASSERT_TRUE(allocation3.isResident(0u)); tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3}; tbxCsr.waitForTaskCountWithKmdNotifyFallback(0u, 0u, false, false); std::set expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3}; EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations); EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size()); } HWTEST_F(TbxCommandSteamSimpleTest, whenTbxCommandStreamReceiverIsCreatedThenPPGTTAndGGTTCreatedHavePhysicalAddressAllocatorSet) { MockTbxCsr 
tbxCsr(*pDevice->executionEnvironment); uintptr_t address = 0x20000; auto physicalAddress = tbxCsr.ppgtt->map(address, MemoryConstants::pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(0u, physicalAddress); physicalAddress = tbxCsr.ggtt->map(address, MemoryConstants::pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(0u, physicalAddress); } HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCommandStreamReceiverWhenPhysicalAddressAllocatorIsCreatedThenItIsNotNull) { MockTbxCsr tbxCsr(*pDevice->executionEnvironment); std::unique_ptr allocator(tbxCsr.createPhysicalAddressAllocator(&hardwareInfo)); ASSERT_NE(nullptr, allocator); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenItIsCreatedWithUseAubStreamFalseThenDontInitializeAubManager) { DebugManagerStateRestore dbgRestore; DebugManager.flags.UseAubStream.set(false); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), false, 1); executionEnvironment.initializeMemoryManager(); auto tbxCsr = std::make_unique>(executionEnvironment, 0); EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[0]->aubCenter->getAubManager()); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenFlushIsCalledThenItShouldCallTheExpectedHwContextFunctions) { MockTbxCsr tbxCsr(*pDevice->executionEnvironment); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); auto mockHardwareContext = static_cast(tbxCsr.hardwareContextController->hardwareContexts[0].get()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); 
ResidencyContainer allocationsForResidency = {&allocation}; tbxCsr.flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(mockHardwareContext->initializeCalled); EXPECT_FALSE(mockHardwareContext->writeAndSubmitCalled); EXPECT_TRUE(mockHardwareContext->submitCalled); EXPECT_FALSE(mockHardwareContext->pollForCompletionCalled); EXPECT_TRUE(tbxCsr.writeMemoryWithAubManagerCalled); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverInBatchedModeWhenFlushIsCalledThenItShouldMakeCommandBufferResident) { DebugManagerStateRestore dbgRestore; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::BatchedDispatch)); MockTbxCsr tbxCsr(*pDevice->executionEnvironment); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency; tbxCsr.flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(tbxCsr.writeMemoryWithAubManagerCalled); EXPECT_EQ(1u, batchBuffer.commandBufferAllocation->getResidencyTaskCount(tbxCsr.getOsContext().getContextId())); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenFlushIsCalledWithZeroSizedBufferThenSubmitIsNotCalledOnHwContext) { MockTbxCsr tbxCsr(*pDevice->executionEnvironment); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); auto mockHardwareContext = 
static_cast(tbxCsr.hardwareContextController->hardwareContexts[0].get()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency; tbxCsr.flush(batchBuffer, allocationsForResidency); EXPECT_FALSE(mockHardwareContext->submitCalled); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenMakeResidentIsCalledThenItShouldCallTheExpectedHwContextFunctions) { MockTbxCsr tbxCsr(*pDevice->executionEnvironment); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); ResidencyContainer allocationsForResidency = {&allocation}; tbxCsr.processResidency(allocationsForResidency, 0u); EXPECT_TRUE(tbxCsr.writeMemoryWithAubManagerCalled); } HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenDownloadAllocationIsCalledThenItShouldCallTheExpectedHwContextFunctions) { MockTbxCsr tbxCsr(*pDevice->executionEnvironment); MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); auto mockHardwareContext = static_cast(tbxCsr.hardwareContextController->hardwareContexts[0].get()); MockGraphicsAllocation allocation(reinterpret_cast(0x1000), 0x1000); tbxCsr.downloadAllocation(allocation); EXPECT_TRUE(mockHardwareContext->readMemoryCalled); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenHardwareContextIsCreatedThenTbxStreamInCsrIsNotInitialized) { MockAubManager *mockManager = new MockAubManager(); 
MockAubCenter *mockAubCenter = new MockAubCenter(&pDevice->getHardwareInfo(), false, "", CommandStreamReceiverType::CSR_TBX); mockAubCenter->aubManager = std::unique_ptr(mockManager); pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); auto tbxCsr = std::unique_ptr>(reinterpret_cast *>( TbxCommandStreamReceiverHw::create("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()))); EXPECT_FALSE(tbxCsr->streamInitialized); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenOsContextIsSetThenCreateHardwareContext) { auto hwInfo = pDevice->getHardwareInfo(); MockOsContext osContext(0, 1, HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0], PreemptionMode::Disabled, false, false, false); std::string fileName = ""; MockAubManager *mockManager = new MockAubManager(); MockAubCenter *mockAubCenter = new MockAubCenter(&hwInfo, false, fileName, CommandStreamReceiverType::CSR_TBX); mockAubCenter->aubManager = std::unique_ptr(mockManager); pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::unique_ptr(mockAubCenter); std::unique_ptr> tbxCsr(reinterpret_cast *>(TbxCommandStreamReceiver::create(fileName, false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()))); EXPECT_EQ(nullptr, tbxCsr->hardwareContextController.get()); tbxCsr->setupContext(osContext); EXPECT_NE(nullptr, tbxCsr->hardwareContextController.get()); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenPollForCompletionImplIsCalledThenSimulatedCsrMethodIsCalled) { std::unique_ptr> tbxCsr(reinterpret_cast *>(TbxCommandStreamReceiver::create("", false, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()))); tbxCsr->pollForCompletionImpl(); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenItIsQueriedForPreferredTagPoolSizeThenOneIsReturned) { std::unique_ptr> tbxCsr(reinterpret_cast *>(TbxCommandStreamReceiver::create("", false, *pDevice->executionEnvironment, 
pDevice->getRootDeviceIndex()))); EXPECT_EQ(1u, tbxCsr->getPreferredTagPoolSize()); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenCreatedWithAubDumpThenFileNameIsExtendedWithSystemInfo) { MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[0].get()); setMockAubCenter(*rootDeviceEnvironment, CommandStreamReceiverType::CSR_TBX); auto fullName = AUBCommandStreamReceiver::createFullFilePath(*defaultHwInfo, "aubfile"); std::unique_ptr> tbxCsr(reinterpret_cast *>(TbxCommandStreamReceiver::create("aubfile", true, executionEnvironment, 0))); EXPECT_STREQ(fullName.c_str(), rootDeviceEnvironment->aubFileNameReceived.c_str()); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenCreatedWithAubDumpThenOpenIsCalledOnAubManagerToOpenFileStream) { MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); std::unique_ptr> tbxCsrWithAubDump(reinterpret_cast *>( TbxCommandStreamReceiver::create("aubfile", true, executionEnvironment, 0))); EXPECT_TRUE(tbxCsrWithAubDump->aubManager->isOpen()); } using SimulatedCsrTest = ::testing::Test; HWTEST_F(SimulatedCsrTest, givenTbxCsrTypeWhenCreateCommandStreamReceiverThenProperAubCenterIsInitalized) { uint32_t expectedRootDeviceIndex = 10; MockExecutionEnvironment executionEnvironment; executionEnvironment.initializeMemoryManager(); executionEnvironment.prepareRootDeviceEnvironments(expectedRootDeviceIndex + 2); auto rootDeviceEnvironment = new MockRootDeviceEnvironment(executionEnvironment); executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex].reset(rootDeviceEnvironment); rootDeviceEnvironment->setHwInfo(defaultHwInfo.get()); EXPECT_EQ(nullptr, 
executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); auto csr = std::make_unique>(executionEnvironment, expectedRootDeviceIndex); EXPECT_NE(nullptr, executionEnvironment.rootDeviceEnvironments[expectedRootDeviceIndex]->aubCenter.get()); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenCreatedWithAubDumpInSubCaptureModeThenCreateSubCaptureManagerAndGenerateSubCaptureFileName) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); std::unique_ptr> tbxCsrWithAubDump(static_cast *>( TbxCommandStreamReceiver::create("aubfile", true, executionEnvironment, 0))); EXPECT_TRUE(tbxCsrWithAubDump->aubManager->isOpen()); auto subCaptureManager = tbxCsrWithAubDump->subCaptureManager.get(); EXPECT_NE(nullptr, subCaptureManager); MultiDispatchInfo dispatchInfo; EXPECT_STREQ(subCaptureManager->getSubCaptureFileName(dispatchInfo).c_str(), tbxCsrWithAubDump->aubManager->getFileName().c_str()); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenCreatedWithAubDumpSeveralTimesThenOpenIsCalledOnAubManagerOnceOnly) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, 1); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); auto tbxCsrWithAubDump1 = std::unique_ptr>(reinterpret_cast *>( TbxCommandStreamReceiverHw::create("aubfile", true, executionEnvironment, 0))); auto tbxCsrWithAubDump2 = std::unique_ptr>(reinterpret_cast *>( TbxCommandStreamReceiverHw::create("aubfile", true, executionEnvironment, 0))); auto mockManager = 
reinterpret_cast(executionEnvironment.rootDeviceEnvironments[0]->aubCenter->getAubManager()); EXPECT_EQ(1u, mockManager->openCalledCnt); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenFlushIsCalledAndSubCaptureIsDisabledThenPauseShouldBeTurnedOn) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); tbxCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); EXPECT_FALSE(tbxCsr.subCaptureManager->isSubCaptureEnabled()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; tbxCsr.flush(batchBuffer, allocationsForResidency); auto mockAubManager = reinterpret_cast(pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getAubManager()); EXPECT_TRUE(mockAubManager->isPaused); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenFlushIsCalledAndSubCaptureIsEnabledThenPauseShouldBeTurnedOff) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureManagerMock->setSubCaptureIsActive(true); tbxCsr.subCaptureManager = 
std::unique_ptr(aubSubCaptureManagerMock); EXPECT_TRUE(tbxCsr.subCaptureManager->isSubCaptureEnabled()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; tbxCsr.flush(batchBuffer, allocationsForResidency); auto mockAubManager = reinterpret_cast(pDevice->executionEnvironment->rootDeviceEnvironments[0]->aubCenter->getAubManager()); EXPECT_FALSE(mockAubManager->isPaused); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenFlushIsCalledAndSubCaptureIsEnabledThenCallPollForCompletionAndDisableSubCapture) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureManagerMock->setSubCaptureIsActive(true); tbxCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); EXPECT_TRUE(tbxCsr.subCaptureManager->isSubCaptureEnabled()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; tbxCsr.flush(batchBuffer, allocationsForResidency); 
EXPECT_TRUE(tbxCsr.pollForCompletionCalled); EXPECT_FALSE(tbxCsr.subCaptureManager->isSubCaptureEnabled()); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenFlushIsCalledAndSubCaptureGetsActivatedThenCallSubmitBatchBufferWithOverrideRingBufferSetToTrue) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureManagerMock->setSubCaptureIsActive(true); tbxCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); EXPECT_FALSE(aubSubCaptureManagerMock->wasSubCaptureActiveInPreviousEnqueue()); EXPECT_TRUE(aubSubCaptureManagerMock->isSubCaptureActive()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; tbxCsr.flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(tbxCsr.submitBatchBufferCalled); EXPECT_TRUE(tbxCsr.overrideRingHeadPassed); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenFlushIsCalledAndSubCaptureRemainsActiveThenCallSubmitBatchBufferWithOverrideRingBufferSetToTrue) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto 
aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureManagerMock->setSubCaptureWasActiveInPreviousEnqueue(true); aubSubCaptureManagerMock->setSubCaptureIsActive(true); tbxCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); EXPECT_TRUE(aubSubCaptureManagerMock->wasSubCaptureActiveInPreviousEnqueue()); EXPECT_TRUE(aubSubCaptureManagerMock->isSubCaptureActive()); auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; ResidencyContainer allocationsForResidency = {}; tbxCsr.flush(batchBuffer, allocationsForResidency); EXPECT_TRUE(tbxCsr.submitBatchBufferCalled); EXPECT_FALSE(tbxCsr.overrideRingHeadPassed); pDevice->executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenProcessResidencyIsCalledWithDumpTbxNonWritableFlagThenAllocationsForResidencyShouldBeMadeTbxWritable) { std::unique_ptr memoryManager(nullptr); std::unique_ptr> tbxCsr(new MockTbxCsrToTestDumpTbxNonWritable(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment)); tbxCsr->setupContext(*pDevice->getDefaultEngine().osContext); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); tbxCsr->setTbxWritable(false, *gfxAllocation); tbxCsr->dumpTbxNonWritable = true; ResidencyContainer allocationsForResidency = {gfxAllocation}; tbxCsr->processResidency(allocationsForResidency, 0u); EXPECT_TRUE(tbxCsr->isTbxWritable(*gfxAllocation)); 
EXPECT_FALSE(tbxCsr->dumpTbxNonWritable); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenProcessResidencyIsCalledWithoutDumpTbxWritableFlagThenAllocationsForResidencyShouldBeKeptNonTbxWritable) { std::unique_ptr memoryManager(nullptr); std::unique_ptr> tbxCsr(new MockTbxCsrToTestDumpTbxNonWritable(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); memoryManager.reset(new OsAgnosticMemoryManager(*pDevice->executionEnvironment)); tbxCsr->setupContext(*pDevice->getDefaultEngine().osContext); auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); tbxCsr->setTbxWritable(false, *gfxAllocation); EXPECT_FALSE(tbxCsr->dumpTbxNonWritable); ResidencyContainer allocationsForResidency = {gfxAllocation}; tbxCsr->processResidency(allocationsForResidency, 0u); EXPECT_FALSE(tbxCsr->isTbxWritable(*gfxAllocation)); EXPECT_FALSE(tbxCsr->dumpTbxNonWritable); memoryManager->freeGraphicsMemory(gfxAllocation); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenCheckAndActivateAubSubCaptureIsCalledAndSubCaptureIsInactiveThenDontForceDumpingAllocationsTbxNonWritable) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; tbxCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); EXPECT_FALSE(tbxCsr.dumpTbxNonWritable); auto status = tbxCsr.checkAndActivateAubSubCapture(multiDispatchInfo); 
EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); EXPECT_FALSE(tbxCsr.dumpTbxNonWritable); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenCheckAndActivateAubSubCaptureIsCalledAndSubCaptureGetsActivatedThenForceDumpingAllocationsTbxNonWritable) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManagerMock->setSubCaptureIsActive(false); aubSubCaptureManagerMock->setSubCaptureToggleActive(true); tbxCsr.subCaptureManager = std::unique_ptr(aubSubCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); EXPECT_FALSE(tbxCsr.dumpTbxNonWritable); auto status = tbxCsr.checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); EXPECT_TRUE(tbxCsr.dumpTbxNonWritable); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInSubCaptureModeWhenCheckAndActivateAubSubCaptureIsCalledAndSubCaptureRemainsActivatedThenDontForceDumpingAllocationsTbxNonWritable) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); AubSubCaptureCommon aubSubCaptureCommon; auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("", aubSubCaptureCommon); aubSubCaptureCommon.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle; aubSubCaptureManagerMock->setSubCaptureIsActive(true); aubSubCaptureManagerMock->setSubCaptureToggleActive(true); tbxCsr.subCaptureManager = 
std::unique_ptr(aubSubCaptureManagerMock); MockKernelWithInternals kernelInternals(*pClDevice); Kernel *kernel = kernelInternals.mockKernel; MockMultiDispatchInfo multiDispatchInfo(kernel); EXPECT_FALSE(tbxCsr.dumpTbxNonWritable); auto status = tbxCsr.checkAndActivateAubSubCapture(multiDispatchInfo); EXPECT_TRUE(status.isActive); EXPECT_TRUE(status.wasActiveInPreviousEnqueue); EXPECT_FALSE(tbxCsr.dumpTbxNonWritable); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrInNonSubCaptureModeWhenCheckAndActivateAubSubCaptureIsCalledThenReturnStatusInactive) { MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr.setupContext(osContext); MultiDispatchInfo dispatchInfo; auto status = tbxCsr.checkAndActivateAubSubCapture(dispatchInfo); EXPECT_FALSE(status.isActive); EXPECT_FALSE(status.wasActiveInPreviousEnqueue); } HWTEST_F(TbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessCorrectly) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableBlitterOperationsSupport.set(1); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); MockContext context(pClDevice); MockTbxCsr tbxCsr0{*pDevice->executionEnvironment}; tbxCsr0.initializeTagAllocation(); MockTbxCsr tbxCsr1{*pDevice->executionEnvironment}; tbxCsr1.initializeTagAllocation(); MockOsContext osContext0(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); tbxCsr0.setupContext(osContext0); EngineControl engineControl0{&tbxCsr0, &osContext0}; MockOsContext osContext1(1, 1, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false); tbxCsr1.setupContext(osContext0); EngineControl engineControl1{&tbxCsr1, &osContext1}; MockCommandQueueHw cmdQ(&context, pClDevice, nullptr); cmdQ.gpgpuEngine = &engineControl0; cmdQ.bcsEngine = &engineControl1; cl_int error = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, 0, 1, nullptr, error)); uint32_t 
hostPtr = 0; error = cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, error); } compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/tbx_stream_tests.cpp000066400000000000000000000016201363734646600307200ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/test/unit_test/mocks/mock_tbx_sockets.h" #include "opencl/test/unit_test/mocks/mock_tbx_stream.h" #include "gtest/gtest.h" using namespace NEO; TEST(TbxStreamTests, givenTbxStreamWhenWriteMemoryIsCalledThenTypeIsZero) { std::unique_ptr mockTbxStream(new MockTbxStream()); MockTbxStream *mockTbxStreamPtr = static_cast(mockTbxStream.get()); MockTbxSockets *mockTbxSocket = new MockTbxSockets(); mockTbxStreamPtr->socket = mockTbxSocket; mockTbxStream->writeMemory(0, nullptr, 0, 0, 0); EXPECT_EQ(0u, mockTbxSocket->typeCapturedFromWriteMemory); mockTbxStream->writePTE(0, 0, 0); EXPECT_EQ(0u, mockTbxSocket->typeCapturedFromWriteMemory); } thread_arbitration_policy_helper.cpp000066400000000000000000000015301363734646600340320ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/thread_arbitration_policy.h" #include "opencl/extensions/public/cl_ext_private.h" #include namespace NEO { uint32_t getNewKernelArbitrationPolicy(uint32_t policy) { if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) { return ThreadArbitrationPolicy::RoundRobin; } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL) { return ThreadArbitrationPolicy::AgeBased; } else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL) { return 
ThreadArbitrationPolicy::RoundRobinAfterDependency; } else { return ThreadArbitrationPolicy::NotPresent; } } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/thread_arbitration_policy_helper.h000066400000000000000000000003051363734646600335550ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include namespace NEO { uint32_t getNewKernelArbitrationPolicy(uint32_t policy); } // namespace NEOthread_arbitration_policy_helper_tests.cpp000066400000000000000000000024301363734646600352540ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/command_stream/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_stream/thread_arbitration_policy.h" #include "gtest/gtest.h" #include "public/cl_ext_private.h" namespace NEO { TEST(ThreadArbitrationPolicy, givenClKrenelExecThreadArbitrationPolicyWhenGetNewKernelArbitrationPolicyIsCalledThenExpectedThreadArbitrationPolicyIsReturned) { uint32_t retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL); EXPECT_EQ(retVal, static_cast(ThreadArbitrationPolicy::RoundRobin)); retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL); EXPECT_EQ(retVal, static_cast(ThreadArbitrationPolicy::AgeBased)); retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL); EXPECT_EQ(retVal, static_cast(ThreadArbitrationPolicy::RoundRobinAfterDependency)); uint32_t randomValue = -1; retVal = ThreadArbitrationPolicy::getNewKernelArbitrationPolicy(randomValue); EXPECT_EQ(retVal, static_cast(ThreadArbitrationPolicy::NotPresent)); } } // namespace 
NEOcompute-runtime-20.13.16352/opencl/test/unit_test/compiler_interface/000077500000000000000000000000001363734646600254645ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/compiler_interface/CMakeLists.txt000066400000000000000000000012421363734646600302230ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_compiler_interface ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/default_cl_cache_config_tests.cpp ) get_property(NEO_CORE_COMPILER_INTERFACE_TESTS GLOBAL PROPERTY NEO_CORE_COMPILER_INTERFACE_TESTS) get_property(NEO_DEVICE_BINARY_FORMAT_TESTS GLOBAL PROPERTY NEO_DEVICE_BINARY_FORMAT_TESTS) list(APPEND IGDRCL_SRCS_tests_compiler_interface ${NEO_CORE_COMPILER_INTERFACE_TESTS}) list(APPEND IGDRCL_SRCS_tests_compiler_interface ${NEO_DEVICE_BINARY_FORMAT_TESTS}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_compiler_interface}) default_cl_cache_config_tests.cpp000066400000000000000000000007551363734646600341140ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/compiler_interface/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/compiler_interface/default_cl_cache_config.h" #include "test.h" TEST(CompilerCache, GivenDefaultClCacheConfigThenValuesAreProperlyPopulated) { auto cacheConfig = NEO::getDefaultClCompilerCacheConfig(); EXPECT_STREQ("cl_cache", cacheConfig.cacheDir.c_str()); EXPECT_STREQ(".cl_cache", cacheConfig.cacheFileExtension.c_str()); EXPECT_TRUE(cacheConfig.enabled); } compute-runtime-20.13.16352/opencl/test/unit_test/context/000077500000000000000000000000001363734646600233165ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/context/CMakeLists.txt000066400000000000000000000013201363734646600260520ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: 
MIT # set(IGDRCL_SRCS_tests_context ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_get_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_multi_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_negative_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_enqueue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/get_supported_image_formats_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_context}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/context/context_get_info_tests.cpp000066400000000000000000000071371363734646600306120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct ContextGetInfoTest : public PlatformFixture, public ContextFixture, public ::testing::Test { using ContextFixture::SetUp; using PlatformFixture::SetUp; ContextGetInfoTest() { } void SetUp() override { PlatformFixture::SetUp(); ContextFixture::SetUp(num_devices, devices); } void TearDown() override { ContextFixture::TearDown(); PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; }; TEST_F(ContextGetInfoTest, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) { size_t retSize = 0; retVal = pContext->getInfo( 0, 0, nullptr, &retSize); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(ContextGetInfoTest, GivenNumDevicesParamNameWhenGettingInfoThenNumberOfDevicesIsReturned) { cl_uint numDevices = 0; size_t retSize = 0; retVal = pContext->getInfo( CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, nullptr); EXPECT_EQ(this->num_devices, numDevices); 
EXPECT_EQ(CL_SUCCESS, retVal); retVal = pContext->getInfo( CL_CONTEXT_DEVICES, 0, nullptr, &retSize); // make sure we get the same answer through a different query EXPECT_EQ(numDevices * sizeof(cl_device_id), retSize); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(ContextGetInfoTest, GivenContextDevicesParamNameWhenGettingInfoThenCorrectDeviceIdsAreReturned) { auto devicesReturned = new cl_device_id[this->num_devices]; retVal = pContext->getInfo( CL_CONTEXT_DEVICES, this->num_devices * sizeof(cl_device_id), devicesReturned, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (size_t deviceOrdinal = 0; deviceOrdinal < this->num_devices; ++deviceOrdinal) { EXPECT_EQ(devices[deviceOrdinal], devicesReturned[deviceOrdinal]); } delete[] devicesReturned; } TEST_F(ContextGetInfoTest, ContextProperties) { cl_context_properties props; size_t size; auto retVal = pContext->getInfo( CL_CONTEXT_PROPERTIES, sizeof(props), &props, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, size); } TEST_F(ContextGetInfoTest, givenMultipleContextPropertiesWhenTheyAreBeingQueriedThenGetInfoReturnProperProperties) { cl_context_properties properties[] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, (cl_context_properties)0xff, 0}; constexpr auto propertiesCount = sizeof(properties) / sizeof(cl_context_properties); auto retValue = CL_SUCCESS; auto contextWithProperties = clCreateContext(properties, 1, &this->devices[0], nullptr, nullptr, &retValue); EXPECT_EQ(CL_SUCCESS, retValue); auto pContextWithProperties = castToObject(contextWithProperties); size_t size = 6; cl_context_properties obtainedProperties[propertiesCount] = {0}; auto retVal = pContextWithProperties->getInfo( CL_CONTEXT_PROPERTIES, sizeof(properties), obtainedProperties, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(properties), size); for (auto property = 0u; property < propertiesCount; property++) { EXPECT_EQ(obtainedProperties[property], properties[property]); } clReleaseContext(contextWithProperties); } 
compute-runtime-20.13.16352/opencl/test/unit_test/context/context_multi_device_tests.cpp000066400000000000000000000062161363734646600314660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; TEST(ContextMultiDevice, GivenSingleDeviceWhenCreatingContextThenContextIsCreated) { cl_device_id devices[] = { new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}}; auto numDevices = static_cast(arrayCount(devices)); auto retVal = CL_SUCCESS; auto pContext = Context::create(nullptr, ClDeviceVector(devices, numDevices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, pContext); auto numDevicesReturned = pContext->getNumDevices(); EXPECT_EQ(numDevices, numDevicesReturned); ClDeviceVector ctxDevices; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { ctxDevices.push_back(pContext->getDevice(deviceOrdinal)); } delete pContext; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { auto pDevice = (ClDevice *)devices[deviceOrdinal]; ASSERT_NE(nullptr, pDevice); EXPECT_EQ(pDevice, ctxDevices[deviceOrdinal]); delete pDevice; } } TEST(ContextMultiDevice, GivenMultipleDevicesWhenCreatingContextThenContextIsCreatedForEachDevice) { cl_device_id devices[] = { new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new 
MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}, new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}}; auto numDevices = static_cast(arrayCount(devices)); ASSERT_EQ(8u, numDevices); auto retVal = CL_SUCCESS; auto pContext = Context::create(nullptr, ClDeviceVector(devices, numDevices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, pContext); auto numDevicesReturned = pContext->getNumDevices(); EXPECT_EQ(numDevices, numDevicesReturned); ClDeviceVector ctxDevices; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { ctxDevices.push_back(pContext->getDevice(deviceOrdinal)); } delete pContext; for (size_t deviceOrdinal = 0; deviceOrdinal < numDevicesReturned; ++deviceOrdinal) { auto pDevice = (ClDevice *)devices[deviceOrdinal]; ASSERT_NE(nullptr, pDevice); EXPECT_EQ(pDevice, ctxDevices[deviceOrdinal]); delete pDevice; } } compute-runtime-20.13.16352/opencl/test/unit_test/context/context_negative_tests.cpp000066400000000000000000000052701363734646600306160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "CL/cl_gl.h" #include "gtest/gtest.h" #include using namespace NEO; //////////////////////////////////////////////////////////////////////////////// typedef Test ContextFailureInjection; TEST_F(ContextFailureInjection, GivenFailedAllocationInjectionWhenCreatingContextThenOutOfHostMemoryErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = device.get(); InjectedFunction method = [deviceID](size_t failureIndex) { auto retVal = CL_INVALID_VALUE; auto context = 
Context::create(nullptr, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, context); } delete context; context = nullptr; }; injectFailures(method); } TEST(InvalidPropertyContextTest, GivenInvalidPropertiesWhenContextIsCreatedThenErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = device.get(); auto pPlatform = NEO::platform(); cl_platform_id pid[1]; pid[0] = pPlatform; cl_context_properties invalidProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_CGL_SHAREGROUP_KHR, 0x10000, 0}; cl_context_properties invalidProperties2[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], (cl_context_properties)0xdeadbeef, 0x10000, 0}; cl_int retVal = 0; auto context = Context::create(invalidProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); EXPECT_EQ(nullptr, context); delete context; context = Context::create(invalidProperties2, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_INVALID_PROPERTY, retVal); EXPECT_EQ(nullptr, context); delete context; } compute-runtime-20.13.16352/opencl/test/unit_test/context/context_tests.cpp000066400000000000000000000373501363734646600267400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.inl" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include 
"opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_deferred_deleter.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; class WhiteBoxContext : public Context { public: MemoryManager *getMM() { return this->memoryManager; } const cl_context_properties *getProperties() const { return properties; } size_t getNumProperties() const { return numProperties; } WhiteBoxContext(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *), void *userData) : Context(pfnNotify, userData){}; }; struct ContextTest : public PlatformFixture, public ::testing::Test { using PlatformFixture::SetUp; void SetUp() override { PlatformFixture::SetUp(); cl_platform_id platform = pPlatform; properties = new cl_context_properties[3]; properties[0] = CL_CONTEXT_PLATFORM; properties[1] = (cl_context_properties)platform; properties[2] = 0; context = Context::create(properties, ClDeviceVector(devices, num_devices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, context); } void TearDown() override { delete[] properties; delete context; PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; WhiteBoxContext *context = nullptr; cl_context_properties *properties = nullptr; }; TEST_F(ContextTest, WhenCreatingContextThenDevicesAllDevicesExist) { for (size_t deviceOrdinal = 0; deviceOrdinal < context->getNumDevices(); ++deviceOrdinal) { EXPECT_NE(nullptr, context->getDevice(deviceOrdinal)); } } TEST_F(ContextTest, WhenCreatingContextThenMemoryManagerForContextIsSet) { EXPECT_NE(nullptr, context->getMM()); } TEST_F(ContextTest, WhenCreatingContextThenPropertiesAreCopied) { auto contextProperties = 
context->getProperties(); EXPECT_NE(properties, contextProperties); } TEST_F(ContextTest, WhenCreatingContextThenPropertiesAreValid) { auto contextProperties = context->getProperties(); ASSERT_NE(nullptr, contextProperties); EXPECT_EQ(3u, context->getNumProperties()); while (*contextProperties) { switch (*contextProperties) { case CL_CONTEXT_PLATFORM: ++contextProperties; break; default: ASSERT_FALSE(!"Unknown context property"); break; } ++contextProperties; } } TEST_F(ContextTest, WhenCreatingContextThenSpecialQueueIsAvailable) { auto specialQ = context->getSpecialQueue(); EXPECT_NE(specialQ, nullptr); } TEST_F(ContextTest, WhenSettingSpecialQueueThenQueueIsAvailable) { MockContext context((ClDevice *)devices[0], true); auto specialQ = context.getSpecialQueue(); EXPECT_EQ(specialQ, nullptr); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0); context.setSpecialQueue(cmdQ); specialQ = context.getSpecialQueue(); EXPECT_NE(specialQ, nullptr); } TEST_F(ContextTest, WhenSettingDefaultQueueThenQueueIsAvailable) { EXPECT_EQ(nullptr, context->getDefaultDeviceQueue()); auto dq = new DeviceQueue(); context->setDefaultDeviceQueue(dq); EXPECT_EQ(dq, context->getDefaultDeviceQueue()); delete dq; } TEST_F(ContextTest, givenCmdQueueWithoutContextWhenBeingCreatedNextDeletedThenContextRefCountShouldNeitherBeIncrementedNorNextDecremented) { MockContext context((ClDevice *)devices[0]); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ1 = new MockCommandQueue(); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ1; EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ2 = new MockCommandQueue(nullptr, (ClDevice *)devices[0], 0); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ2; EXPECT_EQ(1, context.getRefInternalCount()); } TEST_F(ContextTest, givenDeviceQueueWithoutContextWhenBeingCreatedNextDeletedThenContextRefCountShouldNeitherBeIncrementedNorNextDecremented) { MockContext context((ClDevice *)devices[0]); EXPECT_EQ(1, 
context.getRefInternalCount()); auto cmdQ1 = new DeviceQueue(); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ1; EXPECT_EQ(1, context.getRefInternalCount()); cl_queue_properties properties = 0; auto cmdQ2 = new DeviceQueue(nullptr, (ClDevice *)devices[0], properties); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ2; EXPECT_EQ(1, context.getRefInternalCount()); } TEST_F(ContextTest, givenCmdQueueWithContextWhenBeingCreatedNextDeletedThenContextRefCountShouldBeIncrementedNextDecremented) { MockContext context((ClDevice *)devices[0]); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0); EXPECT_EQ(2, context.getRefInternalCount()); delete cmdQ; EXPECT_EQ(1, context.getRefInternalCount()); } TEST_F(ContextTest, givenDeviceCmdQueueWithContextWhenBeingCreatedNextDeletedThenContextRefCountShouldBeIncrementedNextDecremented) { MockContext context((ClDevice *)devices[0]); EXPECT_EQ(1, context.getRefInternalCount()); cl_queue_properties properties = 0; auto cmdQ = new DeviceQueue(&context, (ClDevice *)devices[0], properties); EXPECT_EQ(2, context.getRefInternalCount()); delete cmdQ; EXPECT_EQ(1, context.getRefInternalCount()); } TEST_F(ContextTest, givenDefaultDeviceCmdQueueWithContextWhenBeingCreatedNextDeletedThenContextRefCountShouldBeIncrementedNextDecremented) { MockContext context((ClDevice *)devices[0]); EXPECT_EQ(1, context.getRefInternalCount()); cl_queue_properties properties = 0; auto cmdQ = new DeviceQueue(&context, (ClDevice *)devices[0], properties); context.setDefaultDeviceQueue(cmdQ); EXPECT_EQ(2, context.getRefInternalCount()); delete cmdQ; EXPECT_EQ(1, context.getRefInternalCount()); } TEST_F(ContextTest, givenContextWhenItIsCreatedFromDeviceThenItAddsRefCountToThisDevice) { auto device = castToObject(devices[0]); EXPECT_EQ(2, device->getRefInternalCount()); cl_device_id deviceID = devices[0]; std::unique_ptr context(Context::create(0, ClDeviceVector(&deviceID, 1), nullptr, 
nullptr, retVal)); EXPECT_EQ(3, device->getRefInternalCount()); context.reset(nullptr); EXPECT_EQ(2, device->getRefInternalCount()); } TEST_F(ContextTest, givenContextWhenItIsCreatedFromMultipleDevicesThenItAddsRefCountToThoseDevices) { auto device = castToObject(devices[0]); EXPECT_EQ(2, device->getRefInternalCount()); ClDeviceVector devicesVector; devicesVector.push_back(device); devicesVector.push_back(device); std::unique_ptr context(Context::create(0, devicesVector, nullptr, nullptr, retVal)); EXPECT_EQ(4, device->getRefInternalCount()); context.reset(nullptr); EXPECT_EQ(2, device->getRefInternalCount()); } TEST_F(ContextTest, givenSpecialCmdQueueWithContextWhenBeingCreatedNextAutoDeletedThenContextRefCountShouldNeitherBeIncrementedNorNextDecremented) { MockContext context((ClDevice *)devices[0], true); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0); context.overrideSpecialQueueAndDecrementRefCount(cmdQ); EXPECT_EQ(1, context.getRefInternalCount()); //special queue is to be deleted implicitly by context } TEST_F(ContextTest, givenSpecialCmdQueueWithContextWhenBeingCreatedNextDeletedThenContextRefCountShouldNeitherBeIncrementedNorNextDecremented) { MockContext context((ClDevice *)devices[0], true); EXPECT_EQ(1, context.getRefInternalCount()); auto cmdQ = new MockCommandQueue(&context, (ClDevice *)devices[0], 0); context.overrideSpecialQueueAndDecrementRefCount(cmdQ); EXPECT_EQ(1, context.getRefInternalCount()); delete cmdQ; EXPECT_EQ(1, context.getRefInternalCount()); context.setSpecialQueue(nullptr); } TEST_F(ContextTest, GivenInteropSyncParamWhenCreateContextThenSetContextParam) { cl_device_id deviceID = devices[0]; auto pPlatform = NEO::platform(); cl_platform_id pid[1]; pid[0] = pPlatform; cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_CONTEXT_INTEROP_USER_SYNC, 1, 0}; cl_int retVal = CL_SUCCESS; auto context = 
Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_TRUE(context->getInteropUserSyncEnabled()); delete context; validProperties[3] = 0; // false context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_FALSE(context->getInteropUserSyncEnabled()); delete context; } class MockSharingFunctions : public SharingFunctions { public: uint32_t getId() const override { return sharingId; } static const uint32_t sharingId = 0; }; TEST_F(ContextTest, givenContextWhenSharingTableEmptyThenReturnsNullptr) { MockContext context; context.clearSharingFunctions(); auto *sharingF = context.getSharing(); EXPECT_EQ(sharingF, nullptr); } TEST_F(ContextTest, givenNullptrWhenRegisteringSharingToContextThenAbortExecution) { MockContext context; context.clearSharingFunctions(); EXPECT_THROW(context.registerSharing(nullptr), std::exception); } TEST_F(ContextTest, givenContextWhenSharingTableIsNotEmptyThenReturnsSharingFunctionPointer) { MockContext context; MockSharingFunctions *sharingFunctions = new MockSharingFunctions; context.registerSharing(sharingFunctions); auto *sharingF = context.getSharing(); EXPECT_EQ(sharingF, sharingFunctions); } TEST(Context, givenFtrSvmFalseWhenContextIsCreatedThenSVMAllocsManagerIsNotCreated) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrSvm = false; auto device = std::make_unique(MockDevice::createWithExecutionEnvironment(hwInfo, executionEnvironment, 0)); cl_device_id clDevice = device.get(); cl_int retVal = CL_SUCCESS; auto context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); 
ASSERT_NE(nullptr, context); auto svmManager = context->getSVMAllocsManager(); EXPECT_EQ(nullptr, svmManager); } TEST(Context, whenCreateContextThenSpecialQueueUsesInternalEngine) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); cl_device_id clDevice = device.get(); cl_int retVal = CL_SUCCESS; auto context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); ASSERT_NE(nullptr, context); EXPECT_EQ(CL_SUCCESS, retVal); auto specialQueueEngine = context->getSpecialQueue()->getGpgpuEngine(); auto internalEngine = device->getInternalEngine(); EXPECT_EQ(internalEngine.commandStreamReceiver, specialQueueEngine.commandStreamReceiver); } TEST(MultiDeviceContextTest, givenContextWithMultipleDevicesWhenGettingTotalNumberOfDevicesThenNumberOfAllAvailableDevicesIsReturned) { DebugManagerStateRestore restorer; const uint32_t numDevices = 2u; const uint32_t numSubDevices = 3u; DebugManager.flags.CreateMultipleRootDevices.set(numDevices); DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); initPlatform(); auto device0 = platform()->getClDevice(0); auto device1 = platform()->getClDevice(1); cl_device_id clDevices[2]{device0, device1}; ClDeviceVector deviceVector(clDevices, 2); cl_int retVal = CL_OUT_OF_HOST_MEMORY; auto context = std::unique_ptr(Context::create(nullptr, deviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numSubDevices, device0->getNumAvailableDevices()); EXPECT_EQ(numSubDevices, device1->getNumAvailableDevices()); EXPECT_EQ(numDevices, context->getNumDevices()); EXPECT_EQ(numDevices * numSubDevices, context->getTotalNumDevices()); } class ContextWithAsyncDeleterTest : public ::testing::WithParamInterface, public ::testing::Test { public: void SetUp() override { memoryManager = new MockMemoryManager(); device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; deleter = new 
MockDeferredDeleter(); device->injectMemoryManager(memoryManager); memoryManager->setDeferredDeleter(deleter); } void TearDown() override { delete device; } Context *context; MockMemoryManager *memoryManager; MockDeferredDeleter *deleter; MockClDevice *device; }; TEST_P(ContextWithAsyncDeleterTest, givenContextWithMemoryManagerWhenAsyncDeleterIsEnabledThenUsesDeletersMethods) { cl_device_id clDevice = device; cl_int retVal; ClDeviceVector deviceVector(&clDevice, 1); bool asyncDeleterEnabled = GetParam(); memoryManager->overrideAsyncDeleterFlag(asyncDeleterEnabled); EXPECT_EQ(0, deleter->getClientsNum()); context = Context::create(0, deviceVector, nullptr, nullptr, retVal); if (asyncDeleterEnabled) { EXPECT_EQ(1, deleter->getClientsNum()); } else { EXPECT_EQ(0, deleter->getClientsNum()); } delete context; EXPECT_EQ(0, deleter->getClientsNum()); } INSTANTIATE_TEST_CASE_P(ContextTests, ContextWithAsyncDeleterTest, ::testing::Bool()); TEST(DefaultContext, givenDefaultContextWhenItIsQueriedForTypeThenDefaultTypeIsReturned) { MockContext context; EXPECT_EQ(ContextType::CONTEXT_TYPE_DEFAULT, context.peekContextType()); } TEST(Context, givenContextWhenCheckIfAllocationsAreMultiStorageThenReturnProperValueAccordingToContextType) { MockContext context; EXPECT_TRUE(context.areMultiStorageAllocationsPreferred()); context.contextType = ContextType::CONTEXT_TYPE_SPECIALIZED; EXPECT_FALSE(context.areMultiStorageAllocationsPreferred()); context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; EXPECT_TRUE(context.areMultiStorageAllocationsPreferred()); } TEST(Context, givenContextWhenIsDeviceAssociatedIsCalledWithItsDeviceThenTrueIsReturned) { MockContext context; EXPECT_TRUE(context.isDeviceAssociated(*context.getDevice(0))); } TEST(Context, givenContextWhenIsDeviceAssociatedIsCalledWithNotAssociatedDeviceThenFalseIsReturned) { MockContext context0; MockContext context1; EXPECT_FALSE(context0.isDeviceAssociated(*context1.getDevice(0))); 
EXPECT_FALSE(context1.isDeviceAssociated(*context0.getDevice(0))); }compute-runtime-20.13.16352/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp000066400000000000000000001070251363734646600326620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/context/driver_diagnostics_tests.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" using namespace NEO; TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesProperHint) { buffer->forceDisallowCPUCopy = false; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferIsCallingThenContextProvidesHintsAboutAlignments) { void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); uintptr_t addressForReadBuffer = (uintptr_t)ptr; size_t sizeForReadBuffer = MemoryConstants::cacheLineSize; if (!alignedAddress) { addressForReadBuffer++; } if (!alignedSize) { sizeForReadBuffer--; } pCmdQ->enqueueReadBuffer(buffer, CL_FALSE, 0, sizeForReadBuffer, (void *)addressForReadBuffer, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), 
addressForReadBuffer); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBuffer, sizeForReadBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_P(PerformanceHintEnqueueReadBufferTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadBufferRectIsCallingThenContextProvidesHintsAboutAlignments) { void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); uintptr_t addressForReadBufferRect = (uintptr_t)ptr; size_t sizeForReadBufferRect = MemoryConstants::cacheLineSize; if (!alignedAddress) { addressForReadBufferRect++; } if (!alignedSize) { sizeForReadBufferRect--; } size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {sizeForReadBufferRect, 1, 1}; pCmdQ->enqueueReadBufferRect( buffer, CL_TRUE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, (void *)addressForReadBufferRect, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast(buffer), addressForReadBufferRect); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadBufferRect, sizeForReadBufferRect, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndNotSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; void 
*ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndSharedMemWhenEnqueueReadBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; pCmdQ->enqueueReadBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, address, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = true; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueWriteBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, 
GivenNonBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueWriteBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferDoesntShareMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = false; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueWriteBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingWriteAndBufferSharesMemWithCPUWhenEnqueueWriteBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = false; pCmdQ->enqueueWriteBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = true; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, 
MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithoutCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = true; pCmdQ->enqueueReadBuffer( buffer, CL_FALSE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferDoesntShareMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesRequiedCopyHint) { buffer->forceDisallowCPUCopy = false; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueReadBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer), ptr); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenBlockingReadAndBufferSharesMemWithCPUWhenEnqueueReadBufferIsCallingWithCPUCopyThenContextProvidesCopyDoenstRequiedHint) { buffer->forceDisallowCPUCopy = false; pCmdQ->enqueueReadBuffer( buffer, CL_TRUE, 0, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, 
DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer), address); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndNotSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); pCmdQ->enqueueWriteBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueBufferTest, GivenNonBlockingWriteAndSharedMemWhenEnqueueWriteBufferRectIsCallingThenContextProvidesProperHint) { size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; pCmdQ->enqueueWriteBufferRect( buffer, CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, address, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer)); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_P(PerformanceHintEnqueueReadImageTest, GivenHostPtrAndSizeAlignmentsWhenEnqueueReadImageIsCallingThenContextProvidesHintsAboutAlignments) { size_t hostOrigin[] = {0, 0, 0}; void *ptr = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); uintptr_t addressForReadImage = (uintptr_t)ptr; 
size_t sizeForReadImageInPixels = MemoryConstants::cacheLineSize; bool hintWithMisalignment = !(alignedAddress && alignedSize); if (!alignedAddress) { addressForReadImage++; } if (!alignedSize) { sizeForReadImageInPixels--; } size_t region[] = {sizeForReadImageInPixels, 1, 1}; pCmdQ->enqueueReadImage(image, CL_FALSE, hostOrigin, region, 0, 0, (void *)addressForReadImage, nullptr, 0, nullptr, nullptr); size_t sizeForReadImage = sizeForReadImageInPixels * image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; ASSERT_EQ(alignedSize, isAligned(sizeForReadImage)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForReadImage, sizeForReadImage, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(hintWithMisalignment, containsHint(expectedHint, userData)); alignedFree(ptr); } TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) { size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; pCmdQ->enqueueWriteImage( image, CL_FALSE, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, address, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA], static_cast(image)); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingWriteImageSharesStorageWithDstPtrWhenEnqueueWriteImageIsCallingThenContextProvidesProperHint) { size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; void *ptr = zeroCopyImage->getCpuAddressForMemoryTransfer(); pCmdQ->enqueueWriteImage( zeroCopyImage.get(), CL_FALSE, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, 
DriverDiagnostics::hintFormat[CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA], static_cast(zeroCopyImage.get())); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingReadImageSharesStorageWithDstPtrWhenEnqueueReadImageIsCallingThenContextProvidesProperHint) { size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; void *ptr = zeroCopyImage->getCpuAddressForMemoryTransfer(); pCmdQ->enqueueReadImage( zeroCopyImage.get(), CL_FALSE, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, ptr, nullptr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA], static_cast(zeroCopyImage.get())); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) { Buffer *buffer; void *address; bool zeroCopyBuffer = GetParam(); size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer = Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal); pCmdQ->enqueueMapBuffer(buffer, CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 0, nullptr, nullptr, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer)); EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer)); EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); delete buffer; } TEST_P(PerformanceHintEnqueueMapTest, 
GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) { void *address; bool zeroCopyBuffer = GetParam(); UserEvent userEvent(context); cl_event blockedEvent = &userEvent; size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal)); EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer); pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &blockedEvent, nullptr, retVal); EXPECT_TRUE(pCmdQ->isQueueBlocked()); userEvent.setStatus(CL_COMPLETE); EXPECT_FALSE(pCmdQ->isQueueBlocked()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA], static_cast(buffer.get())); EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast(buffer.get())); EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) { Image *image; bool isZeroCopyImage; isZeroCopyImage = GetParam(); size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; if (isZeroCopyImage) { image = ImageHelper>::create(context); } else { image = ImageHelper>::create(context); } EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy()); pCmdQ->enqueueMapImage( image, CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA], static_cast(image)); EXPECT_EQ(isZeroCopyImage, 
containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA], static_cast(image)); EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData)); delete image; } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) { auto image = std::unique_ptr(ImageHelper>::create(context)); bool isZeroCopyImage = GetParam(); size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; if (!isZeroCopyImage) { image.reset(ImageHelper>::create(context)); } EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy()); UserEvent userEvent(context); cl_event blockedEvent = &userEvent; void *mapPtr = pCmdQ->enqueueMapImage( image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 1, &blockedEvent, nullptr, retVal); EXPECT_TRUE(pCmdQ->isQueueBlocked()); userEvent.setStatus(CL_COMPLETE); pCmdQ->enqueueUnmapMemObject(image.get(), mapPtr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA], static_cast(image.get())); EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA], static_cast(image.get())); EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData)); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) { Buffer *buffer; void *address; bool zeroCopyBuffer = GetParam(); size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer = Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal); void *mapPtr = 
pCmdQ->enqueueMapBuffer(buffer, CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 0, nullptr, nullptr, retVal); pCmdQ->enqueueUnmapMemObject(buffer, mapPtr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast(buffer)); EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr); EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); delete buffer; } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) { void *address; bool zeroCopyBuffer = GetParam(); UserEvent userEvent(context); cl_event blockedEvent = &userEvent; size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!zeroCopyBuffer) { sizeForBuffer++; } address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); auto buffer = std::unique_ptr(Buffer::create(context, CL_MEM_USE_HOST_PTR, sizeForBuffer, address, retVal)); EXPECT_EQ(buffer->isMemObjZeroCopy(), zeroCopyBuffer); void *mapPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_FALSE, 0, 0, MemoryConstants::cacheLineSize, 1, &blockedEvent, nullptr, retVal); EXPECT_TRUE(pCmdQ->isQueueBlocked()); pCmdQ->enqueueUnmapMemObject(buffer.get(), mapPtr, 0, nullptr, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_FALSE(pCmdQ->isQueueBlocked()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast(buffer.get())); EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr); 
EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData)); alignedFree(address); } TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithImageThenContextProvidesProperHint) { Image *image; bool isZeroCopyImage; isZeroCopyImage = GetParam(); size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; if (isZeroCopyImage) { image = ImageHelper>::create(context); } else { image = ImageHelper>::create(context); } EXPECT_EQ(isZeroCopyImage, image->isMemObjZeroCopy()); void *mapPtr = pCmdQ->enqueueMapImage(image, CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); pCmdQ->enqueueUnmapMemObject(image, mapPtr, 0, nullptr, nullptr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast(image)); EXPECT_EQ(!isZeroCopyImage, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr); EXPECT_EQ(isZeroCopyImage, containsHint(expectedHint, userData)); delete image; } TEST_F(PerformanceHintEnqueueTest, GivenSVMPointerWhenEnqueueSVMMapIsCallingThenContextProvidesProperHint) { if (!pPlatform->getClDevice(0)->getHardwareInfo().capabilityTable.ftrSvm) { GTEST_SKIP(); } void *svmPtr = context->getSVMAllocsManager()->createSVMAlloc(0, 256, {}); pCmdQ->enqueueSVMMap(CL_FALSE, 0, svmPtr, 256, 0, nullptr, nullptr, false); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_SVM_MAP_DOESNT_REQUIRE_COPY_DATA], svmPtr); EXPECT_TRUE(containsHint(expectedHint, userData)); context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, 
nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeNDIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintEnqueueKernelTest, 
GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsDefaultWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsTrueWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkSizeSquaredIsFalseWhenEnqueueKernelIsCallingThenContextProvidesProperHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().name.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } 
// Halves one dimension of the local work-group size returned by computeWorkgroupSize(),
// enqueues with that size and expects a BAD_LOCAL_WORKGROUP_SIZE hint that reports both
// the size that was passed in and the computed one.
TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEnqueueKernelIsCallingThenContextProvidesProperHint) {
    size_t localWorkGroupSize[3];
    uint32_t workDim = globalWorkGroupSize[1] == 1 ? 1 : globalWorkGroupSize[2] == 1 ? 2 : 3;

    DispatchInfo dispatchInfo(kernel, workDim, Vec3(globalWorkGroupSize), Vec3(0u, 0u, 0u), Vec3(0u, 0u, 0u));
    auto computedLocalWorkgroupSize = computeWorkgroupSize(dispatchInfo);
    localWorkGroupSize[0] = computedLocalWorkgroupSize.x;
    localWorkGroupSize[1] = computedLocalWorkgroupSize.y;
    localWorkGroupSize[2] = computedLocalWorkgroupSize.z;

    // The test parameter selects the dimension to shrink; when that dimension cannot be
    // halved (<= 1) dimension 0 is halved instead.
    const int badSizeDimension = GetParam();
    if (localWorkGroupSize[badSizeDimension] > 1) {
        localWorkGroupSize[badSizeDimension] /= 2;
    } else {
        localWorkGroupSize[0] /= 2;
    }

    retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BAD_LOCAL_WORKGROUP_SIZE],
             localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2],
             kernel->getKernelInfo().name.c_str(),
             computedLocalWorkgroupSize.x, computedLocalWorkgroupSize.y, computedLocalWorkgroupSize.z);
    EXPECT_TRUE(containsHint(expectedHint, userData));
}

// Enqueues the fixture kernel with three dimensions and expects the
// PRINTF_DETECTED_IN_KERNEL hint naming that kernel.
TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueKernelIsCalledWithWorkDim3ThenContextProvidesProperHint) {
    // Value-initialised: the helpers below are asked for 2 dimensions only, yet all three
    // elements are handed to a 3-D enqueue. Whether the helpers also write element [2] is
    // not visible from this file, so it must not be left indeterminate.
    size_t preferredWorkGroupSize[3] = {1, 1, 1};
    auto maxWorkGroupSize = static_cast(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);

    if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
        WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, IGFX_GEN9_CORE, 32u, 0u, false, false);
        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
    } else {
        computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
    }

    retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL],
             kernel->getKernelInfo().name.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));
}

// Binds a buffer whose graphics allocation is marked coherent as the only argument of a
// mock kernel, enqueues it with two dimensions and expects the KERNEL_REQUIRES_COHERENCY hint.
TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
    size_t preferredWorkGroupSize[3] = {1, 1, 1};
    size_t globalWorkGroupSize[3] = {1, 1, 1};
    auto maxWorkGroupSize = static_cast(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);
    MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context);
    Kernel::SimpleKernelArgInfo kernelArgInfo;

    if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
        WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, IGFX_GEN9_CORE, 32u, 0u, false, false);
        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
    } else {
        computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
    }

    // Owned by the test; released at scope exit, i.e. at the point where the explicit
    // delete used to be (declared after mockKernel, so it is destroyed before it).
    auto buffer = std::make_unique<MockBuffer>();
    buffer->getGraphicsAllocation()->setCoherent(true);
    cl_mem clBuffer = buffer.get();
    kernelArgInfo.object = clBuffer;
    kernelArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ;

    std::vector kernelArguments;
    kernelArguments.resize(1);
    kernelArguments[0] = kernelArgInfo;
    mockKernel.kernelInfo.kernelArgInfo.resize(1);
    mockKernel.mockKernel->setKernelArguments(kernelArguments);

    retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY],
             mockKernel.mockKernel->getKernelInfo().name.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));
}

// Dimension indices fed to PerformanceHintEnqueueKernelBadSizeTest.
const int validDimensions[] = {0, 1, 2};

INSTANTIATE_TEST_CASE_P(
    DriverDiagnosticsTests,
    PerformanceHintEnqueueReadBufferTest,
    testing::Combine(
        ::testing::Bool(),
        ::testing::Bool()));
INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintEnqueueReadImageTest, testing::Combine( ::testing::Bool(), ::testing::Bool())); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintEnqueueMapTest, testing::Bool()); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintEnqueueKernelBadSizeTest, testing::ValuesIn(validDimensions)); compute-runtime-20.13.16352/opencl/test/unit_test/context/driver_diagnostics_tests.cpp000066400000000000000000001144431363734646600311350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "driver_diagnostics_tests.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include using namespace NEO; bool containsHint(const char *providedHint, char *userData) { for (auto i = 0; i < maxHintCounter; i++) { if (strcmp(providedHint, userData + i * DriverDiagnostics::maxHintStringSize) == 0) { return true; } } return false; } void CL_CALLBACK callbackFunction(const char *providedHint, const void *flags, size_t size, void *userData) { int offset = 0; while (((char *)userData + offset)[0] != 0) { offset += DriverDiagnostics::maxHintStringSize; } strcpy_s((char *)userData + offset, DriverDiagnostics::maxHintStringSize, providedHint); } cl_diagnostics_verbose_level diagnosticsVerboseLevels[] = { CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL}; TEST_P(VerboseLevelTest, GivenVerboseLevelWhenProvidedHintLevelIsSameOrAllThenCallbackFunctionTakesProvidedHint) { cl_device_id deviceID = devices[0]; cl_diagnostics_verbose_level diagnosticsLevel = GetParam(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, (cl_context_properties)diagnosticsLevel, 0}; retVal = 
CL_SUCCESS; auto context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); for (auto hintLevel : validLevels) { memset(userData, 0, maxHintCounter * DriverDiagnostics::maxHintStringSize); context->providePerformanceHint(hintLevel, hintId); if (hintLevel == diagnosticsLevel || hintLevel == CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } delete context; } TEST_P(VerboseLevelTest, GivenVerboseLevelAllWhenAnyHintIsProvidedThenCallbackFunctionTakesProvidedHint) { cl_device_id deviceID = devices[0]; cl_diagnostics_verbose_level providedHintLevel = GetParam(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; retVal = CL_SUCCESS; auto context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); context->providePerformanceHint(providedHintLevel, hintId); EXPECT_TRUE(containsHint(expectedHint, userData)); delete context; } TEST_P(PerformanceHintBufferTest, GivenHostPtrAndSizeAlignmentsWhenBufferIsCreatingThenContextProvidesHintsAboutAlignmentsAndAllocatingMemory) { uintptr_t addressForBuffer = (uintptr_t)address; size_t sizeForBuffer = MemoryConstants::cacheLineSize; if (!alignedAddress) { addressForBuffer++; } if (!alignedSize) { sizeForBuffer--; } auto flags = CL_MEM_USE_HOST_PTR; if (alignedAddress && alignedSize) { flags |= CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL; } buffer = Buffer::create( context, flags, sizeForBuffer, (void *)addressForBuffer, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], 
addressForBuffer, sizeForBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_NEEDS_ALLOCATE_MEMORY], 0); EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); } TEST_P(PerformanceHintCommandQueueTest, GivenProfilingFlagAndPreemptionFlagWhenCommandQueueIsCreatingThenContextProvidesProperHints) { cl_command_queue_properties properties = 0; if (profilingEnabled) { properties = CL_QUEUE_PROFILING_ENABLE; } cmdQ = clCreateCommandQueue(context, context->getDevice(0), properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[DRIVER_CALLS_INTERNAL_CL_FLUSH], 0); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED], 0); EXPECT_EQ(profilingEnabled, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED_WITH_DISABLED_PREEMPTION], 0); if (context->getDevice(0)->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE && preemptionSupported && profilingEnabled) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_P(PerformanceHintCommandQueueTest, GivenEnabledProfilingFlagAndSupportedPreemptionFlagWhenCommandQueueIsCreatingWithPropertiesThenContextProvidesProperHints) { cl_command_queue_properties properties[3] = {0}; if (profilingEnabled) { properties[0] = CL_QUEUE_PROPERTIES; properties[1] = CL_QUEUE_PROFILING_ENABLE; } cmdQ = clCreateCommandQueueWithProperties(context, context->getDevice(0), properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); 
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[DRIVER_CALLS_INTERNAL_CL_FLUSH], 0); EXPECT_TRUE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED], 0); EXPECT_EQ(profilingEnabled, containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PROFILING_ENABLED_WITH_DISABLED_PREEMPTION], 0); if (context->getDevice(0)->getHardwareInfo().platform.eProductFamily < IGFX_SKYLAKE && preemptionSupported && profilingEnabled) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_F(PerformanceHintTest, GivenAlignedHostPtrWhenSubbufferIsCreatingThenContextProvidesHintAboutSharingMemoryWithParentBuffer) { cl_mem_flags flg = CL_MEM_USE_HOST_PTR; cl_buffer_region region = {0, MemoryConstants::cacheLineSize - 1}; void *address = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); auto buffer = clCreateBuffer(context, flg, MemoryConstants::cacheLineSize, address, &retVal); EXPECT_NE(nullptr, buffer); EXPECT_EQ(CL_SUCCESS, retVal); auto subBuffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[SUBBUFFER_SHARES_MEMORY], buffer); EXPECT_TRUE(containsHint(expectedHint, userData)); retVal = clReleaseMemObject(subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); alignedFree(address); } TEST_F(PerformanceHintTest, GivenContextWhenSVMAllocIsCreatingThenContextProvidesHintAboutAlignment) { const ClDeviceInfo &devInfo = pPlatform->getClDevice(0)->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { cl_mem_flags flg = CL_MEM_READ_WRITE; 
size_t size = 4096; auto SVMPtr = clSVMAlloc(context, flg, size, 128); EXPECT_NE(nullptr, SVMPtr); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_SVM_ALLOC_MEETS_ALIGNMENT_RESTRICTIONS], SVMPtr, size); EXPECT_TRUE(containsHint(expectedHint, userData)); clSVMFree(context, SVMPtr); } } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeNDIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeNDIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeNDIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; 
DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndEmptyDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); DispatchInfo emptyDispatchInfo; provideLocalWorkGroupSizeHints(nullptr, emptyDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeNDIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeNDIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(true); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeNDIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { bool isWorkGroupSizeEnabled = DebugManager.flags.EnableComputeWorkSizeND.get(); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals 
mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsDefaultWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenNullContextAndInvalidDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledThenItDoesntCrash) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsDefaultWhenProvideLocalWorkGroupSizeIsCalledReturnValue) { auto pDevice = castToObject(devices[0]); MockKernelWithInternals 
mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 100, {32, 32, 32}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsTrueWhenProvideLocalWorkGroupSizeIsCalledReturnValue) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 2, {32, 32, 1}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenContextAndDispatchinfoAndEnableComputeWorkSizeSquaredIsFalseWhenProvideLocalWorkGroupSizeIsCalledReturnValue) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); DispatchInfo invalidDispatchInfo(mockKernel, 2, {32, 32, 1}, {1, 1, 1}, {0, 0, 0}); provideLocalWorkGroupSizeHints(context, invalidDispatchInfo); } TEST_F(PerformanceHintTest, GivenZeroCopyImageAndContextWhenCreateImageThenContextProvidesHintAboutAlignment) { std::unique_ptr image(ImageHelper::create(context)); EXPECT_TRUE(image->isMemObjZeroCopy()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS], static_cast(image.get())); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintTest, GivenNonZeroCopyImageAndContextWhenCreateImageThenContextDoesntProvidesHintAboutAlignment) { std::unique_ptr image(ImageHelper>::create(context)); EXPECT_FALSE(image->isMemObjZeroCopy()); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, 
DriverDiagnostics::hintFormat[CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS], static_cast(image.get())); EXPECT_FALSE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticValueWhenContextIsCreatedThenItHasHintLevelSetToThatValue) { DebugManagerStateRestore dbgRestore; auto hintLevel = 1; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; auto context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); EXPECT_TRUE(!!context->isProvidingPerformanceHints()); auto driverDiagnostics = context->driverDiagnostics; ASSERT_NE(nullptr, driverDiagnostics); EXPECT_TRUE(driverDiagnostics->validFlags(hintLevel)); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenHintIsCalledThenDriverProvidedOutputOnCout) { DebugManagerStateRestore dbgRestore; auto hintLevel = 255; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; auto context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); testing::internal::CaptureStdout(); auto buffer = Buffer::create( context, CL_MEM_READ_ONLY, 4096, nullptr, retVal); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_EQ('\n', output[0]); buffer->release(); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsAndBadHintLevelWhenActionForHintOccursThenNothingIsProvidedToCout) { DebugManagerStateRestore dbgRestore; auto hintLevel = 8; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; auto context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); testing::internal::CaptureStdout(); auto buffer = Buffer::create( context, CL_MEM_READ_ONLY, 4096, nullptr, retVal); 
std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(0u, output.size()); buffer->release(); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenContextIsBeingCreatedThenPropertiesPassedToContextAreOverwritten) { DebugManagerStateRestore dbgRestore; auto hintLevel = 1; DebugManager.flags.PrintDriverDiagnostics.set(hintLevel); auto pDevice = castToObject(devices[0]); cl_device_id clDevice = pDevice; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto retValue = CL_SUCCESS; auto context = Context::create(validProperties, ClDeviceVector(&clDevice, 1), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retValue); auto driverDiagnostics = context->driverDiagnostics; ASSERT_NE(nullptr, driverDiagnostics); EXPECT_TRUE(driverDiagnostics->validFlags(hintLevel)); EXPECT_FALSE(driverDiagnostics->validFlags(2)); context->release(); } TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithBuffersForAuxTranslationThenContextProvidesProperHint) { DebugManagerStateRestore dbgRestore; DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); MockKernelWithInternals mockKernel(*pDevice, context); MockBuffer buffer; cl_mem clMem = &buffer; buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); mockKernel.kernelInfo.kernelArgInfo.resize(1); mockKernel.kernelInfo.kernelArgInfo[0].metadataExtended = std::make_unique(); mockKernel.kernelInfo.kernelArgInfo[0].metadataExtended->argName = "arg0"; mockKernel.kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1); mockKernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = false; mockKernel.mockKernel->initialize(); mockKernel.mockKernel->auxTranslationRequired = true; mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); 
testing::internal::CaptureStdout(); MemObjsForAuxTranslation memObjects; mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], mockKernel.mockKernel->getKernelInfo().name.c_str(), 0, mockKernel.mockKernel->getKernelInfo().kernelArgInfo.at(0).metadataExtended->argName.c_str()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) { cl_int retVal; HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); size_t size = 8192u; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); context->isSharedContext = false; auto buffer = std::unique_ptr(Buffer::create(context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags((1 << 21), 0, 0), (1 << 21), 0, size, static_cast(NULL), retVal)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_COMPRESSED], buffer.get()); auto compressionSupported = HwHelper::get(hwInfo.platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(hwInfo, size) && HwHelper::renderCompressedBuffersSupported(hwInfo); if (compressionSupported) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_F(PerformanceHintTest, givenUncompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) 
{ cl_int retVal; HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0); size_t size = 0u; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); std::unique_ptr buffer; bool isCompressed = true; if (context->getMemoryManager()) { isCompressed = MemObjHelper::isSuitableForRenderCompression( HwHelper::renderCompressedBuffersSupported(hwInfo), memoryProperties, *context, HwHelper::get(hwInfo.platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(hwInfo, size)) && !is32bit && !context->isSharedContext && (!memoryProperties.flags.useHostPtr || context->getMemoryManager()->isLocalMemorySupported(device->getRootDeviceIndex())) && !memoryProperties.flags.forceSharedPhysicalMemory; buffer = std::unique_ptr(Buffer::create(context.get(), memoryProperties, CL_MEM_READ_WRITE, 0, size, static_cast(NULL), retVal)); } snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_NOT_COMPRESSED], buffer.get()); if (isCompressed) { Buffer::provideCompressionHint(GraphicsAllocation::AllocationType::BUFFER, context.get(), buffer.get()); } EXPECT_TRUE(containsHint(expectedHint, userData)); } TEST_F(PerformanceHintTest, givenCompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) { HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id 
deviceId = device.get(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); const size_t width = 5; const size_t height = 3; const size_t depth = 2; cl_int retVal = CL_SUCCESS; auto const elementSize = 4; char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); cl_image_format imageFormat; cl_image_desc imageDesc; auto mockBuffer = std::unique_ptr(new MockBuffer()); StorageInfo info; size_t t = 4; auto gmm = std::unique_ptr(new Gmm(device->getGmmClientContext(), static_cast(nullptr), t, false, true, true, info)); gmm->isRenderCompressed = true; mockBuffer->getGraphicsAllocation()->setDefaultGmm(gmm.get()); cl_mem mem = mockBuffer.get(); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = mem; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = std::unique_ptr(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_COMPRESSED], image.get()); alignedFree(hostPtr); if (HwHelper::renderCompressedImagesSupported(hwInfo)) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { 
EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_F(PerformanceHintTest, givenImageWithNoGmmWhenItsCreatedThenNoPerformanceHintIsProvided) { HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); const size_t width = 5; const size_t height = 3; const size_t depth = 2; cl_int retVal = CL_SUCCESS; auto const elementSize = 4; char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); cl_image_format imageFormat; cl_image_desc imageDesc; auto mockBuffer = std::unique_ptr(new MockBuffer()); cl_mem mem = mockBuffer.get(); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = mem; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = std::unique_ptr(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_COMPRESSED], image.get()); 
EXPECT_FALSE(containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_NOT_COMPRESSED], image.get()); EXPECT_FALSE(containsHint(expectedHint, userData)); alignedFree(hostPtr); } TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) { HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); cl_device_id deviceId = device.get(); cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); const size_t width = 5; const size_t height = 3; const size_t depth = 2; cl_int retVal = CL_SUCCESS; auto const elementSize = 4; char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); cl_image_format imageFormat; cl_image_desc imageDesc; auto mockBuffer = std::unique_ptr(new MockBuffer()); StorageInfo info; size_t t = 4; auto gmm = std::unique_ptr(new Gmm(device->getGmmClientContext(), (const void *)nullptr, t, false, true, true, info)); gmm->isRenderCompressed = false; mockBuffer->getGraphicsAllocation()->setDefaultGmm(gmm.get()); cl_mem mem = mockBuffer.get(); imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = mem; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = 
Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = std::unique_ptr(Image::create( context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_NOT_COMPRESSED], image.get()); alignedFree(hostPtr); if (HwHelper::renderCompressedImagesSupported(hwInfo)) { EXPECT_TRUE(containsHint(expectedHint, userData)); } else { EXPECT_FALSE(containsHint(expectedHint, userData)); } } TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[0]); auto size = zeroSized ? 0 : 1024; MockKernelWithInternals mockKernel(*pDevice, context); SPatchMediaVFEState mediaVFEstate; mediaVFEstate.PerThreadScratchSpace = size; mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(); mockKernel.mockKernel->initialize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH], mockKernel.mockKernel->getKernelInfo().name.c_str(), size); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[0]); static_cast(pDevice->getMemoryManager())->turnOnFakingBigAllocations(); auto size = zeroSized ? 
0 : 1024; MockKernelWithInternals mockKernel(*pDevice, context); SPatchAllocateStatelessPrivateSurface allocateStatelessPrivateMemorySurface; allocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = size; allocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128; allocateStatelessPrivateMemorySurface.DataParamOffset = 16; allocateStatelessPrivateMemorySurface.DataParamSize = 8; mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrivateSurface = &allocateStatelessPrivateMemorySurface; size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(); mockKernel.mockKernel->initialize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRIVATE_MEMORY_USAGE_TOO_HIGH], mockKernel.mockKernel->getKernelInfo().name.c_str(), size); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, VerboseLevelTest, testing::ValuesIn(diagnosticsVerboseLevels)); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintBufferTest, testing::Combine( ::testing::Bool(), ::testing::Bool())); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintCommandQueueTest, testing::Combine( ::testing::Bool(), ::testing::Bool())); INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintKernelTest, testing::Bool()); TEST(PerformanceHintsDebugVariables, givenDefaultDebugManagerWhenPrintDriverDiagnosticsIsCalledThenMinusOneIsReturned) { EXPECT_EQ(-1, DebugManager.flags.PrintDriverDiagnostics.get()); } TEST(PerformanceHintsTransferTest, givenCommandTypeAndMemoryTransferRequiredWhenAskingForHintThenReturnCorrectValue) { DriverDiagnostics driverDiagnostics(0); const uint32_t numHints = 8; std::tuple commandHints[numHints] = { // commandType, transfer required, transfer not required std::make_tuple(CL_COMMAND_MAP_BUFFER, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA), 
std::make_tuple(CL_COMMAND_MAP_IMAGE, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_UNMAP_MEM_OBJECT, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_WRITE_BUFFER, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_READ_BUFFER, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_WRITE_BUFFER_RECT, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA), std::make_tuple(CL_COMMAND_READ_BUFFER_RECT, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA), std::make_tuple(CL_COMMAND_WRITE_IMAGE, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA), }; for (uint32_t i = 0; i < numHints; i++) { auto hintWithTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), true); auto hintWithoutTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), false); EXPECT_EQ(std::get<1>(commandHints[i]), hintWithTransferRequired); EXPECT_EQ(std::get<2>(commandHints[i]), hintWithoutTransferRequired); } EXPECT_THROW(driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, true), std::exception); // no hint for this scenario EXPECT_EQ(CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, false)); } TEST_F(DriverDiagnosticsTest, givenInvalidCommandTypeWhenAskingForZeroCopyOperatonThenAbort) { cl_device_id deviceId = devices[0]; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; auto context = std::unique_ptr(Context::create(validProperties, 
ClDeviceVector(&deviceId, 1), callbackFunction, (void *)userData, retVal)); auto buffer = std::unique_ptr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto address = reinterpret_cast(0x12345); EXPECT_THROW(context->providePerformanceHintForMemoryTransfer(CL_COMMAND_BARRIER, true, buffer.get(), address), std::exception); } compute-runtime-20.13.16352/opencl/test/unit_test/context/driver_diagnostics_tests.h000066400000000000000000000233511363734646600305770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/context/context.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; const int maxHintCounter = 6; bool containsHint(const char *providedHint, char *userData); void CL_CALLBACK callbackFunction(const char *providedHint, const void *flags, size_t size, void *userData); struct DriverDiagnosticsTest : public PlatformFixture, public ::testing::Test { using PlatformFixture::SetUp; void SetUp() override { PlatformFixture::SetUp(); memset(userData, 0, maxHintCounter * DriverDiagnostics::maxHintStringSize); } void TearDown() override { PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; char userData[maxHintCounter * DriverDiagnostics::maxHintStringSize]{}; char expectedHint[DriverDiagnostics::maxHintStringSize]{}; }; struct VerboseLevelTest : public 
DriverDiagnosticsTest, public ::testing::WithParamInterface { void SetUp() override { DriverDiagnosticsTest::SetUp(); hintId = CL_BUFFER_NEEDS_ALLOCATE_MEMORY; snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[hintId], 0); } void TearDown() override { DriverDiagnosticsTest::TearDown(); } std::vector validLevels{ CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL}; PerformanceHints hintId; }; struct PerformanceHintTest : public DriverDiagnosticsTest, public CommandQueueHwFixture { void SetUp() override { DriverDiagnosticsTest::SetUp(); cl_device_id deviceID = devices[0]; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { CommandQueueHwFixture::TearDown(); DriverDiagnosticsTest::TearDown(); } }; struct PerformanceHintBufferTest : public PerformanceHintTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintTest::SetUp(); std::tie(alignedAddress, alignedSize) = GetParam(); address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); } void TearDown() override { delete buffer; alignedFree(address); PerformanceHintTest::TearDown(); } bool alignedSize = false; bool alignedAddress = false; void *address = nullptr; Buffer *buffer = nullptr; }; struct PerformanceHintCommandQueueTest : public PerformanceHintTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintTest::SetUp(); std::tie(profilingEnabled, preemptionSupported) = GetParam(); static_cast(context->getDevice(0))->deviceInfo.preemptionSupported = preemptionSupported; } void TearDown() override { clReleaseCommandQueue(cmdQ); 
PerformanceHintTest::TearDown(); } cl_command_queue cmdQ = nullptr; bool profilingEnabled = false; bool preemptionSupported = false; }; struct PerformanceHintEnqueueTest : public PerformanceHintTest { void SetUp() override { PerformanceHintTest::SetUp(); pCmdQ = createCommandQueue(pPlatform->getClDevice(0)); } void TearDown() override { PerformanceHintTest::TearDown(); } }; struct PerformanceHintEnqueueBufferTest : public PerformanceHintEnqueueTest { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer = Buffer::create( context, CL_MEM_USE_HOST_PTR, MemoryConstants::cacheLineSize, address, retVal); } void TearDown() override { delete buffer; alignedFree(address); PerformanceHintEnqueueTest::TearDown(); } void *address = nullptr; Buffer *buffer = nullptr; }; struct PerformanceHintEnqueueReadBufferTest : public PerformanceHintEnqueueBufferTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintEnqueueBufferTest::SetUp(); std::tie(alignedAddress, alignedSize) = GetParam(); } void TearDown() override { PerformanceHintEnqueueBufferTest::TearDown(); } bool alignedSize = false; bool alignedAddress = false; }; struct PerformanceHintEnqueueImageTest : public PerformanceHintEnqueueTest { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); image = ImageHelper>::create(context); zeroCopyImage.reset(ImageHelper::create(context)); } void TearDown() override { delete image; zeroCopyImage.reset(nullptr); alignedFree(address); PerformanceHintEnqueueTest::TearDown(); } void *address = nullptr; Image *image = nullptr; std::unique_ptr zeroCopyImage; }; struct PerformanceHintEnqueueReadImageTest : public PerformanceHintEnqueueImageTest, public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintEnqueueImageTest::SetUp(); 
std::tie(alignedAddress, alignedSize) = GetParam(); } void TearDown() override { PerformanceHintEnqueueImageTest::TearDown(); } bool alignedSize = false; bool alignedAddress = false; }; struct PerformanceHintEnqueueMapTest : public PerformanceHintEnqueueTest, public ::testing::WithParamInterface { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); } void TearDown() override { PerformanceHintEnqueueTest::TearDown(); } }; struct PerformanceHintEnqueueKernelTest : public PerformanceHintEnqueueTest, public ProgramFixture { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); cl_device_id device = pPlatform->getClDevice(0); CreateProgramFromBinary(context, &device, "CopyBuffer_simd32"); retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel = Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; } void TearDown() override { delete kernel; ProgramFixture::TearDown(); PerformanceHintEnqueueTest::TearDown(); } Kernel *kernel = nullptr; size_t globalWorkGroupSize[3]{}; }; struct PerformanceHintEnqueueKernelBadSizeTest : public PerformanceHintEnqueueKernelTest, public ::testing::WithParamInterface { void SetUp() override { PerformanceHintEnqueueKernelTest::SetUp(); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 32; } void TearDown() override { PerformanceHintEnqueueKernelTest::TearDown(); } }; struct PerformanceHintEnqueueKernelPrintfTest : public PerformanceHintEnqueueTest, public ProgramFixture { void SetUp() override { PerformanceHintEnqueueTest::SetUp(); cl_device_id device = pPlatform->getClDevice(0); CreateProgramFromBinary(context, &device, "printf"); retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel = Kernel::create(pProgram, *pProgram->getKernelInfo("test"), &retVal); globalWorkGroupSize[0] = 
globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; } void TearDown() override { delete kernel; ProgramFixture::TearDown(); PerformanceHintEnqueueTest::TearDown(); } Kernel *kernel = nullptr; size_t globalWorkGroupSize[3]{}; }; struct PerformanceHintKernelTest : public PerformanceHintTest, public ::testing::WithParamInterface { void SetUp() override { PerformanceHintTest::SetUp(); zeroSized = GetParam(); } void TearDown() override { PerformanceHintTest::TearDown(); } bool zeroSized = false; }; compute-runtime-20.13.16352/opencl/test/unit_test/context/get_supported_image_formats_tests.cpp000066400000000000000000000364661363734646600330440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gtest/gtest.h" #include using namespace NEO; struct GetSupportedImageFormatsTest : public PlatformFixture, public ContextFixture, public ::testing::TestWithParam> { using ContextFixture::SetUp; using PlatformFixture::SetUp; GetSupportedImageFormatsTest() { } void SetUp() override { PlatformFixture::SetUp(); ContextFixture::SetUp(num_devices, devices); } void TearDown() override { ContextFixture::TearDown(); PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; }; TEST_P(GetSupportedImageFormatsTest, checkNumImageFormats) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; std::tie(imageFormatsFlags, imageFormats) = GetParam(); retVal = pContext->getSupportedImageFormats( &castToObject(devices[0])->getDevice(), imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_GT(numImageFormats, 0u);
}

// Queries the number of supported image formats for the parameterized
// (memory flags, image type) pair, then retrieves the full list and checks that
// every returned entry has a non-zero channel order and channel data type.
TEST_P(GetSupportedImageFormatsTest, retrieveImageFormats) {
    cl_uint numImageFormats = 0;
    uint64_t imageFormatsFlags;
    uint32_t imageFormats;
    std::tie(imageFormatsFlags, imageFormats) = GetParam();

    // First call: count only. The status is asserted because the count is
    // meaningless if the query itself failed.
    retVal = pContext->getSupportedImageFormats(
        &castToObject(devices[0])->getDevice(),
        imageFormatsFlags,
        imageFormats,
        0,
        nullptr,
        &numImageFormats);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_GT(numImageFormats, 0u);

    // Array form of make_unique value-initializes (zeroes) every element and
    // releases the storage on every exit path.
    auto imageFormatList = std::make_unique<cl_image_format[]>(numImageFormats);

    // Second call: fill the list.
    retVal = pContext->getSupportedImageFormats(
        &castToObject(devices[0])->getDevice(),
        imageFormatsFlags,
        imageFormats,
        numImageFormats,
        imageFormatList.get(),
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    for (cl_uint entry = 0; entry < numImageFormats; ++entry) {
        EXPECT_NE(0u, imageFormatList[entry].image_channel_order);
        EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type);
    }
}

TEST_P(GetSupportedImageFormatsTest, retrieveImageFormatsSRGB) {
    cl_uint numImageFormats = 0;
    uint64_t imageFormatsFlags;
    uint32_t imageFormats;
    bool sRGBAFormatFound = false;
    bool sBGRAFormatFound = false;
    bool isReadOnly = false;
    std::tie(imageFormatsFlags, imageFormats) = GetParam();

    retVal = pContext->getSupportedImageFormats(
        &castToObject(devices[0])->getDevice(),
        imageFormatsFlags,
        imageFormats,
        0,
        nullptr,
        &numImageFormats);
    EXPECT_GT(numImageFormats, 0u);

    auto imageFormatList = new cl_image_format[numImageFormats];
    memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format));

    retVal = pContext->getSupportedImageFormats(
        &castToObject(devices[0])->getDevice(),
        imageFormatsFlags,
        imageFormats,
        numImageFormats,
        imageFormatList,
        nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    isReadOnly |= (imageFormatsFlags == CL_MEM_READ_ONLY);

    for (cl_uint entry = 0; entry < numImageFormats; ++entry) {
        EXPECT_NE(0u, imageFormatList[entry].image_channel_order);
        EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type);

        if
(imageFormatList[entry].image_channel_order == CL_sRGBA) { sRGBAFormatFound = true; } if (imageFormatList[entry].image_channel_order == CL_sBGRA) { sBGRAFormatFound = true; } } if (isReadOnly && ((&castToObject(devices[0])->getDevice())->getHardwareInfo().capabilityTable.clVersionSupport >= 20)) { EXPECT_TRUE(sRGBAFormatFound & sBGRAFormatFound); } else { EXPECT_FALSE(sRGBAFormatFound | sBGRAFormatFound); } delete[] imageFormatList; } TEST(ImageFormats, isDepthFormat) { for (auto &format : SurfaceFormats::readOnly20()) { EXPECT_FALSE(Image::isDepthFormat(format.OCLImageFormat)); } for (auto &format : SurfaceFormats::readOnlyDepth()) { EXPECT_TRUE(Image::isDepthFormat(format.OCLImageFormat)); } } struct PackedYuvExtensionSupportedImageFormatsTest : public ::testing::TestWithParam> { void SetUp() override { device = std::make_unique(new MockDevice()); context = std::unique_ptr(new MockContext(device.get(), true)); } void TearDown() override { } std::unique_ptr device; std::unique_ptr context; cl_int retVal; }; TEST_P(PackedYuvExtensionSupportedImageFormatsTest, retrieveImageFormatsPackedYUV) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; bool YUYVFormatFound = false; bool UYVYFormatFound = false; bool YVYUFormatFound = false; bool VYUYFormatFound = false; bool isReadOnly = false; std::tie(imageFormatsFlags, imageFormats) = GetParam(); device->deviceInfo.nv12Extension = false; device->deviceInfo.packedYuvExtension = true; retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats); EXPECT_GT(numImageFormats, 0u); auto imageFormatList = new cl_image_format[numImageFormats]; memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format)); retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, imageFormats, numImageFormats, imageFormatList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); isReadOnly |= (imageFormatsFlags == 
CL_MEM_READ_ONLY); for (cl_uint entry = 0; entry < numImageFormats; ++entry) { EXPECT_NE(0u, imageFormatList[entry].image_channel_order); EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type); if (imageFormatList[entry].image_channel_order == CL_YUYV_INTEL) { YUYVFormatFound = true; } if (imageFormatList[entry].image_channel_order == CL_UYVY_INTEL) { UYVYFormatFound = true; } if (imageFormatList[entry].image_channel_order == CL_YVYU_INTEL) { YVYUFormatFound = true; } if (imageFormatList[entry].image_channel_order == CL_VYUY_INTEL) { VYUYFormatFound = true; } } if (isReadOnly && imageFormats == CL_MEM_OBJECT_IMAGE2D) { EXPECT_TRUE(YUYVFormatFound); EXPECT_TRUE(UYVYFormatFound); EXPECT_TRUE(YVYUFormatFound); EXPECT_TRUE(VYUYFormatFound); } else { EXPECT_FALSE(YUYVFormatFound); EXPECT_FALSE(UYVYFormatFound); EXPECT_FALSE(YVYUFormatFound); EXPECT_FALSE(VYUYFormatFound); } delete[] imageFormatList; } struct NV12ExtensionSupportedImageFormatsTest : public ::testing::TestWithParam> { void SetUp() override { device = std::make_unique(new MockDevice()); context = std::unique_ptr(new MockContext(device.get(), true)); } void TearDown() override { } std::unique_ptr device; std::unique_ptr context; cl_int retVal; }; typedef NV12ExtensionSupportedImageFormatsTest NV12ExtensionUnsupportedImageFormatsTest; TEST_P(NV12ExtensionSupportedImageFormatsTest, givenNV12ExtensionWhenQueriedForImageFormatsThenNV12FormatIsReturnedOnlyFor2DImages) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; bool Nv12FormatFound = false; std::tie(imageFormatsFlags, imageFormats) = GetParam(); device->deviceInfo.nv12Extension = true; device->deviceInfo.packedYuvExtension = false; retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats); unsigned int clVersionSupport = device.get()->getHardwareInfo().capabilityTable.clVersionSupport; size_t expectedNumReadOnlyFormats = (clVersionSupport >= 20) ? 
SurfaceFormats::readOnly20().size() : SurfaceFormats::readOnly12().size(); if (Image::isImage2dOr2dArray(imageFormats) && imageFormatsFlags == CL_MEM_READ_ONLY) { expectedNumReadOnlyFormats += SurfaceFormats::readOnlyDepth().size(); } if (Image::isImage2d(imageFormats)) { if (imageFormatsFlags == CL_MEM_READ_ONLY) { EXPECT_EQ(expectedNumReadOnlyFormats + SurfaceFormats::planarYuv().size(), static_cast(numImageFormats)); } if (imageFormatsFlags == CL_MEM_NO_ACCESS_INTEL) { EXPECT_EQ(expectedNumReadOnlyFormats + SurfaceFormats::planarYuv().size(), static_cast(numImageFormats)); } } else { if (imageFormatsFlags == CL_MEM_READ_ONLY) { EXPECT_EQ(expectedNumReadOnlyFormats, static_cast(numImageFormats)); } if (imageFormatsFlags == CL_MEM_NO_ACCESS_INTEL) { EXPECT_EQ(expectedNumReadOnlyFormats, static_cast(numImageFormats)); } } auto imageFormatList = new cl_image_format[numImageFormats]; memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format)); retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, imageFormats, numImageFormats, imageFormatList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (cl_uint entry = 0; entry < numImageFormats; ++entry) { EXPECT_NE(0u, imageFormatList[entry].image_channel_order); EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type); if (imageFormatList[entry].image_channel_order == CL_NV12_INTEL) { Nv12FormatFound = true; } } if (imageFormats == CL_MEM_OBJECT_IMAGE2D) { EXPECT_TRUE(Nv12FormatFound); } else { EXPECT_FALSE(Nv12FormatFound); } delete[] imageFormatList; } TEST_P(NV12ExtensionUnsupportedImageFormatsTest, givenNV12ExtensionWhenQueriedForWriteOnlyOrReadWriteImageFormatsThenNV12FormatIsNotReturned) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; bool Nv12FormatFound = false; std::tie(imageFormatsFlags, imageFormats) = GetParam(); device->deviceInfo.nv12Extension = true; retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, 
imageFormats, 0, nullptr, &numImageFormats); if (imageFormatsFlags == CL_MEM_WRITE_ONLY) { if (!Image::isImage2dOr2dArray(imageFormats)) { EXPECT_EQ(SurfaceFormats::writeOnly().size(), static_cast(numImageFormats)); } else { EXPECT_EQ(SurfaceFormats::writeOnly().size() + SurfaceFormats::readWriteDepth().size(), static_cast(numImageFormats)); } } if (imageFormatsFlags == CL_MEM_READ_WRITE) { if (!Image::isImage2dOr2dArray(imageFormats)) { EXPECT_EQ(SurfaceFormats::readWrite().size(), static_cast(numImageFormats)); } else { EXPECT_EQ(SurfaceFormats::readWrite().size() + SurfaceFormats::readWriteDepth().size(), static_cast(numImageFormats)); } } auto imageFormatList = new cl_image_format[numImageFormats]; memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format)); retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, imageFormats, numImageFormats, imageFormatList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (cl_uint entry = 0; entry < numImageFormats; ++entry) { EXPECT_NE(0u, imageFormatList[entry].image_channel_order); EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type); if (imageFormatList[entry].image_channel_order == CL_NV12_INTEL) { Nv12FormatFound = true; } } EXPECT_FALSE(Nv12FormatFound); delete[] imageFormatList; } TEST_P(NV12ExtensionSupportedImageFormatsTest, retrieveLessImageFormatsThanAvailable) { cl_uint numImageFormats = 0; uint64_t imageFormatsFlags; uint32_t imageFormats; std::tie(imageFormatsFlags, imageFormats) = GetParam(); device->deviceInfo.nv12Extension = true; retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, imageFormats, 0, nullptr, &numImageFormats); EXPECT_GT(numImageFormats, 0u); if (numImageFormats > 1) numImageFormats--; auto imageFormatList = new cl_image_format[numImageFormats]; memset(imageFormatList, 0, numImageFormats * sizeof(cl_image_format)); retVal = context->getSupportedImageFormats( &device->getDevice(), imageFormatsFlags, imageFormats, 
numImageFormats, imageFormatList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); for (cl_uint entry = 0; entry < numImageFormats; ++entry) { EXPECT_NE(0u, imageFormatList[entry].image_channel_order); EXPECT_NE(0u, imageFormatList[entry].image_channel_data_type); } delete[] imageFormatList; } cl_mem_flags GetSupportedImageFormatsFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY}; cl_mem_object_type GetSupportedImageFormats[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; INSTANTIATE_TEST_CASE_P( Context, GetSupportedImageFormatsTest, ::testing::Combine( ::testing::ValuesIn(GetSupportedImageFormatsFlags), ::testing::ValuesIn(GetSupportedImageFormats))); INSTANTIATE_TEST_CASE_P( Context, PackedYuvExtensionSupportedImageFormatsTest, ::testing::Combine( ::testing::ValuesIn(GetSupportedImageFormatsFlags), ::testing::ValuesIn(GetSupportedImageFormats))); cl_mem_flags NV12ExtensionSupportedImageFormatsFlags[] = { CL_MEM_NO_ACCESS_INTEL, CL_MEM_READ_ONLY}; cl_mem_flags NV12ExtensionUnsupportedImageFormatsFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY}; cl_mem_object_type NV12ExtensionSupportedImageFormats[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D}; INSTANTIATE_TEST_CASE_P( Context, NV12ExtensionSupportedImageFormatsTest, ::testing::Combine( ::testing::ValuesIn(NV12ExtensionSupportedImageFormatsFlags), ::testing::ValuesIn(NV12ExtensionSupportedImageFormats))); INSTANTIATE_TEST_CASE_P( Context, NV12ExtensionUnsupportedImageFormatsTest, ::testing::Combine( ::testing::ValuesIn(NV12ExtensionUnsupportedImageFormatsFlags), ::testing::ValuesIn(NV12ExtensionSupportedImageFormats))); 
compute-runtime-20.13.16352/opencl/test/unit_test/context/gl/000077500000000000000000000000001363734646600237205ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/context/gl/CMakeLists.txt000066400000000000000000000005761363734646600264700ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_context_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_gl_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_gl_tests.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_context_gl}) endif() add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/context/gl/context_gl_tests.cpp000066400000000000000000000022101363734646600300070ustar00rootroot00000000000000
/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/test/unit_test/context/gl/context_gl_tests.h"

#include "opencl/source/sharings/gl/gl_sharing.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"

namespace NEO {

// The fixture's default context is created from a CL_CONTEXT_PLATFORM-only
// property list; this test checks that such a context reports no sharing object.
TEST_F(GlContextTest, GivenDefaultContextThenGlSharingIsDisabled) {
    ASSERT_EQ(context->getSharing(), nullptr);
}

// Creates a separate context whose property list additionally carries
// CL_GL_CONTEXT_KHR and verifies that this new context exposes a sharing object
// and does not report interop user sync as enabled.
TEST_F(GlContextTest, GivenGlContextParamWhenCreateContextThenInitSharingFunctions) {
    cl_device_id deviceID = devices[0];
    auto pPlatform = NEO::platform();
    cl_platform_id pid[1];
    pid[0] = pPlatform;

    // Zero-terminated list of {name, value} pairs: platform, GL context, terminator.
    cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_GL_CONTEXT_KHR, 0x10000, 0};
    cl_int retVal = CL_SUCCESS;
    auto ctx = Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal);

    EXPECT_EQ(CL_SUCCESS, retVal);
    // Fatal on purpose: ctx is dereferenced below, and nothing has to be released yet.
    ASSERT_NE(nullptr, ctx);

    auto sharing = ctx->getSharing();
    // Non-fatal check: ctx is released manually at the end of the test, so a
    // fatal assertion here would return early and leak it. `sharing` itself is
    // not dereferenced afterwards.
    EXPECT_NE(nullptr, sharing);
    // Inspect the context created in this test, not the fixture's default `context`.
    EXPECT_FALSE(ctx->getInteropUserSyncEnabled());
    delete ctx;
}

} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/context/gl/context_gl_tests.h000066400000000000000000000042001363734646600274550ustar00rootroot00000000000000
/*
 * Copyright (C) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "opencl/test/unit_test/fixtures/platform_fixture.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"

#include "gtest/gtest.h"

#include

namespace NEO {

// Test fixture layered on PlatformFixture. For every test it creates a context
// from a property list that names only the platform, and releases that context
// again in TearDown.
struct GlContextTest : public PlatformFixture, public ::testing::Test {

    using PlatformFixture::SetUp;

    // Runs the platform fixture set-up, fills `properties` with
    // {CL_CONTEXT_PLATFORM, <platform>, 0} and creates `context` for all fixture
    // devices. The test is aborted if no context was returned.
    void SetUp() override {
        PlatformFixture::SetUp();

        properties[0] = CL_CONTEXT_PLATFORM;
        properties[1] = reinterpret_cast(static_cast(pPlatform));
        properties[2] = 0;

        context = Context::create(properties, ClDeviceVector(devices, num_devices), nullptr, nullptr, retVal);
        ASSERT_NE(nullptr, context);
    }

    // Deletes the context created in SetUp before tearing the platform fixture down.
    void TearDown() override {
        delete context;
        PlatformFixture::TearDown();
    }

    // Shared body for the per-API context creation tests.
    //
    // contextType - context property name that is paired with the value 0x10000
    //               in the property list (presumably a dummy handle - TODO confirm).
    //
    // Creates a context for the first fixture device with the property listed
    // once, then again with the same property listed twice. Each time it expects
    // CL_SUCCESS, a non-null context, and interop user sync reported as disabled.
    void testContextCreation(cl_context_properties contextType) {
        const cl_device_id deviceID = devices[0];
        const auto platformId = reinterpret_cast(static_cast(platform()));

        // Property list with a single {contextType, 0x10000} pair.
        const cl_context_properties propertiesOneContext[] = {CL_CONTEXT_PLATFORM, platformId, contextType, 0x10000, 0};
        // NOTE: this local intentionally-or-not shadows the fixture member `context`;
        // everything below refers to the locally created object.
        auto context = std::unique_ptr(Context::create(propertiesOneContext, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal));
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, context.get());
        EXPECT_FALSE(context->getInteropUserSyncEnabled());

        // Same property name supplied twice in one list.
        const cl_context_properties propertiesTwoContexts[] = {CL_CONTEXT_PLATFORM, platformId, contextType, 0x10000, contextType, 0x10000, 0};
        context = std::unique_ptr(Context::create(propertiesTwoContexts, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal));
        EXPECT_EQ(CL_SUCCESS, retVal);
        ASSERT_NE(nullptr, context.get());
        EXPECT_FALSE(context->getInteropUserSyncEnabled());
    }

    // Status written by the Context::create calls above.
    cl_int retVal = CL_SUCCESS;
    // Default context created in SetUp and deleted in TearDown.
    MockContext *context = nullptr;
    // Backing storage for the property list used in SetUp.
    cl_context_properties properties[3] = {};
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/context/gl/windows/000077500000000000000000000000001363734646600254125ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/context/gl/windows/CMakeLists.txt000066400000000000000000000005111363734646600301470ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_context_gl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_gl_tests_windows.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_context_gl_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/context/gl/windows/context_gl_tests_windows.cpp000066400000000000000000000012261363734646600332610ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/context/gl/context_gl_tests.h" namespace NEO { TEST_F(GlContextTest, GivenClGlContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_GL_CONTEXT_KHR); } TEST_F(GlContextTest, GivenEglContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_EGL_DISPLAY_KHR); } TEST_F(GlContextTest, GivenGlxContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_GLX_DISPLAY_KHR); } TEST_F(GlContextTest, GivenWglContextWhenContextIsCreatedThenSuccess) { testContextCreation(CL_WGL_HDC_KHR); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/core_unit_tests_files.cmake000066400000000000000000000016321363734646600272310ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # target_sources(igdrcl_tests PRIVATE ${NEO_SOURCE_DIR}/opencl/test/unit_test/core_unit_tests_files.cmake) append_sources_from_properties(NEO_CORE_UNIT_TESTS_SOURCES NEO_CORE_COMMAND_CONTAINER_TESTS NEO_CORE_DEBUG_SETTINGS_TESTS NEO_CORE_DIRECT_SUBMISSION_TESTS NEO_CORE_DIRECT_SUBMISSION_DISPATCHERS_TESTS 
NEO_CORE_INDIRECT_HEAP_TESTS NEO_CORE_ENCODERS_TESTS NEO_CORE_IMAGE_TESTS ) if(WIN32) append_sources_from_properties(NEO_CORE_UNIT_TESTS_SOURCES NEO_CORE_DIRECT_SUBMISSION_WINDOWS_TESTS NEO_CORE_OS_INTERFACE_TESTS_WINDOWS ) else() append_sources_from_properties(NEO_CORE_UNIT_TESTS_SOURCES NEO_CORE_DIRECT_SUBMISSION_LINUX_TESTS ) endif() set_property(GLOBAL PROPERTY NEO_CORE_UNIT_TESTS_SOURCES ${NEO_CORE_UNIT_TESTS_SOURCES}) target_sources(igdrcl_tests PRIVATE ${NEO_CORE_UNIT_TESTS_SOURCES}) compute-runtime-20.13.16352/opencl/test/unit_test/custom_event_listener.h000066400000000000000000000113301363734646600264210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "gtest/gtest.h" #include #include #include #include #include std::string lastTest(""); class CCustomEventListener : public ::testing::TestEventListener { public: CCustomEventListener(::testing::TestEventListener *listener) : hardwarePrefix("---") { _listener = listener; } CCustomEventListener(::testing::TestEventListener *listener, const char *hardwarePrefix) : _listener(listener), hardwarePrefix(hardwarePrefix) { std::transform(this->hardwarePrefix.begin(), this->hardwarePrefix.end(), this->hardwarePrefix.begin(), [](unsigned char c) { return std::toupper(c); }); } private: void OnTestProgramStart(const ::testing::UnitTest &unitTest) override { } void OnTestIterationStart( const ::testing::UnitTest &unitTest, int iteration) override { if (::testing::GTEST_FLAG(shuffle)) { std::cout << "Iteration: " << iteration + 1 << ". random_seed: " << unitTest.random_seed() << std::endl; } else { std::cout << "Iteration: " << iteration + 1 << std::endl; std::cout << "Iteration: " << iteration + 1 << ". 
random_seed: " << unitTest.random_seed() << std::endl; } this->currentSeed = unitTest.random_seed(); } void OnTestIterationEnd(const ::testing::UnitTest &unitTest, int iteration) override { this->currentSeed = -1; } void OnEnvironmentsSetUpStart(const ::testing::UnitTest &unitTest) override { } void OnEnvironmentsSetUpEnd(const ::testing::UnitTest &unitTest) override { } void OnTestCaseStart(const ::testing::TestCase &testCase) override { } void OnTestStart(const ::testing::TestInfo &testCase) override { std::stringstream ss; ss << testCase.test_case_name() << "." << testCase.name(); lastTest = ss.str(); } void OnTestPartResult(const ::testing::TestPartResult &testPartResult) override { if (testPartResult.failed()) { printf("\n"); } _listener->OnTestPartResult(testPartResult); } void OnTestEnd(const ::testing::TestInfo &testCase) override { if (testCase.result()->Failed()) { std::stringstream ss; ss << testCase.test_case_name() << "." << testCase.name(); testFailures.push_back(std::make_pair(ss.str(), currentSeed)); std::cout << "[ FAILED ][ " << hardwarePrefix << " ][ " << currentSeed << " ] " << testCase.test_case_name() << "." 
<< testCase.name() << std::endl; } } void OnTestCaseEnd(const ::testing::TestCase &testCase) override { } void OnEnvironmentsTearDownStart(const ::testing::UnitTest &testCase) override { } void OnEnvironmentsTearDownEnd(const ::testing::UnitTest &testCase) override { } void OnTestProgramEnd(const ::testing::UnitTest &unitTest) override { int testsRun = unitTest.test_to_run_count(); int testsPassed = unitTest.successful_test_count(); int testsSkipped = unitTest.skipped_test_count(); int testsFailed = unitTest.failed_test_count(); int testsDisabled = unitTest.disabled_test_count(); auto timeElapsed = static_cast(unitTest.elapsed_time()); if (unitTest.Failed()) { fprintf( stdout, "\n" "=====================\n" "== ULTs FAILED ==\n" "=====================\n"); } else { fprintf( stdout, "\n" "=====================\n" "== ULTs PASSED ==\n" "=====================\n"); } fprintf( stdout, "Tests run: %d\n" "Tests passed: %d\n" "Tests skipped: %d\n" "Tests failed: %d\n" "Tests disabled: %d\n" " Time elapsed: %d ms\n" "=====================\n", testsRun, testsPassed, testsSkipped, testsFailed, testsDisabled, timeElapsed); for (auto failure : testFailures) fprintf( stdout, "[ FAILED ][ %s ][ %u ] %s\n", hardwarePrefix.c_str(), failure.second, failure.first.c_str()); if (unitTest.Failed()) fprintf( stdout, "\n"); fflush(stdout); } ::testing::TestEventListener *_listener; std::vector> testFailures; int currentSeed = -1; std::string hardwarePrefix; }; compute-runtime-20.13.16352/opencl/test/unit_test/d3d_sharing/000077500000000000000000000000001363734646600240175ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/d3d_sharing/CMakeLists.txt000066400000000000000000000007641363734646600265660ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_d3d_sharing ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/d3d9_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/d3d_tests_part1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_tests_part2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/d3d_aux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_dx_sharing_tests.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_d3d_sharing}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/d3d_sharing/cl_dx_sharing_tests.cpp000066400000000000000000000304461363734646600305600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/arrayref.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_d3d_objects.h" #include "test.h" #include "gmock/gmock.h" #include "gtest/gtest.h" static const DXGI_FORMAT DXGIformats[] = { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, 
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_R1_UNORM, DXGI_FORMAT_R9G9B9E5_SHAREDEXP, DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_B8G8R8X8_TYPELESS, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_AYUV, DXGI_FORMAT_Y410, DXGI_FORMAT_Y416, DXGI_FORMAT_NV12, DXGI_FORMAT_P010, DXGI_FORMAT_P016, DXGI_FORMAT_420_OPAQUE, DXGI_FORMAT_YUY2, 
DXGI_FORMAT_Y210, DXGI_FORMAT_Y216, DXGI_FORMAT_NV11, DXGI_FORMAT_AI44, DXGI_FORMAT_IA44, DXGI_FORMAT_P8, DXGI_FORMAT_A8P8, DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_P208, DXGI_FORMAT_V208, DXGI_FORMAT_V408, DXGI_FORMAT_FORCE_UINT}; template struct clIntelSharingFormatQueryDX1X : public PlatformFixture, public ::testing::Test { std::vector retrievedFormats; ArrayRef availableFormats; NiceMock> *mockSharingFcns; MockContext *context; cl_uint numImageFormats; cl_int retVal; size_t retSize; void SetUp() override { PlatformFixture::SetUp(); context = new MockContext(pPlatform->getClDevice(0)); mockSharingFcns = new NiceMock>(); context->setSharingFunctions(mockSharingFcns); availableFormats = ArrayRef(DXGIformats); retrievedFormats.assign(availableFormats.size(), DXGI_FORMAT_UNKNOWN); } void TearDown() override { delete context; PlatformFixture::TearDown(); } }; typedef clIntelSharingFormatQueryDX1X clIntelSharingFormatQueryDX10; typedef clIntelSharingFormatQueryDX1X clIntelSharingFormatQueryDX11; TEST_F(clIntelSharingFormatQueryDX10, givenInvalidContextWhenDX10TextureFormatsRequestedThenInvalidContextError) { retVal = clGetSupportedD3D10TextureFormatsINTEL(NULL, CL_MEM_READ_WRITE, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clIntelSharingFormatQueryDX10, givenValidParametersWhenRequestedDX10TextureFormatsThenTheResultIsASubsetOfKnownFormats) { retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); for (cl_uint i = 0; i < numImageFormats; ++i) { EXPECT_NE(std::find(availableFormats.begin(), availableFormats.end(), retrievedFormats[i]), availableFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX10, givenValidParametersWhenRequestedDX10TextureFormatsTwiceThenTheResultsAreTheSame) { retVal = 
clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); std::vector formatsRetrievedForTheSecondTime(availableFormats.size()); cl_uint anotherNumImageFormats; retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(formatsRetrievedForTheSecondTime.size()), &formatsRetrievedForTheSecondTime[0], &anotherNumImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_EQ(numImageFormats, anotherNumImageFormats); ASSERT_EQ(memcmp(&retrievedFormats[0], &formatsRetrievedForTheSecondTime[0], numImageFormats * sizeof(DXGI_FORMAT)), 0); } TEST_F(clIntelSharingFormatQueryDX10, givenNullFormatsWhenRequestedDX10TextureFormatsThenNumImageFormatsIsSane) { retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_LE(0U, numImageFormats); ASSERT_LE(numImageFormats, static_cast(availableFormats.size())); } TEST_F(clIntelSharingFormatQueryDX10, givenNullPointersWhenRequestedDX10TextureFormatsThenCLSuccessIsReturned) { retVal = clGetSupportedD3D10TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, static_cast(retrievedFormats.size()), nullptr, nullptr); ASSERT_EQ(retVal, CL_SUCCESS); } TEST_F(clIntelSharingFormatQueryDX11, givenInvalidContextWhenDX11TextureFormatsRequestedThenInvalidContextError) { retVal = clGetSupportedD3D11TextureFormatsINTEL(nullptr, CL_MEM_READ_WRITE, 0, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clIntelSharingFormatQueryDX11, givenValidParametersWhenRequestedDX11TextureFormatsThenTheResultIsASubsetOfKnownFormats) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), 
&retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); for (cl_uint i = 0; i < numImageFormats; ++i) { EXPECT_NE(std::find(availableFormats.begin(), availableFormats.end(), retrievedFormats[i]), availableFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX11, givenNullFormatsWhenRequestedDX11TextureFormatsThenNumImageFormatsIsSane) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, 0, nullptr, &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_LE(0U, numImageFormats); ASSERT_LE(numImageFormats, static_cast(availableFormats.size())); } TEST_F(clIntelSharingFormatQueryDX11, givenNullPointersWhenRequestedDX11TextureFormatsThenCLSuccessIsReturned) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), nullptr, nullptr); ASSERT_EQ(retVal, CL_SUCCESS); } TEST_F(clIntelSharingFormatQueryDX11, givenValidParametersWhenRequestedDX11TextureFormatsTwiceThenTheResultsAreTheSame) { retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); std::vector formatsRetrievedForTheSecondTime(availableFormats.size()); cl_uint anotherNumImageFormats; retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(formatsRetrievedForTheSecondTime.size()), &formatsRetrievedForTheSecondTime[0], &anotherNumImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); ASSERT_EQ(numImageFormats, anotherNumImageFormats); ASSERT_EQ(memcmp(&retrievedFormats[0], &formatsRetrievedForTheSecondTime[0], numImageFormats * sizeof(DXGI_FORMAT)), 0); } TEST_F(clIntelSharingFormatQueryDX11, givenValidParametersWhenRequestingDX11TextureFormatsForPlane1ThenPlanarFormatsAeReturned) { auto checkFormat = [](DXGI_FORMAT format, UINT *pFormat) -> void { *pFormat = 
D3D11_FORMAT_SUPPORT_TEXTURE2D; }; ON_CALL(*mockSharingFcns, checkFormatSupport(::testing::_, ::testing::_)).WillByDefault(::testing::Invoke(checkFormat)); ON_CALL(*mockSharingFcns, memObjectFormatSupport(::testing::_, ::testing::_)).WillByDefault(::testing::Return(true)); retVal = clGetSupportedD3D11TextureFormatsINTEL(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 1, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); std::vector expectedPlanarFormats = {DXGI_FORMAT_NV12, DXGI_FORMAT_P010, DXGI_FORMAT_P016}; EXPECT_EQ(expectedPlanarFormats.size(), numImageFormats); for (auto format : expectedPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } } compute-runtime-20.13.16352/opencl/test/unit_test/d3d_sharing/d3d9_tests.cpp000066400000000000000000001704201363734646600265140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_d3d_objects.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" namespace NEO { template <> uint32_t MockD3DSharingFunctions::getDxgiDescCalled = 0; template <> DXGI_ADAPTER_DESC 
MockD3DSharingFunctions::mockDxgiDesc = {{0}}; template <> IDXGIAdapter *MockD3DSharingFunctions::getDxgiDescAdapterRequested = nullptr; class MockMM : public OsAgnosticMemoryManager { public: MockMM(const ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(const_cast(executionEnvironment)){}; GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData) override { auto gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo, StorageInfo{}); AllocationProperties properties(allocationData.rootDeviceIndex, nullptr, false, GraphicsAllocation::AllocationType::SHARED_IMAGE, false); auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(1, properties, false); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } void *lockResourceImpl(GraphicsAllocation &allocation) override { lockResourceCalled++; EXPECT_EQ(expectedLockingAllocation, &allocation); return lockResourceReturnValue; } void unlockResourceImpl(GraphicsAllocation &allocation) override { unlockResourceCalled++; EXPECT_EQ(expectedLockingAllocation, &allocation); } int32_t lockResourceCalled = 0; int32_t unlockResourceCalled = 0; GraphicsAllocation *expectedLockingAllocation = nullptr; void *lockResourceReturnValue = nullptr; Gmm *forceGmm = nullptr; bool gmmOwnershipPassed = false; }; class D3D9Tests : public PlatformFixture, public ::testing::Test { public: typedef typename D3DTypesHelper::D3D9 D3D9; typedef typename D3D9::D3DDevice D3DDevice; typedef typename D3D9::D3DQuery 
D3DQuery; typedef typename D3D9::D3DQueryDesc D3DQueryDesc; typedef typename D3D9::D3DResource D3DResource; typedef typename D3D9::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D9::D3DTexture2d D3DTexture2d; void setupMockGmm() { cl_image_desc imgDesc = {}; imgDesc.image_height = 10; imgDesc.image_width = 10; imgDesc.image_depth = 1; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); gmm = MockGmm::queryImgParams(pPlatform->getClDevice(0)->getGmmClientContext(), imgInfo).release(); mockGmmResInfo = reinterpret_cast *>(gmm->gmmResourceInfo.get()); memoryManager->forceGmm = gmm; } void SetUp() override { PlatformFixture::SetUp(); memoryManager = std::make_unique(*pPlatform->peekExecutionEnvironment()); context = new MockContext(pPlatform->getClDevice(0)); context->preferD3dSharedResources = true; context->memoryManager = memoryManager.get(); mockSharingFcns = new NiceMock>(); context->setSharingFunctions(mockSharingFcns); cmdQ = new MockCommandQueue(context, context->getDevice(0), 0); DebugManager.injectFcn = &mockSharingFcns->mockGetDxgiDesc; surfaceInfo.resource = (IDirect3DSurface9 *)&dummyD3DSurface; mockSharingFcns->mockTexture2dDesc.Format = D3DFMT_R32F; mockSharingFcns->mockTexture2dDesc.Height = 10; mockSharingFcns->mockTexture2dDesc.Width = 10; setupMockGmm(); } void TearDown() override { delete cmdQ; delete context; if (!memoryManager->gmmOwnershipPassed) { delete gmm; } PlatformFixture::TearDown(); } NiceMock> *mockSharingFcns; MockContext *context; MockCommandQueue *cmdQ; DebugManagerStateRestore dbgRestore; char dummyD3DSurface; char dummyD3DSurfaceStaging; cl_dx9_surface_info_khr surfaceInfo = {}; Gmm *gmm = nullptr; NiceMock *mockGmmResInfo = nullptr; std::unique_ptr memoryManager; }; TEST_F(D3D9Tests, givenD3DDeviceParamWhenContextCreationThenSetProperValues) { cl_device_id deviceID = context->getDevice(0); cl_platform_id pid[1] = {pPlatform}; char expectedDevice; cl_context_properties 
validAdapters[6] = {CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_ADAPTER_D3D9EX_KHR, CL_CONTEXT_ADAPTER_DXVA_KHR, CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_D3D9EX_DEVICE_INTEL, CL_CONTEXT_DXVA_DEVICE_INTEL}; cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_CONTEXT_ADAPTER_D3D9_KHR, (cl_context_properties)&expectedDevice, 0}; std::unique_ptr ctx(nullptr); cl_int retVal = CL_SUCCESS; for (int i = 0; i < 6; i++) { validProperties[2] = validAdapters[i]; ctx.reset(Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, ctx.get()); EXPECT_NE(nullptr, ctx->getSharing>()); EXPECT_TRUE((D3DDevice *)&expectedDevice == ctx->getSharing>()->getDevice()); } } TEST_F(D3D9Tests, WhenGetDeviceIdThenOneCorrectDeviceIsReturned) { cl_device_id expectedDevice = *devices; cl_device_id device = 0; cl_uint numDevices = 0; auto retVal = clGetDeviceIDsFromDX9MediaAdapterKHR(platform(), 1, nullptr, nullptr, 1, 1, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); retVal = clGetDeviceIDsFromDX9INTEL(platform(), 1, nullptr, 1, 1, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); } TEST_F(D3D9Tests, WhenCreatingSurfaceThenImagePropertiesAreSetCorrectly) { cl_int retVal; cl_image_format expectedImgFormat = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; D3DSurface::findImgFormat(mockSharingFcns->mockTexture2dDesc.Format, expectedImgFormat, 0, imagePlane); EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1); EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto memObj = clCreateFromDX9MediaSurfaceKHR(context, CL_MEM_READ_WRITE, 0, &surfaceInfo, 0, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(0u, 
mockGmmResInfo->getOffsetCalled); auto image = castToObject(memObj); EXPECT_NE(nullptr, image->getSharingHandler()); EXPECT_TRUE(CL_MEM_READ_WRITE == image->getMemoryPropertiesFlags()); EXPECT_TRUE(expectedImgFormat.image_channel_data_type == image->getImageFormat().image_channel_data_type); EXPECT_TRUE(expectedImgFormat.image_channel_order == image->getImageFormat().image_channel_order); EXPECT_TRUE(CL_MEM_OBJECT_IMAGE2D == image->getImageDesc().image_type); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, image->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, image->getImageDesc().image_height); clReleaseMemObject(memObj); } TEST(D3D9SimpleTests, givenWrongFormatWhenFindIsCalledThenErrorIsReturned) { cl_image_format expectedImgFormat = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; auto status = D3DSurface::findImgFormat(D3DFMT_FORCE_DWORD, expectedImgFormat, 0, imagePlane); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, status); } TEST_F(D3D9Tests, WhenCreatingSurfaceIntelThenImagePropertiesAreSetCorrectly) { cl_int retVal; cl_image_format expectedImgFormat = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; D3DSurface::findImgFormat(mockSharingFcns->mockTexture2dDesc.Format, expectedImgFormat, 0, imagePlane); EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1); EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto memObj = clCreateFromDX9MediaSurfaceINTEL(context, CL_MEM_READ_WRITE, surfaceInfo.resource, surfaceInfo.shared_handle, 0, &retVal); ASSERT_NE(nullptr, memObj); auto image = castToObject(memObj); EXPECT_NE(nullptr, image->getSharingHandler()); EXPECT_TRUE(CL_MEM_READ_WRITE == image->getMemoryPropertiesFlags()); EXPECT_TRUE(expectedImgFormat.image_channel_data_type == image->getImageFormat().image_channel_data_type); EXPECT_TRUE(expectedImgFormat.image_channel_order == 
image->getImageFormat().image_channel_order); EXPECT_TRUE(CL_MEM_OBJECT_IMAGE2D == image->getImageDesc().image_type); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, image->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, image->getImageDesc().image_height); clReleaseMemObject(memObj); } TEST_F(D3D9Tests, givenD3DHandleWhenCreatingSharedSurfaceThenAllocationTypeImageIsSet) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = reinterpret_cast(1); EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); ASSERT_NE(nullptr, sharedImg->getGraphicsAllocation()); EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, sharedImg->getGraphicsAllocation()->getAllocationType()); } TEST_F(D3D9Tests, givenUPlaneWhenCreateSurfaceThenChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)1; EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height); size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()) / 2; EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch); } TEST_F(D3D9Tests, givenVPlaneWhenCreateSurfaceThenChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); 
surfaceInfo.shared_handle = (HANDLE)1; EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height); size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()) / 2; EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch); } TEST_F(D3D9Tests, givenUVPlaneWhenCreateSurfaceThenChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)1; EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height); size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()); EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch); } TEST_F(D3D9Tests, givenYPlaneWhenCreateSurfaceThenDontChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)1; EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); 
EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, sharedImg->getImageDesc().image_height); size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()); EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch); } TEST_F(D3D9Tests, givenUPlaneWhenCreateNonSharedSurfaceThenChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)0; EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height); size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()); EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch); } TEST_F(D3D9Tests, givenVPlaneWhenCreateNonSharedSurfaceThenChangeWidthHeightAndPitch) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)0; EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width); EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height); size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch()); EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch); } 
// NOTE(review): in this copy of the file the template argument lists look
// stripped (e.g. `std::unique_ptr(`, `static_cast(`). The code below is kept
// token-for-token as found; restore the missing `<...>` from upstream before
// building.

// NV12 surface without a shared handle, plane 1: the image is expected to have
// half the texture width/height and a row pitch equal to the mock GMM render pitch.
TEST_F(D3D9Tests, givenUVPlaneWhenCreateNonSharedSurfaceThenChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width / 2, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height / 2, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
}

// NV12 surface without a shared handle, plane 0: width, height and row pitch
// are expected to match the texture description / mock GMM render pitch unchanged.
TEST_F(D3D9Tests, givenYPlaneWhenCreateNonSharedSurfaceThenDontChangeWidthHeightAndPitch) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Width, sharedImg->getImageDesc().image_width);
    EXPECT_EQ(mockSharingFcns->mockTexture2dDesc.Height, sharedImg->getImageDesc().image_height);
    size_t expectedRowPitch = static_cast(mockGmmResInfo->getRenderPitch());
    EXPECT_EQ(expectedRowPitch, sharedImg->getImageDesc().image_row_pitch);
}

// NV12 format with plane index 2: creation must fail with CL_INVALID_VALUE.
TEST_F(D3D9Tests, givenNV12FormatAndInvalidPlaneWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('N', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 2, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// YV12 format with plane index 3: creation must fail with CL_INVALID_VALUE.
TEST_F(D3D9Tests, givenYV12FormatAndInvalidPlaneWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    surfaceInfo.shared_handle = (HANDLE)0;
    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 3, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// D3DFMT_A16B16G16R16 with plane index 1: creation must fail with CL_INVALID_VALUE.
TEST_F(D3D9Tests, givenNonPlaneFormatAndNonZeroPlaneWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = D3DFORMAT::D3DFMT_A16B16G16R16;
    surfaceInfo.shared_handle = (HANDLE)0;
    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}

// A null resource pointer passed to the INTEL entry point must be rejected
// with CL_INVALID_DX9_RESOURCE_INTEL.
TEST_F(D3D9Tests, givenNullResourceWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    auto img = clCreateFromDX9MediaSurfaceINTEL(context, CL_MEM_READ_WRITE, nullptr, 0, 0, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal);
}

// Texture description reporting Pool = D3DPOOL_SYSTEMMEM: creation must fail
// with CL_INVALID_DX9_RESOURCE_INTEL.
TEST_F(D3D9Tests, givenNonDefaultPoolWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Pool = D3DPOOL_SYSTEMMEM;
    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 1, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal);
}

// Creating a second surface from the same surfaceInfo must fail with
// CL_INVALID_DX9_RESOURCE_INTEL.
TEST_F(D3D9Tests, givenAlreadyUsedSurfaceWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    surfaceInfo.resource = (IDirect3DSurface9 *)1;
    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    std::unique_ptr img(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, &retVal));
    EXPECT_NE(nullptr, img.get());
    // The first image is still owned by `img` while this second create() runs:
    // the argument is evaluated before reset() releases the old pointer.
    img.reset(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, &retVal));
    EXPECT_EQ(nullptr, img.get());
    EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal);
}

// FOURCC 'I420' with plane 0: creation must fail with
// CL_INVALID_IMAGE_FORMAT_DESCRIPTOR.
TEST_F(D3D9Tests, givenNotSupportedFormatWhenSurfaceIsCreatedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('I', '4', '2', '0');
    surfaceInfo.shared_handle = (HANDLE)0;
    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto img = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, &retVal);
    EXPECT_EQ(nullptr, img);
    EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal);
}

// Querying CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR must return the resource and
// shared handle the surface was created with, and report
// sizeof(cl_dx9_surface_info_khr) as the returned size.
TEST_F(D3D9Tests, GivenMediaSurfaceInfoKhrWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    cl_dx9_surface_info_khr getSurfaceInfo = {};
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getMemObjectInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(cl_dx9_surface_info_khr), &getSurfaceInfo, &retSize);
    EXPECT_EQ(getSurfaceInfo.resource, surfaceInfo.resource);
    EXPECT_EQ(getSurfaceInfo.shared_handle, surfaceInfo.shared_handle);
    EXPECT_EQ(sizeof(cl_dx9_surface_info_khr), retSize);
}

// Querying CL_MEM_DX9_RESOURCE_INTEL must return the resource pointer the
// surface was created with, with a pointer-sized result.
TEST_F(D3D9Tests, GivenResourceIntelWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    cl_dx9_surface_info_khr getSurfaceInfo = {};
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    getSurfaceInfo = {};
    sharedImg->getMemObjectInfo(CL_MEM_DX9_RESOURCE_INTEL, sizeof(IDirect3DSurface9 *), &getSurfaceInfo.resource, &retSize);
    EXPECT_EQ(getSurfaceInfo.resource, surfaceInfo.resource);
    EXPECT_EQ(sizeof(IDirect3DSurface9 *), retSize);
}

// Querying CL_MEM_DX9_SHARED_HANDLE_INTEL must return the shared handle the
// surface was created with.
// NOTE(review): the size argument and expectation use sizeof(IDirect3DSurface9 *)
// rather than sizeof(HANDLE) — presumably both are pointer-sized; confirm.
TEST_F(D3D9Tests, GivenSharedHandleIntelWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    cl_dx9_surface_info_khr getSurfaceInfo = {};
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getMemObjectInfo(CL_MEM_DX9_SHARED_HANDLE_INTEL, sizeof(IDirect3DSurface9 *), &getSurfaceInfo.shared_handle, &retSize);
    EXPECT_EQ(getSurfaceInfo.shared_handle, surfaceInfo.shared_handle);
    EXPECT_EQ(sizeof(IDirect3DSurface9 *), retSize);
}

// Querying CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR must return the adapter type
// value (5) that was passed to D3DSurface::create.
TEST_F(D3D9Tests, GivenMediaAdapterTypeKhrWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr adapterType = 0;
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint expectedPlane = 2;
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getMemObjectInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(cl_dx9_media_adapter_type_khr), &adapterType, &retSize);
    EXPECT_EQ(expectedAdapterType, adapterType);
    EXPECT_EQ(sizeof(cl_dx9_media_adapter_type_khr), retSize);
}

// Image-info query CL_IMAGE_DX9_MEDIA_PLANE_KHR must return the plane index (2)
// that was passed to D3DSurface::create.
TEST_F(D3D9Tests, GivenMediaPlaneKhrWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint plane = 0;
    cl_uint expectedPlane = 2;
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getImageInfo(CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(cl_uint), &plane, &retSize);
    EXPECT_EQ(expectedPlane, plane);
    EXPECT_EQ(sizeof(cl_uint), retSize);
}

// Same check as the KHR plane query above, using the CL_IMAGE_DX9_PLANE_INTEL token.
TEST_F(D3D9Tests, GivenPlaneIntelWhenGetMemObjInfoThenCorrectInfoIsReturned) {
    mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2');
    cl_dx9_media_adapter_type_khr expectedAdapterType = 5;
    cl_uint plane = 0;
    cl_uint expectedPlane = 2;
    surfaceInfo.shared_handle = (HANDLE)1;
    size_t retSize = 0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, expectedAdapterType, expectedPlane, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    sharedImg->getImageInfo(CL_IMAGE_DX9_PLANE_INTEL, sizeof(cl_uint), &plane, &retSize);
    EXPECT_EQ(expectedPlane, plane);
    EXPECT_EQ(sizeof(cl_uint), retSize);
}

// Non-zero shared handle: no staging texture may be created (createTexture2d
// is expected zero times), addRef is expected once, the mock GMM offset query
// is expected once with array index 0, and the sharing handler must report a
// shared resource with no staging resource.
TEST_F(D3D9Tests, givenSharedHandleWhenCreateThenDontCreateStagingSurface) {
    surfaceInfo.shared_handle = (HANDLE)1;
    // Expectations set while `is` is alive must be satisfied in declaration order.
    ::testing::InSequence is;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    EXPECT_CALL(*mockSharingFcns, createTexture2d(_, _, _)).Times(0);
    EXPECT_CALL(*mockSharingFcns, addRef(_)).Times(1);
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled);
    EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset);
    auto surface = static_cast(sharedImg->getSharingHandler().get());
    EXPECT_TRUE(surface->isSharedResource());
    EXPECT_EQ(nullptr, surface->getResourceStaging());
}

// Zero shared handle with Usage == 0: no staging texture is created, the mock
// GMM offset query is not called, and the handler is non-shared and lockable.
TEST_F(D3D9Tests, givenZeroSharedHandleAndLockableFlagWhenCreateThenDontCreateStagingSurface) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = 0;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    EXPECT_CALL(*mockSharingFcns, createTexture2d(_, _, _)).Times(0);
    EXPECT_CALL(*mockSharingFcns, addRef(_)).Times(1);
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled);
    auto surface = static_cast(sharedImg->getSharingHandler().get());
    EXPECT_FALSE(surface->isSharedResource());
    EXPECT_EQ(nullptr, surface->getResourceStaging());
    EXPECT_TRUE(surface->lockable);
}

// Zero shared handle with Usage == USAGE_RENDERTARGET: createTexture2d is
// expected once (the mock hands back dummyD3DSurfaceStaging), and the handler
// must be non-shared, non-lockable and hold a staging resource.
TEST_F(D3D9Tests, givenZeroSharedHandleAndNonLockableFlagWhenCreateThenCreateStagingSurface) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = D3DResourceFlags::USAGE_RENDERTARGET;
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    EXPECT_CALL(*mockSharingFcns, createTexture2d(_, _, _)).Times(1).WillOnce(SetArgPointee<0>((D3DTexture2d *)&dummyD3DSurfaceStaging));
    EXPECT_CALL(*mockSharingFcns, addRef(_)).Times(1);
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled);
    auto surface = static_cast(sharedImg->getSharingHandler().get());
    EXPECT_FALSE(surface->isSharedResource());
    EXPECT_NE(nullptr, surface->getResourceStaging());
    EXPECT_FALSE(surface->lockable);
}

// Shared-handle surface with interop user sync enabled: acquire/release via
// the KHR entry points must succeed without any render-target copy, rect
// lock/unlock, CPU blit, flushAndWait or updateSurface call, and without
// locking/unlocking the allocation through the memory manager.
TEST_F(D3D9Tests, GivenSharedResourceSurfaceAndEnabledInteropUserSyncWhenReleasingThenResourcesAreReleased) {
    context->setInteropUserSyncEnabled(true);
    surfaceInfo.shared_handle = (HANDLE)1;
    // Expectations set while `is` is alive must be satisfied in declaration order.
    ::testing::InSequence is;
    EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1);
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    EXPECT_CALL(*mockSharingFcns, getRenderTargetData(_, _)).Times(0);
    EXPECT_CALL(*mockSharingFcns, lockRect(_, _, _)).Times(0);
    EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(0);
    EXPECT_CALL(*mockSharingFcns, unlockRect(_)).Times(0);
    EXPECT_CALL(*mockSharingFcns, flushAndWait(_)).Times(0);
    EXPECT_CALL(*mockSharingFcns, updateSurface(_, _)).Times(0);
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg);
    EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled);
    EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset);
    cl_mem clMem = sharedImg.get();
    auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0, memoryManager->lockResourceCalled);
    EXPECT_EQ(0, memoryManager->unlockResourceCalled);
}

// Same scenario with interop user sync disabled: the only difference in the
// expectations is that flushAndWait is expected exactly once.
TEST_F(D3D9Tests, GivenSharedResourceSurfaceAndDisabledInteropUserSyncWhenReleasingThenResourcesAreReleased) {
    context->setInteropUserSyncEnabled(false);
    surfaceInfo.shared_handle = (HANDLE)1;
    // Expectations set while `is` is alive must be satisfied in declaration order.
    ::testing::InSequence is;
    EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1);
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    EXPECT_CALL(*mockSharingFcns, getRenderTargetData(_, _)).Times(0);
    EXPECT_CALL(*mockSharingFcns, lockRect(_, _, _)).Times(0);
    EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(0);
    EXPECT_CALL(*mockSharingFcns, unlockRect(_)).Times(0);
    EXPECT_CALL(*mockSharingFcns, flushAndWait(_)).Times(1);
    EXPECT_CALL(*mockSharingFcns, updateSurface(_, _)).Times(0);
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled);
    cl_mem clMem = sharedImg.get();
    auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0, memoryManager->lockResourceCalled);
    EXPECT_EQ(0, memoryManager->unlockResourceCalled);
}

// Disabled interop user sync again, exercised through the INTEL
// acquire/release entry points instead of the KHR ones.
TEST_F(D3D9Tests, GivenSharedResourceSurfaceAndDisabledInteropUserSyncIntelWhenReleasingThenResourcesAreReleased) {
    context->setInteropUserSyncEnabled(false);
    surfaceInfo.shared_handle = (HANDLE)1;
    // Expectations set while `is` is alive must be satisfied in declaration order.
    ::testing::InSequence is;
    EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1);
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    EXPECT_CALL(*mockSharingFcns, getRenderTargetData(_, _)).Times(0);
    EXPECT_CALL(*mockSharingFcns, lockRect(_, _, _)).Times(0);
    EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(0);
    EXPECT_CALL(*mockSharingFcns, unlockRect(_)).Times(0);
    EXPECT_CALL(*mockSharingFcns, flushAndWait(_)).Times(1);
    EXPECT_CALL(*mockSharingFcns, updateSurface(_, _)).Times(0);
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    cl_mem clMem = sharedImg.get();
    auto retVal = clEnqueueAcquireDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clEnqueueReleaseDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0, memoryManager->lockResourceCalled);
    EXPECT_EQ(0, memoryManager->unlockResourceCalled);
}

// Non-shared surface with Usage == 0 (no staging surface): acquire is expected
// to lock the D3D surface itself with D3DLOCK_READONLY and issue a CPU blit
// with Blt.Upload = 1; release is expected to lock it with flags 0u and blit
// with Blt.Upload = 0. Neither getRenderTargetData nor updateSurface may be
// called. The memory manager lock/unlock counters are checked after each phase.
TEST_F(D3D9Tests, GivenNonSharedResourceSurfaceAndLockableWhenReleasingThenResourcesAreReleased) {
    surfaceInfo.shared_handle = (HANDLE)0;
    mockSharingFcns->mockTexture2dDesc.Usage = 0;
    D3DLOCKED_RECT lockedRect = {10u, (void *)100};
    // Expectations set while `is` is alive must be satisfied in declaration order.
    ::testing::InSequence is;
    EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1);
    EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));
    auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    ASSERT_NE(nullptr, sharedImg.get());
    EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled);
    cl_mem clMem = sharedImg.get();
    auto imgHeight = static_cast(sharedImg->getImageDesc().image_height);
    void *returnedLockedRes = (void *)100;
    /* --- acquire phase --- */
    EXPECT_CALL(*mockSharingFcns, getRenderTargetData(_, _)).Times(0);
    EXPECT_CALL(*mockSharingFcns, lockRect((IDirect3DSurface9 *)&dummyD3DSurface, _, D3DLOCK_READONLY)).Times(1).WillOnce(SetArgPointee<1>(lockedRect));
    memoryManager->lockResourceReturnValue = returnedLockedRes;
    memoryManager->expectedLockingAllocation = sharedImg->getGraphicsAllocation();
    // Build the blit descriptor the acquire path is expected to pass to cpuBlt;
    // the mock captures the actual one into requestedResCopyBlt for comparison.
    GMM_RES_COPY_BLT requestedResCopyBlt = {};
    GMM_RES_COPY_BLT expectedResCopyBlt = {};
    expectedResCopyBlt.Sys.pData = lockedRect.pBits;
    expectedResCopyBlt.Gpu.pData = returnedLockedRes;
    expectedResCopyBlt.Sys.RowPitch = lockedRect.Pitch;
    expectedResCopyBlt.Blt.Upload = 1;
    expectedResCopyBlt.Sys.BufferSize = lockedRect.Pitch * imgHeight;
    EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(1).WillOnce(::testing::Invoke([&](GMM_RES_COPY_BLT *arg) {requestedResCopyBlt = *arg; return 1; }));
    EXPECT_CALL(*mockSharingFcns, unlockRect((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1);
    EXPECT_CALL(*mockSharingFcns, flushAndWait(_)).Times(1);
    auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1, memoryManager->lockResourceCalled);
    EXPECT_EQ(1, memoryManager->unlockResourceCalled);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    /* --- release phase: same descriptor, but with Blt.Upload cleared --- */
    EXPECT_CALL(*mockSharingFcns, lockRect((IDirect3DSurface9 *)&dummyD3DSurface, _, 0u)).Times(1).WillOnce(SetArgPointee<1>(lockedRect));
    requestedResCopyBlt = {};
    expectedResCopyBlt.Blt.Upload = 0;
    EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(1).WillOnce(::testing::Invoke([&](GMM_RES_COPY_BLT *arg) {requestedResCopyBlt = *arg; return 1; }));
    EXPECT_CALL(*mockSharingFcns, unlockRect((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1);
    EXPECT_CALL(*mockSharingFcns, updateSurface(_, _)).Times(0);
    retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0);
    EXPECT_EQ(2, memoryManager->lockResourceCalled);
    EXPECT_EQ(2, memoryManager->unlockResourceCalled);
}
TEST_F(D3D9Tests, GivenNonSharedResourceSurfaceAndLockableIntelWhenReleasingThenResourcesAreReleased) { surfaceInfo.shared_handle = (HANDLE)0; mockSharingFcns->mockTexture2dDesc.Usage = 0; D3DLOCKED_RECT lockedRect = {10u, (void *)100}; ::testing::InSequence is; EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1); EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); cl_mem clMem = sharedImg.get(); auto imgHeight = static_cast(sharedImg->getImageDesc().image_height); void *returnedLockedRes = (void *)100; EXPECT_CALL(*mockSharingFcns, getRenderTargetData(_, _)).Times(0); EXPECT_CALL(*mockSharingFcns, lockRect((IDirect3DSurface9 *)&dummyD3DSurface, _, D3DLOCK_READONLY)).Times(1).WillOnce(SetArgPointee<1>(lockedRect)); memoryManager->lockResourceReturnValue = returnedLockedRes; memoryManager->expectedLockingAllocation = sharedImg->getGraphicsAllocation(); GMM_RES_COPY_BLT requestedResCopyBlt = {}; GMM_RES_COPY_BLT expectedResCopyBlt = {}; expectedResCopyBlt.Sys.pData = lockedRect.pBits; expectedResCopyBlt.Gpu.pData = returnedLockedRes; expectedResCopyBlt.Sys.RowPitch = lockedRect.Pitch; expectedResCopyBlt.Blt.Upload = 1; expectedResCopyBlt.Sys.BufferSize = lockedRect.Pitch * imgHeight; EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(1).WillOnce(::testing::Invoke([&](GMM_RES_COPY_BLT *arg) {requestedResCopyBlt = *arg; return 1; })); EXPECT_CALL(*mockSharingFcns, unlockRect((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1); EXPECT_CALL(*mockSharingFcns, flushAndWait(_)).Times(1); auto retVal = clEnqueueAcquireDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0); EXPECT_EQ(1, 
memoryManager->lockResourceCalled); EXPECT_EQ(1, memoryManager->unlockResourceCalled); EXPECT_CALL(*mockSharingFcns, lockRect((IDirect3DSurface9 *)&dummyD3DSurface, _, 0u)).Times(1).WillOnce(SetArgPointee<1>(lockedRect)); requestedResCopyBlt = {}; expectedResCopyBlt.Blt.Upload = 0; EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(1).WillOnce(::testing::Invoke([&](GMM_RES_COPY_BLT *arg) {requestedResCopyBlt = *arg; return 1; })); EXPECT_CALL(*mockSharingFcns, unlockRect((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1); EXPECT_CALL(*mockSharingFcns, updateSurface(_, _)).Times(0); retVal = clEnqueueReleaseDX9ObjectsINTEL(cmdQ, 1, &clMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0); EXPECT_EQ(2, memoryManager->lockResourceCalled); EXPECT_EQ(2, memoryManager->unlockResourceCalled); } TEST_F(D3D9Tests, GivenNonSharedResourceSurfaceAndNonLockableWhenReleasingThenResourcesAreReleased) { surfaceInfo.shared_handle = (HANDLE)0; mockSharingFcns->mockTexture2dDesc.Usage = D3DResourceFlags::USAGE_RENDERTARGET; D3DLOCKED_RECT lockedRect = {10u, (void *)100}; ::testing::InSequence is; EXPECT_CALL(*mockSharingFcns, updateDevice((IDirect3DSurface9 *)&dummyD3DSurface)).Times(1); EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); EXPECT_CALL(*mockSharingFcns, createTexture2d(_, _, _)).Times(1).WillOnce(SetArgPointee<0>((D3DTexture2d *)&dummyD3DSurfaceStaging)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); EXPECT_EQ(0u, mockGmmResInfo->getOffsetCalled); cl_mem clMem = sharedImg.get(); auto imgHeight = static_cast(sharedImg->getImageDesc().image_height); void *returnedLockedRes = (void *)100; EXPECT_CALL(*mockSharingFcns, getRenderTargetData((IDirect3DSurface9 *)&dummyD3DSurface, (IDirect3DSurface9 
*)&dummyD3DSurfaceStaging)).Times(1); EXPECT_CALL(*mockSharingFcns, lockRect((IDirect3DSurface9 *)&dummyD3DSurfaceStaging, _, D3DLOCK_READONLY)).Times(1).WillOnce(SetArgPointee<1>(lockedRect)); memoryManager->lockResourceReturnValue = returnedLockedRes; memoryManager->expectedLockingAllocation = sharedImg->getGraphicsAllocation(); GMM_RES_COPY_BLT requestedResCopyBlt = {}; GMM_RES_COPY_BLT expectedResCopyBlt = {}; expectedResCopyBlt.Sys.pData = lockedRect.pBits; expectedResCopyBlt.Gpu.pData = returnedLockedRes; expectedResCopyBlt.Sys.RowPitch = lockedRect.Pitch; expectedResCopyBlt.Blt.Upload = 1; expectedResCopyBlt.Sys.BufferSize = lockedRect.Pitch * imgHeight; EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(1).WillOnce(::testing::Invoke([&](GMM_RES_COPY_BLT *arg) {requestedResCopyBlt = *arg; return 1; })); EXPECT_CALL(*mockSharingFcns, unlockRect((IDirect3DSurface9 *)&dummyD3DSurfaceStaging)).Times(1); EXPECT_CALL(*mockSharingFcns, flushAndWait(_)).Times(1); auto retVal = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0); EXPECT_EQ(1, memoryManager->lockResourceCalled); EXPECT_EQ(1, memoryManager->unlockResourceCalled); EXPECT_CALL(*mockSharingFcns, lockRect((IDirect3DSurface9 *)&dummyD3DSurfaceStaging, _, 0)).Times(1).WillOnce(SetArgPointee<1>(lockedRect)); requestedResCopyBlt = {}; expectedResCopyBlt.Blt.Upload = 0; EXPECT_CALL(*mockGmmResInfo, cpuBlt(_)).Times(1).WillOnce(::testing::Invoke([&](GMM_RES_COPY_BLT *arg) {requestedResCopyBlt = *arg; return 1; })); EXPECT_CALL(*mockSharingFcns, unlockRect((IDirect3DSurface9 *)&dummyD3DSurfaceStaging)).Times(1); EXPECT_CALL(*mockSharingFcns, updateSurface((IDirect3DSurface9 *)&dummyD3DSurfaceStaging, (IDirect3DSurface9 *)&dummyD3DSurface)).Times(1); retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &clMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_TRUE(memcmp(&requestedResCopyBlt, &expectedResCopyBlt, sizeof(GMM_RES_COPY_BLT)) == 0); EXPECT_EQ(2, memoryManager->lockResourceCalled); EXPECT_EQ(2, memoryManager->unlockResourceCalled); } TEST_F(D3D9Tests, givenInvalidClMemObjectPassedOnReleaseListWhenCallIsMadeThenFailureIsReturned) { auto fakeObject = reinterpret_cast(cmdQ); auto retVal = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQ, 1, &fakeObject, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(D3D9Tests, givenResourcesCreatedFromDifferentDevicesWhenAcquireReleaseCalledThenUpdateDevice) { EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto createdResourceDevice = (D3DDevice *)123; mockSharingFcns->setDevice(createdResourceDevice); // create call will pick this device auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr)); ASSERT_NE(nullptr, sharedImg.get()); memoryManager->expectedLockingAllocation = sharedImg->getGraphicsAllocation(); mockSharingFcns->setDevice(nullptr); // force device change sharedImg->getSharingHandler()->acquire(sharedImg.get()); EXPECT_EQ(createdResourceDevice, mockSharingFcns->getDevice()); mockSharingFcns->setDevice(nullptr); // force device change sharedImg->getSharingHandler()->release(sharedImg.get()); EXPECT_EQ(createdResourceDevice, mockSharingFcns->getDevice()); } TEST_F(D3D9Tests, givenNullD3dDeviceWhenContextIsCreatedThenReturnErrorOnSurfaceCreation) { cl_device_id deviceID = context->getDevice(0); cl_int retVal = CL_SUCCESS; cl_context_properties properties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(cl_platform_id)pPlatform, CL_CONTEXT_ADAPTER_D3D9_KHR, 0, 0}; std::unique_ptr ctx(Context::create(properties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, ctx->getSharing>()->getDevice()); auto img = D3DSurface::create(ctx.get(), nullptr, 
CL_MEM_READ_WRITE, 0, 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, img); } TEST_F(D3D9Tests, givenInvalidContextWhenSurfaceIsCreatedThenReturnError) { cl_device_id deviceID = context->getDevice(0); cl_int retVal = CL_SUCCESS; cl_context_properties properties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(cl_platform_id)pPlatform, 0}; std::unique_ptr ctx(Context::create(properties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, ctx->getSharing>()); auto img = D3DSurface::create(ctx.get(), nullptr, CL_MEM_READ_WRITE, 0, 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, img); img = D3DSurface::create(nullptr, nullptr, CL_MEM_READ_WRITE, 0, 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, img); } TEST_F(D3D9Tests, givenInvalidFlagsWhenSurfaceIsCreatedThenReturnError) { cl_int retVal = CL_SUCCESS; auto img = clCreateFromDX9MediaSurfaceINTEL(context, CL_MEM_USE_HOST_PTR, surfaceInfo.resource, surfaceInfo.shared_handle, 0, &retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, img); } TEST_F(D3D9Tests, givenInvalidContextWhenImageIsCreatedThenErrorIsReturned) { auto invalidContext = reinterpret_cast(this->cmdQ); auto retVal = CL_SUCCESS; auto img = clCreateFromDX9MediaSurfaceINTEL(invalidContext, CL_MEM_READ_WRITE, surfaceInfo.resource, surfaceInfo.shared_handle, 0, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); EXPECT_EQ(nullptr, img); } TEST_F(D3D9Tests, givenTheSameResourceAndPlaneWhenSurfaceIsCreatedThenReturnError) { mockSharingFcns->mockTexture2dDesc.Format = (D3DFORMAT)MAKEFOURCC('Y', 'V', '1', '2'); surfaceInfo.shared_handle = (HANDLE)1; cl_int retVal = CL_SUCCESS; cl_uint plane = 0; EXPECT_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc)); auto sharedImg = std::unique_ptr(D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, plane, 
&retVal)); EXPECT_NE(nullptr, sharedImg.get()); EXPECT_EQ(CL_SUCCESS, retVal); auto sharedImg2 = D3DSurface::create(context, &surfaceInfo, CL_MEM_READ_WRITE, 0, plane, &retVal); EXPECT_EQ(nullptr, sharedImg2); EXPECT_EQ(CL_INVALID_DX9_RESOURCE_INTEL, retVal); } TEST_F(D3D9Tests, WhenFillingBufferDescThenBufferContentsAreCorrect) { D3D9::D3DBufferDesc requestedDesc = {}; D3D9::D3DBufferDesc expectedDesc = {}; mockSharingFcns->fillCreateBufferDesc(requestedDesc, 10); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3D9::D3DBufferDesc)) == 0); } TEST_F(D3D9Tests, WhenFillingTexture2dDescThenTextureContentsAreCorrect) { D3D9::D3DTexture2dDesc requestedDesc = {}; D3D9::D3DTexture2dDesc expectedDesc = {}; D3D9::D3DTexture2dDesc srcDesc = {}; cl_uint subresource = 4; mockSharingFcns->fillCreateTexture2dDesc(requestedDesc, &srcDesc, subresource); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3D9::D3DTexture2dDesc)) == 0); } TEST_F(D3D9Tests, WhenFillingTexture3dDescThenTextureContentsAreCorrect) { D3D9::D3DTexture3dDesc requestedDesc = {}; D3D9::D3DTexture3dDesc expectedDesc = {}; D3D9::D3DTexture3dDesc srcDesc = {}; cl_uint subresource = 4; mockSharingFcns->fillCreateTexture3dDesc(requestedDesc, &srcDesc, subresource); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3D9::D3DTexture3dDesc)) == 0); } TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromDX9ThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clGetDeviceIDsFromDX9INTEL(nullptr, 1, nullptr, 1, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9ObjectsAreAcquiredThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clEnqueueAcquireDX9ObjectsINTEL(nullptr, 1, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9ObjectsAreReleasedThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clEnqueueReleaseDX9ObjectsINTEL(nullptr, 1, 
nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromDX9MediaAdapterThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clGetDeviceIDsFromDX9MediaAdapterKHR(nullptr, 1, nullptr, nullptr, 1, 1, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9MediaSurfacesAreAcquiredThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clEnqueueAcquireDX9MediaSurfacesKHR(nullptr, 1, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(D3D9Tests, givenImproperCommandQueueWhenDX9MediaSurfacesAreReleasedThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clEnqueueReleaseDX9MediaSurfacesKHR(nullptr, 1, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromD3D10ThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clGetDeviceIDsFromD3D10KHR(nullptr, 0, nullptr, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); } TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D10BufferThenReturnError) { cl_int retVal = CL_SUCCESS; clCreateFromD3D10BufferKHR(nullptr, 0, nullptr, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D10Texture2DThenReturnError) { cl_int retVal = CL_SUCCESS; clCreateFromD3D10Texture2DKHR(nullptr, 0, nullptr, 0u, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D10Texture3DThenReturnError) { cl_int retVal = CL_SUCCESS; clCreateFromD3D10Texture3DKHR(nullptr, 0, nullptr, 0u, &retVal); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D10ObjectsAreAcquiredThenReturnError) { cl_int retVal = CL_SUCCESS; retVal = clEnqueueAcquireD3D10ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } 
// NOTE(review): this stretch of the file is flattened onto a few physical lines, and several
// template argument lists look like they are missing (`std::tuple allImageFormats`,
// `WithParamInterface>`, `static_cast(`, `std::make_unique(`, `NiceMock>()`,
// `reinterpret_cast(`, `std::unique_ptr(`) - presumably lost during extraction; confirm
// against the upstream file. The code tokens below are left exactly as found.

// Calls clEnqueueReleaseD3D10ObjectsKHR with a null command queue and expects
// CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D10ObjectsAreReleasedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueReleaseD3D10ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// Calls clGetDeviceIDsFromD3D11KHR with a null platform and expects CL_INVALID_PLATFORM.
TEST_F(D3D9Tests, givenImproperPlatformWhenGettingDeviceIDsFromD3D11ThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clGetDeviceIDsFromD3D11KHR(nullptr, 0, nullptr, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_PLATFORM, retVal);
}

// Calls clCreateFromD3D11BufferKHR with a null context; the error is reported through the
// errcode out-parameter and is expected to be CL_INVALID_CONTEXT. The returned handle is ignored.
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D11BufferThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D11BufferKHR(nullptr, 0, nullptr, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// Same check as above for clCreateFromD3D11Texture2DKHR: null context -> CL_INVALID_CONTEXT.
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D11Texture2DThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D11Texture2DKHR(nullptr, 0, nullptr, 0u, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// Same check as above for clCreateFromD3D11Texture3DKHR: null context -> CL_INVALID_CONTEXT.
TEST_F(D3D9Tests, givenImproperContextWhenCreatingFromD3D11Texture3DThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    clCreateFromD3D11Texture3DKHR(nullptr, 0, nullptr, 0u, &retVal);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
}

// Calls clEnqueueAcquireD3D11ObjectsKHR with a null command queue and expects
// CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D11ObjectsAreAcquiredThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueAcquireD3D11ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

// Calls clEnqueueReleaseD3D11ObjectsKHR with a null command queue and expects
// CL_INVALID_COMMAND_QUEUE.
TEST_F(D3D9Tests, givenImproperCommandQueueWhenD3D11ObjectsAreReleasedThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    retVal = clEnqueueReleaseD3D11ObjectsKHR(nullptr, 0, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
}

namespace D3D9Formats {
// Parameter table for D3D9ImageFormatTests. The TEST_P below reads each tuple as:
//   <0> D3D format passed to D3DSurface::findImgFormat (cast to D3DFORMAT),
//   <1> plane index passed to findImgFormat,
//   <2> expected image_channel_order,
//   <3> expected image_channel_data_type,
//   <4> expected ImagePlane.
// Rows with 0, 0, ImagePlane::NO_PLANE as expectations match the zero-initialised state the
// test starts from - presumably these are unsupported format/plane combinations; confirm
// against D3DSurface::findImgFormat.
// NOTE(review): in this flattened text the `// input, input, output, output` line comment is
// followed on the same physical line by the first table entries, so a compiler would treat
// them as comment text. Presumably the original file had a line break right after the
// comment; confirm against the upstream file before relying on this table.
static const std::tuple allImageFormats[] = { // input, input, output, output std::make_tuple(D3DFMT_R32F, 0, CL_R, CL_FLOAT, ImagePlane::NO_PLANE), std::make_tuple(D3DFMT_R16F, 0, CL_R, CL_HALF_FLOAT, ImagePlane::NO_PLANE), std::make_tuple(D3DFMT_L16, 0, CL_R, CL_UNORM_INT16, ImagePlane::NO_PLANE), std::make_tuple(D3DFMT_A8, 0, CL_A,
    CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_L8, 0, CL_R, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    // Two-channel formats.
    std::make_tuple(D3DFMT_G32R32F, 0, CL_RG, CL_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_G16R16F, 0, CL_RG, CL_HALF_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_G16R16, 0, CL_RG, CL_UNORM_INT16, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A8L8, 0, CL_RG, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    // Four-channel formats; the *R8G8B8 layouts are expected to map to CL_BGRA.
    std::make_tuple(D3DFMT_A32B32G32R32F, 0, CL_RGBA, CL_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A16B16G16R16F, 0, CL_RGBA, CL_HALF_FLOAT, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A16B16G16R16, 0, CL_RGBA, CL_UNORM_INT16, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A8B8G8R8, 0, CL_RGBA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_X8B8G8R8, 0, CL_RGBA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_A8R8G8B8, 0, CL_BGRA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_X8R8G8B8, 0, CL_BGRA, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    // NV12: plane 0 -> PLANE_Y, plane 1 -> PLANE_UV; plane 2 expects the untouched defaults.
    std::make_tuple(MAKEFOURCC('N', 'V', '1', '2'), 0, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_Y),
    std::make_tuple(MAKEFOURCC('N', 'V', '1', '2'), 1, CL_RG, CL_UNORM_INT8, ImagePlane::PLANE_UV),
    std::make_tuple(MAKEFOURCC('N', 'V', '1', '2'), 2, 0, 0, ImagePlane::NO_PLANE),
    // YV12: plane 0 -> PLANE_Y, plane 1 -> PLANE_V, plane 2 -> PLANE_U; plane 3 expects the
    // untouched defaults.
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 0, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_Y),
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 1, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_V),
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 2, CL_R, CL_UNORM_INT8, ImagePlane::PLANE_U),
    std::make_tuple(MAKEFOURCC('Y', 'V', '1', '2'), 3, 0, 0, ImagePlane::NO_PLANE),
    // Packed YUV formats mapped to the *_INTEL channel orders.
    std::make_tuple(D3DFMT_YUY2, 0, CL_YUYV_INTEL, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(D3DFMT_UYVY, 0, CL_UYVY_INTEL, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(MAKEFOURCC('Y', 'V', 'Y', 'U'), 0, CL_YVYU_INTEL, CL_UNORM_INT8, ImagePlane::NO_PLANE),
    std::make_tuple(MAKEFOURCC('V', 'Y', 'U', 'Y'), 0, CL_VYUY_INTEL,
    CL_UNORM_INT8, ImagePlane::NO_PLANE),
    // A value that is not a D3D format (CL_INVALID_VALUE) expects the untouched defaults.
    std::make_tuple(CL_INVALID_VALUE, 0, 0, 0, ImagePlane::NO_PLANE)};
}

// Value-parameterised fixture with no state of its own; its parameters come from
// D3D9Formats::allImageFormats via the instantiation below.
struct D3D9ImageFormatTests : public ::testing::WithParamInterface>,
                              public ::testing::Test {
};

// Runs every TEST_P of D3D9ImageFormatTests once per entry of allImageFormats.
INSTANTIATE_TEST_CASE_P(
    D3D9ImageFormatTests,
    D3D9ImageFormatTests,
    testing::ValuesIn(D3D9Formats::allImageFormats));

// Feeds one (format, plane) pair to D3DSurface::findImgFormat and compares the resulting
// channel data type, channel order and image plane with the values stored in the tuple.
TEST_P(D3D9ImageFormatTests, WhenGettingImageFormatThenValidFormatDetailsAreReturned) {
    // Outputs start zeroed / NO_PLANE, which is what the "0, 0, NO_PLANE" rows expect back.
    cl_image_format imgFormat = {};
    auto format = std::get<0>(GetParam());
    auto plane = std::get<1>(GetParam());
    ImagePlane imagePlane = ImagePlane::NO_PLANE;
    auto expectedImagePlane = std::get<4>(GetParam());
    // Tuple slot 3 holds the channel data type and slot 2 the channel order.
    auto expectedClChannelType = static_cast(std::get<3>(GetParam()));
    auto expectedClChannelOrder = static_cast(std::get<2>(GetParam()));

    D3DSurface::findImgFormat((D3DFORMAT)format, imgFormat, plane, imagePlane);

    EXPECT_EQ(imgFormat.image_channel_data_type, expectedClChannelType);
    EXPECT_EQ(imgFormat.image_channel_order, expectedClChannelOrder);
    EXPECT_TRUE(imagePlane == expectedImagePlane);
}

// The multi-root-device tests reuse MultiRootDeviceFixture unchanged; the tests below read
// `device` and `expectedRootDeviceIndex` from it.
using D3D9MultiRootDeviceTest = MultiRootDeviceFixture;

// Creates a D3DSurface with shared_handle == 0 on a mock context and checks that the
// resulting graphics allocation reports expectedRootDeviceIndex.
TEST_F(D3D9MultiRootDeviceTest, givenD3DHandleIsNullWhenCreatingSharedSurfaceAndRootDeviceIndexIsSpecifiedThenAllocationHasCorrectRootDeviceIndex) {
    // Describe a 10x10 2D image and build a GMM for it through MockGmm.
    cl_image_desc imgDesc = {};
    imgDesc.image_height = 10;
    imgDesc.image_width = 10;
    imgDesc.image_depth = 1;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    // release() yields a raw pointer that is stored in forceGmm below.
    // NOTE(review): presumably ownership passes to the allocation made by the memory manager - confirm.
    auto gmm = MockGmm::queryImgParams(device->getGmmClientContext(), imgInfo).release();

    auto memoryManager = std::make_unique(*device->executionEnvironment);
    memoryManager->forceGmm = gmm;

    // Mock sharing functions report a 10x10 D3DFMT_R32F texture whenever getTexture2dDesc is called.
    // NOTE(review): raw `new` handed to ctx.setSharingFunctions - presumably the context takes ownership; confirm.
    auto mockSharingFcns = new NiceMock>();
    mockSharingFcns->mockTexture2dDesc.Format = D3DFMT_R32F;
    mockSharingFcns->mockTexture2dDesc.Height = 10;
    mockSharingFcns->mockTexture2dDesc.Width = 10;

    cl_dx9_surface_info_khr surfaceInfo = {};
    surfaceInfo.shared_handle = reinterpret_cast(0);

    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));

    // The context is wired to the mock sharing functions and to the local memory manager.
    MockContext ctx(device.get());
    ctx.setSharingFunctions(mockSharingFcns);
    ctx.memoryManager = memoryManager.get();

    auto sharedImg = std::unique_ptr(D3DSurface::create(&ctx, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    // ASSERT (not EXPECT) so the dereferences below are never reached with a null pointer.
    ASSERT_NE(nullptr, sharedImg.get());
    ASSERT_NE(nullptr, sharedImg->getGraphicsAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, sharedImg->getGraphicsAllocation()->getRootDeviceIndex());
}

// Same scenario as the previous test, except that shared_handle is set to 1 (non-null).
TEST_F(D3D9MultiRootDeviceTest, givenD3DHandleIsNotNullWhenCreatingSharedSurfaceAndRootDeviceIndexIsSpecifiedThenAllocationHasCorrectRootDeviceIndex) {
    // Describe a 10x10 2D image and build a GMM for it through MockGmm.
    cl_image_desc imgDesc = {};
    imgDesc.image_height = 10;
    imgDesc.image_width = 10;
    imgDesc.image_depth = 1;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    // release() yields a raw pointer that is stored in forceGmm below.
    // NOTE(review): presumably ownership passes to the allocation made by the memory manager - confirm.
    auto gmm = MockGmm::queryImgParams(device->getGmmClientContext(), imgInfo).release();

    auto memoryManager = std::make_unique(*device->executionEnvironment);
    memoryManager->forceGmm = gmm;

    // Mock sharing functions report a 10x10 D3DFMT_R32F texture whenever getTexture2dDesc is called.
    // NOTE(review): raw `new` handed to ctx.setSharingFunctions - presumably the context takes ownership; confirm.
    auto mockSharingFcns = new NiceMock>();
    mockSharingFcns->mockTexture2dDesc.Format = D3DFMT_R32F;
    mockSharingFcns->mockTexture2dDesc.Height = 10;
    mockSharingFcns->mockTexture2dDesc.Width = 10;

    cl_dx9_surface_info_khr surfaceInfo = {};
    // The only difference from the previous test: a non-null shared handle.
    surfaceInfo.shared_handle = reinterpret_cast(1);

    ON_CALL(*mockSharingFcns, getTexture2dDesc(_, _)).WillByDefault(SetArgPointee<0>(mockSharingFcns->mockTexture2dDesc));

    // The context is wired to the mock sharing functions and to the local memory manager.
    MockContext ctx(device.get());
    ctx.setSharingFunctions(mockSharingFcns);
    ctx.memoryManager = memoryManager.get();

    auto sharedImg = std::unique_ptr(D3DSurface::create(&ctx, &surfaceInfo, CL_MEM_READ_WRITE, 0, 0, nullptr));
    // ASSERT (not EXPECT) so the dereferences below are never reached with a null pointer.
    ASSERT_NE(nullptr, sharedImg.get());
    ASSERT_NE(nullptr, sharedImg->getGraphicsAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, sharedImg->getGraphicsAllocation()->getRootDeviceIndex());
}
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/d3d_sharing/d3d_aux_tests.cpp000066400000000000000000000222221363734646600272740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/arrayref.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" #include "opencl/test/unit_test/fixtures/d3d_test_fixture.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace NEO { template class D3DAuxTests : public D3DTests {}; TYPED_TEST_CASE_P(D3DAuxTests); TYPED_TEST_P(D3DAuxTests, given2dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsRenderCompressed) { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4; this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4; mockGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto hwInfo = context->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedMapAuxGpuVaCalls = hwHelper.isPageTableManagerSupported(hwInfo) ? 
1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled); EXPECT_TRUE(gmm->isRenderCompressed); } TYPED_TEST_P(D3DAuxTests, given2dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsRenderCompressed) { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4; this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4; mockGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); mockMM->mapAuxGpuVaRetValue = false; auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto hwInfo = context->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedMapAuxGpuVaCalls = hwHelper.isPageTableManagerSupported(hwInfo) ? 
1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled); EXPECT_EQ(!hwHelper.isPageTableManagerSupported(hwInfo), gmm->isRenderCompressed); } TYPED_TEST_P(D3DAuxTests, given2dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable) { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4; this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4; EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); EXPECT_EQ(0u, mockMM->mapAuxGpuVACalled); EXPECT_FALSE(gmm->isRenderCompressed); } TYPED_TEST_P(D3DAuxTests, given2dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetRenderCompressed) { mockGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); mockGmmResInfo->setUnifiedAuxTranslationCapable(); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto hwInfo = context->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedMapAuxGpuVaCalls = hwHelper.isPageTableManagerSupported(hwInfo) ? 
1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled); EXPECT_TRUE(gmm->isRenderCompressed); } TYPED_TEST_P(D3DAuxTests, given3dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsRenderCompressed) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; mockGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); std::unique_ptr image(D3DTexture::create3d(this->context, (D3DTexture3d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto hwInfo = context->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedMapAuxGpuVaCalls = hwHelper.isPageTableManagerSupported(hwInfo) ? 1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled); EXPECT_TRUE(gmm->isRenderCompressed); } TYPED_TEST_P(D3DAuxTests, given3dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsRenderCompressed) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; mockGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); mockMM->mapAuxGpuVaRetValue = false; std::unique_ptr image(D3DTexture::create3d(this->context, (D3DTexture3d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto hwInfo = context->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedMapAuxGpuVaCalls = hwHelper.isPageTableManagerSupported(hwInfo) ? 
1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled); EXPECT_EQ(!hwHelper.isPageTableManagerSupported(hwInfo), gmm->isRenderCompressed); } TYPED_TEST_P(D3DAuxTests, given3dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); std::unique_ptr image(D3DTexture::create3d(this->context, (D3DTexture3d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); EXPECT_EQ(0u, mockMM->mapAuxGpuVACalled); EXPECT_FALSE(gmm->isRenderCompressed); } TYPED_TEST_P(D3DAuxTests, given3dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetRenderCompressed) { mockGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)).Times(1).WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); mockGmmResInfo->setUnifiedAuxTranslationCapable(); std::unique_ptr image(D3DTexture::create3d(this->context, (D3DTexture3d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto hwInfo = context->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedMapAuxGpuVaCalls = hwHelper.isPageTableManagerSupported(hwInfo) ? 
1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, mockMM->mapAuxGpuVACalled); EXPECT_TRUE(gmm->isRenderCompressed); } REGISTER_TYPED_TEST_CASE_P(D3DAuxTests, given2dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsRenderCompressed, given2dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsRenderCompressed, given2dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable, given2dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetRenderCompressed, given3dSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetAsRenderCompressed, given3dSharableTextureWithUnifiedAuxFlagsWhenFailOnAuxMappingThenDontSetAsRenderCompressed, given3dSharableTextureWithoutUnifiedAuxFlagsWhenCreatingThenDontMapAuxTable, given3dNonSharableTextureWithUnifiedAuxFlagsWhenCreatingThenMapAuxTableAndSetRenderCompressed); INSTANTIATE_TYPED_TEST_CASE_P(D3DSharingTests, D3DAuxTests, D3DTypes); } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/d3d_sharing/d3d_tests_part1.cpp000066400000000000000000001162301363734646600275310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/driver_info.h" #include "shared/source/utilities/arrayref.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" #include "opencl/source/sharings/d3d/enable_d3d.h" #include "opencl/test/unit_test/fixtures/d3d_test_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include 
"opencl/test/unit_test/mocks/mock_sharing_factory.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace NEO { TYPED_TEST_CASE_P(D3DTests); TYPED_TEST_P(D3DTests, GivenSpecificDeviceSetWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned) { cl_device_id expectedDevice = *this->devices; cl_device_id device = 0; cl_uint numDevices = 0; auto deviceSourceParam = this->pickParam(CL_D3D10_DEVICE_KHR, CL_D3D11_DEVICE_KHR); auto deviceSetParam = this->pickParam(CL_PREFERRED_DEVICES_FOR_D3D10_KHR, CL_PREFERRED_DEVICES_FOR_D3D11_KHR); cl_int retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); device = 0; numDevices = 0; deviceSetParam = this->pickParam(CL_ALL_DEVICES_FOR_D3D10_KHR, CL_ALL_DEVICES_FOR_D3D11_KHR); retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); device = 0; numDevices = 0; retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, CL_INVALID_OPERATION, 0, &device, &numDevices); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_NE(expectedDevice, device); EXPECT_EQ(0u, numDevices); } TYPED_TEST_P(D3DTests, GivenSpecificDeviceSourceWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned) { cl_device_id expectedDevice = *this->devices; cl_device_id device = 0; cl_uint numDevices = 0; auto deviceSourceParam = this->pickParam(CL_D3D10_DEVICE_KHR, CL_D3D11_DEVICE_KHR); auto deviceSetParam = this->pickParam(CL_ALL_DEVICES_FOR_D3D10_KHR, CL_ALL_DEVICES_FOR_D3D11_KHR); cl_int retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, 
&this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); EXPECT_EQ(1u, this->mockSharingFcns->getDxgiDescCalled); EXPECT_EQ(nullptr, this->mockSharingFcns->getDxgiDescAdapterRequested); device = 0; numDevices = 0; deviceSourceParam = this->pickParam(CL_D3D10_DXGI_ADAPTER_KHR, CL_D3D11_DXGI_ADAPTER_KHR); retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedDevice, device); EXPECT_EQ(1u, numDevices); EXPECT_EQ(2u, this->mockSharingFcns->getDxgiDescCalled); EXPECT_NE(nullptr, this->mockSharingFcns->getDxgiDescAdapterRequested); device = 0; numDevices = 0; retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, CL_INVALID_OPERATION, &this->dummyD3DBuffer, deviceSetParam, 0, &device, &numDevices); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_NE(expectedDevice, device); EXPECT_EQ(0u, numDevices); EXPECT_EQ(2u, this->mockSharingFcns->getDxgiDescCalled); } TYPED_TEST_P(D3DTests, givenNonIntelVendorWhenGetDeviceIdIsCalledThenReturnError) { DXGI_ADAPTER_DESC desc = {{0}}; desc.VendorId = INTEL_VENDOR_ID + 1u; this->mockSharingFcns->mockDxgiDesc = desc; cl_device_id device = 0; cl_uint numDevices = 0; auto deviceSourceParam = this->pickParam(CL_D3D10_DEVICE_KHR, CL_D3D11_DEVICE_KHR); auto deviceSetParam = this->pickParam(CL_ALL_DEVICES_FOR_D3D10_KHR, CL_ALL_DEVICES_FOR_D3D11_KHR); cl_int retVal = this->getDeviceIDsFromD3DApi(this->mockSharingFcns, this->pPlatform, deviceSourceParam, nullptr, deviceSetParam, 0, &device, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_TRUE(0 == device); EXPECT_EQ(0u, numDevices); EXPECT_EQ(1u, this->mockSharingFcns->getDxgiDescCalled); } TYPED_TEST_P(D3DTests, WhenCreatingFromD3DBufferKhrApiThenValidBufferIsReturned) { cl_int retVal; 
EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getSharedNTHandle(_, _)) .Times(0); auto memObj = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, (D3DBufferObj *)&this->dummyD3DBuffer, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); auto buffer = castToObject(memObj); ASSERT_NE(nullptr, buffer); ASSERT_NE(nullptr, buffer->getSharingHandler().get()); auto bufferObj = static_cast *>(buffer->getSharingHandler().get()); EXPECT_EQ((D3DResource *)&this->dummyD3DBuffer, *bufferObj->getResourceHandler()); EXPECT_TRUE(buffer->getMemoryPropertiesFlags() == CL_MEM_READ_WRITE); clReleaseMemObject(memObj); } TYPED_TEST_P(D3DTests, givenNV12FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_NV12, ImagePlane::PLANE_Y, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(2u, mockGmmResInfo->arrayIndexPassedToGetOffset); } TYPED_TEST_P(D3DTests, givenNV12FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_NV12; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = 
D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_NV12, ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(3u, mockGmmResInfo->arrayIndexPassedToGetOffset); } TYPED_TEST_P(D3DTests, givenP010FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P010; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P010, ImagePlane::PLANE_Y, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(2u, mockGmmResInfo->arrayIndexPassedToGetOffset); } TYPED_TEST_P(D3DTests, givenP010FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P010; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P010, ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(3u, mockGmmResInfo->arrayIndexPassedToGetOffset); } TYPED_TEST_P(D3DTests, givenP016FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams) { 
this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P016; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P016, ImagePlane::PLANE_Y, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(2u, mockGmmResInfo->arrayIndexPassedToGetOffset); } TYPED_TEST_P(D3DTests, givenP016FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P016; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); auto expectedFormat = D3DTexture::findYuvSurfaceFormatInfo(DXGI_FORMAT_P016, ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); EXPECT_TRUE(memcmp(expectedFormat, &image->getSurfaceFormatInfo(), sizeof(SurfaceFormatInfo)) == 0); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(3u, mockGmmResInfo->arrayIndexPassedToGetOffset); } TYPED_TEST_P(D3DTests, WhenCreatingFromD3D2dTextureKhrApiThenValidImageIsReturned) { cl_int retVal; EXPECT_CALL(*this->mockSharingFcns, createQuery(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getSharedNTHandle(_, _)) .Times(0); auto memObj = 
this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture2d *)&this->dummyD3DTexture, 1, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset); auto image = castToObject(memObj); ASSERT_NE(nullptr, image); ASSERT_NE(nullptr, image->getSharingHandler().get()); auto textureObj = static_cast *>(image->getSharingHandler().get()); EXPECT_EQ((D3DResource *)&this->dummyD3DTexture, *textureObj->getResourceHandler()); EXPECT_TRUE(image->getMemoryPropertiesFlags() == CL_MEM_READ_WRITE); EXPECT_TRUE(image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D); EXPECT_EQ(1u, textureObj->getSubresource()); clReleaseMemObject(memObj); } TYPED_TEST_P(D3DTests, WhenCreatingFromD3D3dTextureKhrApiThenValidImageIsReturned) { cl_int retVal; EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getSharedNTHandle(_, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, createQuery(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); auto memObj = this->createFromD3DTexture3DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture3d *)&this->dummyD3DTexture, 1, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockGmmResInfo->getOffsetCalled); EXPECT_EQ(0u, mockGmmResInfo->arrayIndexPassedToGetOffset); auto image = castToObject(memObj); ASSERT_NE(nullptr, image); ASSERT_NE(nullptr, image->getSharingHandler().get()); auto textureObj = static_cast *>(image->getSharingHandler().get()); EXPECT_EQ((D3DResource *)&this->dummyD3DTexture, *textureObj->getResourceHandler()); EXPECT_TRUE(image->getMemoryPropertiesFlags() == CL_MEM_READ_WRITE); EXPECT_TRUE(image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE3D); EXPECT_EQ(1u, textureObj->getSubresource()); clReleaseMemObject(memObj); 
} TYPED_TEST_P(D3DTests, givenSharedResourceFlagWhenCreateBufferThenStagingBufferEqualsPassedBuffer) { this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; ::testing::InSequence is; EXPECT_CALL(*this->mockSharingFcns, getBufferDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockBufferDesc)); EXPECT_CALL(*this->mockSharingFcns, createBuffer(_, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle((D3DBufferObj *)&this->dummyD3DBuffer, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, addRef((D3DBufferObj *)&this->dummyD3DBuffer)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, createQuery(_)) .Times(1) .WillOnce(SetArgPointee<0>((D3DQuery *)1)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); auto d3dBuffer = static_cast *>(buffer->getSharingHandler().get()); ASSERT_NE(nullptr, d3dBuffer); EXPECT_NE(nullptr, d3dBuffer->getQuery()); EXPECT_TRUE(d3dBuffer->isSharedResource()); EXPECT_EQ(&this->dummyD3DBuffer, d3dBuffer->getResourceStaging()); EXPECT_CALL(*this->mockSharingFcns, release((D3DBufferObj *)&this->dummyD3DBuffer)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DQuery *)d3dBuffer->getQuery())) .Times(1); } TYPED_TEST_P(D3DTests, givenNonSharedResourceFlagWhenCreateBufferThenCreateNewStagingBuffer) { ::testing::InSequence is; EXPECT_CALL(*this->mockSharingFcns, createBuffer(_, _)) .Times(1) .WillOnce(SetArgPointee<0>((D3DBufferObj *)&this->dummyD3DBufferStaging)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle((D3DBufferObj *)&this->dummyD3DBufferStaging, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, addRef((D3DBufferObj *)&this->dummyD3DBuffer)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, createQuery(_)) .Times(1) .WillOnce(SetArgPointee<0>((D3DQuery *)1)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj 
*)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); auto d3dBuffer = static_cast *>(buffer->getSharingHandler().get()); ASSERT_NE(nullptr, d3dBuffer); EXPECT_NE(nullptr, d3dBuffer->getQuery()); EXPECT_FALSE(d3dBuffer->isSharedResource()); EXPECT_EQ(&this->dummyD3DBufferStaging, d3dBuffer->getResourceStaging()); EXPECT_CALL(*this->mockSharingFcns, release((D3DBufferObj *)&this->dummyD3DBufferStaging)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DBufferObj *)&this->dummyD3DBuffer)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DQuery *)d3dBuffer->getQuery())) .Times(1); } TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferWhenAcquiredThenCopySubregion) { this->context->setInteropUserSyncEnabled(true); ::testing::InSequence is; EXPECT_CALL(*this->mockSharingFcns, createBuffer(_, _)) .Times(1) .WillOnce(SetArgPointee<0>((D3DBufferObj *)&this->dummyD3DBufferStaging)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getDeviceContext(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, copySubresourceRegion((D3DBufferObj *)&this->dummyD3DBufferStaging, 0u, (D3DBufferObj *)&this->dummyD3DBuffer, 0u)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, flushAndWait(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, releaseDeviceContext(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getDeviceContext(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, copySubresourceRegion((D3DBufferObj *)&this->dummyD3DBuffer, 0u, (D3DBufferObj *)&this->dummyD3DBufferStaging, 0u)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, releaseDeviceContext(_)) .Times(1); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); // acquireCount == 0, acquire EXPECT_EQ(0u, buffer->acquireCount); auto retVal = 
this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, buffer->acquireCount); // acquireCount == 1, don't acquire retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(this->pickParam(CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR, CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR), retVal); EXPECT_EQ(1u, buffer->acquireCount); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); // acquireCount == 0 EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, buffer->acquireCount); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(this->pickParam(CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR, CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR), retVal); EXPECT_EQ(0u, buffer->acquireCount); } TYPED_TEST_P(D3DTests, givenSharedResourceBufferWhenAcquiredThenDontCopySubregion) { this->context->setInteropUserSyncEnabled(true); this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; ::testing::InSequence is; EXPECT_CALL(*this->mockSharingFcns, getBufferDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockBufferDesc)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getDeviceContext(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, copySubresourceRegion(_, _, _, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, flushAndWait(_)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, releaseDeviceContext(_)) .Times(1); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, 
nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncDisabledWhenAcquiredThenFlushOnAcquire) { this->context->setInteropUserSyncEnabled(false); this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; ::testing::InSequence is; EXPECT_CALL(*this->mockSharingFcns, getBufferDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockBufferDesc)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, getDeviceContext(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, copySubresourceRegion(_, _, _, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, flushAndWait(_)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, releaseDeviceContext(_)) .Times(1); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, this->cmdQ, 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TYPED_TEST_P(D3DTests, WhenGettingPreferD3DSharedResourcesThenCorrectValueIsReturned) { auto ctx = std::unique_ptr(new MockContext()); cl_bool retBool = 0; size_t size = 0; auto param = this->pickParam(CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR, CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR); ctx->preferD3dSharedResources = 1u; auto retVal = ctx->getInfo(param, sizeof(retBool), &retBool, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), size); EXPECT_EQ(1u, retBool); ctx->preferD3dSharedResources = 0u; retVal = 
ctx->getInfo(param, sizeof(retBool), &retBool, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), size); EXPECT_EQ(0u, retBool); } TYPED_TEST_P(D3DTests, WhenGettingD3DResourceInfoFromMemObjThenCorrectInfoIsReturned) { auto memObj = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, (D3DBufferObj *)&this->dummyD3DBuffer, nullptr); ASSERT_NE(nullptr, memObj); auto param = this->pickParam(CL_MEM_D3D10_RESOURCE_KHR, CL_MEM_D3D11_RESOURCE_KHR); void *retBuffer = nullptr; size_t retSize = 0; clGetMemObjectInfo(memObj, param, sizeof(D3DBufferObj), &retBuffer, &retSize); EXPECT_EQ(sizeof(D3DBufferObj), retSize); EXPECT_EQ(&this->dummyD3DBuffer, retBuffer); clReleaseMemObject(memObj); } TYPED_TEST_P(D3DTests, WhenGettingD3DSubresourceInfoFromMemObjThenCorrectInfoIsReturned) { cl_int retVal; cl_uint subresource = 1u; auto param = this->pickParam(CL_IMAGE_D3D10_SUBRESOURCE_KHR, CL_IMAGE_D3D11_SUBRESOURCE_KHR); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture2d *)&this->dummyD3DTexture, subresource, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint retSubresource = 0; size_t retSize = 0; clGetImageInfo(memObj, param, sizeof(cl_uint), &retSubresource, &retSize); EXPECT_EQ(sizeof(cl_uint), retSize); EXPECT_EQ(subresource, retSubresource); clReleaseMemObject(memObj); } TYPED_TEST_P(D3DTests, givenTheSameD3DBufferWhenNextCreateIsCalledThenFail) { cl_int retVal; EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size()); auto memObj = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, (D3DBufferObj *)&this->dummyD3DBuffer, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size()); EXPECT_EQ(0u, 
this->mockSharingFcns->getTrackedResourcesVector()->at(0).second); auto memObj2 = this->createFromD3DBufferApi(this->context, CL_MEM_READ_WRITE, (D3DBufferObj *)&this->dummyD3DBuffer, &retVal); EXPECT_EQ(nullptr, memObj2); EXPECT_EQ(this->pickParam(CL_INVALID_D3D10_RESOURCE_KHR, CL_INVALID_D3D11_RESOURCE_KHR), retVal); EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size()); clReleaseMemObject(memObj); EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size()); } TYPED_TEST_P(D3DTests, givenD3DTextureWithTheSameSubresourceWhenNextCreateIsCalledThenFail) { cl_int retVal; cl_uint subresource = 1; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size()); auto memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture2d *)&this->dummyD3DTexture, subresource, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size()); auto memObj2 = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture2d *)&this->dummyD3DTexture, subresource, &retVal); EXPECT_EQ(nullptr, memObj2); EXPECT_EQ(this->pickParam(CL_INVALID_D3D10_RESOURCE_KHR, CL_INVALID_D3D11_RESOURCE_KHR), retVal); EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size()); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); subresource++; this->setupMockGmm(); // setup new mock for new resource auto memObj3 = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture2d *)&this->dummyD3DTexture, subresource, &retVal); ASSERT_NE(nullptr, memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, this->mockSharingFcns->getTrackedResourcesVector()->size()); clReleaseMemObject(memObj); 
EXPECT_EQ(1u, this->mockSharingFcns->getTrackedResourcesVector()->size()); clReleaseMemObject(memObj3); EXPECT_EQ(0u, this->mockSharingFcns->getTrackedResourcesVector()->size()); } TYPED_TEST_P(D3DTests, givenInvalidSubresourceWhenCreateTexture2dIsCalledThenFail) { cl_int retVal; this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4; this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4; cl_uint subresource = 16; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture2d *)&this->dummyD3DTexture, subresource, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); subresource = 20; memObj = this->createFromD3DTexture2DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture2d *)&this->dummyD3DTexture, subresource, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TYPED_TEST_P(D3DTests, givenInvalidSubresourceWhenCreateTexture3dIsCalledThenFail) { cl_int retVal; this->mockSharingFcns->mockTexture3dDesc.MipLevels = 4; cl_uint subresource = 16; EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); auto memObj = this->createFromD3DTexture3DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture3d *)&this->dummyD3DTexture, subresource, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); subresource = 20; memObj = this->createFromD3DTexture3DApi(this->context, CL_MEM_READ_WRITE, (D3DTexture3d *)&this->dummyD3DTexture, subresource, &retVal); EXPECT_EQ(nullptr, memObj); 
EXPECT_EQ(CL_INVALID_VALUE, retVal); } TYPED_TEST_P(D3DTests, givenReadonlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat) { EXPECT_GT(SurfaceFormats::readOnly12().size(), 0u); for (auto &format : SurfaceFormats::readOnly12()) { // only RGBA, BGRA, RG, R allowed for D3D if (format.OCLImageFormat.image_channel_order == CL_RGBA || format.OCLImageFormat.image_channel_order == CL_BGRA || format.OCLImageFormat.image_channel_order == CL_RG || format.OCLImageFormat.image_channel_order == CL_R) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_READ_ONLY, 12); ASSERT_NE(nullptr, surfaceFormat); EXPECT_EQ(&format, surfaceFormat); } } } TYPED_TEST_P(D3DTests, givenWriteOnlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat) { EXPECT_GT(SurfaceFormats::writeOnly().size(), 0u); for (auto &format : SurfaceFormats::writeOnly()) { // only RGBA, BGRA, RG, R allowed for D3D if (format.OCLImageFormat.image_channel_order == CL_RGBA || format.OCLImageFormat.image_channel_order == CL_BGRA || format.OCLImageFormat.image_channel_order == CL_RG || format.OCLImageFormat.image_channel_order == CL_R) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_WRITE_ONLY, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); ASSERT_NE(nullptr, surfaceFormat); EXPECT_EQ(&format, surfaceFormat); } } } TYPED_TEST_P(D3DTests, givenReadWriteFormatWhenLookingForSurfaceFormatThenReturnValidFormat) { EXPECT_GT(SurfaceFormats::readWrite().size(), 0u); for (auto &format : SurfaceFormats::readWrite()) { // only RGBA, BGRA, RG, R allowed for D3D if (format.OCLImageFormat.image_channel_order == CL_RGBA || format.OCLImageFormat.image_channel_order == CL_BGRA || format.OCLImageFormat.image_channel_order == CL_RG || format.OCLImageFormat.image_channel_order == CL_R) { auto surfaceFormat = D3DSharing::findSurfaceFormatInfo(format.surfaceFormat.GMMSurfaceFormat, CL_MEM_READ_WRITE, 
context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); ASSERT_NE(nullptr, surfaceFormat); EXPECT_EQ(&format, surfaceFormat); } } } REGISTER_TYPED_TEST_CASE_P(D3DTests, GivenSpecificDeviceSetWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned, GivenSpecificDeviceSourceWhenGettingDeviceIDsFromD3DThenOnlySelectedDevicesAreReturned, givenNonIntelVendorWhenGetDeviceIdIsCalledThenReturnError, WhenCreatingFromD3DBufferKhrApiThenValidBufferIsReturned, WhenCreatingFromD3D2dTextureKhrApiThenValidImageIsReturned, WhenCreatingFromD3D3dTextureKhrApiThenValidImageIsReturned, givenSharedResourceFlagWhenCreateBufferThenStagingBufferEqualsPassedBuffer, givenNonSharedResourceFlagWhenCreateBufferThenCreateNewStagingBuffer, givenNonSharedResourceBufferWhenAcquiredThenCopySubregion, givenSharedResourceBufferWhenAcquiredThenDontCopySubregion, givenSharedResourceBufferAndInteropUserSyncDisabledWhenAcquiredThenFlushOnAcquire, WhenGettingPreferD3DSharedResourcesThenCorrectValueIsReturned, WhenGettingD3DResourceInfoFromMemObjThenCorrectInfoIsReturned, WhenGettingD3DSubresourceInfoFromMemObjThenCorrectInfoIsReturned, givenTheSameD3DBufferWhenNextCreateIsCalledThenFail, givenD3DTextureWithTheSameSubresourceWhenNextCreateIsCalledThenFail, givenInvalidSubresourceWhenCreateTexture2dIsCalledThenFail, givenInvalidSubresourceWhenCreateTexture3dIsCalledThenFail, givenReadonlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat, givenWriteOnlyFormatWhenLookingForSurfaceFormatThenReturnValidFormat, givenReadWriteFormatWhenLookingForSurfaceFormatThenReturnValidFormat, givenNV12FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams, givenP010FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams, givenP016FormatAndEvenPlaneWhen2dCreatedThenSetPlaneParams, givenNV12FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams, givenP010FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams, givenP016FormatAndOddPlaneWhen2dCreatedThenSetPlaneParams); INSTANTIATE_TYPED_TEST_CASE_P(D3DSharingTests, 
D3DTests, D3DTypes); TEST(D3DSurfaceTest, givenD3DSurfaceWhenInvalidMemObjectIsPassedToValidateUpdateDataThenInvalidMemObjectErrorIsReturned) { class MockD3DSurface : public D3DSurface { public: MockD3DSurface(Context *context, cl_dx9_surface_info_khr *surfaceInfo, D3DTypesHelper::D3D9::D3DTexture2d *surfaceStaging, cl_uint plane, ImagePlane imagePlane, cl_dx9_media_adapter_type_khr adapterType, bool sharedResource, bool lockable) : D3DSurface(context, surfaceInfo, surfaceStaging, plane, imagePlane, adapterType, sharedResource, lockable) {} }; MockContext context; cl_dx9_surface_info_khr surfaceInfo = {}; ImagePlane imagePlane = ImagePlane::NO_PLANE; std::unique_ptr surface(new MockD3DSurface(&context, &surfaceInfo, nullptr, 0, imagePlane, 0, false, false)); MockBuffer buffer; UpdateData updateData; updateData.memObject = &buffer; auto result = surface->validateUpdateData(updateData); EXPECT_EQ(CL_INVALID_MEM_OBJECT, result); } TEST(D3D9, givenD3D9BuilderAndExtensionEnableTrueWhenGettingExtensionsThenCorrectExtensionsListIsReturned) { auto builderFactory = std::make_unique>(); builderFactory.get()->extensionEnabled = true; EXPECT_THAT(builderFactory->getExtensions(), testing::HasSubstr(std::string("cl_intel_dx9_media_sharing"))); EXPECT_THAT(builderFactory->getExtensions(), testing::HasSubstr(std::string("cl_khr_dx9_media_sharing"))); } TEST(D3D9, givenD3D9BuilderAndExtensionEnableFalseWhenGettingExtensionsThenDx9MediaSheringExtensionsAreNotReturned) { auto builderFactory = std::make_unique>(); builderFactory.get()->extensionEnabled = false; EXPECT_THAT(builderFactory->getExtensions(), testing::Not(testing::HasSubstr(std::string("cl_intel_dx9_media_sharing")))); EXPECT_THAT(builderFactory->getExtensions(), testing::Not(testing::HasSubstr(std::string("cl_khr_dx9_media_sharing")))); } TEST(D3D10, givenD3D10BuilderWhenGettingExtensionsThenCorrectExtensionsListIsReturned) { auto builderFactory = std::make_unique>(); EXPECT_THAT(builderFactory->getExtensions(), 
testing::HasSubstr(std::string("cl_khr_d3d10_sharing"))); } TEST(D3D11, givenD3D11BuilderWhenGettingExtensionsThenCorrectExtensionsListIsReturned) { auto builderFactory = std::make_unique>(); EXPECT_THAT(builderFactory->getExtensions(), testing::HasSubstr(std::string("cl_khr_d3d11_sharing"))); EXPECT_THAT(builderFactory->getExtensions(), testing::HasSubstr(std::string("cl_intel_d3d11_nv12_media_sharing"))); } TEST(D3DSharingFactory, givenEnabledFormatQueryAndFactoryWithD3DSharingsWhenGettingExtensionFunctionAddressThenFormatQueryFunctionsAreReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); SharingFactoryMock sharingFactory; auto function = sharingFactory.getExtensionFunctionAddress("clGetSupportedDX9MediaSurfaceFormatsINTEL"); EXPECT_EQ(reinterpret_cast(clGetSupportedDX9MediaSurfaceFormatsINTEL), function); function = sharingFactory.getExtensionFunctionAddress("clGetSupportedD3D10TextureFormatsINTEL"); EXPECT_EQ(reinterpret_cast(clGetSupportedD3D10TextureFormatsINTEL), function); function = sharingFactory.getExtensionFunctionAddress("clGetSupportedD3D11TextureFormatsINTEL"); EXPECT_EQ(reinterpret_cast(clGetSupportedD3D11TextureFormatsINTEL), function); } TEST(D3D9SharingFactory, givenDriverInfoWhenVerifyExtensionSupportThenExtensionEnableIsSetCorrect) { class MockDriverInfo : public DriverInfo { public: bool getMediaSharingSupport() override { return support; }; bool support = true; }; class MockSharingFactory : public SharingFactory { public: MockSharingFactory() { memcpy_s(savedState, sizeof(savedState), sharingContextBuilder, sizeof(sharingContextBuilder)); } ~MockSharingFactory() { memcpy_s(sharingContextBuilder, sizeof(sharingContextBuilder), savedState, sizeof(savedState)); } void prepare() { for (auto &builder : sharingContextBuilder) { builder = nullptr; } d3d9SharingBuilderFactory = std::make_unique>(); sharingContextBuilder[SharingType::D3D9_SHARING] = d3d9SharingBuilderFactory.get(); } using 
SharingFactory::sharingContextBuilder; std::unique_ptr> d3d9SharingBuilderFactory; decltype(SharingFactory::sharingContextBuilder) savedState; }; auto driverInfo = std::make_unique(); auto mockSharingFactory = std::make_unique(); mockSharingFactory->prepare(); driverInfo->support = true; mockSharingFactory->verifyExtensionSupport(driverInfo.get()); EXPECT_TRUE(mockSharingFactory->d3d9SharingBuilderFactory->extensionEnabled); driverInfo->support = false; mockSharingFactory->verifyExtensionSupport(driverInfo.get()); EXPECT_FALSE(mockSharingFactory->d3d9SharingBuilderFactory->extensionEnabled); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/d3d_sharing/d3d_tests_part2.cpp000066400000000000000000000606511363734646600275370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/arrayref.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/source/sharings/d3d/d3d_buffer.h" #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "opencl/source/sharings/d3d/d3d_surface.h" #include "opencl/source/sharings/d3d/d3d_texture.h" #include "opencl/test/unit_test/fixtures/d3d_test_fixture.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace NEO { TYPED_TEST_CASE_P(D3DTests); TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDontDoExplicitFinish) { this->context->setInteropUserSyncEnabled(true); this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; EXPECT_CALL(*this->mockSharingFcns, getBufferDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockBufferDesc)); class MockCmdQ : public 
MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); } TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishTwice) { this->context->setInteropUserSyncEnabled(false); class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, 
nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2u, mockCmdQ->finishCalled); } TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishOnce) { this->context->setInteropUserSyncEnabled(false); this->mockSharingFcns->mockBufferDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; EXPECT_CALL(*this->mockSharingFcns, getBufferDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockBufferDesc)); class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockCmdQ->finishCalled); } TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDoExplicitFinishOnce) { this->context->setInteropUserSyncEnabled(true); class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties){}; cl_int finish() override { finishCalled++; return CL_SUCCESS; } uint32_t finishCalled = 0; }; auto mockCmdQ = std::unique_ptr(new MockCmdQ(this->context, this->context->getDevice(0), 0)); auto buffer = 
std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); cl_mem bufferMem = (cl_mem)buffer.get(); auto retVal = this->enqueueAcquireD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, mockCmdQ->finishCalled); retVal = this->enqueueReleaseD3DObjectsApi(this->mockSharingFcns, mockCmdQ.get(), 1, &bufferMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mockCmdQ->finishCalled); } TYPED_TEST_P(D3DTests, givenSharedResourceFlagWhenCreate2dTextureThenStagingTextureEqualsPassedTexture) { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->mockTexture2dDesc.ArraySize = 4; this->mockSharingFcns->mockTexture2dDesc.MipLevels = 4; ::testing::InSequence is; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); EXPECT_CALL(*this->mockSharingFcns, createTexture2d(_, _, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle((D3DTexture2d *)&this->dummyD3DTexture, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, addRef((D3DTexture2d *)&this->dummyD3DTexture)) .Times(1); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 4, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_TRUE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTexture, d3dTexture->getResourceStaging()); EXPECT_CALL(*this->mockSharingFcns, release((D3DTexture2d *)&this->dummyD3DTexture)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DQuery *)d3dTexture->getQuery())) .Times(1); } TYPED_TEST_P(D3DTests, givenNonSharedResourceFlagWhenCreate2dTextureThenCreateStagingTexture) { 
this->mockSharingFcns->mockTexture2dDesc.MiscFlags = 0; ::testing::InSequence is; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); EXPECT_CALL(*this->mockSharingFcns, createTexture2d(_, _, _)) .Times(1) .WillOnce(SetArgPointee<0>((D3DTexture2d *)&this->dummyD3DTextureStaging)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle((D3DTexture2d *)&this->dummyD3DTextureStaging, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, addRef((D3DTexture2d *)&this->dummyD3DTexture)) .Times(1); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_FALSE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTextureStaging, d3dTexture->getResourceStaging()); EXPECT_CALL(*this->mockSharingFcns, release((D3DTexture2d *)&this->dummyD3DTextureStaging)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DTexture2d *)&this->dummyD3DTexture)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DQuery *)d3dTexture->getQuery())) .Times(1); } TYPED_TEST_P(D3DTests, givenSharedResourceFlagWhenCreate3dTextureThenStagingTextureEqualsPassedTexture) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->mockTexture3dDesc.MipLevels = 4; EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); EXPECT_CALL(*this->mockSharingFcns, createTexture3d(_, _, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle((D3DTexture2d *)&this->dummyD3DTexture, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, addRef((D3DTexture3d *)&this->dummyD3DTexture)) .Times(1); auto image = std::unique_ptr(D3DTexture::create3d(this->context, 
(D3DTexture3d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 0, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_TRUE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTexture, d3dTexture->getResourceStaging()); EXPECT_CALL(*this->mockSharingFcns, release((D3DTexture2d *)&this->dummyD3DTexture)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DQuery *)d3dTexture->getQuery())) .Times(1); } TYPED_TEST_P(D3DTests, givenNonSharedResourceFlagWhenCreate3dTextureThenCreateStagingTexture) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = 0; EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); EXPECT_CALL(*this->mockSharingFcns, createTexture3d(_, _, _)) .Times(1) .WillOnce(SetArgPointee<0>((D3DTexture3d *)&this->dummyD3DTextureStaging)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle((D3DTexture2d *)&this->dummyD3DTextureStaging, _)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, addRef((D3DTexture3d *)&this->dummyD3DTexture)) .Times(1); auto image = std::unique_ptr(D3DTexture::create3d(this->context, (D3DTexture3d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); EXPECT_FALSE(d3dTexture->isSharedResource()); EXPECT_EQ(&this->dummyD3DTextureStaging, d3dTexture->getResourceStaging()); EXPECT_CALL(*this->mockSharingFcns, release((D3DTexture2d *)&this->dummyD3DTextureStaging)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DTexture2d *)&this->dummyD3DTexture)) .Times(1); EXPECT_CALL(*this->mockSharingFcns, release((D3DQuery *)d3dTexture->getQuery())) .Times(1); } TYPED_TEST_P(D3DTests, givenD3DDeviceParamWhenContextCreationThenSetProperValues) { cl_device_id deviceID = this->context->getDevice(0); cl_platform_id 
pid[1] = {this->pPlatform}; auto param = this->pickParam(CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR); cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], param, 0, 0}; cl_int retVal = CL_SUCCESS; auto ctx = std::unique_ptr(Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, ctx.get()); EXPECT_EQ(1u, ctx->preferD3dSharedResources); EXPECT_NE(nullptr, ctx->getSharing>()); } TYPED_TEST_P(D3DTests, givenSharedNtHandleFlagWhenCreate2dTextureThenGetNtHandle) { this->mockSharingFcns->mockTexture2dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED_NTHANDLE; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); EXPECT_CALL(*this->mockSharingFcns, createTexture2d(_, _, _)) .Times(1) .WillOnce(SetArgPointee<0>((D3DTexture2d *)&this->dummyD3DTextureStaging)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, getSharedNTHandle(_, _)) .Times(1); auto image = std::unique_ptr(D3DTexture::create2d(this->context, (D3DTexture2d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); } TYPED_TEST_P(D3DTests, givenSharedNtHandleFlagWhenCreate3dTextureThenGetNtHandle) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED_NTHANDLE; EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); EXPECT_CALL(*this->mockSharingFcns, createTexture3d(_, _, _)) .Times(1) .WillOnce(SetArgPointee<0>((D3DTexture3d *)&this->dummyD3DTextureStaging)); EXPECT_CALL(*this->mockSharingFcns, getSharedHandle(_, _)) .Times(0); EXPECT_CALL(*this->mockSharingFcns, getSharedNTHandle(_, _)) 
.Times(1); auto image = std::unique_ptr(D3DTexture::create3d(this->context, (D3DTexture3d *)&this->dummyD3DTexture, CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); auto d3dTexture = static_cast *>(image->getSharingHandler().get()); ASSERT_NE(nullptr, d3dTexture); } TYPED_TEST_P(D3DTests, WhenFillingBufferDescThenBufferContentIsCorrect) { D3DBufferDesc requestedDesc = {}; D3DBufferDesc expectedDesc = {}; expectedDesc.ByteWidth = 10; expectedDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; this->mockSharingFcns->fillCreateBufferDesc(requestedDesc, 10); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3DBufferDesc)) == 0); } TYPED_TEST_P(D3DTests, WhenFillingTexture2dDescThenImageContentIsCorrect) { D3DTexture2dDesc requestedDesc = {}; D3DTexture2dDesc expectedDesc = {}; D3DTexture2dDesc srcDesc = {}; cl_uint subresource = 4; srcDesc.Width = 10; srcDesc.Height = 20; srcDesc.MipLevels = 9; srcDesc.ArraySize = 5; srcDesc.Format = DXGI_FORMAT::DXGI_FORMAT_A8_UNORM; srcDesc.SampleDesc = {8, 9}; srcDesc.BindFlags = 123; srcDesc.CPUAccessFlags = 456; srcDesc.MiscFlags = 789; expectedDesc.Width = srcDesc.Width; expectedDesc.Height = srcDesc.Height; expectedDesc.MipLevels = 1; expectedDesc.ArraySize = 1; expectedDesc.Format = srcDesc.Format; expectedDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; expectedDesc.SampleDesc = srcDesc.SampleDesc; for (uint32_t i = 0u; i < (subresource % srcDesc.MipLevels); i++) { expectedDesc.Width /= 2; expectedDesc.Height /= 2; } this->mockSharingFcns->fillCreateTexture2dDesc(requestedDesc, &srcDesc, subresource); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3DTexture2dDesc)) == 0); } TYPED_TEST_P(D3DTests, WhenFillingTexture3dDescThenImageContentIsCorrect) { D3DTexture3dDesc requestedDesc = {}; D3DTexture3dDesc expectedDesc = {}; D3DTexture3dDesc srcDesc = {}; cl_uint subresource = 4; srcDesc.Width = 10; srcDesc.Height = 20; srcDesc.Depth = 30; srcDesc.MipLevels = 9; srcDesc.Format = 
DXGI_FORMAT::DXGI_FORMAT_A8_UNORM; srcDesc.BindFlags = 123; srcDesc.CPUAccessFlags = 456; srcDesc.MiscFlags = 789; expectedDesc.Width = srcDesc.Width; expectedDesc.Height = srcDesc.Height; expectedDesc.Depth = srcDesc.Depth; expectedDesc.MipLevels = 1; expectedDesc.Format = srcDesc.Format; expectedDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; for (uint32_t i = 0u; i < (subresource % srcDesc.MipLevels); i++) { expectedDesc.Width /= 2; expectedDesc.Height /= 2; expectedDesc.Depth /= 2; } this->mockSharingFcns->fillCreateTexture3dDesc(requestedDesc, &srcDesc, subresource); EXPECT_TRUE(memcmp(&requestedDesc, &expectedDesc, sizeof(D3DTexture3dDesc)) == 0); } TYPED_TEST_P(D3DTests, givenPlaneWhenFindYuvSurfaceCalledThenReturnValidImgFormat) { const ClSurfaceFormatInfo *surfaceFormat; DXGI_FORMAT testFormat[] = {DXGI_FORMAT::DXGI_FORMAT_NV12, DXGI_FORMAT::DXGI_FORMAT_P010, DXGI_FORMAT::DXGI_FORMAT_P016}; int channelDataType[] = {CL_UNORM_INT8, CL_UNORM_INT16, CL_UNORM_INT16}; for (int n = 0; n < 3; n++) { surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::NO_PLANE, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_U, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_UV, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_V, CL_MEM_READ_WRITE); 
EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_RG); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); surfaceFormat = D3DTexture::findYuvSurfaceFormatInfo(testFormat[n], ImagePlane::PLANE_Y, CL_MEM_READ_WRITE); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_order == CL_R); EXPECT_TRUE(surfaceFormat->OCLImageFormat.image_channel_data_type == channelDataType[n]); } } TYPED_TEST_P(D3DTests, GivenForced32BitAddressingWhenCreatingBufferThenBufferHas32BitAllocation) { auto buffer = std::unique_ptr(D3DBuffer::create(this->context, (D3DBufferObj *)&this->dummyD3DBuffer, CL_MEM_READ_WRITE, nullptr)); ASSERT_NE(nullptr, buffer.get()); auto *allocation = buffer->getGraphicsAllocation(); EXPECT_NE(nullptr, allocation); EXPECT_TRUE(allocation->is32BitAllocation()); } TYPED_TEST_P(D3DTests, givenD3DTexture2dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet) { this->mockSharingFcns->mockTexture2dDesc.Format = DXGI_FORMAT_P016; EXPECT_CALL(*this->mockSharingFcns, getTexture2dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture2dDesc)); auto image = std::unique_ptr(D3DTexture::create2d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 7, nullptr)); ASSERT_NE(nullptr, image.get()); ASSERT_NE(nullptr, image->getGraphicsAllocation()); EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, image->getGraphicsAllocation()->getAllocationType()); } TYPED_TEST_P(D3DTests, givenD3DTexture3dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet) { this->mockSharingFcns->mockTexture3dDesc.MiscFlags = D3DResourceFlags::MISC_SHARED; EXPECT_CALL(*this->mockSharingFcns, getTexture3dDesc(_, _)) .Times(1) .WillOnce(SetArgPointee<0>(this->mockSharingFcns->mockTexture3dDesc)); EXPECT_CALL(*this->mockSharingFcns, createTexture3d(_, _, _)) .Times(1) .WillOnce(SetArgPointee<0>(reinterpret_cast(&this->dummyD3DTextureStaging))); auto image = 
std::unique_ptr(D3DTexture::create3d(this->context, reinterpret_cast(&this->dummyD3DTexture), CL_MEM_READ_WRITE, 1, nullptr)); ASSERT_NE(nullptr, image.get()); ASSERT_NE(nullptr, image->getGraphicsAllocation()); EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, image->getGraphicsAllocation()->getAllocationType()); } REGISTER_TYPED_TEST_CASE_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDontDoExplicitFinish, givenNonSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishTwice, givenSharedResourceBufferAndInteropUserSyncDisabledWhenReleaseIsCalledThenDoExplicitFinishOnce, givenNonSharedResourceBufferAndInteropUserSyncEnabledWhenReleaseIsCalledThenDoExplicitFinishOnce, givenSharedResourceFlagWhenCreate2dTextureThenStagingTextureEqualsPassedTexture, givenNonSharedResourceFlagWhenCreate2dTextureThenCreateStagingTexture, givenSharedResourceFlagWhenCreate3dTextureThenStagingTextureEqualsPassedTexture, givenNonSharedResourceFlagWhenCreate3dTextureThenCreateStagingTexture, givenD3DDeviceParamWhenContextCreationThenSetProperValues, givenSharedNtHandleFlagWhenCreate2dTextureThenGetNtHandle, givenSharedNtHandleFlagWhenCreate3dTextureThenGetNtHandle, WhenFillingBufferDescThenBufferContentIsCorrect, WhenFillingTexture2dDescThenImageContentIsCorrect, WhenFillingTexture3dDescThenImageContentIsCorrect, givenPlaneWhenFindYuvSurfaceCalledThenReturnValidImgFormat, GivenForced32BitAddressingWhenCreatingBufferThenBufferHas32BitAllocation, givenD3DTexture2dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet, givenD3DTexture3dWhenOclImageIsCreatedThenSharedImageAllocationTypeIsSet); INSTANTIATE_TYPED_TEST_CASE_P(D3DSharingTests, D3DTests, D3DTypes); using D3D10Test = D3DTests; TEST_F(D3D10Test, givenIncompatibleD3DAdapterWhenGettingDeviceIdsThenNoDevicesAreReturned) { cl_device_id deviceID; cl_uint numDevices = 15; auto clAdapterId = context->getDevice(0)->getHardwareInfo().platform.usDeviceID; auto 
d3dAdapterId = clAdapterId + 1; mockSharingFcns->mockDxgiDesc.DeviceId = d3dAdapterId; EXPECT_NE(clAdapterId, d3dAdapterId); auto retVal = clGetDeviceIDsFromD3D10KHR(pPlatform, CL_D3D10_DEVICE_KHR, nullptr, CL_ALL_DEVICES_FOR_D3D10_KHR, 1, &deviceID, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(0, numDevices); } using D3D11Test = D3DTests; TEST_F(D3D11Test, givenIncompatibleD3DAdapterWhenGettingDeviceIdsThenNoDevicesAreReturned) { cl_device_id deviceID; cl_uint numDevices = 15; auto clAdapterId = context->getDevice(0)->getHardwareInfo().platform.usDeviceID; auto d3dAdapterId = clAdapterId + 1; mockSharingFcns->mockDxgiDesc.DeviceId = d3dAdapterId; EXPECT_NE(clAdapterId, d3dAdapterId); auto retVal = clGetDeviceIDsFromD3D11KHR(pPlatform, CL_D3D11_DEVICE_KHR, nullptr, CL_ALL_DEVICES_FOR_D3D11_KHR, 1, &deviceID, &numDevices); EXPECT_EQ(CL_DEVICE_NOT_FOUND, retVal); EXPECT_EQ(0, numDevices); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/device/000077500000000000000000000000001363734646600230715ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/device/CMakeLists.txt000066400000000000000000000013751363734646600256370ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_device ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/device_get_engine_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_timers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sub_device_tests.cpp ) if(WIN32) list(APPEND IGDRCL_SRCS_tests_device ${CMAKE_CURRENT_SOURCE_DIR}/device_win_timers_tests.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_device}) add_subdirectories() 
compute-runtime-20.13.16352/opencl/test/unit_test/device/device_caps_tests.cpp000066400000000000000000001616151363734646600272760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "driver_version.h" #include "gtest/gtest.h" #include namespace NEO { extern const char *familyName[]; namespace MockSipData { extern SipKernelType calledType; extern bool called; } // namespace MockSipData } // namespace NEO using namespace NEO; struct DeviceGetCapsTest : public ::testing::Test { void SetUp() override { MockSipData::calledType = SipKernelType::COUNT; MockSipData::called = false; } void TearDown() override { MockSipData::calledType = SipKernelType::COUNT; MockSipData::called = false; } }; TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const auto &sharedCaps = device->getSharedDeviceInfo(); const auto &sysInfo = defaultHwInfo->gtSystemInfo; auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); EXPECT_NE(nullptr, caps.builtInKernels); std::string strDriverName = caps.name; std::string strFamilyName = familyName[device->getRenderCoreFamily()]; EXPECT_NE(std::string::npos, strDriverName.find(strFamilyName)); 
EXPECT_NE(nullptr, caps.name); EXPECT_NE(nullptr, caps.vendor); EXPECT_NE(nullptr, caps.driverVersion); EXPECT_NE(nullptr, caps.profile); EXPECT_NE(nullptr, caps.clVersion); EXPECT_NE(nullptr, caps.clCVersion); EXPECT_NE(nullptr, caps.spirVersions); EXPECT_NE(nullptr, caps.deviceExtensions); EXPECT_EQ(static_cast(CL_TRUE), caps.deviceAvailable); EXPECT_EQ(static_cast(CL_TRUE), caps.compilerAvailable); EXPECT_EQ(16u, caps.preferredVectorWidthChar); EXPECT_EQ(8u, caps.preferredVectorWidthShort); EXPECT_EQ(4u, caps.preferredVectorWidthInt); EXPECT_EQ(1u, caps.preferredVectorWidthLong); EXPECT_EQ(1u, caps.preferredVectorWidthFloat); EXPECT_EQ(1u, caps.preferredVectorWidthDouble); EXPECT_EQ(8u, caps.preferredVectorWidthHalf); EXPECT_EQ(16u, caps.nativeVectorWidthChar); EXPECT_EQ(8u, caps.nativeVectorWidthShort); EXPECT_EQ(4u, caps.nativeVectorWidthInt); EXPECT_EQ(1u, caps.nativeVectorWidthLong); EXPECT_EQ(1u, caps.nativeVectorWidthFloat); EXPECT_EQ(1u, caps.nativeVectorWidthDouble); EXPECT_EQ(8u, caps.nativeVectorWidthHalf); EXPECT_EQ(1u, caps.linkerAvailable); EXPECT_NE(0u, sharedCaps.globalMemCachelineSize); EXPECT_NE(0u, caps.globalMemCacheSize); EXPECT_LT(0u, sharedCaps.globalMemSize); EXPECT_EQ(sharedCaps.maxMemAllocSize, caps.maxConstantBufferSize); EXPECT_NE(nullptr, sharedCaps.ilVersion); EXPECT_EQ(static_cast(CL_TRUE), caps.deviceAvailable); EXPECT_EQ(static_cast(CL_READ_WRITE_CACHE), caps.globalMemCacheType); EXPECT_EQ(sysInfo.EUCount, caps.maxComputUnits); EXPECT_LT(0u, caps.maxConstantArgs); EXPECT_LE(128u, sharedCaps.maxReadImageArgs); EXPECT_LE(128u, sharedCaps.maxWriteImageArgs); EXPECT_EQ(128u, caps.maxReadWriteImageArgs); EXPECT_LE(sharedCaps.maxReadImageArgs * sizeof(cl_mem), sharedCaps.maxParameterSize); EXPECT_LE(sharedCaps.maxWriteImageArgs * sizeof(cl_mem), sharedCaps.maxParameterSize); EXPECT_LE(128u * MB, sharedCaps.maxMemAllocSize); EXPECT_GE((4 * GB) - (8 * KB), sharedCaps.maxMemAllocSize); EXPECT_LE(65536u, sharedCaps.imageMaxBufferSize); 
EXPECT_GT(sharedCaps.maxWorkGroupSize, 0u); EXPECT_EQ(sharedCaps.maxWorkItemSizes[0], sharedCaps.maxWorkGroupSize); EXPECT_EQ(sharedCaps.maxWorkItemSizes[1], sharedCaps.maxWorkGroupSize); EXPECT_EQ(sharedCaps.maxWorkItemSizes[2], sharedCaps.maxWorkGroupSize); EXPECT_LT(0u, sharedCaps.maxSamplers); // Minimum requirements for OpenCL 1.x EXPECT_EQ(static_cast(CL_FP_ROUND_TO_NEAREST), CL_FP_ROUND_TO_NEAREST & caps.singleFpConfig); EXPECT_EQ(static_cast(CL_FP_INF_NAN), CL_FP_INF_NAN & caps.singleFpConfig); cl_device_fp_config singleFpConfig = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_FMA | CL_FP_DENORM; EXPECT_EQ(singleFpConfig, caps.singleFpConfig & singleFpConfig); EXPECT_EQ(static_cast(CL_EXEC_KERNEL), CL_EXEC_KERNEL & caps.executionCapabilities); EXPECT_EQ(static_cast(CL_QUEUE_PROFILING_ENABLE), CL_QUEUE_PROFILING_ENABLE & caps.queueOnHostProperties); EXPECT_EQ(static_cast(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & caps.queueOnHostProperties); EXPECT_LT(128u, caps.memBaseAddressAlign); EXPECT_LT(0u, caps.minDataTypeAlignSize); EXPECT_EQ(1u, caps.endianLittle); auto expectedDeviceSubgroups = hwHelper.getDeviceSubGroupSizes(); EXPECT_EQ(expectedDeviceSubgroups.size(), sharedCaps.maxSubGroups.size()); for (uint32_t i = 0; i < expectedDeviceSubgroups.size(); i++) { EXPECT_EQ(expectedDeviceSubgroups[i], sharedCaps.maxSubGroups[i]); } if (device->getEnabledClVersion() >= 21) { EXPECT_TRUE(caps.independentForwardProgress != 0); } else { EXPECT_FALSE(caps.independentForwardProgress != 0); } EXPECT_EQ(sharedCaps.maxWorkGroupSize / hwHelper.getMinimalSIMDSize(), caps.maxNumOfSubGroups); EXPECT_EQ(1024u, caps.maxOnDeviceEvents); EXPECT_EQ(1u, sharedCaps.maxOnDeviceQueues); EXPECT_EQ(64u * MB, caps.queueOnDeviceMaxSize); EXPECT_EQ(128 * KB, caps.queueOnDevicePreferredSize); EXPECT_EQ(static_cast(CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE), 
caps.queueOnDeviceProperties); EXPECT_EQ(64u, caps.preferredGlobalAtomicAlignment); EXPECT_EQ(64u, caps.preferredLocalAtomicAlignment); EXPECT_EQ(64u, caps.preferredPlatformAtomicAlignment); EXPECT_EQ(static_cast(device->getHardwareInfo().capabilityTable.supportsImages), sharedCaps.imageSupport); EXPECT_EQ(16384u, sharedCaps.image2DMaxWidth); EXPECT_EQ(16384u, sharedCaps.image2DMaxHeight); EXPECT_EQ(2048u, sharedCaps.imageMaxArraySize); if (device->getHardwareInfo().capabilityTable.clVersionSupport == 12 && is64bit) { EXPECT_TRUE(sharedCaps.force32BitAddressess); } } HWTEST_F(DeviceGetCapsTest, givenDeviceWhenAskingForSubGroupSizesThenReturnCorrectValues) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); auto deviceSubgroups = hwHelper.getDeviceSubGroupSizes(); EXPECT_EQ(3u, deviceSubgroups.size()); EXPECT_EQ(8u, deviceSubgroups[0]); EXPECT_EQ(16u, deviceSubgroups[1]); EXPECT_EQ(32u, deviceSubgroups[2]); } TEST_F(DeviceGetCapsTest, GivenPlatformWhenGettingHwInfoThenImage3dDimensionsAreCorrect) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const auto &sharedCaps = device->getSharedDeviceInfo(); if (device->getHardwareInfo().platform.eRenderCoreFamily > IGFX_GEN8_CORE && device->getHardwareInfo().platform.eRenderCoreFamily != IGFX_GEN12LP_CORE) { EXPECT_EQ(16384u, caps.image3DMaxWidth); EXPECT_EQ(16384u, caps.image3DMaxHeight); } else { EXPECT_EQ(2048u, caps.image3DMaxWidth); EXPECT_EQ(2048u, caps.image3DMaxHeight); } EXPECT_EQ(2048u, sharedCaps.image3DMaxDepth); } TEST_F(DeviceGetCapsTest, givenDontForcePreemptionModeDebugVariableWhenCreateDeviceThenSetDefaultHwPreemptionMode) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.ForcePreemptionMode.set(-1); auto device = 
std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_TRUE(device->getHardwareInfo().capabilityTable.defaultPreemptionMode == device->getPreemptionMode()); } } TEST_F(DeviceGetCapsTest, givenForcePreemptionModeDebugVariableWhenCreateDeviceThenSetForcedMode) { DebugManagerStateRestore dbgRestorer; { PreemptionMode forceMode = PreemptionMode::MidThread; if (defaultHwInfo->capabilityTable.defaultPreemptionMode == forceMode) { // force non-default mode forceMode = PreemptionMode::ThreadGroup; } DebugManager.flags.ForcePreemptionMode.set((int32_t)forceMode); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_TRUE(forceMode == device->getPreemptionMode()); } } TEST_F(DeviceGetCapsTest, givenDeviceWithMidThreadPreemptionWhenDeviceIsCreatedThenSipKernelIsNotCreated) { DebugManagerStateRestore dbgRestorer; { auto builtIns = new MockBuiltins(); ASSERT_FALSE(MockSipData::called); DebugManager.flags.ForcePreemptionMode.set((int32_t)PreemptionMode::MidThread); auto executionEnvironment = new ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0u]->builtins.reset(builtIns); auto device = std::unique_ptr(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0u)); ASSERT_EQ(builtIns, device->getBuiltIns()); EXPECT_FALSE(MockSipData::called); } } TEST_F(DeviceGetCapsTest, givenForceOclVersion21WhenCapsAreCreatedThenDeviceReportsOpenCL21) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.ForceOCLVersion.set(21); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); DebugManager.flags.ForceOCLVersion.set(0); } } TEST_F(DeviceGetCapsTest, 
givenForceOclVersion20WhenCapsAreCreatedThenDeviceReportsOpenCL20) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.ForceOCLVersion.set(20); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.0 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); DebugManager.flags.ForceOCLVersion.set(0); } } TEST_F(DeviceGetCapsTest, givenForceOclVersion12WhenCapsAreCreatedThenDeviceReportsOpenCL12) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.ForceOCLVersion.set(12); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); DebugManager.flags.ForceOCLVersion.set(0); } } TEST_F(DeviceGetCapsTest, givenForceInvalidOclVersionWhenCapsAreCreatedThenDeviceWillDefaultToOpenCL12) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.ForceOCLVersion.set(1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); DebugManager.flags.ForceOCLVersion.set(0); } } TEST_F(DeviceGetCapsTest, givenForce32bitAddressingWhenCapsAreCreatedThenDeviceReports32bitAddressingOptimization) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.Force32bitAddressing.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const auto &sharedCaps = device->getSharedDeviceInfo(); if (is64bit) { EXPECT_TRUE(sharedCaps.force32BitAddressess); } else { EXPECT_FALSE(sharedCaps.force32BitAddressess); } auto expectedSize = (cl_ulong)(4 * 0.8 * GB); EXPECT_LE(sharedCaps.globalMemSize, 
expectedSize); EXPECT_LE(sharedCaps.maxMemAllocSize, expectedSize); EXPECT_LE(caps.maxConstantBufferSize, expectedSize); EXPECT_EQ(sharedCaps.addressBits, 32u); } } TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenGlobalMemSizeIsAlignedDownToPageSize) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &sharedCaps = device->getSharedDeviceInfo(); auto expectedSize = alignDown(sharedCaps.globalMemSize, MemoryConstants::pageSize); EXPECT_EQ(sharedCaps.globalMemSize, expectedSize); } TEST_F(DeviceGetCapsTest, Given32bitAddressingWhenDeviceIsCreatedThenGlobalMemSizeIsAlignedDownToPageSize) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &sharedCaps = device->getSharedDeviceInfo(); auto pMemManager = device->getMemoryManager(); unsigned int enabledCLVer = device->getEnabledClVersion(); bool addressing32Bit = is32bit || (is64bit && (enabledCLVer < 20)) || DebugManager.flags.Force32bitAddressing.get(); cl_ulong sharedMem = (cl_ulong)pMemManager->getSystemSharedMemory(0u); cl_ulong maxAppAddrSpace = (cl_ulong)pMemManager->getMaxApplicationAddress() + 1ULL; cl_ulong memSize = std::min(sharedMem, maxAppAddrSpace); memSize = (cl_ulong)((double)memSize * 0.8); if (addressing32Bit) { memSize = std::min(memSize, (uint64_t)(4 * GB * 0.8)); } cl_ulong expectedSize = alignDown(memSize, MemoryConstants::pageSize); EXPECT_EQ(sharedCaps.globalMemSize, expectedSize); } TEST_F(DeviceGetCapsTest, givenDeviceCapsWhenLocalMemoryIsEnabledThenCalculateGlobalMemSizeBasedOnLocalMemory) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableLocalMemory.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &sharedCaps = device->getSharedDeviceInfo(); auto pMemManager = device->getMemoryManager(); auto enabledCLVer = device->getEnabledClVersion(); bool addressing32Bit = is32bit || (is64bit && 
(enabledCLVer < 20)) || DebugManager.flags.Force32bitAddressing.get(); auto localMem = pMemManager->getLocalMemorySize(0u); auto maxAppAddrSpace = pMemManager->getMaxApplicationAddress() + 1; auto memSize = std::min(localMem, maxAppAddrSpace); memSize = static_cast(memSize * 0.8); if (addressing32Bit) { memSize = std::min(memSize, static_cast(4 * GB * 0.8)); } cl_ulong expectedSize = alignDown(memSize, MemoryConstants::pageSize); EXPECT_EQ(sharedCaps.globalMemSize, expectedSize); } TEST_F(DeviceGetCapsTest, givenGlobalMemSizeWhenCalculatingMaxAllocSizeThenAdjustToHWCap) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); HardwareCapabilities hwCaps = {0}; auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); hwHelper.setupHardwareCapabilities(&hwCaps, *defaultHwInfo); uint64_t expectedSize = std::max((caps.globalMemSize / 2), static_cast(128ULL * MemoryConstants::megaByte)); expectedSize = std::min(expectedSize, hwCaps.maxMemAllocSize); EXPECT_EQ(caps.maxMemAllocSize, expectedSize); } TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenExtensionsStringEndsWithSpace) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); auto len = strlen(caps.deviceExtensions); ASSERT_LT(0U, len); EXPECT_EQ(' ', caps.deviceExtensions[len - 1]); } TEST_F(DeviceGetCapsTest, givenEnableSharingFormatQuerySetTrueAndDisabledMultipleSubDevicesWhenDeviceCapsAreCreatedThenSharingFormatQueryIsReported) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableFormatQuery.set(true); DebugManager.flags.CreateMultipleSubDevices.set(0); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_sharing_format_query "))); } 
TEST_F(DeviceGetCapsTest, givenEnableSharingFormatQuerySetTrueAndEnabledMultipleSubDevicesWhenDeviceCapsAreCreatedThenSharingFormatQueryIsNotReported) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableFormatQuery.set(true); DebugManager.flags.CreateMultipleSubDevices.set(2); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, ::testing::Not(::testing::HasSubstr(std::string("cl_intel_sharing_format_query ")))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion21WhenCapsAreCreatedThenDeviceReportsClKhrSubgroupsExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_subgroups"))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion20WhenCapsAreCreatedThenDeviceDoesntReportClKhrSubgroupsExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_subgroups")))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion21WhenCapsAreCreatedThenDeviceReportsClKhrIlProgramExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_il_program"))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion20WhenCapsAreCreatedThenDeviceDoesntReportClKhrIlProgramExtension) { DebugManagerStateRestore 
dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_il_program")))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion21WhenCapsAreCreatedThenDeviceReportsClIntelSpirvExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const HardwareInfo *hwInfo = defaultHwInfo.get(); if (hwInfo->capabilityTable.supportsVme) { EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_spirv_device_side_avc_motion_estimation"))); } else { EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_spirv_device_side_avc_motion_estimation")))); } if (hwInfo->capabilityTable.supportsImages) { EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_spirv_media_block_io"))); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_3d_image_writes"))); } else { EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_spirv_media_block_io")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(std::string("cl_khr_3d_image_writes"))); } EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_spirv_subgroups"))); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_spirv_no_integer_wrap_decoration"))); } TEST_F(DeviceGetCapsTest, givenSupportImagesWhenCapsAreCreatedThenDeviceReportsClIntelSpirvMediaBlockIoExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_spirv_media_block_io"))); } TEST_F(DeviceGetCapsTest, givenNotSupportImagesWhenCapsAreCreatedThenDeviceNotReportsClIntelSpirvMediaBlockIoExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(21); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_spirv_media_block_io")))); } TEST_F(DeviceGetCapsTest, givenSupportImagesWhenCapsAreCreatedThenDeviceReportsClKhr3dImageWritesExtensions) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_3d_image_writes"))); } TEST_F(DeviceGetCapsTest, givenNotSupportImagesWhenCapsAreCreatedThenDeviceNotReportsClKhr3dImageWritesExtensions) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_3d_image_writes")))); } TEST_F(DeviceGetCapsTest, givenOpenCLVersion12WhenCapsAreCreatedThenDeviceDoesntReportClIntelSpirvExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(12); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); 
EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_spirv_device_side_avc_motion_estimation")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_spirv_subgroups")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_spirv_no_integer_wrap_decoration")))); } TEST_F(DeviceGetCapsTest, givenEnableNV12setToTrueAndSupportImagesWhenCapsAreCreatedThenDeviceReportsNV12Extension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableNV12.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); if (device->getHardwareInfo().capabilityTable.supportsImages) { EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_planar_yuv"))); EXPECT_TRUE(caps.nv12Extension); } else { EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_planar_yuv")))); } } TEST_F(DeviceGetCapsTest, givenEnablePackedYuvsetToTrueAndSupportImagesWhenCapsAreCreatedThenDeviceReportsPackedYuvExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); if (device->getHardwareInfo().capabilityTable.supportsImages) { EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_packed_yuv"))); EXPECT_TRUE(caps.packedYuvExtension); } else { EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_packed_yuv")))); } } TEST_F(DeviceGetCapsTest, givenSupportImagesWhenCapsAreCreatedThenDeviceReportsPackedYuvAndNV12Extensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(true); DebugManager.flags.EnableNV12.set(true); HardwareInfo hwInfo = *defaultHwInfo; 
hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_packed_yuv"))); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_planar_yuv"))); } TEST_F(DeviceGetCapsTest, givenNotSupportImagesWhenCapsAreCreatedThenDeviceNotReportsPackedYuvAndNV12Extensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(true); DebugManager.flags.EnableNV12.set(true); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_packed_yuv")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_planar_yuv")))); } TEST_F(DeviceGetCapsTest, givenEnableNV12setToFalseWhenCapsAreCreatedThenDeviceDoesNotReportNV12Extension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableNV12.set(false); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_planar_yuv")))); EXPECT_FALSE(caps.nv12Extension); } TEST_F(DeviceGetCapsTest, givenEnablePackedYuvsetToFalseWhenCapsAreCreatedThenDeviceDoesNotReportPackedYuvExtension) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnablePackedYuv.set(false); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_packed_yuv")))); 
EXPECT_FALSE(caps.packedYuvExtension); } TEST_F(DeviceGetCapsTest, givenEnableVmeSetToTrueAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceReportsVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_motion_estimation"))); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_device_side_avc_motion_estimation"))); EXPECT_TRUE(caps.vmeExtension); EXPECT_THAT(caps.builtInKernels, testing::HasSubstr("block_motion_estimate_intel")); } TEST_F(DeviceGetCapsTest, givenEnableVmeSetToTrueAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceReportsVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_motion_estimation"))); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_device_side_avc_motion_estimation"))); EXPECT_TRUE(caps.vmeExtension); EXPECT_THAT(caps.builtInKernels, testing::HasSubstr("block_motion_estimate_intel")); } TEST_F(DeviceGetCapsTest, givenEnableVmeSetToFalseAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceDoesNotReportVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); 
EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_motion_estimation")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_device_side_avc_motion_estimation")))); EXPECT_FALSE(caps.vmeExtension); EXPECT_THAT(caps.builtInKernels, testing::Not(testing::HasSubstr("block_motion_estimate_intel"))); } TEST_F(DeviceGetCapsTest, givenEnableVmeSetToFalseAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceDoesNotReportVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_motion_estimation")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_device_side_avc_motion_estimation")))); EXPECT_FALSE(caps.vmeExtension); EXPECT_THAT(caps.builtInKernels, testing::Not(testing::HasSubstr("block_motion_estimate_intel"))); } TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToTrueAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceReportsAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_advanced_motion_estimation"))); EXPECT_THAT(caps.builtInKernels, testing::HasSubstr("block_advanced_motion_estimate_check_intel")); EXPECT_THAT(caps.builtInKernels, testing::HasSubstr("block_advanced_motion_estimate_bidirectional_check_intel")); } TEST_F(DeviceGetCapsTest, 
givenDeviceCapsSupportFor64BitAtomicsFollowsHardwareCapabilities) { auto hwInfo = *defaultHwInfo; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); if (hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics) { EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr("cl_khr_int64_base_atomics ")); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr("cl_khr_int64_extended_atomics ")); } else { EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr("cl_khr_int64_base_atomics "))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr("cl_khr_int64_extended_atomics "))); } } TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToTrueAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceReportAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(1); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_advanced_motion_estimation"))); EXPECT_THAT(caps.builtInKernels, testing::HasSubstr("block_advanced_motion_estimate_check_intel")); EXPECT_THAT(caps.builtInKernels, testing::HasSubstr("block_advanced_motion_estimate_bidirectional_check_intel")); } TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToFalseAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceDoesNotReportAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, 
testing::Not(testing::HasSubstr(std::string("cl_intel_advanced_motion_estimation")))); EXPECT_THAT(caps.builtInKernels, testing::Not(testing::HasSubstr("block_advanced_motion_estimate_check_intel"))); EXPECT_THAT(caps.builtInKernels, testing::Not(testing::HasSubstr("block_advanced_motion_estimate_bidirectional_check_intel"))); } TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToFalseAndDeviceSupportsVmeWhenCapsAreCreatedThenDeviceDoesNotReportAdvancedVmeExtensionAndBuiltins) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableIntelAdvancedVme.set(0); auto hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsVme = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_advanced_motion_estimation")))); EXPECT_THAT(caps.builtInKernels, testing::Not(testing::HasSubstr("block_advanced_motion_estimate_check_intel"))); EXPECT_THAT(caps.builtInKernels, testing::Not(testing::HasSubstr("block_advanced_motion_estimate_bidirectional_check_intel"))); } TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenVmeIsEnabled) { DebugSettingsManager freshDebugSettingsManager(""); EXPECT_TRUE(freshDebugSettingsManager.flags.EnableIntelVme.get()); } TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenPriorityHintsExtensionIsReported) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_priority_hints"))); } TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenCreateCommandQueueExtensionIsReported) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_create_command_queue"))); } 
// A device created from defaultHwInfo must list "cl_khr_throttle_hints".
TEST_F(DeviceGetCapsTest, WhenDeviceIsCreatedThenThrottleHintsExtensionIsReported) {
    auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &deviceInfo = clDevice->getDeviceInfo();

    const std::string extensionName("cl_khr_throttle_hints");
    EXPECT_THAT(deviceInfo.deviceExtensions, testing::HasSubstr(extensionName));
}

// A device created from defaultHwInfo must list "cl_intel_subgroups_char".
TEST_F(DeviceGetCapsTest, GivenAnyDeviceWhenCheckingExtensionsThenSupportSubgroupsChar) {
    auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &deviceInfo = clDevice->getDeviceInfo();

    const std::string extensionName("cl_intel_subgroups_char");
    EXPECT_THAT(deviceInfo.deviceExtensions, testing::HasSubstr(extensionName));
}

// A device created from defaultHwInfo must list "cl_intel_subgroups_long".
TEST_F(DeviceGetCapsTest, GivenAnyDeviceWhenCheckingExtensionsThenSupportSubgroupsLong) {
    auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &deviceInfo = clDevice->getDeviceInfo();

    const std::string extensionName("cl_intel_subgroups_long");
    EXPECT_THAT(deviceInfo.deviceExtensions, testing::HasSubstr(extensionName));
}

// ForceOCLVersion = 20: the two mipmap extension names are expected exactly when
// the device's capabilityTable.supportsImages is set, and
// "cl_intel_unified_shared_memory_preview" is expected in either case.
TEST_F(DeviceGetCapsTest, givenAtleastOCL2DeviceThenExposesMipMapAndUnifiedMemoryExtensions) {
    DebugManagerStateRestore flagsGuard;
    DebugManager.flags.ForceOCLVersion.set(20);

    auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    const auto &deviceInfo = clDevice->getDeviceInfo();
    std::string reportedExtensions = deviceInfo.deviceExtensions;

    const std::string mipmapImage("cl_khr_mipmap_image");
    const std::string mipmapImageWrites("cl_khr_mipmap_image_writes");

    if (!clDevice->getHardwareInfo().capabilityTable.supportsImages) {
        EXPECT_EQ(std::string::npos, reportedExtensions.find(mipmapImage));
        EXPECT_EQ(std::string::npos, reportedExtensions.find(mipmapImageWrites));
    } else {
        EXPECT_THAT(reportedExtensions, testing::HasSubstr(mipmapImage));
        EXPECT_THAT(reportedExtensions, testing::HasSubstr(mipmapImageWrites));
    }

    EXPECT_THAT(deviceInfo.deviceExtensions, testing::HasSubstr(std::string("cl_intel_unified_shared_memory_preview")));
}

TEST_F(DeviceGetCapsTest,
givenSupportImagesWhenCapsAreCreatedThenDeviceReportsMinMapExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_mipmap_image"))); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_mipmap_image_writes"))); } TEST_F(DeviceGetCapsTest, givenNotSupportImagesWhenCapsAreCreatedThenDeviceNotReportsMinMapExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_mipmap_image")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_mipmap_image_writes")))); } TEST_F(DeviceGetCapsTest, givenOCL12DeviceThenDoesNotExposesMipMapAndUnifiedMemoryExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(12); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_mipmap_image")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_khr_mipmap_image_writes")))); EXPECT_THAT(caps.deviceExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_unified_shared_memory_preview")))); } TEST_F(DeviceGetCapsTest, givenSupporteImagesWhenCreateExtentionsListThenDeviceReportsImagesExtensions) { DebugManagerStateRestore 
dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto extensions = device->getDeviceInfo().deviceExtensions; EXPECT_THAT(extensions, testing::HasSubstr(std::string("cl_khr_image2d_from_buffer"))); EXPECT_THAT(extensions, testing::HasSubstr(std::string("cl_khr_depth_images"))); EXPECT_THAT(extensions, testing::HasSubstr(std::string("cl_intel_media_block_io"))); } TEST_F(DeviceGetCapsTest, givenNotSupporteImagesWhenCreateExtentionsListThenDeviceNotReportsImagesExtensions) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceOCLVersion.set(20); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); const auto extensions = device->getDeviceInfo().deviceExtensions; EXPECT_THAT(extensions, testing::Not(testing::HasSubstr(std::string("cl_khr_image2d_from_buffer")))); EXPECT_THAT(extensions, testing::Not(testing::HasSubstr(std::string("cl_khr_depth_images")))); EXPECT_THAT(extensions, testing::Not(testing::HasSubstr(std::string("cl_intel_media_block_io")))); } TEST_F(DeviceGetCapsTest, givenDeviceThatDoesntHaveFp64ThenExtensionIsNotReported) { HardwareInfo nonFp64Device = *defaultHwInfo; nonFp64Device.capabilityTable.ftrSupportsFP64 = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&nonFp64Device)); const auto &caps = device->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } TEST_F(DeviceGetCapsTest, givenDeviceWhenGettingHostUnifiedMemoryCapThenItDependsOnLocalMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = 
device->getDeviceInfo(); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); auto localMemoryEnabled = hwHelper.isLocalMemoryEnabled(*defaultHwInfo); EXPECT_EQ((localMemoryEnabled == false), caps.hostUnifiedMemory); } TEST(DeviceGetCaps, givenDeviceThatDoesntHaveFp64WhenDbgFlagEnablesFp64ThenReportFp64Flags) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.OverrideDefaultFP64Settings.set(1); HardwareInfo nonFp64Device = *defaultHwInfo; nonFp64Device.capabilityTable.ftrSupportsFP64 = false; nonFp64Device.capabilityTable.ftrSupports64BitMath = false; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&nonFp64Device)); const auto &caps = device->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_NE(0u, caps.doubleFpConfig); cl_device_fp_config actualSingleFp = caps.singleFpConfig & static_cast(CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); cl_device_fp_config expectedSingleFp = static_cast(CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); EXPECT_EQ(expectedSingleFp, actualSingleFp); } TEST(DeviceGetCaps, givenDeviceThatDoesHaveFp64WhenDbgFlagDisablesFp64ThenDontReportFp64Flags) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.OverrideDefaultFP64Settings.set(0); HardwareInfo fp64Device = *defaultHwInfo; fp64Device.capabilityTable.ftrSupportsFP64 = true; fp64Device.capabilityTable.ftrSupports64BitMath = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&fp64Device)); const auto &caps = device->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); cl_device_fp_config actualSingleFp = caps.singleFpConfig & static_cast(CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); cl_device_fp_config notExpectedSingleFp = static_cast(CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); 
EXPECT_NE(notExpectedSingleFp, actualSingleFp); } TEST(DeviceGetCaps, givenOclVersionLessThan21WhenCapsAreCreatedThenDeviceReportsNoSupportedIlVersions) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.ForceOCLVersion.set(12); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ("", caps.ilVersion); DebugManager.flags.ForceOCLVersion.set(0); } } TEST(DeviceGetCaps, givenOclVersion21WhenCapsAreCreatedThenDeviceReportsSpirvAsSupportedIl) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.ForceOCLVersion.set(21); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ("SPIR-V_1.2 ", caps.ilVersion); DebugManager.flags.ForceOCLVersion.set(0); } } TEST(DeviceGetCaps, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.ftrPooledEuEnabled = 0; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto expectedMaxWGS = (mySysInfo.EUCount / mySysInfo.SubSliceCount) * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8; expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } HWTEST_F(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; FeatureTable &mySkuTable = myHwInfo.featureTable; mySysInfo.EUCount = 20; mySysInfo.EuCountPerPoolMin = 99999; mySkuTable.ftrPooledEuEnabled = 1; auto device = 
std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto expectedMaxWGS = mySysInfo.EuCountPerPoolMin * (mySysInfo.ThreadCount / mySysInfo.EUCount) * 8; expectedMaxWGS = std::min(Math::prevPowerOfTwo(expectedMaxWGS), 1024u); EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize); } TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCapsAreCreatedThen1024WorkgroupSizeIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.set(true); HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; mySysInfo.EUCount = 24; mySysInfo.SubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(1024u, device->getSharedDeviceInfo().maxWorkGroupSize); EXPECT_EQ(device->getSharedDeviceInfo().maxWorkGroupSize / 32, device->getDeviceInfo().maxNumOfSubGroups); } TEST(DeviceGetCaps, givenDebugFlagToUseCertainWorkgroupSizeWhenDeviceIsCreatedItHasCeratinWorkgroupSize) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.OverrideMaxWorkgroupSize.set(16u); HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; mySysInfo.EUCount = 24; mySysInfo.SubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(16u, device->getDeviceInfo().maxWorkGroupSize); } HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; auto &hwHelper = HwHelper::get(myHwInfo.platform.eRenderCoreFamily); mySysInfo.EUCount = 32; mySysInfo.SubSliceCount = 2; mySysInfo.ThreadCount = 32 * hwHelper.getMinimalSIMDSize(); // 128 threads per subslice, in simd 8 gives 1024 auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); EXPECT_EQ(1024u, device->getSharedDeviceInfo().maxWorkGroupSize); EXPECT_EQ(device->getSharedDeviceInfo().maxWorkGroupSize / hwHelper.getMinimalSIMDSize(), device->getDeviceInfo().maxNumOfSubGroups); } class DriverInfoMock : public DriverInfo { public: DriverInfoMock(){}; const static std::string testDeviceName; const static std::string testVersion; std::string getDeviceName(std::string defaultName) override { return testDeviceName; }; std::string getVersion(std::string defaultVersion) override { return testVersion; }; }; const std::string DriverInfoMock::testDeviceName = "testDeviceName"; const std::string DriverInfoMock::testVersion = "testVersion"; TEST_F(DeviceGetCapsTest, givenSystemWithDriverInfoWhenGettingNameAndVersionThenReturnValuesFromDriverInfo) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); DriverInfoMock *driverInfoMock = new DriverInfoMock(); device->driverInfo.reset(driverInfoMock); device->initializeCaps(); const auto &caps = device->getDeviceInfo(); EXPECT_STREQ(DriverInfoMock::testDeviceName.c_str(), caps.name); EXPECT_STREQ(DriverInfoMock::testVersion.c_str(), caps.driverVersion); } TEST_F(DeviceGetCapsTest, givenSystemWithNoDriverInfoWhenGettingNameAndVersionThenReturnDefaultValues) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->driverInfo.reset(); device->name.clear(); device->initializeCaps(); const auto &caps = device->getDeviceInfo(); std::string tempName = "Intel(R) "; tempName += familyName[defaultHwInfo->platform.eRenderCoreFamily]; tempName += " HD Graphics NEO"; #define QTR(a) #a #define TOSTR(b) QTR(b) const std::string expectedVersion = TOSTR(NEO_OCL_DRIVER_VERSION); #undef QTR #undef TOSTR EXPECT_STREQ(tempName.c_str(), caps.name); EXPECT_STREQ(expectedVersion.c_str(), caps.driverVersion); } TEST_F(DeviceGetCapsTest, 
GivenFlagEnabled64kbPagesWhenSetThenReturnCorrectValue) { DebugManagerStateRestore dbgRestore; VariableBackup OsEnabled64kbPagesBackup(&OSInterface::osEnabled64kbPages); MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); auto &capabilityTable = executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable; std::unique_ptr memoryManager; DebugManager.flags.Enable64kbpages.set(-1); capabilityTable.ftr64KBpages = false; OSInterface::osEnabled64kbPages = false; memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_FALSE(memoryManager->peek64kbPagesEnabled(0u)); capabilityTable.ftr64KBpages = false; OSInterface::osEnabled64kbPages = true; memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_FALSE(memoryManager->peek64kbPagesEnabled(0u)); capabilityTable.ftr64KBpages = true; OSInterface::osEnabled64kbPages = false; memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_FALSE(memoryManager->peek64kbPagesEnabled(0u)); capabilityTable.ftr64KBpages = true; OSInterface::osEnabled64kbPages = true; memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_TRUE(memoryManager->peek64kbPagesEnabled(0u)); DebugManager.flags.Enable64kbpages.set(0); // force false memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_FALSE(memoryManager->peek64kbPagesEnabled(0u)); DebugManager.flags.Enable64kbpages.set(1); // force true memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_TRUE(memoryManager->peek64kbPagesEnabled(0u)); } TEST_F(DeviceGetCapsTest, givenUnifiedMemoryShardeSystemFlagWhenDeviceIsCreatedItContainsProperSystemMemorySetting) { DebugManagerStateRestore restorer; DebugManager.flags.EnableSharedSystemUsmSupport.set(0u); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(0u, 
device->getDeviceInfo().sharedSystemMemCapabilities); EXPECT_FALSE(device->areSharedSystemAllocationsAllowed()); DebugManager.flags.EnableSharedSystemUsmSupport.set(1u); device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}); cl_unified_shared_memory_capabilities_intel expectedProperties = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; EXPECT_EQ(expectedProperties, device->getDeviceInfo().sharedSystemMemCapabilities); EXPECT_TRUE(device->areSharedSystemAllocationsAllowed()); } TEST_F(DeviceGetCapsTest, givenDeviceWithNullSourceLevelDebuggerWhenCapsAreInitializedThenSourceLevelDebuggerActiveIsSetToFalse) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_EQ(nullptr, device->getDebugger()); EXPECT_FALSE(caps.debuggerActive); } TEST(Device_UseCaps, givenCapabilityTableWhenDeviceInitializeCapsThenVmeVersionsAreSetProperly) { HardwareInfo hwInfo = *defaultHwInfo; cl_uint expectedVmeVersion = CL_ME_VERSION_ADVANCED_VER_2_INTEL; cl_uint expectedVmeAvcVersion = CL_AVC_ME_VERSION_1_INTEL; hwInfo.capabilityTable.supportsVme = 0; hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler = 0; hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption = 0; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); { auto &caps = device->getDeviceInfo(); auto &sharedCaps = device->getSharedDeviceInfo(); EXPECT_EQ(0u, caps.vmeVersion); EXPECT_EQ(0u, caps.vmeAvcVersion); EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption, sharedCaps.vmeAvcSupportsPreemption); EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler, caps.vmeAvcSupportsTextureSampler); } hwInfo.capabilityTable.supportsVme = 1; hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler = 1; 
hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption = 1; device.reset(new MockClDevice{MockDevice::createWithNewExecutionEnvironment(&hwInfo)}); { auto &caps = device->getDeviceInfo(); auto &sharedCaps = device->getSharedDeviceInfo(); EXPECT_EQ(expectedVmeVersion, caps.vmeVersion); EXPECT_EQ(expectedVmeAvcVersion, caps.vmeAvcVersion); EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption, sharedCaps.vmeAvcSupportsPreemption); EXPECT_EQ(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler, caps.vmeAvcSupportsTextureSampler); } } typedef HwHelperTest DeviceCapsWithModifiedHwInfoTest; TEST_F(DeviceCapsWithModifiedHwInfoTest, givenPlatformWithSourceLevelDebuggerNotSupportedWhenDeviceIsCreatedThenSourceLevelDebuggerActiveIsSetToFalse) { hardwareInfo.capabilityTable.debuggerSupported = false; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo)); const auto &caps = device->getDeviceInfo(); EXPECT_EQ(nullptr, device->getDebugger()); EXPECT_FALSE(caps.debuggerActive); } compute-runtime-20.13.16352/opencl/test/unit_test/device/device_get_engine_tests.cpp000066400000000000000000000020141363734646600304370ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/options.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gtest/gtest.h" using namespace NEO; TEST(DeviceGenEngineTest, givenNonHwCsrModeWhenGetEngineThenDefaultEngineIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &internalEngine = 
device->getInternalEngine(); auto &defaultEngine = device->getDefaultEngine(); EXPECT_EQ(defaultEngine.commandStreamReceiver, internalEngine.commandStreamReceiver); } compute-runtime-20.13.16352/opencl/test/unit_test/device/device_tests.cpp000066400000000000000000000436321363734646600262660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include using namespace NEO; typedef Test DeviceTest; TEST_F(DeviceTest, givenDeviceWhenGetProductAbbrevThenReturnsHardwarePrefix) { const auto productAbbrev = pDevice->getProductAbbrev(); const auto hwPrefix = hardwarePrefix[pDevice->getHardwareInfo().platform.eProductFamily]; EXPECT_EQ(hwPrefix, productAbbrev); } TEST_F(DeviceTest, WhenDeviceIsCreatedThenCommandStreamReceiverIsNotNull) { EXPECT_NE(nullptr, &pDevice->getGpgpuCommandStreamReceiver()); } TEST_F(DeviceTest, WhenDeviceIsCreatedThenSupportedClVersionMatchesHardwareInfo) { auto version = pClDevice->getSupportedClVersion(); auto version2 = pDevice->getHardwareInfo().capabilityTable.clVersionSupport; EXPECT_EQ(version, version2); } TEST_F(DeviceTest, 
givenDeviceWhenEngineIsCreatedThenSetInitialValueForTag) { for (auto &engine : pDevice->engines) { auto tagAddress = engine.commandStreamReceiver->getTagAddress(); ASSERT_NE(nullptr, const_cast(tagAddress)); EXPECT_EQ(initialHardwareTag, *tagAddress); } } TEST_F(DeviceTest, givenDeviceWhenAskedForSpecificEngineThenRetrunIt) { auto &engines = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo); for (uint32_t i = 0; i < engines.size(); i++) { bool lowPriority = (HwHelper::lowPriorityGpgpuEngineIndex == i); auto &deviceEngine = pDevice->getEngine(engines[i], lowPriority); EXPECT_EQ(deviceEngine.osContext->getEngineType(), engines[i]); EXPECT_EQ(deviceEngine.osContext->isLowPriority(), lowPriority); } EXPECT_THROW(pDevice->getEngine(aub_stream::ENGINE_VCS, false), std::exception); } TEST_F(DeviceTest, givenDebugVariableToAlwaysChooseEngineZeroWhenNotExistingEngineSelectedThenIndexZeroEngineIsReturned) { DebugManagerStateRestore restore; DebugManager.flags.OverrideInvalidEngineWithDefault.set(true); auto &engines = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo); auto &deviceEngine = pDevice->getEngine(engines[0], false); auto ¬ExistingEngine = pDevice->getEngine(aub_stream::ENGINE_VCS, false); EXPECT_EQ(¬ExistingEngine, &deviceEngine); } TEST_F(DeviceTest, WhenDeviceIsCreatedThenOsTimeIsNotNull) { auto pDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); OSTime *osTime = pDevice->getOSTime(); ASSERT_NE(nullptr, osTime); } TEST_F(DeviceTest, GivenDebugVariableForcing32BitAllocationsWhenDeviceIsCreatedThenMemoryManagerHasForce32BitFlagSet) { DebugManager.flags.Force32bitAddressing.set(true); auto pDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); if (is64bit) { EXPECT_TRUE(pDevice->getDeviceInfo().force32BitAddressess); EXPECT_TRUE(pDevice->getMemoryManager()->peekForce32BitAllocations()); } else { 
EXPECT_FALSE(pDevice->getDeviceInfo().force32BitAddressess); EXPECT_FALSE(pDevice->getMemoryManager()->peekForce32BitAllocations()); } DebugManager.flags.Force32bitAddressing.set(false); } TEST_F(DeviceTest, WhenRetainingThenReferenceIsOneAndApiIsUsed) { ASSERT_NE(nullptr, pClDevice); pClDevice->retainApi(); pClDevice->retainApi(); pClDevice->retainApi(); ASSERT_EQ(1, pClDevice->getReference()); ASSERT_FALSE(pClDevice->releaseApi().isUnused()); ASSERT_EQ(1, pClDevice->getReference()); } HWTEST_F(DeviceTest, WhenDeviceIsCreatedThenActualEngineTypeIsSameAsDefault) { HardwareInfo hwInfo = *defaultHwInfo; if (hwInfo.capabilityTable.defaultEngineType == aub_stream::EngineType::ENGINE_CCS) { hwInfo.featureTable.ftrCCSNode = true; } auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); auto actualEngineType = device->getDefaultEngine().osContext->getEngineType(); auto defaultEngineType = device->getHardwareInfo().capabilityTable.defaultEngineType; EXPECT_EQ(&device->getDefaultEngine().commandStreamReceiver->getOsContext(), device->getDefaultEngine().osContext); EXPECT_EQ(defaultEngineType, actualEngineType); } TEST(DeviceCleanup, givenDeviceWhenItIsDestroyedThenFlushBatchedSubmissionsIsCalled) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockCommandStreamReceiver *csr = new MockCommandStreamReceiver(*mockDevice->getExecutionEnvironment(), mockDevice->getRootDeviceIndex()); mockDevice->resetCommandStreamReceiver(csr); int flushedBatchedSubmissionsCalledCount = 0; csr->flushBatchedSubmissionsCallCounter = &flushedBatchedSubmissionsCalledCount; mockDevice.reset(nullptr); EXPECT_EQ(1, flushedBatchedSubmissionsCalledCount); } TEST(DeviceCreation, givenSelectedAubCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsTrue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_AUB); VariableBackup backup(&ultHwConfig); 
ultHwConfig.useHwCsr = true; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(mockDevice->isSimulation()); } TEST(DeviceCreation, givenSelectedTbxCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsTrue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_TBX); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(device->isSimulation()); } TEST(DeviceCreation, givenSelectedTbxWithAubCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsTrue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_TBX_WITH_AUB); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_TRUE(device->isSimulation()); } TEST(DeviceCreation, givenHwWithAubCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsFalse) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SetCommandStreamReceiver.set(CommandStreamReceiverType::CSR_HW_WITH_AUB); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_FALSE(device->isSimulation()); } TEST(DeviceCreation, givenDefaultHwCsrInDebugVarsWhenDeviceIsCreatedThenIsSimulationReturnsFalse) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_FALSE(device->isSimulation()); } TEST(DeviceCreation, givenDeviceWhenItIsCreatedThenOsContextIsRegistredInMemoryManager) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto memoryManager = device->getMemoryManager(); auto &hwInfo = device->getHardwareInfo(); auto numEnginesForDevice = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo).size(); if 
(device->getNumAvailableDevices() > 1) { numEnginesForDevice *= device->getNumAvailableDevices(); numEnginesForDevice += device->engines.size(); } EXPECT_EQ(numEnginesForDevice, memoryManager->getRegisteredEnginesCount()); } TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachOsContextHasUniqueId) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); const size_t numDevices = 2; executionEnvironment->prepareRootDeviceEnvironments(numDevices); for (auto i = 0u; i < numDevices; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto hwInfo = *defaultHwInfo; const auto &numGpgpuEngines = static_cast(HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo).size()); auto device1 = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); auto ®isteredEngines = executionEnvironment->memoryManager->getRegisteredEngines(); EXPECT_EQ(numGpgpuEngines * numDevices, registeredEngines.size()); for (uint32_t i = 0; i < numGpgpuEngines; i++) { EXPECT_EQ(i, device1->engines[i].osContext->getContextId()); EXPECT_EQ(1u, device1->engines[i].osContext->getDeviceBitfield().to_ulong()); EXPECT_EQ(i + numGpgpuEngines, device2->engines[i].osContext->getContextId()); EXPECT_EQ(1u, device2->engines[i].osContext->getDeviceBitfield().to_ulong()); EXPECT_EQ(registeredEngines[i].commandStreamReceiver, device1->engines[i].commandStreamReceiver); EXPECT_EQ(registeredEngines[i + numGpgpuEngines].commandStreamReceiver, device2->engines[i].commandStreamReceiver); } EXPECT_EQ(numGpgpuEngines * numDevices, executionEnvironment->memoryManager->getRegisteredEnginesCount()); } TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateDeviceIndex) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); const size_t numDevices = 2; 
executionEnvironment->prepareRootDeviceEnvironments(numDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); EXPECT_EQ(0u, device->getRootDeviceIndex()); EXPECT_EQ(1u, device2->getRootDeviceIndex()); } TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachDeviceHasSeperateCommandStreamReceiver) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); const size_t numDevices = 2; executionEnvironment->prepareRootDeviceEnvironments(numDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto hwInfo = *defaultHwInfo; const auto &numGpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo).size(); auto device1 = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); EXPECT_EQ(numGpgpuEngines, device1->commandStreamReceivers.size()); EXPECT_EQ(numGpgpuEngines, device2->commandStreamReceivers.size()); for (uint32_t i = 0; i < static_cast(numGpgpuEngines); i++) { EXPECT_NE(device2->engines[i].commandStreamReceiver, device1->engines[i].commandStreamReceiver); } } HWTEST_F(DeviceTest, givenDeviceWhenAskingForDefaultEngineThenReturnValidValue) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); auto &hwHelper = HwHelperHw::get(); hwHelper.adjustDefaultEngineType(executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()); auto device = std::unique_ptr(Device::create(executionEnvironment, 0)); auto osContext = 
device->getDefaultEngine().osContext; EXPECT_EQ(device->getHardwareInfo().capabilityTable.defaultEngineType, osContext->getEngineType()); EXPECT_FALSE(osContext->isLowPriority()); } TEST(DeviceCreation, givenFtrSimulationModeFlagTrueWhenNoOtherSimulationFlagsArePresentThenIsSimulationReturnsTrue) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrSimulationMode = true; bool simulationFromDeviceId = hwInfo.capabilityTable.isSimulation(hwInfo.platform.usDeviceID); EXPECT_FALSE(simulationFromDeviceId); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); EXPECT_TRUE(device->isSimulation()); } TEST(DeviceCreation, givenDeviceWhenCheckingEnginesCountThenNumberGreaterThanZeroIsReturned) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_GT(HwHelper::getEnginesCount(device->getHardwareInfo()), 0u); } using DeviceHwTest = ::testing::Test; HWTEST_F(DeviceHwTest, givenHwHelperInputWhenInitializingCsrThenCreatePageTableManagerIfNeeded) { HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; localHwInfo.capabilityTable.ftrRenderCompressedImages = false; MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(3); executionEnvironment.incRefInternal(); for (auto i = 0u; i < executionEnvironment.rootDeviceEnvironments.size(); i++) { executionEnvironment.rootDeviceEnvironments[i]->setHwInfo(&localHwInfo); } executionEnvironment.initializeMemoryManager(); auto defaultEngineType = getChosenEngineType(localHwInfo); std::unique_ptr device; device.reset(MockDevice::createWithExecutionEnvironment(&localHwInfo, &executionEnvironment, 0)); auto &csr0 = device->getUltCommandStreamReceiver(); EXPECT_FALSE(csr0.createPageTableManagerCalled); auto hwInfo = executionEnvironment.rootDeviceEnvironments[1]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; 
hwInfo->capabilityTable.ftrRenderCompressedImages = false; device.reset(MockDevice::createWithExecutionEnvironment(&localHwInfo, &executionEnvironment, 1)); auto &csr1 = device->getUltCommandStreamReceiver(); EXPECT_EQ(csr1.needsPageTableManager(defaultEngineType), csr1.createPageTableManagerCalled); hwInfo = executionEnvironment.rootDeviceEnvironments[2]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; hwInfo->capabilityTable.ftrRenderCompressedImages = true; device.reset(MockDevice::createWithExecutionEnvironment(&localHwInfo, &executionEnvironment, 2)); auto &csr2 = device->getUltCommandStreamReceiver(); EXPECT_EQ(csr2.needsPageTableManager(defaultEngineType), csr2.createPageTableManagerCalled); } HWTEST_F(DeviceHwTest, givenDeviceCreationWhenCsrFailsToCreateGlobalSyncAllocationThenReturnNull) { class MockUltCsrThatFailsToCreateGlobalFenceAllocation : public UltCommandStreamReceiver { public: MockUltCsrThatFailsToCreateGlobalFenceAllocation(ExecutionEnvironment &executionEnvironment) : UltCommandStreamReceiver(executionEnvironment, 0) {} bool createGlobalFenceAllocation() override { return false; } }; class MockDeviceThatFailsToCreateGlobalFenceAllocation : public MockDevice { public: MockDeviceThatFailsToCreateGlobalFenceAllocation(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex) : MockDevice(executionEnvironment, deviceIndex) {} std::unique_ptr createCommandStreamReceiver() const override { return std::make_unique(*executionEnvironment); } }; auto executionEnvironment = platform()->peekExecutionEnvironment(); auto mockDevice(MockDevice::create(executionEnvironment, 0)); EXPECT_EQ(nullptr, mockDevice); } TEST(DeviceGenEngineTest, givenHwCsrModeWhenGetEngineThenDedicatedForInternalUsageEngineIsReturned) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &internalEngine = device->getInternalEngine(); auto &defaultEngine = device->getDefaultEngine(); 
EXPECT_NE(defaultEngine.commandStreamReceiver, internalEngine.commandStreamReceiver); auto internalEngineIndex = HwHelper::internalUsageEngineIndex; EXPECT_EQ(internalEngineIndex, internalEngine.osContext->getContextId()); } TEST(DeviceGenEngineTest, whenCreateDeviceThenInternalEngineHasDefaultType) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto internalEngineType = device->getInternalEngine().osContext->getEngineType(); auto defaultEngineType = getChosenEngineType(device->getHardwareInfo()); EXPECT_EQ(defaultEngineType, internalEngineType); } TEST(DeviceGenEngineTest, givenCreatedDeviceWhenRetrievingDefaultEngineThenOsContextHasDefaultFieldSet) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &defaultEngine = device->getDefaultEngine(); EXPECT_TRUE(defaultEngine.osContext->isDefaultContext()); } compute-runtime-20.13.16352/opencl/test/unit_test/device/device_timers_tests.cpp000066400000000000000000000060031363734646600276400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_ostime.h" #include "gtest/gtest.h" using namespace NEO; namespace ULT { TEST(MockOSTime, WhenSleepingThenDeviceAndHostTimerAreIncreased) { cl_ulong deviceTimestamp[2] = {0, 0}; cl_ulong hostTimestamp[2] = {0, 0}; auto mDev = MockDevice::createWithNewExecutionEnvironment(nullptr); mDev->setOSTime(new MockOSTime()); mDev->getDeviceAndHostTimer( &deviceTimestamp[0], &hostTimestamp[0]); std::this_thread::sleep_for(std::chrono::nanoseconds(1000)); mDev->getDeviceAndHostTimer( &deviceTimestamp[1], &hostTimestamp[1]); EXPECT_LT(deviceTimestamp[0], deviceTimestamp[1]); EXPECT_LT(hostTimestamp[0], hostTimestamp[1]); delete mDev; } TEST(MockOSTime, WhenGettingTimersThenDiffBetweenQueriesWithinAllowedError) { cl_ulong deviceTimestamp[2] = {0, 0}; 
cl_ulong hostTimestamp[2] = {0, 0}; cl_ulong hostOnlyTimestamp[2] = {0, 0}; cl_ulong hostDiff = 0; cl_ulong hostOnlyDiff = 0; cl_ulong observedDiff = 0; cl_ulong allowedDiff = 0; float allowedErr = 0.005f; auto mDev = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); mDev->getDeviceAndHostTimer( &deviceTimestamp[0], &hostTimestamp[0]); mDev->getHostTimer( &hostOnlyTimestamp[0]); mDev->getDeviceAndHostTimer( &deviceTimestamp[1], &hostTimestamp[1]); mDev->getHostTimer( &hostOnlyTimestamp[1]); hostDiff = hostTimestamp[1] - hostTimestamp[0]; hostOnlyDiff = hostOnlyTimestamp[1] - hostOnlyTimestamp[0]; EXPECT_LT(deviceTimestamp[0], deviceTimestamp[1]); EXPECT_LT(hostTimestamp[0], hostOnlyTimestamp[0]); EXPECT_LT(hostTimestamp[1], hostOnlyTimestamp[1]); if (hostOnlyDiff > hostDiff) { observedDiff = hostOnlyDiff - hostDiff; allowedDiff = (cl_ulong)(allowedErr * hostDiff); } else { observedDiff = hostDiff - hostOnlyDiff; allowedDiff = (cl_ulong)(allowedErr * hostOnlyDiff); } EXPECT_TRUE(observedDiff <= allowedDiff); } TEST(MockOSTime, WhenSleepingThenHostTimerIsIncreased) { cl_ulong hostTimestamp[2] = {0, 0}; auto mDev = MockDevice::createWithNewExecutionEnvironment(nullptr); mDev->setOSTime(new MockOSTime()); mDev->getHostTimer( &hostTimestamp[0]); std::this_thread::sleep_for(std::chrono::nanoseconds(1000)); mDev->getHostTimer( &hostTimestamp[1]); EXPECT_LT(hostTimestamp[0], hostTimestamp[1]); delete mDev; } TEST(MockOSTime, GivenNullWhenSettingOsTimeThenResolutionIsZero) { auto mDev = MockDevice::createWithNewExecutionEnvironment(nullptr); mDev->setOSTime(nullptr); double zeroRes; zeroRes = mDev->getPlatformHostTimerResolution(); EXPECT_EQ(zeroRes, 0.0); delete mDev; } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/device/device_win_timers_tests.cpp000066400000000000000000000023171363734646600305210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/execution_environment/root_device_environment.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_ostime.h" #include "opencl/test/unit_test/mocks/mock_ostime_win.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; namespace ULT { typedef ::testing::Test MockOSTimeWinTest; TEST_F(MockOSTimeWinTest, WhenCreatingTimerThenResolutionIsSetCorrectly) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddmMock = std::unique_ptr(new WddmMock(rootDeviceEnvironment)); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); wddmMock->init(); std::unique_ptr timeWin(new MockOSTimeWin(wddmMock.get())); double res = 0.0; res = timeWin->getDynamicDeviceTimerResolution(device->getHardwareInfo()); EXPECT_EQ(res, 1e+09); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/device/get_device_info_size_tests.cpp000066400000000000000000000230771363734646600311730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include using namespace NEO; struct GetDeviceInfoSize : public ::testing::TestWithParam> { void SetUp() override { param = GetParam(); } std::pair param; }; TEST_P(GetDeviceInfoSize, sizeIsValid) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( param.first, 0, nullptr, &sizeReturned); if (CL_SUCCESS != retVal) { ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param.first; } ASSERT_NE(0u, sizeReturned); 
EXPECT_EQ(param.second, sizeReturned); } std::pair deviceInfoParams2[] = { {CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint)}, {CL_DEVICE_AVAILABLE, sizeof(cl_bool)}, {CL_DEVICE_COMPILER_AVAILABLE, sizeof(cl_bool)}, {CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cl_device_fp_config)}, {CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool)}, {CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(cl_bool)}, {CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(cl_device_exec_capabilities)}, // {CL_DEVICE_EXTENSIONS, sizeof(char[])}, {CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(cl_device_mem_cache_type)}, {CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint)}, {CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(size_t)}, // {CL_DEVICE_IL_VERSION, sizeof(char[])}, {CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool)}, {CL_DEVICE_LINKER_AVAILABLE, sizeof(cl_bool)}, {CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cl_device_local_mem_type)}, {CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint)}, {CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint)}, {CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(size_t)}, {CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong)}, {CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(cl_uint)}, {CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(cl_uint)}, {CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(cl_uint)}, {CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t)}, {CL_DEVICE_MAX_PIPE_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_SAMPLERS, sizeof(cl_uint)}, {CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t)}, {CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint)}, {CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t[3])}, {CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint)}, // {CL_DEVICE_NAME, sizeof(char[])}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, 
sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint)}, {CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, sizeof(cl_uint)}, // {CL_DEVICE_OPENCL_C_VERSION, sizeof(char[])}, {CL_DEVICE_PARENT_DEVICE, sizeof(cl_device_id)}, {CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(cl_device_affinity_domain)}, {CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(cl_uint)}, {CL_DEVICE_PARTITION_PROPERTIES, sizeof(cl_device_partition_property[2])}, {CL_DEVICE_PARTITION_TYPE, sizeof(cl_device_partition_property[3])}, {CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, sizeof(cl_uint)}, {CL_DEVICE_PIPE_MAX_PACKET_SIZE, sizeof(cl_uint)}, {CL_DEVICE_PLATFORM, sizeof(cl_platform_id)}, {CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, sizeof(cl_bool)}, {CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint)}, {CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, sizeof(cl_uint)}, {CL_DEVICE_PRINTF_BUFFER_SIZE, sizeof(size_t)}, // {CL_DEVICE_PROFILE, sizeof(char[])}, {CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(size_t)}, {CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(cl_uint)}, {CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, sizeof(cl_uint)}, {CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, sizeof(cl_command_queue_properties)}, {CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(cl_command_queue_properties)}, {CL_DEVICE_REFERENCE_COUNT, sizeof(cl_uint)}, {CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config)}, // {CL_DEVICE_SPIR_VERSIONS, sizeof(char[])}, 
{CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, sizeof(cl_bool)}, {CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities)}, // {CL_DEVICE_TERMINATE_CAPABILITY_KHR, sizeof(cl_device_terminate_capability_khr)}, {CL_DEVICE_TYPE, sizeof(cl_device_type)}, // {CL_DEVICE_VENDOR, sizeof(char[])}, {CL_DEVICE_VENDOR_ID, sizeof(cl_uint)}, // {CL_DEVICE_VERSION, sizeof(char[])}, // {CL_DRIVER_VERSION, sizeof(char[])}, }; INSTANTIATE_TEST_CASE_P( Device_, GetDeviceInfoSize, testing::ValuesIn(deviceInfoParams2)); struct GetDeviceInfoForImage : public GetDeviceInfoSize {}; TEST_P(GetDeviceInfoForImage, imageInfoSizeIsValid) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); if (!device->getSharedDeviceInfo().imageSupport) { GTEST_SKIP(); } size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( param.first, 0, nullptr, &sizeReturned); if (CL_SUCCESS != retVal) { ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param.first; } ASSERT_NE(0u, sizeReturned); EXPECT_EQ(param.second, sizeReturned); } TEST_P(GetDeviceInfoForImage, whenImageAreNotSupportedThenClSuccessAndSizeofCluintIsReturned) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); if (device->getSharedDeviceInfo().imageSupport) { GTEST_SKIP(); } size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( param.first, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(param.second, sizeReturned); } TEST_P(GetDeviceInfoForImage, givenInfoImageParamsWhenCallGetDeviceInfoForImageThenSizeIsValidAndTrueReturned) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); size_t srcSize = 0; size_t retSize = 0; const void *src = nullptr; auto retVal = device->getDeviceInfoForImage( param.first, src, srcSize, retSize); EXPECT_TRUE(retVal); ASSERT_NE(0u, srcSize); EXPECT_EQ(param.second, srcSize); EXPECT_EQ(param.second, retSize); } 
TEST(GetDeviceInfoForImage, givenNotImageParamWhenCallGetDeviceInfoForImageThenSizeIsNotValidAndFalseReturned) { auto device = std::make_unique(*MockDevice::createWithNewExecutionEnvironment(nullptr), platform()); size_t srcSize = 0; size_t retSize = 0; const void *src = nullptr; cl_device_info notImageParam = CL_DEVICE_ADDRESS_BITS; size_t paramSize = sizeof(cl_uint); auto retVal = device->getDeviceInfoForImage( notImageParam, src, srcSize, retSize); EXPECT_FALSE(retVal); EXPECT_EQ(0u, srcSize); EXPECT_NE(paramSize, srcSize); EXPECT_NE(paramSize, retSize); } std::pair deviceInfoImageParams[] = { {CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t)}, {CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t)}, {CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t)}, {CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t)}, {CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t)}, {CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof(size_t)}, {CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t)}, {CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint)}, {CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, sizeof(cl_uint)}, {CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(cl_uint)}, }; INSTANTIATE_TEST_CASE_P( Device_, GetDeviceInfoForImage, testing::ValuesIn(deviceInfoImageParams)); TEST(DeviceInfoTests, givenDefaultDeviceWhenQueriedForDeviceVersionThenProperSizeIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_VERSION, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(16u, sizeReturned); std::unique_ptr deviceVersion(new char[sizeReturned]); retVal = device->getDeviceInfo( CL_DEVICE_VERSION, sizeReturned, deviceVersion.get(), nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } 
compute-runtime-20.13.16352/opencl/test/unit_test/device/get_device_info_tests.cpp000066400000000000000000000461041363734646600301350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" #include "test.h" #include "gtest/gtest.h" #include using namespace NEO; TEST(GetDeviceInfo, GivenInvalidParamsWhenGettingDeviceInfoThenInvalidValueErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto retVal = device->getDeviceInfo( 0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForBdwPlusThenClSuccessIsReturned) { std::vector params = { {CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL)}, {CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, 0}, {CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, 0}}; check(params); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionDisabledAndSupportImageEnabledWhenGettingPlanarYuvMaxWidthHeightThenInvalidValueErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; device->deviceInfo.nv12Extension = false; uint32_t value; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = device->getDeviceInfo( 
CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionEnabledAndSupportImageEnabledWhenGettingPlanarYuvMaxWidthHeightThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; device->deviceInfo.nv12Extension = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(16384u, value); retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(16352u, value); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionDisabledAndSupportImageDisabledWhenGettingPlanarYuvMaxWidthHeightThenInvalidValueErrorIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; device->deviceInfo.nv12Extension = false; uint32_t value; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, 4, &value, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(GetDeviceInfo, GivenPlanarYuvExtensionEnabledAndSupportImageDisabledWhenGettingPlanarYuvMaxWidthHeightThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; device->deviceInfo.nv12Extension = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, 
value); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImage2dMaxWidthHeightThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImage2dMaxWidthHeightThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImage3dMaxWidthHeightDepthThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImage3dMaxWidthHeightDepthThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); 
device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImageMaxArgsThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = false; uint32_t value; auto retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); retVal = device->getDeviceInfo( CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageMaxArgsThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = device->getDeviceInfo( CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImageBaseAddressAlignmentThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); 
size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageBaseAddressAlignmentThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImageMaxArraySizeThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageMaxArraySizeThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImageMaxBufferSizeThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImageMaxBufferSizeThenCorrectValuesAreReturned) { auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenImageSupportDisabledWhenGettingImagePitchAlignmentThenZeroIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t value = 0; device->sharedDeviceInfo.imageSupport = false; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, value); } TEST(GetDeviceInfo, GivenImageSupportEnabledWhenGettingImagePitchAlignmentThenCorrectValuesAreReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->sharedDeviceInfo.imageSupport = true; size_t value = 0; auto retVal = device->getDeviceInfo( CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &value, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(GetDeviceInfo, GivenNumSimultaneousInteropsWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->simultaneousInterops = {0}; cl_uint value = 0; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint), &value, &size); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(0u, size); EXPECT_EQ(0u, value); device->simultaneousInterops = {1, 2, 3, 0}; retVal = device->getDeviceInfo(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint), &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), size); EXPECT_EQ(1u, value); } TEST(GetDeviceInfo, GivenSimultaneousInteropsWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); device->simultaneousInterops = {0}; 
cl_uint value[4] = {}; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint), &value, &size); EXPECT_EQ(CL_INVALID_VALUE, retVal); device->simultaneousInterops = {1, 2, 3, 0}; retVal = device->getDeviceInfo(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, sizeof(cl_uint) * 4u, &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint) * 4u, size); EXPECT_TRUE(memcmp(value, &device->simultaneousInterops[0], 4u * sizeof(cl_uint)) == 0); } TEST(GetDeviceInfo, GivenPreferredInteropsWhenGettingDeviceInfoThenCorrectValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_bool value = 0; size_t size = 0; auto retVal = device->getDeviceInfo(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, sizeof(cl_bool), &value, &size); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_bool), size); EXPECT_TRUE(value == 1u); } struct GetDeviceInfo : public ::testing::TestWithParam { void SetUp() override { param = GetParam(); } cl_device_info param; }; TEST_P(GetDeviceInfo, GivenValidParamsWhenGettingDeviceInfoThenSuccessIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); size_t sizeReturned = 0; auto retVal = device->getDeviceInfo( param, 0, nullptr, &sizeReturned); if (CL_SUCCESS != retVal) { ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param; } ASSERT_NE(0u, sizeReturned); auto *object = new char[sizeReturned]; retVal = device->getDeviceInfo( param, sizeReturned, object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete[] object; } // Define new command types to run the parameterized tests cl_device_info deviceInfoParams[] = { CL_DEVICE_ADDRESS_BITS, CL_DEVICE_AVAILABLE, CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, CL_DEVICE_AVC_ME_VERSION_INTEL, CL_DEVICE_BUILT_IN_KERNELS, CL_DEVICE_COMPILER_AVAILABLE, CL_DEVICE_IL_VERSION, // NOT_SUPPORTED // CL_DEVICE_TERMINATE_CAPABILITY_KHR, 
CL_DEVICE_DOUBLE_FP_CONFIG, CL_DEVICE_ENDIAN_LITTLE, CL_DEVICE_ERROR_CORRECTION_SUPPORT, CL_DEVICE_EXECUTION_CAPABILITIES, CL_DEVICE_EXTENSIONS, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, CL_DEVICE_GLOBAL_MEM_SIZE, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, CL_DEVICE_HALF_FP_CONFIG, CL_DEVICE_HOST_UNIFIED_MEMORY, CL_DEVICE_IMAGE_SUPPORT, CL_DEVICE_LINKER_AVAILABLE, CL_DEVICE_LOCAL_MEM_SIZE, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_CLOCK_FREQUENCY, CL_DEVICE_MAX_COMPUTE_UNITS, CL_DEVICE_MAX_CONSTANT_ARGS, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, CL_DEVICE_MAX_MEM_ALLOC_SIZE, CL_DEVICE_MAX_NUM_SUB_GROUPS, CL_DEVICE_MAX_ON_DEVICE_EVENTS, CL_DEVICE_MAX_ON_DEVICE_QUEUES, CL_DEVICE_MAX_PARAMETER_SIZE, CL_DEVICE_MAX_PIPE_ARGS, CL_DEVICE_MAX_SAMPLERS, CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MEM_BASE_ADDR_ALIGN, CL_DEVICE_ME_VERSION_INTEL, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, CL_DEVICE_NAME, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, CL_DEVICE_OPENCL_C_VERSION, CL_DEVICE_PARENT_DEVICE, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, CL_DEVICE_PARTITION_PROPERTIES, CL_DEVICE_PARTITION_TYPE, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, CL_DEVICE_PIPE_MAX_PACKET_SIZE, CL_DEVICE_PLATFORM, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, 
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PRINTF_BUFFER_SIZE, CL_DEVICE_PROFILE, CL_DEVICE_PROFILING_TIMER_RESOLUTION, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, CL_DEVICE_REFERENCE_COUNT, CL_DEVICE_SINGLE_FP_CONFIG, CL_DEVICE_SPIR_VERSIONS, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, CL_DEVICE_SUB_GROUP_SIZES_INTEL, CL_DEVICE_SVM_CAPABILITIES, CL_DEVICE_TYPE, CL_DEVICE_VENDOR, CL_DEVICE_VENDOR_ID, CL_DEVICE_VERSION, CL_DRIVER_VERSION, }; INSTANTIATE_TEST_CASE_P( Device_, GetDeviceInfo, testing::ValuesIn(deviceInfoParams)); compute-runtime-20.13.16352/opencl/test/unit_test/device/gl/000077500000000000000000000000001363734646600234735ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/device/gl/CMakeLists.txt000066400000000000000000000004111363734646600262270ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_device_gl ${CMAKE_CURRENT_SOURCE_DIR}/device_caps_gl_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_device_gl}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/device/gl/device_caps_gl_tests.cpp000066400000000000000000000020711363734646600303500ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" #include "gmock/gmock.h" #include using namespace NEO; TEST(Device_GetCaps, 
givenForceClGlSharingWhenCapsAreCreatedThenDeviceReportsClGlSharingExtension) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.AddClGlSharing.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_khr_gl_sharing "))); DebugManager.flags.AddClGlSharing.set(false); } } compute-runtime-20.13.16352/opencl/test/unit_test/device/sub_device_tests.cpp000066400000000000000000000230671363734646600271370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/sub_device.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; TEST(SubDevicesTest, givenDefaultConfigWhenCreateRootDeviceThenItDoesntContainSubDevices) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(0u, device->getNumSubDevices()); EXPECT_EQ(1u, device->getNumAvailableDevices()); } TEST(SubDevicesTest, givenCreateMultipleSubDevicesFlagSetWhenCreateRootDeviceThenItsSubdevicesHaveProperRootIdSet) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); EXPECT_EQ(0u, device->getRootDeviceIndex()); EXPECT_EQ(0u, 
device->subdevices.at(0)->getRootDeviceIndex()); EXPECT_EQ(0u, device->subdevices.at(0)->getSubDeviceIndex()); EXPECT_EQ(0u, device->subdevices.at(1)->getRootDeviceIndex()); EXPECT_EQ(1u, device->subdevices.at(1)->getSubDeviceIndex()); } TEST(SubDevicesTest, givenCreateMultipleSubDevicesFlagSetWhenCreateRootDeviceThenItContainsSubDevices) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); EXPECT_EQ(2u, device->getNumAvailableDevices()); EXPECT_EQ(1u, device->subdevices.at(0)->getNumAvailableDevices()); EXPECT_EQ(1u, device->subdevices.at(1)->getNumAvailableDevices()); } TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceRefcountsAreChangedThenChangeIsPropagatedToRootDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); initPlatform(); auto nonDefaultPlatform = std::make_unique(*platform()->peekExecutionEnvironment()); nonDefaultPlatform->initializeWithNewDevices(); auto device = nonDefaultPlatform->getClDevice(0); auto defaultDevice = platform()->getClDevice(0); auto subDevice = device->getDeviceById(1); auto baseDeviceApiRefCount = device->getRefApiCount(); auto baseDeviceInternalRefCount = device->getRefInternalCount(); auto baseSubDeviceApiRefCount = subDevice->getRefApiCount(); auto baseSubDeviceInternalRefCount = subDevice->getRefInternalCount(); auto baseDefaultDeviceApiRefCount = defaultDevice->getRefApiCount(); auto baseDefaultDeviceInternalRefCount = defaultDevice->getRefInternalCount(); subDevice->retainApi(); EXPECT_EQ(baseDeviceApiRefCount, device->getRefApiCount()); EXPECT_EQ(baseDeviceInternalRefCount + 1, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceApiRefCount + 
1, subDevice->getRefApiCount()); EXPECT_EQ(baseSubDeviceInternalRefCount + 1, subDevice->getRefInternalCount()); EXPECT_EQ(baseDefaultDeviceApiRefCount, defaultDevice->getRefApiCount()); EXPECT_EQ(baseDefaultDeviceInternalRefCount, defaultDevice->getRefInternalCount()); subDevice->releaseApi(); EXPECT_EQ(baseDeviceApiRefCount, device->getRefApiCount()); EXPECT_EQ(baseDeviceInternalRefCount, device->getRefInternalCount()); EXPECT_EQ(baseSubDeviceApiRefCount, subDevice->getRefApiCount()); EXPECT_EQ(baseSubDeviceInternalRefCount, subDevice->getRefInternalCount()); EXPECT_EQ(baseDefaultDeviceApiRefCount, defaultDevice->getRefApiCount()); EXPECT_EQ(baseDefaultDeviceInternalRefCount, defaultDevice->getRefInternalCount()); } TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceCreationFailThenWholeDeviceIsDestroyed) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(10); MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.incRefInternal(); executionEnvironment.memoryManager.reset(new FailMemoryManager(10, executionEnvironment)); auto device = Device::create(&executionEnvironment, 0u); EXPECT_EQ(nullptr, device); } TEST(SubDevicesTest, givenCreateMultipleRootDevicesFlagsEnabledWhenDevicesAreCreatedThenEachHasUniqueDeviceIndex) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(2); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; initPlatform(); EXPECT_EQ(0u, platform()->getClDevice(0)->getRootDeviceIndex()); EXPECT_EQ(1u, platform()->getClDevice(1)->getRootDeviceIndex()); } TEST(SubDevicesTest, givenRootDeviceWithSubDevicesWhenOsContextIsCreatedThenItsBitfieldBasesOnSubDevicesCount) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = 
std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); uint32_t rootDeviceBitfield = 0b11; EXPECT_EQ(rootDeviceBitfield, static_cast(device->getDefaultEngine().osContext->getDeviceBitfield().to_ulong())); } TEST(SubDevicesTest, givenSubDeviceWhenOsContextIsCreatedThenItsBitfieldBasesOnSubDeviceId) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); auto firstSubDevice = static_cast(device->subdevices.at(0)); auto secondSubDevice = static_cast(device->subdevices.at(1)); uint32_t firstSubDeviceMask = (1u << 0); uint32_t secondSubDeviceMask = (1u << 1); EXPECT_EQ(firstSubDeviceMask, static_cast(firstSubDevice->getDefaultEngine().osContext->getDeviceBitfield().to_ulong())); EXPECT_EQ(secondSubDeviceMask, static_cast(secondSubDevice->getDefaultEngine().osContext->getDeviceBitfield().to_ulong())); } TEST(SubDevicesTest, givenDeviceWithoutSubDevicesWhenGettingDeviceByIdZeroThenGetThisDevice) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(1u, device->getNumAvailableDevices()); EXPECT_EQ(device.get(), device->getDeviceById(0u)); EXPECT_THROW(device->getDeviceById(1), std::exception); } TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenGettingDeviceByIdThenGetCorrectSubDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); EXPECT_EQ(device->subdevices.at(0), device->getDeviceById(0)); EXPECT_EQ(device->subdevices.at(1), 
device->getDeviceById(1)); EXPECT_THROW(device->getDeviceById(2), std::exception); } TEST(SubDevicesTest, givenSubDevicesWhenGettingDeviceByIdZeroThenGetThisSubDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); EXPECT_EQ(2u, device->getNumSubDevices()); auto subDevice = device->subdevices.at(0); EXPECT_EQ(subDevice, subDevice->getDeviceById(0)); EXPECT_THROW(subDevice->getDeviceById(1), std::exception); } TEST(RootDevicesTest, givenRootDeviceWithoutSubdevicesWhenCreateEnginesThenDeviceCreatesCorrectNumberOfEngines) { auto hwInfo = *defaultHwInfo; auto &gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo); auto executionEnvironment = new MockExecutionEnvironment; MockDevice device(executionEnvironment, 0); EXPECT_EQ(0u, device.engines.size()); device.createEngines(); EXPECT_EQ(gpgpuEngines.size(), device.engines.size()); } TEST(RootDevicesTest, givenRootDeviceWithSubdevicesWhenCreateEnginesThenDeviceCreatesSpecialEngine) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(2); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto executionEnvironment = new MockExecutionEnvironment; MockDevice device(executionEnvironment, 0); device.subdevices.resize(2u); EXPECT_EQ(0u, device.engines.size()); device.createEngines(); EXPECT_EQ(1u, device.engines.size()); } compute-runtime-20.13.16352/opencl/test/unit_test/device_queue/000077500000000000000000000000001363734646600242755ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/device_queue/CMakeLists.txt000066400000000000000000000006261363734646600270410ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
set(IGDRCL_SRCS_tests_device_queue ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_queue_info_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_device_queue}) compute-runtime-20.13.16352/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp000066400000000000000000001104671363734646600313750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include using namespace NEO; using namespace DeviceHostQueue; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenResettingDeviceQueueThenQueueMatchesUnderlyingBuffer) { // profiling disabled deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto deviceQueueHw = castToHwType(deviceQueue); auto expected = getExpectedgilCmdQueueAfterReset(deviceQueue); deviceQueueHw->resetDeviceQueue(); EXPECT_EQ(0, memcmp(deviceQueueHw->getQueueBuffer()->getUnderlyingBuffer(), &expected, sizeof(IGIL_CommandQueue))); delete deviceQueue; //profiling enabled deviceQueue = createQueueObject(deviceQueueProperties::minimumPropertiesWithProfiling); ASSERT_NE(deviceQueue, nullptr); deviceQueueHw = castToHwType(deviceQueue); expected = 
getExpectedgilCmdQueueAfterReset(deviceQueue); deviceQueueHw->resetDeviceQueue(); EXPECT_EQ(1u, expected.m_controls.m_IsProfilingEnabled); EXPECT_EQ(0, memcmp(deviceQueue->getQueueBuffer()->getUnderlyingBuffer(), &expected, sizeof(IGIL_CommandQueue))); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenResettingDeviceQueueThenFirstStackElementAtValueOne) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto deviceQueueHw = castToHwType(deviceQueue); deviceQueueHw->resetDeviceQueue(); auto stack = static_cast(deviceQueue->getStackBuffer()->getUnderlyingBuffer()); stack += ((deviceQueue->getStackBuffer()->getUnderlyingBufferSize() / sizeof(uint32_t)) - 1); EXPECT_EQ(*stack, 1u); // first stack element in surface at value "1" delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenNullHardwareIsEnabledWhenAcquiringEmCrticalSectionThenSectionIsNotAcquired) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableNullHardware.set(1); deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto deviceQueueHw = castToHwType(deviceQueue); deviceQueueHw->acquireEMCriticalSection(); EXPECT_TRUE(deviceQueueHw->isEMCriticalSectionFree()); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenGettinCsPrefetchSizeThenSizeIsGreaterThanZero) { auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); EXPECT_NE(0u, mockDeviceQueueHw->getCSPrefetchSize()); delete mockDeviceQueueHw; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenAddLriCmdWithArbCheckWhenGettingSlbCsThenParamsAreCorrect) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); mockDeviceQueueHw->addLriCmd(true); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); hwParser.parseCommands(*slbCS, 0); auto loadRegImmItor = 
find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor); MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor; EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset()); EXPECT_EQ(0x100u, loadRegImm->getDataDword()); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), slbCS->getUsed()); delete mockDeviceQueueHw; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenAddLriCmdWithoutArbCheckWhenGettingSlbCsThenParamsAreCorrect) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); mockDeviceQueueHw->addLriCmd(false); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); hwParser.parseCommands(*slbCS, 0); auto loadRegImmItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor); MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor; EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset()); EXPECT_EQ(0u, loadRegImm->getDataDword()); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), slbCS->getUsed()); delete mockDeviceQueueHw; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, GivenDeviceQueueHWWhenEventPoolIsCreatedThenTimestampResolutionIsSet) { auto timestampResolution = static_cast(device->getProfilingTimerResolution()); auto deviceQueue = std::unique_ptr(createQueueObject()); ASSERT_NE(deviceQueue, nullptr); auto eventPoolBuffer = reinterpret_cast(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer()); EXPECT_FLOAT_EQ(timestampResolution, eventPoolBuffer->m_TimestampResolution); } class DeviceQueueSlb : public DeviceQueueHwTest { public: template void *compareCmds(void *position, Cmd &cmd) { EXPECT_EQ(0, memcmp(position, &cmd, sizeof(Cmd))); return ptrOffset(position, sizeof(Cmd)); } void *compareCmdsWithSize(void *position, void *cmd, size_t size) { EXPECT_EQ(0, memcmp(position, cmd, size)); return 
ptrOffset(position, size); } }; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenAllocatingSlbBufferThenCorrectSizeIsAllocated) { std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0])); LinearStream *slbCS = mockDeviceQueueHw->getSlbCS(); size_t expectedSize = (mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize()) * 128; expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); expectedSize += MockDeviceQueueHw::getExecutionModelCleanupSectionSize(); expectedSize += (4 * MemoryConstants::pageSize); EXPECT_LE(expectedSize, slbCS->getAvailableSpace()); } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbAfterResetThenCmdsAreCorrect) { auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); auto mockDeviceQueueHwWithProfiling = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumPropertiesWithProfiling[0]); LinearStream *slbCS = mockDeviceQueueHw->getSlbCS(); auto expectedSize = (mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize()) * 128; expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); mockDeviceQueueHw->resetDeviceQueue(); mockDeviceQueueHwWithProfiling->resetDeviceQueue(); EXPECT_EQ(slbCS->getUsed(), expectedSize); EXPECT_EQ(mockDeviceQueueHwWithProfiling->getSlbCS()->getUsed(), expectedSize); auto cmds = mockDeviceQueueHw->expectedCmds; auto cmdsWithProfiling = mockDeviceQueueHwWithProfiling->expectedCmds; void *currCmd = slbCS->getCpuBase(); void *currCmdWithProfiling = mockDeviceQueueHwWithProfiling->getSlbCS()->getCpuBase(); for (size_t i = 0; i < 128; i++) { currCmd = compareCmds(currCmd, cmds.mediaStateFlush); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaStateFlush); if (mockDeviceQueueHw->arbCheckWa) { currCmd = compareCmds(currCmd, 
cmds.arbCheck); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.arbCheck); } if (mockDeviceQueueHw->miAtomicWa) { currCmd = compareCmds(currCmd, cmds.miAtomic); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.miAtomic); } currCmd = compareCmds(currCmd, cmds.mediaIdLoad); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaIdLoad); if (mockDeviceQueueHw->lriWa) { currCmd = compareCmds(currCmd, cmds.lriTrue); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.lriTrue); } currCmd = compareCmds(currCmd, cmds.noopedPipeControl); // noop pipe control currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); if (mockDeviceQueueHw->pipeControlWa) { currCmd = compareCmds(currCmd, cmds.noopedPipeControl); // noop pipe control currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); } currCmd = compareCmds(currCmd, cmds.gpgpuWalker); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.gpgpuWalker); currCmd = compareCmds(currCmd, cmds.mediaStateFlush); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.mediaStateFlush); if (mockDeviceQueueHw->arbCheckWa) { currCmd = compareCmds(currCmd, cmds.arbCheck); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.arbCheck); } currCmd = compareCmds(currCmd, cmds.pipeControl); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); if (mockDeviceQueueHw->pipeControlWa) { currCmd = compareCmds(currCmd, cmds.pipeControl); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.pipeControl); } if (mockDeviceQueueHw->lriWa) { currCmd = compareCmds(currCmd, cmds.lriFalse); currCmdWithProfiling = compareCmds(currCmdWithProfiling, cmdsWithProfiling.lriFalse); } currCmd = compareCmdsWithSize(currCmd, cmds.prefetch, DeviceQueueHw::getCSPrefetchSize()); 
currCmdWithProfiling = compareCmdsWithSize(currCmdWithProfiling, cmdsWithProfiling.prefetch, DeviceQueueHw::getCSPrefetchSize()); } currCmd = compareCmds(currCmd, cmds.bbStart); currCmdWithProfiling = compareCmds(currCmdWithProfiling, mockDeviceQueueHwWithProfiling->expectedCmds.bbStart); delete mockDeviceQueueHw; delete mockDeviceQueueHwWithProfiling; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenOffsetIsCorrect) { auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); auto slb = mockDeviceQueueHw->getSlbBuffer(); auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); auto slbCopy = malloc(slb->getUnderlyingBufferSize()); memset(slb->getUnderlyingBuffer(), 0xFE, slb->getUnderlyingBufferSize()); memcpy(slbCopy, slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); // slbEndOffset < commandsSize * 128 // always fill only 1 enqueue (after offset) auto offset = static_cast(commandsSize) * 50; igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = offset; mockDeviceQueueHw->resetDeviceQueue(); EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, offset)); // dont touch memory before offset EXPECT_NE(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset), slbCopy, commandsSize)); // change 1 enqueue EXPECT_EQ(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset + commandsSize), slbCopy, offset)); // dont touch memory after (offset + 1 enqueue) compareCmds(ptrOffset(slb->getUnderlyingBuffer(), commandsSize * 128), mockDeviceQueueHw->expectedCmds.bbStart); // bbStart always on the same place // slbEndOffset == commandsSize * 128 // dont fill commands memset(slb->getUnderlyingBuffer(), 0xFEFEFEFE, slb->getUnderlyingBufferSize()); offset = static_cast(commandsSize) * 128; igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = static_cast(commandsSize); 
mockDeviceQueueHw->resetDeviceQueue(); EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, commandsSize * 128)); // dont touch memory for enqueues compareCmds(ptrOffset(slb->getUnderlyingBuffer(), commandsSize * 128), mockDeviceQueueHw->expectedCmds.bbStart); // bbStart always in the same place delete mockDeviceQueueHw; free(slbCopy); } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsCorrect) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext); uint32_t taskCount = 7; mockDeviceQueueHw->buildSlbDummyCommands(); uint64_t tagAddress = 0x123450000; mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, nullptr, tagAddress, taskCount); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize); size_t cleanupSectionOffsetToParse = cleanupSectionOffset; size_t slbUsed = slbCS->getUsed(); slbUsed = alignUp(slbUsed, MemoryConstants::pageSize); size_t slbMax = slbCS->getMaxAvailableSpace(); // 4 pages padding expected after cleanup section EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed); if (mockParentKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { cleanupSectionOffsetToParse += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) / 2; } hwParser.parseCommands(*slbCS, 
cleanupSectionOffsetToParse); hwParser.findHardwareCommands(); uint64_t cleanUpSectionAddress = mockDeviceQueueHw->getSlbBuffer()->getGpuAddress() + cleanupSectionOffset; EXPECT_EQ(cleanUpSectionAddress, igilCmdQueue->m_controls.m_CleanupSectionAddress); EXPECT_EQ(slbCS->getUsed() - cleanupSectionOffset, igilCmdQueue->m_controls.m_CleanupSectionSize); auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); bool tagWriteFound = false; while (auto pipeControlCmd = genCmdCast(*(++pipeControlItor))) { if (pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { auto expectedAddressLow = static_cast(tagAddress & 0x0000FFFFFFFFULL); auto expectedAddressHigh = static_cast(tagAddress >> 32); if ((expectedAddressLow == pipeControlCmd->getAddress()) && (expectedAddressHigh == pipeControlCmd->getAddressHigh())) { tagWriteFound = true; break; } } } EXPECT_TRUE(tagWriteFound); auto bbEndItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), bbEndItor); MI_BATCH_BUFFER_END *bbEnd = (MI_BATCH_BUFFER_END *)*bbEndItor; uint64_t bbEndAddres = (uint64_t)bbEnd; EXPECT_LE((uint64_t)mockDeviceQueueHw->getSlbBuffer()->getUnderlyingBuffer() + cleanupSectionOffset, bbEndAddres); delete mockParentKernel; delete mockDeviceQueueHw; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmCleanupSectionIsAdded) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); auto commandsSize = 
mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext); uint32_t taskCount = 7; auto hwTimeStamp = pCommandQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag(); mockDeviceQueueHw->buildSlbDummyCommands(); mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, 0x123, taskCount); uint64_t eventTimestampAddr = igilCmdQueue->m_controls.m_EventTimestampAddress; uint64_t contextCompleteAddr = hwTimeStamp->getGpuAddress() + offsetof(HwTimeStamps, ContextCompleteTS); EXPECT_EQ(contextCompleteAddr, eventTimestampAddr); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize); size_t cleanupSectionOffsetToParse = cleanupSectionOffset; hwParser.parseCommands(*slbCS, cleanupSectionOffsetToParse); hwParser.findHardwareCommands(); uint64_t cleanUpSectionAddress = mockDeviceQueueHw->getSlbBuffer()->getGpuAddress() + cleanupSectionOffset; EXPECT_EQ(cleanUpSectionAddress, igilCmdQueue->m_controls.m_CleanupSectionAddress); EXPECT_EQ(slbCS->getUsed() - cleanupSectionOffset, igilCmdQueue->m_controls.m_CleanupSectionSize); auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); if (mockParentKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages && GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) { auto loadRegImmItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), loadRegImmItor); pipeControlItor = find(loadRegImmItor, hwParser.cmdList.end()); pipeControlItor++; } EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); PIPE_CONTROL *pipeControl = (PIPE_CONTROL 
*)*pipeControlItor; EXPECT_NE(0u, pipeControl->getCommandStreamerStallEnable()); auto loadRegImmItor = find(pipeControlItor, hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), loadRegImmItor); MI_LOAD_REGISTER_IMM *loadRegImm = (MI_LOAD_REGISTER_IMM *)*loadRegImmItor; EXPECT_EQ(0x2248u, loadRegImm->getRegisterOffset()); EXPECT_EQ(0u, loadRegImm->getDataDword()); pipeControlItor++; EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); auto bbEndItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), bbEndItor); MI_BATCH_BUFFER_END *bbEnd = (MI_BATCH_BUFFER_END *)*bbEndItor; uint64_t bbEndAddres = (uint64_t)bbEnd; EXPECT_LE((uint64_t)mockDeviceQueueHw->getSlbBuffer()->getUnderlyingBuffer() + cleanupSectionOffset, bbEndAddres); delete mockParentKernel; delete mockDeviceQueueHw; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshBufferParamsAreCorrect) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto *devQueueHw = castToObject>(deviceQueue); auto heap = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, heap); auto dshBuffer = deviceQueue->getDshBuffer()->getUnderlyingBuffer(); auto dshBufferSize = deviceQueue->getDshBuffer()->getUnderlyingBufferSize(); auto size = heap->getAvailableSpace(); auto heapMemory = heap->getCpuBase(); // ExecutionModel DSH is offseted by colorCalcState, ParentKernel Interface Descriptor Data is located in first table just after colorCalcState EXPECT_EQ(dshBufferSize - DeviceQueue::colorCalcStateSize, size); EXPECT_EQ(dshBuffer, heapMemory); EXPECT_EQ(ptrOffset(dshBuffer, DeviceQueue::colorCalcStateSize), heap->getSpace(0)); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, WhenCreatingDeviceQueueThenDshOffsetIsCorrect) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; deviceQueue = 
createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto *devQueueHw = castToObject>(deviceQueue); size_t offsetDsh = sizeof(INTERFACE_DESCRIPTOR_DATA) * DeviceQueue::interfaceDescriptorEntries * DeviceQueue::numberOfIDTables + DeviceQueue::colorCalcStateSize; EXPECT_EQ(devQueueHw->getDshOffset(), offsetDsh); delete deviceQueue; } class DeviceQueueHwWithKernel : public ExecutionModelKernelFixture { public: void SetUp() override { ExecutionModelKernelFixture::SetUp(); cl_queue_properties properties[5] = { CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0, 0, 0}; cl_int errcodeRet = 0; clDevice = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; device = &clDevice->getDevice(); context = new MockContext(); ASSERT_NE(nullptr, context); devQueue = DeviceQueue::create(context, clDevice, *properties, errcodeRet); ASSERT_NE(nullptr, devQueue); } void TearDown() override { delete devQueue; delete context; delete clDevice; ExecutionModelKernelFixture::TearDown(); } Device *device; ClDevice *clDevice; DeviceQueue *devQueue; MockContext *context; }; HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectStateThenDshIsNotUsed) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_TRUE(pKernel->isParentKernel); pKernel->createReflectionSurface(); auto *devQueueHw = castToObject>(devQueue); ASSERT_NE(nullptr, devQueueHw); auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); auto usedBeforeSSH = ssh->getUsed(); auto usedBeforeDSH = dsh->getUsed(); devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, 1, false); auto usedAfterSSH = ssh->getUsed(); auto usedAfterDSH = 
dsh->getUsed(); EXPECT_GE(surfaceStateHeapSize, usedAfterSSH - usedBeforeSSH); EXPECT_EQ(0u, usedAfterDSH - usedBeforeDSH); alignedFree(ssh->getCpuBase()); delete ssh; } } HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenCorrectStartBlockIdIsSet) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_TRUE(pKernel->isParentKernel); pKernel->createReflectionSurface(); auto *devQueueHw = castToObject>(devQueue); ASSERT_NE(nullptr, devQueueHw); auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); uint32_t parentCount = 4; devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); EXPECT_EQ(parentCount, igilQueue->m_controls.m_StartBlockID); alignedFree(ssh->getCpuBase()); delete ssh; } } HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateThenDshValuesAreSetCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_TRUE(pKernel->isParentKernel); pKernel->createReflectionSurface(); MockContext mockContext; MockDeviceQueueHw *devQueueHw = new MockDeviceQueueHw(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); ASSERT_NE(nullptr, devQueueHw); auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), 
surfaceStateHeapSize); uint32_t parentCount = 1; devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); auto *igilQueue = reinterpret_cast(devQueueHw->getQueueBuffer()->getUnderlyingBuffer()); EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapStart, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); EXPECT_EQ(igilQueue->m_controls.m_DynamicHeapSizeInBytes, (uint32_t)devQueueHw->getDshBuffer()->getUnderlyingBufferSize()); EXPECT_EQ(igilQueue->m_controls.m_CurrentDSHoffset, devQueueHw->offsetDsh + alignUp((uint32_t)pKernel->getDynamicStateHeapSize(), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)); EXPECT_EQ(igilQueue->m_controls.m_ParentDSHOffset, devQueueHw->offsetDsh); alignedFree(ssh->getCpuBase()); delete ssh; delete devQueueHw; } } HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCallingSetupIndirectStateThenAllIddHaveBarriersEnabled) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { pKernel->createReflectionSurface(); MockContext mockContext; auto devQueueHw = std::make_unique>(&mockContext, clDevice, deviceQueueProperties::minimumProperties[0]); auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); uint32_t parentCount = 1; auto blockManager = pKernel->getProgram()->getBlockKernelManager(); auto iddCount = blockManager->getCount(); for (uint32_t i = 0; i < iddCount; i++) { ((SPatchExecutionEnvironment *)blockManager->getBlockKernelInfo(i)->patchInfo.executionEnvironment)->HasBarriers = 1u; } auto surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); auto ssh = std::make_unique(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); 
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); auto iddStartPtr = static_cast(ptrOffset(dsh->getCpuBase(), devQueueHw->colorCalcStateSize)); auto iddStartIndex = parentCount; for (uint32_t i = 0; i < iddCount; i++) { EXPECT_TRUE(iddStartPtr[iddStartIndex + i].getBarrierEnable()); } alignedFree(ssh->getCpuBase()); } } static const char *binaryFile = "simple_block_kernel"; static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; INSTANTIATE_TEST_CASE_P(DeviceQueueHwWithKernel, DeviceQueueHwWithKernel, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); typedef testing::Test TheSimplestDeviceQueueFixture; HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenResettingDeviceQueueThenEarlyReturnValuesAreSet) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SchedulerSimulationReturnInstance.set(3); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context; std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); mockDeviceQueueHw->resetDeviceQueue(); EXPECT_EQ(3u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturn); EXPECT_EQ(0u, mockDeviceQueueHw->getIgilQueue()->m_controls.m_SchedulerEarlyReturnCounter); } HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenAddihMediaStateClearCmdsThenCmdsAreAddedCorrectly) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context; std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); mockDeviceQueueHw->addMediaStateClearCmds(); 
hwParser.parseCommands(*slbCS, 0); hwParser.findHardwareCommands(); auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); if (mockDeviceQueueHw->pipeControlWa) { pipeControlItor++; EXPECT_NE(hwParser.cmdList.end(), pipeControlItor); } PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor; EXPECT_TRUE(pipeControl->getGenericMediaStateClear()); auto mediaVfeStateItor = find(pipeControlItor, hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), mediaVfeStateItor); } HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenAddingExecutionModelCleanupSectionThenMediaStateIsCleared) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; class MockDeviceQueueWithMediaStateClearRegistering : public MockDeviceQueueHw { public: MockDeviceQueueWithMediaStateClearRegistering(Context *context, ClDevice *device, cl_queue_properties &properties) : MockDeviceQueueHw(context, device, properties) { } bool addMediaStateClearCmdsCalled = false; void addMediaStateClearCmds() override { addMediaStateClearCmdsCalled = true; } }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); std::unique_ptr mockDeviceQueueHw(new MockDeviceQueueWithMediaStateClearRegistering(&context, device.get(), deviceQueueProperties::minimumProperties[0])); std::unique_ptr mockParentKernel(MockParentKernel::create(context)); uint32_t taskCount = 7; mockDeviceQueueHw->buildSlbDummyCommands(); EXPECT_FALSE(mockDeviceQueueHw->addMediaStateClearCmdsCalled); mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel.get(), nullptr, 0x123, taskCount); EXPECT_TRUE(mockDeviceQueueHw->addMediaStateClearCmdsCalled); } HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingMediaStateClearThenCmdsSizeIsCorrect) { using PIPE_CONTROL = typename 
FamilyType::PIPE_CONTROL; using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context; std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); size_t expectedSize = 2 * sizeof(PIPE_CONTROL) + sizeof(PIPE_CONTROL) + sizeof(MEDIA_VFE_STATE); EXPECT_EQ(expectedSize, MockDeviceQueueHw::getMediaStateClearCmdsSize()); } HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingExecutionModelCleanupThenSectionSizeIsCorrect) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_MATH = typename FamilyType::MI_MATH; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context; std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); size_t expectedSize = sizeof(PIPE_CONTROL) + 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL) + sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE); expectedSize += MockDeviceQueueHw::getProfilingEndCmdsSize(); expectedSize += MockDeviceQueueHw::getMediaStateClearCmdsSize(); expectedSize += 4 * sizeof(PIPE_CONTROL); expectedSize += sizeof(MI_BATCH_BUFFER_END); EXPECT_EQ(expectedSize, MockDeviceQueueHw::getExecutionModelCleanupSectionSize()); } HWCMDTEST_F(IGFX_GEN8_CORE, TheSimplestDeviceQueueFixture, WhenSettingProfilingEndThenCmdsSizeIsCorrect) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_STORE_REGISTER_MEM = typename 
FamilyType::MI_STORE_REGISTER_MEM; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context; std::unique_ptr> mockDeviceQueueHw(new MockDeviceQueueHw(&context, device.get(), deviceQueueProperties::minimumProperties[0])); size_t expectedSize = sizeof(PIPE_CONTROL) + sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_LOAD_REGISTER_IMM); EXPECT_EQ(expectedSize, MockDeviceQueueHw::getProfilingEndCmdsSize()); } compute-runtime-20.13.16352/opencl/test/unit_test/device_queue/device_queue_tests.cpp000066400000000000000000000304321363734646600306700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device_info.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/gen_common/matchers.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; using namespace DeviceHostQueue; using DeviceQueueSimpleTest = ::testing::Test; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSimpleTest, setupExecutionModelDispatchDoesNothing) { DeviceQueue devQueue; char buffer[20]; memset(buffer, 1, 20); size_t size = 20; IndirectHeap ssh(buffer, size); IndirectHeap dsh(buffer, size); devQueue.setupExecutionModelDispatch(ssh, dsh, nullptr, 0, 0, 0x123, 0, false); EXPECT_EQ(0u, ssh.getUsed()); for (uint32_t i = 0; i < 20; i++) { EXPECT_EQ(1, buffer[i]); } } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSimpleTest, nonUsedBaseMethods) { DeviceQueue devQueue; devQueue.resetDeviceQueue(); EXPECT_EQ(nullptr, devQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE)); } class DeviceQueueTest : public DeviceHostQueueFixture { public: using BaseClass = DeviceHostQueueFixture; void SetUp() 
override { BaseClass::SetUp(); device = pContext->getDevice(0); if (!device->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { GTEST_SKIP(); } ASSERT_NE(device, nullptr); } void TearDown() override { BaseClass::TearDown(); } void checkQueueBuffer(cl_uint expedtedSize) { auto alignedExpectedSize = alignUp(expedtedSize, MemoryConstants::pageSize); EXPECT_EQ(deviceQueue->getQueueSize(), expedtedSize); ASSERT_NE(deviceQueue->getQueueBuffer(), nullptr); EXPECT_EQ(deviceQueue->getQueueBuffer()->getUnderlyingBufferSize(), alignedExpectedSize); } DeviceQueue *deviceQueue; ClDevice *device; }; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, createDeviceQueueWhenNoDeviceQueueIsSupported) { auto maxOnDeviceQueues = device->getSharedDeviceInfo().maxOnDeviceQueues; const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = 0; auto deviceQueue = createQueueObject(); EXPECT_EQ(deviceQueue, nullptr); const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, createDeviceQueuesWhenSingleDeviceQueueIsSupported) { auto maxOnDeviceQueues = device->getSharedDeviceInfo().maxOnDeviceQueues; const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = 1; auto deviceQueue1 = createQueueObject(); ASSERT_NE(deviceQueue1, nullptr); EXPECT_EQ(deviceQueue1->getReference(), 1); auto deviceQueue2 = createQueueObject(); EXPECT_EQ(deviceQueue2, nullptr); delete deviceQueue1; const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, createDeviceQueuesWhenMultipleDeviceQueuesAreSupported) { auto maxOnDeviceQueues = device->getSharedDeviceInfo().maxOnDeviceQueues; const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = 2; auto deviceQueue1 = createQueueObject(); ASSERT_NE(deviceQueue1, nullptr); EXPECT_EQ(deviceQueue1->getReference(), 1); auto deviceQueue2 = createQueueObject(); ASSERT_NE(deviceQueue2, nullptr); 
EXPECT_EQ(deviceQueue2->getReference(), 1); EXPECT_NE(deviceQueue2, deviceQueue1); delete deviceQueue1; delete deviceQueue2; const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, GivenDeviceQueueWhenEventPoolIsCreatedThenTimestampResolutionIsSet) { auto timestampResolution = static_cast(device->getProfilingTimerResolution()); auto deviceQueue = std::unique_ptr(createQueueObject()); ASSERT_NE(deviceQueue, nullptr); auto eventPoolBuffer = reinterpret_cast(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer()); EXPECT_FLOAT_EQ(timestampResolution, eventPoolBuffer->m_TimestampResolution); } typedef DeviceQueueTest DeviceQueueBuffer; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, setPreferredSizeWhenNoPropertyGiven) { auto &deviceInfo = device->getDeviceInfo(); deviceQueue = createQueueObject(); // only minimal properties ASSERT_NE(deviceQueue, nullptr); checkQueueBuffer(deviceInfo.queueOnDevicePreferredSize); deviceQueue->release(); } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, setPreferredSizeWhenInvalidPropertyGiven) { cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1], CL_QUEUE_SIZE, 0, 0}; auto &deviceInfo = device->getDeviceInfo(); deviceQueue = createQueueObject(properties); // zero size ASSERT_NE(deviceQueue, nullptr); checkQueueBuffer(deviceInfo.queueOnDevicePreferredSize); delete deviceQueue; properties[3] = static_cast(deviceInfo.queueOnDeviceMaxSize + 1); deviceQueue = createQueueObject(properties); // greater than max EXPECT_EQ(deviceQueue, nullptr); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, setValidQueueSize) { auto &deviceInfo = device->getDeviceInfo(); cl_uint validSize = deviceInfo.queueOnDevicePreferredSize - 1; cl_queue_properties properties[5] = {CL_QUEUE_PROPERTIES, deviceQueueProperties::minimumProperties[1], CL_QUEUE_SIZE, static_cast(validSize), 0}; EXPECT_NE(validSize, alignUp(validSize, 
MemoryConstants::pageSize)); // create aligned deviceQueue = createQueueObject(properties); ASSERT_NE(deviceQueue, nullptr); checkQueueBuffer(validSize); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueBuffer, initValues) { auto &deviceInfo = device->getDeviceInfo(); deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); IGIL_CommandQueue expectedIgilCmdQueue = getExpectedInitIgilCmdQueue(deviceQueue); EXPECT_EQ(static_cast(deviceQueue->isProfilingEnabled()), expectedIgilCmdQueue.m_controls.m_IsProfilingEnabled); IGIL_EventPool expectedIgilEventPool = {0, 0, 0}; expectedIgilEventPool.m_head = 0; expectedIgilEventPool.m_size = deviceInfo.maxOnDeviceEvents; expectedIgilEventPool.m_TimestampResolution = static_cast(device->getProfilingTimerResolution()); // initialized header EXPECT_EQ(0, memcmp(deviceQueue->getQueueBuffer()->getUnderlyingBuffer(), &expectedIgilCmdQueue, sizeof(IGIL_CommandQueue))); EXPECT_EQ(0, memcmp(deviceQueue->getEventPoolBuffer()->getUnderlyingBuffer(), &expectedIgilEventPool, sizeof(IGIL_EventPool))); delete deviceQueue; } typedef DeviceQueueTest DeviceQueueStackBuffer; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStackBuffer, allocateResourcesZeroesStackBufferAndQueueStorage) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); EXPECT_THAT(deviceQueue->getQueueStorageBuffer()->getUnderlyingBuffer(), MemoryZeroed(deviceQueue->getQueueStorageBuffer()->getUnderlyingBufferSize())); EXPECT_THAT(deviceQueue->getStackBuffer()->getUnderlyingBuffer(), MemoryZeroed(deviceQueue->getStackBuffer()->getUnderlyingBufferSize())); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStackBuffer, initAllocation) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto maxEnqueue = deviceQueue->getQueueSize() / sizeof(IGIL_CommandHeader); //stack can hold at most 3 full loads of commands auto expectedStackSize = maxEnqueue * sizeof(uint32_t) * 3; expectedStackSize = alignUp(expectedStackSize, 
MemoryConstants::pageSize); ASSERT_NE(deviceQueue->getStackBuffer(), nullptr); EXPECT_EQ(deviceQueue->getStackBuffer()->getUnderlyingBufferSize(), expectedStackSize); delete deviceQueue; } typedef DeviceQueueTest DeviceQueueStorageBuffer; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueStorageBuffer, initAllocation) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto expectedStorageSize = deviceQueue->getQueueBuffer()->getUnderlyingBufferSize() * 2; expectedStorageSize = alignUp(expectedStorageSize, MemoryConstants::pageSize); ASSERT_NE(deviceQueue->getQueueStorageBuffer(), nullptr); EXPECT_EQ(deviceQueue->getQueueStorageBuffer()->getUnderlyingBufferSize(), expectedStorageSize); delete deviceQueue; } typedef DeviceQueueTest DefaultDeviceQueue; HWCMDTEST_F(IGFX_GEN8_CORE, DefaultDeviceQueue, createOnlyOneDefaultDeviceQueueWhenSingleDeviceQueueIsSupported) { cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0}; auto maxOnDeviceQueues = device->getSharedDeviceInfo().maxOnDeviceQueues; const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = 1; auto deviceQueue1 = createQueueObject(properties); ASSERT_NE(deviceQueue1, nullptr); EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); EXPECT_EQ(deviceQueue1->getReference(), 1); auto deviceQueue2 = createQueueObject(properties); ASSERT_NE(deviceQueue2, nullptr); EXPECT_EQ(deviceQueue2, deviceQueue1); EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); EXPECT_EQ(deviceQueue1->getReference(), 2); deviceQueue1->release(); deviceQueue2->release(); const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; } HWCMDTEST_F(IGFX_GEN8_CORE, DefaultDeviceQueue, createOnlyOneDefaultDeviceQueueWhenMultipleDeviceQueuesAreSupported) { cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE_DEFAULT, 0, 0, 0}; auto maxOnDeviceQueues = device->getSharedDeviceInfo().maxOnDeviceQueues; 
const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = 2; auto deviceQueue1 = createQueueObject(properties); ASSERT_NE(deviceQueue1, nullptr); EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); EXPECT_EQ(deviceQueue1->getReference(), 1); auto deviceQueue2 = createQueueObject(properties); ASSERT_NE(deviceQueue2, nullptr); EXPECT_EQ(deviceQueue2, deviceQueue1); EXPECT_EQ(pContext->getDefaultDeviceQueue(), deviceQueue1); EXPECT_EQ(deviceQueue1->getReference(), 2); deviceQueue1->release(); deviceQueue2->release(); const_cast(&device->getSharedDeviceInfo())->maxOnDeviceQueues = maxOnDeviceQueues; } typedef DeviceQueueTest DeviceQueueEventPool; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueEventPool, poolBufferSize) { auto &deviceInfo = device->getDeviceInfo(); // number of events + event pool representation auto expectedSize = static_cast(deviceInfo.maxOnDeviceEvents * sizeof(IGIL_DeviceEvent) + sizeof(IGIL_EventPool)); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); auto deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); ASSERT_NE(deviceQueue->getEventPoolBuffer(), nullptr); EXPECT_EQ(deviceQueue->getEventPoolBuffer()->getUnderlyingBufferSize(), expectedSize); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, sizeOfDshBuffer) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); ASSERT_NE(deviceQueue->getDshBuffer(), nullptr); auto dshBufferSize = deviceQueue->getDshBuffer()->getUnderlyingBufferSize(); EXPECT_LE(761856u, dshBufferSize); delete deviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, dispatchScheduler) { DeviceQueue devQueue; MockContext context; MockProgram program(*device->getExecutionEnvironment()); MockCommandQueue cmdQ(nullptr, nullptr, 0); KernelInfo info; MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info, *device); LinearStream cmdStream; devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr, false); delete 
kernel; } compute-runtime-20.13.16352/opencl/test/unit_test/device_queue/get_device_queue_info_tests.cpp000066400000000000000000000063401363734646600325430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" using namespace NEO; using namespace DeviceHostQueue; class GetDeviceQueueInfoTest : public DeviceHostQueueFixture { public: using BaseClass = DeviceHostQueueFixture; void SetUp() override { BaseClass::SetUp(); if (!this->pContext->getDevice(0u)->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { GTEST_SKIP(); } deviceQueue = createQueueObject(deviceQueueProperties::allProperties); ASSERT_NE(deviceQueue, nullptr); } void TearDown() override { if (deviceQueue) delete deviceQueue; BaseClass::TearDown(); } DeviceQueue *deviceQueue = nullptr; }; HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, context) { cl_context contextReturned = nullptr; retVal = deviceQueue->getCommandQueueInfo( CL_QUEUE_CONTEXT, sizeof(contextReturned), &contextReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ((cl_context)pContext, contextReturned); } HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, device) { cl_device_id deviceExpected = testedClDevice; cl_device_id deviceIdReturned = nullptr; retVal = deviceQueue->getCommandQueueInfo( CL_QUEUE_DEVICE, sizeof(deviceIdReturned), &deviceIdReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(deviceExpected, deviceIdReturned); } HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, queueProperties) { cl_command_queue_properties propertiesReturned = 0; retVal = deviceQueue->getCommandQueueInfo( CL_QUEUE_PROPERTIES, sizeof(propertiesReturned), &propertiesReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(deviceQueueProperties::allProperties[1], propertiesReturned); } HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, queueSize) { 
cl_uint queueSizeReturned = 0; retVal = deviceQueue->getCommandQueueInfo( CL_QUEUE_SIZE, sizeof(queueSizeReturned), &queueSizeReturned, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(deviceQueue->getQueueSize(), queueSizeReturned); } // OCL 2.1 HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, queueDeviceDefault) { cl_command_queue commandQueueReturned = nullptr; retVal = deviceQueue->getCommandQueueInfo( CL_QUEUE_DEVICE_DEFAULT, sizeof(commandQueueReturned), &commandQueueReturned, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // 1 device queue is supported which is default EXPECT_EQ(deviceQueue, commandQueueReturned); } HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, profiling) { EXPECT_TRUE(deviceQueue->isProfilingEnabled()); } HWCMDTEST_F(IGFX_GEN8_CORE, GetDeviceQueueInfoTest, invalidParameter) { uint32_t tempValue = 0; retVal = deviceQueue->getCommandQueueInfo( static_cast(0), sizeof(tempValue), &tempValue, nullptr); EXPECT_EQ(tempValue, 0u); EXPECT_EQ(CL_INVALID_VALUE, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/event/000077500000000000000000000000001363734646600227535ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/event/CMakeLists.txt000066400000000000000000000012661363734646600255200ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_event ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/async_events_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_builder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_callbacks_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event_tracker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/user_events_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/user_events_tests_mt.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_event}) 
compute-runtime-20.13.16352/opencl/test/unit_test/event/async_events_handler_tests.cpp000066400000000000000000000317031363734646600311030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "gmock/gmock.h" using namespace NEO; using namespace ::testing; class AsyncEventsHandlerTests : public ::testing::Test { public: class MyEvent : public Event { public: MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {} int getExecutionStatus() { //return execution status without updating return executionStatus.load(); } void setTaskStamp(uint32_t taskLevel, uint32_t taskCount) { this->taskLevel.store(taskLevel); this->updateTaskCount(taskCount); } MOCK_METHOD2(wait, bool(bool blocking, bool quickKmdSleep)); }; static void CL_CALLBACK callbackFcn(cl_event e, cl_int status, void *data) { ++(*(int *)data); } void SetUp() override { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.EnableAsyncEventsHandler.set(false); handler.reset(new MockHandler()); event1 = new NiceMock(nullptr, CL_COMMAND_BARRIER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); event2 = new NiceMock(nullptr, CL_COMMAND_BARRIER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); event3 = new NiceMock(nullptr, CL_COMMAND_BARRIER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); } void TearDown() override { event1->release(); event2->release(); 
event3->release(); } std::unique_ptr dbgRestore; std::unique_ptr handler; int counter = 0; NiceMock *event1 = nullptr; NiceMock *event2 = nullptr; NiceMock *event3 = nullptr; }; TEST_F(AsyncEventsHandlerTests, givenEventsWhenListIsProcessedThenUpdateExecutionStatus) { event1->setTaskStamp(0, 0); event2->setTaskStamp(0, 0); handler->registerEvent(event1); handler->registerEvent(event2); EXPECT_EQ(CL_QUEUED, event1->getExecutionStatus()); EXPECT_EQ(CL_QUEUED, event2->getExecutionStatus()); handler->process(); EXPECT_NE(CL_QUEUED, event1->getExecutionStatus()); EXPECT_NE(CL_QUEUED, event2->getExecutionStatus()); EXPECT_TRUE(handler->peekIsListEmpty()); // auto-unregister when no callbacs } TEST_F(AsyncEventsHandlerTests, WhenProcessIsCompletedThenRefInternalCountIsDecremented) { event1->setTaskStamp(CompletionStamp::levelNotReady, 0); handler->registerEvent(event1); EXPECT_EQ(2, event1->getRefInternalCount()); handler->process(); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_EQ(1, event1->getRefInternalCount()); } TEST_F(AsyncEventsHandlerTests, givenNotCalledCallbacksWhenListIsProcessedThenDontUnregister) { int submittedCounter(0), completeCounter(0); event1->setTaskStamp(CompletionStamp::levelNotReady, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &submittedCounter); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &completeCounter); handler->registerEvent(event1); auto expect = [&](int status, int sCounter, int cCounter, bool empty) { EXPECT_EQ(status, event1->getExecutionStatus()); EXPECT_EQ(sCounter, submittedCounter); EXPECT_EQ(cCounter, completeCounter); EXPECT_EQ(empty, handler->peekIsListEmpty()); }; handler->process(); expect(CL_QUEUED, 0, 0, false); event1->setStatus(CL_SUBMITTED); handler->process(); expect(CL_SUBMITTED, 1, 0, false); event1->setStatus(CL_COMPLETE); handler->process(); expect(CL_COMPLETE, 1, 1, true); } TEST_F(AsyncEventsHandlerTests, 
givenExternallSynchronizedEventWhenListIsProcessedAndEventIsNotInCompleteStateThenDontUnregister) { struct ExternallySynchronizedEvent : Event { ExternallySynchronizedEvent(int numUpdatesBeforeCompletion) : Event(nullptr, 0, 0, 0), numUpdatesBeforeCompletion(numUpdatesBeforeCompletion) { } void updateExecutionStatus() override { ++updateCount; if (updateCount == numUpdatesBeforeCompletion) { transitionExecutionStatus(CL_COMPLETE); } } bool isExternallySynchronized() const override { return true; } int updateCount = 0; int numUpdatesBeforeCompletion = 1; }; constexpr int numUpdatesBeforeCompletion = 5; auto *event = new ExternallySynchronizedEvent(numUpdatesBeforeCompletion); handler->registerEvent(event); for (int i = 0; i < numUpdatesBeforeCompletion * 2; ++i) { handler->process(); } EXPECT_EQ(CL_COMPLETE, event->peekExecutionStatus()); EXPECT_EQ(numUpdatesBeforeCompletion, event->updateCount); event->release(); } TEST_F(AsyncEventsHandlerTests, givenDoubleRegisteredEventWhenListIsProcessedAndNoCallbacksToProcessThenUnregister) { event1->setTaskStamp(CompletionStamp::levelNotReady - 1, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); handler->registerEvent(event1); handler->registerEvent(event1); handler->process(); EXPECT_EQ(CL_SUBMITTED, event1->getExecutionStatus()); EXPECT_EQ(1, counter); EXPECT_TRUE(handler->peekIsListEmpty()); } TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenDestructingThenUnreferenceAll) { auto myHandler = new MockHandler(); event1->setTaskStamp(CompletionStamp::levelNotReady, 0); event2->setTaskStamp(CompletionStamp::levelNotReady, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); myHandler->registerEvent(event1); myHandler->process(); myHandler->registerEvent(event2); EXPECT_FALSE(myHandler->peekIsListEmpty()); EXPECT_FALSE(myHandler->peekIsRegisterListEmpty()); EXPECT_EQ(3, event1->getRefInternalCount()); 
EXPECT_EQ(3, event2->getRefInternalCount()); delete myHandler; // 1 left because of callbacks EXPECT_EQ(2, event1->getRefInternalCount()); EXPECT_EQ(2, event2->getRefInternalCount()); // release callbacks event1->setStatus(CL_SUBMITTED); event2->setStatus(CL_SUBMITTED); } TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenAsyncExecutionInterruptedThenUnreferenceAll) { event1->setTaskStamp(CompletionStamp::levelNotReady, 0); event2->setTaskStamp(CompletionStamp::levelNotReady, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); handler->registerEvent(event1); handler->process(); handler->registerEvent(event2); EXPECT_FALSE(handler->peekIsListEmpty()); EXPECT_FALSE(handler->peekIsRegisterListEmpty()); EXPECT_EQ(3, event1->getRefInternalCount()); EXPECT_EQ(3, event2->getRefInternalCount()); handler->allowAsyncProcess.store(false); MockHandler::asyncProcess(handler.get()); // enter and exit because of allowAsyncProcess == false EXPECT_EQ(2, event1->getRefInternalCount()); EXPECT_EQ(2, event2->getRefInternalCount()); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_TRUE(handler->peekIsRegisterListEmpty()); event1->setStatus(CL_SUBMITTED); event2->setStatus(CL_SUBMITTED); } TEST_F(AsyncEventsHandlerTests, WhenHandlerIsCreatedThenThreadIsNotCreatedByDefault) { MockHandler myHandler; EXPECT_EQ(nullptr, myHandler.thread.get()); } TEST_F(AsyncEventsHandlerTests, WhenHandlerIsRegisteredThenThreadIsCreated) { event1->setTaskStamp(CompletionStamp::levelNotReady, 0); EXPECT_FALSE(handler->openThreadCalled); handler->registerEvent(event1); EXPECT_TRUE(handler->openThreadCalled); } TEST_F(AsyncEventsHandlerTests, WhenProcessingAsynchronouslyThenBothThreadsCompelete) { DebugManager.flags.EnableAsyncEventsHandler.set(true); event1->setTaskStamp(CompletionStamp::levelNotReady, 0); event2->setTaskStamp(CompletionStamp::levelNotReady, 0); event1->addCallback(&this->callbackFcn, 
CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); EXPECT_EQ(CL_QUEUED, event1->getExecutionStatus()); EXPECT_EQ(CL_QUEUED, event2->getExecutionStatus()); // unblock to submit event1->taskLevel.store(0); event2->taskLevel.store(0); while (event1->getExecutionStatus() == CL_QUEUED || event2->getExecutionStatus() == CL_QUEUED) { std::this_thread::yield(); } EXPECT_EQ(CL_SUBMITTED, event1->getExecutionStatus()); EXPECT_EQ(CL_SUBMITTED, event2->getExecutionStatus()); platform()->getAsyncEventsHandler()->closeThread(); } TEST_F(AsyncEventsHandlerTests, WhenThreadIsDestructedThenGetThreadReturnsNull) { handler->allowThreadCreating = true; handler->openThread(); // wait for sleep while (handler->transferCounter == 0) { std::this_thread::yield(); } std::unique_lock lock(handler->asyncMtx); lock.unlock(); handler->closeThread(); EXPECT_EQ(nullptr, handler->thread.get()); } TEST_F(AsyncEventsHandlerTests, givenReadyEventWhenCallbackIsAddedThenDontOpenThread) { DebugManager.flags.EnableAsyncEventsHandler.set(true); auto myHandler = new MockHandler(true); auto oldHandler = platform()->setAsyncEventsHandler(std::unique_ptr(myHandler)); event1->setTaskStamp(0, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); EXPECT_EQ(platform()->getAsyncEventsHandler(), myHandler); EXPECT_FALSE(event1->peekHasCallbacks()); EXPECT_FALSE(myHandler->openThreadCalled); platform()->setAsyncEventsHandler(std::move(oldHandler)); } TEST_F(AsyncEventsHandlerTests, givenUserEventWhenCallbackIsAddedThenDontRegister) { DebugManager.flags.EnableAsyncEventsHandler.set(true); auto myHandler = new MockHandler(true); auto oldHandler = platform()->setAsyncEventsHandler(std::unique_ptr(myHandler)); UserEvent userEvent; userEvent.addCallback(&this->callbackFcn, CL_COMPLETE, &counter); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_TRUE(handler->peekIsRegisterListEmpty()); EXPECT_TRUE(userEvent.peekHasCallbacks()); userEvent.decRefInternal(); 
platform()->setAsyncEventsHandler(std::move(oldHandler)); } TEST_F(AsyncEventsHandlerTests, givenRegistredEventsWhenProcessIsCalledThenReturnCandidateWithLowestTaskCount) { int event1Counter(0), event2Counter(0), event3Counter(0); event1->setTaskStamp(0, 1); event2->setTaskStamp(0, 2); event3->setTaskStamp(0, 3); event2->addCallback(&this->callbackFcn, CL_COMPLETE, &event2Counter); handler->registerEvent(event2); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &event1Counter); handler->registerEvent(event1); event3->addCallback(&this->callbackFcn, CL_COMPLETE, &event3Counter); handler->registerEvent(event3); auto sleepCandidate = handler->process(); EXPECT_EQ(event1, sleepCandidate); event1->setStatus(CL_COMPLETE); event2->setStatus(CL_COMPLETE); event3->setStatus(CL_COMPLETE); } TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontReturnAsSleepCandidate) { event1->setTaskStamp(0, 1); event2->setTaskStamp(0, 2); handler->registerEvent(event1); event2->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); handler->registerEvent(event2); auto sleepCandidate = handler->process(); EXPECT_EQ(event2, sleepCandidate); event2->setStatus(CL_COMPLETE); } TEST_F(AsyncEventsHandlerTests, givenSleepCandidateWhenProcessedThenCallWaitWithQuickKmdSleepRequest) { event1->setTaskStamp(0, 1); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); handler->registerEvent(event1); handler->allowAsyncProcess.store(true); // break infinite loop after first iteartion auto unsetAsyncFlag = [&](bool blocking, bool quickKmdSleep) { handler->allowAsyncProcess.store(false); return true; }; EXPECT_CALL(*event1, wait(true, true)).Times(1).WillOnce(Invoke(unsetAsyncFlag)); MockHandler::asyncProcess(handler.get()); event1->setStatus(CL_COMPLETE); } TEST_F(AsyncEventsHandlerTests, WhenReturningThenAsyncProcessWillCallProcessList) { Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); handler->registerEvent(event); 
handler->allowAsyncProcess.store(false); MockHandler::asyncProcess(handler.get()); EXPECT_TRUE(handler->peekIsListEmpty()); EXPECT_EQ(1, event->getRefInternalCount()); event->release(); } compute-runtime-20.13.16352/opencl/test/unit_test/event/event_builder_tests.cpp000066400000000000000000000361071363734646600275370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/utilities/arrayref.h" #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/task_information.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" namespace NEO { struct SmallEventBuilderEventMock : MockEvent { SmallEventBuilderEventMock(int param1, float param2) : MockEvent(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0), constructionParam1(param1), constructionParam2(param2) { } SmallEventBuilderEventMock() : SmallEventBuilderEventMock(1, 2.0f) { } void overrideMagic(cl_long newMagic) { this->magic = newMagic; } int constructionParam1 = 1; float constructionParam2 = 2.0f; }; struct SmallEventBuilderMock : EventBuilder { void clear() { EventBuilder::clear(); } void clearEvent() { event = nullptr; } ArrayRef getParentEvents() { return this->parentEvents; } }; TEST(EventBuilder, whenCreatingNewEventForwardsArgumentsToEventConstructor) { EventBuilder eventBuilder; EXPECT_EQ(nullptr, eventBuilder.getEvent()); constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; eventBuilder.create(constrParam1, constrParam2); Event *peekedEvent 
= eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); auto finalizedEvent = static_cast(eventBuilder.finalizeAndRelease()); EXPECT_EQ(peekedEvent, peekedEvent); EXPECT_EQ(constrParam1, finalizedEvent->constructionParam1); EXPECT_EQ(constrParam2, finalizedEvent->constructionParam2); finalizedEvent->release(); } TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) { class MockCommandComputeKernel : public CommandComputeKernel { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, device.get(), nullptr); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); std::unique_ptr command = std::make_unique(cmdQ, kernelOperation, surfaces, kernel); VirtualEvent virtualEvent; virtualEvent.setCommand(std::move(command)); EventBuilder eventBuilder; EXPECT_EQ(nullptr, eventBuilder.getEvent()); constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; eventBuilder.create(constrParam1, constrParam2); Event *peekedEvent = eventBuilder.getEvent(); 
ASSERT_NE(nullptr, peekedEvent); virtualEvent.taskLevel = CL_SUBMITTED; eventBuilder.addParentEvent(&virtualEvent); eventBuilder.finalize(); peekedEvent->release(); } TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotAddChild) { class MockVirtualEvent : public VirtualEvent { public: using VirtualEvent::eventWithoutCommand; using VirtualEvent::submittedCmd; }; class MockCommandComputeKernel : public CommandComputeKernel { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, device.get(), nullptr); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); std::unique_ptr command = std::make_unique(cmdQ, kernelOperation, surfaces, kernel); MockVirtualEvent virtualEvent; virtualEvent.eventWithoutCommand = false; virtualEvent.submittedCmd.exchange(command.release()); EventBuilder eventBuilder; EXPECT_EQ(nullptr, eventBuilder.getEvent()); constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; eventBuilder.create(constrParam1, 
constrParam2); Event *peekedEvent = eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); virtualEvent.taskLevel = CL_SUBMITTED; eventBuilder.addParentEvent(&virtualEvent); eventBuilder.finalize(); peekedEvent->release(); } TEST(EventBuilder, whenDestroyingEventBuilderImplicitFinalizeIscalled) { SmallEventBuilderEventMock *ev = nullptr; auto parentEvent = new UserEvent; { EventBuilder eventBuilder{}; eventBuilder.create(); eventBuilder.addParentEvent(*parentEvent); ev = static_cast(eventBuilder.getEvent()); ASSERT_NE(nullptr, ev); EXPECT_EQ(0U, ev->peekNumEventsBlockingThis()); } // make sure that finalize was called on EventBuilder's d-tor and parent was added properly EXPECT_EQ(1U, ev->peekNumEventsBlockingThis()); ev->release(); parentEvent->release(); } TEST(EventBuilder, whenFinalizeIsCalledTwiceOnEventBuilderThenSecondRequestIsDropped) { SmallEventBuilderEventMock *ev = nullptr; EventBuilder eventBuilder{}; eventBuilder.create(); ev = static_cast(eventBuilder.getEvent()); ASSERT_NE(nullptr, ev); eventBuilder.finalize(); auto *falseParentEvent = new UserEvent(); auto *falseChildEvent = new SmallEventBuilderEventMock; auto numParents = ev->peekNumEventsBlockingThis(); auto numChildren = (ev->peekChildEvents() != nullptr) ? 1U + ev->peekChildEvents()->countSuccessors() : 0; eventBuilder.addParentEvent(*falseParentEvent); eventBuilder.finalize(); // make sure that new parent was not added in second finalize EXPECT_EQ(numParents, ev->peekNumEventsBlockingThis()); EXPECT_EQ(numChildren, (ev->peekChildEvents() != nullptr) ? 
1U + ev->peekChildEvents()->countSuccessors() : 0); falseParentEvent->release(); falseChildEvent->release(); ev->release(); } TEST(EventBuilder, whenFinalizeAndReleaseIsCalledThenEventBuilderReleasesReferenceToEvent) { EventBuilder eventBuilder; eventBuilder.create(); auto ev = static_cast(eventBuilder.finalizeAndRelease()); ASSERT_NE(nullptr, ev); ASSERT_EQ(nullptr, eventBuilder.getEvent()); ASSERT_EQ(nullptr, eventBuilder.finalizeAndRelease()); ev->release(); } TEST(EventBuilder, whenClearIsCalledThenAllEventsAndReferencesAreDropped) { auto parentEvent = new UserEvent(); SmallEventBuilderMock eventBuilder; eventBuilder.addParentEvent(*parentEvent); eventBuilder.clear(); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); EXPECT_EQ(nullptr, eventBuilder.getEvent()); parentEvent->release(); } TEST(EventBuilder, whenCParentEventsGetAddedThenTheirReferenceCountGetsIncreasedUntilFinalizeIsCalled) { UserEvent evParent1; UserEvent evParent2; EXPECT_EQ(1, evParent1.getRefInternalCount()); EXPECT_EQ(1, evParent2.getRefInternalCount()); EventBuilder eventBuilder; eventBuilder.create(); eventBuilder.addParentEvent(evParent1); EXPECT_EQ(2, evParent1.getRefInternalCount()); eventBuilder.addParentEvent(evParent2); EXPECT_EQ(2, evParent2.getRefInternalCount()); auto createdEvent = static_cast(eventBuilder.finalizeAndRelease()); EXPECT_EQ(2U, createdEvent->peekNumEventsBlockingThis()); createdEvent->release(); evParent1.setStatus(CL_COMPLETE); evParent2.setStatus(CL_COMPLETE); EXPECT_EQ(1, evParent1.getRefInternalCount()); EXPECT_EQ(1, evParent2.getRefInternalCount()); } TEST(EventBuilder, whenFinalizeIsCalledWithEmptyEventsListsThenParentAndChildListsAreEmpty) { EventBuilder eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); Event *event = eventBuilder.finalizeAndRelease(); EXPECT_EQ(0U, event->peekNumEventsBlockingThis()); EXPECT_EQ(nullptr, event->peekChildEvents()); event->release(); } TEST(EventBuilder, 
whenFinalizeIsCalledAndBuildersEventsListAreNotEmptyThenEventsListsAreAddedToEvent) { MockEvent *parentEvent = new MockEvent(); EventBuilder eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); eventBuilder.addParentEvent(*parentEvent); Event *event = eventBuilder.finalizeAndRelease(); EXPECT_EQ(1U, event->peekNumEventsBlockingThis()); ASSERT_NE(nullptr, parentEvent->peekChildEvents()); EXPECT_EQ(event, parentEvent->peekChildEvents()->ref); parentEvent->setStatus(CL_COMPLETE); EXPECT_EQ(0U, event->peekNumEventsBlockingThis()); EXPECT_EQ(nullptr, parentEvent->peekChildEvents()); event->release(); parentEvent->release(); } TEST(EventBuilder, whenFinalizeIsCalledAndParentsListContainsManyEventsFromWhichOnlyFirstOnesAreCompletedThenEventIsNotCompleted) { MockEvent *userEventNotCompleted = new MockEvent(); MockEvent *userEventCompleted = new MockEvent(); userEventCompleted->setStatus(CL_COMPLETE); EventBuilder eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); eventBuilder.addParentEvent(*userEventCompleted); eventBuilder.addParentEvent(*userEventNotCompleted); Event *event = eventBuilder.finalizeAndRelease(); EXPECT_FALSE(event->updateStatusAndCheckCompletion()); EXPECT_EQ(1U, event->peekNumEventsBlockingThis()); ASSERT_EQ(nullptr, userEventCompleted->peekChildEvents()); ASSERT_NE(nullptr, userEventNotCompleted->peekChildEvents()); EXPECT_EQ(event, userEventNotCompleted->peekChildEvents()->ref); userEventNotCompleted->setStatus(CL_COMPLETE); EXPECT_EQ(0U, event->peekNumEventsBlockingThis()); EXPECT_EQ(nullptr, userEventNotCompleted->peekChildEvents()); event->release(); userEventCompleted->release(); userEventNotCompleted->release(); } TEST(EventBuilder, whenAddingNullptrAsNewParentEventThenItIsIgnored) { SmallEventBuilderMock eventBuilder; EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); eventBuilder.addParentEvent(nullptr); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); } TEST(EventBuilder, 
whenAddingValidEventAsNewParentEventThenItIsProperlyAddedToParentsList) { auto event = new SmallEventBuilderEventMock; SmallEventBuilderMock eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); eventBuilder.addParentEvent(event); EXPECT_EQ(1U, eventBuilder.getParentEvents().size()); event->release(); eventBuilder.finalize(); eventBuilder.getEvent()->release(); } TEST(EventBuilder, whenAddingMultipleEventsAsNewParentsThenOnlyValidOnesAreInsertedIntoParentsList) { auto event = new SmallEventBuilderEventMock; auto invalidEvent = new SmallEventBuilderEventMock; invalidEvent->overrideMagic(0); cl_event eventsList[] = {nullptr, event, invalidEvent}; SmallEventBuilderMock eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); eventBuilder.addParentEvents(ArrayRef(eventsList)); ASSERT_EQ(1U, eventBuilder.getParentEvents().size()); EXPECT_EQ(event, *eventBuilder.getParentEvents().begin()); invalidEvent->release(); event->release(); eventBuilder.finalize(); eventBuilder.getEvent()->release(); } TEST(EventBuilder, parentListDoesNotHaveDuplicates) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext mockContext; MockCommandQueue mockCommandQueue(&mockContext, mockDevice.get(), nullptr); SmallEventBuilderMock *eventBuilder = new SmallEventBuilderMock; eventBuilder->create>(&mockCommandQueue, CL_COMMAND_MARKER, 0, 0); Event *event = eventBuilder->getEvent(); Event *parentEvent = new Event(&mockCommandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); Event *parentEvent2 = new Event(&mockCommandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); Event *parentEvent3 = new Event(&mockCommandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); eventBuilder->addParentEvent(parentEvent); eventBuilder->addParentEvent(parentEvent2); eventBuilder->addParentEvent(parentEvent3); // add duplicate 
eventBuilder->addParentEvent(parentEvent); auto parents = eventBuilder->getParentEvents(); size_t numberOfParents = parents.size(); EXPECT_EQ(3u, numberOfParents); event->release(); parentEvent->release(); parentEvent2->release(); parentEvent3->release(); eventBuilder->clear(); eventBuilder->clearEvent(); delete eventBuilder; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/event/event_callbacks_tests.cpp000066400000000000000000000033611363734646600300240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include using namespace NEO; struct CallbackData { static void CL_CALLBACK callback(cl_event event, cl_int status, void *userData) { uint32_t *nestLevel = (uint32_t *)userData; if (*nestLevel < 4) { (*nestLevel)++; clSetEventCallback(event, CL_COMPLETE, CallbackData::callback, userData); } } }; TEST(EventCallbackTest, GivenUserEventWhenAddingCallbackThenNestedCallbacksCanBeCreated) { MockEvent event(nullptr); uint32_t nestLevel = 0; event.addCallback(CallbackData::callback, CL_COMPLETE, &nestLevel); event.setStatus(CL_COMPLETE); EXPECT_EQ(4u, nestLevel); } TEST(EventCallbackTest, GivenEventWhenAddingCallbackThenNestedCallbacksCanBeCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context; MockCommandQueue queue(&context, device.get(), nullptr); MockEvent event(&queue, CL_COMMAND_MARKER, 0, 0); uint32_t nestLevel = 0; event.addCallback(CallbackData::callback, CL_COMPLETE, &nestLevel); 
event.setStatus(CL_COMPLETE); platform()->getAsyncEventsHandler()->closeThread(); EXPECT_EQ(4u, nestLevel); } compute-runtime-20.13.16352/opencl/test/unit_test/event/event_fixture.h000066400000000000000000000066241363734646600260230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct EventTest : public DeviceFixture, public CommandQueueFixture, public CommandStreamFixture, public ::testing::Test { using CommandQueueFixture::SetUp; void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(&mockContext, pClDevice, 0); CommandStreamFixture::SetUp(pCmdQ); } void TearDown() override { CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } MockContext mockContext; }; struct InternalsEventTest : public DeviceFixture, public ::testing::Test { InternalsEventTest() { } void SetUp() override { DeviceFixture::SetUp(); mockContext = new MockContext(pClDevice); } void TearDown() override { delete mockContext; DeviceFixture::TearDown(); } MockContext *mockContext = nullptr; }; struct MyUserEvent : public VirtualEvent { bool wait(bool blocking, bool quickKmdSleep) override { return VirtualEvent::wait(blocking, quickKmdSleep); }; uint32_t getTaskLevel() override { return 
VirtualEvent::getTaskLevel(); }; }; struct MyEvent : public Event { MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) { } TimeStampData getQueueTimeStamp() { return this->queueTimeStamp; }; TimeStampData getSubmitTimeStamp() { return this->submitTimeStamp; }; uint64_t getStartTimeStamp() { return this->startTimeStamp; }; uint64_t getEndTimeStamp() { return this->endTimeStamp; }; uint64_t getCompleteTimeStamp() { return this->completeTimeStamp; } uint64_t getGlobalStartTimestamp() const { return this->globalStartTimestamp; } bool getDataCalcStatus() const { return this->dataCalculated; } void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) override { if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { globalStartTimestamp = globalStartTS; } Event::calculateProfilingDataInternal(contextStartTS, contextEndTS, contextCompleteTS, globalStartTS); } uint64_t globalStartTimestamp; }; class MockEventTests : public HelloWorldTest { public: void TearDown() override { uEvent->setStatus(-1); uEvent.reset(); HelloWorldFixture::TearDown(); } protected: ReleaseableObjectPtr uEvent; }; compute-runtime-20.13.16352/opencl/test/unit_test/event/event_tests.cpp000066400000000000000000001724041363734646600260320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/perf_counter.h" #include 
"opencl/source/helpers/task_information.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "test.h" #include "event_fixture.h" #include #include using namespace NEO; TEST(Event, GivenEventWhenCheckingTraitThenEventIsNotCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(Event, GivenEventWhenCheckingTraitThenEventIsNotAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } TEST(Event, WhenPeekIsCalledThenExecutionIsNotUpdated) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx; MockCommandQueue cmdQ(&ctx, mockDevice.get(), 0); Event event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, 0); EXPECT_FALSE(event.peekIsBlocked()); EXPECT_EQ(CL_QUEUED, event.peekExecutionStatus()); event.updateExecutionStatus(); EXPECT_EQ(CL_QUEUED, event.peekExecutionStatus()); } TEST(Event, givenEventThatStatusChangeWhenPeekIsCalledThenEventIsNotUpdated) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx; MockCommandQueue cmdQ(&ctx, mockDevice.get(), 0); struct mockEvent : public Event { 
using Event::Event; void updateExecutionStatus() override { callCount++; } uint32_t callCount = 0u; }; mockEvent event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, 0); EXPECT_EQ(0u, event.callCount); event.peekExecutionStatus(); EXPECT_EQ(0u, event.callCount); event.updateEventAndReturnCurrentStatus(); EXPECT_EQ(1u, event.callCount); event.updateEventAndReturnCurrentStatus(); EXPECT_EQ(2u, event.callCount); } TEST(Event, givenEventWithHigherTaskCountWhenLowerTaskCountIsBeingSetThenTaskCountRemainsUnmodifed) { Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 4, 10); EXPECT_EQ(10u, event->peekTaskCount()); event->updateTaskCount(8); EXPECT_EQ(10u, event->peekTaskCount()); delete event; } TEST(Event, WhenGettingTaskLevelThenCorrectTaskLevelIsReturned) { class TempEvent : public Event { public: TempEvent() : Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 7){}; uint32_t getTaskLevel() override { return Event::getTaskLevel(); } }; TempEvent event; // taskLevel and getTaskLevel() should give the same result EXPECT_EQ(5u, event.taskLevel); EXPECT_EQ(5u, event.getTaskLevel()); } TEST(Event, WhenGettingTaskCountThenCorrectValueIsReturned) { Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 7); EXPECT_EQ(7u, event.getCompletionStamp()); } TEST(Event, WhenGettingEventInfoThenCqIsReturned) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto ctx = std::unique_ptr(new MockContext()); auto cmdQ = std::unique_ptr(new MockCommandQueue(ctx.get(), mockDevice.get(), 0)); Event *event = new Event(cmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_event clEvent = event; cl_command_queue cmdQResult = nullptr; size_t sizeReturned = 0; auto result = clGetEventInfo(clEvent, CL_EVENT_COMMAND_QUEUE, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(cl_command_queue), sizeReturned); result = clGetEventInfo(clEvent, CL_EVENT_COMMAND_QUEUE, sizeof(cmdQResult), &cmdQResult, &sizeReturned); 
ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(cmdQ.get(), cmdQResult); EXPECT_EQ(sizeReturned, sizeof(cmdQResult)); delete event; } TEST(Event, givenCommandQueueWhenEventIsCreatedWithCommandQueueThenCommandQueueInternalRefCountIsIncremented) { auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx; MockCommandQueue cmdQ(&ctx, mockDevice.get(), 0); auto intitialRefCount = cmdQ.getRefInternalCount(); Event *event = new Event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 4, 10); auto newRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount + 1, newRefCount); delete event; auto finalRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount, finalRefCount); } TEST(Event, givenCommandQueueWhenEventIsCreatedWithoutCommandQueueThenCommandQueueInternalRefCountIsNotModified) { MockContext ctx; MockCommandQueue cmdQ(&ctx, nullptr, 0); auto intitialRefCount = cmdQ.getRefInternalCount(); Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 4, 10); auto newRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount, newRefCount); delete event; auto finalRefCount = cmdQ.getRefInternalCount(); EXPECT_EQ(intitialRefCount, finalRefCount); } TEST(Event, WhenWaitingForEventsThenAllQueuesAreFlushed) { class MockCommandQueueWithFlushCheck : public MockCommandQueue { public: MockCommandQueueWithFlushCheck() = delete; MockCommandQueueWithFlushCheck(MockCommandQueueWithFlushCheck &) = delete; MockCommandQueueWithFlushCheck(Context &context, ClDevice *device) : MockCommandQueue(&context, device, nullptr) { } cl_int flush() override { flushCounter++; return CL_SUCCESS; } uint32_t flushCounter = 0; }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context; std::unique_ptr cmdQ1(new MockCommandQueueWithFlushCheck(context, device.get())); std::unique_ptr event1(new Event(cmdQ1.get(), CL_COMMAND_NDRANGE_KERNEL, 4, 10)); std::unique_ptr cmdQ2(new 
MockCommandQueueWithFlushCheck(context, device.get())); std::unique_ptr event2(new Event(cmdQ2.get(), CL_COMMAND_NDRANGE_KERNEL, 5, 20)); cl_event eventWaitlist[] = {event1.get(), event2.get()}; Event::waitForEvents(2, eventWaitlist); EXPECT_EQ(1u, cmdQ1->flushCounter); EXPECT_EQ(1u, cmdQ2->flushCounter); } TEST(Event, GivenNotReadyEventWhenWaitingForEventsThenQueueIsNotFlushed) { class MockCommandQueueWithFlushCheck : public MockCommandQueue { public: MockCommandQueueWithFlushCheck() = delete; MockCommandQueueWithFlushCheck(MockCommandQueueWithFlushCheck &) = delete; MockCommandQueueWithFlushCheck(Context &context, ClDevice *device) : MockCommandQueue(&context, device, nullptr) { } cl_int flush() override { flushCounter++; return CL_SUCCESS; } uint32_t flushCounter = 0; }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context; std::unique_ptr cmdQ1(new MockCommandQueueWithFlushCheck(context, device.get())); std::unique_ptr event1(new Event(cmdQ1.get(), CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, 0)); cl_event eventWaitlist[] = {event1.get()}; Event::waitForEvents(1, eventWaitlist); EXPECT_EQ(0u, cmdQ1->flushCounter); } TEST(Event, givenNotReadyEventOnWaitlistWhenCheckingUserEventDependeciesThenTrueIsReturned) { auto event1 = std::make_unique(nullptr, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, 0); cl_event eventWaitlist[] = {event1.get()}; bool userEventDependencies = Event::checkUserEventDependencies(1, eventWaitlist); EXPECT_TRUE(userEventDependencies); } TEST(Event, givenReadyEventsOnWaitlistWhenCheckingUserEventDependeciesThenFalseIsReturned) { auto event1 = std::make_unique(nullptr, CL_COMMAND_NDRANGE_KERNEL, 5, 0); cl_event eventWaitlist[] = {event1.get()}; bool userEventDependencies = Event::checkUserEventDependencies(1, eventWaitlist); EXPECT_FALSE(userEventDependencies); } TEST_F(EventTest, WhenGettingClEventCommandExecutionStatusThenCorrectSizeIsReturned) { Event 
event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 5); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeReturned, sizeof(eventStatus)); } TEST_F(EventTest, GivenTagCsLessThanTaskCountWhenGettingClEventCommandExecutionStatusThenClSubmittedIsReturned) { uint32_t tagHW = 4; uint32_t taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); // If tagCS < taskCount, we always return submitted (ie. no buffering!) EXPECT_EQ(CL_SUBMITTED, eventStatus); } TEST_F(EventTest, GivenTagCsEqualTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) { uint32_t tagHW = 5; uint32_t taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); // If tagCS == event.taskCount, the event is completed. 
EXPECT_EQ(CL_COMPLETE, eventStatus); } TEST_F(EventTest, GivenTagCsGreaterThanTaskCountWhenGettingClEventCommandExecutionStatusThenClCompleteIsReturned) { uint32_t tagHW = 6; uint32_t taskCount = 5; *pTagMemory = tagHW; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, taskCount); cl_int eventStatus = -1; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(CL_COMPLETE, eventStatus); } TEST_F(EventTest, WhenGettingClEventCommandExecutionStatusThenEventStatusIsReturned) { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); cl_int eventStatus = -1; event.setStatus(-1); auto result = clGetEventInfo(&event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventStatus), &eventStatus, 0); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(-1, eventStatus); } TEST_F(EventTest, GivenNewEventWhenGettingClEventReferenceCountThenOneIsReturned) { uint32_t tagEvent = 5; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); cl_uint refCount = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(refCount), sizeReturned); EXPECT_EQ(1u, refCount); } TEST_F(EventTest, GivenRetainedEventWhenGettingClEventReferenceCountThenTwoIsReturned) { uint32_t tagEvent = 5; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); event.retain(); cl_uint refCount = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(&event, CL_EVENT_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(refCount), sizeReturned); EXPECT_EQ(2u, refCount); event.release(); } TEST_F(EventTest, GivenRetainAndReleaseEventWhenGettingClEventReferenceCountThenOneIsReturned) { uint32_t tagEvent = 5; Event *pEvent = new Event(pCmdQ, 
CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); ASSERT_NE(nullptr, pEvent); pEvent->retain(); auto retVal = pEvent->getReference(); EXPECT_EQ(2, retVal); cl_uint refCount = 0; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(refCount), sizeReturned); EXPECT_EQ(2u, refCount); pEvent->release(); retVal = pEvent->getReference(); EXPECT_EQ(1, retVal); delete pEvent; } TEST_F(EventTest, WhenGettingClEventContextThenCorrectValueIsReturned) { uint32_t tagEvent = 5; Event *pEvent = new Event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); ASSERT_NE(nullptr, pEvent); cl_context context; size_t sizeReturned = 0; auto result = clGetEventInfo(pEvent, CL_EVENT_CONTEXT, sizeof(context), &context, &sizeReturned); ASSERT_EQ(CL_SUCCESS, result); EXPECT_EQ(sizeof(context), sizeReturned); cl_context qCtx = (cl_context)&mockContext; EXPECT_EQ(qCtx, context); delete pEvent; } TEST_F(EventTest, GivenInvalidEventWhenGettingEventInfoThenInvalidValueErrorIsReturned) { uint32_t tagEvent = 5; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, tagEvent); cl_int eventStatus = -1; auto result = clGetEventInfo(&event, -1, sizeof(eventStatus), &eventStatus, nullptr); EXPECT_EQ(CL_INVALID_VALUE, result); } TEST_F(EventTest, GivenNonBlockingEventWhenWaitingThenFalseIsReturned) { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, CompletionStamp::levelNotReady); auto result = event.wait(false, false); EXPECT_FALSE(result); } struct UpdateEventTest : public ::testing::Test { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = new MockMemoryManager(*executionEnvironment); hostPtrManager = static_cast(memoryManager->getHostPtrManager()); executionEnvironment->memoryManager.reset(memoryManager); device.reset(new ClDevice{*Device::create(executionEnvironment, 0u), platform()}); context = std::make_unique(device.get()); cl_int retVal = 
CL_OUT_OF_RESOURCES; commandQueue.reset(CommandQueue::create(context.get(), device.get(), nullptr, false, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } ExecutionEnvironment *executionEnvironment; MockMemoryManager *memoryManager; MockHostPtrManager *hostPtrManager; std::unique_ptr device; std::unique_ptr context; std::unique_ptr commandQueue; }; TEST_F(UpdateEventTest, givenEventContainingCommandQueueWhenItsStatusIsUpdatedToCompletedThenTemporaryAllocationsAreDeleted) { void *ptr = (void *)0x1000; size_t size = 4096; auto temporary = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size}, ptr); temporary->updateTaskCount(3, commandQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId()); commandQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporary), TEMPORARY_ALLOCATION); Event event(commandQueue.get(), CL_COMMAND_NDRANGE_KERNEL, 3, 3); EXPECT_EQ(1u, hostPtrManager->getFragmentCount()); event.updateExecutionStatus(); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } class SurfaceMock : public Surface { public: SurfaceMock() { resident = nonResident = 0; }; ~SurfaceMock() override{}; void makeResident(CommandStreamReceiver &csr) override { if (parent) { parent->resident++; } else { resident++; } if (this->graphicsAllocation) { csr.makeResident(*graphicsAllocation); } }; Surface *duplicate() override { return new SurfaceMock(this); }; SurfaceMock *parent = nullptr; std::atomic resident; std::atomic nonResident; GraphicsAllocation *graphicsAllocation = nullptr; protected: SurfaceMock(SurfaceMock *parent) : parent(parent){}; }; TEST_F(InternalsEventTest, GivenSubmitCommandFalseWhenSubmittingCommandsThenRefApiCountAndRefInternalGetHandledCorrectly) { MockCommandQueue cmdQ(mockContext, pClDevice, nullptr); MockEvent event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto cmdStream = new 
LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh); cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh); cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; auto &csr = cmdQ.getGpgpuCommandStreamReceiver(); std::vector v; MockBuffer buffer; buffer.retain(); auto initialRefCount = buffer.getRefApiCount(); auto initialInternalCount = buffer.getRefInternalCount(); auto bufferSurf = new MemObjSurface(&buffer); EXPECT_EQ(initialInternalCount + 1, buffer.getRefInternalCount()); EXPECT_EQ(initialRefCount, buffer.getRefApiCount()); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); v.push_back(bufferSurf); auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); auto refCount = buffer.getRefApiCount(); auto refInternal = buffer.getRefInternalCount(); event.submitCommand(false); EXPECT_EQ(refCount, buffer.getRefApiCount()); EXPECT_EQ(refInternal - 1, buffer.getRefInternalCount()); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); auto graphicsAllocation = buffer.getGraphicsAllocation(); EXPECT_FALSE(graphicsAllocation->isResident(csr.getOsContext().getContextId())); } TEST_F(InternalsEventTest, GivenSubmitCommandTrueWhenSubmittingCommandsThenRefApiCountAndRefInternalGetHandledCorrectly) { MockCommandQueue cmdQ(mockContext, 
pClDevice, nullptr); MockEvent event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh); cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh); cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; auto &csr = cmdQ.getGpgpuCommandStreamReceiver(); std::vector v; NullSurface *surface = new NullSurface; v.push_back(surface); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(true); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore, taskLevelAfter); } TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOutput) { MockCommandQueue mockCmdQueue(mockContext, pClDevice, nullptr); testing::internal::CaptureStdout(); MockEvent event(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; mockCmdQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh); mockCmdQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh); 
mockCmdQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh); auto blockedCommandsData = std::make_unique(cmdStream, *mockCmdQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandsData->setHeaps(dsh, ioh, ssh); SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface(); pPrintfSurface->DataParamOffset = 0; pPrintfSurface->DataParamSize = 8; std::string testString = "test"; MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; KernelInfo *kernelInfo = const_cast(&pKernel->getKernelInfo()); kernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; kernelInfo->patchInfo.stringDataMap.insert(std::make_pair(0, testString)); uint64_t crossThread[10]; pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(pKernel); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *pClDevice)); printfHandler.get()->prepareDispatch(multiDispatchInfo); auto surface = printfHandler.get()->getSurface(); auto printfSurface = reinterpret_cast(surface->getUnderlyingBuffer()); printfSurface[0] = 8; printfSurface[1] = 0; std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); EXPECT_FALSE(surface->isResident(pDevice->getDefaultEngine().osContext->getContextId())); delete pPrintfSurface; } TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) { auto pCmdQ = make_releaseable(mockContext, pClDevice, nullptr); MockEvent event(nullptr, 
CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new MockBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(MAP, *buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } TEST_F(InternalsEventTest, GivenMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) { auto pCmdQ = make_releaseable(mockContext, pClDevice, nullptr); MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(MAP, *buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } uint32_t commands[] = { CL_COMMAND_NDRANGE_KERNEL, CL_COMMAND_TASK, CL_COMMAND_NATIVE_KERNEL, CL_COMMAND_READ_BUFFER, CL_COMMAND_WRITE_BUFFER, CL_COMMAND_COPY_BUFFER, CL_COMMAND_READ_IMAGE, CL_COMMAND_WRITE_IMAGE, CL_COMMAND_COPY_IMAGE, CL_COMMAND_COPY_IMAGE_TO_BUFFER, CL_COMMAND_COPY_BUFFER_TO_IMAGE, CL_COMMAND_MAP_BUFFER, CL_COMMAND_MAP_IMAGE, CL_COMMAND_UNMAP_MEM_OBJECT, CL_COMMAND_MARKER, CL_COMMAND_ACQUIRE_GL_OBJECTS, CL_COMMAND_RELEASE_GL_OBJECTS, CL_COMMAND_READ_BUFFER_RECT, CL_COMMAND_WRITE_BUFFER_RECT, CL_COMMAND_COPY_BUFFER_RECT, CL_COMMAND_BARRIER, CL_COMMAND_MIGRATE_MEM_OBJECTS, CL_COMMAND_FILL_BUFFER, CL_COMMAND_FILL_IMAGE, CL_COMMAND_SVM_FREE, CL_COMMAND_SVM_MEMCPY, CL_COMMAND_SVM_MEMFILL, CL_COMMAND_SVM_MAP, CL_COMMAND_SVM_UNMAP, }; class InternalsEventProfilingTest : public InternalsEventTest, public 
::testing::WithParamInterface { void SetUp() override { InternalsEventTest::SetUp(); } void TearDown() override { InternalsEventTest::TearDown(); } }; TEST_P(InternalsEventProfilingTest, GivenProfilingWhenEventCreatedThenProfilingSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; std::unique_ptr pCmdQ(new MockCommandQueue(mockContext, pClDevice, props)); std::unique_ptr> event(new MockEvent(pCmdQ.get(), GetParam(), 0, 0)); EXPECT_TRUE(event.get()->isProfilingEnabled()); } INSTANTIATE_TEST_CASE_P(InternalsEventProfilingTest, InternalsEventProfilingTest, ::testing::ValuesIn(commands)); TEST_F(InternalsEventTest, GivenProfilingWhenUserEventCreatedThenProfilingNotSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; std::unique_ptr pCmdQ(new MockCommandQueue(mockContext, pClDevice, props)); std::unique_ptr> event(new MockEvent(pCmdQ.get(), CL_COMMAND_USER, 0, 0)); EXPECT_FALSE(event.get()->isProfilingEnabled()); } TEST_F(InternalsEventTest, GivenProfilingWHENMapOperationTHENTimesSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props); MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); UnalignedBuffer buffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event->setCommand(std::unique_ptr(new CommandMapUnmap(MAP, buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event->submitCommand(false); uint64_t submitTime = 0ULL; event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submitTime, 0); EXPECT_NE(0ULL, submitTime); auto taskLevelAfter = csr.peekTaskLevel(); delete event; EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); delete pCmdQ; } TEST_F(InternalsEventTest, 
GivenUnMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = make_releaseable(mockContext, pClDevice, props); MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledItReleasesMemObjectReference) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = std::make_unique(mockContext, pClDevice, props); MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto buffer = new UnalignedBuffer; auto currentBufferRefInternal = buffer->getRefInternalCount(); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(UNMAP, *buffer, size, offset, false, *pCmdQ))); EXPECT_EQ(currentBufferRefInternal + 1, buffer->getRefInternalCount()); event.submitCommand(false); EXPECT_EQ(currentBufferRefInternal, buffer->getRefInternalCount()); buffer->decRefInternal(); } TEST_F(InternalsEventTest, GivenUnMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = std::make_unique(mockContext, pClDevice, props); MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; event.setCommand(std::unique_ptr(new CommandMapUnmap(UNMAP, 
*buffer, size, offset, false, *pCmdQ))); auto taskLevelBefore = csr.peekTaskLevel(); event.submitCommand(false); auto taskLevelAfter = csr.peekTaskLevel(); EXPECT_EQ(taskLevelBefore + 1, taskLevelAfter); buffer->decRefInternal(); } HWTEST_F(InternalsEventTest, givenCpuProfilingPathWhenEnqueuedMarkerThenDontUseTimeStampNode) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props); MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_MARKER, 0, 0); event->setCPUProfilingPath(true); event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ))); event->submitCommand(false); uint64_t submit, start, end; event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0); event->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0); EXPECT_LT(0u, submit); EXPECT_LT(submit, start); EXPECT_LT(start, end); delete event; delete pCmdQ; } struct InternalsEventWithPerfCountersTest : public InternalsEventTest, public PerformanceCountersFixture { void SetUp() override { PerformanceCountersFixture::SetUp(); InternalsEventTest::SetUp(); createPerfCounters(); pDevice->setPerfCounters(performanceCountersBase.get()); } void TearDown() override { performanceCountersBase.release(); InternalsEventTest::TearDown(); PerformanceCountersFixture::TearDown(); } }; HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWhenEnqueuedMarkerThenDontUseTimeStampNodePerfCounterNode) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props); bool ret = false; ret = pCmdQ->setPerfCountersEnabled(); EXPECT_TRUE(ret); MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_MARKER, 0, 0); event->setCPUProfilingPath(true); 
event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ)));

    event->submitCommand(false);

    uint64_t submit, start, end;
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0);
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0);
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0);

    EXPECT_LT(0u, submit);
    EXPECT_LT(submit, start);
    EXPECT_LT(start, end);

    delete event;
    delete pCmdQ;
}

// Variant in which the event's HW perf-counter node and HW timestamp node are both
// requested (and must be non-null) before the command is submitted; the profiling
// values must still satisfy 0 < submit < start < end.
HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWhenEnqueuedMarkerThenUseTimeStampNodePerfCounterNode) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props);
    pCmdQ->setPerfCountersEnabled();
    MockEvent *event = new MockEvent(pCmdQ, CL_COMMAND_MARKER, 0, 0);
    event->setCPUProfilingPath(true);

    // NOTE(review): ASSERT_* returns from the test on failure, which would skip the
    // two deletes at the bottom.
    HwPerfCounter *perfCounter = event->getHwPerfCounterNode()->tagForCpuAccess;
    ASSERT_NE(nullptr, perfCounter);

    HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess;
    ASSERT_NE(nullptr, timeStamps);

    event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ)));

    event->submitCommand(false);

    uint64_t submit, start, end;
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0);
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0);
    event->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0);

    EXPECT_LT(0u, submit);
    EXPECT_LT(submit, start);
    EXPECT_LT(start, end);

    delete event;
    delete pCmdQ;
}

// An event created on a profiling queue that also had setPerfCountersEnabled() called
// must report both profiling and perf counters as enabled.
TEST_F(InternalsEventWithPerfCountersTest, GivenPerfCountersEnabledWhenEventIsCreatedThenProfilingEnabledAndPerfCountersEnabledAreTrue) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props);
    pCmdQ->setPerfCountersEnabled();
    Event *ev = new Event(pCmdQ,
CL_COMMAND_COPY_BUFFER, 3, 0);
    EXPECT_TRUE(ev->isProfilingEnabled());
    EXPECT_TRUE(ev->isPerfCountersEnabled());
    delete ev;
    delete pCmdQ;
}

// release() on a freshly created UserEvent must return a handle whose isUnused()
// is true.
TEST(Event, WhenReleasingEventThenEventIsNull) {
    UserEvent *ue = new UserEvent();
    auto autoptr = ue->release();
    ASSERT_TRUE(autoptr.isUnused());
}

// Submits a not-ready (virtual) event that carries a compute-kernel command and
// checks the CSR's recursiveLockCounter afterwards (asserted on the line that follows
// this body's last visible statement).
HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) {
    // Thin subclass: re-exports eventsWaitlist and forwards to the base constructor
    // with fixed flag arguments.
    class MockCommandComputeKernel : public CommandComputeKernel {
      public:
        using CommandComputeKernel::eventsWaitlist;
        MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel)
            : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
    };
    // Local event type that makes submitCommand callable from the test.
    class MockEvent : public Event {
      public:
        using Event::submitCommand;
        MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {}
    };

    MockKernelWithInternals kernel(*pClDevice);

    // One heap of each kind is requested from the queue and handed to the kernel operation.
    IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr;
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3);
    auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));

    std::vector surfaces;
    auto kernelOperation = std::make_unique(cmdStream, *pDevice->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
    kernelOperation->setHeaps(ih1, ih2, ih3);

    std::unique_ptr command = std::make_unique(*pCmdQ, kernelOperation, surfaces, kernel);
    // Task level and task count both start as levelNotReady.
    auto virtualEvent = make_releaseable(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);

    virtualEvent->setCommand(std::move(command));

    virtualEvent->submitCommand(false);
EXPECT_EQ(pDevice->getUltCommandStreamReceiver().recursiveLockCounter, 2u);
}

// Submitting a not-ready event that has no command attached is expected to leave the
// CSR's recursiveLockCounter at 1.
HWTEST_F(EventTest, givenVirtualEventWhenSubmitCommandEventNotReadyAndEventWithoutCommandThenOneLockCsrNeeded) {
    // Local event type that makes submitCommand callable from the test.
    class MockEvent : public Event {
      public:
        using Event::submitCommand;
        MockEvent(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount) : Event(cmdQueue, cmdType, taskLevel, taskCount) {}
    };

    auto virtualEvent = make_releaseable(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);

    virtualEvent->submitCommand(false);

    EXPECT_EQ(pDevice->getUltCommandStreamReceiver().recursiveLockCounter, 1u);
}

// Uses one shared counter (executionStamp) as the buffer's storage and as the first
// constructor argument of MockCsr, then checks which stamp values are observed after
// submitting a MAP command (and, further down, an UNMAP command).
// NOTE(review): the flush-before-transfer interpretation comes from the test name and
// the stamp values; how MockCsr updates the counter is not visible here.
HWTEST_F(InternalsEventTest, GivenBufferWithoutZeroCopyOnCommandMapOrUnmapFlushesPreviousTasksBeforeMappingOrUnmapping) {
    struct MockNonZeroCopyBuff : UnalignedBuffer {
        // hostPtr refers to this object's own stamp and memoryStorage to the
        // caller's executionStamp; the buffer size is sizeof(executionStamp).
        MockNonZeroCopyBuff(int32_t &executionStamp)
            : executionStamp(executionStamp), dataTransferedStamp(-1) {
            hostPtr = &dataTransferedStamp;
            memoryStorage = &executionStamp;
            size = sizeof(executionStamp);
            hostPtrMinSize = size;
        }
        void setIsZeroCopy() {
            isZeroCopy = false;
        }

        // Exchanges hostPtr and memoryStorage.
        void swapCopyDirection() {
            std::swap(hostPtr, memoryStorage);
        }

        int32_t &executionStamp;
        int32_t dataTransferedStamp;
    };

    int32_t executionStamp = 0;
    auto csr = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex());
    pDevice->resetCommandStreamReceiver(csr);

    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
    auto pCmdQ = make_releaseable(mockContext, pClDevice, props);

    MockNonZeroCopyBuff buffer(executionStamp);

    MemObjSizeArray size = {{4, 1, 1}};
    MemObjOffsetArray offset = {{0, 0, 0}};
    auto commandMap = std::unique_ptr(new CommandMapUnmap(MAP, buffer, size, offset, false, *pCmdQ));
    // Nothing has been submitted yet: every stamp still has its initial value.
    EXPECT_EQ(0, executionStamp);
    EXPECT_EQ(-1, csr->flushTaskStamp);
    EXPECT_EQ(-1, buffer.dataTransferedStamp);

    auto latestSentFlushTaskCount = csr->peekLatestSentTaskCount();

    commandMap->submit(0, false);
    EXPECT_EQ(1, executionStamp);
    EXPECT_EQ(0,
csr->flushTaskStamp);
    EXPECT_EQ(1, buffer.dataTransferedStamp);
    auto latestSentFlushTaskCountAfterSubmit = csr->peekLatestSentTaskCount();
    EXPECT_GT(latestSentFlushTaskCountAfterSubmit, latestSentFlushTaskCount);

    executionStamp = 0;
    csr->flushTaskStamp = -1;
    buffer.dataTransferedStamp = -1;
    buffer.swapCopyDirection();

    auto commandUnMap = std::unique_ptr(new CommandMapUnmap(UNMAP, buffer, size, offset, false, *pCmdQ));
    EXPECT_EQ(0, executionStamp);
    EXPECT_EQ(-1, csr->flushTaskStamp);
    EXPECT_EQ(-1, buffer.dataTransferedStamp);

    commandUnMap->submit(0, false);
    EXPECT_EQ(1, executionStamp);
    EXPECT_EQ(0, csr->flushTaskStamp);
    EXPECT_EQ(1, buffer.dataTransferedStamp);

    EXPECT_EQ(nullptr, commandUnMap->getCommandStream());
}

// An Event::Callback passes its current status target to the callback function on
// execute(); overrideCallbackExecutionStatusTarget() changes both the value reported by
// the getter and the value the function receives on the next execute().
TEST(EventCallback, WhenOverridingStatusThenEventUsesNewStatus) {
    struct ClbFuncTempStruct {
        // Stores the received status into the cl_int that user data points to.
        static void CL_CALLBACK ClbFuncT(cl_event e, cl_int status, void *retStatus) {
            *((cl_int *)retStatus) = status;
        }
    };

    // 7 is a sentinel that matches neither status checked below.
    cl_int retStatus = 7;
    Event::Callback clb(nullptr, ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, &retStatus);
    EXPECT_EQ(CL_COMPLETE, clb.getCallbackExecutionStatusTarget());
    clb.execute();
    EXPECT_EQ(CL_COMPLETE, retStatus);

    retStatus = 7;
    clb.overrideCallbackExecutionStatusTarget(-1);
    EXPECT_EQ(-1, clb.getCallbackExecutionStatusTarget());
    clb.execute();
    EXPECT_EQ(-1, retStatus);
}

// Submit and queue timestamps set from an explicit TimeStampData must be returned
// unchanged by the matching getters.
TEST_F(EventTest, WhenSettingTimeStampThenCorrectValuesAreSet) {
    MyEvent ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0);
    TimeStampData inTimeStamp = {1ULL, 2ULL};

    ev.setSubmitTimeStamp(&inTimeStamp);
    TimeStampData outtimeStamp = {0, 0};
    outtimeStamp = ev.getSubmitTimeStamp();
    EXPECT_EQ(1ULL, outtimeStamp.GPUTimeStamp);
    EXPECT_EQ(2ULL, outtimeStamp.CPUTimeinNS);

    inTimeStamp.GPUTimeStamp = 3;
    inTimeStamp.CPUTimeinNS = 4;

    ev.setQueueTimeStamp(&inTimeStamp);
    outtimeStamp = ev.getQueueTimeStamp();
    EXPECT_EQ(3ULL, outtimeStamp.GPUTimeStamp);
    EXPECT_EQ(4ULL, outtimeStamp.CPUTimeinNS);
}

// With profiling enabled, the argument-less timestamp setters must record a non-zero
// CPU time; queue and submit stamps must keep a zero GPU timestamp.
TEST_F(EventTest, WhenSettingCpuTimeStampThenCorrectTimeIsSet) {
    MyEvent ev(this->pCmdQ,
CL_COMMAND_COPY_BUFFER, 3, 0); ev.setProfilingEnabled(true); ev.setQueueTimeStamp(); TimeStampData outtimeStamp = {0, 0}; outtimeStamp = ev.getQueueTimeStamp(); EXPECT_NE(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setSubmitTimeStamp(); outtimeStamp = ev.getSubmitTimeStamp(); EXPECT_NE(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setStartTimeStamp(); uint64_t outCPUtimeStamp = ev.getStartTimeStamp(); EXPECT_NE(0ULL, outCPUtimeStamp); ev.setEndTimeStamp(); outCPUtimeStamp = ev.getEndTimeStamp(); EXPECT_NE(0ULL, outCPUtimeStamp); outCPUtimeStamp = ev.getCompleteTimeStamp(); EXPECT_NE(0ULL, outCPUtimeStamp); } TEST_F(EventTest, GivenNoQueueWhenSettingCpuTimeStampThenTimesIsNotSet) { MyEvent ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); ev.setQueueTimeStamp(); TimeStampData outtimeStamp = {0, 0}; outtimeStamp = ev.getQueueTimeStamp(); EXPECT_EQ(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setSubmitTimeStamp(); outtimeStamp = ev.getSubmitTimeStamp(); EXPECT_EQ(0ULL, outtimeStamp.CPUTimeinNS); EXPECT_EQ(0ULL, outtimeStamp.GPUTimeStamp); ev.setStartTimeStamp(); uint64_t outCPUtimeStamp = ev.getStartTimeStamp(); EXPECT_EQ(0ULL, outCPUtimeStamp); ev.setEndTimeStamp(); outCPUtimeStamp = ev.getEndTimeStamp(); EXPECT_EQ(0ULL, outCPUtimeStamp); outCPUtimeStamp = ev.getCompleteTimeStamp(); EXPECT_EQ(0ULL, outCPUtimeStamp); } TEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) { std::unique_ptr event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess; ASSERT_NE(nullptr, timeStamps); //this should not cause any heap corruptions ASSERT_EQ(0ULL, timeStamps->GlobalStartTS); ASSERT_EQ(0ULL, timeStamps->ContextStartTS); ASSERT_EQ(0ULL, timeStamps->GlobalEndTS); ASSERT_EQ(0ULL, timeStamps->ContextEndTS); ASSERT_EQ(0ULL, timeStamps->GlobalCompleteTS); ASSERT_EQ(0ULL, 
timeStamps->ContextCompleteTS); EXPECT_TRUE(timeStamps->isCompleted()); HwTimeStamps *timeStamps2 = event->getHwTimeStampNode()->tagForCpuAccess; ASSERT_EQ(timeStamps, timeStamps2); } TEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) { std::unique_ptr event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation(); ASSERT_NE(nullptr, allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t memoryStorageSize = allocation->getUnderlyingBufferSize(); EXPECT_NE(nullptr, memoryStorage); EXPECT_GT(memoryStorageSize, 0u); } TEST_F(EventTest, WhenEventIsCreatedThenHwTimeStampsMemoryIsPlacedInGraphicsAllocation) { std::unique_ptr event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess; ASSERT_NE(nullptr, timeStamps); GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation(); ASSERT_NE(nullptr, allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t graphicsAllocationSize = allocation->getUnderlyingBufferSize(); EXPECT_GE(timeStamps, memoryStorage); EXPECT_LE(timeStamps + 1, ptrOffset(memoryStorage, graphicsAllocationSize)); } TEST_F(EventTest, GivenNullQueueWhenEventIsCreatedThenProfilingAndPerfCountersAreDisabled) { Event ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0); EXPECT_FALSE(ev.isProfilingEnabled()); EXPECT_FALSE(ev.isPerfCountersEnabled()); } TEST_F(EventTest, GivenProfilingDisabledWhenEventIsCreatedThenPerfCountersAreDisabled) { Event ev(pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); EXPECT_FALSE(ev.isProfilingEnabled()); EXPECT_FALSE(ev.isPerfCountersEnabled()); } TEST_F(InternalsEventTest, GivenOnlyProfilingEnabledWhenEventIsCreatedThenPerfCountersAreDisabled) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; 
MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props); Event *ev = new Event(pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); EXPECT_TRUE(ev->isProfilingEnabled()); EXPECT_FALSE(ev->isPerfCountersEnabled()); delete ev; delete pCmdQ; } TEST_F(EventTest, GivenClSubmittedWhenpeekIsSubmittedThenTrueIsReturned) { Event ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0); int32_t executionStatusSnapshot = CL_SUBMITTED; bool executionStatus = ev.peekIsSubmitted(executionStatusSnapshot); EXPECT_EQ(true, executionStatus); } TEST_F(EventTest, GivenCompletedEventWhenQueryingExecutionStatusAfterFlushThenCsrIsNotFlushed) { cl_int ret; Event ev(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 3); auto &csr = this->pCmdQ->getGpgpuCommandStreamReceiver(); *csr.getTagAddress() = 3; auto previousTaskLevel = csr.peekTaskLevel(); EXPECT_GT(3u, previousTaskLevel); ret = clFlush(this->pCmdQ); ASSERT_EQ(CL_SUCCESS, ret); cl_int execState; ret = clGetEventInfo(&ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(execState), &execState, nullptr); ASSERT_EQ(CL_SUCCESS, ret); EXPECT_EQ(previousTaskLevel, csr.peekTaskLevel()); } HWTEST_F(EventTest, GivenEventCreatedOnMapBufferWithoutCommandWhenSubmittingCommandThenTaskCountIsNotUpdated) { MockEvent ev(this->pCmdQ, CL_COMMAND_MAP_BUFFER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); EXPECT_EQ(CompletionStamp::levelNotReady, ev.peekTaskCount()); ev.submitCommand(false); EXPECT_EQ(0u, ev.peekTaskCount()); } HWTEST_F(EventTest, GivenEventCreatedOnMapImageWithoutCommandWhenSubmittingCommandThenTaskCountIsNotUpdated) { MockEvent ev(this->pCmdQ, CL_COMMAND_MAP_IMAGE, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); EXPECT_EQ(CompletionStamp::levelNotReady, ev.peekTaskCount()); ev.submitCommand(false); EXPECT_EQ(0u, ev.peekTaskCount()); } TEST_F(EventTest, givenCmdQueueWithoutProfilingWhenIsCpuProfilingIsCalledThenFalseIsReturned) { MockEvent ev(this->pCmdQ, CL_COMMAND_MAP_IMAGE, CompletionStamp::levelNotReady, 
CompletionStamp::levelNotReady);
    bool cpuProfiling = ev.isCPUProfilingPath() != 0;
    EXPECT_FALSE(cpuProfiling);
}

// A blocking enqueueMapImage on a CPU-mappable image must return an event whose task
// count and flush stamp equal the values currently held by the command queue.
TEST_F(EventTest, givenOutEventWhenBlockingEnqueueHandledOnCpuThenUpdateTaskCountAndFlushStampFromCmdQ) {
    std::unique_ptr image(ImageHelper::create(&mockContext));
    EXPECT_TRUE(image->mappingOnCpuAllowed());

    // Distinct non-default values so that a copy from the queue is observable.
    pCmdQ->flushStamp->setStamp(10);
    pCmdQ->taskCount = 11;

    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {1, 1, 1};

    cl_int retVal;
    cl_event clEvent;
    pCmdQ->enqueueMapImage(image.get(), CL_TRUE, CL_MAP_READ, origin, region, nullptr, nullptr, 0, nullptr, &clEvent, retVal);

    auto eventObj = castToObject(clEvent);
    EXPECT_EQ(pCmdQ->taskCount, eventObj->peekTaskCount());
    EXPECT_EQ(pCmdQ->flushStamp->peekStamp(), eventObj->flushStamp->peekStamp());
    eventObj->release();
}

// On a queue created with CL_QUEUE_PROFILING_ENABLE, a MAP_IMAGE event must report the
// CPU profiling path.
TEST_F(EventTest, givenCmdQueueWithProfilingWhenIsCpuProfilingIsCalledThenTrueIsReturned) {
    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    std::unique_ptr pCmdQ(new MockCommandQueue(&mockContext, pClDevice, props));

    MockEvent ev(pCmdQ.get(), CL_COMMAND_MAP_IMAGE, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);
    bool cpuProfiling = ev.isCPUProfilingPath() != 0;
    EXPECT_TRUE(cpuProfiling);
}

// peekHasCallbacks() must be false for an event with no callbacks and true once a
// callback is registered for any status. Each scope creates a new event; after the
// first one, every scope registers callbacks for a different set of statuses.
TEST(EventCallback, GivenEventWithCallbacksOnPeekHasCallbacksReturnsTrue) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);

    struct ClbFuncTempStruct {
        // Callback that does nothing; only its registration matters here.
        static void CL_CALLBACK ClbFuncT(cl_event, cl_int, void *) {
        }
    };

    struct SmallMockEvent : Event {
        SmallMockEvent()
            : Event(nullptr, CL_COMMAND_COPY_BUFFER, 0, 0) {
            // The trailing comment must end at its own newline; otherwise it swallows
            // the closing braces and every scoped check that follows.
            this->parentCount = 1; // block event
        }
    };

    {
        SmallMockEvent ev;
        EXPECT_FALSE(ev.peekHasCallbacks());
    }

    {
        SmallMockEvent ev;
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_SUBMITTED, nullptr);
        EXPECT_TRUE(ev.peekHasCallbacks());
        // NOTE(review): one decRefInternal() per addCallback(); presumably balances a
        // reference taken at registration - confirm.
        ev.decRefInternal();
    }

    {
        SmallMockEvent ev;
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_RUNNING, nullptr);
        EXPECT_TRUE(ev.peekHasCallbacks());
        ev.decRefInternal();
    }

    {
SmallMockEvent ev;
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr);
        EXPECT_TRUE(ev.peekHasCallbacks());
        ev.decRefInternal();
    }

    {
        SmallMockEvent ev;
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_SUBMITTED, nullptr);
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr);
        EXPECT_TRUE(ev.peekHasCallbacks());
        ev.decRefInternal();
        ev.decRefInternal();
    }

    {
        SmallMockEvent ev;
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_RUNNING, nullptr);
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr);
        EXPECT_TRUE(ev.peekHasCallbacks());
        ev.decRefInternal();
        ev.decRefInternal();
    }

    {
        SmallMockEvent ev;
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_SUBMITTED, nullptr);
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_RUNNING, nullptr);
        ev.addCallback(ClbFuncTempStruct::ClbFuncT, CL_COMPLETE, nullptr);
        EXPECT_TRUE(ev.peekHasCallbacks());
        ev.decRefInternal();
        ev.decRefInternal();
        ev.decRefInternal();
    }
}

// Adding a child to an event whose status has not been set to complete must leave the
// child with a non-zero blocking-event count.
TEST_F(EventTest, GivenNotCompletedEventWhenAddingChildThenNumEventsBlockingThisIsGreaterThanZero) {
    VirtualEvent virtualEvent(pCmdQ, &mockContext);
    {
        // The parent lives only in this scope, so it is destroyed before the child.
        Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
        event.addChild(virtualEvent);
        EXPECT_NE(0U, virtualEvent.peekNumEventsBlockingThis());
    }
}

// Event, UserEvent and VirtualEvent must all report isExternallySynchronized() == false.
// Each object is created with new and disposed of through release().
TEST(Event, whenCreatingRegularEventsThenExternalSynchronizationIsNotRequired) {
    Event *event = new Event(nullptr, 0, 0, 0);
    EXPECT_FALSE(event->isExternallySynchronized());
    event->release();

    UserEvent *userEvent = new UserEvent();
    EXPECT_FALSE(userEvent->isExternallySynchronized());
    userEvent->release();

    VirtualEvent *virtualEvent = new VirtualEvent();
    EXPECT_FALSE(virtualEvent->isExternallySynchronized());
    virtualEvent->release();
}

// Two not-ready children are unblocked by parents whose task levels are above
// (initial + 5) and below (initial - 5) the CSR task level. Expected: the first child
// gets parent level + 1, the second gets the CSR task level.
HWTEST_F(EventTest, givenEventWithNotReadyTaskLevelWhenUnblockedThenGetTaskLevelFromCsrIfGreaterThanParent) {
    uint32_t initialTaskLevel = 10;
    Event parentEventWithGreaterTaskLevel(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, initialTaskLevel + 5, 0);
    Event parentEventWithLowerTaskLevel(pCmdQ, CL_COMMAND_NDRANGE_KERNEL,
initialTaskLevel - 5, 0);

    Event childEvent0(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);
    Event childEvent1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);

    auto &csr = reinterpret_cast &>(pCmdQ->getGpgpuCommandStreamReceiver());
    csr.taskLevel = initialTaskLevel;

    parentEventWithGreaterTaskLevel.addChild(childEvent0);
    parentEventWithLowerTaskLevel.addChild(childEvent1);

    parentEventWithGreaterTaskLevel.setStatus(CL_COMPLETE);
    parentEventWithLowerTaskLevel.setStatus(CL_COMPLETE);

    EXPECT_EQ(parentEventWithGreaterTaskLevel.getTaskLevel() + 1, childEvent0.getTaskLevel());
    EXPECT_EQ(csr.taskLevel, childEvent1.getTaskLevel());
}

// Adding a child to a parent that was already set to CL_COMPLETE must leave the child
// with zero blocking events.
TEST_F(EventTest, GivenCompletedEventWhenAddingChildThenNumEventsBlockingThisIsZero) {
    VirtualEvent virtualEvent(pCmdQ, &mockContext);
    {
        Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
        event.setStatus(CL_COMPLETE);
        event.addChild(virtualEvent);
        EXPECT_EQ(0U, virtualEvent.peekNumEventsBlockingThis());
    }
}

// With KMD notify and quick KMD sleep both enabled, Event::wait(true, true) is expected
// to call waitForCompletionWithTimeout exactly once with the quick-sleep delay (1) as
// the timeout argument.
HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) {
    // CSR whose waitForCompletionWithTimeout is replaced by a gmock method.
    struct MyCsr : public UltCommandStreamReceiver {
        MyCsr(const ExecutionEnvironment &executionEnvironment) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0) {}
        MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
    };
    HardwareInfo localHwInfo = pDevice->getHardwareInfo();
    localHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify = true;
    localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = true;
    // Different values for the two delays so the matcher can tell them apart.
    localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 1;
    localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = 2;

    pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&localHwInfo);

    auto csr = new ::testing::NiceMock(*pDevice->executionEnvironment);
pDevice->resetCommandStreamReceiver(csr);

    Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    event.updateCompletionStamp(1u, 1u, 1u);

    EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_,
                                                   localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, ::testing::_))
        .Times(1)
        .WillOnce(::testing::Return(true));

    event.wait(true, true);
}

// Counterpart of the quick-sleep test: with enableQuickKmdSleepForSporadicWaits turned
// off and wait(true, false), the mock must be called exactly once with the regular KMD
// notify delay (2) as the timeout argument.
HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWaitingFunction) {
    // CSR whose waitForCompletionWithTimeout is replaced by a gmock method.
    struct MyCsr : public UltCommandStreamReceiver {
        MyCsr(const ExecutionEnvironment &executionEnvironment) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0) {}
        MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait));
    };
    HardwareInfo localHwInfo = pDevice->getHardwareInfo();
    localHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify = true;
    localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep = true;
    localHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits = false;
    localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds = 1;
    localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = 2;

    pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->setHwInfo(&localHwInfo);

    auto csr = new ::testing::NiceMock(*pDevice->executionEnvironment);
    pDevice->resetCommandStreamReceiver(csr);

    Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    event.updateCompletionStamp(1u, 1u, 1u);

    EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_,
                                                   localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, ::testing::_))
        .Times(1)
        .WillOnce(::testing::Return(true));

    event.wait(true, false);
}

// After submitCommand(false) with a CommandWithoutKernel attached, the event's flush
// stamp must equal the CSR's flush stamp (set to 5 beforehand); before submission it
// must be 0.
HWTEST_F(InternalsEventTest, givenCommandWhenSubmitCalledThenUpdateFlushStamp) {
    auto pCmdQ = std::unique_ptr(new MockCommandQueue(mockContext, pClDevice, 0));
    MockEvent *event = new MockEvent(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0);
    auto &csr =
pDevice->getUltCommandStreamReceiver();
    csr.flushStamp->setStamp(5);

    FlushStamp expectedFlushStamp = 0;
    EXPECT_EQ(expectedFlushStamp, event->flushStamp->peekStamp());
    event->setCommand(std::unique_ptr(new CommandWithoutKernel(*pCmdQ)));
    event->submitCommand(false);
    EXPECT_EQ(csr.flushStamp->peekStamp(), event->flushStamp->peekStamp());
    delete event;
}

// When a compute-kernel command is submitted with submitCommand(true), the event's
// flush stamp must keep its initial value 0 even though the CSR's stamp was set to 5.
HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFlushStamp) {
    auto pCmdQ = std::unique_ptr(new MockCommandQueue(mockContext, pClDevice, 0));
    MockEvent *event = new MockEvent(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0);
    auto &csr = pDevice->getUltCommandStreamReceiver();
    csr.flushStamp->setStamp(5);

    MockKernelWithInternals mockKernelWithInternals(*pClDevice);
    auto pKernel = mockKernelWithInternals.mockKernel;

    // Command stream and one heap of each kind for the blocked kernel operation.
    auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
    IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);

    auto blockedCommandsData = std::make_unique(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    blockedCommandsData->setHeaps(dsh, ioh, ssh);
    PreemptionMode preemptionMode = pDevice->getPreemptionMode();
    std::vector v;
    // Ownership of cmd passes to the event through setCommand() below.
    auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1);
    event->setCommand(std::unique_ptr(cmd));

    FlushStamp expectedFlushStamp = 0;
    EXPECT_EQ(expectedFlushStamp, event->flushStamp->peekStamp());
    // NOTE(review): the `true` argument presumably marks the submission as aborted
    // (per the test name) - confirm against Event::submitCommand.
    event->submitCommand(true);

    EXPECT_EQ(expectedFlushStamp, event->flushStamp->peekStamp());
    delete event;
}

// A TakeOwnershipWrapper holds the event's ownership for the lifetime of its scope and
// gives it up when the scope ends.
TEST(EventLockerTests, givenEventWhenEventLockerIsUsedThenOwnershipIsAutomaticallyReleased) {
    Event ev(nullptr,
CL_COMMAND_COPY_BUFFER, 3, 0);
    {
        TakeOwnershipWrapper locker(ev);
        EXPECT_TRUE(ev.hasOwnership());
    }
    EXPECT_FALSE(ev.hasOwnership());
}

// An explicit unlock() gives ownership up immediately; leaving the scope afterwards
// must not change that.
TEST(EventLockerTests, givenEventWhenEventLockerIsUsedAndUnlockedThenOwnershipIsReleased) {
    Event guardedEvent(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);

    {
        TakeOwnershipWrapper ownershipGuard(guardedEvent);
        ownershipGuard.unlock();
        EXPECT_FALSE(guardedEvent.hasOwnership());
    }

    EXPECT_FALSE(guardedEvent.hasOwnership());
}

// unlock() followed by lock() re-acquires ownership, which is dropped at scope exit.
TEST(EventLockerTests, givenEventWhenEventLockerIsUsedAndlockedThenOwnershipIsAcquiredAgain) {
    Event guardedEvent(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);

    {
        TakeOwnershipWrapper ownershipGuard(guardedEvent);
        ownershipGuard.unlock();
        ownershipGuard.lock();
        EXPECT_TRUE(guardedEvent.hasOwnership());
    }

    EXPECT_FALSE(guardedEvent.hasOwnership());
}

// A second lock() on a wrapper that already holds the event keeps ownership, and the
// event is no longer owned once the scope is left.
TEST(EventLockerTests, givenEventWhenEventLockerIsLockedTwiceThenOwnershipIsReleaseAfterLeavingTheScope) {
    Event guardedEvent(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);

    {
        TakeOwnershipWrapper ownershipGuard(guardedEvent);
        ownershipGuard.lock();
        EXPECT_TRUE(guardedEvent.hasOwnership());
    }

    EXPECT_FALSE(guardedEvent.hasOwnership());
}

// With TrackParentEvents enabled, addChild() records the parent in the child's parent
// list and leaves the parent's own list empty.
TEST(EventsDebug, givenEventWhenTrackingOfParentsIsOnThenTrackParents) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.TrackParentEvents.set(true);

    Event parent(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    Event child(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);

    auto &parentsOfParent = parent.getParentEvents();
    auto &parentsOfChild = child.getParentEvents();

    EXPECT_EQ(0u, parentsOfParent.size());
    EXPECT_EQ(0u, parentsOfChild.size());

    parent.addChild(child);

    EXPECT_EQ(0u, parentsOfParent.size());
    EXPECT_EQ(1u, parentsOfChild.size());
    EXPECT_EQ(&parent, parentsOfChild.at(0));

    parent.setStatus(CL_COMPLETE);
}

// With TrackParentEvents disabled, addChild() must leave both parent lists empty.
TEST(EventsDebug, givenEventWhenTrackingOfParentsIsOffThenDoNotTrackParents) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.TrackParentEvents.set(false);
    Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
    auto &parentEvents = event.getParentEvents();
    auto &parentEvents2 = event2.getParentEvents();
    EXPECT_EQ(0u, parentEvents.size());
    EXPECT_EQ(0u,
parentEvents2.size()); event.addChild(event2); EXPECT_EQ(0u, parentEvents.size()); EXPECT_EQ(0u, parentEvents2.size()); event.setStatus(CL_COMPLETE); } compute-runtime-20.13.16352/opencl/test/unit_test/event/event_tests_mt.cpp000066400000000000000000000017571363734646600265340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "event_fixture.h" #include class SmallMockEvent : public Event { public: SmallMockEvent(Context *ctx) : Event(ctx, nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0) {} void switchToSubmitted() { transitionExecutionStatus(CL_SUBMITTED); } void switchToComplete() { transitionExecutionStatus(CL_COMPLETE); } }; TEST(EventTestMt, WhenWaitingForEventsThenDoNotReturnUntilAllStatusesSetToComplete) { for (uint32_t i = 0; i < 100; i++) { std::unique_ptr userEvent = std::unique_ptr(new UserEvent(nullptr)); std::thread t([&]() { userEvent->setStatus(CL_COMPLETE); }); t.join(); cl_event clEvent = userEvent.get(); Event::waitForEvents(1, &clEvent); } } compute-runtime-20.13.16352/opencl/test/unit_test/event/event_tracker_tests.cpp000066400000000000000000000634771363734646600275560ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/event/event.h" #include "opencl/source/event/event_tracker.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "event_fixture.h" #include #include struct ClonedStream : std::stringstream { ClonedStream(std::string &clonedOutput) : clonedOutput(clonedOutput) { } ~ClonedStream() override { clonedOutput = this->str(); } std::string &clonedOutput; }; class EventsTrackerMock : public EventsTracker { public: std::unique_ptr 
createDumpStream(const std::string &filename) override { return std::make_unique(streamMock); } void overrideGlobal() { originGlobal.swap(EventsTracker::globalEvTracker); EventsTracker::globalEvTracker = std::unique_ptr{new EventsTrackerMock()}; } void restoreGlobal() { EventsTrackerMock::shutdownGlobalEvTracker(); EventsTracker::globalEvTracker.swap(originGlobal); } static void shutdownGlobalEvTracker() { EventsTracker::globalEvTracker.reset(); } IFList *getList() { return &trackedEvents; } std::string streamMock; std::unique_ptr originGlobal; }; TEST(EventsTracker, whenCallingGetEventsTrackerThenGetGlobalEventsTrackerInstance) { auto &evTracker1 = EventsTracker::getEventsTracker(); auto &evTracker2 = EventsTracker::getEventsTracker(); EXPECT_EQ(&evTracker1, &evTracker2); EventsTrackerMock::shutdownGlobalEvTracker(); } TEST(EventsTracker, whenCallLabelFunctionThenGetStringWithProperEventId) { UserEvent uEvent; std::unordered_map map; map[&uEvent] = 0; EXPECT_STREQ("e0", EventsTracker::label(&uEvent, map).c_str()); } TEST(EventsTracker, whenCallLabelFunctionWhenEventIsNotInMapThenGetStringWithoutId) { UserEvent uEvent; std::unordered_map map; EXPECT_STREQ("e", EventsTracker::label(&uEvent, map).c_str()); } TEST(EventsTracker, whenCallLabelFunctionThenGetStringWithProperCmdqId) { MockCommandQueue cmdq; std::string expect = "cq" + std::to_string(reinterpret_cast(&cmdq)); EXPECT_STREQ(expect.c_str(), EventsTracker::label(&cmdq).c_str()); } TEST(EventsTracker, givenNullptrCmdqThenNotDumping) { MockCommandQueue *cmdq_ptr = nullptr; std::stringstream stream; std::set dumped; EventsTracker::dumpQueue(cmdq_ptr, stream, dumped); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, givenAlreadyDumpedCmdqThenNotDumping) { MockCommandQueue cmdq; std::stringstream stream; std::set dumped; dumped.insert(&cmdq); EventsTracker::dumpQueue(&cmdq, stream, dumped); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, 
givenCmqdWithTaskCountAndLevelNotReadyThenDumpingCmdqWithNotReadyLabels) {
    MockCommandQueue cmdq;
    cmdq.taskCount = CompletionStamp::levelNotReady;
    cmdq.taskLevel = CompletionStamp::levelNotReady;

    std::stringstream stream;
    std::set dumped;

    EventsTracker::dumpQueue(&cmdq, stream, dumped);

    std::stringstream expected;
    expected << EventsTracker::label(&cmdq) << "[label=\"{------CmdQueue, ptr=" << &cmdq << "------|task count=NOT_READY, level=NOT_READY}\",color=blue];\n";

    EXPECT_STREQ(expected.str().c_str(), stream.str().c_str());
}

// dumpQueue() must print the queue's numeric task count (3) and task level (1) in a
// blue node labelled with the queue address.
TEST(EventsTracker, whenCallDumpQueueThenDumpingCmdqWithProperCountTaskAndLevelValues) {
    MockCommandQueue cmdq;
    cmdq.taskCount = 3;
    cmdq.taskLevel = 1;

    std::stringstream stream;
    std::set dumped;

    EventsTracker::dumpQueue(&cmdq, stream, dumped);

    // The expected text is built with the same pointer formatting as the dump.
    std::stringstream expected;
    expected << EventsTracker::label(&cmdq) << "[label=\"{------CmdQueue, ptr=" << &cmdq << "------|task count=3, level=1}\",color=blue];\n";

    EXPECT_STREQ(expected.str().c_str(), stream.str().c_str());
}

// dumpEdge() between events mapped to ids 0 and 1 must write "e0->e1;\n".
TEST(EventsTracker, whenCallDumpEdgeThenGetStringWithProperLabelOfDumpedEdge) {
    UserEvent uEvent1;
    UserEvent uEvent2;

    std::stringstream stream;
    std::unordered_map map;
    map[&uEvent1] = 0;
    map[&uEvent2] = 1;

    EventsTracker::dumpEdge(&uEvent1, &uEvent2, stream, map);

    EXPECT_STREQ("e0->e1;\n", stream.str().c_str());
}

// dumpNode() for a user event whose task level and task count are levelNotReady must
// print NOT_READY for both, status CL_QUEUED, CALLBACKS=FALSE and color red.
TEST(EventsTracker, givenEventWithTaskLevelAndCountNotReadyThenDumpingNodeWithNotReadyLabels) {
    UserEvent uEvent;
    uEvent.taskLevel = CompletionStamp::levelNotReady;
    uEvent.updateTaskCount(CompletionStamp::levelNotReady);

    std::stringstream stream;
    std::unordered_map map;
    map[&uEvent] = 0;

    EventsTracker::dumpNode(&uEvent, stream, map);

    std::stringstream expected;
    expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n";

    EXPECT_STREQ(expected.str().c_str(), stream.str().c_str());
}

// dumpNode() must print the numeric task count and level (both 1) of a user event.
TEST(EventsTracker, whenCallDumpNodeFunctionThenDumpingNodeWithProperTaskLevelAndCountValues) {
UserEvent uEvent;
    uEvent.taskLevel = 1;
    uEvent.updateTaskCount(1);

    std::stringstream stream;
    std::unordered_map map;
    map[&uEvent] = 0;

    EventsTracker::dumpNode(&uEvent, stream, map);

    std::stringstream expected;
    expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=1, level=1|CALLBACKS=FALSE}\",color=red];\n";

    EXPECT_STREQ(expected.str().c_str(), stream.str().c_str());
}

// dumpNode() with a null event pointer must write the fixed "eNULL" node.
// NOTE(review): the test name says "NotDumping", but the expectation is that a
// placeholder node IS written.
TEST(EventsTracker, givenNullptrEventThenNotDumpingNode) {
    UserEvent *uEvent = nullptr;

    std::stringstream stream;
    std::unordered_map map;
    map[uEvent] = 0;

    EventsTracker::dumpNode(uEvent, stream, map);

    EXPECT_STREQ("eNULL[label=\"{ptr=nullptr}\",color=red];\n", stream.str().c_str());
}

// Dumps a user event and a regular NDRANGE_KERNEL event from the same map: the user
// event is labelled USER_EVENT with an empty command field, the regular event is
// labelled EVENT with its command name.
TEST(EventsTracker, givenEventAndUserEventThenDumpingNodeWithProperLabels) {
    UserEvent uEvent;
    Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady);

    std::stringstream stream;
    std::unordered_map map;
    map[&uEvent] = 0;
    map[&event] = 1;

    EventsTracker::dumpNode(&uEvent, stream, map);

    std::stringstream expecteduEvent;
    expecteduEvent << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n";

    EXPECT_STREQ(expecteduEvent.str().c_str(), stream.str().c_str());

    // Clear the stream so that the second dump is compared on its own.
    stream.str(std::string());
    EventsTracker::dumpNode(&event, stream, map);

    std::stringstream expectedEvent;
    expectedEvent << "e1[label=\"{-----------EVENT ptr=" << &event << "------|CL_COMMAND_NDRANGE_KERNEL|CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n";

    EXPECT_STREQ(expectedEvent.str().c_str(), stream.str().c_str());
}

// A virtual event flagged as the queue's current virtual event must be dumped as a
// V_EVENT node followed by an edge from the queue's label to that node.
TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingWithProperLabels) {
    MockContext ctx;
    MockCommandQueue cmdq;
    VirtualEvent vEvent(&cmdq, &ctx);
    vEvent.setCurrentCmdQVirtualEvent(true);
    vEvent.updateTaskCount(1);

    std::stringstream stream;
    std::unordered_map map;
    map[&vEvent] = 0;

    EventsTracker::dumpNode(&vEvent, stream, map);
std::stringstream expected; expected << "e0[label=\"{---------V_EVENT ptr=" << &vEvent << "------|CMD_UNKNOWN:" << (cl_command_type)-1 << "|CL_QUEUED|task count=1, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n" << EventsTracker::label(&cmdq) << "->e0[label=\"VIRTUAL_EVENT\"];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenEventWithCallbackThenDumpingWithProperLabel) { Event::Callback::ClbFuncT func = [](cl_event ev, cl_int i, void *data) {}; UserEvent uEvent; uEvent.addCallback(func, 0, nullptr); std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; EventsTracker::dumpNode(&uEvent, stream, map); std::stringstream expected; expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=TRUE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenSubmittedEventThenDumpingWithProperLabel) { Event event(nullptr, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); std::stringstream stream; std::unordered_map map; map[&event] = 0; std::stringstream expected; event.setStatus(CL_SUBMITTED); EventsTracker::dumpNode(&event, stream, map); expected << "e0[label=\"{-----------EVENT ptr=" << &event << "------|CL_COMMAND_NDRANGE_KERNEL|CL_SUBMITTED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=yellow];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenSubmittedUserEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_SUBMITTED); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_SUBMITTED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } 
TEST(EventsTracker, givenUserEventWithUnproperStatusThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(-1); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||ABORTED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=green];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenRunningEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_RUNNING); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_RUNNING|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenQueuedEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_QUEUED); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenCompleteEventThenDumpingWithProperLabel) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::stringstream expected; uEvent.setStatus(CL_COMPLETE); EventsTracker::dumpNode(&uEvent, stream, map); expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_COMPLETE|task count=NOT_READY, level=0|CALLBACKS=FALSE}\",color=green];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenNullptrEventThenNotDumpingGraph) { Event *ev = nullptr; std::stringstream 
stream; std::unordered_map map; map[ev] = 0; std::set dumpedCmdQs; std::set dumpedEvents; EventsTracker::dumpGraph(ev, stream, dumpedCmdQs, dumpedEvents, map); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, givenAlreadyDumpedEventThenNotDumpingGraph) { UserEvent uEvent; std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; std::set dumpedCmdQs; std::set dumpedEvents; dumpedEvents.insert(&uEvent); EventsTracker::dumpGraph(&uEvent, stream, dumpedCmdQs, dumpedEvents, map); EXPECT_STREQ("", stream.str().c_str()); } TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingProperGraph) { MockContext ctx; MockCommandQueue cmdq; VirtualEvent vEvent(&cmdq, &ctx); vEvent.setCurrentCmdQVirtualEvent(true); vEvent.updateTaskCount(1); std::stringstream stream; std::unordered_map map; map[&vEvent] = 0; std::set dumpedCmdQs; std::set dumpedEvents; EventsTracker::dumpGraph(&vEvent, stream, dumpedCmdQs, dumpedEvents, map); std::stringstream expected; expected << EventsTracker::label(&cmdq) << "[label=\"{------CmdQueue, ptr=" << &cmdq << "------|task count=0, level=0}\",color=blue];\ne0[label=\"{---------V_EVENT ptr=" << &vEvent << "------|CMD_UNKNOWN:4294967295|CL_QUEUED|task count=1, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n" << EventsTracker::label(&cmdq) << "->e0[label=\"VIRTUAL_EVENT\"];\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); } TEST(EventsTracker, givenTwoEventsWithCommonParentEventThenDumpingProperGraph) { UserEvent uEvent, uEventChild1, uEventChild2; uEvent.addChild(uEventChild1); uEvent.addChild(uEventChild2); std::stringstream stream; std::unordered_map map; map[&uEvent] = 0; map[&uEventChild1] = 1; map[&uEventChild2] = 2; std::set dumpedCmdQs; std::set dumpedEvents; EventsTracker::dumpGraph(&uEvent, stream, dumpedCmdQs, dumpedEvents, map); std::stringstream expected; expected << "e0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, 
level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\ne1[label=\"{------USER_EVENT ptr=" << &uEventChild1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e1;\n"; EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); uEventChild1.updateCompletionStamp(0, 0, 0); uEventChild2.updateCompletionStamp(0, 0, 0); uEvent.updateCompletionStamp(0, 0, 0); uEvent.setStatus(0); } TEST(EventsTracker, whenCalingCreateDumpStreamThenGettingValidFstreamInstance) { std::string testFileName("test_files\\EventsTracker_testfile.gv"); std::shared_ptr stream = EventsTracker::getEventsTracker().createDumpStream(testFileName); EXPECT_TRUE(stream->good()); static_cast(stream.get())->close(); remove(testFileName.c_str()); EventsTrackerMock::shutdownGlobalEvTracker(); } TEST(EventsTracker, whenDeletingEventTwoTimesThenDeletingIsProper) { UserEvent uEvent1; EventsTrackerMock evTrackerMock; std::stringstream expected; evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent1, 1}); evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent1, -2}); evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent1, -3}); evTrackerMock.dump(); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, givenTwoEventsWithSamePtrWhenFirstOneIsDeletedThenDumpingFirstProperly) { UserEvent uEvent; EventsTrackerMock evTrackerMock; std::stringstream expected; evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent, 2}); evTrackerMock.getList()->pushFrontOne(*new TrackedEvent{&uEvent, -1}); evTrackerMock.dump(); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: 
somePragmaData\ne2[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, whenNotifyCreationOfEventThenEventIsDumped) { Event event(nullptr, CL_COMMAND_USER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); EventsTrackerMock evTrackerMock; std::stringstream expected; evTrackerMock.notifyCreation(&event); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, whenNotifyTransitionedExecutionStatusOfEventThenEventIsDumpedWithProperDescription) { UserEvent uEvent; EventsTrackerMock evTrackerMock; evTrackerMock.notifyCreation(&uEvent); evTrackerMock.notifyTransitionedExecutionStatus(); std::stringstream expected; expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne0[label=\"{------USER_EVENT ptr=" << &uEvent << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, whenNotifyDestructionOfEventThenEventIsDumped) { UserEvent *uEvent = new UserEvent(); EventsTrackerMock evTrackerMock; evTrackerMock.notifyCreation(uEvent); evTrackerMock.notifyDestruction(uEvent); delete uEvent; std::stringstream stream; stream << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(stream.str().c_str(), evTrackerMock.streamMock.c_str()); } TEST(EventsTracker, givenSeveralEventsWhenOneIsCompleteThenDumpingWithProperLabels) { UserEvent *uEvent1 = new UserEvent(); UserEvent *uEvent2 = new UserEvent(); UserEvent *uEvent3 = new UserEvent(); EventsTrackerMock evTrackerMock; 
evTrackerMock.notifyCreation(uEvent1); evTrackerMock.notifyCreation(uEvent2); evTrackerMock.notifyCreation(uEvent3); uEvent2->setStatus(CL_COMPLETE); evTrackerMock.notifyTransitionedExecutionStatus(); evTrackerMock.notifyDestruction(uEvent2); delete uEvent2; std::stringstream stream; stream << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne2[label=\"{------USER_EVENT ptr=" << uEvent3 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(stream.str().c_str(), evTrackerMock.streamMock.c_str()); delete uEvent1; delete uEvent3; } TEST(EventsTracker, givenEventsWithDependenciesBetweenThemThenDumpingProperGraph) { EventsTrackerMock evTrackerMock; UserEvent uEvent1; evTrackerMock.notifyCreation(&uEvent1); evTrackerMock.dump(); std::stringstream expected; expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); UserEvent uEvent2; evTrackerMock.notifyCreation(&uEvent2); evTrackerMock.dump(); expected.str(std::string()); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); UserEvent uEventChild1; evTrackerMock.notifyCreation(&uEventChild1); 
uEvent1.addChild(uEventChild1); evTrackerMock.dump(); expected.str(std::string()); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); UserEvent uEventChild2; evTrackerMock.notifyCreation(&uEventChild2); uEvent1.addChild(uEventChild2); evTrackerMock.dump(); expected.str(std::string()); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" << &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne3[label=\"{------USER_EVENT ptr=" << &uEventChild2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e3;\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); uEvent2.addChild(uEvent1); evTrackerMock.dump(); expected.str(std::string()); expected << "digraph events_registry_" << &evTrackerMock << " {\nnode [shape=record]\n//pragma: somePragmaData\ne1[label=\"{------USER_EVENT ptr=" << &uEvent2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0[label=\"{------USER_EVENT ptr=" 
<< &uEvent1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne3[label=\"{------USER_EVENT ptr=" << &uEventChild2 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e3;\ne2[label=\"{------USER_EVENT ptr=" << &uEventChild1 << "------||CL_QUEUED|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\ne0->e2;\ne1->e0;\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); uEventChild1.updateCompletionStamp(0, 0, 0); uEventChild2.updateCompletionStamp(0, 0, 0); uEvent2.updateCompletionStamp(0, 0, 0); uEvent1.updateCompletionStamp(0, 0, 0); uEvent2.setStatus(0); uEvent1.setStatus(0); } TEST(EventsTracker, whenEventsDebugEnableFlagIsTrueAndCreateOrChangeStatusOrDestroyEventThenDumpingGraph) { DebugManagerStateRestore dbRestore; DebugManager.flags.EventsTrackerEnable.set(true); EventsTrackerMock evTrackerMock; evTrackerMock.overrideGlobal(); Event *ev = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); std::stringstream expected; expected << "digraph events_registry_" << &EventsTracker::getEventsTracker() << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), static_cast(&EventsTracker::getEventsTracker())->streamMock.c_str()); ev->setStatus(1); expected.str(std::string()); expected << "digraph events_registry_" << &EventsTracker::getEventsTracker() << " {\nnode [shape=record]\n//pragma: somePragmaData\ne0[label=\"{-----------EVENT ptr=" << ev << "------|CL_COMMAND_NDRANGE_KERNEL|CL_RUNNING|task count=NOT_READY, level=NOT_READY|CALLBACKS=FALSE}\",color=red];\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), static_cast(&EventsTracker::getEventsTracker())->streamMock.c_str()); delete ev; expected.str(std::string()); expected << "digraph events_registry_" << &EventsTracker::getEventsTracker() << " {\nnode [shape=record]\n//pragma: 
somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), static_cast(&EventsTracker::getEventsTracker())->streamMock.c_str()); evTrackerMock.restoreGlobal(); } TEST(EventsTracker, givenEventsFromDifferentThreadsThenDumpingProperly) { class EventsTrackerMockMT : public EventsTrackerMock { public: TrackedEvent *getNodes() override { auto TrackedEventsMock = std::shared_ptr>{new IFList}; return TrackedEventsMock->detachNodes(); } std::shared_ptr> *TrackedEventsMock; }; auto evTrackerMockMT = std::shared_ptr{new EventsTrackerMockMT()}; UserEvent uEvent1; UserEvent uEvent2; evTrackerMockMT->getList()->pushFrontOne(*new TrackedEvent{&uEvent1, 2}); evTrackerMockMT->getList()->pushFrontOne(*new TrackedEvent{&uEvent2, 3}); evTrackerMockMT->dump(); std::stringstream expected; expected << "digraph events_registry_" << evTrackerMockMT << " {\nnode [shape=record]\n//pragma: somePragmaData\n\n}\n"; EXPECT_STREQ(expected.str().c_str(), evTrackerMockMT->streamMock.c_str()); } compute-runtime-20.13.16352/opencl/test/unit_test/event/user_events_tests.cpp000066400000000000000000001120541363734646600272460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "event_fixture.h" using namespace NEO; TEST(UserEvent, GivenUserEventWhenGettingEventCommandTypeThenClCommandUserIsReturned) { UserEvent uEvent; size_t retSize; cl_int retValue; retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_QUEUE, 0, nullptr, &retSize); 
ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(sizeof(cl_command_queue), retSize); auto cmdQueue = reinterpret_cast(static_cast(0xdeadbeaf)); retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_QUEUE, retSize, &cmdQueue, 0); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(nullptr, cmdQueue); retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_TYPE, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(sizeof(cl_event_info), retSize); auto cmdType = CL_COMMAND_SVM_UNMAP; clGetEventInfo(&uEvent, CL_EVENT_COMMAND_TYPE, retSize, &cmdType, 0); EXPECT_EQ(CL_COMMAND_USER, cmdType); } TEST(UserEvent, WhenGettingEventContextThenCorrectContextIsReturned) { MockContext mc; cl_context dummyContext = &mc; UserEvent uEvent(&mc); size_t retSize; cl_int retValue; retValue = clGetEventInfo(&uEvent, CL_EVENT_CONTEXT, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(sizeof(cl_context), retSize); cl_context context; retValue = clGetEventInfo(&uEvent, CL_EVENT_CONTEXT, retSize, &context, 0); ASSERT_EQ(CL_SUCCESS, retValue); ASSERT_EQ(context, dummyContext); } TEST(UserEvent, GivenInitialStatusOfUserEventWhenGettingEventContextThenNullIsReturned) { UserEvent uEvent; cl_context context; auto retValue = clGetEventInfo(&uEvent, CL_EVENT_CONTEXT, sizeof(cl_context), &context, 0); ASSERT_EQ(CL_SUCCESS, retValue); ASSERT_EQ(context, nullptr); } TEST(UserEvent, GivenInitialStatusOfUserEventWhenGettingCommandExecutionStatusThenClSubmittedIsReturned) { UserEvent uEvent; size_t retSize; cl_int retValue; retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(sizeof(cl_int), retSize); auto cmdStatus = CL_COMPLETE; retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, retSize, &cmdStatus, 0); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(CL_SUBMITTED, cmdStatus); } TEST(UserEvent, givenUserEventWhenItIsQueriedForExecutionStatusThenClQueueIsReturned) { UserEvent uEvent; 
EXPECT_EQ(CL_QUEUED, uEvent.peekExecutionStatus()); } TEST(UserEvent, givenUserEventWhenItIsCreatedThenItIsInInitialState) { UserEvent uEvent; EXPECT_TRUE(uEvent.isInitialEventStatus()); } TEST(UserEvent, givenUserEventWhenItIsCreatedAndSetThenItIsNotInInitialState) { UserEvent uEvent; uEvent.setStatus(CL_COMPLETE); EXPECT_FALSE(uEvent.isInitialEventStatus()); } TEST(UserEvent, GivenUserEventWhenGettingEventReferenceCountThenOneIsReturned) { UserEvent uEvent; size_t retSize; cl_int retValue; retValue = clGetEventInfo(&uEvent, CL_EVENT_REFERENCE_COUNT, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(sizeof(cl_uint), retSize); auto refCount = 100; retValue = clGetEventInfo(&uEvent, CL_EVENT_REFERENCE_COUNT, retSize, &refCount, 0); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(1, refCount); } TEST(UserEvent, GivenSetCompleteStatusWhenGettingEventCommandExecutionStatusThenClCompleteIsReturned) { UserEvent uEvent; uEvent.setStatus(CL_COMPLETE); size_t retSize; cl_int retValue; retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, 0, nullptr, &retSize); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(sizeof(cl_int), retSize); auto cmdStatus = CL_COMPLETE; retValue = clGetEventInfo(&uEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, retSize, &cmdStatus, 0); ASSERT_EQ(CL_SUCCESS, retValue); EXPECT_EQ(CL_COMPLETE, cmdStatus); } TEST(UserEvent, GivenInitialUserEventWhenGettingCommandsThenNullIsReturned) { UserEvent uEvent; EXPECT_EQ(nullptr, uEvent.peekCommand()); } TEST(UserEvent, GivenInitialUserEventStateWhenCheckingReadyForSubmissionThenFalseIsReturned) { UserEvent uEvent; EXPECT_FALSE(uEvent.isReadyForSubmission()); } TEST(UserEvent, GivenUserEventWhenGettingTaskLevelThenZeroIsReturned) { MyUserEvent uEvent; EXPECT_EQ(0U, uEvent.getTaskLevel()); EXPECT_FALSE(uEvent.wait(false, false)); } TEST(UserEvent, WhenSettingStatusThenReadyForSubmissionisTrue) { UserEvent uEvent; uEvent.setStatus(0); EXPECT_TRUE(uEvent.isReadyForSubmission()); } TEST(UserEvent, 
givenUserEventWhenStatusIsCompletedThenReturnZeroTaskLevel) { UserEvent uEvent; uEvent.setStatus(CL_QUEUED); EXPECT_EQ(CompletionStamp::levelNotReady, uEvent.getTaskLevel()); uEvent.setStatus(CL_SUBMITTED); EXPECT_EQ(CompletionStamp::levelNotReady, uEvent.getTaskLevel()); uEvent.setStatus(CL_RUNNING); EXPECT_EQ(CompletionStamp::levelNotReady, uEvent.getTaskLevel()); uEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, uEvent.getTaskLevel()); } typedef HelloWorldTest EventTests; TEST_F(MockEventTests, GivenBlockedUserEventWhenEnqueueingNdRangeWithoutReturnEventThenDoNotSubmitToCsr) { uEvent = make_releaseable(); cl_event userEvent = uEvent.get(); cl_event *eventWaitList = &userEvent; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR auto retVal = callOneWorkItemNDRKernel(eventWaitList, 1); auto taskCountAfter = csr.peekTaskCount(); //queue should be in blocked state at this moment, task level should be inherited from user event EXPECT_EQ(CompletionStamp::levelNotReady, pCmdQ->taskLevel); //queue should be in blocked state at this moment, task count should be inherited from user event EXPECT_EQ(CompletionStamp::levelNotReady, pCmdQ->taskCount); //queue should be in blocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), true); //and virtual event should be created ASSERT_NE(nullptr, pCmdQ->virtualEvent); //check if kernel was in fact not submitted EXPECT_EQ(taskCountAfter, taskCount); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenBlockedUserEventWhenEnqueueingNdRangeWithReturnEventThenDoNotSubmitToCsr) { uEvent = make_releaseable(); cl_event userEvent = uEvent.get(); cl_event retEvent = nullptr; cl_event *eventWaitList = &userEvent; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR auto retVal = callOneWorkItemNDRKernel(eventWaitList, 1, &retEvent); auto taskCountAfter = csr.peekTaskCount(); //queue should be in blocked state at this moment, task level should be 
inherited from user event EXPECT_EQ(CompletionStamp::levelNotReady, pCmdQ->taskLevel); //queue should be in blocked state at this moment, task count should be inherited from user event EXPECT_EQ(CompletionStamp::levelNotReady, pCmdQ->taskCount); //queue should be in blocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), true); //and virtual event should be created ASSERT_NE(nullptr, pCmdQ->virtualEvent); //that matches the retEvent EXPECT_EQ(retEvent, pCmdQ->virtualEvent); //check if kernel was in fact not submitted EXPECT_EQ(taskCountAfter, taskCount); //and if normal event inherited status from user event Event *returnEvent = castToObject(retEvent); EXPECT_EQ(returnEvent->taskLevel, CompletionStamp::levelNotReady); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, WhenAddingChildEventThenConnectionIsCreatedAndCountOnReturnEventIsInjected) { uEvent = make_releaseable(); cl_event userEvent = uEvent.get(); cl_event retEvent = nullptr; cl_event *eventWaitList = &userEvent; //call NDR callOneWorkItemNDRKernel(eventWaitList, 1, &retEvent); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); EXPECT_EQ(1U, returnEvent->peekNumEventsBlockingThis()); //check if user event knows his childs EXPECT_TRUE(uEvent->peekHasChildEvents()); //make sure that proper event is set as child Event *childEvent = pCmdQ->virtualEvent; EXPECT_EQ(childEvent, uEvent->peekChildEvents()->ref); auto retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenNormalEventThatHasParentUserEventWhenUserEventIsUnblockedThenChildEventIsCompleteIfGpuCompletedProcessing) { UserEvent uEvent; Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); uEvent.addChild(event); EXPECT_FALSE(event.updateStatusAndCheckCompletion()); EXPECT_EQ(CL_QUEUED, event.peekExecutionStatus()); uEvent.setStatus(CL_COMPLETE); EXPECT_EQ(CL_COMPLETE, event.peekExecutionStatus()); } TEST_F(MockEventTests, 
WhenAddingTwoChildEventsThenConnectionIsCreatedAndCountOnReturnEventIsInjected) { uEvent = make_releaseable(); auto uEvent2 = make_releaseable(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); ASSERT_EQ(2U, returnEvent->peekNumEventsBlockingThis()); //check if user event knows his childs EXPECT_TRUE(uEvent->peekHasChildEvents()); //check if user event knows his childs EXPECT_TRUE(uEvent2->peekHasChildEvents()); //make sure that proper event is set as child Event *childEvent = pCmdQ->virtualEvent; EXPECT_EQ(childEvent, uEvent->peekChildEvents()->ref); EXPECT_FALSE(childEvent->isReadyForSubmission()); //make sure that proper event is set as child EXPECT_EQ(childEvent, uEvent2->peekChildEvents()->ref); //signal one user event, child event after this operation isn't ready for submission uEvent->setStatus(0); //check if user event knows his children EXPECT_FALSE(uEvent->peekHasChildEvents()); EXPECT_EQ(1U, returnEvent->peekNumEventsBlockingThis()); EXPECT_FALSE(returnEvent->isReadyForSubmission()); auto retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); uEvent2->setStatus(-1); } TEST_F(MockEventTests, GivenTwoUserEvenstWhenCountOnNdr1IsInjectedThenItIsPropagatedToNdr2viaVirtualEvent) { uEvent = make_releaseable(context); auto uEvent2 = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR, no return Event auto retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); //check if dependency count is increased Event *returnEvent1 = castToObject(pCmdQ->virtualEvent); ASSERT_EQ(2U, returnEvent1->peekNumEventsBlockingThis()); //check if user event 
knows his childs EXPECT_TRUE(uEvent->peekHasChildEvents()); //check if user event knows his childs EXPECT_TRUE(uEvent2->peekHasChildEvents()); //make sure that proper event is set as child Event *childEvent = pCmdQ->virtualEvent; EXPECT_EQ(childEvent, uEvent->peekChildEvents()->ref); //make sure that proper event is set as child EXPECT_EQ(childEvent, uEvent2->peekChildEvents()->ref); //call NDR, no events, Virtual Event mustn't leak and will be bind to previous Virtual Event retVal = callOneWorkItemNDRKernel(); EXPECT_EQ(CL_SUCCESS, retVal); //queue must be in blocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), true); //check if virtual event2 is a child of virtual event 1 VirtualEvent *returnEvent2 = castToObject(pCmdQ->virtualEvent); ASSERT_TRUE(returnEvent1->peekHasChildEvents()); EXPECT_EQ(returnEvent2, returnEvent1->peekChildEvents()->ref); //now signal both parents and see if all childs are notified uEvent->setStatus(CL_COMPLETE); uEvent2->setStatus(CL_COMPLETE); //queue shoud be in unblocked state EXPECT_EQ(pCmdQ->isQueueBlocked(), false); //finish returns immidieatly retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenQueueThatIsBlockedByUserEventWhenIsQueueBlockedIsCalledThenVirtualEventOnlyQueriesForExecutionStatus) { struct mockEvent : public Event { using Event::Event; void updateExecutionStatus() override { updateExecutionStatusCalled = true; } bool updateExecutionStatusCalled = false; }; mockEvent mockedVirtualEvent(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, CompletionStamp::levelNotReady, 0); pCmdQ->virtualEvent = &mockedVirtualEvent; EXPECT_TRUE(pCmdQ->isQueueBlocked()); EXPECT_FALSE(mockedVirtualEvent.updateExecutionStatusCalled); pCmdQ->virtualEvent = nullptr; } TEST_F(MockEventTests, GivenUserEventSignalingWhenFinishThenExecutionIsNotBlocked) { uEvent = make_releaseable(context); auto uEvent2 = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / 
sizeof(cl_event); //call NDR, no return Event auto retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); uEvent->setStatus(0); uEvent2->setStatus(0); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, WhenCompletingUserEventThenStatusPropagatedToNormalEvent) { uEvent = make_releaseable(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); //set user event status uEvent->setStatus(CL_COMPLETE); //wait for returned event auto retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(EventTests, WhenSignalingThenUserEventObtainsProperTaskLevel) { UserEvent uEvent(context); auto &csr = pDevice->getUltCommandStreamReceiver(); auto taskLevel = csr.peekTaskLevel(); csr.taskCount = 3; uEvent.setStatus(CL_COMPLETE); EXPECT_EQ(taskLevel, uEvent.taskLevel); csr.taskLevel = 2; csr.taskCount = 5; uEvent.setStatus(CL_COMPLETE); //even though csr taskLevel has changed, user event taskLevel should remain constant EXPECT_EQ(0u, uEvent.taskLevel); } TEST_F(MockEventTests, GivenUserEventWhenSettingStatusCompleteThenTaskLevelIsUpdatedCorrectly) { uEvent = make_releaseable(context); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskLevel = csr.peekTaskLevel(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); EXPECT_EQ(CompletionStamp::levelNotReady, returnEvent->taskLevel); EXPECT_EQ(CompletionStamp::levelNotReady, returnEvent->peekTaskCount()); //now set user 
event for complete status, this triggers update of childs. uEvent->setStatus(CL_COMPLETE); //child event should have the same taskLevel as parentEvent, as parent event is top of the tree and doesn't have any commands. EXPECT_EQ(returnEvent->taskLevel, taskLevel); EXPECT_EQ(csr.peekTaskCount(), returnEvent->peekTaskCount()); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenCompleteParentWhenWaitingForEventsThenChildrenAreComplete) { uEvent = make_releaseable(context); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //check if dependency count is increased Event *returnEvent = castToObject(retEvent); EXPECT_EQ(CompletionStamp::levelNotReady, returnEvent->taskLevel); //now set user event for complete status, this triggers update of childs. uEvent->setStatus(CL_COMPLETE); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, WhenStatusIsAbortedWhenWaitingForEventsThenErrorIsReturned) { UserEvent uEvent(context); cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //negative values indicate abortion uEvent.setStatus(-1); retVal = clWaitForEvents(sizeOfWaitList, eventWaitList); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); } TEST_F(MockEventTests, GivenAbortedUserEventWhenEnqueingNdrThenDoNotFlushToCsr) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); 
EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); auto taskCountAfter = csr.peekTaskCount(); EXPECT_EQ(taskCount, taskCountAfter); Event *pChildEvent = (Event *)retEvent; EXPECT_EQ(CompletionStamp::levelNotReady, pChildEvent->getTaskLevel()); cl_int eventStatus = 0; retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(-1, eventStatus); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenAbortedParentWhenDestroyingChildEventThenDoNotProcessBlockedCommands) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskCount = csr.peekTaskCount(); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //call second NDR to create Virtual Event retVal = callOneWorkItemNDRKernel(&retEvent, 1, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); auto taskCountAfter = csr.peekTaskCount(); EXPECT_EQ(taskCount, taskCountAfter); Event *pChildEvent = (Event *)retEvent; EXPECT_EQ(CompletionStamp::levelNotReady, pChildEvent->taskLevel); cl_int eventStatus = 0; retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(-1, eventStatus); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); taskCountAfter = csr.peekTaskCount(); EXPECT_EQ(taskCount, taskCountAfter); } TEST_F(MockEventTests, GivenAbortedUserEventWhenWaitingForEventThenErrorIsReturned) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; //call NDR 
retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); eventWaitList[0] = retEvent; retVal = clWaitForEvents(sizeOfWaitList, eventWaitList); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenAbortedUserEventAndTwoInputsWhenWaitingForEventThenErrorIsReturned) { uEvent = make_releaseable(context); auto uEvent2 = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get(), uEvent2.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent = nullptr; //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); eventWaitList[0] = retEvent; retVal = clWaitForEvents(sizeOfWaitList, eventWaitList); EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, retVal); uEvent2->setStatus(-1); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenAbortedQueueWhenFinishingThenSuccessIsReturned) { uEvent = make_releaseable(context); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto taskLevel = csr.peekTaskLevel(); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); EXPECT_EQ(CL_SUCCESS, retVal); //negative values indicate abortion uEvent->setStatus(-1); //make sure we didn't asked CSR for task level for this event, as it is aborted EXPECT_NE(taskLevel, uEvent->taskLevel); retVal = clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, GivenUserEventWhenEnqueingThenDependantPacketIsRegistered) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / 
sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); //virtual event should register for this command packet ASSERT_NE(nullptr, pCmdQ->virtualEvent); EXPECT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); EXPECT_FALSE(pCmdQ->virtualEvent->peekIsCmdSubmitted()); } TEST_F(MockEventTests, GivenUserEventWhenEnqueingThenCommandPacketContainsValidCommandStream) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); //virtual event should register for this command packet ASSERT_NE(nullptr, pCmdQ->virtualEvent); auto cmd = static_cast(pCmdQ->virtualEvent->peekCommand()); EXPECT_NE(0u, cmd->getCommandStream()->getUsed()); } TEST_F(MockEventTests, WhenStatusIsSetThenBlockedPacketsAreSent) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList); EXPECT_EQ(CL_SUCCESS, retVal); //task level untouched as queue blocked by user event EXPECT_EQ(csr.peekTaskLevel(), 0u); //virtual event have stored command packet Event *childEvent = pCmdQ->virtualEvent; EXPECT_NE(nullptr, childEvent); EXPECT_NE(nullptr, childEvent->peekCommand()); EXPECT_FALSE(childEvent->isReadyForSubmission()); EXPECT_NE(nullptr, childEvent->peekCommand()); //signal the input user event uEvent->setStatus(0); EXPECT_EQ(csr.peekTaskLevel(), 1u); } TEST_F(MockEventTests, WhenFinishingThenVirtualEventIsNullAndReleaseEventReturnsSuccess) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); 
EXPECT_EQ(CL_SUCCESS, retVal); uEvent->setStatus(0); //call finish multiple times retVal |= clFinish(pCmdQ); retVal |= clFinish(pCmdQ); retVal |= clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); //Virtual Event is gone, but retEvent still lives. EXPECT_EQ(nullptr, pCmdQ->virtualEvent); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, givenBlockedQueueThenCommandStreamDoesNotChangeWhileEnqueueAndAfterSignaling) { uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent; auto &cs = pCmdQ->getCS(1024); auto used = cs.getSpace(0); //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); auto used2 = cs.getSpace(0); EXPECT_EQ(used2, used); uEvent->setStatus(CL_COMPLETE); auto used3 = cs.getSpace(0); //call finish multiple times retVal |= clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(used3, used); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenUserEventThatHasCallbackAndBlockQueueWhenQueueIsQueriedForBlockedThenCallBackIsCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); struct EV : UserEvent { EV(Context *ctx) : UserEvent(ctx) { } void updateExecutionStatus() override { updated++; } int updated = 0; }; auto event1 = MockEventBuilder::createAndFinalize(&pCmdQ->getContext()); struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { bool *called = (bool *)data; *called = true; } }; cl_event eventWaitList[] = {event1}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); ASSERT_EQ(retVal, CL_SUCCESS); bool callbackCalled = false; retVal = clSetEventCallback(event1, CL_COMPLETE, E2Clb::SignalEv2, &callbackCalled); 
ASSERT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(1, event1->updated); EXPECT_TRUE(pCmdQ->isQueueBlocked()); event1->setStatus(CL_COMPLETE); // Must wait for event that depend on callback event to ensure callback is called. Event::waitForEvents(1, &retEvent); EXPECT_TRUE(callbackCalled); clReleaseEvent(retEvent); event1->release(); } TEST_F(EventTests, GivenEventCallbackWithWaitWhenWaitingForEventsThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); UserEvent event1; struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { UserEvent *event2 = static_cast(data); event2->setStatus(CL_COMPLETE); } }; cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(nullptr, 0, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); clSetEventCallback(retEvent, CL_COMPLETE, E2Clb::SignalEv2, &event1); cl_event events[] = {&event1}; auto result = UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, GivenEventCallbackWithoutWaitWhenWaitingForEventsThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); UserEvent event1(context); struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { UserEvent *event2 = static_cast(data); event2->setStatus(CL_COMPLETE); } }; cl_event retEvent; //call NDR retVal = callOneWorkItemNDRKernel(nullptr, 0, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); clSetEventCallback(retEvent, CL_COMPLETE, E2Clb::SignalEv2, &event1); cl_event events[] = {&event1}; auto result = UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(MockEventTests, 
GivenEnqueueReadImageWhenWaitingforEventThenSuccessIsReturned) { cl_event retEvent; uEvent = make_releaseable(context); cl_event eventWaitList[] = {uEvent.get()}; auto image = clUniquePtr(Image2dHelper<>::create(this->context)); ASSERT_NE(nullptr, image); auto retVal = EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ, image.get(), false, EnqueueReadImageTraits::origin, EnqueueReadImageTraits::region, EnqueueReadImageTraits::rowPitch, EnqueueReadImageTraits::slicePitch, EnqueueReadImageTraits::hostPtr, EnqueueReadImageTraits::mapAllocation, 1, eventWaitList, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetUserEventStatus(uEvent.get(), CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clWaitForEvents(1, &retEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, WhenWaitingForEventsThenTemporaryAllocationsAreDestroyed) { auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto memoryManager = pCmdQ->getDevice().getMemoryManager(); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); GraphicsAllocation *temporaryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize}); csr.getInternalAllocationStorage()->storeAllocation(std::unique_ptr(temporaryAllocation), TEMPORARY_ALLOCATION); EXPECT_EQ(temporaryAllocation, csr.getTemporaryAllocations().peekHead()); temporaryAllocation->updateTaskCount(10, csr.getOsContext().getContextId()); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, 11); cl_event eventWaitList[] = {&event}; event.waitForEvents(1, eventWaitList); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); } TEST_F(EventTest, WhenUserEventIsCreatedThenWaitIsNonBlocking) { UserEvent event; auto result = event.wait(false, false); EXPECT_FALSE(result); } TEST_F(EventTest, GivenSingleUserEventWhenWaitingForEventsThenSuccessIsReturned) { UserEvent event1; event1.setStatus(CL_COMPLETE); cl_event 
events[] = {&event1}; auto result = UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); } TEST_F(EventTest, GivenMultipleOutOfOrderCallbacksWhenWaitingForEventsThenSuccessIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); UserEvent event1; struct E2Clb { static void CL_CALLBACK SignalEv2(cl_event e, cl_int status, void *data) { UserEvent *event2 = static_cast(data); event2->setStatus(CL_COMPLETE); } }; UserEvent event2; event2.addCallback(E2Clb::SignalEv2, CL_COMPLETE, &event1); event2.setStatus(CL_COMPLETE); cl_event events[] = {&event1, &event2}; auto result = UserEvent::waitForEvents(sizeof(events) / sizeof(events[0]), events); EXPECT_EQ(result, CL_SUCCESS); } TEST_F(EventTests, WhenCalbackWasRegisteredOnCallbackThenExecutionPassesCorrectExecutionStatus) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableAsyncEventsHandler.set(false); struct HelperClb { static void CL_CALLBACK SetClbStatus(cl_event e, cl_int status, void *data) { cl_int *ret = static_cast(data); *ret = status; } }; cl_event retEvent; retVal = callOneWorkItemNDRKernel(nullptr, 0, &retEvent); ASSERT_EQ(CL_SUCCESS, retVal); cl_int submittedClbExecStatus = -1; cl_int runningClbExecStatus = -1; cl_int completeClbExecStatus = -1; retVal = clSetEventCallback(retEvent, CL_SUBMITTED, HelperClb::SetClbStatus, &submittedClbExecStatus); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(retEvent, CL_RUNNING, HelperClb::SetClbStatus, &runningClbExecStatus); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clSetEventCallback(retEvent, CL_COMPLETE, HelperClb::SetClbStatus, &completeClbExecStatus); ASSERT_EQ(CL_SUCCESS, retVal); auto result = UserEvent::waitForEvents(1, &retEvent); ASSERT_EQ(result, CL_SUCCESS); EXPECT_EQ(CL_SUBMITTED, submittedClbExecStatus); EXPECT_EQ(CL_RUNNING, runningClbExecStatus); EXPECT_EQ(CL_COMPLETE, completeClbExecStatus); clReleaseEvent(retEvent); } TEST_F(EventTests, 
GivenMultipleEventsWhenEventsAreCompletedThenCorrectNumberOfBlockingEventsIsReported) { UserEvent uEvent1(context); UserEvent uEvent2(context); UserEvent uEvent3(context); EXPECT_EQ(0U, uEvent1.peekNumEventsBlockingThis()); EXPECT_EQ(0U, uEvent2.peekNumEventsBlockingThis()); EXPECT_EQ(0U, uEvent3.peekNumEventsBlockingThis()); cl_event eventWaitList[] = {&uEvent1, &uEvent2, &uEvent3}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event retClEvent; retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retClEvent); Event *retEvent = (Event *)retClEvent; ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, retEvent); EXPECT_EQ(3U, retEvent->peekNumEventsBlockingThis()); retVal = clSetUserEventStatus(&uEvent1, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2U, retEvent->peekNumEventsBlockingThis()); retVal = clSetUserEventStatus(&uEvent2, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1U, retEvent->peekNumEventsBlockingThis()); retVal = clSetUserEventStatus(&uEvent3, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0U, retEvent->peekNumEventsBlockingThis()); retVal |= clFinish(pCmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, WhenPassingBlockedUserEventToEnqueueNdRangeThenCommandQueueIsNotRetained) { auto userEvent = clCreateUserEvent(pContext, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto uEvent = (UserEvent *)userEvent; ASSERT_NE(nullptr, uEvent); auto cmdQueue = uEvent->getCommandQueue(); ASSERT_EQ(nullptr, cmdQueue); auto intitialRefCount = pCmdQ->getRefInternalCount(); auto retVal = callOneWorkItemNDRKernel(&userEvent, 1); ASSERT_EQ(CL_SUCCESS, retVal); cmdQueue = uEvent->getCommandQueue(); ASSERT_EQ(nullptr, cmdQueue); // Virtual event add refference to cmq queue. 
EXPECT_EQ(intitialRefCount + 1, pCmdQ->getRefInternalCount()); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); ASSERT_EQ(CL_SUCCESS, retVal); pCmdQ->isQueueBlocked(); // VirtualEvent should be freed, so refCount should equal initial value EXPECT_EQ(intitialRefCount, pCmdQ->getRefInternalCount()); } TEST_F(EventTests, givenUserEventWhenSetStatusIsDoneThenDeviceMutextisAcquired) { struct mockedEvent : public UserEvent { using UserEvent::UserEvent; bool setStatus(cl_int status) override { auto commandStreamReceiverOwnership = ctx->getDevice(0)->getDefaultEngine().commandStreamReceiver->obtainUniqueOwnership(); mutexProperlyAcquired = commandStreamReceiverOwnership.owns_lock(); return true; } bool mutexProperlyAcquired = false; }; mockedEvent mockEvent(this->context); clSetUserEventStatus(&mockEvent, CL_COMPLETE); EXPECT_TRUE(mockEvent.mutexProperlyAcquired); } compute-runtime-20.13.16352/opencl/test/unit_test/event/user_events_tests_mt.cpp000066400000000000000000000114361363734646600277500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_manager.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "event_fixture.h" #include typedef HelloWorldTest EventTests; TEST_F(MockEventTests, eventCreatedFromUserEventsThatIsNotSignaledDoesntFlushToCSR) { uEvent = make_releaseable(); cl_event retEvent = nullptr; cl_event eventWaitList[] = {uEvent.get()}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); //call NDR auto retVal = callOneWorkItemNDRKernel(eventWaitList, sizeOfWaitList, &retEvent); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); *csr.getTagAddress() = (unsigned int)-1; auto taskLevelBeforeWaitForEvents = csr.peekTaskLevel(); int counter = 0; int Deadline = 20000; std::atomic 
ThreadStarted(false); std::atomic WaitForEventsCompleted(false); std::thread t([&]() { ThreadStarted = true; //call WaitForEvents clWaitForEvents(1, &retEvent); WaitForEventsCompleted = true; }); //wait for the thread to start while (!ThreadStarted) ; //now wait a while. while (!WaitForEventsCompleted && counter++ < Deadline) ; ASSERT_EQ(WaitForEventsCompleted, false) << "WaitForEvents returned while user event is not signaled!"; EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(taskLevelBeforeWaitForEvents, csr.peekTaskLevel()); //set event to CL_COMPLETE uEvent->setStatus(CL_COMPLETE); t.join(); retVal = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(EventTests, givenUserEventBlockingEnqueueWithBlockingFlagWhenUserEventIsCompletedAfterBlockedPathIsChosenThenBlockingFlagDoesNotCauseStall) { std::unique_ptr srcBuffer(BufferHelper<>::create()); std::unique_ptr dst(new char[srcBuffer->getSize()]); for (int32_t i = 0; i < 20; i++) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); std::thread t([&]() { uEvent.setStatus(CL_COMPLETE); }); auto retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, srcBuffer->getSize(), dst.get(), nullptr, sizeOfWaitList, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); } } TEST_F(EventTests, givenUserEventBlockingEnqueueWithBlockingFlagWhenUserEventIsCompletedAfterUpdateFromCompletionStampThenBlockingFlagDoesNotCauseStall) { std::unique_ptr srcBuffer(BufferHelper<>::create()); std::unique_ptr dst(new char[srcBuffer->getSize()]); UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); std::thread t([&]() { while (true) { pCmdQ->takeOwnership(); if (pCmdQ->taskLevel == CompletionStamp::levelNotReady) { pCmdQ->releaseOwnership(); break; } pCmdQ->releaseOwnership(); } uEvent.setStatus(CL_COMPLETE); }); auto retVal = pCmdQ->enqueueReadBuffer(srcBuffer.get(), CL_TRUE, 0, 
srcBuffer->getSize(), dst.get(), nullptr, sizeOfWaitList, eventWaitList, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); t.join(); } HWTEST_F(EventTests, givenOneThreadUpdatingUserEventAnotherWaitingOnFinishWhenFinishIsCalledThenItWaitsForCorrectTaskCount) { MockCommandQueueHw mockCmdQueue(context, pClDevice, nullptr); std::unique_ptr srcBuffer(BufferHelper<>::create()); std::unique_ptr dst(new char[srcBuffer->getSize()]); for (uint32_t i = 0; i < 100; i++) { UserEvent uEvent; cl_event eventWaitList[] = {&uEvent}; int sizeOfWaitList = sizeof(eventWaitList) / sizeof(cl_event); cl_event returnedEvent = nullptr; std::atomic_bool go{false}; std::atomic_bool updateEvent{true}; std::thread t([&]() { while (!go) ; uEvent.setStatus(CL_COMPLETE); }); auto retVal = mockCmdQueue.enqueueReadBuffer(srcBuffer.get(), CL_FALSE, 0, srcBuffer->getSize(), dst.get(), nullptr, sizeOfWaitList, eventWaitList, &returnedEvent); EXPECT_EQ(CL_SUCCESS, retVal); std::thread t2([&]() { while (updateEvent) { castToObject(returnedEvent)->updateExecutionStatus(); } }); go = true; clFinish(&mockCmdQueue); EXPECT_EQ(mockCmdQueue.latestTaskCountWaited, i + 1); t.join(); updateEvent = false; t2.join(); clReleaseEvent(returnedEvent); } } compute-runtime-20.13.16352/opencl/test/unit_test/execution_environment/000077500000000000000000000000001363734646600262615ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/execution_environment/CMakeLists.txt000066400000000000000000000004731363734646600310250ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_execution_environment ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/execution_environment_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_execution_environment}) 
execution_environment_tests.cpp000066400000000000000000000326231363734646600345650ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/execution_environment/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/destructor_counted.h" #include "opencl/source/aub/aub_center.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_memory_operations_handler.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; TEST(ExecutionEnvironment, givenDefaultConstructorWhenItIsCalledThenExecutionEnvironmentHasInitialRefCountZero) { ExecutionEnvironment environment; EXPECT_EQ(0, environment.getRefInternalCount()); EXPECT_EQ(0, environment.getRefApiCount()); } TEST(ExecutionEnvironment, givenPlatformWhenItIsConstructedThenItCretesExecutionEnvironmentWithOneRefCountInternal) { auto executionEnvironment = new ExecutionEnvironment(); EXPECT_EQ(0, executionEnvironment->getRefInternalCount()); std::unique_ptr 
platform(new Platform(*executionEnvironment)); EXPECT_EQ(executionEnvironment, platform->peekExecutionEnvironment()); EXPECT_EQ(1, executionEnvironment->getRefInternalCount()); } TEST(ExecutionEnvironment, givenPlatformAndExecutionEnvironmentWithRefCountsWhenPlatformIsDestroyedThenExecutionEnvironmentIsNotDeleted) { auto executionEnvironment = new ExecutionEnvironment(); std::unique_ptr platform(new Platform(*executionEnvironment)); executionEnvironment->incRefInternal(); platform.reset(); EXPECT_EQ(1, executionEnvironment->getRefInternalCount()); executionEnvironment->decRefInternal(); } TEST(ExecutionEnvironment, WhenCreatingDevicesThenThoseDevicesAddRefcountsToExecutionEnvironment) { auto executionEnvironment = new ExecutionEnvironment(); auto expectedRefCounts = executionEnvironment->getRefInternalCount(); auto devices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_LT(0u, devices[0]->getNumAvailableDevices()); if (devices[0]->getNumAvailableDevices() > 1) { expectedRefCounts++; } expectedRefCounts += devices[0]->getNumAvailableDevices(); EXPECT_EQ(expectedRefCounts, executionEnvironment->getRefInternalCount()); } TEST(ExecutionEnvironment, givenDeviceThatHaveRefferencesAfterPlatformIsDestroyedThenDeviceIsStillUsable) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleSubDevices.set(1); auto executionEnvironment = new ExecutionEnvironment(); std::unique_ptr platform(new Platform(*executionEnvironment)); platform->initialize(DeviceFactory::createDevices(*executionEnvironment)); auto device = platform->getClDevice(0); EXPECT_EQ(1, device->getRefInternalCount()); device->incRefInternal(); platform.reset(nullptr); EXPECT_EQ(1, device->getRefInternalCount()); EXPECT_EQ(1, executionEnvironment->getRefInternalCount()); device->decRefInternal(); } TEST(ExecutionEnvironment, givenPlatformWhenItIsCreatedThenItCreatesMemoryManagerInExecutionEnvironment) { auto executionEnvironment = new ExecutionEnvironment(); Platform 
platform(*executionEnvironment); prepareDeviceEnvironments(*executionEnvironment); platform.initialize(DeviceFactory::createDevices(*executionEnvironment)); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } TEST(ExecutionEnvironment, givenDeviceWhenItIsDestroyedThenMemoryManagerIsStillAvailable) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); std::unique_ptr device(Device::create(executionEnvironment, 0u)); device.reset(nullptr); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } TEST(RootDeviceEnvironment, givenExecutionEnvironmentWhenInitializeAubCenterIsCalledThenItIsReceivesCorrectInputParams) { MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[0].get()); rootDeviceEnvironment->initAubCenter(true, "test.aub", CommandStreamReceiverType::CSR_AUB); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_TRUE(rootDeviceEnvironment->localMemoryEnabledReceived); EXPECT_STREQ(rootDeviceEnvironment->aubFileNameReceived.c_str(), "test.aub"); } TEST(RootDeviceEnvironment, givenUseAubStreamFalseWhenGetAubManagerIsCalledThenReturnNull) { DebugManagerStateRestore dbgRestore; DebugManager.flags.UseAubStream.set(false); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->initAubCenter(false, "", CommandStreamReceiverType::CSR_AUB); auto aubManager = rootDeviceEnvironment->aubCenter->getAubManager(); EXPECT_EQ(nullptr, aubManager); } TEST(RootDeviceEnvironment, givenExecutionEnvironmentWhenInitializeAubCenterIsCalledThenItIsInitalizedOnce) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto rootDeviceEnvironment = 
executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->initAubCenter(false, "", CommandStreamReceiverType::CSR_AUB); auto currentAubCenter = rootDeviceEnvironment->aubCenter.get(); EXPECT_NE(nullptr, currentAubCenter); auto currentAubStreamProvider = currentAubCenter->getStreamProvider(); EXPECT_NE(nullptr, currentAubStreamProvider); auto currentAubFileStream = currentAubStreamProvider->getStream(); EXPECT_NE(nullptr, currentAubFileStream); rootDeviceEnvironment->initAubCenter(false, "", CommandStreamReceiverType::CSR_AUB); EXPECT_EQ(currentAubCenter, rootDeviceEnvironment->aubCenter.get()); EXPECT_EQ(currentAubStreamProvider, rootDeviceEnvironment->aubCenter->getStreamProvider()); EXPECT_EQ(currentAubFileStream, rootDeviceEnvironment->aubCenter->getStreamProvider()->getStream()); } TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerIsCalledThenLocalMemorySupportedInMemoryManagerHasCorrectValue) { const HardwareInfo *hwInfo = defaultHwInfo.get(); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(hwInfo)); auto executionEnvironment = device->getExecutionEnvironment(); auto enableLocalMemory = HwHelper::get(hwInfo->platform.eRenderCoreFamily).getEnableLocalMemory(*hwInfo); executionEnvironment->initializeMemoryManager(); EXPECT_EQ(enableLocalMemory, executionEnvironment->memoryManager->isLocalMemorySupported(device->getRootDeviceIndex())); } TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenInitializeMemoryManagerIsCalledThenItIsInitalized) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } static_assert(sizeof(ExecutionEnvironment) == sizeof(std::unique_ptr) + sizeof(std::vector) + sizeof(std::unique_ptr) + (is64bit ? 
16 : 12), "New members detected in ExecutionEnvironment, please ensure that destruction sequence of objects is correct"); TEST(ExecutionEnvironment, givenExecutionEnvironmentWithVariousMembersWhenItIsDestroyedThenDeleteSequenceIsSpecified) { uint32_t destructorId = 0u; struct MemoryMangerMock : public DestructorCounted { MemoryMangerMock(uint32_t &destructorId, ExecutionEnvironment &executionEnvironment) : DestructorCounted(destructorId, executionEnvironment) {} }; struct GmmHelperMock : public DestructorCounted { GmmHelperMock(uint32_t &destructorId, const HardwareInfo *hwInfo) : DestructorCounted(destructorId, nullptr, hwInfo) {} }; struct OsInterfaceMock : public DestructorCounted { OsInterfaceMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct MemoryOperationsHandlerMock : public DestructorCounted { MemoryOperationsHandlerMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct AubCenterMock : public DestructorCounted { AubCenterMock(uint32_t &destructorId) : DestructorCounted(destructorId, defaultHwInfo.get(), false, "", CommandStreamReceiverType::CSR_AUB) {} }; struct CompilerInterfaceMock : public DestructorCounted { CompilerInterfaceMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct BuiltinsMock : public DestructorCounted { BuiltinsMock(uint32_t &destructorId) : DestructorCounted(destructorId) {} }; struct SourceLevelDebuggerMock : public DestructorCounted { SourceLevelDebuggerMock(uint32_t &destructorId) : DestructorCounted(destructorId, nullptr) {} }; auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[0]->gmmHelper = std::make_unique(destructorId, defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(destructorId); 
executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(destructorId); executionEnvironment->memoryManager = std::make_unique(destructorId, *executionEnvironment); executionEnvironment->rootDeviceEnvironments[0]->aubCenter = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->builtins = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->compilerInterface = std::make_unique(destructorId); executionEnvironment->rootDeviceEnvironments[0]->debugger = std::make_unique(destructorId); executionEnvironment.reset(nullptr); EXPECT_EQ(8u, destructorId); } TEST(ExecutionEnvironment, givenMultipleRootDevicesWhenTheyAreCreatedTheyAllReuseTheSameMemoryManager) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } std::unique_ptr device(Device::create(executionEnvironment, 0u)); auto &commandStreamReceiver = device->getGpgpuCommandStreamReceiver(); auto memoryManager = device->getMemoryManager(); std::unique_ptr device2(Device::create(executionEnvironment, 1u)); EXPECT_NE(&commandStreamReceiver, &device2->getGpgpuCommandStreamReceiver()); EXPECT_EQ(memoryManager, device2->getMemoryManager()); } TEST(ExecutionEnvironment, givenUnproperSetCsrFlagValueWhenInitializingMemoryManagerThenCreateDefaultMemoryManager) { DebugManagerStateRestore restorer; DebugManager.flags.SetCommandStreamReceiver.set(10); auto executionEnvironment = std::make_unique(defaultHwInfo.get()); executionEnvironment->initializeMemoryManager(); EXPECT_NE(nullptr, executionEnvironment->memoryManager); } TEST(ExecutionEnvironment, whenCalculateMaxOsContexCountThenGlobalVariableHasProperValue) { VariableBackup osContextCountBackup(&MemoryManager::maxOsContextCount, 0); 
uint32_t numRootDevices = 17u; MockExecutionEnvironment executionEnvironment(nullptr, true, numRootDevices); auto expectedOsContextCount = 0u; for (const auto &rootDeviceEnvironment : executionEnvironment.rootDeviceEnvironments) { auto hwInfo = rootDeviceEnvironment->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); auto osContextCount = hwHelper.getGpgpuEngineInstances(*hwInfo).size(); auto subDevicesCount = HwHelper::getSubDevicesCount(hwInfo); bool hasRootCsr = subDevicesCount > 1; expectedOsContextCount += static_cast(osContextCount * subDevicesCount + hasRootCsr); } EXPECT_EQ(expectedOsContextCount, MemoryManager::maxOsContextCount); } compute-runtime-20.13.16352/opencl/test/unit_test/execution_model/000077500000000000000000000000001363734646600250155ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/execution_model/CMakeLists.txt000066400000000000000000000007671363734646600275670ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_execution_model ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_execution_model_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_dispatch_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_dispatch_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/submit_blocked_parent_kernel_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_execution_model}) enqueue_execution_model_kernel_tests.cpp000066400000000000000000000742011363734646600351420ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/execution_model/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/engine_node_helper.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include 
"opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/local_id_gen.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/per_thread_data.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" using namespace NEO; static const char *binaryFile = "simple_block_kernel"; static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; typedef ExecutionModelKernelTest ParentKernelEnqueueTest; HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenDeviceQueueDSHHasCorrectlyFilledInterfaceDesriptorTables) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; pKernel->createReflectionSurface(); BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer(); void *executionModelDsh = 
executionModelDshAllocation->getUnderlyingBuffer(); EXPECT_NE(nullptr, executionModelDsh); INTERFACE_DESCRIPTOR_DATA *idData = static_cast(ptrOffset(executionModelDsh, DeviceQueue::colorCalcStateSize)); size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); MockMultiDispatchInfo multiDispatchInfo(pKernel); auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation(); auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch(); auto &hardwareInfo = pKernel->getDevice().getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); if (pKernel->getKernelInfo().name == "kernel_reflection") { EXPECT_NE(0u, idData[0].getSamplerCount()); EXPECT_NE(0u, idData[0].getSamplerStatePointer()); } EXPECT_NE(0u, idData[0].getConstantIndirectUrbEntryReadLength()); EXPECT_NE(0u, idData[0].getCrossThreadConstantDataReadLength()); EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, idData[0].getDenormMode()); EXPECT_EQ(static_cast(kernelIsaAddress), idData[0].getKernelStartPointer()); EXPECT_EQ(static_cast(kernelIsaAddress >> 32), idData[0].getKernelStartPointerHigh()); const uint32_t blockFirstIndex = 1; for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); ASSERT_NE(nullptr, pBlockInfo); ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment); ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); auto grfSize = 
pPlatform->getClDevice(0)->getDeviceInfo().grfSize; const uint32_t sizeCrossThreadData = pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize / grfSize; auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*pBlockInfo->patchInfo.threadPayload); auto sizePerThreadData = getPerThreadSizeLocalIDs(pBlockInfo->patchInfo.executionEnvironment->LargestCompiledSIMDSize, numChannels); uint32_t numGrfPerThreadData = static_cast(sizePerThreadData / grfSize); numGrfPerThreadData = std::max(numGrfPerThreadData, 1u); EXPECT_EQ(numGrfPerThreadData, idData[blockFirstIndex + i].getConstantIndirectUrbEntryReadLength()); EXPECT_EQ(sizeCrossThreadData, idData[blockFirstIndex + i].getCrossThreadConstantDataReadLength()); EXPECT_NE((uint64_t)0u, ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer()); uint64_t blockKernelAddress = ((uint64_t)idData[blockFirstIndex + i].getKernelStartPointerHigh() << 32) | (uint64_t)idData[blockFirstIndex + i].getKernelStartPointer(); uint64_t expectedBlockKernelAddress = pBlockInfo->getGraphicsAllocation()->getGpuAddressToPatch(); auto &hardwareInfo = pKernel->getDevice().getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { expectedBlockKernelAddress += pBlockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } EXPECT_EQ(expectedBlockKernelAddress, blockKernelAddress); } } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelWithPrivateSurfaceWhenParentKernelIsEnqueuedThenPrivateSurfaceIsMadeResident) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; int32_t executionStamp = 0; auto mockCSR = new MockCsr(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); 
pDevice->resetCommandStreamReceiver(mockCSR); size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { kernelRequiringPrivateSurface = i; break; } } ASSERT_NE(kernelRequiringPrivateSurface, pKernel->getProgram()->getBlockKernelManager()->getCount()); GraphicsAllocation *privateSurface = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); if (privateSurface == nullptr) { privateSurface = mockCSR->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); pKernel->getProgram()->getBlockKernelManager()->pushPrivateSurface(privateSurface, kernelRequiringPrivateSurface); } pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); EXPECT_TRUE(privateSurface->isResident(mockCSR->getOsContext().getContextId())); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlocksWithPrivateMemoryWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenPrivateAllocationIsMadeResidentWhenEventUnblocks) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; size_t kernelRequiringPrivateSurface = pKernel->getProgram()->getBlockKernelManager()->getCount(); for (size_t i = 0; i < pKernel->getProgram()->getBlockKernelManager()->getCount(); ++i) { if (nullptr != pKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(i)->patchInfo.pAllocateStatelessPrivateSurface) { kernelRequiringPrivateSurface = i; break; } } ASSERT_NE(kernelRequiringPrivateSurface, 
pKernel->getProgram()->getBlockKernelManager()->getCount()); auto privateAllocation = pKernel->getProgram()->getBlockKernelManager()->getPrivateSurface(kernelRequiringPrivateSurface); if (privateAllocation == nullptr) { privateAllocation = csr.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize}); blockKernelManager->pushPrivateSurface(privateAllocation, kernelRequiringPrivateSurface); } auto uEvent = make_releaseable(pContext); auto clEvent = static_cast(uEvent.get()); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); EXPECT_FALSE(csr.isMadeResident(privateAllocation)); uEvent->setStatus(CL_COMPLETE); EXPECT_TRUE(csr.isMadeResident(privateAllocation)); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelIsCalledThenBlockKernelIsaAllocationIsMadeResident) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 0, nullptr, nullptr); auto blockCount = blockKernelManager->getCount(); for (auto blockId = 0u; blockId < blockCount; blockId++) { EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); } } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenBlockKernelManagerFilledWithBlocksWhenMakeInternalAllocationsResidentIsCalledThenAllSurfacesAreMadeResident) { if (pClDevice->getSupportedClVersion() >= 20) { auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; blockKernelManager->makeInternalAllocationsResident(csr); auto blockCount = blockKernelManager->getCount(); for (auto blockId 
= 0u; blockId < blockCount; blockId++) { EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); } } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, GivenParentKernelWithBlocksWhenEnqueueKernelThatIsBlockedByUserEventIsCalledThenBlockKernelIsaAllocationIsMadeResidentWhenEventUnblocks) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; auto blockKernelManager = pKernel->getProgram()->getBlockKernelManager(); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.storeMakeResidentAllocations = true; auto uEvent = make_releaseable(pContext); auto clEvent = static_cast(uEvent.get()); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, gws, 1, &clEvent, nullptr); auto blockCount = blockKernelManager->getCount(); for (auto blockId = 0u; blockId < blockCount; blockId++) { EXPECT_FALSE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); } uEvent->setStatus(CL_COMPLETE); for (auto blockId = 0u; blockId < blockCount; blockId++) { EXPECT_TRUE(csr.isMadeResident(blockKernelManager->getBlockKernelInfo(blockId)->getGraphicsAllocation())); } } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedSecondTimeThenDeviceQueueDSHIsResetToInitialOffset) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; auto dsh = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); size_t executionModelDSHUsedBefore = dsh->getUsed(); uint32_t colorCalcSize = DeviceQueue::colorCalcStateSize; EXPECT_EQ(colorCalcSize, executionModelDSHUsedBefore); MockMultiDispatchInfo multiDispatchInfo(pKernel); pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, 
workItems, workItems, 0, nullptr, nullptr); size_t executionModelDSHUsedAfterFirst = dsh->getUsed(); EXPECT_LT(executionModelDSHUsedBefore, executionModelDSHUsedAfterFirst); pDevQueueHw->resetDeviceQueue(); pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); size_t executionModelDSHUsedAfterSecond = dsh->getUsed(); EXPECT_EQ(executionModelDSHUsedAfterFirst, executionModelDSHUsedAfterSecond); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; pKernel->createReflectionSurface(); BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(); MockMultiDispatchInfo multiDispatchInfo(pKernel); auto ssh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); // prealign the ssh so that it won't need to be realigned in enqueueKernel // this way, we can assume the location in memory into which the surface states // will be coies ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); // mark the assumed place for surface states size_t parentSshOffset = ssh->getUsed(); pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); ASSERT_NE(nullptr, pBlockInfo); 
ASSERT_NE(nullptr, pBlockInfo->patchInfo.dataParameterStream); ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment); ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr); blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); if (blockKernel->getNumberOfBindingTableStates() > 0) { ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState); auto dstBlockBti = ptrOffset(blockSSH, pBlockInfo->patchInfo.bindingTableState->Offset); EXPECT_EQ(0U, reinterpret_cast(dstBlockBti) % INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE); auto dstBindingTable = reinterpret_cast(dstBlockBti); auto srcBlockBti = ptrOffset(pBlockInfo->heapInfo.pSsh, pBlockInfo->patchInfo.bindingTableState->Offset); auto srcBindingTable = reinterpret_cast(srcBlockBti); for (uint32_t i = 0; i < blockKernel->getNumberOfBindingTableStates(); ++i) { uint32_t dstSurfaceStatePointer = dstBindingTable[i].getSurfaceStatePointer(); uint32_t srcSurfaceStatePointer = srcBindingTable[i].getSurfaceStatePointer(); auto *dstSurfaceState = reinterpret_cast(ptrOffset(ssh->getCpuBase(), dstSurfaceStatePointer)); auto *srcSurfaceState = reinterpret_cast(ptrOffset(pBlockInfo->heapInfo.pSsh, srcSurfaceStatePointer)); EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE))); } blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize()); } delete blockKernel; } } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenReflectionSurfaceIsCreated) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { const size_t globalOffsets[3] = {0, 0, 0}; 
const size_t workItems[3] = {1, 1, 1}; MockMultiDispatchInfo multiDispatchInfo(pKernel); pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); EXPECT_NE(nullptr, pKernel->getKernelReflectionSurface()); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueIsNotReset) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; cl_queue_properties properties[3] = {0}; MockMultiDispatchInfo multiDispatchInfo(pKernel); MockDeviceQueueHw mockDevQueue(context, pClDevice, properties[0]); context->setDefaultDeviceQueue(&mockDevQueue); // Acquire CS to check if reset queue was called mockDevQueue.acquireEMCriticalSection(); auto mockEvent = make_releaseable(context); cl_event eventBlocking = mockEvent.get(); pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 1, &eventBlocking, nullptr); EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree()); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenNonBlockedQueueWhenParentKernelIsEnqueuedThenDeviceQueueDSHAddressIsProgrammedInStateBaseAddressAndDSHIsMadeResident) { typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); ASSERT_NE(nullptr, pDevQueueHw); const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; MockMultiDispatchInfo multiDispatchInfo(pKernel); int32_t executionStamp = 0; auto mockCSR = new MockCsrBase(executionStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCSR); pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); auto &cmdStream = mockCSR->getCS(0); 
HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); hwParser.findHardwareCommands(); auto stateBaseAddressItor = hwParser.itorStateBaseAddress; ASSERT_NE(hwParser.cmdList.end(), stateBaseAddressItor); auto *stateBaseAddress = (STATE_BASE_ADDRESS *)*stateBaseAddressItor; uint64_t addressProgrammed = stateBaseAddress->getDynamicStateBaseAddress(); EXPECT_EQ(addressProgrammed, pDevQueue->getDshBuffer()->getGpuAddress()); bool dshAllocationResident = false; for (auto allocation : mockCSR->madeResidentGfxAllocations) { if (allocation == pDevQueue->getDshBuffer()) { dshAllocationResident = true; break; } } EXPECT_TRUE(dshAllocationResident); } } INSTANTIATE_TEST_CASE_P(ParentKernelEnqueueTest, ParentKernelEnqueueTest, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); class ParentKernelEnqueueFixture : public ExecutionModelSchedulerTest, public testing::Test { public: void SetUp() override { ExecutionModelSchedulerTest::SetUp(); } void TearDown() override { ExecutionModelSchedulerTest::TearDown(); } }; HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedTheDefaultDeviceQueueAndEventPoolIsPatched) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); const auto &patchInfo = parentKernel->getKernelInfo().patchInfo; if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData()), patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset); EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *patchLocation); } if (patchInfo.pAllocateStatelessEventPoolSurface) { auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData()), patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); 
EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *patchLocation); } } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedThenBlocksDSHOnReflectionSurfaceArePatchedWithDeviceQueueAndEventPoolAddresses) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); void *reflectionSurface = parentKernel->getKernelReflectionSurface()->getUnderlyingBuffer(); BlockKernelManager *blockManager = parentKernel->getProgram()->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); uint32_t defaultQueueOffset = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset; uint32_t eventPoolOffset = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset; uint32_t defaultQueueSize = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize; uint32_t eventPoolSize = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::getConstantBufferOffset(reflectionSurface, i); if (defaultQueueSize == sizeof(uint64_t)) { EXPECT_EQ_VAL(pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset)); } else { EXPECT_EQ((uint32_t)pDevQueueHw->getQueueBuffer()->getGpuAddress(), *(uint32_t *)ptrOffset(reflectionSurface, offset + defaultQueueOffset)); } if (eventPoolSize == sizeof(uint64_t)) { EXPECT_EQ_VAL(pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint64_t *)ptrOffset(reflectionSurface, offset + eventPoolOffset)); } else { EXPECT_EQ((uint32_t)pDevQueueHw->getEventPoolBuffer()->getGpuAddress(), *(uint32_t 
*)ptrOffset(reflectionSurface, offset + eventPoolOffset)); } } } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToNonBlockedQueueThenDeviceQueueCriticalSetionIsAcquired) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree()); pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); EXPECT_FALSE(pDevQueueHw->isEMCriticalSectionFree()); } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnqueuedToBlockedQueueThenDeviceQueueCriticalSetionIsNotAcquired) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); auto mockEvent = make_releaseable>(context); cl_event eventBlocking = mockEvent.get(); EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree()); pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 1, &eventBlocking, nullptr); EXPECT_TRUE(pDevQueueHw->isEMCriticalSectionFree()); mockEvent->setStatus(-1); } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, ParentKernelEnqueuedToNonBlockedQueueFlushesCSRWithSLM) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; int32_t execStamp; auto mockCsr = new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(mockCsr); pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); EXPECT_TRUE(mockCsr->slmUsedInLastFlushTask); } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, ParentKernelEnqueuedWithSchedulerReturnInstanceRunsSimulation) { if (pClDevice->getSupportedClVersion() >= 20) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SchedulerSimulationReturnInstance.set(1); MockDeviceQueueHw *mockDeviceQueueHw = new 
MockDeviceQueueHw(context, pClDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); mockDeviceQueueHw->resetDeviceQueue(); context->setDefaultDeviceQueue(mockDeviceQueueHw); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; int32_t execStamp; auto mockCsr = new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); BuiltinKernelsSimulation::SchedulerSimulation::enabled = false; pDevice->resetCommandStreamReceiver(mockCsr); pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); BuiltinKernelsSimulation::SchedulerSimulation::enabled = true; EXPECT_TRUE(BuiltinKernelsSimulation::SchedulerSimulation::simulationRun); delete mockDeviceQueueHw; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, givenCsrInBatchingModeWhenExecutionModelKernelIsSubmittedThenItIsFlushed) { if (pClDevice->getSupportedClVersion() >= 20) { auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); pDevice->resetCommandStreamReceiver(mockCsr); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockContext context(pClDevice); std::unique_ptr kernelToRun(MockParentKernel::create(context, false, false, false, false, false)); pCmdQ->enqueueKernel(kernelToRun.get(), 1, offset, gws, gws, 0, nullptr, nullptr); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(1, mockCsr->flushCalledCount); } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, ParentKernelEnqueueMarksCSRMediaVFEStateDirty) { if (pClDevice->getSupportedClVersion() >= 20) { size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; int32_t execStamp; auto mockCsr = new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); 
pDevice->resetCommandStreamReceiver(mockCsr); mockCsr->setMediaVFEStateDirty(false); pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); EXPECT_TRUE(mockCsr->peekMediaVfeStateDirty()); } } compute-runtime-20.13.16352/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp000066400000000000000000000421411363734646600334550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/event/perf_counter.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; static const char *binaryFile = "simple_block_kernel"; static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; typedef ExecutionModelKernelTest ParentKernelDispatchTest; HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDeviceQueueDSHIsUsed) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); KernelOperation *blockedCommandsData = nullptr; const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; pKernel->createReflectionSurface(); size_t dshUsedBefore = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); EXPECT_EQ(0u, dshUsedBefore); size_t 
executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo(pKernel); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed(); EXPECT_EQ(0u, dshUsedAfter); size_t executionModelDSHUsedAfter = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed(); EXPECT_NE(executionModelDSHUsedBefore, executionModelDSHUsedAfter); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenDynamicStateHeapIsRequestedThenDeviceQueueHeapIsReturned) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); MockMultiDispatchInfo multiDispatchInfo(pKernel); auto ish = &getIndirectHeap(*pCmdQ, multiDispatchInfo); auto ishOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); EXPECT_EQ(ishOfDevQueue, ish); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenIndirectObjectHeapIsRequestedThenDeviceQueueDSHIsReturned) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); MockMultiDispatchInfo multiDispatchInfo(pKernel); auto ioh = &getIndirectHeap(*pCmdQ, multiDispatchInfo); auto dshOfDevQueue = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); EXPECT_EQ(dshOfDevQueue, ioh); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDefaultCmdQIOHIsNotUsed) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) 
{ KernelOperation *blockedCommandsData = nullptr; const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; MockMultiDispatchInfo multiDispatchInfo(pKernel); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto iohUsed = ioh.getUsed(); EXPECT_EQ(0u, iohUsed); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSHSizeAccountForsBlocksSurfaceStates) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { KernelOperation *blockedCommandsData = nullptr; const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; MockMultiDispatchInfo multiDispatchInfo(pKernel); DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); EXPECT_LE(pKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace()); } } HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSizeForParentIsAllocated) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using RENDER_SURFACE_STATE = typename 
FamilyType::RENDER_SURFACE_STATE; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; MultiDispatchInfo multiDispatchInfo(pKernel); DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); ASSERT_NE(nullptr, blockedCommandsData); size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper::getDefaultSshUsage(); size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); size_t sshUsed = blockedCommandsData->ssh->getUsed(); size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) + UnitTestHelper::getDefaultSshUsage(); if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { EXPECT_EQ(expectedSizeSSH, sshUsed); } EXPECT_GE(minRequiredSize, sshUsed); // Total SSH size including EM must be greater then ssh allocated EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed); } } INSTANTIATE_TEST_CASE_P(ParentKernelDispatchTest, ParentKernelDispatchTest, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); typedef ParentKernelCommandQueueFixture ParentKernelCommandStreamFixture; HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhenCommandStreamIsAcquiredThenSizeAccountsForSchedulerDispatch) { if (device->getSupportedClVersion() >= 20) { MockParentKernel *mockParentKernel = 
MockParentKernel::create(*context); DispatchInfo dispatchInfo(mockParentKernel, 1, Vec3{24, 1, 1}, Vec3{24, 1, 1}, Vec3{0, 0, 0}); MultiDispatchInfo multiDispatchInfo(mockParentKernel); size_t size = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, mockParentKernel); size_t numOfKernels = MemoryConstants::pageSize / size; size_t rest = MemoryConstants::pageSize - (numOfKernels * size); SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel(); size_t schedulerSize = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, &scheduler); while (rest >= schedulerSize) { numOfKernels++; rest = alignUp(numOfKernels * size, MemoryConstants::pageSize) - numOfKernels * size; } for (size_t i = 0; i < numOfKernels; i++) { multiDispatchInfo.push(dispatchInfo); } size_t totalKernelSize = alignUp(numOfKernels * size, MemoryConstants::pageSize); LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0); EXPECT_LT(totalKernelSize, commandStream.getMaxAvailableSpace()); delete mockParentKernel; } } class MockParentKernelDispatch : public ExecutionModelSchedulerTest, public testing::Test { public: void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(0); ExecutionModelSchedulerTest::SetUp(); } void TearDown() override { ExecutionModelSchedulerTest::TearDown(); } std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) { auto commandStream = new LinearStream(); auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver(); gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1); return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } DebugManagerStateRestore dbgRestore; }; HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatchedThenDshHeapForIndirectObjectHeapIsUsed) { if (pClDevice->getSupportedClVersion() >= 20) { MockParentKernel 
*mockParentKernel = MockParentKernel::create(*context); auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo(mockParentKernel); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); ASSERT_NE(nullptr, blockedCommandsData); EXPECT_EQ(blockedCommandsData->dsh.get(), blockedCommandsData->ioh.get()); delete mockParentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispatchedThenMediaInterfaceDescriptorLoadIsCorrectlyProgrammed) { typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; if (pClDevice->getSupportedClVersion() >= 20) { MockParentKernel *mockParentKernel = MockParentKernel::create(*context); KernelOperation *blockedCommandsData = nullptr; const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo(mockParentKernel); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); LinearStream *commandStream = &pCmdQ->getCS(0); HardwareParse hwParser; hwParser.parseCommands(*commandStream, 0); hwParser.findHardwareCommands(); ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad); auto pCmd = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)hwParser.getCommand(hwParser.cmdList.begin(), hwParser.itorWalker); ASSERT_NE(nullptr, pCmd); uint32_t 
offsetInterfaceDescriptorData = DeviceQueue::colorCalcStateSize; uint32_t sizeInterfaceDescriptorData = sizeof(INTERFACE_DESCRIPTOR_DATA); EXPECT_EQ(offsetInterfaceDescriptorData, pCmd->getInterfaceDescriptorDataStartAddress()); EXPECT_EQ(sizeInterfaceDescriptorData, pCmd->getInterfaceDescriptorTotalLength()); delete mockParentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedThenNewSSHIsAllocated) { if (pClDevice->getSupportedClVersion() >= 20) { MockParentKernel *mockParentKernel = MockParentKernel::create(*context); KernelOperation *blockedCommandsData = nullptr; const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100); uint32_t testSshUse = 20u; uint32_t expectedSshUse = testSshUse + UnitTestHelper::getDefaultSshUsage(); ssh.getSpace(testSshUse); EXPECT_EQ(expectedSshUse, ssh.getUsed()); // Assuming parent is not using SSH, this is becuase storing allocation on reuse list and allocating // new one by obtaining from reuse list returns the same allocation and heap buffer does not differ // If parent is not using SSH, then heap obtained has zero usage and the same buffer ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize); DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo(mockParentKernel); multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(UnitTestHelper::getDefaultSshUsage(), ssh.getUsed()); delete mockParentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatchedThenExistingSSHIsUsed) { if (pClDevice->getSupportedClVersion() >= 20) { MockParentKernel *mockParentKernel = 
MockParentKernel::create(*context); KernelOperation *blockedCommandsData = nullptr; const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 100); auto defaultSshUsage = UnitTestHelper::getDefaultSshUsage(); EXPECT_EQ(defaultSshUsage, ssh.getUsed()); auto *bufferMemory = ssh.getCpuBase(); DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData, nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_EQ(bufferMemory, ssh.getCpuBase()); delete mockParentKernel; } } compute-runtime-20.13.16352/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp000066400000000000000000000301101363734646600325730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/scheduler/scheduler_kernel.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; class ExecutionModelSchedulerFixture : public ExecutionModelSchedulerTest, public testing::Test { public: void SetUp() override { ExecutionModelSchedulerTest::SetUp(); } void TearDown() override { ExecutionModelSchedulerTest::TearDown(); } }; 
HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchScheduler) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; if (pClDevice->getSupportedClVersion() >= 20) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); SchedulerKernel &scheduler = context->getSchedulerKernel(); auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer(); auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); void *executionModelDsh = executionModelDshAllocation->getUnderlyingBuffer(); EXPECT_NE(nullptr, executionModelDsh); size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo, nullptr, 0); pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH); GpgpuWalkerHelper::dispatchScheduler( pCmdQ->getCS(0), *pDevQueueHw, pDevice->getPreemptionMode(), scheduler, &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), false); EXPECT_EQ(0u, *scheduler.globalWorkOffsetX); EXPECT_EQ(0u, *scheduler.globalWorkOffsetY); EXPECT_EQ(0u, *scheduler.globalWorkOffsetZ); EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX); EXPECT_EQ(1u, *scheduler.localWorkSizeY); EXPECT_EQ(1u, *scheduler.localWorkSizeZ); 
EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.localWorkSizeX2); EXPECT_EQ(1u, *scheduler.localWorkSizeY2); EXPECT_EQ(1u, *scheduler.localWorkSizeZ2); if (scheduler.enqueuedLocalWorkSizeX != &Kernel::dummyPatchLocation) { EXPECT_EQ((uint32_t)scheduler.getLws(), *scheduler.enqueuedLocalWorkSizeX); } EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeY); EXPECT_EQ(1u, *scheduler.enqueuedLocalWorkSizeZ); EXPECT_EQ((uint32_t)(scheduler.getGws() / scheduler.getLws()), *scheduler.numWorkGroupsX); EXPECT_EQ(0u, *scheduler.numWorkGroupsY); EXPECT_EQ(0u, *scheduler.numWorkGroupsZ); HardwareParse hwParser; hwParser.parseCommands(commandStream, 0); hwParser.findHardwareCommands(); ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker); // Before Walker There must be PC PIPE_CONTROL *pc = hwParser.getCommand(hwParser.cmdList.begin(), hwParser.itorWalker); ASSERT_NE(nullptr, pc); ASSERT_NE(hwParser.cmdList.end(), hwParser.itorMediaInterfaceDescriptorLoad); auto *interfaceDescLoad = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*hwParser.itorMediaInterfaceDescriptorLoad; uint32_t addressOffsetProgrammed = interfaceDescLoad->getInterfaceDescriptorDataStartAddress(); uint32_t interfaceDescriptorSizeProgrammed = interfaceDescLoad->getInterfaceDescriptorTotalLength(); uint32_t addressOffsetExpected = pDevQueueHw->colorCalcStateSize; uint32_t intDescSizeExpected = DeviceQueue::interfaceDescriptorEntries * sizeof(INTERFACE_DESCRIPTOR_DATA); EXPECT_EQ(addressOffsetExpected, addressOffsetProgrammed); EXPECT_EQ(intDescSizeExpected, interfaceDescriptorSizeProgrammed); auto *walker = (GPGPU_WALKER *)*hwParser.itorWalker; size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1}; size_t numWorkgroupsProgrammed[3] = {0, 0, 0}; uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum(); EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup); numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension(); numWorkgroupsProgrammed[1] = 
walker->getThreadGroupIdYDimension(); numWorkgroupsProgrammed[2] = walker->getThreadGroupIdZDimension(); EXPECT_EQ(workGroups[0], numWorkgroupsProgrammed[0]); EXPECT_EQ(workGroups[1], numWorkgroupsProgrammed[1]); EXPECT_EQ(workGroups[2], numWorkgroupsProgrammed[2]); typename FamilyType::GPGPU_WALKER::SIMD_SIZE simdSize = walker->getSimdSize(); EXPECT_EQ(FamilyType::GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD8, simdSize); EXPECT_EQ(0u, walker->getThreadGroupIdStartingX()); EXPECT_EQ(0u, walker->getThreadGroupIdStartingY()); EXPECT_EQ(0u, walker->getThreadGroupIdStartingResumeZ()); uint32_t offsetCrossThreadDataProgrammed = walker->getIndirectDataStartAddress(); assert(offsetCrossThreadDataProgrammed % 64 == 0); size_t curbeSize = scheduler.getCurbeSize(); size_t offsetCrossThreadDataExpected = dshHeap->getMaxAvailableSpace() - curbeSize - 4096; // take additional page for padding into account EXPECT_EQ((uint32_t)offsetCrossThreadDataExpected, offsetCrossThreadDataProgrammed); EXPECT_EQ(62u, walker->getInterfaceDescriptorOffset()); auto numChannels = 3; auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize; auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws()); auto sizeCrossThreadData = scheduler.getCrossThreadDataSize(); auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(IndirectDataLength, walker->getIndirectDataLength()); ASSERT_NE(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker); auto *bbStart = (MI_BATCH_BUFFER_START *)*hwParser.itorBBStartAfterWalker; uint64_t slbAddress = pDevQueueHw->getSlbBuffer()->getGpuAddress(); EXPECT_EQ(slbAddress, bbStart->getBatchBufferStartAddressGraphicsaddress472()); } } HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoesNotUseStandardCmdQIOH) { using INTERFACE_DESCRIPTOR_DATA 
= typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; if (pClDevice->getSupportedClVersion() >= 20) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); SchedulerKernel &scheduler = context->getSchedulerKernel(); size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); getCommandStream(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo, nullptr, 0); pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH); GpgpuWalkerHelper::dispatchScheduler( pCmdQ->getCS(0), *pDevQueueHw, pDevice->getPreemptionMode(), scheduler, &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), false); auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u); EXPECT_EQ(0u, ioh.getUsed()); } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, dispatchSchedulerWithEarlyReturnSetToFirstInstanceDoesNotPutBBStartCmd) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockDeviceQueueHw mockDevQueue(context, device, properties[0]); auto *igilQueue = mockDevQueue.getIgilQueue(); ASSERT_NE(nullptr, igilQueue); igilQueue->m_controls.m_SchedulerEarlyReturn = 1; SchedulerKernel &scheduler = context->getSchedulerKernel(); size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSizeRequiredSSH(scheduler); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); LinearStream &commandStream = 
getCommandStream(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo, nullptr, 0); pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH); GpgpuWalkerHelper::dispatchScheduler( pCmdQ->getCS(0), mockDevQueue, device->getPreemptionMode(), scheduler, &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE), false); HardwareParse hwParser; hwParser.parseCommands(commandStream, 0); hwParser.findHardwareCommands(); EXPECT_NE(hwParser.cmdList.end(), hwParser.itorWalker); EXPECT_EQ(hwParser.cmdList.end(), hwParser.itorBBStartAfterWalker); } } HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, ForceDispatchSchedulerEnqueuesSchedulerKernel) { if (pClDevice->getSupportedClVersion() >= 20) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceDispatchScheduler.set(true); size_t offset[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; MockCommandQueueHw *mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); mockCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); EXPECT_TRUE(mockCmdQ->lastEnqueuedKernels.front()->isSchedulerKernel); delete mockCmdQ; } } submit_blocked_parent_kernel_tests.cpp000066400000000000000000000531761363734646600345770ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/execution_model/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/tag_allocator.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/event/hw_timestamps.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/helpers/task_information.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include using 
namespace NEO; class SubmitBlockedParentKernelFixture : public ExecutionModelSchedulerTest, public testing::Test { void SetUp() override { ExecutionModelSchedulerTest::SetUp(); } void TearDown() override { ExecutionModelSchedulerTest::TearDown(); } }; template class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw { using BaseClass = DeviceQueueHw; public: MockDeviceQueueHwWithCriticalSectionRelease(Context *context, ClDevice *device, cl_queue_properties &properties) : BaseClass(context, device, properties) {} bool isEMCriticalSectionFree() override { auto igilCmdQueue = reinterpret_cast(DeviceQueue::queueBuffer->getUnderlyingBuffer()); criticalSectioncheckCounter++; if (criticalSectioncheckCounter == maxCounter) { igilCmdQueue->m_controls.m_CriticalSection = DeviceQueueHw::ExecutionModelCriticalSection::Free; return true; } return igilCmdQueue->m_controls.m_CriticalSection == DeviceQueueHw::ExecutionModelCriticalSection::Free; } void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override { indirectStateSetup = true; return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount, isCcsUsed); } void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override { cleanupSectionAdded = true; timestampAddedInCleanupSection = hwTimeStamp ? 
hwTimeStamp->tagForCpuAccess : nullptr; return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, tagAddress, taskCount); } void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override { schedulerDispatched = true; return BaseClass::dispatchScheduler(commandStream, scheduler, preemptionMode, ssh, dsh, isCcsUsed); } uint32_t criticalSectioncheckCounter = 0; const uint32_t maxCounter = 10; bool indirectStateSetup = false; bool cleanupSectionAdded = false; bool schedulerDispatched = false; HwTimeStamps *timestampAddedInCleanupSection = nullptr; }; HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritcalSectionWhenParentKernelCommandIsSubmittedThenItWaitsForcriticalSectionReleasement) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); mockDevQueue.acquireEMCriticalSection(); size_t heapSize = 20; size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); dsh->getSpace(mockDevQueue.getDshOffset()); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), 
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); std::vector surfaces; auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); cmdComputeKernel->submit(0, false); EXPECT_EQ(mockDevQueue.maxCounter, mockDevQueue.criticalSectioncheckCounter); delete cmdComputeKernel; delete parentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenPassedDshIsUsed) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); auto *dshOfDevQueue = mockDevQueue.getIndirectHeap(IndirectHeap::DYNAMIC_STATE); size_t heapSize = 20; size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); // add initial offset of colorCalState dsh->getSpace(DeviceQueue::colorCalcStateSize); uint64_t ValueToFillDsh = 5; uint64_t *dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t)); // Fill Interface Descriptor Data *dshVal = ValueToFillDsh; // Move to parent DSH Offset size_t alignToOffsetDshSize = mockDevQueue.getDshOffset() - DeviceQueue::colorCalcStateSize - sizeof(uint64_t); dsh->getSpace(alignToOffsetDshSize); // Fill with pattern dshVal = (uint64_t *)dsh->getSpace(sizeof(uint64_t)); *dshVal = 
ValueToFillDsh; size_t usedDSHBeforeSubmit = dshOfDevQueue->getUsed(); uint32_t colorCalcSizeDevQueue = DeviceQueue::colorCalcStateSize; EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit); auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); std::vector surfaces; auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); cmdComputeKernel->submit(0, false); //device queue dsh is not changed size_t usedDSHAfterSubmit = dshOfDevQueue->getUsed(); EXPECT_EQ(usedDSHAfterSubmit, usedDSHAfterSubmit); delete cmdComputeKernel; delete parentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenIndirectStateAndEMCleanupSectionIsSetup) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); size_t heapSize = 20; size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); 
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); dsh->getSpace(mockDevQueue.getDshOffset()); auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); std::vector surfaces; auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); cmdComputeKernel->submit(0, false); EXPECT_TRUE(mockDevQueue.indirectStateSetup); EXPECT_TRUE(mockDevQueue.cleanupSectionAdded); delete cmdComputeKernel; delete parentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingWhenCommandIsSubmittedThenEMCleanupSectionsSetsCompleteTimestamp) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); size_t heapSize = 20; size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); dsh->getSpace(mockDevQueue.getDshOffset()); auto 
cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); std::vector surfaces; auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); auto timestamp = pCmdQ->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag(); cmdComputeKernel->timestamp = timestamp; cmdComputeKernel->submit(0, false); EXPECT_TRUE(mockDevQueue.cleanupSectionAdded); EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tagForCpuAccess); delete cmdComputeKernel; delete parentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWhenCommandIsSubmittedThenSchedulerIsDispatched) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); MockDeviceQueueHwWithCriticalSectionRelease mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); size_t heapSize = 20; size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); dsh->getSpace(mockDevQueue.getDshOffset()); auto 
cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); std::vector surfaces; auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); cmdComputeKernel->submit(0, false); EXPECT_TRUE(mockDevQueue.schedulerDispatched); delete cmdComputeKernel; delete parentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapshenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); MockDeviceQueueHw mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); MockCommandQueue cmdQ(context, device, properties); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); size_t heapSize = 20; size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, heapSize, ssh); dsh->getSpace(mockDevQueue.getDshOffset()); auto &queueSsh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 100); auto &queueDsh = 
cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 100); auto &queueIoh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 100); size_t usedSize = 4u; queueSsh.getSpace(usedSize); queueDsh.getSpace(usedSize); queueIoh.getSpace(usedSize); auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); std::vector surfaces; auto *cmdComputeKernel = new CommandComputeKernel(cmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); cmdComputeKernel->submit(0, false); EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled); EXPECT_EQ(usedSize, queueDsh.getUsed()); EXPECT_EQ(usedSize, queueIoh.getUsed()); EXPECT_EQ(usedSize, queueSsh.getUsed()); delete cmdComputeKernel; delete parentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhenParentKernelIsSubmittedThenExistingSSHIsUsed) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(*context); MockDeviceQueueHw mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); size_t heapSize = 20; size_t dshSize = mockDevQueue.getDshBuffer()->getUnderlyingBufferSize(); size_t sshSize = 1000; IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); 
pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, heapSize, ioh); pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh); dsh->getSpace(mockDevQueue.getDshOffset()); EXPECT_EQ(0u, ssh->getUsed()); pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, sshSize); void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase(); auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); std::vector surfaces; auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, blockedCommandData, surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); cmdComputeKernel->submit(0, false); void *newSshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase(); EXPECT_EQ(sshBuffer, newSshBuffer); delete cmdComputeKernel; delete parentKernel; } } HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommandQueueWhenDispatchWalkerIsCalledThenHeapsHaveProperSizes) { if (device->getSupportedClVersion() >= 20) { cl_queue_properties properties[3] = {0}; std::unique_ptr parentKernel(MockParentKernel::create(*context)); MockDeviceQueueHw mockDevQueue(context, device, properties[0]); parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); auto blockedCommandsData = createBlockedCommandsData(*pCmdQ); const size_t globalOffsets[3] = {0, 0, 0}; const size_t workItems[3] = {1, 1, 1}; DispatchInfo dispatchInfo(parentKernel.get(), 1, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo(parentKernel.get()); 
multiDispatchInfo.push(dispatchInfo); HardwareInterface::dispatchWalker( *pCmdQ, multiDispatchInfo, CsrDependencies(), blockedCommandsData.get(), nullptr, nullptr, nullptr, nullptr, CL_COMMAND_NDRANGE_KERNEL); EXPECT_NE(nullptr, blockedCommandsData); EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize()); EXPECT_EQ(blockedCommandsData->dsh, blockedCommandsData->ioh); EXPECT_NE(nullptr, blockedCommandsData->dsh->getGraphicsAllocation()); EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation()); EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation()); EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation()); } } compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/000077500000000000000000000000001363734646600235035ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/CMakeLists.txt000066400000000000000000000050441363734646600262460ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_fixtures ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/buffer_enqueue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/context_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/d3d_test_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_flags_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_host_queue_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_host_queue_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_info_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/execution_model_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/execution_model_kernel_fixture.h 
${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/media_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_allocator_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_allocator_multi_device_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_center_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/multi_root_device_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/scenario_test_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/templated_fixture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/two_walker_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/ult_command_stream_receiver_fixture.h ) get_property(NEO_CORE_tests_fixtures GLOBAL PROPERTY NEO_CORE_tests_fixtures) list(APPEND IGDRCL_SRCS_tests_fixtures ${NEO_CORE_tests_fixtures} ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_fixtures}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_fixtures ${IGDRCL_SRCS_tests_fixtures}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/aub_command_stream_receiver_fixture.h000066400000000000000000000012101363734646600331200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" namespace 
NEO { struct AubCommandStreamReceiverFixture : public DeviceFixture, MockAubCenterFixture { void SetUp() { DeviceFixture::SetUp(); MockAubCenterFixture::SetUp(); setMockAubCenter(*pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]); } void TearDown() { MockAubCenterFixture::TearDown(); DeviceFixture::TearDown(); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/buffer_enqueue_fixture.h000066400000000000000000000055631363734646600304330ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" using namespace NEO; struct BufferEnqueueFixture : public HardwareParse, public ::testing::Test { BufferEnqueueFixture(void) : buffer(nullptr) { } void SetUp() override { executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); } void TearDown() override { buffer.reset(nullptr); } template void initializeFixture() { EnvironmentWithCsrWrapper environment; environment.setCsrType>(); memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); device = std::make_unique(MockDevice::create(executionEnvironment, 0)); context = std::make_unique(device.get()); bufferMemory = std::make_unique(alignUp(bufferSizeInDwords, sizeof(uint32_t))); cl_int retVal = 0; buffer.reset(Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bufferSizeInDwords, 
reinterpret_cast(bufferMemory.get()), retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } protected: const size_t bufferSizeInDwords = 64; HardwareInfo hardwareInfo; HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment; cl_queue_properties properties = {}; std::unique_ptr bufferMemory; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; MockMemoryManager *memoryManager = nullptr; }; struct EnqueueReadWriteBufferRectDispatch : public BufferEnqueueFixture { void SetUp() override { BufferEnqueueFixture::SetUp(); } void TearDown() override { BufferEnqueueFixture::TearDown(); } uint32_t memory[64] = {0}; size_t bufferOrigin[3] = {0, 0, 0}; size_t hostOrigin[3] = {1, 1, 1}; size_t region[3] = {1, 2, 1}; size_t bufferRowPitch = 4; size_t bufferSlicePitch = bufferSizeInDwords; size_t hostRowPitch = 5; size_t hostSlicePitch = 15; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/buffer_fixture.cpp000066400000000000000000000010701363734646600272240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using NEO::Context; // clang-format off static char bufferMemory[] = { 0x00, 0x10, 0x20, 0x30, 0x01, 0x11, 0x21, 0x31, 0x02, 0x12, 0x22, 0x32, 0x03, 0x13, 0x23, 0x33, }; // clang-format on void *BufferDefaults::hostPtr = bufferMemory; const size_t BufferDefaults::sizeInBytes = sizeof(bufferMemory); Context *BufferDefaults::context = nullptr; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/buffer_fixture.h000066400000000000000000000030311363734646600266700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "CL/cl.h" #include #include struct BufferDefaults { enum { flags = 
CL_MEM_READ_WRITE }; static const size_t sizeInBytes; static void *hostPtr; static NEO::Context *context; }; template struct BufferUseHostPtr : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_USE_HOST_PTR }; }; template struct BufferReadOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_READ_ONLY }; }; template struct BufferWriteOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_WRITE_ONLY }; }; template struct BufferHelper { using Buffer = NEO::Buffer; using Context = NEO::Context; using MockContext = NEO::MockContext; static Buffer *create(Context *context = Traits::context) { auto retVal = CL_SUCCESS; auto hostPtr = Traits::flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR) ? Traits::hostPtr : nullptr; auto buffer = Buffer::create( context ? context : std::shared_ptr(new MockContext).get(), Traits::flags, Traits::sizeInBytes, hostPtr, retVal); assert(buffer != nullptr); return buffer; } }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/built_in_fixture.cpp000066400000000000000000000022141363734646600275610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/device/device.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/test_files.h" using namespace NEO; void BuiltInFixture::SetUp(Device *pDevice) { // create an instance of the builtins pBuiltIns = pDevice->getBuiltIns(); pBuiltIns->setCacheingEnableState(false); // set mock compiler to return expected kernel... 
MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; retrieveBinaryKernelFilename(fclDebugVars.fileName, KernelBinaryHelper::BUILT_INS + "_", ".bc"); retrieveBinaryKernelFilename(igcDebugVars.fileName, KernelBinaryHelper::BUILT_INS + "_", ".gen"); gEnvironment->fclPushDebugVars(fclDebugVars); gEnvironment->igcPushDebugVars(igcDebugVars); } void BuiltInFixture::TearDown() { gEnvironment->igcPopDebugVars(); gEnvironment->fclPopDebugVars(); } compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/built_in_fixture.h000066400000000000000000000004631363734646600272320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { class BuiltIns; class Device; } // namespace NEO class BuiltInFixture { public: void SetUp(NEO::Device *pDevice); void TearDown(); NEO::BuiltIns *pBuiltIns = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/context_fixture.cpp000066400000000000000000000013121363734646600274360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" namespace NEO { void ContextFixture::SetUp(cl_uint numDevices, cl_device_id *pDeviceList) { auto retVal = CL_SUCCESS; pContext = Context::create(nullptr, ClDeviceVector(pDeviceList, numDevices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, pContext); ASSERT_EQ(CL_SUCCESS, retVal); } void ContextFixture::TearDown() { if (pContext != nullptr) { pContext->release(); } } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/context_fixture.h000066400000000000000000000005441363734646600271110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "CL/cl.h" namespace NEO { class MockContext; class 
ContextFixture { protected: virtual void SetUp(cl_uint numDevices, cl_device_id *pDeviceList); virtual void TearDown(); MockContext *pContext = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/d3d_test_fixture.h000066400000000000000000000232531363734646600271400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_d3d_objects.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "test.h" namespace NEO { template <> uint32_t MockD3DSharingFunctions::getDxgiDescCalled = 0; template <> uint32_t MockD3DSharingFunctions::getDxgiDescCalled = 0; template <> DXGI_ADAPTER_DESC MockD3DSharingFunctions::mockDxgiDesc = {{0}}; template <> DXGI_ADAPTER_DESC MockD3DSharingFunctions::mockDxgiDesc = {{0}}; template <> IDXGIAdapter *MockD3DSharingFunctions::getDxgiDescAdapterRequested = nullptr; template <> IDXGIAdapter *MockD3DSharingFunctions::getDxgiDescAdapterRequested = nullptr; template class D3DTests : public PlatformFixture, public ::testing::Test { public: typedef typename T::D3DDevice D3DDevice; typedef typename T::D3DQuery D3DQuery; typedef typename T::D3DQueryDesc D3DQueryDesc; typedef typename T::D3DResource D3DResource; typedef typename T::D3DBufferDesc D3DBufferDesc; typedef typename T::D3DBufferObj D3DBufferObj; typedef typename T::D3DTexture2dDesc D3DTexture2dDesc; typedef typename T::D3DTexture3dDesc D3DTexture3dDesc; typedef typename T::D3DTexture2d D3DTexture2d; typedef typename T::D3DTexture3d D3DTexture3d; class MockMM : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; GraphicsAllocation 
*createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) override { AllocationProperties properties(rootDeviceIndex, true, 0, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, false, false, 0); auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(toOsHandle(handle), properties, false); alloc->setDefaultGmm(forceGmm); gmmOwnershipPassed = true; return alloc; } bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) override { mapAuxGpuVACalled++; return mapAuxGpuVaRetValue; } Gmm *forceGmm = nullptr; bool gmmOwnershipPassed = false; uint32_t mapAuxGpuVACalled = 0u; bool mapAuxGpuVaRetValue = true; }; void setupMockGmm() { cl_image_desc imgDesc = {}; imgDesc.image_height = 4; imgDesc.image_width = 4; imgDesc.image_depth = 1; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); gmm = MockGmm::queryImgParams(pPlatform->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), imgInfo).release(); mockGmmResInfo = reinterpret_cast *>(gmm->gmmResourceInfo.get()); mockMM->forceGmm = gmm; } void SetUp() override { PlatformFixture::SetUp(); context = new MockContext(pPlatform->getClDevice(0)); context->preferD3dSharedResources = true; mockMM = std::make_unique(*context->getDevice(0)->getExecutionEnvironment()); mockSharingFcns = new NiceMock>(); context->setSharingFunctions(mockSharingFcns); context->memoryManager = mockMM.get(); cmdQ = new MockCommandQueue(context, context->getDevice(0), 0); DebugManager.injectFcn = &mockSharingFcns->mockGetDxgiDesc; mockSharingFcns->mockTexture2dDesc.ArraySize = 1; 
mockSharingFcns->mockTexture2dDesc.MipLevels = 4; mockSharingFcns->mockTexture3dDesc.MipLevels = 4; setupMockGmm(); if (context->getSharing>()) { ASSERT_EQ(0u, d3dMode); d3dMode = 10; } if (context->getSharing>()) { ASSERT_EQ(0u, d3dMode); d3dMode = 11; } ASSERT_NE(0u, d3dMode); } void TearDown() override { delete cmdQ; delete context; if (!mockMM->gmmOwnershipPassed) { delete gmm; } PlatformFixture::TearDown(); } cl_int pickParam(cl_int d3d10, cl_int d3d11) { if (d3dMode == 10u) { return d3d10; } if (d3dMode == 11u) { return d3d11; } EXPECT_TRUE(false); return 0; } cl_mem createFromD3DBufferApi(cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcodeRet) { return clCreateFromD3D10BufferKHR(context, flags, resource, errcodeRet); } cl_mem createFromD3DBufferApi(cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcodeRet) { return clCreateFromD3D11BufferKHR(context, flags, resource, errcodeRet); } cl_mem createFromD3DTexture2DApi(cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcodeRet) { return clCreateFromD3D10Texture2DKHR(context, flags, resource, subresource, errcodeRet); } cl_mem createFromD3DTexture2DApi(cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcodeRet) { return clCreateFromD3D11Texture2DKHR(context, flags, resource, subresource, errcodeRet); } cl_mem createFromD3DTexture3DApi(cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcodeRet) { return clCreateFromD3D10Texture3DKHR(context, flags, resource, subresource, errcodeRet); } cl_mem createFromD3DTexture3DApi(cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcodeRet) { return clCreateFromD3D11Texture3DKHR(context, flags, resource, subresource, errcodeRet); } cl_int enqueueAcquireD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, 
const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueAcquireD3D10ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int enqueueAcquireD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueAcquireD3D11ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int enqueueReleaseD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueReleaseD3D10ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int enqueueReleaseD3DObjectsApi(MockD3DSharingFunctions *mockFcns, cl_command_queue commandQueue, cl_uint numObjects, const cl_mem *memObjects, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { return clEnqueueReleaseD3D11ObjectsKHR(commandQueue, numObjects, memObjects, numEventsInWaitList, eventWaitList, event); } cl_int getDeviceIDsFromD3DApi(MockD3DSharingFunctions *mockFcns, cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { return clGetDeviceIDsFromD3D10KHR(platform, d3dDeviceSource, d3dObject, d3dDeviceSet, numEntries, devices, numDevices); } cl_int getDeviceIDsFromD3DApi(MockD3DSharingFunctions *mockFcns, cl_platform_id platform, cl_d3d10_device_source_khr d3dDeviceSource, void *d3dObject, cl_d3d10_device_set_khr d3dDeviceSet, cl_uint numEntries, cl_device_id *devices, cl_uint *numDevices) { return clGetDeviceIDsFromD3D11KHR(platform, d3dDeviceSource, d3dObject, d3dDeviceSet, 
numEntries, devices, numDevices); } NiceMock> *mockSharingFcns; MockContext *context; MockCommandQueue *cmdQ; char dummyD3DBuffer; char dummyD3DBufferStaging; char dummyD3DTexture; char dummyD3DTextureStaging; Gmm *gmm = nullptr; NiceMock *mockGmmResInfo = nullptr; DebugManagerStateRestore dbgRestore; std::unique_ptr mockMM; uint8_t d3dMode = 0; }; typedef ::testing::Types D3DTypes; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/device_fixture.cpp000066400000000000000000000021531363734646600272150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "gtest/gtest.h" namespace NEO { void DeviceFixture::SetUp() { hardwareInfo = *defaultHwInfo; SetUpImpl(&hardwareInfo); } void DeviceFixture::SetUpImpl(const NEO::HardwareInfo *hardwareInfo) { pDevice = MockDevice::createWithNewExecutionEnvironment(hardwareInfo); ASSERT_NE(nullptr, pDevice); pClDevice = new MockClDevice{pDevice}; ASSERT_NE(nullptr, pClDevice); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = commandStreamReceiver.getTagAddress(); ASSERT_NE(nullptr, const_cast(pTagMemory)); } void DeviceFixture::TearDown() { delete pClDevice; pClDevice = nullptr; pDevice = nullptr; } MockDevice *DeviceFixture::createWithUsDeviceId(unsigned short usDeviceId) { hardwareInfo = *defaultHwInfo; hardwareInfo.platform.usDeviceID = usDeviceId; return MockDevice::createWithNewExecutionEnvironment(&hardwareInfo); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/device_fixture.h000066400000000000000000000012131363734646600266560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" namespace NEO { struct HardwareInfo; struct 
DeviceFixture { void SetUp(); void SetUpImpl(const NEO::HardwareInfo *hardwareInfo); void TearDown(); MockDevice *createWithUsDeviceId(unsigned short usDeviceId); MockDevice *pDevice = nullptr; MockClDevice *pClDevice = nullptr; volatile uint32_t *pTagMemory = nullptr; HardwareInfo hardwareInfo = {}; PLATFORM platformHelper = {}; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/device_host_queue_fixture.cpp000066400000000000000000000120371363734646600314600ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" using namespace NEO; namespace DeviceHostQueue { cl_queue_properties deviceQueueProperties::minimumProperties[5] = { CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0, 0, 0}; cl_queue_properties deviceQueueProperties::minimumPropertiesWithProfiling[5] = { CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, 0, 0, 0}; cl_queue_properties deviceQueueProperties::noProperties[5] = {0}; cl_queue_properties deviceQueueProperties::allProperties[5] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE, CL_QUEUE_SIZE, 128 * 1024, 0}; template <> cl_command_queue DeviceHostQueueFixture::create(cl_context ctx, cl_device_id device, cl_int &retVal, cl_queue_properties properties[5]) { cl_queue_properties qProps[5]; memcpy(qProps, properties, 5 * sizeof(cl_queue_properties)); qProps[0] = CL_QUEUE_PROPERTIES; qProps[1] = qProps[1] | deviceQueueProperties::minimumProperties[1]; return clCreateCommandQueueWithProperties(ctx, device, qProps, &retVal); } template <> cl_command_queue DeviceHostQueueFixture::create(cl_context ctx, cl_device_id device, cl_int &retVal, cl_queue_properties properties[5]) { return clCreateCommandQueueWithProperties(ctx, device, properties, 
&retVal); } IGIL_CommandQueue getExpectedInitIgilCmdQueue(DeviceQueue *deviceQueue) { IGIL_CommandQueue igilCmdQueueInit; auto queueBuffer = deviceQueue->getQueueBuffer(); memset(&igilCmdQueueInit, 0, sizeof(IGIL_CommandQueue)); igilCmdQueueInit.m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; igilCmdQueueInit.m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); igilCmdQueueInit.m_magic = IGIL_MAGIC_NUMBER; igilCmdQueueInit.m_controls.m_SLBENDoffsetInBytes = -1; return igilCmdQueueInit; } IGIL_CommandQueue getExpectedgilCmdQueueAfterReset(DeviceQueue *deviceQueue) { auto queueBuffer = deviceQueue->getQueueBuffer(); auto stackBuffer = deviceQueue->getStackBuffer(); auto queueStorage = deviceQueue->getQueueStorageBuffer(); auto deviceQueueIgilCmdQueue = reinterpret_cast(queueBuffer->getUnderlyingBuffer()); IGIL_CommandQueue expectedIgilCmdQueue; memcpy(&expectedIgilCmdQueue, deviceQueueIgilCmdQueue, sizeof(IGIL_CommandQueue)); expectedIgilCmdQueue.m_head = IGIL_DEVICE_QUEUE_HEAD_INIT; expectedIgilCmdQueue.m_size = static_cast(queueBuffer->getUnderlyingBufferSize() - sizeof(IGIL_CommandQueue)); expectedIgilCmdQueue.m_magic = IGIL_MAGIC_NUMBER; expectedIgilCmdQueue.m_controls.m_SLBENDoffsetInBytes = -1; expectedIgilCmdQueue.m_controls.m_StackSize = static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); expectedIgilCmdQueue.m_controls.m_StackTop = static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); expectedIgilCmdQueue.m_controls.m_PreviousHead = IGIL_DEVICE_QUEUE_HEAD_INIT; expectedIgilCmdQueue.m_controls.m_IDTAfterFirstPhase = 1; expectedIgilCmdQueue.m_controls.m_CurrentIDToffset = 1; expectedIgilCmdQueue.m_controls.m_PreviousStorageTop = static_cast(queueStorage->getUnderlyingBufferSize()); expectedIgilCmdQueue.m_controls.m_PreviousStackTop = static_cast((stackBuffer->getUnderlyingBufferSize() / sizeof(cl_uint)) - 1); expectedIgilCmdQueue.m_controls.m_DebugNextBlockID = 0xFFFFFFFF; 
expectedIgilCmdQueue.m_controls.m_QstorageSize = static_cast(queueStorage->getUnderlyingBufferSize()); expectedIgilCmdQueue.m_controls.m_QstorageTop = static_cast(queueStorage->getUnderlyingBufferSize()); expectedIgilCmdQueue.m_controls.m_IsProfilingEnabled = static_cast(deviceQueue->isProfilingEnabled()); expectedIgilCmdQueue.m_controls.m_SLBENDoffsetInBytes = -1; expectedIgilCmdQueue.m_controls.m_IsSimulation = static_cast(deviceQueue->getDevice().isSimulation()); expectedIgilCmdQueue.m_controls.m_LastScheduleEventNumber = 0; expectedIgilCmdQueue.m_controls.m_PreviousNumberOfQueues = 0; expectedIgilCmdQueue.m_controls.m_EnqueueMarkerScheduled = 0; expectedIgilCmdQueue.m_controls.m_SecondLevelBatchOffset = 0; expectedIgilCmdQueue.m_controls.m_TotalNumberOfQueues = 0; expectedIgilCmdQueue.m_controls.m_EventTimestampAddress = 0; expectedIgilCmdQueue.m_controls.m_ErrorCode = 0; expectedIgilCmdQueue.m_controls.m_CurrentScheduleEventNumber = 0; expectedIgilCmdQueue.m_controls.m_DummyAtomicOperationPlaceholder = 0x00; expectedIgilCmdQueue.m_controls.m_DebugNextBlockGWS = 0; return expectedIgilCmdQueue; } } // namespace DeviceHostQueue compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/device_host_queue_fixture.h000066400000000000000000000061351363734646600311270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "test.h" using namespace NEO; namespace DeviceHostQueue { struct deviceQueueProperties { static cl_queue_properties minimumProperties[5]; static cl_queue_properties minimumPropertiesWithProfiling[5]; static cl_queue_properties noProperties[5]; static cl_queue_properties allProperties[5]; }; IGIL_CommandQueue 
getExpectedInitIgilCmdQueue(DeviceQueue *deviceQueue); IGIL_CommandQueue getExpectedgilCmdQueueAfterReset(DeviceQueue *deviceQueue); template class DeviceHostQueueFixture : public ApiFixture<>, public ::testing::Test { public: void SetUp() override { ApiFixture::SetUp(); } void TearDown() override { ApiFixture::TearDown(); } cl_command_queue createClQueue(cl_queue_properties properties[5] = deviceQueueProperties::noProperties) { return create(pContext, devices[testedRootDeviceIndex], retVal, properties); } T *createQueueObject(cl_queue_properties properties[5] = deviceQueueProperties::noProperties) { using BaseType = typename T::BaseType; cl_context context = (cl_context)(pContext); auto clQueue = create(context, devices[testedRootDeviceIndex], retVal, properties); return castToObject(static_cast(clQueue)); } cl_command_queue create(cl_context ctx, cl_device_id device, cl_int &retVal, cl_queue_properties properties[5] = deviceQueueProperties::noProperties); }; class DeviceQueueHwTest : public DeviceHostQueueFixture { public: using BaseClass = DeviceHostQueueFixture; void SetUp() override { BaseClass::SetUp(); device = castToObject(devices[testedRootDeviceIndex]); ASSERT_NE(device, nullptr); if (!device->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { GTEST_SKIP(); } } void TearDown() override { BaseClass::TearDown(); } template DeviceQueueHw *castToHwType(DeviceQueue *deviceQueue) { return reinterpret_cast *>(deviceQueue); } template size_t getMinimumSlbSize() { return sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + sizeof(typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(typename GfxFamily::PIPE_CONTROL) + sizeof(typename GfxFamily::GPGPU_WALKER) + sizeof(typename GfxFamily::MEDIA_STATE_FLUSH) + sizeof(typename GfxFamily::PIPE_CONTROL) + DeviceQueueHw::getCSPrefetchSize(); // prefetch size } DeviceQueue *deviceQueue; ClDevice *device; }; } // namespace DeviceHostQueue 
compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/device_info_fixture.h000066400000000000000000000024511363734646600276760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gtest/gtest.h" namespace NEO { struct GetDeviceInfoMemCapabilitiesTest : ::testing::Test { struct TestParams { cl_uint paramName; cl_unified_shared_memory_capabilities_intel expectedCapabilities; }; void check(std::vector ¶ms) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); for (auto param : params) { cl_unified_shared_memory_capabilities_intel unifiedSharedMemoryCapabilities{}; size_t paramRetSize; const auto retVal = device->getDeviceInfo(param.paramName, sizeof(cl_unified_shared_memory_capabilities_intel), &unifiedSharedMemoryCapabilities, ¶mRetSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(param.expectedCapabilities, unifiedSharedMemoryCapabilities); EXPECT_EQ(sizeof(cl_unified_shared_memory_capabilities_intel), paramRetSize); } } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/device_instrumentation_fixture.cpp000066400000000000000000000013741363734646600325440ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { void DeviceInstrumentationFixture::SetUp(bool instrumentation) { ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); hwInfo->capabilityTable.instrumentationEnabled = instrumentation; device = 
std::make_unique(*Device::create(executionEnvironment, 0), platform()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/device_instrumentation_fixture.h000066400000000000000000000005521363734646600322060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include namespace NEO { class ClDevice; class Device; struct HardwareInfo; struct DeviceInstrumentationFixture { void SetUp(bool instrumentation); std::unique_ptr device = nullptr; HardwareInfo *hwInfo = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/dispatch_flags_fixture.h000066400000000000000000000020641363734646600303770ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" namespace NEO { struct DispatchFlagsTests : public ::testing::Test { template void SetUpImpl() { environmentWrapper.setCsrType(); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context = std::make_unique(device.get()); } EnvironmentWithCsrWrapper environmentWrapper; std::unique_ptr device; std::unique_ptr context; DebugManagerStateRestore restore; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/enqueue_handler_fixture.h000066400000000000000000000011511363734646600305640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/fixtures/device_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" class EnqueueHandlerTest : public NEO::DeviceFixture, public testing::Test { public: void SetUp() override { DeviceFixture::SetUp(); context = new NEO::MockContext(pClDevice); } void TearDown() override { context->decRefInternal(); DeviceFixture::TearDown(); } NEO::MockContext *context; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/execution_model_fixture.h000066400000000000000000000075041363734646600306130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" class DeviceQueueFixture { public: void SetUp(Context *context, ClDevice *device) { cl_int errcodeRet = 0; cl_queue_properties properties[3]; properties[0] = CL_QUEUE_PROPERTIES; properties[1] = CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; properties[2] = 0; ASSERT_NE(nullptr, context); ASSERT_NE(nullptr, device); pDevQueue = DeviceQueue::create(context, device, properties[0], errcodeRet); ASSERT_NE(nullptr, pDevQueue); auto devQueue = context->getDefaultDeviceQueue(); ASSERT_NE(nullptr, devQueue); EXPECT_EQ(pDevQueue, devQueue); } void TearDown() { delete pDevQueue; } DeviceQueue *pDevQueue = nullptr; }; class ExecutionModelKernelTest : public ExecutionModelKernelFixture, public CommandQueueHwFixture, public DeviceQueueFixture { public: void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(0); ExecutionModelKernelFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, 0); DeviceQueueFixture::SetUp(context, pClDevice); } void TearDown() override { DeviceQueueFixture::TearDown(); 
CommandQueueHwFixture::TearDown(); ExecutionModelKernelFixture::TearDown(); } std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) { auto commandStream = new LinearStream(); auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver(); gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1); return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } DebugManagerStateRestore dbgRestore; }; class ExecutionModelSchedulerTest : public DeviceFixture, public CommandQueueHwFixture, public DeviceQueueFixture { public: void SetUp() override { DeviceFixture::SetUp(); CommandQueueHwFixture::SetUp(pClDevice, 0); DeviceQueueFixture::SetUp(context, pClDevice); parentKernel = MockParentKernel::create(*context); ASSERT_NE(nullptr, parentKernel); } void TearDown() override { parentKernel->release(); DeviceQueueFixture::TearDown(); CommandQueueHwFixture::TearDown(); DeviceFixture::TearDown(); } MockParentKernel *parentKernel = nullptr; }; struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture, testing::Test { void SetUp() override { device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; CommandQueueHwFixture::SetUp(device, 0); } void TearDown() override { CommandQueueHwFixture::TearDown(); delete device; } std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) { auto commandStream = new LinearStream(); auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver(); gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1); return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } MockClDevice *device = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h000066400000000000000000000043301363734646600321450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/kernel/kernel.h" #include 
"opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "test.h" using namespace NEO; class ExecutionModelKernelFixture : public ProgramFromBinaryTest, public PlatformFixture { protected: void SetUp() override { PlatformFixture::SetUp(); std::string temp; temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion); if (temp.find("OpenCL 1.2") != std::string::npos) { pDevice = MockDevice::createWithNewExecutionEnvironment(nullptr); pClDevice = new MockClDevice{pDevice}; return; } std::string options("-cl-std=CL2.0"); this->setOptions(options); ProgramFromBinaryTest::SetUp(); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id device = pClDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, *pProgram->getKernelInfo(KernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); } void TearDown() override { if (pKernel != nullptr) { pKernel->release(); } std::string temp; temp.assign(pPlatform->getClDevice(0)->getDeviceInfo().clVersion); ProgramFromBinaryTest::TearDown(); PlatformFixture::TearDown(); if (temp.find("OpenCL 1.2") != std::string::npos) { if (pDevice != nullptr) { delete pDevice; pDevice = nullptr; } if (pClDevice != nullptr) { delete pClDevice; pClDevice = nullptr; } } } Kernel *pKernel = nullptr; cl_int retVal = CL_SUCCESS; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/hello_world_fixture.h000066400000000000000000000113201363734646600277310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/memory_manager.h" #include 
"opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" #include "test.h" namespace NEO { // Factory used to pick various ingredients for use in aggregate tests struct HelloWorldFixtureFactory { typedef NEO::IndirectHeapFixture IndirectHeapFixture; typedef NEO::CommandStreamFixture CommandStreamFixture; typedef NEO::CommandQueueHwFixture CommandQueueFixture; typedef NEO::HelloWorldKernelFixture KernelFixture; }; // Instantiates a fixture based on the supplied fixture factory. // Used by most tests for integration testing with command queues. template struct HelloWorldFixture : public FixtureFactory::IndirectHeapFixture, public FixtureFactory::CommandStreamFixture, public FixtureFactory::CommandQueueFixture, public FixtureFactory::KernelFixture, public DeviceFixture { typedef typename FixtureFactory::IndirectHeapFixture IndirectHeapFixture; typedef typename FixtureFactory::CommandStreamFixture CommandStreamFixture; typedef typename FixtureFactory::CommandQueueFixture CommandQueueFixture; typedef typename FixtureFactory::KernelFixture KernelFixture; using CommandQueueFixture::pCmdQ; using CommandQueueFixture::SetUp; using CommandStreamFixture::pCS; using CommandStreamFixture::SetUp; using HelloWorldKernelFixture::SetUp; using IndirectHeapFixture::SetUp; using KernelFixture::pKernel; public: void SetUp() override { DeviceFixture::SetUp(); ASSERT_NE(nullptr, pClDevice); CommandQueueFixture::SetUp(pClDevice, 0); ASSERT_NE(nullptr, pCmdQ); CommandStreamFixture::SetUp(pCmdQ); ASSERT_NE(nullptr, pCS); IndirectHeapFixture::SetUp(pCmdQ); KernelFixture::SetUp(pClDevice, kernelFilename, kernelName); ASSERT_NE(nullptr, pKernel); auto retVal = 
CL_INVALID_VALUE; BufferDefaults::context = new MockContext(pClDevice); destBuffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE, sizeUserMemory, nullptr, retVal); srcBuffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE, sizeUserMemory, nullptr, retVal); pDestMemory = destBuffer->getCpuAddressForMapping(); pSrcMemory = srcBuffer->getCpuAddressForMapping(); memset(pDestMemory, destPattern, sizeUserMemory); memset(pSrcMemory, srcPattern, sizeUserMemory); pKernel->setArg(0, srcBuffer); pKernel->setArg(1, destBuffer); } void TearDown() override { pCmdQ->flush(); srcBuffer->release(); destBuffer->release(); KernelFixture::TearDown(); IndirectHeapFixture::TearDown(); CommandStreamFixture::TearDown(); CommandQueueFixture::TearDown(); BufferDefaults::context->release(); DeviceFixture::TearDown(); } Buffer *srcBuffer = nullptr; Buffer *destBuffer = nullptr; void *pSrcMemory = nullptr; void *pDestMemory = nullptr; size_t sizeUserMemory = 128 * sizeof(float); const char *kernelFilename = "CopyBuffer_simd"; const char *kernelName = "CopyBuffer"; const int srcPattern = 85; const int destPattern = 170; cl_int callOneWorkItemNDRKernel(cl_event *eventWaitList = nullptr, cl_int waitListSize = 0, cl_event *returnEvent = nullptr) { cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; return pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, waitListSize, eventWaitList, returnEvent); } }; template struct HelloWorldTest : Test> { }; template struct HelloWorldTestWithParam : HelloWorldFixture { }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h000066400000000000000000000060061363734646600312760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include 
"shared/source/helpers/file_io.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "gtest/gtest.h" namespace NEO { class Kernel; class Program; struct HelloWorldKernelFixture : public ProgramFixture { using ProgramFixture::SetUp; void SetUp(ClDevice *pDevice, const char *kernelFilenameStr, const char *kernelNameStr) { SetUp(pDevice, kernelFilenameStr, kernelNameStr, nullptr); } void SetUp(ClDevice *pDevice, const char *kernelFilenameStr, const char *kernelNameStr, const char *options) { ProgramFixture::SetUp(); pTestFilename = new std::string(kernelFilenameStr); pKernelName = new std::string(kernelNameStr); if (strstr(kernelFilenameStr, "_simd") != nullptr) { pTestFilename->append(std::to_string(simd)); } cl_device_id device = pDevice; pContext = Context::create(nullptr, ClDeviceVector(&device, 1), nullptr, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pContext); if (options) { std::string optionsToProgram(options); if (optionsToProgram.find("-cl-std=CL2.0") != std::string::npos) { ASSERT_TRUE(pDevice->getSupportedClVersion() >= 20u); } CreateProgramFromBinary( pContext, &device, *pTestFilename, optionsToProgram); } else { CreateProgramFromBinary( pContext, &device, *pTestFilename); } ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, *pProgram->getKernelInfo(pKernelName->c_str()), &retVal); EXPECT_NE(nullptr, pKernel); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() 
override { delete pKernelName; delete pTestFilename; pKernel->release(); pContext->release(); ProgramFixture::TearDown(); } std::string *pTestFilename = nullptr; std::string *pKernelName = nullptr; cl_uint simd = 32; cl_int retVal = CL_SUCCESS; Kernel *pKernel = nullptr; MockContext *pContext = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/image_fixture.cpp000066400000000000000000000031021363734646600270330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" using NEO::MockContext; static const size_t imageWidth = 7; static const size_t imageHeight = 9; static const size_t imageDepth = 11; static const size_t imageArray = imageDepth; const cl_image_format Image1dDefaults::imageFormat = { CL_R, CL_FLOAT}; const cl_image_format LuminanceImage::imageFormat = { CL_LUMINANCE, CL_FLOAT}; const cl_image_desc Image1dDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE1D, imageWidth, 1, 1, 1, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image2dDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE2D, imageWidth, imageHeight, 1, 1, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image3dDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE3D, imageWidth, imageHeight, imageDepth, 1, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image2dArrayDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE2D_ARRAY, imageWidth, imageHeight, 0, imageArray, 0, 0, 0, 0, {nullptr}}; const cl_image_desc Image1dArrayDefaults::imageDesc = { CL_MEM_OBJECT_IMAGE1D_ARRAY, imageWidth, 0, 0, imageArray, 0, 0, 0, 0, {nullptr}}; static float imageMemory[imageWidth * imageHeight * imageDepth] = {}; void *Image1dDefaults::hostPtr = imageMemory; NEO::Context *Image1dDefaults::context = nullptr; 
compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/image_fixture.h000066400000000000000000000103671363734646600265130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "CL/cl.h" #include #include struct Image1dDefaults { enum { flags = 0 }; static const cl_image_format imageFormat; static const cl_image_desc imageDesc; static void *hostPtr; static NEO::Context *context; }; struct Image2dDefaults : public Image1dDefaults { static const cl_image_desc imageDesc; }; struct Image3dDefaults : public Image2dDefaults { static const cl_image_desc imageDesc; }; struct Image2dArrayDefaults : public Image2dDefaults { static const cl_image_desc imageDesc; }; struct Image1dArrayDefaults : public Image2dDefaults { static const cl_image_desc imageDesc; }; struct LuminanceImage : public Image2dDefaults { static const cl_image_format imageFormat; }; template struct ImageUseHostPtr : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_USE_HOST_PTR }; }; template struct ImageReadOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_READ_ONLY }; }; template struct ImageWriteOnly : public BaseClass { enum { flags = BaseClass::flags | CL_MEM_WRITE_ONLY }; }; template struct ImageHelper { using Context = NEO::Context; using Image = NEO::Image; using MockContext = NEO::MockContext; static Image *create(Context *context = Traits::context, const cl_image_desc *imgDesc = 
&Traits::imageDesc, const cl_image_format *imgFormat = &Traits::imageFormat) { auto retVal = CL_INVALID_VALUE; auto surfaceFormat = Image::getSurfaceFormatFromTable(Traits::flags, imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( context, NEO::MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(Traits::flags, 0, 0), Traits::flags, 0, surfaceFormat, imgDesc, Traits::hostPtr, retVal); assert(image != nullptr); return image; } }; template struct Image1dHelper : public ImageHelper { }; template struct Image2dHelper : public ImageHelper { }; template struct Image3dHelper : public ImageHelper { }; template struct Image2dArrayHelper : public ImageHelper { }; template struct Image1dArrayHelper : public ImageHelper { }; struct ImageClearColorFixture : ::testing::Test { using MockContext = NEO::MockContext; using Image = NEO::Image; template void setUpImpl() { hardwareInfo.capabilityTable.ftrRenderCompressedImages = true; NEO::platformsImpl.clear(); NEO::constructPlatform()->peekExecutionEnvironment()->prepareRootDeviceEnvironments(1u); NEO::platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); NEO::platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->initGmm(); } template typename FamilyType::RENDER_SURFACE_STATE getSurfaceState() { using AUXILIARY_SURFACE_MODE = typename FamilyType::RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; surfaceState.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); return surfaceState; } NEO::HardwareInfo hardwareInfo = *NEO::defaultHwInfo; MockContext context; std::unique_ptr image; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/kernel_arg_fixture.cpp000066400000000000000000000070361363734646600300740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/test/unit_test/fixtures/kernel_arg_fixture.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; KernelImageArgTest::~KernelImageArgTest() = default; void KernelImageArgTest::SetUp() { pKernelInfo = std::make_unique(); KernelArgPatchInfo kernelArgPatchInfo; kernelHeader.reset(new iOpenCL::SKernelBinaryHeaderCommon{}); kernelHeader->SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = kernelHeader.get(); pKernelInfo->usesSsh = true; pKernelInfo->kernelArgInfo.resize(5); pKernelInfo->kernelArgInfo[4].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].offsetImgWidth = 0x4; pKernelInfo->kernelArgInfo[0].offsetFlatBaseOffset = 0x8; pKernelInfo->kernelArgInfo[0].offsetFlatWidth = 0x10; pKernelInfo->kernelArgInfo[0].offsetFlatHeight = 0x18; pKernelInfo->kernelArgInfo[0].offsetFlatPitch = 0x24; pKernelInfo->kernelArgInfo[0].offsetNumSamples = 0x3c; offsetNumMipLevelsImage0 = 0x40; pKernelInfo->kernelArgInfo[0].offsetNumMipLevels = offsetNumMipLevelsImage0; pKernelInfo->kernelArgInfo[1].offsetImgHeight = 0xc; pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].size = sizeof(void *); pKernelInfo->kernelArgInfo[3].offsetImgDepth = 0x30; 
pKernelInfo->kernelArgInfo[4].offsetHeap = 0x20; pKernelInfo->kernelArgInfo[4].offsetObjectId = 0x0; pKernelInfo->kernelArgInfo[4].isImage = true; pKernelInfo->kernelArgInfo[3].isImage = true; pKernelInfo->kernelArgInfo[2].isImage = true; pKernelInfo->kernelArgInfo[1].isImage = true; pKernelInfo->kernelArgInfo[0].isImage = true; DeviceFixture::SetUp(); program = std::make_unique(*pDevice->getExecutionEnvironment()); pKernel.reset(new MockKernel(program.get(), *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); pKernel->setKernelArgHandler(2, &Kernel::setArgImmediate); pKernel->setKernelArgHandler(3, &Kernel::setArgImage); pKernel->setKernelArgHandler(4, &Kernel::setArgImage); uint32_t crossThreadData[0x44] = {}; crossThreadData[0x20 / sizeof(uint32_t)] = 0x12344321; pKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); context.reset(new MockContext(pClDevice)); image.reset(Image2dHelper<>::create(context.get())); pKernel->setContext(context.get()); ASSERT_NE(nullptr, image); } void KernelImageArgTest::TearDown() { image.reset(); pKernel.reset(); program.reset(); context.reset(); DeviceFixture::TearDown(); } compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/kernel_arg_fixture.h000066400000000000000000000017511363734646600275370ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" #include namespace NEO { class MockContext; class MockKernel; class MockProgram; class Image; struct KernelInfo; } // namespace NEO namespace iOpenCL { struct SKernelBinaryHeaderCommon; } class KernelImageArgTest : public Test { public: KernelImageArgTest() { } ~KernelImageArgTest() override; protected: void SetUp() override; void TearDown() override; cl_int retVal = 0; std::unique_ptr 
kernelHeader; std::unique_ptr context; std::unique_ptr program; std::unique_ptr pKernelInfo; std::unique_ptr pKernel; std::unique_ptr image; char surfaceStateHeap[0x80]; uint32_t offsetNumMipLevelsImage0 = -1; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/kernel_data_fixture.cpp000066400000000000000000000116341363734646600302330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/kernel_data_fixture.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/program/program_info_from_patchtokens.h" void KernelDataTest::buildAndDecode() { cl_int error = CL_SUCCESS; kernelBinaryHeader.CheckSum = checkSum; kernelBinaryHeader.DynamicStateHeapSize = dshSize; kernelBinaryHeader.GeneralStateHeapSize = gshSize; kernelBinaryHeader.KernelHeapSize = kernelHeapSize; kernelBinaryHeader.KernelNameSize = kernelNameSize; kernelBinaryHeader.KernelUnpaddedSize = kernelUnpaddedSize; kernelBinaryHeader.PatchListSize = patchListSize + sizeof(SPatchDataParameterStream); kernelBinaryHeader.ShaderHashCode = shaderHashCode; kernelBinaryHeader.SurfaceStateHeapSize = sshSize; kernelDataSize = sizeof(SKernelBinaryHeaderCommon) + kernelNameSize + sshSize + dshSize + gshSize + kernelHeapSize + patchListSize; kernelDataSize += sizeof(SPatchDataParameterStream); pKernelData = static_cast(alignedMalloc(kernelDataSize, MemoryConstants::cacheLineSize)); ASSERT_NE(nullptr, pKernelData); // kernel blob pCurPtr = pKernelData; // kernel header // first clear it because sizeof() > sum of sizeof(fields). 
this is due to packing memset(pCurPtr, 0, sizeof(SKernelBinaryHeaderCommon)); *(SKernelBinaryHeaderCommon *)pCurPtr = kernelBinaryHeader; pCurPtr += sizeof(SKernelBinaryHeaderCommon); // kernel name memset(pCurPtr, 0, kernelNameSize); strcpy_s(pCurPtr, strlen(kernelName.c_str()) + 1, kernelName.c_str()); pCurPtr += kernelNameSize; // kernel heap memcpy_s(pCurPtr, kernelHeapSize, pKernelHeap, kernelHeapSize); pCurPtr += kernelHeapSize; // general state heap memcpy_s(pCurPtr, gshSize, pGsh, gshSize); pCurPtr += gshSize; // dynamic state heap memcpy_s(pCurPtr, dshSize, pDsh, dshSize); pCurPtr += dshSize; // surface state heap memcpy_s(pCurPtr, sshSize, pSsh, sshSize); pCurPtr += sshSize; // patch list memcpy_s(pCurPtr, patchListSize, pPatchList, patchListSize); pCurPtr += patchListSize; // add a data stream member iOpenCL::SPatchDataParameterStream dataParameterStream; dataParameterStream.Token = PATCH_TOKEN_DATA_PARAMETER_STREAM; dataParameterStream.Size = sizeof(SPatchDataParameterStream); dataParameterStream.DataParameterStreamSize = 0x40; memcpy_s(pCurPtr, sizeof(SPatchDataParameterStream), &dataParameterStream, sizeof(SPatchDataParameterStream)); pCurPtr += sizeof(SPatchDataParameterStream); // now build a program with this kernel data iOpenCL::SProgramBinaryHeader header = {}; NEO::PatchTokenBinary::ProgramFromPatchtokens programFromPatchtokens; programFromPatchtokens.decodeStatus = DecodeError::Success; programFromPatchtokens.header = &header; programFromPatchtokens.kernels.resize(1); auto &kernelFromPatchtokens = *programFromPatchtokens.kernels.rbegin(); auto kernelBlob = ArrayRef(reinterpret_cast(pKernelData), kernelDataSize); bool decodeSuccess = NEO::PatchTokenBinary::decodeKernelFromPatchtokensBlob(kernelBlob, kernelFromPatchtokens); EXPECT_TRUE(decodeSuccess); ProgramInfo programInfo; NEO::populateProgramInfo(programInfo, programFromPatchtokens); error = program->processProgramInfo(programInfo); EXPECT_EQ(CL_SUCCESS, error); // extract the kernel info 
pKernelInfo = program->Program::getKernelInfo(kernelName.c_str()); // validate header EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pKernelHeader, &kernelBinaryHeader, sizeof(SKernelBinaryHeaderCommon))); // validate name EXPECT_STREQ(pKernelInfo->name.c_str(), kernelName.c_str()); // validate each heap if (pKernelHeap != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pKernelHeap, pKernelHeap, kernelHeapSize)); } if (pGsh != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pGsh, pGsh, gshSize)); } if (pDsh != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pDsh, pDsh, dshSize)); } if (pSsh != nullptr) { EXPECT_EQ(0, memcmp(pKernelInfo->heapInfo.pSsh, pSsh, sshSize)); } if (kernelHeapSize) { auto kernelAllocation = pKernelInfo->getGraphicsAllocation(); UNRECOVERABLE_IF(kernelAllocation == nullptr); EXPECT_EQ(kernelAllocation->getUnderlyingBufferSize(), kernelHeapSize); auto kernelIsa = kernelAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(kernelIsa, pKernelInfo->heapInfo.pKernelHeap, kernelHeapSize)); } else { EXPECT_EQ(nullptr, pKernelInfo->getGraphicsAllocation()); } } compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/kernel_data_fixture.h000066400000000000000000000045761363734646600277070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; using namespace iOpenCL; class KernelDataTest : public testing::Test { public: KernelDataTest() { memset(&kernelBinaryHeader, 0x00, sizeof(SKernelBinaryHeaderCommon)); pCurPtr = nullptr; pKernelData = nullptr; kernelName = "test"; pDsh = nullptr; pGsh = nullptr; pKernelHeap = nullptr; pSsh = nullptr; pPatchList = nullptr; 
kernelDataSize = 0; kernelNameSize = (uint32_t)alignUp(strlen(kernelName.c_str()) + 1, sizeof(uint32_t)); dshSize = 0; gshSize = 0; kernelHeapSize = 0; sshSize = 0; patchListSize = 0; checkSum = 0; shaderHashCode = 0; kernelUnpaddedSize = 0; pKernelInfo = nullptr; } void buildAndDecode(); protected: void SetUp() override { kernelBinaryHeader.KernelNameSize = kernelNameSize; pContext = new MockContext; program = std::make_unique(*pContext->getDevice(0)->getExecutionEnvironment(), pContext, false, nullptr); } void TearDown() override { if (pKernelInfo->kernelAllocation) { pContext->getDevice(0)->getMemoryManager()->freeGraphicsMemory(pKernelInfo->kernelAllocation); const_cast(pKernelInfo)->kernelAllocation = nullptr; } program.reset(); delete pContext; alignedFree(pKernelData); } char *pCurPtr; char *pKernelData; SKernelBinaryHeaderCommon kernelBinaryHeader; std::string kernelName; void *pDsh; void *pGsh; void *pKernelHeap; void *pSsh; void *pPatchList; uint32_t kernelDataSize; uint32_t kernelNameSize; uint32_t dshSize; uint32_t gshSize; uint32_t kernelHeapSize; uint32_t sshSize; uint32_t patchListSize; uint32_t checkSum; uint64_t shaderHashCode; uint32_t kernelUnpaddedSize; std::unique_ptr program; MockContext *pContext; const KernelInfo *pKernelInfo; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/linux/000077500000000000000000000000001363734646600246425ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/linux/CMakeLists.txt000066400000000000000000000007031363734646600274020ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_fixtures_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/memory_allocator_multi_device_fixture_linux.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_fixtures_linux}) endif() set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_fixtures_linux 
${IGDRCL_SRCS_tests_fixtures_linux}) add_subdirectories() memory_allocator_multi_device_fixture_linux.cpp000066400000000000000000000017311363734646600363570ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/linux/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/memory_allocator_multi_device_fixture.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h" using namespace NEO; void MemoryAllocatorMultiDeviceSystemSpecificFixture::SetUp(ExecutionEnvironment &executionEnvironment) { auto memoryManager = static_cast(executionEnvironment.memoryManager.get()); auto bufferObject = memoryManager->createSharedBufferObject(0u, 10, true, 0u); memoryManager->pushSharedBufferObject(bufferObject); } void MemoryAllocatorMultiDeviceSystemSpecificFixture::TearDown(ExecutionEnvironment &executionEnvironment) { auto memoryManager = static_cast(executionEnvironment.memoryManager.get()); auto bufferObject = memoryManager->sharingBufferObjects.back(); memoryManager->eraseSharedBufferObject(bufferObject); delete bufferObject; } compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/media_kernel_fixture.h000066400000000000000000000054021363734646600300420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { template struct MediaKernelFixture : public HelloWorldFixture, public HardwareParse, public ::testing::Test { typedef HelloWorldFixture Parent; using Parent::pCmdBuffer; using Parent::pCmdQ; using Parent::pContext; using Parent::pCS; using Parent::pDevice; using Parent::pKernel; using Parent::pProgram; using Parent::retVal; 
MediaKernelFixture() {} template void enqueueRegularKernel() { auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); } template void enqueueVmeKernel() { auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pVmeKernel); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); } void SetUp() override { skipVmeTest = !platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.supportsVme; if (skipVmeTest) { GTEST_SKIP(); } Parent::kernelFilename = "vme_kernels"; Parent::kernelName = "non_vme_kernel"; Parent::SetUp(); HardwareParse::SetUp(); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(false, pKernel->isVmeKernel()); cl_int retVal; // create the VME kernel pVmeKernel = Kernel::create( pProgram, *pProgram->getKernelInfo("device_side_block_motion_estimate_intel"), &retVal); ASSERT_NE(nullptr, pVmeKernel); ASSERT_EQ(true, pVmeKernel->isVmeKernel()); } void TearDown() override { if (skipVmeTest) { return; } pVmeKernel->release(); HardwareParse::TearDown(); Parent::TearDown(); } GenCmdList::iterator itorWalker1; GenCmdList::iterator itorWalker2; Kernel *pVmeKernel = nullptr; bool skipVmeTest = false; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/memory_allocator_fixture.h000066400000000000000000000040401363734646600307700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" 
#include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" using namespace NEO; class MemoryAllocatorFixture : public MemoryManagementFixture { public: void SetUp() override { MemoryManagementFixture::SetUp(); executionEnvironment = new ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0u)); memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); csr = &device->getGpgpuCommandStreamReceiver(); auto &hwInfo = device->getHardwareInfo(); auto engineType = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0]; auto osContext = memoryManager->createAndRegisterOsContext(csr, engineType, 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); csr->setupContext(*osContext); } void TearDown() override { device.reset(); platformsImpl.clear(); MemoryManagementFixture::TearDown(); } protected: std::unique_ptr device; ExecutionEnvironment *executionEnvironment; MockMemoryManager *memoryManager = nullptr; CommandStreamReceiver *csr = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/memory_allocator_multi_device_fixture.h000066400000000000000000000040271363734646600335260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; 
class MemoryAllocatorMultiDeviceSystemSpecificFixture { public: void SetUp(ExecutionEnvironment &executionEnvironment); void TearDown(ExecutionEnvironment &executionEnvironment); std::unique_ptr gmm; }; template class MemoryAllocatorMultiDeviceFixture : public MemoryManagementFixture, public MemoryAllocatorMultiDeviceSystemSpecificFixture, public ::testing::TestWithParam { public: void SetUp() override { MemoryManagementFixture::SetUp(); isOsAgnosticMemoryManager = GetParam(); DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; ultHwConfig.forceOsAgnosticMemoryManager = isOsAgnosticMemoryManager; initPlatform(); executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = executionEnvironment->memoryManager.get(); if (!isOsAgnosticMemoryManager) { MemoryAllocatorMultiDeviceSystemSpecificFixture::SetUp(*executionEnvironment); } } void TearDown() override { if (!isOsAgnosticMemoryManager) { MemoryAllocatorMultiDeviceSystemSpecificFixture::TearDown(*executionEnvironment); } } uint32_t getNumRootDevices() { return numRootDevices; } protected: ExecutionEnvironment *executionEnvironment = nullptr; MemoryManager *memoryManager = nullptr; DebugManagerStateRestore restorer; bool isOsAgnosticMemoryManager; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/memory_management_fixture.cpp000066400000000000000000000170051363734646600314640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "shared/test/unit_test/helpers/memory_leak_listener.h" #include "shared/test/unit_test/helpers/memory_management.h" #include #if defined(__linux__) #include #include #include #include #elif defined(_WIN32) #include #pragma warning(push) // Saves the current warning state. 
#pragma warning(disable : 4091) // Temporarily disables warning 4091. #include #pragma warning(pop) // Restores the warning state. #pragma comment(lib, "Dbghelp.lib") #endif namespace Os { extern const char *frontEndDllName; extern const char *igcDllName; } // namespace Os void MemoryManagementFixture::SetUp() { EXPECT_EQ(static_cast(-1), MemoryManagement::failingAllocation); MemoryManagement::indexAllocation = 0; MemoryManagement::indexDeallocation = 0; MemoryManagement::failingAllocation = -1; previousAllocations = MemoryManagement::numAllocations.load(); MemoryManagement::logTraces = MemoryManagement::captureCallStacks; } void MemoryManagementFixture::TearDown() { clearFailingAllocation(); checkForLeaks(); MemoryManagement::logTraces = false; } void MemoryManagementFixture::setFailingAllocation(size_t allocation) { MemoryManagement::indexAllocation = 0; MemoryManagement::failingAllocation = allocation; } void MemoryManagementFixture::clearFailingAllocation() { MemoryManagement::failingAllocation = -1; } ::testing::AssertionResult MemoryManagementFixture::assertLeak( const char *leakExpr, size_t leakIndex) { using MemoryManagement::AllocationEvent; using MemoryManagement::eventsAllocated; if (leakIndex == MemoryManagement::invalidLeakIndex) { return ::testing::AssertionSuccess(); } auto &event = eventsAllocated[leakIndex]; switch (event.event) { case AllocationEvent::EVENT_DELETE: return ::testing::AssertionFailure() << "event[" << leakIndex << "]: delete doesn't have corresponding new. allocation address=" << event.address << ", allocation size=" << event.size << printCallStack(event); break; case AllocationEvent::EVENT_DELETE_ARRAY: return ::testing::AssertionFailure() << "event[" << leakIndex << "]: delete[] doesn't have corresponding new[]. 
allocation address=" << event.address << ", allocation size=" << event.size << printCallStack(event); break; case AllocationEvent::EVENT_NEW: return ::testing::AssertionFailure() << "event[" << leakIndex << "]: new doesn't have corresponding delete. allocation address=" << event.address << ", allocation size=" << event.size << printCallStack(event); break; case AllocationEvent::EVENT_NEW_NOTHROW: return ::testing::AssertionFailure() << "event[" << leakIndex << "]: new (std::nothrow) doesn't have corresponding delete. allocation address=" << event.address << ", allocation size=" << event.size << printCallStack(event); break; case AllocationEvent::EVENT_NEW_ARRAY: return ::testing::AssertionFailure() << "event[" << leakIndex << "]: new [] doesn't have corresponding delete[]. allocation address=" << event.address << ", allocation size=" << event.size << printCallStack(event); break; case AllocationEvent::EVENT_NEW_ARRAY_NOTHROW: return ::testing::AssertionFailure() << "event[" << leakIndex << "] new (std::nothrow) [] doesn't have corresponding delete[]. allocation address=" << event.address << ", allocation size=" << event.size << printCallStack(event); break; case AllocationEvent::EVENT_NEW_ARRAY_NOTHROW_FAIL: case AllocationEvent::EVENT_NEW_ARRAY_FAIL: case AllocationEvent::EVENT_NEW_NOTHROW_FAIL: case AllocationEvent::EVENT_NEW_FAIL: case AllocationEvent::EVENT_UNKNOWN: default: return ::testing::AssertionFailure() << "Unknown event[" << leakIndex << "] detected. 
allocation size=" << event.size; break; } } void MemoryManagementFixture::checkForLeaks() { // We have to alias MemoryManagement::numAllocations because // the following EXPECT_EQ actually allocates more memory :-) auto currentAllocations = MemoryManagement::numAllocations.load(); auto indexAllocationTop = MemoryManagement::indexAllocation.load(); auto indexDellocationTop = MemoryManagement::indexDeallocation.load(); if (previousAllocations != currentAllocations) { auto testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); auto testResult = testInfo->result(); if (testResult->Passed()) { //EXPECT_EQ(previousAllocations, currentAllocations); size_t leakEventIndex; do { leakEventIndex = MemoryManagement::enumerateLeak(indexAllocationTop, indexDellocationTop, false, false); EXPECT_PRED_FORMAT1(assertLeak, leakEventIndex); auto invalidLeakIndexValues = MemoryManagement::invalidLeakIndex; EXPECT_EQ(leakEventIndex, invalidLeakIndexValues); } while (leakEventIndex != MemoryManagement::invalidLeakIndex); } else { printf("*** WARNING: Leaks found but dumping disabled during test failure ***\n"); } } } void MemoryManagementFixture::injectFailures(InjectedFunction &method, uint32_t maxIndex) { MemoryManagement::indexAllocation = 0; method(-1); auto numCurrentAllocations = MemoryManagement::indexAllocation.load(); for (auto i = 0u; i < numCurrentAllocations; i++) { // Force a failure MemoryManagement::indexAllocation = numCurrentAllocations; MemoryManagement::failingAllocation = i + numCurrentAllocations; if (MemoryManagement::eventsAllocated[i].event == MemoryManagement::AllocationEvent::EVENT_NEW || MemoryManagement::eventsAllocated[i].event == MemoryManagement::AllocationEvent::EVENT_NEW_ARRAY) { continue; } if (maxIndex != 0 && i > maxIndex) { break; } // Call the method under test method(i); // Restore allocations MemoryManagement::failingAllocation = -1; } MemoryManagement::failingAllocation = -1; } void 
MemoryManagementFixture::injectFailureOnIndex(InjectedFunction &method, uint32_t index) { MemoryManagement::indexAllocation = 0; method(-1); auto numCurrentAllocations = MemoryManagement::indexAllocation.load(); // Force a failure MemoryManagement::indexAllocation = numCurrentAllocations; MemoryManagement::failingAllocation = index + numCurrentAllocations; // Call the method under test method(index); // Restore allocations MemoryManagement::failingAllocation = -1; } compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/memory_management_fixture.h000066400000000000000000000021721363734646600311300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/helpers/memory_management.h" #include "gtest/gtest.h" #include struct MemoryManagementFixture { MemoryManagementFixture() { MemoryManagement::detailedAllocationLoggingActive = true; }; virtual ~MemoryManagementFixture() { MemoryManagement::detailedAllocationLoggingActive = false; }; // Typical Fixture methods virtual void SetUp(void); virtual void TearDown(void); // Helper methods void setFailingAllocation(size_t allocation); void clearFailingAllocation(void); ::testing::AssertionResult assertLeak( const char *leakExpr, size_t leakIndex); void checkForLeaks(void); typedef std::function InjectedFunction; void injectFailures(InjectedFunction &method, uint32_t maxIndex = 0); void injectFailureOnIndex(InjectedFunction &method, uint32_t index); // Used to keep track of # of allocations prior at SetUp time // Gets compared to # at TearDown time size_t previousAllocations; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/memory_manager_fixture.cpp000066400000000000000000000026431363734646600307640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/memory_manager_fixture.h" #include 
"shared/source/command_stream/preemption.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/os_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" using namespace NEO; void MemoryManagerWithCsrFixture::SetUp() { executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); csr = std::make_unique(this->executionEnvironment, 0); memoryManager = new MockMemoryManager(executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); csr->tagAddress = ¤tGpuTag; auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); auto engine = HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0]; auto osContext = memoryManager->createAndRegisterOsContext(csr.get(), engine, 1, PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false); csr->setupContext(*osContext); } void MemoryManagerWithCsrFixture::TearDown() { } compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/memory_manager_fixture.h000066400000000000000000000012561363734646600304300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/options.h" using namespace NEO; class MockCommandStreamReceiver; namespace NEO { class MockMemoryManager; }; // namespace NEO class MemoryManagerWithCsrFixture { public: MockMemoryManager *memoryManager; ExecutionEnvironment executionEnvironment; std::unique_ptr csr; uint32_t taskCount = 0; uint32_t currentGpuTag = initialHardwareTag; ~MemoryManagerWithCsrFixture() = default; void SetUp(); void TearDown(); }; 
compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/mock_aub_center_fixture.h000066400000000000000000000032721363734646600305460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_aub_center.h" #include "opencl/test/unit_test/mocks/mock_aub_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { struct MockAubCenterFixture { MockAubCenterFixture() = default; MockAubCenterFixture(CommandStreamReceiverType commandStreamReceiverType) : commandStreamReceiverType(commandStreamReceiverType){}; void SetUp() { setMockAubCenter(*platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0], commandStreamReceiverType); } void TearDown() { } static void setMockAubCenter(RootDeviceEnvironment &rootDeviceEnvironment) { setMockAubCenter(rootDeviceEnvironment, CommandStreamReceiverType::CSR_AUB); } static void setMockAubCenter(RootDeviceEnvironment &rootDeviceEnvironment, CommandStreamReceiverType commandStreamReceiverType) { if (testMode != TestMode::AubTests && testMode != TestMode::AubTestsWithTbx) { auto mockAubCenter = std::make_unique(defaultHwInfo.get(), false, "", commandStreamReceiverType); mockAubCenter->aubManager = std::make_unique(); rootDeviceEnvironment.aubCenter.reset(mockAubCenter.release()); } } protected: CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::CSR_AUB; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/multi_root_device_fixture.h000066400000000000000000000021541363734646600311400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once 
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" namespace NEO { class MultiRootDeviceFixture : public ::testing::Test { public: void SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(2 * expectedRootDeviceIndex); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, expectedRootDeviceIndex)); context.reset(new MockContext(device.get())); mockMemoryManager = reinterpret_cast(device->getMemoryManager()); } const uint32_t expectedRootDeviceIndex = 1; DebugManagerStateRestore restorer; std::unique_ptr device; std::unique_ptr context; MockMemoryManager *mockMemoryManager; }; }; // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/platform_fixture.cpp000066400000000000000000000020551363734646600276030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" namespace NEO { void PlatformFixture::SetUp() { pPlatform = constructPlatform(); ASSERT_EQ(0u, pPlatform->getNumDevices()); // setup platform / context bool isInitialized = initPlatform(); ASSERT_EQ(true, isInitialized); num_devices = static_cast(pPlatform->getNumDevices()); ASSERT_GT(num_devices, 0u); auto allDev = pPlatform->getClDevices(); ASSERT_NE(nullptr, allDev); devices = new cl_device_id[num_devices]; for (cl_uint deviceOrdinal = 0; deviceOrdinal < num_devices; ++deviceOrdinal) { auto device = allDev[deviceOrdinal]; ASSERT_NE(nullptr, device); devices[deviceOrdinal] = device; } } void PlatformFixture::TearDown() { platformsImpl.clear(); delete[] devices; } } // 
namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/platform_fixture.h000066400000000000000000000006311363734646600272460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/platform/platform.h" namespace NEO { class PlatformFixture { protected: void SetUp(); void TearDown(); Platform *pPlatform = nullptr; cl_uint num_devices = 0u; cl_device_id *devices = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/program_fixture.cpp000066400000000000000000000047061363734646600274330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/mocks/mock_program.h" namespace NEO { void ProgramFixture::CreateProgramWithSource(cl_context context, cl_device_id *deviceList, const std::string &sourceFileName) { Cleanup(); cl_int retVal = CL_SUCCESS; std::string testFile; testFile.append(clFiles); testFile.append(sourceFileName); ASSERT_EQ(true, fileExists(testFile)); knownSource = loadDataFromFile( testFile.c_str(), knownSourceSize); ASSERT_NE(0u, knownSourceSize); ASSERT_NE(nullptr, knownSource); const char *sources[1] = {knownSource.get()}; pProgram = Program::create( context, 1, sources, &knownSourceSize, retVal); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); } void ProgramFixture::CreateProgramFromBinary(cl_context context, cl_device_id *pDeviceList, const std::string &binaryFileName, cl_int &retVal, const std::string &options) { retVal = CL_SUCCESS; std::string testFile; retrieveBinaryKernelFilename(testFile, binaryFileName + "_", ".bin", options); knownSource = loadDataFromFile( testFile.c_str(), knownSourceSize); ASSERT_NE(0u, knownSourceSize); ASSERT_NE(nullptr, knownSource); 
pProgram = Program::create( context, 1, pDeviceList, &knownSourceSize, (const unsigned char **)&knownSource, nullptr, retVal); } void ProgramFixture::CreateProgramFromBinary(cl_context pContext, cl_device_id *pDeviceList, const std::string &binaryFileName, const std::string &options) { Cleanup(); cl_int retVal = CL_SUCCESS; CreateProgramFromBinary( pContext, pDeviceList, binaryFileName, retVal, options); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/program_fixture.h000066400000000000000000000027071363734646600270770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/file_io.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" namespace NEO { class ProgramFixture { public: void CreateProgramFromBinary(cl_context context, cl_device_id *pDeviceList, const std::string &binaryFileName, cl_int &retVal, const std::string &options = ""); void CreateProgramFromBinary(cl_context pContext, cl_device_id *pDeviceList, const std::string &binaryFileName, const std::string &options = ""); void CreateProgramWithSource(cl_context pContext, cl_device_id *pDeviceList, const std::string &sourceFileName); protected: virtual void SetUp() { } virtual void TearDown() { Cleanup(); } void Cleanup() { if (pProgram != nullptr) { pProgram->release(); } knownSource.reset(); } MockProgram *pProgram = nullptr; std::unique_ptr knownSource; size_t knownSourceSize = 0u; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/run_kernel_fixture.h000066400000000000000000000024621363734646600275720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/test_files.h" namespace NEO { struct CommandQueueHwFixture; struct CommandStreamFixture; // helper functions to enforce MockCompiler input files inline void overwriteBuiltInBinaryName( Device *pDevice, const std::string &filename, bool appendOptionsToFileName = false) { // set mock compiler to return expected kernel... MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; retrieveBinaryKernelFilename(fclDebugVars.fileName, filename + "_", ".bc"); fclDebugVars.appendOptionsToFileName = appendOptionsToFileName; retrieveBinaryKernelFilename(igcDebugVars.fileName, filename + "_", ".gen"); igcDebugVars.appendOptionsToFileName = appendOptionsToFileName; gEnvironment->fclPushDebugVars(fclDebugVars); gEnvironment->igcPushDebugVars(igcDebugVars); } inline void restoreBuiltInBinaryName(Device *pDevice) { gEnvironment->igcPopDebugVars(); gEnvironment->fclPopDebugVars(); } struct RunKernelFixtureFactory { typedef NEO::CommandStreamFixture CommandStreamFixture; typedef NEO::CommandQueueHwFixture CommandQueueFixture; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/scenario_test_fixture.h000066400000000000000000000033011363734646600302610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; class ScenarioTest : public ::testing::Test, public PlatformFixture { using PlatformFixture::SetUp; protected: void SetUp() override { DebugManager.flags.EnableTimestampPacket.set(false); PlatformFixture::SetUp(); auto pDevice = pPlatform->getClDevice(0); ASSERT_NE(nullptr, pDevice); 
cl_device_id clDevice = pDevice; context = Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal); commandQueue = new MockCommandQueue(context, pDevice, 0); program = new MockProgram(*pDevice->getExecutionEnvironment(), context, false, &pDevice->getDevice()); kernelInternals = new MockKernelWithInternals(*pDevice, context); kernel = kernelInternals->mockKernel; ASSERT_NE(nullptr, kernel); } void TearDown() override { delete kernelInternals; delete commandQueue; context->release(); program->release(); PlatformFixture::TearDown(); } cl_int retVal; DebugManagerStateRestore dbgRestorer; MockCommandQueue *commandQueue = nullptr; MockContext *context = nullptr; MockKernelWithInternals *kernelInternals = nullptr; MockKernel *kernel = nullptr; MockProgram *program = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/simple_arg_fixture.h000066400000000000000000000013001363734646600275360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_stream/command_stream_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" namespace NEO { struct SimpleArgFixtureFactory { typedef NEO::CommandStreamFixture CommandStreamFixture; typedef NEO::CommandQueueHwFixture CommandQueueFixture; typedef NEO::SimpleArgKernelFixture KernelFixture; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h000066400000000000000000000156651363734646600311210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/array_count.h" 
#include "shared/source/helpers/file_io.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "CL/cl.h" #include "compiler_options.h" #include "gtest/gtest.h" #include namespace NEO { class Kernel; class Program; template inline const char *type_name(T &) { return "unknown"; } template <> inline const char *type_name(char &) { return "char"; } template <> inline const char *type_name(int &) { return "int"; } template <> inline const char *type_name(float &) { return "float"; } template <> inline const char *type_name(short &) { return "short"; } template <> inline const char *type_name(unsigned char &) { return "unsigned char"; } template <> inline const char *type_name(unsigned int &) { return "unsigned int"; } template <> inline const char *type_name(unsigned short &) { return "unsigned short"; } class SimpleArgKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; protected: virtual void SetUp(ClDevice *pDevice) { ProgramFixture::SetUp(); std::string testFile; int forTheName = 0; testFile.append("simple_arg_"); testFile.append(type_name(forTheName)); auto pos = testFile.find(" "); if (pos != (size_t)-1) { testFile.replace(pos, 1, "_"); } cl_device_id device = pDevice; pContext = Context::create(nullptr, ClDeviceVector(&device, 1), nullptr, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pContext); CreateProgramFromBinary( pContext, &device, testFile); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( 
pProgram, *pProgram->getKernelInfo("SimpleArg"), &retVal); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { if (pKernel) { delete pKernel; pKernel = nullptr; } pContext->release(); ProgramFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Kernel *pKernel = nullptr; MockContext *pContext = nullptr; }; class SimpleArgNonUniformKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); cl_device_id deviceId = device; cl_context clContext = context; CreateProgramFromBinary( clContext, &deviceId, "simple_nonuniform", "-cl-std=CL2.0"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &deviceId, "-cl-std=CL2.0", nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel = Kernel::create( pProgram, *pProgram->getKernelInfo("simpleNonUniform"), &retVal); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { if (kernel) { delete kernel; kernel = nullptr; } ProgramFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Kernel *kernel = nullptr; }; class SimpleKernelFixture : public ProgramFixture { public: using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); cl_device_id deviceId = device; cl_context clContext = context; std::string programName("simple_kernels"); CreateProgramFromBinary( clContext, &deviceId, programName); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &deviceId, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); for (size_t i = 0; i < maxKernelsCount; i++) { if ((1 << i) & kernelIds) { std::string kernelName("simple_kernel_"); kernelName.append(std::to_string(i)); kernels[i].reset(Kernel::create( pProgram, *pProgram->getKernelInfo(kernelName.c_str()), &retVal)); ASSERT_NE(nullptr, kernels[i]); ASSERT_EQ(CL_SUCCESS, retVal); } } } void TearDown() override { for (size_t i = 0; i < 
maxKernelsCount; i++) { if (kernels[i]) { kernels[i].reset(nullptr); } } ProgramFixture::TearDown(); } uint32_t kernelIds = 0; static constexpr size_t maxKernelsCount = std::numeric_limits::digits; cl_int retVal = CL_SUCCESS; std::array, maxKernelsCount> kernels; }; class SimpleKernelStatelessFixture : public ProgramFixture { public: DebugManagerStateRestore restorer; using ProgramFixture::SetUp; protected: void SetUp(ClDevice *device, Context *context) { ProgramFixture::SetUp(); cl_device_id deviceId = device; cl_context clContext = context; DebugManager.flags.DisableStatelessToStatefulOptimization.set(true); DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(false); CreateProgramFromBinary( clContext, &deviceId, "stateless_kernel"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &deviceId, CompilerOptions::greaterThan4gbBuffersRequired, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); kernel.reset(Kernel::create( pProgram, *pProgram->getKernelInfo("statelessKernel"), &retVal)); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); } void TearDown() override { ProgramFixture::TearDown(); } std::unique_ptr kernel = nullptr; cl_int retVal = CL_SUCCESS; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/templated_fixture_tests.cpp000066400000000000000000000051721363734646600311630ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "test.h" namespace NEO { struct TemplatedFixtureTests : public ::testing::Test { void SetUp() override { baseSetUpCallId = callsOrder++; } void TearDown() override { EXPECT_EQ(idForBaseTearDown, callsOrder); EXPECT_EQ((idForBaseTearDown - 1), templateBaseTearDownCallId); } template void SetUpT() { templateBaseSetUpCallId = callsOrder++; } template void TearDownT() { 
templateBaseTearDownCallId = callsOrder++; } uint32_t callsOrder = 0; uint32_t baseSetUpCallId = -1; uint32_t templateBaseSetUpCallId = -1; uint32_t templateBaseTearDownCallId = -1; uint32_t idForBaseTearDown = -1; }; HWTEST_TEMPLATED_F(TemplatedFixtureTests, whenExecutingTemplatedTestThenCallTemplatedSetupAndTeardown) { EXPECT_EQ(2u, callsOrder); EXPECT_EQ(0u, baseSetUpCallId); EXPECT_EQ(1u, templateBaseSetUpCallId); idForBaseTearDown = callsOrder + 1; } struct DerivedTemplatedFixtureTests : public TemplatedFixtureTests { template void SetUpT() { TemplatedFixtureTests::SetUpT(); templateDerivedSetUpCallId = callsOrder++; } template void TearDownT() { templateDerivedTearDownCallId = callsOrder++; TemplatedFixtureTests::TearDownT(); } uint32_t templateDerivedSetUpCallId = -1; uint32_t templateDerivedTearDownCallId = -1; }; HWTEST_TEMPLATED_F(DerivedTemplatedFixtureTests, whenExecutingTemplatedTestThenCallTemplatedSetupAndTeardown) { EXPECT_EQ(3u, callsOrder); EXPECT_EQ(0u, baseSetUpCallId); EXPECT_EQ(1u, templateBaseSetUpCallId); EXPECT_EQ(2u, templateDerivedSetUpCallId); idForBaseTearDown = callsOrder + 2; } struct TemplatedFixtureBaseTests : public ::testing::Test { template void SetUpT() { capturedPipeControlWaRequiredInSetUp = HardwareCommandsHelper::isPipeControlWArequired(*defaultHwInfo); } template void TearDownT() {} bool capturedPipeControlWaRequiredInSetUp = false; }; HWTEST_TEMPLATED_F(TemplatedFixtureBaseTests, whenExecutingTemplatedSetupThenTemplateTargetsCorrectPlatform) { bool capturedPipeControlWaRequiredInTestBody = HardwareCommandsHelper::isPipeControlWArequired(*defaultHwInfo); EXPECT_EQ(capturedPipeControlWaRequiredInTestBody, capturedPipeControlWaRequiredInSetUp); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/two_walker_fixture.h000066400000000000000000000034241363734646600276030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "hello_world_fixture.h" namespace NEO { // Generates two back-to-back walkers using the same kernel for testing purposes template struct TwoWalkerTest : public HelloWorldTest, public HardwareParse { typedef HelloWorldTest Parent; using Parent::pCmdBuffer; using Parent::pCmdQ; using Parent::pCS; using Parent::pKernel; template void enqueueTwoKernels() { auto retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); // We have to parse after each enqueue* because // the CSR CS may insert commands in between parseCommands(*pCmdQ); retVal = EnqueueKernelHelper<>::enqueueKernel( pCmdQ, pKernel); ASSERT_EQ(CL_SUCCESS, retVal); parseCommands(*pCmdQ); itorWalker1 = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker1); itorWalker2 = itorWalker1; ++itorWalker2; itorWalker2 = find(itorWalker2, cmdList.end()); ASSERT_NE(cmdList.end(), itorWalker2); } void SetUp() override { Parent::SetUp(); HardwareParse::SetUp(); } void TearDown() override { HardwareParse::TearDown(); Parent::TearDown(); } GenCmdList::iterator itorWalker1; GenCmdList::iterator itorWalker2; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h000066400000000000000000000135741363734646600331750ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include 
"opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" namespace NEO { struct UltCommandStreamReceiverTest : public DeviceFixture, public HardwareParse, ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); HardwareParse::SetUp(); size_t sizeStream = 512; size_t alignmentStream = 0x1000; cmdBuffer = alignedMalloc(sizeStream, alignmentStream); dshBuffer = alignedMalloc(sizeStream, alignmentStream); iohBuffer = alignedMalloc(sizeStream, alignmentStream); sshBuffer = alignedMalloc(sizeStream, alignmentStream); ASSERT_NE(nullptr, cmdBuffer); ASSERT_NE(nullptr, dshBuffer); ASSERT_NE(nullptr, iohBuffer); ASSERT_NE(nullptr, sshBuffer); commandStream.replaceBuffer(cmdBuffer, sizeStream); auto graphicsAllocation = new MockGraphicsAllocation(cmdBuffer, sizeStream); commandStream.replaceGraphicsAllocation(graphicsAllocation); dsh.replaceBuffer(dshBuffer, sizeStream); graphicsAllocation = new MockGraphicsAllocation(dshBuffer, sizeStream); dsh.replaceGraphicsAllocation(graphicsAllocation); ioh.replaceBuffer(iohBuffer, sizeStream); graphicsAllocation = new MockGraphicsAllocation(iohBuffer, sizeStream); ioh.replaceGraphicsAllocation(graphicsAllocation); ssh.replaceBuffer(sshBuffer, sizeStream); graphicsAllocation = new MockGraphicsAllocation(sshBuffer, sizeStream); ssh.replaceGraphicsAllocation(graphicsAllocation); pDevice->getGpgpuCommandStreamReceiver().setupContext(*pDevice->getDefaultEngine().osContext); } void TearDown() override { pDevice->getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); delete dsh.getGraphicsAllocation(); delete ioh.getGraphicsAllocation(); delete ssh.getGraphicsAllocation(); delete commandStream.getGraphicsAllocation(); alignedFree(sshBuffer); alignedFree(iohBuffer); alignedFree(dshBuffer); alignedFree(cmdBuffer); HardwareParse::TearDown(); DeviceFixture::TearDown(); } template CompletionStamp 
flushTask(CommandStreamReceiverType &commandStreamReceiver, bool block = false, size_t startOffset = 0, bool requiresCoherency = false, bool lowPriority = false) { flushTaskFlags.blocking = block; flushTaskFlags.requiresCoherency = requiresCoherency; flushTaskFlags.lowPriority = lowPriority; flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); return commandStreamReceiver.flushTask( commandStream, startOffset, dsh, ioh, ssh, taskLevel, flushTaskFlags, *pDevice); } template void configureCSRHeapStatesToNonDirty() { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.dshState.updateAndCheck(&dsh); commandStreamReceiver.iohState.updateAndCheck(&ioh); commandStreamReceiver.sshState.updateAndCheck(&ssh); } template void configureCSRtoNonDirtyState() { bool slmUsed = false; if (DebugManager.flags.ForceSLML3Config.get()) { slmUsed = true; } uint32_t L3Config = PreambleHelper::getL3Config(*defaultHwInfo, slmUsed); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.setMediaVFEStateDirty(false); auto gmmHelper = pDevice->getGmmHelper(); auto mocsIndex = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); commandStreamReceiver.latestSentStatelessMocsConfig = mocsIndex >> 1; commandStreamReceiver.lastSentL3Config = L3Config; configureCSRHeapStatesToNonDirty(); commandStreamReceiver.taskLevel = taskLevel; commandStreamReceiver.lastSentThreadArbitrationPolicy = commandStreamReceiver.requiredThreadArbitrationPolicy; commandStreamReceiver.lastSentCoherencyRequest = 0; commandStreamReceiver.lastMediaSamplerConfig = 0; } template UltCommandStreamReceiver &getUltCommandStreamReceiver() { return reinterpret_cast &>(pDevice->getGpgpuCommandStreamReceiver()); } DispatchFlags flushTaskFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); 
uint32_t taskLevel = 42; LinearStream commandStream; IndirectHeap dsh = {nullptr}; IndirectHeap ioh = {nullptr}; IndirectHeap ssh = {nullptr}; void *cmdBuffer = nullptr; void *dshBuffer = nullptr; void *iohBuffer = nullptr; void *sshBuffer = nullptr; uint32_t latestSentDcFlushTaskCount; uint32_t latestSentNonDcFlushTaskCount; uint32_t dcFlushRequiredTaskCount; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/windows/000077500000000000000000000000001363734646600251755ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/windows/CMakeLists.txt000066400000000000000000000007161363734646600277410ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_fixtures_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/memory_allocator_multi_device_fixture_windows.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_fixtures_windows}) endif() set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_fixtures_windows ${IGDRCL_SRCS_tests_fixtures_windows}) add_subdirectories() memory_allocator_multi_device_fixture_windows.cpp000066400000000000000000000035421363734646600372470ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/fixtures/windows/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/test/unit_test/os_interface/windows/mock_gdi_interface.h" #include "opencl/test/unit_test/fixtures/memory_allocator_multi_device_fixture.h" #include "opencl/test/unit_test/mock_gdi/mock_gdi.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" using namespace NEO; void 
MemoryAllocatorMultiDeviceSystemSpecificFixture::SetUp(ExecutionEnvironment &executionEnvironment) { static D3DDDI_OPENALLOCATIONINFO allocationInfo; auto gdi = new MockGdi(); gdi->getQueryResourceInfoArgOut().NumAllocations = 1; gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo; auto osEnvironment = new OsEnvironmentWin(); osEnvironment->gdi.reset(gdi); for (auto i = 0u; i < executionEnvironment.rootDeviceEnvironments.size(); i++) { gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[i]->getGmmClientContext(), nullptr, 0, false); auto wddm = static_cast(executionEnvironment.rootDeviceEnvironments[i]->osInterface->get()->getWddm()); wddm->hwDeviceId = std::make_unique(ADAPTER_HANDLE, LUID{}, osEnvironment); wddm->callBaseMapGpuVa = false; allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle(); allocationInfo.hAllocation = ALLOCATION_HANDLE; allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO); } executionEnvironment.osEnvironment.reset(osEnvironment); } void MemoryAllocatorMultiDeviceSystemSpecificFixture::TearDown(ExecutionEnvironment &executionEnvironment) {} compute-runtime-20.13.16352/opencl/test/unit_test/gen11/000077500000000000000000000000001363734646600225455ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/CMakeLists.txt000066400000000000000000000024601363734646600253070ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN11) set(IGDRCL_SRCS_tests_gen11 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen11.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_tests_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen11.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper_gen11.cpp ) get_property(NEO_CORE_TESTS_GEN11 GLOBAL PROPERTY NEO_CORE_TESTS_GEN11) list(APPEND IGDRCL_SRCS_tests_gen11 ${NEO_CORE_TESTS_GEN11}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11}) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen11/coherency_tests_gen11.cpp000066400000000000000000000136551363734646600274570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/gen11/reg_configs.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" using namespace NEO; struct Gen11CoherencyRequirements : public ::testing::Test { typedef typename ICLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; struct myCsr : public CommandStreamReceiverHw { using CommandStreamReceiver::commandStream; myCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment, 0){}; CsrSizeRequestFlags *getCsrRequestFlags() { return &csrSizeRequestFlags; } }; void overrideCoherencyRequest(bool requestChanged, bool requireCoherency) { csr->getCsrRequestFlags()->coherencyRequestChanged = requestChanged; flags.requiresCoherency = requireCoherency; } void SetUp() override { 
device.reset(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); csr = new myCsr(*device->executionEnvironment); device->resetCommandStreamReceiver(csr); } myCsr *csr = nullptr; std::unique_ptr device; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); }; GEN11TEST_F(Gen11CoherencyRequirements, coherencyCmdSize) { auto lriSize = sizeof(MI_LOAD_REGISTER_IMM); overrideCoherencyRequest(false, false); auto retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); overrideCoherencyRequest(false, true); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); overrideCoherencyRequest(true, true); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(lriSize, retSize); overrideCoherencyRequest(true, false); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(lriSize, retSize); } GEN11TEST_F(Gen11CoherencyRequirements, hdcModeCmdValues) { auto lriSize = sizeof(MI_LOAD_REGISTER_IMM); char buff[MemoryConstants::pageSize]; LinearStream stream(buff, MemoryConstants::pageSize); auto expectedCmd = FamilyType::cmdInitLoadRegisterImm; expectedCmd.setRegisterOffset(gen11HdcModeRegister::address); expectedCmd.setDataDword(DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true)); overrideCoherencyRequest(true, false); csr->programComputeMode(stream, flags); EXPECT_EQ(csr->getCmdSizeForComputeMode(), stream.getUsed()); auto cmd = reinterpret_cast(stream.getCpuBase()); EXPECT_TRUE(memcmp(&expectedCmd, cmd, lriSize) == 0); overrideCoherencyRequest(true, true); csr->programComputeMode(stream, flags); EXPECT_EQ(csr->getCmdSizeForComputeMode() * 2, stream.getUsed()); cmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), lriSize)); expectedCmd.setDataDword(DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true, false)); EXPECT_TRUE(memcmp(&expectedCmd, cmd, lriSize) == 0); } struct Gen11CoherencyProgramingTest : public Gen11CoherencyRequirements { void SetUp() override { 
Gen11CoherencyRequirements::SetUp(); startOffset = csr->commandStream.getUsed(); } void flushTask(bool coherencyRequired) { flags.requiresCoherency = coherencyRequired; auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap stream(graphicAlloc); startOffset = csr->commandStream.getUsed(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc); }; void findMmio(bool expectToBeProgrammed, uint32_t registerAddress) { HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); bool foundOne = false; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd && cmd->getRegisterOffset() == registerAddress) { EXPECT_FALSE(foundOne); foundOne = true; } } EXPECT_EQ(expectToBeProgrammed, foundOne); }; void findMmio(bool expectToBeProgrammed) { findMmio(expectToBeProgrammed, gen11HdcModeRegister::address); } size_t startOffset; }; GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWhenFlushFirstTaskWithoutCoherencyRequiredThenProgramMmio) { flushTask(false); findMmio(true); } GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWhenFlushFirstTaskWithCoherencyRequiredThenProgramMmio) { flushTask(true); findMmio(true); } GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithCoherencyRequiredWhenFlushNextTaskWithoutChangingCoherencyRequirementThenDoNotProgramMmio) { flushTask(true); flushTask(true); findMmio(false); } GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithoutCoherencyRequiredWhenFlushNextTaskWithoutChangingCoherencyRequirementThenDoNotProgramMmio) { flushTask(false); flushTask(false); findMmio(false); } GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithCoherencyRequiredWhenFlushNextTaskWithChangingCoherencyRequirementThenProgramMmio) { 
flushTask(true); flushTask(false); findMmio(true); } GEN11TEST_F(Gen11CoherencyProgramingTest, givenCsrWithFlushedFirstTaskWithoutCoherencyRequiredWhenFlushNextTaskWithChangingCoherencyRequirementThenProgramMmio) { flushTask(false); flushTask(true); findMmio(true); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/command_stream_receiver_hw_tests_gen11.cpp000066400000000000000000000033471363734646600330500ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl" using CommandStreamReceiverHwTestGen11 = CommandStreamReceiverHwTest; GEN11TEST_F(CommandStreamReceiverHwTestGen11, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) { givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl(); } GEN11TEST_F(CommandStreamReceiverHwTestGen11, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblocking) { givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl(); } GEN11TEST_F(CommandStreamReceiverHwTestGen11, whenProgrammingMiSemaphoreWaitThenSetRegisterPollModeMemoryPoll) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; MI_SEMAPHORE_WAIT 
miSemaphoreWait = FamilyType::cmdInitMiSemaphoreWait; EXPECT_EQ(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, miSemaphoreWait.getRegisterPollMode()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/000077500000000000000000000000001363734646600233155ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/CMakeLists.txt000066400000000000000000000007611363734646600260610ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_EHL) set(IGDRCL_SRCS_tests_gen11_ehl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_ehl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_ehl.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_ehl}) add_subdirectories() neo_copy_test_files(copy_test_files_ehl ehl) add_dependencies(unit_tests copy_test_files_ehl) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/linux/000077500000000000000000000000001363734646600244545ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/linux/CMakeLists.txt000066400000000000000000000005271363734646600272200ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_ehl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_ehl.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_ehl_linux}) add_subdirectory(dll) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/linux/dll/000077500000000000000000000000001363734646600252275ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/linux/dll/CMakeLists.txt000066400000000000000000000004711363734646600277710ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
set(IGDRCL_SRCS_linux_dll_tests_gen11_ehl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_ehl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen11_ehl}) compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/linux/dll/device_id_tests_ehl.cpp000066400000000000000000000033251363734646600317230ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(EhlDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {IEHL_1x4x8_SUPERSKU_DEVICE_A0_ID, &EHL_1x4x8::hwInfo, &EHL_1x4x8::setupHardwareInfo, GTTYPE_GT1}, {IEHL_1x2x4_DEVICE_A0_ID, &EHL_1x2x4::hwInfo, &EHL_1x2x4::setupHardwareInfo, GTTYPE_GT1}, {IEHL_1x4x4_DEVICE_A0_ID, &EHL_1x4x4::hwInfo, &EHL_1x4x4::setupHardwareInfo, GTTYPE_GT1}, {IEHL_1x4x8_DEVICE_A0_ID, &EHL_1x4x8::hwInfo, &EHL_1x4x8::setupHardwareInfo, GTTYPE_GT1}, {IJSL_1x4x4_DEVICE_B0_ID, &EHL_1x4x4::hwInfo, &EHL_1x4x4::setupHardwareInfo, GTTYPE_GT1}, {IJSL_1x4x6_DEVICE_B0_ID, &EHL_1x4x6::hwInfo, &EHL_1x4x6::setupHardwareInfo, GTTYPE_GT1}, {IJSL_1x4x8_DEVICE_B0_ID, &EHL_1x4x8::hwInfo, &EHL_1x4x8::setupHardwareInfo, GTTYPE_GT1}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/linux/hw_info_config_tests_ehl.cpp000066400000000000000000000062441363734646600322160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxEhl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = IEHL_1x4x8_SUPERSKU_DEVICE_A0_ID; drm->setGtType(GTTYPE_GT1); drm->StoredSSVal = 8; } }; EHLTEST_F(HwInfoConfigTestLinuxEhl, configureHwInfoEhl) { auto hwInfoConfig = HwInfoConfigHw::get(); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(GTTYPE_GT1, outHwInfo.platform.eGTType); EXPECT_TRUE(outHwInfo.featureTable.ftrGT1); EXPECT_FALSE(outHwInfo.featureTable.ftrGT1_5); EXPECT_FALSE(outHwInfo.featureTable.ftrGT2); EXPECT_FALSE(outHwInfo.featureTable.ftrGT3); EXPECT_FALSE(outHwInfo.featureTable.ftrGT4); EXPECT_FALSE(outHwInfo.featureTable.ftrGTA); EXPECT_FALSE(outHwInfo.featureTable.ftrGTC); EXPECT_FALSE(outHwInfo.featureTable.ftrGTX); EXPECT_FALSE(outHwInfo.featureTable.ftrTileY); } EHLTEST_F(HwInfoConfigTestLinuxEhl, negative) { auto hwInfoConfig = HwInfoConfigHw::get(); drm->StoredRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceID = 0; drm->StoredRetValForDeviceRevID = -1; ret = 
hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceRevID = 0; drm->StoredRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForEUVal = 0; drm->StoredRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template class EhlHwInfoTests : public ::testing::Test {}; typedef ::testing::Types ehlTestTypes; TYPED_TEST_CASE(EhlHwInfoTests, ehlTestTypes); TYPED_TEST(EhlHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/test_device_caps_ehl.cpp000066400000000000000000000016221363734646600301560ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; using EhlTest = Test; EHLTEST_F(EhlTest, givenDeviceIdWhenAskingForSimulationThenReturnValidValue) { unsigned short ehlSimulationIds[2] = { IEHL_1x4x8_SUPERSKU_DEVICE_A0_ID, 0, // default, non-simulation }; for (auto id : ehlSimulationIds) { auto mockDevice = std::unique_ptr(createWithUsDeviceId(id)); EXPECT_NE(nullptr, mockDevice); if (id == 0) { EXPECT_FALSE(mockDevice->isSimulation()); } else { EXPECT_TRUE(mockDevice->isSimulation()); } } } EHLTEST_F(EhlTest, 
givenEhlWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/test_hw_info_config_ehl.cpp000066400000000000000000000067071363734646600307000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(EhlHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_ELKHARTLAKE != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using EhlHwInfo = ::testing::Test; EHLTEST_F(EhlHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisabled) { HardwareInfo hwInfo; uint64_t config = 0x100040008; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption); EXPECT_FALSE(hwInfo.capabilityTable.supportsVme); } EHLTEST_F(EhlHwInfo, givenBoolWhenCallEhlHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; uint64_t configs[] = { 0x100040008, 0x100040006, 0x100040004, 0x100020004}; for (auto config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, 
featureTable.ftrSVM); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, featureTable.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, featureTable.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); } } } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/windows/000077500000000000000000000000001363734646600250075ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/windows/CMakeLists.txt000066400000000000000000000005041363734646600275460ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_ehl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_ehl.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_ehl_windows}) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/ehl/windows/hw_info_config_tests_ehl.cpp000066400000000000000000000012371363734646600325460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using HwInfoConfigTestWindowsEhl = HwInfoConfigTestWindows; EHLTEST_F(HwInfoConfigTestWindowsEhl, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_ELKHARTLAKE, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/enqueue_kernel_gen11.cpp000066400000000000000000000047311363734646600272600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/static_size3.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "reg_configs_common.h" namespace NEO { using Gen11EnqueueTest = Test; GEN11TEST_F(Gen11EnqueueTest, givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenDefaultPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.SubgroupIndependentForwardProgressRequired = true; MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = 
findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[PreambleHelper::getDefaultThreadArbitrationPolicy()], cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address)); } GEN11TEST_F(Gen11EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.SubgroupIndependentForwardProgressRequired = false; MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(RowChickenReg4::regDataForArbitrationPolicy[ThreadArbitrationPolicy::AgeBased], cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), RowChickenReg4::address)); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/gen11/enqueue_media_kernel_gen11.cpp000066400000000000000000000134451363734646600304210ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/preamble.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" #include "test.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits | pipelineSelectMediaSamplerPowerClockGateMaskBits; GEN11TEST_F(MediaKernelTest, 
givenGen11CsrWhenEnqueueBlockedVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pVmeKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueBlockedNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = 
getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(1u, pCmd->getMediaSamplerPowerClockGateDisable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(0u, pCmd->getMediaSamplerPowerClockGateDisable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); enqueueVmeKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(1u, 
pCmd->getMediaSamplerPowerClockGateDisable()); } GEN11TEST_F(MediaKernelTest, givenGen11CsrWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename ICLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); enqueueRegularKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); EXPECT_EQ(0u, pCmd->getMediaSamplerPowerClockGateDisable()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp000066400000000000000000000042341363734646600274460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" using HwHelperTestGen11 = HwHelperTest; GEN11TEST_F(HwHelperTestGen11, getMaxBarriersPerSliceReturnsCorrectSize) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(32u, helper.getMaxBarrierRegisterPerSlice()); } GEN11TEST_F(HwHelperTestGen11, setCapabilityCoherencyFlag) { auto &helper = HwHelper::get(renderCoreFamily); bool coherency = false; helper.setCapabilityCoherencyFlag(&hardwareInfo, coherency); EXPECT_TRUE(coherency); } GEN11TEST_F(HwHelperTestGen11, getPitchAlignmentForImage) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(4u, helper.getPitchAlignmentForImage(&hardwareInfo)); } GEN11TEST_F(HwHelperTestGen11, adjustDefaultEngineType) { auto engineType = hardwareInfo.capabilityTable.defaultEngineType; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(engineType, hardwareInfo.capabilityTable.defaultEngineType); } GEN11TEST_F(HwHelperTestGen11, givenGen11PlatformWhenSetupHardwareCapabilitiesIsCalledThenDefaultImplementationIsUsed) { auto &helper = 
HwHelper::get(renderCoreFamily); // Test default method implementation testDefaultImplementationOfSetupHardwareCapabilities(helper, hardwareInfo); } GEN11TEST_F(HwHelperTestGen11, whenGetGpgpuEnginesThenReturnThreeRcsEngines) { whenGetGpgpuEnginesThenReturnTwoRcsEngines(pDevice->getHardwareInfo()); EXPECT_EQ(3u, pDevice->engines.size()); } using MemorySynchronizatiopCommandsTestsGen11 = ::testing::Test; GEN11TEST_F(MemorySynchronizatiopCommandsTestsGen11, WhenProgrammingCacheFlushThenExpectConstantCacheFieldSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/000077500000000000000000000000001363734646600236505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/CMakeLists.txt000066400000000000000000000012021363734646600264030ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_ICLLP) set(IGDRCL_SRCS_tests_gen11_icllp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_media_kernel_icllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_program_media_sampler_icllp.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_icllp}) add_subdirectories() neo_copy_test_files(copy_test_files_icllp icllp) add_dependencies(unit_tests copy_test_files_icllp) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/linux/000077500000000000000000000000001363734646600250075ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/linux/CMakeLists.txt000066400000000000000000000005351363734646600275520ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_icllp_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_icllp.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_icllp_linux}) add_subdirectory(dll) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/linux/dll/000077500000000000000000000000001363734646600255625ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/linux/dll/CMakeLists.txt000066400000000000000000000004771363734646600303320ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen11_icllp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_icllp.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen11_icllp}) compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/linux/dll/device_id_tests_icllp.cpp000066400000000000000000000036411363734646600326120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(IcllpDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {IICL_LP_GT1_MOB_DEVICE_F0_ID, &ICLLP_1x4x8::hwInfo, &ICLLP_1x4x8::setupHardwareInfo, GTTYPE_GT1}, {IICL_LP_1x4x8_LOW_MEDIA_ULT_DEVICE_F0_ID, &ICLLP_1x4x8::hwInfo, &ICLLP_1x4x8::setupHardwareInfo, GTTYPE_GT1}, {IICL_LP_1x4x8_LOW_MEDIA_ULX_DEVICE_F0_ID, 
&ICLLP_1x4x8::hwInfo, &ICLLP_1x4x8::setupHardwareInfo, GTTYPE_GT1}, {IICL_LP_1x6x8_ULX_DEVICE_F0_ID, &ICLLP_1x6x8::hwInfo, &ICLLP_1x6x8::setupHardwareInfo, GTTYPE_GT1}, {IICL_LP_1x6x8_ULT_DEVICE_F0_ID, &ICLLP_1x6x8::hwInfo, &ICLLP_1x6x8::setupHardwareInfo, GTTYPE_GT1}, {IICL_LP_1x8x8_SUPERSKU_DEVICE_F0_ID, &ICLLP_1x8x8::hwInfo, &ICLLP_1x8x8::setupHardwareInfo, GTTYPE_GT2}, {IICL_LP_1x8x8_ULT_DEVICE_F0_ID, &ICLLP_1x8x8::hwInfo, &ICLLP_1x8x8::setupHardwareInfo, GTTYPE_GT2}, {IICL_LP_1x8x8_ULX_DEVICE_F0_ID, &ICLLP_1x8x8::hwInfo, &ICLLP_1x8x8::setupHardwareInfo, GTTYPE_GT2}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/linux/hw_info_config_tests_icllp.cpp000066400000000000000000000063721363734646600331060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxIcllp : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = IICL_LP_GT1_MOB_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT1); } }; ICLLPTEST_F(HwInfoConfigTestLinuxIcllp, configureHwInfoIcllp) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = 
hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT1, outHwInfo.platform.eGTType); EXPECT_TRUE(outHwInfo.featureTable.ftrGT1); EXPECT_FALSE(outHwInfo.featureTable.ftrGT1_5); EXPECT_FALSE(outHwInfo.featureTable.ftrGT2); EXPECT_FALSE(outHwInfo.featureTable.ftrGT3); EXPECT_FALSE(outHwInfo.featureTable.ftrGT4); EXPECT_FALSE(outHwInfo.featureTable.ftrGTA); EXPECT_FALSE(outHwInfo.featureTable.ftrGTC); EXPECT_FALSE(outHwInfo.featureTable.ftrGTX); EXPECT_FALSE(outHwInfo.featureTable.ftrTileY); } ICLLPTEST_F(HwInfoConfigTestLinuxIcllp, negative) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceID = 0; drm->StoredRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceRevID = 0; drm->StoredRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForEUVal = 0; drm->StoredRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template class IcllpHwInfoTests : public ::testing::Test {}; typedef ::testing::Types icllpTestTypes; TYPED_TEST_CASE(IcllpHwInfoTests, icllpTestTypes); TYPED_TEST(IcllpHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; 
DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 5u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/test_device_caps_icllp.cpp000066400000000000000000000034751363734646600310540ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; using IcllpTest = Test; ICLLPTEST_F(IcllpTest, givenIcllpWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } ICLLPTEST_F(IcllpTest, givenIclLpWhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } ICLLPTEST_F(IcllpTest, shouldPassOnIcllp) { EXPECT_EQ(IGFX_ICELAKE_LP, pDevice->getHardwareInfo().platform.eProductFamily); } ICLLPTEST_F(IcllpTest, lpSkusDontSupportFP64) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } ICLLPTEST_F(IcllpTest, lpSkusDontSupportCorrectlyRoundedDivideSqrt) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } ICLLPTEST_F(IcllpTest, isSimulationCap) { unsigned short iclLpSimulationIds[2] = { IICL_LP_GT1_MOB_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : iclLpSimulationIds) { mockDevice = 
createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } ICLLPTEST_F(IcllpTest, GivenICLLPWhenCheckftr64KBpagesThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/test_hw_info_config_icllp.cpp000066400000000000000000000060731363734646600315620ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(IcllpHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_ICELAKE_LP != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; gtSystemInfo = {0}; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using IcllpHwInfo = ::testing::Test; ICLLPTEST_F(IcllpHwInfo, givenBoolWhenCallIcllpHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100080008, 0x100040008, 0x100060008}; bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrSVM); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, 
featureTable.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, featureTable.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); } } } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/test_media_kernel_icllp.cpp000066400000000000000000000055061363734646600312230ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/preamble.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "test.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; ICLLPTEST_F(MediaKernelTest, givenIcllpDefaultLastVmeSubsliceConfigIsFalse) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, 
givenIcllpCSRWhenEnqueueVmeKernelThenVmeSubslicesConfigChangesToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueRegularKernelAfterVmeKernelThenVmeSubslicesConfigChangesToFalse) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueVmeKernel(); enqueueRegularKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueRegularKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueRegularKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueRegularKernelAfterRegularKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueRegularKernel(); enqueueRegularKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, givenIcllpCSRWhenEnqueueVmeKernelAfterRegularKernelThenVmeSubslicesConfigChangesToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); enqueueRegularKernel(); enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } ICLLPTEST_F(MediaKernelTest, icllpCmdSizeForVme) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); size_t programVmeCmdSize = sizeof(MI_LOAD_REGISTER_IMM) + 2 * sizeof(PIPE_CONTROL); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(programVmeCmdSize, csr->getCmdSizeForMediaSampler(true)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/test_program_media_sampler_icllp.cpp000066400000000000000000000175431363734646600331410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/gen11/reg_configs.h" #include "shared/source/helpers/hw_helper.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" using namespace NEO; struct Gen11MediaSamplerProgramingTest : public ::testing::Test { typedef typename ICLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; typedef typename ICLFamily::PIPE_CONTROL PIPE_CONTROL; struct myCsr : public CommandStreamReceiverHw { using CommandStreamReceiver::commandStream; using CommandStreamReceiverHw::programMediaSampler; myCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment, 0){}; void overrideLastVmeSubliceConfig(bool value) { lastVmeSubslicesConfig = value; } }; void overrideMediaRequest(bool lastVmeConfig, bool mediaSamplerRequired) { csr->overrideLastVmeSubliceConfig(lastVmeConfig); flags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; } void SetUp() override { device.reset(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); csr = new myCsr(*device->executionEnvironment); device->resetCommandStreamReceiver(csr); stream.reset(new LinearStream(buff, MemoryConstants::pageSize)); } void programMediaSampler() { csr->programMediaSampler(*stream, flags); } size_t getCmdSize() { return csr->getCmdSizeForMediaSampler(flags.pipelineSelectArgs.mediaSamplerRequired); } myCsr *csr = nullptr; std::unique_ptr device; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); char buff[MemoryConstants::pageSize]; std::unique_ptr stream; }; template void setFlushAllCaches(PIPE_CONTROL &pc) { pc.setDcFlushEnable(true); pc.setRenderTargetCacheFlushEnable(true); pc.setInstructionCacheInvalidateEnable(true); 
pc.setTextureCacheInvalidationEnable(true); pc.setPipeControlFlushEnable(true); pc.setVfCacheInvalidationEnable(true); pc.setConstantCacheInvalidationEnable(true); pc.setStateCacheInvalidationEnable(true); } ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceDisabledWhenPowerClockStateRegisterEnableThenExpectCorrectCmdValues) { uint32_t programVmeCmdSize = sizeof(MI_LOAD_REGISTER_IMM) + 2 * sizeof(PIPE_CONTROL); overrideMediaRequest(false, true); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); auto expectedMiLrCmd = FamilyType::cmdInitLoadRegisterImm; expectedMiLrCmd.setRegisterOffset(gen11PowerClockStateRegister::address); auto expectedRegValue = (device->getHardwareInfo().gtSystemInfo.SubSliceCount / 2) << gen11PowerClockStateRegister::subSliceCountShift; expectedRegValue |= (gen11PowerClockStateRegister::vmeSliceCount << gen11PowerClockStateRegister::sliceCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::minEuCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::maxEuCountShift); expectedRegValue |= gen11PowerClockStateRegister::enabledValue; expectedMiLrCmd.setDataDword(expectedRegValue); programMediaSampler(); ASSERT_EQ(programVmeCmdSize, stream->getUsed()); auto expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); setFlushAllCaches(expectedPipeControlCmd); auto pipeControlCmd = reinterpret_cast(stream->getCpuBase()); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); size_t cmdOffset = sizeof(PIPE_CONTROL); auto miLrCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM))); cmdOffset += sizeof(MI_LOAD_REGISTER_IMM); expectedPipeControlCmd = FamilyType::cmdInitPipeControl; 
expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); pipeControlCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); } ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceEnabledWhenPowerClockStateRegisterDisableThenExpectCorrectCmdValues) { constexpr uint32_t programVmeCmdSize = sizeof(MI_LOAD_REGISTER_IMM) + 3 * sizeof(PIPE_CONTROL); overrideMediaRequest(true, false); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); programMediaSampler(); auto expectedMiLrCmd = FamilyType::cmdInitLoadRegisterImm; expectedMiLrCmd.setRegisterOffset(gen11PowerClockStateRegister::address); auto expectedRegValue = (device->getHardwareInfo().gtSystemInfo.SubSliceCount / 2) << gen11PowerClockStateRegister::subSliceCountShift; expectedRegValue |= ((device->getHardwareInfo().gtSystemInfo.SliceCount * 2) << gen11PowerClockStateRegister::sliceCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::minEuCountShift); expectedRegValue |= (device->getHardwareInfo().gtSystemInfo.MaxEuPerSubSlice << gen11PowerClockStateRegister::maxEuCountShift); expectedRegValue |= gen11PowerClockStateRegister::disabledValue; expectedMiLrCmd.setDataDword(expectedRegValue); ASSERT_EQ(programVmeCmdSize, stream->getUsed()); auto expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); setFlushAllCaches(expectedPipeControlCmd); expectedPipeControlCmd.setGenericMediaStateClear(true); auto pipeControlCmd = reinterpret_cast(stream->getCpuBase()); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); size_t cmdOffset = sizeof(PIPE_CONTROL); pipeControlCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); expectedPipeControlCmd = FamilyType::cmdInitPipeControl; 
expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); cmdOffset += sizeof(PIPE_CONTROL); auto miLrCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM))); cmdOffset += sizeof(MI_LOAD_REGISTER_IMM); pipeControlCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); } ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceEnabledWhenPowerClockStateRegisterEnabledThenExpectNoCmds) { constexpr uint32_t programVmeCmdSize = 0; overrideMediaRequest(true, true); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); programMediaSampler(); EXPECT_EQ(programVmeCmdSize, stream->getUsed()); } ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceDisabledWhenPowerClockStateRegisterDisableThenExpectNoCmds) { constexpr uint32_t programVmeCmdSize = 0; overrideMediaRequest(false, false); size_t estimatedCmdSize = getCmdSize(); EXPECT_EQ(programVmeCmdSize, estimatedCmdSize); programMediaSampler(); EXPECT_EQ(programVmeCmdSize, stream->getUsed()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/windows/000077500000000000000000000000001363734646600253425ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/windows/CMakeLists.txt000066400000000000000000000005121363734646600301000ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_icllp_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_icllp.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_icllp_windows}) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/icllp/windows/hw_info_config_tests_icllp.cpp000066400000000000000000000013241363734646600334310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsIcllp = HwInfoConfigTestWindows; ICLLPTEST_F(HwInfoConfigTestWindowsIcllp, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_ICELAKE_LP, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/image_tests_gen11.cpp000066400000000000000000000103231363734646600265470ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/image/image_surface_state.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; typedef ICLFamily::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; struct AppendSurfaceStateParamsTest : public ::testing::Test { void SetUp() override { surfaceState = ICLFamily::cmdInitRenderSurfaceState; EXPECT_EQ(RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_DISABLE, surfaceState.getSampleTapDiscardDisable()); imageDesc.image_width = 32; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; flags = 
CL_MEM_READ_WRITE; } void createImage() { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); EXPECT_NE(nullptr, surfaceFormat); image.reset(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); } cl_int retVal = CL_SUCCESS; MockContext context; std::unique_ptr image; cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem_flags flags; RENDER_SURFACE_STATE surfaceState; }; GEN11TEST_F(AppendSurfaceStateParamsTest, givenImageFormatWithoutAlphaChannelWhenAppendSurfaceStateParamsThenTapDiscardConfigDoesntChange) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_R; createImage(); auto imageHw = static_cast *>(image.get()); imageHw->appendSurfaceStateParams(&surfaceState); bool tapDiscardConfigChanged = RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_DISABLE != surfaceState.getSampleTapDiscardDisable(); EXPECT_FALSE(tapDiscardConfigChanged); } GEN11TEST_F(AppendSurfaceStateParamsTest, givenImageFormatWithAlphaChannelWhenAppendSurfaceStateParamsThenTapDiscardConfigChanges) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; createImage(); auto imageHw = static_cast *>(image.get()); imageHw->appendSurfaceStateParams(&surfaceState); bool tapDiscardConfigChanged = RENDER_SURFACE_STATE::SAMPLE_TAP_DISCARD_DISABLE_DISABLE != surfaceState.getSampleTapDiscardDisable(); EXPECT_TRUE(tapDiscardConfigChanged); } typedef ::testing::Test gen11ImageTests; GEN11TEST_F(gen11ImageTests, givenImageForGen11WhenClearColorParametersAreSetThenSurfaceStateIsNotModified) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = 
FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); setClearColorParams(&surfaceStateAfter, imageHw->getGraphicsAllocation()->getDefaultGmm()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } using Gen11RenderSurfaceStateDataTests = ::testing::Test; GEN11TEST_F(Gen11RenderSurfaceStateDataTests, WhenMemoryObjectControlStateIndexToMocsTablesIsSetThenValueIsShift) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; uint32_t value = 4; surfaceState.setMemoryObjectControlStateIndexToMocsTables(value); EXPECT_EQ(surfaceState.TheStructure.Common.MemoryObjectControlState_IndexToMocsTables, value >> 1); EXPECT_EQ(surfaceState.getMemoryObjectControlStateIndexToMocsTables(), value); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp000066400000000000000000000014771363734646600267570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; using Gen11KernelTest = Test; GEN11TEST_F(Gen11KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsTrue) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_TRUE(retVal); } using Gen11HardwareCommandsTest = testing::Test; GEN11TEST_F(Gen11HardwareCommandsTest, givenGen11PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsFalse) { EXPECT_FALSE(HardwareCommandsHelper::doBindingTablePrefetch()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/000077500000000000000000000000001363734646600233215ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/CMakeLists.txt000066400000000000000000000007611363734646600260650ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_LKF) set(IGDRCL_SRCS_tests_gen11_lkf ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_lkf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_lkf.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_lkf}) add_subdirectories() neo_copy_test_files(copy_test_files_lkf lkf) add_dependencies(unit_tests copy_test_files_lkf) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/linux/000077500000000000000000000000001363734646600244605ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/linux/CMakeLists.txt000066400000000000000000000005271363734646600272240ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_lkf_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_lkf.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_lkf_linux}) add_subdirectory(dll) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/linux/dll/000077500000000000000000000000001363734646600252335ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/linux/dll/CMakeLists.txt000066400000000000000000000004711363734646600277750ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen11_lkf ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_lkf.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE 
${IGDRCL_SRCS_linux_dll_tests_gen11_lkf}) compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/linux/dll/device_id_tests_lkf.cpp000066400000000000000000000022051363734646600317270ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(LkfDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {ILKF_1x8x8_DESK_DEVICE_F0_ID, &LKF_1x8x8::hwInfo, &LKF_1x8x8::setupHardwareInfo, GTTYPE_GT1}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/linux/hw_info_config_tests_lkf.cpp000066400000000000000000000062341363734646600322250ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; struct HwInfoConfigTestLinuxLkf : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = ILKF_1x8x8_DESK_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT1); drm->StoredSSVal = 8; } }; LKFTEST_F(HwInfoConfigTestLinuxLkf, configureHwInfoLkf) { auto hwInfoConfig = HwInfoConfigHw::get(); int ret = 
hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(GTTYPE_GT1, outHwInfo.platform.eGTType); EXPECT_TRUE(outHwInfo.featureTable.ftrGT1); EXPECT_FALSE(outHwInfo.featureTable.ftrGT1_5); EXPECT_FALSE(outHwInfo.featureTable.ftrGT2); EXPECT_FALSE(outHwInfo.featureTable.ftrGT3); EXPECT_FALSE(outHwInfo.featureTable.ftrGT4); EXPECT_FALSE(outHwInfo.featureTable.ftrGTA); EXPECT_FALSE(outHwInfo.featureTable.ftrGTC); EXPECT_FALSE(outHwInfo.featureTable.ftrGTX); EXPECT_FALSE(outHwInfo.featureTable.ftrTileY); } LKFTEST_F(HwInfoConfigTestLinuxLkf, negative) { auto hwInfoConfig = HwInfoConfigHw::get(); drm->StoredRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceID = 0; drm->StoredRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceRevID = 0; drm->StoredRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForEUVal = 0; drm->StoredRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } template class LkfHwInfoTests : public ::testing::Test {}; typedef ::testing::Types lkfTestTypes; TYPED_TEST_CASE(LkfHwInfoTests, lkfTestTypes); TYPED_TEST(LkfHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = 
drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/test_device_caps_lkf.cpp000066400000000000000000000034611363734646600301710ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; using LkfTest = Test; LKFTEST_F(LkfTest, givenLkfWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } LKFTEST_F(LkfTest, givenLKFWhenCheckedOCLVersionThen21IsReported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); } LKFTEST_F(LkfTest, givenLKFWhenCheckedSvmSupportThenNoSvmIsReported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(caps.svmCapabilities, 0u); } LKFTEST_F(LkfTest, givenLkfWhenDoublePrecissionIsCheckedThenFalseIsReturned) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupports64BitMath); } LKFTEST_F(LkfTest, givenLkfWhenExtensionStringIsCheckedThenFP64IsNotReported) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } LKFTEST_F(LkfTest, isSimulationCap) { unsigned short lkfSimulationIds[2] = { ILKF_1x8x8_DESK_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : lkfSimulationIds) { 
mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/test_hw_info_config_lkf.cpp000066400000000000000000000063301363734646600307000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(LkfHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_LAKEFIELD != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using LkfHwInfo = ::testing::Test; LKFTEST_F(LkfHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisabled) { HardwareInfo hwInfo; uint64_t config = 0x100080008; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption); EXPECT_FALSE(hwInfo.capabilityTable.supportsVme); } LKFTEST_F(LkfHwInfo, givenBoolWhenCallLkfHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; uint64_t config = 0x100080008; for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrSVM); 
EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, featureTable.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, featureTable.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); } } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/windows/000077500000000000000000000000001363734646600250135ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/windows/CMakeLists.txt000066400000000000000000000005041363734646600275520ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_lkf_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_lkf.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_lkf_windows}) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/lkf/windows/hw_info_config_tests_lkf.cpp000066400000000000000000000012701363734646600325530ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using HwInfoConfigTestWindowsLkf = HwInfoConfigTestWindows; LKFTEST_F(HwInfoConfigTestWindowsLkf, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_LAKEFIELD, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/sampler_tests_gen11.cpp000066400000000000000000000021241363734646600271300ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using namespace NEO; typedef Test Gen11SamplerTest; GEN11TEST_F(Gen11SamplerTest, appendSamplerStateParamsDoesNothing) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; std::unique_ptr context(new MockContext()); std::unique_ptr> sampler(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST)); auto stateWithoutAppendedParams = FamilyType::cmdInitSamplerState; auto stateWithAppendedParams = FamilyType::cmdInitSamplerState; EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); sampler->appendSamplerStateParams(&stateWithAppendedParams); EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/scheduler_source_tests_gen11.cpp000066400000000000000000000030641363734646600310270ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_cmds.h" #include "opencl/source/device_queue/device_queue_hw.h" // Keep the order of device_enqueue.h and scheduler_definitions.h as the latter uses defines from the first one #include "opencl/source/gen11/device_enqueue.h" #include "opencl/source/gen11/scheduler_definitions.h" #include "opencl/test/unit_test/scheduler/scheduler_source_tests.h" // Keep this include below scheduler_definitions.h and device_enqueue.h headers as it depends on defines defined in them #include "opencl/test/unit_test/scheduler/scheduler_source_tests.inl" using namespace NEO; static_assert((SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE_GEN11 & (MemoryConstants::cacheLineSize - 1)) == 0, "Second level buffer space incorrect for gen11"); typedef SchedulerSourceTest SchedulerSourceTestGen11; GEN11TEST_F(SchedulerSourceTestGen11, GivenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCode) { givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest(); } GEN11TEST_F(SchedulerSourceTestGen11, GivenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrect) { givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest(); } GEN11TEST_F(SchedulerSourceTestGen11, GivenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCode) { givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest(); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/tbx_command_stream_receiver_tests_gen11.cpp000066400000000000000000000020661363734646600332240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; using Gen11TbxCommandStreamReceiverTests = Test; GEN11TEST_F(Gen11TbxCommandStreamReceiverTests, whenAskedForPollForCompletionParametersThenReturnCorrectValues) { class MyMockTbxHw : public TbxCommandStreamReceiverHw { public: MyMockTbxHw(ExecutionEnvironment &executionEnvironment) : TbxCommandStreamReceiverHw(executionEnvironment, 0) {} using TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion; using TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion; }; MyMockTbxHw myMockTbxHw(*pDevice->executionEnvironment); EXPECT_EQ(0x80u, myMockTbxHw.getMaskAndValueForPollForCompletion()); EXPECT_TRUE(myMockTbxHw.getpollNotEqualValueForPollForCompletion()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/test_device_caps_gen11.cpp000066400000000000000000000060441363734646600275540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test Gen11DeviceCaps; GEN11TEST_F(Gen11DeviceCaps, defaultPreemptionMode) { EXPECT_TRUE(PreemptionMode::MidThread == pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } GEN11TEST_F(Gen11DeviceCaps, profilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(83u, caps.outProfilingTimerResolution); } GEN11TEST_F(Gen11DeviceCaps, kmdNotifyMechanism) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, 
pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } GEN11TEST_F(Gen11DeviceCaps, compression) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedImages); } GEN11TEST_F(Gen11DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedValue = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * 8; EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); } GEN11TEST_F(Gen11DeviceCaps, givenHwInfoWhenRequestedMaxFrontEndThreadsThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); EXPECT_EQ(HwHelper::getMaxThreadsForVfe(hwInfo), pDevice->getDeviceInfo().maxFrontEndThreads); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckSupportCacheFlushAfterWalkerThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } GEN11TEST_F(Gen11DeviceCaps, givenGen11WhenCheckExtensionsThenSubgroupLocalBlockIOIsSupported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, 
testing::HasSubstr(std::string("cl_intel_subgroup_local_block_io"))); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/test_device_queue_hw_gen11.cpp000066400000000000000000000043101363734646600304420ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include using namespace NEO; using namespace DeviceHostQueue; typedef DeviceQueueHwTest Gen11DeviceQueueSlb; GEN11TEST_F(Gen11DeviceQueueSlb, expectedAllocationSize) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto expectedSize = getMinimumSlbSize(); expectedSize *= 128; //num of enqueues expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); expectedSize += MockDeviceQueueHw::getExecutionModelCleanupSectionSize(); expectedSize += (4 * MemoryConstants::pageSize); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr); EXPECT_EQ(deviceQueue->getSlbBuffer()->getUnderlyingBufferSize(), expectedSize); delete deviceQueue; } GEN11TEST_F(Gen11DeviceQueueSlb, SlbCommandsWa) { auto mockDeviceQueueHw = std::make_unique>(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); EXPECT_FALSE(mockDeviceQueueHw->arbCheckWa); EXPECT_FALSE(mockDeviceQueueHw->pipeControlWa); EXPECT_FALSE(mockDeviceQueueHw->miAtomicWa); EXPECT_FALSE(mockDeviceQueueHw->lriWa); } GEN11TEST_F(Gen11DeviceQueueSlb, GivenDeviceQueueWhenSingleEnqueueSpaceIsNotCachelineAlignedThenCSPrefetchIsExtendedWithCachelineAlignement) { auto mockDeviceQueueHw = std::make_unique>(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); EXPECT_LE(8 * 
MemoryConstants::cacheLineSize, mockDeviceQueueHw->getCSPrefetchSize()); EXPECT_EQ(0u, (mockDeviceQueueHw->getMinimumSlbSize() & (MemoryConstants::cacheLineSize - 1))); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/test_platform_caps_gen11.cpp000066400000000000000000000011441363734646600301350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "test.h" using namespace NEO; struct Gen11PlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; GEN11TEST_F(Gen11PlatformCaps, lpSkusDontSupportFP64) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/test_sample_gen11.cpp000066400000000000000000000006361363734646600265710ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test Gen11OnlyTeset; GEN11TEST_F(Gen11OnlyTeset, shouldPassOnGen11) { EXPECT_NE(IGFX_GEN9_CORE, pDevice->getRenderCoreFamily()); EXPECT_EQ(IGFX_GEN11_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen11/unit_test_helper_gen11.cpp000066400000000000000000000005171363734646600276240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen11/hw_info.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.inl" namespace NEO { template struct UnitTestHelper; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/gen11/windows/000077500000000000000000000000001363734646600242375ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen11/windows/CMakeLists.txt000066400000000000000000000004751363734646600270050ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen11_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_callbacks_tests_gen11.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen11_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen11/windows/gmm_callbacks_tests_gen11.cpp000066400000000000000000000010251363734646600317350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/windows/gmm_callbacks.h" #include "test.h" using namespace NEO; typedef ::testing::Test Gen11GmmCallbacksTests; GEN11TEST_F(Gen11GmmCallbacksTests, notSupportedDeviceCallback) { EXPECT_EQ(0, DeviceCallbacks::notifyAubCapture(nullptr, 0, 0, false)); } GEN11TEST_F(Gen11GmmCallbacksTests, notSupportedTTCallback) { EXPECT_EQ(0, TTCallbacks::writeL3Address(nullptr, 1, 2)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/000077500000000000000000000000001363734646600231025ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/CMakeLists.txt000066400000000000000000000035211363734646600256430ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) set(IGDRCL_SRCS_tests_gen12lp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen12lp.inl 
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/compute_mode_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/gen12lp_tests_wrapper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/profiling_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/special_ult_helper_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/special_ult_helper_gen12lp.h ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_tests_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen12lp.inl ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper_gen12lp.cpp ) get_property(NEO_CORE_TESTS_GEN12LP GLOBAL PROPERTY NEO_CORE_TESTS_GEN12LP) list(APPEND IGDRCL_SRCS_tests_gen12lp ${NEO_CORE_TESTS_GEN12LP}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp}) add_subdirectories() endif() aub_command_stream_receiver_tests_gen12lp.inl000066400000000000000000000065221363734646600340120ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include 
"opencl/test/unit_test/helpers/hw_helper_tests.h" #include "opencl/test/unit_test/mocks/mock_aub_csr.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "test.h" using namespace NEO; using Gen12LPAubCommandStreamReceiverTests = Test; GEN12LPTEST_F(Gen12LPAubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenGetGUCWorkQueueItemHeaderIsCalledThenAppropriateValueDependingOnEngineTypeIsReturned) { std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); MockOsContext rcsOsContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); MockOsContext ccsOsContext(0, 1, aub_stream::ENGINE_CCS, PreemptionMode::Disabled, false, false, false); aubCsr->setupContext(ccsOsContext); uint32_t headerCCS = aubCsr->getGUCWorkQueueItemHeader(); aubCsr->setupContext(rcsOsContext); uint32_t headerRCS = aubCsr->getGUCWorkQueueItemHeader(); EXPECT_EQ(0x00030401u, headerCCS); EXPECT_EQ(0x00030001u, headerRCS); } GEN12LPTEST_F(Gen12LPAubCommandStreamReceiverTests, givenGraphicsAlloctionWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) { std::unique_ptr> aubCsr(new AUBCommandStreamReceiverHw("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); MockGraphicsAllocation allocation(nullptr, 0); auto bits = aubCsr->getPPGTTAdditionalBits(&allocation); constexpr uint64_t expectedBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit); EXPECT_EQ(expectedBits, bits); } GEN12LPTEST_F(Gen12LPAubCommandStreamReceiverTests, givenCCSEnabledWhenEngineMmiosAreInitializedThenExpectL3ConfigMmioIsWritten) { MockOsContext osContext(0, 1, aub_stream::ENGINE_CCS, PreemptionMode::Disabled, false, false, false); AUBCommandStreamReceiverHw aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); aubCsr.setupContext(osContext); auto stream = 
std::make_unique(); aubCsr.stream = stream.get(); aubCsr.initEngineMMIO(); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0xB234, 0xD0000020u))); } GEN12LPTEST_F(Gen12LPAubCommandStreamReceiverTests, givenRCSEnabledWhenEngineMmiosAreInitializedThenExpectL3ConfigMmioIsWritten) { MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); AUBCommandStreamReceiverHw aubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); aubCsr.setupContext(osContext); auto stream = std::make_unique(); aubCsr.stream = stream.get(); aubCsr.initEngineMMIO(); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0xB134, 0xD0000020u))); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/cmd_parse/000077500000000000000000000000001363734646600250375ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/cmd_parse/cmd_parse_gen12lp.inl000066400000000000000000000003711363734646600310310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ size_t getAdditionalCommandLengthHwSpecific(void *cmd) { return 0; } const char *getAdditionalCommandNameHwSpecific(void *cmd) { return "UNKNOWN"; } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl000066400000000000000000000354561363734646600303540ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/gen12lp/helpers_gen12lp.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" using namespace NEO; struct Gen12LpCoherencyRequirements : public ::testing::Test { using STATE_COMPUTE_MODE = typename 
TGLLPFamily::STATE_COMPUTE_MODE; using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL; using PIPELINE_SELECT = typename TGLLPFamily::PIPELINE_SELECT; struct myCsr : public CommandStreamReceiverHw { using CommandStreamReceiver::commandStream; myCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment, 0){}; CsrSizeRequestFlags *getCsrRequestFlags() { return &csrSizeRequestFlags; } }; void makeResidentSharedAlloc() { csr->getResidencyAllocations().push_back(alloc); } void overrideCoherencyRequest(bool reqestChanged, bool requireCoherency, bool hasSharedHandles) { csr->getCsrRequestFlags()->coherencyRequestChanged = reqestChanged; csr->getCsrRequestFlags()->hasSharedHandles = hasSharedHandles; flags.requiresCoherency = requireCoherency; if (hasSharedHandles) { makeResidentSharedAlloc(); } } void SetUp() override { device.reset(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); csr = new myCsr(*device->executionEnvironment); device->resetCommandStreamReceiver(csr); AllocationProperties properties(device->getRootDeviceIndex(), false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); alloc = device->getMemoryManager()->createGraphicsAllocationFromSharedHandle((osHandle)123, properties, false); } void TearDown() override { device->getMemoryManager()->freeGraphicsMemory(alloc); } myCsr *csr = nullptr; std::unique_ptr device; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); GraphicsAllocation *alloc = nullptr; }; GEN12LPTEST_F(Gen12LpCoherencyRequirements, coherencyCmdSizeWithoutSharedHandles) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE); auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.is3DPipelineSelectWARequired(device->getHardwareInfo())) { cmdsSize += 2 * sizeof(PIPELINE_SELECT); if (Gen12LPHelpers::pipeControlWaRequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSize += 2 * 
sizeof(PIPE_CONTROL); } } overrideCoherencyRequest(false, false, false); auto retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); overrideCoherencyRequest(false, true, false); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); overrideCoherencyRequest(true, true, false); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); overrideCoherencyRequest(true, false, false); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, coherencyCmdSizeWithSharedHandles) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.is3DPipelineSelectWARequired(device->getHardwareInfo())) { cmdsSize += 2 * sizeof(PIPELINE_SELECT); if (Gen12LPHelpers::pipeControlWaRequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSize += 2 * sizeof(PIPE_CONTROL); } } overrideCoherencyRequest(false, false, true); auto retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); overrideCoherencyRequest(false, true, true); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); overrideCoherencyRequest(true, true, true); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); overrideCoherencyRequest(true, false, true); retSize = csr->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, coherencyCmdValuesWithoutSharedHandles) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE); auto cmdsSizeWABeginOffset = 0; auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.is3DPipelineSelectWARequired(device->getHardwareInfo())) { cmdsSizeWABeginOffset += sizeof(PIPELINE_SELECT); cmdsSize += sizeof(PIPELINE_SELECT); if (Gen12LPHelpers::pipeControlWaRequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSizeWABeginOffset += 
sizeof(PIPE_CONTROL); cmdsSize += sizeof(PIPE_CONTROL); } } cmdsSize += cmdsSizeWABeginOffset; char buff[1024]; LinearStream stream(buff, 1024); auto expectedScmCmd = FamilyType::cmdInitStateComputeMode; expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); overrideCoherencyRequest(true, false, false); csr->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); auto scmCmd = reinterpret_cast(stream.getCpuBase()); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); auto startOffset = stream.getUsed(); overrideCoherencyRequest(true, true, false); csr->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); scmCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), startOffset)); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, coherencyCmdValuesWithSharedHandles) { auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + sizeof(PIPE_CONTROL); auto cmdsSizeWABeginOffset = 0; auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily); if (hwHelper.is3DPipelineSelectWARequired(device->getHardwareInfo())) { cmdsSizeWABeginOffset += sizeof(PIPELINE_SELECT); cmdsSize += sizeof(PIPELINE_SELECT); if (Gen12LPHelpers::pipeControlWaRequired(device->getHardwareInfo().platform.eProductFamily)) { cmdsSizeWABeginOffset += sizeof(PIPE_CONTROL); cmdsSize += sizeof(PIPE_CONTROL); } } cmdsSize += cmdsSizeWABeginOffset; char buff[1024]; LinearStream stream(buff, 1024); auto expectedScmCmd = FamilyType::cmdInitStateComputeMode; 
expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); auto expectedPcCmd = FamilyType::cmdInitPipeControl; overrideCoherencyRequest(true, false, true); csr->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); auto scmCmd = reinterpret_cast(stream.getCpuBase()); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); auto pcCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), sizeof(STATE_COMPUTE_MODE))); EXPECT_TRUE(memcmp(&expectedPcCmd, pcCmd + cmdsSizeWABeginOffset, sizeof(PIPE_CONTROL)) == 0); auto startOffset = stream.getUsed(); overrideCoherencyRequest(true, true, true); csr->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize * 2, stream.getUsed()); expectedScmCmd.setForceNonCoherent(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED); expectedScmCmd.setMaskBits(FamilyType::stateComputeModeForceNonCoherentMask); scmCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), startOffset)); EXPECT_TRUE(memcmp(&expectedScmCmd, scmCmd + cmdsSizeWABeginOffset, sizeof(STATE_COMPUTE_MODE)) == 0); pcCmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), startOffset + sizeof(STATE_COMPUTE_MODE))); EXPECT_TRUE(memcmp(&expectedPcCmd, pcCmd + cmdsSizeWABeginOffset, sizeof(PIPE_CONTROL)) == 0); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithoutSharedHandlesWhenFlushTaskCalledThenProgramCmdOnlyIfChanged) { auto startOffset = csr->commandStream.getUsed(); auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap stream(graphicAlloc); auto flushTask = [&](bool coherencyRequired) { flags.requiresCoherency = coherencyRequired; startOffset = csr->commandStream.getUsed(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); }; 
auto findCmd = [&](bool expectToBeProgrammed, bool expectCoherent, bool expectPipeControl) { HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); bool foundOne = false; STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = expectCoherent ? STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED : STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = genCmdCast(*(++it)); if (!expectPipeControl && !Gen12LPHelpers::pipeControlWaRequired(device->getHardwareInfo().platform.eProductFamily)) { EXPECT_EQ(nullptr, pc); } else { EXPECT_NE(nullptr, pc); } } } EXPECT_EQ(expectToBeProgrammed, foundOne); }; auto hwInfo = device->getHardwareInfo(); flushTask(false); if (HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) { findCmd(true, false, true); // first time } else { findCmd(true, false, false); // first time } flushTask(false); findCmd(false, false, false); // not changed flushTask(true); findCmd(true, true, false); // changed flushTask(true); findCmd(false, true, false); // not changed flushTask(false); findCmd(true, false, false); // changed flushTask(false); findCmd(false, false, false); // not changed csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithSharedHandlesWhenFlushTaskCalledThenAlwaysProgramCmds) { auto startOffset = csr->commandStream.getUsed(); auto graphicsAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap stream(graphicsAlloc); auto 
flushTask = [&](bool coherencyRequired) { flags.requiresCoherency = coherencyRequired; makeResidentSharedAlloc(); startOffset = csr->commandStream.getUsed(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); }; auto flushTaskAndFindCmds = [&](bool expectCoherent) { flushTask(expectCoherent); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); bool foundOne = false; STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = expectCoherent ? STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED : STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = genCmdCast(*(++it)); EXPECT_NE(nullptr, pc); } } EXPECT_TRUE(foundOne); }; flushTaskAndFindCmds(false); // first time flushTaskAndFindCmds(false); // not changed flushTaskAndFindCmds(true); // changed flushTaskAndFindCmds(true); // not changed flushTaskAndFindCmds(false); // changed flushTaskAndFindCmds(false); // not changed csr->getMemoryManager()->freeGraphicsMemory(graphicsAlloc); } GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenFlushWithoutSharedHandlesWhenPreviouslyUsedThenProgramPcAndSCM) { auto graphicAlloc = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap stream(graphicAlloc); makeResidentSharedAlloc(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); EXPECT_TRUE(csr->getCsrRequestFlags()->hasSharedHandles); auto startOffset = csr->commandStream.getUsed(); csr->flushTask(stream, 0, stream, stream, stream, 0, flags, *device); 
EXPECT_TRUE(csr->getCsrRequestFlags()->hasSharedHandles); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream, startOffset); STATE_COMPUTE_MODE::FORCE_NON_COHERENT expectedCoherentValue = STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT; uint32_t expectedCoherentMask = FamilyType::stateComputeModeForceNonCoherentMask; bool foundOne = false; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { auto cmd = genCmdCast(*it); if (cmd) { EXPECT_EQ(expectedCoherentValue, cmd->getForceNonCoherent()); EXPECT_EQ(expectedCoherentMask, cmd->getMaskBits()); EXPECT_FALSE(foundOne); foundOne = true; auto pc = genCmdCast(*(++it)); EXPECT_NE(nullptr, pc); } } EXPECT_TRUE(foundOne); csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc); } command_stream_receiver_hw_tests_gen12lp.inl000066400000000000000000000070161363734646600336600ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl" using CommandStreamReceiverHwTestGen12lp = 
CommandStreamReceiverHwTest; GEN12LPTEST_F(CommandStreamReceiverHwTestGen12lp, givenPreambleSentWhenL3ConfigRequestChangedThenDontProgramL3Register) { size_t GWS = 1; MockContext ctx(pClDevice); MockKernelWithInternals kernel(*pClDevice); CommandQueueHw commandQueue(&ctx, pClDevice, 0, false); auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); pDevice->resetCommandStreamReceiver(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver->getCS(); commandStreamReceiver->isPreambleSent = true; commandStreamReceiver->lastSentL3Config = 0; commandQueue.enqueueKernel(kernel, 1, nullptr, &GWS, nullptr, 0, nullptr, nullptr); parseCommands(commandStreamCSR, 0); auto itorCmd = findMmio(cmdList.begin(), cmdList.end(), L3CNTLRegisterOffset::registerOffset); ASSERT_EQ(cmdList.end(), itorCmd); } GEN12LPTEST_F(CommandStreamReceiverHwTestGen12lp, whenProgrammingMiSemaphoreWaitThenSetRegisterPollModeMemoryPoll) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; MI_SEMAPHORE_WAIT miSemaphoreWait = FamilyType::cmdInitMiSemaphoreWait; EXPECT_EQ(MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL, miSemaphoreWait.getRegisterPollMode()); } using CommandStreamReceiverFlushTaskTests = UltCommandStreamReceiverTest; GEN12LPTEST_F(UltCommandStreamReceiverTest, givenStateBaseAddressWhenItIsRequiredThenThereIsPipeControlPriorToItWithTextureCacheFlushAndHdc) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); configureCSRtoNonDirtyState(); ioh.replaceBuffer(ptrOffset(ioh.getCpuBase(), +1u), ioh.getMaxAvailableSpace() + MemoryConstants::pageSize * 3); flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.commandStream, 0); auto stateBaseAddressItor = find(cmdList.begin(), cmdList.end()); auto pipeControlItor = find(cmdList.begin(), stateBaseAddressItor); EXPECT_NE(stateBaseAddressItor, 
pipeControlItor); auto pipeControlCmd = reinterpret_cast(*pipeControlItor); EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getHdcPipelineFlush()); } command_stream_receiver_simulated_common_hw_tests_gen12lp.inl000066400000000000000000000211471363734646600373000ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_aub_stream.h" #include "test.h" #include "command_stream_receiver_simulated_hw.h" using namespace NEO; using Gen12LPCommandStreamReceiverSimulatedCommonHwTests = Test; template class MockSimulatedCsrHw : public CommandStreamReceiverSimulatedHw { public: using CommandStreamReceiverSimulatedHw::CommandStreamReceiverSimulatedHw; void pollForCompletion() override {} bool writeMemory(GraphicsAllocation &gfxAllocation) override { return true; } void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override {} }; GEN12LPTEST_F(Gen12LPCommandStreamReceiverSimulatedCommonHwTests, givenAubCommandStreamReceiverWhewGlobalMmiosAreInitializedThenMOCSRegistersAreConfigured) { MockSimulatedCsrHw csrSimulatedCommonHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto stream = std::make_unique(); csrSimulatedCommonHw.stream = stream.get(); csrSimulatedCommonHw.initGlobalMMIO(); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004000, 0x00000008))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004004, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004008, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000400C, 0x00000008))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004010, 0x00000018))); 
EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004014, 0x00060038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004018, 0x00000000))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000401C, 0x00000033))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004020, 0x00060037))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004024, 0x0000003B))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004028, 0x00000032))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000402C, 0x00000036))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004030, 0x0000003A))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004034, 0x00000033))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004038, 0x00000037))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000403C, 0x0000003B))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004040, 0x00000030))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004044, 0x00000034))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004048, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000404C, 0x00000031))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004050, 0x00000032))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004054, 0x00000036))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004058, 0x0000003A))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000405C, 0x00000033))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004060, 0x00000037))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004064, 0x0000003B))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004068, 0x00000032))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000406C, 0x00000036))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004070, 0x0000003A))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004074, 0x00000033))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004078, 0x00000037))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000407C, 0x0000003B))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004080, 0x00000030))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004084, 
0x00000034))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004088, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000408C, 0x00000031))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004090, 0x00000032))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004094, 0x00000036))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x00004098, 0x0000003A))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000409C, 0x00000033))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040A0, 0x00000037))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040A4, 0x0000003B))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040A8, 0x00000032))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040AC, 0x00000036))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040B0, 0x0000003A))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040B4, 0x00000033))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040B8, 0x00000037))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040BC, 0x0000003B))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040C0, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040C4, 0x00000034))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040C8, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040CC, 0x00000031))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040D0, 0x00000032))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040D4, 0x00000036))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040D8, 0x0000003A))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040DC, 0x00000033))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040E0, 0x00000037))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040E4, 0x0000003B))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040E8, 0x00000032))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040EC, 0x00000036))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040F0, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040F4, 0x00000038))); 
EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040F8, 0x00000038))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x000040FC, 0x00000038))); } GEN12LPTEST_F(Gen12LPCommandStreamReceiverSimulatedCommonHwTests, givenAubCommandStreamReceiverWhenGlobalMmiosAreInitializedThenLNCFRegistersAreConfigured) { MockSimulatedCsrHw csrSimulatedCommonHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); auto stream = std::make_unique(); csrSimulatedCommonHw.stream = stream.get(); csrSimulatedCommonHw.initGlobalMMIO(); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B020, 0x00300010))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B024, 0x00300010))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B028, 0x00300030))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B02C, 0x00000000))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B030, 0x0030001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B034, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B038, 0x0000001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B03C, 0x00000000))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B040, 0x00100000))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B044, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B048, 0x0010001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B04C, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B050, 0x0030001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B054, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B058, 0x0000001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B05C, 0x00000000))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B060, 0x00100000))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B064, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B068, 0x0010001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B06C, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B070, 0x0030001F))); 
EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B074, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B078, 0x0000001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B07C, 0x00000000))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B080, 0x00000030))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B084, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B088, 0x0010001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B08C, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B090, 0x0030001F))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B094, 0x00170013))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B098, 0x00300010))); EXPECT_TRUE(stream->isOnMmioList(MMIOPair(0x0000B09C, 0x00300010))); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/compute_mode_tests_gen12lp.inl000066400000000000000000000075661363734646600310560ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/command_stream/compute_mode_tests.h" TGLLPTEST_F(ComputeModeRequirements, givenCsrRequestFlagsWithSharedHandlesWhenCommandSizeIsCalculatedThenCorrectCommandSizeIsReturned) { SetUpImpl(); using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + 3 * sizeof(PIPE_CONTROL) + 2 * sizeof(PIPELINE_SELECT); char buff[1024]; LinearStream stream(buff, 1024); overrideComputeModeRequest(false, false, true); auto retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); stream.replaceBuffer(buff, 1024); overrideComputeModeRequest(false, true, true); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); 
EXPECT_EQ(cmdsSize, stream.getUsed()); stream.replaceBuffer(buff, 1024); overrideComputeModeRequest(true, true, true); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); stream.replaceBuffer(buff, 1024); overrideComputeModeRequest(true, false, true); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); stream.replaceBuffer(buff, 1024); overrideComputeModeRequest(false, false, true, true, 127u); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); } TGLLPTEST_F(ComputeModeRequirements, givenCsrRequestFlagsWithoutSharedHandlesWhenCommandSizeIsCalculatedThenCorrectCommandSizeIsReturned) { SetUpImpl(); using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; auto cmdsSize = sizeof(STATE_COMPUTE_MODE) + 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(PIPELINE_SELECT); char buff[1024]; LinearStream stream(buff, 1024); overrideComputeModeRequest(false, false, false); auto retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(0u, stream.getUsed()); stream.replaceBuffer(buff, 1024); overrideComputeModeRequest(true, true, false); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); stream.replaceBuffer(buff, 1024); overrideComputeModeRequest(true, false, false); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); 
stream.replaceBuffer(buff, 1024); overrideComputeModeRequest(false, false, false, true, 127u); retSize = getCsrHw()->getCmdSizeForComputeMode(); EXPECT_EQ(cmdsSize, retSize); getCsrHw()->programComputeMode(stream, flags); EXPECT_EQ(cmdsSize, stream.getUsed()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp000066400000000000000000000042211363734646600310220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using namespace DeviceHostQueue; GEN12LPTEST_F(DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOffsetIsAddedToBlockKernelStartPointer) { auto device = pContext->getDevice(0); std::unique_ptr mockParentKernel(MockParentKernel::create(*pContext)); KernelInfo *blockInfo = const_cast(mockParentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0)); blockInfo->createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); ASSERT_NE(nullptr, blockInfo->getGraphicsAllocation()); const_cast(blockInfo->patchInfo.threadPayload)->OffsetToSkipSetFFIDGP = 0x1234; const_cast(device->getHardwareInfo()).platform.usRevId = REVISION_A0; uint64_t expectedOffset = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch() + blockInfo->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; uint64_t offset = MockDeviceQueueHw::getBlockKernelStartPointer(device->getDevice(), blockInfo, true); EXPECT_EQ(expectedOffset, offset); expectedOffset = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch(); offset = MockDeviceQueueHw::getBlockKernelStartPointer(device->getDevice(), blockInfo, false); EXPECT_EQ(expectedOffset, offset); 
const_cast(device->getHardwareInfo()).platform.usRevId = REVISION_A0 + 1; expectedOffset = blockInfo->getGraphicsAllocation()->getGpuAddressToPatch(); offset = MockDeviceQueueHw::getBlockKernelStartPointer(device->getDevice(), blockInfo, true); EXPECT_EQ(expectedOffset, offset); offset = MockDeviceQueueHw::getBlockKernelStartPointer(device->getDevice(), blockInfo, false); EXPECT_EQ(expectedOffset, offset); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/enqueue_media_kernel_gen12lp.inl000066400000000000000000000162201363734646600313050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" #include "test.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueBlockedVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pVmeKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueBlockedNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename 
TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = 
PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); enqueueVmeKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename TGLLPFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); enqueueRegularKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToFalse) { 
auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = true; enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; enqueueVmeKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } GEN12LPTEST_F(MediaKernelTest, givenGen12LpCmdSizeForVme) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); csr->lastVmeSubslicesConfig = true; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/gen12lp_tests_wrapper.cpp000066400000000000000000000026301363734646600300410ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/gen12lp/test_preemption_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/aub_command_stream_receiver_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/command_stream_receiver_hw_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/compute_mode_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/enqueue_media_kernel_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/image_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl" #include 
"opencl/test/unit_test/gen12lp/sampler_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/scheduler_source_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/tbx_command_stream_receiver_tests_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/test_device_queue_hw_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/test_platform_caps_gen12lp.inl" #include "opencl/test/unit_test/gen12lp/test_sample_gen12lp.inl" compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl000066400000000000000000000245271363734646600303470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "engine_node.h" using HwHelperTestGen12Lp = HwHelperTest; GEN12LPTEST_F(HwHelperTestGen12Lp, givenTglLpThenAuxTranslationIsRequired) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(helper.requiresAuxResolves()); } GEN12LPTEST_F(HwHelperTestGen12Lp, getMaxBarriersPerSliceReturnsCorrectSize) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(32u, helper.getMaxBarrierRegisterPerSlice()); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenGen12LpSkuWhenGettingCapabilityCoherencyFlagThenExpectValidValue) { auto &helper = HwHelper::get(renderCoreFamily); bool coherency = false; helper.setCapabilityCoherencyFlag(&hardwareInfo, coherency); const bool checkDone = SpecialUltHelperGen12lp::additionalCoherencyCheck(hardwareInfo.platform.eProductFamily, coherency); if (checkDone) { return; } if (hardwareInfo.platform.eProductFamily == IGFX_TIGERLAKE_LP) { hardwareInfo.platform.usRevId = 0x1; 
helper.setCapabilityCoherencyFlag(&hardwareInfo, coherency); EXPECT_TRUE(coherency); hardwareInfo.platform.usRevId = 0x0; helper.setCapabilityCoherencyFlag(&hardwareInfo, coherency); EXPECT_FALSE(coherency); } else { EXPECT_TRUE(coherency); } } GEN12LPTEST_F(HwHelperTestGen12Lp, getPitchAlignmentForImage) { auto &helper = HwHelper::get(renderCoreFamily); auto stepping = hardwareInfo.platform.usRevId; if (SpecialUltHelperGen12lp::shouldPerformimagePitchAlignment(hardwareInfo.platform.eProductFamily) && stepping == 0) { EXPECT_EQ(64u, helper.getPitchAlignmentForImage(&hardwareInfo)); } else { EXPECT_EQ(4u, helper.getPitchAlignmentForImage(&hardwareInfo)); } } GEN12LPTEST_F(HwHelperTestGen12Lp, adjustDefaultEngineTypeNoCcs) { hardwareInfo.featureTable.ftrCCSNode = false; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenGen12LpPlatformWhenSetupHardwareCapabilitiesIsCalledThenShouldSetCorrectValues) { HardwareCapabilities hwCaps = {0}; auto &hwHelper = HwHelper::get(renderCoreFamily); hwHelper.setupHardwareCapabilities(&hwCaps, hardwareInfo); EXPECT_EQ(2048u, hwCaps.image3DMaxHeight); EXPECT_EQ(2048u, hwCaps.image3DMaxWidth); EXPECT_TRUE(hwCaps.isStatelesToStatefullWithOffsetSupported); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenCompressionFtrEnabledWhenAskingForPageTableManagerThenReturnCorrectValue) { auto &helper = HwHelper::get(renderCoreFamily); hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = false; hardwareInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_FALSE(helper.isPageTableManagerSupported(hardwareInfo)); hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = true; hardwareInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_TRUE(helper.isPageTableManagerSupported(hardwareInfo)); hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = false; 
hardwareInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(helper.isPageTableManagerSupported(hardwareInfo)); hardwareInfo.capabilityTable.ftrRenderCompressedBuffers = true; hardwareInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(helper.isPageTableManagerSupported(hardwareInfo)); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenDifferentSizesOfAllocationWhenCheckingCompressionPreferenceThenReturnCorrectValue) { auto &helper = HwHelper::get(renderCoreFamily); const size_t sizesToCheck[] = {128, 256, 512, 1023, 1024, 1025}; for (size_t size : sizesToCheck) { EXPECT_FALSE(helper.obtainRenderBufferCompressionPreference(hardwareInfo, size)); } } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeNotSetWhenGetGpgpuEnginesThenReturnThreeRcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrCCSNode = false; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(3u, device->engines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0]); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1]); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2]); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenFtrCcsNodeSetWhenGetGpgpuEnginesThenReturnTwoRcsAndCcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrCCSNode = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(4u, device->engines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(4u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0]); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1]); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[2]); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[3]); } GEN12LPTEST_F(HwHelperTestGen12Lp, 
givenFtrCcsNodeSetAndDefaultRcsWhenGetGpgpuEnginesThenReturnThreeRcsAndCcsEngines) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrCCSNode = true; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); EXPECT_EQ(4u, device->engines.size()); auto &engines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(4u, engines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[0]); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[1]); EXPECT_EQ(aub_stream::ENGINE_RCS, engines[2]); EXPECT_EQ(aub_stream::ENGINE_CCS, engines[3]); } GEN12LPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenIsFusedEuDispatchEnabledIsCalledThenResultIsCorrect) { DebugManagerStateRestore restorer; auto &helper = HwHelper::get(renderCoreFamily); auto &waTable = hardwareInfo.workaroundTable; bool wa; int32_t debugKey; size_t expectedResult; const std::array, 6> testParams{std::make_tuple(true, false, -1), std::make_tuple(false, true, -1), std::make_tuple(true, false, 0), std::make_tuple(true, true, 0), std::make_tuple(false, false, 1), std::make_tuple(false, true, 1)}; for (const auto ¶ms : testParams) { std::tie(expectedResult, wa, debugKey) = params; waTable.waDisableFusedThreadScheduling = wa; DebugManager.flags.CFEFusedEUDispatch.set(debugKey); EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo)); } } class HwHelperTestsGen12LpBuffer : public ::testing::Test { public: void SetUp() override { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); device = std::make_unique(Device::create(executionEnvironment, 0u)); context = std::make_unique(device.get(), true); context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; } cl_int retVal = CL_SUCCESS; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; }; GEN12LPTEST_F(HwHelperTestsGen12LpBuffer, givenCompressedBufferThenCheckResourceCompatibilityReturnsFalse) { 
auto &helper = HwHelper::get(renderCoreFamily); buffer.reset(Buffer::create(context.get(), 0, MemoryConstants::cacheLineSize, nullptr, retVal)); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); EXPECT_FALSE(helper.checkResourceCompatibility(*buffer->getGraphicsAllocation())); } GEN12LPTEST_F(HwHelperTestsGen12LpBuffer, givenBufferThenCheckResourceCompatibilityReturnsTrue) { auto &helper = HwHelper::get(renderCoreFamily); buffer.reset(Buffer::create(context.get(), 0, MemoryConstants::cacheLineSize, nullptr, retVal)); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); EXPECT_TRUE(helper.checkResourceCompatibility(*buffer->getGraphicsAllocation())); } using LriHelperTestsGen12Lp = ::testing::Test; GEN12LPTEST_F(LriHelperTestsGen12Lp, whenProgrammingLriCommandThenExpectMmioRemapEnable) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint32_t address = 0x8888; uint32_t data = 0x1234; auto expectedLri = FamilyType::cmdInitLoadRegisterImm; expectedLri.setRegisterOffset(address); expectedLri.setDataDword(data); expectedLri.setMmioRemapEnable(true); auto lri = LriHelper::program(&stream, address, data); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), stream.getUsed()); EXPECT_EQ(lri, stream.getCpuBase()); EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0); } using MemorySynchronizatiopCommandsTests = ::testing::Test; GEN12LPTEST_F(MemorySynchronizatiopCommandsTests, whenSettingCacheFlushExtraFieldsThenExpectHdcFlushSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; PIPE_CONTROL pipeControl = FamilyType::cmdInitPipeControl; pipeControl.setConstantCacheInvalidationEnable(true); MemorySynchronizationCommands::setExtraCacheFlushFields(&pipeControl); EXPECT_TRUE(pipeControl.getHdcPipelineFlush()); 
EXPECT_FALSE(pipeControl.getConstantCacheInvalidationEnable()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/image_tests_gen12lp.inl000066400000000000000000000172701363734646600274510ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/image/image_surface_state.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "test.h" #include "mock_gmm_client_context.h" #include using namespace NEO; typedef ::testing::Test gen12LpImageTests; GEN12LPTEST_F(gen12LpImageTests, appendSurfaceStateParamsDoesNothing) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); imageHw->appendSurfaceStateParams(&surfaceStateAfter); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } GEN12LPTEST_F(ImageClearColorFixture, givenImageForGen12LpWhenClearColorParametersAreSetThenClearColorSurfaceInSurfaceStateIsSet) { this->setUpImpl(); auto surfaceState = this->getSurfaceState(); surfaceState.setSurfaceBaseAddress(0xABCDEF1000); EXPECT_EQ(false, surfaceState.getClearValueAddressEnable()); EXPECT_EQ(0u, 
surfaceState.getClearColorAddress()); EXPECT_EQ(0u, surfaceState.getClearColorAddressHigh()); std::unique_ptr> imageHw(static_cast *>(ImageHelper::create(&context))); auto gmm = imageHw->getGraphicsAllocation()->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Gpu.IndirectClearColor = 1; setClearColorParams(&surfaceState, gmm); EXPECT_EQ(true, surfaceState.getClearValueAddressEnable()); EXPECT_NE(0u, surfaceState.getClearColorAddress()); EXPECT_NE(0u, surfaceState.getClearColorAddressHigh()); } GEN12LPTEST_F(ImageClearColorFixture, givenImageForGen12LpWhenCanonicalAddresForClearColorIsUsedThenItsConvertedToNonCanonicalForm) { this->setUpImpl(); auto surfaceState = this->getSurfaceState(); uint64_t canonicalAddress = 0xffffABCDABCDE000; EXPECT_THROW(surfaceState.setClearColorAddressHigh(static_cast(canonicalAddress >> 32)), std::exception); surfaceState.setSurfaceBaseAddress(canonicalAddress); std::unique_ptr> imageHw(static_cast *>(ImageHelper::create(&context))); auto gmm = imageHw->getGraphicsAllocation()->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Gpu.IndirectClearColor = 1; EXPECT_NO_THROW(setClearColorParams(&surfaceState, gmm)); uint64_t nonCanonicalAddress = ((static_cast(surfaceState.getClearColorAddressHigh()) << 32) | surfaceState.getClearColorAddress()); EXPECT_EQ(GmmHelper::decanonize(canonicalAddress), nonCanonicalAddress); } GEN12LPTEST_F(ImageClearColorFixture, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityAndMCSThenProgramAuxFieldsForCcs) { this->setUpImpl(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; std::unique_ptr context(new MockContext()); McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context->getDevice(0)->getRootDeviceIndex(), 
MemoryConstants::pageSize}); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(context.get(), &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); mcsAlloc->setDefaultGmm(new Gmm(context->getDevice(0)->getGmmClientContext(), nullptr, 1, false)); surfaceState.setSurfaceBaseAddress(0xABCDEF1000); imageHw->setMcsSurfaceInfo(msi); imageHw->setMcsAllocation(mcsAlloc); auto mockResource = static_cast(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockResource->setUnifiedAuxTranslationCapable(); mockResource->setMultisampleControlSurface(); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); imageHw->setAuxParamsForMultisamples(&surfaceState); EXPECT_NE(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE); } GEN12LPTEST_F(gen12LpImageTests, givenRenderCompressionSurfaceStateParamsAreSetForRenderCompression) { MockContext context; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); imageHw->getGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = true; setAuxParamsForCCS(&surfaceState, imageHw->getGraphicsAllocation()->getDefaultGmm()); EXPECT_FALSE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); } GEN12LPTEST_F(gen12LpImageTests, givenMediaCompressionSurfaceStateParamsAreSetForMediaCompression) { MockContext context; using RENDER_SURFACE_STATE = typename 
FamilyType::RENDER_SURFACE_STATE; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; std::unique_ptr image(Image2dHelper<>::create(&context, &imgDesc)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); imageHw->getGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; surfaceState.setAuxiliarySurfaceMode(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); setAuxParamsForCCS(&surfaceState, imageHw->getGraphicsAllocation()->getDefaultGmm()); EXPECT_TRUE(surfaceState.getMemoryCompressionEnable()); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } using Gen12lpRenderSurfaceStateDataTests = ::testing::Test; GEN12LPTEST_F(Gen12lpRenderSurfaceStateDataTests, WhenMemoryObjectControlStateIndexToMocsTablesIsSetThenValueIsShift) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; uint32_t value = 4; surfaceState.setMemoryObjectControlStateIndexToMocsTables(value); EXPECT_EQ(surfaceState.TheStructure.Common.MemoryObjectControlStateIndexToMocsTables, value >> 1); EXPECT_EQ(surfaceState.getMemoryObjectControlStateIndexToMocsTables(), value); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl000066400000000000000000000015141363734646600276410ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; using Gen12LpKernelTest = Test; GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsTrue) { MockKernelWithInternals mockKernel(*pClDevice); 
auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_FALSE(retVal); } using Gen12LpHardwareCommandsTest = testing::Test; GEN12LPTEST_F(Gen12LpHardwareCommandsTest, givenGen12LpPlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) { EXPECT_FALSE(HardwareCommandsHelper::doBindingTablePrefetch()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/profiling_tests_gen12lp.inl000066400000000000000000000053361363734646600303600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; struct ProfilingTestsGen12LP : public CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(CL_QUEUE_PROFILING_ENABLE); mockKernelWithInternals = std::make_unique(*pClDevice, nullptr); } void TearDown() override { CommandEnqueueFixture::TearDown(); } std::unique_ptr mockKernelWithInternals; }; GEN12LPTEST_F(ProfilingTestsGen12LP, GivenCommandQueueWithProflingWhenWalkerIsDispatchedThenTwoMiStoreRegisterMemWithMmioRemapEnableArePresentInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ)->enqueueKernel( *mockKernelWithInternals, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); parseCommands(*pCmdQ); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check MI_STORE_REGISTER_MEMs auto itorBeforeMI = 
reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforeMI); auto pBeforeMI = genCmdCast(*itorBeforeMI); pBeforeMI = genCmdCast(*itorBeforeMI); ASSERT_NE(nullptr, pBeforeMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pBeforeMI->getRegisterAddress()); EXPECT_TRUE(pBeforeMI->getMmioRemapEnable()); auto itorAfterMI = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterMI); auto pAfterMI = genCmdCast(*itorAfterMI); ASSERT_NE(nullptr, pAfterMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pAfterMI->getRegisterAddress()); EXPECT_TRUE(pAfterMI->getMmioRemapEnable()); ++itorAfterMI; pAfterMI = genCmdCast(*itorAfterMI); EXPECT_EQ(nullptr, pAfterMI); clReleaseEvent(event); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/sampler_tests_gen12lp.inl000066400000000000000000000055321363734646600300300ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using namespace NEO; typedef Test Gen12LpSamplerTest; TGLLPTEST_F(Gen12LpSamplerTest, givenTglLpSamplerWhenUsingDefaultFilteringAndAppendSamplerStateParamsThenDisableLowQualityFilter) { EXPECT_FALSE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto context = clUniquePtr(new MockContext()); auto sampler = clUniquePtr(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST)); auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); 
sampler->appendSamplerStateParams(&state); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } TGLLPTEST_F(Gen12LpSamplerTest, givenTglLpSamplerWhenForcingLowQualityFilteringAndAppendSamplerStateParamsThenEnableLowQualityFilter) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForceSamplerLowFilteringPrecision.set(true); EXPECT_TRUE(DebugManager.flags.ForceSamplerLowFilteringPrecision.get()); typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto context = clUniquePtr(new MockContext()); auto sampler = clUniquePtr(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST)); auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); sampler->appendSamplerStateParams(&state); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE, state.getLowQualityFilter()); } GEN12LPTEST_F(Gen12LpSamplerTest, defaultLowQualityFilter) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_FILTER_DISABLE, state.getLowQualityFilter()); } GEN12LPTEST_F(Gen12LpSamplerTest, givenGen12LpSamplerWhenProgrammingLowQualityCubeCornerModeThenTheModeChangesAppropriately) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto state = FamilyType::cmdInitSamplerState; EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_CUBE_CORNER_MODE_ENABLE, state.getLowQualityCubeCornerMode()); state.setLowQualityCubeCornerMode(SAMPLER_STATE::LOW_QUALITY_CUBE_CORNER_MODE_DISABLE); EXPECT_EQ(SAMPLER_STATE::LOW_QUALITY_CUBE_CORNER_MODE_DISABLE, state.getLowQualityCubeCornerMode()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/scheduler_source_tests_gen12lp.inl000066400000000000000000000026371363734646600317260ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen12lp/hw_cmds.h" #include 
"opencl/source/device_queue/device_queue_hw.h" // Keep the order of device_enqueue.h and scheduler_definitions.h as the latter uses defines from the first one #include "opencl/source/gen12lp/device_enqueue.h" #include "opencl/source/gen12lp/scheduler_definitions.h" #include "opencl/test/unit_test/scheduler/scheduler_source_tests.h" // Keep this include below scheduler_definitions.h and device_enqueue.h headers as it depends on defines defined in them #include "opencl/test/unit_test/scheduler/scheduler_source_tests.inl" using namespace NEO; typedef SchedulerSourceTest SchedulerSourceTestGen12; GEN12LPTEST_F(SchedulerSourceTestGen12, GivenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCode) { givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest(); } GEN12LPTEST_F(SchedulerSourceTestGen12, GivenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrect) { givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest(); } GEN12LPTEST_F(SchedulerSourceTestGen12, GivenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCode) { givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest(); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.cpp000066400000000000000000000013311363734646600307770ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h" #include "shared/source/helpers/hw_info.h" namespace NEO { bool SpecialUltHelperGen12lp::additionalCoherencyCheck(PRODUCT_FAMILY productFamily, bool coherency) { return false; } bool SpecialUltHelperGen12lp::shouldPerformimagePitchAlignment(PRODUCT_FAMILY productFamily) { return true; } bool SpecialUltHelperGen12lp::shouldTestDefaultImplementationOfSetupHardwareCapabilities(PRODUCT_FAMILY productFamily) { return false; } bool 
SpecialUltHelperGen12lp::isPipeControlWArequired(PRODUCT_FAMILY productFamily) { return true; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h000066400000000000000000000011121363734646600304410ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "igfxfmid.h" #include namespace NEO { struct HardwareInfo; struct SpecialUltHelperGen12lp { static bool additionalCoherencyCheck(PRODUCT_FAMILY productFamily, bool coherency); static bool shouldPerformimagePitchAlignment(PRODUCT_FAMILY productFamily); static bool shouldTestDefaultImplementationOfSetupHardwareCapabilities(PRODUCT_FAMILY productFamily); static bool isPipeControlWArequired(PRODUCT_FAMILY productFamily); }; } // namespace NEO tbx_command_stream_receiver_tests_gen12lp.inl000066400000000000000000000043621363734646600340400ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" using namespace NEO; using Gen12LPTbxCommandStreamReceiverTests = Test; GEN12LPTEST_F(Gen12LPTbxCommandStreamReceiverTests, givenNullPtrGraphicsAlloctionWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) { auto tbxCsr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); GraphicsAllocation *allocation = nullptr; auto bits = tbxCsr->getPPGTTAdditionalBits(allocation); constexpr uint64_t expectedBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit); EXPECT_EQ(expectedBits, bits); } GEN12LPTEST_F(Gen12LPTbxCommandStreamReceiverTests, 
givenGraphicsAlloctionWWhenGetPPGTTAdditionalBitsIsCalledThenAppropriateValueIsReturned) { auto tbxCsr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); MockGraphicsAllocation allocation(nullptr, 0); auto bits = tbxCsr->getPPGTTAdditionalBits(&allocation); constexpr uint64_t expectedBits = BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit); EXPECT_EQ(expectedBits, bits); } GEN12LPTEST_F(Gen12LPTbxCommandStreamReceiverTests, whenAskedForPollForCompletionParametersThenReturnCorrectValues) { class MyMockTbxHw : public TbxCommandStreamReceiverHw { public: MyMockTbxHw(ExecutionEnvironment &executionEnvironment) : TbxCommandStreamReceiverHw(executionEnvironment, 0) {} using TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion; using TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion; }; MyMockTbxHw myMockTbxHw(*pDevice->executionEnvironment); EXPECT_EQ(0x80u, myMockTbxHw.getMaskAndValueForPollForCompletion()); EXPECT_TRUE(myMockTbxHw.getpollNotEqualValueForPollForCompletion()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/test_device_caps_gen12lp.inl000066400000000000000000000111361363734646600304440ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test Gen12LpDeviceCaps; GEN12LPTEST_F(Gen12LpDeviceCaps, reportsOcl21) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); } TGLLPTEST_F(Gen12LpDeviceCaps, lpSkusDontSupportFP64) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } TGLLPTEST_F(Gen12LpDeviceCaps, 
givenGen12lpWhenCheckExtensionsThenSubgroupLocalBlockIOIsSupported) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_THAT(caps.deviceExtensions, testing::HasSubstr(std::string("cl_intel_subgroup_local_block_io"))); } TGLLPTEST_F(Gen12LpDeviceCaps, allSkusSupportCorrectlyRoundedDivideSqrt) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } GEN12LPTEST_F(Gen12LpDeviceCaps, defaultPreemptionMode) { EXPECT_EQ(PreemptionMode::ThreadGroup, pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } GEN12LPTEST_F(Gen12LpDeviceCaps, profilingTimerResolution) { const auto &caps = pClDevice->getSharedDeviceInfo(); EXPECT_EQ(83u, caps.outProfilingTimerResolution); } GEN12LPTEST_F(Gen12LpDeviceCaps, kmdNotifyMechanism) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } GEN12LPTEST_F(Gen12LpDeviceCaps, compression) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedImages); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedValue = hwInfo.gtSystemInfo.MaxSubSlicesSupported * 
hwInfo.gtSystemInfo.MaxEuPerSubSlice * 8; EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpWhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } GEN12LPTEST_F(Gen12LpDeviceCaps, givenGen12LpWhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } TGLLPTEST_F(Gen12LpDeviceCaps, givenTglLpWhenCheckSupportCacheFlushAfterWalkerThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } using TglLpUsDeviceIdTest = Test; TGLLPTEST_F(TglLpUsDeviceIdTest, isSimulationCap) { unsigned short tglLpSimulationIds[2] = { IGEN12LP_GT1_MOB_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : tglLpSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } TGLLPTEST_F(TglLpUsDeviceIdTest, GivenTGLLPWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } TGLLPTEST_F(TglLpUsDeviceIdTest, givenGen12lpWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/test_device_queue_hw_gen12lp.inl000066400000000000000000000072341363734646600313440ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/tag_allocator.h" 
#include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; using namespace DeviceHostQueue; typedef DeviceQueueHwTest Gen12LpDeviceQueueSlb; GEN12LPTEST_F(Gen12LpDeviceQueueSlb, expectedAllocationSize) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto expectedSize = getMinimumSlbSize(); expectedSize *= 128; //num of enqueues expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); expectedSize += MockDeviceQueueHw::getExecutionModelCleanupSectionSize(); expectedSize += (4 * MemoryConstants::pageSize); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr); EXPECT_EQ(deviceQueue->getSlbBuffer()->getUnderlyingBufferSize(), expectedSize); delete deviceQueue; } GEN12LPTEST_F(Gen12LpDeviceQueueSlb, SlbCommandsWa) { auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); EXPECT_FALSE(mockDeviceQueueHw->arbCheckWa); EXPECT_FALSE(mockDeviceQueueHw->pipeControlWa); EXPECT_FALSE(mockDeviceQueueHw->miAtomicWa); EXPECT_FALSE(mockDeviceQueueHw->lriWa); delete mockDeviceQueueHw; } GEN12LPTEST_F(Gen12LpDeviceQueueSlb, givenDeviceCommandQueueWithProfilingWhenBatchBufferIsBuiltThenOneMiStoreRegisterMemWithMmioRemapEnableIsPresent) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, deviceQueueProperties::minimumProperties[0]); auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + 
mockDeviceQueueHw->getWaCommandsSize(); MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext); uint32_t taskCount = 7; auto hwTimeStamp = pCommandQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag(); mockDeviceQueueHw->buildSlbDummyCommands(); mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, 0x123, taskCount); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); size_t cleanupSectionOffset = alignUp(mockDeviceQueueHw->numberOfDeviceEnqueues * commandsSize + sizeof(MI_BATCH_BUFFER_START), MemoryConstants::pageSize); size_t cleanupSectionOffsetToParse = cleanupSectionOffset; hwParser.parseCommands(*slbCS, cleanupSectionOffsetToParse); hwParser.findHardwareCommands(); auto itorMiStore = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorMiStore); auto pMiStore = genCmdCast(*itorMiStore); ASSERT_NE(nullptr, pMiStore); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pMiStore->getRegisterAddress()); EXPECT_TRUE(pMiStore->getMmioRemapEnable()); ++itorMiStore; pMiStore = genCmdCast(*itorMiStore); EXPECT_EQ(nullptr, pMiStore); delete mockParentKernel; delete mockDeviceQueueHw; } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/test_platform_caps_gen12lp.inl000066400000000000000000000011501363734646600310240ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "test.h" using namespace NEO; struct Gen12LpPlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; TGLLPTEST_F(Gen12LpPlatformCaps, lpSkusDontSupportFP64) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/test_sample_gen12lp.inl000066400000000000000000000012471363734646600274620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test TigerlakeLpOnlyTest; TGLLPTEST_F(TigerlakeLpOnlyTest, shouldPassOnTglLp) { EXPECT_EQ(IGFX_TIGERLAKE_LP, pDevice->getHardwareInfo().platform.eProductFamily); } typedef Test Gen12LpOnlyTeset; GEN12LPTEST_F(Gen12LpOnlyTeset, shouldPassOnGen12) { EXPECT_NE(IGFX_GEN9_CORE, pDevice->getRenderCoreFamily()); EXPECT_NE(IGFX_GEN11_CORE, pDevice->getRenderCoreFamily()); EXPECT_EQ(IGFX_GEN12LP_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/000077500000000000000000000000001363734646600242245ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/CMakeLists.txt000066400000000000000000000010721363734646600267640ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_TGLLP) set(IGDRCL_SRCS_tests_gen12lp_tgllp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_helper_tgllp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_tgllp.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_tgllp}) add_subdirectories() neo_copy_test_files(copy_test_files_tgllp tgllp) add_dependencies(unit_tests copy_test_files_tgllp) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp000066400000000000000000000031711363734646600306360ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" 
#include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; using KernelTgllpTests = ::testing::Test; TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenSettingKernelStartOffsetThenAdditionalOffsetIsSet) { const uint64_t defaultKernelStartOffset = 0; const uint64_t additionalOffsetDueToFfid = 0x1234; SPatchThreadPayload threadPayload{}; threadPayload.OffsetToSkipSetFFIDGP = additionalOffsetDueToFfid; auto hwInfo = *defaultHwInfo; unsigned short steppings[] = {REVISION_A0, REVISION_A0 + 1}; for (auto stepping : steppings) { hwInfo.platform.usRevId = stepping; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockKernelWithInternals mockKernelWithInternals{*device}; mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload; for (auto isCcsUsed : ::testing::Bool()) { uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed); if (stepping == REVISION_A0 && isCcsUsed) { EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset); } else { EXPECT_EQ(defaultKernelStartOffset, kernelStartOffset); } } } } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/linux/000077500000000000000000000000001363734646600253635ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/linux/CMakeLists.txt000066400000000000000000000004431363734646600301240ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12lp_tgllp_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_tgllp_linux}) add_subdirectories() endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/linux/dll/000077500000000000000000000000001363734646600261365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/linux/dll/CMakeLists.txt000066400000000000000000000005031363734646600306740ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen12lp_tgllp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_tgllp.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen12lp_tgllp}) compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/linux/dll/device_id_tests_tgllp.cpp000066400000000000000000000035151363734646600332050ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(TglLpDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {IGEN12LP_GT1_MOB_DEVICE_F0_ID, &TGLLP_1x6x16::hwInfo, &TGLLP_1x6x16::setupHardwareInfo, GTTYPE_GT2}, {ITGL_LP_1x6x16_ULT_15W_DEVICE_F0_ID, &TGLLP_1x6x16::hwInfo, &TGLLP_1x6x16::setupHardwareInfo, GTTYPE_GT2}, {ITGL_LP_1x6x16_ULX_5_2W_DEVICE_F0_ID, &TGLLP_1x6x16::hwInfo, &TGLLP_1x6x16::setupHardwareInfo, GTTYPE_GT2}, {ITGL_LP_1x6x16_ULT_12W_DEVICE_F0_ID, &TGLLP_1x6x16::hwInfo, &TGLLP_1x6x16::setupHardwareInfo, GTTYPE_GT2}, {ITGL_LP_1x2x16_HALO_45W_DEVICE_F0_ID, &TGLLP_1x2x16::hwInfo, &TGLLP_1x2x16::setupHardwareInfo, GTTYPE_GT2}, {ITGL_LP_1x2x16_DESK_65W_DEVICE_F0_ID, &TGLLP_1x2x16::hwInfo, &TGLLP_1x2x16::setupHardwareInfo, GTTYPE_GT2}, {ITGL_LP_1x2x16_HALO_WS_45W_DEVICE_F0_ID, &TGLLP_1x2x16::hwInfo, &TGLLP_1x2x16::setupHardwareInfo, GTTYPE_GT2}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo 
== second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/test_hw_helper_tgllp.cpp000066400000000000000000000030011363734646600311400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/hw_helper_tests.h" using HwHelperTestGen12Lp = HwHelperTest; TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllpA0WhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) { hardwareInfo.featureTable.ftrCCSNode = true; hardwareInfo.platform.usRevId = REVISION_A0; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, hardwareInfo.capabilityTable.defaultEngineType); } TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllpBWhenAdjustDefaultEngineTypeCalledThenCcsIsReturned) { hardwareInfo.featureTable.ftrCCSNode = true; hardwareInfo.platform.usRevId = REVISION_A0 + 1; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(aub_stream::ENGINE_CCS, hardwareInfo.capabilityTable.defaultEngineType); } TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenSteppingBellowBThenIntegerDivisionEmulationIsEnabled) { hardwareInfo.platform.usRevId = REVISION_A0; auto &helper = HwHelper::get(renderCoreFamily); EXPECT_TRUE(helper.isForceEmuInt32DivRemSPWARequired(hardwareInfo)); } TGLLPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenSteppingBThenIntegerDivisionEmulationIsEnabled) { hardwareInfo.platform.usRevId = REVISION_A0 + 
1; auto &helper = HwHelper::get(renderCoreFamily); EXPECT_FALSE(helper.isForceEmuInt32DivRemSPWARequired(hardwareInfo)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/test_hw_info_config_tgllp.cpp000066400000000000000000000145361363734646600321600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/device_factory.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "test.h" using namespace NEO; using TgllpHwInfoConfig = ::testing::Test; TGLLPTEST_F(TgllpHwInfoConfig, givenHwInfoErrorneousConfigString) { HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } TGLLPTEST_F(TgllpHwInfoConfig, whenUsingCorrectConfigValueThenCorrectHwInfoIsReturned) { HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0x100060010; gtSystemInfo = {0}; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_EQ(1u, gtSystemInfo.SliceCount); EXPECT_EQ(6u, gtSystemInfo.DualSubSliceCount); config = 0x100020010; gtSystemInfo = {0}; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_EQ(1u, gtSystemInfo.SliceCount); EXPECT_EQ(2u, gtSystemInfo.DualSubSliceCount); } using TgllpHwInfo = ::testing::Test; TGLLPTEST_F(TgllpHwInfo, givenBoolWhenCallTgllpHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { static bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; uint64_t configs[] = { 0x100060010, 
0x100020010}; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrSVM); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrStandardMipTailFormat); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, featureTable.ftrAstcHdr2D); EXPECT_EQ(setParamBool, featureTable.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.waUntypedBufferCompression); } } } TGLLPTEST_F(TgllpHwInfo, givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisabled) { HardwareInfo hwInfo; uint64_t config = 0x100060010; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption); EXPECT_FALSE(hwInfo.capabilityTable.supportsVme); } 
TGLLPTEST_F(TgllpHwInfo, givenSetCommandStreamReceiverInAubModeForTgllpProductFamilyWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenAubCenterIsInitializedCorrectly) { DebugManagerStateRestore stateRestore; DebugManager.flags.SetCommandStreamReceiver.set(1); DebugManager.flags.ProductFamilyOverride.set("tgllp"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); ASSERT_TRUE(success); auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[0].get()); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_FALSE(rootDeviceEnvironment->localMemoryEnabledReceived); } TGLLPTEST_F(TgllpHwInfo, givenSetCommandStreamReceiverInAubModeWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenAllRootDeviceEnvironmentMembersAreInitialized) { DebugManagerStateRestore stateRestore; auto requiredDeviceCount = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requiredDeviceCount); DebugManager.flags.SetCommandStreamReceiver.set(1); DebugManager.flags.ProductFamilyOverride.set("tgllp"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, requiredDeviceCount); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); ASSERT_TRUE(success); std::set memoryOperationHandlers; for (auto rootDeviceIndex = 0u; rootDeviceIndex < requiredDeviceCount; rootDeviceIndex++) { auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex].get()); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); EXPECT_FALSE(rootDeviceEnvironment->localMemoryEnabledReceived); auto memoryOperationInterface = rootDeviceEnvironment->memoryOperationsInterface.get(); EXPECT_NE(nullptr, memoryOperationInterface); EXPECT_EQ(memoryOperationHandlers.end(), memoryOperationHandlers.find(memoryOperationInterface)); 
memoryOperationHandlers.insert(memoryOperationInterface); } } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/windows/000077500000000000000000000000001363734646600257165ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/windows/CMakeLists.txt000066400000000000000000000005121363734646600304540ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12_tgllp_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_tgllp.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12_tgllp_windows}) endif() hw_info_config_tests_tgllp.cpp000066400000000000000000000016021363734646600337440ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/tgllp/windows/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsTgllp = HwInfoConfigTestWindows; TGLLPTEST_F(HwInfoConfigTestWindowsTgllp, whenCallAdjustPlatformThenSetGen12LpFamily) { EXPECT_EQ(IGFX_TIGERLAKE_LP, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); PLATFORM *testPlatform = &outHwInfo.platform; testPlatform->eDisplayCoreFamily = IGFX_GEN11_CORE; testPlatform->eRenderCoreFamily = IGFX_GEN11_CORE; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); EXPECT_EQ(IGFX_GEN12LP_CORE, testPlatform->eRenderCoreFamily); EXPECT_EQ(IGFX_GEN12LP_CORE, testPlatform->eDisplayCoreFamily); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/unit_test_helper_gen12lp.cpp000066400000000000000000000016421363734646600305160ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/gen12lp/hw_info.h" #include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.inl" namespace NEO { using Family = TGLLPFamily; template <> bool UnitTestHelper::isL3ConfigProgrammable() { return false; }; template <> bool UnitTestHelper::isPageTableManagerSupported(const HardwareInfo &hwInfo) { return hwInfo.capabilityTable.ftrRenderCompressedBuffers || hwInfo.capabilityTable.ftrRenderCompressedImages; } template <> bool UnitTestHelper::isPipeControlWArequired(const HardwareInfo &hwInfo) { return SpecialUltHelperGen12lp::isPipeControlWArequired(hwInfo.platform.eProductFamily); } template struct UnitTestHelper; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/windows/000077500000000000000000000000001363734646600245745ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/windows/CMakeLists.txt000066400000000000000000000007161363734646600273400ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen12lp_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_callbacks_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_tests_gen12lp.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen12lp_windows}) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/windows/gmm_callbacks_tests_gen12lp.cpp000066400000000000000000000127021363734646600326330ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_helper.h" #include 
"shared/source/helpers/windows/gmm_callbacks.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/os_interface/windows/wddm_device_command_stream.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; using Gen12LpGmmCallbacksTests = ::testing::Test; template struct MockAubCsrToTestNotifyAubCapture : public AUBCommandStreamReceiverHw { using AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; using AUBCommandStreamReceiverHw::externalAllocations; }; GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenCsrWithoutAubDumpWhenNotifyAubCaptureCallbackIsCalledThenDoNothing) { HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); executionEnvironment->initializeMemoryManager(); auto csr = std::make_unique>(*executionEnvironment, 0); uint64_t address = 0xFEDCBA9876543210; size_t size = 1024; auto res = DeviceCallbacks::notifyAubCapture(csr.get(), address, size, true); EXPECT_EQ(1, res); } GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenWddmCsrWhenWriteL3CalledThenWriteTwoMmio) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); UltCommandStreamReceiver csr(*executionEnvironment, 0); uint8_t buffer[128] = {}; csr.commandStream.replaceBuffer(buffer, 128); uint64_t address = 0x00234564002BCDEC; uint64_t value = 0xFEDCBA987654321C; auto res 
= TTCallbacks::writeL3Address(&csr, value, address); EXPECT_EQ(1, res); EXPECT_EQ(2 * sizeof(MI_LOAD_REGISTER_IMM), csr.commandStream.getUsed()); HardwareParse hwParse; hwParse.parseCommands(csr.commandStream, 0); EXPECT_EQ(2u, hwParse.cmdList.size()); auto cmd = genCmdCast(*hwParse.cmdList.begin()); ASSERT_NE(nullptr, cmd); EXPECT_EQ(address & 0xFFFFFFFF, cmd->getRegisterOffset()); EXPECT_EQ(value & 0xFFFFFFFF, cmd->getDataDword()); cmd = genCmdCast(*(++hwParse.cmdList.begin())); ASSERT_NE(nullptr, cmd); EXPECT_EQ(address >> 32, cmd->getRegisterOffset()); EXPECT_EQ(value >> 32, cmd->getDataDword()); } GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenCcsEnabledhenWriteL3CalledThenSetRemapBit) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.featureTable.ftrCCSNode = true; ExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1u); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&localHwInfo); executionEnvironment.initializeMemoryManager(); UltCommandStreamReceiver csr(executionEnvironment, 0); uint8_t buffer[128] = {}; csr.commandStream.replaceBuffer(buffer, 128); auto res = TTCallbacks::writeL3Address(&csr, 1, 1); EXPECT_EQ(1, res); HardwareParse hwParse; hwParse.parseCommands(csr.commandStream, 0); EXPECT_EQ(2u, hwParse.cmdList.size()); auto cmd = genCmdCast(*hwParse.cmdList.begin()); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); cmd = genCmdCast(*(++hwParse.cmdList.begin())); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); } GEN12LPTEST_F(Gen12LpGmmCallbacksTests, givenCcsDisabledhenWriteL3CalledThenSetRemapBitToTrue) { typedef typename FamilyType::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; HardwareInfo localHwInfo = *defaultHwInfo; localHwInfo.featureTable.ftrCCSNode = false; ExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1u); 
executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&localHwInfo); executionEnvironment.initializeMemoryManager(); UltCommandStreamReceiver csr(executionEnvironment, 0); uint8_t buffer[128] = {}; csr.commandStream.replaceBuffer(buffer, 128); auto res = TTCallbacks::writeL3Address(&csr, 1, 1); EXPECT_EQ(1, res); HardwareParse hwParse; hwParse.parseCommands(csr.commandStream, 0); EXPECT_EQ(2u, hwParse.cmdList.size()); auto cmd = genCmdCast(*hwParse.cmdList.begin()); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); cmd = genCmdCast(*(++hwParse.cmdList.begin())); ASSERT_NE(nullptr, cmd); EXPECT_TRUE(cmd->getMmioRemapEnable()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/windows/hw_info_config_tests_gen12lp.cpp000066400000000000000000000023151363734646600330310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/gen12lp/special_ult_helper_gen12lp.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsGen12lp = HwInfoConfigTestWindows; GEN12LPTEST_F(HwInfoConfigTestWindowsGen12lp, givenE2ECSetByKmdWhenConfiguringHwThenAdjustInternalImageFlag) { FeatureTable &localFeatureTable = outHwInfo.featureTable; auto hwInfoConfig = HwInfoConfig::get(productFamily); localFeatureTable.ftrE2ECompression = true; hwInfoConfig->configureHardwareCustom(&outHwInfo, nullptr); EXPECT_TRUE(outHwInfo.capabilityTable.ftrRenderCompressedBuffers); EXPECT_TRUE(outHwInfo.capabilityTable.ftrRenderCompressedImages); localFeatureTable.ftrE2ECompression = false; hwInfoConfig->configureHardwareCustom(&outHwInfo, nullptr); EXPECT_FALSE(outHwInfo.capabilityTable.ftrRenderCompressedBuffers); 
EXPECT_FALSE(outHwInfo.capabilityTable.ftrRenderCompressedImages); } compute-runtime-20.13.16352/opencl/test/unit_test/gen12lp/windows/wddm_tests_gen12lp.cpp000066400000000000000000000042031363734646600310040ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/windows/gdi_interface.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/os_interface/windows/gdi_dll_fixture.h" #include "test.h" #include "mock_gmm_memory.h" using namespace NEO; struct Gen12LpWddmTest : public GdiDllFixture, ::testing::Test { void SetUp() override { GdiDllFixture::SetUp(); executionEnvironment = std::make_unique(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->initGmm(); wddm.reset(static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment))); gmmMemory = new ::testing::NiceMock(rootDeviceEnvironment->getGmmClientContext()); wddm->gmmMemory.reset(gmmMemory); } void TearDown() override { GdiDllFixture::TearDown(); } std::unique_ptr executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment; std::unique_ptr wddm; GmockGmmMemory *gmmMemory = nullptr; }; GEN12LPTEST_F(Gen12LpWddmTest, whenConfigureDeviceAddressSpaceThenObtainMinAddress) { ON_CALL(*gmmMemory, configureDeviceAddressSpace(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Return(true)); uintptr_t minAddress = 0x12345u; EXPECT_NE(NEO::windowsMinAddress, minAddress); EXPECT_CALL(*gmmMemory, getInternalGpuVaRangeLimit()) .Times(1) .WillRepeatedly(::testing::Return(minAddress)); wddm->init(); EXPECT_EQ(minAddress, wddm->getWddmMinAddress()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen8/000077500000000000000000000000001363734646600224735ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen8/CMakeLists.txt000066400000000000000000000023311363734646600252320ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN8) set(IGDRCL_SRCS_tests_gen8 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_dispatch_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen8.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper_gen8.cpp ) get_property(NEO_CORE_TESTS_GEN8 GLOBAL PROPERTY NEO_CORE_TESTS_GEN8) list(APPEND IGDRCL_SRCS_tests_gen8 ${NEO_CORE_TESTS_GEN8}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen8}) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/000077500000000000000000000000001363734646600232475ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/CMakeLists.txt000066400000000000000000000010441363734646600260060ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BDW) set(IGDRCL_SRCS_tests_gen8_bdw ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_tests_bdw.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bdw.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_bdw.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen8_bdw}) add_subdirectories() neo_copy_test_files(copy_test_files_bdw bdw) add_dependencies(unit_tests copy_test_files_bdw) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/device_tests_bdw.cpp000066400000000000000000000013051363734646600272670ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_source_level_debugger.h" #include "test.h" using namespace NEO; struct BdwDeviceTest : public DeviceFixture, public ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); } }; BDWTEST_F(BdwDeviceTest, givenBdwDeviceWhenAskedForClVersionThenReport21) { auto version = pClDevice->getSupportedClVersion(); EXPECT_EQ(21u, version); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/linux/000077500000000000000000000000001363734646600244065ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/linux/CMakeLists.txt000066400000000000000000000005251363734646600271500ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen8_bdw_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_bdw.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen8_bdw_linux}) add_subdirectory(dll) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/linux/dll/000077500000000000000000000000001363734646600251615ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/linux/dll/CMakeLists.txt000066400000000000000000000004671363734646600277300ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen8_bdw ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_bdw.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen8_bdw}) compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/linux/dll/device_id_tests_bdw.cpp000066400000000000000000000056741363734646600316720ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(BdwDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {IBDW_GT1_DESK_DEVICE_F0_ID, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IBDW_GT1_HALO_MOBL_DEVICE_F0_ID, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IBDW_GT1_SERV_DEVICE_F0_ID, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IBDW_GT1_ULT_MOBL_DEVICE_F0_ID, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IBDW_GT1_ULX_DEVICE_F0_ID, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IBDW_GT1_WRK_DEVICE_F0_ID, &BDW_1x2x6::hwInfo, &BDW_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IBDW_GT2_DESK_DEVICE_F0_ID, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IBDW_GT2_HALO_MOBL_DEVICE_F0_ID, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IBDW_GT2_SERV_DEVICE_F0_ID, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IBDW_GT2_ULT_MOBL_DEVICE_F0_ID, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo, 
GTTYPE_GT2}, {IBDW_GT2_ULX_DEVICE_F0_ID, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IBDW_GT2_WRK_DEVICE_F0_ID, &BDW_1x3x8::hwInfo, &BDW_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IBDW_GT3_DESK_DEVICE_F0_ID, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IBDW_GT3_HALO_MOBL_DEVICE_F0_ID, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IBDW_GT3_SERV_DEVICE_F0_ID, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IBDW_GT3_ULT_MOBL_DEVICE_F0_ID, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IBDW_GT3_ULT25W_MOBL_DEVICE_F0_ID, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IBDW_GT3_ULX_DEVICE_F0_ID, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IBDW_GT3_WRK_DEVICE_F0_ID, &BDW_2x3x8::hwInfo, &BDW_2x3x8::setupHardwareInfo, GTTYPE_GT3}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/linux/hw_info_config_tests_bdw.cpp000066400000000000000000000166351363734646600321610ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; using namespace std; struct HwInfoConfigTestLinuxBdw : 
HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = IBDW_GT2_ULT_MOBL_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT2); } }; BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfo) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredSSVal = 3; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT2, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IBDW_GT1_HALO_MOBL_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT1); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT1, outHwInfo.platform.eGTType); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, 
outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IBDW_GT3_ULT_MOBL_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT3); drm->StoredSSVal = 6; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT3, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); } BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeUnknownDevId) { drm->StoredDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeFailedIoctlDevId) { drm->StoredRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeFailedIoctlDevRevId) { drm->StoredRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = 
hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeFailedIoctlEuCount) { drm->StoredRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, negativeFailedIoctlSsCount) { drm->StoredRetValForSSVal = -5; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoWaFlags) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } BDWTEST_F(HwInfoConfigTestLinuxBdw, configureHwInfoEdram) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = IBDW_GT3_HALO_MOBL_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT3); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = IBDW_GT3_SERV_DEVICE_F0_ID; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); } BDWTEST_F(HwInfoConfigTestLinuxBdw, whenCallAdjustPlatformThenDoNothing) { auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); 
EXPECT_EQ(0, ret); } template class BdwHwInfoTests : public ::testing::Test { }; typedef ::testing::Types bdwTestTypes; TYPED_TEST_CASE(BdwHwInfoTests, bdwTestTypes); TYPED_TEST(BdwHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/test_device_caps_bdw.cpp000066400000000000000000000057201363734646600301170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test BdwDeviceCaps; BDWTEST_F(BdwDeviceCaps, givenBdwDeviceWhenAskedForClVersionThenReport21) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); } BDWTEST_F(BdwDeviceCaps, skuSpecificCaps) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_NE(0u, caps.doubleFpConfig); } BDWTEST_F(BdwDeviceCaps, allSkusSupportCorrectlyRoundedDivideSqrt) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_NE(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } BDWTEST_F(BdwDeviceCaps, defaultPreemptionMode) { EXPECT_TRUE(PreemptionMode::Disabled == 
pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } BDWTEST_F(BdwDeviceCaps, BdwProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(80u, caps.outProfilingTimerResolution); } BDWTEST_F(BdwDeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedValue = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); } BDWTEST_F(BdwDeviceCaps, givenHwInfoWhenRequestedMaxFrontEndThreadsThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); EXPECT_EQ(HwHelper::getMaxThreadsForVfe(hwInfo), pDevice->getDeviceInfo().maxFrontEndThreads); } typedef Test BdwUsDeviceIdTest; BDWTEST_F(BdwUsDeviceIdTest, isSimulationCap) { unsigned short bdwSimulationIds[6] = { IBDW_GT0_DESK_DEVICE_F0_ID, IBDW_GT1_DESK_DEVICE_F0_ID, IBDW_GT2_DESK_DEVICE_F0_ID, IBDW_GT3_DESK_DEVICE_F0_ID, IBDW_GT4_DESK_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : bdwSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } BDWTEST_F(BdwUsDeviceIdTest, GivenBDWWhenCheckftr64KBpagesThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/test_hw_info_config_bdw.cpp000066400000000000000000000054101363734646600306240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; 
TEST(BdwHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_BROADWELL != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using BdwHwInfo = ::testing::Test; BDWTEST_F(BdwHwInfo, givenBoolWhenCallBdwHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100030008, 0x200030008, 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrSVM); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.waDisableLSQCROPERFforOCL); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.waUseVAlign16OnTileXYBpp816); EXPECT_EQ(setParamBool, workaroundTable.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(setParamBool, workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads); } } } BDWTEST_F(BdwHwInfo, 
givenHwInfoConfigStringThenAfterSetupResultingVmeIsDisabled) { HardwareInfo hwInfo; uint64_t config = 0x0; hardwareInfoSetup[productFamily](&hwInfo, false, config); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcTextureSampler); EXPECT_FALSE(hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption); EXPECT_FALSE(hwInfo.capabilityTable.supportsVme); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/windows/000077500000000000000000000000001363734646600247415ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/windows/CMakeLists.txt000066400000000000000000000006021363734646600274770ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen8_bdw_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bdw_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_bdw.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen8_bdw_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/windows/hw_info_config_tests_bdw.cpp000066400000000000000000000013151363734646600325010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsBdw = HwInfoConfigTestWindows; BDWTEST_F(HwInfoConfigTestWindowsBdw, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_BROADWELL, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen8/bdw/windows/test_device_caps_bdw_windows.cpp000066400000000000000000000017601363734646600333630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test BdwDeviceCaps; BDWTEST_F(BdwDeviceCaps, kmdNotifyMechanism) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/coherency_tests_gen8.cpp000066400000000000000000000023051363734646600273210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/execution_environment/execution_environment.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; typedef ::testing::Test Gen8CoherencyRequirements; GEN8TEST_F(Gen8CoherencyRequirements, noCoherencyProgramming) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); CommandStreamReceiverHw 
csr(*executionEnvironment, 0); LinearStream stream; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto retSize = csr.getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); csr.programComputeMode(stream, flags); EXPECT_EQ(0u, stream.getUsed()); flags.requiresCoherency = true; retSize = csr.getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); csr.programComputeMode(stream, flags); EXPECT_EQ(0u, stream.getUsed()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/command_stream_receiver_hw_tests_gen8.cpp000066400000000000000000000047701363734646600327250ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl" using CommandStreamReceiverHwTestGen8 = CommandStreamReceiverHwTest; GEN8TEST_F(CommandStreamReceiverHwTestGen8, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) { givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl(); } GEN8TEST_F(CommandStreamReceiverHwTestGen8, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentOnThenProgramL3WithSLML3ConfigAfterUnblocking) { 
givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl(); } GEN8TEST_F(CommandStreamReceiverHwTestGen8, GivenChangedL3ConfigWhenL3IsProgrammedThenClearSLMWorkAroundIsAdded) { MockCsrHw2 csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); csr.csrSizeRequestFlags.l3ConfigChanged = true; csr.isPreambleSent = true; size_t bufferSize = 2 * sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) + sizeof(typename FamilyType::PIPE_CONTROL); void *buffer = alignedMalloc(bufferSize, 64); LinearStream stream(buffer, bufferSize); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); uint32_t l3Config = 0x12345678; csr.programL3(stream, flags, l3Config); this->parseCommands(stream); typename FamilyType::PIPE_CONTROL *pc = getCommand(); ASSERT_NE(nullptr, pc); EXPECT_TRUE(pc->getProtectedMemoryDisable() != 0); typename FamilyType::MI_LOAD_REGISTER_IMM *lri = getCommand(); ASSERT_NE(nullptr, lri); EXPECT_EQ(l3Config, lri->getDataDword()); alignedFree(buffer); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/enqueue_media_kernel_gen8.cpp000066400000000000000000000040101363734646600302610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "test.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; GEN8TEST_F(MediaKernelTest, givenGen8CSRWhenEnqueueVmeKernelThenProgramOnlyPipelineSelection) { typedef typename BDWFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto pCmd = getCommand(); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); } GEN8TEST_F(MediaKernelTest, 
givenGen8CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToFalse) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = true; enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } GEN8TEST_F(MediaKernelTest, givenGen8CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; enqueueVmeKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } GEN8TEST_F(MediaKernelTest, GivenGen8WhenGettingCmdSizeForMediaSamplerThenZeroIsReturned) { auto csr = static_cast *>(&pDevice->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); csr->lastVmeSubslicesConfig = true; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp000066400000000000000000000046711363734646600273270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_constants.h" #include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" using HwHelperTestGen8 = HwHelperTest; GEN8TEST_F(HwHelperTestGen8, getMaxBarriersPerSliceReturnsCorrectSize) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(16u, helper.getMaxBarrierRegisterPerSlice()); } GEN8TEST_F(HwHelperTestGen8, setCapabilityCoherencyFlag) { auto &helper = HwHelper::get(renderCoreFamily); bool coherency = false; helper.setCapabilityCoherencyFlag(&hardwareInfo, coherency); EXPECT_TRUE(coherency); } GEN8TEST_F(HwHelperTestGen8, getPitchAlignmentForImage) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(4u, 
helper.getPitchAlignmentForImage(&hardwareInfo)); } GEN8TEST_F(HwHelperTestGen8, adjustDefaultEngineType) { auto engineType = hardwareInfo.capabilityTable.defaultEngineType; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(engineType, hardwareInfo.capabilityTable.defaultEngineType); } GEN8TEST_F(HwHelperTestGen8, givenGen8PlatformWhenSetupHardwareCapabilitiesIsCalledThenSpecificImplementationIsUsed) { auto &helper = HwHelper::get(renderCoreFamily); HardwareCapabilities hwCaps = {0}; helper.setupHardwareCapabilities(&hwCaps, hardwareInfo); EXPECT_EQ(2048u, hwCaps.image3DMaxHeight); EXPECT_EQ(2048u, hwCaps.image3DMaxWidth); EXPECT_EQ(2 * MemoryConstants::gigaByte - 8 * MemoryConstants::megaByte, hwCaps.maxMemAllocSize); EXPECT_FALSE(hwCaps.isStatelesToStatefullWithOffsetSupported); } GEN8TEST_F(HwHelperTestGen8, whenGetGpgpuEnginesThenReturnTwoThreeEngines) { whenGetGpgpuEnginesThenReturnTwoRcsEngines(pDevice->getHardwareInfo()); EXPECT_EQ(3u, pDevice->engines.size()); } using MemorySynchronizatiopCommandsTestsGen8 = ::testing::Test; GEN8TEST_F(MemorySynchronizatiopCommandsTestsGen8, WhenProgrammingCacheFlushThenExpectConstantCacheFieldSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/image_tests_gen8.cpp000066400000000000000000000017541363734646600264330ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; typedef ::testing::Test gen8ImageTests; GEN8TEST_F(gen8ImageTests, appendSurfaceStateParamsDoesNothing) 
{ typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); imageHw->appendSurfaceStateParams(&surfaceStateAfter); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/kernel_tests_gen8.cpp000066400000000000000000000014671363734646600266320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; using Gen8KernelTest = Test; GEN8TEST_F(Gen8KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsFalse) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_FALSE(retVal); } using Gen8HardwareCommandsTest = testing::Test; GEN8TEST_F(Gen8HardwareCommandsTest, givenGen8PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) { EXPECT_TRUE(HardwareCommandsHelper::doBindingTablePrefetch()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/sampler_tests_gen8.cpp000066400000000000000000000021211363734646600270010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using namespace NEO; typedef Test Gen8SamplerTest; 
GEN8TEST_F(Gen8SamplerTest, appendSamplerStateParamsDoesNothing) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; std::unique_ptr context(new MockContext()); std::unique_ptr> sampler(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST)); auto stateWithoutAppendedParams = FamilyType::cmdInitSamplerState; auto stateWithAppendedParams = FamilyType::cmdInitSamplerState; EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); sampler->appendSamplerStateParams(&stateWithAppendedParams); EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp000066400000000000000000000053161363734646600312040ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/scheduler/scheduler_kernel.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; typedef Test BdwSchedulerTest; BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSStallIsAddedThenDCFlushEnabledIsSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; if (pClDevice->getSupportedClVersion() >= 20) { DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); SchedulerKernel &scheduler = context->getSchedulerKernel(); size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); // Setup heaps in pCmdQ 
MultiDispatchInfo multiDispatchinfo(&scheduler); LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo, nullptr, 0); pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH); GpgpuWalkerHelper::dispatchScheduler( commandStream, *pDevQueueHw, pDevice->getPreemptionMode(), scheduler, &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE), false); HardwareParse hwParser; hwParser.parseCommands(commandStream, 0); hwParser.findHardwareCommands(); ASSERT_NE(hwParser.cmdList.end(), hwParser.itorWalker); GenCmdList pcList = hwParser.getCommandsList(); EXPECT_NE(0u, pcList.size()); for (GenCmdList::iterator it = pcList.begin(); it != pcList.end(); it++) { PIPE_CONTROL *pc = (PIPE_CONTROL *)*it; ASSERT_NE(nullptr, pc); if (pc->getCommandStreamerStallEnable()) { EXPECT_TRUE(pc->getDcFlushEnable()); } } } } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/scheduler_source_tests_gen8.cpp000066400000000000000000000026111363734646600307000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_cmds.h" #include "opencl/source/device_queue/device_queue_hw.h" // Keep the order of device_enqueue.h and scheduler_definitions.h as the latter uses defines from the first one #include "opencl/source/gen8/device_enqueue.h" #include "opencl/source/gen8/scheduler_definitions.h" #include "opencl/test/unit_test/scheduler/scheduler_source_tests.h" // Keep this include below scheduler_definitions.h and device_enqueue.h headers as it depends on defines defined in them #include "opencl/test/unit_test/scheduler/scheduler_source_tests.inl" using namespace NEO; typedef SchedulerSourceTest SchedulerSourceTestGen8; GEN8TEST_F(SchedulerSourceTestGen8, GivenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCode) { 
givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest(); } GEN8TEST_F(SchedulerSourceTestGen8, GivenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrect) { givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest(); } GEN8TEST_F(SchedulerSourceTestGen8, GivenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCode) { givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest(); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/test_device_caps_gen8.cpp000066400000000000000000000050001363734646600274170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test Gen8DeviceCaps; GEN8TEST_F(Gen8DeviceCaps, defaultPreemptionMode) { EXPECT_TRUE(PreemptionMode::Disabled == pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } GEN8TEST_F(Gen8DeviceCaps, kmdNotifyMechanism) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } GEN8TEST_F(Gen8DeviceCaps, compression) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedImages); } 
GEN8TEST_F(Gen8DeviceCaps, image3DDimensions) { const auto &caps = pClDevice->getDeviceInfo(); const auto &sharedCaps = pDevice->getDeviceInfo(); EXPECT_EQ(2048u, caps.image3DMaxWidth); EXPECT_EQ(2048u, sharedCaps.image3DMaxDepth); EXPECT_EQ(2048u, caps.image3DMaxHeight); } GEN8TEST_F(Gen8DeviceCaps, givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckSupportCacheFlushAfterWalkerThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } GEN8TEST_F(Gen8DeviceCaps, givenGen8WhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/test_device_queue_hw_gen8.cpp000066400000000000000000000061011363734646600303160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" using namespace NEO; using namespace DeviceHostQueue; typedef DeviceQueueHwTest Gen8DeviceQueueSlb; GEN8TEST_F(Gen8DeviceQueueSlb, expectedAllocationSize) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto expectedSize = getMinimumSlbSize() + sizeof(typename FamilyType::MI_ATOMIC) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM); 
expectedSize *= 128; //num of enqueues expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); expectedSize += MockDeviceQueueHw::getExecutionModelCleanupSectionSize(); expectedSize += (4 * MemoryConstants::pageSize); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr); EXPECT_EQ(deviceQueue->getSlbBuffer()->getUnderlyingBufferSize(), expectedSize); delete deviceQueue; } GEN8TEST_F(Gen8DeviceQueueSlb, SlbCommandsWa) { auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); EXPECT_FALSE(mockDeviceQueueHw->arbCheckWa); EXPECT_FALSE(mockDeviceQueueHw->pipeControlWa); EXPECT_TRUE(mockDeviceQueueHw->miAtomicWa); EXPECT_TRUE(mockDeviceQueueHw->lriWa); delete mockDeviceQueueHw; } GEN8TEST_F(Gen8DeviceQueueSlb, addProfilingEndcmds) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); uint64_t timestampAddress = 0x12345678555500; uint32_t timestampAddressLow = (uint32_t)(timestampAddress & 0xFFFFFFFF); uint32_t timestampAddressHigh = (uint32_t)(timestampAddress >> 32); mockDeviceQueueHw->addProfilingEndCmds(timestampAddress); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); hwParser.parseCommands(*slbCS, 0); auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), pipeControlItor); PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor; uint32_t postSyncOp = (uint32_t)PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP; EXPECT_EQ(postSyncOp, (uint32_t)pipeControl->getPostSyncOperation()); EXPECT_EQ(timestampAddressLow, pipeControl->getAddress()); EXPECT_EQ(timestampAddressHigh, pipeControl->getAddressHigh()); delete mockDeviceQueueHw; } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen8/test_platform_caps_gen8.cpp000066400000000000000000000011401363734646600300050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "test.h" using namespace NEO; struct Gen8PlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; BDWTEST_F(Gen8PlatformCaps, allBdwSkusSupportFP64) { const auto &caps = pPlatform->getPlatformInfo(); EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/test_sample_gen8.cpp000066400000000000000000000011151363734646600264360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test BroadwellOnlyTest; BDWTEST_F(BroadwellOnlyTest, shouldPassOnBdw) { EXPECT_EQ(IGFX_BROADWELL, pDevice->getHardwareInfo().platform.eProductFamily); } typedef Test Gen8OnlyTest; GEN8TEST_F(Gen8OnlyTest, shouldPassOnGen8) { EXPECT_EQ(IGFX_GEN8_CORE, pDevice->getRenderCoreFamily()); EXPECT_NE(IGFX_GEN9_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen8/unit_test_helper_gen8.cpp000066400000000000000000000005161363734646600274770ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen8/hw_info.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.inl" namespace NEO { template struct UnitTestHelper; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/gen8/windows/000077500000000000000000000000001363734646600241655ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen8/windows/CMakeLists.txt000066400000000000000000000004721363734646600267300ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen8_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_callbacks_tests_gen8.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen8_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen8/windows/gmm_callbacks_tests_gen8.cpp000066400000000000000000000010201363734646600316040ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/windows/gmm_callbacks.h" #include "test.h" using namespace NEO; typedef ::testing::Test Gen8GmmCallbacksTests; GEN8TEST_F(Gen8GmmCallbacksTests, notSupportedDeviceCallback) { EXPECT_EQ(0, DeviceCallbacks::notifyAubCapture(nullptr, 0, 0, false)); } GEN8TEST_F(Gen8GmmCallbacksTests, notSupportedTTCallback) { EXPECT_EQ(0, TTCallbacks::writeL3Address(nullptr, 1, 2)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/000077500000000000000000000000001363734646600224745ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/CMakeLists.txt000066400000000000000000000025051363734646600252360ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) set(IGDRCL_SRCS_tests_gen9 ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_platform_ids_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/coherency_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_kernel_gen9.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_media_kernel_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sip_tests_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_queue_hw_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_platform_caps_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_gen9.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper_gen9.cpp ) get_property(NEO_CORE_TESTS_GEN9 GLOBAL PROPERTY NEO_CORE_TESTS_GEN9) list(APPEND IGDRCL_SRCS_tests_gen9 ${NEO_CORE_TESTS_GEN9}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9}) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/000077500000000000000000000000001363734646600232715ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/CMakeLists.txt000066400000000000000000000010441363734646600260300ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BXT) set(IGDRCL_SRCS_tests_gen9_bxt ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_tests_bxt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bxt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_bxt.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_bxt}) add_subdirectories() neo_copy_test_files(copy_test_files_bxt bxt) add_dependencies(unit_tests copy_test_files_bxt) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/device_tests_bxt.cpp000066400000000000000000000010731363734646600273340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using 
namespace NEO; typedef Test DeviceTest; BXTTEST_F(DeviceTest, getSupportedClVersion12Device) { auto version = pClDevice->getSupportedClVersion(); EXPECT_EQ(12u, version); } BXTTEST_F(DeviceTest, givenBxtDeviceWhenAskedForProflingTimerResolutionThen52IsReturned) { auto resolution = pDevice->getProfilingTimerResolution(); EXPECT_DOUBLE_EQ(52.083, resolution); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/linux/000077500000000000000000000000001363734646600244305ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/linux/CMakeLists.txt000066400000000000000000000005211363734646600271660ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_bxt_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_bxt_linux}) add_subdirectory(dll) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/linux/dll/000077500000000000000000000000001363734646600252035ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/linux/dll/CMakeLists.txt000066400000000000000000000004631363734646600277460ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_bxt ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_bxt}) compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/linux/dll/device_id_tests.cpp000066400000000000000000000034421363734646600310470ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(BxtDeviceIdTest, 
supportedDeviceId) { std::array expectedDescriptors = {{ {IBXT_A_DEVICE_F0_ID, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo, GTTYPE_GTA}, {IBXT_C_DEVICE_F0_ID, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo, GTTYPE_GTA}, {IBXT_GT_3x6_DEVICE_ID, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo, GTTYPE_GTA}, {IBXT_P_3x6_DEVICE_ID, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo, GTTYPE_GTA}, {IBXT_P_12EU_3x6_DEVICE_ID, &BXT_1x2x6::hwInfo, &BXT_1x2x6::setupHardwareInfo, GTTYPE_GTA}, {IBXT_PRO_12EU_3x6_DEVICE_ID, &BXT_1x2x6::hwInfo, &BXT_1x2x6::setupHardwareInfo, GTTYPE_GTA}, {IBXT_PRO_3x6_DEVICE_ID, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo, GTTYPE_GTA}, {IBXT_X_DEVICE_F0_ID, &BXT_1x3x6::hwInfo, &BXT_1x3x6::setupHardwareInfo, GTTYPE_GTA}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/linux/hw_info_config_tests.cpp000066400000000000000000000261161363734646600313420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; using namespace std; struct HwInfoConfigTestLinuxBxt : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = 
IBXT_P_3x6_DEVICE_ID; drm->setGtType(GTTYPE_GTA); drm->StoredEUVal = 18; drm->StoredHasPooledEU = 1; drm->StoredMinEUinPool = 3; } }; BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfo) { drm->StoredDeviceRevID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->StoredHasPooledEU, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->StoredMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGttCacheInvalidation); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); EXPECT_EQ(GTTYPE_GTA, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IBXT_P_12EU_3x6_DEVICE_ID; drm->setGtType(GTTYPE_GTC); //IBXT_P_12EU_3x6_DEVICE_ID is GTA, but fot test sake make it GTC drm->StoredMinEUinPool = 6; drm->StoredDeviceRevID = 4; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, 
ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->StoredHasPooledEU, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->StoredMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GTC, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IBXT_P_12EU_3x6_DEVICE_ID; drm->setGtType(GTTYPE_GTX); //IBXT_P_12EU_3x6_DEVICE_ID is GTA, but fot test sake make it GTX drm->StoredMinEUinPool = 9; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->StoredHasPooledEU, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->StoredMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), 
outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GTX, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGTX); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } BXTTEST_F(HwInfoConfigTestLinuxBxt, negativeUnknownDevId) { drm->StoredDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, negativeFailedIoctlDevId) { drm->StoredRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, negativeFailedIoctlDevRevId) { drm->StoredRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, negativeFailedIoctlEuCount) { drm->StoredRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, 
&outHwInfo, osInterface); EXPECT_EQ(-4, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, negativeFailedIoctlSsCount) { drm->StoredRetValForSSVal = -5; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoFailingEnabledPool) { drm->StoredRetValForPooledEU = -1; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoDisabledEnabledPool) { drm->StoredHasPooledEU = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoFailingMinEuInPool) { drm->StoredRetValForMinEUinPool = -1; drm->StoredSSVal = 3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); drm->StoredSSVal = 2; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(3u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - 
outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoInvalidMinEuInPool) { drm->StoredMinEUinPool = 4; drm->StoredSSVal = 3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); drm->StoredSSVal = 2; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(3u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } BXTTEST_F(HwInfoConfigTestLinuxBxt, configureHwInfoWaFlags) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } BXTTEST_F(HwInfoConfigTestLinuxBxt, whenCallAdjustPlatformThenDoNothing) { auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } template class BxtHwInfoTests : public ::testing::Test { }; typedef ::testing::Types bxtTestTypes; TYPED_TEST_CASE(BxtHwInfoTests, bxtTestTypes); TYPED_TEST(BxtHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); 
EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/test_device_caps_bxt.cpp000066400000000000000000000043171363734646600301630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test BxtDeviceCaps; BXTTEST_F(BxtDeviceCaps, reportsOcl12) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); } BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(52u, caps.outProfilingTimerResolution); } BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) { const auto &caps = pClDevice->getDeviceInfo(); const auto &sharedCaps = pDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); auto memoryManager = pDevice->getMemoryManager(); if (is64bit) { EXPECT_TRUE(memoryManager->peekForce32BitAllocations()); EXPECT_TRUE(sharedCaps.force32BitAddressess); } else { EXPECT_FALSE(memoryManager->peekForce32BitAllocations()); EXPECT_FALSE(sharedCaps.force32BitAddressess); } } BXTTEST_F(BxtDeviceCaps, BxtSvmCapabilities) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_EQ(0u, caps.svmCapabilities); } BXTTEST_F(BxtDeviceCaps, GivenBXTWhenCheckftr64KBpagesThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } BXTTEST_F(BxtDeviceCaps, givenBXTWhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } typedef Test BxtUsDeviceIdTest; 
BXTTEST_F(BxtUsDeviceIdTest, isSimulationCap) { unsigned short bxtSimulationIds[3] = { IBXT_A_DEVICE_F0_ID, IBXT_C_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : bxtSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/test_hw_info_config_bxt.cpp000066400000000000000000000075521363734646600307000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(BxtHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_BROXTON != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using BxtHwInfo = ::testing::Test; BXTTEST_F(BxtHwInfo, givenBoolWhenCallBxtHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; PLATFORM &platform = hwInfo.platform; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; platform.usRevId = 9; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); 
EXPECT_EQ(setParamBool, featureTable.ftrVEBOX); EXPECT_EQ(setParamBool, featureTable.ftrULT); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.ftrLCIA); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, featureTable.ftrGttCacheInvalidation); EXPECT_EQ(setParamBool, workaroundTable.waLLCCachingUnsupported); EXPECT_EQ(setParamBool, workaroundTable.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.waForcePcBbFullCfgRestore); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads); platform.usRevId = 1; featureTable = {}; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(false, featureTable.ftrGttCacheInvalidation); } } } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/windows/000077500000000000000000000000001363734646600247635ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/windows/CMakeLists.txt000066400000000000000000000006021363734646600275210ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_bxt_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_bxt_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_bxt.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_bxt_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/windows/hw_info_config_tests_bxt.cpp000066400000000000000000000013131363734646600325420ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsBxt = HwInfoConfigTestWindows; BXTTEST_F(HwInfoConfigTestWindowsBxt, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_BROXTON, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/bxt/windows/test_device_caps_bxt_windows.cpp000066400000000000000000000017651363734646600334330ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test BxtDeviceCapsWindows; BXTTEST_F(BxtDeviceCapsWindows, 
givenHwInfoWhenAskedForKmdNotifyMechanismThenReturnCorrectValues) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/000077500000000000000000000000001363734646600232405ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/CMakeLists.txt000066400000000000000000000007571363734646600260110ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_CFL) set(IGDRCL_SRCS_tests_gen9_cfl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_cfl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_cfl.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_cfl}) add_subdirectories() neo_copy_test_files(copy_test_files_cfl cfl) add_dependencies(unit_tests copy_test_files_cfl) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/linux/000077500000000000000000000000001363734646600243775ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/linux/CMakeLists.txt000066400000000000000000000005251363734646600271410ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_cfl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_cfl.cpp 
) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_cfl_linux}) add_subdirectory(dll) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/linux/dll/000077500000000000000000000000001363734646600251525ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/linux/dll/CMakeLists.txt000066400000000000000000000004671363734646600277210ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_cfl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_cfl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_cfl}) compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/linux/dll/device_id_tests_cfl.cpp000066400000000000000000000126361363734646600316470ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(CflDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {ICFL_GT1_DT_DEVICE_F0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_S41_DT_DEVICE_F0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_S61_DT_DEVICE_F0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_41F_2F1F_ULT_DEVICE_F0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_S6_S4_S2_F1F_DT_DEVICE_F0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_U41F_U2F1F_ULT_DEVICE_F0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT2_DT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_HALO_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_HALO_WS_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, 
&CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_S42_DT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_S62_DT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_SERV_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_S82_S6F2_DT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_U42F_U2F1F_ULT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_U42F_U2F2F_ULT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_U42F_U2F2_ULT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_S8_S2_DT_DEVICE_F0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_HALO_DEVICE_F0_ID, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ICFL_GT3_ULT_15W_DEVICE_F0_ID, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ICFL_GT3_ULT_15W_42EU_DEVICE_F0_ID, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ICFL_GT3_ULT_28W_DEVICE_F0_ID, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ICFL_GT3_ULT_DEVICE_F0_ID, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ICFL_GT3_U43_ULT_DEVICE_F0_ID, &CFL_2x3x8::hwInfo, &CFL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, // CML GT1 {ICFL_GT1_ULT_DEVICE_V0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_ULT_DEVICE_A0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_ULT_DEVICE_S0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_ULT_DEVICE_K0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_ULX_DEVICE_S0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_DT_DEVICE_P0_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_DT_DEVICE_G0_ID, 
&CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_HALO_DEVICE_16_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ICFL_GT1_HALO_DEVICE_18_ID, &CFL_1x2x6::hwInfo, &CFL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, // CML GT2 {ICFL_GT2_ULT_DEVICE_V0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_ULT_DEVICE_A0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_ULT_DEVICE_S0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_ULT_DEVICE_K0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_ULX_DEVICE_S0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_DT_DEVICE_P0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_DT_DEVICE_G0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_HALO_DEVICE_15_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_HALO_DEVICE_17_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, // CML WORKSTATION {ICFL_GT2_WKS_DEVICE_P0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ICFL_GT2_WKS_DEVICE_G0_ID, &CFL_1x3x8::hwInfo, &CFL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/linux/hw_info_config_tests_cfl.cpp000066400000000000000000000201151363734646600321260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; using namespace std; struct HwInfoConfigTestLinuxCfl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = ICFL_GT2_DT_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT2); } }; CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfo) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT2, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); drm->StoredDeviceID = ICFL_GT1_DT_DEVICE_F0_ID; drm->StoredSSVal = 3; drm->setGtType(GTTYPE_GT1); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); 
EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT1, outHwInfo.platform.eGTType); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = ICFL_GT3_ULT_DEVICE_F0_ID; drm->StoredSSVal = 6; drm->setGtType(GTTYPE_GT3); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT3, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; 
EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeUnknownDevId) { drm->StoredDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeFailedIoctlDevId) { drm->StoredRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeFailedIoctlDevRevId) { drm->StoredRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeFailedIoctlEuCount) { drm->StoredRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, negativeFailedIoctlSsCount) { drm->StoredRetValForSSVal = -5; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoWaFlags) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } CFLTEST_F(HwInfoConfigTestLinuxCfl, configureHwInfoEdram) { auto hwInfoConfig = 
HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = ICFL_GT3_ULT_28W_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT3); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = ICFL_GT3_ULT_15W_DEVICE_F0_ID; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); } CFLTEST_F(HwInfoConfigTestLinuxCfl, whenCallAdjustPlatformThenDoNothing) { auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } template class CflHwInfoTests : public ::testing::Test { }; typedef ::testing::Types cflTestTypes; TYPED_TEST_CASE(CflHwInfoTests, cflTestTypes); TYPED_TEST(CflHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/test_device_caps_cfl.cpp000066400000000000000000000013721363734646600300770ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test CflDeviceCaps; CFLTEST_F(CflDeviceCaps, reportsOcl21) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); } CFLTEST_F(CflDeviceCaps, GivenCFLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } CFLTEST_F(CflDeviceCaps, givenCflWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/test_hw_info_config_cfl.cpp000066400000000000000000000067111363734646600306120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(CflHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_COFFEELAKE != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using CflHwInfo = ::testing::Test; CFLTEST_F(CflHwInfo, givenBoolWhenCallCflHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100030008, 0x200030008, 0x300030008, 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = 
{}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrVEBOX); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrSVM); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.waLosslessCompressionSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads); } } } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/windows/000077500000000000000000000000001363734646600247325ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/windows/CMakeLists.txt000066400000000000000000000006021363734646600274700ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_cfl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_cfl_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_cfl.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_cfl_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/windows/hw_info_config_tests_cfl.cpp000066400000000000000000000013161363734646600324630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsCfl = HwInfoConfigTestWindows; CFLTEST_F(HwInfoConfigTestWindowsCfl, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_COFFEELAKE, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cfl/windows/test_device_caps_cfl_windows.cpp000066400000000000000000000017651363734646600333510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test CflDeviceCapsWindows; CFLTEST_F(CflDeviceCapsWindows, 
givenHwInfoWhenAskedForKmdNotifyMechanismThenReturnCorrectValues) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/cl_get_platform_ids_tests_gen9.cpp000066400000000000000000000047521363734646600313540ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/root_device.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetPlatformIDsTests; TEST(clGetPlatformIDsMultiPlatformTest, whenCreateDevicesWithDifferentProductFamilyThenClGetPlatformIdsCreatesMultiplePlatformsProperlySorted) { if ((HwInfoConfig::get(IGFX_SKYLAKE) == nullptr) || (HwInfoConfig::get(IGFX_KABYLAKE) == nullptr)) { GTEST_SKIP(); } DebugManagerStateRestore restorer; const size_t numRootDevices = 2u; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); VariableBackup createFuncBackup{&DeviceFactory::createRootDeviceFunc}; DeviceFactory::createRootDeviceFunc = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr { auto device = std::unique_ptr(Device::create(&executionEnvironment, rootDeviceIndex)); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); if 
(rootDeviceIndex == 0) { hwInfo->platform.eProductFamily = IGFX_SKYLAKE; } else { hwInfo->platform.eProductFamily = IGFX_KABYLAKE; } return device; }; platformsImpl.clear(); cl_int retVal = CL_SUCCESS; cl_platform_id platformsRet[2]; cl_uint numPlatforms = 0; retVal = clGetPlatformIDs(0, nullptr, &numPlatforms); EXPECT_EQ(2u, numPlatforms); EXPECT_EQ(CL_SUCCESS, retVal); numPlatforms = 0u; retVal = clGetPlatformIDs(2u, platformsRet, &numPlatforms); EXPECT_EQ(2u, numPlatforms); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, platformsRet[0]); auto platform0 = castToObject(platformsRet[0]); EXPECT_EQ(1u, platform0->getNumDevices()); EXPECT_EQ(IGFX_KABYLAKE, platform0->getClDevice(0)->getHardwareInfo().platform.eProductFamily); EXPECT_EQ(1u, platform0->getClDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, platformsRet[1]); auto platform1 = castToObject(platformsRet[1]); EXPECT_EQ(1u, platform1->getNumDevices()); EXPECT_EQ(IGFX_SKYLAKE, platform1->getClDevice(0)->getHardwareInfo().platform.eProductFamily); EXPECT_EQ(0u, platform1->getClDevice(0)->getRootDeviceIndex()); platformsImpl.clear(); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/coherency_tests_gen9.cpp000066400000000000000000000023051363734646600273230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/execution_environment/execution_environment.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; typedef ::testing::Test Gen9CoherencyRequirements; GEN9TEST_F(Gen9CoherencyRequirements, noCoherencyProgramming) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); CommandStreamReceiverHw 
csr(*executionEnvironment, 0); LinearStream stream; DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto retSize = csr.getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); csr.programComputeMode(stream, flags); EXPECT_EQ(0u, stream.getUsed()); flags.requiresCoherency = true; retSize = csr.getCmdSizeForComputeMode(); EXPECT_EQ(0u, retSize); csr.programComputeMode(stream, flags); EXPECT_EQ(0u, stream.getUsed()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/command_stream_receiver_hw_tests_gen9.cpp000066400000000000000000000053351363734646600327250ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" #include "reg_configs_common.h" using namespace NEO; #include "opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.inl" using CommandStreamReceiverHwTestGen9 = CommandStreamReceiverHwTest; GEN9TEST_F(UltCommandStreamReceiverTest, whenPreambleIsProgrammedThenStateSipCmdIsNotPresentInPreambleCmdStream) { using STATE_SIP = typename FamilyType::STATE_SIP; auto 
&commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.isPreambleSent = false; pDevice->setPreemptionMode(PreemptionMode::Disabled); pDevice->setDebuggerActive(true); uint32_t newL3Config; DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto cmdSizePreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice); StackVec preambleBuffer; preambleBuffer.resize(cmdSizePreamble); LinearStream preambleStream(&*preambleBuffer.begin(), preambleBuffer.size()); commandStreamReceiver.programPreamble(preambleStream, *pDevice, dispatchFlags, newL3Config); this->parseCommands(preambleStream); auto itorStateSip = find(this->cmdList.begin(), this->cmdList.end()); EXPECT_EQ(this->cmdList.end(), itorStateSip); pDevice->setDebuggerActive(false); } GEN9TEST_F(CommandStreamReceiverHwTestGen9, GivenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3Config) { givenKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigImpl(); } GEN9TEST_F(CommandStreamReceiverHwTestGen9, GivenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentOnThenProgramL3WithSLML3ConfigAfterUnblocking) { givenBlockedKernelWithSlmWhenPreviousNOSLML3WasSentThenProgramL3WithSLML3ConfigAfterUnblockingImpl(); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/enqueue_kernel_gen9.cpp000066400000000000000000000050211363734646600271270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/reg_configs.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/static_size3.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" namespace NEO { using Gen9EnqueueTest = Test; GEN9TEST_F(Gen9EnqueueTest, 
givenKernelRequiringIndependentForwardProgressWhenKernelIsSubmittedThenRoundRobinPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.SubgroupIndependentForwardProgressRequired = true; MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(DebugControlReg2::getRegData(PreambleHelper::getDefaultThreadArbitrationPolicy()), cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address)); } GEN9TEST_F(Gen9EnqueueTest, givenKernelNotRequiringIndependentForwardProgressWhenKernelIsSubmittedThenAgeBasedPolicyIsProgrammed) { MockContext mc; CommandQueueHw cmdQ{&mc, pClDevice, 0, false}; SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.SubgroupIndependentForwardProgressRequired = false; MockKernelWithInternals mockKernel(*pClDevice, executionEnvironment); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, StatickSize3<1, 1, 1>(), nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(cmdQ); auto cmd = findMmioCmd(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address); ASSERT_NE(nullptr, cmd); EXPECT_EQ(DebugControlReg2::getRegData(ThreadArbitrationPolicy::AgeBased), cmd->getDataDword()); EXPECT_EQ(1U, countMmio(hwParser.cmdList.begin(), hwParser.cmdList.end(), DebugControlReg2::address)); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/gen9/enqueue_media_kernel_gen9.cpp000066400000000000000000000162751363734646600303030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/preamble.h" #include "opencl/test/unit_test/fixtures/media_kernel_fixture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "test.h" using namespace NEO; typedef MediaKernelFixture MediaKernelTest; GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueBlockedVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pVmeKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueBlockedNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; UserEvent userEvent(context); cl_event blockedEvent = &userEvent; auto retVal = pCmdQ->enqueueKernel( pKernel, workDim, globalWorkOffset, globalWorkSize, nullptr, 1, &blockedEvent, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); userEvent.setStatus(CL_COMPLETE); parseCommands(*pCmdQ); ASSERT_NE(cmdPipelineSelect, nullptr); auto *pCmd = genCmdCast(cmdPipelineSelect); auto expectedMask = 
pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueNonVmeKernelFirstTimeThenProgramPipelineSelectionAndMediaSampler) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); auto pCmd = getCommand(); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; auto expectedPipelineSelection = PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_EQ(expectedPipelineSelection, pCmd->getPipelineSelection()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN9TEST_F(MediaKernelTest, 
givenGen9CsrWhenEnqueueNonVmeKernelTwiceThenProgramPipelineSelectOnce) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); auto numCommands = getCommandsList().size(); EXPECT_EQ(1u, numCommands); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelAfterNonVmeKernelThenProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueRegularKernel(); enqueueVmeKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_FALSE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueNonVmeKernelAfterVmeKernelThenProgramProgramPipelineSelectionAndMediaSamplerTwice) { typedef typename SKLFamily::PIPELINE_SELECT PIPELINE_SELECT; enqueueVmeKernel(); enqueueRegularKernel(); auto commands = getCommandsList(); EXPECT_EQ(2u, commands.size()); auto pCmd = static_cast(commands.back()); auto expectedMask = pipelineSelectEnablePipelineSelectMaskBits | pipelineSelectMediaSamplerDopClockGateMaskBits; EXPECT_EQ(expectedMask, pCmd->getMaskBits()); EXPECT_TRUE(pCmd->getMediaSamplerDopClockGateEnable()); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToFalse) { auto csr = static_cast *>(&pCmdQ->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = true; enqueueVmeKernel(); EXPECT_TRUE(csr->lastVmeSubslicesConfig); } GEN9TEST_F(MediaKernelTest, givenGen9CsrWhenEnqueueVmeKernelThenVmeSubslicesConfigDoesntChangeToTrue) { auto csr = static_cast *>(&pCmdQ->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; enqueueVmeKernel(); EXPECT_FALSE(csr->lastVmeSubslicesConfig); } GEN9TEST_F(MediaKernelTest, GivenGen9WhenGettingCmdSizeForMediaSamplerThenZeroIsReturned) { 
auto csr = static_cast *>(&pCmdQ->getGpgpuCommandStreamReceiver()); csr->lastVmeSubslicesConfig = false; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); csr->lastVmeSubslicesConfig = true; EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(false)); EXPECT_EQ(0u, csr->getCmdSizeForMediaSampler(true)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/000077500000000000000000000000001363734646600232515ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/CMakeLists.txt000066400000000000000000000007571363734646600260220ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GLK) set(IGDRCL_SRCS_tests_gen9_glk ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_glk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_glk.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_glk}) add_subdirectories() neo_copy_test_files(copy_test_files_glk glk) add_dependencies(unit_tests copy_test_files_glk) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/linux/000077500000000000000000000000001363734646600244105ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/linux/CMakeLists.txt000066400000000000000000000005251363734646600271520ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_glk_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_glk.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_glk_linux}) add_subdirectory(dll) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/linux/dll/000077500000000000000000000000001363734646600251635ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/linux/dll/CMakeLists.txt000066400000000000000000000004671363734646600277320ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_glk ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_glk.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_glk}) compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/linux/dll/device_id_tests_glk.cpp000066400000000000000000000022071363734646600316620ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(GlkDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {IGLK_GT2_ULT_18EU_DEVICE_F0_ID, &GLK_1x3x6::hwInfo, &GLK_1x3x6::setupHardwareInfo, GTTYPE_GTA}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/linux/hw_info_config_tests_glk.cpp000066400000000000000000000242061363734646600321550ustar00rootroot00000000000000/* * Copyright (C) 
2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; using namespace std; struct HwInfoConfigTestLinuxGlk : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = IGLK_GT2_ULT_12EU_DEVICE_F0_ID; drm->setGtType(GTTYPE_GTA); drm->StoredEUVal = 18; drm->StoredHasPooledEU = 1; drm->StoredMinEUinPool = 3; } }; GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfo) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GTA, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); drm->StoredDeviceID = IGLK_GT2_ULT_18EU_DEVICE_F0_ID; drm->setGtType(GTTYPE_GTC); //IGLK_GT2_ULT_18EU_DEVICE_F0_ID is GTA, but fot test sake make it GTC drm->StoredMinEUinPool = 6; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned 
short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->StoredHasPooledEU, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->StoredMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GTC, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IGLK_GT2_ULT_12EU_DEVICE_F0_ID; drm->setGtType(GTTYPE_GTX); //IGLK_GT2_ULT_18EU_DEVICE_F0_ID is GTA, but fot test sake make it GTX drm->StoredMinEUinPool = 9; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ((unsigned int)drm->StoredHasPooledEU, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ((uint32_t)drm->StoredMinEUinPool, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); 
EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GTX, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGTX); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } GLKTEST_F(HwInfoConfigTestLinuxGlk, negative) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredRetValForDeviceID = -1; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceID = 0; drm->StoredRetValForDeviceRevID = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForDeviceRevID = 0; drm->StoredRetValForEUVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); drm->StoredRetValForEUVal = 0; drm->StoredRetValForSSVal = -1; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoFailingEnabledPool) { drm->StoredRetValForPooledEU = -1; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, 
outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoDisabledEnabledPool) { drm->StoredHasPooledEU = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ(0u, outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoFailingMinEuInPool) { drm->StoredRetValForMinEUinPool = -1; drm->StoredSSVal = 3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); drm->StoredSSVal = 2; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(3u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoInvalidMinEuInPool) { drm->StoredMinEUinPool = 4; drm->StoredSSVal = 3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(9u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); 
drm->StoredSSVal = 2; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrPooledEuEnabled); EXPECT_EQ(3u, outHwInfo.gtSystemInfo.EuCountPerPoolMin); EXPECT_EQ((outHwInfo.gtSystemInfo.EUCount - outHwInfo.gtSystemInfo.EuCountPerPoolMin), outHwInfo.gtSystemInfo.EuCountPerPoolMax); } GLKTEST_F(HwInfoConfigTestLinuxGlk, configureHwInfoWaFlags) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } GLKTEST_F(HwInfoConfigTestLinuxGlk, whenCallAdjustPlatformThenDoNothing) { auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } template class GlkHwInfoTests : public ::testing::Test { }; typedef ::testing::Types glkTestTypes; TYPED_TEST_CASE(GlkHwInfoTests, glkTestTypes); TYPED_TEST(GlkHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/test_device_caps_glk.cpp000066400000000000000000000040201363734646600301120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef 
Test Gen9DeviceCaps; GLKTEST_F(Gen9DeviceCaps, GlkProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(52u, caps.outProfilingTimerResolution); } GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsReturned) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64); } GLKTEST_F(Gen9DeviceCaps, GlkClVersionSupport) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 1.2 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 1.2 ", caps.clCVersion); } GLKTEST_F(Gen9DeviceCaps, GlkIs32BitOsAllocatorAvailable) { const auto &caps = pDevice->getDeviceInfo(); auto memoryManager = pDevice->getMemoryManager(); if (is64bit) { EXPECT_TRUE(memoryManager->peekForce32BitAllocations()); EXPECT_TRUE(caps.force32BitAddressess); } else { EXPECT_FALSE(memoryManager->peekForce32BitAllocations()); EXPECT_FALSE(caps.force32BitAddressess); } } typedef Test GlkUsDeviceIdTest; GLKTEST_F(GlkUsDeviceIdTest, isSimulationCap) { unsigned short glkSimulationIds[3] = { IGLK_GT2_ULT_18EU_DEVICE_F0_ID, IGLK_GT2_ULT_12EU_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : glkSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); EXPECT_FALSE(mockDevice->isSimulation()); delete mockDevice; } } GLKTEST_F(GlkUsDeviceIdTest, GivenGLKWhenCheckftr64KBpagesThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } GLKTEST_F(GlkUsDeviceIdTest, givenGlkWhenCheckFtrSupportsInteger64BitAtomicsThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/test_hw_info_config_glk.cpp000066400000000000000000000065251363734646600306370ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(GlkHwInfoConfig, 
givenHwInfoErrorneousConfigString) { if (IGFX_GEMINILAKE != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; gtSystemInfo = {0}; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using GlkHwInfo = ::testing::Test; GLKTEST_F(GlkHwInfo, givenBoolWhenCallGlkHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.ftrLCIA); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrTileMappedResource); EXPECT_EQ(setParamBool, featureTable.ftrULT); EXPECT_EQ(setParamBool, featureTable.ftrAstcHdr2D); EXPECT_EQ(setParamBool, 
featureTable.ftrAstcLdr2D); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.waLLCCachingUnsupported); EXPECT_EQ(setParamBool, workaroundTable.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.waForcePcBbFullCfgRestore); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads); } } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/windows/000077500000000000000000000000001363734646600247435ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/windows/CMakeLists.txt000066400000000000000000000006021363734646600275010ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_glk_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_glk_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_glk.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_glk_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/windows/hw_info_config_tests_glk.cpp000066400000000000000000000013161363734646600325050ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsGlk = HwInfoConfigTestWindows; GLKTEST_F(HwInfoConfigTestWindowsGlk, 
whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_GEMINILAKE, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/glk/windows/test_device_caps_glk_windows.cpp000066400000000000000000000017701363734646600333670ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test GlkDeviceCapsWindows; GLKTEST_F(GlkDeviceCapsWindows, givenHwInfoWhenAskedForKmdNotifyMechanismThenReturnCorrectValues) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(30000, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp000066400000000000000000000046251363734646600273300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" using HwHelperTestGen9 = HwHelperTest; GEN9TEST_F(HwHelperTestGen9, getMaxBarriersPerSliceReturnsCorrectSize) { auto &helper = HwHelper::get(renderCoreFamily); 
EXPECT_EQ(32u, helper.getMaxBarrierRegisterPerSlice()); } GEN9TEST_F(HwHelperTestGen9, setCapabilityCoherencyFlag) { auto &helper = HwHelper::get(renderCoreFamily); bool coherency = false; helper.setCapabilityCoherencyFlag(&hardwareInfo, coherency); EXPECT_TRUE(coherency); } GEN9TEST_F(HwHelperTestGen9, getPitchAlignmentForImage) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(4u, helper.getPitchAlignmentForImage(&hardwareInfo)); } GEN9TEST_F(HwHelperTestGen9, adjustDefaultEngineType) { auto engineType = hardwareInfo.capabilityTable.defaultEngineType; auto &helper = HwHelper::get(renderCoreFamily); helper.adjustDefaultEngineType(&hardwareInfo); EXPECT_EQ(engineType, hardwareInfo.capabilityTable.defaultEngineType); } GEN9TEST_F(HwHelperTestGen9, givenGen9PlatformWhenSetupHardwareCapabilitiesIsCalledThenDefaultImplementationIsUsed) { auto &helper = HwHelper::get(renderCoreFamily); // Test default method implementation testDefaultImplementationOfSetupHardwareCapabilities(helper, hardwareInfo); } GEN9TEST_F(HwHelperTestGen9, givenDebuggingActiveWhenSipKernelTypeIsQueriedThenDbgCsrLocalTypeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto sipType = helper.getSipKernelType(true); EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType); } GEN9TEST_F(HwHelperTestGen9, whenGetGpgpuEnginesThenReturnThreeRcsEngines) { whenGetGpgpuEnginesThenReturnTwoRcsEngines(pDevice->getHardwareInfo()); EXPECT_EQ(3u, pDevice->engines.size()); } using MemorySynchronizatiopCommandsTestsGen9 = ::testing::Test; GEN9TEST_F(MemorySynchronizatiopCommandsTestsGen9, WhenProgrammingCacheFlushThenExpectConstantCacheFieldSet) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen9/image_tests_gen9.cpp000066400000000000000000000031011363734646600264210ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; typedef ::testing::Test gen9ImageTests; GEN9TEST_F(gen9ImageTests, appendSurfaceStateParamsDoesNothing) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceStateBefore = FamilyType::cmdInitRenderSurfaceState; auto surfaceStateAfter = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); imageHw->appendSurfaceStateParams(&surfaceStateAfter); EXPECT_EQ(0, memcmp(&surfaceStateBefore, &surfaceStateAfter, sizeof(RENDER_SURFACE_STATE))); } using Gen9RenderSurfaceStateDataTests = ::testing::Test; GEN9TEST_F(Gen9RenderSurfaceStateDataTests, WhenMemoryObjectControlStateIndexToMocsTablesIsSetThenValueIsShift) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; uint32_t value = 4; surfaceState.setMemoryObjectControlStateIndexToMocsTables(value); EXPECT_EQ(surfaceState.TheStructure.Common.MemoryObjectControlState_IndexToMocsTables, value >> 1); EXPECT_EQ(surfaceState.getMemoryObjectControlStateIndexToMocsTables(), value); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/000077500000000000000000000000001363734646600232445ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/CMakeLists.txt000066400000000000000000000007571363734646600260150ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # if(TESTS_KBL) set(IGDRCL_SRCS_tests_gen9_kbl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_kbl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_kbl.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_kbl}) add_subdirectories() neo_copy_test_files(copy_test_files_kbl kbl) add_dependencies(unit_tests copy_test_files_kbl) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/linux/000077500000000000000000000000001363734646600244035ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/linux/CMakeLists.txt000066400000000000000000000005251363734646600271450ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_kbl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_kbl.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_kbl_linux}) add_subdirectory(dll) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/linux/dll/000077500000000000000000000000001363734646600251565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/linux/dll/CMakeLists.txt000066400000000000000000000004671363734646600277250ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_kbl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_kbl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_kbl}) compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/linux/dll/device_id_tests_kbl.cpp000066400000000000000000000071471363734646600316600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include 
"test.h" #include using namespace NEO; TEST(KblDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {IKBL_GT1_DT_DEVICE_F0_ID, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IKBL_GT1_HALO_DEVICE_F0_ID, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IKBL_GT1_SERV_DEVICE_F0_ID, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IKBL_GT1_ULT_DEVICE_F0_ID, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IKBL_GT1_ULX_DEVICE_F0_ID, &KBL_1x3x6::hwInfo, &KBL_1x3x6::setupHardwareInfo, GTTYPE_GT1}, {IKBL_GT1F_HALO_DEVICE_F0_ID, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {IKBL_GT1_5_ULT_DEVICE_F0_ID, &KBL_1x3x6::hwInfo, &KBL_1x3x6::setupHardwareInfo, GTTYPE_GT1_5}, {IKBL_GT1_5_ULX_DEVICE_F0_ID, &KBL_1x2x6::hwInfo, &KBL_1x2x6::setupHardwareInfo, GTTYPE_GT1_5}, {IKBL_GT2_DT_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2_HALO_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2_R_ULT_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2_SERV_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2_ULT_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2_ULX_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2_WRK_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2_R_ULX_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT2F_ULT_DEVICE_F0_ID, &KBL_1x3x8::hwInfo, &KBL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {IKBL_GT3_15W_ULT_DEVICE_F0_ID, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IKBL_GT3_28W_ULT_DEVICE_F0_ID, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IKBL_GT3_HALO_DEVICE_F0_ID, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, 
{IKBL_GT3_SERV_DEVICE_F0_ID, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IKBL_GT3_ULT_DEVICE_F0_ID, &KBL_2x3x8::hwInfo, &KBL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {IKBL_GT4_DT_DEVICE_F0_ID, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, {IKBL_GT4_HALO_DEVICE_F0_ID, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, {IKBL_GT4_SERV_DEVICE_F0_ID, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, {IKBL_GT4_WRK_DEVICE_F0_ID, &KBL_3x3x8::hwInfo, &KBL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/linux/hw_info_config_tests_kbl.cpp000066400000000000000000000255251363734646600321500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; using namespace std; struct HwInfoConfigTestLinuxKbl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = IKBL_GT2_DT_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT2); } }; KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfo) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, 
&outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT2, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); drm->StoredDeviceID = IKBL_GT1_ULT_DEVICE_F0_ID; drm->StoredSSVal = 3; drm->setGtType(GTTYPE_GT1); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT1, outHwInfo.platform.eGTType); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, 
outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IKBL_GT1_5_ULX_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT1_5); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT1_5, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IKBL_GT3_ULT_DEVICE_F0_ID; drm->StoredSSVal = 6; drm->setGtType(GTTYPE_GT3); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT3, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, 
outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = IKBL_GT4_HALO_DEVICE_F0_ID; drm->StoredSSVal = 6; drm->setGtType(GTTYPE_GT4); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT4, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } KBLTEST_F(HwInfoConfigTestLinuxKbl, negativeUnknownDevId) { drm->StoredDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, negativeFailedIoctlDevId) { drm->StoredRetValForDeviceID 
= -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, negativeFailedIoctlDevRevId) { drm->StoredRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, negativeFailedIoctlEuCount) { drm->StoredRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, negativeFailedIoctlSsCount) { drm->StoredRetValForSSVal = -5; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoWaFlags) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredDeviceRevID = 0; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); drm->StoredDeviceRevID = 7; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.waDisableLSQCROPERFforOCL); EXPECT_EQ(0u, outHwInfo.workaroundTable.waEncryptedEdramOnlyPartials); drm->StoredDeviceRevID = 9; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.waDisableLSQCROPERFforOCL); EXPECT_EQ(0u, outHwInfo.workaroundTable.waEncryptedEdramOnlyPartials); EXPECT_EQ(0u, outHwInfo.workaroundTable.waForcePcBbFullCfgRestore); } KBLTEST_F(HwInfoConfigTestLinuxKbl, configureHwInfoEdram) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, 
outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = IKBL_GT3_28W_ULT_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT3); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = IKBL_GT3_15W_ULT_DEVICE_F0_ID; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); } KBLTEST_F(HwInfoConfigTestLinuxKbl, whenCallAdjustPlatformThenDoNothing) { auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } template class KblHwInfoTests : public ::testing::Test { }; typedef ::testing::Types kblTestTypes; TYPED_TEST_CASE(KblHwInfoTests, kblTestTypes); TYPED_TEST(KblHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/test_device_caps_kbl.cpp000066400000000000000000000013721363734646600301070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include 
"test.h" using namespace NEO; typedef Test KblDeviceCaps; KBLTEST_F(KblDeviceCaps, reportsOcl21) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); } KBLTEST_F(KblDeviceCaps, GivenKBLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } KBLTEST_F(KblDeviceCaps, givenKblWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/test_hw_info_config_kbl.cpp000066400000000000000000000101271363734646600306160ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(KblHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_KABYLAKE != productFamily) { return; } HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; uint64_t config = 0xdeadbeef; gtSystemInfo = {0}; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using KblHwInfo = ::testing::Test; KBLTEST_F(KblHwInfo, givenBoolWhenCallKblHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100030008, 0x200030008, 0x300030008, 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; PLATFORM &platform = hwInfo.platform; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; platform.usRevId = 9; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); 
EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrVEBOX); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrSVM); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(setParamBool, workaroundTable.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.waLosslessCompressionSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads); EXPECT_EQ(false, workaroundTable.waDisableLSQCROPERFforOCL); EXPECT_EQ(false, workaroundTable.waEncryptedEdramOnlyPartials); EXPECT_EQ(false, workaroundTable.waForcePcBbFullCfgRestore); platform.usRevId = 1; 
workaroundTable = {}; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(true, workaroundTable.waDisableLSQCROPERFforOCL); EXPECT_EQ(true, workaroundTable.waEncryptedEdramOnlyPartials); EXPECT_EQ(true, workaroundTable.waForcePcBbFullCfgRestore); } } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/windows/000077500000000000000000000000001363734646600247365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/windows/CMakeLists.txt000066400000000000000000000006021363734646600274740ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_kbl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_kbl_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_kbl.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_kbl_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/windows/hw_info_config_tests_kbl.cpp000066400000000000000000000012611363734646600324720ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsKbl = HwInfoConfigTestWindows; KBLTEST_F(HwInfoConfigTestWindowsKbl, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_KABYLAKE, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kbl/windows/test_device_caps_kbl_windows.cpp000066400000000000000000000017651363734646600333610ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test KblDeviceCapsWindows; KBLTEST_F(KblDeviceCapsWindows, givenHwInfoWhenAskedForKmdNotifyMechanismThenReturnCorrectValues) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/kernel_tests_gen9.cpp000066400000000000000000000015371363734646600266320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; using Gen9KernelTest = Test; GEN9TEST_F(Gen9KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturnsTrue) { MockKernelWithInternals mockKernel(*pClDevice); auto retVal = mockKernel.mockKernel->Kernel::canTransformImages(); EXPECT_TRUE(retVal); } using Gen9HardwareCommandsTest = testing::Test; GEN9TEST_F(Gen9HardwareCommandsTest, givenGen9PlatformWhenDoBindingTablePrefetchIsCalledThenReturnsTrue) { EXPECT_TRUE(HardwareCommandsHelper::doBindingTablePrefetch()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen9/sampler_tests_gen9.cpp000066400000000000000000000021211363734646600270030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using namespace NEO; typedef Test Gen9SamplerTest; GEN9TEST_F(Gen9SamplerTest, appendSamplerStateParamsDoesNothing) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; std::unique_ptr context(new MockContext()); std::unique_ptr> sampler(new SamplerHw(context.get(), CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST)); auto stateWithoutAppendedParams = FamilyType::cmdInitSamplerState; auto stateWithAppendedParams = FamilyType::cmdInitSamplerState; EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); sampler->appendSamplerStateParams(&stateWithAppendedParams); EXPECT_TRUE(memcmp(&stateWithoutAppendedParams, &stateWithAppendedParams, sizeof(SAMPLER_STATE)) == 0); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/scheduler_source_tests_gen9.cpp000066400000000000000000000026111363734646600307020ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_cmds.h" #include "opencl/source/device_queue/device_queue_hw.h" // Keep the order of device_enqueue.h and scheduler_definitions.h as the latter uses defines from the first one #include "opencl/source/gen9/device_enqueue.h" #include "opencl/source/gen9/scheduler_definitions.h" #include "opencl/test/unit_test/scheduler/scheduler_source_tests.h" // Keep this include below scheduler_definitions.h and device_enqueue.h headers as it depends on defines defined in them #include "opencl/test/unit_test/scheduler/scheduler_source_tests.inl" using namespace NEO; typedef 
SchedulerSourceTest SchedulerSourceTestGen9; GEN9TEST_F(SchedulerSourceTestGen9, GivenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCode) { givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest(); } GEN9TEST_F(SchedulerSourceTestGen9, GivenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrect) { givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest(); } GEN9TEST_F(SchedulerSourceTestGen9, GivenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCode) { givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest(); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/sip_tests_gen9.cpp000066400000000000000000000034211363734646600261370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; namespace SipKernelTests { extern std::string getDebugSipKernelNameWithBitnessAndProductSuffix(std::string &base, const char *product); typedef ::testing::Test gen9SipTests; GEN9TEST_F(gen9SipTests, DISABLED_givenDebugCsrSipKernelWithLocalMemoryWhenAskedForDebugSurfaceBtiAndSizeThenBtiIsZeroAndSizeGreaterThanZero) { auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_NE(nullptr, mockDevice); MockCompilerDebugVars igcDebugVars; std::string name = "sip_dummy_kernel_debug"; std::string builtInFileRoot = testFiles + getDebugSipKernelNameWithBitnessAndProductSuffix(name, binaryNameSuffix.c_str()); std::string builtInGenFile = builtInFileRoot; builtInGenFile.append(".gen"); igcDebugVars.fileName = builtInGenFile; 
gEnvironment->igcPushDebugVars(igcDebugVars); auto &builtins = *mockDevice->getBuiltIns(); auto &sipKernel = builtins.getSipKernel(SipKernelType::DbgCsrLocal, *mockDevice); EXPECT_NE(nullptr, &sipKernel); EXPECT_EQ(SipKernelType::DbgCsrLocal, sipKernel.getType()); gEnvironment->igcPopDebugVars(); } GEN9TEST_F(gen9SipTests, givenDebuggingActiveWhenSipTypeIsQueriedThenDbgCsrLocalIsReturned) { auto sipType = SipKernel::getSipKernelType(renderCoreFamily, true); EXPECT_EQ(SipKernelType::DbgCsrLocal, sipType); } } // namespace SipKernelTests compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/000077500000000000000000000000001363734646600232655ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/CMakeLists.txt000066400000000000000000000013571363734646600260330ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_SKL) set(IGDRCL_SRCS_tests_gen9_skl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_tests_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_skl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sample_skl.cpp ) get_property(NEO_CORE_TESTS_GEN9_SKL GLOBAL PROPERTY NEO_CORE_TESTS_GEN9_SKL) list(APPEND IGDRCL_SRCS_tests_gen9_skl ${NEO_CORE_TESTS_GEN9_SKL}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_skl}) add_subdirectories() neo_copy_test_files(copy_test_files_skl skl) add_dependencies(unit_tests copy_test_files_skl) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/device_tests_skl.cpp000066400000000000000000000010731363734646600273240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test DeviceTest; SKLTEST_F(DeviceTest, getSupportedClVersion21Device) { auto version = 
pClDevice->getSupportedClVersion(); EXPECT_EQ(21u, version); } SKLTEST_F(DeviceTest, givenSklDeviceWhenAskedForProflingTimerResolutionThen83IsReturned) { auto resolution = pDevice->getProfilingTimerResolution(); EXPECT_DOUBLE_EQ(83.333, resolution); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/linux/000077500000000000000000000000001363734646600244245ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/linux/CMakeLists.txt000066400000000000000000000005251363734646600271660ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_skl_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_skl.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_skl_linux}) add_subdirectory(dll) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/linux/dll/000077500000000000000000000000001363734646600251775ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/linux/dll/CMakeLists.txt000066400000000000000000000004671363734646600277460ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests_gen9_skl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_id_tests_skl.cpp ) target_sources(igdrcl_linux_dll_tests PRIVATE ${IGDRCL_SRCS_linux_dll_tests_gen9_skl}) compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/linux/dll/device_id_tests_skl.cpp000066400000000000000000000100201363734646600317020ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "test.h" #include using namespace NEO; TEST(SklDeviceIdTest, supportedDeviceId) { std::array expectedDescriptors = {{ {ISKL_GT1_DESK_DEVICE_F0_ID, &SKL_1x2x6::hwInfo, 
&SKL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ISKL_GT1_DT_DEVICE_F0_ID, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ISKL_GT1_HALO_MOBL_DEVICE_F0_ID, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ISKL_GT1_SERV_DEVICE_F0_ID, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ISKL_GT1_ULT_DEVICE_F0_ID, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ISKL_GT1_ULX_DEVICE_F0_ID, &SKL_1x2x6::hwInfo, &SKL_1x2x6::setupHardwareInfo, GTTYPE_GT1}, {ISKL_GT1_5_DT_DEVICE_F0_ID, &SKL_1x3x6::hwInfo, &SKL_1x3x6::setupHardwareInfo, GTTYPE_GT1_5}, {ISKL_GT1_5_ULT_DEVICE_F0_ID, &SKL_1x3x6::hwInfo, &SKL_1x3x6::setupHardwareInfo, GTTYPE_GT1_5}, {ISKL_GT1_5_ULX_DEVICE_F0_ID, &SKL_1x3x6::hwInfo, &SKL_1x3x6::setupHardwareInfo, GTTYPE_GT1_5}, {ISKL_GT2_DESK_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT2_DT_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT2_HALO_MOBL_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT2_SERV_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT2_ULT_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT2_ULX_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT2_WRK_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT2F_ULT_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_LP_DEVICE_F0_ID, &SKL_1x3x8::hwInfo, &SKL_1x3x8::setupHardwareInfo, GTTYPE_GT2}, {ISKL_GT3_DESK_DEVICE_F0_ID, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ISKL_GT3_HALO_MOBL_DEVICE_F0_ID, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ISKL_GT3_MEDIA_SERV_DEVICE_F0_ID, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ISKL_GT3_SERV_DEVICE_F0_ID, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo, 
GTTYPE_GT3}, {ISKL_GT3_ULT_DEVICE_F0_ID, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ISKL_GT3e_ULT_DEVICE_F0_ID_540, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ISKL_GT3e_ULT_DEVICE_F0_ID_550, &SKL_2x3x8::hwInfo, &SKL_2x3x8::setupHardwareInfo, GTTYPE_GT3}, {ISKL_GT4_DESK_DEVICE_F0_ID, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, {ISKL_GT4_DT_DEVICE_F0_ID, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, {ISKL_GT4_HALO_MOBL_DEVICE_F0_ID, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, {ISKL_GT4_SERV_DEVICE_F0_ID, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, {ISKL_GT4_WRK_DEVICE_F0_ID, &SKL_3x3x8::hwInfo, &SKL_3x3x8::setupHardwareInfo, GTTYPE_GT4}, }}; auto compareStructs = [](const DeviceDescriptor *first, const DeviceDescriptor *second) { return first->deviceId == second->deviceId && first->pHwInfo == second->pHwInfo && first->setupHardwareInfo == second->setupHardwareInfo && first->eGtType == second->eGtType; }; size_t startIndex = 0; while (!compareStructs(&expectedDescriptors[0], &deviceDescriptorTable[startIndex]) && deviceDescriptorTable[startIndex].deviceId != 0) { startIndex++; }; EXPECT_NE(0u, deviceDescriptorTable[startIndex].deviceId); for (auto &expected : expectedDescriptors) { EXPECT_TRUE(compareStructs(&expected, &deviceDescriptorTable[startIndex])); startIndex++; } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/linux/hw_info_config_tests_skl.cpp000066400000000000000000000302671363734646600322110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" using namespace NEO; using namespace std; struct HwInfoConfigTestLinuxSkl : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = 
ISKL_GT2_DESK_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT2); } }; SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfo) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT2, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); //constant sysInfo/ftr flags EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VEBoxInfo.Instances.Bits.VEBox0Enabled); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.VDBoxInfo.Instances.Bits.VDBox0Enabled); EXPECT_TRUE(outHwInfo.gtSystemInfo.VEBoxInfo.IsValid); EXPECT_TRUE(outHwInfo.gtSystemInfo.VDBoxInfo.IsValid); drm->StoredDeviceID = ISKL_GT1_DT_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT1); drm->StoredSSVal = 3; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(1u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); 
EXPECT_EQ(GTTYPE_GT1, outHwInfo.platform.eGTType); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = ISKL_GT1_5_DT_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT1_5); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT1_5, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = ISKL_GT3_DESK_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT3); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT3, 
outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); drm->StoredDeviceID = ISKL_GT4_DESK_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT4); drm->StoredSSVal = 6; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ((unsigned short)drm->StoredDeviceID, outHwInfo.platform.usDeviceID); EXPECT_EQ((unsigned short)drm->StoredDeviceRevID, outHwInfo.platform.usRevId); EXPECT_EQ((uint32_t)drm->StoredEUVal, outHwInfo.gtSystemInfo.EUCount); EXPECT_EQ((uint32_t)drm->StoredSSVal, outHwInfo.gtSystemInfo.SubSliceCount); EXPECT_EQ(2u, outHwInfo.gtSystemInfo.SliceCount); EXPECT_EQ(aub_stream::ENGINE_RCS, outHwInfo.capabilityTable.defaultEngineType); EXPECT_EQ(GTTYPE_GT4, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(1u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); auto &outKmdNotifyProperties = outHwInfo.capabilityTable.kmdNotifyProperties; EXPECT_TRUE(outKmdNotifyProperties.enableKmdNotify); EXPECT_EQ(50000, outKmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(5000, outKmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_TRUE(outKmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(200000, outKmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } SKLTEST_F(HwInfoConfigTestLinuxSkl, 
negativeUnknownDevId) { drm->StoredDeviceID = 0; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, negativeFailedIoctlDevId) { drm->StoredRetValForDeviceID = -2; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, negativeFailedIoctlDevRevId) { drm->StoredRetValForDeviceRevID = -3; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, negativeFailedIoctlEuCount) { drm->StoredRetValForEUVal = -4; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, negativeFailedIoctlSsCount) { drm->StoredRetValForSSVal = -5; auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoWaFlags) { auto hwInfoConfig = HwInfoConfig::get(productFamily); drm->StoredDeviceRevID = 1; int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); drm->StoredDeviceRevID = 0; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.waCompressedResourceRequiresConstVA21); drm->StoredDeviceRevID = 5; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.waCompressedResourceRequiresConstVA21); EXPECT_EQ(0u, outHwInfo.workaroundTable.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(0u, 
outHwInfo.workaroundTable.waDisablePerCtxtPreemptionGranularityControl); drm->StoredDeviceRevID = 6; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(0u, outHwInfo.workaroundTable.waCompressedResourceRequiresConstVA21); EXPECT_EQ(0u, outHwInfo.workaroundTable.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(0u, outHwInfo.workaroundTable.waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(0u, outHwInfo.workaroundTable.waCSRUncachable); } SKLTEST_F(HwInfoConfigTestLinuxSkl, configureHwInfoEdram) { auto hwInfoConfig = HwInfoConfig::get(productFamily); int ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL(0u, outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(0u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = ISKL_GT3e_ULT_DEVICE_F0_ID_540; drm->setGtType(GTTYPE_GT3); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = ISKL_GT3e_ULT_DEVICE_F0_ID_550; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = ISKL_GT3_MEDIA_SERV_DEVICE_F0_ID; ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((64u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = ISKL_GT4_HALO_MOBL_DEVICE_F0_ID; drm->setGtType(GTTYPE_GT4); ret = hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); drm->StoredDeviceID = ISKL_GT4_WRK_DEVICE_F0_ID; ret = 
hwInfoConfig->configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ_VAL((128u * 1024u), outHwInfo.gtSystemInfo.EdramSizeInKb); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); } SKLTEST_F(HwInfoConfigTestLinuxSkl, whenCallAdjustPlatformThenDoNothing) { auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } template class SklHwInfoTests : public ::testing::Test { }; typedef ::testing::Types sklTestTypes; TYPED_TEST_CASE(SklHwInfoTests, sklTestTypes); TYPED_TEST(SklHwInfoTests, gtSetupIsCorrect) { HardwareInfo hwInfo; DrmMock drm; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; DeviceDescriptor device = {0, &hwInfo, &TypeParam::setupHardwareInfo, GTTYPE_GT1}; int ret = drm.setupHardwareInfo(&device, false); EXPECT_EQ(ret, 0); EXPECT_GT(gtSystemInfo.EUCount, 0u); EXPECT_GT(gtSystemInfo.ThreadCount, 0u); EXPECT_GT(gtSystemInfo.SliceCount, 0u); EXPECT_GT(gtSystemInfo.SubSliceCount, 0u); EXPECT_GT_VAL(gtSystemInfo.L3CacheSizeInKb, 0u); EXPECT_EQ(gtSystemInfo.CsrSizeInMb, 8u); EXPECT_FALSE(gtSystemInfo.IsDynamicallyPopulated); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/test_device_caps_skl.cpp000066400000000000000000000047031363734646600301520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test SklDeviceCaps; SKLTEST_F(SklDeviceCaps, reportsOcl21) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); } SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) { const auto &caps = pDevice->getDeviceInfo(); EXPECT_EQ(83u, caps.outProfilingTimerResolution); } SKLTEST_F(SklDeviceCaps, 
givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) { const auto &caps = pClDevice->getDeviceInfo(); const auto &sharedCaps = pDevice->getDeviceInfo(); EXPECT_STREQ("OpenCL 2.1 NEO ", caps.clVersion); EXPECT_STREQ("OpenCL C 2.0 ", caps.clCVersion); auto memoryManager = pDevice->getMemoryManager(); EXPECT_FALSE(memoryManager->peekForce32BitAllocations()); EXPECT_FALSE(sharedCaps.force32BitAddressess); } SKLTEST_F(SklDeviceCaps, SklSvmCapabilities) { const auto &caps = pClDevice->getDeviceInfo(); cl_device_svm_capabilities expectedCaps = (CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); EXPECT_EQ(expectedCaps, caps.svmCapabilities); } typedef Test SklUsDeviceIdTest; SKLTEST_F(SklUsDeviceIdTest, isSimulationCap) { unsigned short sklSimulationIds[6] = { ISKL_GT0_DESK_DEVICE_F0_ID, ISKL_GT1_DESK_DEVICE_F0_ID, ISKL_GT2_DESK_DEVICE_F0_ID, ISKL_GT3_DESK_DEVICE_F0_ID, ISKL_GT4_DESK_DEVICE_F0_ID, 0, // default, non-simulation }; NEO::MockDevice *mockDevice = nullptr; for (auto id : sklSimulationIds) { mockDevice = createWithUsDeviceId(id); ASSERT_NE(mockDevice, nullptr); if (id == 0) EXPECT_FALSE(mockDevice->isSimulation()); else EXPECT_TRUE(mockDevice->isSimulation()); delete mockDevice; } } SKLTEST_F(SklUsDeviceIdTest, GivenSKLWhenCheckftr64KBpagesThenTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftr64KBpages); } SKLTEST_F(SklUsDeviceIdTest, givenSklWhenCheckFtrSupportsInteger64BitAtomicsThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsInteger64BitAtomics); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/test_hw_info_config_skl.cpp000066400000000000000000000132421363734646600306610ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test.h" using namespace NEO; TEST(SklHwInfoConfig, givenHwInfoErrorneousConfigString) { if (IGFX_SKYLAKE != productFamily) { return; } HardwareInfo hwInfo; 
GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; gtSystemInfo = {0}; uint64_t config = 0xdeadbeef; EXPECT_ANY_THROW(hardwareInfoSetup[productFamily](&hwInfo, false, config)); EXPECT_EQ(0u, gtSystemInfo.SliceCount); EXPECT_EQ(0u, gtSystemInfo.SubSliceCount); EXPECT_EQ(0u, gtSystemInfo.EUCount); } using SklHwInfo = ::testing::Test; SKLTEST_F(SklHwInfo, givenBoolWhenCallSklHardwareInfoSetupThenFeatureTableAndWorkaroundTableAreSetCorrect) { uint64_t configs[] = { 0x100030008, 0x200030008, 0x300030008, 0x100020006, 0x100030006}; bool boolValue[]{ true, false}; HardwareInfo hwInfo; GT_SYSTEM_INFO >SystemInfo = hwInfo.gtSystemInfo; FeatureTable &featureTable = hwInfo.featureTable; WorkaroundTable &workaroundTable = hwInfo.workaroundTable; PLATFORM &pPlatform = hwInfo.platform; for (auto &config : configs) { for (auto setParamBool : boolValue) { gtSystemInfo = {0}; featureTable = {}; workaroundTable = {}; pPlatform.usRevId = 9; hardwareInfoSetup[productFamily](&hwInfo, setParamBool, config); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuThreadGroupLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrL3IACoherency); EXPECT_EQ(setParamBool, featureTable.ftrGpGpuMidThreadLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dMidBatchPreempt); EXPECT_EQ(setParamBool, featureTable.ftr3dObjectLevelPreempt); EXPECT_EQ(setParamBool, featureTable.ftrPerCtxtPreemptionGranularityControl); EXPECT_EQ(setParamBool, featureTable.ftrPPGTT); EXPECT_EQ(setParamBool, featureTable.ftrSVM); EXPECT_EQ(setParamBool, featureTable.ftrIA32eGfxPTEs); EXPECT_EQ(setParamBool, featureTable.ftrDisplayYTiling); EXPECT_EQ(setParamBool, featureTable.ftrTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrUserModeTranslationTable); EXPECT_EQ(setParamBool, featureTable.ftrEnableGuC); EXPECT_EQ(setParamBool, featureTable.ftrFbc); EXPECT_EQ(setParamBool, featureTable.ftrFbc2AddressTranslation); EXPECT_EQ(setParamBool, 
featureTable.ftrFbcBlitterTracking); EXPECT_EQ(setParamBool, featureTable.ftrFbcCpuTracking); EXPECT_EQ(setParamBool, featureTable.ftrVEBOX); EXPECT_EQ(setParamBool, featureTable.ftrTileY); EXPECT_EQ(false, featureTable.ftrSingleVeboxSlice); EXPECT_EQ(false, featureTable.ftrVcs2); EXPECT_EQ(setParamBool, workaroundTable.waEnablePreemptionGranularityControlByUMD); EXPECT_EQ(setParamBool, workaroundTable.waSendMIFLUSHBeforeVFE); EXPECT_EQ(setParamBool, workaroundTable.waReportPerfCountUseGlobalContextID); EXPECT_EQ(setParamBool, workaroundTable.waDisableLSQCROPERFforOCL); EXPECT_EQ(setParamBool, workaroundTable.waMsaa8xTileYDepthPitchAlignment); EXPECT_EQ(setParamBool, workaroundTable.waLosslessCompressionSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.waFbcLinearSurfaceStride); EXPECT_EQ(setParamBool, workaroundTable.wa4kAlignUVOffsetNV12LinearSurface); EXPECT_EQ(setParamBool, workaroundTable.waEncryptedEdramOnlyPartials); EXPECT_EQ(setParamBool, workaroundTable.waDisableEdramForDisplayRT); EXPECT_EQ(setParamBool, workaroundTable.waForcePcBbFullCfgRestore); EXPECT_EQ(setParamBool, workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads); EXPECT_EQ(false, workaroundTable.waCompressedResourceRequiresConstVA21); EXPECT_EQ(false, workaroundTable.waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(false, workaroundTable.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(false, workaroundTable.waCSRUncachable); pPlatform.usRevId = 1; workaroundTable = {}; featureTable = {}; featureTable.ftrGT1 = true; featureTable.ftrGT3 = true; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(true, workaroundTable.waCompressedResourceRequiresConstVA21); EXPECT_EQ(true, workaroundTable.waDisablePerCtxtPreemptionGranularityControl); EXPECT_EQ(true, workaroundTable.waModifyVFEStateAfterGPGPUPreemption); EXPECT_EQ(true, workaroundTable.waCSRUncachable); EXPECT_EQ(true, featureTable.ftrSingleVeboxSlice); EXPECT_EQ(true, featureTable.ftrVcs2); 
workaroundTable = {}; featureTable = {}; featureTable.ftrGT2 = true; featureTable.ftrGT4 = true; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(true, featureTable.ftrSingleVeboxSlice); EXPECT_EQ(true, featureTable.ftrVcs2); workaroundTable = {}; featureTable = {}; featureTable.ftrGT1 = true; featureTable.ftrGT2 = true; featureTable.ftrGT3 = true; featureTable.ftrGT4 = true; hardwareInfoSetup[productFamily](&hwInfo, true, config); EXPECT_EQ(true, featureTable.ftrSingleVeboxSlice); EXPECT_EQ(true, featureTable.ftrVcs2); } } } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/test_sample_skl.cpp000066400000000000000000000005561363734646600271700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test SkylakeOnlyTest; SKLTEST_F(SkylakeOnlyTest, shouldPassOnSkl) { EXPECT_EQ(IGFX_SKYLAKE, pDevice->getHardwareInfo().platform.eProductFamily); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/windows/000077500000000000000000000000001363734646600247575ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/windows/CMakeLists.txt000066400000000000000000000006021363734646600275150ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_skl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_device_caps_skl_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests_skl.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_skl_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/windows/hw_info_config_tests_skl.cpp000066400000000000000000000013131363734646600325320ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" using namespace NEO; using namespace std; using HwInfoConfigTestWindowsSkl = HwInfoConfigTestWindows; SKLTEST_F(HwInfoConfigTestWindowsSkl, whenCallAdjustPlatformThenDoNothing) { EXPECT_EQ(IGFX_SKYLAKE, productFamily); auto hwInfoConfig = HwInfoConfig::get(productFamily); outHwInfo = pInHwInfo; hwInfoConfig->adjustPlatformForProductFamily(&outHwInfo); int ret = memcmp(&outHwInfo.platform, &pInHwInfo.platform, sizeof(PLATFORM)); EXPECT_EQ(0, ret); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/skl/windows/test_device_caps_skl_windows.cpp000066400000000000000000000017651363734646600334230ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test SklDeviceCapsWindows; SKLTEST_F(SklDeviceCapsWindows, givenHwInfoWhenAskedForKmdNotifyMechanismThenReturnCorrectValues) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(0, pDevice->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/test_device_caps_gen9.cpp000066400000000000000000000055251363734646600274350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef Test Gen9DeviceCaps; GEN9TEST_F(Gen9DeviceCaps, skuSpecificCaps) { const auto &caps = pClDevice->getDeviceInfo(); std::string extensionString = caps.deviceExtensions; if (pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64) { EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_NE(0u, caps.doubleFpConfig); } else { EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); EXPECT_EQ(0u, caps.doubleFpConfig); } } GEN9TEST_F(Gen9DeviceCaps, allSkusSupportCorrectlyRoundedDivideSqrt) { const auto &caps = pClDevice->getDeviceInfo(); EXPECT_NE(0u, caps.singleFpConfig & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT); } GEN9TEST_F(Gen9DeviceCaps, defaultPreemptionMode) { EXPECT_EQ(PreemptionMode::MidThread, pDevice->getHardwareInfo().capabilityTable.defaultPreemptionMode); } GEN9TEST_F(Gen9DeviceCaps, compression) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrRenderCompressedImages); } GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScratchThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedValue = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; EXPECT_EQ(expectedValue, hwHelper.getComputeUnitsUsedForScratch(&hwInfo)); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); } GEN9TEST_F(Gen9DeviceCaps, givenHwInfoWhenRequestedMaxFrontEndThreadsThenReturnValidValue) { const auto &hwInfo = pDevice->getHardwareInfo(); EXPECT_EQ(HwHelper::getMaxThreadsForVfe(hwInfo), pDevice->getDeviceInfo().maxFrontEndThreads); } GEN9TEST_F(Gen9DeviceCaps, 
givenHwInfoWhenSlmSizeIsRequiredThenReturnCorrectValue) { EXPECT_EQ(64u, pDevice->getHardwareInfo().capabilityTable.slmSize); } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckSupportCacheFlushAfterWalkerThenFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.supportCacheFlushAfterWalker); } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckBlitterOperationsSupportThenReturnFalse) { EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.blitterOperationsSupported); } GEN9TEST_F(Gen9DeviceCaps, givenGen9WhenCheckingImageSupportThenReturnTrue) { EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.supportsImages); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/test_device_queue_hw_gen9.cpp000066400000000000000000000071071363734646600303270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/context/context.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" using namespace NEO; using namespace DeviceHostQueue; typedef DeviceQueueHwTest Gen9DeviceQueueSlb; GEN9TEST_F(Gen9DeviceQueueSlb, expectedAllocationSize) { deviceQueue = createQueueObject(); ASSERT_NE(deviceQueue, nullptr); auto expectedSize = getMinimumSlbSize() + sizeof(typename FamilyType::MI_ARB_CHECK) + sizeof(typename FamilyType::MI_ATOMIC) + sizeof(typename FamilyType::MI_ARB_CHECK) + sizeof(typename FamilyType::PIPE_CONTROL) + sizeof(typename FamilyType::PIPE_CONTROL); expectedSize *= 128; //num of enqueues expectedSize += sizeof(typename FamilyType::MI_BATCH_BUFFER_START); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); expectedSize += MockDeviceQueueHw::getExecutionModelCleanupSectionSize(); expectedSize += (4 * MemoryConstants::pageSize); expectedSize = alignUp(expectedSize, MemoryConstants::pageSize); 
ASSERT_NE(deviceQueue->getSlbBuffer(), nullptr); EXPECT_EQ(deviceQueue->getSlbBuffer()->getUnderlyingBufferSize(), expectedSize); delete deviceQueue; } GEN9TEST_F(Gen9DeviceQueueSlb, SlbCommandsWa) { auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); EXPECT_TRUE(mockDeviceQueueHw->arbCheckWa); EXPECT_TRUE(mockDeviceQueueHw->pipeControlWa); EXPECT_TRUE(mockDeviceQueueHw->miAtomicWa); EXPECT_FALSE(mockDeviceQueueHw->lriWa); delete mockDeviceQueueHw; } GEN9TEST_F(Gen9DeviceQueueSlb, addProfilingEndcmds) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; auto mockDeviceQueueHw = new MockDeviceQueueHw(pContext, device, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); uint64_t timestampAddress = 0x12345678555500; mockDeviceQueueHw->addProfilingEndCmds(timestampAddress); HardwareParse hwParser; auto *slbCS = mockDeviceQueueHw->getSlbCS(); hwParser.parseCommands(*slbCS, 0); auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), pipeControlItor); PIPE_CONTROL *pipeControl = (PIPE_CONTROL *)*pipeControlItor; uint32_t postSyncOp = (uint32_t)PIPE_CONTROL::POST_SYNC_OPERATION_NO_WRITE; EXPECT_EQ(postSyncOp, (uint32_t)pipeControl->getPostSyncOperation()); EXPECT_NE(0u, (uint32_t)pipeControl->getCommandStreamerStallEnable()); auto storeRegMemItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), storeRegMemItor); MI_STORE_REGISTER_MEM *pMICmdLow = (MI_STORE_REGISTER_MEM *)*storeRegMemItor; EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pMICmdLow->getRegisterAddress()); EXPECT_EQ(timestampAddress, pMICmdLow->getMemoryAddress()); storeRegMemItor++; EXPECT_EQ(hwParser.cmdList.end(), storeRegMemItor); delete mockDeviceQueueHw; } 
compute-runtime-20.13.16352/opencl/test/unit_test/gen9/test_platform_caps_gen9.cpp000066400000000000000000000031371363734646600300170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "test.h" using namespace NEO; struct Gen9PlatformCaps : public PlatformFixture, public ::testing::Test { void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; GEN9TEST_F(Gen9PlatformCaps, allSkusSupportFP64) { const auto &caps = pPlatform->getPlatformInfo(); if (pPlatform->getClDevice(0)->getHardwareInfo().capabilityTable.ftrSupportsFP64) { EXPECT_NE(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } else { EXPECT_EQ(std::string::npos, caps.extensions.find(std::string("cl_khr_fp64"))); } } GEN9TEST_F(Gen9PlatformCaps, SKLVersion) { char *paramValue = new char[12]; cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr); if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_SKYLAKE) { EXPECT_STREQ(paramValue, "OpenCL 2.1 "); } EXPECT_EQ(retVal, CL_SUCCESS); delete[] paramValue; } GEN9TEST_F(Gen9PlatformCaps, BXTVersion) { char *paramValue = new char[12]; cl_int retVal = clGetPlatformInfo(pPlatform, CL_PLATFORM_VERSION, 12, paramValue, nullptr); if (pPlatform->getClDevice(0)->getHardwareInfo().platform.eProductFamily == IGFX_BROXTON) { EXPECT_STREQ(paramValue, "OpenCL 1.2 "); } EXPECT_EQ(retVal, CL_SUCCESS); delete[] paramValue; } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/test_sample_gen9.cpp000066400000000000000000000006271363734646600264470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "test.h" using namespace NEO; typedef 
Test Gen9OnlyTest; GEN9TEST_F(Gen9OnlyTest, shouldPassOnGen9) { EXPECT_NE(IGFX_GEN8_CORE, pDevice->getRenderCoreFamily()); EXPECT_EQ(IGFX_GEN9_CORE, pDevice->getRenderCoreFamily()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen9/unit_test_helper_gen9.cpp000066400000000000000000000007301363734646600274770ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gen9/hw_info.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.inl" namespace NEO { using Family = SKLFamily; template <> bool UnitTestHelper::isPipeControlWArequired(const HardwareInfo &hwInfo) { return true; } template struct UnitTestHelper; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/gen9/windows/000077500000000000000000000000001363734646600241665ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen9/windows/CMakeLists.txt000066400000000000000000000004721363734646600267310ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen9_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_callbacks_tests_gen9.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen9_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gen9/windows/gmm_callbacks_tests_gen9.cpp000066400000000000000000000010201363734646600316060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/windows/gmm_callbacks.h" #include "test.h" using namespace NEO; typedef ::testing::Test Gen9GmmCallbacksTests; GEN9TEST_F(Gen9GmmCallbacksTests, notSupportedDeviceCallback) { EXPECT_EQ(0, DeviceCallbacks::notifyAubCapture(nullptr, 0, 0, false)); } GEN9TEST_F(Gen9GmmCallbacksTests, notSupportedTTCallback) { 
EXPECT_EQ(0, TTCallbacks::writeL3Address(nullptr, 1, 2)); } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/000077500000000000000000000000001363734646600237535ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/CMakeLists.txt000066400000000000000000000017331363734646600265170ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gen_common ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_base_mi_arb.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_compute_mode.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_gpgpu_walker.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_mi_arb.inl ${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_sip.inl ${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_declare.cpp ${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_exclude.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gen_cmd_parse.h ${CMAKE_CURRENT_SOURCE_DIR}/gen_commands_common_validation.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/matchers.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros${BRANCH_DIR_SUFFIX}/test.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gen_common}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/cmd_parse_base.inl000066400000000000000000000315221363734646600274110ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" // clang-format off using namespace NEO; using MI_ARB_CHECK = GenStruct::MI_ARB_CHECK; using MI_ATOMIC = GenStruct::MI_ATOMIC; using MI_BATCH_BUFFER_END = GenStruct::MI_BATCH_BUFFER_END; using MI_BATCH_BUFFER_START = GenStruct::MI_BATCH_BUFFER_START; using MI_LOAD_REGISTER_IMM = GenStruct::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_MEM = GenStruct::MI_LOAD_REGISTER_MEM; using 
MI_STORE_REGISTER_MEM = GenStruct::MI_STORE_REGISTER_MEM; using MI_NOOP = GenStruct::MI_NOOP; using PIPE_CONTROL = GenStruct::PIPE_CONTROL; using PIPELINE_SELECT = GenStruct::PIPELINE_SELECT; using STATE_BASE_ADDRESS = GenStruct::STATE_BASE_ADDRESS; using MI_REPORT_PERF_COUNT = GenStruct::MI_REPORT_PERF_COUNT; using MI_MATH = GenStruct::MI_MATH; using MI_LOAD_REGISTER_REG = GenStruct::MI_LOAD_REGISTER_REG; using MI_SEMAPHORE_WAIT = GenStruct::MI_SEMAPHORE_WAIT; using MI_STORE_DATA_IMM = GenStruct::MI_STORE_DATA_IMM; using MI_FLUSH_DW = GenStruct::MI_FLUSH_DW; using XY_COPY_BLT = GenGfxFamily::XY_COPY_BLT; // clang-format on template <> STATE_BASE_ADDRESS *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return STATE_BASE_ADDRESS::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && STATE_BASE_ADDRESS::COMMAND_SUBTYPE_GFXPIPE_COMMON == pCmd->TheStructure.Common.CommandSubtype && STATE_BASE_ADDRESS::_3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED == pCmd->TheStructure.Common._3DCommandOpcode && STATE_BASE_ADDRESS::_3D_COMMAND_SUB_OPCODE_STATE_BASE_ADDRESS == pCmd->TheStructure.Common._3DCommandSubOpcode ? pCmd : nullptr; } template <> PIPE_CONTROL *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return PIPE_CONTROL::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && PIPE_CONTROL::COMMAND_SUBTYPE_GFXPIPE_3D == pCmd->TheStructure.Common.CommandSubtype && PIPE_CONTROL::_3D_COMMAND_OPCODE_PIPE_CONTROL == pCmd->TheStructure.Common._3DCommandOpcode && PIPE_CONTROL::_3D_COMMAND_SUB_OPCODE_PIPE_CONTROL == pCmd->TheStructure.Common._3DCommandSubOpcode ? 
pCmd : nullptr; } template <> PIPELINE_SELECT *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return PIPELINE_SELECT::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && PIPELINE_SELECT::COMMAND_SUBTYPE_GFXPIPE_SINGLE_DW == pCmd->TheStructure.Common.CommandSubtype && PIPELINE_SELECT::_3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED == pCmd->TheStructure.Common._3DCommandOpcode && PIPELINE_SELECT::_3D_COMMAND_SUB_OPCODE_PIPELINE_SELECT == pCmd->TheStructure.Common._3DCommandSubOpcode ? pCmd : nullptr; } template <> MI_LOAD_REGISTER_IMM *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_LOAD_REGISTER_IMM::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_LOAD_REGISTER_IMM::MI_COMMAND_OPCODE_MI_LOAD_REGISTER_IMM == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_NOOP *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_NOOP::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_NOOP::MI_COMMAND_OPCODE_MI_NOOP == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_ATOMIC *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_ATOMIC::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_ATOMIC::MI_COMMAND_OPCODE_MI_ATOMIC == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_BATCH_BUFFER_END *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_BATCH_BUFFER_END::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_BATCH_BUFFER_END::MI_COMMAND_OPCODE_MI_BATCH_BUFFER_END == pCmd->TheStructure.Common.MiCommandOpcode ? 
pCmd : nullptr; } template <> MI_BATCH_BUFFER_START *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_BATCH_BUFFER_START::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_BATCH_BUFFER_START::MI_COMMAND_OPCODE_MI_BATCH_BUFFER_START == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_LOAD_REGISTER_MEM *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_LOAD_REGISTER_MEM::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_LOAD_REGISTER_MEM::MI_COMMAND_OPCODE_MI_LOAD_REGISTER_MEM == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_STORE_REGISTER_MEM *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_STORE_REGISTER_MEM::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_STORE_REGISTER_MEM::MI_COMMAND_OPCODE_MI_STORE_REGISTER_MEM == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_REPORT_PERF_COUNT *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_REPORT_PERF_COUNT::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_REPORT_PERF_COUNT::MI_COMMAND_OPCODE_MI_REPORT_PERF_COUNT == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_MATH *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_MATH::COMMAND_TYPE_MI_COMMAND == pCmd->DW0.BitField.InstructionType && MI_MATH::MI_COMMAND_OPCODE_MI_MATH == pCmd->DW0.BitField.InstructionOpcode ? pCmd : nullptr; } template <> MI_LOAD_REGISTER_REG *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_LOAD_REGISTER_REG::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_LOAD_REGISTER_REG::MI_COMMAND_OPCODE_MI_LOAD_REGISTER_REG == pCmd->TheStructure.Common.MiCommandOpcode ? 
pCmd : nullptr; } template <> MI_SEMAPHORE_WAIT *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_SEMAPHORE_WAIT::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_SEMAPHORE_WAIT::MI_COMMAND_OPCODE_MI_SEMAPHORE_WAIT == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_STORE_DATA_IMM *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_STORE_DATA_IMM::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_STORE_DATA_IMM::MI_COMMAND_OPCODE_MI_STORE_DATA_IMM == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> MI_FLUSH_DW *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_FLUSH_DW::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_FLUSH_DW::MI_COMMAND_OPCODE_MI_FLUSH_DW == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } template <> XY_COPY_BLT *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return XY_COPY_BLT::INSTRUCTIONTARGET_OPCODE_OPCODE == pCmd->TheStructure.Common.InstructionTarget_Opcode && XY_COPY_BLT::CLIENT_2D_PROCESSOR == pCmd->TheStructure.Common.Client ? 
pCmd : nullptr; } template size_t CmdParse::getCommandLength(void *cmd) { { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return SIZE32(*pCmd); } { auto pCmd = genCmdCast(cmd); if (pCmd) return sizeof(MI_ATOMIC) / sizeof(uint32_t); } { auto pCmd = genCmdCast(cmd); if (pCmd) return SIZE32(*pCmd); } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return SIZE32(*pCmd); } { auto pCmd = genCmdCast(cmd); if (pCmd) return SIZE32(*pCmd); } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->DW0.BitField.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 3; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } { auto pCmd = genCmdCast(cmd); if (pCmd) return pCmd->TheStructure.Common.DwordLength + 2; } auto commandLengthHwSpecific = getCommandLengthHwSpecific(cmd); if (commandLengthHwSpecific != 0) { return commandLengthHwSpecific; } return getAdditionalCommandLength(cmd); } template const char *CmdParse::getCommandName(void *cmd) { #define RETURN_NAME_IF(CMD_NAME) \ if (nullptr != genCmdCast(cmd)) \ return #CMD_NAME; RETURN_NAME_IF(STATE_BASE_ADDRESS); 
RETURN_NAME_IF(PIPE_CONTROL); RETURN_NAME_IF(MI_ARB_CHECK); RETURN_NAME_IF(MI_ATOMIC); RETURN_NAME_IF(MI_BATCH_BUFFER_END); RETURN_NAME_IF(MI_BATCH_BUFFER_START); RETURN_NAME_IF(MI_LOAD_REGISTER_IMM); RETURN_NAME_IF(MI_LOAD_REGISTER_MEM); RETURN_NAME_IF(MI_STORE_REGISTER_MEM); RETURN_NAME_IF(MI_NOOP); RETURN_NAME_IF(PIPELINE_SELECT); RETURN_NAME_IF(MI_REPORT_PERF_COUNT); RETURN_NAME_IF(MI_MATH); RETURN_NAME_IF(MI_LOAD_REGISTER_REG); RETURN_NAME_IF(MI_SEMAPHORE_WAIT); RETURN_NAME_IF(MI_STORE_DATA_IMM); RETURN_NAME_IF(MI_FLUSH_DW); RETURN_NAME_IF(XY_COPY_BLT); #undef RETURN_NAME_IF auto commandNameHwSpecific = getCommandNameHwSpecific(cmd); if (strcmp(commandNameHwSpecific, "UNKNOWN") != 0) { return commandNameHwSpecific; } return getAdditionalCommandName(cmd); } template size_t CmdParse::getAdditionalCommandLength(void *cmd) { return 0; } template const char *CmdParse::getAdditionalCommandName(void *cmd) { return "UNKNOWN"; } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/cmd_parse_base_mi_arb.inl000066400000000000000000000007601363734646600307220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> MI_ARB_CHECK *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_ARB_CHECK::MI_INSTRUCTION_TYPE_MI_INSTRUCTION == pCmd->TheStructure.Common.MiInstructionType && MI_ARB_CHECK::MI_INSTRUCTION_OPCODE_MI_ARB_CHECK == pCmd->TheStructure.Common.MiInstructionOpcode ? 
pCmd : nullptr; } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/cmd_parse_compute_mode.inl000066400000000000000000000015721363734646600311610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // clang-format off using namespace NEO; using STATE_COMPUTE_MODE = GenStruct::STATE_COMPUTE_MODE; // clang-format on template <> STATE_COMPUTE_MODE *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return STATE_COMPUTE_MODE::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && STATE_COMPUTE_MODE::COMMAND_SUBTYPE_GFXPIPE_COMMON == pCmd->TheStructure.Common.CommandSubtype && STATE_COMPUTE_MODE::_3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED == pCmd->TheStructure.Common._3DCommandOpcode && STATE_COMPUTE_MODE::_3D_COMMAND_SUB_OPCODE_STATE_COMPUTE_MODE == pCmd->TheStructure.Common._3DCommandSubOpcode ? pCmd : nullptr; } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/cmd_parse_gpgpu_walker.inl000066400000000000000000000136121363734646600311660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // clang-format off using namespace NEO; using GPGPU_WALKER = GenStruct::GPGPU_WALKER; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = GenStruct::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using MEDIA_STATE_FLUSH = GenStruct::MEDIA_STATE_FLUSH; using MEDIA_VFE_STATE = GenStruct::MEDIA_VFE_STATE; // clang-format on template <> GPGPU_WALKER *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return GPGPU_WALKER::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && GPGPU_WALKER::PIPELINE_MEDIA == pCmd->TheStructure.Common.Pipeline && GPGPU_WALKER::MEDIA_COMMAND_OPCODE_GPGPU_WALKER == pCmd->TheStructure.Common.MediaCommandOpcode && GPGPU_WALKER::SUBOPCODE_GPGPU_WALKER_SUBOP == pCmd->TheStructure.Common.Subopcode ? 
pCmd : nullptr; } template <> MEDIA_INTERFACE_DESCRIPTOR_LOAD *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MEDIA_INTERFACE_DESCRIPTOR_LOAD::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && MEDIA_INTERFACE_DESCRIPTOR_LOAD::PIPELINE_MEDIA == pCmd->TheStructure.Common.Pipeline && MEDIA_INTERFACE_DESCRIPTOR_LOAD::MEDIA_COMMAND_OPCODE_MEDIA_INTERFACE_DESCRIPTOR_LOAD == pCmd->TheStructure.Common.MediaCommandOpcode && MEDIA_INTERFACE_DESCRIPTOR_LOAD::SUBOPCODE_MEDIA_INTERFACE_DESCRIPTOR_LOAD_SUBOP == pCmd->TheStructure.Common.Subopcode ? pCmd : nullptr; } template <> MEDIA_VFE_STATE *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MEDIA_VFE_STATE::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && MEDIA_VFE_STATE::PIPELINE_MEDIA == pCmd->TheStructure.Common.Pipeline && MEDIA_VFE_STATE::MEDIA_COMMAND_OPCODE_MEDIA_VFE_STATE == pCmd->TheStructure.Common.MediaCommandOpcode && MEDIA_VFE_STATE::SUBOPCODE_MEDIA_VFE_STATE_SUBOP == pCmd->TheStructure.Common.Subopcode ? pCmd : nullptr; } template <> MEDIA_STATE_FLUSH *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MEDIA_STATE_FLUSH::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && MEDIA_STATE_FLUSH::PIPELINE_MEDIA == pCmd->TheStructure.Common.Pipeline && MEDIA_STATE_FLUSH::MEDIA_COMMAND_OPCODE_MEDIA_STATE_FLUSH == pCmd->TheStructure.Common.MediaCommandOpcode && MEDIA_STATE_FLUSH::SUBOPCODE_MEDIA_STATE_FLUSH_SUBOP == pCmd->TheStructure.Common.Subopcode ? 
pCmd : nullptr; } template bool CmdParse::parseCommandBuffer(GenCmdList &cmds, void *buffer, size_t length) { if (!buffer || length % sizeof(uint32_t)) { return false; } void *bufferEnd = reinterpret_cast(buffer) + length; while (buffer < bufferEnd) { size_t length = getCommandLength(buffer); if (!length) { return false; } cmds.push_back(buffer); buffer = reinterpret_cast(buffer) + length; } return buffer == bufferEnd; } // MIDL should have a MSF between it and a previous walker template <> template <> void CmdParse::validateCommand(GenCmdList::iterator itorBegin, GenCmdList::iterator itorEnd) { auto itorCurrent = itorBegin; auto itorWalker = itorEnd; // Find last GPGPU_WALKER prior to itorCmd while (itorCurrent != itorEnd) { if (genCmdCast(*itorCurrent)) { itorWalker = itorCurrent; } ++itorCurrent; } // If we don't find a GPGPU_WALKER, assume the beginning of a cmd list itorWalker = itorWalker == itorEnd ? itorBegin : itorWalker; // Look for MEDIA_STATE_FLUSH between last GPGPU_WALKER and MIDL. auto itorMSF = itorEnd; itorCurrent = itorWalker; ++itorCurrent; while (itorCurrent != itorEnd) { if (genCmdCast(*itorCurrent)) { itorMSF = itorCurrent; break; } ++itorCurrent; } ASSERT_FALSE(itorMSF == itorEnd) << "A MEDIA_STATE_FLUSH is required before a MEDIA_INTERFACE_DESCRIPTOR_LOAD."; } template <> template <> void CmdParse::validateCommand(GenCmdList::iterator itorBegin, GenCmdList::iterator itorEnd) { } // MVFES should have a stalling PC between it and a previous walker template <> template <> void CmdParse::validateCommand(GenCmdList::iterator itorBegin, GenCmdList::iterator itorEnd) { auto itorCurrent = itorBegin; auto itorWalker = itorEnd; // Find last GPGPU_WALKER prior to itorCmd while (itorCurrent != itorEnd) { if (genCmdCast(*itorCurrent)) { itorWalker = itorCurrent; } ++itorCurrent; } // If we don't find a GPGPU_WALKER, assume the beginning of a cmd list itorWalker = itorWalker == itorEnd ? 
itorBegin : itorWalker; // Look for PIPE_CONTROL between last GPGPU_WALKER and MVFES. itorCurrent = itorWalker; ++itorCurrent; while (itorCurrent != itorEnd) { if (genCmdCast(*itorCurrent)) { auto pPC = genCmdCast(*itorCurrent); if (pPC->getCommandStreamerStallEnable()) { return; } } ++itorCurrent; } ASSERT_TRUE(false) << "A PIPE_CONTROL w/ CS stall is required before a MEDIA_VFE_STATE."; } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/cmd_parse_mi_arb.inl000066400000000000000000000007271363734646600277330ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ template <> MI_ARB_CHECK *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return MI_ARB_CHECK::COMMAND_TYPE_MI_COMMAND == pCmd->TheStructure.Common.CommandType && MI_ARB_CHECK::MI_COMMAND_OPCODE_MI_ARB_CHECK == pCmd->TheStructure.Common.MiCommandOpcode ? pCmd : nullptr; } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/cmd_parse_sip.inl000066400000000000000000000030601363734646600272660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // clang-format off using namespace NEO; using GPGPU_CSR_BASE_ADDRESS = GenStruct::GPGPU_CSR_BASE_ADDRESS; using STATE_SIP = GenStruct::STATE_SIP; // clang-format on template <> GPGPU_CSR_BASE_ADDRESS *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return GPGPU_CSR_BASE_ADDRESS::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && GPGPU_CSR_BASE_ADDRESS::COMMAND_SUBTYPE_GFXPIPE_COMMON == pCmd->TheStructure.Common.CommandSubtype && GPGPU_CSR_BASE_ADDRESS::_3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED == pCmd->TheStructure.Common._3DCommandOpcode && GPGPU_CSR_BASE_ADDRESS::_3D_COMMAND_SUB_OPCODE_GPGPU_CSR_BASE_ADDRESS == pCmd->TheStructure.Common._3DCommandSubOpcode ? 
pCmd : nullptr; } template <> STATE_SIP *genCmdCast(void *buffer) { auto pCmd = reinterpret_cast(buffer); return STATE_SIP::COMMAND_TYPE_GFXPIPE == pCmd->TheStructure.Common.CommandType && STATE_SIP::COMMAND_SUBTYPE_GFXPIPE_COMMON == pCmd->TheStructure.Common.CommandSubtype && STATE_SIP::_3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED == pCmd->TheStructure.Common._3DCommandOpcode && STATE_SIP::_3D_COMMAND_SUB_OPCODE_STATE_SIP == pCmd->TheStructure.Common._3DCommandSubOpcode ? pCmd : nullptr; } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/exclude_tests/000077500000000000000000000000001363734646600266265ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/exclude_tests/exclude_test_declare.cpp000066400000000000000000000020251363734646600335000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "test.h" #include template struct ExcludeTest : ::testing::Test { void SetUp() override { EXPECT_NE(prohibitedValue, ::productFamily); } void TearDown() override { EXPECT_NE(prohibitedValue, ::productFamily); } }; using ExcludeTestBdw = ExcludeTest; HWCMDTEST_F(IGFX_GEN8_CORE, ExcludeTestBdw, givenHwCmdTestWhenBdwExcludedDontRunOnBdw) { EXPECT_NE(IGFX_BROADWELL, ::productFamily); } HWTEST_F(ExcludeTestBdw, givenHwTestWhenBdwExcludedDontRunOnBdw) { EXPECT_NE(IGFX_BROADWELL, ::productFamily); } using ExcludeTestSkl = ExcludeTest; HWCMDTEST_F(IGFX_GEN8_CORE, ExcludeTestSkl, givenHwCmdTestWhenSklExcludedDontRunOnSkl) { EXPECT_NE(IGFX_SKYLAKE, ::productFamily); } HWTEST_F(ExcludeTestSkl, givenHwTestWhenSklExcludedDontRunOnSkl) { EXPECT_NE(IGFX_SKYLAKE, ::productFamily); } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/exclude_tests/exclude_test_exclude.cpp000066400000000000000000000007701363734646600335370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT 
* */ #include "test.h" HWCMDTEST_EXCLUDE_FAMILY(ExcludeTestBdw, givenHwCmdTestWhenBdwExcludedDontRunOnBdw, IGFX_BROADWELL); HWCMDTEST_EXCLUDE_FAMILY(ExcludeTestBdw, givenHwTestWhenBdwExcludedDontRunOnBdw, IGFX_BROADWELL); HWCMDTEST_EXCLUDE_FAMILY(ExcludeTestSkl, givenHwCmdTestWhenSklExcludedDontRunOnSkl, IGFX_SKYLAKE); HWCMDTEST_EXCLUDE_FAMILY(ExcludeTestSkl, givenHwTestWhenSklExcludedDontRunOnSkl, IGFX_SKYLAKE); compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/gen_cmd_parse.h000066400000000000000000000064251363734646600267210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_cmds.h" #include #include typedef std::list GenCmdList; template Type genCmdCast(void *cmd); template static inline GenCmdList::iterator find(GenCmdList::iterator itorStart, GenCmdList::const_iterator itorEnd) { GenCmdList::iterator itor = itorStart; while (itor != itorEnd) { if (genCmdCast(*itor)) break; ++itor; } return itor; } template static inline std::vector findAll(GenCmdList::iterator commandListStart, GenCmdList::const_iterator commandListEnd) { std::vector matchedCommands; GenCmdList::iterator currentCommand = commandListStart; while (currentCommand != commandListEnd) { if (genCmdCast(*currentCommand)) { matchedCommands.push_back(currentCommand); } ++currentCommand; } return matchedCommands; } template static inline GenCmdList::iterator findMmio(GenCmdList::iterator itorStart, GenCmdList::const_iterator itorEnd, uint32_t regOffset) { GenCmdList::iterator itor = itorStart; while (itor != itorEnd) { auto cmd = genCmdCast(*itor); if (cmd && cmd->getRegisterOffset() == regOffset) break; ++itor; } return itor; } template static inline size_t countMmio(GenCmdList::iterator itorStart, GenCmdList::const_iterator itorEnd, uint32_t regOffset) { size_t count = 0; GenCmdList::iterator itor = itorStart; while (itor != itorEnd) { auto cmd = genCmdCast(*itor); if (cmd && 
cmd->getRegisterOffset() == regOffset) { ++count; } ++itor; } return count; } template static inline typename FamilyType::MI_LOAD_REGISTER_IMM *findMmioCmd(GenCmdList::iterator itorStart, GenCmdList::const_iterator itorEnd, uint32_t regOffset) { auto itor = findMmio(itorStart, itorEnd, regOffset); if (itor == itorEnd) { return nullptr; } return reinterpret_cast(*itor); } template static inline GenCmdList::reverse_iterator reverse_find(GenCmdList::reverse_iterator itorStart, GenCmdList::const_reverse_iterator itorEnd) { GenCmdList::reverse_iterator itor = itorStart; while (itor != itorEnd) { if (genCmdCast(*itor)) break; ++itor; } return itor; } template struct CmdParse : public T { static size_t getCommandLength(void *cmd); static size_t getCommandLengthHwSpecific(void *cmd); static size_t getAdditionalCommandLength(void *cmd); static bool parseCommandBuffer(GenCmdList &cmds, void *buffer, size_t length); template static void validateCommand(GenCmdList::iterator itorBegin, GenCmdList::iterator itorEnd); static const char *getCommandName(void *cmd); static const char *getCommandNameHwSpecific(void *cmd); static const char *getAdditionalCommandName(void *cmd); }; compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/gen_commands_common_validation.h000066400000000000000000000114071363734646600323430ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "gtest/gtest.h" #include namespace NEO { template void validateStateBaseAddress(uint64_t internalHeapBase, IndirectHeap *pDSH, IndirectHeap *pIOH, IndirectHeap *pSSH, GenCmdList::iterator &startCommand, GenCmdList::iterator &endCommand, GenCmdList &cmdList, uint64_t expectedGeneralStateHeapBaseAddress) { 
typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; // All state should be programmed before walker auto itorCmd = find(startCommand, endCommand); ASSERT_NE(endCommand, itorCmd); auto *cmd = (STATE_BASE_ADDRESS *)*itorCmd; // Verify all addresses are getting programmed EXPECT_TRUE(cmd->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getGeneralStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getSurfaceStateBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getIndirectObjectBaseAddressModifyEnable()); EXPECT_TRUE(cmd->getInstructionBaseAddressModifyEnable()); EXPECT_EQ(pDSH->getGraphicsAllocation()->getGpuAddress(), cmd->getDynamicStateBaseAddress()); // Stateless accesses require GSH.base to be 0. EXPECT_EQ(expectedGeneralStateHeapBaseAddress, cmd->getGeneralStateBaseAddress()); EXPECT_EQ(pSSH->getGraphicsAllocation()->getGpuAddress(), cmd->getSurfaceStateBaseAddress()); EXPECT_EQ(pIOH->getGraphicsAllocation()->getGpuBaseAddress(), cmd->getIndirectObjectBaseAddress()); EXPECT_EQ(internalHeapBase, cmd->getInstructionBaseAddress()); // Verify all sizes are getting programmed EXPECT_TRUE(cmd->getDynamicStateBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getGeneralStateBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getIndirectObjectBufferSizeModifyEnable()); EXPECT_TRUE(cmd->getInstructionBufferSizeModifyEnable()); EXPECT_EQ(pDSH->getMaxAvailableSpace(), cmd->getDynamicStateBufferSize() * MemoryConstants::pageSize); EXPECT_NE(0u, cmd->getGeneralStateBufferSize()); EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, cmd->getIndirectObjectBufferSize()); EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, cmd->getInstructionBufferSize()); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorCmd); } template void validateL3Programming(GenCmdList &cmdList, GenCmdList::iterator &itorWalker) { typedef typename FamilyType::PARSE PARSE; typedef typename PARSE::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; auto 
itorCmd = findMmio(cmdList.begin(), itorWalker, L3CNTLRegisterOffset::registerOffset); if (UnitTestHelper::isL3ConfigProgrammable()) { // All state should be programmed before walker ASSERT_NE(itorWalker, itorCmd); auto *cmd = genCmdCast(*itorCmd); ASSERT_NE(nullptr, cmd); auto registerOffset = L3CNTLRegisterOffset::registerOffset; EXPECT_EQ(registerOffset, cmd->getRegisterOffset()); auto l3Cntlreg = cmd->getDataDword(); auto numURBWays = (l3Cntlreg >> 1) & 0x7f; auto L3ClientPool = (l3Cntlreg >> 25) & 0x7f; EXPECT_NE(0u, numURBWays); EXPECT_NE(0u, L3ClientPool); } else { ASSERT_EQ(itorWalker, itorCmd); } } template void validateMediaVFEState(const HardwareInfo *hwInfo, void *cmdMediaVfeState, GenCmdList &cmdList, GenCmdList::iterator itorMediaVfeState) { typedef typename FamilyType::MEDIA_VFE_STATE MEDIA_VFE_STATE; auto *cmd = (MEDIA_VFE_STATE *)cmdMediaVfeState; ASSERT_NE(nullptr, cmd); uint32_t threadPerEU = (hwInfo->gtSystemInfo.ThreadCount / hwInfo->gtSystemInfo.EUCount) + hwInfo->capabilityTable.extraQuantityThreadsPerEU; uint32_t expected = hwInfo->gtSystemInfo.EUCount * threadPerEU; EXPECT_EQ(expected, cmd->getMaximumNumberOfThreads()); EXPECT_NE(0u, cmd->getNumberOfUrbEntries()); EXPECT_NE(0u, cmd->getUrbEntryAllocationSize()); EXPECT_EQ(0u, cmd->getScratchSpaceBasePointer()); EXPECT_EQ(0u, cmd->getPerThreadScratchSpace()); EXPECT_EQ(0u, cmd->getStackSize()); // Generically validate this command FamilyType::PARSE::template validateCommand(cmdList.begin(), itorMediaVfeState); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/hw_cmds_tests.cpp000066400000000000000000000020571363734646600273310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" using namespace NEO; using 
InterfaceDescriptorDataTests = ::testing::Test; HWCMDTEST_F(IGFX_GEN8_CORE, InterfaceDescriptorDataTests, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValueIsSet) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; MockDevice device; auto hwInfo = device.getHardwareInfo(); HardwareCommandsHelper::programBarrierEnable(&idd, 0, hwInfo); EXPECT_FALSE(idd.getBarrierEnable()); HardwareCommandsHelper::programBarrierEnable(&idd, 1, hwInfo); EXPECT_TRUE(idd.getBarrierEnable()); HardwareCommandsHelper::programBarrierEnable(&idd, 2, hwInfo); EXPECT_TRUE(idd.getBarrierEnable()); } compute-runtime-20.13.16352/opencl/test/unit_test/gen_common/matchers.h000066400000000000000000000010551363734646600257330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "gmock/gmock.h" #include #include MATCHER_P2(MemCompare, memory, size, "") { return memcmp(arg, memory, size) == 0; } MATCHER_P(MemoryZeroed, size, "") { size_t sizeLeft = (size_t)size; bool memoryZeroed = true; while (--sizeLeft) { uint8_t *pMem = (uint8_t *)arg; if (pMem[sizeLeft] != 0) { memoryZeroed = false; break; } } return memoryZeroed; } compute-runtime-20.13.16352/opencl/test/unit_test/global_environment.cpp000066400000000000000000000045251363734646600262300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/global_environment.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/os_interface/os_inc_base.h" TestEnvironment::TestEnvironment() { igcDebugVarStack.reserve(3); fclDebugVarStack.reserve(3); } void TestEnvironment::SetUp() { mockCompilerGuard.Enable(); fclPushDebugVars(fclDefaultDebugVars); igcPushDebugVars(igcDefaultDebugVars); if (libraryOS == nullptr) { libraryOS = 
setAdapterInfo(&hwInfoDefaultDebugVars.platform, &hwInfoDefaultDebugVars.gtSystemInfo, hwInfoDefaultDebugVars.capabilityTable.gpuAddressSpace); } } void TestEnvironment::TearDown() { delete libraryFrontEnd; delete libraryIGC; if (libraryOS != nullptr) { delete libraryOS; libraryOS = nullptr; } mockCompilerGuard.Disable(); } void TestEnvironment::fclPushDebugVars( MockCompilerDebugVars &newDebugVars) { fclDebugVarStack.push_back(newDebugVars); NEO::setFclDebugVars(newDebugVars); } void TestEnvironment::fclPopDebugVars() { fclDebugVarStack.pop_back(); if (fclDebugVarStack.empty()) { NEO::clearFclDebugVars(); } else { NEO::setFclDebugVars(fclDebugVarStack.back()); } } void TestEnvironment::igcPushDebugVars( MockCompilerDebugVars &newDebugVars) { igcDebugVarStack.push_back(newDebugVars); NEO::setIgcDebugVars(newDebugVars); } void TestEnvironment::igcPopDebugVars() { igcDebugVarStack.pop_back(); if (igcDebugVarStack.empty()) { NEO::clearIgcDebugVars(); } else { NEO::setIgcDebugVars(igcDebugVarStack.back()); } } void TestEnvironment::setDefaultDebugVars( MockCompilerDebugVars &fclDefaults, MockCompilerDebugVars &igcDefaults, HardwareInfo &hwInfo) { fclDefaultDebugVars = fclDefaults; igcDefaultDebugVars = igcDefaults; hwInfoDefaultDebugVars = hwInfo; } void TestEnvironment::setMockFileNames( std::string &fclMockFile, std::string &igcMockFile) { this->fclMockFile = fclMockFile; this->igcMockFile = igcMockFile; } std::string &TestEnvironment::fclGetMockFile() { return this->fclMockFile; } std::string &TestEnvironment::igcGetMockFile() { return this->igcMockFile; } compute-runtime-20.13.16352/opencl/test/unit_test/global_environment.h000066400000000000000000000034501363734646600256710ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_library.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include 
"gtest/gtest.h" using namespace NEO; OsLibrary *setAdapterInfo(const PLATFORM *platform, const GT_SYSTEM_INFO *gtSystemInfo, uint64_t gpuAddressSpace); class TestEnvironment : public ::testing::Environment { public: TestEnvironment(); void SetUp() override; void TearDown() override; virtual void fclPushDebugVars( MockCompilerDebugVars &newDebugVars); virtual void fclPopDebugVars(); virtual void igcPushDebugVars( MockCompilerDebugVars &newDebugVars); virtual void igcPopDebugVars(); virtual void setDefaultDebugVars( MockCompilerDebugVars &fclDefaults, MockCompilerDebugVars &igcDefaults, HardwareInfo &hwInfo); virtual void setMockFileNames( std::string &fclMockFile, std::string &igcMockFile); virtual std::string &fclGetMockFile(); virtual std::string &igcGetMockFile(); protected: OsLibrary *libraryFrontEnd = nullptr; OsLibrary *libraryIGC = nullptr; OsLibrary *libraryOS = nullptr; std::vector igcDebugVarStack; std::vector fclDebugVarStack; void (*igcSetDebugVarsFPtr)(MockCompilerDebugVars &debugVars); void (*fclSetDebugVarsFptr)(MockCompilerDebugVars &debugVars); MockCompilerDebugVars fclDefaultDebugVars{}; MockCompilerDebugVars igcDefaultDebugVars{}; HardwareInfo hwInfoDefaultDebugVars{}; std::string fclMockFile{}; std::string igcMockFile{}; MockCompilerEnableGuard mockCompilerGuard{}; }; extern TestEnvironment *gEnvironment; compute-runtime-20.13.16352/opencl/test/unit_test/gmm_helper/000077500000000000000000000000001363734646600237515ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gmm_helper/CMakeLists.txt000066400000000000000000000004571363734646600265170ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gmm_helper ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gmm_helper_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gmm_helper}) add_subdirectories() 
compute-runtime-20.13.16352/opencl/test/unit_test/gmm_helper/gmm_helper_tests.cpp000066400000000000000000001100321363734646600300130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/sku_info/operations/sku_info_transfer.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "GL/gl.h" #include "GL/glext.h" #include "gmm_client_context.h" #include "gtest/gtest.h" #include "igfxfmid.h" using namespace ::testing; namespace NEO { extern GMM_INIT_IN_ARGS passedInputArgs; extern SKU_FEATURE_TABLE passedFtrTable; extern WA_TABLE passedWaTable; extern bool copyInputArgs; struct GmmTests : public ::testing::Test { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); } ExecutionEnvironment *executionEnvironment = nullptr; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; }; TEST(GmmGlTests, givenGmmWhenAskedforCubeFaceIndexThenProperValueIsReturned) { std::vector> v = {{__GMM_CUBE_FACE_NEG_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X}, {__GMM_CUBE_FACE_POS_X, 
GL_TEXTURE_CUBE_MAP_POSITIVE_X}, {__GMM_CUBE_FACE_NEG_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y}, {__GMM_CUBE_FACE_POS_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Y}, {__GMM_CUBE_FACE_NEG_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z}, {__GMM_CUBE_FACE_POS_Z, GL_TEXTURE_CUBE_MAP_POSITIVE_Z}}; uint32_t maxVal = 0; for (auto p : v) { EXPECT_TRUE(p.first == GmmTypesConverter::getCubeFaceIndex(p.second)); maxVal = std::max(maxVal, p.second); } maxVal++; EXPECT_TRUE(__GMM_NO_CUBE_MAP == GmmTypesConverter::getCubeFaceIndex(maxVal)); } TEST_F(GmmTests, WhenGmmIsCreatedThenAllResourceAreCreated) { std::unique_ptr mm(new MemoryManagerCreate(false, false, *executionEnvironment)); void *pSysMem = mm->allocateSystemMemory(4096, 4096); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096, false)); ASSERT_TRUE(gmm->gmmResourceInfo.get() != nullptr); void *pGmmSysMem = gmm->gmmResourceInfo->getSystemMemPointer(); EXPECT_EQ(gmm->resourceParams.Flags.Gpu.NoRestriction, 0u); EXPECT_TRUE(pSysMem == pGmmSysMem); mm->freeSystemMemory(pSysMem); } TEST_F(GmmTests, GivenUncacheableWhenGmmIsCreatedThenAllResourceAreCreated) { std::unique_ptr mm(new MemoryManagerCreate(false, false, *executionEnvironment)); void *pSysMem = mm->allocateSystemMemory(4096, 4096); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096, true)); ASSERT_TRUE(gmm->gmmResourceInfo.get() != nullptr); void *pGmmSysMem = gmm->gmmResourceInfo->getSystemMemPointer(); EXPECT_EQ(gmm->resourceParams.Flags.Gpu.NoRestriction, 0u); EXPECT_TRUE(pSysMem == pGmmSysMem); EXPECT_EQ(GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC, gmm->resourceParams.Usage); mm->freeSystemMemory(pSysMem); } TEST_F(GmmTests, givenHostPointerWithHighestBitSetWhenGmmIsCreatedItHasTheSameAddress) { uintptr_t addressWithHighestBitSet = 0xffff0000; auto address = reinterpret_cast(addressWithHighestBitSet); auto expectedAddress = castToUint64(address); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), 
address, 4096, false)); EXPECT_EQ(gmm->resourceParams.pExistingSysMem, expectedAddress); } TEST_F(GmmTests, GivenBufferSizeLargerThenMaxPitchWhenAskedForGmmCreationThenGmmResourceIsCreatedWithNoRestrictionsFlag) { auto maxSize = static_cast(GmmHelper::maxPossiblePitch); MemoryManager *mm = new MemoryManagerCreate(false, false, *executionEnvironment); void *pSysMem = mm->allocateSystemMemory(4096, 4096); auto gmmRes = new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, maxSize, false); ASSERT_TRUE(gmmRes->gmmResourceInfo.get() != nullptr); EXPECT_EQ(gmmRes->resourceParams.Flags.Gpu.NoRestriction, 1u); mm->freeSystemMemory(pSysMem); delete gmmRes; delete mm; } TEST_F(GmmTests, givenGmmCreatedFromExistingGmmThenHelperDoesNotReleaseParentGmm) { auto size = 4096u; void *incomingPtr = (void *)0x1000; auto gmmRes = new Gmm(rootDeviceEnvironment->getGmmClientContext(), incomingPtr, size, false); auto gmmRes2 = new Gmm(rootDeviceEnvironment->getGmmClientContext(), gmmRes->gmmResourceInfo->peekHandle()); //copy is being made EXPECT_NE(gmmRes2->gmmResourceInfo->peekHandle(), gmmRes->gmmResourceInfo->peekHandle()); auto allocationSize = gmmRes->gmmResourceInfo->getSizeAllocation(); EXPECT_NE(0u, allocationSize); EXPECT_EQ(allocationSize, gmmRes2->gmmResourceInfo->getSizeAllocation()); //now delete parent GMM and query child, this shouldn't fail delete gmmRes; EXPECT_EQ(allocationSize, gmmRes2->gmmResourceInfo->getSizeAllocation()); delete gmmRes2; } TEST_F(GmmTests, GivenInvalidImageSizeWhenQueryingImgParamsThenImageInfoReturnsSizeZero) { cl_image_desc imgDesc = {CL_MEM_OBJECT_IMAGE2D}; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto queryGmm = MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); EXPECT_EQ(imgInfo.size, 0u); } TEST_F(GmmTests, GivenInvalidImageTypeWhenQueryingImgParamsThenExceptionIsThrown) { cl_image_desc imgDesc{}; imgDesc.image_width = 10; imgDesc.image_type = 0; // invalid auto imgInfo = 
MockGmm::initImgInfo(imgDesc, 0, nullptr); EXPECT_THROW(MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo), std::exception); } TEST_F(GmmTests, WhenQueryingImgParamsThenCorrectValuesAreReturned) { const HardwareInfo *hwinfo = defaultHwInfo.get(); cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imgDesc.image_width = 17; imgDesc.image_height = 17; imgDesc.image_depth = 17; size_t pixelSize = 4; size_t minSize = imgDesc.image_width * imgDesc.image_height * imgDesc.image_depth * pixelSize; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto queryGmm = MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); EXPECT_GT(imgInfo.size, minSize); EXPECT_GT(imgInfo.rowPitch, 0u); EXPECT_GT(imgInfo.slicePitch, 0u); if (hwinfo->platform.eRenderCoreFamily == IGFX_GEN8_CORE) { EXPECT_EQ(imgInfo.qPitch, 0u); } else { EXPECT_GT(imgInfo.qPitch, 0u); } auto &hwHelper = HwHelper::get(hwinfo->platform.eRenderCoreFamily); EXPECT_EQ(queryGmm->resourceParams.Type, GMM_RESOURCE_TYPE::RESOURCE_3D); EXPECT_EQ(queryGmm->resourceParams.NoGfxMemory, 1u); EXPECT_EQ(queryGmm->resourceParams.Usage, GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); EXPECT_EQ(static_cast(queryGmm->resourceParams.Format), static_cast(GMM_RESOURCE_FORMAT::GMM_FORMAT_R8G8B8A8_UNORM)); EXPECT_EQ(queryGmm->resourceParams.Flags.Gpu.Texture, 1u); EXPECT_EQ(queryGmm->resourceParams.BaseWidth64, 17u); EXPECT_EQ(queryGmm->resourceParams.BaseHeight, 17u); EXPECT_EQ(queryGmm->resourceParams.Depth, 17u); EXPECT_EQ(queryGmm->resourceParams.ArraySize, 1u); EXPECT_EQ(!!queryGmm->resourceParams.Flags.Wa.__ForceOtherHVALIGN4, hwHelper.hvAlign4Required()); } TEST_F(GmmTests, givenWidthWhenCreatingResourceThenSetWidth64Field) { const void *dummyPtr = reinterpret_cast(0x123); size_t allocationSize = std::numeric_limits::max(); Gmm gmm(rootDeviceEnvironment->getGmmClientContext(), dummyPtr, allocationSize, false); 
EXPECT_EQ(static_cast(allocationSize), gmm.resourceParams.BaseWidth64); } TEST_F(GmmTests, givenNullptrWhenGmmConstructorIsCalledThenNoGfxMemoryIsProperlySet) { void *pSysMem = nullptr; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096, false)); EXPECT_EQ(gmm->resourceParams.NoGfxMemory, 1u); } TEST_F(GmmTests, givenPtrWhenGmmConstructorIsCalledThenNoGfxMemoryIsProperlySet) { void *pSysMem = reinterpret_cast(0x1111); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096, false)); EXPECT_EQ(gmm->resourceParams.NoGfxMemory, 0u); } TEST_F(GmmTests, given2DimageFromBufferParametersWhenGmmResourceIsCreatedThenItHasDesiredPitchAndSize) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 329; imgDesc.image_height = 349; imgDesc.image_depth = 1; imgDesc.image_row_pitch = 5312; imgDesc.buffer = (cl_mem)0x1000; ClSurfaceFormatInfo surfaceFormat = {{CL_RGBA, CL_FLOAT}, {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, (GFX3DSTATE_SURFACEFORMAT)0, 0, 4, 4, 16}}; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormat); auto queryGmm = MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); auto renderSize = queryGmm->gmmResourceInfo->getSizeAllocation(); size_t expectedSize = imgDesc.image_row_pitch * imgDesc.image_height; EXPECT_GE(renderSize, expectedSize); EXPECT_EQ(imgDesc.image_row_pitch, queryGmm->gmmResourceInfo->getRenderPitch()); } TEST_F(GmmTests, given2DimageFromBufferParametersWhenGmmResourceIsCreatedAndPitchIsOverridenThenItHasDesiredPitchAndSize) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 329; imgDesc.image_height = 349; imgDesc.image_depth = 1; imgDesc.image_row_pitch = 5376; imgDesc.buffer = (cl_mem)0x1000; ClSurfaceFormatInfo surfaceFormat = {{CL_RGBA, CL_FLOAT}, {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, (GFX3DSTATE_SURFACEFORMAT)0, 0, 4, 4, 16}}; auto imgInfo = 
MockGmm::initImgInfo(imgDesc, 0, &surfaceFormat); EXPECT_EQ(imgInfo.imgDesc.imageRowPitch, imgDesc.image_row_pitch); auto queryGmm = MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); auto renderSize = queryGmm->gmmResourceInfo->getSizeAllocation(); size_t expectedSize = imgDesc.image_row_pitch * imgDesc.image_height; EXPECT_GE(renderSize, expectedSize); EXPECT_EQ(imgDesc.image_row_pitch, queryGmm->gmmResourceInfo->getRenderPitch()); } TEST_F(GmmTests, givenPlanarFormatsWhenQueryingImageParamsThenUvOffsetIsQueried) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 4; imgDesc.image_height = 4; imgDesc.image_depth = 1; ClSurfaceFormatInfo surfaceFormatNV12 = {{CL_NV12_INTEL, CL_UNORM_INT8}, {GMM_FORMAT_NV12, GFX3DSTATE_SURFACEFORMAT_NV12, 0, 1, 1, 1}}; ClSurfaceFormatInfo surfaceFormatP010 = {{CL_R, CL_UNORM_INT16}, {GMM_FORMAT_P010, GFX3DSTATE_SURFACEFORMAT_NV12, 0, 1, 2, 2}}; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormatNV12); imgInfo.yOffsetForUVPlane = 0; MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); EXPECT_NE(0u, imgInfo.yOffsetForUVPlane); imgInfo = MockGmm::initImgInfo(imgDesc, 0, &surfaceFormatP010); imgInfo.yOffsetForUVPlane = 0; MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); EXPECT_NE(0u, imgInfo.yOffsetForUVPlane); } TEST_F(GmmTests, givenTilingModeSetToTileYWhenHwSupportsTilingThenTileYFlagIsSet) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 4; imgDesc.image_height = 4; imgDesc.image_depth = 1; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); imgInfo.linearStorage = false; auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), imgInfo, StorageInfo{}); EXPECT_EQ(gmm->resourceParams.Flags.Info.Linear, 0u); EXPECT_EQ(gmm->resourceParams.Flags.Info.TiledY, 0u); } TEST_F(GmmTests, 
givenTilingModeSetToNonTiledWhenCreatingGmmThenLinearFlagIsSet) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 4; imgDesc.image_height = 4; imgDesc.image_depth = 1; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); imgInfo.linearStorage = true; auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), imgInfo, StorageInfo{}); EXPECT_EQ(gmm->resourceParams.Flags.Info.Linear, 1u); EXPECT_EQ(gmm->resourceParams.Flags.Info.TiledY, 0u); } TEST_F(GmmTests, givenZeroRowPitchWhenQueryImgFromBufferParamsThenCalculate) { MockGraphicsAllocation bufferAllocation(nullptr, 4096); cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 5; imgDesc.image_height = 5; imgDesc.image_row_pitch = 0; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); size_t expectedRowPitch = imgDesc.image_width * imgInfo.surfaceFormat->ImageElementSizeInBytes; GmmTypesConverter::queryImgFromBufferParams(imgInfo, &bufferAllocation); EXPECT_EQ(imgInfo.rowPitch, expectedRowPitch); } TEST_F(GmmTests, givenNonZeroRowPitchWhenQueryImgFromBufferParamsThenUseUserValue) { MockGraphicsAllocation bufferAllocation(nullptr, 4096); cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 5; imgDesc.image_height = 5; imgDesc.image_row_pitch = 123; size_t expectedRowPitch = imgDesc.image_row_pitch; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); GmmTypesConverter::queryImgFromBufferParams(imgInfo, &bufferAllocation); EXPECT_EQ(imgInfo.rowPitch, expectedRowPitch); } TEST_F(GmmTests, WhenCanonizingThenCorrectAddressIsReturned) { auto hwInfo = *defaultHwInfo; // 48 bit - canonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(48); // 0x0000FFFFFFFFFFFF; auto gmmHelper = std::make_unique(nullptr, &hwInfo); uint64_t testAddr1 = 0x7777777777777777; uint64_t goodAddr1 = 0x0000777777777777; EXPECT_EQ(GmmHelper::canonize(testAddr1), goodAddr1); uint64_t 
testAddr2 = 0x7FFFFFFFFFFFFFFF; uint64_t goodAddr2 = 0xFFFFFFFFFFFFFFFF; EXPECT_EQ(GmmHelper::canonize(testAddr2), goodAddr2); // 36 bit - also canonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(36); // 0x0000000FFFFFFFFF; gmmHelper = std::make_unique(nullptr, &hwInfo); EXPECT_EQ(GmmHelper::canonize(testAddr1), goodAddr1); EXPECT_EQ(GmmHelper::canonize(testAddr2), goodAddr2); } TEST_F(GmmTests, WhenDecanonizingThenCorrectAddressIsReturned) { auto hwInfo = *defaultHwInfo; // 48 bit - decanonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(48); //0x0000FFFFFFFFFFFF; auto gmmHelper = std::make_unique(nullptr, &hwInfo); uint64_t testAddr1 = 0x7777777777777777; uint64_t goodAddr1 = 0x0000777777777777; EXPECT_EQ(GmmHelper::decanonize(testAddr1), goodAddr1); uint64_t testAddr2 = 0x7FFFFFFFFFFFFFFF; uint64_t goodAddr2 = 0x0000FFFFFFFFFFFF; EXPECT_EQ(GmmHelper::decanonize(testAddr2), goodAddr2); // 36 bit - also decanonize to 48 bit hwInfo.capabilityTable.gpuAddressSpace = maxNBitValue(36); // 0x0000000FFFFFFFFF; gmmHelper = std::make_unique(nullptr, &hwInfo); EXPECT_EQ(GmmHelper::decanonize(testAddr1), goodAddr1); EXPECT_EQ(GmmHelper::decanonize(testAddr2), goodAddr2); } TEST_F(GmmTests, givenMipmapedInputWhenAskedForHalingThenNonDefaultValueIsReturned) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 60; imgDesc.image_height = 40; imgDesc.image_depth = 1; int mipLevel = 5; auto imgInfo = MockGmm::initImgInfo(imgDesc, mipLevel, nullptr); auto queryGmm = MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); EXPECT_EQ(static_cast(queryGmm->resourceParams.MaxLod), mipLevel); } TEST_F(GmmTests, givenNumSamplesWhenAskedForMultisamplesCountThenReturnValue) { uint32_t numSamples[5][2] = {{0, 0}, {2, 1}, {4, 2}, {8, 3}, {16, 4}}; //{given, expected} for (int i = 0; i < 5; i++) { auto result = GmmTypesConverter::getRenderMultisamplesCount(numSamples[i][0]); 
EXPECT_EQ(numSamples[i][1], result); } } struct GmmMediaCompressedTests : public GmmTests { void SetUp() override { GmmTests::SetUp(); StorageInfo info; gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 4, false, true, true, info); flags = gmm->gmmResourceInfo->getResourceFlags(); flags->Gpu.CCS = true; flags->Gpu.UnifiedAuxSurface = true; } std::unique_ptr gmm; GMM_RESOURCE_FLAG *flags; }; TEST_F(GmmMediaCompressedTests, givenMediaCompressedGmmUnifiedAuxTranslationCapableReturnsTrue) { flags->Info.MediaCompressed = true; flags->Info.RenderCompressed = false; EXPECT_TRUE(gmm->unifiedAuxTranslationCapable()); } TEST_F(GmmMediaCompressedTests, givenRenderCompressedGmmUnifiedAuxTranslationCapableReturnsTrue) { flags->Info.MediaCompressed = false; flags->Info.RenderCompressed = true; EXPECT_TRUE(gmm->unifiedAuxTranslationCapable()); } TEST_F(GmmMediaCompressedTests, givenMediaAndRenderCompressedGmmUnifiedAuxTranslationCapableThrowsException) { flags->Info.MediaCompressed = true; flags->Info.RenderCompressed = true; EXPECT_THROW(gmm->unifiedAuxTranslationCapable(), std::exception); } TEST_F(GmmMediaCompressedTests, givenNotMediaAndNotRenderCompressedGmmUnifiedAuxTranslationCapableReturnsFalse) { flags->Info.MediaCompressed = false; flags->Info.RenderCompressed = false; EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); } namespace GmmTestConst { static const cl_mem_object_type imgTypes[6] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; } // namespace GmmTestConst TEST_F(GmmTests, converNeoPlaneToGmmPlane) { std::vector> v = {{ImagePlane::NO_PLANE, GMM_YUV_PLANE::GMM_NO_PLANE}, {ImagePlane::PLANE_Y, GMM_YUV_PLANE::GMM_PLANE_Y}, {ImagePlane::PLANE_U, GMM_YUV_PLANE::GMM_PLANE_U}, {ImagePlane::PLANE_UV, GMM_YUV_PLANE::GMM_PLANE_U}, {ImagePlane::PLANE_V, GMM_YUV_PLANE::GMM_PLANE_V}}; for (auto p : v) { EXPECT_TRUE(p.second == 
GmmTypesConverter::convertPlane(p.first)); } } class GmmImgTest : public GmmTests, public ::testing::WithParamInterface {}; INSTANTIATE_TEST_CASE_P( GmmImgTests, GmmImgTest, testing::ValuesIn(GmmTestConst::imgTypes)); TEST_P(GmmImgTest, updateImgInfoAndDesc) { struct MyMockGmmResourceInfo : MockGmmResourceInfo { MyMockGmmResourceInfo(GMM_RESCREATE_PARAMS *resourceCreateParams) : MockGmmResourceInfo(resourceCreateParams) {} GMM_STATUS getOffset(GMM_REQ_OFFSET_INFO &reqOffsetInfo) override { givenReqInfo[getOffsetCalled] = reqOffsetInfo; getOffsetCalled++; return MockGmmResourceInfo::getOffset(reqOffsetInfo); } uint32_t getOffsetCalled = 0u; GMM_REQ_OFFSET_INFO givenReqInfo[2] = {}; }; ImageInfo updateImgInfo = {}; updateImgInfo.plane = GMM_YUV_PLANE::GMM_PLANE_U; uint32_t expectCalls = 1u; GMM_REQ_OFFSET_INFO expectedReqInfo[2] = {}; expectedReqInfo[0].ReqLock = 1; expectedReqInfo[1].ReqRender = 1; expectedReqInfo[1].Plane = updateImgInfo.plane; cl_image_desc imgDesc{}; imgDesc.image_type = GetParam(); imgDesc.image_width = 60; imgDesc.image_height = 1; imgDesc.image_depth = 1; imgDesc.image_array_size = 1; cl_uint arrayIndex = 0; if (imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D || imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || imgDesc.image_type == CL_MEM_OBJECT_IMAGE3D) { imgDesc.image_height = 40; } if (imgDesc.image_type == CL_MEM_OBJECT_IMAGE3D) { imgDesc.image_depth = 5; expectCalls = 2u; expectedReqInfo[0].Slice = 1; } if (imgDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || imgDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { imgDesc.image_array_size = 5; expectCalls = 2u; arrayIndex = 2; expectedReqInfo[0].ArrayIndex = 1; expectedReqInfo[1].ArrayIndex = arrayIndex; } auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto queryGmm = MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); auto mockResInfo = new NiceMock(&queryGmm->resourceParams); queryGmm->gmmResourceInfo.reset(mockResInfo); 
queryGmm->updateImgInfoAndDesc(updateImgInfo, arrayIndex); EXPECT_EQ(expectCalls, mockResInfo->getOffsetCalled); EXPECT_EQ(imgDesc.image_width, updateImgInfo.imgDesc.imageWidth); EXPECT_EQ(imgDesc.image_height, updateImgInfo.imgDesc.imageHeight); EXPECT_EQ(imgDesc.image_depth, updateImgInfo.imgDesc.imageDepth); EXPECT_EQ(imgDesc.image_array_size, updateImgInfo.imgDesc.imageArraySize); EXPECT_GT(updateImgInfo.imgDesc.imageRowPitch, 0u); EXPECT_GT(updateImgInfo.imgDesc.imageSlicePitch, 0u); if (expectCalls == 1) { EXPECT_TRUE(memcmp(&expectedReqInfo[1], &mockResInfo->givenReqInfo[0], sizeof(GMM_REQ_OFFSET_INFO)) == 0); } else if (expectCalls == 2u) { EXPECT_TRUE(memcmp(&expectedReqInfo[0], &mockResInfo->givenReqInfo[0], sizeof(GMM_REQ_OFFSET_INFO)) == 0); EXPECT_TRUE(memcmp(&expectedReqInfo[1], &mockResInfo->givenReqInfo[1], sizeof(GMM_REQ_OFFSET_INFO)) == 0); } else { EXPECT_TRUE(false); } } TEST(GmmImgTest, givenImgInfoWhenUpdatingOffsetsCallGmmToGetOffsets) { struct GmmGetOffsetOutput { uint32_t Offset; uint32_t XOffset; uint32_t YOffset; }; struct MyMockGmmResourceInfo : MockGmmResourceInfo { MyMockGmmResourceInfo(GMM_RESCREATE_PARAMS *resourceCreateParams) : MockGmmResourceInfo(resourceCreateParams) {} GMM_STATUS getOffset(GMM_REQ_OFFSET_INFO &reqOffsetInfo) override { EXPECT_EQ(1u, reqOffsetInfo.ReqRender); EXPECT_EQ(0u, reqOffsetInfo.Slice); EXPECT_EQ(expectedArrayIndex, reqOffsetInfo.ArrayIndex); EXPECT_EQ(expectedGmmPlane, reqOffsetInfo.Plane); reqOffsetInfo.Render.Offset = gmmGetOffsetOutput.Offset; reqOffsetInfo.Render.XOffset = gmmGetOffsetOutput.XOffset; reqOffsetInfo.Render.YOffset = gmmGetOffsetOutput.YOffset; return GMM_SUCCESS; } uint32_t getBitsPerPixel() override { return gmmGetBitsPerPixelOutput; } cl_uint expectedArrayIndex; GMM_YUV_PLANE_ENUM expectedGmmPlane; GmmGetOffsetOutput gmmGetOffsetOutput; uint32_t gmmGetBitsPerPixelOutput; }; cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imgDesc.image_width = 60; 
imgDesc.image_height = 1; imgDesc.image_depth = 1; imgDesc.image_array_size = 10; ImageInfo imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); std::unique_ptr gmm = MockGmm::queryImgParams(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), imgInfo); MyMockGmmResourceInfo *mockGmmResourceInfo = new MyMockGmmResourceInfo(&gmm->resourceParams); gmm->gmmResourceInfo.reset(mockGmmResourceInfo); mockGmmResourceInfo->expectedArrayIndex = 7; mockGmmResourceInfo->expectedGmmPlane = imgInfo.plane; mockGmmResourceInfo->gmmGetOffsetOutput = {10, 111, 120}; mockGmmResourceInfo->gmmGetBitsPerPixelOutput = 24; gmm->updateOffsetsInImgInfo(imgInfo, mockGmmResourceInfo->expectedArrayIndex); EXPECT_EQ(mockGmmResourceInfo->gmmGetOffsetOutput.Offset, imgInfo.offset); const auto expectedXOffset = mockGmmResourceInfo->gmmGetOffsetOutput.XOffset / (mockGmmResourceInfo->gmmGetBitsPerPixelOutput / 8); EXPECT_EQ(expectedXOffset, imgInfo.xOffset); EXPECT_EQ(mockGmmResourceInfo->gmmGetOffsetOutput.YOffset, imgInfo.yOffset); } TEST_F(GmmTests, copyResourceBlt) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imgDesc.image_width = 17; imgDesc.image_height = 17; imgDesc.image_depth = 17; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); auto gmm = MockGmm::queryImgParams(rootDeviceEnvironment->getGmmClientContext(), imgInfo); auto mockResInfo = reinterpret_cast *>(gmm->gmmResourceInfo.get()); GMM_RES_COPY_BLT requestedCpuBlt = {}; GMM_RES_COPY_BLT expectedCpuBlt = {}; char sys(0), gpu(0); uint32_t pitch = 300; uint32_t height = 400; uint8_t upload = 1u; expectedCpuBlt.Sys.pData = &sys; expectedCpuBlt.Gpu.pData = &gpu; expectedCpuBlt.Sys.RowPitch = pitch; expectedCpuBlt.Blt.Upload = upload; expectedCpuBlt.Sys.BufferSize = pitch * height; auto invokeParamsCopy = [&](GMM_RES_COPY_BLT *resCopyBlt) { requestedCpuBlt = *resCopyBlt; return 1; }; // plane Y EXPECT_CALL(*mockResInfo, 
cpuBlt(_)).Times(1).WillOnce(Invoke(invokeParamsCopy)); auto retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_Y); EXPECT_EQ(1u, retVal); EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0); // no-plane EXPECT_CALL(*mockResInfo, cpuBlt(_)).Times(1).WillOnce(Invoke(invokeParamsCopy)); retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::NO_PLANE); EXPECT_EQ(1u, retVal); EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0); //plane UV expectedCpuBlt.Sys.pData = ptrOffset(&sys, height * pitch * 2u); EXPECT_CALL(*mockResInfo, cpuBlt(_)).Times(1).WillOnce(Invoke(invokeParamsCopy)); retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_UV); EXPECT_EQ(1u, retVal); EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0); //plane V expectedCpuBlt.Sys.pData = ptrOffset(&sys, height * pitch * 2u); expectedCpuBlt.Sys.RowPitch = pitch / 2; expectedCpuBlt.Sys.BufferSize = expectedCpuBlt.Sys.RowPitch * height; EXPECT_CALL(*mockResInfo, cpuBlt(_)).Times(1).WillOnce(Invoke(invokeParamsCopy)); retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_V); EXPECT_EQ(1u, retVal); EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0); //plane U expectedCpuBlt.Sys.pData = ptrOffset(&sys, height * pitch * 2u + height * pitch / 2u); expectedCpuBlt.Sys.RowPitch = pitch / 2; expectedCpuBlt.Sys.BufferSize = expectedCpuBlt.Sys.RowPitch * height; EXPECT_CALL(*mockResInfo, cpuBlt(_)).Times(1).WillOnce(Invoke(invokeParamsCopy)); retVal = gmm->resourceCopyBlt(&sys, &gpu, pitch, height, upload, ImagePlane::PLANE_U); EXPECT_EQ(1u, retVal); EXPECT_TRUE(memcmp(&expectedCpuBlt, &requestedCpuBlt, sizeof(GMM_RES_COPY_BLT)) == 0); } TEST(GmmTest, givenAllValidFlagsWhenAskedForUnifiedAuxTranslationCapabilityThenReturnTrue) { auto gmm = std::unique_ptr(new 
Gmm(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, false)); auto mockResource = reinterpret_cast(gmm->gmmResourceInfo.get()); mockResource->setUnifiedAuxTranslationCapable(); EXPECT_EQ(1u, mockResource->mockResourceCreateParams.Flags.Gpu.CCS); EXPECT_EQ(1u, mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface); EXPECT_EQ(1u, mockResource->mockResourceCreateParams.Flags.Info.RenderCompressed); EXPECT_TRUE(gmm->unifiedAuxTranslationCapable()); } TEST(GmmTest, givenInvalidFlagsSetWhenAskedForUnifiedAuxTranslationCapabilityThenReturnFalse) { auto gmm = std::unique_ptr(new Gmm(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, false)); auto mockResource = reinterpret_cast(gmm->gmmResourceInfo.get()); mockResource->mockResourceCreateParams.Flags.Gpu.CCS = 0; mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface = 1; mockResource->mockResourceCreateParams.Flags.Info.RenderCompressed = 1; EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); // CCS == 0 mockResource->mockResourceCreateParams.Flags.Gpu.CCS = 1; mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface = 0; EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); // UnifiedAuxSurface == 0 mockResource->mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface = 1; mockResource->mockResourceCreateParams.Flags.Info.RenderCompressed = 0; EXPECT_FALSE(gmm->unifiedAuxTranslationCapable()); // RenderCompressed == 0 } TEST(GmmTest, givenHwInfoWhenDeviceIsCreatedTheSetThisHwInfoToGmmHelper) { std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_EQ(&device->getHardwareInfo(), device->getGmmHelper()->getHardwareInfo()); } TEST(GmmTest, whenResourceIsCreatedThenHandleItsOwnership) { struct MyMockResourecInfo : public GmmResourceInfo { using GmmResourceInfo::resourceInfo; MyMockResourecInfo(GMM_RESCREATE_PARAMS *inputParams) : 
GmmResourceInfo(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), inputParams){}; MyMockResourecInfo(GMM_RESOURCE_INFO *inputGmmResourceInfo) : GmmResourceInfo(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), inputGmmResourceInfo){}; }; GMM_RESCREATE_PARAMS gmmParams = {}; gmmParams.Type = RESOURCE_BUFFER; gmmParams.Format = GMM_FORMAT_GENERIC_8BIT; gmmParams.BaseWidth64 = 1; gmmParams.BaseHeight = 1; gmmParams.Depth = 1; gmmParams.Flags.Info.Linear = 1; gmmParams.Flags.Info.Cacheable = 1; gmmParams.Flags.Gpu.Texture = 1; gmmParams.Usage = GMM_RESOURCE_USAGE_OCL_BUFFER; MyMockResourecInfo myMockResourceInfo1(&gmmParams); EXPECT_NE(nullptr, myMockResourceInfo1.resourceInfo.get()); MyMockResourecInfo myMockResourceInfo2(myMockResourceInfo1.resourceInfo.get()); EXPECT_NE(nullptr, myMockResourceInfo2.resourceInfo.get()); EXPECT_NE(myMockResourceInfo1.resourceInfo.get(), myMockResourceInfo2.resourceInfo.get()); } TEST(GmmTest, givenGmmWithNotSetMCSInResourceInfoGpuFlagsWhenCallHasMultisampleControlSurfaceThenReturnFalse) { auto gmm = std::unique_ptr(new Gmm(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, false)); EXPECT_FALSE(gmm->hasMultisampleControlSurface()); } TEST(GmmTest, givenGmmWithSetMCSInResourceInfoGpuFlagsWhenCallhasMultisampleControlSurfaceThenReturnTrue) { auto gmm = std::unique_ptr(new Gmm(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, false)); auto mockResource = reinterpret_cast(gmm->gmmResourceInfo.get()); mockResource->setMultisampleControlSurface(); EXPECT_TRUE(gmm->hasMultisampleControlSurface()); } TEST(GmmHelperTest, whenGmmHelperIsInitializedThenClientContextIsSet) { ASSERT_NE(nullptr, platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmHelper()); EXPECT_NE(nullptr, 
platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext()->getHandle()); } TEST(GmmHelperTest, givenPlatformAlreadyDestroyedWhenResourceIsBeingDestroyedThenObserveNoExceptions) { struct MockGmmResourecInfo : public GmmResourceInfo { using GmmResourceInfo::resourceInfo; MockGmmResourecInfo(GMM_RESCREATE_PARAMS *inputParams) : GmmResourceInfo(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), inputParams){}; }; GMM_RESCREATE_PARAMS gmmParams = {}; gmmParams.Type = RESOURCE_BUFFER; gmmParams.Format = GMM_FORMAT_GENERIC_8BIT; gmmParams.BaseWidth64 = 1; gmmParams.BaseHeight = 1; gmmParams.Depth = 1; gmmParams.Flags.Info.Linear = 1; gmmParams.Flags.Info.Cacheable = 1; gmmParams.Flags.Gpu.Texture = 1; gmmParams.Usage = GMM_RESOURCE_USAGE_OCL_BUFFER; auto gmmResourceInfo = new MockGmmResourecInfo(&gmmParams); auto executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->incRefInternal(); platformsImpl.clear(); EXPECT_EQ(nullptr, platform()); EXPECT_NO_THROW(delete gmmResourceInfo); executionEnvironment->decRefInternal(); } TEST(GmmHelperTest, givenValidGmmFunctionsWhenCreateGmmHelperWithInitializedOsInterfaceThenProperParametersArePassed) { std::unique_ptr gmmHelper; auto executionEnvironment = platform()->peekExecutionEnvironment(); DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); VariableBackup passedInputArgsBackup(&passedInputArgs); VariableBackup passedFtrTableBackup(&passedFtrTable); VariableBackup passedWaTableBackup(&passedWaTable); VariableBackup copyInputArgsBackup(©InputArgs, true); auto hwInfo = defaultHwInfo.get(); SKU_FEATURE_TABLE expectedFtrTable = {}; WA_TABLE expectedWaTable = {}; SkuInfoTransfer::transferFtrTableForGmm(&expectedFtrTable, &hwInfo->featureTable); SkuInfoTransfer::transferWaTableForGmm(&expectedWaTable, &hwInfo->workaroundTable); gmmHelper.reset(new GmmHelper(executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(), 
hwInfo)); EXPECT_EQ(0, memcmp(&hwInfo->platform, &passedInputArgs.Platform, sizeof(PLATFORM))); EXPECT_EQ(&hwInfo->gtSystemInfo, passedInputArgs.pGtSysInfo); EXPECT_EQ(0, memcmp(&expectedFtrTable, &passedFtrTable, sizeof(SKU_FEATURE_TABLE))); EXPECT_EQ(0, memcmp(&expectedWaTable, &passedWaTable, sizeof(WA_TABLE))); EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, passedInputArgs.ClientType); } TEST(GmmHelperTest, givenValidGmmFunctionsWhenCreateGmmHelperWithoutOsInterfaceThenInitializationDoesntCrashAndProperParametersArePassed) { std::unique_ptr gmmHelper; VariableBackup passedInputArgsBackup(&passedInputArgs); VariableBackup passedFtrTableBackup(&passedFtrTable); VariableBackup passedWaTableBackup(&passedWaTable); VariableBackup copyInputArgsBackup(©InputArgs, true); auto hwInfo = defaultHwInfo.get(); SKU_FEATURE_TABLE expectedFtrTable = {}; WA_TABLE expectedWaTable = {}; SkuInfoTransfer::transferFtrTableForGmm(&expectedFtrTable, &hwInfo->featureTable); SkuInfoTransfer::transferWaTableForGmm(&expectedWaTable, &hwInfo->workaroundTable); gmmHelper.reset(new GmmHelper(nullptr, hwInfo)); EXPECT_EQ(0, memcmp(&hwInfo->platform, &passedInputArgs.Platform, sizeof(PLATFORM))); EXPECT_EQ(&hwInfo->gtSystemInfo, passedInputArgs.pGtSysInfo); EXPECT_EQ(0, memcmp(&expectedFtrTable, &passedFtrTable, sizeof(SKU_FEATURE_TABLE))); EXPECT_EQ(0, memcmp(&expectedWaTable, &passedWaTable, sizeof(WA_TABLE))); EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, passedInputArgs.ClientType); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/gtpin/000077500000000000000000000000001363734646600227535ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/gtpin/CMakeLists.txt000066400000000000000000000007671363734646600255250ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_gtpin ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_tests.cpp ) macro(macro_for_each_gen) 
list(APPEND IGDRCL_SRCS_tests_gtpin ${NEO_SOURCE_DIR}/opencl/source/${GEN_TYPE_LOWER}/gtpin_setup_${GEN_TYPE_LOWER}.cpp) endmacro() apply_macro_for_each_gen("TESTED") if(NOT DISABLED_GTPIN_SUPPORT) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_gtpin}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/gtpin/gtpin_tests.cpp000066400000000000000000003016071363734646600260310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "opencl/source/api/api.h" #include "opencl/source/context/context.h" #include "opencl/source/gtpin/gtpin_defs.h" #include "opencl/source/gtpin/gtpin_helpers.h" #include "opencl/source/gtpin/gtpin_hw_helper.h" #include "opencl/source/gtpin/gtpin_init.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" 
#include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/program/program_tests.h" #include "test.h" #include "gtest/gtest.h" #include #include using namespace NEO; using namespace gtpin; namespace NEO { extern std::deque kernelExecQueue; } namespace ULT { int ContextCreateCallbackCount = 0; int ContextDestroyCallbackCount = 0; int KernelCreateCallbackCount = 0; int KernelSubmitCallbackCount = 0; int CommandBufferCreateCallbackCount = 0; int CommandBufferCompleteCallbackCount = 0; uint32_t kernelOffset = 0; bool returnNullResource = false; context_handle_t currContext = nullptr; std::deque kernelResources; platform_info_t platformInfo; void OnContextCreate(context_handle_t context, platform_info_t *platformInfo, igc_init_t **igcInit) { ULT::platformInfo.gen_version = platformInfo->gen_version; currContext = context; kernelResources.clear(); ContextCreateCallbackCount++; *igcInit = reinterpret_cast(0x1234); } void OnContextDestroy(context_handle_t context) { currContext = nullptr; EXPECT_EQ(0u, kernelResources.size()); kernelResources.clear(); ContextDestroyCallbackCount++; } void OnKernelCreate(context_handle_t context, const instrument_params_in_t *paramsIn, instrument_params_out_t *paramsOut) { paramsOut->inst_kernel_binary = const_cast(paramsIn->orig_kernel_binary); paramsOut->inst_kernel_size = paramsIn->orig_kernel_size; paramsOut->kernel_id = paramsIn->igc_hash_id; KernelCreateCallbackCount++; } void OnKernelSubmit(command_buffer_handle_t cb, uint64_t kernelId, uint32_t *entryOffset, resource_handle_t *resource) { resource_handle_t currResource = nullptr; ASSERT_NE(nullptr, currContext); if (!returnNullResource) { GTPIN_DI_STATUS st = gtpinCreateBuffer(currContext, (uint32_t)256, &currResource); EXPECT_EQ(GTPIN_DI_SUCCESS, st); EXPECT_NE(nullptr, currResource); uint8_t *bufAddress = nullptr; st = gtpinMapBuffer(currContext, currResource, &bufAddress); EXPECT_EQ(GTPIN_DI_SUCCESS, st); EXPECT_NE(nullptr, bufAddress); } *entryOffset 
= kernelOffset; *resource = currResource; kernelResources.push_back(currResource); KernelSubmitCallbackCount++; } void OnCommandBufferCreate(context_handle_t context, command_buffer_handle_t cb) { CommandBufferCreateCallbackCount++; } void OnCommandBufferComplete(command_buffer_handle_t cb) { ASSERT_NE(nullptr, currContext); resource_handle_t currResource = kernelResources[0]; EXPECT_NE(nullptr, currResource); GTPIN_DI_STATUS st = gtpinUnmapBuffer(currContext, currResource); EXPECT_EQ(GTPIN_DI_SUCCESS, st); st = gtpinFreeBuffer(currContext, currResource); EXPECT_EQ(GTPIN_DI_SUCCESS, st); kernelResources.pop_front(); CommandBufferCompleteCallbackCount++; } class MockMemoryManagerWithFailures : public OsAgnosticMemoryManager { public: MockMemoryManagerWithFailures(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(executionEnvironment){}; GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override { if (failAllAllocationsInDevicePool) { failAllAllocationsInDevicePool = false; return nullptr; } return OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(allocationData, status); } bool failAllAllocationsInDevicePool = false; }; class GTPinFixture : public ContextFixture, public MemoryManagementFixture { using ContextFixture::SetUp; public: void SetUp() override { platformsImpl.clear(); MemoryManagementFixture::SetUp(); constructPlatform(); pPlatform = platform(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); memoryManager = new MockMemoryManagerWithFailures(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); initPlatform(); pDevice = pPlatform->getClDevice(0); cl_device_id device = (cl_device_id)pDevice; ContextFixture::SetUp(1, &device); driverServices.bufferAllocate = nullptr; 
driverServices.bufferDeallocate = nullptr; driverServices.bufferMap = nullptr; driverServices.bufferUnMap = nullptr; gtpinCallbacks.onContextCreate = nullptr; gtpinCallbacks.onContextDestroy = nullptr; gtpinCallbacks.onKernelCreate = nullptr; gtpinCallbacks.onKernelSubmit = nullptr; gtpinCallbacks.onCommandBufferCreate = nullptr; gtpinCallbacks.onCommandBufferComplete = nullptr; NEO::isGTPinInitialized = false; kernelOffset = 0; } void TearDown() override { ContextFixture::TearDown(); platformsImpl.clear(); MemoryManagementFixture::TearDown(); NEO::isGTPinInitialized = false; } Platform *pPlatform = nullptr; ClDevice *pDevice = nullptr; cl_int retVal = CL_SUCCESS; GTPIN_DI_STATUS retFromGtPin = GTPIN_DI_SUCCESS; driver_services_t driverServices; gtpin::ocl::gtpin_events_t gtpinCallbacks; MockMemoryManagerWithFailures *memoryManager = nullptr; }; typedef Test GTPinTests; TEST_F(GTPinTests, givenInvalidArgumentsThenGTPinInitFails) { bool isInitialized = false; retFromGtPin = GTPin_Init(nullptr, nullptr, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); isInitialized = gtpinIsGTPinInitialized(); EXPECT_FALSE(isInitialized); retFromGtPin = GTPin_Init(>pinCallbacks, nullptr, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); isInitialized = gtpinIsGTPinInitialized(); EXPECT_FALSE(isInitialized); retFromGtPin = GTPin_Init(nullptr, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); isInitialized = gtpinIsGTPinInitialized(); EXPECT_FALSE(isInitialized); } TEST_F(GTPinTests, givenIncompleteArgumentsThenGTPinInitFails) { interface_version_t ver; ver.common = 0; ver.specific = 0; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, &ver); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onContextCreate = OnContextCreate; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onContextDestroy = 
OnContextDestroy; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onKernelCreate = OnKernelCreate; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onKernelSubmit = OnKernelSubmit; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INVALID_ARGUMENT, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsWhenVersionArgumentIsProvidedThenGTPinInitReturnsDriverVersion) { interface_version_t ver; ver.common = 0; ver.specific = 0; retFromGtPin = GTPin_Init(nullptr, nullptr, &ver); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION, ver.specific); EXPECT_EQ(gtpin::GTPIN_COMMON_INTERFACE_VERSION, ver.common); retFromGtPin = GTPin_Init(>pinCallbacks, nullptr, &ver); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION, ver.specific); EXPECT_EQ(gtpin::GTPIN_COMMON_INTERFACE_VERSION, ver.common); retFromGtPin = GTPin_Init(nullptr, &driverServices, &ver); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(gtpin::ocl::GTPIN_OCL_INTERFACE_VERSION, ver.specific); EXPECT_EQ(gtpin::GTPIN_COMMON_INTERFACE_VERSION, ver.common); } TEST_F(GTPinTests, givenValidAndCompleteArgumentsThenGTPinInitSucceeds) { bool isInitialized = false; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, 
&driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); isInitialized = gtpinIsGTPinInitialized(); EXPECT_TRUE(isInitialized); } TEST_F(GTPinTests, givenValidAndCompleteArgumentsWhenGTPinIsAlreadyInitializedThenGTPinInitFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_ERROR_INSTANCE_ALREADY_CREATED, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferAllocateFails) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, 
driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = (*driverServices.bufferAllocate)(nullptr, buffSize, &res); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferDeallocateFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = (*driverServices.bufferDeallocate)(nullptr, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, (gtpin::resource_handle_t)ctxt); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferMapFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = 
OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); uint8_t *mappedPtr; retFromGtPin = (*driverServices.bufferMap)(nullptr, nullptr, &mappedPtr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, nullptr, &mappedPtr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, (gtpin::resource_handle_t)ctxt, &mappedPtr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenInvalidArgumentsThenBufferUnMapFails) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); EXPECT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); ASSERT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); retFromGtPin = (*driverServices.bufferUnMap)(nullptr, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferUnMap)((gtpin::context_handle_t)ctxt, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); 
retFromGtPin = (*driverServices.bufferUnMap)((gtpin::context_handle_t)ctxt, (gtpin::resource_handle_t)ctxt); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenValidRequestForHugeMemoryAllocationThenBufferAllocateFails) { InjectedFunction allocBufferFunc = [this](size_t failureIndex) { resource_handle_t res; cl_context ctxt = (cl_context)((Context *)pContext); uint32_t hugeSize = 400u; // Will be handled as huge memory allocation retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, hugeSize, &res); if (MemoryManagement::nonfailingAllocation != failureIndex) { EXPECT_EQ(GTPIN_DI_ERROR_ALLOCATION_FAILED, retFromGtPin); } else { EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } }; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); injectFailures(allocBufferFunc); } TEST_F(GTPinTests, givenValidRequestForMemoryAllocationThenBufferAllocateAndDeallocateSucceeds) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = 
OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenValidArgumentsForBufferMapWhenCallSequenceIsCorrectThenBufferMapSucceeds) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); uint8_t *mappedPtr = nullptr; retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, res, &mappedPtr); 
EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, mappedPtr); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenMissingReturnArgumentForBufferMapWhenCallSequenceIsCorrectThenBufferMapFails) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, res, nullptr); EXPECT_NE(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenValidArgumentsForBufferUnMapWhenCallSequenceIsCorrectThenBufferUnMapSucceeds) { resource_handle_t res; uint32_t buffSize = 400u; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = 
OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); ASSERT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); ASSERT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); cl_context ctxt = (cl_context)((Context *)pContext); retFromGtPin = (*driverServices.bufferAllocate)((gtpin::context_handle_t)ctxt, buffSize, &res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, res); uint8_t *mappedPtr = nullptr; retFromGtPin = (*driverServices.bufferMap)((gtpin::context_handle_t)ctxt, res, &mappedPtr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); EXPECT_NE(nullptr, mappedPtr); retFromGtPin = (*driverServices.bufferUnMap)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); retFromGtPin = (*driverServices.bufferDeallocate)((gtpin::context_handle_t)ctxt, res); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); } TEST_F(GTPinTests, givenUninitializedGTPinInterfaceThenGTPinContextCallbackIsNotCalled) { int prevCount = ContextCreateCallbackCount; cl_device_id device = (cl_device_id)pDevice; auto context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount); prevCount = ContextDestroyCallbackCount; retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ContextDestroyCallbackCount, prevCount); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenContextCreationArgumentsAreInvalidThenGTPinContextCallbackIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; 
gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); int prevCount = ContextCreateCallbackCount; cl_device_id device = (cl_device_id)pDevice; cl_context_properties invalidProperties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) nullptr, 0}; auto context = clCreateContext(invalidProperties, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount); context = clCreateContextFromType(invalidProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_INVALID_PLATFORM, retVal); EXPECT_EQ(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceThenGTPinContextCallbackIsCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); int prevCount = ContextCreateCallbackCount; cl_device_id device = (cl_device_id)pDevice; auto context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, prevCount + 1); prevCount = ContextDestroyCallbackCount; retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ContextDestroyCallbackCount, prevCount + 1); prevCount = ContextCreateCallbackCount; context = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); EXPECT_EQ(ContextCreateCallbackCount, 
prevCount + 1); prevCount = ContextDestroyCallbackCount; retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ContextDestroyCallbackCount, prevCount + 1); } TEST_F(GTPinTests, givenUninitializedGTPinInterfaceThenGTPinKernelCreateCallbackIsNotCalled) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( (cl_context)((Context *)pContext), 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount, KernelCreateCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenContextIsCreatedThenCorrectVersionIsSet) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_device_id device = static_cast(pDevice); cl_context context = nullptr; context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); EXPECT_EQ(ULT::platformInfo.gen_version, static_cast(gtpinHelper.getGenVersion())); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCallbacksAreCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel1 = nullptr; cl_kernel kernel2 = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create and submit first instance of "CopyBuffer" kernel int 
prevCount11 = KernelCreateCallbackCount; kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); Kernel *pKernel1 = (Kernel *)kernel1; const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId1); constexpr size_t n = 256; auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pKernel1, 0, sizeof(cl_mem), &buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel1, 1, sizeof(cl_mem), &buff11); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount12 = KernelSubmitCallbackCount; int prevCount13 = CommandBufferCreateCallbackCount; int prevCount14 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pKernel1, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount12 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount13 + 1, CommandBufferCreateCallbackCount); // Create and submit second instance of "CopyBuffer" kernel int prevCount21 = KernelCreateCallbackCount; kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that GT-Pin Kernel Create callback is not called multiple times for the same kernel EXPECT_EQ(prevCount21, KernelCreateCallbackCount); Kernel *pKernel2 = (Kernel *)kernel2; const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId2); auto buff20 = 
clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff21 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pKernel2, 0, sizeof(cl_mem), &buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel2, 1, sizeof(cl_mem), &buff21); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount22 = KernelSubmitCallbackCount; int prevCount23 = CommandBufferCreateCallbackCount; int prevCount24 = CommandBufferCompleteCallbackCount; retVal = clEnqueueNDRangeKernel(cmdQ, pKernel2, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount22 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount23 + 1, CommandBufferCreateCallbackCount); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount14 + 2, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24 + 2, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff11); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff21); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGTPinCallbacksAreCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete 
= OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel1 = nullptr; cl_kernel kernel2 = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create and submit first instance of "CopyBuffer" kernel int prevCount11 = KernelCreateCallbackCount; kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); Kernel *pKernel1 = (Kernel *)kernel1; const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId1); cl_uint workDim = 1; size_t localWorkSize[3] = {1, 1, 1}; size_t n = pKernel1->getMaxWorkGroupCount(workDim, localWorkSize); auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = 
clSetKernelArg(pKernel1, 0, sizeof(cl_mem), &buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel1, 1, sizeof(cl_mem), &buff11); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount12 = KernelSubmitCallbackCount; int prevCount13 = CommandBufferCreateCallbackCount; int prevCount14 = CommandBufferCompleteCallbackCount; size_t globalWorkOffset[3] = {0, 0, 0}; size_t workgroupCount[3] = {n, 1, 1}; retVal = clEnqueueNDCountKernelINTEL(cmdQ, pKernel1, workDim, globalWorkOffset, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount12 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount13 + 1, CommandBufferCreateCallbackCount); // Create and submit second instance of "CopyBuffer" kernel int prevCount21 = KernelCreateCallbackCount; kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that GT-Pin Kernel Create callback is not called multiple times for the same kernel EXPECT_EQ(prevCount21, KernelCreateCallbackCount); Kernel *pKernel2 = (Kernel *)kernel2; const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff21 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pKernel2, 0, sizeof(cl_mem), &buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel2, 1, sizeof(cl_mem), &buff21); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount22 = KernelSubmitCallbackCount; int prevCount23 = CommandBufferCreateCallbackCount; int prevCount24 = CommandBufferCompleteCallbackCount; retVal = clEnqueueNDCountKernelINTEL(cmdQ, pKernel2, workDim, globalWorkOffset, workgroupCount, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount22 + 1, 
KernelSubmitCallbackCount); EXPECT_EQ(prevCount23 + 1, CommandBufferCreateCallbackCount); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount14 + 2, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24 + 2, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff11); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff21); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenKernelCreateCallbacksIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_device_id device = (cl_device_id)pDevice; cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); auto pContext = castToObject(context); char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t binSize = 10; MockProgram *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, &binary[0], binSize, false, &retVal, &pDevice->getDevice()); ASSERT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); 
PatchTokensTestData::ValidProgramWithKernel programTokens; pProgram->unpackedDeviceBinary = makeCopy(reinterpret_cast(programTokens.storage.data()), programTokens.storage.size()); pProgram->unpackedDeviceBinarySize = programTokens.storage.size(); retVal = pProgram->processGenBinary(); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount = KernelCreateCallbackCount; cl_kernel kernel = clCreateKernel(pProgram, std::string(programTokens.kernels[0].name.begin(), programTokens.kernels[0].name.size()).c_str(), &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount, KernelCreateCallbackCount); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithExecEnvIsUsedThenKernelCreateCallbacksIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_device_id device = (cl_device_id)pDevice; cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); auto pContext = castToObject(context); // Prepare a kernel with fake Execution Environment char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t binSize = 10; MockProgram *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, &binary[0], binSize, false, &retVal, &pDevice->getDevice()); ASSERT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); char *pBin = &binary[0]; 
SProgramBinaryHeader *pBHdr = (SProgramBinaryHeader *)pBin; pBHdr->Magic = iOpenCL::MAGIC_CL; pBHdr->Version = iOpenCL::CURRENT_ICBE_VERSION; pBHdr->Device = pDevice->getHardwareInfo().platform.eRenderCoreFamily; pBHdr->GPUPointerSizeInBytes = 8; pBHdr->NumberOfKernels = 1; pBHdr->SteppingId = 0; pBHdr->PatchListSize = 0; pBin += sizeof(SProgramBinaryHeader); binSize += sizeof(SProgramBinaryHeader); SKernelBinaryHeaderCommon *pKHdr = (SKernelBinaryHeaderCommon *)pBin; pKHdr->CheckSum = 0; pKHdr->ShaderHashCode = 0; pKHdr->KernelNameSize = 4; pKHdr->PatchListSize = sizeof(SPatchExecutionEnvironment) + sizeof(SPatchBindingTableState); pKHdr->KernelHeapSize = 16; pKHdr->GeneralStateHeapSize = 0; pKHdr->DynamicStateHeapSize = 0; pKHdr->SurfaceStateHeapSize = 64; pKHdr->KernelUnpaddedSize = 0; pBin += sizeof(SKernelBinaryHeaderCommon); binSize += sizeof(SKernelBinaryHeaderCommon); char *pKernelBin = pBin; strcpy(pBin, "Tst"); pBin += pKHdr->KernelNameSize; binSize += pKHdr->KernelNameSize; strcpy(pBin, "fake_ISA_code__"); pBin += pKHdr->KernelHeapSize; binSize += pKHdr->KernelHeapSize; memset(pBin, 0, pKHdr->SurfaceStateHeapSize); pBin += pKHdr->SurfaceStateHeapSize; binSize += pKHdr->SurfaceStateHeapSize; SPatchExecutionEnvironment *pPatch1 = (SPatchExecutionEnvironment *)pBin; pPatch1->Token = iOpenCL::PATCH_TOKEN_EXECUTION_ENVIRONMENT; pPatch1->Size = sizeof(iOpenCL::SPatchExecutionEnvironment); pPatch1->RequiredWorkGroupSizeX = 0; pPatch1->RequiredWorkGroupSizeY = 0; pPatch1->RequiredWorkGroupSizeZ = 0; pPatch1->LargestCompiledSIMDSize = 8; pPatch1->CompiledSubGroupsNumber = 0; pPatch1->HasBarriers = 0; pPatch1->DisableMidThreadPreemption = 0; pPatch1->CompiledSIMD8 = 0; pPatch1->CompiledSIMD16 = 0; pPatch1->CompiledSIMD32 = 1; pPatch1->HasDeviceEnqueue = 1; pPatch1->MayAccessUndeclaredResource = 0; pPatch1->UsesFencesForReadWriteImages = 0; pPatch1->UsesStatelessSpillFill = 0; pPatch1->IsCoherent = 0; pPatch1->IsInitializer = 0; pPatch1->IsFinalizer = 0; 
pPatch1->SubgroupIndependentForwardProgressRequired = 0; pPatch1->CompiledForGreaterThan4GBBuffers = 0; pBin += sizeof(SPatchExecutionEnvironment); binSize += sizeof(SPatchExecutionEnvironment); SPatchBindingTableState *pPatch2 = (SPatchBindingTableState *)pBin; pPatch2->Token = iOpenCL::PATCH_TOKEN_BINDING_TABLE_STATE; pPatch2->Size = sizeof(iOpenCL::SPatchBindingTableState); pPatch2->Offset = 0; pPatch2->Count = 1; pPatch2->SurfaceStateOffset = 0; binSize += sizeof(SPatchBindingTableState); uint32_t kernelBinSize = pKHdr->DynamicStateHeapSize + pKHdr->GeneralStateHeapSize + pKHdr->KernelHeapSize + pKHdr->KernelNameSize + pKHdr->PatchListSize + pKHdr->SurfaceStateHeapSize; uint64_t hashValue = Hash::hash(reinterpret_cast(pKernelBin), kernelBinSize); pKHdr->CheckSum = static_cast(hashValue & 0xFFFFFFFF); pProgram->unpackedDeviceBinary = makeCopy(&binary[0], binSize); pProgram->unpackedDeviceBinarySize = binSize; retVal = pProgram->processGenBinary(); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that GT-Pin Kernel Create callback is not called int prevCount = KernelCreateCallbackCount; cl_kernel kernel = clCreateKernel(pProgram, "Tst", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount, KernelCreateCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); 
EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount1 = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); Kernel *pKernel = (Kernel *)kernel; const KernelInfo &kInfo = pKernel->getKernelInfo(); uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId); constexpr size_t n = 256; auto buff0 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff1 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pKernel, 0, sizeof(cl_mem), &buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 1, sizeof(cl_mem), &buff1); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete 
callbacks are not called. pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); int prevCount2 = KernelSubmitCallbackCount; int prevCount3 = CommandBufferCreateCallbackCount; int prevCount4 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount2, KernelSubmitCallbackCount); EXPECT_EQ(prevCount3, CommandBufferCreateCallbackCount); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; 
KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount1 = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); Kernel *pKernel = (Kernel *)kernel; const KernelInfo &kInfo = pKernel->getKernelInfo(); uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId); constexpr size_t n = 256; auto buff0 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff1 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pKernel, 0, sizeof(cl_mem), &buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel, 1, sizeof(cl_mem), &buff1); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. 
pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); cl_event userEvent = clCreateUserEvent(context, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount2 = KernelSubmitCallbackCount; int prevCount3 = CommandBufferCreateCallbackCount; int prevCount4 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount2, KernelSubmitCallbackCount); EXPECT_EQ(prevCount3, CommandBufferCreateCallbackCount); retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwiceThenGTPinCreateKernelCallbackIsCalledOnce) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel1 = nullptr; cl_kernel 
kernel2 = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Kernel "CopyBuffer" - called for the first time int prevCount11 = KernelCreateCallbackCount; kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); Kernel *pKernel1 = (Kernel *)kernel1; const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId1); constexpr size_t n = 256; auto buff10 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff11 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pKernel1, 0, sizeof(cl_mem), &buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel1, 1, sizeof(cl_mem), &buff11); EXPECT_EQ(CL_SUCCESS, retVal); cl_event userEvent = clCreateUserEvent(context, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount12 = 
KernelSubmitCallbackCount; int prevCount13 = CommandBufferCreateCallbackCount; int prevCount14 = CommandBufferCompleteCallbackCount; cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {n, 1, 1}; size_t localWorkSize[3] = {1, 1, 1}; retVal = clEnqueueNDRangeKernel(cmdQ, pKernel1, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &userEvent, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount12 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount13 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount14, CommandBufferCompleteCallbackCount); // The same kernel "CopyBuffer" - called second time int prevCount21 = KernelCreateCallbackCount; kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that Kernel Create callback was not called now EXPECT_EQ(prevCount21, KernelCreateCallbackCount); Kernel *pKernel2 = (Kernel *)kernel2; const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.heapInfo.pKernelHeader->ShaderHashCode, gtpinKernelId2); auto buff20 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); auto buff21 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); retVal = clSetKernelArg(pKernel2, 0, sizeof(cl_mem), &buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clSetKernelArg(pKernel2, 1, sizeof(cl_mem), &buff21); EXPECT_EQ(CL_SUCCESS, retVal); int prevCount22 = KernelSubmitCallbackCount; int prevCount23 = CommandBufferCreateCallbackCount; int prevCount24 = CommandBufferCompleteCallbackCount; EXPECT_EQ(prevCount14, prevCount24); retVal = clEnqueueNDRangeKernel(cmdQ, pKernel2, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount22 + 1, KernelSubmitCallbackCount); EXPECT_EQ(prevCount23 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount14, 
CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount14, prevCount24); clSetUserEventStatus(userEvent, CL_COMPLETE); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that both kernel instances were completed EXPECT_EQ(prevCount14 + 2, CommandBufferCompleteCallbackCount); EXPECT_EQ(prevCount24 + 2, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseMemObject(buff10); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff11); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff20); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(buff21); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseEvent(userEvent); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GTPinTests, givenMultipleKernelSubmissionsWhenOneOfGtpinSurfacesIsNullThenOnlyNonNullSurfacesAreMadeResident) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); cl_kernel kernel1 = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); 
testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); kernel1 = clCreateKernel(pProgram, "CopyBuffer", &retVal); EXPECT_NE(nullptr, kernel1); EXPECT_EQ(CL_SUCCESS, retVal); Kernel *pKernel1 = (Kernel *)kernel1; returnNullResource = true; auto pCmdQueue = castToObject(cmdQ); gtpinNotifyKernelSubmit(pKernel1, pCmdQueue); EXPECT_EQ(nullptr, kernelExecQueue[0].gtpinResource); CommandStreamReceiver &csr = pCmdQueue->getGpgpuCommandStreamReceiver(); gtpinNotifyMakeResident(pKernel1, &csr); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); std::vector residencyVector; gtpinNotifyUpdateResidencyList(pKernel1, &residencyVector); EXPECT_EQ(0u, residencyVector.size()); returnNullResource = false; gtpinNotifyKernelSubmit(pKernel1, pCmdQueue); EXPECT_NE(nullptr, kernelExecQueue[1].gtpinResource); gtpinNotifyMakeResident(pKernel1, &csr); EXPECT_TRUE(kernelExecQueue[1].isResourceResident); cl_mem gtpinBuffer1 = kernelExecQueue[1].gtpinResource; gtpinNotifyKernelSubmit(pKernel1, pCmdQueue); EXPECT_NE(nullptr, kernelExecQueue[2].gtpinResource); gtpinNotifyUpdateResidencyList(pKernel1, &residencyVector); EXPECT_EQ(1u, residencyVector.size()); EXPECT_TRUE(kernelExecQueue[2].isResourceResident); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); GeneralSurface *pSurf = static_cast(residencyVector[0]); delete pSurf; residencyVector.clear(); 
cl_mem gtpinBuffer2 = kernelExecQueue[2].gtpinResource; gtpinUnmapBuffer(reinterpret_cast(context), reinterpret_cast(gtpinBuffer1)); gtpinFreeBuffer(reinterpret_cast(context), reinterpret_cast(gtpinBuffer1)); gtpinUnmapBuffer(reinterpret_cast(context), reinterpret_cast(gtpinBuffer2)); gtpinFreeBuffer(reinterpret_cast(context), reinterpret_cast(gtpinBuffer2)); retVal = clFinish(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); // Cleanup returnNullResource = false; kernelResources.clear(); retVal = clReleaseKernel(kernel1); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } /* Initializes GT-Pin with all six callbacks, builds the CopyBuffer kernel from CopyBuffer_simd16.cl, then calls gtpinNotifyKernelSubmit, gtpinNotifyUpdateResidencyList, gtpinNotifyMakeResident, gtpinNotifyPreFlushTask, gtpinNotifyFlushTask and gtpinNotifyTaskCompletion by hand, checking the callback counters and the kernelExecQueue / kernelResources bookkeeping after each step. */ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsCreatedThenAllKernelSubmitRelatedNotificationsAreCalled) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); kernelExecQueue.clear(); cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ =
clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create kernel int prevCount1 = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); // Simulate that created kernel was sent for execution auto pKernel = castToObject(kernel); auto pCmdQueue = castToObject(cmdQ); ASSERT_NE(nullptr, pKernel); EXPECT_EQ(0u, kernelExecQueue.size()); EXPECT_EQ(0u, kernelResources.size()); int prevCount2 = CommandBufferCreateCallbackCount; int prevCount3 = KernelSubmitCallbackCount; gtpinNotifyKernelSubmit(kernel, pCmdQueue); EXPECT_EQ(prevCount2 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount3 + 1, KernelSubmitCallbackCount); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_EQ(1u, kernelResources.size()); EXPECT_EQ(pKernel, kernelExecQueue[0].pKernel); EXPECT_EQ(kernelResources[0], (resource_handle_t)kernelExecQueue[0].gtpinResource); EXPECT_EQ(pCmdQueue, kernelExecQueue[0].pCommandQueue); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); // Verify that if kernel unknown to GT-Pin is about to be flushed // then its residency vector does not obtain GT-Pin resource std::vector residencyVector; EXPECT_EQ(0u, residencyVector.size()); gtpinNotifyUpdateResidencyList(nullptr, &residencyVector); EXPECT_EQ(0u, residencyVector.size()); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); // Verify that if kernel known to GT-Pin is about to be flushed // then its residency vector obtains GT-Pin resource EXPECT_EQ(0u, residencyVector.size());
gtpinNotifyUpdateResidencyList(pKernel, &residencyVector); EXPECT_EQ(1u, residencyVector.size()); GeneralSurface *pSurf = (GeneralSurface *)residencyVector[0]; delete pSurf; residencyVector.clear(); EXPECT_TRUE(kernelExecQueue[0].isResourceResident); kernelExecQueue[0].isResourceResident = false; // Create second kernel ... cl_kernel kernel2 = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // ... and simulate that it was sent for execution auto pKernel2 = castToObject(kernel2); ASSERT_NE(nullptr, pKernel2); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_EQ(1u, kernelResources.size()); int prevCount22 = CommandBufferCreateCallbackCount; int prevCount23 = KernelSubmitCallbackCount; gtpinNotifyKernelSubmit(kernel2, pCmdQueue); EXPECT_EQ(prevCount22 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount23 + 1, KernelSubmitCallbackCount); EXPECT_EQ(2u, kernelExecQueue.size()); EXPECT_EQ(2u, kernelResources.size()); EXPECT_EQ(pKernel2, kernelExecQueue[1].pKernel); EXPECT_EQ(kernelResources[1], (resource_handle_t)kernelExecQueue[1].gtpinResource); EXPECT_EQ(pCmdQueue, kernelExecQueue[1].pCommandQueue); EXPECT_FALSE(kernelExecQueue[1].isTaskCountValid); EXPECT_FALSE(kernelExecQueue[1].isResourceResident); // Verify that correct GT-Pin resource is made resident cl_mem gtpinBuffer0 = kernelExecQueue[0].gtpinResource; auto pBuffer0 = castToObject(gtpinBuffer0); GraphicsAllocation *pGfxAlloc0 = pBuffer0->getGraphicsAllocation(); cl_mem gtpinBuffer1 = kernelExecQueue[1].gtpinResource; auto pBuffer1 = castToObject(gtpinBuffer1); GraphicsAllocation *pGfxAlloc1 = pBuffer1->getGraphicsAllocation(); CommandStreamReceiver &csr = pCmdQueue->getGpgpuCommandStreamReceiver(); EXPECT_FALSE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); gtpinNotifyMakeResident(pKernel, &csr);
EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); // Cancel information about second submitted kernel kernelExecQueue.pop_back(); EXPECT_EQ(1u, kernelExecQueue.size()); kernelResources.pop_back(); EXPECT_EQ(1u, kernelResources.size()); gtpinUnmapBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1); gtpinFreeBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1); retVal = clReleaseKernel(kernel2); EXPECT_EQ(CL_SUCCESS, retVal); // Verify that if flush occurs on another queue then our kernel is not flushed to CSR /* 11 appears to be an arbitrary task count; below only taskCount - 1 and taskCount itself are reported as completed */ uint32_t taskCount = 11; gtpinNotifyPreFlushTask(nullptr); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); gtpinNotifyFlushTask(taskCount); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); // Verify that if flush occurs on current queue then our kernel is flushed to CSR gtpinNotifyPreFlushTask(pCmdQueue); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); gtpinNotifyFlushTask(taskCount); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_TRUE(kernelExecQueue[0].isTaskCountValid); EXPECT_EQ(taskCount, kernelExecQueue[0].taskCount); // Verify that if previous task was completed then it does not affect our kernel uint32_t taskCompleted = taskCount - 1; int prevCount4 = CommandBufferCompleteCallbackCount; gtpinNotifyTaskCompletion(taskCompleted); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_EQ(1u, kernelResources.size()); EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); // Verify that if current task was completed then it is our kernel gtpinNotifyTaskCompletion(taskCompleted + 1); EXPECT_EQ(0u, kernelExecQueue.size()); EXPECT_EQ(0u, kernelResources.size()); EXPECT_EQ(prevCount4 + 1, CommandBufferCompleteCallbackCount); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal =
clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } /* Submits the same kernel twice on one queue and checks that each submission gets its own kernelExecQueue entry and GT-Pin buffer, and that two successive gtpinNotifyMakeResident calls (and, later, two gtpinNotifyUpdateResidencyList calls) pick up the first entry's buffer and then the second entry's buffer. */ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOneKernelIsSubmittedSeveralTimesThenCorrectBuffersAreMadeResident) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); kernelExecQueue.clear(); cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_command_queue cmdQ = nullptr; cl_queue_properties properties = 0; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); cmdQ = clCreateCommandQueue(context, device, properties, &retVal); ASSERT_NE(nullptr, cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create kernel int prevCount1 = KernelCreateCallbackCount; kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); // Simulate that created kernel was sent for execution two times in a row auto pKernel = castToObject(kernel); auto pCmdQueue = castToObject(cmdQ); ASSERT_NE(nullptr, pKernel); EXPECT_EQ(0u, kernelExecQueue.size()); EXPECT_EQ(0u, kernelResources.size()); int prevCount2 = CommandBufferCreateCallbackCount; int prevCount3 = KernelSubmitCallbackCount; // First kernel submission gtpinNotifyKernelSubmit(kernel, pCmdQueue); EXPECT_EQ(prevCount2 + 1, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount3 + 1, KernelSubmitCallbackCount); EXPECT_EQ(1u, kernelExecQueue.size()); EXPECT_EQ(1u, kernelResources.size()); EXPECT_EQ(pKernel, kernelExecQueue[0].pKernel); EXPECT_EQ(kernelResources[0], (resource_handle_t)kernelExecQueue[0].gtpinResource); EXPECT_EQ(pCmdQueue, kernelExecQueue[0].pCommandQueue); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); // Second kernel submission gtpinNotifyKernelSubmit(kernel, pCmdQueue); EXPECT_EQ(prevCount2 + 2, CommandBufferCreateCallbackCount); EXPECT_EQ(prevCount3 + 2, KernelSubmitCallbackCount); EXPECT_EQ(2u, kernelExecQueue.size()); EXPECT_EQ(2u, kernelResources.size()); EXPECT_EQ(pKernel, kernelExecQueue[0].pKernel); EXPECT_EQ(kernelResources[0], (resource_handle_t)kernelExecQueue[0].gtpinResource); EXPECT_EQ(pCmdQueue, kernelExecQueue[0].pCommandQueue); EXPECT_FALSE(kernelExecQueue[0].isTaskCountValid); EXPECT_FALSE(kernelExecQueue[0].isResourceResident); EXPECT_EQ(pKernel, kernelExecQueue[1].pKernel); EXPECT_EQ(kernelResources[1], (resource_handle_t)kernelExecQueue[1].gtpinResource); EXPECT_EQ(pCmdQueue, kernelExecQueue[1].pCommandQueue); EXPECT_FALSE(kernelExecQueue[1].isTaskCountValid); EXPECT_FALSE(kernelExecQueue[1].isResourceResident); // Verify that correct GT-Pin resource is made resident.
// This simulates enqueuing non-blocked kernels cl_mem gtpinBuffer0 = kernelExecQueue[0].gtpinResource; auto pBuffer0 = castToObject(gtpinBuffer0); GraphicsAllocation *pGfxAlloc0 = pBuffer0->getGraphicsAllocation(); cl_mem gtpinBuffer1 = kernelExecQueue[1].gtpinResource; auto pBuffer1 = castToObject(gtpinBuffer1); GraphicsAllocation *pGfxAlloc1 = pBuffer1->getGraphicsAllocation(); CommandStreamReceiver &csr = pCmdQueue->getGpgpuCommandStreamReceiver(); // Make resident resource of first submitted kernel EXPECT_FALSE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); gtpinNotifyMakeResident(pKernel, &csr); EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); // Make resident resource of second submitted kernel gtpinNotifyMakeResident(pKernel, &csr); EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_TRUE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); // Verify that correct GT-Pin resource is added to residency list.
// This simulates enqueuing blocked kernels kernelExecQueue[0].isResourceResident = false; kernelExecQueue[1].isResourceResident = false; pGfxAlloc0->releaseResidencyInOsContext(csr.getOsContext().getContextId()); pGfxAlloc1->releaseResidencyInOsContext(csr.getOsContext().getContextId()); EXPECT_FALSE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); std::vector residencyVector; EXPECT_EQ(0u, residencyVector.size()); // Add to residency list resource of first submitted kernel gtpinNotifyUpdateResidencyList(pKernel, &residencyVector); EXPECT_EQ(1u, residencyVector.size()); // Make resident first resource on residency list GeneralSurface *pSurf1 = (GeneralSurface *)residencyVector[0]; pSurf1->makeResident(csr); EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_FALSE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); // Add to residency list resource of second submitted kernel gtpinNotifyUpdateResidencyList(pKernel, &residencyVector); EXPECT_EQ(2u, residencyVector.size()); // Make resident second resource on residency list GeneralSurface *pSurf2 = (GeneralSurface *)residencyVector[1]; pSurf2->makeResident(csr); EXPECT_TRUE(pGfxAlloc0->isResident(csr.getOsContext().getContextId())); EXPECT_TRUE(pGfxAlloc1->isResident(csr.getOsContext().getContextId())); // Cleanup /* the test itself deletes the GeneralSurface objects it read from residencyVector, then unwinds both submissions by hand (pop_back plus unmap/free of each GT-Pin buffer) */ delete pSurf1; delete pSurf2; residencyVector.clear(); kernelExecQueue.pop_back(); EXPECT_EQ(1u, kernelExecQueue.size()); kernelResources.pop_back(); EXPECT_EQ(1u, kernelResources.size()); gtpinUnmapBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1); gtpinFreeBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer1); kernelExecQueue.pop_back(); EXPECT_EQ(0u, kernelExecQueue.size()); kernelResources.pop_back(); EXPECT_EQ(0u, kernelResources.size()); gtpinUnmapBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer0);
gtpinFreeBuffer((context_handle_t)context, (resource_handle_t)gtpinBuffer0); retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); pSource.reset(); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } /* Low-memory scenario driven by injectFailures(allocBufferFunc). For each failureIndex the lambda creates a context and a program from a patch-token test binary. When failureIndex is not MemoryManagement::nonfailingAllocation it either accepts CL_OUT_OF_HOST_MEMORY from processGenBinary or sets failAllAllocationsInDevicePool before clCreateKernelsInProgram; for the non-failing index it expects exactly one non-null kernel. */ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenLowMemoryConditionOccursThenKernelCreationFails) { InjectedFunction allocBufferFunc = [this](size_t failureIndex) { cl_device_id device = (cl_device_id)pDevice; cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); auto pContext = castToObject(context); char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t binSize = 10; MockProgram *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, &binary[0], binSize, false, &retVal, &pDevice->getDevice()); ASSERT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); PatchTokensTestData::ValidProgramWithKernel programTokens; pProgram->unpackedDeviceBinary = makeCopy(programTokens.storage.data(), programTokens.storage.size()); pProgram->unpackedDeviceBinarySize = programTokens.storage.size(); retVal = pProgram->processGenBinary(); if (retVal == CL_OUT_OF_HOST_MEMORY) { auto nonFailingAlloc = MemoryManagement::nonfailingAllocation; EXPECT_NE(nonFailingAlloc, failureIndex); } else { EXPECT_EQ(CL_SUCCESS, retVal); // Create kernels from program cl_kernel kernels[2] = {0}; cl_uint numCreatedKernels = 0; if (MemoryManagement::nonfailingAllocation != failureIndex) { memoryManager->failAllAllocationsInDevicePool = true; } retVal = clCreateKernelsInProgram(pProgram, 2, kernels, &numCreatedKernels); if (MemoryManagement::nonfailingAllocation != failureIndex) { if (retVal != CL_SUCCESS) { EXPECT_EQ(nullptr, kernels[0]); EXPECT_EQ(1u, numCreatedKernels); } /* NOTE(review): this release is also reached right after kernels[0] was expected to be nullptr, and its return value is not checked; confirm that is intended */ clReleaseKernel(kernels[0]); } else {
EXPECT_NE(nullptr, kernels[0]); EXPECT_EQ(1u, numCreatedKernels); clReleaseKernel(kernels[0]); } } clReleaseProgram(pProgram); clReleaseContext(context); }; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); injectFailures(allocBufferFunc); } /* Checks GTPinHwHelper::addSurfaceState on a CopyBuffer kernel. The first call must add one binding table state and enlarge both the surface state heap size and the binding table offset. After resizeSurfaceStateHeap(nullptr, 0, 0, 0) a second call must return false and every SSH query must yield 0 or nullptr. GTPin_Init is not called in this test. */ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device = (cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); Kernel *pKernel = castToObject(kernel); ASSERT_NE(nullptr, pKernel); size_t numBTS1 =
pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(2u, numBTS1); size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(); EXPECT_NE(0u, sizeSurfaceStates1); size_t offsetBTS1 = pKernel->getBindingTableOffset(); EXPECT_NE(0u, offsetBTS1); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_NE(nullptr, pSS1); // Enlarge SSH by one SURFACE STATE element bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel); EXPECT_TRUE(surfaceAdded); size_t numBTS2 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(numBTS1 + 1, numBTS2); size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(); EXPECT_GT(sizeSurfaceStates2, sizeSurfaceStates1); size_t offsetBTS2 = pKernel->getBindingTableOffset(); EXPECT_GT(offsetBTS2, offsetBTS1); void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_NE(pSS2, pSS1); /* an index equal to the number of binding table states is one past the last valid slot and must yield nullptr */ pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2); EXPECT_EQ(nullptr, pSS2); // Remove kernel's SSH pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); // Try to enlarge SSH once again, this time the operation must fail surfaceAdded = gtpinHelper.addSurfaceState(pKernel); EXPECT_FALSE(surfaceAdded); size_t numBTS3 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numBTS3); size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(); EXPECT_EQ(0u, sizeSurfaceStates3); size_t offsetBTS3 = pKernel->getBindingTableOffset(); EXPECT_EQ(0u, offsetBTS3); void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0); EXPECT_EQ(nullptr, pSS3); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } /* Checks Kernel::substituteKernelHeap on a CopyBuffer kernel: isKernelHeapSubstituted() must go from false to true, getKernelHeap() must return the address of the buffer passed in, and the kernel allocation's underlying buffer must compare equal to the new code. */ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; cl_device_id device =
(cl_device_id)pDevice; size_t sourceSize = 0; std::string testFile; cl_context context = nullptr; KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSource); context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context); const char *sources[1] = {pSource.get()}; pProgram = clCreateProgramWithSource( context, 1, sources, &sourceSize, &retVal); ASSERT_NE(nullptr, pProgram); retVal = clBuildProgram( pProgram, 1, &device, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Create kernel kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); ASSERT_NE(nullptr, kernel); EXPECT_EQ(CL_SUCCESS, retVal); Kernel *pKernel = castToObject(kernel); ASSERT_NE(nullptr, pKernel); bool isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(); EXPECT_FALSE(isKernelCodeSubstituted); // Substitute new kernel code constexpr size_t newCodeSize = 64; /* only the first five bytes are listed; the remaining elements of the 64-byte array are zero-initialized */ uint8_t newCode[newCodeSize] = {0x0, 0x1, 0x2, 0x3, 0x4}; pKernel->substituteKernelHeap(&newCode[0], newCodeSize); // Verify that substitution went properly isKernelCodeSubstituted = pKernel->isKernelHeapSubstituted(); EXPECT_TRUE(isKernelCodeSubstituted); uint8_t *pBin2 = reinterpret_cast(const_cast(pKernel->getKernelHeap())); EXPECT_EQ(pBin2, &newCode[0]); auto kernelIsa = pKernel->getKernelInfo().kernelAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(kernelIsa, newCode, newCodeSize)); // Cleanup retVal = clReleaseKernel(kernel); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseProgram(pProgram); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseContext(context); EXPECT_EQ(CL_SUCCESS, retVal); } /* Looks up the GTPinHwHelper for the device's render core family and expects a non-null pointer. */ TEST_F(GTPinTests, checkWhetherGTPinHwHelperGetterWorksWell) { GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper *pGTPinHelper =
>PinHwHelper::get(genFamily); EXPECT_NE(nullptr, pGTPinHelper); } /* Plain TEST (no GTPinTests fixture): asserts GT-Pin is not initialized, calls the enqueue-path gtpinNotify* functions with dummy pointer values (0x1000) and a zero task count, and expects GT-Pin to remain uninitialized afterwards. */ TEST(GTPinOfflineTests, givenGtPinInDisabledStateWhenCallbacksFromEnqueuePathAreCalledThenNothingHappens) { ASSERT_FALSE(gtpinIsGTPinInitialized()); auto dummyKernel = reinterpret_cast(0x1000); auto dummyQueue = reinterpret_cast(0x1000); uint32_t dummyCompletedTask = 0u; // Now call the gtpin functions with dummy data; this must not crash gtpinNotifyKernelSubmit(dummyKernel, dummyQueue); gtpinNotifyPreFlushTask(dummyQueue); gtpinNotifyTaskCompletion(dummyCompletedTask); gtpinNotifyFlushTask(dummyCompletedTask); EXPECT_FALSE(gtpinIsGTPinInitialized()); } /* Sets kernelOffset to 0x1234 and returnNullResource to true (both wrapped in VariableBackup), then checks that gtpinNotifyKernelSubmit makes the MockKernel's getStartOffset() equal to kernelOffset. NOTE(review): "Subit" in the test name looks like a typo for "Submit"; renaming it would change the gtest filter name. */ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelSubitIsCalledThenCorrectOffsetisSetInKernel) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); VariableBackup returnNullResourceBckp(&returnNullResource); VariableBackup kernelOffsetBckp(&kernelOffset); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); char surfaceStateHeap[0x80]; SKernelBinaryHeaderCommon kernelHeader; std::unique_ptr context(new MockContext(pDevice)); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = std::make_unique(); kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; auto pProgramm = std::make_unique(*pDevice->getExecutionEnvironment(), context.get(), false, nullptr); std::unique_ptr cmdQ(new MockCommandQueue(context.get(), pDevice, nullptr)); std::unique_ptr pKernel(new MockKernel(pProgramm.get(), *pKernelInfo, *pDevice)); pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap)); kernelOffset = 0x1234;
EXPECT_NE(pKernel->getStartOffset(), kernelOffset); returnNullResource = true; cl_context ctxt = (cl_context)((Context *)context.get()); currContext = (gtpin::context_handle_t)ctxt; gtpinNotifyKernelSubmit(pKernel.get(), cmdQ.get()); EXPECT_EQ(pKernel->getStartOffset(), kernelOffset); EXPECT_EQ(CL_SUCCESS, retVal); kernelResources.clear(); } /* After GTPin_Init, calling gtpinNotifyContextCreate on a context must leave gtpinGetIgcInit() non-null. */ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnContextCreateIsCalledThenGtpinInitIsSet) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); auto context = std::make_unique(); gtpinNotifyContextCreate(context.get()); EXPECT_NE(gtpinGetIgcInit(), nullptr); } /* gtpinNotifyKernelCreate(nullptr) must leave KernelCreateCallbackCount unchanged. */ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelCreateIsCalledWithNullptrThenCallIsIgnored) { gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy = OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = OnKernelSubmit; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); auto prevCreateCount = KernelCreateCallbackCount; gtpinNotifyKernelCreate(nullptr); EXPECT_EQ(prevCreateCount, KernelCreateCallbackCount); } /* Calls gtpinNotifyKernelSubmit for a mock kernel, submits a CommandComputeKernel for it, and expects the single kernelExecQueue entry to have a valid task count equal to the taskCount of the CompletionStamp returned by submit(). The onKernelSubmit callback is replaced by a lambda that does nothing. */ HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushedTaskCountIsNotified) { auto mockCmdQ = std::make_unique>(pContext, pDevice, nullptr); auto onKernelSubmitFnc = [](command_buffer_handle_t cb, uint64_t kernelId, uint32_t *entryOffset, resource_handle_t *resource) { return; }; gtpinCallbacks.onContextCreate = OnContextCreate; gtpinCallbacks.onContextDestroy =
OnContextDestroy; gtpinCallbacks.onKernelCreate = OnKernelCreate; gtpinCallbacks.onKernelSubmit = onKernelSubmitFnc; gtpinCallbacks.onCommandBufferCreate = OnCommandBufferCreate; gtpinCallbacks.onCommandBufferComplete = OnCommandBufferComplete; retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 128, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 128, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 128, ih3); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({pDevice->getRootDeviceIndex(), 128, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals kernel(*pDevice); kernel.kernelInfo.usesSsh = true; kernelOperation->setHeaps(ih1, ih2, ih3); bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; gtpinNotifyKernelSubmit(kernel, mockCmdQ.get()); std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); CompletionStamp stamp = command->submit(20, false); ASSERT_EQ(1u, kernelExecQueue.size()); EXPECT_TRUE(kernelExecQueue[0].isTaskCountValid); EXPECT_EQ(kernelExecQueue[0].taskCount, stamp.taskCount); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/helpers/000077500000000000000000000000001363734646600232745ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/helpers/CMakeLists.txt000066400000000000000000000062721363734646600260430ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_helpers ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/array_count_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/base_object_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/base_object_tests_mt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/basic_math_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bit_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmd_buffer_validator.h ${CMAKE_CURRENT_SOURCE_DIR}/cmd_buffer_validator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dirty_state_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_flags_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/flush_stamp_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_gpgpu_engines_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/get_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_info_status_mapper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gtest_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_default_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_parse.inl ${CMAKE_CURRENT_SOURCE_DIR}/kernel_filename_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_properties_parser_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_management_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_properties_flags_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mipmap_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/per_thread_data_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ptr_math_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/raii_hw_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/task_information_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_debug_variables.inl ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/transfer_properties_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ult_limits.h ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper.inl ${CMAKE_CURRENT_SOURCE_DIR}/validator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/variable_backup.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/helpers/aligned_memory_tests.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/helpers/debug_manager_state_restore.h ) get_property(NEO_CORE_PREAMBLE_TESTS GLOBAL PROPERTY NEO_CORE_PREAMBLE_TESTS) get_property(NEO_CORE_HELPERS_TESTS GLOBAL PROPERTY NEO_CORE_HELPERS_TESTS) list(APPEND IGDRCL_SRCS_tests_helpers ${NEO_CORE_HELPERS_TESTS} ${NEO_CORE_PREAMBLE_TESTS} ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_helpers}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/helpers/array_count_tests.cpp000066400000000000000000000007511363734646600275530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "gtest/gtest.h" namespace NEO { TEST(ArrayCountTests, WhenGettingArrayCountThenCorrectCountIsReturned) { int a[10]; EXPECT_EQ(10u, arrayCount(a)); } TEST(ArrayCountTests, WhenGettingInRangeThenOnlyIndexInRangeReturnsTrue) { int a[10]; EXPECT_TRUE(isInRange(1, a)); EXPECT_FALSE(isInRange(10, a)); } } // namespace NEOcompute-runtime-20.13.16352/opencl/test/unit_test/helpers/base_object_tests.cpp000066400000000000000000000265561363734646600275000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ 
#include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/api/api.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gmock/gmock.h" namespace NEO { typedef struct _cl_object_for_test2 *cl_object_for_test2; struct _cl_object_for_test2 : public ClDispatch { }; template <> struct OpenCLObjectMapper<_cl_object_for_test2> { typedef struct ObjectForTest2 DerivedType; }; template <> struct OpenCLObjectMapper { typedef _cl_object_for_test2 BaseType; }; struct ObjectForTest2 : public NEO::BaseObject<_cl_object_for_test2> { static const cl_ulong objectMagic = 0x13650a12b79ce4dfLL; }; template struct BaseObjectTests : public ::testing::Test { }; template class MockObjectBase : public OclObject { public: using OclObject::OclObject; void setInvalidMagic() { validMagic = this->magic; this->magic = 0x0101010101010101LL; } void setInvalidIcdDispath() { this->dispatch.icdDispatch = reinterpret_cast(this); } void setValidMagic() { this->magic = validMagic; } bool isObjectValid() const { return this->isValid(); } cl_ulong validMagic; }; template class MockObject : public MockObjectBase {}; template <> class MockObject : public MockObjectBase { public: void setArgStateful(void *memory, bool forceNonAuxMode, bool 
disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) override {} }; template <> class MockObject : public MockObjectBase { public: MockObject() : MockObjectBase(*new ExecutionEnvironment()), executionEnvironment(&this->peekExecutionEnvironment()) {} private: std::unique_ptr executionEnvironment; }; typedef ::testing::Types< MockPlatform, IntelAccelerator, //Context, //Program, //Kernel, //Sampler //others... MockCommandQueue, DeviceQueue> BaseObjectTypes; typedef ::testing::Types< MockPlatform, IntelAccelerator, Context, Program, Buffer, MockCommandQueue, DeviceQueue> BaseObjectTypesForCastInvalidMagicTest; TYPED_TEST_CASE(BaseObjectTests, BaseObjectTypes); // "typedef" BaseObjectTests template to use with different TypeParams for testing template using BaseObjectWithDefaultCtorTests = BaseObjectTests; TYPED_TEST_CASE(BaseObjectWithDefaultCtorTests, BaseObjectTypesForCastInvalidMagicTest); TYPED_TEST(BaseObjectWithDefaultCtorTests, castToObjectWithInvalidMagicReturnsNullptr) { MockObject *object = new MockObject; EXPECT_TRUE(object->isObjectValid()); object->setInvalidMagic(); EXPECT_FALSE(object->isObjectValid()); auto objectCasted = castToObject(object); EXPECT_EQ(nullptr, objectCasted); object->setValidMagic(); delete object; } TYPED_TEST(BaseObjectWithDefaultCtorTests, whenCastToObjectWithInvalidIcdDispatchThenReturnsNullptr) { auto object = std::make_unique>(); object->setInvalidIcdDispath(); auto objectCasted = castToObject(object.get()); EXPECT_EQ(nullptr, objectCasted); } TYPED_TEST(BaseObjectTests, retain) { TypeParam *object = new TypeParam; object->retain(); EXPECT_EQ(2, object->getReference()); object->release(); EXPECT_EQ(1, object->getReference()); object->release(); // MemoryLeakListener will detect a leak // if release doesn't delete memory. 
} TYPED_TEST(BaseObjectTests, castToObjectFromNullptr) { typename TypeParam::BaseType *handle = nullptr; auto object = castToObject(handle); EXPECT_EQ(nullptr, object); } TYPED_TEST(BaseObjectTests, castToFromBaseTypeYieldsDerivedType) { TypeParam object; typename TypeParam::BaseType *baseObject = &object; auto objectNew = castToObject(baseObject); EXPECT_EQ(&object, objectNew); } TYPED_TEST(BaseObjectTests, castToSameTypeReturnsSameObject) { TypeParam object; auto objectNew = castToObject(&object); EXPECT_EQ(&object, objectNew); } TYPED_TEST(BaseObjectTests, castToDifferentTypeReturnsNullPtr) { TypeParam object; typename TypeParam::BaseType *baseObject = &object; auto notOriginalType = reinterpret_cast(baseObject); auto invalidObject = castToObject(notOriginalType); EXPECT_EQ(nullptr, invalidObject); } TYPED_TEST(BaseObjectTests, commonRuntimeExpectsDispatchTableAtFirstPointerInObject) { TypeParam objectDrv; // Automatic downcasting to _cl_type *. typename TypeParam::BaseType *objectCL = &objectDrv; sharingFactory.fillGlobalDispatchTable(); // Common runtime casts to generic type assuming // the dispatch table is the first ptr in the structure auto genericObject = reinterpret_cast(objectCL); EXPECT_EQ(globalDispatchTable.icdDispatch, genericObject->dispatch.icdDispatch); EXPECT_EQ(globalDispatchTable.crtDispatch, genericObject->dispatch.crtDispatch); EXPECT_EQ(reinterpret_cast(clGetKernelArgInfo), genericObject->dispatch.crtDispatch->clGetKernelArgInfo); EXPECT_EQ(reinterpret_cast(clGetImageParamsINTEL), genericObject->dispatch.crtDispatch->clGetImageParamsINTEL); EXPECT_EQ(reinterpret_cast(clCreateAcceleratorINTEL), genericObject->dispatch.crtDispatch->clCreateAcceleratorINTEL); EXPECT_EQ(reinterpret_cast(clGetAcceleratorInfoINTEL), genericObject->dispatch.crtDispatch->clGetAcceleratorInfoINTEL); EXPECT_EQ(reinterpret_cast(clRetainAcceleratorINTEL), genericObject->dispatch.crtDispatch->clRetainAcceleratorINTEL); 
EXPECT_EQ(reinterpret_cast(clReleaseAcceleratorINTEL), genericObject->dispatch.crtDispatch->clReleaseAcceleratorINTEL); EXPECT_EQ(reinterpret_cast(clCreatePerfCountersCommandQueueINTEL), genericObject->dispatch.crtDispatch->clCreatePerfCountersCommandQueueINTEL); EXPECT_EQ(reinterpret_cast(clSetPerformanceConfigurationINTEL), genericObject->dispatch.crtDispatch->clSetPerformanceConfigurationINTEL); // Check empty placeholder dispatch table entries are null EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder12); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder13); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder18); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder19); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder20); EXPECT_EQ(nullptr, genericObject->dispatch.crtDispatch->placeholder21); } TEST(BaseObjectTests, commonRuntimeSetsSharedContextFlag) { MockContext newContext; //cast to cl_context cl_context clContext = &newContext; EXPECT_FALSE(newContext.isSharedContext); clContext->isSharedContext = true; EXPECT_TRUE(newContext.isSharedContext); } TYPED_TEST(BaseObjectTests, WhenAlreadyOwnedByThreadOnTakeOrReleaseOwnershipUsesRecursiveOwnageCounter) { TypeParam obj; EXPECT_FALSE(obj.hasOwnership()); obj.takeOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.takeOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.takeOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.releaseOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.releaseOwnership(); EXPECT_TRUE(obj.hasOwnership()); obj.releaseOwnership(); EXPECT_FALSE(obj.hasOwnership()); } TEST(CastToBuffer, fromMemObj) { MockContext context; auto buffer = BufferHelper<>::create(&context); MemObj *memObj = buffer; cl_mem clObj = buffer; EXPECT_EQ(buffer, castToObject(clObj)); EXPECT_EQ(memObj, castToObject(clObj)); EXPECT_EQ(nullptr, castToObject(clObj)); buffer->release(); } TEST(CastToImage, fromMemObj) { MockContext context; auto 
image = Image2dHelper<>::create(&context); MemObj *memObj = image; cl_mem clObj = image; EXPECT_EQ(image, castToObject(clObj)); EXPECT_EQ(memObj, castToObject(clObj)); EXPECT_EQ(nullptr, castToObject(clObj)); image->release(); } extern std::thread::id tempThreadID; class MockBuffer : public MockBufferStorage, public Buffer { public: MockBuffer() : MockBufferStorage(), Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, &mockGfxAllocation, true, false, false) { } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) override { } }; TEST(BaseObjectTest, takeOwnershipWrapper) { MockBuffer buffer; { TakeOwnershipWrapper bufferOwnership(buffer, false); EXPECT_FALSE(buffer.hasOwnership()); } { TakeOwnershipWrapper bufferOwnership(buffer, true); EXPECT_TRUE(buffer.hasOwnership()); bufferOwnership.unlock(); EXPECT_FALSE(buffer.hasOwnership()); } } TYPED_TEST(BaseObjectTests, getCond) { TypeParam *object = new TypeParam; EXPECT_EQ(0U, object->getCond().peekNumWaiters()); object->release(); } TYPED_TEST(BaseObjectTests, convertToInternalObject) { class ObjectForTest : public NEO::MemObj { public: ObjectForTest() : MemObj(nullptr, 0, {}, 0, 0, 0u, nullptr, nullptr, nullptr, false, false, false) { } void convertToInternalObject(void) { NEO::BaseObject<_cl_mem>::convertToInternalObject(); } }; ObjectForTest *object = new ObjectForTest; EXPECT_EQ(1, object->getRefApiCount()); EXPECT_EQ(1, object->getRefInternalCount()); object->convertToInternalObject(); EXPECT_EQ(0, object->getRefApiCount()); EXPECT_EQ(1, object->getRefInternalCount()); object->decRefInternal(); } TYPED_TEST(BaseObjectTests, castToObjectOrAbortFromNullptrAbort) { EXPECT_ANY_THROW(castToObjectOrAbort(nullptr)); } TYPED_TEST(BaseObjectTests, castToObjectOrAbortFromBaseTypeYieldsDerivedType) { TypeParam object; typename TypeParam::BaseType 
*baseObject = &object; auto objectNew = castToObjectOrAbort(baseObject); EXPECT_EQ(&object, objectNew); } TYPED_TEST(BaseObjectTests, castToOrAbortDifferentTypeAborts) { TypeParam object; typename TypeParam::BaseType *baseObject = &object; auto notOriginalType = reinterpret_cast(baseObject); EXPECT_ANY_THROW(castToObjectOrAbort(notOriginalType)); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/base_object_tests_mt.cpp000066400000000000000000000025611363734646600301660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "gtest/gtest.h" namespace NEO { TEST(BaseObjectTestsMt, givenObjectOwnershipForEachThreadWhenIncrementingNonAtomicValueThenNoDataRacesAreExpected) { MockCommandQueue *object = new MockCommandQueue; object->takeOwnership(); uint32_t counter = 0; const uint32_t loopCount = 50; const uint32_t numThreads = 3; auto incrementNonAtomicValue = [&](CommandQueue *obj) { for (uint32_t i = 0; i < loopCount; i++) { obj->takeOwnership(); counter++; obj->releaseOwnership(); } }; EXPECT_EQ(0U, object->getCond().peekNumWaiters()); std::thread t1(incrementNonAtomicValue, object); std::thread t2(incrementNonAtomicValue, object); std::thread t3(incrementNonAtomicValue, object); while (object->getCond().peekNumWaiters() != numThreads) { std::this_thread::yield(); } EXPECT_EQ(0u, counter); object->releaseOwnership(); t1.join(); t2.join(); t3.join(); EXPECT_EQ(loopCount * numThreads, counter); object->release(); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/basic_math_tests.cpp000066400000000000000000000144511363734646600273210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "gtest/gtest.h" using namespace Math; TEST(NextPowerOfTwo, aFewCases) { EXPECT_EQ(1u, nextPowerOfTwo(1U)); EXPECT_EQ(2u, nextPowerOfTwo(2U)); EXPECT_EQ(4u, nextPowerOfTwo(3U)); EXPECT_EQ(32u, nextPowerOfTwo(31U)); EXPECT_EQ(32u, nextPowerOfTwo(32U)); EXPECT_EQ(64u, nextPowerOfTwo(33U)); EXPECT_EQ(1u << 31, nextPowerOfTwo((1u << 30U) + 1)); EXPECT_EQ(1u << 31, nextPowerOfTwo(1u << 31U)); EXPECT_EQ(1ULL << 32, nextPowerOfTwo(static_cast((1ULL << 31ULL) + 1))); EXPECT_EQ(1ULL << 32, nextPowerOfTwo(static_cast(1ULL << 32ULL))); } TEST(PrevPowerOfTwo, aroundPowers) { EXPECT_EQ(0u, prevPowerOfTwo(0U)); EXPECT_EQ(1u, prevPowerOfTwo(1U)); for (uint32_t i = 1; i < 32; i++) { uint32_t b = 1 << i; EXPECT_EQ(1u << (i - 1), prevPowerOfTwo(b - 1)); EXPECT_EQ(b, prevPowerOfTwo(b)); EXPECT_EQ(b, prevPowerOfTwo(b + 1)); } EXPECT_EQ(1ULL << 32, prevPowerOfTwo(static_cast(1ULL << 32ULL))); EXPECT_EQ(1ULL << 32, prevPowerOfTwo(static_cast((1ULL << 32ULL) + 7))); } TEST(getMinLsbSet, basicValues) { // clang-format off EXPECT_EQ(0u, getMinLsbSet(0x00000001u)); EXPECT_EQ(1u, getMinLsbSet(0x00000002u)); EXPECT_EQ(16u, getMinLsbSet(0x40010000u)); EXPECT_EQ(30u, getMinLsbSet(0x40000000u)); EXPECT_EQ(31u, getMinLsbSet(0x80000000u)); // clang-format on } TEST(getExponentWithLog2, zeroReturns32) { // clang-format off EXPECT_EQ(32u, log2((uint32_t)0u)); EXPECT_EQ(64u, log2((uint64_t)0u)); // clang-format on } TEST(getExponentWithLog2, basicValues32) { // clang-format off EXPECT_EQ(0u, log2((uint32_t)1u)); EXPECT_EQ(1u, log2((uint32_t)2u)); EXPECT_EQ(2u, log2((uint32_t)4u)); EXPECT_EQ(3u, log2((uint32_t)8u)); EXPECT_EQ(4u, log2((uint32_t)16u)); EXPECT_EQ(10u, log2((uint32_t)1024u)); EXPECT_EQ(31u, log2((uint32_t)2147483648u)); // clang-format on } TEST(getExponentWithLog2, basicValues64) { // clang-format off EXPECT_EQ(0u, log2((uint64_t)1u)); EXPECT_EQ(1u, log2((uint64_t)2u)); EXPECT_EQ(2u, 
log2((uint64_t)4u)); EXPECT_EQ(3u, log2((uint64_t)8u)); EXPECT_EQ(4u, log2((uint64_t)16u)); EXPECT_EQ(10u, log2((uint64_t)1024u)); EXPECT_EQ(31u, log2((uint64_t)2147483648u)); EXPECT_EQ(41u, log2((uint64_t)2199023255552u)); EXPECT_EQ(63u, log2((uint64_t)0x8000000000000000u)); // clang-format on } TEST(getExponentWithLog2, nonPowerOfToValues) { // clang-format off EXPECT_EQ(1u, log2(3u)); EXPECT_EQ(2u, log2(5u)); EXPECT_EQ(2u, log2(7u)); EXPECT_EQ(3u, log2(9u)); EXPECT_EQ(3u, log2(10u)); EXPECT_EQ(10u, log2(1025u)); // clang-format on } struct Float2HalfParams { float floatInput; uint16_t uintOutput; }; // clang-format off Float2HalfParams float2HalfParams[] = { {0.0f, 0x0000}, {1.0f, 0x3c00}, {PosInfinity.f, 0x7c00}, {Nan.f, 0x7e00}, {std::ldexp(1.0f, 16), 0x7bff}, {std::ldexp(1.0f, -25), 0x0000}, {std::ldexp(1.0f, -15), 0x0200}}; // clang-format on typedef ::testing::TestWithParam Float2HalfTest; TEST_P(Float2HalfTest, variousCases) { float floatValue = GetParam().floatInput; uint16_t uint16ofHalf = float2Half(floatValue); uint16_t uintOutput = GetParam().uintOutput; EXPECT_EQ(uintOutput, uint16ofHalf); } INSTANTIATE_TEST_CASE_P(Float2Half, Float2HalfTest, ::testing::ValuesIn(float2HalfParams)); struct l3Config { union { unsigned int RawValue; struct { unsigned int SlmModeEnable : 1; unsigned int UrbAllocation : 7; unsigned int GpGpuCreditEnable : 1; unsigned int ErrorDetectionBehaviour : 1; unsigned int Reserved : 1; unsigned int ReadOnlyClientPool : 7; unsigned int DCWayAssignement : 7; unsigned int AllL3WayAssignement : 7; } bits; }; }; TEST(l3configsGenerator, givenInputValuesWhenPassedToL3ConfigThenRawValueIsProduced) { l3Config config; config.bits = { 0, // SLM Enabled 0x30, // URB Allocation 1, // GPGPU L3 Credit Mode Enable 0, // Error Detection Behavior Control 0, // Reserved - MBZ 0, // Read-Only Client Pool 0, // DC-Way Assignment 0x30}; EXPECT_EQ(config.RawValue, 0x60000160u); l3Config config2; config2.RawValue = 0x80000140u; EXPECT_EQ(0x40u, 
config2.bits.AllL3WayAssignement); EXPECT_EQ(0x20u, config2.bits.UrbAllocation); } struct ElementCountsTestData { size_t x, y, z; size_t result; }; ElementCountsTestData elementCountInputData[] = { {1, 2, 3, 6}, {0, 0, 1, 1}, {0, 1, 0, 1}, {1, 0, 0, 1}, {5, 0, 10, 50}, {0, 0, 30, 30}, }; typedef ::testing::TestWithParam ComputeTotalElementsCount; TEST_P(ComputeTotalElementsCount, givenVariousInputVectorsWhenComputeTotalElementsCountIsUsedThenProperProductIsComputed) { Vec3 inputData(GetParam().x, GetParam().y, GetParam().z); EXPECT_EQ(GetParam().result, computeTotalElementsCount(inputData)); } TEST(isPow2Test, WhenArgZeroThenReturnFalse) { EXPECT_FALSE(isPow2(0u)); } TEST(isPow2Test, WhenArgNonPow2ThenReturnFalse) { EXPECT_FALSE(isPow2(3u)); EXPECT_FALSE(isPow2(5u)); EXPECT_FALSE(isPow2(6u)); EXPECT_FALSE(isPow2(7u)); EXPECT_FALSE(isPow2(10u)); } TEST(isPow2Test, WhenArgPow2ThenReturnTrue) { EXPECT_TRUE(isPow2(1u)); EXPECT_TRUE(isPow2(4u)); EXPECT_TRUE(isPow2(8u)); EXPECT_TRUE(isPow2(128u)); EXPECT_TRUE(isPow2(4096u)); } TEST(ffs, givenZeroReturnMaxRange) { EXPECT_EQ(std::numeric_limits::max(), ffs(0U)); } TEST(ffs, givenNonZeroReturnFirstSetBitIndex) { EXPECT_EQ(0U, ffs(0b1U)); EXPECT_EQ(0U, ffs(0b11U)); EXPECT_EQ(1U, ffs(0b10U)); EXPECT_EQ(3U, ffs(0b1001000U)); EXPECT_EQ(31U, ffs(1U << 31U)); EXPECT_EQ(16U, ffs((1U << 31U) | (1U << 31U) | (1U << 16U))); EXPECT_EQ(16ULL, ffs((1ULL << 63ULL) | (1ULL << 32ULL) | (1ULL << 16ULL))); EXPECT_EQ(63ULL, ffs(1ULL << 63ULL)); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/bit_helpers_tests.cpp000066400000000000000000000156221363734646600275300ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/bit_helpers.h" #include "gtest/gtest.h" using namespace NEO; TEST(IsBitSetTests, givenDifferentValuesWhenTestingIsBitSetThenCorrectValueIsReturned) { size_t field1 = 0; size_t field2 = 0b1; size_t field3 = 0b1000; size_t field4 = 
0b1010; EXPECT_FALSE(isBitSet(field1, 0)); EXPECT_FALSE(isBitSet(field1, 1)); EXPECT_FALSE(isBitSet(field1, 2)); EXPECT_FALSE(isBitSet(field1, 3)); EXPECT_TRUE(isBitSet(field2, 0)); EXPECT_FALSE(isBitSet(field2, 1)); EXPECT_FALSE(isBitSet(field2, 2)); EXPECT_FALSE(isBitSet(field2, 3)); EXPECT_FALSE(isBitSet(field3, 0)); EXPECT_FALSE(isBitSet(field3, 1)); EXPECT_FALSE(isBitSet(field3, 2)); EXPECT_TRUE(isBitSet(field3, 3)); EXPECT_FALSE(isBitSet(field4, 0)); EXPECT_TRUE(isBitSet(field4, 1)); EXPECT_FALSE(isBitSet(field4, 2)); EXPECT_TRUE(isBitSet(field4, 3)); } TEST(IsAnyBitSetTests, givenDifferentValuesWhenTestingIsAnyBitSetThenCorrectValueIsReturned) { EXPECT_FALSE(isAnyBitSet(0, 0)); EXPECT_FALSE(isAnyBitSet(0, 0b1)); EXPECT_FALSE(isAnyBitSet(0, 0b10)); EXPECT_FALSE(isAnyBitSet(0, 0b1000)); EXPECT_FALSE(isAnyBitSet(0, 0b1010)); EXPECT_FALSE(isAnyBitSet(0, 0b1111)); EXPECT_FALSE(isAnyBitSet(0b1, 0)); EXPECT_TRUE(isAnyBitSet(0b1, 0b1)); EXPECT_FALSE(isAnyBitSet(0b1, 0b10)); EXPECT_FALSE(isAnyBitSet(0b1, 0b1000)); EXPECT_FALSE(isAnyBitSet(0b1, 0b1010)); EXPECT_TRUE(isAnyBitSet(0b1, 0b1111)); EXPECT_FALSE(isAnyBitSet(0b10, 0)); EXPECT_FALSE(isAnyBitSet(0b10, 0b1)); EXPECT_TRUE(isAnyBitSet(0b10, 0b10)); EXPECT_FALSE(isAnyBitSet(0b10, 0b1000)); EXPECT_TRUE(isAnyBitSet(0b10, 0b1010)); EXPECT_TRUE(isAnyBitSet(0b10, 0b1111)); EXPECT_FALSE(isAnyBitSet(0b1000, 0)); EXPECT_FALSE(isAnyBitSet(0b1000, 0b1)); EXPECT_FALSE(isAnyBitSet(0b1000, 0b10)); EXPECT_TRUE(isAnyBitSet(0b1000, 0b1000)); EXPECT_TRUE(isAnyBitSet(0b1000, 0b1010)); EXPECT_TRUE(isAnyBitSet(0b1000, 0b1111)); EXPECT_FALSE(isAnyBitSet(0b1010, 0)); EXPECT_FALSE(isAnyBitSet(0b1010, 0b1)); EXPECT_TRUE(isAnyBitSet(0b1010, 0b10)); EXPECT_TRUE(isAnyBitSet(0b1010, 0b1000)); EXPECT_TRUE(isAnyBitSet(0b1010, 0b1010)); EXPECT_TRUE(isAnyBitSet(0b1010, 0b1111)); EXPECT_FALSE(isAnyBitSet(0b1111, 0)); EXPECT_TRUE(isAnyBitSet(0b1111, 0b1)); EXPECT_TRUE(isAnyBitSet(0b1111, 0b10)); EXPECT_TRUE(isAnyBitSet(0b1111, 0b1000)); 
EXPECT_TRUE(isAnyBitSet(0b1111, 0b1010)); EXPECT_TRUE(isAnyBitSet(0b1111, 0b1111)); } TEST(IsValueSetTests, givenDifferentValuesWhenTestingIsValueSetThenCorrectValueIsReturned) { size_t field1 = 0; size_t field2 = 0b1; size_t field3 = 0b10; size_t field4 = 0b1000; size_t field5 = 0b1010; size_t field6 = 0b1111; EXPECT_FALSE(isValueSet(field1, field2)); EXPECT_FALSE(isValueSet(field1, field3)); EXPECT_FALSE(isValueSet(field1, field4)); EXPECT_FALSE(isValueSet(field1, field5)); EXPECT_FALSE(isValueSet(field1, field6)); EXPECT_TRUE(isValueSet(field2, field2)); EXPECT_FALSE(isValueSet(field2, field3)); EXPECT_FALSE(isValueSet(field2, field4)); EXPECT_FALSE(isValueSet(field2, field5)); EXPECT_FALSE(isValueSet(field2, field6)); EXPECT_FALSE(isValueSet(field3, field2)); EXPECT_TRUE(isValueSet(field3, field3)); EXPECT_FALSE(isValueSet(field3, field4)); EXPECT_FALSE(isValueSet(field3, field5)); EXPECT_FALSE(isValueSet(field3, field6)); EXPECT_FALSE(isValueSet(field4, field2)); EXPECT_FALSE(isValueSet(field4, field3)); EXPECT_TRUE(isValueSet(field4, field4)); EXPECT_FALSE(isValueSet(field4, field5)); EXPECT_FALSE(isValueSet(field4, field6)); EXPECT_FALSE(isValueSet(field5, field2)); EXPECT_TRUE(isValueSet(field5, field3)); EXPECT_TRUE(isValueSet(field5, field4)); EXPECT_TRUE(isValueSet(field5, field5)); EXPECT_FALSE(isValueSet(field5, field6)); EXPECT_TRUE(isValueSet(field6, field2)); EXPECT_TRUE(isValueSet(field6, field3)); EXPECT_TRUE(isValueSet(field6, field4)); EXPECT_TRUE(isValueSet(field6, field5)); EXPECT_TRUE(isValueSet(field6, field6)); } TEST(IsFieldValidTests, givenDifferentValuesWhenTestingIsFieldValidThenCorrectValueIsReturned) { size_t field1 = 0; size_t field2 = 0b1; size_t field3 = 0b10; size_t field4 = 0b1000; size_t field5 = 0b1010; size_t field6 = 0b1111; EXPECT_TRUE(isFieldValid(field1, field1)); EXPECT_TRUE(isFieldValid(field1, field2)); EXPECT_TRUE(isFieldValid(field1, field3)); EXPECT_TRUE(isFieldValid(field1, field4)); EXPECT_TRUE(isFieldValid(field1, 
field5)); EXPECT_TRUE(isFieldValid(field1, field6)); EXPECT_FALSE(isFieldValid(field2, field1)); EXPECT_TRUE(isFieldValid(field2, field2)); EXPECT_FALSE(isFieldValid(field2, field3)); EXPECT_FALSE(isFieldValid(field2, field4)); EXPECT_FALSE(isFieldValid(field2, field5)); EXPECT_TRUE(isFieldValid(field2, field6)); EXPECT_FALSE(isFieldValid(field3, field1)); EXPECT_FALSE(isFieldValid(field3, field2)); EXPECT_TRUE(isFieldValid(field3, field3)); EXPECT_FALSE(isFieldValid(field3, field4)); EXPECT_TRUE(isFieldValid(field3, field5)); EXPECT_TRUE(isFieldValid(field3, field6)); EXPECT_FALSE(isFieldValid(field4, field1)); EXPECT_FALSE(isFieldValid(field4, field2)); EXPECT_FALSE(isFieldValid(field4, field3)); EXPECT_TRUE(isFieldValid(field4, field4)); EXPECT_TRUE(isFieldValid(field4, field5)); EXPECT_TRUE(isFieldValid(field4, field6)); EXPECT_FALSE(isFieldValid(field5, field1)); EXPECT_FALSE(isFieldValid(field5, field2)); EXPECT_FALSE(isFieldValid(field5, field3)); EXPECT_FALSE(isFieldValid(field5, field4)); EXPECT_TRUE(isFieldValid(field5, field5)); EXPECT_TRUE(isFieldValid(field5, field6)); EXPECT_FALSE(isFieldValid(field6, field1)); EXPECT_FALSE(isFieldValid(field6, field2)); EXPECT_FALSE(isFieldValid(field6, field3)); EXPECT_FALSE(isFieldValid(field6, field4)); EXPECT_FALSE(isFieldValid(field6, field5)); EXPECT_TRUE(isFieldValid(field6, field6)); } TEST(SetBitsTests, givenDifferentValuesWhenTestingSetBitsThenCorrectValueIsReturned) { EXPECT_EQ(0b0u, setBits(0b0, false, 0b0)); EXPECT_EQ(0b0u, setBits(0b0, false, 0b1)); EXPECT_EQ(0b1u, setBits(0b1, false, 0b0)); EXPECT_EQ(0b0u, setBits(0b1, false, 0b1)); EXPECT_EQ(0b0u, setBits(0b0, true, 0b0)); EXPECT_EQ(0b1u, setBits(0b0, true, 0b1)); EXPECT_EQ(0b1u, setBits(0b1, true, 0b0)); EXPECT_EQ(0b1u, setBits(0b1, true, 0b1)); EXPECT_EQ(0b1010u, setBits(0b1010, false, 0b101)); EXPECT_EQ(0b1111u, setBits(0b1010, true, 0b101)); EXPECT_EQ(0b101u, setBits(0b101, false, 0b1010)); EXPECT_EQ(0b1111u, setBits(0b101, true, 0b1010)); 
EXPECT_EQ(0b0u, setBits(0b1010, false, 0b1010)); EXPECT_EQ(0b1010u, setBits(0b1010, true, 0b1010)); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/built_ins_helper.cpp000066400000000000000000000025361363734646600273350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/built_ins_helper.h" #include "shared/source/device/device.h" #include "opencl/source/compiler_interface/default_cl_cache_config.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_sip.h" namespace NEO { namespace MockSipData { std::unique_ptr mockSipKernel; SipKernelType calledType = SipKernelType::COUNT; bool called = false; } // namespace MockSipData const SipKernel &initSipKernel(SipKernelType type, Device &device) { MockSipData::calledType = type; MockSipData::mockSipKernel->type = type; MockSipData::called = true; return *MockSipData::mockSipKernel; } Program *createProgramForSip(ExecutionEnvironment &executionEnvironment, Context *context, std::vector &binary, size_t size, cl_int *errcodeRet, Device *device) { GlobalMockSipProgram::sipProgram->incRefApi(); GlobalMockSipProgram::sipProgram->resetAllocationState(); return GlobalMockSipProgram::sipProgram; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/cl_helper_tests.cpp000066400000000000000000000055221363734646600271630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/cl_helper.h" #include "gtest/gtest.h" #include TEST(ClHelper, whenCallGetStringWithCmdTypeFunctionThenGetProperCmdTypeAsString) { std::array expected = {{"CL_COMMAND_NDRANGE_KERNEL", "CL_COMMAND_TASK", "CL_COMMAND_NATIVE_KERNEL", "CL_COMMAND_READ_BUFFER", "CL_COMMAND_WRITE_BUFFER", 
"CL_COMMAND_COPY_BUFFER", "CL_COMMAND_READ_IMAGE", "CL_COMMAND_WRITE_IMAGE", "CL_COMMAND_COPY_IMAGE", "CL_COMMAND_COPY_IMAGE_TO_BUFFER", "CL_COMMAND_COPY_BUFFER_TO_IMAGE", "CL_COMMAND_MAP_BUFFER", "CL_COMMAND_MAP_IMAGE", "CL_COMMAND_UNMAP_MEM_OBJECT", "CL_COMMAND_MARKER", "CL_COMMAND_ACQUIRE_GL_OBJECTS", "CL_COMMAND_RELEASE_GL_OBJECTS", "CL_COMMAND_READ_BUFFER_RECT", "CL_COMMAND_WRITE_BUFFER_RECT", "CL_COMMAND_COPY_BUFFER_RECT", "CL_COMMAND_USER", "CL_COMMAND_BARRIER", "CL_COMMAND_MIGRATE_MEM_OBJECTS", "CL_COMMAND_FILL_BUFFER", "CL_COMMAND_FILL_IMAGE", "CL_COMMAND_SVM_FREE", "CL_COMMAND_SVM_MEMCPY", "CL_COMMAND_SVM_MEMFILL", "CL_COMMAND_SVM_MAP", "CL_COMMAND_SVM_UNMAP"}}; for (int i = CL_COMMAND_NDRANGE_KERNEL; i <= CL_COMMAND_SVM_UNMAP; i++) { EXPECT_STREQ(expected[i - CL_COMMAND_NDRANGE_KERNEL].c_str(), NEO::cmdTypetoString(i).c_str()); } std::stringstream stream; stream << "CMD_UNKNOWN:" << (cl_command_type)-1; EXPECT_STREQ(stream.str().c_str(), NEO::cmdTypetoString(-1).c_str()); EXPECT_STREQ("CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR", NEO::cmdTypetoString(CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR).c_str()); }compute-runtime-20.13.16352/opencl/test/unit_test/helpers/cmd_buffer_validator.h000066400000000000000000000333241363734646600276130ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/stackvec.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include #include #include namespace NEO { struct CmdValidator { CmdValidator() { } virtual ~CmdValidator() = default; virtual bool operator()(GenCmdList::iterator it, size_t numInSection, const std::string &member, std::string &outFailReason) = 0; }; template struct CmdValidatorWithStaticStorage : CmdValidator { static ChildT *get() { static ChildT val; return &val; } }; template struct GenericCmdValidator : CmdValidatorWithStaticStorage> { bool operator()(GenCmdList::iterator it, size_t numInSection, const 
std::string &member, std::string &outFailReason) override { auto cmd = genCmdCast(*it); UNRECOVERABLE_IF(cmd == nullptr); if (Expected != (cmd->*Getter)()) { outFailReason = member + " - expected: " + std::to_string(Expected) + ", got: " + std::to_string((cmd->*Getter)()); return false; } return true; } }; struct NamedValidator { NamedValidator(CmdValidator *validator) : NamedValidator(validator, "Unspecified") { } NamedValidator(CmdValidator *validator, const char *name) : validator(validator), name(name) { } CmdValidator *validator; const char *name; }; #define EXPECT_MEMBER(TYPE, FUNC, EXPECTED) \ NamedValidator { GenericCmdValidatorFUNC)()), &TYPE::FUNC, EXPECTED>::get(), #FUNC } using Expects = std::vector; struct MatchCmd { MatchCmd(int amount, bool matchesAny) : amount(amount), matchesAny(matchesAny) { } MatchCmd(int amount) : MatchCmd(amount, false) { } virtual ~MatchCmd() = default; virtual bool matches(GenCmdList::iterator it) const = 0; virtual bool validates(GenCmdList::iterator it, std::string &outReason) const = 0; virtual const char *getName() const = 0; virtual void capture(GenCmdList::iterator it) = 0; int getExpectedCount() const { return amount; } bool getMatchesAny() const { return matchesAny; } protected: int amount = 0; bool matchesAny = false; }; constexpr int32_t AnyNumber = -1; constexpr int32_t AtLeastOne = -2; inline std::string countToString(int32_t count) { if (count == AnyNumber) { return "AnyNumber"; } else if (count == AtLeastOne) { return "AtLeastOne"; } else { return std::to_string(count); } } inline bool notPreciseNumber(int32_t count) { return (count == AnyNumber) || (count == AtLeastOne); } struct MatchAnyCmd : MatchCmd { MatchAnyCmd(int amount) : MatchCmd(amount, true) { if (amount > 0) { captured.reserve(amount); } } bool matches(GenCmdList::iterator it) const override { return true; } bool validates(GenCmdList::iterator it, std::string &outReason) const override { return true; } void capture(GenCmdList::iterator it) override 
{ captured.push_back(*it); } const char *getName() const override { return "AnyCommand"; } protected: StackVec captured; }; template struct MatchHwCmd : MatchCmd { MatchHwCmd(int amount) : MatchCmd(amount) { if (amount > 0) { captured.reserve(amount); } } MatchHwCmd(int amount, Expects &&validators) : MatchHwCmd(amount) { this->validators.swap(validators); } bool matches(GenCmdList::iterator it) const override { return nullptr != genCmdCast(*it); } bool validates(GenCmdList::iterator it, std::string &outReason) const override { for (auto &v : validators) { if (false == (*v.validator)(it, captured.size(), v.name, outReason)) { return false; } } return true; } void capture(GenCmdList::iterator it) override { UNRECOVERABLE_IF(false == matches(it)); UNRECOVERABLE_IF(captured.size() == static_cast(amount)); captured.push_back(genCmdCast(*it)); } const char *getName() const override { CmdType cmd; cmd.init(); return HardwareParse::getCommandName(&cmd); } protected: StackVec captured; Expects validators; }; template inline bool expectCmdBuff(GenCmdList::iterator begin, GenCmdList::iterator end, std::vector &&expectedCmdBuffMatchers, std::string *outReason = nullptr) { if (expectedCmdBuffMatchers.size() == 0) { return begin == end; } bool failed = false; std::string failReason; auto it = begin; int cmdNum = 0; size_t currentMatcher = 0; int currentMatcherCount = 0; StackVec, 32> matchedCommandNames; auto matchedCommandsString = [&]() -> std::string { if (matchedCommandNames.size() == 0) { return "EMPTY"; } std::string ret = ""; for (size_t i = 0; i < matchedCommandNames.size(); ++i) { if (matchedCommandNames[i].second) { ret += std::to_string(i) + ":ANY(" + matchedCommandNames[i].first + ") "; } else { ret += std::to_string(i) + ":" + matchedCommandNames[i].first + " "; } } return ret; }; while (it != end) { if (currentMatcher < expectedCmdBuffMatchers.size()) { auto currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); if 
(expectedCmdBuffMatchers[currentMatcher]->getMatchesAny() && ((currentMatcherExpectedCount == AnyNumber) || ((currentMatcherExpectedCount == AtLeastOne) && (currentMatcherCount > 0)))) { if (expectedCmdBuffMatchers.size() > currentMatcher + 1) { // eat as many as possible but proceed to next matcher when possible if (expectedCmdBuffMatchers[currentMatcher + 1]->matches(it)) { ++currentMatcher; currentMatcherCount = 0; } } } else if ((notPreciseNumber(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount())) && (false == expectedCmdBuffMatchers[currentMatcher]->matches(it))) { // proceed to next matcher if not matched if ((expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == AtLeastOne) && (currentMatcherCount < 1)) { failed = true; failReason = "Unmatched cmd#" + std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " - expected " + std::string(expectedCmdBuffMatchers[currentMatcher]->getName()) + "(" + countToString(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); break; } ++currentMatcher; currentMatcherCount = 0; } while ((currentMatcher < expectedCmdBuffMatchers.size()) && expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == 0) { if (expectedCmdBuffMatchers[currentMatcher]->matches(it)) { failed = true; failReason = "Unmatched cmd#" + std::to_string(cmdNum) + " - expected anything but " + std::string(expectedCmdBuffMatchers[currentMatcher]->getName()) + "(" + countToString(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); break; } ++currentMatcher; currentMatcherCount = 0; } } if (currentMatcher >= expectedCmdBuffMatchers.size()) { failed = true; std::string unmatchedCommands; while (it != end) { unmatchedCommands += std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " "; ++it; ++cmdNum; } failReason = 
"Unexpected commands at the end of the command buffer : " + unmatchedCommands + ", AFTER : " + matchedCommandsString(); break; } if (false == expectedCmdBuffMatchers[currentMatcher]->matches(it)) { failed = true; failReason = "Unmatched cmd#" + std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " - expected " + std::string(expectedCmdBuffMatchers[currentMatcher]->getName()) + "(" + countToString(expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); break; } if (false == expectedCmdBuffMatchers[currentMatcher]->validates(it, failReason)) { failReason = "cmd#" + std::to_string(cmdNum) + " (" + HardwareParse::getCommandName(*it) + ") failed validation - reason : " + failReason + " after : " + matchedCommandsString(); failed = true; break; } matchedCommandNames.push_back(std::make_pair(HardwareParse::getCommandName(*it), expectedCmdBuffMatchers[currentMatcher]->getMatchesAny())); ++currentMatcherCount; if (currentMatcherCount == expectedCmdBuffMatchers[currentMatcher]->getExpectedCount()) { ++currentMatcher; currentMatcherCount = 0; } ++cmdNum; ++it; } if (failed == false) { while ((currentMatcher < expectedCmdBuffMatchers.size()) && ((expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == 0) || (expectedCmdBuffMatchers[currentMatcher]->getExpectedCount() == AnyNumber))) { ++currentMatcher; currentMatcherCount = 0; } if (currentMatcher == expectedCmdBuffMatchers.size()) { // no more matchers } else if (currentMatcher + 1 == expectedCmdBuffMatchers.size()) { // last matcher auto currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); if ((currentMatcherExpectedCount == AtLeastOne) && (currentMatcherCount < 1)) { failReason = "Unexpected command buffer end at cmd#" + std::to_string(cmdNum) + " - expected " + expectedCmdBuffMatchers[currentMatcher]->getName() + "(" + countToString(currentMatcherExpectedCount) + " - " + 
std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); failed = true; } if ((false == notPreciseNumber(currentMatcherExpectedCount)) && (currentMatcherExpectedCount != currentMatcherCount)) { failReason = "Unexpected command buffer end at cmd#" + std::to_string(cmdNum) + " - expected " + expectedCmdBuffMatchers[currentMatcher]->getName() + "(" + countToString(currentMatcherExpectedCount) + " - " + std::to_string(currentMatcherCount) + ") after : " + matchedCommandsString(); failed = true; } } else { // many matchers left std::string expectedMatchers = ""; int32_t currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); expectedMatchers = expectedCmdBuffMatchers[currentMatcher]->getName() + std::string("(") + countToString(currentMatcherExpectedCount) + " - " + std::to_string(currentMatcherCount) + "), "; ++currentMatcher; while (currentMatcher < expectedCmdBuffMatchers.size()) { currentMatcherExpectedCount = expectedCmdBuffMatchers[currentMatcher]->getExpectedCount(); expectedMatchers += expectedCmdBuffMatchers[currentMatcher]->getName() + std::string("(") + countToString(currentMatcherExpectedCount) + " - 0), "; ++currentMatcher; } failReason = "Unexpected command buffer end at cmd#" + std::to_string(cmdNum) + " - expected " + expectedMatchers + " after : " + matchedCommandsString(); failed = true; } } else { if ((it != end) && (++it != end)) { ++cmdNum; failReason += "\n Unconsumed commands after failed one : "; while (it != end) { failReason += std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " "; ++cmdNum; ++it; } } } if (failed) { if (outReason != nullptr) { failReason += "\n Note : Input command buffer was : "; it = begin; cmdNum = 0; while (it != end) { failReason += std::to_string(cmdNum) + ":" + HardwareParse::getCommandName(*it) + " "; ++cmdNum; ++it; } *outReason = failReason; } } for (auto *matcher : expectedCmdBuffMatchers) { delete matcher; } return (failed == false); } 
/* Convenience overload: parses commandStream starting at startOffset with HardwareParse, then forwards the parsed command list, the matchers and outReason to the iterator-based expectCmdBuff. */
template inline bool expectCmdBuff(NEO::LinearStream &commandStream, size_t startOffset, std::vector &&expectedCmdBuffMatchers, std::string *outReason = nullptr) { HardwareParse hwParser; hwParser.parseCommands(commandStream, startOffset); return expectCmdBuff(hwParser.cmdList.begin(), hwParser.cmdList.end(), std::move(expectedCmdBuffMatchers), outReason); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/cmd_buffer_validator_tests.cpp000066400000000000000000000357501363734646600313750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/cmd_buffer_validator.h" #include "test.h" #include "hw_parse.h" using HwParseTest = ::testing::Test; using namespace NEO; /* Empty command range: an empty matcher list, a zero-count matcher and an AnyNumber matcher are accepted; AtLeastOne and an exact count of 1 are rejected. */ HWTEST_F(HwParseTest, WhenEmptyBufferThenDontExpectCommands) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; bool cmdBuffOk = false; GenCmdList::iterator beg, end; end = beg; cmdBuffOk = expectCmdBuff(beg, end, std::vector{}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(0), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(AnyNumber), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(AtLeastOne), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(beg, end, std::vector{ new MatchHwCmd(1), }); EXPECT_FALSE(cmdBuffOk); } /* Three commands are written to the stream (initialized from cmdInitPipeControl, cmdInitStateBaseAddress, cmdInitPipeControl); exercises MatchAnyCmd sequences with exact, AtLeastOne and AnyNumber counts against that stream. */ HWTEST_F(HwParseTest, WhenExpectingAnyCommandThenAllCommandsAreValidAsLongAsTheCountMatches) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = expectCmdBuff(stream, 0, 
std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AtLeastOne), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AnyNumber), }); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(AtLeastOne)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(1), new MatchAnyCmd(AnyNumber)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AtLeastOne), new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(AnyNumber), new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1), new MatchAnyCmd(1), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchAnyCmd(1)}); EXPECT_FALSE(cmdBuffOk); } /* Same three-command stream checked with MatchHwCmd sequences; only the first sequence is expected to pass. NOTE(review): the matcher template arguments are not visible in this text, so the command type each MatchHwCmd targets cannot be read here. */ HWTEST_F(HwParseTest, WhenExpectingSpecificSetOfCommandsThenNoOtherCommandBufferIsValid) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), 
new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); } /* Six commands are written to the stream (one from cmdInitPipeControl, four from cmdInitStateBaseAddress, one from cmdInitPipeControl); exercises exact, AnyNumber, AtLeastOne and zero-count MatchHwCmd sequences. */ HWTEST_F(HwParseTest, WhenExpectingAnyNumberOfCommandsThenOnlyTypeOfCommandMatters) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitStateBaseAddress; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(1), new MatchHwCmd(1)}); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(1), new MatchHwCmd(AnyNumber), new MatchHwCmd(1)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new 
MatchHwCmd(AtLeastOne)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), new MatchHwCmd(AtLeastOne), }); EXPECT_FALSE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(0), new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber), new MatchHwCmd(0)}); EXPECT_TRUE(cmdBuffOk); } /* The middle command has GeneralStateBaseAddressModifyEnable set to true; a sequence whose EXPECT_MEMBER validator expects true passes, and one expecting false makes expectCmdBuff return false. */ HWTEST_F(HwParseTest, WhenCommandMemberValidatorFailsThenCommandBufferValidationFails) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; bool cmdBuffOk = false; char buffer[8192]; LinearStream stream{alignUp(buffer, 4096), 4096}; *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; auto sba = stream.getSpaceForCmd(); *sba = FamilyType::cmdInitStateBaseAddress; sba->setGeneralStateBaseAddressModifyEnable(true); *stream.getSpaceForCmd() = FamilyType::cmdInitPipeControl; cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber, Expects{EXPECT_MEMBER(STATE_BASE_ADDRESS, getGeneralStateBaseAddressModifyEnable, true)}), new MatchHwCmd(AnyNumber)}); EXPECT_TRUE(cmdBuffOk); cmdBuffOk = expectCmdBuff(stream, 0, std::vector{ new MatchHwCmd(AnyNumber), new MatchHwCmd(AnyNumber, Expects{EXPECT_MEMBER(STATE_BASE_ADDRESS, getGeneralStateBaseAddressModifyEnable, false)}), new MatchHwCmd(AnyNumber)}); EXPECT_FALSE(cmdBuffOk); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/debug_helpers.cpp000066400000000000000000000007121363734646600266100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/debug_helpers.h" #include #include /* debugBreak has an empty body (no-op). abortUnrecoverable formats "Abort was called at <line> line in file <file>" and throws it as std::runtime_error. */ namespace NEO { void debugBreak(int line, const char *file) { } void abortUnrecoverable(int line, const char *file) { std::string message = "Abort was called at " + 
std::to_string(line) + " line in file " + file; throw std::runtime_error(message); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/debug_helpers_tests.cpp000066400000000000000000000005361363734646600300360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/debug_helpers.h" #include "gtest/gtest.h" using namespace NEO; /* Invokes DEBUG_BREAK_IF with a true condition (!false); the test body contains no further assertions. */ TEST(debugBreak, whenDebugBreakCalledInTestThenNothingIsThrown) { DEBUG_BREAK_IF(!false); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/deferred_deleter_helpers_tests.cpp000066400000000000000000000014221363734646600322270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/deferred_deleter_helper.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "gtest/gtest.h" using namespace NEO; /* With the EnableDeferredDeleter debug flag set to false, isDeferredDeleterEnabled() must return false. DebugManagerStateRestore is declared before the flag is changed (presumably restores it on scope exit - confirm). */ TEST(deferredDeleterHelper, DefferedDeleterIsDisabledWhenCheckIFDeferrDeleterIsEnabledThenCorrectValueReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableDeferredDeleter.set(false); EXPECT_FALSE(isDeferredDeleterEnabled()); } /* Counterpart of the test above: flag set to true, isDeferredDeleterEnabled() must return true. */ TEST(deferredDeleterHelper, DefferedDeleterIsEnabledWhenCheckIFDeferrDeleterIsEnabledThenCorrectValueReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableDeferredDeleter.set(true); EXPECT_TRUE(isDeferredDeleterEnabled()); }compute-runtime-20.13.16352/opencl/test/unit_test/helpers/dirty_state_helpers_tests.cpp000066400000000000000000000116741363734646600313100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/dirty_state_helpers.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/graphics_allocation.h" 
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "gtest/gtest.h" #include namespace DirtyStateHelpers { using namespace NEO; /* Expected page count used by the assertions below: (sizeInBytes + pageSize) / pageSize. NOTE(review): this adds a whole page rather than pageSize - 1, so an exact multiple of pageSize yields one extra page; presumably mirrors what HeapDirtyState computes - confirm. */ size_t getSizeInPages(size_t sizeInBytes) { return (sizeInBytes + MemoryConstants::pageSize) / MemoryConstants::pageSize; } void *buffer = reinterpret_cast(123); constexpr size_t bufferSize = 456; /* Fixture: SetUp creates an IndirectHeap over heapAllocation (built from buffer/bufferSize) and asserts that the heap's CPU base and max available space equal the allocation's underlying buffer and size. MockHeapDirtyState re-declares gpuBaseAddress and sizeInPages with using-declarations so the tests can read them. */ struct HeapDirtyStateTests : ::testing::Test { struct MockHeapDirtyState : public HeapDirtyState { using HeapDirtyState::gpuBaseAddress; using HeapDirtyState::sizeInPages; }; void SetUp() override { stream.reset(new IndirectHeap(&heapAllocation)); ASSERT_EQ(heapAllocation.getUnderlyingBuffer(), stream->getCpuBase()); ASSERT_EQ(heapAllocation.getUnderlyingBufferSize(), stream->getMaxAvailableSpace()); } MockGraphicsAllocation heapAllocation = {buffer, bufferSize}; std::unique_ptr stream; MockHeapDirtyState mockHeapDirtyState; }; /* A fresh MockHeapDirtyState reports sizeInPages == 0 and gpuBaseAddress == 0. */ TEST_F(HeapDirtyStateTests, initialValues) { EXPECT_EQ(0u, mockHeapDirtyState.sizeInPages); EXPECT_EQ(0llu, mockHeapDirtyState.gpuBaseAddress); } /* The first updateAndCheck returns true and records size and base; a second call on the unchanged heap returns false and both values stay the same. */ TEST_F(HeapDirtyStateTests, givenInitializedObjectWhenUpdatedMultipleTimesThenSetValuesAndReturnDirtyOnce) { EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); EXPECT_EQ(getSizeInPages(bufferSize), mockHeapDirtyState.sizeInPages); EXPECT_EQ(castToUint64(buffer), mockHeapDirtyState.gpuBaseAddress); EXPECT_FALSE(mockHeapDirtyState.updateAndCheck(stream.get())); EXPECT_EQ(getSizeInPages(bufferSize), mockHeapDirtyState.sizeInPages); EXPECT_EQ(castToUint64(buffer), mockHeapDirtyState.gpuBaseAddress); } /* After the allocation and the heap are pointed at a different address, updateAndCheck returns true again and the new base is recorded. */ TEST_F(HeapDirtyStateTests, givenNonDirtyObjectWhenAddressChangedThenReturnDirty) { EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); auto newBuffer = ptrOffset(buffer, MemoryConstants::pageSize + 1); auto graphicsAllocation = stream->getGraphicsAllocation(); graphicsAllocation->setCpuPtrAndGpuAddress(newBuffer, castToUint64(newBuffer)); stream->replaceBuffer(newBuffer, bufferSize); 
EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); EXPECT_EQ(1u, mockHeapDirtyState.sizeInPages); EXPECT_EQ(castToUint64(newBuffer), mockHeapDirtyState.gpuBaseAddress); } /* Switching to an IndirectHeap constructed with nullptr is reported as dirty and resets sizeInPages to 0. */ TEST_F(HeapDirtyStateTests, givenIndirectHeapWithoutGraphicsAllocationWhenUpdateAndCheckIsCalledThenSizeIsSetToZero) { IndirectHeap nullHeap(nullptr); EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); EXPECT_NE(0llu, mockHeapDirtyState.sizeInPages); EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(&nullHeap)); EXPECT_EQ(0llu, mockHeapDirtyState.sizeInPages); } /* Growing the heap buffer by one page while keeping the same base is reported as dirty. */ TEST_F(HeapDirtyStateTests, givenNonDirtyObjectWhenSizeChangedThenReturnDirty) { EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); auto newBufferSize = bufferSize + MemoryConstants::pageSize; stream->replaceBuffer(stream->getCpuBase(), newBufferSize); EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); EXPECT_EQ(getSizeInPages(newBufferSize), mockHeapDirtyState.sizeInPages); EXPECT_EQ(castToUint64(buffer), mockHeapDirtyState.gpuBaseAddress); } /* Changing both the allocation address and the size is reported as dirty; the new size and base are recorded. */ TEST_F(HeapDirtyStateTests, givenNonDirtyObjectWhenSizeAndBufferChangedThenReturnDirty) { EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); auto newBuffer = ptrOffset(buffer, 1); auto newBufferSize = bufferSize + MemoryConstants::pageSize; auto graphicsAllocation = stream->getGraphicsAllocation(); graphicsAllocation->setSize(newBufferSize); graphicsAllocation->setCpuPtrAndGpuAddress(newBuffer, castToUint64(newBuffer)); stream->replaceBuffer(stream->getCpuBase(), newBufferSize); EXPECT_TRUE(mockHeapDirtyState.updateAndCheck(stream.get())); EXPECT_EQ(getSizeInPages(newBufferSize), mockHeapDirtyState.sizeInPages); EXPECT_EQ(castToUint64(newBuffer), mockHeapDirtyState.gpuBaseAddress); } /* Two heaps over different allocations that are given the same GPU base address: the first check is dirty, the second is not. */ TEST(DirtyStateHelpers, givenDirtyStateHelperWhenTwoDifferentIndirectHeapsAreCheckedButWithTheSame4GBbaseThenStateIsNotDirty) { MockGraphicsAllocation firstHeapAllocation(reinterpret_cast(0x1234), 4192); MockGraphicsAllocation 
secondHeapAllocation(reinterpret_cast(0x9345), 1234); uint64_t commonBase = 0x8123456; firstHeapAllocation.setGpuBaseAddress(commonBase); secondHeapAllocation.setGpuBaseAddress(commonBase); IndirectHeap firstHeap(&firstHeapAllocation, true); IndirectHeap secondHeap(&secondHeapAllocation, true); HeapDirtyState heapChecker; auto dirty = heapChecker.updateAndCheck(&firstHeap); EXPECT_TRUE(dirty); dirty = heapChecker.updateAndCheck(&secondHeap); EXPECT_FALSE(dirty); } } // namespace DirtyStateHelpers compute-runtime-20.13.16352/opencl/test/unit_test/helpers/dispatch_flags_helper.h000066400000000000000000000033261363734646600277630ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" using namespace NEO; /* Builds a DispatchFlags from the fixed default arguments listed below; each argument carries a trailing comment naming the field it fills. NOTE(review): the using-directive above sits at file scope in a header (#pragma once), so it applies to every file that includes it. */ struct DispatchFlagsHelper { static DispatchFlags createDefaultDispatchFlags() { return DispatchFlags( {}, //csrDependencies nullptr, //barrierTimestampPacketNodes {}, //pipelineSelectArgs nullptr, //flushStampReference QueueThrottle::MEDIUM, //throttle PreemptionMode::Disabled, //preemptionMode GrfConfig::DefaultGrfNumber, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy QueueSliceCount::defaultSliceCount, //sliceCount false, //blocking false, //dcFlush false, //useSLM false, //guardCommandBufferWithPipeControl false, //gsba32BitRequired false, //requiresCoherency false, //lowPriority false, //implicitFlush false, //outOfOrderExecutionAllowed false, //epilogueRequired false //usePerDssBackedBuffer ); } }; compute-runtime-20.13.16352/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp000066400000000000000000001347421363734646600315550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/dispatch_info_builder.h" #include 
"opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" namespace NEO { using namespace SplitDispatch; class DispatchInfoBuilderFixture : public ContextFixture, public DeviceFixture { using ContextFixture::SetUp; public: DispatchInfoBuilderFixture() {} void clearCrossThreadData() { memset(pCrossThreadData, 0, sizeof(pCrossThreadData)); } protected: void SetUp() { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); pMediaVFEstate = new SPatchMediaVFEState(); pMediaVFEstate->PerThreadScratchSpace = 1024; pMediaVFEstate->ScratchSpaceOffset = 0; pExecutionEnvironment = new SPatchExecutionEnvironment(); pExecutionEnvironment->CompiledSIMD32 = 1; pExecutionEnvironment->LargestCompiledSIMDSize = 32; pExecutionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber; pPrintfSurface = new SPatchAllocateStatelessPrintfSurface(); pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate; pKernelInfo->patchInfo.executionEnvironment = pExecutionEnvironment; pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; KernelArgPatchInfo kernelArg1PatchInfo; KernelArgPatchInfo kernelArg2PatchInfo; KernelArgPatchInfo kernelArg3PatchInfo; pKernelInfo->kernelArgInfo.resize(3); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArg1PatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x10; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArg2PatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); 
pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArg3PatchInfo); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x50; pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); pKernel->slmTotalSize = 128; pKernel->isBuiltIn = true; } void TearDown() override { delete pKernel; delete pPrintfSurface; delete pExecutionEnvironment; delete pMediaVFEstate; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } std::unique_ptr pKernelInfo; SPatchMediaVFEState *pMediaVFEstate = nullptr; SPatchExecutionEnvironment *pExecutionEnvironment; SPatchAllocateStatelessPrintfSurface *pPrintfSurface = nullptr; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; char pCrossThreadData[128]; }; typedef Test DispatchInfoBuilderTest; template class DispatchInfoBuilderMock : DispatchInfoBuilder { public: void pushSplit(const DispatchInfo &dispatchInfo, MultiDispatchInfo &outMdi) { DispatchInfoBuilder::pushSplit(dispatchInfo, outMdi); } }; TEST_F(DispatchInfoBuilderTest, setDispatchInfoNoDim) { MultiDispatchInfo multiDispatchInfo; DispatchInfoBuilderMock *diBuilder = new DispatchInfoBuilderMock(); ASSERT_NE(nullptr, diBuilder); DispatchInfo dispatchInfo; diBuilder->pushSplit(dispatchInfo, multiDispatchInfo); EXPECT_TRUE(multiDispatchInfo.empty()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, setDispatchInfoDim) { MultiDispatchInfo mdi1D, mdi2D, mdi3D; DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder2D); 
DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder3D); diBuilder1D->setDispatchGeometry(Vec3(1, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder1D->bake(mdi1D); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(1u, dispatchInfo.getDim()); } diBuilder2D->setDispatchGeometry(Vec3(1, 2, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder2D->bake(mdi2D); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(2u, dispatchInfo.getDim()); } diBuilder3D->setDispatchGeometry(Vec3(1, 2, 3), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder3D->bake(mdi3D); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(3u, dispatchInfo.getDim()); } delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, setDispatchInfoGWS) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); MultiDispatchInfo mdi0, mdi1, mdi2, mdi3; diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi0); EXPECT_TRUE(mdi0.empty()); diBuilder->setDispatchGeometry(Vec3(1, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi1); for (auto &dispatchInfo : mdi1) { EXPECT_EQ(1u, dispatchInfo.getGWS().x); EXPECT_EQ(1u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); } diBuilder->setDispatchGeometry(Vec3(1, 2, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi2); for (auto &dispatchInfo : mdi2) { EXPECT_EQ(1u, dispatchInfo.getGWS().x); EXPECT_EQ(2u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); } diBuilder->setDispatchGeometry(Vec3(1, 2, 3), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi3); for (auto &dispatchInfo : mdi3) { 
EXPECT_EQ(1u, dispatchInfo.getGWS().x); EXPECT_EQ(2u, dispatchInfo.getGWS().y); EXPECT_EQ(3u, dispatchInfo.getGWS().z); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(3u, dispatchInfo.getActualWorkgroupSize().z); } delete diBuilder; } TEST_F(DispatchInfoBuilderTest, setDispatchInfoELWS) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); MultiDispatchInfo mdi0, mdi1, mdi2, mdi3; diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(1, 1, 1), Vec3(0, 0, 0)); diBuilder->bake(mdi0); EXPECT_TRUE(mdi0.empty()); diBuilder->setDispatchGeometry(Vec3(1, 0, 0), Vec3(1, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi1); for (auto &dispatchInfo : mdi1) { EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); } diBuilder->setDispatchGeometry(Vec3(1, 1, 0), Vec3(1, 2, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi2); for (auto &dispatchInfo : mdi2) { EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); } diBuilder->setDispatchGeometry(Vec3(1, 1, 1), Vec3(1, 2, 3), Vec3(0, 0, 0)); diBuilder->bake(mdi3); for (auto &dispatchInfo : mdi3) { EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(3u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(2u, dispatchInfo.getLocalWorkgroupSize().y); 
EXPECT_EQ(3u, dispatchInfo.getLocalWorkgroupSize().z); } delete diBuilder; } TEST_F(DispatchInfoBuilderTest, setDispatchInfoLWS) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); MultiDispatchInfo mdi0, mdi1, mdi2, mdi3; diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi0); EXPECT_TRUE(mdi0.empty()); diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(16, 0, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi1); for (auto &dispatchInfo : mdi1) { EXPECT_EQ(16u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); } diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(16, 16, 0), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi2); for (auto &dispatchInfo : mdi2) { EXPECT_EQ(16u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(16u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); } diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(16, 16, 16), Vec3(0, 0, 0), Vec3(0, 0, 0)); diBuilder->bake(mdi3); for (auto &dispatchInfo : mdi3) { EXPECT_EQ(16u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(16u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); } delete diBuilder; } TEST_F(DispatchInfoBuilderTest, setKernelNoSplit) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo multiDispatchInfo; diBuilder->bake(multiDispatchInfo); for (auto &dispatchInfo : multiDispatchInfo) { ASSERT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn); } delete diBuilder; } TEST_F(DispatchInfoBuilderTest, setKernelSplit) { 
DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder2D); DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder3D); // 1D diBuilder1D->setKernel(RegionCoordX::Left, pKernel); diBuilder1D->setDispatchGeometry(RegionCoordX::Left, Vec3(256, 0, 0), Vec3(16, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi1D; diBuilder1D->bake(mdi1D); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn); } //2D diBuilder2D->setKernel(RegionCoordX::Left, RegionCoordY::Bottom, pKernel); diBuilder2D->setDispatchGeometry(RegionCoordX::Left, RegionCoordY::Bottom, Vec3(256, 256, 0), Vec3(16, 16, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi2D; diBuilder2D->bake(mdi2D); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn); } //3D diBuilder3D->setKernel(RegionCoordX::Right, RegionCoordY::Bottom, RegionCoordZ::Back, pKernel); diBuilder3D->setDispatchGeometry(RegionCoordX::Right, RegionCoordY::Bottom, RegionCoordZ::Back, Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo mdi3D; diBuilder3D->bake(mdi3D); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_TRUE(dispatchInfo.getKernel()->isBuiltIn); } delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, setWalkerNoSplit) { DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder2D); DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder3D); // 1D diBuilder1D->setKernel(pKernel); diBuilder1D->setDispatchGeometry(Vec3(256, 0, 0), Vec3(16, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo 
mdi1D; diBuilder1D->bake(mdi1D); EXPECT_EQ(1u, mdi1D.size()); const DispatchInfo *di1D = mdi1D.begin(); EXPECT_EQ(pKernel, di1D->getKernel()); EXPECT_EQ(256u, di1D->getGWS().x); EXPECT_EQ(1u, di1D->getGWS().y); EXPECT_EQ(1u, di1D->getGWS().z); EXPECT_EQ(16u, di1D->getEnqueuedWorkgroupSize().x); EXPECT_EQ(1u, di1D->getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, di1D->getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, di1D->getOffset().x); EXPECT_EQ(0u, di1D->getOffset().y); EXPECT_EQ(0u, di1D->getOffset().z); EXPECT_EQ(16u, di1D->getLocalWorkgroupSize().x); EXPECT_EQ(1u, di1D->getLocalWorkgroupSize().y); EXPECT_EQ(1u, di1D->getLocalWorkgroupSize().z); EXPECT_EQ(256u, di1D->getActualWorkgroupSize().x); EXPECT_EQ(1u, di1D->getActualWorkgroupSize().y); EXPECT_EQ(1u, di1D->getActualWorkgroupSize().z); EXPECT_EQ(16u, di1D->getTotalNumberOfWorkgroups().x); EXPECT_EQ(1u, di1D->getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, di1D->getTotalNumberOfWorkgroups().z); EXPECT_EQ(16u, di1D->getNumberOfWorkgroups().x); EXPECT_EQ(1u, di1D->getNumberOfWorkgroups().y); EXPECT_EQ(1u, di1D->getNumberOfWorkgroups().z); EXPECT_EQ(0u, di1D->getStartOfWorkgroups().x); EXPECT_EQ(0u, di1D->getStartOfWorkgroups().y); EXPECT_EQ(0u, di1D->getStartOfWorkgroups().z); // 2D diBuilder2D->setKernel(pKernel); diBuilder2D->setDispatchGeometry(Vec3(256, 256, 0), Vec3(16, 16, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi2D; diBuilder2D->bake(mdi2D); EXPECT_EQ(1u, mdi2D.size()); const DispatchInfo *di2D = mdi2D.begin(); EXPECT_EQ(pKernel, di2D->getKernel()); EXPECT_EQ(256u, di2D->getGWS().x); EXPECT_EQ(256u, di2D->getGWS().y); EXPECT_EQ(1u, di2D->getGWS().z); EXPECT_EQ(16u, di2D->getEnqueuedWorkgroupSize().x); EXPECT_EQ(16u, di2D->getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, di2D->getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, di2D->getOffset().x); EXPECT_EQ(0u, di2D->getOffset().y); EXPECT_EQ(0u, di2D->getOffset().z); EXPECT_EQ(16u, di2D->getLocalWorkgroupSize().x); EXPECT_EQ(16u, di2D->getLocalWorkgroupSize().y); 
EXPECT_EQ(1u, di2D->getLocalWorkgroupSize().z); EXPECT_EQ(16u, di2D->getTotalNumberOfWorkgroups().x); EXPECT_EQ(16u, di2D->getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, di2D->getTotalNumberOfWorkgroups().z); EXPECT_EQ(16u, di2D->getNumberOfWorkgroups().x); EXPECT_EQ(16u, di2D->getNumberOfWorkgroups().y); EXPECT_EQ(1u, di2D->getNumberOfWorkgroups().z); EXPECT_EQ(0u, di2D->getStartOfWorkgroups().x); EXPECT_EQ(0u, di2D->getStartOfWorkgroups().y); EXPECT_EQ(0u, di2D->getStartOfWorkgroups().z); // 3D diBuilder3D->setKernel(pKernel); diBuilder3D->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo mdi3D; diBuilder3D->bake(mdi3D); EXPECT_EQ(1u, mdi3D.size()); const DispatchInfo *di3D = mdi3D.begin(); EXPECT_EQ(pKernel, di3D->getKernel()); EXPECT_EQ(256u, di3D->getGWS().x); EXPECT_EQ(256u, di3D->getGWS().y); EXPECT_EQ(256u, di3D->getGWS().z); EXPECT_EQ(16u, di3D->getEnqueuedWorkgroupSize().x); EXPECT_EQ(16u, di3D->getEnqueuedWorkgroupSize().y); EXPECT_EQ(16u, di3D->getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, di3D->getOffset().x); EXPECT_EQ(0u, di3D->getOffset().y); EXPECT_EQ(0u, di3D->getOffset().z); EXPECT_EQ(16u, di3D->getLocalWorkgroupSize().x); EXPECT_EQ(16u, di3D->getLocalWorkgroupSize().y); EXPECT_EQ(16u, di3D->getLocalWorkgroupSize().z); EXPECT_EQ(16u, di3D->getTotalNumberOfWorkgroups().x); EXPECT_EQ(16u, di3D->getTotalNumberOfWorkgroups().y); EXPECT_EQ(16u, di3D->getTotalNumberOfWorkgroups().z); EXPECT_EQ(16u, di3D->getNumberOfWorkgroups().x); EXPECT_EQ(16u, di3D->getNumberOfWorkgroups().y); EXPECT_EQ(16u, di3D->getNumberOfWorkgroups().z); EXPECT_EQ(0u, di3D->getStartOfWorkgroups().x); EXPECT_EQ(0u, di3D->getStartOfWorkgroups().y); EXPECT_EQ(0u, di3D->getStartOfWorkgroups().z); delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, setWalkerSplit) { DispatchInfoBuilder *diBuilder1D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder1D); DispatchInfoBuilder *diBuilder2D = new 
DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder2D); DispatchInfoBuilder *diBuilder3D = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder3D); // 1D diBuilder1D->setKernel(pKernel); diBuilder1D->setDispatchGeometry(Vec3(256, 0, 0), Vec3(15, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi1D; diBuilder1D->bake(mdi1D); EXPECT_EQ(2u, mdi1D.size()); auto dispatchId = 0; for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_EQ(256u, dispatchInfo.getGWS().x); EXPECT_EQ(1u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, dispatchInfo.getOffset().x); EXPECT_EQ(0u, dispatchInfo.getOffset().y); EXPECT_EQ(0u, dispatchInfo.getOffset().z); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().x); EXPECT_EQ(1u, dispatchInfo.getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getTotalNumberOfWorkgroups().z); switch (dispatchId) { case 0: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 1: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); 
EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; } dispatchId++; } //2D diBuilder2D->setKernel(pKernel); diBuilder2D->setDispatchGeometry(Vec3(256, 256, 0), Vec3(15, 15, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdi2D; diBuilder2D->bake(mdi2D); EXPECT_EQ(4u, mdi2D.size()); dispatchId = 0; for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_EQ(256u, dispatchInfo.getGWS().x); EXPECT_EQ(256u, dispatchInfo.getGWS().y); EXPECT_EQ(1u, dispatchInfo.getGWS().z); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, dispatchInfo.getOffset().x); EXPECT_EQ(0u, dispatchInfo.getOffset().y); EXPECT_EQ(0u, dispatchInfo.getOffset().z); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getTotalNumberOfWorkgroups().z); switch (dispatchId) { case 0: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, 
dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 1: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 2: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 3: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(1u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, 
dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; } dispatchId++; } //3D diBuilder3D->setKernel(pKernel); diBuilder3D->setDispatchGeometry(Vec3(256, 256, 256), Vec3(15, 15, 15), Vec3(0, 0, 0)); MultiDispatchInfo mdi3D; diBuilder3D->bake(mdi3D); EXPECT_EQ(8u, mdi3D.size()); dispatchId = 0; for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(pKernel, dispatchInfo.getKernel()); EXPECT_EQ(256u, dispatchInfo.getGWS().x); EXPECT_EQ(256u, dispatchInfo.getGWS().y); EXPECT_EQ(256u, dispatchInfo.getGWS().z); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getEnqueuedWorkgroupSize().z); EXPECT_EQ(0u, dispatchInfo.getOffset().x); EXPECT_EQ(0u, dispatchInfo.getOffset().y); EXPECT_EQ(0u, dispatchInfo.getOffset().z); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getTotalNumberOfWorkgroups().z); switch (dispatchId) { case 0: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 1: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, 
dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 2: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 3: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().z); break; case 4: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, 
dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; case 5: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; case 6: EXPECT_EQ(255u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(15u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(17u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(0u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; case 7: EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().x); EXPECT_EQ(1u, 
dispatchInfo.getActualWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getActualWorkgroupSize().z); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().x); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().y); EXPECT_EQ(1u, dispatchInfo.getLocalWorkgroupSize().z); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().x); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().y); EXPECT_EQ(18u, dispatchInfo.getNumberOfWorkgroups().z); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().x); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().y); EXPECT_EQ(17u, dispatchInfo.getStartOfWorkgroups().z); break; } dispatchId++; } delete diBuilder3D; delete diBuilder2D; delete diBuilder1D; } TEST_F(DispatchInfoBuilderTest, mdiSizesForWalkerSplit1D) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(2, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize0; diBuilder->bake(mdiSize0); EXPECT_EQ(0u, mdiSize0.size()); diBuilder->setDispatchGeometry(Vec3(2, 0, 0), Vec3(2, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize1; diBuilder->bake(mdiSize1); EXPECT_EQ(1u, mdiSize1.size()); diBuilder->setDispatchGeometry(Vec3(3, 0, 0), Vec3(2, 0, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize2; diBuilder->bake(mdiSize2); EXPECT_EQ(2u, mdiSize2.size()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, mdiSizesForWalkerSplit2D) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize00; diBuilder->bake(mdiSize00); EXPECT_EQ(0u, mdiSize00.size()); diBuilder->setDispatchGeometry(Vec3(2, 2, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize11; diBuilder->bake(mdiSize11); EXPECT_EQ(1u, mdiSize11.size()); diBuilder->setDispatchGeometry(Vec3(3, 2, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize21; diBuilder->bake(mdiSize21); EXPECT_EQ(2u, mdiSize21.size()); 
diBuilder->setDispatchGeometry(Vec3(2, 3, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize12; diBuilder->bake(mdiSize12); EXPECT_EQ(2u, mdiSize12.size()); diBuilder->setDispatchGeometry(Vec3(3, 3, 0), Vec3(2, 2, 0), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize22; diBuilder->bake(mdiSize22); EXPECT_EQ(4u, mdiSize22.size()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, mdiSizesForWalkerSplit3D) { DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(0, 0, 0), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize000; diBuilder->bake(mdiSize000); EXPECT_EQ(0u, mdiSize000.size()); diBuilder->setDispatchGeometry(Vec3(2, 2, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize111; diBuilder->bake(mdiSize111); EXPECT_EQ(1u, mdiSize111.size()); diBuilder->setDispatchGeometry(Vec3(3, 2, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize211; diBuilder->bake(mdiSize211); EXPECT_EQ(2u, mdiSize211.size()); diBuilder->setDispatchGeometry(Vec3(2, 3, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize121; diBuilder->bake(mdiSize121); EXPECT_EQ(2u, mdiSize121.size()); diBuilder->setDispatchGeometry(Vec3(2, 2, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize112; diBuilder->bake(mdiSize112); EXPECT_EQ(2u, mdiSize112.size()); diBuilder->setDispatchGeometry(Vec3(3, 3, 2), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize221; diBuilder->bake(mdiSize221); EXPECT_EQ(4u, mdiSize221.size()); diBuilder->setDispatchGeometry(Vec3(3, 2, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize212; diBuilder->bake(mdiSize212); EXPECT_EQ(4u, mdiSize212.size()); diBuilder->setDispatchGeometry(Vec3(2, 3, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize122; diBuilder->bake(mdiSize122); EXPECT_EQ(4u, mdiSize122.size()); diBuilder->setDispatchGeometry(Vec3(3, 3, 3), Vec3(2, 2, 2), Vec3(0, 0, 0)); MultiDispatchInfo mdiSize222; 
diBuilder->bake(mdiSize222); EXPECT_EQ(8u, mdiSize222.size()); delete diBuilder; } TEST_F(DispatchInfoBuilderTest, setKernelArg) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo multiDispatchInfo; diBuilder->bake(multiDispatchInfo); clearCrossThreadData(); EXPECT_EQ(CL_SUCCESS, diBuilder->setArg(0, sizeof(cl_mem *), pVal)); char data[128]; void *svmPtr = &data; EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvm(1, sizeof(svmPtr), svmPtr, nullptr, 0u)); MockGraphicsAllocation svmAlloc(svmPtr, 128); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvmAlloc(2, svmPtr, &svmAlloc)); for (auto &dispatchInfo : multiDispatchInfo) { auto crossthreadOffset0 = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset0))); auto crossthreadOffset1 = pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset1))); auto crossthreadOffset2 = pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + crossthreadOffset2))); } delete buffer; delete diBuilder; } TEST_F(DispatchInfoBuilderTest, SetArgSplit) { DispatchInfoBuilder builder1D; DispatchInfoBuilder builder2D; DispatchInfoBuilder builder3D; Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; char data[128]; void *svmPtr = &data; builder1D.setKernel(pKernel); builder2D.setKernel(pKernel); builder3D.setKernel(pKernel); Vec3 GWS(256, 256, 256); Vec3 ELWS(16, 16, 16); Vec3 offset(0, 0, 0); 
builder1D.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, GWS, ELWS, offset); builder2D.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, GWS, ELWS, offset); builder3D.setDispatchGeometry(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, GWS, ELWS, offset); MultiDispatchInfo mdi1D; MultiDispatchInfo mdi2D; MultiDispatchInfo mdi3D; builder1D.bake(mdi1D); builder1D.bake(mdi2D); builder1D.bake(mdi3D); //Set arg clearCrossThreadData(); builder1D.setArg(SplitDispatch::RegionCoordX::Left, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } clearCrossThreadData(); builder2D.setArg(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } clearCrossThreadData(); builder3D.setArg(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, static_cast(0), sizeof(cl_mem *), pVal); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(buffer->getCpuAddress(), *reinterpret_cast((dispatchInfo.getKernel()->getCrossThreadData() + 0x10))); } //Set arg SVM clearCrossThreadData(); builder1D.setArgSvm(SplitDispatch::RegionCoordX::Left, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi1D) { EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } clearCrossThreadData(); builder2D.setArgSvm(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi2D) { EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } clearCrossThreadData(); 
builder3D.setArgSvm(SplitDispatch::RegionCoordX::Left, SplitDispatch::RegionCoordY::Top, SplitDispatch::RegionCoordZ::Front, 1, sizeof(svmPtr), svmPtr, nullptr, 0u); for (auto &dispatchInfo : mdi3D) { EXPECT_EQ(svmPtr, *(reinterpret_cast(dispatchInfo.getKernel()->getCrossThreadData() + 0x30))); } delete buffer; } TEST_F(DispatchInfoBuilderTest, setKernelArgNegative) { char *buffer = new char[sizeof(Buffer)]; auto val = (cl_mem)buffer; auto pVal = &val; DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); diBuilder->setKernel(pKernel); diBuilder->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo multiDispatchInfo; diBuilder->bake(multiDispatchInfo); EXPECT_NE(CL_SUCCESS, diBuilder->setArg(0, sizeof(cl_mem *), pVal)); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvm(1, sizeof(void *), nullptr, nullptr, 0u)); delete diBuilder; delete[] buffer; } TEST_F(DispatchInfoBuilderTest, setKernelArgNullKernel) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; char data[128]; void *svmPtr = &data; MockGraphicsAllocation svmAlloc(svmPtr, 128); DispatchInfoBuilder *diBuilder = new DispatchInfoBuilder(); ASSERT_NE(nullptr, diBuilder); diBuilder->setDispatchGeometry(Vec3(256, 256, 256), Vec3(16, 16, 16), Vec3(0, 0, 0)); MultiDispatchInfo multiDispatchInfo; diBuilder->bake(multiDispatchInfo); EXPECT_EQ(CL_SUCCESS, diBuilder->setArg(0, sizeof(cl_mem *), pVal)); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvm(1, sizeof(svmPtr), svmPtr, nullptr, 0u)); EXPECT_EQ(CL_SUCCESS, diBuilder->setArgSvmAlloc(2, svmPtr, &svmAlloc)); delete diBuilder; delete buffer; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/dispatch_info_tests.cpp000066400000000000000000000330741363734646600300430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/dispatch_info.h" #include 
"opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" #include using namespace NEO; class DispatchInfoFixture : public ContextFixture, public DeviceFixture { using ContextFixture::SetUp; public: DispatchInfoFixture() {} protected: void SetUp() { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); pMediaVFEstate = new SPatchMediaVFEState(); pMediaVFEstate->PerThreadScratchSpace = 1024; pMediaVFEstate->ScratchSpaceOffset = 0; pKernelInfo->patchInfo.mediavfestate = pMediaVFEstate; pPrintfSurface = new SPatchAllocateStatelessPrintfSurface(); pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); pKernel->slmTotalSize = 128; } void TearDown() override { delete pKernel; delete pPrintfSurface; delete pMediaVFEstate; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } std::unique_ptr pKernelInfo; SPatchMediaVFEState *pMediaVFEstate = nullptr; SPatchAllocateStatelessPrintfSurface *pPrintfSurface = nullptr; MockProgram *pProgram = nullptr; Kernel *pKernel = nullptr; }; typedef Test DispatchInfoTest; TEST_F(DispatchInfoTest, DispatchInfoWithNoGeometry) { std::unique_ptr dispatchInfo(new DispatchInfo); EXPECT_EQ(nullptr, dispatchInfo->getKernel()); EXPECT_EQ(0u, dispatchInfo->getRequiredScratchSize()); EXPECT_FALSE(dispatchInfo->usesSlm()); EXPECT_FALSE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(0u, dispatchInfo->getDim()); Vec3 vecZero({0, 0, 0}); EXPECT_EQ(vecZero, dispatchInfo->getGWS()); EXPECT_EQ(vecZero, dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(vecZero, 
dispatchInfo->getOffset()); EXPECT_EQ(vecZero, dispatchInfo->getActualWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getLocalWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getTotalNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, DispatchInfoWithUserGeometry) { Vec3 gws({256, 256, 256}); Vec3 elws({16, 16, 16}); Vec3 offset({1, 2, 3}); std::unique_ptr dispatchInfo(new DispatchInfo(pKernel, 3, gws, elws, offset)); EXPECT_NE(nullptr, dispatchInfo->getKernel()); EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize()); EXPECT_TRUE(dispatchInfo->usesSlm()); EXPECT_TRUE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(3u, dispatchInfo->getDim()); EXPECT_EQ(gws, dispatchInfo->getGWS()); EXPECT_EQ(elws, dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, dispatchInfo->getOffset()); Vec3 vecZero({0, 0, 0}); EXPECT_EQ(vecZero, dispatchInfo->getActualWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getLocalWorkgroupSize()); EXPECT_EQ(vecZero, dispatchInfo->getTotalNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getNumberOfWorkgroups()); EXPECT_EQ(vecZero, dispatchInfo->getStartOfWorkgroups()); dispatchInfo->setKernel(nullptr); EXPECT_EQ(nullptr, dispatchInfo->getKernel()); } TEST_F(DispatchInfoTest, DispatchInfoWithFullGeometry) { Vec3 gws({256, 256, 256}); Vec3 elws({32, 32, 32}); Vec3 offset({1, 2, 3}); Vec3 agws({256, 256, 256}); Vec3 lws({32, 32, 32}); Vec3 twgs({8, 8, 8}); Vec3 nwgs({8, 8, 8}); Vec3 swgs({0, 0, 0}); std::unique_ptr dispatchInfo(new DispatchInfo(pKernel, 3, gws, elws, offset, agws, lws, twgs, nwgs, swgs)); EXPECT_NE(nullptr, dispatchInfo->getKernel()); EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize()); EXPECT_TRUE(dispatchInfo->usesSlm()); EXPECT_TRUE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(3u, dispatchInfo->getDim()); EXPECT_EQ(gws, dispatchInfo->getGWS()); EXPECT_EQ(elws, 
dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, dispatchInfo->getOffset()); EXPECT_EQ(agws, dispatchInfo->getActualWorkgroupSize()); EXPECT_EQ(lws, dispatchInfo->getEnqueuedWorkgroupSize()); EXPECT_EQ(twgs, dispatchInfo->getTotalNumberOfWorkgroups()); EXPECT_EQ(nwgs, dispatchInfo->getNumberOfWorkgroups()); EXPECT_EQ(swgs, dispatchInfo->getStartOfWorkgroups()); dispatchInfo->setKernel(nullptr); EXPECT_EQ(nullptr, dispatchInfo->getKernel()); } TEST_F(DispatchInfoTest, MultiDispatchInfoNonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST_F(DispatchInfoTest, MultiDispatchInfoNonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } TEST_F(DispatchInfoTest, MultiDispatchInfoEmpty) { MultiDispatchInfo multiDispatchInfo; EXPECT_TRUE(multiDispatchInfo.empty()); EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_FALSE(multiDispatchInfo.usesSlm()); EXPECT_FALSE(multiDispatchInfo.usesStatelessPrintfSurface()); EXPECT_EQ(0u, multiDispatchInfo.getRedescribedSurfaces().size()); } TEST_F(DispatchInfoTest, MultiDispatchInfoWithRedescribedSurfaces) { MultiDispatchInfo multiDispatchInfo; auto image = std::unique_ptr(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, image); auto imageRedescribed = image->redescribe(); multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr(imageRedescribed)); EXPECT_EQ(1u, multiDispatchInfo.getRedescribedSurfaces().size()); } TEST_F(DispatchInfoTest, MultiDispatchInfoWithNoGeometry) { DispatchInfo dispatchInfo; MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_FALSE(multiDispatchInfo.usesSlm()); EXPECT_FALSE(multiDispatchInfo.usesStatelessPrintfSurface()); } TEST_F(DispatchInfoTest, MultiDispatchInfoWithUserGeometry) { Vec3 gws({256, 256, 256}); Vec3 elws({16, 16, 16}); 
Vec3 offset({1, 2, 3}); DispatchInfo dispatchInfo(pKernel, 3, gws, elws, offset); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_TRUE(multiDispatchInfo.usesSlm()); EXPECT_TRUE(multiDispatchInfo.usesStatelessPrintfSurface()); EXPECT_NE(nullptr, multiDispatchInfo.begin()->getKernel()); EXPECT_EQ(gws, multiDispatchInfo.begin()->getGWS()); EXPECT_EQ(elws, multiDispatchInfo.begin()->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, multiDispatchInfo.begin()->getOffset()); Vec3 vecZero({0, 0, 0}); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getLocalWorkgroupSize()); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getTotalNumberOfWorkgroups()); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getNumberOfWorkgroups()); EXPECT_EQ(vecZero, multiDispatchInfo.begin()->getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, MultiDispatchInfoWithFullGeometry) { Vec3 gws({256, 256, 256}); Vec3 elws({32, 32, 32}); Vec3 offset({1, 2, 3}); Vec3 agws({256, 256, 256}); Vec3 lws({32, 32, 32}); Vec3 twgs({8, 8, 8}); Vec3 nwgs({8, 8, 8}); Vec3 swgs({0, 0, 0}); DispatchInfo dispatchInfo(pKernel, 3, gws, elws, offset, agws, lws, twgs, nwgs, swgs); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize()); EXPECT_TRUE(multiDispatchInfo.usesSlm()); EXPECT_TRUE(multiDispatchInfo.usesStatelessPrintfSurface()); EXPECT_NE(nullptr, multiDispatchInfo.begin()->getKernel()); EXPECT_EQ(gws, multiDispatchInfo.begin()->getGWS()); EXPECT_EQ(elws, multiDispatchInfo.begin()->getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, multiDispatchInfo.begin()->getOffset()); EXPECT_EQ(agws, multiDispatchInfo.begin()->getActualWorkgroupSize()); EXPECT_EQ(lws, multiDispatchInfo.begin()->getLocalWorkgroupSize()); EXPECT_EQ(twgs, 
multiDispatchInfo.begin()->getTotalNumberOfWorkgroups()); EXPECT_EQ(nwgs, multiDispatchInfo.begin()->getNumberOfWorkgroups()); EXPECT_EQ(swgs, multiDispatchInfo.begin()->getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, WorkGroupSetGet) { DispatchInfo dispatchInfo; Vec3 gws({256, 256, 256}); Vec3 elws({16, 16, 16}); Vec3 offset({1, 2, 3}); Vec3 agws({256, 256, 256}); Vec3 lws({4, 4, 4}); Vec3 twgs({64, 64, 64}); Vec3 nwgs({64, 64, 64}); Vec3 swgs({8, 8, 8}); dispatchInfo.setGWS(gws); dispatchInfo.setEnqueuedWorkgroupSize(elws); dispatchInfo.setOffsets(offset); dispatchInfo.setActualGlobalWorkgroupSize(agws); dispatchInfo.setLWS(lws); dispatchInfo.setTotalNumberOfWorkgroups(twgs); dispatchInfo.setNumberOfWorkgroups(nwgs); dispatchInfo.setStartOfWorkgroups(swgs); EXPECT_EQ(gws, dispatchInfo.getGWS()); EXPECT_EQ(elws, dispatchInfo.getEnqueuedWorkgroupSize()); EXPECT_EQ(offset, dispatchInfo.getOffset()); EXPECT_EQ(agws, dispatchInfo.getActualWorkgroupSize()); EXPECT_EQ(lws, dispatchInfo.getLocalWorkgroupSize()); EXPECT_EQ(twgs, dispatchInfo.getTotalNumberOfWorkgroups()); EXPECT_EQ(nwgs, dispatchInfo.getNumberOfWorkgroups()); EXPECT_EQ(swgs, dispatchInfo.getStartOfWorkgroups()); } TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryParentAndMainKernel) { std::unique_ptr parentKernel(MockParentKernel::create(*pContext)); std::unique_ptr baseKernel(MockKernel::create(*pDevice, pProgram)); std::unique_ptr builtInKernel(MockKernel::create(*pDevice, pProgram)); builtInKernel->isBuiltIn = true; DispatchInfo parentKernelDispatchInfo(parentKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo baseDispatchInfo(baseKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo builtInDispatchInfo(builtInKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); { MultiDispatchInfo multiDispatchInfo(parentKernel.get()); multiDispatchInfo.push(parentKernelDispatchInfo); EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekParentKernel()); 
EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekMainKernel()); } { MultiDispatchInfo multiDispatchInfo(baseKernel.get()); multiDispatchInfo.push(builtInDispatchInfo); EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); // dont pick builtin kernel multiDispatchInfo.push(baseDispatchInfo); EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); } { MultiDispatchInfo multiDispatchInfo; EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel()); multiDispatchInfo.push(builtInDispatchInfo); EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel()); } { MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(parentKernelDispatchInfo); multiDispatchInfo.push(baseDispatchInfo); multiDispatchInfo.push(builtInDispatchInfo); std::reverse_iterator rend = multiDispatchInfo.rend(); std::reverse_iterator crend = multiDispatchInfo.crend(); std::reverse_iterator rbegin = multiDispatchInfo.rbegin(); std::reverse_iterator crbegin = multiDispatchInfo.crbegin(); EXPECT_EQ(rbegin.base(), multiDispatchInfo.end()); EXPECT_EQ(crbegin.base(), multiDispatchInfo.end()); EXPECT_EQ(rend.base(), multiDispatchInfo.begin()); EXPECT_EQ(crend.base(), multiDispatchInfo.begin()); } } TEST(DispatchInfoBasicTests, givenDispatchInfoWhenCreatedThenDefaultValueOfPartitionIsFalse) { DispatchInfo dispatchInfo; EXPECT_FALSE(dispatchInfo.peekCanBePartitioned()); } TEST(DispatchInfoBasicTests, givenDispatchInfoWhenSetCanBePartitionIsCalledThenStateIsChangedAccordingly) { DispatchInfo dispatchInfo; dispatchInfo.setCanBePartitioned(true); EXPECT_TRUE(dispatchInfo.peekCanBePartitioned()); } TEST(DispatchInfoBasicTests, givenDispatchInfoWithoutKernelWhenGettingSizeForPrivateScratchThenZeroIsReturned) { DispatchInfo dispatchInfo; EXPECT_EQ(nullptr, 
dispatchInfo.getKernel()); EXPECT_EQ(0u, dispatchInfo.getRequiredPrivateScratchSize()); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/execution_environment_helper.cpp000066400000000000000000000022141363734646600317650ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { ExecutionEnvironment *getExecutionEnvironmentImpl(HardwareInfo *&hwInfo, uint32_t rootDeviceEnvironments) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(rootDeviceEnvironments); DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(rootDeviceEnvironments); hwInfo = nullptr; DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); executionEnvironment->initializeMemoryManager(); return executionEnvironment; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/execution_environment_helper.h000066400000000000000000000005731363734646600314400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "CL/cl.h" #include namespace NEO { struct HardwareInfo; ExecutionEnvironment *getExecutionEnvironmentImpl(HardwareInfo *&hwInfo, uint32_t rootDeviceEnvironments); } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/helpers/extendable_enum_tests.cpp000066400000000000000000000020131363734646600303550ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/extendable_enum.h" #include "gtest/gtest.h" namespace ExtendableEnumTest { struct Type : ExtendableEnum { public: constexpr Type(uint32_t val) : ExtendableEnum(val) {} }; constexpr Type testEnum0{0}; constexpr Type testEnum1{1}; } // namespace ExtendableEnumTest TEST(ExtendableEnumTest, givenExtendableEnumWhenValuesAreCheckedThenCorrectValuesAreCorrect) { EXPECT_EQ(0u, ExtendableEnumTest::testEnum0); EXPECT_EQ(1u, ExtendableEnumTest::testEnum1); } TEST(ExtendableEnumTest, givenExtendableEnumVariableWhenValueIsAssignedThenCorrectValueIsStored) { ExtendableEnumTest::Type enumVal0 = ExtendableEnumTest::testEnum0; EXPECT_EQ(ExtendableEnumTest::testEnum0, enumVal0); } namespace ExtendableEnumTest { constexpr Type testEnum2{2}; } TEST(ExtendableEnumTest, givenExtendableEnumWhenNewValuesAreAddedThenCorrectValuesAreAssigned) { EXPECT_EQ(2u, ExtendableEnumTest::testEnum2); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/flush_stamp_tests.cpp000066400000000000000000000076011363734646600275530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/flush_stamp.h" #include "gtest/gtest.h" using namespace NEO; TEST(FlushStampTest, referenceTrackedFlushStamp) { FlushStampTracker *flushStampTracker = new FlushStampTracker(true); auto flushStampSharedHandle = flushStampTracker->getStampReference(); ASSERT_NE(nullptr, flushStampSharedHandle); EXPECT_EQ(1, flushStampSharedHandle->getRefInternalCount()); EXPECT_EQ(0, flushStampSharedHandle->getRefApiCount()); flushStampSharedHandle->incRefInternal(); EXPECT_EQ(2, flushStampSharedHandle->getRefInternalCount()); delete flushStampTracker; EXPECT_EQ(1, 
flushStampSharedHandle->getRefInternalCount()); flushStampSharedHandle->decRefInternal(); } TEST(FlushStampTest, dontAllocateStamp) { FlushStampTracker flushStampTracker(false); EXPECT_EQ(nullptr, flushStampTracker.getStampReference()); } TEST(FlushStampTest, updateStampValue) { FlushStampTracker flushStampTracker(true); FlushStamp flushStamp = 0; EXPECT_EQ(flushStamp, flushStampTracker.peekStamp()); flushStamp = 2; flushStampTracker.setStamp(flushStamp); EXPECT_EQ(flushStamp, flushStampTracker.peekStamp()); } TEST(FlushStampTest, handleStampObjReplacing) { FlushStampTracker flushStampTracker(true); EXPECT_EQ(1, flushStampTracker.getStampReference()->getRefInternalCount()); //obj to release auto stampObj = new FlushStampTrackingObj(); EXPECT_EQ(0, stampObj->getRefInternalCount()); // no owner flushStampTracker.replaceStampObject(stampObj); EXPECT_EQ(stampObj, flushStampTracker.getStampReference()); EXPECT_EQ(1, stampObj->getRefInternalCount()); } TEST(FlushStampTest, ignoreNullptrReplace) { FlushStampTracker flushStampTracker(true); auto currentObj = flushStampTracker.getStampReference(); flushStampTracker.replaceStampObject(nullptr); EXPECT_EQ(currentObj, flushStampTracker.getStampReference()); } TEST(FlushStampUpdateHelperTest, manageRefCounts) { FlushStampTrackingObj obj1, obj2; { FlushStampUpdateHelper updater; obj1.incRefInternal(); obj2.incRefInternal(); EXPECT_EQ(1, obj1.getRefInternalCount()); EXPECT_EQ(1, obj2.getRefInternalCount()); updater.insert(&obj1); updater.insert(&obj2); EXPECT_EQ(1, obj1.getRefInternalCount()); EXPECT_EQ(1, obj2.getRefInternalCount()); } EXPECT_EQ(1, obj1.getRefInternalCount()); EXPECT_EQ(1, obj2.getRefInternalCount()); } TEST(FlushStampUpdateHelperTest, multipleInserts) { FlushStampTrackingObj obj1; { FlushStampUpdateHelper updater; obj1.incRefInternal(); EXPECT_EQ(1, obj1.getRefInternalCount()); updater.insert(&obj1); updater.insert(&obj1); EXPECT_EQ(2u, updater.size()); obj1.incRefInternal(); updater.insert(&obj1); 
updater.insert(&obj1); EXPECT_EQ(4u, updater.size()); EXPECT_EQ(2, obj1.getRefInternalCount()); } EXPECT_EQ(2, obj1.getRefInternalCount()); obj1.decRefInternal(); } TEST(FlushStampUpdateHelperTest, ignoreNullptr) { FlushStampUpdateHelper updater; updater.insert(nullptr); EXPECT_EQ(0u, updater.size()); } TEST(FlushStampUpdateHelperTest, givenUninitializedFlushStampWhenUpdateAllIsCalledThenItIsUpdated) { FlushStampUpdateHelper updater; FlushStampTrackingObj obj1; updater.insert(&obj1); FlushStamp flushStampToUpdate = 2; updater.updateAll(flushStampToUpdate); EXPECT_EQ(flushStampToUpdate, obj1.flushStamp); EXPECT_TRUE(obj1.initialized); } TEST(FlushStampUpdateHelperTest, givenFlushStampWhenSetStampWithZeroIsCalledThenFlushStampIsNotInitialized) { FlushStamp zeroStamp = 0; FlushStampTracker tracker(true); tracker.setStamp(zeroStamp); EXPECT_FALSE(tracker.getStampReference()->initialized); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl000066400000000000000000000011101363734646600307040ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "test.h" using namespace NEO; template void whenGetGpgpuEnginesThenReturnTwoRcsEngines(const HardwareInfo &hwInfo) { auto gpgpuEngines = HwHelperHw::get().getGpgpuEngineInstances(hwInfo); EXPECT_EQ(3u, gpgpuEngines.size()); EXPECT_EQ(aub_stream::ENGINE_RCS, gpgpuEngines[0]); EXPECT_EQ(aub_stream::ENGINE_RCS, gpgpuEngines[1]); EXPECT_EQ(aub_stream::ENGINE_RCS, gpgpuEngines[2]); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/get_info_status_mapper_tests.cpp000066400000000000000000000015311363734646600317630ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/get_info_status_mapper.h" #include "gtest/gtest.h" TEST(getInfoStatusMapper, GivenValidGetInfoStatusReturnExpectedCLCode) { auto 
getInfoStatus = changeGetInfoStatusToCLResultType(GetInfoStatus::SUCCESS); EXPECT_EQ(CL_SUCCESS, getInfoStatus); getInfoStatus = changeGetInfoStatusToCLResultType(GetInfoStatus::INVALID_CONTEXT); EXPECT_EQ(CL_INVALID_CONTEXT, getInfoStatus); getInfoStatus = changeGetInfoStatusToCLResultType(GetInfoStatus::INVALID_VALUE); EXPECT_EQ(CL_INVALID_VALUE, getInfoStatus); } TEST(getInfoStatusMapper, GivenInvalidGetStatusReturnCLInvalidValue) { auto getInfoStatus = changeGetInfoStatusToCLResultType(static_cast(1)); EXPECT_EQ(CL_INVALID_VALUE, getInfoStatus); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/get_info_tests.cpp000066400000000000000000000046631363734646600270250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "gtest/gtest.h" TEST(getInfo, valid_params_returnsSuccess) { float dest = 0.0f; float src = 1.0f; auto retVal = getInfo(&dest, sizeof(dest), &src, sizeof(src)); EXPECT_EQ(GetInfoStatus::SUCCESS, retVal); EXPECT_EQ(src, dest); } TEST(getInfo, null_param_and_param_size_too_small_returnsSuccess) { float dest = 0.0f; float src = 1.0f; auto retVal = getInfo(nullptr, 0, &src, sizeof(src)); EXPECT_EQ(GetInfoStatus::SUCCESS, retVal); EXPECT_NE(src, dest); } TEST(getInfo, GivenNullPtrAsValueAndNonZeroSizeWhenAskedForGetInfoThenSuccessIsReturned) { float dest = 0.0f; float src = 1.0f; auto retVal = getInfo(nullptr, sizeof(dest), &src, sizeof(src)); EXPECT_EQ(GetInfoStatus::SUCCESS, retVal); EXPECT_NE(src, dest); } TEST(getInfo, param_size_too_small_returnsError) { float dest = 0.0f; float src = 1.0f; auto retVal = getInfo(&dest, 0, &src, sizeof(src)); EXPECT_EQ(GetInfoStatus::INVALID_VALUE, retVal); EXPECT_NE(src, dest); } TEST(getInfo, null_src_param_returnsError) { float dest = 0.0f; float src = 1.0f; auto retVal = getInfo(&dest, sizeof(dest), nullptr, sizeof(src)); EXPECT_EQ(GetInfoStatus::INVALID_VALUE, retVal); EXPECT_NE(src, dest); } 
TEST(getInfo, zero_src_param_size_returnsError) { float dest = 0.0f; float src = 1.0f; auto retVal = getInfo(&dest, sizeof(dest), &src, 0); EXPECT_EQ(GetInfoStatus::INVALID_VALUE, retVal); EXPECT_NE(src, dest); } TEST(getInfoHelper, GivenInstanceOfGetInfoHelperAndNullPtrParamsSuccessIsReturned) { GetInfoStatus retVal; GetInfoHelper info(nullptr, 0, nullptr, &retVal); info.set(1); EXPECT_EQ(GetInfoStatus::SUCCESS, retVal); } TEST(getInfoHelper, staticSetter) { uint32_t *getValue = nullptr; uint32_t expectedValue = 1; GetInfoHelper::set(getValue, expectedValue); EXPECT_EQ(nullptr, getValue); getValue = new uint32_t(0); GetInfoHelper::set(getValue, expectedValue); ASSERT_NE(nullptr, getValue); EXPECT_EQ(*getValue, expectedValue); delete getValue; } TEST(errorCodeHelper, localVariable) { int errCode = 0; ErrorCodeHelper err(&errCode, 1); EXPECT_EQ(1, errCode); EXPECT_EQ(1, err.localErrcode); err.set(2); EXPECT_EQ(2, errCode); EXPECT_EQ(2, err.localErrcode); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/gtest_helpers.h000066400000000000000000000014511363734646600263160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #define EXPECT_EQ_VAL(a, b) \ { \ auto evalA = (a); \ auto evalB = (b); \ EXPECT_EQ(evalA, evalB); \ } #define EXPECT_NE_VAL(a, b) \ { \ auto evalA = (a); \ auto evalB = (b); \ EXPECT_NE(evalA, evalB); \ } #define EXPECT_GT_VAL(a, b) \ { \ auto evalA = (a); \ auto evalB = (b); \ EXPECT_GT(evalA, evalB); \ } #define EXPECT_EQ_CONST(a, b) \ { \ decltype(b) expected = (a); \ EXPECT_EQ_VAL(expected, b); \ } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp000066400000000000000000002144721363734646600322510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/hardware_commands_helper_tests.h" #include 
"shared/source/helpers/basic_math.h" #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/api/api.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" using namespace NEO; void HardwareCommandsTest::SetUp() { DeviceFixture::SetUp(); ASSERT_NE(nullptr, pClDevice); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); ASSERT_NE(nullptr, pContext); BuiltInFixture::SetUp(pDevice); ASSERT_NE(nullptr, pBuiltIns); mockKernelWithInternal = std::make_unique(*pClDevice, pContext); } void HardwareCommandsTest::TearDown() { mockKernelWithInternal.reset(nullptr); BuiltInFixture::TearDown(); ContextFixture::TearDown(); DeviceFixture::TearDown(); } void HardwareCommandsTest::addSpaceForSingleKernelArg() { kernelArguments.resize(1); kernelArguments[0] = kernelArgInfo; mockKernelWithInternal->kernelInfo.resizeKernelArgInfoAndRegisterParameter(1); mockKernelWithInternal->kernelInfo.kernelArgInfo.resize(1); mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1); mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0; mockKernelWithInternal->kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = 
sizeof(uintptr_t); mockKernelWithInternal->mockKernel->setKernelArguments(kernelArguments); mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(1); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, programInterfaceDescriptorDataResourceUsage) { CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr srcImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage.get(); dc.dstMemObj = dstImage.get(); dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto usedIndirectHeapBefore = indirectHeap.getUsed(); indirectHeap.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); size_t crossThreadDataSize = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendInterfaceDescriptorData( indirectHeap, 0, 0, crossThreadDataSize, 64, 0, 0, 0, 1, *kernel, 0, pDevice->getPreemptionMode(), nullptr); auto usedIndirectHeapAfter = indirectHeap.getUsed(); EXPECT_EQ(sizeof(INTERFACE_DESCRIPTOR_DATA), usedIndirectHeapAfter - usedIndirectHeapBefore); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, programMediaInterfaceDescriptorLoadResourceUsage) { CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; typedef typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD 
MEDIA_INTERFACE_DESCRIPTOR_LOAD; typedef typename FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto &commandStream = cmdQ.getCS(1024); auto usedBefore = commandStream.getUsed(); HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad(commandStream, 0, sizeof(INTERFACE_DESCRIPTOR_DATA)); auto usedAfter = commandStream.getUsed(); EXPECT_EQ(sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD) + sizeof(MEDIA_STATE_FLUSH), usedAfter - usedBefore); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, programMediaStateFlushResourceUsage) { CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; typedef typename FamilyType::MEDIA_STATE_FLUSH MEDIA_STATE_FLUSH; auto &commandStream = cmdQ.getCS(1024); auto usedBefore = commandStream.getUsed(); HardwareCommandsHelper::sendMediaStateFlush(commandStream, sizeof(INTERFACE_DESCRIPTOR_DATA)); auto usedAfter = commandStream.getUsed(); EXPECT_EQ(sizeof(MEDIA_STATE_FLUSH), usedAfter - usedBefore); } HWTEST_F(HardwareCommandsTest, sendCrossThreadDataResourceUsage) { CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr srcImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage.get(); dc.dstMemObj = dstImage.get(); dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto usedBefore = indirectHeap.getUsed(); auto sizeCrossThreadData = 
kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, sizeCrossThreadData); auto usedAfter = indirectHeap.getUsed(); EXPECT_EQ(kernel->getCrossThreadDataSize(), usedAfter - usedBefore); } HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsNotSetThenAddPatchInfoDataOffsetsAreNotMoved) { CommandQueueHw cmdQ(pContext, pClDevice, 0, false); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice); auto kernelInfo = std::make_unique(); std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *pClDevice)); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); PatchInfoData patchInfoData = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; kernel->getPatchInfoDataList().push_back(patchInfoData); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, sizeCrossThreadData); ASSERT_EQ(1u, kernel->getPatchInfoDataList().size()); EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].sourceAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::KernelArg, kernel->getPatchInfoDataList()[0].sourceType); EXPECT_EQ(0xbbbbbbbb, kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, kernel->getPatchInfoDataList()[0].targetType); } HWTEST_F(HardwareCommandsTest, givenIndirectHeapNotAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenOffsetZeroIsReturned) { auto nonInternalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap 
indirectHeap(nonInternalAllocation, false); auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(); auto offset = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *mockKernelWithInternal->mockKernel, false, nullptr, sizeCrossThreadData); EXPECT_EQ(0u, offset); pDevice->getMemoryManager()->freeGraphicsMemory(nonInternalAllocation); } HWTEST_F(HardwareCommandsTest, givenIndirectHeapAllocatedFromInternalPoolWhenSendCrossThreadDataIsCalledThenHeapBaseOffsetIsReturned) { auto internalAllocation = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HEAP)); IndirectHeap indirectHeap(internalAllocation, true); auto expectedOffset = internalAllocation->getGpuAddressToPatch(); auto sizeCrossThreadData = mockKernelWithInternal->mockKernel->getCrossThreadDataSize(); auto offset = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *mockKernelWithInternal->mockKernel, false, nullptr, sizeCrossThreadData); EXPECT_EQ(expectedOffset, offset); pDevice->getMemoryManager()->freeGraphicsMemory(internalAllocation); } HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoCommentsForAUBDumpIsSetThenAddPatchInfoDataOffsetsAreMoved) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); CommandQueueHw cmdQ(pContext, pClDevice, 0, false); MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice); auto kernelInfo = std::make_unique(); std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *pClDevice)); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); indirectHeap.getSpace(128u); PatchInfoData patchInfoData1 = {0xaaaaaaaa, 0, PatchInfoAllocationType::KernelArg, 0xbbbbbbbb, 0, PatchInfoAllocationType::IndirectObjectHeap}; PatchInfoData patchInfoData2 = 
{0xcccccccc, 0, PatchInfoAllocationType::IndirectObjectHeap, 0xdddddddd, 0, PatchInfoAllocationType::Default}; kernel->getPatchInfoDataList().push_back(patchInfoData1); kernel->getPatchInfoDataList().push_back(patchInfoData2); auto sizeCrossThreadData = kernel->getCrossThreadDataSize(); auto offsetCrossThreadData = HardwareCommandsHelper::sendCrossThreadData( indirectHeap, *kernel, false, nullptr, sizeCrossThreadData); ASSERT_NE(0u, offsetCrossThreadData); EXPECT_EQ(128u, offsetCrossThreadData); ASSERT_EQ(2u, kernel->getPatchInfoDataList().size()); EXPECT_EQ(0xaaaaaaaa, kernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(0u, kernel->getPatchInfoDataList()[0].sourceAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::KernelArg, kernel->getPatchInfoDataList()[0].sourceType); EXPECT_NE(0xbbbbbbbb, kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(indirectHeap.getGraphicsAllocation()->getGpuAddress(), kernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_NE(0u, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(offsetCrossThreadData, kernel->getPatchInfoDataList()[0].targetAllocationOffset); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, kernel->getPatchInfoDataList()[0].targetType); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, sendIndirectStateResourceUsage) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr srcImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, srcImage.get()); std::unique_ptr dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcMemObj = srcImage.get(); dc.dstMemObj = dstImage.get(); 
dc.srcOffset = {0, 0, 0}; dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeDSH = dsh.getUsed(); auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); dsh.align(HardwareCommandsHelper::alignInterfaceDescriptorData); size_t IDToffset = dsh.getUsed(); dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( commandStream, IDToffset, sizeof(INTERFACE_DESCRIPTOR_DATA)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *kernel, kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, IDToffset, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); // It's okay these are EXPECT_GE as they're only going to be used for // estimation purposes to avoid OOM. 
auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); auto sizeRequiredDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto sizeRequiredIOH = HardwareCommandsHelper::getSizeRequiredIOH(*kernel, localWorkSize); auto sizeRequiredSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); EXPECT_GE(sizeRequiredDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(sizeRequiredIOH, usedAfterIOH - usedBeforeIOH); EXPECT_GE(sizeRequiredSSH, usedAfterSSH - usedBeforeSSH); auto usedAfterCS = commandStream.getUsed(); EXPECT_GE(HardwareCommandsHelper::getSizeRequiredCS(kernel), usedAfterCS - usedBeforeCS); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTableEntriesWhenIndirectStateIsEmittedThenInterfaceDescriptorContainsCorrectBindingTableEntryCount) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 3u; mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, 
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); if (HardwareCommandsHelper::doBindingTablePrefetch()) { EXPECT_EQ(expectedBindingTableCount, interfaceDescriptor->getBindingTableEntryCount()); } else { EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount()); } } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhenIndirectStateIsEmittedThenInterfaceDescriptorContainsZeroBindingTableEntryCount) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 3u; mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto isScheduler = const_cast(&mockKernelWithInternal->mockKernel->isSchedulerKernel); *isScheduler = true; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, 
mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); auto interfaceDescriptor = reinterpret_cast(dsh.getCpuBase()); EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount()); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTableEntriesWhenIndirectStateIsEmittedThenInterfaceDescriptorHas31BindingTableEntriesSet) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto expectedBindingTableCount = 100u; mockKernelWithInternal->mockKernel->numberOfBindingTableStates = expectedBindingTableCount; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); const size_t localWorkSize = 256; const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); auto interfaceDescriptor = 
reinterpret_cast(dsh.getCpuBase()); if (HardwareCommandsHelper::doBindingTablePrefetch()) { EXPECT_EQ(31u, interfaceDescriptor->getBindingTableEntryCount()); } else { EXPECT_EQ(0u, interfaceDescriptor->getBindingTableEntryCount()); } } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKernelsWalkOrderIsTakenIntoAccount) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr img(Image2dHelper<>::create(pContext)); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImageToImage3d, cmdQ.getDevice()); BuiltinOpParams dc; dc.srcMemObj = img.get(); dc.dstMemObj = img.get(); dc.size = {1, 1, 1}; builder.buildDispatchInfos(multiDispatchInfo, dc); ASSERT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); const size_t localWorkSizeX = 2; const size_t localWorkSizeY = 3; const size_t localWorkSizeZ = 4; const size_t localWorkSizes[3]{localWorkSizeX, localWorkSizeY, localWorkSizeZ}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); dsh.align(HardwareCommandsHelper::alignInterfaceDescriptorData); size_t IDToffset = dsh.getUsed(); dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); KernelInfo modifiedKernelInfo = {}; modifiedKernelInfo.patchInfo = kernel->getKernelInfo().patchInfo; modifiedKernelInfo.workgroupWalkOrder[0] = 2; modifiedKernelInfo.workgroupWalkOrder[1] = 1; modifiedKernelInfo.workgroupWalkOrder[2] = 0; 
modifiedKernelInfo.workgroupDimensionsOrder[0] = 2; modifiedKernelInfo.workgroupDimensionsOrder[1] = 1; modifiedKernelInfo.workgroupDimensionsOrder[2] = 0; MockKernel mockKernel{kernel->getProgram(), modifiedKernelInfo, kernel->getDevice(), false}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, mockKernel, mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), modifiedKernelInfo.getMaxSimdSize(), localWorkSizes, IDToffset, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); size_t numThreads = localWorkSizeX * localWorkSizeY * localWorkSizeZ; numThreads = Math::divideAndRoundUp(numThreads, modifiedKernelInfo.getMaxSimdSize()); size_t expectedIohSize = ((modifiedKernelInfo.getMaxSimdSize() == 32) ? 32 : 16) * 3 * numThreads * sizeof(uint16_t); ASSERT_LE(expectedIohSize, ioh.getUsed()); auto expectedLocalIds = alignedMalloc(expectedIohSize, 64); uint32_t grfSize = sizeof(typename FamilyType::GRF); generateLocalIDs(expectedLocalIds, modifiedKernelInfo.getMaxSimdSize(), std::array{{localWorkSizeX, localWorkSizeY, localWorkSizeZ}}, std::array{{modifiedKernelInfo.workgroupDimensionsOrder[0], modifiedKernelInfo.workgroupDimensionsOrder[1], modifiedKernelInfo.workgroupDimensionsOrder[2]}}, false, grfSize); EXPECT_EQ(0, memcmp(expectedLocalIds, ioh.getCpuBase(), expectedIohSize)); alignedFree(expectedLocalIds); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointer) { typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(pContext, pClDevice, 0, false); std::unique_ptr 
dstImage(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, dstImage.get()); MultiDispatchInfo multiDispatchInfo; auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d, cmdQ.getDevice()); ASSERT_NE(nullptr, &builder); BuiltinOpParams dc; dc.srcPtr = nullptr; dc.dstMemObj = dstImage.get(); dc.dstOffset = {0, 0, 0}; dc.size = {1, 1, 1}; dc.dstRowPitch = 0; dc.dstSlicePitch = 0; builder.buildDispatchInfos(multiDispatchInfo, dc); EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); const size_t localWorkSizes[3]{256, 1, 1}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); // Obtain where the pointers will be stored const auto &kernelInfo = kernel->getKernelInfo(); auto numSurfaceStates = kernelInfo.patchInfo.statelessGlobalMemObjKernelArgs.size() + kernelInfo.patchInfo.imageMemObjKernelArgs.size(); EXPECT_EQ(2u, numSurfaceStates); size_t bindingTableStateSize = numSurfaceStates * sizeof(RENDER_SURFACE_STATE); uint32_t *bindingTableStatesPointers = reinterpret_cast( reinterpret_cast(ssh.getCpuBase()) + ssh.getUsed() + bindingTableStateSize); for (auto i = 0u; i < numSurfaceStates; i++) { *(&bindingTableStatesPointers[i]) = 0xDEADBEEF; } // force statefull path for buffers const_cast(kernelInfo).requiresSshForBuffers = true; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *kernel, 
kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); EXPECT_EQ(0x00000000u, *(&bindingTableStatesPointers[0])); EXPECT_EQ(0x00000040u, *(&bindingTableStatesPointers[1])); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, usedBindingTableStatePointersForGlobalAndConstantAndPrivateAndEventPoolAndDefaultCommandQueueSurfaces) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; // define kernel info auto pKernelInfo = std::make_unique(); SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // define patch offsets for global, constant, private, event pool and default device queue surfaces SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization; AllocateStatelessGlobalMemorySurfaceWithInitialization.GlobalBufferIndex = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0; AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization; SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization; AllocateStatelessConstantMemorySurfaceWithInitialization.ConstantBufferIndex = 0; AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 64; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 8; AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8; 
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization; SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface; AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 32; AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 128; AllocateStatelessPrivateMemorySurface.DataParamOffset = 16; AllocateStatelessPrivateMemorySurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface; SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface; AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 192; AllocateStatelessEventPoolSurface.DataParamOffset = 24; AllocateStatelessEventPoolSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface; SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface; AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 256; AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 32; AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8; pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface; // create program with valid context MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice); // setup global memory char globalBuffer[16]; GraphicsAllocation gfxGlobalAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, globalBuffer, castToUint64(globalBuffer), 0llu, sizeof(globalBuffer), MemoryPool::MemoryNull); program.setGlobalSurface(&gfxGlobalAlloc); // setup constant memory char constBuffer[16]; GraphicsAllocation gfxConstAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, constBuffer, castToUint64(constBuffer), 0llu, sizeof(constBuffer), MemoryPool::MemoryNull); program.setConstantSurface(&gfxConstAlloc); // 
create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); SKernelBinaryHeaderCommon kernelHeader; // setup surface state heap constexpr uint32_t numSurfaces = 5; constexpr uint32_t sshSize = numSurfaces * sizeof(typename FamilyType::RENDER_SURFACE_STATE) + numSurfaces * sizeof(typename FamilyType::BINDING_TABLE_STATE); unsigned char *surfaceStateHeap = reinterpret_cast(alignedMalloc(sshSize, sizeof(typename FamilyType::RENDER_SURFACE_STATE))); uint32_t btiOffset = static_cast(numSurfaces * sizeof(typename FamilyType::RENDER_SURFACE_STATE)); auto bti = reinterpret_cast(surfaceStateHeap + btiOffset); for (uint32_t i = 0; i < numSurfaces; ++i) { bti[i].setSurfaceStatePointer(i * sizeof(typename FamilyType::RENDER_SURFACE_STATE)); } kernelHeader.SurfaceStateHeapSize = sshSize; // setup kernel heap uint32_t kernelIsa[32]; kernelHeader.KernelHeapSize = sizeof(kernelIsa); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeap = kernelIsa; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // setup binding table state SPatchBindingTableState bindingTableState; bindingTableState.Token = iOpenCL::PATCH_TOKEN_BINDING_TABLE_STATE; bindingTableState.Size = sizeof(SPatchBindingTableState); bindingTableState.Count = 5; bindingTableState.Offset = btiOffset; bindingTableState.SurfaceStateOffset = 0; pKernelInfo->patchInfo.bindingTableState = &bindingTableState; // setup thread payload SPatchThreadPayload threadPayload; threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; pKernelInfo->patchInfo.threadPayload = &threadPayload; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; // initialize kernel ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // setup cross thread data char pCrossThreadData[64]; pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); // try with different offsets to surface state base address for 
(uint32_t ssbaOffset : {0U, (uint32_t)sizeof(typename FamilyType::RENDER_SURFACE_STATE)}) { CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); // Initialize binding table state pointers with pattern EXPECT_EQ(numSurfaces, pKernel->getNumberOfBindingTableStates()); const size_t localWorkSizes[3]{256, 1, 1}; dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); ssh.getSpace(ssbaOffset); // offset local ssh from surface state base address uint32_t localSshOffset = static_cast(ssh.getUsed()); // push surfaces states and binding table to given ssh heap uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*pKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *pKernel, pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), pKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); bti = reinterpret_cast(reinterpret_cast(ssh.getCpuBase()) + localSshOffset + btiOffset); for (uint32_t i = 0; i < numSurfaces; ++i) { uint32_t expected = localSshOffset + i * sizeof(typename FamilyType::RENDER_SURFACE_STATE); EXPECT_EQ(expected, bti[i].getSurfaceStatePointer()); } program.setGlobalSurface(nullptr); program.setConstantSurface(nullptr); //exhaust space to trigger reload ssh.getSpace(ssh.getAvailableSpace()); dsh.getSpace(dsh.getAvailableSpace()); } alignedFree(surfaceStateHeap); delete pKernel; } HWTEST_F(HardwareCommandsTest, 
setBindingTableStatesForKernelWithBuffersNotRequiringSSHDoesNotTouchSSH) { // define kernel info auto pKernelInfo = std::make_unique(); // create program with valid context MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[256]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = false; SPatchStatelessGlobalMemoryObjectKernelArgument statelessGlobalMemory; statelessGlobalMemory.ArgumentNumber = 0; statelessGlobalMemory.DataParamOffset = 0; statelessGlobalMemory.DataParamSize = 0; statelessGlobalMemory.Size = 0; statelessGlobalMemory.SurfaceStateHeapOffset = 0; pKernelInfo->patchInfo.statelessGlobalMemObjKernelArgs.push_back(&statelessGlobalMemory); // initialize kernel ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); ssh.align(8); auto usedBefore = ssh.getUsed(); // Initialize binding table state pointers with pattern auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); // set binding table states auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); auto usedAfter = ssh.getUsed(); EXPECT_EQ(usedBefore, usedAfter); ssh.align(8); EXPECT_EQ(usedAfter, ssh.getUsed()); delete pKernel; } HWTEST_F(HardwareCommandsTest, setBindingTableStatesForNoSurfaces) { // define kernel info auto pKernelInfo = std::make_unique(); // create program with valid context MockContext context; MockProgram 
program(*pDevice->getExecutionEnvironment(), &context, false, pDevice); // create kernel MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); // setup surface state heap char surfaceStateHeap[256]; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // define stateful path pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; // initialize kernel ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); // Initialize binding table state pointers with pattern auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); SPatchBindingTableState bindingTableState; bindingTableState.Token = iOpenCL::PATCH_TOKEN_BINDING_TABLE_STATE; bindingTableState.Size = sizeof(SPatchBindingTableState); bindingTableState.Count = 0; bindingTableState.Offset = 64; bindingTableState.SurfaceStateOffset = 0; pKernelInfo->patchInfo.bindingTableState = &bindingTableState; dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); pKernelInfo->patchInfo.bindingTableState = nullptr; delete pKernel; } HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenAlignSlmSizeIsCalledThenCorrectValueIsReturned) { if (::renderCoreFamily == IGFX_GEN8_CORE) { EXPECT_EQ(0u, HardwareCommandsHelper::alignSlmSize(0)); EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(1)); EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(1024)); EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(1025)); 
EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(2048)); EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(2049)); EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(4096)); EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(4097)); EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(8192)); EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(8193)); EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(12288)); EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(16384)); EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(16385)); EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(24576)); EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(32768)); EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(32769)); EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(49152)); EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(65535)); EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(65536)); } else { EXPECT_EQ(0u, HardwareCommandsHelper::alignSlmSize(0)); EXPECT_EQ(1024u, HardwareCommandsHelper::alignSlmSize(1)); EXPECT_EQ(1024u, HardwareCommandsHelper::alignSlmSize(1024)); EXPECT_EQ(2048u, HardwareCommandsHelper::alignSlmSize(1025)); EXPECT_EQ(2048u, HardwareCommandsHelper::alignSlmSize(2048)); EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(2049)); EXPECT_EQ(4096u, HardwareCommandsHelper::alignSlmSize(4096)); EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(4097)); EXPECT_EQ(8192u, HardwareCommandsHelper::alignSlmSize(8192)); EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(8193)); EXPECT_EQ(16384u, HardwareCommandsHelper::alignSlmSize(16384)); EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(16385)); EXPECT_EQ(32768u, HardwareCommandsHelper::alignSlmSize(32768)); EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(32769)); EXPECT_EQ(65536u, HardwareCommandsHelper::alignSlmSize(65536)); } } HWTEST_F(HardwareCommandsTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) { 
if (::renderCoreFamily == IGFX_GEN8_CORE) { EXPECT_EQ(0u, HardwareCommandsHelper::computeSlmValues(0)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(1)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(1024)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(1025)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(2048)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(2049)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(4096)); EXPECT_EQ(2u, HardwareCommandsHelper::computeSlmValues(4097)); EXPECT_EQ(2u, HardwareCommandsHelper::computeSlmValues(8192)); EXPECT_EQ(4u, HardwareCommandsHelper::computeSlmValues(8193)); EXPECT_EQ(4u, HardwareCommandsHelper::computeSlmValues(12288)); EXPECT_EQ(4u, HardwareCommandsHelper::computeSlmValues(16384)); EXPECT_EQ(8u, HardwareCommandsHelper::computeSlmValues(16385)); EXPECT_EQ(8u, HardwareCommandsHelper::computeSlmValues(24576)); EXPECT_EQ(8u, HardwareCommandsHelper::computeSlmValues(32768)); EXPECT_EQ(16u, HardwareCommandsHelper::computeSlmValues(32769)); EXPECT_EQ(16u, HardwareCommandsHelper::computeSlmValues(49152)); EXPECT_EQ(16u, HardwareCommandsHelper::computeSlmValues(65535)); EXPECT_EQ(16u, HardwareCommandsHelper::computeSlmValues(65536)); } else { EXPECT_EQ(0u, HardwareCommandsHelper::computeSlmValues(0)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(1)); EXPECT_EQ(1u, HardwareCommandsHelper::computeSlmValues(1024)); EXPECT_EQ(2u, HardwareCommandsHelper::computeSlmValues(1025)); EXPECT_EQ(2u, HardwareCommandsHelper::computeSlmValues(2048)); EXPECT_EQ(3u, HardwareCommandsHelper::computeSlmValues(2049)); EXPECT_EQ(3u, HardwareCommandsHelper::computeSlmValues(4096)); EXPECT_EQ(4u, HardwareCommandsHelper::computeSlmValues(4097)); EXPECT_EQ(4u, HardwareCommandsHelper::computeSlmValues(8192)); EXPECT_EQ(5u, HardwareCommandsHelper::computeSlmValues(8193)); EXPECT_EQ(5u, HardwareCommandsHelper::computeSlmValues(16384)); EXPECT_EQ(6u, 
HardwareCommandsHelper::computeSlmValues(16385)); EXPECT_EQ(6u, HardwareCommandsHelper::computeSlmValues(32768)); EXPECT_EQ(7u, HardwareCommandsHelper::computeSlmValues(32769)); EXPECT_EQ(7u, HardwareCommandsHelper::computeSlmValues(65536)); } } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenIndirectStateIsProgrammedThenBorderColorIsCorrectlyCopiedToDshAndSamplerStatesAreProgrammedWithPointer) { typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); const size_t localWorkSizes[3]{1, 1, 1}; auto &commandStream = cmdQ.getCS(1024); auto pWalkerCmd = static_cast(commandStream.getSpace(sizeof(GPGPU_WALKER))); *pWalkerCmd = FamilyType::cmdInitGpgpuWalker; auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192); const uint32_t borderColorSize = 64; const uint32_t samplerStateSize = sizeof(SAMPLER_STATE) * 2; SPatchSamplerStateArray samplerStateArray; samplerStateArray.BorderColorOffset = 0x0; samplerStateArray.Count = 2; samplerStateArray.Offset = borderColorSize; samplerStateArray.Size = samplerStateSize; samplerStateArray.Token = 1; char *mockDsh = new char[(borderColorSize + samplerStateSize) * 4]; memset(mockDsh, 6, borderColorSize); memset(mockDsh + borderColorSize, 8, borderColorSize); mockKernelWithInternal->kernelInfo.heapInfo.pDsh = mockDsh; mockKernelWithInternal->kernelInfo.patchInfo.samplerStateArray = &samplerStateArray; uint64_t interfaceDescriptorTableOffset = dsh.getUsed(); dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); dsh.getSpace(4); char 
*initialDshPointer = static_cast(dsh.getCpuBase()) + dsh.getUsed(); char *borderColorPointer = alignUp(initialDshPointer, 64); uint32_t borderColorOffset = static_cast(borderColorPointer - static_cast(dsh.getCpuBase())); SAMPLER_STATE *pSamplerState = reinterpret_cast(mockDsh + borderColorSize); for (uint32_t i = 0; i < 2; i++) { pSamplerState[i].setIndirectStatePointer(0); } mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData)); mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); HardwareCommandsHelper::sendIndirectState( commandStream, dsh, ioh, ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), 8, localWorkSizes, interfaceDescriptorTableOffset, interfaceDescriptorIndex, pDevice->getPreemptionMode(), pWalkerCmd, nullptr, true); bool isMemorySame = memcmp(borderColorPointer, mockDsh, borderColorSize) == 0; EXPECT_TRUE(isMemorySame); SAMPLER_STATE *pSamplerStatesCopied = reinterpret_cast(borderColorPointer + borderColorSize); for (uint32_t i = 0; i < 2; i++) { EXPECT_EQ(pSamplerState[i].getNonNormalizedCoordinateEnable(), pSamplerStatesCopied[i].getNonNormalizedCoordinateEnable()); EXPECT_EQ(pSamplerState[i].getTcxAddressControlMode(), pSamplerStatesCopied[i].getTcxAddressControlMode()); EXPECT_EQ(pSamplerState[i].getTcyAddressControlMode(), pSamplerStatesCopied[i].getTcyAddressControlMode()); EXPECT_EQ(pSamplerState[i].getTczAddressControlMode(), pSamplerStatesCopied[i].getTczAddressControlMode()); EXPECT_EQ(pSamplerState[i].getMinModeFilter(), pSamplerStatesCopied[i].getMinModeFilter()); 
EXPECT_EQ(pSamplerState[i].getMagModeFilter(), pSamplerStatesCopied[i].getMagModeFilter()); EXPECT_EQ(pSamplerState[i].getMipModeFilter(), pSamplerStatesCopied[i].getMipModeFilter()); EXPECT_EQ(pSamplerState[i].getUAddressMinFilterRoundingEnable(), pSamplerStatesCopied[i].getUAddressMinFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getUAddressMagFilterRoundingEnable(), pSamplerStatesCopied[i].getUAddressMagFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getVAddressMinFilterRoundingEnable(), pSamplerStatesCopied[i].getVAddressMinFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getVAddressMagFilterRoundingEnable(), pSamplerStatesCopied[i].getVAddressMagFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getRAddressMagFilterRoundingEnable(), pSamplerStatesCopied[i].getRAddressMagFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getRAddressMinFilterRoundingEnable(), pSamplerStatesCopied[i].getRAddressMinFilterRoundingEnable()); EXPECT_EQ(pSamplerState[i].getLodAlgorithm(), pSamplerStatesCopied[i].getLodAlgorithm()); EXPECT_EQ(pSamplerState[i].getTextureLodBias(), pSamplerStatesCopied[i].getTextureLodBias()); EXPECT_EQ(pSamplerState[i].getLodPreclampMode(), pSamplerStatesCopied[i].getLodPreclampMode()); EXPECT_EQ(pSamplerState[i].getTextureBorderColorMode(), pSamplerStatesCopied[i].getTextureBorderColorMode()); EXPECT_EQ(pSamplerState[i].getSamplerDisable(), pSamplerStatesCopied[i].getSamplerDisable()); EXPECT_EQ(pSamplerState[i].getCubeSurfaceControlMode(), pSamplerStatesCopied[i].getCubeSurfaceControlMode()); EXPECT_EQ(pSamplerState[i].getShadowFunction(), pSamplerStatesCopied[i].getShadowFunction()); EXPECT_EQ(pSamplerState[i].getChromakeyMode(), pSamplerStatesCopied[i].getChromakeyMode()); EXPECT_EQ(pSamplerState[i].getChromakeyIndex(), pSamplerStatesCopied[i].getChromakeyIndex()); EXPECT_EQ(pSamplerState[i].getChromakeyEnable(), pSamplerStatesCopied[i].getChromakeyEnable()); EXPECT_EQ(pSamplerState[i].getMaxLod(), pSamplerStatesCopied[i].getMaxLod()); 
EXPECT_EQ(pSamplerState[i].getMinLod(), pSamplerStatesCopied[i].getMinLod()); EXPECT_EQ(pSamplerState[i].getLodClampMagnificationMode(), pSamplerStatesCopied[i].getLodClampMagnificationMode()); EXPECT_EQ(borderColorOffset, pSamplerStatesCopied[i].getIndirectStatePointer()); } delete[] mockDsh; } using HardwareCommandsHelperTests = ::testing::Test; HWTEST_F(HardwareCommandsHelperTests, givenCompareAddressAndDataWhenProgrammingSemaphoreWaitThenSetupAllFields) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename FamilyType::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; uint64_t compareAddress = 0x10000; uint32_t compareData = 1234; uint8_t buffer[1024] = {}; LinearStream cmdStream(buffer, 1024); MI_SEMAPHORE_WAIT referenceCommand = FamilyType::cmdInitMiSemaphoreWait; referenceCommand.setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); referenceCommand.setSemaphoreDataDword(compareData); referenceCommand.setSemaphoreGraphicsAddress(compareAddress); referenceCommand.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); COMPARE_OPERATION compareMode = COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD; HardwareCommandsHelper::programMiSemaphoreWait(cmdStream, compareAddress, compareData, compareMode); EXPECT_EQ(sizeof(MI_SEMAPHORE_WAIT), cmdStream.getUsed()); EXPECT_EQ(0, memcmp(&referenceCommand, buffer, sizeof(MI_SEMAPHORE_WAIT))); } HWTEST_F(HardwareCommandsHelperTests, whenProgrammingMiAtomicThenSetupAllFields) { using MI_ATOMIC = typename FamilyType::MI_ATOMIC; uint64_t writeAddress = 0x10000; auto opcode = MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT; auto dataSize = MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD; uint8_t buffer[1024] = {}; LinearStream cmdStream(buffer, 1024); MI_ATOMIC referenceCommand = FamilyType::cmdInitAtomic; HardwareCommandsHelper::programMiAtomic(referenceCommand, writeAddress, opcode, dataSize); auto miAtomic = 
HardwareCommandsHelper::programMiAtomic(cmdStream, writeAddress, opcode, dataSize); EXPECT_EQ(sizeof(MI_ATOMIC), cmdStream.getUsed()); EXPECT_EQ(miAtomic, cmdStream.getCpuBase()); EXPECT_EQ(0, memcmp(&referenceCommand, miAtomic, sizeof(MI_ATOMIC))); } typedef ExecutionModelKernelFixture ParentKernelCommandsFromBinaryTest; HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, getSizeRequiredForExecutionModelForSurfaceStatesReturnsSizeOfBlocksPlusMaxBindingTableSizeForAllIDTEntriesAndSchedulerSSHSize) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_TRUE(pKernel->isParentKernel); size_t totalSize = 0; BlockKernelManager *blockManager = pKernel->getProgram()->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1; // for initial alignment uint32_t maxBindingTableCount = 0; for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); totalSize += pBlockInfo->heapInfo.pKernelHeader->SurfaceStateHeapSize; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState ? pBlockInfo->patchInfo.bindingTableState->Count : 0); } totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; auto &scheduler = pContext->getSchedulerKernel(); auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(); totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? 
BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0); totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); EXPECT_EQ(totalSize, HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel)); } } static const char *binaryFile = "simple_block_kernel"; static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"}; INSTANTIATE_TEST_CASE_P(ParentKernelCommandsFromBinaryTest, ParentKernelCommandsFromBinaryTest, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineThenReturnTrue) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(1u); uint32_t crossThreadData[8]; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenNoDebugSettingsWhenDefaultModeIsExcercisedThenWeFollowKernelSettingForInlineProgramming) { const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenDisabledPassInlineDataWhenKernelAllowsInlineThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(0u); const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(1u); uint32_t crossThreadData[8]; 
const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 0; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) { const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 1; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxInYDimPresentThenExpectLocalIdsInUseIsTrue) { const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxInZDimPresentThenExpectLocalIdsInUseIsTrue) { const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1; EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxAreNotPresentThenExpectLocalIdsInUseIsFalse) { const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDXPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; 
const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; EXPECT_FALSE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); MockGraphicsAllocation globalAllocation; mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); Kernel::CacheFlushAllocationsVec allocs; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &globalAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); mockKernelWithInternal->mockProgram->setGlobalSurface(nullptr); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenSvmAllocationsSetAsCacheFlushRequiringThenExpectCacheFlushCommand) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using 
MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); char buff[MemoryConstants::pageSize * 2]; MockGraphicsAllocation svmAllocation1{alignUp(buff, MemoryConstants::pageSize), MemoryConstants::pageSize}; mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation1); MockGraphicsAllocation svmAllocation2{alignUp(buff, MemoryConstants::pageSize), MemoryConstants::pageSize}; svmAllocation2.setFlushL3Required(false); mockKernelWithInternal->mockKernel->kernelSvmGfxAllocations.push_back(&svmAllocation2); mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; Kernel::CacheFlushAllocationsVec allocs; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation1)); EXPECT_EQ(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation2)); size_t expectedSize = sizeof(PIPE_CONTROL); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenKernelArgIsSetAsCacheFlushRequiredThenExpectCacheFlushCommand) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = 
typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); auto &commandStream = cmdQ.getCS(1024); addSpaceForSingleKernelArg(); MockGraphicsAllocation cacheRequiringAllocation; mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush.resize(2); mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; Kernel::CacheFlushAllocationsVec allocs; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &cacheRequiringAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(&commandStream, cmdQ, mockKernelWithInternal->mockKernel, 0U); HardwareParse hwParse; hwParse.parseCommands(commandStream); PIPE_CONTROL *pipeControl = hwParse.getCommand(); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); } HWTEST_F(HardwareCommandsTest, givenCacheFlushAfterWalkerDisabledWhenGettingRequiredCacheFlushSizeThenReturnZero) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); size_t expectedSize = 0U; size_t actualSize = HardwareCommandsHelper::getSizeRequiredForCacheFlush(cmdQ, mockKernelWithInternal->mockKernel, 0U); EXPECT_EQ(expectedSize, actualSize); } TEST_F(HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSupportFlushThenExpectNoCacheAllocationForFlush) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); 
hardwareInfo.capabilityTable.supportCacheFlushAfterWalker = false; StackVec allocationsForCacheFlush; mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush); EXPECT_EQ(0U, allocationsForCacheFlush.size()); } using KernelCacheFlushTests = Test>; HWTEST_F(KernelCacheFlushTests, givenLocallyUncachedBufferWhenGettingAllocationsForFlushThenEmptyVectorIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); auto kernel = clUniquePtr(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); cl_mem_properties_intel bufferPropertiesUncachedResource[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; auto bufferLocallyUncached = clCreateBufferWithPropertiesINTEL(context, bufferPropertiesUncachedResource, 1, nullptr, nullptr); kernel->setArg(0, sizeof(bufferLocallyUncached), &bufferLocallyUncached); using CacheFlushAllocationsVec = StackVec; CacheFlushAllocationsVec cacheFlushVec; kernel->getAllocationsForCacheFlush(cacheFlushVec); EXPECT_EQ(0u, cacheFlushVec.size()); auto bufferRegular = clCreateBufferWithPropertiesINTEL(context, nullptr, 1, nullptr, nullptr); kernel->setArg(1, sizeof(bufferRegular), &bufferRegular); kernel->getAllocationsForCacheFlush(cacheFlushVec); size_t expectedCacheFlushVecSize = (hardwareInfo.capabilityTable.supportCacheFlushAfterWalker ? 
1u : 0u); EXPECT_EQ(expectedCacheFlushVecSize, cacheFlushVec.size()); clReleaseMemObject(bufferLocallyUncached); clReleaseMemObject(bufferRegular); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h000066400000000000000000000036711363734646600317130ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/built_in_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include using namespace NEO; struct HardwareCommandsTest : DeviceFixture, ContextFixture, BuiltInFixture, ::testing::Test { using BuiltInFixture::SetUp; using ContextFixture::SetUp; void SetUp() override; void TearDown() override; void addSpaceForSingleKernelArg(); size_t sizeRequiredCS; size_t sizeRequiredISH; std::unique_ptr mockKernelWithInternal; Kernel::SimpleKernelArgInfo kernelArgInfo = {}; std::vector kernelArguments; template size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { return HardwareCommandsHelper::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? 
srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0, srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(), srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); } }; compute-runtime-20.13.16352/opencl/test/unit_test/helpers/hw_helper_default_tests.cpp000066400000000000000000000020451363734646600307040ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_info.h" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" void testDefaultImplementationOfSetupHardwareCapabilities(HwHelper &hwHelper, const HardwareInfo &hwInfo) { HardwareCapabilities hwCaps = {0}; hwHelper.setupHardwareCapabilities(&hwCaps, hwInfo); EXPECT_EQ(16384u, hwCaps.image3DMaxHeight); EXPECT_EQ(16384u, hwCaps.image3DMaxWidth); EXPECT_TRUE(hwCaps.isStatelesToStatefullWithOffsetSupported); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAskedForHvAlign4RequiredThenReturnTrue) { auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily); EXPECT_TRUE(hwHelper.hvAlign4Required()); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAskedForLowPriorityEngineTypeThenReturnRcs) { auto hwHelperEngineType = HwHelperHw::lowPriorityEngineType; EXPECT_EQ(aub_stream::EngineType::ENGINE_RCS, hwHelperEngineType); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/hw_helper_tests.cpp000066400000000000000000001174741363734646600272150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/hw_helper_tests.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include 
"shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include #include #include #include using namespace NEO; TEST(HwHelperSimpleTest, givenDebugVariableWhenAskingForRenderCompressionThenReturnCorrectValue) { DebugManagerStateRestore restore; HardwareInfo localHwInfo = *defaultHwInfo; // debug variable not set localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; localHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_FALSE(HwHelper::renderCompressedBuffersSupported(localHwInfo)); EXPECT_FALSE(HwHelper::renderCompressedImagesSupported(localHwInfo)); localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; localHwInfo.capabilityTable.ftrRenderCompressedImages = true; EXPECT_TRUE(HwHelper::renderCompressedBuffersSupported(localHwInfo)); EXPECT_TRUE(HwHelper::renderCompressedImagesSupported(localHwInfo)); // debug variable set DebugManager.flags.RenderCompressedBuffersEnabled.set(1); DebugManager.flags.RenderCompressedImagesEnabled.set(1); localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; localHwInfo.capabilityTable.ftrRenderCompressedImages = false; EXPECT_TRUE(HwHelper::renderCompressedBuffersSupported(localHwInfo)); EXPECT_TRUE(HwHelper::renderCompressedImagesSupported(localHwInfo)); DebugManager.flags.RenderCompressedBuffersEnabled.set(0); DebugManager.flags.RenderCompressedImagesEnabled.set(0); localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; localHwInfo.capabilityTable.ftrRenderCompressedImages = true; 
EXPECT_FALSE(HwHelper::renderCompressedBuffersSupported(localHwInfo)); EXPECT_FALSE(HwHelper::renderCompressedImagesSupported(localHwInfo)); } TEST_F(HwHelperTest, getReturnsValidHwHelperHw) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_NE(nullptr, &helper); } HWTEST_F(HwHelperTest, getBindingTableStateSurfaceStatePointerReturnsCorrectPointer) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; BINDING_TABLE_STATE bindingTableState[4]; bindingTableState[2].getRawData(0) = 0x00123456; auto &helper = HwHelper::get(renderCoreFamily); auto pointer = helper.getBindingTableStateSurfaceStatePointer(bindingTableState, 2); EXPECT_EQ(0x00123456u, pointer); } HWTEST_F(HwHelperTest, getBindingTableStateSizeReturnsCorrectSize) { using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; auto &helper = HwHelper::get(renderCoreFamily); auto pointer = helper.getBindingTableStateSize(); EXPECT_EQ(sizeof(BINDING_TABLE_STATE), pointer); } TEST_F(HwHelperTest, getBindingTableStateAlignementReturnsCorrectSize) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_NE(0u, helper.getBindingTableStateAlignement()); } HWTEST_F(HwHelperTest, getInterfaceDescriptorDataSizeReturnsCorrectSize) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(sizeof(INTERFACE_DESCRIPTOR_DATA), helper.getInterfaceDescriptorDataSize()); } TEST_F(HwHelperTest, givenDebuggingInactiveWhenSipKernelTypeIsQueriedThenCsrTypeIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_NE(nullptr, &helper); auto sipType = helper.getSipKernelType(false); EXPECT_EQ(SipKernelType::Csr, sipType); } TEST_F(HwHelperTest, givenEngineTypeRcsWhenCsTraitsAreQueiredThenCorrectNameInTraitsIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_NE(nullptr, &helper); auto &csTraits = helper.getCsTraits(aub_stream::ENGINE_RCS); EXPECT_STREQ("RCS", csTraits.name); } 
HWTEST_F(HwHelperTest, givenHwHelperWhenAskedForPageTableManagerSupportThenReturnCorrectValue) { auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(helper.isPageTableManagerSupported(hardwareInfo), UnitTestHelper::isPageTableManagerSupported(hardwareInfo)); } TEST(DwordBuilderTest, setNonMaskedBits) { uint32_t dword = 0; // expect non-masked bit 2 uint32_t expectedDword = (1 << 2); dword = DwordBuilder::build(2, false, true, 0); // set 2nd bit EXPECT_EQ(expectedDword, dword); // expect non-masked bits 2 and 3 expectedDword |= (1 << 3); dword = DwordBuilder::build(3, false, true, dword); // set 3rd bit with init value EXPECT_EQ(expectedDword, dword); } TEST(DwordBuilderTest, setMaskedBits) { uint32_t dword = 0; // expect masked bit 2 uint32_t expectedDword = (1 << 2); expectedDword |= (1 << (2 + 16)); dword = DwordBuilder::build(2, true, true, 0); // set 2nd bit (masked) EXPECT_EQ(expectedDword, dword); // expect masked bits 2 and 3 expectedDword |= (1 << 3); expectedDword |= (1 << (3 + 16)); dword = DwordBuilder::build(3, true, true, dword); // set 3rd bit (masked) with init value EXPECT_EQ(expectedDword, dword); } TEST(DwordBuilderTest, setMaskedBitsWithDifferentBitValue) { // expect only mask bit uint32_t expectedDword = 1 << (2 + 16); auto dword = DwordBuilder::build(2, true, false, 0); EXPECT_EQ(expectedDword, dword); // expect masked bits 3 expectedDword = (1 << 3); expectedDword |= (1 << (3 + 16)); dword = DwordBuilder::build(3, true, true, 0); EXPECT_EQ(expectedDword, dword); } using LriHelperTests = ::testing::Test; HWTEST_F(LriHelperTests, givenAddressAndOffsetWhenHelperIsUsedThenProgramCmdStream) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint32_t address = 0x8888; uint32_t data = 0x1234; auto expectedLri = FamilyType::cmdInitLoadRegisterImm; expectedLri.setRegisterOffset(address); expectedLri.setDataDword(data); auto lri = 
LriHelper::program(&stream, address, data); EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM), stream.getUsed()); EXPECT_EQ(lri, stream.getCpuBase()); EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0); } using PipeControlHelperTests = ::testing::Test; HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsedThenProperFieldsAreProgrammed) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint64_t address = 0x1234567887654321; uint64_t immediateData = 0x1234; auto expectedPipeControl = FamilyType::cmdInitPipeControl; expectedPipeControl.setCommandStreamerStallEnable(true); expectedPipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP); expectedPipeControl.setAddress(static_cast(address & 0x0000FFFFFFFFULL)); expectedPipeControl.setAddressHigh(static_cast(address >> 32)); HardwareInfo hardwareInfo = *defaultHwInfo; auto pipeControl = MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, false, hardwareInfo); auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleSynchronization(hardwareInfo); EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed()); EXPECT_EQ(pipeControl, ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0); } HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIsUsedThenProperFieldsAreProgrammed) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); uint64_t address = 0x1234567887654321; uint64_t immediateData = 
0x1234; auto expectedPipeControl = FamilyType::cmdInitPipeControl; expectedPipeControl.setCommandStreamerStallEnable(true); expectedPipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA); expectedPipeControl.setAddress(static_cast(address & 0x0000FFFFFFFFULL)); expectedPipeControl.setAddressHigh(static_cast(address >> 32)); expectedPipeControl.setImmediateData(immediateData); HardwareInfo hardwareInfo = *defaultHwInfo; auto pipeControl = MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, false, hardwareInfo); auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleSynchronization(hardwareInfo); EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed()); EXPECT_EQ(pipeControl, ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0); } TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) { HardwareInfo hwInfo; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto engineType = getChosenEngineType(hwInfo); EXPECT_EQ(aub_stream::ENGINE_RCS, engineType); } TEST(HwInfoTest, givenNodeOrdinalSetWhenChosenEngineTypeQueriedThenSetValueIsReturned) { DebugManagerStateRestore dbgRestore; DebugManager.flags.NodeOrdinal.set(aub_stream::ENGINE_VECS); HardwareInfo hwInfo; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; auto engineType = getChosenEngineType(hwInfo); EXPECT_EQ(aub_stream::ENGINE_VECS, engineType); } HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenNoAllocationProvidedThenUseArgumentsasInput) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = 
typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); auto gmmHelper = pDevice->getGmmHelper(); void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE)); ASSERT_NE(nullptr, stateBuffer); memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE)); auto &helper = HwHelper::get(renderCoreFamily); EXPECT_EQ(sizeof(RENDER_SURFACE_STATE), helper.getRenderSurfaceStateSize()); size_t size = 0x1000; SURFACE_STATE_BUFFER_LENGTH length; length.Length = static_cast(size - 1); uint64_t addr = 0x2000; size_t offset = 0x1000; uint32_t pitch = 0x40; SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER; helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, offset, pitch, nullptr, false, type, true); RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer); EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight()); EXPECT_EQ(pitch, state->getSurfacePitch()); addr += offset; EXPECT_EQ(addr, state->getSurfaceBaseAddress()); EXPECT_EQ(type, state->getSurfaceType()); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), state->getMemoryObjectControlState()); memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE)); size = 0x1003; length.Length = static_cast(alignUp(size, 4) - 1); bool isReadOnly = false; helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, nullptr, isReadOnly, type, true); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), state->getMemoryObjectControlState()); EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth()); EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth()); EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight()); memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE)); size = 0x1000; addr = 0x2001; 
// Checks setRenderSurfaceStateForBuffer when a GraphicsAllocation is supplied: the
// programmed base address is expected to be the allocation's gpuAddr (0x4000), not the
// separately passed addr (0x2000), and coherency / aux mode are expected to be
// IA-coherent / AUX_NONE.
// NOTE(review): the reinterpret_cast / static_cast expressions below have no template
// argument list in this copy of the file - presumably lost during extraction; confirm
// against the upstream source before building.
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenAllocationProvidedThenUseAllocationAsInput) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    // Raw, zero-filled storage that the helper programs as a RENDER_SURFACE_STATE.
    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer);

    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    auto &helper = HwHelper::get(renderCoreFamily);

    size_t size = 0x1000;
    SURFACE_STATE_BUFFER_LENGTH length;
    uint64_t addr = 0x2000;
    uint32_t pitch = 0;

    void *cpuAddr = reinterpret_cast(0x4000);
    uint64_t gpuAddr = 0x4000u;
    size_t allocSize = size;
    // Expected width/height/depth are derived from (allocSize - 1) through `length`.
    length.Length = static_cast(allocSize - 1);
    GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull);
    // The Gmm is allocated with new here and released manually at the end of the test.
    allocation.setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false));
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation, false, type, true);
    // Each dimension is compared against the corresponding `length` field plus one.
    EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
    EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
    EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
    EXPECT_EQ(pitch, state->getSurfacePitch() - 1u);
    // Base address must come from the allocation (gpuAddr), not from `addr`.
    EXPECT_EQ(gpuAddr, state->getSurfaceBaseAddress());
    EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
    EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());

    delete allocation.getDefaultGmm();
    alignedFree(stateBuffer);
}
// Checks the surface state produced when the Gmm is flagged as render compressed but the
// allocation type is UNKNOWN (not BUFFER_COMPRESSED): coherency is expected to stay
// IA-coherent and the auxiliary surface mode AUX_NONE.
// NOTE(review): the reinterpret_cast expressions below have no template argument list in
// this copy of the file - presumably lost during extraction; confirm against upstream.
HWTEST_F(HwHelperTest, givenCreatedSurfaceStateBufferWhenGmmCompressionEnabledAndAllocationDisabledAnNonAuxDisabledThenSetCoherencyToIaAndAuxModeToNone) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    // Raw, zero-filled storage that the helper programs as a RENDER_SURFACE_STATE.
    void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
    ASSERT_NE(nullptr, stateBuffer);
    RENDER_SURFACE_STATE *state = reinterpret_cast(stateBuffer);

    memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
    auto &helper = HwHelper::get(renderCoreFamily);

    size_t size = 0x1000;
    uint64_t addr = 0x2000;
    uint32_t pitch = 0;

    void *cpuAddr = reinterpret_cast(0x4000);
    uint64_t gpuAddr = 0x4000u;
    size_t allocSize = size;
    // Allocation type is UNKNOWN here, i.e. the allocation itself is not a compressed buffer.
    GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull);
    // The Gmm is allocated with new here and released manually at the end of the test.
    allocation.setDefaultGmm(new Gmm(rootDeviceEnvironment.getGmmClientContext(), allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize(), false));
    // Only the Gmm side reports compression.
    allocation.getDefaultGmm()->isRenderCompressed = true;
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;
    // Final argument is false; going by the test name this is presumably the
    // "non-aux" switch left disabled - TODO confirm against the helper's signature.
    helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, stateBuffer, size, addr, 0, pitch, &allocation, false, type, false);
    EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
    EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, state->getAuxiliarySurfaceMode());

    delete allocation.getDefaultGmm();
    alignedFree(stateBuffer);
}
// Profiling aid (carries gtest's DISABLED_ prefix): times maxLoop calls to
// setRenderSurfaceStateForBuffer and maxLoop memcpy_s copies of a RENDER_SURFACE_STATE,
// then prints average / median / min / max and every sorted sample to std::cout.
// It contains no value assertions apart from the allocation checks.
// NOTE(review): the std::vector, std::chrono::duration and duration_cast uses below have
// no (or a truncated) template argument list in this copy of the file - presumably lost
// during extraction; confirm against the upstream source.
HWTEST_F(HwHelperTest, DISABLED_profilingCreationOfRenderSurfaceStateVsMemcpyOfCachelineAlignedBuffer) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;

    constexpr uint32_t maxLoop = 1000u;

    // Two timestamps (start, end) are stored per iteration, hence maxLoop * 2.
    std::vector> timesCreate;
    timesCreate.reserve(maxLoop * 2);

    std::vector> timesMemCpy;
    timesMemCpy.reserve(maxLoop * 2);

    std::vector nanoDurationCreate;
    nanoDurationCreate.reserve(maxLoop);

    std::vector nanoDurationCpy;
    nanoDurationCpy.reserve(maxLoop);

    std::vector surfaceStates;
    surfaceStates.reserve(maxLoop);

    std::vector copyBuffers;
    copyBuffers.reserve(maxLoop);

    // Pre-allocate every source and destination buffer so allocation cost is not timed.
    // NOTE(review): a failing ASSERT_NE returns from the test body, so buffers pushed in
    // earlier iterations would not be freed - confirm whether that matters here.
    for (uint32_t i = 0; i < maxLoop; ++i) {
        void *stateBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
        ASSERT_NE(nullptr, stateBuffer);
        memset(stateBuffer, 0, sizeof(RENDER_SURFACE_STATE));
        surfaceStates.push_back(stateBuffer);

        void *copyBuffer = alignedMalloc(sizeof(RENDER_SURFACE_STATE), sizeof(RENDER_SURFACE_STATE));
        ASSERT_NE(nullptr, copyBuffer);
        copyBuffers.push_back(copyBuffer);
    }

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &helper = HwHelper::get(renderCoreFamily);
    size_t size = 0x1000;
    uint64_t addr = 0x2000;
    uint32_t pitch = 0;
    SURFACE_TYPE type = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER;

    // Pass 1: time programming of each surface state (no allocation is passed).
    for (uint32_t i = 0; i < maxLoop; ++i) {
        auto t1 = std::chrono::high_resolution_clock::now();
        helper.setRenderSurfaceStateForBuffer(rootDeviceEnvironment, surfaceStates[i], size, addr, 0, pitch, nullptr, false, type, true);
        auto t2 = std::chrono::high_resolution_clock::now();
        timesCreate.push_back(t1);
        timesCreate.push_back(t2);
    }

    // Pass 2: time copying each programmed state into its copy buffer.
    for (uint32_t i = 0; i < maxLoop; ++i) {
        auto t1 = std::chrono::high_resolution_clock::now();
        memcpy_s(copyBuffers[i], sizeof(RENDER_SURFACE_STATE), surfaceStates[i], sizeof(RENDER_SURFACE_STATE));
        auto t2 = std::chrono::high_resolution_clock::now();
        timesMemCpy.push_back(t1);
        timesMemCpy.push_back(t2);
    }

    // Convert each (start, end) pair at indices 2*i and 2*i+1 into a nanosecond count.
    for (uint32_t i = 0; i < maxLoop; ++i) {
        std::chrono::duration delta = timesCreate[i * 2 + 1] - timesCreate[i * 2];
        std::chrono::nanoseconds duration = std::chrono::duration_cast(delta);
        nanoDurationCreate.push_back(duration.count());

        delta = timesMemCpy[i * 2 + 1] - timesMemCpy[i * 2];
        duration = std::chrono::duration_cast(delta);
        nanoDurationCpy.push_back(duration.count());
    }

    // Sorting makes min / median / max simple index lookups below.
    sort(nanoDurationCreate.begin(), nanoDurationCreate.end());
    sort(nanoDurationCpy.begin(), nanoDurationCpy.end());

    double averageCreate = std::accumulate(nanoDurationCreate.begin(), nanoDurationCreate.end(), 0.0) / nanoDurationCreate.size();
    double averageCpy = std::accumulate(nanoDurationCpy.begin(), nanoDurationCpy.end(), 0.0) / nanoDurationCpy.size();

    size_t middleCreate = nanoDurationCreate.size() / 2;
    size_t middleCpy = nanoDurationCpy.size() / 2;

    std::cout << "Creation average: " << averageCreate << " median: " << nanoDurationCreate[middleCreate];
    std::cout << " min: " << nanoDurationCreate[0] << " max: " << nanoDurationCreate[nanoDurationCreate.size() - 1] << std::endl;
    std::cout << "Copy average: " << averageCpy << " median: " << nanoDurationCpy[middleCpy];
    std::cout << " min: " << nanoDurationCpy[0] << " max: " << nanoDurationCpy[nanoDurationCpy.size() - 1] << std::endl;

    // Both vectors were sorted above, so row #k pairs the k-th smallest creation time with
    // the k-th smallest copy time rather than the two timings of the same iteration.
    for (uint32_t i = 0; i < maxLoop; i++) {
        std::cout << "#" << (i + 1) << " Create: " << nanoDurationCreate[i] << " Copy: " << nanoDurationCpy[i] << std::endl;
    }

    for (uint32_t i = 0; i < maxLoop; ++i) {
        alignedFree(surfaceStates[i]);
        alignedFree(copyBuffers[i]);
    }
}
// The three tests below combine the EnableCacheFlushAfterWalker debug flag with the
// platform capability bit supportCacheFlushAfterWalker and check the result of
// HwHelper::cacheFlushAfterWalkerSupported. DebugManagerStateRestore is instantiated first
// in each test, presumably to restore the debug flags on scope exit - TODO confirm.
// NOTE(review): `std::unique_ptr(...)` has no template argument list in this copy of the
// file - presumably lost during extraction; confirm against the upstream source.

// Flag set to 0 while the platform reports support: the result is expected to be false.
TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsDisableWhenPlatformSupportsThenOverrideAndReturnSupportFalse) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(0);

    // Work on a copy so the shared default hardware info is not modified.
    HardwareInfo localHwInfo = *defaultHwInfo;
    localHwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&localHwInfo));
    EXPECT_FALSE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
}

// Flag set to -1 (per the test name: "read platform setting") and the platform reports no
// support: the result is expected to be false.
TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformDoesNotSupportThenReturnSupportFalse) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo localHwInfo = *defaultHwInfo;
    localHwInfo.capabilityTable.supportCacheFlushAfterWalker = false;

    auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&localHwInfo));
    EXPECT_FALSE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
}

// Flag set to -1 and the platform reports support: the result is expected to be true.
TEST(HwHelperCacheFlushTest, givenEnableCacheFlushFlagIsReadPlatformSettingWhenPlatformSupportsThenReturnSupportTrue) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableCacheFlushAfterWalker.set(-1);

    HardwareInfo localHwInfo = *defaultHwInfo;
    localHwInfo.capabilityTable.supportCacheFlushAfterWalker = true;

    auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&localHwInfo));
    EXPECT_TRUE(HwHelper::cacheFlushAfterWalkerSupported(device->getHardwareInfo()));
}
VariableBackup orgOsEnableLocalMemory(&OSInterface::osEnableLocalMemory); auto &helper = HwHelper::get(renderCoreFamily); DebugManager.flags.EnableLocalMemory.set(0); EXPECT_FALSE(helper.getEnableLocalMemory(hardwareInfo)); DebugManager.flags.EnableLocalMemory.set(1); EXPECT_TRUE(helper.getEnableLocalMemory(hardwareInfo)); DebugManager.flags.EnableLocalMemory.set(-1); OSInterface::osEnableLocalMemory = false; EXPECT_FALSE(helper.getEnableLocalMemory(hardwareInfo)); OSInterface::osEnableLocalMemory = true; EXPECT_EQ(helper.isLocalMemoryEnabled(hardwareInfo), helper.getEnableLocalMemory(hardwareInfo)); } TEST_F(HwHelperTest, givenAUBDumpForceAllToLocalMemoryDebugVarWhenSetThenGetEnableLocalMemoryReturnsCorrectValue) { DebugManagerStateRestore dbgRestore; std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo)); auto &helper = HwHelper::get(renderCoreFamily); DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true); EXPECT_TRUE(helper.getEnableLocalMemory(hardwareInfo)); } TEST_F(HwHelperTest, givenVariousCachesRequestProperMOCSIndexesAreBeingReturned) { auto &helper = HwHelper::get(renderCoreFamily); auto gmmHelper = this->pDevice->getGmmHelper(); auto expectedMocsForL3off = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; auto expectedMocsForL3on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; auto expectedMocsForL3andL1on = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; auto mocsIndex = helper.getMocsIndex(*gmmHelper, false, true); EXPECT_EQ(expectedMocsForL3off, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, false); EXPECT_EQ(expectedMocsForL3on, mocsIndex); mocsIndex = helper.getMocsIndex(*gmmHelper, true, true); if (mocsIndex != expectedMocsForL3andL1on) { EXPECT_EQ(expectedMocsForL3on, mocsIndex); } else { EXPECT_EQ(expectedMocsForL3andL1on, mocsIndex); } } HWTEST_F(HwHelperTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) 
// Checks getAuxTranslationMode: first the value with no override applied in this test,
// then the value after forcing each mode through the ForceAuxTranslationMode debug flag.
// NOTE(review): `UnitTestHelper::`, `HwHelperHw::` and `static_cast(...)` appear without
// template argument lists in this copy of the file - presumably lost during extraction;
// confirm against the upstream source.
HWTEST_F(HwHelperTest, givenDebugVariableSetWhenAskingForAuxTranslationModeThenReturnCorrectValue) {
    DebugManagerStateRestore restore;

    // Without an override the helper must agree with the unit-test helper's required mode.
    EXPECT_EQ(UnitTestHelper::requiredAuxTranslationMode, HwHelperHw::getAuxTranslationMode());

    // When the reported mode is Blit, a hardware info configured through
    // configureHardwareCustom is expected to report blitter operations as supported.
    if (HwHelperHw::getAuxTranslationMode() == AuxTranslationMode::Blit) {
        auto hwInfoConfig = HwInfoConfig::get(productFamily);
        HardwareInfo hwInfo = {};
        hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr);
        EXPECT_TRUE(hwInfo.capabilityTable.blitterOperationsSupported);
    }

    // Forcing a mode through the debug flag must be reflected by the helper.
    DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit));
    EXPECT_EQ(AuxTranslationMode::Blit, HwHelperHw::getAuxTranslationMode());

    DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin));
    EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode());
}
const uint32_t numImageTypes = 6; const cl_mem_object_type imgTypes[numImageTypes] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; cl_image_desc imgDesc = {}; MockContext context; cl_int retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create(&context, 0, 1, nullptr, retVal)); auto &helper = HwHelper::get(renderCoreFamily); for (uint32_t i = 0; i < numImageTypes; i++) { imgDesc.image_type = imgTypes[i]; imgDesc.buffer = nullptr; bool allowedType = imgTypes[i] == (CL_MEM_OBJECT_IMAGE2D) || (imgTypes[i] == CL_MEM_OBJECT_IMAGE3D) || (imgTypes[i] == CL_MEM_OBJECT_IMAGE2D_ARRAY); // non shared context, dont force linear storage EXPECT_EQ((tilingSupported & allowedType), helper.tilingAllowed(false, Image::isImage1d(imgDesc), false)); { DebugManagerStateRestore restore; DebugManager.flags.ForceLinearImages.set(true); // non shared context, dont force linear storage + debug flag EXPECT_FALSE(helper.tilingAllowed(false, Image::isImage1d(imgDesc), false)); } // shared context, dont force linear storage EXPECT_FALSE(helper.tilingAllowed(true, Image::isImage1d(imgDesc), false)); // non shared context, force linear storage EXPECT_FALSE(helper.tilingAllowed(false, Image::isImage1d(imgDesc), true)); // non shared context, dont force linear storage + create from buffer imgDesc.buffer = buffer.get(); EXPECT_FALSE(helper.tilingAllowed(false, Image::isImage1d(imgDesc), false)); } } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); EXPECT_EQ(0u, hwHelper.getBarriersCountFromHasBarriers(0u)); EXPECT_EQ(1u, hwHelper.getBarriersCountFromHasBarriers(1u)); } HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) { auto &hwHelper = 
// The tests below pin the values returned by several HwHelper queries. Four of them call
// GTEST_SKIP() when the render core family is IGFX_GEN12LP_CORE, so they do not assert
// anything on that family.

// isOffsetToSkipSetFFIDGPWARequired is expected to be false (skipped on GEN12LP).
HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsOffsetToSkipSetFFIDGPWARequiredCalledThenFalseIsReturned) {
    if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
        GTEST_SKIP();
    }
    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    EXPECT_FALSE(hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo));
}

// isForceDefaultRCSEngineWARequired is expected to be false (skipped on GEN12LP).
// NOTE(review): `HwHelperHw::` appears without a template argument list in this copy of
// the file - presumably lost during extraction; confirm against the upstream source.
HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsForceDefaultRCSEngineWARequiredCalledThenFalseIsReturned) {
    if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
        GTEST_SKIP();
    }
    EXPECT_FALSE(HwHelperHw::isForceDefaultRCSEngineWARequired(hardwareInfo));
}

// isForceEmuInt32DivRemSPWARequired is expected to be false (skipped on GEN12LP).
HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenIsForceEmuInt32DivRemSPWARequiredCalledThenFalseIsReturned) {
    if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
        GTEST_SKIP();
    }
    auto &helper = HwHelper::get(renderCoreFamily);
    EXPECT_FALSE(helper.isForceEmuInt32DivRemSPWARequired(hardwareInfo));
}

// getMinimalSIMDSize is expected to be 8; this test has no family-specific skip.
HWTEST_F(HwHelperTest, givenDefaultHwHelperHwWhenMinimalSIMDSizeIsQueriedThen8IsReturned) {
    auto &helper = HwHelper::get(renderCoreFamily);
    EXPECT_EQ(8u, helper.getMinimalSIMDSize());
}

// isFusedEuDispatchEnabled is expected to be false (skipped on GEN12LP).
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, WhenIsFusedEuDispatchEnabledIsCalledThenFalseIsReturned) {
    if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
        GTEST_SKIP();
    }
    auto &helper = HwHelper::get(renderCoreFamily);
    EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo));
}
// Programs a full cache flush into a 128-byte LinearStream and checks that the returned
// PIPE_CONTROL has every flush / invalidate / stall bit queried below enabled.
// NOTE(review): `std::unique_ptr buffer(...)` and `MemorySynchronizationCommands::` appear
// without template argument lists in this copy of the file - presumably lost during
// extraction; confirm against the upstream source.
HWTEST_F(PipeControlHelperTests, WhenProgrammingCacheFlushThenExpectBasicFieldsSet) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    // Backing storage for the command stream; released when `buffer` goes out of scope.
    std::unique_ptr buffer(new uint8_t[128]);

    LinearStream stream(buffer.get(), 128);

    PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream);
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_TRUE(pipeControl->getDcFlushEnable());

    EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable());
    EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable());
    EXPECT_TRUE(pipeControl->getTextureCacheInvalidationEnable());
    EXPECT_TRUE(pipeControl->getPipeControlFlushEnable());
    EXPECT_TRUE(pipeControl->getStateCacheInvalidationEnable());
}
// Test helper that parses command buffers into a list of command pointers (cmdList) and
// remembers iterators / raw pointers to the commands tests usually inspect.
// NOTE(review): every `template` keyword below is missing its parameter list, and calls
// such as find(...), genCmdCast(...), getCommand(...), parseCommands(...) and
// reinterpret_cast(...) are missing their template arguments in this copy of the file -
// presumably lost during extraction. The bodies refer to `CmdType` and `FamilyType`, so
// those are presumably the parameter names; confirm against the upstream source.
struct HardwareParse {
    // Points the cached iterators at cmdList.end(), i.e. "not found".
    // NOTE(review): itorBBStartAfterWalker is the only iterator member not assigned here -
    // confirm whether that is intentional.
    HardwareParse() {
        itorMediaInterfaceDescriptorLoad = cmdList.end();
        itorMediaVfeState = cmdList.end();
        itorPipelineSelect = cmdList.end();
        itorStateBaseAddress = cmdList.end();
        itorWalker = cmdList.end();
        itorGpgpuCsrBaseAddress = cmdList.end();
    }

    // Fixture hook; nothing to prepare.
    void SetUp() {
    }

    // Fixture hook; drops all parsed command lists. The cached iterators and cmd* pointers
    // are not reset here.
    void TearDown() {
        cmdList.clear();
        lriList.clear();
        pipeControlList.clear();
    }

    // Returns a new list holding every entry of cmdList for which genCmdCast yields a
    // non-null result.
    template 
    GenCmdList getCommandsList() {
        GenCmdList list;
        for (auto it = cmdList.begin(); it != cmdList.end(); it++) {
            auto cmd = genCmdCast(*it);
            if (cmd) {
                list.push_back(*it);
            }
        }
        return list;
    }

    // Declared here; the definitions are not part of this struct body.
    template 
    void findCsrBaseAddress();

    template 
    void findHardwareCommands();

    template 
    void findHardwareCommands(IndirectHeap *dsh);

    // Parses the used part of commandStream, starting at startOffset, into cmdList.
    // Both checks are fatal gtest assertions, so a failure returns from this function.
    template 
    void parseCommands(NEO::LinearStream &commandStream, size_t startOffset = 0) {
        ASSERT_LE(startOffset, commandStream.getUsed());
        auto sizeToParse = commandStream.getUsed() - startOffset;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
            cmdList,
            ptrOffset(commandStream.getCpuBase(), startOffset),
            sizeToParse));
    }

    // Parses what was added since the previous call to the queue's CSR stream and to the
    // queue's own stream, then runs findHardwareCommands with the CSR's dynamic state heap.
    template 
    void parseCommands(NEO::CommandQueue &commandQueue) {
        auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
        auto &commandStreamCSR = commandStreamReceiver.getCS();

        // Resume from where the previous parse of the CSR stream stopped.
        parseCommands(commandStreamCSR, startCSRCS);
        startCSRCS = commandStreamCSR.getUsed();

        auto &commandStream = commandQueue.getCS(1024);
        // A different stream object than last time means parsing restarts from offset 0.
        if (previousCS != &commandStream) {
            startCS = 0;
        }
        parseCommands(commandStream, startCS);
        startCS = commandStream.getUsed();
        previousCS = &commandStream;

        sizeUsed = commandStream.getUsed();
        findHardwareCommands(&commandStreamReceiver.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0));
    }

    // Returns the RENDER_SURFACE_STATE referenced by binding table entry `index`, resolved
    // through the cached interface descriptor and STATE_BASE_ADDRESS command. When `ssh` is
    // given and its GPU base equals the programmed surface state base address, the heap's
    // CPU base is used for the lookup instead.
    // cmdInterfaceDescriptorData and cmdStateBaseAddress are dereferenced without a null
    // check, so both must have been set before calling this.
    template 
    const typename FamilyType::RENDER_SURFACE_STATE &getSurfaceState(IndirectHeap *ssh, uint32_t index) {
        typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE;
        typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
        typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
        typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;

        const auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;

        auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
        auto surfaceStateHeap = cmdSBA->getSurfaceStateBaseAddress();
        if (ssh && (ssh->getHeapGpuBase() == surfaceStateHeap)) {
            surfaceStateHeap = reinterpret_cast(ssh->getCpuBase());
        }
        // Non-fatal check: execution continues even if the heap address is zero.
        EXPECT_NE(0u, surfaceStateHeap);

        auto bindingTablePointer = interfaceDescriptorData.getBindingTablePointer();

        const auto &bindingTableState = reinterpret_cast(surfaceStateHeap + bindingTablePointer)[index];
        auto surfaceStatePointer = bindingTableState.getSurfaceStatePointer();

        return *(RENDER_SURFACE_STATE *)(surfaceStateHeap + surfaceStatePointer);
    }

    // Returns sampler state `index`, located at the programmed dynamic state base address
    // plus the interface descriptor's sampler state pointer. Unlike getSurfaceState, the
    // address from STATE_BASE_ADDRESS is used as-is.
    template 
    const typename FamilyType::SAMPLER_STATE &getSamplerState(uint32_t index) {
        typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA;
        typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE;
        typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS;

        const auto &interfaceDescriptorData = *(INTERFACE_DESCRIPTOR_DATA *)cmdInterfaceDescriptorData;

        auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress;
        auto dynamicStateHeap = cmdSBA->getDynamicStateBaseAddress();
        EXPECT_NE(0, dynamicStateHeap);

        const auto samplerState = reinterpret_cast(dynamicStateHeap + interfaceDescriptorData.getSamplerStatePointer());
        return samplerState[index];
    }

    // Declared here; the definition is not part of this struct body.
    template 
    const void *getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh);

    // Returns the first matching command found in [itorStart, itorEnd), or nullptr.
    // NOTE(review): the "not found" test compares against cmdList.end(), not itorEnd. If a
    // caller passes an itorEnd other than cmdList.end() and find() returns itorEnd, *itorCmd
    // would be evaluated on that end position - confirm whether find() can return itorEnd
    // and whether any caller passes a sub-range.
    template 
    CmdType *getCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) {
        auto itorCmd = find(itorStart, itorEnd);
        return itorCmd != cmdList.end()
                   ? genCmdCast(*itorCmd)
                   : nullptr;
    }

    // Convenience overload that searches the whole of cmdList.
    template 
    CmdType *getCommand() {
        return getCommand(cmdList.begin(), cmdList.end());
    }

    // Counts PIPELINE_SELECT commands that select GPGPU with the enable mask bits set, and
    // separately those that select 3D. When the 3D pipeline-select workaround is required,
    // the GPGPU count is reduced by (3D count - 1); otherwise no 3D select is expected.
    // NOTE(review): with the workaround required and zero 3D selects, (3D count - 1) is -1,
    // so the returned count is increased by one - confirm that this is intended.
    template 
    int getNumberOfPipelineSelectsThatEnablePipelineSelect() {
        typedef typename FamilyType::PIPELINE_SELECT PIPELINE_SELECT;
        int numberOfGpgpuSelects = 0;
        int numberOf3dSelects = 0;
        auto itorCmd = find(cmdList.begin(), cmdList.end());
        while (itorCmd != cmdList.end()) {
            auto cmd = getCommand(itorCmd, cmdList.end());
            if (cmd->getPipelineSelection() == PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU &&
                pipelineSelectEnablePipelineSelectMaskBits == (pipelineSelectEnablePipelineSelectMaskBits & cmd->getMaskBits())) {
                numberOfGpgpuSelects++;
            }
            if (cmd->getPipelineSelection() == PIPELINE_SELECT::PIPELINE_SELECTION_3D &&
                pipelineSelectEnablePipelineSelectMaskBits == (pipelineSelectEnablePipelineSelectMaskBits & cmd->getMaskBits())) {
                numberOf3dSelects++;
            }
            // Continue the search just past the command that was examined.
            itorCmd = find(++itorCmd, cmdList.end());
        }
        auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
        if (hwHelper.is3DPipelineSelectWARequired(*defaultHwInfo)) {
            auto maximalNumberOf3dSelectsRequired = 2;
            EXPECT_LE(numberOf3dSelects, maximalNumberOf3dSelectsRequired);
            EXPECT_EQ(numberOf3dSelects, numberOfGpgpuSelects);
            auto numberOfGpgpuSelectsAddedByWa = numberOf3dSelects - 1;
            numberOfGpgpuSelects -= numberOfGpgpuSelectsAddedByWa;
        } else {
            EXPECT_EQ(0, numberOf3dSelects);
        }
        return numberOfGpgpuSelects;
    }

    // Counts the matching commands in cmdList by repeatedly searching from the position
    // after the previous hit; the loop ends once the iterator reaches cmdList.end().
    template 
    uint32_t getCommandCount() {
        GenCmdList::iterator cmdItor = cmdList.begin();
        uint32_t cmdCount = 0;

        do {
            cmdItor = find(cmdItor, cmdList.end());
            if (cmdItor != cmdList.end()) {
                ++cmdCount;
                ++cmdItor;
            }
        } while (cmdItor != cmdList.end());

        return cmdCount;
    }

    // Forwards to the family-specific parser to obtain a name for `cmd`.
    template 
    static const char *getCommandName(void *cmd) {
        return FamilyType::PARSE::getCommandName(cmd);
    }

    // The starting point of parsing commandBuffers. This is important
    // because as buffers get reused, we only want to parse the deltas.
    LinearStream *previousCS = nullptr;
    size_t startCS = 0u;
    size_t startCSRCS = 0u;

    // Used size of the queue's command stream at the time of the last queue parse.
    size_t sizeUsed = 0u;
    // All parsed commands, plus the two filtered lists cleared in TearDown().
    GenCmdList cmdList;
    GenCmdList lriList;
    GenCmdList pipeControlList;
    // Cached positions of specific commands within cmdList.
    GenCmdList::iterator itorMediaInterfaceDescriptorLoad;
    GenCmdList::iterator itorMediaVfeState;
    GenCmdList::iterator itorPipelineSelect;
    GenCmdList::iterator itorStateBaseAddress;
    GenCmdList::iterator itorWalker;
    GenCmdList::iterator itorBBStartAfterWalker;
    GenCmdList::iterator itorGpgpuCsrBaseAddress;

    // Raw pointers to the located commands; nullptr until they are set.
    void *cmdInterfaceDescriptorData = nullptr;
    void *cmdMediaInterfaceDescriptorLoad = nullptr;
    void *cmdMediaVfeState = nullptr;
    void *cmdPipelineSelect = nullptr;
    void *cmdStateBaseAddress = nullptr;
    void *cmdWalker = nullptr;
    void *cmdBBStartAfterWalker = nullptr;
    void *cmdGpgpuCsrBaseAddress = nullptr;

    // Defaults to false; presumably enables collecting PIPE_CONTROL commands into
    // pipeControlList - TODO confirm against findHardwareCommands.
    bool parsePipeControl = false;
};
cmdList.end()); if (itorBBStartAfterWalker != cmdList.end()) { cmdBBStartAfterWalker = *itorBBStartAfterWalker; } for (auto it = cmdList.begin(); it != cmdList.end(); it++) { auto lri = genCmdCast(*it); if (lri) { lriList.push_back(*it); } } if (parsePipeControl) { for (auto it = cmdList.begin(); it != cmdList.end(); it++) { auto pipeControl = genCmdCast(*it); if (pipeControl) { pipeControlList.push_back(*it); } } } MEDIA_INTERFACE_DESCRIPTOR_LOAD *cmdMIDL = nullptr; itorMediaInterfaceDescriptorLoad = find(cmdList.begin(), itorWalker); if (itorMediaInterfaceDescriptorLoad != itorWalker) { cmdMIDL = (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorMediaInterfaceDescriptorLoad; cmdMediaInterfaceDescriptorLoad = *itorMediaInterfaceDescriptorLoad; } itorPipelineSelect = find(cmdList.begin(), itorWalker); if (itorPipelineSelect != itorWalker) { cmdPipelineSelect = *itorPipelineSelect; } itorMediaVfeState = find(itorPipelineSelect, itorWalker); if (itorMediaVfeState != itorWalker) { cmdMediaVfeState = *itorMediaVfeState; } STATE_BASE_ADDRESS *cmdSBA = nullptr; uint64_t dynamicStateHeap = 0; itorStateBaseAddress = find(cmdList.begin(), itorWalker); if (itorStateBaseAddress != itorWalker) { cmdSBA = (STATE_BASE_ADDRESS *)*itorStateBaseAddress; cmdStateBaseAddress = *itorStateBaseAddress; // Extract the dynamicStateHeap dynamicStateHeap = cmdSBA->getDynamicStateBaseAddress(); if (dsh && (dsh->getHeapGpuBase() == dynamicStateHeap)) { dynamicStateHeap = reinterpret_cast(dsh->getCpuBase()); } ASSERT_NE(0u, dynamicStateHeap); } // interfaceDescriptorData should be located within dynamicStateHeap if (cmdMIDL && cmdSBA) { auto iddStart = cmdMIDL->getInterfaceDescriptorDataStartAddress(); auto iddEnd = iddStart + cmdMIDL->getInterfaceDescriptorTotalLength(); ASSERT_LE(iddEnd, cmdSBA->getDynamicStateBufferSize() * MemoryConstants::pageSize); // Extract the interfaceDescriptorData cmdInterfaceDescriptorData = (INTERFACE_DESCRIPTOR_DATA *)(dynamicStateHeap + iddStart); } } template void 
HardwareParse::findHardwareCommands() { findHardwareCommands(nullptr); } template const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uint32_t indexArg, IndirectHeap &ioh) { typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; auto cmdWalker = (GPGPU_WALKER *)this->cmdWalker; EXPECT_NE(nullptr, cmdWalker); auto cmdSBA = (STATE_BASE_ADDRESS *)cmdStateBaseAddress; EXPECT_NE(nullptr, cmdSBA); auto offsetCrossThreadData = cmdWalker->getIndirectDataStartAddress(); EXPECT_LT(offsetCrossThreadData, cmdSBA->getIndirectObjectBufferSize() * MemoryConstants::pageSize); offsetCrossThreadData -= static_cast(ioh.getGraphicsAllocation()->getGpuAddressToPatch()); // Get the base of cross thread auto pCrossThreadData = ptrOffset( reinterpret_cast(ioh.getCpuBase()), offsetCrossThreadData); // Determine where the argument is auto &patchInfo = kernel.getKernelInfo().patchInfo; for (auto &arg : patchInfo.statelessGlobalMemObjKernelArgs) { if (arg->ArgumentNumber == indexArg) { return ptrOffset(pCrossThreadData, arg->DataParamOffset); } } return nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/kernel_binary_helper.cpp000066400000000000000000000020501363734646600301600ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/test_files.h" #include extern PRODUCT_FAMILY productFamily; KernelBinaryHelper::KernelBinaryHelper(const std::string &name, bool appendOptionsToFileName) { // set mock compiler to return expected kernel MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; retrieveBinaryKernelFilename(fclDebugVars.fileName, name + "_", ".bc"); retrieveBinaryKernelFilename(igcDebugVars.fileName, name + "_", ".gen"); 
fclDebugVars.appendOptionsToFileName = appendOptionsToFileName; igcDebugVars.appendOptionsToFileName = appendOptionsToFileName; gEnvironment->fclPushDebugVars(fclDebugVars); gEnvironment->igcPushDebugVars(igcDebugVars); } KernelBinaryHelper::~KernelBinaryHelper() { gEnvironment->igcPopDebugVars(); gEnvironment->fclPopDebugVars(); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/kernel_binary_helper.h000066400000000000000000000005141363734646600276300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include class KernelBinaryHelper { public: KernelBinaryHelper(const std::string &name = "copybuffer", bool appendOptionsToFileName = true); ~KernelBinaryHelper(); static const std::string BUILT_INS; }; compute-runtime-20.13.16352/opencl/test/unit_test/helpers/kernel_binary_helper_hash_value.cpp000066400000000000000000000003431363734646600323620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" const std::string KernelBinaryHelper::BUILT_INS("7206969092167061981");compute-runtime-20.13.16352/opencl/test/unit_test/helpers/kernel_filename_helper.h000066400000000000000000000010231363734646600301200ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include class KernelFilenameHelper { public: static void getKernelFilenameFromInternalOption(std::string &option, std::string &filename) { // remove leading spaces size_t position = option.find_first_not_of(" "); filename = option.substr(position); // replace space with underscore std::replace(filename.begin(), filename.end(), ' ', '_'); } }; compute-runtime-20.13.16352/opencl/test/unit_test/helpers/kmd_notify_tests.cpp000066400000000000000000000466531363734646600274030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "test.h" #include "gmock/gmock.h" #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif using namespace NEO; struct KmdNotifyTests : public ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); cmdQ.reset(new MockCommandQueue(&context, device.get(), nullptr)); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountToWait; cmdQ->getGpgpuCommandStreamReceiver().waitForFlushStamp(flushStampToWait); overrideKmdNotifyParams(true, 2, true, 1, false, 0); } void overrideKmdNotifyParams(bool kmdNotifyEnable, int64_t kmdNotifyDelay, bool quickKmdSleepEnable, int64_t quickKmdSleepDelay, bool quickKmdSleepEnableForSporadicWaits, int64_t quickKmdSleepDelayForSporadicWaits) { auto &properties = hwInfo->capabilityTable.kmdNotifyProperties; properties.enableKmdNotify = kmdNotifyEnable; properties.delayKmdNotifyMicroseconds = kmdNotifyDelay; properties.enableQuickKmdSleep = quickKmdSleepEnable; properties.delayQuickKmdSleepMicroseconds = quickKmdSleepDelay; properties.enableQuickKmdSleepForSporadicWaits = quickKmdSleepEnableForSporadicWaits; properties.delayQuickKmdSleepForSporadicWaitsMicroseconds = quickKmdSleepDelayForSporadicWaits; } class MockKmdNotifyHelper : public KmdNotifyHelper { public: using KmdNotifyHelper::acLineConnected; using KmdNotifyHelper::getMicrosecondsSinceEpoch; using 
KmdNotifyHelper::lastWaitForCompletionTimestampUs; using KmdNotifyHelper::properties; MockKmdNotifyHelper() = delete; MockKmdNotifyHelper(const KmdNotifyProperties *newProperties) : KmdNotifyHelper(newProperties){}; void updateLastWaitForCompletionTimestamp() override { KmdNotifyHelper::updateLastWaitForCompletionTimestamp(); updateLastWaitForCompletionTimestampCalled++; } void updateAcLineStatus() override { KmdNotifyHelper::updateAcLineStatus(); updateAcLineStatusCalled++; } uint32_t updateLastWaitForCompletionTimestampCalled = 0u; uint32_t updateAcLineStatusCalled = 0u; }; template class MockKmdNotifyCsr : public UltCommandStreamReceiver { public: MockKmdNotifyCsr(const ExecutionEnvironment &executionEnvironment) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0) {} MOCK_METHOD1(waitForFlushStamp, bool(FlushStamp &flushStampToWait)); MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait)); }; template MockKmdNotifyCsr *createMockCsr() { auto csr = new ::testing::NiceMock>(*device->executionEnvironment); device->resetCommandStreamReceiver(csr); mockKmdNotifyHelper = new MockKmdNotifyHelper(&device->getHardwareInfo().capabilityTable.kmdNotifyProperties); csr->resetKmdNotifyHelper(mockKmdNotifyHelper); return csr; } MockKmdNotifyHelper *mockKmdNotifyHelper = nullptr; HardwareInfo *hwInfo = nullptr; MockContext context; std::unique_ptr device; std::unique_ptr cmdQ; FlushStamp flushStampToWait = 1000; uint32_t taskCountToWait = 5; }; HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTryCpuPolling) { auto csr = createMockCsr(); EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) { overrideKmdNotifyParams(false, 0, 
false, 0, false, 0); auto csr = createMockCsr(); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) { auto csr = createMockCsr(); *csr->getTagAddress() = taskCountToWait - 1; ::testing::InSequence is; EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(false)); EXPECT_CALL(*csr, waitForFlushStamp(flushStampToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(false)); //we have unrecoverable for this case, this will throw. EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false), std::exception); } HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) { auto csr = createMockCsr(); ::testing::InSequence is; EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDisableQuickKmdSleep) { auto csr = createMockCsr(); auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThenChangeDelayValue) { auto csr = createMockCsr(); auto 
expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, true); } HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSleepRequestIsCalledThenUseBaseDelayValue) { overrideKmdNotifyParams(true, 1, false, 0, false, 0); auto csr = createMockCsr(); auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, true); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) { *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountToWait - 1; auto success = device->getUltCommandStreamReceiver().waitForCompletionWithTimeout(true, 1, taskCountToWait); EXPECT_FALSE(success); } HWTEST_F(KmdNotifyTests, givenZeroFlushStampWhenWaitIsCalledThenDisableTimeout) { auto csr = createMockCsr(); EXPECT_TRUE(device->getHardwareInfo().capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, ::testing::_, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 0, false, false); } HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsSporadicWaitThenOverrideQuickSleepRequest) { overrideKmdNotifyParams(true, 3, true, 2, true, 1); auto csr = createMockCsr(); auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_, expectedDelay, 
::testing::_)).Times(1).WillOnce(::testing::Return(true)); int64_t timeSinceLastWait = mockKmdNotifyHelper->properties->delayQuickKmdSleepForSporadicWaitsMicroseconds + 1; mockKmdNotifyHelper->lastWaitForCompletionTimestampUs = mockKmdNotifyHelper->getMicrosecondsSinceEpoch() - timeSinceLastWait; csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false); } HWTEST_F(KmdNotifyTests, givenNonQuickSleepRequestWhenItsNotSporadicWaitThenOverrideQuickSleepRequest) { overrideKmdNotifyParams(true, 3, true, 2, true, 9999999); auto csr = createMockCsr(); auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_, expectedDelay, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenPowerSavingModeIsRequestedThenTimeoutIsEnabled) { overrideKmdNotifyParams(false, 3, false, 2, false, 9999999); auto csr = createMockCsr(); EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, true); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCallWaitThenTimeoutIsEnabled) { overrideKmdNotifyParams(false, 3, false, 2, false, 9999999); auto csr = createMockCsr(); EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); cmdQ->throttle = QueueThrottle::LOW; cmdQ->waitUntilComplete(1, 1, false); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) { overrideKmdNotifyParams(false, 3, false, 2, false, 9999999); auto csr = createMockCsr(); EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, 
::testing::_)).Times(1).WillOnce(::testing::Return(true)); cmdQ->throttle = QueueThrottle::LOW; cmdQ->waitUntilComplete(1, 0, false); } HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) { overrideKmdNotifyParams(true, 3, true, 2, false, 0); auto csr = createMockCsr(); auto expectedDelay = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_, expectedDelay, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, true, false); } HWTEST_F(KmdNotifyTests, givenTaskCountEqualToHwTagWhenWaitCalledThenDontMultiplyTimeout) { auto csr = createMockCsr(); *csr->getTagAddress() = taskCountToWait; auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false); } HWTEST_F(KmdNotifyTests, givenTaskCountLowerThanHwTagWhenWaitCalledThenDontMultiplyTimeout) { auto csr = createMockCsr(); *csr->getTagAddress() = taskCountToWait + 5; auto expectedTimeout = device->getHardwareInfo().capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); csr->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, 1, false, false); } HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWhenWaitCalledThenUpdateWaitTimestamp) { overrideKmdNotifyParams(true, 3, true, 2, true, 1); auto csr = createMockCsr(); EXPECT_NE(0, mockKmdNotifyHelper->lastWaitForCompletionTimestampUs.load()); EXPECT_EQ(1u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled); 
csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false); EXPECT_EQ(2u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled); } HWTEST_F(KmdNotifyTests, givenDefaultCommandStreamReceiverWithDisabledSporadicWaitOptimizationWhenWaitCalledThenDontUpdateWaitTimestamp) { overrideKmdNotifyParams(true, 3, true, 2, false, 0); auto csr = createMockCsr(); EXPECT_EQ(0, mockKmdNotifyHelper->lastWaitForCompletionTimestampUs.load()); csr->waitForTaskCountWithKmdNotifyFallback(0, 0, false, false); EXPECT_EQ(0u, mockKmdNotifyHelper->updateLastWaitForCompletionTimestampCalled); } HWTEST_F(KmdNotifyTests, givenNewHelperWhenItsSetToCsrThenUpdateAcLineStatus) { auto helper = new MockKmdNotifyHelper(&(hwInfo->capabilityTable.kmdNotifyProperties)); EXPECT_EQ(0u, helper->updateAcLineStatusCalled); auto csr = createMockCsr(); csr->resetKmdNotifyHelper(helper); EXPECT_EQ(1u, helper->updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenTaskCountDiffLowerThanMinimumToCheckAcLineWhenObtainingTimeoutPropertiesThenDontCheck) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 9; uint32_t taskCountToWait = 10; EXPECT_TRUE(taskCountToWait - hwTag < KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); int64_t timeout = 0; helper.obtainTimeoutParams(timeout, false, hwTag, taskCountToWait, 1, false); EXPECT_EQ(0u, helper.updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndDisabledKmdNotifyWhenObtainingTimeoutPropertiesThenCheck) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 10; uint32_t taskCountToWait = 21; EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, 
KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); int64_t timeout = 0; helper.obtainTimeoutParams(timeout, false, hwTag, taskCountToWait, 1, false); EXPECT_EQ(1u, helper.updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenTaskCountDiffGreaterThanMinimumToCheckAcLineAndEnabledKmdNotifyWhenObtainingTimeoutPropertiesThenDontCheck) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = true; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); uint32_t hwTag = 10; uint32_t taskCountToWait = 21; EXPECT_TRUE(taskCountToWait - hwTag > KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); EXPECT_EQ(10u, KmdNotifyConstants::minimumTaskCountDiffToCheckAcLine); int64_t timeout = 0; helper.obtainTimeoutParams(timeout, false, hwTag, taskCountToWait, 1, false); EXPECT_EQ(0u, helper.updateAcLineStatusCalled); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismWhenAcLineIsDisconnectedThenForceEnableTimeout) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; int64_t timeout = 0; bool timeoutEnabled = helper.obtainTimeoutParams(timeout, false, 1, 2, 2, false); EXPECT_TRUE(timeoutEnabled); EXPECT_EQ(KmdNotifyConstants::timeoutInMicrosecondsForDisconnectedAcLine, timeout); EXPECT_EQ(10000, KmdNotifyConstants::timeoutInMicrosecondsForDisconnectedAcLine); } TEST_F(KmdNotifyTests, givenEnabledKmdNotifyMechanismWhenAcLineIsDisconnectedThenDontChangeTimeoutValue) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = true; hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds = 5; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; int64_t timeout = 0; bool timeoutEnabled = helper.obtainTimeoutParams(timeout, false, 1, 2, 2, false); EXPECT_TRUE(timeoutEnabled); 
EXPECT_EQ(hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, timeout); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismAndFlushStampIsZeroWhenAcLineIsDisconnectedThenDontForceEnableTimeout) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; int64_t timeout = 0; FlushStamp flushStampToWait = 0; bool timeoutEnabled = helper.obtainTimeoutParams(timeout, false, 1, 2, flushStampToWait, false); EXPECT_FALSE(timeoutEnabled); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismWhenPowerSavingModeIsSetThenKmdNotifyMechanismIsUsedAndReturnsShortestWaitingTimePossible) { DebugManagerStateRestore stateRestore; DebugManager.flags.PowerSavingMode.set(1u); hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; int64_t timeout = 0; FlushStamp flushStampToWait = 1; bool timeoutEnabled = helper.obtainTimeoutParams(timeout, false, 1, 2, flushStampToWait, false); EXPECT_TRUE(timeoutEnabled); EXPECT_EQ(1, timeout); } TEST_F(KmdNotifyTests, givenDisabledKmdNotifyMechanismWhenPowerSavingModeIsRequestedThenKmdNotifyMechanismIsUsedAndReturnsShortestWaitingTimePossible) { hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = false; MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); int64_t timeout = 0; FlushStamp flushStampToWait = 1; bool timeoutEnabled = helper.obtainTimeoutParams(timeout, false, 1, 2, flushStampToWait, true); EXPECT_TRUE(timeoutEnabled); EXPECT_EQ(1, timeout); } TEST_F(KmdNotifyTests, givenEnabledKmdNotifyMechanismWhenPowerSavingModeIsSetAndNoFlushStampProvidedWhenParametersAreObtainedThenFalseIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.PowerSavingMode.set(1u); hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify = true; 
MockKmdNotifyHelper helper(&(hwInfo->capabilityTable.kmdNotifyProperties)); helper.acLineConnected = false; int64_t timeout = 0; FlushStamp flushStampToWait = 0; bool timeoutEnabled = helper.obtainTimeoutParams(timeout, false, 1, 2, flushStampToWait, false); EXPECT_FALSE(timeoutEnabled); EXPECT_EQ(0, timeout); } #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/opencl/test/unit_test/helpers/linux/000077500000000000000000000000001363734646600244335ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/helpers/linux/CMakeLists.txt000066400000000000000000000006151363734646600271750ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) set(IGDRCL_SRCS_tests_helpers_linux ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_linux_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_helpers_linux}) add_subdirectories() set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_helpers_linux ${IGDRCL_SRCS_tests_helpers_linux}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/helpers/linux/kmd_notify_linux_tests.cpp000066400000000000000000000016071363734646600317470ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "test.h" namespace NEO { class MockKmdNotifyHelper : public KmdNotifyHelper { public: using KmdNotifyHelper::getBaseTimeout; MockKmdNotifyHelper(const KmdNotifyProperties *newProperties) : KmdNotifyHelper(newProperties){}; }; TEST(KmdNotifyLinuxTests, givenTaskCountDiffGreaterThanOneWhenBaseTimeoutRequestedThenMultiply) { auto localProperties = (defaultHwInfo->capabilityTable.kmdNotifyProperties); localProperties.delayKmdNotifyMicroseconds = 10; const int64_t multiplier = 10; MockKmdNotifyHelper helper(&localProperties); 
EXPECT_EQ(localProperties.delayKmdNotifyMicroseconds * multiplier, helper.getBaseTimeout(multiplier)); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/mem_properties_parser_helper_tests.cpp000066400000000000000000000232121363734646600331670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/mem_properties_parser_helper.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "CL/cl_ext_intel.h" #include "gtest/gtest.h" using namespace NEO; TEST(MemoryPropertiesParser, givenNullPropertiesWhenParsingMemoryPropertiesThenTrueIsReturned) { MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_TRUE(MemoryPropertiesParser::parseMemoryProperties(nullptr, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::UNKNOWN)); } TEST(MemoryPropertiesParser, givenEmptyPropertiesWhenParsingMemoryPropertiesThenTrueIsReturned) { cl_mem_properties_intel properties[] = {0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_TRUE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::UNKNOWN)); EXPECT_TRUE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::BUFFER)); EXPECT_TRUE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::MemoryPropertiesParser::ObjType::IMAGE)); } TEST(MemoryPropertiesParser, givenValidPropertiesWhenParsingMemoryPropertiesThenTrueIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, 
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS, CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, CL_MEM_ALLOC_FLAGS_INTEL, CL_MEM_ALLOC_WRITE_COMBINED_INTEL, CL_MEM_ALLOC_DEFAULT_INTEL, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_TRUE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::UNKNOWN)); } TEST(MemoryPropertiesParser, givenValidPropertiesWhenParsingMemoryPropertiesForBufferThenTrueIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForBuffer, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForBufferIntel, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_TRUE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::BUFFER)); } TEST(MemoryPropertiesParser, givenValidPropertiesWhenParsingMemoryPropertiesForImageThenTrueIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForImage, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForImageIntel, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_TRUE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::IMAGE)); } TEST(MemoryPropertiesParser, givenInvalidPropertiesWhenParsingMemoryPropertiesThenFalseIsReturned) { cl_mem_properties_intel properties[] = { (1 << 30), 
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::UNKNOWN)); EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::BUFFER)); EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::IMAGE)); } TEST(MemoryPropertiesParser, givenInvalidPropertiesWhenParsingMemoryPropertiesForImageThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForBuffer, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForBufferIntel, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::IMAGE)); } TEST(MemoryPropertiesParser, givenInvalidFlagsWhenParsingMemoryPropertiesForImageThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, (1 << 30), CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForImageIntel, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::IMAGE)); } TEST(MemoryPropertiesParser, givenInvalidFlagsIntelWhenParsingMemoryPropertiesForImageThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, 
MemObjHelper::validFlagsForImage, CL_MEM_FLAGS_INTEL, (1 << 30), 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::IMAGE)); } TEST(MemoryPropertiesParser, givenInvalidPropertiesWhenParsingMemoryPropertiesForBufferThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForImage, CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForImageIntel, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::BUFFER)); } TEST(MemoryPropertiesParser, givenInvalidFlagsWhenParsingMemoryPropertiesForBufferThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, (1 << 30), CL_MEM_FLAGS_INTEL, MemObjHelper::validFlagsForBufferIntel, 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, MemoryPropertiesParser::ObjType::BUFFER)); } TEST(MemoryPropertiesParser, givenInvalidFlagsIntelWhenParsingMemoryPropertiesForBufferThenFalseIsReturned) { cl_mem_properties_intel properties[] = { CL_MEM_FLAGS, MemObjHelper::validFlagsForBuffer, CL_MEM_FLAGS_INTEL, (1 << 30), 0}; MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; cl_mem_alloc_flags_intel allocflags = 0; EXPECT_FALSE(MemoryPropertiesParser::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags, 
MemoryPropertiesParser::ObjType::BUFFER)); } TEST(MemoryPropertiesParser, givenDifferentParametersWhenCallingFillCachePolicyInPropertiesThenFlushL3FlagsAreCorrectlySet) { AllocationProperties allocationProperties{0, 0, GraphicsAllocation::AllocationType::BUFFER}; for (auto uncached : ::testing::Bool()) { for (auto readOnly : ::testing::Bool()) { for (auto deviceOnlyVisibilty : ::testing::Bool()) { if (uncached || readOnly || deviceOnlyVisibilty) { allocationProperties.flags.flushL3RequiredForRead = true; allocationProperties.flags.flushL3RequiredForWrite = true; MemoryPropertiesParser::fillCachePolicyInProperties(allocationProperties, uncached, readOnly, deviceOnlyVisibilty); EXPECT_FALSE(allocationProperties.flags.flushL3RequiredForRead); EXPECT_FALSE(allocationProperties.flags.flushL3RequiredForWrite); } else { allocationProperties.flags.flushL3RequiredForRead = false; allocationProperties.flags.flushL3RequiredForWrite = false; MemoryPropertiesParser::fillCachePolicyInProperties(allocationProperties, uncached, readOnly, deviceOnlyVisibilty); EXPECT_TRUE(allocationProperties.flags.flushL3RequiredForRead); EXPECT_TRUE(allocationProperties.flags.flushL3RequiredForWrite); } } } } } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/memory_management_tests.cpp000066400000000000000000000132331363734646600307300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/memory_management.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "gtest/gtest.h" using MemoryManagement::AllocationEvent; using MemoryManagement::eventsAllocated; using MemoryManagement::eventsDeallocated; using MemoryManagement::failingAllocation; using MemoryManagement::indexAllocation; using MemoryManagement::indexDeallocation; using MemoryManagement::numAllocations; TEST(allocation, nothrow_defaultShouldPass) { ASSERT_EQ(failingAllocation, static_cast(-1)); auto ptr 
= new (std::nothrow) char; EXPECT_NE(nullptr, ptr); delete ptr; } TEST(allocation, nothrow_injectingAFailure) { MemoryManagement::detailedAllocationLoggingActive = true; ASSERT_EQ(static_cast(-1), failingAllocation); auto previousAllocations = numAllocations.load(); MemoryManagement::indexAllocation = 0; failingAllocation = 1; auto ptr1 = new (std::nothrow) char; auto ptr2 = new (std::nothrow) char; delete ptr1; delete ptr2; auto currentAllocations = numAllocations.load(); failingAllocation = -1; EXPECT_NE(nullptr, ptr1); EXPECT_EQ(nullptr, ptr2); EXPECT_EQ(previousAllocations, currentAllocations); MemoryManagement::detailedAllocationLoggingActive = false; } struct MemoryManagementTest : public MemoryManagementFixture, public ::testing::Test { void SetUp() override { MemoryManagementFixture::SetUp(); } void TearDown() override { MemoryManagementFixture::TearDown(); } }; TEST_F(MemoryManagementTest, nothrow_injectingAFailure) { setFailingAllocation(1); auto ptr1 = new (std::nothrow) char; auto ptr2 = new (std::nothrow) char; delete ptr1; delete ptr2; clearFailingAllocation(); EXPECT_NE(nullptr, ptr1); EXPECT_EQ(nullptr, ptr2); } TEST_F(MemoryManagementTest, NoLeaks) { auto indexAllocationTop = indexAllocation.load(); auto indexDellocationTop = indexDeallocation.load(); EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, indexDellocationTop, false, false)); } TEST_F(MemoryManagementTest, OneLeak) { size_t sizeBuffer = 10; auto ptr = new (std::nothrow) char[sizeBuffer]; auto indexAllocationTop = indexAllocation.load(); auto indexDeallocationTop = indexDeallocation.load(); auto leakIndex = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); ASSERT_NE(static_cast(-1), leakIndex); EXPECT_EQ(ptr, eventsAllocated[leakIndex].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex].size); // Not expecting any more failures EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, 
indexDeallocationTop, false, false)); delete[] ptr; } TEST_F(MemoryManagementTest, OneLeakBetweenFourEvents) { size_t sizeBuffer = 10; delete new (std::nothrow) char; auto ptr = new (std::nothrow) char[sizeBuffer]; delete new (std::nothrow) char; auto indexAllocationTop = indexAllocation.load(); auto indexDeallocationTop = indexDeallocation.load(); auto leakIndex = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); ASSERT_NE(static_cast(-1), leakIndex); EXPECT_EQ(ptr, eventsAllocated[leakIndex].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex].size); // Not expecting any more failures EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false)); delete[] ptr; } TEST_F(MemoryManagementTest, TwoLeaks) { size_t sizeBuffer = 10; auto ptr1 = new (std::nothrow) char[sizeBuffer]; auto ptr2 = new (std::nothrow) char[sizeBuffer]; auto indexAllocationTop = indexAllocation.load(); auto indexDeallocationTop = indexDeallocation.load(); auto leakIndex1 = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); auto leakIndex2 = MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false); ASSERT_NE(static_cast(-1), leakIndex1); EXPECT_EQ(ptr1, eventsAllocated[leakIndex1].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex1].size); ASSERT_NE(static_cast(-1), leakIndex2); EXPECT_EQ(ptr2, eventsAllocated[leakIndex2].address); EXPECT_EQ(sizeBuffer, eventsAllocated[leakIndex2].size); // Not expecting any more failures EXPECT_EQ(static_cast(-1), MemoryManagement::enumerateLeak(indexAllocationTop, indexDeallocationTop, false, false)); delete[] ptr1; delete[] ptr2; } TEST_F(MemoryManagementTest, delete_nullptr_shouldntReportLeak) { char *ptr = nullptr; delete ptr; } TEST_F(MemoryManagementTest, shouldBeAbleToViewAllocation) { size_t sizeBuffer = 10; auto index = MemoryManagement::indexAllocation.load(); auto ptr = new 
(std::nothrow) char[sizeBuffer]; EXPECT_EQ(ptr, eventsAllocated[index].address); EXPECT_EQ(sizeBuffer, eventsAllocated[index].size); index = MemoryManagement::indexDeallocation; auto ptrCopy = ptr; delete[] ptr; EXPECT_EQ(ptrCopy, eventsDeallocated[index].address); } #if ENABLE_ME_FOR_LEAK_TESTING TEST_F(MemoryManagementTest, EnableForLeakTesting) { // Useful reference : MemoryManagement::onAllocationEvent MemoryManagement::breakOnAllocationEvent = 1; MemoryManagement::breakOnDeallocationEvent = 0; delete new char; new char; MemoryManagement::breakOnAllocationEvent = -1; MemoryManagement::breakOnDeallocationEvent = -1; } #endif compute-runtime-20.13.16352/opencl/test/unit_test/helpers/memory_properties_flags_helpers_tests.cpp000066400000000000000000000125221363734646600337060ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "CL/cl_ext_intel.h" #include "gtest/gtest.h" using namespace NEO; TEST(MemoryPropertiesFlags, givenValidPropertiesWhenCreateMemoryPropertiesFlagsThenTrueIsReturned) { MemoryPropertiesFlags properties; properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0); EXPECT_TRUE(properties.flags.readWrite); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_WRITE_ONLY, 0, 0); EXPECT_TRUE(properties.flags.writeOnly); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_ONLY, 0, 0); EXPECT_TRUE(properties.flags.readOnly); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); EXPECT_TRUE(properties.flags.useHostPtr); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_ALLOC_HOST_PTR, 0, 0); EXPECT_TRUE(properties.flags.allocHostPtr); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); 
EXPECT_TRUE(properties.flags.copyHostPtr); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_HOST_WRITE_ONLY, 0, 0); EXPECT_TRUE(properties.flags.hostWriteOnly); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_HOST_READ_ONLY, 0, 0); EXPECT_TRUE(properties.flags.hostReadOnly); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_HOST_NO_ACCESS, 0, 0); EXPECT_TRUE(properties.flags.hostNoAccess); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_KERNEL_READ_AND_WRITE, 0, 0); EXPECT_TRUE(properties.flags.kernelReadAndWrite); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, 0, 0); EXPECT_TRUE(properties.flags.accessFlagsUnrestricted); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_NO_ACCESS_INTEL, 0, 0); EXPECT_TRUE(properties.flags.noAccess); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0); EXPECT_TRUE(properties.flags.locallyUncachedResource); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0); EXPECT_TRUE(properties.flags.locallyUncachedInSurfaceState); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL, 0, 0); EXPECT_TRUE(properties.flags.forceSharedPhysicalMemory); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, 0, CL_MEM_ALLOC_WRITE_COMBINED_INTEL); EXPECT_TRUE(properties.allocFlags.allocWriteCombined); properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, CL_MEM_48BIT_RESOURCE_INTEL, 0); EXPECT_TRUE(properties.flags.resource48Bit); } TEST(MemoryPropertiesFlags, givenClMemForceLinearStorageFlagWhenCreateMemoryPropertiesFlagsThenReturnProperValue) { MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel 
flagsIntel = 0; flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; flagsIntel = 0; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags = 0; flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags = 0; flagsIntel = 0; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_FALSE(memoryProperties.flags.forceLinearStorage); } TEST(MemoryPropertiesFlags, givenClAllowUnrestrictedSizeFlagWhenCreateMemoryPropertiesFlagsThenReturnProperValue) { MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; flags |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; flagsIntel = 0; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.allowUnrestrictedSize); flags = 0; flagsIntel |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.allowUnrestrictedSize); flags |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; flagsIntel |= CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.allowUnrestrictedSize); flags = 0; flagsIntel = 0; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_FALSE(memoryProperties.flags.allowUnrestrictedSize); } 
compute-runtime-20.13.16352/opencl/test/unit_test/helpers/mipmap_tests.cpp000066400000000000000000000165211363734646600265120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include "gtest/gtest.h" using namespace NEO; constexpr size_t testOrigin[]{2, 3, 5, 7}; typedef ::testing::TestWithParam> MipLevelTest; TEST_P(MipLevelTest, givenMemObjectTypeReturnProperMipLevel) { auto pair = GetParam(); EXPECT_EQ(static_cast(pair.second), findMipLevel(pair.first, testOrigin)); } INSTANTIATE_TEST_CASE_P(MipLevel, MipLevelTest, ::testing::Values(std::make_pair(CL_MEM_OBJECT_IMAGE1D, testOrigin[1]), std::make_pair(CL_MEM_OBJECT_IMAGE1D_ARRAY, testOrigin[2]), std::make_pair(CL_MEM_OBJECT_IMAGE2D, testOrigin[2]), std::make_pair(CL_MEM_OBJECT_IMAGE2D_ARRAY, testOrigin[3]), std::make_pair(CL_MEM_OBJECT_IMAGE3D, testOrigin[3]), std::make_pair(CL_MEM_OBJECT_BUFFER, 0U), std::make_pair(CL_MEM_OBJECT_IMAGE1D_BUFFER, 0U), std::make_pair(CL_MEM_OBJECT_PIPE, 0U))); typedef ::testing::TestWithParam> MipLevelOriginIdxTest; TEST_P(MipLevelOriginIdxTest, givenMemObjectTypeReturnProperMipLevelOriginIdx) { auto pair = GetParam(); EXPECT_EQ(static_cast(pair.second), getMipLevelOriginIdx(pair.first)); } INSTANTIATE_TEST_CASE_P(MipLevelOriginIdx, MipLevelOriginIdxTest, ::testing::Values(std::make_pair(CL_MEM_OBJECT_IMAGE1D, 1U), std::make_pair(CL_MEM_OBJECT_IMAGE1D_ARRAY, 2U), std::make_pair(CL_MEM_OBJECT_IMAGE2D, 2U), std::make_pair(CL_MEM_OBJECT_IMAGE2D_ARRAY, 3U), std::make_pair(CL_MEM_OBJECT_IMAGE3D, 3U), std::make_pair(CL_MEM_OBJECT_IMAGE1D_BUFFER, 0U), std::make_pair(CL_MEM_OBJECT_BUFFER, static_cast(-1)), std::make_pair(CL_MEM_OBJECT_PIPE, static_cast(-1)))); TEST(MipmapHelper, 
givenClImageDescWithoutMipLevelsWhenIsMipMappedIsCalledThenFalseIsReturned) { cl_image_desc desc = {}; desc.num_mip_levels = 0; EXPECT_FALSE(NEO::isMipMapped(desc)); desc.num_mip_levels = 1; EXPECT_FALSE(NEO::isMipMapped(desc)); } TEST(MipmapHelper, givenClImageDescWithMipLevelsWhenIsMipMappedIsCalledThenTrueIsReturned) { cl_image_desc desc = {}; desc.num_mip_levels = 2; EXPECT_TRUE(NEO::isMipMapped(desc)); } TEST(MipmapHelper, givenBufferWhenIsMipMappedIsCalledThenFalseIsReturned) { MockBuffer buffer; EXPECT_FALSE(NEO::isMipMapped(&buffer)); } struct MockImage : MockImageBase { MockImage() : MockImageBase() { surfaceFormatInfo.surfaceFormat.ImageElementSizeInBytes = 4u; } }; TEST(MipmapHelper, givenImageWithoutMipLevelsWhenIsMipMappedIsCalledThenFalseIsReturned) { MockImage image; image.imageDesc.num_mip_levels = 0; EXPECT_FALSE(NEO::isMipMapped(&image)); image.imageDesc.num_mip_levels = 1; EXPECT_FALSE(NEO::isMipMapped(&image)); } TEST(MipmapHelper, givenImageWithMipLevelsWhenIsMipMappedIsCalledThenTrueIsReturned) { MockImage image; image.imageDesc.num_mip_levels = 2; EXPECT_TRUE(NEO::isMipMapped(&image)); } TEST(MipmapHelper, givenImageWithoutMipLevelsWhenGetMipOffsetIsCalledThenZeroIsReturned) { MockImage image; image.imageDesc.num_mip_levels = 1; auto offset = getMipOffset(&image, testOrigin); EXPECT_EQ(0U, offset); } using myTuple = std::tuple, uint32_t, uint32_t>; using MipOffsetTest = ::testing::TestWithParam; TEST_P(MipOffsetTest, givenImageWithMipLevelsWhenGetMipOffsetIsCalledThenProperOffsetIsReturned) { std::array origin; uint32_t expectedOffset; cl_mem_object_type imageType; std::tie(origin, expectedOffset, imageType) = GetParam(); MockImage image; image.imageDesc.num_mip_levels = 16; image.imageDesc.image_type = imageType; image.imageDesc.image_width = 11; image.imageDesc.image_height = 13; image.imageDesc.image_depth = 17; auto offset = getMipOffset(&image, origin.data()); EXPECT_EQ(expectedOffset, offset); } constexpr myTuple 
testOrigins[]{myTuple({{2, 3, 5, 7}}, 812u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 2}}, 592u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 1}}, 572u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 0}}, 0u, CL_MEM_OBJECT_IMAGE3D), myTuple({{2, 3, 5, 7}}, 812u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 2}}, 592u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 1}}, 572u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 0}}, 0u, CL_MEM_OBJECT_IMAGE2D_ARRAY), myTuple({{2, 3, 5, 0}}, 724u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 2, 0}}, 592u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 1, 0}}, 572u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 0, 0}}, 0u, CL_MEM_OBJECT_IMAGE2D), myTuple({{2, 3, 5, 0}}, 724u, CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 2, 0}}, 592u, CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 1, 0}}, 572u, CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 0, 0}}, 0u, CL_MEM_OBJECT_IMAGE1D_ARRAY), myTuple({{2, 3, 0, 0}}, 56u, CL_MEM_OBJECT_IMAGE1D), myTuple({{2, 2, 0, 0}}, 52u, CL_MEM_OBJECT_IMAGE1D), myTuple({{2, 1, 0, 0}}, 44u, CL_MEM_OBJECT_IMAGE1D), myTuple({{2, 0, 0, 0}}, 0u, CL_MEM_OBJECT_IMAGE1D)}; INSTANTIATE_TEST_CASE_P(MipMapOffset, MipOffsetTest, ::testing::ValuesIn(testOrigins)); compute-runtime-20.13.16352/opencl/test/unit_test/helpers/per_thread_data_tests.cpp000066400000000000000000000255401363734646600303360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/command_queue/local_id_gen.h" #include "opencl/source/helpers/per_thread_data.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" #include "patch_shared.h" using namespace NEO; template struct PerThreadDataTests : public DeviceFixture, ::testing::Test { void 
SetUp() override { DeviceFixture::SetUp(); kernelHeader = {}; kernelHeader.KernelHeapSize = sizeof(kernelIsa); threadPayload = {}; threadPayload.LocalIDXPresent = localIdX ? 1 : 0; threadPayload.LocalIDYPresent = localIdY ? 1 : 0; threadPayload.LocalIDZPresent = localIdZ ? 1 : 0; threadPayload.LocalIDFlattenedPresent = flattenedId; threadPayload.UnusedPerThreadConstantPresent = !(localIdX || localIdY || localIdZ || flattenedId); executionEnvironment = {}; executionEnvironment.CompiledSIMD32 = 1; executionEnvironment.LargestCompiledSIMDSize = 32; kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.pKernelHeader = &kernelHeader; kernelInfo.patchInfo.executionEnvironment = &executionEnvironment; kernelInfo.patchInfo.threadPayload = &threadPayload; simd = executionEnvironment.LargestCompiledSIMDSize; numChannels = threadPayload.LocalIDXPresent + threadPayload.LocalIDYPresent + threadPayload.LocalIDZPresent; grfSize = 32; indirectHeapMemorySize = 4096; indirectHeapMemory = reinterpret_cast(alignedMalloc(indirectHeapMemorySize, 32)); ASSERT_TRUE(isAligned<32>(indirectHeapMemory)); } void TearDown() override { alignedFree(indirectHeapMemory); DeviceFixture::TearDown(); } const std::array workgroupWalkOrder = {{0, 1, 2}}; uint32_t simd; uint32_t grfSize; uint32_t numChannels; uint32_t kernelIsa[32]; uint8_t *indirectHeapMemory; size_t indirectHeapMemorySize; SKernelBinaryHeaderCommon kernelHeader; SPatchThreadPayload threadPayload; SPatchExecutionEnvironment executionEnvironment; KernelInfo kernelInfo; }; typedef PerThreadDataTests<> PerThreadDataXYZTests; HWTEST_F(PerThreadDataXYZTests, getLocalIdSizePerThread) { EXPECT_EQ(3 * 2 * grfSize, PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels)); } HWTEST_F(PerThreadDataXYZTests, getPerThreadDataSizeTotal) { size_t localWorkSize = 256; EXPECT_EQ(256 * 3 * 2 * grfSize / 32, PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize)); } 
HWTEST_F(PerThreadDataXYZTests, sendPerThreadData_256x1x1) { MockGraphicsAllocation gfxAllocation(indirectHeapMemory, indirectHeapMemorySize); LinearStream indirectHeap(&gfxAllocation); size_t localWorkSizes[3] = {256, 1, 1}; auto localWorkSize = localWorkSizes[0] * localWorkSizes[1] * localWorkSizes[2]; auto offsetPerThreadData = PerThreadDataHelper::sendPerThreadData( indirectHeap, simd, grfSize, numChannels, localWorkSizes, workgroupWalkOrder, false); auto expectedPerThreadDataSizeTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize); size_t sizeConsumed = indirectHeap.getUsed() - offsetPerThreadData; EXPECT_EQ(expectedPerThreadDataSizeTotal, sizeConsumed); } HWTEST_F(PerThreadDataXYZTests, sendPerThreadData_2x4x8) { MockGraphicsAllocation gfxAllocation(indirectHeapMemory, indirectHeapMemorySize); LinearStream indirectHeap(&gfxAllocation); const size_t localWorkSizes[3]{2, 4, 8}; auto offsetPerThreadData = PerThreadDataHelper::sendPerThreadData( indirectHeap, simd, grfSize, numChannels, localWorkSizes, workgroupWalkOrder, false); size_t sizeConsumed = indirectHeap.getUsed() - offsetPerThreadData; EXPECT_EQ(64u * (3u * 2u * 4u * 8u) / 32u, sizeConsumed); } HWTEST_F(PerThreadDataXYZTests, getThreadPayloadSize) { simd = 32; uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 2u * 3u, size); simd = 16; size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 3u, size); simd = 16; threadPayload.HeaderPresent = 1; size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 4u, size); simd = 16; threadPayload.UnusedPerThreadConstantPresent = 1; size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 5u, size); } typedef PerThreadDataTests PerThreadDataNoIdsTests; HWTEST_F(PerThreadDataNoIdsTests, 
givenZeroChannelsWhenPassedTogetLocalIdSizePerThreadThenSizeOfOneGrfIsReturned) { EXPECT_EQ(32u, PerThreadDataHelper::getLocalIdSizePerThread(simd, grfSize, numChannels)); } HWTEST_F(PerThreadDataNoIdsTests, givenZeroChannelsAndHighWkgSizeWhengetPerThreadDataSizeTotalIsCalledThenReturnedSizeContainsUnusedGrfPerEachThread) { size_t localWorkSize = 256u; auto threadCount = localWorkSize / simd; auto expectedSize = threadCount * grfSize; EXPECT_EQ(expectedSize, PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize)); } HWTEST_F(PerThreadDataNoIdsTests, sendPerThreadDataDoesntSendAnyData) { uint8_t fillValue = 0xcc; memset(indirectHeapMemory, fillValue, indirectHeapMemorySize); MockGraphicsAllocation gfxAllocation(indirectHeapMemory, indirectHeapMemorySize); LinearStream indirectHeap(&gfxAllocation); size_t localWorkSizes[3] = {256, 1, 1}; auto offsetPerThreadData = PerThreadDataHelper::sendPerThreadData( indirectHeap, simd, grfSize, numChannels, localWorkSizes, workgroupWalkOrder, false); size_t sizeConsumed = indirectHeap.getUsed() - offsetPerThreadData; EXPECT_EQ(0u, sizeConsumed); size_t i = 0; while (i < indirectHeapMemorySize) { ASSERT_EQ(fillValue, indirectHeapMemory[i]) << "for index " << i; ++i; } } HWTEST_F(PerThreadDataNoIdsTests, getThreadPayloadSize) { simd = 32; uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize, size); simd = 16; size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize, size); simd = 16; threadPayload.HeaderPresent = 1; size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 2u, size); } typedef PerThreadDataTests PerThreadDataFlattenedIdsTests; HWTEST_F(PerThreadDataFlattenedIdsTests, getThreadPayloadSize) { simd = 32; uint32_t size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 2u, size); simd = 16; size = 
PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize, size); simd = 16; threadPayload.HeaderPresent = 1; size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 2u, size); simd = 32; threadPayload.HeaderPresent = 1; size = PerThreadDataHelper::getThreadPayloadSize(threadPayload, simd, grfSize); EXPECT_EQ(grfSize * 3u, size); } TEST(PerThreadDataTest, generateLocalIDs) { uint32_t simd = 8; uint32_t grfSize = 32; uint32_t numChannels = 3; uint32_t localWorkSize = 24; size_t localWorkSizes[3] = {24, 1, 1}; auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, numChannels, localWorkSize, grfSize); auto sizeOverSizedBuffer = sizePerThreadDataTotal * 4; auto buffer = static_cast(alignedMalloc(sizeOverSizedBuffer, 16)); memset(buffer, 0, sizeOverSizedBuffer); // Setup reference filled with zeros auto reference = static_cast(alignedMalloc(sizePerThreadDataTotal, 16)); memset(reference, 0, sizePerThreadDataTotal); LinearStream stream(buffer, sizeOverSizedBuffer / 2); PerThreadDataHelper::sendPerThreadData( stream, simd, grfSize, numChannels, localWorkSizes, {{0, 1, 2}}, false); // Check if buffer overrun happend, only first sizePerThreadDataTotal bytes can be overwriten, following should be same as reference. 
for (auto i = sizePerThreadDataTotal; i < sizeOverSizedBuffer; i += sizePerThreadDataTotal) { int result = memcmp(buffer + i, reference, sizePerThreadDataTotal); EXPECT_EQ(0, result); } alignedFree(buffer); alignedFree(reference); } TEST(PerThreadDataTest, givenSimdEqualOneWhenSetingLocalIdsInPerThreadDataThenIdsAreSetInCorrectOrder) { uint32_t simd = 1; uint32_t grfSize = 32; uint32_t numChannels = 3; uint32_t localWorkSize = 24; size_t localWorkSizes[3] = {3, 4, 2}; auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize); auto sizeOverSizedBuffer = sizePerThreadDataTotal * 4; auto buffer = static_cast(alignedMalloc(sizeOverSizedBuffer, 16)); memset(buffer, 0, sizeOverSizedBuffer); // Setup reference filled with zeros auto reference = static_cast(alignedMalloc(sizePerThreadDataTotal, 16)); memset(reference, 0, sizePerThreadDataTotal); LinearStream stream(buffer, sizeOverSizedBuffer / 2); PerThreadDataHelper::sendPerThreadData( stream, simd, grfSize, numChannels, localWorkSizes, {{0, 1, 2}}, false); auto bufferPtr = buffer; for (uint16_t i = 0; i < localWorkSizes[2]; i++) { for (uint16_t j = 0; j < localWorkSizes[1]; j++) { for (uint16_t k = 0; k < localWorkSizes[0]; k++) { uint16_t ids[] = {k, j, i}; int result = memcmp(bufferPtr, ids, sizeof(uint16_t) * 3); EXPECT_EQ(0, result); bufferPtr += grfSize; } } } // Check if buffer overrun happend, only first sizePerThreadDataTotal bytes can be overwriten, following should be same as reference. 
for (auto i = sizePerThreadDataTotal; i < sizeOverSizedBuffer; i += sizePerThreadDataTotal) { int result = memcmp(buffer + i, reference, sizePerThreadDataTotal); EXPECT_EQ(0, result); } alignedFree(buffer); alignedFree(reference); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/ptr_math_tests.cpp000066400000000000000000000043111363734646600270370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "gtest/gtest.h" TEST(PtrMath, ptrOffset) { auto addrBefore = (uintptr_t)ptrGarbage; auto ptrBefore = addrBefore; size_t offset = 0x1234; auto ptrAfter = ptrOffset(ptrBefore, offset); auto addrAfter = ptrAfter; EXPECT_EQ(offset, addrAfter - addrBefore); } TEST(PtrMath, ptrDiff) { size_t offset = 0x1234; auto addrBefore = (uintptr_t)ptrGarbage; auto addrAfter = addrBefore + offset; EXPECT_EQ(offset, ptrDiff(addrAfter, addrBefore)); } TEST(PtrMath, addrToPtr) { uint32_t addr32Bit = 0x3456; uint64_t addr64Bit = 0xf000000000003456; void *ptr32BitAddr = (void *)((uintptr_t)addr32Bit); void *ptr64BitAddr = (void *)((uintptr_t)addr64Bit); EXPECT_EQ(ptr32BitAddr, addrToPtr(addr32Bit)); EXPECT_EQ(ptr64BitAddr, addrToPtr(addr64Bit)); } TEST(PtrMath, givenCastToUint64FunctionWhenItIsCalledThenProperValueIsReturned) { uintptr_t address = 0xf0000000; void *addressWithTrailingBitSet = reinterpret_cast(address); uint64_t expectedUint64Address = 0xf0000000; auto uintAddress = castToUint64(addressWithTrailingBitSet); EXPECT_EQ(uintAddress, expectedUint64Address); } TEST(PtrMath, givenCastToUint64FunctionWhenConstPointerIsPassedItIsCalledThenProperValueIsReturned) { uintptr_t address = 0xf0000000; const void *addressWithTrailingBitSet = reinterpret_cast(address); uint64_t expectedUint64Address = 0xf0000000; auto uintAddress = castToUint64(addressWithTrailingBitSet); EXPECT_EQ(uintAddress, expectedUint64Address); } TEST(ptrOffset, preserve64Bit) { uint64_t ptrBefore = 
0x800000000; size_t offset = 0x1234; auto ptrAfter = ptrOffset(ptrBefore, offset); EXPECT_EQ(0x800001234ull, ptrAfter); } TEST(ptrDiff, preserve64Bit) { auto ptrAfter = 0x800001234ull; auto ptrBefore = ptrDiff(ptrAfter, (size_t)0x1234); EXPECT_EQ(0x800000000ull, ptrBefore); auto ptrBefore2 = ptrDiff(ptrAfter, 0x1234); EXPECT_EQ(0x800000000ull, ptrBefore2); auto ptrBefore3 = ptrDiff(ptrAfter, 0x1234ull); EXPECT_EQ(0x800000000ull, ptrBefore3); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/queue_helpers_tests.cpp000066400000000000000000000015351363734646600300740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/queue_helpers.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "gtest/gtest.h" using namespace NEO; TEST(ReleaseQueueTest, givenCommandQueueWithoutVirtualEventWhenReleaseQueueIsCalledThenCmdQInternalRefCountIsNotDecremented) { cl_int retVal = CL_SUCCESS; MockCommandQueue *cmdQ = new MockCommandQueue; EXPECT_EQ(1, cmdQ->getRefInternalCount()); EXPECT_EQ(1, cmdQ->getRefInternalCount()); cmdQ->incRefInternal(); EXPECT_EQ(2, cmdQ->getRefInternalCount()); releaseQueue(cmdQ, retVal); EXPECT_EQ(1, cmdQ->getRefInternalCount()); cmdQ->decRefInternal(); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/raii_hw_helper.h000066400000000000000000000013071363734646600264270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" namespace NEO { extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; template class RAIIHwHelperFactory { public: GFXCORE_FAMILY gfxCoreFamily; HwHelper *hwHelper; MockHelper mockHwHelper; RAIIHwHelperFactory(GFXCORE_FAMILY gfxCoreFamily) { this->gfxCoreFamily = gfxCoreFamily; hwHelper = 
hwHelperFactory[this->gfxCoreFamily]; hwHelperFactory[this->gfxCoreFamily] = &mockHwHelper; } ~RAIIHwHelperFactory() { hwHelperFactory[this->gfxCoreFamily] = hwHelper; } }; } // namespace NEOcompute-runtime-20.13.16352/opencl/test/unit_test/helpers/sampler_helpers_tests.cpp000066400000000000000000000015001363734646600304030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/sampler_helpers.h" #include "gtest/gtest.h" TEST(SamplerHelpers, ForAddressingModeCalledForEnumReturnClkEnum) { EXPECT_EQ(CLK_ADDRESS_REPEAT, GetAddrModeEnum(CL_ADDRESS_REPEAT)); EXPECT_EQ(CLK_ADDRESS_CLAMP_TO_EDGE, GetAddrModeEnum(CL_ADDRESS_CLAMP_TO_EDGE)); EXPECT_EQ(CLK_ADDRESS_CLAMP, GetAddrModeEnum(CL_ADDRESS_CLAMP)); EXPECT_EQ(CLK_ADDRESS_NONE, GetAddrModeEnum(CL_ADDRESS_NONE)); EXPECT_EQ(CLK_ADDRESS_MIRRORED_REPEAT, GetAddrModeEnum(CL_ADDRESS_MIRRORED_REPEAT)); } TEST(SamplerHelpers, ForNormalizationTypeCalledForEnumReturnClkEnum) { EXPECT_EQ(CLK_NORMALIZED_COORDS_TRUE, GetNormCoordsEnum(true)); EXPECT_EQ(CLK_NORMALIZED_COORDS_FALSE, GetNormCoordsEnum(false)); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/static_size3.h000066400000000000000000000004171363734646600260530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once template struct StatickSize3 { operator const size_t *() { static const size_t v[] = {X, Y, Z}; return v; } }; compute-runtime-20.13.16352/opencl/test/unit_test/helpers/task_information_tests.cpp000066400000000000000000000433071363734646600306000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/task_information.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include 
"opencl/test/unit_test/fixtures/dispatch_flags_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include using namespace NEO; TEST(CommandTest, mapUnmapSubmitWithoutTerminateFlagFlushesCsr) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); MockBuffer buffer; auto initialTaskCount = csr.peekTaskCount(); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; std::unique_ptr command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *cmdQ)); CompletionStamp completionStamp = command->submit(20, false); auto expectedTaskCount = initialTaskCount + 1; EXPECT_EQ(expectedTaskCount, completionStamp.taskCount); } TEST(CommandTest, mapUnmapSubmitWithTerminateFlagAbortsFlush) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); MockBuffer buffer; auto initialTaskCount = csr.peekTaskCount(); MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; std::unique_ptr command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *cmdQ)); CompletionStamp completionStamp = command->submit(20, true); auto submitTaskCount = csr.peekTaskCount(); EXPECT_EQ(initialTaskCount, submitTaskCount); auto expectedTaskCount = 0u; EXPECT_EQ(expectedTaskCount, completionStamp.taskCount); } TEST(CommandTest, markerSubmitWithoutTerminateFlagDosntFlushCsr) { auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); MockBuffer buffer; auto initialTaskCount = csr.peekTaskCount(); std::unique_ptr command(new CommandWithoutKernel(*cmdQ)); CompletionStamp completionStamp = command->submit(20, false); EXPECT_EQ(initialTaskCount, completionStamp.taskCount); EXPECT_EQ(initialTaskCount, csr.peekTaskCount()); } TEST(CommandTest, markerSubmitWithTerminateFlagAbortsFlush) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr)); MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); MockBuffer buffer; auto initialTaskCount = csr.peekTaskCount(); std::unique_ptr command(new CommandWithoutKernel(*cmdQ)); CompletionStamp completionStamp = command->submit(20, true); auto submitTaskCount = csr.peekTaskCount(); EXPECT_EQ(initialTaskCount, submitTaskCount); auto expectedTaskCount = 0u; EXPECT_EQ(expectedTaskCount, completionStamp.taskCount); } TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeLocalCopyOfWaitlist) { class MockCommandComputeKernel : public CommandComputeKernel { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, device.get(), nullptr); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; 
cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); UserEvent event1, event2, event3; cl_event waitlist[] = {&event1, &event2}; EventsRequest eventsRequest(2, waitlist, nullptr); MockCommandComputeKernel command(cmdQ, kernelOperation, surfaces, kernel); event1.incRefInternal(); event2.incRefInternal(); command.setEventsRequest(eventsRequest); waitlist[1] = &event3; EXPECT_EQ(static_cast(&event1), command.eventsWaitlist[0]); EXPECT_EQ(static_cast(&event2), command.eventsWaitlist[1]); } TEST(KernelOperationDestruction, givenKernelOperationWhenItIsDestructedThenAllAllocationsAreStoredInInternalStorageForReuse) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockCommandQueue cmdQ(nullptr, device.get(), nullptr); InternalAllocationStorage &allocationStorage = *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(); auto &allocationsForReuse = allocationStorage.getAllocationsForReuse(); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1); cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2); cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); auto &heapAllocation1 = *ih1->getGraphicsAllocation(); auto 
&heapAllocation2 = *ih2->getGraphicsAllocation(); auto &heapAllocation3 = *ih3->getGraphicsAllocation(); auto &cmdStreamAllocation = *cmdStream->getGraphicsAllocation(); auto kernelOperation = std::make_unique(cmdStream, allocationStorage); kernelOperation->setHeaps(ih1, ih2, ih3); EXPECT_TRUE(allocationsForReuse.peekIsEmpty()); kernelOperation.reset(); EXPECT_TRUE(allocationsForReuse.peekContains(cmdStreamAllocation)); EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation1)); EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation2)); EXPECT_TRUE(allocationsForReuse.peekContains(heapAllocation3)); } template class MockCsr1 : public CommandStreamReceiverHw { public: CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; return CompletionStamp(); } MockCsr1(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : CommandStreamReceiverHw::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex) {} DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); using CommandStreamReceiver::timestampPacketWriteEnabled; }; HWTEST_F(DispatchFlagsTests, givenCommandMapUnmapWhenSubmitThenPassCorrectDispatchFlags) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); MockBuffer buffer; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; std::unique_ptr command(new CommandMapUnmap(MapOperationType::MAP, buffer, size, offset, false, *mockCmdQ)); command->submit(20, false); PreemptionFlags flags = {}; PreemptionMode devicePreemption = mockCmdQ->getDevice().getPreemptionMode(); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), 
mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(PreemptionHelper::taskPreemptionMode(devicePreemption, flags), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(GrfConfig::DefaultGrfNumber, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_TRUE(mockCsr->passedDispatchFlags.dcFlush); EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_FALSE(mockCsr->passedDispatchFlags.gsba32BitRequired); EXPECT_FALSE(mockCsr->passedDispatchFlags.requiresCoherency); EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired); } HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchFlags) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3); PreemptionMode preemptionMode = device->getPreemptionMode(); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); std::vector surfaces; auto kernelOperation = 
std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals kernel(*device); kernelOperation->setHeaps(ih1, ih2, ih3); bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; bool requiresCoherency = false; for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.specialPipelineSelectMode); EXPECT_EQ(kernel.mockKernel->isVmeKernel(), mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(kernel.mockKernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush); EXPECT_EQ(slmUsed, mockCsr->passedDispatchFlags.useSLM); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_EQ(ndRangeKernel, mockCsr->passedDispatchFlags.gsba32BitRequired); EXPECT_EQ(requiresCoherency, mockCsr->passedDispatchFlags.requiresCoherency); EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired); } 
HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectDispatchFlags) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); mockCsr->timestampPacketWriteEnabled = true; mockCmdQ->timestampPacketContainer = std::make_unique(); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; TimestampPacketDependencies timestampPacketDependencies; mockCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); kernelOperation->setHeaps(ih1, ih2, ih3); std::unique_ptr command(new CommandWithoutKernel(*mockCmdQ, kernelOperation)); command->setTimestampPacketNode(*mockCmdQ->timestampPacketContainer, std::move(timestampPacketDependencies)); command->submit(20, false); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(mockCmdQ->getDevice().getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); EXPECT_EQ(GrfConfig::DefaultGrfNumber, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_FALSE(mockCsr->passedDispatchFlags.dcFlush); EXPECT_FALSE(mockCsr->passedDispatchFlags.useSLM); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); 
EXPECT_FALSE(mockCsr->passedDispatchFlags.gsba32BitRequired); EXPECT_FALSE(mockCsr->passedDispatchFlags.requiresCoherency); EXPECT_EQ(mockCmdQ->getPriority() == QueuePriority::LOW, mockCsr->passedDispatchFlags.lowPriority); EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush); EXPECT_EQ(mockCmdQ->getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed); EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired); } HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchHints) { using CsrType = MockCsr1; SetUpImpl(); auto mockCmdQ = std::make_unique>(context.get(), device.get(), nullptr); auto mockCsr = static_cast(&mockCmdQ->getGpgpuCommandStreamReceiver()); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; mockCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1); mockCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2); mockCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3); mockCmdQ->dispatchHints = 1234; PreemptionMode preemptionMode = device->getPreemptionMode(); auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, GraphicsAllocation::AllocationType::COMMAND_BUFFER})); std::vector surfaces; auto kernelOperation = std::make_unique(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); MockKernelWithInternals kernel(*device); kernelOperation->setHeaps(ih1, ih2, ih3); bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; bool requiresCoherency = false; for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_TRUE(mockCsr->passedDispatchFlags.epilogueRequired); EXPECT_EQ(1234u, 
mockCsr->passedDispatchFlags.engineHints); EXPECT_EQ(kernel.mockKernel->getThreadArbitrationPolicy(), mockCsr->passedDispatchFlags.threadArbitrationPolicy); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/test_debug_variables.inl000066400000000000000000000003671363734646600301630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ DECLARE_DEBUG_VARIABLE(std::string, StringTestKey, "DefaultTestValue", "TestDescription") DECLARE_DEBUG_VARIABLE(int32_t, IntTestKey, 1, "TestDescription") compute-runtime-20.13.16352/opencl/test/unit_test/helpers/test_files.cpp000066400000000000000000000016511363734646600261440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_files.h" #include "shared/source/helpers/file_io.h" #include "config.h" std::string testFiles("test_files/" NEO_ARCH "/"); std::string clFiles("test_files/"); std::string binaryNameSuffix(""); void retrieveBinaryKernelFilename(std::string &outputFilename, const std::string &kernelName, const std::string &extension, const std::string &options) { if (outputFilename.length() > 0) { outputFilename.clear(); } outputFilename.reserve(2 * testFiles.length()); outputFilename.append(testFiles); outputFilename.append(kernelName); outputFilename.append(binaryNameSuffix); outputFilename.append(extension); outputFilename.append(options); if (!fileExists(outputFilename) && (extension == ".bc")) { retrieveBinaryKernelFilename(outputFilename, kernelName, ".spv", options); } } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/test_files.h000066400000000000000000000005711363734646600256110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include extern std::string testFiles; extern std::string clFiles; extern std::string binaryNameSuffix; void retrieveBinaryKernelFilename(std::string &outputFilename, 
const std::string &kernelName, const std::string &extension, const std::string &options = ""); compute-runtime-20.13.16352/opencl/test/unit_test/helpers/timestamp_packet_tests.cpp000066400000000000000000002610471363734646600305660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/command_queue/hardware_interface.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "test.h" #include "gmock/gmock.h" using namespace NEO; struct TimestampPacketSimpleTests : public ::testing::Test { void setTagToReadyState(TagNode *tagNode) { for (auto &packet : tagNode->tagForCpuAccess->packets) { packet.contextStart = 0u; packet.globalStart = 0u; packet.contextEnd = 0u; packet.globalEnd = 0u; } tagNode->tagForCpuAccess->implicitDependenciesCount.store(0); } const size_t gws[3] = 
{1, 1, 1}; }; struct TimestampPacketTests : public TimestampPacketSimpleTests { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } device = std::make_unique(Device::create(executionEnvironment, 0u)); context = new MockContext(device.get()); kernel = std::make_unique(*device, context); mockCmdQ = new MockCommandQueue(context, device.get(), nullptr); } void TearDown() override { mockCmdQ->release(); context->release(); } template void verifySemaphore(MI_SEMAPHORE_WAIT *semaphoreCmd, TagNode *timestampPacketNode, uint32_t packetId) { EXPECT_NE(nullptr, semaphoreCmd); EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet); auto dataAddress = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd) + compareOffset; EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); }; template void verifyMiAtomic(typename GfxFamily::MI_ATOMIC *miAtomicCmd, TagNode *timestampPacketNode) { using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; EXPECT_NE(nullptr, miAtomicCmd); auto writeAddress = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, implicitDependenciesCount); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT, miAtomicCmd->getAtomicOpcode()); EXPECT_EQ(writeAddress, UnitTestHelper::getMemoryAddress(*miAtomicCmd)); }; void verifyDependencyCounterValues(TimestampPacketContainer *timestampPacketContainer, uint32_t expectedValue) { auto &nodes = timestampPacketContainer->peekNodes(); EXPECT_NE(0u, nodes.size()); for (auto &node : nodes) { 
EXPECT_EQ(expectedValue, node->tagForCpuAccess->implicitDependenciesCount.load()); } } ExecutionEnvironment *executionEnvironment; std::unique_ptr device; MockContext *context; std::unique_ptr kernel; MockCommandQueue *mockCmdQ; }; HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreAndAtomicAreProgrammedThenUseGpuAddress) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; struct MockTagNode : public TagNode { using TagNode::gpuAddress; }; TimestampPacketStorage tag; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; auto &cmdStream = mockCmdQ->getCS(0); TimestampPacketHelper::programSemaphoreWithImplicitDependency(cmdStream, mockNode); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it++), &mockNode, 0); verifyMiAtomic(genCmdCast(*it++), &mockNode); } HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomicAreProgrammedThenUseGpuAddress) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; struct MockTagNode : public TagNode { using TagNode::gpuAddress; }; TimestampPacketStorage tag; tag.packetsUsed = 2; MockTagNode mockNode; mockNode.tagForCpuAccess = &tag; mockNode.gpuAddress = 0x1230000; auto &cmdStream = mockCmdQ->getCS(0); TimestampPacketHelper::programSemaphoreWithImplicitDependency(cmdStream, mockNode); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); for (uint32_t packetId = 0; packetId < tag.packetsUsed; packetId++) { verifySemaphore(genCmdCast(*it++), &mockNode, packetId); } verifyMiAtomic(genCmdCast(*it++), &mockNode); } TEST_F(TimestampPacketSimpleTests, whenEndTagIsNotOneThenMarkAsCompleted) { TimestampPacketStorage timestampPacketStorage; auto &packet = timestampPacketStorage.packets[0]; packet.contextEnd = 1; packet.globalEnd = 1; 
EXPECT_FALSE(timestampPacketStorage.isCompleted()); packet.contextEnd = 1; packet.globalEnd = 0; EXPECT_FALSE(timestampPacketStorage.isCompleted()); packet.contextEnd = 0; packet.globalEnd = 1; EXPECT_FALSE(timestampPacketStorage.isCompleted()); packet.contextEnd = 0; packet.globalEnd = 0; EXPECT_TRUE(timestampPacketStorage.isCompleted()); } TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedTheMoveAllNodes) { EXPECT_TRUE(std::is_move_constructible::value); EXPECT_TRUE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); EXPECT_FALSE(std::is_copy_constructible::value); struct MockTagNode : public TagNode { void returnTag() override { returnCalls++; } using TagNode::refCount; uint32_t returnCalls = 0; }; MockTagNode node0; MockTagNode node1; { TimestampPacketContainer timestampPacketContainer0; TimestampPacketContainer timestampPacketContainer1; timestampPacketContainer0.add(&node0); timestampPacketContainer0.add(&node1); timestampPacketContainer1 = std::move(timestampPacketContainer0); EXPECT_EQ(0u, node0.returnCalls); EXPECT_EQ(0u, node1.returnCalls); EXPECT_EQ(2u, timestampPacketContainer1.peekNodes().size()); EXPECT_EQ(&node0, timestampPacketContainer1.peekNodes()[0]); EXPECT_EQ(&node1, timestampPacketContainer1.peekNodes()[1]); } EXPECT_EQ(1u, node0.returnCalls); EXPECT_EQ(1u, node1.returnCalls); } TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTimestampPacketStatus) { TimestampPacketStorage timestampPacketStorage; auto &packet = timestampPacketStorage.packets[0]; EXPECT_FALSE(timestampPacketStorage.isCompleted()); packet.contextEnd = 0; EXPECT_FALSE(timestampPacketStorage.isCompleted()); packet.globalEnd = 0; EXPECT_TRUE(timestampPacketStorage.isCompleted()); } TEST_F(TimestampPacketSimpleTests, givenMultiplePacketsInUseWhenCompletionIsCheckedTheVerifyAllUsedNodes) { TimestampPacketStorage timestampPacketStorage; auto &packets = timestampPacketStorage.packets; 
timestampPacketStorage.packetsUsed = TimestampPacketSizeControl::preferredPacketCount - 1; for (uint32_t i = 0; i < timestampPacketStorage.packetsUsed - 1; i++) { packets[i].contextEnd = 0; packets[i].globalEnd = 0; EXPECT_FALSE(timestampPacketStorage.isCompleted()); } packets[timestampPacketStorage.packetsUsed - 1].contextEnd = 0; EXPECT_FALSE(timestampPacketStorage.isCompleted()); packets[timestampPacketStorage.packetsUsed - 1].globalEnd = 0; EXPECT_TRUE(timestampPacketStorage.isCompleted()); } TEST_F(TimestampPacketSimpleTests, givenImplicitDependencyWhenEndTagIsWrittenThenCantBeReleased) { TimestampPacketStorage timestampPacketStorage; timestampPacketStorage.packets[0].contextEnd = 0; timestampPacketStorage.packets[0].globalEnd = 0; timestampPacketStorage.implicitDependenciesCount.store(1); EXPECT_FALSE(timestampPacketStorage.isCompleted()); timestampPacketStorage.implicitDependenciesCount.store(0); EXPECT_TRUE(timestampPacketStorage.isCompleted()); } TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); MockTagAllocator allocator(0, &memoryManager, 1); auto firstNode = allocator.getTag(); auto i = 0u; for (auto &packet : firstNode->tagForCpuAccess->packets) { packet.contextStart = i++; packet.globalStart = i++; packet.contextEnd = i++; packet.globalEnd = i++; } auto &dependenciesCount = firstNode->tagForCpuAccess->implicitDependenciesCount; setTagToReadyState(firstNode); allocator.returnTag(firstNode); dependenciesCount++; auto secondNode = allocator.getTag(); EXPECT_EQ(secondNode, firstNode); EXPECT_EQ(0u, dependenciesCount.load()); for (const auto &packet : firstNode->tagForCpuAccess->packets) { EXPECT_EQ(1u, packet.contextStart); EXPECT_EQ(1u, packet.globalStart); EXPECT_EQ(1u, packet.contextEnd); EXPECT_EQ(1u, packet.globalEnd); } EXPECT_EQ(1u, firstNode->tagForCpuAccess->packetsUsed); } 
TEST_F(TimestampPacketSimpleTests, whenObjectIsCreatedThenInitializeAllStamps) { TimestampPacketStorage timestampPacketStorage; EXPECT_EQ(TimestampPacketSizeControl::preferredPacketCount * sizeof(timestampPacketStorage.packets[0]), sizeof(timestampPacketStorage.packets)); for (const auto &packet : timestampPacketStorage.packets) { EXPECT_EQ(1u, packet.contextStart); EXPECT_EQ(1u, packet.globalStart); EXPECT_EQ(1u, packet.contextEnd); EXPECT_EQ(1u, packet.globalEnd); } EXPECT_EQ(1u, timestampPacketStorage.packetsUsed); } HWTEST_F(TimestampPacketTests, givenCommandStreamReceiverHwWhenObtainingPreferredTagPoolSizeThenReturnCorrectValue) { CommandStreamReceiverHw csr(*executionEnvironment, 0); EXPECT_EQ(512u, csr.getPreferredTagPoolSize()); } HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAllocatorThenDisableReusingAndLimitPoolSize) { DebugManagerStateRestore restore; DebugManager.flags.DisableTimestampPacketOptimizations.set(true); CommandStreamReceiverHw csr(*executionEnvironment, 0); EXPECT_EQ(1u, csr.getPreferredTagPoolSize()); auto tag = csr.getTimestampPacketAllocator()->getTag(); for (auto &packet : tag->tagForCpuAccess->packets) { packet.contextStart = 0; packet.globalStart = 0; packet.contextEnd = 0; packet.globalEnd = 0; } EXPECT_TRUE(tag->tagForCpuAccess->isCompleted()); EXPECT_FALSE(tag->canBeReleased()); } HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeThenAddPipeControl) { MockKernelWithInternals kernel2(*device); MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel->mockKernel, kernel2.mockKernel})); auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; 
device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; auto extendedSize = sizeWithDisabled + sizeof(typename FamilyType::PIPE_CONTROL); EXPECT_EQ(sizeWithEnabled, extendedSize); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimatingStreamSizeDontDontAddAdditionalSize) { MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel->mockKernel})); auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); mockCmdQHw->setOoqEnabled(); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); Event event1(mockCmdQHw.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(mockCmdQHw.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); Event event3(mockCmdQHw.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(mockCmdQHw.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(mockCmdQHw.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); const cl_uint 
numEventsOnWaitlist = 5; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependencies( csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : csrDeps) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t extendedSize = sizeWithDisabled + EnqueueOperation::getSizeRequiredForTimestampPacketWrite() + sizeForNodeDependency; EXPECT_EQ(sizeWithEnabled, extendedSize); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStreamSizeWithWaitlistThenAddSizeForSemaphores) { MockKernelWithInternals kernel2(*device); MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel->mockKernel, kernel2.mockKernel})); auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); 
MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); Event event1(mockCmdQHw.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(mockCmdQHw.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); Event event3(mockCmdQHw.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(mockCmdQHw.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(mockCmdQHw.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDeps; eventsRequest.fillCsrDependencies(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : csrDeps) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t extendedSize = sizeWithDisabled + EnqueueOperation::getSizeRequiredForTimestampPacketWrite() + sizeForNodeDependency; EXPECT_EQ(sizeWithEnabled, extendedSize); } HWTEST_F(TimestampPacketTests, givenEventsRequestWithEventsWithoutTimestampsWhenComputeCsrDepsThanDoNotAddthemToCsrDeps) { device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; Event eventWithoutTimestampContainer1(mockCmdQ, 0, 0, 0); Event eventWithoutTimestampContainer2(mockCmdQ, 0, 0, 0); Event eventWithoutTimestampContainer3(mockCmdQ, 0, 0, 0); Event eventWithoutTimestampContainer4(mockCmdQ, 0, 0, 0); Event eventWithoutTimestampContainer5(mockCmdQ, 0, 0, 0); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = 
{&eventWithoutTimestampContainer1, &eventWithoutTimestampContainer2, &eventWithoutTimestampContainer3, &eventWithoutTimestampContainer4, &eventWithoutTimestampContainer5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDepsEmpty; eventsRequest.fillCsrDependencies(csrDepsEmpty, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); EXPECT_EQ(0u, csrDepsEmpty.size()); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5); Event event1(mockCmdQ, 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event eventWithEmptyTimestampContainer2(mockCmdQ, 0, 0, 0); // event2 does not have timestamp Event event3(mockCmdQ, 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event eventWithEmptyTimestampContainer4(mockCmdQ, 0, 0, 0); // event4 does not have timestamp Event event5(mockCmdQ, 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); cl_event waitlist2[] = {&event1, &eventWithEmptyTimestampContainer2, &event3, &eventWithEmptyTimestampContainer4, &event5}; EventsRequest eventsRequest2(numEventsOnWaitlist, waitlist2, nullptr); CsrDependencies csrDepsSize3; eventsRequest2.fillCsrDependencies(csrDepsSize3, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); EXPECT_EQ(3u, csrDepsSize3.size()); size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : csrDepsSize3) { for 
// This line holds the tail of a test that starts earlier, one complete test, and the opening of a third test.
// NOTE(review): calls such as `genCmdCast(*it)` and `std::vector({...})` look like they have lost their template
// argument lists, and `×tampPacket` is not a name declared here (presumably a mangled `&timestampPacket`) —
// confirm against the upstream file before building.
        (auto &node : timestampPacketContainer->peekNodes()) {
            sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node);
        }
    }
    // The helper's total for the whole dependency set must equal the sum of its per-node estimates.
    size_t expectedSize = sizeForNodeDependency;
    EXPECT_EQ(expectedSize, TimestampPacketHelper::getRequiredCmdStreamSize(csrDepsSize3));
}

// Checks the per-node estimate: packetsUsed times sizeof(MI_SEMAPHORE_WAIT) plus one sizeof(MI_ATOMIC).
HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorrectValue) {
    // Exposes gpuAddress through a using-declaration so the test can assign it below.
    struct MockTagNode : public TagNode {
        using TagNode::gpuAddress;
    };

    // Stack-allocated tag storage wired into a mock node; the address is an arbitrary test value.
    TimestampPacketStorage tag;
    MockTagNode mockNode;
    mockNode.tagForCpuAccess = &tag;
    mockNode.gpuAddress = 0x1230000;

    size_t sizeForNodeDependency = 0;
    sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(mockNode);

    size_t expectedSize = mockNode.tagForCpuAccess->packetsUsed * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
    EXPECT_EQ(expectedSize, sizeForNodeDependency);
}

// Start of a test registered with IGFX_GEN8_CORE (continues past the end of this line): dispatches two kernels
// with timestamp packet writes enabled, parses the queue's command stream and walks the parsed command list.
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispatchingGpuWalkerThenAddTwoPcForLastWalker) {
    using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Container created with 2 as its second argument; two kernels are placed in the dispatch info.
    MockTimestampPacketContainer timestampPacket(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2);
    MockKernelWithInternals kernel2(*device);
    MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel->mockKernel, kernel2.mockKernel}));

    auto &cmdStream = mockCmdQ->getCS(0);
    device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;

    HardwareInterface::dispatchWalker(
        *mockCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        ×tampPacket,
        CL_COMMAND_NDRANGE_KERNEL);

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);

    uint32_t walkersFound = 0;
    for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
        if (genCmdCast(*it)) {
            // When the helper reports the workaround as required, one extra command is consumed first.
            if (HardwareCommandsHelper::isPipeControlWArequired(device->getHardwareInfo())) {
                auto pipeControl = genCmdCast(*++it);
EXPECT_NE(nullptr, pipeControl); } auto pipeControl = genCmdCast(*++it); EXPECT_NE(nullptr, pipeControl); auto expectedAddress = timestampPacket.getNode(walkersFound)->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); EXPECT_EQ(0u, pipeControl->getImmediateData()); EXPECT_EQ(static_cast(expectedAddress), pipeControl->getAddress()); EXPECT_EQ(static_cast(expectedAddress >> 32), pipeControl->getAddressHigh()); walkersFound++; } } EXPECT_EQ(2u, walkersFound); } HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketDisabledWhenDispatchingGpuWalkerThenDontAddPipeControls) { MockTimestampPacketContainer timestampPacket(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockMultiDispatchInfo multiDispatchInfo(kernel->mockKernel); auto &cmdStream = mockCmdQ->getCS(0); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; HardwareInterface::dispatchWalker( *mockCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, ×tampPacket, CL_COMMAND_NDRANGE_KERNEL); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto cmdItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_EQ(hwParser.cmdList.end(), cmdItor); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenObtainNewStampAndPassToEvent) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get()); csr.timestampPacketAllocator.reset(mockTagAllocator); auto cmdQ = std::make_unique>(context, device.get(), nullptr); cl_event event1, event2; // obtain first node for cmdQ and event1 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, 
nullptr, &event1); auto node1 = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(nullptr, node1); EXPECT_EQ(node1, cmdQ->timestampPacketContainer->peekNodes().at(0)); // obtain new node for cmdQ and event2 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event2); auto node2 = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_NE(nullptr, node2); EXPECT_EQ(node2, cmdQ->timestampPacketContainer->peekNodes().at(0)); EXPECT_EQ(0u, mockTagAllocator->returnedToFreePoolNodes.size()); // nothing returned. event1 owns previous node EXPECT_EQ(1u, mockTagAllocator->releaseReferenceNodes.size()); // cmdQ released first node EXPECT_EQ(node1, mockTagAllocator->releaseReferenceNodes.at(0)); EXPECT_NE(node1, node2); setTagToReadyState(node1); setTagToReadyState(node2); clReleaseEvent(event2); EXPECT_EQ(0u, mockTagAllocator->returnedToFreePoolNodes.size()); // nothing returned. cmdQ owns node2 EXPECT_EQ(2u, mockTagAllocator->releaseReferenceNodes.size()); // event2 released node2 EXPECT_EQ(node2, mockTagAllocator->releaseReferenceNodes.at(1)); clReleaseEvent(event1); EXPECT_EQ(1u, mockTagAllocator->returnedToFreePoolNodes.size()); // removed last reference on node1 EXPECT_EQ(node1, mockTagAllocator->returnedToFreePoolNodes.at(0)); EXPECT_EQ(3u, mockTagAllocator->releaseReferenceNodes.size()); // event1 released node1 EXPECT_EQ(node1, mockTagAllocator->releaseReferenceNodes.at(2)); cmdQ.reset(nullptr); EXPECT_EQ(2u, mockTagAllocator->returnedToFreePoolNodes.size()); // removed last reference on node2 EXPECT_EQ(node2, mockTagAllocator->returnedToFreePoolNodes.at(1)); EXPECT_EQ(4u, mockTagAllocator->releaseReferenceNodes.size()); // cmdQ released node2 EXPECT_EQ(node2, mockTagAllocator->releaseReferenceNodes.at(3)); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenWriteWalkerStamp) { using GPGPU_WALKER = typename FamilyType::WALKER_TYPE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; 
// Continuation of a test whose header and type aliases precede this line: enqueues one kernel with timestamp
// packet writes enabled and inspects the commands parsed from the queue's stream.
// NOTE(review): `std::make_unique>(`, `genCmdCast(*it)` and `find(++it, ...)` look like they have lost their
// template argument lists — confirm against the upstream file before building.
    device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
    auto cmdQ = std::make_unique>(context, device.get(), nullptr);

    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    // After the enqueue the queue's container is expected to hold exactly one node.
    EXPECT_EQ(1u, cmdQ->timestampPacketContainer->peekNodes().size());

    HardwareParse hwParser;
    hwParser.parseCommands(cmdQ->getCS(0), 0);

    bool walkerFound = false;
    for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
        if (genCmdCast(*it)) {
            // When the helper reports the workaround as required, the next command must cast successfully.
            if (HardwareCommandsHelper::isPipeControlWArequired(device->getHardwareInfo())) {
                auto pipeControl = genCmdCast(*++it);
                EXPECT_NE(nullptr, pipeControl);
            }
            walkerFound = true;
            // Search forward from the following command; the match must exist and use the
            // write-immediate-data post-sync operation.
            it = find(++it, hwParser.cmdList.end());
            ASSERT_NE(hwParser.cmdList.end(), it);
            auto pipeControl = genCmdCast(*it);
            ASSERT_NE(nullptr, pipeControl);
            EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
        }
    }
    EXPECT_TRUE(walkerFound);
}

// Start of the next test (continues past the end of this line): builds a second device, context and queue,
// five timestamp containers from the first device's allocator, and the first event on the second queue.
HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForCsrThenAddSizeForSemaphores) {
    auto device2 = std::make_unique(Device::create(executionEnvironment, 1u));
    MockContext context2(device2.get());
    auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr);

    MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2);
    MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3);
    MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4);
    MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5);

    // Only the second device's receiver has the write flag set here.
    auto &csr = device->getUltCommandStreamReceiver();
    auto &csr2 = device2->getUltCommandStreamReceiver();
    csr2.timestampPacketWriteEnabled = true;

    Event event1(cmdQ2.get(), 0, 0, 0);
event1.addTimestampPacketNodes(timestamp1); Event event2(cmdQ2.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); Event event3(cmdQ2.get(), 0, 0, 0); event3.addTimestampPacketNodes(timestamp3); Event event4(cmdQ2.get(), 0, 0, 0); event4.addTimestampPacketNodes(timestamp4); Event event5(cmdQ2.get(), 0, 0, 0); event5.addTimestampPacketNodes(timestamp5); const cl_uint numEventsOnWaitlist = 5; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5}; EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto sizeWithoutEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice()); eventsRequest.fillCsrDependencies(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr); auto sizeWithEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice()); size_t sizeForNodeDependency = 0; for (auto timestampPacketContainer : flags.csrDependencies) { for (auto &node : timestampPacketContainer->peekNodes()) { sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node); } } size_t extendedSize = sizeWithoutEvents + sizeForNodeDependency; EXPECT_EQ(sizeWithEvents, extendedSize); } HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForDifferentCsrFromSameDeviceThenAddSizeForSemaphores) { // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ2 = std::make_unique>(context, device.get(), props); MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 3); MockTimestampPacketContainer 
// Continuation of a test that starts before this line (the declaration's type name ends the previous line).
// Events are created on cmdQ2, and the size the first receiver reports is compared before and after the
// dependencies are filled in with the OutOfCsr dependency type.
// NOTE(review): `std::make_unique(` further down looks like it has lost its template argument list — confirm
// against the upstream file before building.
    timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 4);
    MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 5);

    // csr comes from the device, csr2 from the second queue; only csr2's write flag is set here.
    auto &csr = device->getUltCommandStreamReceiver();
    auto &csr2 = cmdQ2->getUltCommandStreamReceiver();
    csr2.timestampPacketWriteEnabled = true;

    Event event1(cmdQ2.get(), 0, 0, 0);
    event1.addTimestampPacketNodes(timestamp1);
    Event event2(cmdQ2.get(), 0, 0, 0);
    event2.addTimestampPacketNodes(timestamp2);
    Event event3(cmdQ2.get(), 0, 0, 0);
    event3.addTimestampPacketNodes(timestamp3);
    Event event4(cmdQ2.get(), 0, 0, 0);
    event4.addTimestampPacketNodes(timestamp4);
    Event event5(cmdQ2.get(), 0, 0, 0);
    event5.addTimestampPacketNodes(timestamp5);

    const cl_uint numEventsOnWaitlist = 5;
    cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5};

    EventsRequest eventsRequest(numEventsOnWaitlist, waitlist, nullptr);
    DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();

    // Baseline with default flags, then again after the dependencies have been added to the flags.
    auto sizeWithoutEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice());

    eventsRequest.fillCsrDependencies(flags.csrDependencies, csr, NEO::CsrDependencies::DependenciesType::OutOfCsr);
    auto sizeWithEvents = csr.getRequiredCmdStreamSize(flags, device->getDevice());

    // Sum the helper's estimate over every node of every collected container.
    size_t sizeForNodeDependency = 0;
    for (auto timestampPacketContainer : flags.csrDependencies) {
        for (auto &node : timestampPacketContainer->peekNodes()) {
            sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency(*node);
        }
    }

    size_t extendedSize = sizeWithoutEvents + sizeForNodeDependency;
    EXPECT_EQ(sizeWithEvents, extendedSize);
}

// Start of the next test (continues past the end of this line): declares the command type aliases and
// creates a second device.
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;

    auto device2 = std::make_unique(Device::create(executionEnvironment, 1u));
// Continuation of a test whose header precedes this line: enqueues a kernel on cmdQ1 with a six-entry
// waitlist and verifies the commands at the start of the first device's receiver stream.
// NOTE(review): `std::make_unique>(` and `genCmdCast(*it++)` look like they have lost their template argument
// lists — confirm against the upstream file before building.
    device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
    device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
    MockContext context2(device2.get());

    // cmdQ1 is created on the fixture's device, cmdQ2 on the second device.
    auto cmdQ1 = std::make_unique>(context, device.get(), nullptr);
    auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr);

    const cl_uint eventsOnWaitlist = 6;
    MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2);

    // Two user events already set to CL_COMPLETE, then events alternating between cmdQ1 and cmdQ2.
    UserEvent event1;
    event1.setStatus(CL_COMPLETE);
    UserEvent event2;
    event2.setStatus(CL_COMPLETE);
    Event event3(cmdQ1.get(), 0, 0, 0);
    event3.addTimestampPacketNodes(timestamp3);
    Event event4(cmdQ2.get(), 0, 0, 0);
    event4.addTimestampPacketNodes(timestamp4);
    Event event5(cmdQ1.get(), 0, 0, 0);
    event5.addTimestampPacketNodes(timestamp5);
    Event event6(cmdQ2.get(), 0, 0, 0);
    event6.addTimestampPacketNodes(timestamp6);

    cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};

    cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
    auto &cmdStream = device->getUltCommandStreamReceiver().commandStream;

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);

    // The leading commands are checked pairwise against the nodes of the cmdQ2 events (event4, then both
    // nodes of event6); the cmdQ1 events are not checked here.
    auto it = hwParser.cmdList.begin();
    verifySemaphore(genCmdCast(*it++), timestamp4.getNode(0), 0);
    verifyMiAtomic(genCmdCast(*it++), timestamp4.getNode(0));
    verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 1);

    verifySemaphore(genCmdCast(*it++), timestamp6.getNode(0), 0);
    verifyMiAtomic(genCmdCast(*it++), timestamp6.getNode(0));
    verifySemaphore(genCmdCast(*it++), timestamp6.getNode(1), 0);
    verifyMiAtomic(genCmdCast(*it++),
timestamp6.getNode(1)); verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 1); while (it != hwParser.cmdList.end()) { EXPECT_EQ(nullptr, genCmdCast(*it)); it++; } } HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDifferentCsrsThenPushEverything) { auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); auto &csr1 = device->getUltCommandStreamReceiver(); auto &csr2 = device2->getUltCommandStreamReceiver(); csr1.timestampPacketWriteEnabled = true; csr2.timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); const cl_uint eventsOnWaitlist = 2; MockTimestampPacketContainer timestamp1(*csr1.getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*csr2.getTimestampPacketAllocator(), 1); Event event1(cmdQ1.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); Event event2(cmdQ2.get(), 0, 0, 0); event2.addTimestampPacketNodes(timestamp2); cl_event waitlist[] = {&event1, &event2}; EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr); CsrDependencies csrDependencies; eventsRequest.fillCsrDependencies(csrDependencies, csr1, CsrDependencies::DependenciesType::All); EXPECT_EQ(static_cast(eventsOnWaitlist), csrDependencies.size()); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ2 = std::make_unique>(context, device.get(), props); 
// Continuation of a test whose header and queue setup precede this line: enqueues a kernel on cmdQ1 with a
// six-entry waitlist and verifies the commands at the start of the device's receiver stream.
// NOTE(review): `genCmdCast(*it++)` looks like it has lost its template argument list — confirm against the
// upstream file before building.
    cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;

    const cl_uint eventsOnWaitlist = 6;
    MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2);

    // Two user events already set to CL_COMPLETE, then events alternating between cmdQ1 and cmdQ2.
    UserEvent event1;
    event1.setStatus(CL_COMPLETE);
    UserEvent event2;
    event2.setStatus(CL_COMPLETE);
    Event event3(cmdQ1.get(), 0, 0, 0);
    event3.addTimestampPacketNodes(timestamp3);
    Event event4(cmdQ2.get(), 0, 0, 0);
    event4.addTimestampPacketNodes(timestamp4);
    Event event5(cmdQ1.get(), 0, 0, 0);
    event5.addTimestampPacketNodes(timestamp5);
    Event event6(cmdQ2.get(), 0, 0, 0);
    event6.addTimestampPacketNodes(timestamp6);

    cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};

    cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
    auto &cmdStream = device->getUltCommandStreamReceiver().commandStream;

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);

    // The leading commands are checked pairwise against the nodes of the cmdQ2 events (event4, then both
    // nodes of event6); the cmdQ1 events are not checked here.
    auto it = hwParser.cmdList.begin();
    verifySemaphore(genCmdCast(*it++), timestamp4.getNode(0), 0);
    verifyMiAtomic(genCmdCast(*it++), timestamp4.getNode(0));
    verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 1);

    verifySemaphore(genCmdCast(*it++), timestamp6.getNode(0), 0);
    verifyMiAtomic(genCmdCast(*it++), timestamp6.getNode(0));
    verifySemaphore(genCmdCast(*it++), timestamp6.getNode(1), 0);
    verifyMiAtomic(genCmdCast(*it++), timestamp6.getNode(1));
    verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 1);

    // Every remaining command must fail the cast.
    while (it != hwParser.cmdList.end()) {
        EXPECT_EQ(nullptr, genCmdCast(*it));
        it++;
    }
}

// Opening of the next test's macro invocation; the test name follows on the next line.
HWTEST_F(TimestampPacketTests,
// Test name and body for the HWTEST_F( opened at the end of the previous line. A kernel is enqueued behind a
// user event; the receiver stream must stay empty until that event is completed, after which its leading
// commands are verified.
// NOTE(review): `std::make_unique(`, `genCmdCast(*it++)` and `new MockCommandQueueHw(` look like they have lost
// their template argument lists — confirm against the upstream file before building.
             givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto device2 = std::make_unique(Device::create(executionEnvironment, 1u));

    device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
    device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;

    // context2 and cmdQ2 are raw pointers here; both are released explicitly at the end of the test.
    auto context2 = new MockContext(device2.get());

    auto cmdQ1 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr));
    auto cmdQ2 = new MockCommandQueueHw(context2, device2.get(), nullptr);

    MockTimestampPacketContainer timestamp0(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);

    // userEvent has no status set yet; event0 is on cmdQ1 and event1 on cmdQ2.
    UserEvent userEvent;
    Event event0(cmdQ1.get(), 0, 0, 0);
    event0.addTimestampPacketNodes(timestamp0);
    Event event1(cmdQ2, 0, 0, 0);
    event1.addTimestampPacketNodes(timestamp1);

    cl_event waitlist[] = {&userEvent, &event0, &event1};
    cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr);

    // Nothing may have been written to the receiver stream before the user event is completed.
    auto &cmdStream = device->getUltCommandStreamReceiver().commandStream;
    EXPECT_EQ(0u, cmdStream.getUsed());
    userEvent.setStatus(CL_COMPLETE);
    // Return values are ignored; NOTE(review): presumably called for their side effects — confirm.
    cmdQ1->isQueueBlocked();
    cmdQ2->isQueueBlocked();

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);

    // The first two commands are checked against event1's node; event0 (on cmdQ1) is not checked here.
    auto it = hwParser.cmdList.begin();
    verifySemaphore(genCmdCast(*it++), timestamp1.getNode(0), 0);
    verifyMiAtomic(genCmdCast(*it++), timestamp1.getNode(0));
    verifyDependencyCounterValues(event1.getTimestampPacketNodes(), 1);

    // Every remaining command must fail the cast.
    while (it != hwParser.cmdList.end()) {
        EXPECT_EQ(nullptr, genCmdCast(*it));
        it++;
    }

    cmdQ2->release();
    context2->release();
}

// Start of the next test (continues past the end of this line).
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto device2 =
std::unique_ptr(Device::create(executionEnvironment, 1u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ1 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ2 = clUniquePtr(new MockCommandQueueHw(context, device.get(), props)); cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer timestamp0(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); UserEvent userEvent; Event event0(cmdQ1.get(), 0, 0, 0); event0.addTimestampPacketNodes(timestamp0); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(timestamp1); cl_event waitlist[] = {&userEvent, &event0, &event1}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr); auto &cmdStream = device->getUltCommandStreamReceiver().commandStream; EXPECT_EQ(0u, cmdStream.getUsed()); userEvent.setStatus(CL_COMPLETE); HardwareParse hwParser; hwParser.parseCommands(cmdStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it++), timestamp1.getNode(0), 0); verifyMiAtomic(genCmdCast(*it++), timestamp1.getNode(0)); verifyDependencyCounterValues(event1.getTimestampPacketNodes(), 1); while (it != hwParser.cmdList.end()) { EXPECT_EQ(nullptr, genCmdCast(*it)); it++; } cmdQ2->isQueueBlocked(); cmdQ1->isQueueBlocked(); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingThenProgramSemaphoresForWaitlist) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER = WALKER_TYPE; auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; 
// Continuation of a test whose header precedes this line: fills dependencies from a six-entry waitlist,
// dispatches a walker on mockCmdQ and starts scanning the parsed command list (the scan continues on the
// next line).
// NOTE(review): `std::vector({...})` and `genCmdCast(*it)` look like they have lost their template argument
// lists, and `×tamp7` is not a name declared here (presumably a mangled `&timestamp7`) — confirm against
// the upstream file before building.
    device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;
    MockContext context2(device2.get());

    MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel->mockKernel}));

    // Second queue lives on the second device; the stream under test belongs to the fixture's mockCmdQ.
    MockCommandQueue mockCmdQ2(&context2, device2.get(), nullptr);
    auto &cmdStream = mockCmdQ->getCS(0);

    const cl_uint eventsOnWaitlist = 6;
    MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2);
    MockTimestampPacketContainer timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp7(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);

    // Two user events without nodes, then events alternating between mockCmdQ and mockCmdQ2.
    UserEvent event1;
    UserEvent event2;
    Event event3(mockCmdQ, 0, 0, 0);
    event3.addTimestampPacketNodes(timestamp3);
    Event event4(&mockCmdQ2, 0, 0, 0);
    event4.addTimestampPacketNodes(timestamp4);
    Event event5(mockCmdQ, 0, 0, 0);
    event5.addTimestampPacketNodes(timestamp5);
    Event event6(&mockCmdQ2, 0, 0, 0);
    event6.addTimestampPacketNodes(timestamp6);

    cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};

    // Collect dependencies against mockCmdQ's receiver using the OnCsr dependency type.
    EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr);
    CsrDependencies csrDeps;
    eventsRequest.fillCsrDependencies(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);

    HardwareInterface::dispatchWalker(
        *mockCmdQ,
        multiDispatchInfo,
        csrDeps,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        ×tamp7,
        CL_COMMAND_NDRANGE_KERNEL);

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);

    // Count matching commands while walking the list; the per-match checks follow on the next line.
    uint32_t semaphoresFound = 0;
    uint32_t walkersFound = 0;

    for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
        auto semaphoreCmd = genCmdCast(*it);
        if (semaphoreCmd) {
            semaphoresFound++;
            if (semaphoresFound == 1) {
verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0); verifyMiAtomic(genCmdCast(*++it), timestamp3.getNode(0)); verifyDependencyCounterValues(event3.getTimestampPacketNodes(), 1); } else if (semaphoresFound == 2) { verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0); verifyMiAtomic(genCmdCast(*++it), timestamp5.getNode(0)); verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1); } else if (semaphoresFound == 3) { verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0); verifyMiAtomic(genCmdCast(*++it), timestamp5.getNode(1)); verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1); } } if (genCmdCast(*it)) { walkersFound++; EXPECT_EQ(3u, semaphoresFound); // semaphores from events programmed before walker } } EXPECT_EQ(1u, walkersFound); EXPECT_EQ(3u, semaphoresFound); // total number of semaphores found in cmdList } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenDispatchingThenProgramSemaphoresForWaitlist) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER = WALKER_TYPE; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockMultiDispatchInfo multiDispatchInfo(std::vector({kernel->mockKernel})); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto mockCmdQ2 = std::make_unique>(context, device.get(), props); mockCmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto &cmdStream = mockCmdQ->getCS(0); const cl_uint eventsOnWaitlist = 6; MockTimestampPacketContainer timestamp3(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp4(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp5(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); MockTimestampPacketContainer 
// Continuation of a test that starts before this line (the declaration's type name ends the previous line).
// It fills dependencies from a six-entry waitlist, dispatches a walker on mockCmdQ and checks the commands
// found in the parsed stream.
// NOTE(review): `genCmdCast(*it)` looks like it has lost its template argument list, and `×tamp7` is not a
// name declared here (presumably a mangled `&timestamp7`) — confirm against the upstream file before building.
    timestamp6(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
    MockTimestampPacketContainer timestamp7(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);

    // Two user events without nodes, then events alternating between mockCmdQ and mockCmdQ2.
    UserEvent event1;
    UserEvent event2;
    Event event3(mockCmdQ, 0, 0, 0);
    event3.addTimestampPacketNodes(timestamp3);
    Event event4(mockCmdQ2.get(), 0, 0, 0);
    event4.addTimestampPacketNodes(timestamp4);
    Event event5(mockCmdQ, 0, 0, 0);
    event5.addTimestampPacketNodes(timestamp5);
    Event event6(mockCmdQ2.get(), 0, 0, 0);
    event6.addTimestampPacketNodes(timestamp6);

    cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};

    // Collect dependencies against mockCmdQ's receiver using the OnCsr dependency type.
    EventsRequest eventsRequest(eventsOnWaitlist, waitlist, nullptr);
    CsrDependencies csrDeps;
    eventsRequest.fillCsrDependencies(csrDeps, mockCmdQ->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);

    HardwareInterface::dispatchWalker(
        *mockCmdQ,
        multiDispatchInfo,
        csrDeps,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        ×tamp7,
        CL_COMMAND_NDRANGE_KERNEL);

    HardwareParse hwParser;
    hwParser.parseCommands(cmdStream, 0);

    uint32_t semaphoresFound = 0;
    uint32_t walkersFound = 0;

    for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
        auto semaphoreCmd = genCmdCast(*it);
        if (semaphoreCmd) {
            semaphoresFound++;
            // The first three matches are checked, in order, against event3's node and event5's two nodes
            // (the events created on mockCmdQ); each check also consumes the command that follows.
            if (semaphoresFound == 1) {
                verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0);
                verifyMiAtomic(genCmdCast(*++it), timestamp3.getNode(0));
                verifyDependencyCounterValues(event3.getTimestampPacketNodes(), 1);
            } else if (semaphoresFound == 2) {
                verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0);
                verifyMiAtomic(genCmdCast(*++it), timestamp5.getNode(0));
                verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
            } else if (semaphoresFound == 3) {
                verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0);
                verifyMiAtomic(genCmdCast(*++it), timestamp5.getNode(1));
                verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
            }
        }
        if (genCmdCast(*it)) {
            walkersFound++;
            EXPECT_EQ(3u,
semaphoresFound); // semaphores from events programmed before walker } } EXPECT_EQ(1u, walkersFound); EXPECT_EQ(3u, semaphoresFound); // total number of semaphores found in cmdList } HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenItIsQueriedForCompletionStatusThenItReturnsCurrentStatus) { MockTimestampPacketContainer timestamp(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); EXPECT_FALSE(timestamp.isCompleted()); timestamp.getNode(0u)->tagForCpuAccess->packets[0].contextEnd = 0; EXPECT_FALSE(timestamp.isCompleted()); timestamp.getNode(0u)->tagForCpuAccess->packets[0].globalEnd = 0; EXPECT_TRUE(timestamp.isCompleted()); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWithMultipleNodesWhenItIsQueriedForCompletionStatusThenItReturnsCurrentStatus) { MockTimestampPacketContainer timestamp(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 2); timestamp.getNode(0u)->tagForCpuAccess->packets[0].contextEnd = 0; timestamp.getNode(0u)->tagForCpuAccess->packets[0].globalEnd = 0; EXPECT_FALSE(timestamp.isCompleted()); timestamp.getNode(1u)->tagForCpuAccess->packets[0].contextEnd = 0; timestamp.getNode(1u)->tagForCpuAccess->packets[0].globalEnd = 0; EXPECT_TRUE(timestamp.isCompleted()); } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingNonBlockedThenMakeItResident) { auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketAllocator.reset(mockTagAllocator); csr.timestampPacketWriteEnabled = true; auto cmdQ = std::make_unique>(context, device.get(), nullptr); TimestampPacketContainer previousNodes; cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, false); auto firstNode = cmdQ->timestampPacketContainer->peekNodes().at(0); csr.storeMakeResidentAllocations = true; csr.timestampPacketWriteEnabled = true; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, 
// This line holds the tail of a test that starts earlier (beginning with the remaining arguments of an
// enqueueKernel call), one complete test, and the opening of a third test.
// NOTE(review): `new MockCommandQueueHw(` and the unparameterised `MockCommandQueueHw cmdQ(` look like they
// have lost a template argument list — confirm against the upstream file before building.
                        nullptr, 0, nullptr, nullptr);
    auto secondNode = cmdQ->timestampPacketContainer->peekNodes().at(0);

    // The two nodes must sit in different base allocations, and the first one must have been made resident.
    EXPECT_NE(firstNode->getBaseGraphicsAllocation(), secondNode->getBaseGraphicsAllocation());
    EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation(), csr.taskCount));
}

// Blocked variant: the enqueue waits on a user event, so the first node's allocation must not be resident
// until that event is set to CL_COMPLETE.
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThenMakeItResident) {
    // The allocator is handed to the receiver via reset(); the trailing 1 is a constructor argument whose
    // meaning is not shown here.
    auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1);

    auto &csr = device->getUltCommandStreamReceiver();
    csr.timestampPacketAllocator.reset(mockTagAllocator);
    csr.timestampPacketWriteEnabled = true;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr));
    // Give the queue a node before the enqueue so there is a "previous" node to track.
    TimestampPacketContainer previousNodes;
    cmdQ->obtainNewTimestampPacketNodes(1, previousNodes, false);
    auto firstNode = cmdQ->timestampPacketContainer->peekNodes().at(0);

    csr.storeMakeResidentAllocations = true;
    csr.timestampPacketWriteEnabled = true;

    // userEvent has no status set yet when the kernel is enqueued behind it.
    UserEvent userEvent;
    cl_event clEvent = &userEvent;
    cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 1, &clEvent, nullptr);
    auto secondNode = cmdQ->timestampPacketContainer->peekNodes().at(0);

    EXPECT_NE(firstNode->getBaseGraphicsAllocation(), secondNode->getBaseGraphicsAllocation());
    EXPECT_FALSE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation(), csr.taskCount));
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation(), csr.taskCount));
    // Return value is ignored; NOTE(review): presumably called for its side effects — confirm.
    cmdQ->isQueueBlocked();
}

// Start of the next test (continues past the end of this line): obtains one node for the queue and marks it
// ready via setTagToReadyState before anything is enqueued.
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenDontKeepDependencyOnPreviousNodeIfItsReady) {
    device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true;

    MockCommandQueueHw cmdQ(context, device.get(), nullptr);
    TimestampPacketContainer previousNodes;
    cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false);
    auto firstNode = cmdQ.timestampPacketContainer->peekNodes().at(0);
    setTagToReadyState(firstNode);
cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ.commandStream, 0); uint32_t semaphoresFound = 0; uint32_t atomicsFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { semaphoresFound++; } if (genCmdCast(*it)) { atomicsFound++; } } uint32_t expectedSemaphoresCount = (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 2 : 0); EXPECT_EQ(expectedSemaphoresCount, semaphoresFound); EXPECT_EQ(0u, atomicsFound); } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDependencyOnPreviousNodeIfItsNotReady) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer firstNode(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 0); MockCommandQueueHw cmdQ(context, device.get(), nullptr); TimestampPacketContainer previousNodes; cmdQ.obtainNewTimestampPacketNodes(2, previousNodes, false); firstNode.add(cmdQ.timestampPacketContainer->peekNodes().at(0)); firstNode.add(cmdQ.timestampPacketContainer->peekNodes().at(1)); auto firstTag0 = firstNode.getNode(0); auto firstTag1 = firstNode.getNode(1); verifyDependencyCounterValues(&firstNode, 0); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); verifyDependencyCounterValues(&firstNode, 1); HardwareParse hwParser; hwParser.parseCommands(*cmdQ.commandStream, 0); auto it = hwParser.cmdList.begin(); verifySemaphore(genCmdCast(*it), firstTag0, 0); verifyMiAtomic(genCmdCast(*++it), firstTag0); verifySemaphore(genCmdCast(*++it), firstTag1, 0); verifyMiAtomic(genCmdCast(*++it), firstTag1); while (it != hwParser.cmdList.end()) { auto semaphoreWait = genCmdCast(*it); if (semaphoreWait) { 
EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreWait)); } it++; } } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingToOoqThenDontKeepDependencyOnPreviousNodeIfItsNotReady) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; MockCommandQueueHw cmdQ(context, device.get(), properties); TimestampPacketContainer previousNodes; cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ.commandStream, 0); uint32_t semaphoresFound = 0; uint32_t atomicsFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { semaphoresFound++; } if (genCmdCast(*it)) { atomicsFound++; } } uint32_t expectedSemaphoresCount = (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 
2 : 0); EXPECT_EQ(expectedSemaphoresCount, semaphoresFound); EXPECT_EQ(0u, atomicsFound); } HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingWithOmitTimestampPacketDependenciesThenDontKeepDependencyOnPreviousNodeIfItsNotReady) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; DebugManagerStateRestore restore; DebugManager.flags.OmitTimestampPacketDependencies.set(true); MockCommandQueueHw cmdQ(context, device.get(), nullptr); TimestampPacketContainer previousNodes; cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false); cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(*cmdQ.commandStream, 0); uint32_t semaphoresFound = 0; uint32_t atomicsFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { semaphoresFound++; } if (genCmdCast(*it)) { atomicsFound++; } } uint32_t expectedSemaphoresCount = (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 
2 : 0); EXPECT_EQ(expectedSemaphoresCount, semaphoresFound); EXPECT_EQ(0u, atomicsFound); } HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueueingThenMakeAllTimestampsResident) { TagAllocator tagAllocator(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1, 1, sizeof(TimestampPacketStorage), false); auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); auto &ultCsr = device->getUltCommandStreamReceiver(); ultCsr.timestampPacketWriteEnabled = true; ultCsr.storeMakeResidentAllocations = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); MockTimestampPacketContainer node1(*ultCsr.getTimestampPacketAllocator(), 0); MockTimestampPacketContainer node2(*ultCsr.getTimestampPacketAllocator(), 0); auto tagNode1 = tagAllocator.getTag(); node1.add(tagNode1); auto tagNode2 = tagAllocator.getTag(); node2.add(tagNode2); Event event0(cmdQ1.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); cl_event waitlist[] = {&event0, &event1}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation(), ultCsr.taskCount)); EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation(), ultCsr.taskCount)); } HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentCSRsWhenEnqueueingThenMakeAllTimestampsResident) { TagAllocator tagAllocator(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get(), 1, 1, sizeof(TimestampPacketStorage), false); auto &ultCsr = device->getUltCommandStreamReceiver(); ultCsr.timestampPacketWriteEnabled = 
true; ultCsr.storeMakeResidentAllocations = true; auto cmdQ1 = std::make_unique>(context, device.get(), nullptr); // Create second (LOW_PRIORITY) queue on the same device cl_queue_properties props[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, 0}; auto cmdQ2 = std::make_unique>(context, device.get(), props); cmdQ2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockTimestampPacketContainer node1(*ultCsr.getTimestampPacketAllocator(), 0); MockTimestampPacketContainer node2(*ultCsr.getTimestampPacketAllocator(), 0); auto tagNode1 = tagAllocator.getTag(); node1.add(tagNode1); auto tagNode2 = tagAllocator.getTag(); node2.add(tagNode2); Event event0(cmdQ1.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); cl_event waitlist[] = {&event0, &event1}; cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation(), ultCsr.taskCount)); EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation(), ultCsr.taskCount)); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingNonBlockedThenMakeItResident) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; csr.storeMakeResidentAllocations = true; MockKernelWithInternals mockKernel(*device, context); MockCommandQueueHw cmdQ(context, device.get(), nullptr); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto timestampPacketNode = cmdQ.timestampPacketContainer->peekNodes().at(0); EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation(), csr.taskCount)); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingBlockedThenMakeItResidentOnSubmit) { auto &csr = device->getUltCommandStreamReceiver(); 
csr.timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernel(*device, context); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); csr.storeMakeResidentAllocations = true; UserEvent userEvent; cl_event clEvent = &userEvent; cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &clEvent, nullptr); auto timestampPacketNode = cmdQ->timestampPacketContainer->peekNodes().at(0); EXPECT_FALSE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation(), csr.taskCount)); userEvent.setStatus(CL_COMPLETE); EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation(), csr.taskCount)); cmdQ->isQueueBlocked(); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenVirtualEventIncrementsRefInternalAndDecrementsAfterCompleteEvent) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; MockKernelWithInternals mockKernelWithInternals(*device, context); auto mockKernel = mockKernelWithInternals.mockKernel; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); UserEvent userEvent; cl_event waitlist = &userEvent; auto internalCount = userEvent.getRefInternalCount(); cmdQ->enqueueKernel(mockKernel, 1, nullptr, gws, nullptr, 1, &waitlist, nullptr); EXPECT_EQ(internalCount + 1, userEvent.getRefInternalCount()); userEvent.setStatus(CL_COMPLETE); cmdQ->isQueueBlocked(); EXPECT_EQ(internalCount, mockKernel->getRefInternalCount()); } TEST_F(TimestampPacketTests, givenDispatchSizeWhenAskingForNewTimestampsThenObtainEnoughTags) { size_t dispatchSize = 3; mockCmdQ->timestampPacketContainer = std::make_unique(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 0); EXPECT_EQ(0u, mockCmdQ->timestampPacketContainer->peekNodes().size()); TimestampPacketContainer previousNodes; mockCmdQ->obtainNewTimestampPacketNodes(dispatchSize, previousNodes, false); EXPECT_EQ(dispatchSize, 
mockCmdQ->timestampPacketContainer->peekNodes().size()); } HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutKernelThenInheritTimestampPacketsWithoutSubmitting) { device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); MockKernelWithInternals mockKernel(*device, context); cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); // obtain first TimestampPacketStorage TimestampPacketContainer cmdQNodes; cmdQNodes.assignAndIncrementNodesRefCounts(*cmdQ->timestampPacketContainer); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); UserEvent userEvent; Event eventWithoutContainer(nullptr, 0, 0, 0); uint32_t numEventsWithContainer = 2; uint32_t numEventsOnWaitlist = numEventsWithContainer + 2; // UserEvent + eventWithoutContainer cl_event waitlist[] = {&event0, &event1, &userEvent, &eventWithoutContainer}; cl_event clOutEvent; cmdQ->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, &clOutEvent); auto outEvent = castToObject(clOutEvent); EXPECT_EQ(cmdQ->timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // no new nodes obtained EXPECT_EQ(1u, cmdQ->timestampPacketContainer->peekNodes().size()); auto &eventsNodes = outEvent->getTimestampPacketNodes()->peekNodes(); EXPECT_EQ(numEventsWithContainer + 1, eventsNodes.size()); // numEventsWithContainer + command queue EXPECT_EQ(cmdQNodes.peekNodes().at(0), eventsNodes.at(0)); EXPECT_EQ(event0.getTimestampPacketNodes()->peekNodes().at(0), eventsNodes.at(1)); EXPECT_EQ(event1.getTimestampPacketNodes()->peekNodes().at(0), 
eventsNodes.at(2)); clReleaseEvent(clOutEvent); userEvent.setStatus(CL_COMPLETE); cmdQ->isQueueBlocked(); } HWTEST_F(TimestampPacketTests, givenBlockedEnqueueWithoutKernelWhenSubmittingThenDispatchBlockedCommands) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto mockCsr = new MockCsrHw2(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); device->resetCommandStreamReceiver(mockCsr); mockCsr->timestampPacketWriteEnabled = true; mockCsr->storeFlushedTaskStream = true; auto cmdQ0 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); auto &secondEngine = device->getEngine(HwHelperHw::lowPriorityEngineType, true); static_cast *>(secondEngine.commandStreamReceiver)->timestampPacketWriteEnabled = true; auto cmdQ1 = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ1->gpgpuEngine = &secondEngine; cmdQ1->timestampPacketContainer = std::make_unique(); EXPECT_NE(&cmdQ0->getGpgpuCommandStreamReceiver(), &cmdQ1->getGpgpuCommandStreamReceiver()); MockTimestampPacketContainer node0(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ0.get(), 0, 0, 0); // on the same CSR event0.addTimestampPacketNodes(node0); Event event1(cmdQ1.get(), 0, 0, 0); // on different CSR event1.addTimestampPacketNodes(node1); uint32_t numEventsOnWaitlist = 3; uint32_t commands[] = {CL_COMMAND_MARKER, CL_COMMAND_BARRIER}; for (int i = 0; i < 2; i++) { UserEvent userEvent; cl_event waitlist[] = {&event0, &event1, &userEvent}; if (commands[i] == CL_COMMAND_MARKER) { cmdQ0->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, nullptr); } else if (commands[i] == CL_COMMAND_BARRIER) { cmdQ0->enqueueBarrierWithWaitList(numEventsOnWaitlist, waitlist, nullptr); } else { EXPECT_TRUE(false); } auto initialCsrStreamOffset = mockCsr->commandStream.getUsed(); 
userEvent.setStatus(CL_COMPLETE); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize); taskStream.getSpace(mockCsr->storedTaskStreamSize); hwParserCsr.parseCommands(mockCsr->commandStream, initialCsrStreamOffset); hwParserCmdQ.parseCommands(taskStream, 0); auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto expectedQueueSemaphoresCount = 1u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) { expectedQueueSemaphoresCount += 2; } EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); verifySemaphore(genCmdCast(*(queueSemaphores[0])), node0.getNode(0), 0); auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); EXPECT_EQ(1u, csrSemaphores.size()); verifySemaphore(genCmdCast(*(csrSemaphores[0])), node1.getNode(0), 0); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode); cmdQ0->isQueueBlocked(); } } HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingMarkerWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer 
node2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); Event event0(cmdQ.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); uint32_t numEventsOnWaitlist = 2; cl_event waitlist[] = {&event0, &event1}; cmdQ->enqueueMarkerWithWaitList(numEventsOnWaitlist, waitlist, nullptr); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; hwParserCsr.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); hwParserCmdQ.parseCommands(*cmdQ->commandStream, 0); auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); EXPECT_EQ(1u, csrSemaphores.size()); verifySemaphore(genCmdCast(*(csrSemaphores[0])), node2.getNode(0), 0); auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto expectedQueueSemaphoresCount = 1u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) { expectedQueueSemaphoresCount += 2; } EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); verifySemaphore(genCmdCast(*(queueSemaphores[0])), node1.getNode(0), 0); } HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; MockContext context2(device2.get()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); auto cmdQ2 = std::make_unique>(&context2, device2.get(), nullptr); MockTimestampPacketContainer node1(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); MockTimestampPacketContainer node2(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1); 
Event event0(cmdQ.get(), 0, 0, 0); event0.addTimestampPacketNodes(node1); Event event1(cmdQ2.get(), 0, 0, 0); event1.addTimestampPacketNodes(node2); uint32_t numEventsOnWaitlist = 2; cl_event waitlist[] = {&event0, &event1}; cmdQ->enqueueBarrierWithWaitList(numEventsOnWaitlist, waitlist, nullptr); HardwareParse hwParserCsr; HardwareParse hwParserCmdQ; hwParserCsr.parseCommands(device->getUltCommandStreamReceiver().commandStream, 0); hwParserCmdQ.parseCommands(*cmdQ->commandStream, 0); auto csrSemaphores = findAll(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); EXPECT_EQ(1u, csrSemaphores.size()); verifySemaphore(genCmdCast(*(csrSemaphores[0])), node2.getNode(0), 0); auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto expectedQueueSemaphoresCount = 1u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo())) { expectedQueueSemaphoresCount += 2; } EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); verifySemaphore(genCmdCast(*(queueSemaphores[0])), node1.getNode(0), 0); } HWTEST_F(TimestampPacketTests, givenEmptyWaitlistAndNoOutputEventWhenEnqueueingMarkerThenDoNothing) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); EXPECT_EQ(0u, cmdQ->timestampPacketContainer->peekNodes().size()); EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); } HWTEST_F(TimestampPacketTests, whenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); MockCommandQueueHw cmdQ(context, device.get(), nullptr); MockKernelWithInternals mockKernel(*device, context); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); // obtain first 
TimestampPacketStorage TimestampPacketContainer cmdQNodes; cmdQNodes.assignAndIncrementNodesRefCounts(*cmdQ.timestampPacketContainer); cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr); EXPECT_EQ(cmdQ.timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // dont obtain new node EXPECT_EQ(1u, cmdQ.timestampPacketContainer->peekNodes().size()); EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired); } HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteDisabledWhenEnqueueingBarrierThenDontRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = false; EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); MockCommandQueueHw cmdQ(context, device.get(), nullptr); cmdQ.enqueueBarrierWithWaitList(0, nullptr, nullptr); EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); } HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenRequestPipeControlOnCsrFlush) { auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); MockCommandQueueHw cmdQ(context, device.get(), nullptr); auto userEvent = make_releaseable(); cl_event waitlist[] = {userEvent.get()}; cmdQ.enqueueBarrierWithWaitList(1, waitlist, nullptr); EXPECT_TRUE(csr.stallingPipeControlOnNextFlushRequired); userEvent->setStatus(CL_COMPLETE); } HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSizeThenAddSizeForPipeControl) { auto &csr = device->getUltCommandStreamReceiver(); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); csr.stallingPipeControlOnNextFlushRequired = false; auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); csr.stallingPipeControlOnNextFlushRequired = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); size_t 
extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL); EXPECT_EQ(sizeWithPcRequest, extendedSize); } HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstimatingCsrStreamSizeThenAddSizeForPipeControlForWrite) { auto &csr = device->getUltCommandStreamReceiver(); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); TimestampPacketContainer barrierTimestampPacketNode; barrierTimestampPacketNode.add(csr.getTimestampPacketAllocator()->getTag()); flags.barrierTimestampPacketNodes = &barrierTimestampPacketNode; csr.stallingPipeControlOnNextFlushRequired = false; auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); csr.stallingPipeControlOnNextFlushRequired = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHardwareInfo()); EXPECT_EQ(sizeWithPcRequest, extendedSize); } HWTEST_F(TimestampPacketTests, givenInstructionCacheRequesWhenSizeIsEstimatedThenPipeControlIsAdded) { auto &csr = device->getUltCommandStreamReceiver(); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); csr.requiresInstructionCacheFlush = false; auto sizeWithoutPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); csr.requiresInstructionCacheFlush = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); size_t extendedSize = sizeWithoutPcRequest + sizeof(typename FamilyType::PIPE_CONTROL); EXPECT_EQ(sizeWithPcRequest, extendedSize); } HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPipeControlAndResetRequestFlag) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &csr = 
device->getUltCommandStreamReceiver(); csr.stallingPipeControlOnNextFlushRequired = true; csr.timestampPacketWriteEnabled = true; MockCommandQueueHw cmdQ(context, device.get(), nullptr); MockKernelWithInternals mockKernel(*device, context); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_FALSE(csr.stallingPipeControlOnNextFlushRequired); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream, 0); auto secondEnqueueOffset = csr.commandStream.getUsed(); auto pipeControl = genCmdCast(*hwParser.cmdList.begin()); EXPECT_NE(nullptr, pipeControl); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(secondEnqueueOffset, csr.commandStream.getUsed()); // nothing programmed when flag is not set } HWTEST_F(TimestampPacketTests, givenKernelWhichDoesntRequireFlushWhenEnqueueingKernelThenOneNodeIsCreated) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(false); auto &csr = device->getUltCommandStreamReceiver(); csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get()); csr.timestampPacketAllocator.reset(mockTagAllocator); auto cmdQ = std::make_unique>(context, device.get(), nullptr); // obtain first node for cmdQ and event1 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto size = cmdQ->timestampPacketContainer->peekNodes().size(); EXPECT_EQ(size, 1u); } HWTEST_F(TimestampPacketTests, givenKernelWhichRequiresFlushWhenEnqueueingKernelThenTwoNodesAreCreated) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(true); auto &csr = device->getUltCommandStreamReceiver(); 
csr.timestampPacketWriteEnabled = true; auto mockTagAllocator = new MockTagAllocator<>(device->getRootDeviceIndex(), executionEnvironment->memoryManager.get()); csr.timestampPacketAllocator.reset(mockTagAllocator); auto cmdQ = std::make_unique>(context, device.get(), nullptr); kernel->mockKernel->svmAllocationsRequireCacheFlush = true; // obtain first node for cmdQ and event1 cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto node1 = cmdQ->timestampPacketContainer->peekNodes().at(0); auto node2 = cmdQ->timestampPacketContainer->peekNodes().at(1); auto size = cmdQ->timestampPacketContainer->peekNodes().size(); EXPECT_EQ(size, 2u); EXPECT_NE(nullptr, node1); EXPECT_NE(nullptr, node2); EXPECT_NE(node1, node2); } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/transfer_properties_tests.cpp000066400000000000000000000127321363734646600313270ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/properties_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "gtest/gtest.h" using namespace NEO; TEST(TransferPropertiesTest, givenTransferPropertiesCreatedWhenDefaultDebugSettingThenLockPtrIsNotSet) { MockBuffer buffer; size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true); EXPECT_EQ(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenAllocationInNonSystemPoolWhenTransferPropertiesAreCreatedForMapBufferAndCpuTransferIsRequestedThenLockPtrIsSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext ctx; 
ctx.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true); EXPECT_NE(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenAllocationInNonSystemPoolWhenTransferPropertiesAreCreatedForMapBufferAndCpuTransferIsNotRequestedThenLockPtrIsNotSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext ctx; ctx.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, false); EXPECT_EQ(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenAllocationInSystemPoolWhenTransferPropertiesAreCreatedForMapBufferThenLockPtrIsNotSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext ctx; ctx.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::System4KBPages); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true); EXPECT_EQ(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenTransferPropertiesCreatedWhenMemoryManagerInMemObjectIsNotSetThenLockPtrIsNotSet) { MockBuffer 
buffer; size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true); EXPECT_EQ(nullptr, transferProperties.lockedPtr); } TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsSetThenItIsReturnedForReadWrite) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, true, executionEnvironment); MockContext ctx; ctx.memoryManager = &memoryManager; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); static_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(buffer.get(), CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true); ASSERT_NE(nullptr, transferProperties.lockedPtr); EXPECT_EQ(transferProperties.lockedPtr, transferProperties.getCpuPtrForReadWrite()); } TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsNotSetThenItIsNotReturnedForReadWrite) { MockBuffer buffer; size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true); ASSERT_EQ(nullptr, transferProperties.lockedPtr); EXPECT_NE(transferProperties.lockedPtr, transferProperties.getCpuPtrForReadWrite()); } TEST(TransferPropertiesTest, givenTransferPropertiesWhenLockedPtrIsSetThenLockedPtrWithMemObjOffsetIsReturnedForReadWrite) { MockBuffer buffer; void *lockedPtr = reinterpret_cast(0x1000); auto memObjOffset = MemoryConstants::cacheLineSize; buffer.offset = memObjOffset; size_t offset = 0; size_t size = 4096u; TransferProperties transferProperties(&buffer, CL_COMMAND_MAP_BUFFER, 0, false, &offset, &size, nullptr, true); transferProperties.lockedPtr = lockedPtr; auto expectedPtr = ptrOffset(lockedPtr, memObjOffset); EXPECT_EQ(expectedPtr, transferProperties.getCpuPtrForReadWrite()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/helpers/uint16_sse4_tests.cpp000066400000000000000000000066311363734646600273140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/uint16_sse4.h" #include "gtest/gtest.h" using namespace NEO; TEST(Uint16_Sse4, booleanOperator) { EXPECT_TRUE(static_cast(uint16x8_t::mask())); EXPECT_FALSE(static_cast(uint16x8_t::zero())); } TEST(Uint16_Sse4, logicalAnd) { EXPECT_TRUE(uint16x8_t::mask() && uint16x8_t::mask()); EXPECT_FALSE(uint16x8_t::mask() && uint16x8_t::zero()); EXPECT_FALSE(uint16x8_t::zero() && uint16x8_t::mask()); EXPECT_FALSE(uint16x8_t::zero() && uint16x8_t::zero()); } TEST(Uint16_Sse4, constructor) { auto one = uint16x8_t::one(); uint16x8_t alsoOne(one.value); EXPECT_EQ(0, memcmp(&alsoOne, &one, sizeof(uint16x8_t))); } TEST(Uint16_Sse4, replicatingConstructor) { uint16x8_t allSevens(7u); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(7u, allSevens.get(i)); } } ALIGNAS(32) static const uint16_t laneValues[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; TEST(Uint16_Sse4, pointerConstructor) { uint16x8_t lanes(laneValues); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i), lanes.get(i)); } } TEST(Uint16_Sse4, load) { uint16x8_t lanes; lanes.load(laneValues); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i), lanes.get(i)); } } TEST(Uint16_Sse4, loadUnaligned) { uint16x8_t lanes; lanes.loadUnaligned(laneValues + 1); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i + 1), lanes.get(i)); } } TEST(Uint16_Sse4, store) { uint16_t *alignedMemory = reinterpret_cast(alignedMalloc(1024, 32)); uint16x8_t lanes(laneValues); lanes.store(alignedMemory); for (int i = 0; i < uint16x8_t::numChannels; ++i) { 
EXPECT_EQ(static_cast(i), alignedMemory[i]); } alignedFree(alignedMemory); } TEST(Uint16_Sse4, storeUnaligned) { uint16_t *alignedMemory = reinterpret_cast(alignedMalloc(1024, 32)); uint16x8_t lanes(laneValues); lanes.storeUnaligned(alignedMemory + 1); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i), (alignedMemory + 1)[i]); } alignedFree(alignedMemory); } TEST(Uint16_Sse4, decrementingAssignmentOperator) { uint16x8_t result(laneValues); result -= uint16x8_t::one(); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i - 1), result.get(i)); } } TEST(Uint16_Sse4, incrementingAssignmentOperator) { uint16x8_t result(laneValues); result += uint16x8_t::one(); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(static_cast(i + 1), result.get(i)); } } TEST(Uint16_Sse4, blend) { uint16x8_t a(uint16x8_t::one()); uint16x8_t b(uint16x8_t::zero()); uint16x8_t c; // c = mask ? a : b c = blend(a, b, uint16x8_t::mask()); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(a.get(i), c.get(i)); } // c = mask ? 
a : b c = blend(a, b, uint16x8_t::zero()); for (int i = 0; i < uint16x8_t::numChannels; ++i) { EXPECT_EQ(b.get(i), c.get(i)); } } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/ult_limits.h000066400000000000000000000003131363734646600256270ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { constexpr uint32_t maxRootDeviceCount = 3u; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/unit_test_helper.h000066400000000000000000000031321363734646600270210ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aux_translation.h" #include "shared/source/helpers/hw_cmds.h" #include "opencl/source/helpers/properties_helper.h" namespace NEO { class Kernel; struct HardwareInfo; template struct UnitTestHelper { static bool isL3ConfigProgrammable(); static bool evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel); static bool isPageTableManagerSupported(const HardwareInfo &hwInfo); static bool isTimestampPacketWriteSupported(); static bool isExpectMemoryNotEqualSupported(); static uint32_t getDefaultSshUsage(); static uint32_t getAppropriateThreadArbitrationPolicy(uint32_t policy); static bool evaluateGshAddressForScratchSpace(uint64_t usedScratchGpuAddress, uint64_t retrievedGshAddress); static bool isSynchronizationWArequired(const HardwareInfo &hwInfo); static bool isPipeControlWArequired(const HardwareInfo &hwInfo); static bool isAdditionalMiSemaphoreWaitRequired(const HardwareInfo &hwInfo); static bool isAdditionalMiSemaphoreWait(const typename GfxFamily::MI_SEMAPHORE_WAIT &semaphoreWait); static uint64_t getMemoryAddress(const typename GfxFamily::MI_ATOMIC &atomic); static const bool tiledImagesSupported; static const uint32_t smallestTestableSimdSize; static const AuxTranslationMode 
requiredAuxTranslationMode; static const bool useFullRowForLocalIdsGeneration; static const bool additionalMiFlushDwRequired; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/unit_test_helper.inl000066400000000000000000000052641363734646600273640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ namespace NEO { template bool UnitTestHelper::isL3ConfigProgrammable() { return true; }; template bool UnitTestHelper::evaluateDshUsage(size_t sizeBeforeEnqueue, size_t sizeAfterEnqueue, Kernel *kernel) { if (sizeBeforeEnqueue != sizeAfterEnqueue) { return true; } return false; } template bool UnitTestHelper::isPageTableManagerSupported(const HardwareInfo &hwInfo) { return false; } template bool UnitTestHelper::isTimestampPacketWriteSupported() { return false; } template bool UnitTestHelper::isExpectMemoryNotEqualSupported() { return false; } template uint32_t UnitTestHelper::getDefaultSshUsage() { return 0; } template inline uint32_t UnitTestHelper::getAppropriateThreadArbitrationPolicy(uint32_t policy) { return policy; } template bool UnitTestHelper::evaluateGshAddressForScratchSpace(uint64_t usedScratchGpuAddress, uint64_t retrievedGshAddress) { return usedScratchGpuAddress == retrievedGshAddress; } template bool UnitTestHelper::isSynchronizationWArequired(const HardwareInfo &hwInfo) { return false; } template bool UnitTestHelper::isPipeControlWArequired(const HardwareInfo &hwInfo) { return false; } template bool UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(const HardwareInfo &hwInfo) { return false; } template bool UnitTestHelper::isAdditionalMiSemaphoreWait(const typename GfxFamily::MI_SEMAPHORE_WAIT &semaphoreWait) { return false; } template inline uint64_t UnitTestHelper::getMemoryAddress(const typename GfxFamily::MI_ATOMIC &atomic) { return atomic.getMemoryAddress() | ((static_cast(atomic.getMemoryAddressHigh())) << 32); } template const bool 
UnitTestHelper::tiledImagesSupported = true; template const uint32_t UnitTestHelper::smallestTestableSimdSize = 8; template const AuxTranslationMode UnitTestHelper::requiredAuxTranslationMode = AuxTranslationMode::Builtin; template const bool UnitTestHelper::useFullRowForLocalIdsGeneration = false; template const bool UnitTestHelper::additionalMiFlushDwRequired = false; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/validator_tests.cpp000066400000000000000000000176151363734646600272210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/api/cl_types.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/error_mappers.h" #include "opencl/source/helpers/validators.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; template struct ValidatorFixture : public ::testing::Test { }; TYPED_TEST_CASE_P(ValidatorFixture); TYPED_TEST_P(ValidatorFixture, nullPtr) { TypeParam object = nullptr; cl_int rv = NullObjectErrorMapper::retVal; EXPECT_EQ(rv, validateObjects(object)); } TYPED_TEST_P(ValidatorFixture, randomMemory) { // 6*uint64_t to satisfy memory requirements // we need 2 before object (dispatchTable) // and 4 of object (magic) uint64_t randomMemory[6] = { 0xdeadbeef, }; TypeParam object = (TypeParam)(randomMemory + 2); cl_int rv = InvalidObjectErrorMapper::retVal; EXPECT_EQ(rv, validateObjects(object)); } REGISTER_TYPED_TEST_CASE_P( ValidatorFixture, nullPtr, randomMemory); // Define new command types to run the parameterized tests typedef ::testing::Types< cl_command_queue, device_queue, // internal type cl_context, cl_device_id, cl_event, cl_kernel, cl_mem, cl_platform_id, cl_program, uint64_t /*cl_queue_properties*/ *, 
cl_sampler> ValidatorParams; INSTANTIATE_TYPED_TEST_CASE_P(Validator, ValidatorFixture, ValidatorParams); TEST(GenericValidator, nullCTXTnullCQ) { cl_context context = nullptr; cl_command_queue command_queue = nullptr; EXPECT_EQ(CL_INVALID_CONTEXT, validateObjects(context, command_queue)); } TEST(UserPointer, ExpectNonNullUserPtr) { void *ptr = nullptr; EXPECT_EQ(CL_INVALID_VALUE, validateObjects(ptr)); } TEST(UserPointer, DontValidateUserPointersForValidity) { void *ptr = ptrGarbage; EXPECT_EQ(CL_SUCCESS, validateObjects(ptr)); } TEST(EventWaitList, zeroCount_nonNullPointer) { cl_event eventList = (cl_event)ptrGarbage; EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, validateObjects(EventWaitList(0, &eventList))); } TEST(EventWaitList, zeroCount_nullPointer) { EXPECT_EQ(CL_SUCCESS, validateObjects(EventWaitList(0, nullptr))); } TEST(EventWaitList, nonZeroCount_nullPointer) { EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, validateObjects(EventWaitList(1, nullptr))); } TEST(EventWaitList, nonZeroCount_noNullPointer) { cl_event eventList = (cl_event)ptrGarbage; EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, validateObjects(EventWaitList(1, &eventList))); } TEST(DeviceList, zeroCount_nonNullPointer) { cl_device_id devList = (cl_device_id)ptrGarbage; EXPECT_EQ(CL_INVALID_VALUE, validateObjects(DeviceList(0, &devList))); } TEST(DeviceList, zeroCount_nullPointer) { EXPECT_EQ(CL_SUCCESS, validateObjects(DeviceList(0, nullptr))); } TEST(DeviceList, nonZeroCount_nullPointer) { EXPECT_EQ(CL_INVALID_VALUE, validateObjects(DeviceList(1, nullptr))); } TEST(DeviceList, nonZeroCount_noNullPointer) { cl_device_id devList = (cl_device_id)ptrGarbage; EXPECT_EQ(CL_INVALID_DEVICE, validateObjects(DeviceList(1, &devList))); } TEST(MemObjList, zeroCount_nonNullPointer) { cl_mem memList = static_cast(ptrGarbage); EXPECT_EQ(CL_INVALID_VALUE, validateObjects(MemObjList(0, &memList))); } TEST(MemObjList, zeroCount_nullPointer) { EXPECT_EQ(CL_SUCCESS, validateObjects(MemObjList(0, nullptr))); } TEST(MemObjList, 
nonZeroCount_nullPointer) { EXPECT_EQ(CL_INVALID_VALUE, validateObjects(MemObjList(1, nullptr))); } TEST(MemObjList, nonZeroCount_noNullPointer) { cl_mem memList = static_cast(ptrGarbage); EXPECT_EQ(CL_INVALID_MEM_OBJECT, validateObjects(MemObjList(1, &memList))); } TEST(MemObjList, nonZeroCount_validPointer) { std::unique_ptr buffer(new MockBuffer()); cl_mem memList = static_cast(buffer.get()); EXPECT_EQ(CL_SUCCESS, validateObjects(MemObjList(1, &memList))); } TEST(NonZeroBufferSizeValidator, zero) { auto bsv = (NonZeroBufferSize)0; EXPECT_EQ(CL_INVALID_BUFFER_SIZE, validateObjects(bsv)); } TEST(NonZeroBufferSizeValidator, nonZero) { auto bsv = (NonZeroBufferSize)~0; EXPECT_EQ(CL_SUCCESS, validateObjects(bsv)); } TEST(Platform, givenNullPlatformThenReturnInvalidPlatform) { cl_platform_id platform = nullptr; EXPECT_EQ(CL_INVALID_PLATFORM, validateObjects(platform)); } TEST(Platform, givenPlatformThenReturnSUCCESS) { MockPlatform platform; cl_platform_id clPlatformId = &platform; EXPECT_EQ(CL_SUCCESS, validateObjects(clPlatformId)); } typedef ::testing::TestWithParam PatternSizeValid; TEST_P(PatternSizeValid, valid) { auto psv = (PatternSize)GetParam(); EXPECT_EQ(CL_SUCCESS, validateObjects(psv)); } INSTANTIATE_TEST_CASE_P(PatternSize, PatternSizeValid, ::testing::Values(1, 2, 4, 8, 16, 32, 64, 128)); typedef ::testing::TestWithParam PatternSizeInvalid; TEST_P(PatternSizeInvalid, invalid) { auto psv = (PatternSize)GetParam(); EXPECT_EQ(CL_INVALID_VALUE, validateObjects(psv)); } INSTANTIATE_TEST_CASE_P(PatternSize, PatternSizeInvalid, ::testing::Values(0, 3, 5, 256, 512, 1024)); TEST(WithCastToInternal, nullpointer) { Context *pContext = nullptr; cl_context context = nullptr; auto ret = WithCastToInternal(context, &pContext); EXPECT_EQ(ret, nullptr); } TEST(WithCastToInternal, nonnullpointer) { Context *pContext = nullptr; auto temp = std::unique_ptr(new MockContext()); cl_context context = temp.get(); auto ret = WithCastToInternal(context, &pContext); EXPECT_NE(ret, 
nullptr); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenValidOriginAndRegionThenReturnSuccess) { size_t origin[3] = {8, 0, 0}; size_t region[3] = {8, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_SUCCESS, ret); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenInvalidOriginThenReturnFailure) { size_t origin[3] = {1, 0, 0}; size_t region[3] = {8, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenInvalidRegionThenReturnFailure) { size_t origin[3] = {8, 0, 0}; size_t region[3] = {1, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenNullOriginThenReturnFailure) { size_t *origin = nullptr; size_t region[3] = {1, 0, 0}; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(validateYuvOperation, GivenValidateYuvOperationWhenNullRegionThenReturnFailure) { size_t origin[3] = {8, 0, 0}; size_t *region = nullptr; auto ret = validateYuvOperation(origin, region); EXPECT_EQ(CL_INVALID_VALUE, ret); } TEST(areNotNullptr, WhenGivenAllNonNullParamsTheReturnsTrue) { int a = 0; int b = 0; int c = 0; EXPECT_TRUE(areNotNullptr(&a)); EXPECT_TRUE(areNotNullptr(&a, &b)); EXPECT_TRUE(areNotNullptr(&a, &b, &c)); } TEST(areNotNullptr, WhenGivenAllNullParamsTheReturnsFalse) { int *a = nullptr; int *b = nullptr; int *c = nullptr; EXPECT_FALSE(areNotNullptr(a)); EXPECT_FALSE(areNotNullptr(a, b)); EXPECT_FALSE(areNotNullptr(a, b, c)); } TEST(areNotNullptr, WhenGivenNullParameterAmongNonNullParamsTheReturnsFalse) { int *a = nullptr; int b = 0; int c = 0; EXPECT_FALSE(areNotNullptr(a)); EXPECT_FALSE(areNotNullptr(a, &b)); EXPECT_FALSE(areNotNullptr(&b, a)); EXPECT_FALSE(areNotNullptr(a, &b, &c)); EXPECT_FALSE(areNotNullptr(&b, a, &c)); } 
compute-runtime-20.13.16352/opencl/test/unit_test/helpers/variable_backup.h000066400000000000000000000011711363734646600265570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once template class VariableBackup { public: VariableBackup(T *ptr) : pValue(ptr) { oldValue = *ptr; } VariableBackup(T *ptr, T &&newValue) : pValue(ptr) { oldValue = *ptr; *pValue = newValue; } VariableBackup(T *ptr, T &newValue) : pValue(ptr) { oldValue = *ptr; *pValue = newValue; } ~VariableBackup() { *pValue = oldValue; } void operator=(const T &val) { *pValue = val; } private: T oldValue; T *pValue; }; compute-runtime-20.13.16352/opencl/test/unit_test/helpers/windows/000077500000000000000000000000001363734646600247665ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/helpers/windows/CMakeLists.txt000066400000000000000000000007071363734646600275320ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_helpers_windows ${CMAKE_CURRENT_SOURCE_DIR}/kmd_notify_windows_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_function.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_function.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_helpers_windows}) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/helpers/windows/gl_helper_tests.cpp000066400000000000000000000035741363734646600306660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "shared/source/os_interface/windows/windows_wrapper.h" #include "opencl/source/helpers/windows/gl_helper.h" #include "opencl/test/unit_test/helpers/windows/mock_function.h" #include "test.h" #include "gtest/gtest.h" #include typedef const char *(__cdecl *funcType)(); namespace NEO { class 
glFunctionHelperMock : public glFunctionHelper { public: glFunctionHelperMock(OsLibrary *glLibrary, const std::string &functionName) : glFunctionHelper(glLibrary, functionName) {} using glFunctionHelper::glFunctionPtr; }; TEST(glFunctionHelper, whenCreateGlFunctionHelperThenSetGlFunctionPtrToLoadAnotherFunctions) { std::unique_ptr glLibrary(OsLibrary::load("mock_opengl32.dll")); EXPECT_TRUE(glLibrary->isLoaded()); glFunctionHelperMock loader(glLibrary.get(), "mockLoader"); funcType function1 = ConvertibleProcAddr{loader.glFunctionPtr("realFunction")}; funcType function2 = loader["realFunction"]; EXPECT_STREQ(function1(), function2()); } TEST(glFunctionHelper, givenNonExistingFunctionNameWhenCreateGlFunctionHelperThenNullptr) { std::unique_ptr glLibrary(OsLibrary::load("mock_opengl32.dll")); EXPECT_TRUE(glLibrary->isLoaded()); glFunctionHelper loader(glLibrary.get(), "mockLoader"); funcType function = loader["nonExistingFunction"]; EXPECT_EQ(nullptr, function); } TEST(glFunctionHelper, givenRealFunctionNameWhenCreateGlFunctionHelperThenGetPointerToAppropriateFunction) { std::unique_ptr glLibrary(OsLibrary::load("mock_opengl32.dll")); EXPECT_TRUE(glLibrary->isLoaded()); glFunctionHelper loader(glLibrary.get(), "mockLoader"); funcType function = loader["realFunction"]; EXPECT_STREQ(realFunction(), function()); } }; // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/helpers/windows/kmd_notify_windows_tests.cpp000066400000000000000000000050701363734646600326330ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/source/os_interface/windows/sys_calls.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "test.h" namespace NEO { namespace SysCalls { extern BOOL systemPowerStatusRetVal; extern BYTE systemPowerStatusACLineStatusOverride; } // 
namespace SysCalls class MockKmdNotifyHelper : public KmdNotifyHelper { public: using KmdNotifyHelper::acLineConnected; using KmdNotifyHelper::getBaseTimeout; using KmdNotifyHelper::updateAcLineStatus; MockKmdNotifyHelper(const KmdNotifyProperties *newProperties) : KmdNotifyHelper(newProperties){}; }; TEST(KmdNotifyWindowsTests, whenGetSystemPowerStatusReturnSuccessThenUpdateAcLineStatus) { auto properties = &(defaultHwInfo->capabilityTable.kmdNotifyProperties); MockKmdNotifyHelper helper(properties); EXPECT_TRUE(helper.acLineConnected); VariableBackup systemPowerStatusRetValBkp(&SysCalls::systemPowerStatusRetVal); VariableBackup systemPowerStatusACLineStatusOverrideBkp(&SysCalls::systemPowerStatusACLineStatusOverride); systemPowerStatusRetValBkp = 1; systemPowerStatusACLineStatusOverrideBkp = 0; helper.updateAcLineStatus(); EXPECT_FALSE(helper.acLineConnected); systemPowerStatusACLineStatusOverrideBkp = 1; helper.updateAcLineStatus(); EXPECT_TRUE(helper.acLineConnected); } TEST(KmdNotifyWindowsTests, whenGetSystemPowerStatusReturnErrorThenDontUpdateAcLineStatus) { auto properties = &(defaultHwInfo->capabilityTable.kmdNotifyProperties); MockKmdNotifyHelper helper(properties); EXPECT_TRUE(helper.acLineConnected); VariableBackup systemPowerStatusRetValBkp(&SysCalls::systemPowerStatusRetVal); VariableBackup systemPowerStatusACLineStatusOverrideBkp(&SysCalls::systemPowerStatusACLineStatusOverride); systemPowerStatusRetValBkp = 0; systemPowerStatusACLineStatusOverrideBkp = 0; helper.updateAcLineStatus(); EXPECT_TRUE(helper.acLineConnected); } TEST(KmdNotifyWindowsTests, givenTaskCountDiffGreaterThanOneWhenBaseTimeoutRequestedThenDontMultiply) { auto localProperties = (defaultHwInfo->capabilityTable.kmdNotifyProperties); localProperties.delayKmdNotifyMicroseconds = 10; const int64_t multiplier = 10; MockKmdNotifyHelper helper(&localProperties); EXPECT_EQ(localProperties.delayKmdNotifyMicroseconds, helper.getBaseTimeout(multiplier)); } } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/helpers/windows/mock_function.cpp000066400000000000000000000002461363734646600303320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_function.h" const char *realFunction() { return "value"; } compute-runtime-20.13.16352/opencl/test/unit_test/helpers/windows/mock_function.h000066400000000000000000000001671363734646600300010ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ const char *realFunction(); compute-runtime-20.13.16352/opencl/test/unit_test/igdrcl_tests_pch.cpp000066400000000000000000000002171363734646600256560ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/igdrcl_tests_pch.h" compute-runtime-20.13.16352/opencl/test/unit_test/igdrcl_tests_pch.h000066400000000000000000000015611363734646600253260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/abort.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/kmd_notify_properties.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/sku_info/sku_info_base.h" #include "opencl/test/unit_test/gen_common/gen_cmd_parse.h" #include "test.h" #include "third_party/opencl_headers/CL/cl.h" #include #include #include #include #include #include #include #include #include #include 
compute-runtime-20.13.16352/opencl/test/unit_test/indirect_heap/000077500000000000000000000000001363734646600244305ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.cpp000066400000000000000000000010171363734646600314770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h" #include "opencl/source/command_queue/command_queue.h" namespace NEO { void IndirectHeapFixture::SetUp(CommandQueue *pCmdQ) { pDSH = &pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); pSSH = &pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 4096); pIOH = &pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 4096); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h000066400000000000000000000006621363734646600311510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/indirect_heap/indirect_heap.h" namespace NEO { class CommandQueue; struct IndirectHeapFixture { virtual void SetUp(CommandQueue *pCmdQ); virtual void TearDown() { } IndirectHeap *pDSH = nullptr; IndirectHeap *pIOH = nullptr; IndirectHeap *pSSH = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/kernel/000077500000000000000000000000001363734646600231125ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/kernel/CMakeLists.txt000066400000000000000000000033201363734646600256500ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/clone_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_transformer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_accelerator_arg_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_buffer_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_buffer_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_pipe_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_svm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_cache_flush_requirements_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_is_patched_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_dev_queue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_reflection_surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_slm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_transformable_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp ) get_property(NEO_SHARED_KERNEL_TESTS GLOBAL PROPERTY NEO_SHARED_KERNEL_TESTS) list(APPEND IGDRCL_SRCS_tests_kernel ${NEO_SHARED_KERNEL_TESTS} ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_kernel}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/kernel/clone_kernel_tests.cpp000066400000000000000000000630241363734646600275050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/helpers/sampler_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" 
#include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_pipe.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_sampler.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class CloneKernelFixture : public ContextFixture, public DeviceFixture { using ContextFixture::SetUp; public: CloneKernelFixture() { } protected: void SetUp() { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); // define kernel info pKernelInfo = std::make_unique(); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pKernelInfo->kernelArgInfo[0].offsetHeap = 0x20; pKernelInfo->kernelArgInfo[0].offsetObjectId = 0x0; // image pKernelInfo->kernelArgInfo[0].offsetImgWidth = 0x4; pKernelInfo->kernelArgInfo[0].offsetImgHeight = 0x8; pKernelInfo->kernelArgInfo[0].offsetImgDepth = 0xc; // sampler pKernelInfo->kernelArgInfo[0].offsetSamplerSnapWa = 0x4; pKernelInfo->kernelArgInfo[0].offsetSamplerAddressingMode = 0x8; pKernelInfo->kernelArgInfo[0].offsetSamplerNormalizedCoords = 0x10; // accelerator pKernelInfo->kernelArgInfo[0].samplerArgumentType = 
iOpenCL::SAMPLER_OBJECT_VME; pKernelInfo->kernelArgInfo[0].offsetVmeMbBlockType = 0x4; pKernelInfo->kernelArgInfo[0].offsetVmeSubpixelMode = 0xc; pKernelInfo->kernelArgInfo[0].offsetVmeSadAdjustMode = 0x14; pKernelInfo->kernelArgInfo[0].offsetVmeSearchPathType = 0x1c; pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); pSourceKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pSourceKernel->initialize()); char pSourceCrossThreadData[64] = {}; pSourceKernel->setCrossThreadData(pSourceCrossThreadData, sizeof(pSourceCrossThreadData)); pClonedKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pClonedKernel->initialize()); char pClonedCrossThreadData[64] = {}; pClonedKernel->setCrossThreadData(pClonedCrossThreadData, sizeof(pClonedCrossThreadData)); } void TearDown() override { delete pSourceKernel; delete pClonedKernel; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockProgram *pProgram = nullptr; MockKernel *pSourceKernel = nullptr; MockKernel *pClonedKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char surfaceStateHeap[128]; }; typedef Test CloneKernelTest; TEST_F(CloneKernelTest, cloneKernelWithUnsetArg) { EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::NONE_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_EQ(nullptr, pSourceKernel->getKernelArgInfo(0).object); EXPECT_EQ(nullptr, pSourceKernel->getKernelArgInfo(0).value); EXPECT_EQ(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(0u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_FALSE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, 
pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); } TEST_F(CloneKernelTest, cloneKernelWithArgLocal) { const size_t slmSize = 0x800; pSourceKernel->setKernelArgHandler(0, &Kernel::setArgLocal); pClonedKernel->setKernelArgHandler(0, &Kernel::setArgLocal); retVal = pSourceKernel->setArg(0, slmSize, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::SLM_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel->slmTotalSize); } TEST_F(CloneKernelTest, cloneKernelWithArgBuffer) { MockBuffer 
buffer; cl_mem memObj = &buffer; pSourceKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); pClonedKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); retVal = pSourceKernel->setArg(0, sizeof(cl_mem), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::BUFFER_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (cl_mem *)(pClonedKernel->getCrossThreadData() + pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(buffer.getCpuAddress(), *pKernelArg); } TEST_F(CloneKernelTest, cloneKernelWithArgPipe) { MockPipe pipe(pContext); cl_mem memObj = &pipe; pSourceKernel->setKernelArgHandler(0, &Kernel::setArgPipe); pClonedKernel->setKernelArgHandler(0, &Kernel::setArgPipe); retVal = pSourceKernel->setArg(0, sizeof(cl_mem), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::PIPE_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, 
pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (cl_mem *)(pClonedKernel->getCrossThreadData() + pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(pipe.getCpuAddress(), *pKernelArg); } TEST_F(CloneKernelTest, cloneKernelWithArgImage) { auto image = std::unique_ptr(Image2dHelper<>::create(pContext)); ASSERT_NE(nullptr, image); uint32_t objectId = pKernelInfo->kernelArgInfo[0].offsetHeap; size_t imageWidth = image->getImageDesc().image_width; size_t imageHeight = image->getImageDesc().image_height; size_t imageDepth = image->getImageDesc().image_depth; cl_mem memObj = image.get(); pSourceKernel->setKernelArgHandler(0, &Kernel::setArgImage); pClonedKernel->setKernelArgHandler(0, &Kernel::setArgImage); retVal = pSourceKernel->setArg(0, sizeof(cl_mem), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::IMAGE_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); 
EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto crossThreadData = reinterpret_cast(pClonedKernel->getCrossThreadData()); EXPECT_EQ(objectId, *crossThreadData); const auto &argInfo = pClonedKernel->getKernelInfo().kernelArgInfo[0]; auto pImgWidth = ptrOffset(crossThreadData, argInfo.offsetImgWidth); EXPECT_EQ(imageWidth, *pImgWidth); auto pImgHeight = ptrOffset(crossThreadData, argInfo.offsetImgHeight); EXPECT_EQ(imageHeight, *pImgHeight); auto pImgDepth = ptrOffset(crossThreadData, argInfo.offsetImgDepth); EXPECT_EQ(imageDepth, *pImgDepth); } TEST_F(CloneKernelTest, cloneKernelWithArgAccelerator) { cl_motion_estimation_desc_intel desc = { CL_ME_MB_TYPE_4x4_INTEL, CL_ME_SUBPIXEL_MODE_QPEL_INTEL, CL_ME_SAD_ADJUST_MODE_HAAR_INTEL, CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL}; cl_accelerator_intel accelerator = VmeAccelerator::create( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(desc), &desc, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, accelerator); pSourceKernel->setKernelArgHandler(0, &Kernel::setArgAccelerator); pClonedKernel->setKernelArgHandler(0, &Kernel::setArgAccelerator); retVal = pSourceKernel->setArg(0, sizeof(cl_accelerator_intel), &accelerator); ASSERT_EQ(CL_SUCCESS, 
retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::ACCELERATOR_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto crossThreadData = reinterpret_cast(pClonedKernel->getCrossThreadData()); const auto &argInfo = pClonedKernel->getKernelInfo().kernelArgInfo[0]; uint32_t *pMbBlockType = ptrOffset(crossThreadData, argInfo.offsetVmeMbBlockType); EXPECT_EQ(desc.mb_block_type, *pMbBlockType); uint32_t *pSubpixelMode = ptrOffset(crossThreadData, argInfo.offsetVmeSubpixelMode); EXPECT_EQ(desc.subpixel_mode, *pSubpixelMode); uint32_t *pSadAdjustMode = ptrOffset(crossThreadData, argInfo.offsetVmeSadAdjustMode); EXPECT_EQ(desc.sad_adjust_mode, *pSadAdjustMode); uint32_t *pSearchPathType = ptrOffset(crossThreadData, argInfo.offsetVmeSearchPathType); EXPECT_EQ(desc.search_path_type, *pSearchPathType); retVal = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(CloneKernelTest, cloneKernelWithArgSampler) { std::unique_ptr sampler(new MockSampler(pContext, true, (cl_addressing_mode)CL_ADDRESS_MIRRORED_REPEAT, 
(cl_filter_mode)CL_FILTER_NEAREST)); uint32_t objectId = SAMPLER_OBJECT_ID_SHIFT + pKernelInfo->kernelArgInfo[0].offsetHeap; cl_sampler samplerObj = sampler.get(); pSourceKernel->setKernelArgHandler(0, &Kernel::setArgSampler); pClonedKernel->setKernelArgHandler(0, &Kernel::setArgSampler); retVal = pSourceKernel->setArg(0, sizeof(cl_sampler), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::SAMPLER_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto crossThreadData = reinterpret_cast(pClonedKernel->getCrossThreadData()); EXPECT_EQ(objectId, *crossThreadData); const auto &argInfo = pClonedKernel->getKernelInfo().kernelArgInfo[0]; auto pSnapWa = ptrOffset(crossThreadData, argInfo.offsetSamplerSnapWa); EXPECT_EQ(sampler->getSnapWaValue(), *pSnapWa); auto pAddressingMode = ptrOffset(crossThreadData, argInfo.offsetSamplerAddressingMode); EXPECT_EQ(GetAddrModeEnum(sampler->addressingMode), *pAddressingMode); auto pNormalizedCoords = ptrOffset(crossThreadData, 
argInfo.offsetSamplerNormalizedCoords); EXPECT_EQ(GetNormCoordsEnum(sampler->normalizedCoordinates), *pNormalizedCoords); } HWCMDTEST_F(IGFX_GEN8_CORE, CloneKernelTest, cloneKernelWithArgDeviceQueue) { cl_queue_properties queueProps[5] = { CL_QUEUE_PROPERTIES, CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0, 0, 0}; MockDeviceQueueHw mockDevQueue(pContext, pClDevice, queueProps[0]); auto clDeviceQueue = static_cast(&mockDevQueue); pSourceKernel->setKernelArgHandler(0, &Kernel::setArgDevQueue); pClonedKernel->setKernelArgHandler(0, &Kernel::setArgDevQueue); retVal = pSourceKernel->setArg(0, sizeof(cl_command_queue), &clDeviceQueue); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::DEVICE_QUEUE_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (uintptr_t *)(pClonedKernel->getCrossThreadData() + pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(static_cast(mockDevQueue.getQueueBuffer()->getGpuAddressToPatch()), 
*pKernelArg); }

// Verifies that cloneKernel() copies an SVM pointer argument: argument 0 of the
// source kernel is set with setArgSvm(), the kernel is cloned, and the clone must
// report the same argument bookkeeping and carry the same pointer in its
// cross-thread data at the argument's crossthreadOffset.
TEST_F(CloneKernelTest, cloneKernelWithArgSvm) {
    // The buffer is owned by a smart pointer: the fatal ASSERT_EQ below returns
    // from the test body on failure, which would skip a manual delete[] at the
    // end of the test and leak the allocation.
    auto svmStorage = std::make_unique<char[]>(256);
    char *svmPtr = svmStorage.get();

    retVal = pSourceKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u);
    ASSERT_EQ(CL_SUCCESS, retVal);

    // State of the source kernel after the argument has been set.
    EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size());
    EXPECT_EQ(Kernel::SVM_OBJ, pSourceKernel->getKernelArgInfo(0).type);
    EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size);
    EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum());
    EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched);

    retVal = pClonedKernel->cloneKernel(pSourceKernel);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // The clone must mirror the source's argument bookkeeping.
    EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size());
    EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type);
    EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object);
    EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value);
    EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size);
    EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum());
    EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched);

    // The SVM pointer itself must have been patched into the clone's cross-thread data.
    auto pKernelArg = (void **)(pClonedKernel->getCrossThreadData() +
                                pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset);
    EXPECT_EQ(svmPtr, *pKernelArg);
}

TEST_F(CloneKernelTest, cloneKernelWithArgSvmAlloc) { char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); retVal = pSourceKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::SVM_ALLOC_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = 
pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (void **)(pClonedKernel->getCrossThreadData() + pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; } TEST_F(CloneKernelTest, cloneKernelWithArgImmediate) { using TypeParam = unsigned long; auto value = (TypeParam)0xAA55AA55UL; retVal = pSourceKernel->setArg(0, sizeof(TypeParam), &value); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pSourceKernel->getKernelArguments().size()); EXPECT_EQ(Kernel::NONE_OBJ, pSourceKernel->getKernelArgInfo(0).type); EXPECT_NE(0u, pSourceKernel->getKernelArgInfo(0).size); EXPECT_EQ(1u, pSourceKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pSourceKernel->getKernelArgInfo(0).isPatched); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->getKernelArguments().size(), pClonedKernel->getKernelArguments().size()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).type, pClonedKernel->getKernelArgInfo(0).type); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).object, pClonedKernel->getKernelArgInfo(0).object); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).value, pClonedKernel->getKernelArgInfo(0).value); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).size, 
pClonedKernel->getKernelArgInfo(0).size); EXPECT_EQ(pSourceKernel->getPatchedArgumentsNum(), pClonedKernel->getPatchedArgumentsNum()); EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (TypeParam *)(pClonedKernel->getCrossThreadData() + pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(value, *pKernelArg); } TEST_F(CloneKernelTest, cloneKernelWithExecInfo) { if (!pDevice->getHardwareInfo().capabilityTable.ftrSvm) { GTEST_SKIP(); } void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); ASSERT_NE(nullptr, ptrSVM); auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); GraphicsAllocation *pSvmAlloc = svmData->gpuAllocation; ASSERT_NE(nullptr, pSvmAlloc); pSourceKernel->setSvmKernelExecInfo(pSvmAlloc); EXPECT_EQ(1u, pSourceKernel->kernelSvmGfxAllocations.size()); retVal = pClonedKernel->cloneKernel(pSourceKernel); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pSourceKernel->kernelSvmGfxAllocations.size(), pClonedKernel->kernelSvmGfxAllocations.size()); EXPECT_EQ(pSourceKernel->kernelSvmGfxAllocations.at(0), pClonedKernel->kernelSvmGfxAllocations.at(0)); pContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); } TEST_F(CloneKernelTest, givenBuiltinSourceKernelWhenCloningThenSetBuiltinFlagToClonedKernel) { pSourceKernel->isBuiltIn = true; pClonedKernel->cloneKernel(pSourceKernel); EXPECT_TRUE(pClonedKernel->isBuiltIn); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/debug_kernel_tests.cpp000066400000000000000000000042531363734646600274720ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include 
"opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include using namespace NEO; TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetDebugSurfaceBtiIsCalledThenCorrectValueIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(*device->getExecutionEnvironment()); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); EXPECT_EQ(0, kernel->getDebugSurfaceBti()); } TEST(DebugKernelTest, givenKernelCompiledForDebuggingWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenCorrectValueIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(*device->getExecutionEnvironment()); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); EXPECT_EQ(MockDebugKernel::perThreadSystemThreadSurfaceSize, kernel->getPerThreadSystemThreadSurfaceSize()); } TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetDebugSurfaceBtiIsCalledThenInvalidIndexValueIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(*device->getExecutionEnvironment()); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); EXPECT_EQ(-1, kernel->getDebugSurfaceBti()); } TEST(DebugKernelTest, givenKernelWithoutDebugFlagWhenGetPerThreadSystemThreadSurfaceSizeIsCalledThenZeroIsReturned) { auto device = std::make_unique(new MockDevice); MockProgram program(*device->getExecutionEnvironment()); program.enableKernelDebug(); std::unique_ptr kernel(MockKernel::create(device->getDevice(), &program)); EXPECT_EQ(0u, kernel->getPerThreadSystemThreadSurfaceSize()); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/gl/000077500000000000000000000000001363734646600235145ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/kernel/gl/kernel_arg_buffer_tests.cpp000066400000000000000000000045531363734646600311130ustar00rootroot00000000000000/* * Copyright (C) 
2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h" #include "opencl/test/unit_test/mocks/gl/mock_gl_sharing.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "config.h" #include "gtest/gtest.h" #include using namespace NEO; typedef Test KernelArgBufferTest; TEST_F(KernelArgBufferTest, givenSharedBufferWhenSetArgIsCalledThenReportSharedObjUsage) { MockGlSharing glSharing; glSharing.uploadDataToBufferInfo(1, 0); pContext->setSharingFunctions(new GlSharingFunctionsMock()); auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1); auto nonSharedBuffer = new MockBuffer; auto sharedMem = static_cast(sharedBuffer); auto nonSharedMem = static_cast(nonSharedBuffer); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &nonSharedMem); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); delete nonSharedBuffer; delete sharedBuffer; } HWTEST_F(KernelArgBufferTest, givenSharedBufferWhenSetArgStatefulIsCalledThenBufferSurfaceShouldBeUsed) { MockGlSharing glSharing; glSharing.uploadDataToBufferInfo(1, 0); pContext->setSharingFunctions(new GlSharingFunctionsMock()); auto sharedBuffer = GlBuffer::createSharedGlBuffer(pContext, CL_MEM_READ_WRITE, 1); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; 
RENDER_SURFACE_STATE surfaceState = {}; sharedBuffer->setArgStateful(&surfaceState); auto surfType = surfaceState.getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType); delete sharedBuffer; } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/image_transformer_tests.cpp000066400000000000000000000202011363734646600305370ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/image_transformer.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "test.h" using namespace NEO; class ImageTransformerTest : public ::testing::Test { public: void SetUp() override { using SimpleKernelArgInfo = Kernel::SimpleKernelArgInfo; pKernelInfo = std::make_unique(); pKernelInfo->kernelArgInfo.resize(2); pKernelInfo->kernelArgInfo[0].isTransformable = true; pKernelInfo->kernelArgInfo[0].offsetHeap = firstImageOffset; pKernelInfo->kernelArgInfo[1].isTransformable = true; pKernelInfo->kernelArgInfo[1].offsetHeap = secondImageOffset; image1.reset(Image3dHelper<>::create(&context)); image2.reset(Image3dHelper<>::create(&context)); SimpleKernelArgInfo imageArg1; SimpleKernelArgInfo imageArg2; clImage1 = static_cast(image2.get()); clImage2 = static_cast(image2.get()); imageArg1.value = &clImage1; imageArg1.object = clImage1; imageArg2.value = &clImage2; imageArg2.object = clImage2; kernelArguments.push_back(imageArg1); kernelArguments.push_back(imageArg2); } const int firstImageOffset = 0x20; const int secondImageOffset = 0x40; std::unique_ptr pKernelInfo; ImageTransformer imageTransformer; MockContext context; std::unique_ptr image1; std::unique_ptr image2; cl_mem clImage1; cl_mem clImage2; char ssh[0x80]; std::vector kernelArguments; }; TEST_F(ImageTransformerTest, givenImageTransformerWhenRegisterImage3dThenTransformerHasRegisteredImages3d) { bool retVal; retVal = 
imageTransformer.hasRegisteredImages3d(); EXPECT_FALSE(retVal); imageTransformer.registerImage3d(0); retVal = imageTransformer.hasRegisteredImages3d(); EXPECT_TRUE(retVal); } TEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage2dArrayThenTransformerDidTransform) { bool retVal; retVal = imageTransformer.didTransform(); EXPECT_FALSE(retVal); imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, nullptr); retVal = imageTransformer.didTransform(); EXPECT_TRUE(retVal); } TEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage3dThenTransformerDidNotTransform) { bool retVal; retVal = imageTransformer.didTransform(); EXPECT_FALSE(retVal); imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, nullptr); imageTransformer.transformImagesTo3d(*pKernelInfo, kernelArguments, nullptr); retVal = imageTransformer.didTransform(); EXPECT_FALSE(retVal); } HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage2dArrayThenTransformOnlyRegisteredImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(false); secondSurfaceState->setSurfaceArray(false); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, ssh); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_TRUE(secondSurfaceState->getSurfaceArray()); } 
HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage2dArrayThenTransformOnlyTransformableImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; pKernelInfo->kernelArgInfo[1].isTransformable = false; auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(false); secondSurfaceState->setSurfaceArray(false); imageTransformer.registerImage3d(0); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo2dArray(*pKernelInfo, kernelArguments, ssh); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage3dhenTransformAllRegisteredImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; pKernelInfo->kernelArgInfo[1].isTransformable = false; auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(true); secondSurfaceState->setSurfaceArray(true); imageTransformer.registerImage3d(0); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo3d(*pKernelInfo, kernelArguments, ssh); 
EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(ImageTransformerTest, givenImageTransformerWhenTransformToImage3dhenTransformOnlyRegisteredImages) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; pKernelInfo->kernelArgInfo[1].isTransformable = false; auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); firstSurfaceState->setSurfaceArray(true); secondSurfaceState->setSurfaceArray(true); imageTransformer.registerImage3d(1); imageTransformer.transformImagesTo3d(*pKernelInfo, kernelArguments, ssh); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } class MockImageTransformer : public ImageTransformer { public: using ImageTransformer::argIndexes; }; TEST(ImageTransformerRegisterImageTest, givenImageTransformerWhenRegisterTheSameImageTwiceThenAppendOnlyOne) { MockImageTransformer transformer; EXPECT_EQ(0u, transformer.argIndexes.size()); transformer.registerImage3d(0); EXPECT_EQ(1u, transformer.argIndexes.size()); transformer.registerImage3d(0); EXPECT_EQ(1u, transformer.argIndexes.size()); transformer.registerImage3d(1); EXPECT_EQ(2u, transformer.argIndexes.size()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp000066400000000000000000000131141363734646600315150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgAcceleratorFixture : public ContextFixture, public DeviceFixture { using ContextFixture::SetUp; public: KernelArgAcceleratorFixture() { } protected: void SetUp() { desc = { CL_ME_MB_TYPE_4x4_INTEL, CL_ME_SUBPIXEL_MODE_QPEL_INTEL, CL_ME_SAD_ADJUST_MODE_HAAR_INTEL, CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL}; DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); KernelArgPatchInfo kernelArgPatchInfo; pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].samplerArgumentType = iOpenCL::SAMPLER_OBJECT_VME; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(uint32_t); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = 1; pKernelInfo->kernelArgInfo[0].offsetVmeMbBlockType = 0x04; pKernelInfo->kernelArgInfo[0].offsetVmeSubpixelMode = 0x0c; pKernelInfo->kernelArgInfo[0].offsetVmeSadAdjustMode = 0x14; 
pKernelInfo->kernelArgInfo[0].offsetVmeSearchPathType = 0x1c; pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, nullptr); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgAccelerator); pCrossThreadData[0x04] = desc.mb_block_type; pCrossThreadData[0x0c] = desc.subpixel_mode; pCrossThreadData[0x14] = desc.sad_adjust_mode; pCrossThreadData[0x1c] = desc.sad_adjust_mode; pKernel->setCrossThreadData(&pCrossThreadData[0], sizeof(pCrossThreadData)); } void TearDown() override { delete pKernel; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } cl_motion_estimation_desc_intel desc; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; char pCrossThreadData[64]; }; typedef Test KernelArgAcceleratorTest; TEST_F(KernelArgAcceleratorTest, SetKernelArgValidAccelerator) { cl_int status; cl_accelerator_intel accelerator = VmeAccelerator::create( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(desc), &desc, status); ASSERT_EQ(CL_SUCCESS, status); ASSERT_NE(nullptr, accelerator); status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel), &accelerator); ASSERT_EQ(CL_SUCCESS, status); char *crossThreadData = pKernel->getCrossThreadData(); const auto &arginfo = pKernelInfo->kernelArgInfo[0]; uint32_t *pMbBlockType = ptrOffset(reinterpret_cast(crossThreadData), arginfo.offsetVmeMbBlockType); EXPECT_EQ(desc.mb_block_type, *pMbBlockType); uint32_t *pSubpixelMode = ptrOffset(reinterpret_cast(crossThreadData), arginfo.offsetVmeSubpixelMode); EXPECT_EQ(desc.subpixel_mode, *pSubpixelMode); uint32_t *pSadAdjustMode = ptrOffset(reinterpret_cast(crossThreadData), arginfo.offsetVmeSadAdjustMode); EXPECT_EQ(desc.sad_adjust_mode, *pSadAdjustMode); uint32_t *pSearchPathType = ptrOffset(reinterpret_cast(crossThreadData), arginfo.offsetVmeSearchPathType); EXPECT_EQ(desc.search_path_type, 
*pSearchPathType); status = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, status); } TEST_F(KernelArgAcceleratorTest, SetKernelArgNullAccelerator) { cl_int status; status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel), nullptr); ASSERT_EQ(CL_INVALID_ARG_VALUE, status); } TEST_F(KernelArgAcceleratorTest, SetKernelArgInvalidSize) { cl_int status; cl_accelerator_intel accelerator = VmeAccelerator::create( pContext, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL, sizeof(desc), &desc, status); ASSERT_EQ(CL_SUCCESS, status); ASSERT_NE(nullptr, accelerator); status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel) - 1, accelerator); ASSERT_EQ(CL_INVALID_ARG_SIZE, status); status = clReleaseAcceleratorINTEL(accelerator); EXPECT_EQ(CL_SUCCESS, status); } TEST_F(KernelArgAcceleratorTest, SetKernelArgInvalidAccelerator) { cl_int status; const void *notAnAccelerator = static_cast(pKernel); status = this->pKernel->setArg(0, sizeof(cl_accelerator_intel), notAnAccelerator); ASSERT_EQ(CL_INVALID_ARG_VALUE, status); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp000066400000000000000000000037311363734646600310320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "kernel_arg_buffer_fixture.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; void KernelArgBufferFixture::SetUp() { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); // define kernel info 
pKernelInfo = std::make_unique(); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; kernelHeader.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, nullptr); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); } void KernelArgBufferFixture::TearDown() { delete pKernel; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h000066400000000000000000000020471363734646600304760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgBufferFixture : public ContextFixture, public DeviceFixture { using ContextFixture::SetUp; public: void SetUp(); void TearDown() 
override; cl_int retVal = CL_SUCCESS; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo = nullptr; SKernelBinaryHeaderCommon kernelHeader{}; char pSshLocal[64]{}; char pCrossThreadData[64]{}; }; compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp000066400000000000000000000156601363734646600305120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; typedef Test KernelArgBufferTest; TEST_F(KernelArgBufferTest, SetKernelArgValidBuffer) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(buffer->getCpuAddress(), *pKernelArg); delete buffer; } TEST_F(KernelArgBufferTest, SetKernelArgValidSvmPtrStateless) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); EXPECT_EQ(0u, 
pKernel->getSurfaceStateHeapSize()); delete buffer; } HWTEST_F(KernelArgBufferTest, SetKernelArgValidSvmPtrStateful) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); delete buffer; } HWTEST_F(KernelArgBufferTest, SetKernelArgBufferFromSvmPtr) { Buffer *buffer = new MockBuffer(); buffer->getGraphicsAllocation()->setCoherent(true); auto val = (cl_mem)buffer; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pKernel->requiresCoherency()); delete buffer; } TEST_F(KernelArgBufferTest, SetKernelArgFakeBuffer) { char *ptr = new char[sizeof(Buffer)]; auto val = (cl_mem *)ptr; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); delete[] ptr; } TEST_F(KernelArgBufferTest, SetKernelArgPtrToNull) { auto val = (cl_mem *)nullptr; auto pVal = &val; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(nullptr, *pKernelArg); } TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPtrPassedIsNullThenOnly4BytesAreBeingPatched) { auto val = (cl_mem *)nullptr; auto pVal = &val; this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = 4; auto pKernelArg64bit 
= (uint64_t *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); uint32_t *pKernelArg32bit = (uint32_t *)pKernelArg64bit; *pKernelArg64bit = 0xffffffffffffffff; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); uint64_t expValue = 0u; EXPECT_EQ(0u, *pKernelArg32bit); EXPECT_NE(expValue, *pKernelArg64bit); } TEST_F(KernelArgBufferTest, SetKernelArgNull) { auto pVal = nullptr; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(nullptr, *pKernelArg); } TEST_F(KernelArgBufferTest, given32BitDeviceWhenArgPassedIsNullThenOnly4BytesAreBeingPatched) { auto pVal = nullptr; this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = 4; auto pKernelArg64bit = (uint64_t *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); *pKernelArg64bit = 0xffffffffffffffff; uint32_t *pKernelArg32bit = (uint32_t *)pKernelArg64bit; this->pKernel->setArg(0, sizeof(cl_mem *), pVal); uint64_t expValue = 0u; EXPECT_EQ(0u, *pKernelArg32bit); EXPECT_NE(expValue, *pKernelArg64bit); } TEST_F(KernelArgBufferTest, givenWritableBufferWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { auto buffer = std::make_unique(); buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(true); buffer->mockGfxAllocation.setFlushL3Required(false); auto val = static_cast(buffer.get()); auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenCacheFlushBufferWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { auto buffer = std::make_unique(); buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false); 
buffer->mockGfxAllocation.setFlushL3Required(true); auto val = static_cast(buffer.get()); auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&buffer->mockGfxAllocation, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) { auto buffer = std::make_unique(); buffer->mockGfxAllocation.setMemObjectsAllocationWithWritableFlags(false); buffer->mockGfxAllocation.setFlushL3Required(false); auto val = static_cast(buffer.get()); auto pVal = &val; auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp000066400000000000000000000120611363734646600312130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; using namespace DeviceHostQueue; struct KernelArgDevQueueTest : public DeviceFixture, public DeviceHostQueueFixture { protected: void SetUp() override { DeviceFixture::SetUp(); DeviceHostQueueFixture::SetUp(); if (!this->pDevice->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) { GTEST_SKIP(); } pDeviceQueue = createQueueObject(); pKernelInfo = std::make_unique(); pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].isDeviceQueue = true; kernelArgPatchInfo.crossthreadOffset = 0x4; kernelArgPatchInfo.size = 0x4; kernelArgPatchInfo.sourceOffset = 0; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); program = 
std::make_unique(*pDevice->getExecutionEnvironment()); pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); uint8_t pCrossThreadData[crossThreadDataSize]; memset(pCrossThreadData, crossThreadDataInit, sizeof(pCrossThreadData)); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } void TearDown() override { delete pKernel; delete pDeviceQueue; DeviceHostQueueFixture::TearDown(); DeviceFixture::TearDown(); } bool crossThreadDataUnchanged() { for (uint32_t i = 0; i < crossThreadDataSize; i++) { if (pKernel->mockCrossThreadData[i] != crossThreadDataInit) { return false; } } return true; } static const uint32_t crossThreadDataSize = 0x10; static const char crossThreadDataInit = 0x7e; std::unique_ptr program; DeviceQueue *pDeviceQueue = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; KernelArgPatchInfo kernelArgPatchInfo; }; HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GIVENkernelWithDevQueueArgWHENsetArgHandleTHENsetsProperHandle) { EXPECT_EQ(pKernel->kernelArgHandlers[0], &Kernel::setArgDevQueue); } HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GIVENdevQueueArgHandlerWHENpassDevQueueTHENacceptObjAndPatch) { auto clDeviceQueue = static_cast(pDeviceQueue); auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clDeviceQueue); EXPECT_EQ(ret, CL_SUCCESS); auto gpuAddress = static_cast(pDeviceQueue->getQueueBuffer()->getGpuAddressToPatch()); auto patchLocation = ptrOffset(pKernel->mockCrossThreadData.data(), kernelArgPatchInfo.crossthreadOffset); EXPECT_EQ(*(reinterpret_cast(patchLocation)), gpuAddress); } HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GIVENdevQueueArgHandlerWHENpassNormalQueueTHENrejectObjAndReturnError) { auto clCmdQueue = static_cast(pCommandQueue); auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clCmdQueue); EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE); EXPECT_EQ(crossThreadDataUnchanged(), true); } 
HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GIVENdevQueueArgHandlerWHENpassNonQueueObjTHENrejectObjAndReturnError) { Buffer *buffer = new MockBuffer(); auto clBuffer = static_cast(buffer); auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clBuffer); EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE); EXPECT_EQ(crossThreadDataUnchanged(), true); delete buffer; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GIVENdevQueueArgHandlerWHENpassFakeQueueTHENrejectObjAndReturnError) { char *pFakeDeviceQueue = new char[sizeof(DeviceQueue)]; auto clFakeDeviceQueue = reinterpret_cast(pFakeDeviceQueue); auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), &clFakeDeviceQueue); EXPECT_EQ(ret, CL_INVALID_DEVICE_QUEUE); EXPECT_EQ(crossThreadDataUnchanged(), true); delete[] pFakeDeviceQueue; } HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GIVENdevQueueArgHandlerWHENpassNullptrTHENrejectObjAndReturnError) { auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue), nullptr); EXPECT_EQ(ret, CL_INVALID_ARG_VALUE); EXPECT_EQ(crossThreadDataUnchanged(), true); } HWCMDTEST_F(IGFX_GEN8_CORE, KernelArgDevQueueTest, GIVENdevQueueArgHandlerWHENpassWrongSizeTHENrejectObjAndReturnError) { auto clDeviceQueue = static_cast(pDeviceQueue); auto ret = pKernel->setArgDevQueue(0, sizeof(cl_command_queue) - 1, &clDeviceQueue); EXPECT_EQ(ret, CL_INVALID_ARG_SIZE); EXPECT_EQ(crossThreadDataUnchanged(), true); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp000066400000000000000000000130641363734646600301700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include 
"opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_tests.h" #include "opencl/test/unit_test/program/program_with_source.h" #include "test.h" using namespace NEO; class KernelArgInfoTest : public ProgramFromSourceTest { public: KernelArgInfoTest() { } ~KernelArgInfoTest() override = default; protected: void SetUp() override { kbHelper = new KernelBinaryHelper("copybuffer", true); ProgramFromSourceTest::SetUp(); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id device = pPlatform->getClDevice(0); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, *pProgram->getKernelInfo(KernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); } void TearDown() override { delete pKernel; pKernel = nullptr; ProgramFromSourceTest::TearDown(); delete kbHelper; } template void queryArgInfo(cl_kernel_arg_info paramName, T ¶mValue) { size_t paramValueSize = 0; size_t param_value_size_ret = 0; // get size retVal = pKernel->getArgInfo( 0, paramName, paramValueSize, nullptr, ¶m_value_size_ret); EXPECT_NE(0u, param_value_size_ret); ASSERT_EQ(CL_SUCCESS, retVal); // get the name paramValueSize = param_value_size_ret; retVal = pKernel->getArgInfo( 0, paramName, paramValueSize, ¶mValue, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); } Kernel *pKernel = nullptr; cl_int retVal = CL_SUCCESS; KernelBinaryHelper *kbHelper = nullptr; }; TEST_P(KernelArgInfoTest, Create_Simple) { // included in the setup of fixture } TEST_P(KernelArgInfoTest, WhenQueryingWithNullptrKernelNameTheniReturnNullptr) { auto kernelInfo = this->pProgram->getKernelInfo(nullptr); EXPECT_EQ(nullptr, kernelInfo); } TEST_P(KernelArgInfoTest, getKernelArgAcessQualifier) { cl_kernel_arg_access_qualifier param_value = 0; queryArgInfo(CL_KERNEL_ARG_ACCESS_QUALIFIER, param_value); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_NONE), param_value); } 
TEST_P(KernelArgInfoTest, getKernelAddressQulifier) { cl_kernel_arg_address_qualifier param_value = 0; queryArgInfo(CL_KERNEL_ARG_ADDRESS_QUALIFIER, param_value); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_GLOBAL), param_value); } TEST_P(KernelArgInfoTest, getKernelTypeQualifer) { cl_kernel_arg_type_qualifier param_value = 0; queryArgInfo(CL_KERNEL_ARG_TYPE_QUALIFIER, param_value); EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_NONE), param_value); } TEST_P(KernelArgInfoTest, getKernelTypeName) { cl_kernel_arg_info param_name = CL_KERNEL_ARG_TYPE_NAME; char *param_value = nullptr; size_t paramValueSize = 0; size_t param_value_size_ret = 0; // get size retVal = pKernel->getArgInfo( 0, param_name, paramValueSize, nullptr, ¶m_value_size_ret); EXPECT_NE(0u, param_value_size_ret); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name param_value = new char[param_value_size_ret]; // get the name paramValueSize = param_value_size_ret; retVal = pKernel->getArgInfo( 0, param_name, paramValueSize, param_value, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); const char expectedString[] = "uint*"; auto result = strncmp(param_value, expectedString, sizeof(expectedString)); EXPECT_EQ(0, result); delete[] param_value; } TEST_P(KernelArgInfoTest, getKernelArgName) { cl_kernel_arg_info param_name = CL_KERNEL_ARG_NAME; char *param_value = nullptr; size_t paramValueSize = 0; size_t param_value_size_ret = 0; // get size retVal = pKernel->getArgInfo( 0, param_name, paramValueSize, nullptr, ¶m_value_size_ret); EXPECT_NE(0u, param_value_size_ret); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name param_value = new char[param_value_size_ret]; // get the name paramValueSize = param_value_size_ret; retVal = pKernel->getArgInfo( 0, param_name, paramValueSize, param_value, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, strcmp(param_value, "src")); delete[] param_value; } INSTANTIATE_TEST_CASE_P(KernelArgInfoTests, KernelArgInfoTest, ::testing::Combine( 
::testing::ValuesIn(SourceFileNames), ::testing::ValuesIn(BinaryForSourceFileNames), ::testing::ValuesIn(KernelNames))); compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp000066400000000000000000000136031363734646600301710ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_pipe.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgPipeFixture : public ContextFixture, public DeviceFixture { using ContextFixture::SetUp; public: KernelArgPipeFixture() { } protected: void SetUp() { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); // define kernel info pKernelInfo = std::make_unique(); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; kernelHeader.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, 
pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgPipe); } void TearDown() override { delete pKernel; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char pSshLocal[64]; char pCrossThreadData[64]; }; typedef Test KernelArgPipeTest; TEST_F(KernelArgPipeTest, SetKernelArgValidPipe) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem)pipe; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (cl_mem **)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(pipe->getCpuAddress(), *pKernelArg); delete pipe; } TEST_F(KernelArgPipeTest, SetKernelArgValidSvmPtrStateless) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem)pipe; auto pVal = &val; pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete pipe; } HWTEST_F(KernelArgPipeTest, SetKernelArgValidSvmPtrStateful) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem)pipe; auto pVal = &val; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(pipe->getCpuAddress(), 
surfaceAddress); delete pipe; } TEST_F(KernelArgPipeTest, SetKernelArgFakePipe) { char *ptr = new char[sizeof(Pipe)]; auto val = (cl_mem *)ptr; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); delete[] ptr; } TEST_F(KernelArgPipeTest, SetKernelArgBufferAsPipe) { Buffer *buffer = new MockBuffer(); auto val = (cl_mem)buffer; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); delete buffer; } TEST_F(KernelArgPipeTest, SetKernelArgPipeFromDifferentContext) { Pipe *pipe = new MockPipe(pContext); Context *oldContext = &pKernel->getContext(); MockContext newContext; this->pKernel->setContext(&newContext); auto val = (cl_mem)pipe; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); pKernel->setContext(oldContext); delete pipe; } TEST_F(KernelArgPipeTest, SetKernelArgInvalidSize) { Pipe *pipe = new MockPipe(pContext); auto val = (cl_mem *)pipe; auto pVal = &val; auto retVal = this->pKernel->setArg(0, 1, pVal); EXPECT_EQ(CL_INVALID_ARG_SIZE, retVal); delete pipe; } TEST_F(KernelArgPipeTest, SetKernelArgPtrToNull) { auto val = (cl_mem *)nullptr; auto pVal = &val; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(KernelArgPipeTest, SetKernelArgNull) { auto pVal = nullptr; auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp000066400000000000000000000506631363734646600300500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include 
"opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" #include using namespace NEO; class KernelArgSvmFixture_ : public ContextFixture, public DeviceFixture { using ContextFixture::SetUp; public: KernelArgSvmFixture_() { } protected: void SetUp() { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); // define kernel info pKernelInfo = std::make_unique(); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; kernelHeader.SurfaceStateHeapSize = sizeof(pSshLocal); pKernelInfo->heapInfo.pSsh = pSshLocal; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pProgram = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } void TearDown() override { delete pKernel; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockProgram *pProgram = nullptr; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char pSshLocal[64]; char pCrossThreadData[64]; }; typedef Test KernelArgSvmTest; TEST_F(KernelArgSvmTest, SetKernelArgValidSvmPtr) { char *svmPtr = new char[256]; auto retVal = 
pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; } TEST_F(KernelArgSvmTest, SetKernelArgValidSvmPtrStateless) { char *svmPtr = new char[256]; pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); delete[] svmPtr; } HWTEST_F(KernelArgSvmTest, SetKernelArgValidSvmPtrStateful) { char *svmPtr = new char[256]; pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(svmPtr, surfaceAddress); delete[] svmPtr; } TEST_F(KernelArgSvmTest, SetKernelArgValidSvmAlloc) { char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; } TEST_F(KernelArgSvmTest, SetKernelArgValidSvmAllocStateless) { char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); pKernelInfo->usesSsh = false; pKernelInfo->requiresSshForBuffers = false; auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, 
pKernel->getSurfaceStateHeapSize()); delete[] svmPtr; } HWTEST_F(KernelArgSvmTest, SetKernelArgValidSvmAllocStateful) { char *svmPtr = new char[256]; MockGraphicsAllocation svmAlloc(svmPtr, 256); pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(svmPtr, surfaceAddress); delete[] svmPtr; } HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThenProperSvmAddressIsPatched) { std::unique_ptr svmPtr(new char[256]); auto offsetedPtr = svmPtr.get() + 4; MockGraphicsAllocation svmAlloc(svmPtr.get(), 256); pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(offsetedPtr, surfaceAddress); } HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetArgSvmIsCalledWithSurfaceStateThenSizeIsMaxAndAddressIsProgrammed) { this->pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; auto systemPointer = reinterpret_cast(0xfeedbac); pKernelInfo->usesSsh = true; pKernelInfo->requiresSshForBuffers = true; pKernel->setArgSvmAlloc(0, systemPointer, nullptr); 
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(systemPointer, surfaceAddress); EXPECT_EQ(128u, surfaceState->getWidth()); EXPECT_EQ(2048u, surfaceState->getDepth()); EXPECT_EQ(16384u, surfaceState->getHeight()); } TEST_F(KernelArgSvmTest, SetKernelArgImmediateInvalidArgValue) { auto retVal = pKernel->setArgImmediate(0, 256, nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, retVal); } HWTEST_F(KernelArgSvmTest, PatchWithImplicitSurface) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE); std::vector svmPtr; svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(void *)); pKernel->setSshLocal(nullptr, rendSurfSize); pKernelInfo->requiresSshForBuffers = true; pKernelInfo->usesSsh = true; { MockGraphicsAllocation svmAlloc(svmPtr.data(), svmPtr.size()); SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patch; memset(&patch, 0, sizeof(patch)); patch.DataParamOffset = 0; patch.DataParamSize = sizeof(void *); patch.SurfaceStateHeapOffset = 0; constexpr size_t patchOffset = 16; void *ptrToPatch = svmPtr.data() + patchOffset; ASSERT_GE(pKernel->getCrossThreadDataSize(), sizeof(void *)); *reinterpret_cast(pKernel->getCrossThreadData()) = 0U; ASSERT_GE(pKernel->getSurfaceStateHeapSize(), rendSurfSize); RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap()); memset(surfState, 0, rendSurfSize); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, patch); // verify cross thread data was properly patched EXPECT_EQ(ptrToPatch, *reinterpret_cast(pKernel->getCrossThreadData())); // create surface state for comparison RENDER_SURFACE_STATE expectedSurfaceState; memset(&expectedSurfaceState, 0, rendSurfSize); { void 
*addressToPatch = svmAlloc.getUnderlyingBuffer(); size_t sizeToPatch = svmAlloc.getUnderlyingBufferSize(); Buffer::setSurfaceState(pDevice, &expectedSurfaceState, sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0); } // verify ssh was properly patched EXPECT_EQ(0, memcmp(&expectedSurfaceState, surfState, rendSurfSize)); // when cross thread and ssh data is not available then should not do anything pKernel->setCrossThreadData(nullptr, 0); pKernel->setSshLocal(nullptr, 0); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, patch); } } TEST_F(KernelArgSvmTest, patchBufferOffset) { std::vector svmPtr; svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(uint32_t)); { constexpr uint32_t initVal = 7U; constexpr uint32_t svmOffset = 13U; MockGraphicsAllocation svmAlloc(svmPtr.data(), 256); uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData()); KernelArgInfo kai; void *returnedPtr = nullptr; kai.offsetBufferOffset = static_cast(-1); *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(initVal, *expectedPatchPtr); kai.offsetBufferOffset = static_cast(-1); *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), nullptr); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(initVal, *expectedPatchPtr); kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, nullptr); void *expectedPtr = alignDown(svmPtr.data() + svmOffset, 4); // expecting to see DWORD alignment restriction in offset uint32_t expectedOffset = static_cast(ptrDiff(svmPtr.data() + svmOffset, expectedPtr)); EXPECT_EQ(expectedPtr, returnedPtr); EXPECT_EQ(expectedOffset, 
*expectedPatchPtr); kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data() + svmOffset, &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(svmOffset, *expectedPatchPtr); } } template class KernelArgSvmTestTyped : public KernelArgSvmTest { }; struct SetArgHandlerSetArgSvm { static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { kernel.setArgSvm(argNum, allocSize, ptrToPatch, &alloc, 0u); } static constexpr bool supportsOffsets() { return true; } }; struct SetArgHandlerSetArgSvmAlloc { static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { kernel.setArgSvmAlloc(argNum, ptrToPatch, &alloc); } static constexpr bool supportsOffsets() { return true; } }; struct SetArgHandlerSetArgBuffer { static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) { MockBuffer mb{alloc}; cl_mem memObj = &mb; kernel.setArgBuffer(argNum, sizeof(cl_mem), &memObj); } static constexpr bool supportsOffsets() { return false; } }; using SetArgHandlers = ::testing::Types; TYPED_TEST_CASE(KernelArgSvmTestTyped, SetArgHandlers); HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsNeededTheSetArgSetsIt) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; constexpr size_t rendSurfSize = sizeof(RENDER_SURFACE_STATE); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); uint32_t svmSize = MemoryConstants::pageSize; char *svmPtr = reinterpret_cast(alignedMalloc(svmSize, MemoryConstants::pageSize)); KernelArgInfo &kai = this->pKernelInfo->kernelArgInfo[0]; kai.offsetHeap = 0; kai.kernelArgPatchInfoVector[0].sourceOffset = 0; kai.kernelArgPatchInfoVector[0].crossthreadOffset = 0; kai.kernelArgPatchInfoVector[0].size = sizeof(void *); kai.offsetBufferOffset = 
kai.kernelArgPatchInfoVector[0].size; this->pKernel->setCrossThreadData(nullptr, kai.offsetBufferOffset + sizeof(uint32_t)); this->pKernel->setSshLocal(nullptr, rendSurfSize); this->pKernelInfo->requiresSshForBuffers = true; this->pKernelInfo->usesSsh = true; { MockGraphicsAllocation svmAlloc(svmPtr, svmSize); constexpr size_t patchOffset = 16; void *ptrToPatch = svmPtr + patchOffset; size_t sizeToPatch = svmSize - patchOffset; ASSERT_GE(this->pKernel->getCrossThreadDataSize(), kai.offsetBufferOffset + sizeof(uint32_t)); void **expectedPointerPatchPtr = reinterpret_cast(this->pKernel->getCrossThreadData()); uint32_t *expectedOffsetPatchPtr = reinterpret_cast(ptrOffset(this->pKernel->getCrossThreadData(), kai.offsetBufferOffset)); *expectedPointerPatchPtr = reinterpret_cast(0U); *expectedOffsetPatchPtr = 0U; ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(), rendSurfSize); RENDER_SURFACE_STATE *surfState = reinterpret_cast(this->pKernel->getSurfaceStateHeap()); memset(surfState, 0, rendSurfSize); TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, svmAlloc); // surface state for comparison RENDER_SURFACE_STATE expectedSurfaceState; memset(&expectedSurfaceState, 0, rendSurfSize); if (TypeParam::supportsOffsets()) { // setArgSvm, setArgSvmAlloc EXPECT_EQ(ptrToPatch, *expectedPointerPatchPtr); EXPECT_EQ(patchOffset, *expectedOffsetPatchPtr); } else { // setArgBuffer EXPECT_EQ(svmAlloc.getUnderlyingBuffer(), *expectedPointerPatchPtr); EXPECT_EQ(0U, *expectedOffsetPatchPtr); } Buffer::setSurfaceState(device.get(), &expectedSurfaceState, svmAlloc.getUnderlyingBufferSize(), svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0); // verify ssh was properly patched int32_t cmpResult = memcmp(&expectedSurfaceState, surfState, rendSurfSize); EXPECT_EQ(0, cmpResult); } alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, 
MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(true); svmAlloc.setFlushL3Required(false); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(true); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&svmAlloc, pKernel->kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotExpectAllocationInCacheFlushVector) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(false); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingKernelExecInfoThenDoNotExpectSvmFlushFlagTrue) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(true); svmAlloc.setFlushL3Required(false); pKernel->setSvmKernelExecInfo(&svmAlloc); EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagTrue) { size_t svmSize = 4096; void 
*svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(true); pKernel->setSvmKernelExecInfo(&svmAlloc); EXPECT_TRUE(pKernel->svmAllocationsRequireCacheFlush); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenNoCacheFlushReadOnlySvmAllocationWhenSettingKernelExecInfoThenExpectSvmFlushFlagFalse) { size_t svmSize = 4096; void *svmPtr = alignedMalloc(svmSize, MemoryConstants::pageSize); MockGraphicsAllocation svmAlloc(svmPtr, svmSize); svmAlloc.setMemObjectsAllocationWithWritableFlags(false); svmAlloc.setFlushL3Required(false); pKernel->setSvmKernelExecInfo(&svmAlloc); EXPECT_FALSE(pKernel->svmAllocationsRequireCacheFlush); alignedFree(svmPtr); } TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvmArgUseGpuAddress) { char svmPtr[256]; pKernelInfo->kernelArgInfo[0].offsetBufferOffset = 0u; MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast(svmPtr), 256); auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); } TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidPatchBufferOffsetWithGpuAddress) { std::vector svmPtr; svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(uint32_t)); constexpr uint32_t initVal = 7U; MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast(svmPtr.data()), 256); uint32_t *expectedPatchPtr = reinterpret_cast(pKernel->getCrossThreadData()); KernelArgInfo kai; void *returnedPtr = nullptr; kai.offsetBufferOffset = 0U; *expectedPatchPtr = initVal; returnedPtr = pKernel->patchBufferOffset(kai, svmPtr.data(), &svmAlloc); EXPECT_EQ(svmPtr.data(), returnedPtr); EXPECT_EQ(0U, *expectedPatchPtr); } 
compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp000066400000000000000000000021331363734646600334460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" using namespace NEO; TEST(KernelWithCacheFlushTests, givenDeviceWhichDoesntRequireCacheFlushWhenCheckIfKernelRequireFlushThenReturnedFalse) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto mockKernel = std::make_unique(*device); MockContext mockContext(device.get()); MockCommandQueue queue; bool flushRequired = mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(queue); EXPECT_FALSE(flushRequired); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp000066400000000000000000000343011363734646600303140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/kernel_arg_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_image.h" #include 
"opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; TEST_F(KernelImageArgTest, GIVENkernelWithImageArgsWHENcheckDifferentScenariosTHENproperBehaviour) { size_t imageWidth = image->getImageDesc().image_width; size_t imageHeight = image->getImageDesc().image_height; size_t imageDepth = image->getImageDesc().image_depth; uint32_t objectId = pKernelInfo->kernelArgInfo[4].offsetHeap; cl_mem memObj = image.get(); pKernel->setArg(0, sizeof(memObj), &memObj); pKernel->setArg(1, sizeof(memObj), &memObj); pKernel->setArg(3, sizeof(memObj), &memObj); pKernel->setArg(4, sizeof(memObj), &memObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto imgWidthOffset = ptrOffset(crossThreadData, 0x4); EXPECT_EQ(imageWidth, *imgWidthOffset); auto imgHeightOffset = ptrOffset(crossThreadData, 0xc); EXPECT_EQ(imageHeight, *imgHeightOffset); auto dummyOffset = ptrOffset(crossThreadData, 0x20); EXPECT_EQ(0x12344321u, *dummyOffset); auto imgDepthOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(imageDepth, *imgDepthOffset); EXPECT_EQ(objectId, *crossThreadData); } TEST_F(KernelImageArgTest, givenKernelWithFlatImageTokensWhenArgIsSetThenPatchAllParams) { size_t imageWidth = image->getImageDesc().image_width; size_t imageHeight = image->getImageDesc().image_height; size_t imageRowPitch = image->getImageDesc().image_row_pitch; uint64_t imageBaseAddress = image->getGraphicsAllocation()->getGpuAddress(); cl_mem memObj = image.get(); pKernel->setArg(0, sizeof(memObj), &memObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto pixelSize = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; auto offsetFlatBaseOffset = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatBaseOffset); EXPECT_EQ(imageBaseAddress, *reinterpret_cast(offsetFlatBaseOffset)); auto offsetFlatWidth = 
ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatWidth); EXPECT_EQ(static_cast((imageWidth * pixelSize) - 1), *offsetFlatWidth); auto offsetFlatHeight = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatHeight); EXPECT_EQ(static_cast((imageHeight * pixelSize) - 1), *offsetFlatHeight); auto offsetFlatPitch = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatPitch); EXPECT_EQ(imageRowPitch - 1, *offsetFlatPitch); } TEST_F(KernelImageArgTest, givenKernelWithValidOffsetNumMipLevelsWhenImageArgIsSetThenCrossthreadDataIsProperlyPatched) { MockImageBase image; image.imageDesc.num_mip_levels = 7U; cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto patchedNumMipLevels = ptrOffset(crossThreadData, offsetNumMipLevelsImage0); EXPECT_EQ(7U, *patchedNumMipLevels); } TEST_F(KernelImageArgTest, givenImageWithNumSamplesWhenSetArgIsCalledThenPatchNumSamplesInfo) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.num_samples = 16; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto sampleImg = Image::create(context.get(), {}, 0, 0, surfaceFormat, &imgDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem memObj = sampleImg; pKernel->setArg(0, sizeof(memObj), &memObj); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto patchedNumSamples = ptrOffset(crossThreadData, 0x3c); EXPECT_EQ(16u, *patchedNumSamples); sampleImg->release(); } TEST_F(KernelImageArgTest, givenImageWithWriteOnlyAccessAndReadOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenRetValNotValid) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = 
{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr img(Image::create(context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->kernelArgInfo[0].metadata.accessQualifier = NEO::KernelArgMetadata::AccessReadOnly; cl_mem memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); retVal = clSetKernelArg( pKernel.get(), 0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); retVal = clSetKernelArg( pKernel.get(), 0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); retVal = clSetKernelArg( pKernel.get(), 1000, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_INDEX); } TEST_F(KernelImageArgTest, givenImageWithReadOnlyAccessAndWriteOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenReturnsInvalidArgValue) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_READ_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr img(Image::create(context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->kernelArgInfo[0].metadata.accessQualifier = NEO::KernelArgMetadata::AccessWriteOnly; cl_mem memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); Image *image = NULL; memObj = image; 
retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_ARG_VALUE); } TEST_F(KernelImageArgTest, givenImageWithReadOnlyAccessAndReadOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenRetValNotValid) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_READ_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr img(Image::create(context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->kernelArgInfo[0].metadata.accessQualifier = NEO::KernelArgMetadata::AccessReadOnly; cl_mem memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_SUCCESS); } TEST_F(KernelImageArgTest, givenImageWithWriteOnlyAccessAndWriteOnlyArgWhenCheckCorrectImageAccessQualifierIsCalledThenRetValNotValid) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr img(Image::create(context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); pKernelInfo->kernelArgInfo[0].metadata.accessQualifier = NEO::KernelArgMetadata::AccessWriteOnly; cl_mem memObj = img.get(); retVal = pKernel->checkCorrectImageAccessQualifier(0, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_SUCCESS); } HWTEST_F(KernelImageArgTest, 
givenImgWithMcsAllocWhenMakeResidentThenMakeMcsAllocationResident) { int32_t execStamp = 0; cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto img = Image::create(context.get(), {}, 0, 0, surfaceFormat, &imgDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); img->setMcsAllocation(mcsAlloc); cl_mem memObj = img; pKernel->setArg(0, sizeof(memObj), &memObj); std::unique_ptr> csr(new MockCsr(execStamp, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex())); csr->setupContext(*pDevice->getDefaultEngine().osContext); pKernel->makeResident(*csr.get()); EXPECT_TRUE(csr->isMadeResident(mcsAlloc)); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_TRUE(csr->isMadeNonResident(mcsAlloc)); delete img; } TEST_F(KernelImageArgTest, givenKernelWithSettedArgWhenUnSetCalledThenArgIsUnsetAndArgCountIsDecreased) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr img(Image::create(context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); cl_mem memObj = img.get(); retVal = pKernel->setArg(0, sizeof(memObj), &memObj); EXPECT_EQ(1u, pKernel->getPatchedArgumentsNum()); 
EXPECT_TRUE(pKernel->getKernelArguments()[0].isPatched); pKernel->unsetArg(0); EXPECT_EQ(0u, pKernel->getPatchedArgumentsNum()); EXPECT_FALSE(pKernel->getKernelArguments()[0].isPatched); } TEST_F(KernelImageArgTest, givenNullKernelWhenClSetKernelArgCalledThenInvalidKernelCodeReturned) { cl_mem memObj = NULL; retVal = clSetKernelArg( NULL, 1000, sizeof(memObj), &memObj); EXPECT_EQ(retVal, CL_INVALID_KERNEL); } class MockSharingHandler : public SharingHandler { public: void synchronizeObject(UpdateData &updateData) override { updateData.synchronizationStatus = ACQUIRE_SUCCESFUL; } }; TEST_F(KernelImageArgTest, givenKernelWithSharedImageWhenSetArgCalledThenUsingSharedObjArgsShouldBeTrue) { cl_image_format imgFormat = {CL_RGBA, CL_UNORM_INT8}; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; cl_mem_flags flags = CL_MEM_WRITE_ONLY; imgDesc.image_width = 5; imgDesc.image_height = 5; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imgFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr img(Image::create(context.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imgDesc, nullptr, retVal)); cl_mem memObj = img.get(); MockSharingHandler *mockSharingHandler = new MockSharingHandler; img->setSharingHandler(mockSharingHandler); retVal = pKernel->setArg(0, sizeof(memObj), &memObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, pKernel->getPatchedArgumentsNum()); EXPECT_TRUE(pKernel->getKernelArguments()[0].isPatched); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); } TEST_F(KernelImageArgTest, givenWritableImageWhenSettingAsArgThenDoNotExpectAllocationInCacheFlushVector) { MockImageBase image; image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(true); image.graphicsAllocation->setFlushL3Required(false); cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, 
pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelImageArgTest, givenCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { MockImageBase image; image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false); image.graphicsAllocation->setFlushL3Required(true); cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(image.graphicsAllocation, pKernel->kernelArgRequiresCacheFlush[0]); } TEST_F(KernelImageArgTest, givenNoCacheFlushImageWhenSettingAsArgThenExpectAllocationInCacheFlushVector) { MockImageBase image; image.graphicsAllocation->setMemObjectsAllocationWithWritableFlags(false); image.graphicsAllocation->setFlushL3Required(false); cl_mem imageObj = ℑ pKernel->setArg(0, sizeof(imageObj), &imageObj); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp000066400000000000000000000377041363734646600312020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "CL/cl.h" #include "gtest/gtest.h" using namespace NEO; template class KernelArgImmediateTest : public Test { public: KernelArgImmediateTest() { } protected: void SetUp() override { DeviceFixture::SetUp(); memset(pCrossThreadData, 0xfe, sizeof(pCrossThreadData)); program = std::make_unique(*pDevice->getExecutionEnvironment()); // define kernel info pKernelInfo = std::make_unique(); // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; pKernelInfo->kernelArgInfo.resize(4); 
pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].crossthreadOffset = 0x38; pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset = 0x28; pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0x40; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x50; pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].size = sizeof(T); pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].size = sizeof(T); pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].size = sizeof(T); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].size = sizeof(T); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].size = sizeof(T); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(T); program = std::make_unique(*pDevice->getExecutionEnvironment()); pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgImmediate); pKernel->setKernelArgHandler(1, &Kernel::setArgImmediate); pKernel->setKernelArgHandler(2, &Kernel::setArgImmediate); pKernel->setKernelArgHandler(3, &Kernel::setArgImmediate); } void TearDown() override { delete 
pKernel; DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr program; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; char pCrossThreadData[0x60]; }; typedef ::testing::Types< char, float, int, short, long, unsigned char, unsigned int, unsigned short, unsigned long> KernelArgImmediateTypes; TYPED_TEST_CASE(KernelArgImmediateTest, KernelArgImmediateTypes); TYPED_TEST(KernelArgImmediateTest, SetKernelArg) { auto val = (TypeParam)0xaaaaaaaaULL; auto pVal = &val; this->pKernel->setArg(0, sizeof(TypeParam), pVal); auto pKernelArg = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); } TYPED_TEST(KernelArgImmediateTest, SetKernelArgWithInvalidIndex) { auto val = (TypeParam)0U; auto pVal = &val; auto ret = this->pKernel->setArg((uint32_t)-1, sizeof(TypeParam), pVal); EXPECT_EQ(ret, CL_INVALID_ARG_INDEX); } TYPED_TEST(KernelArgImmediateTest, setKernelArgMultipleArguments) { auto val = (TypeParam)0xaaaaaaaaULL; auto pVal = &val; this->pKernel->setArg(0, sizeof(TypeParam), pVal); auto pKernelArg = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); val = (TypeParam)0xbbbbbbbbULL; this->pKernel->setArg(1, sizeof(TypeParam), &val); pKernelArg = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); val = (TypeParam)0xccccccccULL; this->pKernel->setArg(2, sizeof(TypeParam), &val); pKernelArg = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); } TYPED_TEST(KernelArgImmediateTest, setKernelArgOverwritesCrossThreadData) { TypeParam val = (TypeParam)0xaaaaaaaaULL; TypeParam *pVal = &val; 
this->pKernel->setArg(0, sizeof(TypeParam), pVal); TypeParam *pKernelArg = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); val = (TypeParam)0xbbbbbbbbULL; this->pKernel->setArg(1, sizeof(TypeParam), &val); pKernelArg = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); val = (TypeParam)0xccccccccULL; this->pKernel->setArg(0, sizeof(TypeParam), &val); pKernelArg = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(val, *pKernelArg); } TYPED_TEST(KernelArgImmediateTest, setSingleKernelArgMultipleStructElements) { struct ImmediateStruct { TypeParam a; unsigned char unused[3]; // want to force a gap, ideally unpadded TypeParam b; } immediateStruct; immediateStruct.a = (TypeParam)0xaaaaaaaaULL; immediateStruct.b = (TypeParam)0xbbbbbbbbULL; immediateStruct.unused[0] = 0xfe; immediateStruct.unused[1] = 0xfe; immediateStruct.unused[2] = 0xfe; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].sourceOffset = offsetof(struct ImmediateStruct, a); this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].sourceOffset = offsetof(struct ImmediateStruct, b); this->pKernel->setArg(3, sizeof(immediateStruct), &immediateStruct); auto pCrossthreadA = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(immediateStruct.a, *pCrossthreadA); auto pCrossthreadB = (TypeParam *)(this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset); EXPECT_EQ(immediateStruct.b, *pCrossthreadB); } TYPED_TEST(KernelArgImmediateTest, givenTooLargePatchSizeWhenSettingArgThenDontReadMemoryBeyondLimit) { TypeParam 
memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); const auto destinationMemoryAddress = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset; const auto memoryBeyondLimitAddress = destinationMemoryAddress + sizeof(TypeParam); const auto memoryBeyondLimitBefore = *reinterpret_cast(memoryBeyondLimitAddress); this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(TypeParam) + 1; auto retVal = this->pKernel->setArg(0, sizeof(TypeParam), &memory[0]); const auto memoryBeyondLimitAfter = *reinterpret_cast(memoryBeyondLimitAddress); EXPECT_EQ(memoryBeyondLimitBefore, memoryBeyondLimitAfter); EXPECT_EQ(memory[0], *reinterpret_cast(destinationMemoryAddress)); EXPECT_EQ(CL_SUCCESS, retVal); } TYPED_TEST(KernelArgImmediateTest, givenNotTooLargePatchSizeWhenSettingArgThenDontReadMemoryBeyondLimit) { TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); const auto destinationMemoryAddress = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset; const auto memoryBeyondLimitAddress = destinationMemoryAddress + sizeof(TypeParam); const auto memoryBeyondLimitBefore = *reinterpret_cast(memoryBeyondLimitAddress); this->pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(TypeParam); auto retVal = this->pKernel->setArg(0, sizeof(TypeParam), &memory[0]); const auto memoryBeyondLimitAfter = *reinterpret_cast(memoryBeyondLimitAddress); EXPECT_EQ(memoryBeyondLimitBefore, memoryBeyondLimitAfter); EXPECT_EQ(memory[0], *reinterpret_cast(destinationMemoryAddress)); EXPECT_EQ(CL_SUCCESS, retVal); } TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndFirstPatchSizeTooLargeWhenSettingArgThenDontReadMemoryBeyondLimit) { if (sizeof(TypeParam) == 1) return; // multiple patch chars don't make 
sense TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); const auto destinationMemoryAddress1 = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].crossthreadOffset; const auto destinationMemoryAddress2 = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam); const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam)); const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam) / 2); this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].sourceOffset = 0; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].sourceOffset = sizeof(TypeParam) / 2; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].size = sizeof(TypeParam); this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].size = sizeof(TypeParam) / 2; auto retVal = this->pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, sizeof(TypeParam))); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam))); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(CL_SUCCESS, retVal); } TYPED_TEST(KernelArgImmediateTest, givenMulitplePatchesAndSecondPatchSizeTooLargeWhenSettingArgThenDontReadMemoryBeyondLimit) { if (sizeof(TypeParam) == 1) return; // multiple patch chars don't make sense TypeParam memory[2]; std::memset(&memory[0], 0xaa, 
sizeof(TypeParam)); std::memset(&memory[1], 0xbb, sizeof(TypeParam)); const auto destinationMemoryAddress1 = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].crossthreadOffset; const auto destinationMemoryAddress2 = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam) / 2; const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2 + sizeof(TypeParam) / 2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam) / 2); const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam) / 2); this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].size = 0; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].sourceOffset = 0; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].sourceOffset = sizeof(TypeParam) / 2; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].size = sizeof(TypeParam) / 2; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].size = sizeof(TypeParam); auto retVal = this->pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam) / 2)); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress2, sizeof(TypeParam) / 2)); EXPECT_EQ(CL_SUCCESS, retVal); } TYPED_TEST(KernelArgImmediateTest, givenMultiplePatchesAndOneSourceOffsetBeyondArgumentWhenSettingArgThenDontCopyThisPatch) { TypeParam memory[2]; std::memset(&memory[0], 0xaa, sizeof(TypeParam)); std::memset(&memory[1], 0xbb, 
sizeof(TypeParam)); const auto destinationMemoryAddress1 = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].crossthreadOffset; const auto destinationMemoryAddress2 = this->pKernel->getCrossThreadData() + this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].crossthreadOffset; const auto memoryBeyondLimitAddress1 = destinationMemoryAddress1 + sizeof(TypeParam); const auto memoryBeyondLimitAddress2 = destinationMemoryAddress2; const std::vector memoryBeyondLimitBefore1(memoryBeyondLimitAddress1, memoryBeyondLimitAddress1 + sizeof(TypeParam)); const std::vector memoryBeyondLimitBefore2(memoryBeyondLimitAddress2, memoryBeyondLimitAddress2 + sizeof(TypeParam)); this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].size = 0; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].sourceOffset = 0; this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[1].size = sizeof(TypeParam); this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].sourceOffset = sizeof(TypeParam); this->pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[2].size = 1; auto retVal = this->pKernel->setArg(3, sizeof(TypeParam), &memory[0]); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore1.data(), memoryBeyondLimitAddress1, memoryBeyondLimitBefore1.size())); EXPECT_EQ(0, std::memcmp(memoryBeyondLimitBefore2.data(), memoryBeyondLimitAddress2, memoryBeyondLimitBefore2.size())); EXPECT_EQ(0, std::memcmp(&memory[0], destinationMemoryAddress1, sizeof(TypeParam))); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_info_cl_tests.cpp000066400000000000000000000064141363734646600300160ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/kernel/kernel_info_cl.h" #include "gmock/gmock.h" #include "gtest/gtest.h" TEST(AsClConvertersTest, 
whenConvertingAccessQualifiersThenProperEnumValuesAreReturned) { using namespace NEO::KernelArgMetadata; EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_NONE), NEO::asClKernelArgAccessQualifier(AccessNone)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_READ_ONLY), NEO::asClKernelArgAccessQualifier(AccessReadOnly)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_WRITE_ONLY), NEO::asClKernelArgAccessQualifier(AccessWriteOnly)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ACCESS_READ_WRITE), NEO::asClKernelArgAccessQualifier(AccessReadWrite)); EXPECT_EQ(0U, NEO::asClKernelArgAccessQualifier(AccessUnknown)); } TEST(AsClConvertersTest, whenConvertingAddressQualifiersThenProperEnumValuesAreReturned) { using namespace NEO::KernelArgMetadata; EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_GLOBAL), NEO::asClKernelArgAddressQualifier(AddrGlobal)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_LOCAL), NEO::asClKernelArgAddressQualifier(AddrLocal)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_PRIVATE), NEO::asClKernelArgAddressQualifier(AddrPrivate)); EXPECT_EQ(static_cast(CL_KERNEL_ARG_ADDRESS_CONSTANT), NEO::asClKernelArgAddressQualifier(AddrConstant)); EXPECT_EQ(0U, NEO::asClKernelArgAddressQualifier(AddrUnknown)); } TEST(AsClConvertersTest, whenConvertingTypeQualifiersThenProperBitfieldsAreSet) { using namespace NEO::KernelArgMetadata; TypeQualifiers typeQualifiers = {}; typeQualifiers.constQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_CONST), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.volatileQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_VOLATILE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.restrictQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_RESTRICT), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.pipeQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_PIPE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; 
typeQualifiers.constQual = true; typeQualifiers.volatileQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); typeQualifiers = {}; typeQualifiers.constQual = true; typeQualifiers.volatileQual = true; typeQualifiers.pipeQual = true; typeQualifiers.restrictQual = true; EXPECT_EQ(static_cast(CL_KERNEL_ARG_TYPE_CONST | CL_KERNEL_ARG_TYPE_VOLATILE | CL_KERNEL_ARG_TYPE_RESTRICT | CL_KERNEL_ARG_TYPE_PIPE), NEO::asClKernelArgTypeQualifier(typeQualifiers)); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp000066400000000000000000000076471363734646600305210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "gtest/gtest.h" using namespace NEO; class PatchedKernelTest : public ::testing::Test { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); context.reset(new MockContext(device.get())); program.reset(Program::create("FillBufferBytes", context.get(), *device.get(), true, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); cl_device_id clDevice = device.get(); program->build(1, &clDevice, nullptr, nullptr, nullptr, false); kernel.reset(Kernel::create(program.get(), *program->getKernelInfo("FillBufferBytes"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { context.reset(); } std::unique_ptr context; std::unique_ptr device; std::unique_ptr program; std::unique_ptr kernel; cl_int retVal = CL_SUCCESS; }; TEST_F(PatchedKernelTest, givenKernelWithoutPatchedArgsWhenIsPatchedIsCalledThenReturnsFalse) { EXPECT_FALSE(kernel->isPatched()); } TEST_F(PatchedKernelTest, 
givenKernelWithAllArgsSetWithBufferWhenIsPatchedIsCalledThenReturnsTrue) { auto buffer = clCreateBuffer(context.get(), CL_MEM_READ_ONLY, sizeof(int), nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto argsNum = kernel->getKernelArgsNumber(); for (uint32_t i = 0; i < argsNum; i++) { kernel->setArg(i, buffer); } EXPECT_TRUE(kernel->isPatched()); clReleaseMemObject(buffer); } TEST_F(PatchedKernelTest, givenKernelWithoutAllArgsSetWhenIsPatchedIsCalledThenReturnsFalse) { auto buffer = clCreateBuffer(context.get(), CL_MEM_READ_ONLY, sizeof(int), nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); auto argsNum = kernel->getKernelArgsNumber(); for (uint32_t i = 0; i < argsNum; i++) { kernel->setArg(0, buffer); } EXPECT_FALSE(kernel->isPatched()); clReleaseMemObject(buffer); } TEST_F(PatchedKernelTest, givenKernelWithAllArgsSetWithSvmAllocWhenIsPatchedIsCalledThenReturnsTrue) { auto argsNum = kernel->getKernelArgsNumber(); for (uint32_t i = 0; i < argsNum; i++) { kernel->setArgSvmAlloc(0, nullptr, nullptr); } EXPECT_FALSE(kernel->isPatched()); for (uint32_t i = 0; i < argsNum; i++) { kernel->setArgSvmAlloc(i, nullptr, nullptr); } EXPECT_TRUE(kernel->isPatched()); } TEST_F(PatchedKernelTest, givenKernelWithAllArgsSetWithSvmWhenIsPatchedIsCalledThenReturnsTrue) { uint32_t size = sizeof(int); auto argsNum = kernel->getKernelArgsNumber(); for (uint32_t i = 0; i < argsNum; i++) { kernel->setArgSvm(0, size, nullptr, nullptr, 0u); } EXPECT_FALSE(kernel->isPatched()); for (uint32_t i = 0; i < argsNum; i++) { kernel->setArgSvm(i, size, nullptr, nullptr, 0u); } EXPECT_TRUE(kernel->isPatched()); } TEST_F(PatchedKernelTest, givenKernelWithOneArgumentToPatchWhichIsNonzeroIndexedWhenThatArgumentIsSetThenKernelIsPatched) { uint32_t size = sizeof(int); MockKernelWithInternals mockKernel(*device.get(), context.get()); EXPECT_EQ(0u, mockKernel.kernelInfo.argumentsToPatchNum); mockKernel.kernelInfo.storeKernelArgPatchInfo(1, 0, 0, 0, 0); EXPECT_EQ(1u, 
mockKernel.kernelInfo.argumentsToPatchNum); mockKernel.kernelInfo.storeKernelArgPatchInfo(1, 0, 0, 0, 0); EXPECT_EQ(1u, mockKernel.kernelInfo.argumentsToPatchNum); kernel.reset(mockKernel.mockKernel); kernel->initialize(); EXPECT_FALSE(kernel->Kernel::isPatched()); kernel->setArgSvm(1, size, nullptr, nullptr, 0u); EXPECT_TRUE(kernel->Kernel::isPatched()); kernel.release(); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp000066400000000000000000003142651363734646600322550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/execution_model/device_enqueue.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/printf_handler.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/gen_common/matchers.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_sampler.h" #include "patch_list.h" #include #include using namespace NEO; typedef ExecutionModelKernelFixture KernelReflectionSurfaceTest; typedef ExecutionModelKernelTest KernelReflectionSurfaceWithQueueTest; TEST_P(KernelReflectionSurfaceTest, CreatedKernelHasNullKernelReflectionSurface) { if 
(std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_EQ(nullptr, pKernel->getKernelReflectionSurface()); } } TEST_P(KernelReflectionSurfaceTest, GivenEmptyKernelInfoWhenPassedToGetCurbeParamsThenEmptyVectorIsReturned) { KernelInfo info; SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg; imageMemObjKernelArg.ArgumentNumber = 0; imageMemObjKernelArg.Offset = 32; imageMemObjKernelArg.Size = 4; imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D; info.storeKernelArgument(&imageMemObjKernelArg); SPatchSamplerKernelArgument samplerMemObjKernelArg; samplerMemObjKernelArg.ArgumentNumber = 1; samplerMemObjKernelArg.Offset = 32; samplerMemObjKernelArg.Size = 4; samplerMemObjKernelArg.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE; info.storeKernelArgument(&samplerMemObjKernelArg); SPatchDataParameterBuffer bufferMemObjKernelArg; bufferMemObjKernelArg.ArgumentNumber = 2; bufferMemObjKernelArg.Offset = 32; bufferMemObjKernelArg.Size = 4; info.storeKernelArgument(&bufferMemObjKernelArg); std::vector curbeParamsForBlock; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); // 3 params with Binding Table index of type 1024 EXPECT_EQ(3u, curbeParamsForBlock.size()); for (const auto &curbeParam : curbeParamsForBlock) { EXPECT_EQ(1024u, curbeParam.m_parameterType); } EXPECT_EQ(0u, firstSSHTokenIndex); } TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithCorrectlyFilledImageArgumentWhenPassedToGetCurbeParamsThenImageCurbeParamsAreReturned) { KernelInfo info; SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg; imageMemObjKernelArg.ArgumentNumber = 0; imageMemObjKernelArg.Offset = 32; imageMemObjKernelArg.Size = 4; imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D; const uint32_t offsetDataType = 4; const uint32_t 
offsetChannelOrder = 8; const uint32_t offsetHeap = 12; const uint32_t offsetDepth = 16; const uint32_t offsetWidth = 20; const uint32_t offsetHeight = 24; const uint32_t offsetObjectID = 28; const uint32_t offsetArraySize = 32; info.storeKernelArgument(&imageMemObjKernelArg); info.kernelArgInfo[0].metadataExtended = std::make_unique(); info.kernelArgInfo[0].metadata.accessQualifier = NEO::KernelArgMetadata::AccessReadOnly; info.kernelArgInfo[0].metadataExtended->accessQualifier = "read_only"; info.kernelArgInfo[0].isImage = true; info.kernelArgInfo[0].metadataExtended->argName = "img"; info.kernelArgInfo[0].offsetChannelDataType = offsetDataType; info.kernelArgInfo[0].offsetChannelOrder = offsetChannelOrder; info.kernelArgInfo[0].offsetHeap = offsetHeap; info.kernelArgInfo[0].offsetImgDepth = offsetDepth; info.kernelArgInfo[0].offsetImgWidth = offsetWidth; info.kernelArgInfo[0].offsetImgHeight = offsetHeight; info.kernelArgInfo[0].offsetObjectId = offsetObjectID; info.kernelArgInfo[0].offsetArraySize = offsetArraySize; info.gpuPointerSize = 8; std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); std::vector supportedImageParamTypes = {iOpenCL::DATA_PARAMETER_IMAGE_WIDTH, iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT, iOpenCL::DATA_PARAMETER_IMAGE_DEPTH, iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE, iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER, iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE, iOpenCL::DATA_PARAMETER_OBJECT_ID, 1024}; // type for Binding Table Index std::sort(supportedImageParamTypes.begin(), supportedImageParamTypes.end()); size_t ParamCount = supportedImageParamTypes.size(); EXPECT_EQ(ParamCount, curbeParams.size()); for (size_t i = 0; i < std::min(ParamCount, curbeParams.size()); i++) { if (i < ParamCount - 1) { EXPECT_EQ(supportedImageParamTypes[i] + 50, 
curbeParams[i].m_parameterType); EXPECT_EQ(sizeof(uint32_t), curbeParams[i].m_parameterSize); } else { EXPECT_EQ(1024u, curbeParams[i].m_parameterType); EXPECT_EQ(8u, curbeParams[i].m_parameterSize); } switch (curbeParams[i].m_parameterType - 50) { case iOpenCL::DATA_PARAMETER_IMAGE_WIDTH: EXPECT_EQ(offsetWidth, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_IMAGE_HEIGHT: EXPECT_EQ(offsetHeight, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_IMAGE_DEPTH: EXPECT_EQ(offsetDepth, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE: EXPECT_EQ(offsetDataType, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_IMAGE_CHANNEL_ORDER: EXPECT_EQ(offsetChannelOrder, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_IMAGE_ARRAY_SIZE: EXPECT_EQ(offsetArraySize, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_OBJECT_ID: EXPECT_EQ(offsetObjectID, curbeParams[i].m_patchOffset); break; } } EXPECT_EQ(curbeParams.size() - 1, firstSSHTokenIndex); } HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithSetBindingTableStateAndImageArgumentWhenPassedToGetCurbeParamsThenProperCurbeParamIsReturned) { typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; KernelInfo info; uint32_t imageOffset = 32; uint32_t btIndex = 3; info.gpuPointerSize = 8; SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg; imageMemObjKernelArg.ArgumentNumber = 0; imageMemObjKernelArg.Offset = imageOffset; imageMemObjKernelArg.Size = 4; imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D; info.storeKernelArgument(&imageMemObjKernelArg); SPatchBindingTableState bindingTableStateInfo; bindingTableStateInfo.Offset = 0; bindingTableStateInfo.Count = 4; info.patchInfo.bindingTableState = &bindingTableStateInfo; BINDING_TABLE_STATE bindingTableState[4]; memset(&bindingTableState, 0, 4 * sizeof(BINDING_TABLE_STATE)); bindingTableState[btIndex].getRawData(0) = 
imageOffset; info.heapInfo.pSsh = reinterpret_cast(bindingTableState); std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); EXPECT_NE(0u, curbeParams.size()); bool foundProperParam = false; for (const auto &curbeParam : curbeParams) { if (curbeParam.m_parameterType == 1024u) { EXPECT_EQ(btIndex, curbeParam.m_patchOffset); EXPECT_EQ(8u, curbeParam.m_parameterSize); EXPECT_EQ(0u, curbeParam.m_sourceOffset); foundProperParam = true; break; } } EXPECT_TRUE(foundProperParam); } HWTEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBindingTableStateAndImageArgumentWhenCountIsZeroThenGetCurbeParamsReturnsMaxBTIndex) { typedef typename FamilyType::BINDING_TABLE_STATE BINDING_TABLE_STATE; KernelInfo info; uint32_t imageOffset = 32; uint32_t btIndex = 0; uint32_t maxBTIndex = 253; info.gpuPointerSize = 8; SPatchImageMemoryObjectKernelArgument imageMemObjKernelArg; imageMemObjKernelArg.ArgumentNumber = 0; imageMemObjKernelArg.Offset = imageOffset; imageMemObjKernelArg.Size = 4; imageMemObjKernelArg.Type = iOpenCL::IMAGE_MEMORY_OBJECT_2D; info.storeKernelArgument(&imageMemObjKernelArg); SPatchBindingTableState bindingTableStateInfo; bindingTableStateInfo.Offset = 0; bindingTableStateInfo.Count = 0; info.patchInfo.bindingTableState = &bindingTableStateInfo; BINDING_TABLE_STATE bindingTableState[1]; memset(&bindingTableState, 0, 1 * sizeof(BINDING_TABLE_STATE)); bindingTableState[btIndex].getRawData(0) = imageOffset; info.heapInfo.pSsh = reinterpret_cast(bindingTableState); std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); EXPECT_EQ(1u, curbeParams.size()); bool foundProperParam = false; for (const auto 
&curbeParam : curbeParams) { if (curbeParam.m_parameterType == 1024u) { EXPECT_EQ(maxBTIndex, curbeParam.m_patchOffset); EXPECT_EQ(8u, curbeParam.m_parameterSize); EXPECT_EQ(0u, curbeParam.m_sourceOffset); foundProperParam = true; break; } } EXPECT_TRUE(foundProperParam); } TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithCorrectlyFilledSamplerArgumentWhenPassedToGetCurbeParamsThenSamplerCurbeParamsAreReturned) { KernelInfo info; SPatchSamplerKernelArgument samplerMemObjKernelArg; samplerMemObjKernelArg.ArgumentNumber = 1; samplerMemObjKernelArg.Offset = 32; samplerMemObjKernelArg.Size = 4; samplerMemObjKernelArg.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE; const uint32_t offsetSamplerAddressingMode = 4; const uint32_t offsetSamplerNormalizedCoords = 8; const uint32_t offsetSamplerSnapWa = 12; const uint32_t offsetObjectID = 28; info.storeKernelArgument(&samplerMemObjKernelArg); info.kernelArgInfo[0].metadataExtended = std::make_unique(); info.kernelArgInfo[0].isSampler = true; info.kernelArgInfo[0].metadataExtended->argName = "smp"; info.kernelArgInfo[0].offsetSamplerAddressingMode = offsetSamplerAddressingMode; info.kernelArgInfo[0].offsetSamplerNormalizedCoords = offsetSamplerNormalizedCoords; info.kernelArgInfo[0].offsetSamplerSnapWa = offsetSamplerSnapWa; info.kernelArgInfo[0].offsetObjectId = offsetObjectID; std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); std::vector supportedSamplerParamTypes = {iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE, iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS, iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED, iOpenCL::DATA_PARAMETER_OBJECT_ID}; std::sort(supportedSamplerParamTypes.begin(), supportedSamplerParamTypes.end()); size_t ParamCount = supportedSamplerParamTypes.size(); EXPECT_EQ(ParamCount + 2, curbeParams.size()); // + 2 
for 2 arguments' Binding Table Index params stored for (size_t i = 0; i < std::min(ParamCount, curbeParams.size()); i++) { EXPECT_EQ(supportedSamplerParamTypes[i] + 100, curbeParams[i].m_parameterType); EXPECT_EQ(sizeof(uint32_t), curbeParams[i].m_parameterSize); switch (curbeParams[i].m_parameterType - 100) { case iOpenCL::DATA_PARAMETER_SAMPLER_ADDRESS_MODE: EXPECT_EQ(offsetSamplerAddressingMode, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS: EXPECT_EQ(offsetSamplerNormalizedCoords, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED: EXPECT_EQ(offsetSamplerSnapWa, curbeParams[i].m_patchOffset); break; case iOpenCL::DATA_PARAMETER_OBJECT_ID: EXPECT_EQ(offsetObjectID, curbeParams[i].m_patchOffset); break; } } } TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBufferAndDataParameterBuffersTokensWhenPassedToGetCurbeParamsThenCorrectCurbeParamsWithProperSizesAreReturned) { KernelInfo info; SPatchDataParameterBuffer dataParameterBuffer; dataParameterBuffer.ArgumentNumber = 0; dataParameterBuffer.DataSize = 8; dataParameterBuffer.Offset = 40; dataParameterBuffer.SourceOffset = 0; dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; info.patchInfo.dataParameterBuffersKernelArgs.push_back(&dataParameterBuffer); info.storeKernelArgument(&dataParameterBuffer); std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); EXPECT_LT(1u, curbeParams.size()); bool kernelArgumentTokenFound = false; bool kernelArgumentSSHParamFound = false; for (const auto &curbeParam : curbeParams) { if (iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT == curbeParam.m_parameterType) { kernelArgumentTokenFound = true; EXPECT_EQ(0u, curbeParam.m_sourceOffset); EXPECT_EQ(8u, curbeParam.m_parameterSize); EXPECT_EQ(40u, 
curbeParam.m_patchOffset); } // kernel arg SSH param if (1024 == curbeParam.m_parameterType) { kernelArgumentSSHParamFound = true; EXPECT_EQ(0u, curbeParam.m_sourceOffset); EXPECT_EQ(0u, curbeParam.m_parameterSize); EXPECT_EQ(0u, curbeParam.m_patchOffset); } } EXPECT_TRUE(kernelArgumentTokenFound); EXPECT_TRUE((tokenMask & ((uint64_t)1 << iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT)) > 0); EXPECT_TRUE(kernelArgumentSSHParamFound); } TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithBufferAndNoDataParameterBuffersTokenWhenPassedToGetCurbeParamsThenCurbeParamForDataKernelArgumentTokenIsNotReturned) { KernelInfo info; SPatchDataParameterBuffer dataParameterBuffer; dataParameterBuffer.ArgumentNumber = 0; dataParameterBuffer.DataSize = 8; dataParameterBuffer.Offset = 40; dataParameterBuffer.SourceOffset = 0; dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; info.storeKernelArgument(&dataParameterBuffer); std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); bool kernelArgumentTokenFound = false; for (const auto &curbeParam : curbeParams) { if (iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT == curbeParam.m_parameterType) { kernelArgumentTokenFound = true; } } EXPECT_FALSE(kernelArgumentTokenFound); EXPECT_TRUE((tokenMask & ((uint64_t)1 << iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT)) == 0); } TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithLocalMemoryParameterWhenPassedToGetCurbeParamsThenCurbeParamForLocalMemoryArgIsReturned) { KernelInfo info; SPatchDataParameterBuffer dataParameterBuffer; const uint32_t crossThreadOffset = 10; const uint32_t dataSize = 8; const uint32_t slmAlignment = 80; dataParameterBuffer.ArgumentNumber = 0; dataParameterBuffer.DataSize = dataSize; dataParameterBuffer.Offset = crossThreadOffset; dataParameterBuffer.SourceOffset = 0; 
dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; info.storeKernelArgument(&dataParameterBuffer); KernelArgPatchInfo kernelArgPatchInfo; kernelArgPatchInfo.crossthreadOffset = crossThreadOffset; kernelArgPatchInfo.size = dataSize; kernelArgPatchInfo.sourceOffset = 76; info.kernelArgInfo[0].slmAlignment = slmAlignment; info.kernelArgInfo[0].kernelArgPatchInfoVector[0] = kernelArgPatchInfo; std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); bool localMemoryTokenFound = false; for (const auto &curbeParam : curbeParams) { if (iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES == curbeParam.m_parameterType) { localMemoryTokenFound = true; EXPECT_EQ(slmAlignment, curbeParam.m_sourceOffset); EXPECT_EQ(0u, curbeParam.m_parameterSize); EXPECT_EQ(crossThreadOffset, curbeParam.m_patchOffset); } } EXPECT_TRUE(localMemoryTokenFound); EXPECT_TRUE((tokenMask & ((uint64_t)1 << iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES)) > 0); } TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithoutLocalMemoryParameterWhenPassedToGetCurbeParamsThenCurbeParamForLocalMemoryArgIsNotReturned) { KernelInfo info; SPatchDataParameterBuffer dataParameterBuffer; const uint32_t crossThreadOffset = 10; const uint32_t dataSize = 8; const uint32_t slmAlignment = 0; dataParameterBuffer.ArgumentNumber = 0; dataParameterBuffer.DataSize = dataSize; dataParameterBuffer.Offset = crossThreadOffset; dataParameterBuffer.SourceOffset = 0; dataParameterBuffer.Type = iOpenCL::DATA_PARAMETER_KERNEL_ARGUMENT; info.storeKernelArgument(&dataParameterBuffer); KernelArgPatchInfo kernelArgPatchInfo; kernelArgPatchInfo.crossthreadOffset = crossThreadOffset; kernelArgPatchInfo.size = dataSize; kernelArgPatchInfo.sourceOffset = 76; info.kernelArgInfo[0].slmAlignment = slmAlignment; 
info.kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); std::vector curbeParams; uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo()); bool localMemoryTokenFound = false; for (const auto &curbeParam : curbeParams) { if (iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES == curbeParam.m_parameterType) { localMemoryTokenFound = true; } } EXPECT_FALSE(localMemoryTokenFound); EXPECT_TRUE((tokenMask & ((uint64_t)1 << iOpenCL::DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES)) == 0); } TEST_P(KernelReflectionSurfaceTest, getCurbeParamsReturnsSortedVector) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_TRUE(pKernel->isParentKernel); BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); size_t blockCount = blockManager->getCount(); EXPECT_NE(0u, blockCount); std::vector curbeParamsForBlock; for (size_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); if (pBlockInfo->name.find("simple_block_kernel") == std::string::npos) { EXPECT_LT(1u, curbeParamsForBlock.size()); } for (size_t i = 1; i < curbeParamsForBlock.size(); i++) { EXPECT_LE(curbeParamsForBlock[i - 1].m_parameterType, curbeParamsForBlock[i].m_parameterType); if (curbeParamsForBlock[i - 1].m_parameterType == curbeParamsForBlock[i].m_parameterType) { if (curbeParamsForBlock[i - 1].m_parameterType == iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_LOCAL_WORK_SIZE) { EXPECT_LE(curbeParamsForBlock[i - 1].m_patchOffset, curbeParamsForBlock[i].m_patchOffset); } else { 
EXPECT_LE(curbeParamsForBlock[i - 1].m_sourceOffset, curbeParamsForBlock[i].m_sourceOffset); } } } EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex); curbeParamsForBlock.resize(0); } } } TEST_P(KernelReflectionSurfaceTest, getCurbeParamsReturnsVectorWithExpectedParamTypes) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_TRUE(pKernel->isParentKernel); BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); size_t blockCount = blockManager->getCount(); EXPECT_NE(0u, blockCount); std::vector curbeParamsForBlock; for (size_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); const uint32_t bufferType = 49; const uint32_t imageType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 50; const uint32_t samplerType = iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_OBJECT_ID + 100; bool bufferFound = false; bool imageFound = false; bool samplerFound = false; if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) { EXPECT_LT(1u, curbeParamsForBlock.size()); for (const auto &curbeParams : curbeParamsForBlock) { switch (curbeParams.m_parameterType) { case bufferType: bufferFound = true; break; case imageType: imageFound = true; break; case samplerType: samplerFound = true; break; } } EXPECT_TRUE(bufferFound); EXPECT_TRUE(imageFound); EXPECT_TRUE(samplerFound); } EXPECT_EQ(curbeParamsForBlock.size() - pBlockInfo->kernelArgInfo.size(), firstSSHTokenIndex); curbeParamsForBlock.resize(0); } } } TEST_P(KernelReflectionSurfaceTest, getCurbeParamsReturnsTokenMask) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) 
{ EXPECT_TRUE(pKernel->isParentKernel); BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); size_t blockCount = blockManager->getCount(); EXPECT_NE(0u, blockCount); std::vector curbeParamsForBlock; for (size_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); uint64_t tokenMask = 0; uint32_t firstSSHTokenIndex = 0; MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo()); if (pBlockInfo->name.find("kernel_reflection_dispatch_0") != std::string::npos) { EXPECT_LT(1u, curbeParamsForBlock.size()); const uint64_t bufferToken = (uint64_t)1 << 63; const uint64_t imageToken = (uint64_t)1 << 50; const uint64_t samplerToken = (uint64_t)1 << 51; uint64_t expectedTokens = bufferToken | imageToken | samplerToken; EXPECT_NE(0u, tokenMask & expectedTokens); } curbeParamsForBlock.resize(0); } } } TEST(KernelReflectionSurfaceTestSingle, CreateKernelReflectionSurfaceCalledOnNonParentKernelDoesNotCreateReflectionSurface) { MockClDevice device{new MockDevice}; MockProgram program(*device.getExecutionEnvironment()); KernelInfo info; MockKernel kernel(&program, info, device); EXPECT_FALSE(kernel.isParentKernel); kernel.createReflectionSurface(); auto reflectionSurface = kernel.getKernelReflectionSurface(); EXPECT_EQ(nullptr, reflectionSurface); } TEST(KernelReflectionSurfaceTestSingle, CreateKernelReflectionSurfaceCalledOnNonSchedulerKernelWithForcedSchedulerDispatchDoesNotCreateKRS) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceDispatchScheduler.set(true); MockClDevice device{new MockDevice}; MockProgram program(*device.getExecutionEnvironment()); KernelInfo info; MockKernel kernel(&program, info, device); EXPECT_FALSE(kernel.isParentKernel); kernel.createReflectionSurface(); auto reflectionSurface = kernel.getKernelReflectionSurface(); EXPECT_EQ(nullptr, reflectionSurface); } 
// Builds a parent kernel (HasDeviceEnqueue = 1) whose KernelInfo has no kernel
// arguments, registers that same KernelInfo as the program's block kernel, and
// checks that after creating and patching the reflection surface the helper's
// defaultQueue / devQueue / eventPool records hold "undefined offset" and size 0.
TEST(KernelReflectionSurfaceTestSingle, ObtainKernelReflectionSurfaceWithoutKernelArgs) {
    MockContext context;
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockProgram program(*device->getExecutionEnvironment());

    // Registered with program.blockKernelManager below; there is no matching
    // delete here, so the manager presumably takes ownership - TODO confirm.
    KernelInfo *blockInfo = new KernelInfo;
    KernelInfo &info = *blockInfo;

    cl_queue_properties properties[1] = {0};
    DeviceQueue devQueue(&context, device.get(), properties[0]);

    // All patch structures are value-initialized so that fields this test does
    // not set explicitly are zero rather than indeterminate.
    SPatchExecutionEnvironment environment = {};
    environment.HasDeviceEnqueue = 1;
    info.patchInfo.executionEnvironment = &environment;

    SKernelBinaryHeaderCommon kernelHeader = {};
    info.heapInfo.pKernelHeader = &kernelHeader;

    SPatchDataParameterStream dataParameterStream = {};
    dataParameterStream.Size = 0;
    dataParameterStream.DataParameterStreamSize = 0;
    info.patchInfo.dataParameterStream = &dataParameterStream;

    SPatchBindingTableState bindingTableState = {};
    bindingTableState.Count = 0;
    bindingTableState.Offset = 0;
    bindingTableState.Size = 0;
    bindingTableState.SurfaceStateOffset = 0;
    info.patchInfo.bindingTableState = &bindingTableState;

    MockKernel kernel(&program, info, *device);
    EXPECT_TRUE(kernel.isParentKernel);

    program.blockKernelManager->addBlockKernelInfo(blockInfo);

    kernel.createReflectionSurface();
    auto reflectionSurface = kernel.getKernelReflectionSurface();
    // Fatal on purpose: without a surface the patching step and the
    // expectations below are meaningless.
    ASSERT_NE(nullptr, reflectionSurface);

    kernel.patchReflectionSurface(&devQueue, nullptr);

    const uint64_t undefinedOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset;

    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.offset);
    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::devQueue.offset);
    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::eventPool.offset);

    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.size);
    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::devQueue.size);
    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::eventPool.size);
}
// Same setup as the "without kernel args" case, but the KernelInfo carries one
// kernel argument flagged isDeviceQueue. After patching, the helper's devQueue
// record must hold that argument's cross-thread offset and size, while
// defaultQueue and eventPool stay at "undefined offset" / size 0.
TEST(KernelReflectionSurfaceTestSingle, ObtainKernelReflectionSurfaceWithDeviceQueueKernelArg) {
    MockContext context;
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockProgram program(*device->getExecutionEnvironment());

    // Registered with program.blockKernelManager below; there is no matching
    // delete here, so the manager presumably takes ownership - TODO confirm.
    KernelInfo *blockInfo = new KernelInfo;
    KernelInfo &info = *blockInfo;

    cl_queue_properties properties[1] = {0};
    DeviceQueue devQueue(&context, device.get(), properties[0]);

    const uint32_t devQueueCurbeOffset = 16;
    const uint32_t devQueueCurbeSize = 4;

    // All patch structures are value-initialized so that fields this test does
    // not set explicitly are zero rather than indeterminate.
    SPatchExecutionEnvironment environment = {};
    environment.HasDeviceEnqueue = 1;
    info.patchInfo.executionEnvironment = &environment;

    SKernelBinaryHeaderCommon kernelHeader = {};
    info.heapInfo.pKernelHeader = &kernelHeader;

    SPatchDataParameterStream dataParameterStream = {};
    dataParameterStream.Size = 0;
    dataParameterStream.DataParameterStreamSize = 0;
    info.patchInfo.dataParameterStream = &dataParameterStream;

    SPatchBindingTableState bindingTableState = {};
    bindingTableState.Count = 0;
    bindingTableState.Offset = 0;
    bindingTableState.Size = 0;
    bindingTableState.SurfaceStateOffset = 0;
    info.patchInfo.bindingTableState = &bindingTableState;

    KernelArgInfo argInfo;
    argInfo.isDeviceQueue = true;

    info.kernelArgInfo.resize(1);
    info.kernelArgInfo[0] = std::move(argInfo);
    info.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1);
    info.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = devQueueCurbeOffset;
    info.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = devQueueCurbeSize;

    MockKernel kernel(&program, info, *device);
    EXPECT_TRUE(kernel.isParentKernel);

    program.blockKernelManager->addBlockKernelInfo(blockInfo);

    kernel.createReflectionSurface();
    auto reflectionSurface = kernel.getKernelReflectionSurface();
    // Fatal on purpose: without a surface the patching step and the
    // expectations below are meaningless.
    ASSERT_NE(nullptr, reflectionSurface);

    kernel.patchReflectionSurface(&devQueue, nullptr);

    const uint64_t undefinedOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset;

    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.offset);
    EXPECT_EQ(devQueueCurbeOffset, MockKernel::ReflectionSurfaceHelperPublic::devQueue.offset);
    EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::eventPool.offset);

    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.size);
    EXPECT_EQ(devQueueCurbeSize, MockKernel::ReflectionSurfaceHelperPublic::devQueue.size);
    EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::eventPool.size);
}

// On devices reporting "OpenCL 2.", recomputes the expected reflection surface
// size from the block kernels' curbe params, constant buffers and sampler state,
// compares it with the created surface, and sanity-checks the header and the
// per-kernel data for non-zero fields.
TEST_P(KernelReflectionSurfaceTest, CreateKernelReflectionSurface) {
    const bool isOcl2x = std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos;
    if (!isOcl2x) {
        return;
    }

    EXPECT_TRUE(pKernel->isParentKernel);

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();

    EXPECT_EQ(3u, blockCount);

    const bool isKernelReflection = pKernel->getKernelInfo().name == "kernel_reflection";

    size_t maxConstantBufferSize = 0;
    // Only the "kernel_reflection" parent is expected to have an image and a sampler.
    const size_t parentImageCount = isKernelReflection ? 1 : 0;
    const size_t parentSamplerCount = isKernelReflection ? 1 : 0;
    size_t samplerStateArrayAndBorderColorTotalSize = 0;
    size_t totalCurbeParamsSize = 0;

    std::vector<size_t> blockCurbeParamCounts(blockCount);
    std::vector<size_t> samplerStateAndBorderColorSizes(blockCount);
    std::vector<IGIL_KernelCurbeParams> curbeParamsForBlock;

    for (size_t i = 0; i < blockCount; i++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);

        uint64_t tokenMask = 0;
        uint32_t firstSSHTokenIndex = 0;
        MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParamsForBlock, tokenMask, firstSSHTokenIndex, *pBlockInfo, pDevice->getHardwareInfo());

        blockCurbeParamCounts[i] = curbeParamsForBlock.size();

        maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->patchInfo.dataParameterStream->DataParameterStreamSize));

        totalCurbeParamsSize += blockCurbeParamCounts[i];

        size_t samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(pDevice->getHardwareInfo());
        samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment);
        samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize();
        samplerStateAndBorderColorSizes[i] = samplerStateAndBorderColorSize;

        samplerStateArrayAndBorderColorTotalSize += alignUp(samplerStateAndBorderColorSizes[i], sizeof(void *));
        curbeParamsForBlock.clear();
    }

    // Convert the accumulated param count into bytes.
    totalCurbeParamsSize *= sizeof(IGIL_KernelCurbeParams);

    size_t expectedReflectionSurfaceSize = alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) * blockCount, sizeof(void *));
    expectedReflectionSurfaceSize += alignUp(sizeof(IGIL_KernelData), sizeof(void *)) * blockCount;
    expectedReflectionSurfaceSize += (parentSamplerCount * sizeof(IGIL_SamplerParams) + maxConstantBufferSize) * blockCount +
                                     totalCurbeParamsSize +
                                     parentImageCount * sizeof(IGIL_ImageParamters) +
                                     parentSamplerCount * sizeof(IGIL_ParentSamplerParams) +
                                     samplerStateArrayAndBorderColorTotalSize;

    pKernel->createReflectionSurface();
    auto reflectionSurface = pKernel->getKernelReflectionSurface();

    ASSERT_NE(nullptr, reflectionSurface);
    EXPECT_EQ(expectedReflectionSurfaceSize, reflectionSurface->getUnderlyingBufferSize());

    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());

    uint32_t parentImages = 0;
    uint32_t parentSamplers = 0;

    if (isKernelReflection) {
        parentImages = 1;
        parentSamplers = 1;
        EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentSamplerParamsOffset);
    }

    EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);
    EXPECT_EQ(parentImages, pKernelHeader->m_ParentKernelImageCount);
    EXPECT_LT(sizeof(IGIL_KernelDataHeader), pKernelHeader->m_ParentImageDataOffset);
    EXPECT_EQ(parentSamplers, pKernelHeader->m_ParentSamplerCount);
    EXPECT_NE(pKernelHeader->m_ParentImageDataOffset, pKernelHeader->m_ParentSamplerParamsOffset);

    // Curbe tokens
    EXPECT_NE(0u, totalCurbeParamsSize);

    for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
        // NOTE(review): m_data is not indexed by i, so every iteration inspects
        // the first kernel's address data. ObtainKernelReflectionSurfacePatchesBlocksCurbe
        // uses addressData[i]; confirm whether that was intended here as well
        // before changing it, since the non-zero expectations below may not
        // hold for every block.
        IGIL_KernelAddressData *addressData = pKernelHeader->m_data;
        EXPECT_NE(0u, addressData->m_KernelDataOffset);
        EXPECT_NE(0u, addressData->m_BTSize);
        EXPECT_NE(0u, addressData->m_SSHTokensOffset);
        EXPECT_NE(0u, addressData->m_ConstantBufferOffset);
        EXPECT_NE(0u, addressData->m_BTSoffset);

        IGIL_KernelData *kernelData = reinterpret_cast<IGIL_KernelData *>(ptrOffset(pKernelHeader, static_cast<size_t>(addressData->m_KernelDataOffset)));

        EXPECT_NE_VAL(0u, kernelData->m_SIMDSize);
        EXPECT_NE_VAL(0u, kernelData->m_PatchTokensMask);
        EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeParams);
        EXPECT_NE_VAL(0u, kernelData->m_numberOfCurbeTokens);
        EXPECT_NE_VAL(0u, kernelData->m_sizeOfConstantBuffer);

        for (uint32_t j = 0; j < kernelData->m_numberOfCurbeParams; j++) {
            EXPECT_NE_VAL(0u, kernelData->m_data[j].m_parameterType);
        }
    }
}

// With nine (default-constructed) kernel args, firstSSHTokenIndex must equal the
// number of returned curbe params minus nine.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithArgsWhenPassedToGetCurbeParamsThenProperFirstSSHTokenIndexIsReturned) {
    KernelInfo info;
    info.kernelArgInfo.resize(9);

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pDevice->getHardwareInfo());

    EXPECT_EQ(curbeParams.size() - 9, firstSSHTokenIndex);
}

// Fills KernelInfo::workloadInfo with distinct offsets for every execution
// parameter and checks that getCurbeParams reports each of them with the
// matching patch offset, the expected multiplicity, and a set bit in tokenMask.
TEST_P(KernelReflectionSurfaceTest, GivenKernelInfoWithExecutionParametersWhenPassedToGetCurbeParamsThenProperCurbeParamsAreReturned) {
    KernelInfo info;

    std::vector<uint32_t> supportedExecutionParamTypes = {iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE,
                                                          iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE,
                                                          iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS,
                                                          iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS,
                                                          iOpenCL::DATA_PARAMETER_GLOBAL_WORK_OFFSET,
                                                          iOpenCL::DATA_PARAMETER_NUM_HARDWARE_THREADS,
                                                          iOpenCL::DATA_PARAMETER_PARENT_EVENT,
                                                          iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE};

    std::sort(supportedExecutionParamTypes.begin(), supportedExecutionParamTypes.end());

    const uint32_t workDimOffset = 80;
    const uint32_t numHwThreads = 84;
    const uint32_t parentEventOffset = 88;
    const uint32_t lwsOffsets[3] = {4, 8, 12};
    const uint32_t lwsOffsets2[3] = {16, 20, 24};
    const uint32_t gwsOffsets[3] = {28, 32, 36};
    const uint32_t numOffsets[3] = {40, 44, 48};
    const uint32_t globalOffsetOffsets[3] = {52, 56, 60};
    const uint32_t enqueuedLocalWorkSizeOffsets[3] = {64, 68, 72};

    for (size_t dim = 0; dim < 3; dim++) {
        info.workloadInfo.localWorkSizeOffsets[dim] = lwsOffsets[dim];
        info.workloadInfo.localWorkSizeOffsets2[dim] = lwsOffsets2[dim];
        info.workloadInfo.globalWorkSizeOffsets[dim] = gwsOffsets[dim];
        info.workloadInfo.numWorkGroupsOffset[dim] = numOffsets[dim];
        info.workloadInfo.globalWorkOffsetOffsets[dim] = globalOffsetOffsets[dim];
        info.workloadInfo.enqueuedLocalWorkSizeOffsets[dim] = enqueuedLocalWorkSizeOffsets[dim];
    }

    info.workloadInfo.workDimOffset = workDimOffset;
    // NUM_HARDWARE_THREADS unsupported
    EXPECT_TRUE(numHwThreads > 0u);
    info.workloadInfo.parentEventOffset = parentEventOffset;

    std::vector<IGIL_KernelCurbeParams> curbeParams;
    uint64_t tokenMask = 0;
    uint32_t firstSSHTokenIndex = 0;
    MockKernel::ReflectionSurfaceHelperPublic::getCurbeParams(curbeParams, tokenMask, firstSSHTokenIndex, info, pPlatform->getClDevice(0)->getHardwareInfo());

    EXPECT_LE(supportedExecutionParamTypes.size(), curbeParams.size());

    // Per-dimension parameters: patch offsets and number of occurrences.
    for (auto paramType : supportedExecutionParamTypes) {
        auto foundParams = 0u;
        // Position within curbeParams; advances for every entry, not only for
        // matching ones. Matching LOCAL_WORK_SIZE entries at positions 0-2 are
        // compared with lwsOffsets, later ones with lwsOffsets2.
        auto curbeIndex = 0;

        for (const auto &curbeParam : curbeParams) {
            if (paramType == curbeParam.m_parameterType) {
                foundParams++;
                const uint32_t index = curbeParam.m_sourceOffset / sizeof(uint32_t);

                switch (paramType) {
                case iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE:
                    if (curbeIndex < 3) {
                        EXPECT_EQ(lwsOffsets[index], curbeParam.m_patchOffset);
                    } else {
                        EXPECT_EQ(lwsOffsets2[index], curbeParam.m_patchOffset);
                    }
                    break;
                case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE:
                    EXPECT_EQ(gwsOffsets[index], curbeParam.m_patchOffset);
                    break;
                case iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS:
                    EXPECT_EQ(numOffsets[index], curbeParam.m_patchOffset);
                    break;
                case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_OFFSET:
                    EXPECT_EQ(globalOffsetOffsets[index], curbeParam.m_patchOffset);
                    break;
                case iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE:
                    EXPECT_EQ(enqueuedLocalWorkSizeOffsets[index], curbeParam.m_patchOffset);
                    break;
                }
            }
            curbeIndex++;
        }

        switch (paramType) {
        case iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE:
            EXPECT_EQ(6u, foundParams) << "Parameter token: " << paramType;
            break;
        case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_SIZE:
        case iOpenCL::DATA_PARAMETER_NUM_WORK_GROUPS:
        case iOpenCL::DATA_PARAMETER_GLOBAL_WORK_OFFSET:
        case iOpenCL::DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE:
            EXPECT_EQ(3u, foundParams) << "Parameter token: " << paramType;
            break;
        }
    }

    // Scalar parameters: exactly one entry each, at the configured offset.
    for (auto paramType : supportedExecutionParamTypes) {
        auto foundParams = 0u;

        for (const auto &curbeParam : curbeParams) {
            if (paramType != curbeParam.m_parameterType) {
                continue;
            }
            switch (paramType) {
            case iOpenCL::DATA_PARAMETER_PARENT_EVENT:
                EXPECT_EQ(parentEventOffset, curbeParam.m_patchOffset);
                foundParams++;
                break;
            case iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS:
                EXPECT_EQ(workDimOffset, curbeParam.m_patchOffset);
                foundParams++;
                break;
            }
        }

        switch (paramType) {
        case iOpenCL::DATA_PARAMETER_PARENT_EVENT:
        case iOpenCL::DATA_PARAMETER_WORK_DIMENSIONS:
            EXPECT_EQ(1u, foundParams);
            break;
        }
    }

    // Every supported token except NUM_HARDWARE_THREADS must be present in the mask.
    for (auto paramType : supportedExecutionParamTypes) {
        if (paramType == iOpenCL::DATA_PARAMETER_NUM_HARDWARE_THREADS) {
            continue;
        }
        const auto expectedTokens = static_cast<uint64_t>(1) << paramType;
        EXPECT_TRUE((tokenMask & expectedTokens) > 0) << "Parameter Token: " << paramType;
    }
}

// Test parameters shared by both reflection-surface fixtures: one binary file,
// combined with each parent kernel name.
static const char *binaryFile = "simple_block_kernel";
static const char *KernelNames[] = {"kernel_reflection", "simple_block_kernel"};

INSTANTIATE_TEST_CASE_P(KernelReflectionSurfaceTest,
                        KernelReflectionSurfaceTest,
                        ::testing::Combine(
                            ::testing::Values(binaryFile),
                            ::testing::ValuesIn(KernelNames)));

// After creating and patching the reflection surface, checks for every block
// kernel that the event pool address, the default device queue address and any
// device-queue kernel argument were written into that block's constant buffer.
HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, ObtainKernelReflectionSurfacePatchesBlocksCurbe) {
    const bool isOcl2x = std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos;
    if (!isOcl2x) {
        return;
    }

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();

    EXPECT_NE(0u, blockCount);

    pKernel->createReflectionSurface();
    auto *reflectionSurface = pKernel->getKernelReflectionSurface();
    ASSERT_NE(nullptr, reflectionSurface);

    pKernel->patchReflectionSurface(pDevQueue, nullptr);

    void *reflectionSurfaceMemory = reflectionSurface->getUnderlyingBuffer();
    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurfaceMemory);

    EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);

    // Compares the value stored at curbe + offset with expectedAddress. A
    // 4-byte field is zero-extended before the comparison, an 8-byte field is
    // compared directly, and any other size is not verified.
    auto expectPatchedAddress = [](void *curbe, size_t offset, size_t size, uint64_t expectedAddress) {
        void *patchedPointer = ptrOffset(curbe, offset);
        if (size == sizeof(uint32_t)) {
            const uint64_t patchedValue64 = *static_cast<uint32_t *>(patchedPointer);
            EXPECT_EQ(expectedAddress, patchedValue64);
        } else if (size == sizeof(uint64_t)) {
            EXPECT_EQ(expectedAddress, *static_cast<uint64_t *>(patchedPointer));
        }
    };

    for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
        const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);

        IGIL_KernelAddressData *addressData = pKernelHeader->m_data;

        EXPECT_NE(0u, addressData[i].m_ConstantBufferOffset);

        void *pCurbe = ptrOffset(reflectionSurfaceMemory, static_cast<size_t>(addressData[i].m_ConstantBufferOffset));

        if (const auto *eventPoolToken = pBlockInfo->patchInfo.pAllocateStatelessEventPoolSurface) {
            expectPatchedAddress(pCurbe, eventPoolToken->DataParamOffset, eventPoolToken->DataParamSize,
                                 pDevQueue->getEventPoolBuffer()->getGpuAddress());
        }

        // The width of this field comes from the default-device-queue token
        // itself, not from the event-pool token (which may be absent).
        if (const auto *defaultQueueToken = pBlockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
            expectPatchedAddress(pCurbe, defaultQueueToken->DataParamOffset, defaultQueueToken->DataParamSize,
                                 pDevQueue->getQueueBuffer()->getGpuAddress());
        }

        for (const auto &arg : pBlockInfo->kernelArgInfo) {
            if (arg.isDeviceQueue) {
                expectPatchedAddress(pCurbe, arg.kernelArgPatchInfoVector[0].crossthreadOffset, arg.kernelArgPatchInfoVector[0].size,
                                     pDevQueue->getQueueBuffer()->getGpuAddress());
            }
        }
    }
}

// Overwrites the parent image / sampler parameter areas of the reflection
// surface with 0xff, patches the surface, and checks that the object IDs of the
// parent kernel's image and sampler arguments were written back.
HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, ObtainKernelReflectionSurfaceSetsParentImageAndSamplersParams) {
    const bool isOcl2x = std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos;
    if (!isOcl2x) {
        return;
    }

    BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
    size_t blockCount = blockManager->getCount();

    EXPECT_NE(0u, blockCount);

    std::unique_ptr<Image> image3d(ImageHelper<Image3dDefaults>::create(context));
    std::unique_ptr<Sampler> sampler(new MockSampler(context,
                                                     true,
                                                     (cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE,
                                                     (cl_filter_mode)CL_FILTER_LINEAR));

    cl_sampler samplerCl = sampler.get();
    cl_mem imageCl = image3d.get();

    // Only the "kernel_reflection" parent gets a sampler (arg 0) and an image (arg 1).
    if (pKernel->getKernelInfo().name == "kernel_reflection") {
        pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl);
        pKernel->setArgImage(1, sizeof(cl_mem), &imageCl);
    }

    pKernel->createReflectionSurface();
    auto *reflectionSurface = pKernel->getKernelReflectionSurface();
    ASSERT_NE(nullptr, reflectionSurface);

    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
    IGIL_ImageParamters *pParentImageParams = reinterpret_cast<IGIL_ImageParamters *>(ptrOffset(pKernelHeader, static_cast<size_t>(pKernelHeader->m_ParentImageDataOffset)));
    IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast<IGIL_ParentSamplerParams *>(ptrOffset(pKernelHeader, static_cast<size_t>(pKernelHeader->m_ParentSamplerParamsOffset)));

    // Poison both areas so the expectations below can only pass if patching
    // actually rewrites them.
    memset(pParentImageParams, 0xff, sizeof(IGIL_ImageParamters) * pKernelHeader->m_ParentKernelImageCount);
    memset(pParentSamplerParams, 0xff, sizeof(IGIL_ParentSamplerParams) * pKernelHeader->m_ParentSamplerCount);

    pKernel->patchReflectionSurface(pDevQueue, nullptr);

    EXPECT_EQ(blockCount, pKernelHeader->m_numberOfKernels);

    // The checks inside do not depend on i; they are repeated once per kernel
    // recorded in the header.
    for (uint32_t i = 0; i < pKernelHeader->m_numberOfKernels; i++) {
        if (pKernelHeader->m_ParentKernelImageCount > 0) {
            uint32_t imageIndex = 0;
            for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
                if (arg.isImage) {
                    EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID);
                    imageIndex++;
                }
            }
        }

        if (pKernelHeader->m_ParentSamplerCount > 0) {
            uint32_t samplerIndex = 0;
            for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) {
                if (arg.isSampler) {
                    EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID);
                    samplerIndex++;
                }
            }
        }
    }
}
INSTANTIATE_TEST_CASE_P(KernelReflectionSurfaceWithQueueTest, KernelReflectionSurfaceWithQueueTest, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); class ReflectionSurfaceHelperTest : public testing::TestWithParam> { protected: ReflectionSurfaceHelperTest() { } void SetUp() override { } void TearDown() override { } }; TEST_P(ReflectionSurfaceHelperTest, ReflectionSurfaceHelperCompareFunction) { IGIL_KernelCurbeParams curbeParamFirst, curbeParamSecond; bool result; std::tie(curbeParamFirst, curbeParamSecond, result) = GetParam(); EXPECT_EQ(result, MockKernel::ReflectionSurfaceHelperPublic::compareFunction(curbeParamFirst, curbeParamSecond)); } // arg first, arg second, expected result static std::tuple Inputs[] = {std::make_tuple(IGIL_KernelCurbeParams{1, 0, 0, 0}, IGIL_KernelCurbeParams{1, 0, 0, 100}, true), std::make_tuple(IGIL_KernelCurbeParams{1, 0, 0, 100}, IGIL_KernelCurbeParams{1, 0, 0, 0}, false), std::make_tuple(IGIL_KernelCurbeParams{1, 0, 0, 100}, IGIL_KernelCurbeParams{100, 0, 0, 0}, true), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 100}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 0}, true), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 0}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 100}, true), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 100}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 0}, false), std::make_tuple(IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 8, 0}, IGIL_KernelCurbeParams{iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE, 0, 4, 100}, false)}; INSTANTIATE_TEST_CASE_P(ReflectionSurfaceHelperTest, ReflectionSurfaceHelperTest, ::testing::ValuesIn(Inputs)); struct LocalIDPresent { bool x; bool y; bool z; bool flattend; }; class ReflectionSurfaceHelperFixture : public PlatformFixture, public 
::testing::Test { protected: ReflectionSurfaceHelperFixture() { } void SetUp() override { PlatformFixture::SetUp(); } void TearDown() override { PlatformFixture::TearDown(); } }; class ReflectionSurfaceHelperSetKernelDataTest : public testing::TestWithParam>, // LocalIDPresent, private surface size, public PlatformFixture { protected: ReflectionSurfaceHelperSetKernelDataTest() { } void SetUp() override { PlatformFixture::SetUp(); samplerStateArray.BorderColorOffset = 0x3; samplerStateArray.Count = 1; samplerStateArray.Offset = 5; samplerStateArray.Size = 16; samplerStateArray.Token = 1; info.patchInfo.samplerStateArray = &samplerStateArray; dataParameterStream.DataParameterStreamSize = 60; dataParameterStream.Size = 20; dataParameterStream.Token = 3; info.patchInfo.dataParameterStream = &dataParameterStream; executionEnvironment = {}; executionEnvironment.LargestCompiledSIMDSize = 16; executionEnvironment.HasBarriers = 1; info.patchInfo.executionEnvironment = &executionEnvironment; info.patchInfo.threadPayload = &threadPayload; info.patchInfo.pAllocateStatelessPrivateSurface = &privateSurface; info.reqdWorkGroupSize[0] = 4; info.reqdWorkGroupSize[1] = 8; info.reqdWorkGroupSize[2] = 2; info.workloadInfo.slmStaticSize = 1652; IGIL_KernelCurbeParams testParams[3] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}; curbeParams.push_back(testParams[0]); curbeParams.push_back(testParams[1]); curbeParams.push_back(testParams[2]); } void TearDown() override { PlatformFixture::TearDown(); } KernelInfo info; SPatchSamplerStateArray samplerStateArray; SPatchDataParameterStream dataParameterStream; SPatchExecutionEnvironment executionEnvironment; SPatchThreadPayload threadPayload; SPatchAllocateStatelessPrivateSurface privateSurface; std::vector curbeParams; }; // arg first, arg second, expected result static std::tuple InputsSetKernelData[] = {std::make_tuple(LocalIDPresent{1, 0, 0, 0}, 0), std::make_tuple(LocalIDPresent{0, 1, 0, 0}, 0), std::make_tuple(LocalIDPresent{0, 0, 1, 
0}, 32), std::make_tuple(LocalIDPresent{0, 0, 0, 1}, 0), std::make_tuple(LocalIDPresent{0, 0, 0, 0}, 32)}; INSTANTIATE_TEST_CASE_P(ReflectionSurfaceHelperSetKernelDataTest, ReflectionSurfaceHelperSetKernelDataTest, ::testing::ValuesIn(InputsSetKernelData)); TEST_P(ReflectionSurfaceHelperSetKernelDataTest, setKernelData) { LocalIDPresent localIDPresent; uint32_t privateSurfaceSize; std::tie(localIDPresent, privateSurfaceSize) = GetParam(); threadPayload.LocalIDFlattenedPresent = localIDPresent.flattend; threadPayload.LocalIDXPresent = localIDPresent.x; threadPayload.LocalIDYPresent = localIDPresent.y; threadPayload.LocalIDZPresent = localIDPresent.z; privateSurface.PerThreadPrivateMemorySize = privateSurfaceSize; std::unique_ptr kernelDataMemory(new char[4096]); uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getClDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 12; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getClDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(3u, kernelData->m_numberOfCurbeParams); EXPECT_EQ(3u, kernelData->m_numberOfCurbeTokens); EXPECT_EQ(samplerStateArray.Count, kernelData->m_numberOfSamplerStates); EXPECT_EQ(alignUp(samplerStateArray.Size, Sampler::samplerStateArrayAlignment) + samplerStateArray.Offset - samplerStateArray.BorderColorOffset, kernelData->m_SizeOfSamplerHeap); EXPECT_EQ(samplerStateArray.BorderColorOffset, kernelData->m_SamplerBorderColorStateOffsetOnDSH); EXPECT_EQ(samplerStateArray.Offset, kernelData->m_SamplerStateArrayOffsetOnDSH); 
EXPECT_EQ(dataParameterStream.DataParameterStreamSize, kernelData->m_sizeOfConstantBuffer); EXPECT_EQ(tokenMask, kernelData->m_PatchTokensMask); EXPECT_EQ(0u, kernelData->m_ScratchSpacePatchValue); EXPECT_EQ(executionEnvironment.LargestCompiledSIMDSize, kernelData->m_SIMDSize); EXPECT_EQ(executionEnvironment.HasBarriers, kernelData->m_HasBarriers); EXPECT_EQ(info.reqdWorkGroupSize[0], kernelData->m_RequiredWkgSizes[0]); EXPECT_EQ(info.reqdWorkGroupSize[1], kernelData->m_RequiredWkgSizes[1]); EXPECT_EQ(info.reqdWorkGroupSize[2], kernelData->m_RequiredWkgSizes[2]); EXPECT_EQ(info.workloadInfo.slmStaticSize, kernelData->m_InilineSLMSize); if (localIDPresent.flattend || localIDPresent.x || localIDPresent.y || localIDPresent.z) EXPECT_EQ(1u, kernelData->m_NeedLocalIDS); else EXPECT_EQ(0u, kernelData->m_NeedLocalIDS); EXPECT_EQ(0u, kernelData->m_DisablePreemption); if (privateSurfaceSize == 0) EXPECT_EQ(1u, kernelData->m_CanRunConcurently); else EXPECT_EQ(0u, kernelData->m_CanRunConcurently); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullExecutionEnvironment) { info.patchInfo.executionEnvironment = nullptr; std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getClDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, 
maxConstantBufferSize, samplerCount, info, pPlatform->getClDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(0u, kernelData->m_SIMDSize); EXPECT_EQ(0u, kernelData->m_HasBarriers); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullThreadPayload) { info.patchInfo.threadPayload = nullptr; std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getClDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getClDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(0u, kernelData->m_NeedLocalIDS); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullPrivateSurface) { info.patchInfo.pAllocateStatelessPrivateSurface = nullptr; std::unique_ptr kernelDataMemory(new 
char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getClDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getClDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(1u, kernelData->m_CanRunConcurently); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, nullSamplerState) { info.patchInfo.samplerStateArray = nullptr; std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 1 | 2 | 4; size_t maxConstantBufferSize = 32; size_t samplerCount = 1; size_t samplerHeapSize = alignUp(info.getSamplerStateArraySize(pPlatform->getClDevice(0)->getHardwareInfo()), Sampler::samplerStateArrayAlignment) + info.getBorderColorStateSize(); uint32_t offsetInKernelDataMemory = 0; uint32_t offset = MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getClDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); size_t expectedOffset = offsetInKernelDataMemory; expectedOffset += 
alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParams.size(), sizeof(void *)); expectedOffset += maxConstantBufferSize + alignUp(samplerHeapSize, sizeof(void *)) + samplerCount * sizeof(IGIL_SamplerParams); EXPECT_EQ(0u, kernelData->m_numberOfSamplerStates); EXPECT_EQ(0u, kernelData->m_SizeOfSamplerHeap); EXPECT_EQ(expectedOffset, offset); } TEST_F(ReflectionSurfaceHelperSetKernelDataTest, setKernelDataWithDisabledConcurrentExecutionDebugFlag) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.DisableConcurrentBlockExecution.set(true); std::unique_ptr kernelDataMemory(new char[4096]); std::vector curbeParams; uint64_t tokenMask = 0; size_t maxConstantBufferSize = 0; size_t samplerCount = 0; uint32_t offsetInKernelDataMemory = 0; MockKernel::ReflectionSurfaceHelperPublic::setKernelData(kernelDataMemory.get(), offsetInKernelDataMemory, curbeParams, tokenMask, maxConstantBufferSize, samplerCount, info, pPlatform->getClDevice(0)->getHardwareInfo()); IGIL_KernelData *kernelData = reinterpret_cast(kernelDataMemory.get() + offsetInKernelDataMemory); EXPECT_EQ(0u, kernelData->m_CanRunConcurently); } TEST_F(ReflectionSurfaceHelperFixture, setKernelAddressDataWithNullBindingTable) { KernelInfo info; info.patchInfo.bindingTableState = nullptr; std::unique_ptr kernelDataMemory(new char[200]); IGIL_KernelAddressData *kernalAddressData = reinterpret_cast(kernelDataMemory.get()); MockKernel::ReflectionSurfaceHelperPublic::setKernelAddressData(kernelDataMemory.get(), 0, 1, 2, 3, 4, 5, 6, info, pPlatform->getClDevice(0)->getHardwareInfo()); EXPECT_EQ(1u, kernalAddressData->m_KernelDataOffset); EXPECT_EQ(2u, kernalAddressData->m_SamplerHeapOffset); EXPECT_EQ(4u, kernalAddressData->m_SamplerParamsOffset); EXPECT_EQ(3u, kernalAddressData->m_ConstantBufferOffset); EXPECT_EQ(5u, kernalAddressData->m_SSHTokensOffset); EXPECT_EQ(6u, kernalAddressData->m_BTSoffset); EXPECT_EQ(0u, kernalAddressData->m_BTSize); } TEST_F(ReflectionSurfaceHelperFixture, 
setKernelAddressDataWithSetBindingTable) { KernelInfo info; SPatchBindingTableState bindingTableStateInfo; bindingTableStateInfo.Offset = 0; bindingTableStateInfo.Count = 4; info.patchInfo.bindingTableState = &bindingTableStateInfo; std::unique_ptr kernelDataMemory(new char[200]); IGIL_KernelAddressData *kernalAddressData = reinterpret_cast(kernelDataMemory.get()); MockKernel::ReflectionSurfaceHelperPublic::setKernelAddressData(kernelDataMemory.get(), 0, 1, 2, 3, 4, 5, 6, info, pPlatform->getClDevice(0)->getHardwareInfo()); EXPECT_EQ(1u, kernalAddressData->m_KernelDataOffset); EXPECT_EQ(2u, kernalAddressData->m_SamplerHeapOffset); EXPECT_EQ(4u, kernalAddressData->m_SamplerParamsOffset); EXPECT_EQ(3u, kernalAddressData->m_ConstantBufferOffset); EXPECT_EQ(5u, kernalAddressData->m_SSHTokensOffset); EXPECT_EQ(6u, kernalAddressData->m_BTSoffset); EXPECT_NE(0u, kernalAddressData->m_BTSize); } TEST_F(ReflectionSurfaceHelperFixture, patchBlocksCurbe) { KernelInfo info; info.patchInfo.bindingTableState = nullptr; std::unique_ptr refletionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *header = reinterpret_cast(refletionSurfaceMemory.get()); header->m_numberOfKernels = 2; IGIL_KernelAddressData *kernalAddressData = header->m_data; uint32_t offset = static_cast(alignUp(sizeof(IGIL_KernelDataHeader) + 2 * sizeof(IGIL_KernelAddressData) + 2 * sizeof(IGIL_KernelData), MemoryConstants::cacheLineSize)); for (uint32_t i = 0; i < 2; i++) { assert(offset < 4000); kernalAddressData[i].m_ConstantBufferOffset = offset; offset += 128; } uint64_t defaultQueueOffset = 8; uint64_t deviceQueueOffset = 16; uint64_t eventPoolOffset = 24; uint64_t printfOffset = 32; uint64_t privateSurfaceOffset = 40; uint64_t deviceQueueAddress = 0x12345678; uint64_t eventPoolAddress = 0x87654321; uint64_t printfAddress = 0x55667788; uint64_t privateSurfaceAddress = 0x19283746; for (uint32_t i = 0; i < 2; i++) { MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbe(refletionSurfaceMemory.get(), i, 
defaultQueueOffset, sizeof(uint64_t), deviceQueueAddress, eventPoolOffset, sizeof(uint64_t), eventPoolAddress, deviceQueueOffset, sizeof(uint64_t), deviceQueueAddress, printfOffset, sizeof(uint64_t), printfAddress, privateSurfaceOffset, sizeof(uint64_t), privateSurfaceAddress); void *pCurbe = ptrOffset(refletionSurfaceMemory.get(), (size_t)(kernalAddressData[i].m_ConstantBufferOffset)); EXPECT_EQ(deviceQueueAddress, *static_cast(ptrOffset(pCurbe, (size_t)defaultQueueOffset))); EXPECT_EQ(eventPoolAddress, *static_cast(ptrOffset(pCurbe, (size_t)eventPoolOffset))); EXPECT_EQ(deviceQueueAddress, *static_cast(ptrOffset(pCurbe, (size_t)deviceQueueOffset))); EXPECT_EQ(printfAddress, *static_cast(ptrOffset(pCurbe, (size_t)printfOffset))); EXPECT_EQ(privateSurfaceAddress, *static_cast(ptrOffset(pCurbe, (size_t)privateSurfaceOffset))); } } TEST_F(ReflectionSurfaceHelperFixture, patchBlocksCurbeWithUndefinedOffsets) { KernelInfo info; info.patchInfo.bindingTableState = nullptr; std::unique_ptr refletionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *header = reinterpret_cast(refletionSurfaceMemory.get()); header->m_numberOfKernels = 2; IGIL_KernelAddressData *kernalAddressData = header->m_data; uint32_t offset = sizeof(IGIL_KernelDataHeader) + 2 * sizeof(IGIL_KernelAddressData) + 2 * sizeof(IGIL_KernelData); uint8_t pattern[100] = {0}; memset(pattern, 0, 100); memset(ptrOffset(refletionSurfaceMemory.get(), offset), 0, 200); for (uint32_t i = 0; i < 2; i++) { assert(offset < 4000); kernalAddressData[i].m_ConstantBufferOffset = offset; offset += 100; } uint64_t defaultQueueOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t deviceQueueOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t eventPoolOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t printfOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t privateSurfaceOffset = 
MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t deviceQueueAddress = 0x12345678; uint64_t eventPoolAddress = 0x87654321; uint64_t printfAddress = 0x55667788; uint64_t privateSurfaceGpuAddress = 0x19283746; uint32_t privateSurfaceSize = 128; for (uint32_t i = 0; i < 2; i++) { MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbe(refletionSurfaceMemory.get(), i, defaultQueueOffset, sizeof(uint64_t), deviceQueueAddress, eventPoolOffset, sizeof(uint64_t), eventPoolAddress, deviceQueueOffset, sizeof(uint64_t), deviceQueueAddress, printfOffset, sizeof(uint64_t), printfAddress, privateSurfaceOffset, privateSurfaceSize, privateSurfaceGpuAddress); void *pCurbe = ptrOffset(refletionSurfaceMemory.get(), (size_t)(kernalAddressData[i].m_ConstantBufferOffset)); // constant buffer should be intact EXPECT_EQ(0, memcmp(pattern, pCurbe, 100)); } } TEST_F(ReflectionSurfaceHelperFixture, setParentImageParams) { MockContext context; KernelInfo info; std::vector kernelArguments; std::unique_ptr image2d(ImageHelper::create(&context)); std::unique_ptr image1d(ImageHelper::create(&context)); Kernel::SimpleKernelArgInfo imgInfo; KernelArgInfo argInfo; uint32_t imageID[4] = {32, 64, 0, 0}; // Buffer Object should never be dereferenced by setParentImageParams imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(imgInfo); argInfo.offsetHeap = 0; argInfo.isBuffer = true; info.kernelArgInfo.push_back(std::move(argInfo)); imgInfo.type = Kernel::kernelArgType::IMAGE_OBJ; imgInfo.object = (cl_mem)image2d.get(); kernelArguments.push_back(imgInfo); argInfo = {}; argInfo.offsetHeap = imageID[0]; argInfo.isImage = true; info.kernelArgInfo.push_back(std::move(argInfo)); // Buffer Object should never be dereferenced by setParentImageParams imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(imgInfo); argInfo = {}; argInfo.offsetHeap = 0; 
argInfo.isBuffer = true; info.kernelArgInfo.push_back(std::move(argInfo)); imgInfo.type = Kernel::kernelArgType::IMAGE_OBJ; imgInfo.object = (cl_mem)image1d.get(); kernelArguments.push_back(imgInfo); argInfo = {}; argInfo.offsetHeap = imageID[1]; argInfo.isImage = true; info.kernelArgInfo.push_back(std::move(argInfo)); std::unique_ptr reflectionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurfaceMemory.get()); pKernelHeader->m_ParentImageDataOffset = 16; pKernelHeader->m_ParentKernelImageCount = 2; IGIL_ImageParamters *pImageParameters = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentImageDataOffset)); MockKernel::ReflectionSurfaceHelperPublic::setParentImageParams(reflectionSurfaceMemory.get(), kernelArguments, info); auto numArgs = kernelArguments.size(); uint32_t imageIndex = 0; for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].type == Kernel::kernelArgType::IMAGE_OBJ) { const Image *image = const_cast(castToObject((cl_mem)kernelArguments[argIndex].object)); EXPECT_EQ(image->getImageDesc().image_array_size, pImageParameters->m_ArraySize); EXPECT_EQ(image->getImageDesc().image_depth, pImageParameters->m_Depth); EXPECT_EQ(image->getImageDesc().image_height, pImageParameters->m_Height); EXPECT_EQ(image->getImageDesc().image_width, pImageParameters->m_Width); EXPECT_EQ(image->getImageDesc().num_mip_levels, pImageParameters->m_NumMipLevels); EXPECT_EQ(image->getImageDesc().num_samples, pImageParameters->m_NumSamples); EXPECT_EQ(image->getImageFormat().image_channel_data_type, pImageParameters->m_ChannelDataType); EXPECT_EQ(image->getImageFormat().image_channel_data_type, pImageParameters->m_ChannelOrder); EXPECT_EQ(imageID[imageIndex], pImageParameters->m_ObjectID); pImageParameters++; imageIndex++; } } } TEST_F(ReflectionSurfaceHelperFixture, setParentSamplerParams) { MockContext context; KernelInfo info; std::vector kernelArguments; 
std::unique_ptr sampler1(new MockSampler(&context, true, (cl_addressing_mode)CL_ADDRESS_CLAMP_TO_EDGE, (cl_filter_mode)CL_FILTER_LINEAR)); std::unique_ptr sampler2(new MockSampler(&context, false, (cl_addressing_mode)CL_ADDRESS_CLAMP, (cl_filter_mode)CL_FILTER_NEAREST)); Kernel::SimpleKernelArgInfo imgInfo; KernelArgInfo argInfo; uint32_t samplerID[4] = {32, 64, 0, 0}; // Buffer Object should never be dereferenced by setParentImageParams imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(std::move(imgInfo)); argInfo.offsetHeap = 0; argInfo.isBuffer = true; info.kernelArgInfo.push_back(std::move(argInfo)); imgInfo = {}; imgInfo.type = Kernel::kernelArgType::SAMPLER_OBJ; imgInfo.object = (cl_sampler)sampler1.get(); kernelArguments.push_back(std::move(imgInfo)); argInfo = {}; argInfo.offsetHeap = samplerID[0]; argInfo.isSampler = true; info.kernelArgInfo.push_back(std::move(argInfo)); // Buffer Object should never be dereferenced by setParentImageParams imgInfo = {}; imgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; imgInfo.object = reinterpret_cast(0x0); kernelArguments.push_back(std::move(imgInfo)); argInfo = {}; argInfo.offsetHeap = 0; argInfo.isBuffer = true; info.kernelArgInfo.push_back(std::move(argInfo)); imgInfo = {}; imgInfo.type = Kernel::kernelArgType::SAMPLER_OBJ; imgInfo.object = (cl_sampler)sampler2.get(); kernelArguments.push_back(std::move(imgInfo)); argInfo = {}; argInfo.offsetHeap = samplerID[1]; argInfo.isSampler = true; info.kernelArgInfo.push_back(std::move(argInfo)); std::unique_ptr reflectionSurfaceMemory(new char[4096]); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurfaceMemory.get()); pKernelHeader->m_ParentSamplerParamsOffset = 16; pKernelHeader->m_ParentSamplerCount = 2; IGIL_ParentSamplerParams *pParentSamplerParams = reinterpret_cast(ptrOffset(pKernelHeader, (size_t)pKernelHeader->m_ParentSamplerParamsOffset)); 
MockKernel::ReflectionSurfaceHelperPublic::setParentSamplerParams(reflectionSurfaceMemory.get(), kernelArguments, info); auto numArgs = kernelArguments.size(); uint32_t samplerIndex = 0; for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].type == Kernel::kernelArgType::SAMPLER_OBJ) { MockSampler *sampler = reinterpret_cast(castToObject((cl_sampler)kernelArguments[argIndex].object)); EXPECT_EQ((uint32_t)sampler->getAddressingMode(), pParentSamplerParams->m_AddressingMode); EXPECT_EQ((uint32_t)sampler->getNormalizedCoordinates(), pParentSamplerParams->NormalizedCoords); EXPECT_EQ(sampler->getSnapWaValue(), pParentSamplerParams->CoordinateSnapRequired); EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(samplerID[samplerIndex]), pParentSamplerParams->m_ObjectID); pParentSamplerParams++; samplerIndex++; } } } TEST_F(ReflectionSurfaceHelperFixture, PatchBlocksCurbeWithConstantValuesWithDefinedOffsetsPatchesCurbeOnReflectionSurface) { IGIL_KernelDataHeader reflectionSurface[10]; IGIL_KernelDataHeader referenceHeader = {0}; memset(reflectionSurface, 0, sizeof(IGIL_KernelDataHeader) * 10); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); reflectionSurface[0].m_numberOfKernels = 1; reflectionSurface[0].m_data[0].m_ConstantBufferOffset = constBufferOffset; reflectionSurface[0].m_data[0].m_KernelDataOffset = sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData); referenceHeader = reflectionSurface[0]; uint64_t inititalOffset = 8; uint64_t offset = inititalOffset; uint64_t globalMemoryCurbeOffset = offset; uint32_t globalMemoryPatchSize = 8; uint64_t globalMemoryGpuAddress = 0x12345678; offset += 8; uint64_t constantMemoryCurbeOffset = offset; uint32_t constantMemoryPatchSize = 8; uint64_t constantMemoryGpuAddress = 0x87654321; offset += 8; uint64_t privateMemoryCurbeOffset = 
offset; uint32_t privateMemoryPatchSize = 8; uint64_t privateMemoryGpuAddress = 0x22446688; MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbeWithConstantValues((void *)reflectionSurface, 0, globalMemoryCurbeOffset, globalMemoryPatchSize, globalMemoryGpuAddress, constantMemoryCurbeOffset, constantMemoryPatchSize, constantMemoryGpuAddress, privateMemoryCurbeOffset, privateMemoryPatchSize, privateMemoryGpuAddress); uint64_t *patchedValues = reinterpret_cast(reinterpret_cast(reflectionSurface) + constBufferOffset + inititalOffset); EXPECT_EQ(patchedValues[0], globalMemoryGpuAddress); EXPECT_EQ(patchedValues[1], constantMemoryGpuAddress); EXPECT_EQ(patchedValues[2], privateMemoryGpuAddress); EXPECT_THAT(&referenceHeader, MemCompare(&reflectionSurface[0], sizeof(IGIL_KernelDataHeader))); IGIL_KernelData *kernelData = (IGIL_KernelData *)ptrOffset((char *)&reflectionSurface[0], sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData)); IGIL_KernelData referenceKerneldData = {0}; EXPECT_THAT(&referenceKerneldData, MemCompare(kernelData, sizeof(IGIL_KernelData))); } TEST_F(ReflectionSurfaceHelperFixture, PatchBlocksCurbeWithConstantValuesWithUndefinedOffsetsDoesNotPatchCurbeOnReflectionSurface) { IGIL_KernelDataHeader reflectionSurface[10]; IGIL_KernelDataHeader referenceHeader = {0}; memset(reflectionSurface, 0, sizeof(IGIL_KernelDataHeader) * 10); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); reflectionSurface[0].m_numberOfKernels = 1; reflectionSurface[0].m_data[0].m_ConstantBufferOffset = constBufferOffset; reflectionSurface[0].m_data[0].m_KernelDataOffset = sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData); referenceHeader = reflectionSurface[0]; uint64_t offset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; uint64_t globalMemoryCurbeOffset = offset; uint32_t globalMemoryPatchSize 
= 8; uint64_t globalMemoryGpuAddress = 0x12345678; uint64_t constantMemoryCurbeOffset = offset; uint32_t constantMemoryPatchSize = 8; uint64_t constantMemoryGpuAddress = 0x87654321; uint64_t privateMemoryCurbeOffset = offset; uint32_t privateMemoryPatchSize = 8; uint64_t privateMemoryGpuAddress = 0x22446688; MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbeWithConstantValues((void *)reflectionSurface, 0, globalMemoryCurbeOffset, globalMemoryPatchSize, globalMemoryGpuAddress, constantMemoryCurbeOffset, constantMemoryPatchSize, constantMemoryGpuAddress, privateMemoryCurbeOffset, privateMemoryPatchSize, privateMemoryGpuAddress); uint64_t *patchedValues = reinterpret_cast(reinterpret_cast(reflectionSurface) + constBufferOffset); std::unique_ptr reference = std::unique_ptr(new char[10 * sizeof(IGIL_KernelDataHeader)]); memset(reference.get(), 0, 10 * sizeof(IGIL_KernelDataHeader)); EXPECT_THAT(patchedValues, MemCompare(reference.get(), 10 * sizeof(IGIL_KernelDataHeader) - constBufferOffset)); } typedef ParentKernelCommandQueueFixture ReflectionSurfaceTestForPrintfHandler; TEST_F(ReflectionSurfaceTestForPrintfHandler, PatchReflectionSurfacePatchesPrintfBufferWhenPrintfHandlerIsPassed) { MockContext context(device); cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(context); DeviceQueue devQueue(&context, device, properties[0]); parentKernel->createReflectionSurface(); context.setDefaultDeviceQueue(&devQueue); MockMultiDispatchInfo multiDispatchInfo(parentKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); printfHandler->prepareDispatch(multiDispatchInfo); parentKernel->patchReflectionSurface(&devQueue, printfHandler); uint64_t printfBufferOffset = parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset; uint64_t printfBufferPatchSize = 
parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamSize; EXPECT_EQ(printfBufferOffset, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.offset); EXPECT_EQ(printfHandler->getSurface()->getGpuAddress(), MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.address); EXPECT_EQ(printfBufferPatchSize, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.size); delete printfHandler; delete parentKernel; } TEST_F(ReflectionSurfaceTestForPrintfHandler, PatchReflectionSurfaceDoesNotPatchPrintfBufferWhenPrintfSurfaceIsNotCreated) { MockContext context(device); cl_queue_properties properties[3] = {0}; MockParentKernel *parentKernel = MockParentKernel::create(context); DeviceQueue devQueue(&context, device, properties[0]); parentKernel->createReflectionSurface(); context.setDefaultDeviceQueue(&devQueue); MockMultiDispatchInfo multiDispatchInfo(parentKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); parentKernel->patchReflectionSurface(&devQueue, printfHandler); uint64_t printfBufferOffset = parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset; uint64_t printfBufferPatchSize = parentKernel->getProgram()->getBlockKernelManager()->getBlockKernelInfo(0)->patchInfo.pAllocateStatelessPrintfSurface->DataParamSize; EXPECT_EQ(printfBufferOffset, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.offset); EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.address); EXPECT_EQ(printfBufferPatchSize, MockKernel::ReflectionSurfaceHelperPublic::printfBuffer.size); delete printfHandler; delete parentKernel; } class ReflectionSurfaceConstantValuesPatchingTest : public DeviceFixture, public ::testing::Test { public: void SetUp() override { DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); } }; 
TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramGlobalMemoryAddressIsPatched) { MockContext context(pClDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, true, false); // graphicsMemory is released by Program GraphicsAllocation *globalMemory = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); parentKernel->mockProgram->setGlobalSurface(globalMemory); // Allocte reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(globalMemory->getGpuAddressToPatch(), *pCurbe); delete parentKernel; } TEST_F(ReflectionSurfaceConstantValuesPatchingTest, 
GivenBlockWithGlobalMemoryAndProgramWithoutGlobalMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) { MockContext context(pClDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, true, false); if (parentKernel->mockProgram->getGlobalSurface()) { pDevice->getMemoryManager()->freeGraphicsMemory(parentKernel->mockProgram->getGlobalSurface()); parentKernel->mockProgram->setGlobalSurface(nullptr); } // Allocte reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(0u, *pCurbe); delete parentKernel; } TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramConstantMemoryAddressIsPatched) { MockContext context(pClDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, false, true); // 
graphicsMemory is released by Program GraphicsAllocation *constantMemory = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); parentKernel->mockProgram->setConstantSurface(constantMemory); // Allocte reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(constantMemory->getGpuAddressToPatch(), *pCurbeToPatch); std::unique_ptr zeroMemory = std::unique_ptr(new char[4096]); memset(zeroMemory.get(), 0, 4096); // memory before is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset))); //memory after is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - 
constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t)))); delete parentKernel; } TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryAndProgramWithoutConstantMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) { MockContext context(pClDevice); MockParentKernel *parentKernel = MockParentKernel::create(context, false, false, true); if (parentKernel->mockProgram->getConstantSurface()) { pDevice->getMemoryManager()->freeGraphicsMemory(parentKernel->mockProgram->getConstantSurface()); parentKernel->mockProgram->setConstantSurface(nullptr); } // Allocte reflectionSurface, 2 * 4096 should be enough GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); parentKernel->setReflectionSurface(reflectionSurface); memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); pKernelHeader->m_numberOfKernels = 1; pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; parentKernel->patchBlocksCurbeWithConstantValues(); auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); uint32_t blockPatchOffset = blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset; uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); EXPECT_EQ(0u, *pCurbeToPatch); std::unique_ptr zeroMemory = std::unique_ptr(new 
char[4096]); memset(zeroMemory.get(), 0, 4096); // memory before is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset))); //memory after is not written EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t)))); delete parentKernel; } using KernelReflectionMultiDeviceTest = MultiRootDeviceFixture; TEST_F(KernelReflectionMultiDeviceTest, ObtainKernelReflectionSurfaceWithoutKernelArgs) { MockProgram program(*device->getExecutionEnvironment()); KernelInfo *blockInfo = new KernelInfo; KernelInfo &info = *blockInfo; cl_queue_properties properties[1] = {0}; DeviceQueue devQueue(context.get(), device.get(), properties[0]); SPatchExecutionEnvironment environment = {}; environment.HasDeviceEnqueue = 1; info.patchInfo.executionEnvironment = &environment; SKernelBinaryHeaderCommon kernelHeader; info.heapInfo.pKernelHeader = &kernelHeader; SPatchDataParameterStream dataParameterStream; dataParameterStream.Size = 0; dataParameterStream.DataParameterStreamSize = 0; info.patchInfo.dataParameterStream = &dataParameterStream; SPatchBindingTableState bindingTableState; bindingTableState.Count = 0; bindingTableState.Offset = 0; bindingTableState.Size = 0; bindingTableState.SurfaceStateOffset = 0; info.patchInfo.bindingTableState = &bindingTableState; MockKernel kernel(&program, info, *device.get()); EXPECT_TRUE(kernel.isParentKernel); program.blockKernelManager->addBlockKernelInfo(blockInfo); kernel.createReflectionSurface(); auto reflectionSurface = kernel.getKernelReflectionSurface(); ASSERT_NE(nullptr, reflectionSurface); EXPECT_EQ(expectedRootDeviceIndex, reflectionSurface->getRootDeviceIndex()); kernel.patchReflectionSurface(&devQueue, nullptr); uint64_t undefinedOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.offset); 
EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::devQueue.offset); EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::eventPool.offset); EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.size); EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::devQueue.size); EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::eventPool.size); } TEST_F(KernelReflectionMultiDeviceTest, ObtainKernelReflectionSurfaceWithDeviceQueueKernelArg) { MockProgram program(*device->getExecutionEnvironment()); KernelInfo *blockInfo = new KernelInfo; KernelInfo &info = *blockInfo; cl_queue_properties properties[1] = {0}; DeviceQueue devQueue(context.get(), device.get(), properties[0]); uint32_t devQueueCurbeOffset = 16; uint32_t devQueueCurbeSize = 4; SPatchExecutionEnvironment environment = {}; environment.HasDeviceEnqueue = 1; info.patchInfo.executionEnvironment = &environment; SKernelBinaryHeaderCommon kernelHeader; info.heapInfo.pKernelHeader = &kernelHeader; SPatchDataParameterStream dataParameterStream; dataParameterStream.Size = 0; dataParameterStream.DataParameterStreamSize = 0; info.patchInfo.dataParameterStream = &dataParameterStream; SPatchBindingTableState bindingTableState; bindingTableState.Count = 0; bindingTableState.Offset = 0; bindingTableState.Size = 0; bindingTableState.SurfaceStateOffset = 0; info.patchInfo.bindingTableState = &bindingTableState; KernelArgInfo argInfo; argInfo.isDeviceQueue = true; info.kernelArgInfo.resize(1); info.kernelArgInfo[0] = std::move(argInfo); info.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1); info.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = devQueueCurbeOffset; info.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = devQueueCurbeSize; MockKernel kernel(&program, info, *device.get()); EXPECT_TRUE(kernel.isParentKernel); program.blockKernelManager->addBlockKernelInfo(blockInfo); kernel.createReflectionSurface(); auto reflectionSurface = 
kernel.getKernelReflectionSurface(); ASSERT_NE(nullptr, reflectionSurface); EXPECT_EQ(expectedRootDeviceIndex, reflectionSurface->getRootDeviceIndex()); kernel.patchReflectionSurface(&devQueue, nullptr); uint64_t undefinedOffset = MockKernel::ReflectionSurfaceHelperPublic::undefinedOffset; EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.offset); EXPECT_EQ(devQueueCurbeOffset, MockKernel::ReflectionSurfaceHelperPublic::devQueue.offset); EXPECT_EQ(undefinedOffset, MockKernel::ReflectionSurfaceHelperPublic::eventPool.offset); EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::defaultQueue.size); EXPECT_EQ(4u, MockKernel::ReflectionSurfaceHelperPublic::devQueue.size); EXPECT_EQ(0u, MockKernel::ReflectionSurfaceHelperPublic::eventPool.size); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp000066400000000000000000000071431363734646600300310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; class KernelSlmArgTest : public Test { protected: void SetUp() override { DeviceFixture::SetUp(); pKernelInfo = std::make_unique(); KernelArgPatchInfo kernelArgPatchInfo; pKernelInfo->kernelArgInfo.resize(3); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 
0x10; pKernelInfo->kernelArgInfo[0].slmAlignment = 0x1; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].size = sizeof(void *); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[2].slmAlignment = 0x400; pKernelInfo->workloadInfo.slmStaticSize = 3 * KB; program = std::make_unique(*pDevice->getExecutionEnvironment()); pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgLocal); pKernel->setKernelArgHandler(1, &Kernel::setArgImmediate); pKernel->setKernelArgHandler(2, &Kernel::setArgLocal); uint32_t crossThreadData[0x40] = {}; crossThreadData[0x20 / sizeof(uint32_t)] = 0x12344321; pKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); } void TearDown() override { delete pKernel; DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr program; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; static const size_t slmSize0 = 0x200; static const size_t slmSize2 = 0x30; }; TEST_F(KernelSlmArgTest, settingSizeUpdatesAlignmentOfHigherSlmArgs) { pKernel->setArg(0, slmSize0, nullptr); pKernel->setArg(2, slmSize2, nullptr); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto slmOffset = ptrOffset(crossThreadData, 0x10); EXPECT_EQ(0u, *slmOffset); slmOffset = ptrOffset(crossThreadData, 0x20); EXPECT_EQ(0x12344321u, *slmOffset); slmOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(0x400u, *slmOffset); EXPECT_EQ(5 * KB, pKernel->slmTotalSize); } TEST_F(KernelSlmArgTest, settingSizeUpdatesAlignmentOfHigherSlmArgsReverseOrder) { pKernel->setArg(2, slmSize2, nullptr); pKernel->setArg(0, slmSize0, nullptr); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto slmOffset = ptrOffset(crossThreadData, 0x10); EXPECT_EQ(0u, *slmOffset); 
slmOffset = ptrOffset(crossThreadData, 0x20); EXPECT_EQ(0x12344321u, *slmOffset); slmOffset = ptrOffset(crossThreadData, 0x30); EXPECT_EQ(0x400u, *slmOffset); EXPECT_EQ(5 * KB, pKernel->slmTotalSize); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_slm_tests.cpp000066400000000000000000000126471363734646600272050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" using namespace NEO; struct KernelSLMAndBarrierTest : public DeviceFixture, public ::testing::TestWithParam { void SetUp() override { DeviceFixture::SetUp(); program = std::make_unique(*pDevice->getExecutionEnvironment()); memset(&kernelHeader, 0, sizeof(kernelHeader)); kernelHeader.KernelHeapSize = sizeof(kernelIsa); memset(&dataParameterStream, 0, sizeof(dataParameterStream)); dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData); executionEnvironment = {}; memset(&executionEnvironment, 0, sizeof(executionEnvironment)); executionEnvironment.CompiledSIMD32 = 1; executionEnvironment.LargestCompiledSIMDSize = 32; memset(&threadPayload, 0, sizeof(threadPayload)); threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.pKernelHeader = &kernelHeader; kernelInfo.patchInfo.dataParameterStream = &dataParameterStream; kernelInfo.patchInfo.executionEnvironment = &executionEnvironment; kernelInfo.patchInfo.threadPayload = &threadPayload; } void TearDown() override { DeviceFixture::TearDown(); } uint32_t simd; uint32_t 
numChannels; std::unique_ptr program; SKernelBinaryHeaderCommon kernelHeader; SPatchDataParameterStream dataParameterStream; SPatchExecutionEnvironment executionEnvironment; SPatchThreadPayload threadPayload; KernelInfo kernelInfo; uint32_t kernelIsa[32]; uint32_t crossThreadData[32]; uint32_t perThreadData[8]; }; static uint32_t slmSizeInKb[] = {1, 4, 8, 16, 32, 64}; HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, test_SLMProgramming) { ASSERT_NE(nullptr, pClDevice); CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); typedef typename FamilyType::INTERFACE_DESCRIPTOR_DATA INTERFACE_DESCRIPTOR_DATA; // define kernel info executionEnvironment.HasBarriers = 1; kernelInfo.workloadInfo.slmStaticSize = GetParam() * KB; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); // After creating Mock Kernel now create Indirect Heap auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 8192); uint64_t interfaceDescriptorOffset = indirectHeap.getUsed(); size_t offsetInterfaceDescriptorData = HardwareCommandsHelper::sendInterfaceDescriptorData( indirectHeap, interfaceDescriptorOffset, 0, sizeof(crossThreadData), sizeof(perThreadData), 0, 0, 0, 1, kernel, 4u, pDevice->getPreemptionMode(), nullptr); // add the heap base + offset uint32_t *pIdData = (uint32_t *)indirectHeap.getCpuBase() + offsetInterfaceDescriptorData; INTERFACE_DESCRIPTOR_DATA *pSrcIDData = (INTERFACE_DESCRIPTOR_DATA *)pIdData; uint32_t ExpectedSLMSize = 0; if (::renderCoreFamily == IGFX_GEN8_CORE) { if (kernelInfo.workloadInfo.slmStaticSize <= (4 * 1024)) { ExpectedSLMSize = 1; } else if (kernelInfo.workloadInfo.slmStaticSize <= (8 * 1024)) { ExpectedSLMSize = 2; } else if (kernelInfo.workloadInfo.slmStaticSize <= (16 * 1024)) { ExpectedSLMSize = 4; } else if (kernelInfo.workloadInfo.slmStaticSize <= (32 * 1024)) { ExpectedSLMSize = 8; } else if (kernelInfo.workloadInfo.slmStaticSize <= (64 * 1024)) { ExpectedSLMSize = 16; } } else { if 
(kernelInfo.workloadInfo.slmStaticSize <= (1 * 1024)) // its a power of "2" +1 for example 1 is 2^0 ( 0+1); 2 is 2^1 is (1+1) etc. { ExpectedSLMSize = 1; } else if (kernelInfo.workloadInfo.slmStaticSize <= (2 * 1024)) { ExpectedSLMSize = 2; } else if (kernelInfo.workloadInfo.slmStaticSize <= (4 * 1024)) { ExpectedSLMSize = 3; } else if (kernelInfo.workloadInfo.slmStaticSize <= (8 * 1024)) { ExpectedSLMSize = 4; } else if (kernelInfo.workloadInfo.slmStaticSize <= (16 * 1024)) { ExpectedSLMSize = 5; } else if (kernelInfo.workloadInfo.slmStaticSize <= (32 * 1024)) { ExpectedSLMSize = 6; } else if (kernelInfo.workloadInfo.slmStaticSize <= (64 * 1024)) { ExpectedSLMSize = 7; } } ASSERT_GT(ExpectedSLMSize, 0u); EXPECT_EQ(ExpectedSLMSize, pSrcIDData->getSharedLocalMemorySize()); EXPECT_EQ(!!executionEnvironment.HasBarriers, pSrcIDData->getBarrierEnable()); EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL, pSrcIDData->getDenormMode()); EXPECT_EQ(4u, pSrcIDData->getBindingTableEntryCount()); } INSTANTIATE_TEST_CASE_P( SlmSizes, KernelSLMAndBarrierTest, testing::ValuesIn(slmSizeInKb)); compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_tests.cpp000066400000000000000000004205271363734646600263320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include 
"shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_tests.h" #include "test.h" #include using namespace NEO; using namespace DeviceHostQueue; class KernelTest : public ProgramFromBinaryTest { public: ~KernelTest() override = default; protected: void SetUp() override { ProgramFromBinaryTest::SetUp(); ASSERT_NE(nullptr, pProgram); ASSERT_EQ(CL_SUCCESS, retVal); cl_device_id device = pClDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // create a kernel pKernel = Kernel::create( pProgram, *pProgram->getKernelInfo(KernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); } void 
TearDown() override { delete pKernel; pKernel = nullptr; knownSource.reset(); ProgramFromBinaryTest::TearDown(); } Kernel *pKernel = nullptr; cl_int retVal = CL_SUCCESS; }; TEST(KernelTest, isMemObj) { EXPECT_TRUE(Kernel::isMemObj(Kernel::BUFFER_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::IMAGE_OBJ)); EXPECT_TRUE(Kernel::isMemObj(Kernel::PIPE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SAMPLER_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::ACCELERATOR_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::NONE_OBJ)); EXPECT_FALSE(Kernel::isMemObj(Kernel::SVM_ALLOC_OBJ)); } TEST_P(KernelTest, getKernelHeap) { EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeap, pKernel->getKernelHeap()); EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeader->KernelHeapSize, pKernel->getKernelHeapSize()); } TEST_P(KernelTest, GetInfo_InvalidParamName) { size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( 0, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(KernelTest, GetInfo_Name) { cl_kernel_info paramName = CL_KERNEL_FUNCTION_NAME; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for name paramValue = new char[paramValueSizeRet]; // get the name paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, strcmp(paramValue, KernelName)); EXPECT_EQ(CL_SUCCESS, retVal); delete[] paramValue; } TEST_P(KernelTest, GetInfo_BinaryProgramIntel) { cl_kernel_info paramName = CL_KERNEL_BINARY_PROGRAM_INTEL; size_t paramValueSize = 0; char *paramValue = nullptr; size_t paramValueSizeRet = 0; const char *pKernelData = reinterpret_cast(pKernel->getKernelHeap()); EXPECT_NE(nullptr, pKernelData); // get size of kernel binary retVal = pKernel->getInfo( 
paramName, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_NE(0u, paramValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); // allocate space for kernel binary paramValue = new char[paramValueSizeRet]; // get kernel binary paramValueSize = paramValueSizeRet; retVal = pKernel->getInfo( paramName, paramValueSize, paramValue, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, paramValue); EXPECT_EQ(0, memcmp(paramValue, pKernelData, paramValueSize)); delete[] paramValue; } TEST_P(KernelTest, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIsReturned) { cl_kernel_info paramName = CL_KERNEL_BINARY_GPU_ADDRESS_INTEL; uint64_t paramValue = 0llu; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); auto expectedGpuAddress = GmmHelper::decanonize(pKernel->getKernelInfo().kernelAllocation->getGpuAddress()); EXPECT_EQ(expectedGpuAddress, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_P(KernelTest, GetInfo_NumArgs) { cl_kernel_info paramName = CL_KERNEL_NUM_ARGS; size_t paramValueSize = sizeof(cl_uint); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(2u, paramValue); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(KernelTest, GetInfo_Program) { cl_kernel_info paramName = CL_KERNEL_PROGRAM; size_t paramValueSize = sizeof(cl_program); cl_program paramValue = 0; size_t paramValueSizeRet = 0; cl_program prog = pProgram; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_program), paramValueSizeRet); EXPECT_EQ(prog, paramValue); } TEST_P(KernelTest, GetInfo_Context) { cl_kernel_info paramName = CL_KERNEL_CONTEXT; cl_context paramValue = 0; size_t paramValueSize = sizeof(paramValue); 
size_t paramValueSizeRet = 0; cl_context context = pContext; // get size retVal = pKernel->getInfo( paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(context, paramValue); } TEST_P(KernelTest, GetWorkGroupInfo_WorkgroupSize) { cl_kernel_info paramName = CL_KERNEL_WORK_GROUP_SIZE; size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; auto kernelMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize - 1; pKernel->maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); retVal = pKernel->getWorkGroupInfo( pClDevice, paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(kernelMaxWorkGroupSize, paramValue); } TEST_P(KernelTest, GetWorkGroupInfo_CompileWorkgroupSize) { cl_kernel_info paramName = CL_KERNEL_COMPILE_WORK_GROUP_SIZE; size_t paramValue[3]; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; retVal = pKernel->getWorkGroupInfo( pClDevice, paramName, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); } INSTANTIATE_TEST_CASE_P(KernelTests, KernelTest, ::testing::Combine( ::testing::ValuesIn(BinaryFileNames), ::testing::ValuesIn(KernelNames))); class KernelFromBinaryTest : public ProgramSimpleFixture { public: void SetUp() override { ProgramSimpleFixture::SetUp(); } void TearDown() override { ProgramSimpleFixture::TearDown(); } }; typedef Test KernelFromBinaryTests; TEST_F(KernelFromBinaryTests, getInfo_NumArgs) { cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "kernel_num_args"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test"); // create a kernel auto pKernel = Kernel::create( pProgram, 
*pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); cl_uint paramValue = 0; size_t paramValueSizeRet = 0; // get size retVal = pKernel->getInfo( CL_KERNEL_NUM_ARGS, sizeof(cl_uint), ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_uint), paramValueSizeRet); EXPECT_EQ(3u, paramValue); delete pKernel; } TEST_F(KernelFromBinaryTests, BuiltInIsSetToFalseForRegularKernels) { cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "simple_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("simple_kernel_0"); // create a kernel auto pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); // get builtIn property bool isBuiltIn = pKernel->isBuiltIn; EXPECT_FALSE(isBuiltIn); delete pKernel; } TEST_F(KernelFromBinaryTests, givenArgumentDeclaredAsConstantWhenKernelIsCreatedThenArgumentIsMarkedAsReadOnly) { cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "simple_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("simple_kernel_6"); EXPECT_TRUE(pKernelInfo->kernelArgInfo[1].isReadOnly); pKernelInfo = pProgram->getKernelInfo("simple_kernel_1"); EXPECT_TRUE(pKernelInfo->kernelArgInfo[0].isReadOnly); } TEST(PatchInfo, Constructor) { PatchInfo patchInfo; EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorDataLoad); EXPECT_EQ(nullptr, patchInfo.localsurface); EXPECT_EQ(nullptr, patchInfo.mediavfestate); EXPECT_EQ(nullptr, patchInfo.mediaVfeStateSlot1); EXPECT_EQ(nullptr, patchInfo.interfaceDescriptorData); EXPECT_EQ(nullptr, patchInfo.samplerStateArray); EXPECT_EQ(nullptr, patchInfo.bindingTableState); EXPECT_EQ(nullptr, patchInfo.dataParameterStream); EXPECT_EQ(nullptr, 
patchInfo.threadPayload); EXPECT_EQ(nullptr, patchInfo.executionEnvironment); EXPECT_EQ(nullptr, patchInfo.pKernelAttributesInfo); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrivateSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessPrintfSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessEventPoolSurface); EXPECT_EQ(nullptr, patchInfo.pAllocateStatelessDefaultDeviceQueueSurface); } typedef Test KernelPrivateSurfaceTest; typedef Test KernelGlobalSurfaceTest; typedef Test KernelConstantSurfaceTest; struct KernelWithDeviceQueueFixture : public DeviceFixture, public DeviceQueueFixture, public testing::Test { void SetUp() override { DeviceFixture::SetUp(); DeviceQueueFixture::SetUp(&context, pClDevice); } void TearDown() override { DeviceQueueFixture::TearDown(); DeviceFixture::TearDown(); } MockContext context; }; typedef KernelWithDeviceQueueFixture KernelDefaultDeviceQueueSurfaceTest; typedef KernelWithDeviceQueueFixture KernelEventPoolSurfaceTest; class CommandStreamReceiverMock : public CommandStreamReceiver { typedef CommandStreamReceiver BaseClass; public: using CommandStreamReceiver::executionEnvironment; using BaseClass::CommandStreamReceiver; bool isMultiOsContextCapable() const override { return false; } CommandStreamReceiverMock() : BaseClass(*(new ExecutionEnvironment), 0) { this->mockExecutionEnvironment.reset(&this->executionEnvironment); executionEnvironment.prepareRootDeviceEnvironments(1); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment.initializeMemoryManager(); } void makeResident(GraphicsAllocation &graphicsAllocation) override { residency[graphicsAllocation.getUnderlyingBuffer()] = graphicsAllocation.getUnderlyingBufferSize(); if (passResidencyCallToBaseClass) { 
CommandStreamReceiver::makeResident(graphicsAllocation); } } void makeNonResident(GraphicsAllocation &graphicsAllocation) override { residency.erase(graphicsAllocation.getUnderlyingBuffer()); if (passResidencyCallToBaseClass) { CommandStreamReceiver::makeNonResident(graphicsAllocation); } } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return true; } void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override { } uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { return taskCount; }; CompletionStamp flushTask( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { CompletionStamp cs = {}; return cs; } bool flushBatchedSubmissions() override { return true; } CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_HW; } std::map residency; bool passResidencyCallToBaseClass = true; std::unique_ptr mockExecutionEnvironment; }; TEST_F(KernelPrivateSurfaceTest, testPrivateSurface) { ASSERT_NE(nullptr, pDevice); // define kernel info auto pKernelInfo = std::make_unique(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD8 = false; tokenEE.CompiledSIMD16 = false; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; // create kernel MockContext context; 
MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice); MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // Test it auto executionEnvironment = pDevice->getExecutionEnvironment(); std::unique_ptr csr(new CommandStreamReceiverMock(*executionEnvironment, 0)); csr->setupContext(*pDevice->getDefaultEngine().osContext); csr->residency.clear(); EXPECT_EQ(0u, csr->residency.size()); pKernel->makeResident(*csr.get()); EXPECT_EQ(1u, csr->residency.size()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->residency.size()); delete pKernel; } TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWhenKernelIsBeingDestroyedThenAllocationIsAddedToDefferedFreeList) { auto pKernelInfo = std::make_unique(); SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; SPatchDataParameterStream tokenDPS; tokenDPS.DataParameterStreamSize = 64; pKernelInfo->patchInfo.dataParameterStream = &tokenDPS; SPatchExecutionEnvironment tokenEE = {}; tokenEE.CompiledSIMD32 = true; pKernelInfo->patchInfo.executionEnvironment = &tokenEE; MockContext context; MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice); std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice)); pKernel->initialize(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); auto privateSurface = pKernel->getPrivateSurface(); auto tagAddress = csr.getTagAddress(); privateSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); pKernel.reset(nullptr); EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); 
EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), privateSurface);
}

// NOTE(review): in this copy of the file several template-argument lists appear to be
// missing (e.g. `std::make_unique()`, `std::unique_ptr(...)`, `reinterpret_cast(`,
// `std::numeric_limits::max()`). The tokens are kept exactly as found - confirm against
// the upstream source before building.
// NOTE(review): tests below that hold the kernel in a raw `MockKernel *` reach
// `delete pKernel` (and the `set*Surface(nullptr)` reset) only when the preceding
// ASSERT_EQ passes, because a failing ASSERT_* returns from the test body.

// Creates and initializes a kernel with a private surface token inside a
// MemoryManagementFixture failure-injection loop. The injected function expects
// CL_SUCCESS when failureIndex equals MemoryManagement::nonfailingAllocation and
// CL_OUT_OF_RESOURCES otherwise.
TEST_F(KernelPrivateSurfaceTest, testPrivateSurfaceAllocationFailure) {
    ASSERT_NE(nullptr, pDevice);

    // define kernel info
    auto pKernelInfo = std::make_unique();

    // setup private memory
    SPatchAllocateStatelessPrivateSurface tokenSPS;
    tokenSPS.SurfaceStateHeapOffset = 64;
    tokenSPS.DataParamOffset = 40;
    tokenSPS.DataParamSize = 8;
    tokenSPS.PerThreadPrivateMemorySize = 112;
    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS;

    SPatchDataParameterStream tokenDPS;
    tokenDPS.DataParameterStreamSize = 64;
    pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // create kernel
    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    // a fresh kernel is built and destroyed on every invocation of the injected function
    MemoryManagementFixture::InjectedFunction method = [&](size_t failureIndex) {
        MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

        if (MemoryManagement::nonfailingAllocation == failureIndex) {
            EXPECT_EQ(CL_SUCCESS, pKernel->initialize());
        } else {
            EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize());
        }
        delete pKernel;
    };
    // the fixture is driven by hand: SetUp, run the injection loop, TearDown
    auto f = new MemoryManagementFixture();
    f->SetUp();
    f->injectFailures(method);
    f->TearDown();
    delete f;
}

// Only runs its body when is64bit is true: forces 32-bit allocations on the memory
// manager and checks that the initialized kernel's private surface reports
// is32BitAllocation().
TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateSurfaceIs32BitAllocation) {
    if (is64bit) {
        pDevice->getMemoryManager()->setForce32BitAllocations(true);

        // define kernel info
        auto pKernelInfo = std::make_unique();

        // setup private memory
        SPatchAllocateStatelessPrivateSurface tokenSPS;
        tokenSPS.SurfaceStateHeapOffset = 64;
        tokenSPS.DataParamOffset = 40;
        tokenSPS.DataParamSize = 4;
        tokenSPS.PerThreadPrivateMemorySize = 112;
        pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS;

        SPatchDataParameterStream tokenDPS;
        tokenDPS.DataParameterStreamSize = 64;
        pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;

        SPatchExecutionEnvironment tokenEE = {};
        tokenEE.CompiledSIMD8 = false;
        tokenEE.CompiledSIMD16 = false;
        tokenEE.CompiledSIMD32 = true;
        pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

        // create kernel
        MockContext context;
        MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
        MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

        ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

        EXPECT_TRUE(pKernel->getPrivateSurface()->is32BitAllocation());

        delete pKernel;
    }
}

// Stateful path (usesSsh / requiresSshForBuffers set): after initialize() the surface
// state at the private-surface SSH offset must carry a base address equal to the
// private surface's getGpuAddress().
HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsPatchedWithCpuAddress) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup private memory
    SPatchAllocateStatelessPrivateSurface AllocateStatelessPrivateMemorySurface;
    AllocateStatelessPrivateMemorySurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessPrivateMemorySurface.DataParamOffset = 0;
    AllocateStatelessPrivateMemorySurface.DataParamSize = 8;
    AllocateStatelessPrivateMemorySurface.PerThreadPrivateMemorySize = 16;

    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &AllocateStatelessPrivateMemorySurface;

    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);

    // create kernel
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // setup surface state heap (assigned after construction, before initialize())
    char surfaceStateHeap[0x80];
    SKernelBinaryHeaderCommon kernelHeader;
    kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
    pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
    pKernelInfo->heapInfo.pKernelHeader = &kernelHeader;

    // define stateful path
    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    auto bufferAddress = pKernel->getPrivateSurface()->getGpuAddress();

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    EXPECT_EQ(bufferAddress, surfaceAddress);

    delete pKernel;
}

// Stateless path (usesSsh / requiresSshForBuffers cleared): after initialize() the
// kernel must report a zero-sized, null surface state heap.
TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPrivateMemorySurfaceStateIsNotPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup constant surface backed by a stack buffer
    char buffer[16];
    MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));

    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    program.setConstantSurface(&gfxAlloc);

    // create kernel
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
    EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());

    // detach the stack-backed allocation from the program before the test ends
    program.setConstantSurface(nullptr);
    delete pKernel;
}

// A freshly created kernel info with no data parameter stream assigned must report a
// constant buffer size of zero.
TEST_F(KernelPrivateSurfaceTest, givenNullDataParameterStreamGetConstantBufferSizeReturnsZero) {
    auto pKernelInfo = std::make_unique();

    EXPECT_EQ(0u, pKernelInfo->getConstantBufferSize());
}

// With a data parameter stream token of size 64 attached, getConstantBufferSize()
// must return 64.
TEST_F(KernelPrivateSurfaceTest, givenNonNullDataParameterStreamGetConstantBufferSizeReturnsCorrectSize) {
    auto pKernelInfo = std::make_unique();

    SPatchDataParameterStream tokenDPS;
    tokenDPS.DataParameterStreamSize = 64;
    pKernelInfo->patchInfo.dataParameterStream = &tokenDPS;

    EXPECT_EQ(64u, pKernelInfo->getConstantBufferSize());
}

// PerThreadPrivateMemorySize is set to the numeric_limits maximum, gpuPointerSize to 4
// and forced 32-bit allocations are disabled; initialize() must return
// CL_OUT_OF_RESOURCES.
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4ThenReturnOutOfResources) {
    auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface());
    pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max();
    auto executionEnvironment = std::unique_ptr(new SPatchExecutionEnvironment());
    *executionEnvironment = {};
    executionEnvironment->CompiledSIMD32 = 32;
    auto pKernelInfo = std::make_unique();
    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
    pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
    pKernelInfo->gpuPointerSize = 4;
    pDevice->getMemoryManager()->setForce32BitAllocations(false);
    // make sure the scratch compute-unit count is non-zero before initializing
    if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0)
        pDevice->deviceInfo.computeUnitsUsedForScratch = 120;
    EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize());
}

// Same setup as the test above, but with forced 32-bit allocations enabled
// (gpuPointerSize still 4); initialize() must return CL_OUT_OF_RESOURCES.
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize4And32BitAllocationsThenReturnOutOfResources) {
    auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface());
    pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max();
    auto executionEnvironment = std::unique_ptr(new SPatchExecutionEnvironment());
    *executionEnvironment = {};
    executionEnvironment->CompiledSIMD32 = 32;
    auto pKernelInfo = std::make_unique();
    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
    pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
    pKernelInfo->gpuPointerSize = 4;
    pDevice->getMemoryManager()->setForce32BitAllocations(true);
    // make sure the scratch compute-unit count is non-zero before initializing
    if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0)
        pDevice->deviceInfo.computeUnitsUsedForScratch = 120;
    EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize());
}

// Variant with gpuPointerSize 8 and forced 32-bit allocations enabled; initialize()
// must return CL_OUT_OF_RESOURCES.
TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointerSize8And32BitAllocationsThenReturnOutOfResources) {
    auto pAllocateStatelessPrivateSurface = std::unique_ptr(new SPatchAllocateStatelessPrivateSurface());
    pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize = std::numeric_limits::max();
    auto executionEnvironment = std::unique_ptr(new SPatchExecutionEnvironment());
    *executionEnvironment = {};
    executionEnvironment->CompiledSIMD32 = 32;
    auto pKernelInfo = std::make_unique();
    pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = pAllocateStatelessPrivateSurface.get();
    pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get();
    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
    pKernelInfo->gpuPointerSize = 8;
    pDevice->getMemoryManager()->setForce32BitAllocations(true);
    // make sure the scratch compute-unit count is non-zero before initializing
    if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0)
        pDevice->deviceInfo.computeUnitsUsedForScratch = 120;
    EXPECT_EQ(CL_OUT_OF_RESOURCES, pKernel->initialize());
}

// Kernel flagged isBuiltIn: after initialize() the first 8 bytes of cross-thread data
// must equal the global surface's getUnderlyingBuffer() pointer value.
TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithCpuAddress) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    // setup global memory
    SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
    AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
    AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
    pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization;

    SPatchDataParameterStream tempSPatchDataParameterStream;
    tempSPatchDataParameterStream.DataParameterStreamSize = 16;
    pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    char buffer[16];

    // the address argument is deliberately offset from the buffer pointer so the two
    // values differ
    GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, (osHandle)1u, MemoryPool::MemoryNull);
    uint64_t bufferAddress = (uint64_t)gfxAlloc.getUnderlyingBuffer();

    // create kernel
    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    program.setGlobalSurface(&gfxAlloc);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    pKernel->isBuiltIn = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData());

    // detach the stack-backed allocation from the program before the test ends
    program.setGlobalSurface(nullptr);
    delete pKernel;
}

// Kernel not flagged as built-in: after initialize() the first 8 bytes of cross-thread
// data must equal the global surface's getGpuAddress().
TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalSurfaceIsPatchedWithBaseAddressOffset) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    // setup global memory
    SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
    AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
    AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
    pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization;

    SPatchDataParameterStream tempSPatchDataParameterStream;
    tempSPatchDataParameterStream.DataParameterStreamSize = 16;
    pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    char buffer[16];

    GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, MemoryPool::MemoryNull);
    uint64_t bufferAddress = gfxAlloc.getGpuAddress();

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    program.setGlobalSurface(&gfxAlloc);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData());

    // detach the stack-backed allocation from the program before the test ends
    program.setGlobalSurface(nullptr);
    delete pKernel;
}

// Stateful path: after initialize() the surface state at the global-surface SSH offset
// must carry a base address equal to the global surface's getGpuAddress().
HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsPatchedWithCpuAddress) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup global memory
    SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization AllocateStatelessGlobalMemorySurfaceWithInitialization;
    AllocateStatelessGlobalMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
    AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamOffset = 0;
    AllocateStatelessGlobalMemorySurfaceWithInitialization.DataParamSize = 8;
    pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = &AllocateStatelessGlobalMemorySurfaceWithInitialization;

    char buffer[16];
    MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));
    auto bufferAddress = gfxAlloc.getGpuAddress();

    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    program.setGlobalSurface(&gfxAlloc);

    // create kernel
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // setup surface state heap (assigned after construction, before initialize())
    char surfaceStateHeap[0x80];
    SKernelBinaryHeaderCommon kernelHeader;
    kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
    pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
    pKernelInfo->heapInfo.pKernelHeader = &kernelHeader;

    // define stateful path
    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    EXPECT_EQ(bufferAddress, surfaceAddress);

    // detach the stack-backed allocation from the program before the test ends
    program.setGlobalSurface(nullptr);
    delete pKernel;
}

// Stateless path with a global surface set on the program: after initialize() the
// kernel must report a zero-sized, null surface state heap.
TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGlobalMemorySurfaceStateIsNotPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup global memory
    char buffer[16];
    MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));

    MockProgram program(*pDevice->getExecutionEnvironment());
    program.setGlobalSurface(&gfxAlloc);

    // create kernel
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
    EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());

    // detach the stack-backed allocation from the program before the test ends
    program.setGlobalSurface(nullptr);
    delete pKernel;
}

// Kernel flagged isBuiltIn: after initialize() the first 8 bytes of cross-thread data
// must equal the constant surface's getUnderlyingBuffer() pointer value.
TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithCpuAddress) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    // setup constant memory
    SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
    AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
    AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
    pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization;

    SPatchDataParameterStream tempSPatchDataParameterStream;
    tempSPatchDataParameterStream.DataParameterStreamSize = 16;
    pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    char buffer[16];

    // the address argument is deliberately offset from the buffer pointer so the two
    // values differ
    GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, (osHandle)1u, MemoryPool::MemoryNull);
    uint64_t bufferAddress = (uint64_t)gfxAlloc.getUnderlyingBuffer();

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    program.setConstantSurface(&gfxAlloc);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    pKernel->isBuiltIn = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData());

    // detach the stack-backed allocation from the program before the test ends
    program.setConstantSurface(nullptr);
    delete pKernel;
}

// Kernel not flagged as built-in: after initialize() the first 8 bytes of cross-thread
// data must equal the constant surface's getGpuAddress().
TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConstantSurfaceIsPatchedWithBaseAddressOffset) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    // setup constant memory
    SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
    AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
    AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
    pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization;

    SPatchDataParameterStream tempSPatchDataParameterStream;
    tempSPatchDataParameterStream.DataParameterStreamSize = 16;
    pKernelInfo->patchInfo.dataParameterStream = &tempSPatchDataParameterStream;

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    char buffer[16];

    GraphicsAllocation gfxAlloc(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, (uint64_t)buffer - 8u, 8, MemoryPool::MemoryNull);
    uint64_t bufferAddress = gfxAlloc.getGpuAddress();

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    program.setConstantSurface(&gfxAlloc);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(bufferAddress, *(uint64_t *)pKernel->getCrossThreadData());

    // detach the stack-backed allocation from the program before the test ends
    program.setConstantSurface(nullptr);
    delete pKernel;
}

// Stateful path: after initialize() the surface state at the constant-surface SSH
// offset must carry a base address equal to the constant surface's getGpuAddress().
HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsPatchedWithCpuAddress) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup constant memory
    SPatchAllocateStatelessConstantMemorySurfaceWithInitialization AllocateStatelessConstantMemorySurfaceWithInitialization;
    AllocateStatelessConstantMemorySurfaceWithInitialization.SurfaceStateHeapOffset = 0;
    AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamOffset = 0;
    AllocateStatelessConstantMemorySurfaceWithInitialization.DataParamSize = 8;
    pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = &AllocateStatelessConstantMemorySurfaceWithInitialization;

    char buffer[16];
    MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));
    auto bufferAddress = gfxAlloc.getGpuAddress();

    MockContext context;
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    program.setConstantSurface(&gfxAlloc);

    // create kernel
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // setup surface state heap (assigned after construction, before initialize())
    char surfaceStateHeap[0x80];
    SKernelBinaryHeaderCommon kernelHeader;
    kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
    pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
    pKernelInfo->heapInfo.pKernelHeader = &kernelHeader;

    // define stateful path
    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    EXPECT_EQ(bufferAddress, surfaceAddress);

    // detach the stack-backed allocation from the program before the test ends
    program.setConstantSurface(nullptr);
    delete pKernel;
}

// Stateless path with a constant surface set on the program: after initialize() the
// kernel must report a zero-sized, null surface state heap.
TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenConstantMemorySurfaceStateIsNotPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup constant surface backed by a stack buffer
    char buffer[16];
    MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));

    MockProgram program(*pDevice->getExecutionEnvironment());
    program.setConstantSurface(&gfxAlloc);

    // create kernel
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
    EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());

    // detach the stack-backed allocation from the program before the test ends
    program.setConstantSurface(nullptr);
    delete pKernel;
}

// Stateful path with an event pool token: right after initialize() (no event pool
// patched yet) the surface state at the event-pool SSH offset must have base address 0
// and surface type SURFTYPE_NULL.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsPatchedWithNullSurface) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup event pool surface
    SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
    AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface;

    // create kernel (`context` is not declared in this test body; presumably a fixture member)
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // setup surface state heap (assigned after construction, before initialize())
    char surfaceStateHeap[0x80];
    SKernelBinaryHeaderCommon kernelHeader;
    kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
    pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
    pKernelInfo->heapInfo.pKernelHeader = &kernelHeader;

    // define stateful path
    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    EXPECT_EQ(0u, surfaceAddress);
    auto surfaceType = surfaceState->getSurfaceType();
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType);

    delete pKernel;
}
// NOTE(review): in this copy of the file several template-argument lists appear to be
// missing (e.g. `std::make_unique()`, `std::unique_ptr pKernel(...)`, `reinterpret_cast(`,
// `typedef Test KernelResidencyTest`, `std::vector residencySurfaces`). The tokens are
// kept exactly as found - confirm against the upstream source before building.
// NOTE(review): tests below that hold the kernel in a raw `MockKernel *` reach
// `delete pKernel` only when the preceding ASSERT_EQ passes, because a failing ASSERT_*
// returns from the test body.

// Stateful path: after initialize() and patchEventPool(pDevQueue) the surface state at
// the event-pool SSH offset must point at the device queue's event pool buffer GPU
// address and have surface type SURFTYPE_BUFFER.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenEventPoolIsPatchedThenEventPoolSurfaceStateIsProgrammed) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup event pool surface
    SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
    AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface;

    // create kernel (`context` is not declared in this test body; presumably a fixture member)
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // setup surface state heap (assigned after construction, before initialize())
    char surfaceStateHeap[0x80];
    SKernelBinaryHeaderCommon kernelHeader;
    kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
    pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
    pKernelInfo->heapInfo.pKernelHeader = &kernelHeader;

    // define stateful path
    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    pKernel->patchEventPool(pDevQueue);

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddress(), surfaceAddress);
    auto surfaceType = surfaceState->getSurfaceType();
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType);

    delete pKernel;
}

// With no event pool token in the kernel info, patchEventPool() must leave the
// cross-thread data untouched (the sentinel value 123 is still read back).
// initialize() is not called in this test.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEventPoolInKernelInfoWhenEventPoolIsPatchedThenAddressIsNotPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = nullptr;

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    // sentinel value that must survive the patch call
    uint64_t crossThreadData = 123;

    pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));

    pKernel->patchEventPool(pDevQueue);

    EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData());

    delete pKernel;
}

// Stateless path with an event pool token: after initialize(), when the device's
// supported CL version is below 20, the surface state heap size must be zero. Nothing
// is checked for other versions (the else branch is empty).
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenEventPoolSurfaceStateIsNotPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup event pool surface
    SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
    AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface;

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
    if (pClDevice->getSupportedClVersion() < 20) {
        EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
    } else {
    }

    delete pKernel;
}

// Stateless path with an event pool token: patchEventPool() must write the event pool
// buffer's getGpuAddressToPatch() into the first 8 bytes of cross-thread data.
// initialize() is not called in this test.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhenEventPoolIsPatchedThenCrossThreadDataIsPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup event pool surface
    SPatchAllocateStatelessEventPoolSurface AllocateStatelessEventPoolSurface;
    AllocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamOffset = 0;
    AllocateStatelessEventPoolSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface = &AllocateStatelessEventPoolSurface;

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    uint64_t crossThreadData = 0;

    pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));

    pKernel->patchEventPool(pDevQueue);

    EXPECT_EQ(pDevQueue->getEventPoolBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData());

    delete pKernel;
}

// Stateful path with a default device queue token: right after initialize() (queue not
// patched yet) the surface state at the token's SSH offset must have base address 0
// and surface type SURFTYPE_NULL.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsPatchedWithNullSurface) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup default device queue surface
    SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
    AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface;

    // create kernel (`context` is not declared in this test body; presumably a fixture member)
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // setup surface state heap (assigned after construction, before initialize())
    char surfaceStateHeap[0x80];
    SKernelBinaryHeaderCommon kernelHeader;
    kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
    pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
    pKernelInfo->heapInfo.pKernelHeader = &kernelHeader;

    // define stateful path
    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    EXPECT_EQ(0u, surfaceAddress);
    auto surfaceType = surfaceState->getSurfaceType();
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfaceType);

    delete pKernel;
}

// Stateful path: after initialize() and patchDefaultDeviceQueue(pDevQueue) the surface
// state at the token's SSH offset must point at the device queue buffer's GPU address
// and have surface type SURFTYPE_BUFFER.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKernelWhenDefaultDeviceQueueIsPatchedThenSurfaceStateIsCorrectlyProgrammed) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup default device queue surface
    SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
    AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface;

    // create kernel (`context` is not declared in this test body; presumably a fixture member)
    MockProgram program(*pDevice->getExecutionEnvironment(), &context, false, pDevice);
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // setup surface state heap (assigned after construction, before initialize())
    char surfaceStateHeap[0x80];
    SKernelBinaryHeaderCommon kernelHeader;
    kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap);
    pKernelInfo->heapInfo.pSsh = surfaceStateHeap;
    pKernelInfo->heapInfo.pKernelHeader = &kernelHeader;

    // define stateful path
    pKernelInfo->usesSsh = true;
    pKernelInfo->requiresSshForBuffers = true;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    pKernel->patchDefaultDeviceQueue(pDevQueue);

    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    auto surfaceState = reinterpret_cast(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddress(), surfaceAddress);
    auto surfaceType = surfaceState->getSurfaceType();
    EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceType);

    delete pKernel;
}

// Stateless path with a default device queue token: after initialize() the surface
// state heap size must be zero.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenDefaultDeviceQueueSurfaceStateIsNotPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup default device queue surface
    SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
    AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface;

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());

    delete pKernel;
}

// With no default device queue token in the kernel info, patchDefaultDeviceQueue() must
// leave the cross-thread data untouched (the sentinel value 123 is still read back).
// initialize() is not called in this test.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWithNullDeviceQueueKernelInfoWhenDefaultDeviceQueueIsPatchedThenAddressIsNotPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = nullptr;

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    // sentinel value that must survive the patch call
    uint64_t crossThreadData = 123;

    pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));

    pKernel->patchDefaultDeviceQueue(pDevQueue);

    EXPECT_EQ(123u, *(uint64_t *)pKernel->getCrossThreadData());

    delete pKernel;
}

// Stateless path with a default device queue token: patchDefaultDeviceQueue() must
// write the queue buffer's getGpuAddressToPatch() into the first 8 bytes of
// cross-thread data. initialize() is not called in this test.
HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessKernelWhenDefaultDeviceQueueIsPatchedThenCrossThreadDataIsPatched) {

    // define kernel info
    auto pKernelInfo = std::make_unique();

    SPatchExecutionEnvironment tokenEE = {};
    tokenEE.CompiledSIMD8 = false;
    tokenEE.CompiledSIMD16 = false;
    tokenEE.CompiledSIMD32 = true;
    pKernelInfo->patchInfo.executionEnvironment = &tokenEE;

    // setup default device queue surface
    SPatchAllocateStatelessDefaultDeviceQueueSurface AllocateStatelessDefaultDeviceQueueSurface;
    AllocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0;
    AllocateStatelessDefaultDeviceQueueSurface.DataParamSize = 8;

    pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = &AllocateStatelessDefaultDeviceQueueSurface;

    // create kernel
    MockProgram program(*pDevice->getExecutionEnvironment());
    MockKernel *pKernel = new MockKernel(&program, *pKernelInfo, *pClDevice);

    // define stateless path
    pKernelInfo->usesSsh = false;
    pKernelInfo->requiresSshForBuffers = false;

    uint64_t crossThreadData = 0;

    pKernel->setCrossThreadData(&crossThreadData, sizeof(uint64_t));

    pKernel->patchDefaultDeviceQueue(pDevQueue);

    EXPECT_EQ(pDevQueue->getQueueBuffer()->getGpuAddressToPatch(), *(uint64_t *)pKernel->getCrossThreadData());

    delete pKernel;
}

// Fixture alias used by the residency tests below.
typedef Test KernelResidencyTest;

// After makeResident() exactly one allocation must have been recorded by the command
// stream receiver, and the kernel's ISA allocation (getKernelInfo().getGraphicsAllocation())
// must be reported as resident.
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIsMadeResident) {
    ASSERT_NE(nullptr, pDevice);
    char pCrossThreadData[64];

    // define kernel info
    auto pKernelInfo = std::make_unique();

    // have the CSR record every allocation passed to makeResident
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    auto memoryManager = commandStreamReceiver.getMemoryManager();
    pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    // setup kernel arg offsets
    KernelArgPatchInfo kernelArgPatchInfo;

    pKernelInfo->kernelArgInfo.resize(3);
    pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);
    pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);
    pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo);

    pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x10;
    pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20;
    pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30;

    MockProgram program(*pDevice->getExecutionEnvironment());
    MockContext ctx;
    program.setContext(&ctx);
    std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
    pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));

    EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size());
    pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver());
    EXPECT_EQ(1u, commandStreamReceiver.makeResidentAllocations.size());
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo().getGraphicsAllocation()));

    // the kernel allocation was created by hand above, so it is released by hand
    memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}

// With an exported-functions surface set on the program, makeResident() must make that
// surface resident, and the surfaces returned by getResidency() must register its
// underlying buffer exactly once on a mock CSR.
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFunctionsIsaAllocationIsMadeResident) {
    auto pKernelInfo = std::make_unique();

    // have the CSR record every allocation passed to makeResident
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    auto memoryManager = commandStreamReceiver.getMemoryManager();
    pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    MockProgram program(*pDevice->getExecutionEnvironment());
    // the program only receives a non-owning pointer; the unique_ptr keeps ownership
    auto exportedFunctionsSurface = std::make_unique();
    program.exportedFunctionsSurface = exportedFunctionsSurface.get();
    MockContext ctx;
    program.setContext(&ctx);
    std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size());
    pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver());
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.exportedFunctionsSurface));

    // check getResidency as well
    std::vector residencySurfaces;
    pKernel->getResidency(residencySurfaces);
    std::unique_ptr mockCsrExecEnv;
    {
        CommandStreamReceiverMock csrMock;
        csrMock.passResidencyCallToBaseClass = false;
        // each returned surface is applied to the mock CSR and then deleted here
        for (const auto &s : residencySurfaces) {
            s->makeResident(csrMock);
            delete s;
        }
        EXPECT_EQ(1U, csrMock.residency.count(exportedFunctionsSurface->getUnderlyingBuffer()));
        // take over the mock's execution environment before csrMock goes out of scope
        mockCsrExecEnv = std::move(csrMock.mockExecutionEnvironment);
    }

    // the kernel allocation was created by hand above, so it is released by hand
    memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}

// With a global surface set on the program, makeResident() must make that surface
// resident, and the surfaces returned by getResidency() must register its underlying
// buffer exactly once on a mock CSR.
HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBufferIsMadeResident) {
    auto pKernelInfo = std::make_unique();

    // have the CSR record every allocation passed to makeResident
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver();
    commandStreamReceiver.storeMakeResidentAllocations = true;

    auto memoryManager = commandStreamReceiver.getMemoryManager();
    pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    MockProgram program(*pDevice->getExecutionEnvironment());
    MockContext ctx;
    program.setContext(&ctx);
    // NOTE(review): this allocation is not deleted in the test body; presumably
    // MockProgram releases globalSurface - confirm.
    program.globalSurface = new MockGraphicsAllocation();
    std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo, *pClDevice));
    ASSERT_EQ(CL_SUCCESS, pKernel->initialize());

    EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size());
    pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver());
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.globalSurface));

    std::vector residencySurfaces;
    pKernel->getResidency(residencySurfaces);
    std::unique_ptr mockCsrExecEnv;
    {
        CommandStreamReceiverMock csrMock;
        csrMock.passResidencyCallToBaseClass = false;
        // each returned surface is applied to the mock CSR and then deleted here
        for (const auto &s : residencySurfaces) {
            s->makeResident(csrMock);
            delete s;
        }
        EXPECT_EQ(1U, csrMock.residency.count(program.globalSurface->getUnderlyingBuffer()));
        // take over the mock's execution environment before csrMock goes out of scope
        mockCsrExecEnv = std::move(csrMock.mockExecutionEnvironment);
    }

    // the kernel allocation was created by hand above, so it is released by hand
    memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
// With indirect device access disabled, makeResident() is expected to add no
// residency allocations; once CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL
// is enabled, exactly one is expected and its GPU address must equal the
// device USM allocation.
HWTEST_F(KernelResidencyTest, givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAllocationThenTheyAreMadeResident) {
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));

    mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());

    EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size());

    mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, true);

    mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());

    EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());

    EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation));

    // NOTE(review): nothing is asserted after this call; presumably it only
    // exercises setUnifiedMemoryProperty() with a non-USM parameter - confirm.
    mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_SVM_PTRS, true);

    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Creates one device and one host USM allocation; with only indirect host
// access enabled, the single resident allocation is expected to be the host one.
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectHostMemoryWhenMakeResidentIsCalledThenOnlyHostAllocationsAreMadeResident) {
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedDeviceMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));
    auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY));
mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());

    // Nothing is expected to be resident until indirect host access is enabled.
    EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size());

    mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, true);

    mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());

    EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());

    EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedHostMemoryAllocation));

    svmAllocationsManager->freeSVMAlloc(unifiedDeviceMemoryAllocation);
    svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
}

// Creates one shared and one host USM allocation; with only indirect shared
// access enabled, the single resident allocation is expected to be the shared one.
HWTEST_F(KernelResidencyTest, givenKernelUsingIndirectSharedMemoryWhenMakeResidentIsCalledThenOnlySharedAllocationsAreMadeResident) {
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedSharedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue());
    auto unifiedHostMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY));

    mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());

    EXPECT_EQ(0u, commandStreamReceiver.getResidencyAllocations().size());

    mockKernel.mockKernel->setUnifiedMemoryProperty(CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, true);

    mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());

    EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());

    EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(),
castToUint64(unifiedSharedMemoryAllocation));

    svmAllocationsManager->freeSVMAlloc(unifiedSharedMemoryAllocation);
    svmAllocationsManager->freeSVMAlloc(unifiedHostMemoryAllocation);
}

// Registers a device USM allocation via setUnifiedMemoryExecInfo() and expects
// makeResident() to leave every MockPageFaultManager call counter at zero.
// NOTE(review): the static_cast below has no target type in this text
// (template argument looks stripped) - confirm against upstream.
HWTEST_F(KernelResidencyTest, givenDeviceUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsNotDecommited) {
    // The raw pointer is handed to pageFaultManager.reset() right away and is
    // kept only to read the mock's counters afterwards.
    auto mockPageFaultManager = new MockPageFaultManager();
    static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);

    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocation);
    EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());

    mockKernel.mockKernel->makeResident(commandStreamReceiver);

    EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0);
    EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 0);
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 0);
    EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0);

    mockKernel.mockKernel->clearUnifiedMemoryExecInfo();
    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Shared USM counterpart of the test above: after makeResident() one
// protectMemory and one transferToGpu call are expected for the allocation.
HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndPageFaultManagerWhenMakeResidentIsCalledThenAllocationIsDecommited) {
    auto mockPageFaultManager = new MockPageFaultManager();
static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue());
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue());

    // insertAllocation() alone is expected to account for one transferToCpu
    // call; the later check confirms makeResident() adds no further one.
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);

    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocation);
    EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());

    mockKernel.mockKernel->makeResident(commandStreamReceiver);

    EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0);
    EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1);

    // The mock must have seen exactly this allocation and its 4096-byte size.
    EXPECT_EQ(mockPageFaultManager->protectedMemoryAccessAddress, unifiedMemoryAllocation);
    EXPECT_EQ(mockPageFaultManager->protectedSize, 4096u);
    EXPECT_EQ(mockPageFaultManager->transferToGpuAddress, unifiedMemoryAllocation);

    mockKernel.mockKernel->clearUnifiedMemoryExecInfo();
    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Shared USM passed as kernel argument 0 with the sync requirement switched
// off: makeResident() is expected to trigger no protectMemory/transferToGpu.
HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAndNotRequiredMemSyncWhenMakeResidentIsCalledThenAllocationIsNotDecommited) {
    auto
mockPageFaultManager = new MockPageFaultManager();
    static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pClDevice, nullptr, true);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue());
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue());

    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);

    // The allocation is attached as argument 0 (SVM_ALLOC_OBJ) rather than via
    // setUnifiedMemoryExecInfo(), so kernelUnifiedMemoryGfxAllocations stays empty.
    mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, unifiedMemoryGraphicsAllocation->gpuAllocation, unifiedMemoryAllocation, 4096u, unifiedMemoryGraphicsAllocation->gpuAllocation, sizeof(uintptr_t)};
    mockKernel.mockKernel->setUnifiedMemorySyncRequirement(false);

    mockKernel.mockKernel->makeResident(commandStreamReceiver);

    EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0);
    EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 0);
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 0);

    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Same setup with the sync requirement switched on: one protectMemory and one
// transferToGpu call are expected after makeResident().
// NOTE(review): the static_cast below has no target type in this text
// (template argument looks stripped) - confirm against upstream.
HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryRequiredMemSyncWhenMakeResidentIsCalledThenAllocationIsDecommited) {
    auto mockPageFaultManager = new MockPageFaultManager();
    static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
MockKernelWithInternals mockKernel(*this->pClDevice, nullptr, true);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue());
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue());

    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);

    // The allocation is attached as argument 0 (SVM_ALLOC_OBJ); the sync
    // requirement is what differs from the "NotRequiredMemSync" variant.
    mockKernel.mockKernel->kernelArguments[0] = {Kernel::kernelArgType::SVM_ALLOC_OBJ, unifiedMemoryGraphicsAllocation->gpuAllocation, unifiedMemoryAllocation, 4096u, unifiedMemoryGraphicsAllocation->gpuAllocation, sizeof(uintptr_t)};
    mockKernel.mockKernel->setUnifiedMemorySyncRequirement(true);

    mockKernel.mockKernel->makeResident(commandStreamReceiver);

    EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0);
    EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1);

    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Shared USM reached only through indirectSharedAllocationsAllowed (no exec
// info, no kernel argument): one protectMemory and one transferToGpu call are
// still expected after makeResident().
// NOTE(review): the static_cast below has no target type in this text
// (template argument looks stripped) - confirm against upstream.
HWTEST_F(KernelResidencyTest, givenSharedUnifiedMemoryAllocPageFaultManagerAndIndirectAllocsAllowedWhenMakeResidentIsCalledThenAllocationIsDecommited) {
    auto mockPageFaultManager = new MockPageFaultManager();
    static_cast(this->pDevice->getExecutionEnvironment()->memoryManager.get())->pageFaultManager.reset(mockPageFaultManager);
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager =
mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createSharedUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY), mockKernel.mockContext->getSpecialQueue());
    mockPageFaultManager->insertAllocation(unifiedMemoryAllocation, 4096u, svmAllocationsManager, mockKernel.mockContext->getSpecialQueue());

    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);
    // The control flag is set directly on the kernel instead of going through
    // setUnifiedMemoryProperty().
    mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed = true;

    mockKernel.mockKernel->makeResident(commandStreamReceiver);

    EXPECT_EQ(mockPageFaultManager->allowMemoryAccessCalled, 0);
    EXPECT_EQ(mockPageFaultManager->protectMemoryCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToCpuCalled, 1);
    EXPECT_EQ(mockPageFaultManager->transferToGpuCalled, 1);

    EXPECT_EQ(mockPageFaultManager->protectedMemoryAccessAddress, unifiedMemoryAllocation);
    EXPECT_EQ(mockPageFaultManager->protectedSize, 4096u);
    EXPECT_EQ(mockPageFaultManager->transferToGpuAddress, unifiedMemoryAllocation);

    mockKernel.mockKernel->clearUnifiedMemoryExecInfo();
    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Exercises the kernel's internal exec-info API: setUnifiedMemoryExecInfo()
// is expected to store the allocation, makeResident() to make it resident,
// and clearUnifiedMemoryExecInfo() to empty the list again.
HWTEST_F(KernelResidencyTest, givenKernelWhenSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) {
    MockKernelWithInternals mockKernel(*this->pClDevice);
    auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver();

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));
    auto unifiedMemoryGraphicsAllocation = svmAllocationsManager->getSVMAlloc(unifiedMemoryAllocation);

    EXPECT_EQ(0u,
mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());

    mockKernel.mockKernel->setUnifiedMemoryExecInfo(unifiedMemoryGraphicsAllocation->gpuAllocation);

    EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation));

    mockKernel.mockKernel->makeResident(this->pDevice->getGpgpuCommandStreamReceiver());
    EXPECT_EQ(1u, commandStreamReceiver.getResidencyAllocations().size());
    EXPECT_EQ(commandStreamReceiver.getResidencyAllocations()[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation));

    mockKernel.mockKernel->clearUnifiedMemoryExecInfo();
    EXPECT_EQ(0u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation);
}

// Same storage check through the public clSetKernelExecInfo() entry point with
// CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL. Skipped when the device reports no SVM
// support (ftrSvm == false).
HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryIsCalledThenAllocationIsStoredWithinKernel) {
    if (pClDevice->getHardwareInfo().capabilityTable.ftrSvm == false) {
        GTEST_SKIP();
    }
    MockKernelWithInternals mockKernel(*this->pClDevice);

    auto svmAllocationsManager = mockKernel.mockContext->getSVMAllocsManager();
    auto unifiedMemoryAllocation = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));

    auto unifiedMemoryAllocation2 = svmAllocationsManager->createUnifiedMemoryAllocation(pDevice->getRootDeviceIndex(), 4096u, SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY));

    auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(unifiedMemoryAllocation), &unifiedMemoryAllocation);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size());
    EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation));
status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(unifiedMemoryAllocation), &unifiedMemoryAllocation2); EXPECT_EQ(CL_SUCCESS, status); EXPECT_EQ(1u, mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations.size()); EXPECT_EQ(mockKernel.mockKernel->kernelUnifiedMemoryGfxAllocations[0]->getGpuAddress(), castToUint64(unifiedMemoryAllocation2)); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation); svmAllocationsManager->freeSVMAlloc(unifiedMemoryAllocation2); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryDevicePropertyIsCalledThenKernelControlIsChanged) { if (pClDevice->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectDeviceAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); enableIndirectDeviceAccess = CL_FALSE; status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectDeviceAccess); EXPECT_EQ(CL_SUCCESS, status); EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed); } HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemoryHostPropertyIsCalledThenKernelControlIsChanged) { if (pClDevice->getHardwareInfo().capabilityTable.ftrSvm == false) { GTEST_SKIP(); } MockKernelWithInternals mockKernel(*this->pClDevice); cl_bool enableIndirectHostAccess = CL_TRUE; auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess); EXPECT_EQ(CL_SUCCESS, status); 
EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed);
    enableIndirectHostAccess = CL_FALSE;
    status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectHostAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectHostAllocationsAllowed);
}

// Shared-access counterpart: toggles
// CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL through clSetKernelExecInfo()
// and expects indirectSharedAllocationsAllowed to follow. Skipped when the
// device reports no SVM support (ftrSvm == false).
HWTEST_F(KernelResidencyTest, givenKernelWhenclSetKernelExecInfoWithUnifiedMemorySharedPropertyIsCalledThenKernelControlIsChanged) {
    if (pClDevice->getHardwareInfo().capabilityTable.ftrSvm == false) {
        GTEST_SKIP();
    }
    MockKernelWithInternals mockKernel(*this->pClDevice);
    cl_bool enableIndirectSharedAccess = CL_TRUE;
    auto status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_TRUE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
    enableIndirectSharedAccess = CL_FALSE;
    status = clSetKernelExecInfo(mockKernel.mockKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &enableIndirectSharedAccess);
    EXPECT_EQ(CL_SUCCESS, status);
    EXPECT_FALSE(mockKernel.mockKernel->unifiedMemoryControls.indirectSharedAllocationsAllowed);
}

// All three arguments are image-like (isImage / isMediaBlockImage /
// isMediaImage): usesOnlyImages() is expected to be false before initialize()
// and true afterwards.
// NOTE(review): template arguments look stripped in this text (bare
// std::make_unique()) - confirm against upstream.
TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasImagesOnlyThenTrueIsReturned) {
    auto pKernelInfo = std::make_unique();
    pKernelInfo->kernelArgInfo.resize(3);
    pKernelInfo->kernelArgInfo[2].isImage = true;
    pKernelInfo->kernelArgInfo[1].isMediaBlockImage = true;
    pKernelInfo->kernelArgInfo[0].isMediaImage = true;

    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()));
    auto context = clUniquePtr(new MockContext(device.get()));
    auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice()));
    auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo,
*device)); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_TRUE(kernel->usesOnlyImages()); } TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelArgInfo.resize(3); pKernelInfo->kernelArgInfo[2].isImage = true; pKernelInfo->kernelArgInfo[1].isBuffer = true; pKernelInfo->kernelArgInfo[0].isMediaImage = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo, *device)); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); } TEST(KernelImageDetectionTests, givenKernelWithNoImagesWhenItIsAskedIfItHasImagesOnlyThenFalseIsReturned) { auto pKernelInfo = std::make_unique(); pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].isBuffer = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo, *device)); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); } HWTEST_F(KernelResidencyTest, test_MakeArgsResidentCheckImageFromImage) { ASSERT_NE(nullptr, pDevice); //create NV12 image cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS; cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_NV12_INTEL; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, 
&imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 16; imageDesc.image_height = 16; imageDesc.image_depth = 1; cl_int retVal; MockContext context; std::unique_ptr imageNV12(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(imageNV12->getMediaPlaneType(), 0u); //create Y plane imageFormat.image_channel_order = CL_R; flags = CL_MEM_READ_ONLY; surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.mem_object = imageNV12.get(); std::unique_ptr imageY(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(imageY->getMediaPlaneType(), 0u); auto pKernelInfo = std::make_unique(); KernelArgInfo kernelArgInfo; kernelArgInfo.isImage = true; pKernelInfo->kernelArgInfo.push_back(std::move(kernelArgInfo)); auto program = std::make_unique(*pDevice->getExecutionEnvironment()); program->setContext(&context); std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo, *pClDevice)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_FALSE(imageNV12->isImageFromImage()); EXPECT_TRUE(imageY->isImageFromImage()); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore, commandStreamReceiver.samplerCacheFlushRequired); } struct KernelExecutionEnvironmentTest : public Test { void SetUp() override { DeviceFixture::SetUp(); program 
= std::make_unique(*pDevice->getExecutionEnvironment()); pKernelInfo = std::make_unique(); executionEnvironment.CompiledSIMD32 = 1; pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment; pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); } void TearDown() override { delete pKernel; DeviceFixture::TearDown(); } MockKernel *pKernel; std::unique_ptr program; std::unique_ptr pKernelInfo; SPatchExecutionEnvironment executionEnvironment = {}; }; TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll32) { executionEnvironment.CompiledSIMD32 = true; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(32u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll16) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(16u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturnsMaxOfAll8) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = true; EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns8ByDefault) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = false; EXPECT_EQ(8u, this->pKernelInfo->getMaxSimdSize()); } TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns1WhenExecutionEnvironmentNotAvailable) { executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD8 = false; auto oldExcEnv = this->pKernelInfo->patchInfo.executionEnvironment; this->pKernelInfo->patchInfo.executionEnvironment = nullptr; EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize()); 
this->pKernelInfo->patchInfo.executionEnvironment = oldExcEnv;
}

// With LargestCompiledSIMDSize set to 1, getMaxSimdSize() is expected to be 1.
// The former save/restore of patchInfo.executionEnvironment was removed: the
// pointer was never modified in this test, so restoring it was a no-op.
TEST_F(KernelExecutionEnvironmentTest, getMaxSimdReturns1WhenLargestCompilledSimdSizeEqualOne) {

    executionEnvironment.LargestCompiledSIMDSize = 1;

    EXPECT_EQ(1U, this->pKernelInfo->getMaxSimdSize());
}

// With all required work-group sizes zero, getMaxRequiredWorkGroupSize() is
// expected to return the device maximum passed in.
// The fixture's SetUp() points patchInfo.executionEnvironment at the
// `executionEnvironment` member, so the fields are written through the member
// directly instead of casting constness away from the pointer.
// NOTE(review): the static_cast below has no target type in this text
// (template argument looks stripped) - confirm against upstream.
TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsZero) {
    auto maxWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize);
    auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
    auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
    auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;

    executionEnvironment.RequiredWorkGroupSizeX = 0;
    executionEnvironment.RequiredWorkGroupSizeY = 0;
    executionEnvironment.RequiredWorkGroupSizeZ = 0;

    EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize));

    executionEnvironment.RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX;
    executionEnvironment.RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY;
    executionEnvironment.RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ;
}

// Required size (max/2, 1, 1) is below the device maximum; the required
// product (max/2) is expected back.
TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsLowerThanMaxWorkGroupSize) {
    auto maxWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize);
    auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
    auto oldRequiredWorkGroupSizeY =
this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast(maxWorkGroupSize / 2); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = 1; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = 1; EXPECT_EQ(maxWorkGroupSize / 2, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } TEST_F(KernelExecutionEnvironmentTest, getMaxRequiredWorkGroupSizeWhenCompiledWorkGroupSizeIsGreaterThanMaxWorkGroupSize) { auto maxWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize); auto oldRequiredWorkGroupSizeX = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeX; auto oldRequiredWorkGroupSizeY = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeY; auto oldRequiredWorkGroupSizeZ = this->pKernelInfo->patchInfo.executionEnvironment->RequiredWorkGroupSizeZ; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX = static_cast(maxWorkGroupSize); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = static_cast(maxWorkGroupSize); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = static_cast(maxWorkGroupSize); EXPECT_EQ(maxWorkGroupSize, this->pKernelInfo->getMaxRequiredWorkGroupSize(maxWorkGroupSize)); const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeX 
= oldRequiredWorkGroupSizeX; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeY = oldRequiredWorkGroupSizeY; const_cast(this->pKernelInfo->patchInfo.executionEnvironment)->RequiredWorkGroupSizeZ = oldRequiredWorkGroupSizeZ; } struct KernelCrossThreadTests : Test { KernelCrossThreadTests() { } void SetUp() override { DeviceFixture::SetUp(); program = std::make_unique(*pDevice->getExecutionEnvironment()); patchDataParameterStream.DataParameterStreamSize = 64 * sizeof(uint8_t); pKernelInfo = std::make_unique(); ASSERT_NE(nullptr, pKernelInfo); pKernelInfo->patchInfo.dataParameterStream = &patchDataParameterStream; executionEnvironment.CompiledSIMD32 = 1; pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment; } void TearDown() override { DeviceFixture::TearDown(); } std::unique_ptr program; std::unique_ptr pKernelInfo; SPatchDataParameterStream patchDataParameterStream; SPatchExecutionEnvironment executionEnvironment = {}; }; TEST_F(KernelCrossThreadTests, globalWorkOffset) { pKernelInfo->workloadInfo.globalWorkOffsetOffsets[1] = 4; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX); EXPECT_NE(nullptr, kernel.globalWorkOffsetY); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetZ); } TEST_F(KernelCrossThreadTests, localWorkSize) { pKernelInfo->workloadInfo.localWorkSizeOffsets[0] = 0xc; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.localWorkSizeX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ); } TEST_F(KernelCrossThreadTests, localWorkSize2) { pKernelInfo->workloadInfo.localWorkSizeOffsets2[1] = 0xd; 
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2); EXPECT_NE(nullptr, kernel.localWorkSizeY2); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.localWorkSizeY2); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeZ2); } TEST_F(KernelCrossThreadTests, globalWorkSize) { pKernelInfo->workloadInfo.globalWorkSizeOffsets[2] = 8; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeY); EXPECT_NE(nullptr, kernel.globalWorkSizeZ); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.globalWorkSizeZ); } TEST_F(KernelCrossThreadTests, workDim) { pKernelInfo->workloadInfo.workDimOffset = 12; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.workDim); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.workDim); } TEST_F(KernelCrossThreadTests, numWorkGroups) { pKernelInfo->workloadInfo.numWorkGroupsOffset[0] = 0 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[1] = 1 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[2] = 2 * sizeof(uint32_t); MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.numWorkGroupsX); EXPECT_NE(nullptr, kernel.numWorkGroupsY); EXPECT_NE(nullptr, kernel.numWorkGroupsZ); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsY); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.numWorkGroupsZ); } TEST_F(KernelCrossThreadTests, enqueuedLocalWorkSize) { pKernelInfo->workloadInfo.enqueuedLocalWorkSizeOffsets[0] = 0; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); 
EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeX); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeY); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.enqueuedLocalWorkSizeZ); } TEST_F(KernelCrossThreadTests, maxWorkGroupSize) { pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.maxWorkGroupSizeOffset), static_cast(kernel.maxWorkGroupSizeForCrossThreadData)); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData); EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize); } TEST_F(KernelCrossThreadTests, dataParameterSimdSize) { pKernelInfo->workloadInfo.simdSizeOffset = 16; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.dataParameterSimdSize); EXPECT_NE(&Kernel::dummyPatchLocation, kernel.dataParameterSimdSize); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.simdSizeOffset), static_cast(kernel.dataParameterSimdSize)); EXPECT_EQ_VAL(pKernelInfo->getMaxSimdSize(), *kernel.dataParameterSimdSize); } TEST_F(KernelCrossThreadTests, GIVENparentEventOffsetWHENinitializeKernelTHENparentEventInitWithInvalid) { pKernelInfo->workloadInfo.parentEventOffset = 16; MockKernel kernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.parentEventOffset); EXPECT_NE(&Kernel::dummyPatchLocation, 
kernel.parentEventOffset); EXPECT_EQ(static_cast(kernel.getCrossThreadData() + pKernelInfo->workloadInfo.parentEventOffset), static_cast(kernel.parentEventOffset)); EXPECT_EQ(WorkloadInfo::invalidParentEvent, *kernel.parentEventOffset); } TEST_F(KernelCrossThreadTests, kernelAddRefCountToProgram) { auto refCount = program->getReference(); MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); auto refCount2 = program->getReference(); EXPECT_EQ(refCount2, refCount + 1); delete kernel; auto refCount3 = program->getReference(); EXPECT_EQ(refCount, refCount3); } TEST_F(KernelCrossThreadTests, kernelSetsTotalSLMSize) { pKernelInfo->workloadInfo.slmStaticSize = 1024; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); EXPECT_EQ(1024u, kernel->slmTotalSize); delete kernel; } TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCurbeIsPatchedProperly) { SPatchAllocateStatelessPrivateSurface allocatePrivate; allocatePrivate.DataParamSize = 8; allocatePrivate.DataParamOffset = 0; allocatePrivate.PerThreadPrivateMemorySize = 1; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &allocatePrivate; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); kernel->initialize(); auto privateSurface = kernel->getPrivateSurface(); auto constantBuffer = kernel->getCrossThreadData(); auto privateAddress = (uintptr_t)privateSurface->getGpuAddressToPatch(); auto ptrCurbe = (uint64_t *)constantBuffer; auto privateAddressFromCurbe = (uintptr_t)*ptrCurbe; EXPECT_EQ(privateAddressFromCurbe, privateAddress); delete kernel; } TEST_F(KernelCrossThreadTests, givenKernelWithPreferredWkgMultipleWhenItIsCreatedThenCurbeIsPatchedProperly) { pKernelInfo->workloadInfo.preferredWkgMultipleOffset = 8; MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); kernel->initialize(); auto *crossThread = kernel->getCrossThreadData(); uint32_t *preferredWkgMultipleOffset = (uint32_t 
*)ptrOffset(crossThread, 8); EXPECT_EQ(pKernelInfo->getMaxSimdSize(), *preferredWkgMultipleOffset); delete kernel; } TEST_F(KernelCrossThreadTests, patchBlocksSimdSize) { MockKernelWithInternals *kernel = new MockKernelWithInternals(*pClDevice); // store offset to child's simd size in kernel info uint32_t crossThreadOffset = 0; //offset of simd size kernel->kernelInfo.childrenKernelsIdOffset.push_back({0, crossThreadOffset}); // add a new block kernel to program auto infoBlock = new KernelInfo(); kernel->executionEnvironmentBlock.CompiledSIMD8 = 0; kernel->executionEnvironmentBlock.CompiledSIMD16 = 1; kernel->executionEnvironmentBlock.CompiledSIMD32 = 0; infoBlock->patchInfo.executionEnvironment = &kernel->executionEnvironmentBlock; kernel->mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock); // patch block's simd size kernel->mockKernel->patchBlocksSimdSize(); // obtain block's simd size from cross thread data void *blockSimdSize = ptrOffset(kernel->mockKernel->getCrossThreadData(), kernel->kernelInfo.childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); // check of block's simd size has been patched correctly EXPECT_EQ(kernel->mockProgram->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); delete kernel; } TEST(KernelInfoTest, borderColorOffset) { KernelInfo info; SPatchSamplerStateArray samplerState; samplerState.BorderColorOffset = 3; info.patchInfo.samplerStateArray = nullptr; EXPECT_EQ(0u, info.getBorderColorOffset()); info.patchInfo.samplerStateArray = &samplerState; EXPECT_EQ(3u, info.getBorderColorOffset()); } TEST(KernelInfoTest, getArgNumByName) { KernelInfo info; EXPECT_EQ(-1, info.getArgNumByName("")); KernelArgInfo kai; kai.metadataExtended = std::make_unique(); kai.metadataExtended->argName = "arg1"; info.kernelArgInfo.push_back(std::move(kai)); EXPECT_EQ(-1, info.getArgNumByName("")); EXPECT_EQ(-1, info.getArgNumByName("arg2")); EXPECT_EQ(0, info.getArgNumByName("arg1")); kai = {}; 
kai.metadataExtended = std::make_unique(); kai.metadataExtended->argName = "arg2"; info.kernelArgInfo.push_back(std::move(kai)); EXPECT_EQ(0, info.getArgNumByName("arg1")); EXPECT_EQ(1, info.getArgNumByName("arg2")); info.kernelArgInfo[0].metadataExtended.reset(); EXPECT_EQ(-1, info.getArgNumByName("arg1")); } TEST(KernelTest, getInstructionHeapSizeForExecutionModelReturnsZeroForNormalKernel) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); EXPECT_EQ(0u, kernel.mockKernel->getInstructionHeapSizeForExecutionModel()); } TEST(KernelTest, setKernelArgUsesBuiltinDispatchInfoBuilderIfAvailable) { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { MockBuiltinDispatchBuilder(BuiltIns &builtins) : BuiltinDispatchInfoBuilder(builtins) { } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { receivedArgs.push_back(std::make_tuple(argIndex, argSize, argVal)); err = errToReturn; return valueToReturn; } bool valueToReturn = false; cl_int errToReturn = CL_SUCCESS; mutable std::vector> receivedArgs; }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.kernelInfo.resizeKernelArgInfoAndRegisterParameter(1); kernel.mockKernel->initialize(); MockBuiltinDispatchBuilder mockBuilder(*device->getBuiltIns()); kernel.kernelInfo.builtinDispatchBuilder = &mockBuilder; mockBuilder.valueToReturn = false; mockBuilder.errToReturn = CL_SUCCESS; EXPECT_EQ(0u, kernel.mockKernel->getPatchedArgumentsNum()); auto ret = kernel.mockKernel->setArg(1, 3, reinterpret_cast(5)); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = false; mockBuilder.errToReturn = CL_INVALID_ARG_SIZE; ret = kernel.mockKernel->setArg(7, 11, reinterpret_cast(13)); EXPECT_EQ(CL_INVALID_ARG_SIZE, ret); EXPECT_EQ(1u, 
kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = true; mockBuilder.errToReturn = CL_SUCCESS; ret = kernel.mockKernel->setArg(17, 19, reinterpret_cast(23)); EXPECT_EQ(CL_INVALID_ARG_INDEX, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); mockBuilder.valueToReturn = true; mockBuilder.errToReturn = CL_INVALID_ARG_SIZE; ret = kernel.mockKernel->setArg(29, 31, reinterpret_cast(37)); EXPECT_EQ(CL_INVALID_ARG_INDEX, ret); EXPECT_EQ(1u, kernel.mockKernel->getPatchedArgumentsNum()); ASSERT_EQ(4U, mockBuilder.receivedArgs.size()); EXPECT_EQ(1U, std::get<0>(mockBuilder.receivedArgs[0])); EXPECT_EQ(3U, std::get<1>(mockBuilder.receivedArgs[0])); EXPECT_EQ(reinterpret_cast(5), std::get<2>(mockBuilder.receivedArgs[0])); EXPECT_EQ(7U, std::get<0>(mockBuilder.receivedArgs[1])); EXPECT_EQ(11U, std::get<1>(mockBuilder.receivedArgs[1])); EXPECT_EQ(reinterpret_cast(13), std::get<2>(mockBuilder.receivedArgs[1])); EXPECT_EQ(17U, std::get<0>(mockBuilder.receivedArgs[2])); EXPECT_EQ(19U, std::get<1>(mockBuilder.receivedArgs[2])); EXPECT_EQ(reinterpret_cast(23), std::get<2>(mockBuilder.receivedArgs[2])); EXPECT_EQ(29U, std::get<0>(mockBuilder.receivedArgs[3])); EXPECT_EQ(31U, std::get<1>(mockBuilder.receivedArgs[3])); EXPECT_EQ(reinterpret_cast(37), std::get<2>(mockBuilder.receivedArgs[3])); } TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMaxWorkgroupSizeIsSimdSizeDependant) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.set(true); HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; mySysInfo.EUCount = 24; mySysInfo.SubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); MockKernelWithInternals kernel(*device); kernel.executionEnvironment.LargestCompiledSIMDSize = 32; size_t maxKernelWkgSize; kernel.mockKernel->getWorkGroupInfo(device.get(), 
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(1024u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 16; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(512u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 8; kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(256u, maxKernelWkgSize); } TEST(KernelTest, givenKernelWithKernelInfoWith32bitPointerSizeThenReport32bit) { KernelInfo info; info.gpuPointerSize = 4; MockContext context; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment(), &context, false, &device->getDevice()); std::unique_ptr kernel(new MockKernel(&program, info, *device.get())); EXPECT_TRUE(kernel->is32Bit()); } TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) { KernelInfo info; info.gpuPointerSize = 8; MockContext context; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment(), &context, false, &device->getDevice()); std::unique_ptr kernel(new MockKernel(&program, info, *device.get())); EXPECT_FALSE(kernel->is32Bit()); } TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) { DebugManagerStateRestore restore; DebugManager.flags.DisableAuxTranslation.set(false); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &capabilityTable = hwInfo->capabilityTable; auto context = clUniquePtr(new MockContext(device.get())); context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; MockKernelWithInternals kernel(*device, 
context.get()); kernel.kernelInfo.kernelArgInfo.resize(1); kernel.kernelInfo.kernelArgInfo[0].metadataExtended = std::make_unique(); kernel.kernelInfo.kernelArgInfo[0].metadataExtended->type = "char *"; kernel.kernelInfo.kernelArgInfo[0].isBuffer = true; capabilityTable.ftrRenderCompressedBuffers = false; kernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = true; kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); kernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = false; kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); capabilityTable.ftrRenderCompressedBuffers = true; kernel.mockKernel->initialize(); if (HwHelper::get(hwInfo->platform.eRenderCoreFamily).requiresAuxResolves()) { EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired()); } else { EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); } DebugManager.flags.DisableAuxTranslation.set(true); kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); } TEST(KernelTest, givenDebugVariableSetWhenKernelHasStatefulBufferAccessThenMarkKernelForAuxTranslation) { DebugManagerStateRestore restore; DebugManager.flags.RenderCompressedBuffersEnabled.set(1); HardwareInfo localHwInfo = *defaultHwInfo; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&localHwInfo)); auto context = clUniquePtr(new MockContext(device.get())); MockKernelWithInternals kernel(*device, context.get()); kernel.kernelInfo.kernelArgInfo.resize(1); kernel.kernelInfo.kernelArgInfo[0].metadataExtended = std::make_unique(); kernel.kernelInfo.kernelArgInfo[0].metadataExtended->type = "char *"; kernel.kernelInfo.kernelArgInfo[0].isBuffer = true; kernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = false; localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false; kernel.mockKernel->initialize(); if 
(HwHelper::get(localHwInfo.platform.eRenderCoreFamily).requiresAuxResolves()) { EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired()); } else { EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired()); } } TEST(KernelTest, givenKernelWithPairArgumentWhenItIsInitializedThenPatchImmediateIsUsedAsArgHandler) { HardwareInfo localHwInfo = *defaultHwInfo; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&localHwInfo)); auto context = clUniquePtr(new MockContext(device.get())); MockKernelWithInternals kernel(*device, context.get()); kernel.kernelInfo.kernelArgInfo.resize(1); kernel.kernelInfo.kernelArgInfo[0].metadataExtended = std::make_unique(); kernel.kernelInfo.kernelArgInfo[0].metadataExtended->type = "pair"; kernel.mockKernel->initialize(); EXPECT_EQ(&Kernel::setArgImmediate, kernel.mockKernel->kernelArgHandlers[0]); } TEST(KernelTest, whenNullAllocationThenAssignNullPointerToCacheFlushVector) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); kernel.mockKernel->addAllocationToCacheFlushVector(0, nullptr); EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, givenKernelCompiledWithSimdSizeLowerThanExpectedWhenInitializingThenReturnError) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto minSimd = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily).getMinimalSIMDSize(); MockKernelWithInternals kernel(*device); kernel.executionEnvironment.CompiledSIMD32 = 0; kernel.executionEnvironment.CompiledSIMD16 = 0; kernel.executionEnvironment.CompiledSIMD8 = 1; cl_int retVal = kernel.mockKernel->initialize(); if (minSimd > 8) { EXPECT_EQ(CL_INVALID_KERNEL, retVal); } else { EXPECT_EQ(CL_SUCCESS, retVal); } } 
TEST(KernelTest, givenKernelCompiledWithSimdOneWhenInitializingThenReturnError) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.executionEnvironment.CompiledSIMD32 = 0; kernel.executionEnvironment.CompiledSIMD16 = 0; kernel.executionEnvironment.CompiledSIMD8 = 0; kernel.executionEnvironment.LargestCompiledSIMDSize = 1; cl_int retVal = kernel.mockKernel->initialize(); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(KernelTest, whenAllocationRequiringCacheFlushThenAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockAllocation.setMemObjectsAllocationWithWritableFlags(false); mockAllocation.setFlushL3Required(true); kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); EXPECT_EQ(&mockAllocation, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, whenKernelRequireCacheFlushAfterWalkerThenRequireCacheFlushAfterWalker) { MockGraphicsAllocation mockAllocation; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.mockKernel->svmAllocationsRequireCacheFlush = true; MockCommandQueue queue; DebugManagerStateRestore debugRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(true); queue.requiresCacheFlushAfterWalker = true; EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); queue.requiresCacheFlushAfterWalker = false; EXPECT_TRUE(kernel.mockKernel->requiresCacheFlushCommand(queue)); } TEST(KernelTest, whenAllocationWriteableThenDoNotAssignAllocationPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); mockAllocation.setMemObjectsAllocationWithWritableFlags(true); mockAllocation.setFlushL3Required(false); kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, whenAllocationReadOnlyNonFlushRequiredThenAssignNullPointerToCacheFlushVector) { MockGraphicsAllocation mockAllocation; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); kernel.mockKernel->kernelArgRequiresCacheFlush.resize(1); kernel.mockKernel->kernelArgRequiresCacheFlush[0] = reinterpret_cast(0x1); mockAllocation.setMemObjectsAllocationWithWritableFlags(false); mockAllocation.setFlushL3Required(false); kernel.mockKernel->addAllocationToCacheFlushVector(0, &mockAllocation); EXPECT_EQ(nullptr, kernel.mockKernel->kernelArgRequiresCacheFlush[0]); } TEST(KernelTest, givenKernelUsesPrivateMemoryWhenDeviceReleasedBeforeKernelThenKernelUsesMemoryManagerFromEnvironment) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto executionEnvironment = device->getExecutionEnvironment(); auto mockKernel = std::make_unique(*device); GraphicsAllocation *privateSurface = device->getExecutionEnvironment()->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); mockKernel->mockKernel->setPrivateSurface(privateSurface, 10); executionEnvironment->incRefInternal(); device.reset(nullptr); mockKernel.reset(nullptr); executionEnvironment->decRefInternal(); } TEST(KernelTest, givenAllArgumentsAreStatefulBuffersWhenInitializingThenAllBufferArgsStatefulIsTrue) { auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; kernelArgInfo[1].isBuffer = true; kernelArgInfo[0].pureStatefulBufferAccess = true; kernelArgInfo[1].pureStatefulBufferAccess = true; MockKernelWithInternals kernel{*device}; kernel.kernelInfo.kernelArgInfo.swap(kernelArgInfo); kernel.mockKernel->initialize(); EXPECT_TRUE(kernel.mockKernel->allBufferArgsStateful); } TEST(KernelTest, givenAllArgumentsAreBuffersButNotAllAreStatefulWhenInitializingThenAllBufferArgsStatefulIsFalse) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; kernelArgInfo[1].isBuffer = true; kernelArgInfo[0].pureStatefulBufferAccess = true; kernelArgInfo[1].pureStatefulBufferAccess = false; MockKernelWithInternals kernel{*device}; kernel.kernelInfo.kernelArgInfo.swap(kernelArgInfo); kernel.mockKernel->initialize(); EXPECT_FALSE(kernel.mockKernel->allBufferArgsStateful); } TEST(KernelTest, givenNotAllArgumentsAreBuffersButAllBuffersAreStatefulWhenInitializingThenAllBufferArgsStatefulIsTrue) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector kernelArgInfo(2); kernelArgInfo[0].isBuffer = true; kernelArgInfo[1].isBuffer = false; kernelArgInfo[0].pureStatefulBufferAccess = true; kernelArgInfo[1].pureStatefulBufferAccess = false; MockKernelWithInternals kernel{*device}; kernel.kernelInfo.kernelArgInfo.swap(kernelArgInfo); kernel.mockKernel->initialize(); EXPECT_TRUE(kernel.mockKernel->allBufferArgsStateful); } TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernel(*device); SPatchMediaVFEState mediaVFEstate; SPatchMediaVFEState 
mediaVFEstateSlot1; mediaVFEstateSlot1.PerThreadScratchSpace = 1024u; mediaVFEstate.PerThreadScratchSpace = 512u; mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVFEstateSlot1; EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize()); } TEST(KernelTest, givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernel(*device); mockKernel.kernelInfo.patchInfo.mediaVfeStateSlot1 = nullptr; EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize()); } TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) { DebugManagerStateRestore restore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; uint64_t crossThreadData = 0; EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, patchToken); EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size()); } TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsNotCollected) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); MockGraphicsAllocation mockAllocation; SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization patchToken{}; uint64_t crossThreadData = 0; EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, 
patchToken); EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); } TEST(KernelTest, givenDefaultKernelWhenItIsCreatedThenItReportsStatelessWrites) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); EXPECT_TRUE(kernel.mockKernel->areStatelessWritesUsed()); } TEST(KernelTest, givenPolicyWhensetKernelThreadArbitrationPolicyThenExpectedClValueIsReturned) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL)); EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL)); EXPECT_EQ(CL_SUCCESS, kernel.mockKernel->setKernelThreadArbitrationPolicy(CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL)); uint32_t notExistPolicy = 0; EXPECT_EQ(CL_INVALID_VALUE, kernel.mockKernel->setKernelThreadArbitrationPolicy(notExistPolicy)); } TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorrectValueIsSet) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernelWithInternals(*device); auto &kernel = *mockKernelWithInternals.mockKernel; cl_int retVal; EXPECT_EQ(KernelExecutionType::Default, kernel.executionType); retVal = kernel.setKernelExecutionType(-1); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(KernelExecutionType::Default, kernel.executionType); retVal = kernel.setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(KernelExecutionType::Concurrent, kernel.executionType); retVal = kernel.setKernelExecutionType(-1); EXPECT_EQ(CL_INVALID_VALUE, retVal); 
EXPECT_EQ(KernelExecutionType::Concurrent, kernel.executionType); retVal = kernel.setKernelExecutionType(CL_KERNEL_EXEC_INFO_DEFAULT_TYPE_INTEL); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(KernelExecutionType::Default, kernel.executionType); } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; threadPayload.OffsetToSkipPerThreadDataLoad = 128u; mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; mockKernel.kernelInfo.createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false); EXPECT_EQ(allocationOffset + 256u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; threadPayload.OffsetToSkipPerThreadDataLoad = 128u; mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; mockKernel.kernelInfo.createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false); EXPECT_EQ(allocationOffset + 128u, offset); 
device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; threadPayload.OffsetToSkipPerThreadDataLoad = 128u; mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; mockKernel.kernelInfo.createKernelAllocation(device->getRootDeviceIndex(), device->getMemoryManager()); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false); EXPECT_EQ(allocationOffset + 128u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsSetThenKernelRequiresPerDssBackedBuffer) { DebugManagerStateRestore restore; DebugManager.flags.ForcePerDssBackedBufferProgramming.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); EXPECT_TRUE(kernel.mockKernel->requiresPerDssBackedBuffer()); } TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKernelDoesntRequirePerDssBackedBuffer) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer()); } namespace NEO { template class DeviceQueueHwMock : public DeviceQueueHw { using BaseClass = DeviceQueueHw; public: using BaseClass::buildSlbDummyCommands; using BaseClass::getCSPrefetchSize; using BaseClass::getExecutionModelCleanupSectionSize; using 
BaseClass::getMediaStateClearCmdsSize; using BaseClass::getMinimumSlbSize; using BaseClass::getProfilingEndCmdsSize; using BaseClass::getSlbCS; using BaseClass::getWaCommandsSize; using BaseClass::offsetDsh; DeviceQueueHwMock(Context *context, ClDevice *device, cl_queue_properties &properties) : BaseClass(context, device, properties) { auto slb = this->getSlbBuffer(); LinearStream *slbCS = getSlbCS(); slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); // reset }; }; } // namespace NEO HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, whenSlbEndOffsetGreaterThanZeroThenOverwriteOneEnqueue) { std::unique_ptr> mockDeviceQueueHw(new DeviceQueueHwMock(pContext, device, deviceQueueProperties::minimumProperties[0])); auto slb = mockDeviceQueueHw->getSlbBuffer(); auto commandsSize = mockDeviceQueueHw->getMinimumSlbSize() + mockDeviceQueueHw->getWaCommandsSize(); auto slbCopy = malloc(slb->getUnderlyingBufferSize()); memset(slb->getUnderlyingBuffer(), 0xFE, slb->getUnderlyingBufferSize()); memcpy(slbCopy, slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); auto igilCmdQueue = reinterpret_cast(mockDeviceQueueHw->getQueueBuffer()->getUnderlyingBuffer()); // slbEndOffset < commandsSize * 128 // always fill only 1 enqueue (after offset) auto offset = static_cast(commandsSize) * 50; igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = offset; mockDeviceQueueHw->resetDeviceQueue(); EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, offset)); // dont touch memory before offset EXPECT_NE(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset), slbCopy, commandsSize)); // change 1 enqueue EXPECT_EQ(0, memcmp(ptrOffset(slb->getUnderlyingBuffer(), offset + commandsSize), slbCopy, offset)); // dont touch memory after (offset + 1 enqueue) // slbEndOffset == commandsSize * 128 // dont fill commands memset(slb->getUnderlyingBuffer(), 0xFEFEFEFE, slb->getUnderlyingBufferSize()); offset = static_cast(commandsSize) * 128; 
igilCmdQueue->m_controls.m_SLBENDoffsetInBytes = static_cast(commandsSize); mockDeviceQueueHw->resetDeviceQueue(); EXPECT_EQ(0, memcmp(slb->getUnderlyingBuffer(), slbCopy, commandsSize * 128)); // dont touch memory for enqueues free(slbCopy); } using KernelMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(KernelMultiRootDeviceTest, privateSurfaceHasCorrectRootDeviceIndex) { auto kernelInfo = std::make_unique(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; tokenSPS.SurfaceStateHeapOffset = 64; tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; kernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; MockProgram program(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice()); std::unique_ptr kernel(new MockKernel(&program, *kernelInfo, *device.get())); kernel->initialize(); auto privateSurface = kernel->getPrivateSurface(); ASSERT_NE(nullptr, privateSurface); EXPECT_EQ(expectedRootDeviceIndex, privateSurface->getRootDeviceIndex()); } TEST(KernelCreateTest, whenInitFailedThenReturnNull) { struct MockProgram { Device &getDevice() { return mDevice.getDevice(); } void getSource(std::string &) {} MockClDevice mDevice{new MockDevice}; } mockProgram; struct MockKernel { MockKernel(MockProgram *, const KernelInfo &, ClDevice &) {} int initialize() { return -1; }; }; KernelInfo info; info.gpuPointerSize = 8; auto ret = Kernel::create(&mockProgram, info, nullptr); EXPECT_EQ(nullptr, ret); } TEST(ArgTypeTraits, GivenDefaultInitializedArgTypeMetadataThenAddressSpaceIsGlobal) { ArgTypeTraits metadata; EXPECT_EQ(NEO::KernelArgMetadata::AddrGlobal, metadata.addressQualifier); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp000066400000000000000000000351651363734646600312510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/program/kernel_info.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_sampler.h" #include "test.h" #include using namespace NEO; class KernelTransformableTest : public ::testing::Test { public: void SetUp() override { pKernelInfo = std::make_unique(); KernelArgPatchInfo kernelArgPatchInfo; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; pKernelInfo->kernelArgInfo.resize(4); pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].offsetHeap = 0x0; pKernelInfo->kernelArgInfo[0].isSampler = true; pKernelInfo->kernelArgInfo[1].offsetHeap = 0x0; pKernelInfo->kernelArgInfo[1].isSampler = true; pKernelInfo->kernelArgInfo[2].offsetHeap = firstImageOffset; pKernelInfo->kernelArgInfo[2].isImage = true; pKernelInfo->kernelArgInfo[3].offsetHeap = secondImageOffset; pKernelInfo->kernelArgInfo[3].isImage = true; pKernelInfo->argumentsToPatchNum = 4; program = std::make_unique(*context.getDevice(0)->getExecutionEnvironment()); pKernel.reset(new MockKernel(program.get(), *pKernelInfo, *context.getDevice(0))); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgSampler); pKernel->setKernelArgHandler(1, &Kernel::setArgSampler); pKernel->setKernelArgHandler(2, &Kernel::setArgImage); pKernel->setKernelArgHandler(3, 
&Kernel::setArgImage); } Sampler *createTransformableSampler() { return new MockSampler(nullptr, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST); } Sampler *createNonTransformableSampler() { return new MockSampler(nullptr, CL_TRUE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST); } const int firstImageOffset = 0x20; const int secondImageOffset = 0x40; cl_int retVal = CL_SUCCESS; MockContext context; std::unique_ptr program; std::unique_ptr sampler; std::unique_ptr pKernelInfo; std::unique_ptr pKernel; std::unique_ptr image; SKernelBinaryHeaderCommon kernelHeader; char surfaceStateHeap[0x80]; }; HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreNotTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(&context)); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; pKernel->canKernelTransformImages = false; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, 
givenKernelWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(&context)); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_TRUE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTransformableSamplersWhenAnyArgIsResetThenImagesAreTransformedAgain) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(&context)); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto 
firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); pKernelInfo->kernelArgInfo[3].isTransformable = false; pKernel->setArg(3, sizeof(clImage), &clImage); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithOneTransformableImageAndTwoTransformableSamplersWhenAnyArgIsResetThenOnlyOneImageIsTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(&context)); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = false; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, 
givenKernelWithImages2dAndTwoTransformableSamplersWhenAnyArgIsResetThenImagesAreNotTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image2dHelper<>::create(&context)); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTransformableSamplersWhenChangeSamplerToNontransformableThenImagesAreTransformedTo3d) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(&context)); sampler.reset(createTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto 
firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); std::unique_ptr sampler2(createNonTransformableSampler()); cl_sampler clSampler2 = sampler2.get(); pKernel->setArg(1, sizeof(clSampler2), &clSampler2); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, secondSurfaceState->getSurfaceType()); EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); pKernel.reset(); } HWTEST_F(KernelTransformableTest, givenKernelWithNonTransformableSamplersWhenResetSamplerWithNontransformableThenImagesNotChangedAgain) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(&context)); sampler.reset(createNonTransformableSampler()); cl_mem clImage = image.get(); cl_sampler clSampler = sampler.get(); pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; pKernel->setArg(0, sizeof(clSampler), &clSampler); pKernel->setArg(1, sizeof(clSampler), &clSampler); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); firstSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); secondSurfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL); pKernel->setArg(0, sizeof(clSampler), &clSampler); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, firstSurfaceState->getSurfaceType()); EXPECT_FALSE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_NULL, secondSurfaceState->getSurfaceType()); 
EXPECT_FALSE(secondSurfaceState->getSurfaceArray()); } HWTEST_F(KernelTransformableTest, givenKernelWithoutSamplersAndTransformableImagesWhenResolveKernelThenImagesAreTransformed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; image.reset(Image3dHelper<>::create(&context)); cl_mem clImage = image.get(); pKernelInfo->kernelArgInfo[0].isSampler = false; pKernelInfo->kernelArgInfo[0].isImage = true; pKernelInfo->kernelArgInfo[1].isSampler = false; pKernelInfo->kernelArgInfo[1].isImage = true; pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); pKernel->setArg(0, sizeof(clImage), &clImage); pKernel->setArg(1, sizeof(clImage), &clImage); pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); auto ssh = pKernel->getSurfaceStateHeap(); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); EXPECT_TRUE(firstSurfaceState->getSurfaceArray()); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, secondSurfaceState->getSurfaceType()); EXPECT_TRUE(secondSurfaceState->getSurfaceArray()); } compute-runtime-20.13.16352/opencl/test/unit_test/kernel/parent_kernel_tests.cpp000066400000000000000000000204601363734646600276730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include using namespace 
NEO; typedef ExecutionModelKernelFixture ParentKernelFromBinaryTest; class MockKernelWithArgumentAccess : public Kernel { public: std::vector &getKernelArguments() { return kernelArguments; } class ObjectCountsPublic : public Kernel::ObjectCounts { }; MockKernelWithArgumentAccess(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg) : Kernel(programArg, kernelInfoArg, deviceArg) { } void getParentObjectCountsPublic(MockKernelWithArgumentAccess::ObjectCountsPublic &objectCount) { getParentObjectCounts(objectCount); } }; TEST(ParentKernelTest, GetObjectCounts) { KernelInfo info; MockClDevice *device = new MockClDevice{new MockDevice}; MockProgram program(*device->getExecutionEnvironment()); SPatchExecutionEnvironment environment = {}; environment.HasDeviceEnqueue = 1; info.patchInfo.executionEnvironment = &environment; MockKernelWithArgumentAccess kernel(&program, info, *device); std::vector &args = kernel.getKernelArguments(); Kernel::SimpleKernelArgInfo argInfo; argInfo.type = Kernel::kernelArgType::SAMPLER_OBJ; args.push_back(argInfo); argInfo.type = Kernel::kernelArgType::IMAGE_OBJ; args.push_back(argInfo); MockKernelWithArgumentAccess::ObjectCountsPublic objectCounts; kernel.getParentObjectCountsPublic(objectCounts); EXPECT_EQ(1u, objectCounts.imageCount); EXPECT_EQ(1u, objectCounts.samplerCount); delete device; } TEST(ParentKernelTest, patchBlocksSimdSize) { MockClDevice device{new MockDevice}; MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context, true)); MockProgram *program = (MockProgram *)parentKernel->mockProgram; parentKernel->patchBlocksSimdSize(); void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); } TEST(ParentKernelTest, hasDeviceEnqueue) { MockClDevice 
device{new MockDevice}; MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context)); EXPECT_TRUE(parentKernel->getKernelInfo().hasDeviceEnqueue()); } TEST(ParentKernelTest, doesnthaveDeviceEnqueue) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); EXPECT_FALSE(kernel.kernelInfo.hasDeviceEnqueue()); } TEST(ParentKernelTest, initializeOnParentKernelPatchesBlocksSimdSize) { MockClDevice device{new MockDevice}; MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context, true)); MockProgram *program = (MockProgram *)parentKernel->mockProgram; parentKernel->initialize(); void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); } TEST(ParentKernelTest, initializeOnParentKernelAllocatesPrivateMemoryForBlocks) { MockClDevice device{new MockDevice}; MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context, true)); MockProgram *program = (MockProgram *)parentKernel->mockProgram; uint32_t crossThreadOffsetBlock = 0; auto infoBlock = new KernelInfo(); SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueueBlock = new SPatchAllocateStatelessDefaultDeviceQueueSurface; allocateDeviceQueueBlock->DataParamOffset = crossThreadOffsetBlock; allocateDeviceQueueBlock->DataParamSize = 8; allocateDeviceQueueBlock->SurfaceStateHeapOffset = 0; allocateDeviceQueueBlock->Size = 8; infoBlock->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = allocateDeviceQueueBlock; crossThreadOffsetBlock += 8; SPatchAllocateStatelessEventPoolSurface *eventPoolBlock = new SPatchAllocateStatelessEventPoolSurface; eventPoolBlock->DataParamOffset = crossThreadOffsetBlock; eventPoolBlock->DataParamSize = 8; eventPoolBlock->EventPoolSurfaceIndex = 
0; eventPoolBlock->Size = 8; infoBlock->patchInfo.pAllocateStatelessEventPoolSurface = eventPoolBlock; crossThreadOffsetBlock += 8; auto privateSurfaceBlock = std::make_unique(); privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; privateSurfaceBlock->DataParamSize = 8; privateSurfaceBlock->Size = 8; privateSurfaceBlock->SurfaceStateHeapOffset = 0; privateSurfaceBlock->Token = 0; privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock.get(); crossThreadOffsetBlock += 8; SKernelBinaryHeaderCommon *headerBlock = new SKernelBinaryHeaderCommon; headerBlock->DynamicStateHeapSize = 0; headerBlock->GeneralStateHeapSize = 0; headerBlock->KernelHeapSize = 0; headerBlock->KernelNameSize = 0; headerBlock->PatchListSize = 0; headerBlock->SurfaceStateHeapSize = 0; infoBlock->heapInfo.pKernelHeader = headerBlock; SPatchThreadPayload *threadPayloadBlock = new SPatchThreadPayload; threadPayloadBlock->LocalIDXPresent = 0; threadPayloadBlock->LocalIDYPresent = 0; threadPayloadBlock->LocalIDZPresent = 0; threadPayloadBlock->HeaderPresent = 0; threadPayloadBlock->Size = 128; infoBlock->patchInfo.threadPayload = threadPayloadBlock; SPatchExecutionEnvironment *executionEnvironmentBlock = new SPatchExecutionEnvironment; *executionEnvironmentBlock = {}; executionEnvironmentBlock->HasDeviceEnqueue = 1; infoBlock->patchInfo.executionEnvironment = executionEnvironmentBlock; SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream; streamBlock->DataParameterStreamSize = 0; streamBlock->Size = 0; infoBlock->patchInfo.dataParameterStream = streamBlock; SPatchBindingTableState *bindingTable = new SPatchBindingTableState; bindingTable->Count = 0; bindingTable->Offset = 0; bindingTable->Size = 0; bindingTable->SurfaceStateOffset = 0; infoBlock->patchInfo.bindingTableState = bindingTable; SPatchInterfaceDescriptorData *idData = new SPatchInterfaceDescriptorData; idData->BindingTableOffset = 0; 
idData->KernelOffset = 0; idData->Offset = 0; idData->SamplerStateOffset = 0; idData->Size = 0; infoBlock->patchInfo.interfaceDescriptorData = idData; infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; infoBlock->crossThreadData = new char[crossThreadOffsetBlock]; program->blockKernelManager->addBlockKernelInfo(infoBlock); parentKernel->initialize(); EXPECT_NE(nullptr, program->getBlockKernelManager()->getPrivateSurface(program->getBlockKernelManager()->getCount() - 1)); } TEST_P(ParentKernelFromBinaryTest, getInstructionHeapSizeForExecutionModelReturnsNonZeroForParentKernel) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { EXPECT_TRUE(pKernel->isParentKernel); EXPECT_LT(0u, pKernel->getInstructionHeapSizeForExecutionModel()); } } static const char *binaryFile = "simple_block_kernel"; static const char *KernelNames[] = {"simple_block_kernel"}; INSTANTIATE_TEST_CASE_P(ParentKernelFromBinaryTest, ParentKernelFromBinaryTest, ::testing::Combine( ::testing::Values(binaryFile), ::testing::ValuesIn(KernelNames))); compute-runtime-20.13.16352/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp000066400000000000000000000145071363734646600316170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" using namespace NEO; typedef Test KernelSubstituteTest; TEST_F(KernelSubstituteTest, 
givenKernelWhenSubstituteKernelHeapWithGreaterSizeThenAllocatesNewKernelAllocation) {
    // A replacement heap one byte larger than the current allocation must end up in a
    // different allocation whose size equals the new heap size.
    MockKernelWithInternals kernel(*pClDevice);
    auto pHeader = const_cast<SKernelBinaryHeaderCommon *>(kernel.kernelInfo.heapInfo.pKernelHeader);
    const size_t initialHeapSize = 0x40;
    pHeader->KernelHeapSize = initialHeapSize;

    EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation);
    kernel.kernelInfo.createKernelAllocation(pDevice->getRootDeviceIndex(), pDevice->getMemoryManager());
    auto firstAllocation = kernel.kernelInfo.kernelAllocation;
    EXPECT_NE(nullptr, firstAllocation);
    auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize();
    EXPECT_EQ(initialHeapSize, firstAllocationSize);

    auto firstAllocationId = static_cast<MemoryAllocation *>(firstAllocation)->id;

    const size_t newHeapSize = initialHeapSize + 1;
    // Zero-filled so the bytes handed to substituteKernelHeap are determinate.
    char newHeap[newHeapSize] = {};

    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
    auto secondAllocation = kernel.kernelInfo.kernelAllocation;
    EXPECT_NE(nullptr, secondAllocation);
    auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
    EXPECT_NE(initialHeapSize, secondAllocationSize);
    EXPECT_EQ(newHeapSize, secondAllocationSize);
    auto secondAllocationId = static_cast<MemoryAllocation *>(secondAllocation)->id;

    EXPECT_NE(firstAllocationId, secondAllocationId);

    pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation);
}

// A replacement heap of exactly the current size must keep the existing allocation
// (same id, same underlying size).
TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThenDoesNotAllocateNewKernelAllocation) {
    MockKernelWithInternals kernel(*pClDevice);
    auto pHeader = const_cast<SKernelBinaryHeaderCommon *>(kernel.kernelInfo.heapInfo.pKernelHeader);
    const size_t initialHeapSize = 0x40;
    pHeader->KernelHeapSize = initialHeapSize;

    EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation);
    kernel.kernelInfo.createKernelAllocation(pDevice->getRootDeviceIndex(), pDevice->getMemoryManager());
    auto firstAllocation = kernel.kernelInfo.kernelAllocation;
    EXPECT_NE(nullptr, firstAllocation);
    auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize();
    EXPECT_EQ(initialHeapSize, firstAllocationSize);

    auto firstAllocationId = static_cast<MemoryAllocation *>(firstAllocation)->id;

    const size_t newHeapSize = initialHeapSize;
    // Zero-filled so the bytes handed to substituteKernelHeap are determinate.
    char newHeap[newHeapSize] = {};

    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
    auto secondAllocation = kernel.kernelInfo.kernelAllocation;
    EXPECT_NE(nullptr, secondAllocation);
    auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
    EXPECT_EQ(initialHeapSize, secondAllocationSize);
    auto secondAllocationId = static_cast<MemoryAllocation *>(secondAllocation)->id;

    EXPECT_EQ(firstAllocationId, secondAllocationId);

    pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation);
}

// A replacement heap one byte smaller than the current size must also keep the existing
// allocation; its underlying size stays at the initial heap size.
TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeThenDoesNotAllocateNewKernelAllocation) {
    MockKernelWithInternals kernel(*pClDevice);
    auto pHeader = const_cast<SKernelBinaryHeaderCommon *>(kernel.kernelInfo.heapInfo.pKernelHeader);
    const size_t initialHeapSize = 0x40;
    pHeader->KernelHeapSize = initialHeapSize;

    EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation);
    kernel.kernelInfo.createKernelAllocation(pDevice->getRootDeviceIndex(), pDevice->getMemoryManager());
    auto firstAllocation = kernel.kernelInfo.kernelAllocation;
    EXPECT_NE(nullptr, firstAllocation);
    auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize();
    EXPECT_EQ(initialHeapSize, firstAllocationSize);

    auto firstAllocationId = static_cast<MemoryAllocation *>(firstAllocation)->id;

    const size_t newHeapSize = initialHeapSize - 1;
    // Zero-filled so the bytes handed to substituteKernelHeap are determinate.
    char newHeap[newHeapSize] = {};

    kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize);
    auto secondAllocation = kernel.kernelInfo.kernelAllocation;
    EXPECT_NE(nullptr, secondAllocation);
    auto secondAllocationSize = secondAllocation->getUnderlyingBufferSize();
    EXPECT_EQ(initialHeapSize, secondAllocationSize);
    auto secondAllocationId = static_cast<MemoryAllocation *>(secondAllocation)->id;

    EXPECT_EQ(firstAllocationId, secondAllocationId);

    pDevice->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation);
}

TEST_F(KernelSubstituteTest,
givenKernelWithUsedKernelAllocationWhenSubstituteKernelHeapAndAllocateNewMemoryThenStoreOldAllocationOnTemporaryList) { MockKernelWithInternals kernel(*pClDevice); auto pHeader = const_cast(kernel.kernelInfo.heapInfo.pKernelHeader); auto memoryManager = pDevice->getMemoryManager(); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); const size_t initialHeapSize = 0x40; pHeader->KernelHeapSize = initialHeapSize; kernel.kernelInfo.createKernelAllocation(pDevice->getRootDeviceIndex(), memoryManager); auto firstAllocation = kernel.kernelInfo.kernelAllocation; uint32_t notReadyTaskCount = *commandStreamReceiver.getTagAddress() + 1u; firstAllocation->updateTaskCount(notReadyTaskCount, commandStreamReceiver.getOsContext().getContextId()); const size_t newHeapSize = initialHeapSize + 1; char newHeap[newHeapSize]; EXPECT_TRUE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); kernel.mockKernel->substituteKernelHeap(newHeap, newHeapSize); auto secondAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_FALSE(commandStreamReceiver.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(commandStreamReceiver.getTemporaryAllocations().peekHead(), firstAllocation); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(secondAllocation); commandStreamReceiver.getInternalAllocationStorage()->cleanAllocationList(notReadyTaskCount, TEMPORARY_ALLOCATION); } compute-runtime-20.13.16352/opencl/test/unit_test/libult/000077500000000000000000000000001363734646600231255ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/libult/CMakeLists.txt000066400000000000000000000174771363734646600257050ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # macro(macro_for_each_gen) list(APPEND IGDRCL_SRCS_ENABLE_TESTED_HW ${NEO_SHARED_TEST_DIRECTORY}/unit_test/${GEN_TYPE_LOWER}/cmd_parse_${GEN_TYPE_LOWER}.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/libult/${GEN_TYPE_LOWER}.cpp 
${NEO_SHARED_DIRECTORY}/${GEN_TYPE_LOWER}/enable_hw_info_config_${GEN_TYPE_LOWER}.cpp ${NEO_SHARED_DIRECTORY}/${GEN_TYPE_LOWER}/enable_${GEN_TYPE_LOWER}.cpp ${NEO_SHARED_DIRECTORY}/${GEN_TYPE_LOWER}/enable_family_full_core_${GEN_TYPE_LOWER}.cpp ${NEO_SOURCE_DIR}/opencl/source/${GEN_TYPE_LOWER}/enable_family_full_ocl_${GEN_TYPE_LOWER}.cpp ) if(EXISTS "${NEO_SHARED_TEST_DIRECTORY}/unit_test/${GEN_TYPE_LOWER}/cmd_parse${BRANCH_DIR_SUFFIX}/cmd_parse_${GEN_TYPE_LOWER}.inl") list(APPEND IGDRCL_SRCS_ENABLE_TESTED_HW "${NEO_SHARED_TEST_DIRECTORY}/unit_test/${GEN_TYPE_LOWER}/cmd_parse${BRANCH_DIR_SUFFIX}/cmd_parse_${GEN_TYPE_LOWER}.inl") include_directories(${NEO_SHARED_TEST_DIRECTORY}/unit_test/${GEN_TYPE_LOWER}/cmd_parse${BRANCH_DIR_SUFFIX}/) endif() endmacro() apply_macro_for_each_gen("TESTED") set(IGDRCL_SRCS_LIB_ULT ${NEO_SHARED_DIRECTORY}/helpers/allow_deferred_deleter.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/utilities/cpuintrinsics.cpp ${NEO_SOURCE_DIR}/opencl/source/compiler_interface/default_cache_config.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/debugger.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/abort.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/built_ins_helper.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/debug_helpers.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/execution_environment_helper.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/execution_environment_helper.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/test_files.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/test_files.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/create_tbx_sockets.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/debug_manager.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/source_level_debugger.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/source_level_debugger_library.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/source_level_debugger_library.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/ult_aub_command_stream_receiver.h 
${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/ult_command_stream_receiver.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/utilities/debug_settings_reader_creator.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/helpers/memory_leak_listener.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/helpers/memory_leak_listener.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/helpers/memory_management.cpp ${NEO_SHARED_TEST_DIRECTORY}/unit_test/helpers/memory_management.h ) set(IGDRCL_SRCS_LIB_ULT_WINDOWS ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/mock_environment_variables.cpp ) add_library (igdrcl_libult OBJECT EXCLUDE_FROM_ALL ${IGDRCL_SRCS_LIB_ULT} ${IGDRCL_SRCS_ENABLE_TESTED_HW} ) if(UNIX) target_sources(igdrcl_libult PRIVATE ${IGDRCL_SRCS_ENABLE_TESTED_HW_LINUX}) endif() set(IGDRCL_SRCS_LIB_ULT_CS ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/create_command_stream.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/create_command_stream.h ) add_library (igdrcl_libult_cs OBJECT EXCLUDE_FROM_ALL ${IGDRCL_SRCS_LIB_ULT_CS} ) target_include_directories(igdrcl_libult PRIVATE $ ) set(IGDRCL_SRCS_LIB_ULT_ENV ${NEO_SOURCE_DIR}/opencl/test/unit_test/custom_event_listener.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/global_environment.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/global_environment.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/main.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/options_unit_tests.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/command_queue_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/command_queue_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/memory_management_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/memory_management_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/built_in_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/built_in_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/buffer_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/buffer_fixture.h 
${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/device_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/device_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/program_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/program_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/kernel_binary_helper.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/kernel_binary_helper.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers${BRANCH_DIR_SUFFIX}/kernel_binary_helper_hash_value.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/ult_config_listener.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/ult_config_listener.h ) add_library (igdrcl_libult_env OBJECT EXCLUDE_FROM_ALL ${IGDRCL_SRCS_LIB_ULT_ENV} ) set(IGDRCL_SRCS_LIB_ULT_ENV_WINDOWS ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/create_wddm_memory_manager.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/options.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/sys_calls.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/wddm_calls.cpp ) set(IGDRCL_SRCS_LIB_ULT_ENV_LINUX ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/allocator_helper.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/create_drm_memory_manager.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/options.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/sys_calls_linux_ult.cpp ) if(WIN32) target_sources(igdrcl_libult_env PRIVATE ${IGDRCL_SRCS_LIB_ULT_ENV_WINDOWS}) target_sources(igdrcl_libult PRIVATE ${IGDRCL_SRCS_LIB_ULT_WINDOWS}) else() target_sources(igdrcl_libult_env PRIVATE 
${IGDRCL_SRCS_LIB_ULT_ENV_LINUX}) endif() target_include_directories(igdrcl_libult PRIVATE ${SOURCE_LEVEL_DEBUGGER_HEADERS_DIR} ) target_include_directories(igdrcl_libult_env PRIVATE $ ${NEO_SOURCE_DIR}/opencl/test/unit_test/mocks${BRANCH_DIR_SUFFIX} ) set_property(GLOBAL PROPERTY IGDRCL_SRCS_ENABLE_TESTED_HW ${IGDRCL_SRCS_ENABLE_TESTED_HW}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_ENABLE_TESTED_HW_LINUX ${IGDRCL_SRCS_ENABLE_TESTED_HW_LINUX}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_LIB_ULT ${IGDRCL_SRCS_LIB_ULT}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_LIB_ULT_CS ${IGDRCL_SRCS_LIB_ULT_CS}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_LIB_ULT_ENV ${IGDRCL_SRCS_LIB_ULT_ENV}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_LIB_ULT_ENV_LINUX ${IGDRCL_SRCS_LIB_ULT_ENV_LINUX}) foreach(target_name igdrcl_libult igdrcl_libult_cs igdrcl_libult_env) set_target_properties(${target_name} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${target_name} PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_property(TARGET ${target_name} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) target_include_directories(${target_name} PRIVATE $) target_compile_definitions(${target_name} PRIVATE $) create_project_source_tree(${target_name}) endforeach() compute-runtime-20.13.16352/opencl/test/unit_test/libult/create_command_stream.cpp000066400000000000000000000056771363734646600301640ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/libult/create_command_stream.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/command_stream/create_command_stream_impl.h" #include 
"opencl/source/command_stream/tbx_command_stream_receiver.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (ultHwConfig.useHwCsr) { return createCommandStreamImpl(executionEnvironment, rootDeviceIndex); } auto funcCreate = commandStreamReceiverFactory[IGFX_MAX_CORE + hwInfo->platform.eRenderCoreFamily]; if (funcCreate) { return funcCreate(false, executionEnvironment, rootDeviceIndex); } return nullptr; } bool prepareDeviceEnvironments(ExecutionEnvironment &executionEnvironment) { if (executionEnvironment.rootDeviceEnvironments.size() == 0) { executionEnvironment.prepareRootDeviceEnvironments(1u); } auto currentHwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); if (currentHwInfo->platform.eProductFamily == IGFX_UNKNOWN && currentHwInfo->platform.eRenderCoreFamily == IGFX_UNKNOWN_CORE) { executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); } if (ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc) { uint32_t numRootDevices = DebugManager.flags.CreateMultipleRootDevices.get() != 0 ? 
DebugManager.flags.CreateMultipleRootDevices.get() : 1u; executionEnvironment.prepareRootDeviceEnvironments(numRootDevices); for (auto i = 0u; i < numRootDevices; i++) { if (executionEnvironment.rootDeviceEnvironments[i]->getHardwareInfo() == nullptr || (executionEnvironment.rootDeviceEnvironments[i]->getHardwareInfo()->platform.eProductFamily == IGFX_UNKNOWN && executionEnvironment.rootDeviceEnvironments[i]->getHardwareInfo()->platform.eRenderCoreFamily == IGFX_UNKNOWN_CORE)) { executionEnvironment.rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } } executionEnvironment.calculateMaxOsContextCount(); executionEnvironment.initializeMemoryManager(); return ultHwConfig.mockedPrepareDeviceEnvironmentsFuncResult; } return prepareDeviceEnvironmentsImpl(executionEnvironment); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/libult/create_command_stream.h000066400000000000000000000006701363734646600276150ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace NEO { class CommandStreamReceiver; class ExecutionEnvironment; extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); extern bool prepareDeviceEnvironments(ExecutionEnvironment &executionEnvironment); } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/libult/create_tbx_sockets.cpp000066400000000000000000000006751363734646600275140ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/tbx/tbx_sockets_imp.h" #include "opencl/test/unit_test/mocks/mock_tbx_sockets.h" namespace NEO { TbxSockets *TbxSockets::create() { if (testMode == TestMode::AubTestsWithTbx) { return new TbxSocketsImp; } return new MockTbxSockets; } } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/libult/debug_manager.cpp000066400000000000000000000005111363734646600264060ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/source/os_interface/ocl_reg_path.h" using namespace std; namespace NEO { DebugSettingsManager DebugManager(oclRegPath); } compute-runtime-20.13.16352/opencl/test/unit_test/libult/os_interface.cpp000066400000000000000000000005221363734646600262710ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/source/memory_manager/memory_constants.h" namespace NEO { bool OSInterface::osEnableLocalMemory = true; void OSInterface::setGmmInputArgs(void *args) {} } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/libult/source_level_debugger.cpp000066400000000000000000000006031363734646600301630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/test/unit_test/libult/source_level_debugger_library.h" namespace NEO { OsLibrary *SourceLevelDebugger::loadDebugger() { return DebuggerLibrary::load(SourceLevelDebugger::dllName); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/libult/source_level_debugger_library.cpp000066400000000000000000000073201363734646600317120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "source_level_debugger_library.h" #include "shared/source/helpers/string.h" using namespace NEO; bool DebuggerLibrary::debuggerActive = false; bool DebuggerLibrary::isLibraryAvailable = false; DebuggerLibraryInterceptor *DebuggerLibrary::interceptor = nullptr; void 
*DebuggerLibrary::getProcAddress(const std::string &procName) { if (procName == "notifyNewDevice") { return reinterpret_cast(notifyNewDevice); } else if (procName == "notifySourceCode") { return reinterpret_cast(notifySourceCode); } else if (procName == "getDebuggerOption") { return reinterpret_cast(getDebuggerOption); } else if (procName == "notifyKernelDebugData") { return reinterpret_cast(notifyKernelDebugData); } else if (procName == "init") { return reinterpret_cast(init); } else if (procName == "isDebuggerActive") { return reinterpret_cast(isDebuggerActive); } else if (procName == "notifyDeviceDestruction") { return reinterpret_cast(notifyDeviceDestruction); } return nullptr; } OsLibrary *DebuggerLibrary::load(const std::string &name) { if (isLibraryAvailable) { return new DebuggerLibrary(); } return nullptr; } int DebuggerLibrary::notifyNewDevice(GfxDbgNewDeviceData *newDevice) { if (interceptor) { interceptor->newDeviceArgIn = *newDevice; interceptor->newDeviceCalled = true; return interceptor->newDeviceRetVal; } return IgfxdbgRetVal::IGFXDBG_SUCCESS; } int DebuggerLibrary::notifySourceCode(GfxDbgSourceCode *sourceCode) { if (interceptor) { interceptor->sourceCodeArgIn = *sourceCode; interceptor->sourceCodeCalled = true; if (interceptor->sourceCodeArgOut && sourceCode->sourceNameMaxLen > 0) { memcpy_s(sourceCode->sourceName, sourceCode->sourceNameMaxLen, interceptor->sourceCodeArgOut->sourceName, interceptor->sourceCodeArgOut->sourceNameMaxLen); } return interceptor->sourceCodeRetVal; } return IgfxdbgRetVal::IGFXDBG_SUCCESS; } int DebuggerLibrary::getDebuggerOption(GfxDbgOption *option) { if (interceptor) { interceptor->optionArgIn = *option; interceptor->optionCalled = true; if (interceptor->optionArgOut && option->valueLen >= interceptor->optionArgOut->valueLen) { memcpy_s(option->value, option->valueLen, interceptor->optionArgOut->value, interceptor->optionArgOut->valueLen); } else { memset(option->value, 0, option->valueLen); } return 
interceptor->optionRetVal; } return IgfxdbgRetVal::IGFXDBG_SUCCESS; } int DebuggerLibrary::notifyKernelDebugData(GfxDbgKernelDebugData *kernelDebugData) { if (interceptor) { interceptor->kernelDebugDataArgIn = *kernelDebugData; interceptor->kernelDebugDataCalled = true; return interceptor->kernelDebugDataRetVal; } return IgfxdbgRetVal::IGFXDBG_SUCCESS; } int DebuggerLibrary::init(GfxDbgTargetCaps *targetCaps) { if (interceptor) { interceptor->targetCapsArgIn = *targetCaps; interceptor->initCalled = true; return interceptor->initRetVal; } return IgfxdbgRetVal::IGFXDBG_SUCCESS; } int DebuggerLibrary::isDebuggerActive(void) { return debuggerActive ? 1 : 0; } int DebuggerLibrary::notifyDeviceDestruction(GfxDbgDeviceDestructionData *deviceDestruction) { if (interceptor) { interceptor->deviceDestructionArgIn = *deviceDestruction; interceptor->deviceDestructionCalled = true; return interceptor->deviceDestructionRetVal; } return IgfxdbgRetVal::IGFXDBG_SUCCESS; }compute-runtime-20.13.16352/opencl/test/unit_test/libult/source_level_debugger_library.h000066400000000000000000000050541363734646600313610ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "igfx_debug_interchange_types.h" #include #define IGFXDBG_CURRENT_VERSION 4 struct DebuggerLibraryInterceptor { GfxDbgNewDeviceData newDeviceArgIn; GfxDbgSourceCode sourceCodeArgIn; GfxDbgOption optionArgIn; GfxDbgKernelDebugData kernelDebugDataArgIn; GfxDbgTargetCaps targetCapsArgIn; GfxDbgDeviceDestructionData deviceDestructionArgIn; GfxDbgNewDeviceData *newDeviceArgOut = nullptr; GfxDbgSourceCode *sourceCodeArgOut = nullptr; GfxDbgOption *optionArgOut = nullptr; GfxDbgKernelDebugData *kernelDebugDataArgOut = nullptr; GfxDbgTargetCaps *targetCapsArgOut = nullptr; GfxDbgDeviceDestructionData *deviceDestructionArgOut = nullptr; bool newDeviceCalled = false; bool sourceCodeCalled = false; bool optionCalled = 
false; bool kernelDebugDataCalled = false; bool initCalled = false; bool deviceDestructionCalled = false; int newDeviceRetVal = 0; int sourceCodeRetVal = 0; int optionRetVal = 0; int kernelDebugDataRetVal = 0; int initRetVal = 0; int deviceDestructionRetVal = 0; }; class DebuggerLibrary : public NEO::OsLibrary { public: DebuggerLibrary() = default; void *getProcAddress(const std::string &procName) override; static OsLibrary *load(const std::string &name); bool isLoaded() override { return true; } static void setDebuggerActive(bool active) { debuggerActive = active; } static bool getDebuggerActive() { return debuggerActive; } static void setLibraryAvailable(bool available) { isLibraryAvailable = available; } static bool getLibraryAvailable() { return isLibraryAvailable; } static void injectDebuggerLibraryInterceptor(DebuggerLibraryInterceptor *interceptorArg) { interceptor = interceptorArg; } static void clearDebuggerLibraryInterceptor() { interceptor = nullptr; } static DebuggerLibraryInterceptor *interceptor; protected: static int notifyNewDevice(GfxDbgNewDeviceData *); static int notifySourceCode(GfxDbgSourceCode *); static int getDebuggerOption(GfxDbgOption *); static int notifyKernelDebugData(GfxDbgKernelDebugData *); static int init(GfxDbgTargetCaps *); static int isDebuggerActive(void); static int notifyDeviceDestruction(GfxDbgDeviceDestructionData *); static bool isLibraryAvailable; static bool debuggerActive; }; compute-runtime-20.13.16352/opencl/test/unit_test/libult/ult_aub_command_stream_receiver.h000066400000000000000000000033201363734646600316640ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_experimental_command_buffer.h" 
namespace NEO { template class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw, public NonCopyableOrMovableClass { using BaseClass = AUBCommandStreamReceiverHw; public: using BaseClass::osContext; UltAubCommandStreamReceiver(const std::string &fileName, bool standalone, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(fileName, standalone, executionEnvironment, rootDeviceIndex) { } UltAubCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass("aubfile", true, executionEnvironment, rootDeviceIndex) { } static CommandStreamReceiver *create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { auto csr = new UltAubCommandStreamReceiver("aubfile", true, executionEnvironment, rootDeviceIndex); if (!csr->subCaptureManager->isSubCaptureMode()) { csr->openFile("aubfile"); } return csr; } uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { blitBufferCalled++; return BaseClass::blitBuffer(blitPropertiesContainer, blocking); } uint32_t blitBufferCalled = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/libult/ult_command_stream_receiver.h000066400000000000000000000266571363734646600310570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/mocks/mock_experimental_command_buffer.h" 
#include #include namespace NEO { class GmmPageTableMngr; template class UltCommandStreamReceiver : public CommandStreamReceiverHw, public NonCopyableOrMovableClass { using BaseClass = CommandStreamReceiverHw; public: using BaseClass::dshState; using BaseClass::getCmdSizeForPrologue; using BaseClass::getScratchPatchAddress; using BaseClass::getScratchSpaceController; using BaseClass::indirectHeap; using BaseClass::iohState; using BaseClass::isDirectSubmissionEnabled; using BaseClass::perDssBackedBuffer; using BaseClass::programEnginePrologue; using BaseClass::programPreamble; using BaseClass::programStateSip; using BaseClass::requiresInstructionCacheFlush; using BaseClass::rootDeviceIndex; using BaseClass::sshState; using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired; using BaseClass::CommandStreamReceiver::cleanupResources; using BaseClass::CommandStreamReceiver::commandStream; using BaseClass::CommandStreamReceiver::dispatchMode; using BaseClass::CommandStreamReceiver::executionEnvironment; using BaseClass::CommandStreamReceiver::experimentalCmdBuffer; using BaseClass::CommandStreamReceiver::flushStamp; using BaseClass::CommandStreamReceiver::globalFenceAllocation; using BaseClass::CommandStreamReceiver::GSBAFor32BitProgrammed; using BaseClass::CommandStreamReceiver::initDirectSubmission; using BaseClass::CommandStreamReceiver::internalAllocationStorage; using BaseClass::CommandStreamReceiver::isDirectSubmissionEnabled; using BaseClass::CommandStreamReceiver::isEnginePrologueSent; using BaseClass::CommandStreamReceiver::isPreambleSent; using BaseClass::CommandStreamReceiver::isStateSipSent; using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig; using BaseClass::CommandStreamReceiver::lastPreemptionMode; using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest; using BaseClass::CommandStreamReceiver::lastSentL3Config; using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy; using 
BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig; using BaseClass::CommandStreamReceiver::latestFlushedTaskCount; using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig; using BaseClass::CommandStreamReceiver::latestSentTaskCount; using BaseClass::CommandStreamReceiver::mediaVfeStateDirty; using BaseClass::CommandStreamReceiver::osContext; using BaseClass::CommandStreamReceiver::perfCounterAllocator; using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator; using BaseClass::CommandStreamReceiver::requiredPrivateScratchSize; using BaseClass::CommandStreamReceiver::requiredScratchSize; using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy; using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired; using BaseClass::CommandStreamReceiver::scratchSpaceController; using BaseClass::CommandStreamReceiver::stallingPipeControlOnNextFlushRequired; using BaseClass::CommandStreamReceiver::submissionAggregator; using BaseClass::CommandStreamReceiver::taskCount; using BaseClass::CommandStreamReceiver::taskLevel; using BaseClass::CommandStreamReceiver::timestampPacketAllocator; using BaseClass::CommandStreamReceiver::timestampPacketWriteEnabled; using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList; UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(executionEnvironment, rootDeviceIndex), recursiveLockCounter(0), recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) {} static CommandStreamReceiver *create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { return new UltCommandStreamReceiver(executionEnvironment, rootDeviceIndex); } GmmPageTableMngr *createPageTableManager() override { createPageTableManagerCalled = true; return nullptr; } void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency) override { makeSurfacePackNonResidentCalled++; 
BaseClass::makeSurfacePackNonResident(allocationsForResidency); } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { if (recordFlusheBatchBuffer) { latestFlushedBatchBuffer = batchBuffer; } latestSentTaskCountValueDuringFlush = latestSentTaskCount; return BaseClass::flush(batchBuffer, allocationsForResidency); } CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { recordedDispatchFlags = dispatchFlags; this->lastFlushedCommandStream = &commandStream; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } size_t getPreferredTagPoolSize() const override { return BaseClass::getPreferredTagPoolSize() + 1; } void setPreemptionAllocation(GraphicsAllocation *allocation) { this->preemptionAllocation = allocation; } void downloadAllocation(GraphicsAllocation &gfxAllocation) override { downloadAllocationCalled = true; } bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override { latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait); return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait); } void overrideCsrSizeReqFlags(CsrSizeRequestFlags &flags) { this->csrSizeRequestFlags = flags; } GraphicsAllocation *getPreemptionAllocation() const { return this->preemptionAllocation; } void makeResident(GraphicsAllocation &gfxAllocation) override { if (storeMakeResidentAllocations) { std::map::iterator it = makeResidentAllocations.find(&gfxAllocation); if (it == makeResidentAllocations.end()) { std::pair::iterator, bool> result; result = makeResidentAllocations.insert(std::pair(&gfxAllocation, 1)); DEBUG_BREAK_IF(!result.second); } else { makeResidentAllocations[&gfxAllocation]++; } } 
BaseClass::makeResident(gfxAllocation); } bool isMadeResident(GraphicsAllocation *graphicsAllocation) const { return makeResidentAllocations.find(graphicsAllocation) != makeResidentAllocations.end(); } bool isMadeResident(GraphicsAllocation *graphicsAllocation, uint32_t taskCount) const { auto it = makeResidentAllocations.find(graphicsAllocation); if (it == makeResidentAllocations.end()) { return false; } return (it->first->getTaskCount(osContext->getContextId()) == taskCount); } std::map makeResidentAllocations; bool storeMakeResidentAllocations = false; AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) override { auto status = CommandStreamReceiverHw::checkAndActivateAubSubCapture(dispatchInfo); checkAndActivateAubSubCaptureCalled = true; return status; } void addAubComment(const char *message) override { CommandStreamReceiverHw::addAubComment(message); aubCommentMessages.push_back(message); addAubCommentCalled = true; } bool flushBatchedSubmissions() override { flushBatchedSubmissionsCalled = true; return CommandStreamReceiverHw::flushBatchedSubmissions(); } void initProgrammingFlags() override { CommandStreamReceiverHw::initProgrammingFlags(); initProgrammingFlagsCalled = true; } std::unique_lock obtainUniqueOwnership() override { recursiveLockCounter++; return CommandStreamReceiverHw::obtainUniqueOwnership(); } uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { blitBufferCalled++; return CommandStreamReceiverHw::blitBuffer(blitPropertiesContainer, blocking); } bool createPerDssBackedBuffer(Device &device) override { createPerDssBackedBufferCalled++; bool result = BaseClass::createPerDssBackedBuffer(device); if (!perDssBackedBuffer) { AllocationProperties properties{device.getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HEAP}; perDssBackedBuffer = 
executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(properties); } return result; } bool isMultiOsContextCapable() const override { return multiOsContextCapable; } bool initDirectSubmission(Device &device, OsContext &osContext) override { if (ultHwConfig.csrFailInitDirectSubmission) { return false; } return BaseClass::CommandStreamReceiver::initDirectSubmission(device, osContext); } bool isDirectSubmissionEnabled() const override { if (ultHwConfig.csrBaseCallDirectSubmissionAvailable) { return BaseClass::isDirectSubmissionEnabled(); } if (ultHwConfig.csrSuperBaseCallDirectSubmissionAvailable) { return BaseClass::CommandStreamReceiver::isDirectSubmissionEnabled(); } return directSubmissionAvailable; } std::atomic recursiveLockCounter; bool createPageTableManagerCalled = false; bool recordFlusheBatchBuffer = false; bool checkAndActivateAubSubCaptureCalled = false; bool addAubCommentCalled = false; bool downloadAllocationCalled = false; std::vector aubCommentMessages; bool flushBatchedSubmissionsCalled = false; uint32_t makeSurfacePackNonResidentCalled = false; bool initProgrammingFlagsCalled = false; LinearStream *lastFlushedCommandStream = nullptr; BatchBuffer latestFlushedBatchBuffer = {}; uint32_t latestSentTaskCountValueDuringFlush = 0; uint32_t blitBufferCalled = 0; uint32_t createPerDssBackedBufferCalled = 0; std::atomic latestWaitForCompletionWithTimeoutTaskCount{0}; DispatchFlags recordedDispatchFlags; bool multiOsContextCapable = false; bool directSubmissionAvailable = false; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/linux/000077500000000000000000000000001363734646600227715ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/linux/CMakeLists.txt000066400000000000000000000037251363734646600255400ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_linux_dll_tests ) set(target_name linux_dll_tests) 
add_executable(igdrcl_${target_name} $ $ $ ${CMAKE_CURRENT_SOURCE_DIR}/drm_null_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_wrap.h ${CMAKE_CURRENT_SOURCE_DIR}/linux_tests_configuration.cpp ${CMAKE_CURRENT_SOURCE_DIR}/main_linux_dll.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_layer.h ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_mode.h ${NEO_SHARED_DIRECTORY}/os_interface/linux/sys_calls_linux.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/linux/allocator_helper.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/linux/drm_neo_create.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/linux/options_linux.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/linux/os_interface.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/aub_stream_mocks/aub_stream_interface_mock.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/linux${BRANCH_DIR_SUFFIX}/drm_other_requests.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/create_drm_memory_manager.cpp ) if(NEO__LIBVA_FOUND) target_sources(igdrcl_${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/va_tests.cpp) endif() target_link_libraries(igdrcl_${target_name} ${NEO_STATICALLY_LINKED_LIBRARIES_MOCKABLE} igdrcl_mocks ${IGDRCL_EXTRA_LIBS} gmock-gtest) set_property(TARGET igdrcl_${target_name} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS}) target_include_directories(igdrcl_${target_name} PRIVATE ${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/opencl/source/dll/linux/devices${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen_common${BRANCH_DIR_SUFFIX} ${CMAKE_CURRENT_SOURCE_DIR} ) add_dependencies(unit_tests igdrcl_${target_name}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/linux/drm_null_device_tests.cpp000066400000000000000000000046471363734646600300650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/linux/drm_null_device.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/linux/drm_wrap.h" #include "opencl/test/unit_test/linux/mock_os_layer.h" #include "test.h" #include using namespace NEO; class DrmNullDeviceTestsFixture { public: void SetUp() { // Create nullDevice drm DebugManager.flags.EnableNullHardware.set(true); executionEnvironment.prepareRootDeviceEnvironments(1); drmNullDevice = DrmWrap::createDrm(*executionEnvironment.rootDeviceEnvironments[0]); ASSERT_NE(drmNullDevice, nullptr); } void TearDown() { } Drm *drmNullDevice; ExecutionEnvironment executionEnvironment; protected: DebugManagerStateRestore dbgRestorer; }; typedef Test DrmNullDeviceTests; TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENcallGetDeviceIdTHENreturnProperDeviceId) { int deviceIdQueried = 0; int ret = drmNullDevice->getDeviceID(deviceIdQueried); EXPECT_EQ(0, ret); EXPECT_EQ(deviceId, deviceIdQueried); } TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENcallIoctlTHENalwaysSuccess) { EXPECT_EQ(drmNullDevice->ioctl(0, nullptr), 0); } TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENregReadOtherThenTimestampReadTHENalwaysSuccess) { struct drm_i915_reg_read arg; arg.offset = 0; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); } TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENgetGpuTimestamp32bOr64bTHENerror) { struct drm_i915_reg_read arg; arg.offset = TIMESTAMP_LOW_REG; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), -1); arg.offset = TIMESTAMP_HIGH_REG; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), -1); } TEST_F(DrmNullDeviceTests, GIVENdrmNullDeviceWHENgetGpuTimestamp36bTHENproperValues) { struct drm_i915_reg_read arg; arg.offset = TIMESTAMP_LOW_REG | 1; ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); EXPECT_EQ(arg.val, 1000ULL); 
ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); EXPECT_EQ(arg.val, 2000ULL); ASSERT_EQ(drmNullDevice->ioctl(DRM_IOCTL_I915_REG_READ, &arg), 0); EXPECT_EQ(arg.val, 3000ULL); } compute-runtime-20.13.16352/opencl/test/unit_test/linux/drm_other_requests.cpp000066400000000000000000000002721363734646600274140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include int drmOtherRequests(unsigned long int request, va_list vl) { return 0; } compute-runtime-20.13.16352/opencl/test/unit_test/linux/drm_wrap.h000066400000000000000000000012771363734646600247640ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/os_interface.h" #include "drm/i915_drm.h" class DrmWrap : public NEO::Drm { public: static NEO::Drm *createDrm(RootDeviceEnvironment &rootDeviceEnvironment) { auto hwDeviceIds = OSInterface::discoverDevices(rootDeviceEnvironment.executionEnvironment); if (!hwDeviceIds.empty()) { return NEO::Drm::create(std::move(hwDeviceIds[0]), rootDeviceEnvironment); } return nullptr; } }; compute-runtime-20.13.16352/opencl/test/unit_test/linux/linux_tests_configuration.cpp000066400000000000000000000002731363734646600310070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_mode.h" namespace NEO { TestMode testMode = defaultTestMode; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/linux/main_linux_dll.cpp000066400000000000000000000300421363734646600264720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include 
"shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/default_hw_info.inl" #include "shared/test/unit_test/helpers/ult_hw_config.inl" #include "opencl/test/unit_test/custom_event_listener.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/linux/drm_wrap.h" #include "opencl/test/unit_test/linux/mock_os_layer.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "test.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include using namespace NEO; class DrmTestsFixture { public: void SetUp() { executionEnvironment.prepareRootDeviceEnvironments(1); rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[0].get(); } void TearDown() { } ExecutionEnvironment executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; }; typedef Test DrmTests; void initializeTestedDevice() { for (uint32_t i = 0; deviceDescriptorTable[i].eGtType != GTTYPE::GTTYPE_UNDEFINED; i++) { if (defaultHwInfo->platform.eProductFamily == deviceDescriptorTable[i].pHwInfo->platform.eProductFamily) { deviceId = deviceDescriptorTable[i].deviceId; break; } } } int openRetVal = 0; int testOpen(const char *fullPath, int, ...) { return openRetVal; }; int openCounter = 1; int openWithCounter(const char *fullPath, int, ...) 
{ if (openCounter > 0) { openCounter--; return 1023; // valid file descriptor for ULT } return -1; }; TEST(DrmTest, GivenTwoOpenableDevicesWhenDiscoverDevicesThenCreateTwoHwDeviceIds) { VariableBackup backupOpenFull(&openFull); openFull = openWithCounter; openCounter = 2; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(2u, hwDeviceIds.size()); } TEST(DrmTest, GivenSelectedNotExistingDeviceWhenGetDeviceFdThenFail) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("1234"); VariableBackup backupOpenFull(&openFull); openFull = testOpen; openRetVal = -1; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST(DrmTest, GivenSelectedExistingDeviceWhenGetDeviceFdThenReturnFd) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("1234"); VariableBackup backupOpenFull(&openFull); openRetVal = 1023; // fakeFd openFull = testOpen; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); } TEST(DrmTest, GivenSelectedIncorectDeviceWhenGetDeviceFdThenFail) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("1234"); VariableBackup backupOpenFull(&openFull); openFull = testOpen; openRetVal = 1024; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST_F(DrmTests, createReturnsDrm) { auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); drm_i915_getparam_t getParam; int lDeviceId; VariableBackup backupIoctlCnt(&ioctlCnt); VariableBackup backupIoctlSeq(&ioctlSeq[0]); ioctlCnt = 0; ioctlSeq[0] = -1; errno = EINTR; // check if device works, although there was EINTR error from KMD 
getParam.param = I915_PARAM_CHIPSET_ID; getParam.value = &lDeviceId; auto ret = drm->ioctl(DRM_IOCTL_I915_GETPARAM, &getParam); EXPECT_EQ(0, ret); EXPECT_EQ(deviceId, lDeviceId); ioctlCnt = 0; ioctlSeq[0] = -1; errno = EAGAIN; // check if device works, although there was EAGAIN error from KMD getParam.param = I915_PARAM_CHIPSET_ID; getParam.value = &lDeviceId; ret = drm->ioctl(DRM_IOCTL_I915_GETPARAM, &getParam); EXPECT_EQ(0, ret); EXPECT_EQ(deviceId, lDeviceId); ioctlCnt = 0; ioctlSeq[0] = -1; errno = 0; // we failed with any other error code getParam.param = I915_PARAM_CHIPSET_ID; getParam.value = &lDeviceId; ret = drm->ioctl(DRM_IOCTL_I915_GETPARAM, &getParam); EXPECT_EQ(-1, ret); EXPECT_EQ(deviceId, lDeviceId); } TEST_F(DrmTests, createTwiceReturnsDifferentDrm) { auto drm1 = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm1, nullptr); auto drm2 = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm2, nullptr); EXPECT_NE(drm1, drm2); } TEST_F(DrmTests, createDriFallback) { VariableBackup backupHaveDri(&haveDri); haveDri = 1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); } TEST_F(DrmTests, createNoDevice) { VariableBackup backupHaveDri(&haveDri); haveDri = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, createUnknownDevice) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.PrintDebugMessages.set(true); VariableBackup backupDeviceId(&deviceId); deviceId = -1; ::testing::internal::CaptureStderr(); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); std::string errStr = ::testing::internal::GetCapturedStderr(); EXPECT_THAT(errStr, ::testing::HasSubstr(std::string("FATAL: Unknown device: deviceId: ffffffff, revisionId: 0000"))); } TEST_F(DrmTests, createNoSoftPin) { VariableBackup backupHaveSoftPin(&haveSoftPin); haveSoftPin = 0; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, 
failOnDeviceId) { VariableBackup backupFailOnDeviceId(&failOnDeviceId); failOnDeviceId = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, failOnEuTotal) { VariableBackup backupfailOnEuTotal(&failOnEuTotal); failOnEuTotal = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, failOnSubsliceTotal) { VariableBackup backupfailOnSubsliceTotal(&failOnSubsliceTotal); failOnSubsliceTotal = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, failOnRevisionId) { VariableBackup backupFailOnRevisionId(&failOnRevisionId); failOnRevisionId = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, failOnSoftPin) { VariableBackup backupFailOnSoftPin(&failOnSoftPin); failOnSoftPin = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); } TEST_F(DrmTests, failOnParamBoost) { VariableBackup backupFailOnParamBoost(&failOnParamBoost); failOnParamBoost = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); //non-fatal error - issue warning only EXPECT_NE(drm, nullptr); } TEST_F(DrmTests, failOnContextCreate) { VariableBackup backupFailOnContextCreate(&failOnContextCreate); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); failOnContextCreate = -1; EXPECT_THROW(drm->createDrmContext(), std::exception); EXPECT_FALSE(drm->isPreemptionSupported()); failOnContextCreate = 0; } TEST_F(DrmTests, failOnSetPriority) { VariableBackup backupFailOnSetPriority(&failOnSetPriority); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); failOnSetPriority = -1; auto drmContext = drm->createDrmContext(); EXPECT_THROW(drm->setLowPriorityContextParam(drmContext), std::exception); EXPECT_FALSE(drm->isPreemptionSupported()); failOnSetPriority = 0; } TEST_F(DrmTests, failOnDrmGetVersion) { VariableBackup 
backupFailOnDrmVersion(&failOnDrmVersion); failOnDrmVersion = -1; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); failOnDrmVersion = 0; } TEST_F(DrmTests, failOnInvalidDeviceName) { VariableBackup backupFailOnDrmVersion(&failOnDrmVersion); strcpy(providedDrmVersion, "NA"); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_EQ(drm, nullptr); failOnDrmVersion = 0; strcpy(providedDrmVersion, "i915"); } TEST_F(DrmTests, whenDrmIsCreatedThenSetMemoryRegionsDoesntFailAndDrmObjectIsReturned) { DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(1); auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); } TEST(AllocatorHelper, givenExpectedSizeToReserveWhenGetSizeToReserveCalledThenExpectedValueReturned) { EXPECT_EQ((maxNBitValue(47) + 1) / 4, NEO::getSizeToReserve()); } TEST(DrmMemoryManagerCreate, whenCallCreateMemoryManagerThenDrmMemoryManagerIsCreated) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto drm = new DrmMockSuccess(*executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(drm); auto drmMemoryManager = MemoryManager::createMemoryManager(executionEnvironment); EXPECT_NE(nullptr, drmMemoryManager.get()); executionEnvironment.memoryManager = std::move(drmMemoryManager); } TEST(OsInterfaceTests, givenOsInterfaceWhenEnableLocalMemoryIsSpecifiedThenItIsSetToTrueOn64Bit) { EXPECT_TRUE(OSInterface::osEnableLocalMemory); } int main(int argc, char **argv) { bool useDefaultListener = false; ::testing::InitGoogleTest(&argc, argv); // parse remaining args assuming they're mine for (int i = 1; i < argc; ++i) { if (!strcmp("--disable_default_listener", argv[i])) { useDefaultListener = false; } else if (!strcmp("--enable_default_listener", argv[i])) { useDefaultListener = true; } } if (useDefaultListener == false) 
{ auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); auto defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener); listeners.Release(defaultListener); listeners.Append(customEventListener); } defaultHwInfo = std::make_unique(); *defaultHwInfo = DEFAULT_TEST_PLATFORM::hwInfo; initializeTestedDevice(); auto retVal = RUN_ALL_TESTS(); return retVal; } TEST_F(DrmTests, whenCreateDrmIsCalledThenProperHwInfoIsSetup) { auto oldHwInfo = rootDeviceEnvironment->getMutableHardwareInfo(); *oldHwInfo = {}; auto drm = DrmWrap::createDrm(*rootDeviceEnvironment); EXPECT_NE(drm, nullptr); auto currentHwInfo = rootDeviceEnvironment->getHardwareInfo(); EXPECT_NE(IGFX_UNKNOWN, currentHwInfo->platform.eProductFamily); EXPECT_NE(IGFX_UNKNOWN_CORE, currentHwInfo->platform.eRenderCoreFamily); EXPECT_LT(0u, currentHwInfo->gtSystemInfo.EUCount); EXPECT_LT(0u, currentHwInfo->gtSystemInfo.SubSliceCount); } compute-runtime-20.13.16352/opencl/test/unit_test/linux/mock_os_layer.cpp000066400000000000000000000133241363734646600263260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_os_layer.h" #include #include int (*c_open)(const char *pathname, int flags, ...) = nullptr; int (*openFull)(const char *pathname, int flags, ...) = nullptr; int (*c_ioctl)(int fd, unsigned long int request, ...) 
= nullptr; int fakeFd = 1023; int haveDri = 0; // index of dri to serve, -1 - none int deviceId = NEO::deviceDescriptorTable[0].deviceId; // default supported DeviceID int haveSoftPin = 1; int havePreemption = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; int failOnDeviceId = 0; int failOnEuTotal = 0; int failOnSubsliceTotal = 0; int failOnRevisionId = 0; int failOnSoftPin = 0; int failOnParamBoost = 0; int failOnSetParamSseu = 0; int failOnGetParamSseu = 0; int failOnContextCreate = 0; int failOnSetPriority = 0; int failOnPreemption = 0; int failOnDrmVersion = 0; char providedDrmVersion[5] = {'i', '9', '1', '5', '\0'}; uint64_t gpuTimestamp = 0; int ioctlSeq[8] = {0, 0, 0, 0, 0, 0, 0, 0}; size_t ioctlCnt = 0; int open(const char *pathname, int flags, ...) { if (openFull != nullptr) { return openFull(pathname, flags); } if (c_open == nullptr) { c_open = (int (*)(const char *, int, ...))dlsym(RTLD_NEXT, "open"); } if (strncmp("/dev/dri/", pathname, 9) == 0) { if (haveDri >= 0) { return fakeFd; } else { return -1; } } return c_open(pathname, flags); } int drmGetParam(drm_i915_getparam_t *param) { assert(param); int ret = 0; switch (param->param) { case I915_PARAM_CHIPSET_ID: *param->value = deviceId; ret = failOnDeviceId; break; case I915_PARAM_EU_TOTAL: *param->value = 3; ret = failOnEuTotal; break; case I915_PARAM_SUBSLICE_TOTAL: *param->value = 1; ret = failOnSubsliceTotal; break; case I915_PARAM_REVISION: *param->value = 0x0; ret = failOnRevisionId; break; case I915_PARAM_HAS_EXEC_SOFTPIN: *param->value = haveSoftPin; ret = failOnSoftPin; break; #if defined(I915_PARAM_HAS_SCHEDULER) case I915_PARAM_HAS_SCHEDULER: *param->value = havePreemption; ret = failOnPreemption; break; #endif default: ret = -1; std::cerr << "drm.getParam: " << std::dec << param->param << std::endl; break; } return ret; } int drmSetContextParam(drm_i915_gem_context_param *param) { assert(param); int ret = 0; switch (param->param) { case 
I915_CONTEXT_PRIVATE_PARAM_BOOST: ret = failOnParamBoost; break; #if defined(I915_PARAM_HAS_SCHEDULER) case I915_CONTEXT_PARAM_PRIORITY: ret = failOnSetPriority; break; #endif case I915_CONTEXT_PARAM_SSEU: if (param->size == sizeof(struct drm_i915_gem_context_param_sseu) && param->value != 0 && param->ctx_id == 0) { ret = failOnSetParamSseu; } else { ret = -1; } break; default: ret = -1; std::cerr << "drm.setContextParam: " << std::dec << param->param << std::endl; break; } return ret; } int drmGetContextParam(drm_i915_gem_context_param *param) { int ret = 0; switch (param->param) { case I915_CONTEXT_PARAM_SSEU: if (param->size == sizeof(struct drm_i915_gem_context_param_sseu) && param->value != 0 && param->ctx_id == 0) { ret = failOnGetParamSseu; } else { ret = -1; } break; default: ret = -1; std::cerr << "drm.getContextParam: " << std::dec << param->param << std::endl; break; } return ret; } int drmContextCreate(drm_i915_gem_context_create *create) { assert(create); create->ctx_id = 1; return failOnContextCreate; } int drmContextDestroy(drm_i915_gem_context_destroy *destroy) { assert(destroy); if (destroy->ctx_id == 1) return 0; else return -1; } int drmVersion(drm_version_t *version) { strcpy(version->name, providedDrmVersion); return failOnDrmVersion; } int ioctl(int fd, unsigned long int request, ...) 
throw() { if (c_ioctl == nullptr) c_ioctl = (int (*)(int, unsigned long int, ...))dlsym(RTLD_NEXT, "ioctl"); int res; va_list vl; va_start(vl, request); if (fd == fakeFd) { res = ioctlSeq[ioctlCnt % (sizeof(ioctlSeq) / sizeof(int))]; ioctlCnt++; if (res == 0) { switch (request) { case DRM_IOCTL_I915_GETPARAM: res = drmGetParam(va_arg(vl, drm_i915_getparam_t *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM: res = drmSetContextParam(va_arg(vl, drm_i915_gem_context_param *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM: res = drmGetContextParam(va_arg(vl, drm_i915_gem_context_param *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_CREATE: res = drmContextCreate(va_arg(vl, drm_i915_gem_context_create *)); break; case DRM_IOCTL_I915_GEM_CONTEXT_DESTROY: res = drmContextDestroy(va_arg(vl, drm_i915_gem_context_destroy *)); break; case DRM_IOCTL_VERSION: res = drmVersion(va_arg(vl, drm_version_t *)); break; default: res = drmOtherRequests(request, vl); break; } } va_end(vl); return res; } res = c_ioctl(fd, request, vl); va_end(vl); return res; } compute-runtime-20.13.16352/opencl/test/unit_test/linux/mock_os_layer.h000066400000000000000000000023551363734646600257750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/drm_neo.h" #include "drm/i915_drm.h" #include #include #include #include #include #include extern "C" { int open(const char *pathname, int flags, ...); int ioctl(int fd, unsigned long int request, ...) 
throw(); } extern int (*c_open)(const char *pathname, int flags, ...); extern int (*openFull)(const char *pathname, int flags, ...); extern int (*c_ioctl)(int fd, unsigned long int request, ...); extern int drmOtherRequests(unsigned long int request, va_list vl); extern int fakeFd; extern int haveDri; // index of dri to serve, -1 - none extern int deviceId; // known DeviceID extern int haveSoftPin; extern int failOnDeviceId; extern int failOnEuTotal; extern int failOnSubsliceTotal; extern int failOnRevisionId; extern int failOnSoftPin; extern int failOnParamBoost; extern int failOnContextCreate; extern int failOnSetPriority; extern int failOnPreemption; extern int havePreemption; extern int failOnDrmVersion; extern char providedDrmVersion[5]; extern int ioctlSeq[8]; extern size_t ioctlCnt; extern std::array drms; compute-runtime-20.13.16352/opencl/test/unit_test/linux/os_interface_linux_tests.cpp000066400000000000000000000034361363734646600306050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "test.h" namespace NEO { extern GMM_INIT_IN_ARGS passedInputArgs; extern bool copyInputArgs; TEST(OsInterfaceTest, whenOsInterfaceSetupsGmmInputArgsThenProperFileDescriptorIsSet) { MockExecutionEnvironment executionEnvironment; auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[0].get(); auto osInterface = new OSInterface(); rootDeviceEnvironment->osInterface.reset(osInterface); auto drm = new DrmMock(*rootDeviceEnvironment); osInterface->get()->setDrm(drm); GMM_INIT_IN_ARGS gmmInputArgs = {}; EXPECT_EQ(0u, 
gmmInputArgs.FileDescriptor); osInterface->setGmmInputArgs(&gmmInputArgs); EXPECT_NE(0u, gmmInputArgs.FileDescriptor); auto expectedFileDescriptor = drm->getFileDescriptor(); EXPECT_EQ(static_cast(expectedFileDescriptor), gmmInputArgs.FileDescriptor); } TEST(GmmHelperTest, whenCreateGmmHelperWithoutOsInterfaceThenPassedFileDescriptorIsZeroed) { std::unique_ptr gmmHelper; VariableBackup passedInputArgsBackup(&passedInputArgs); VariableBackup copyInputArgsBackup(©InputArgs, true); uint32_t expectedFileDescriptor = 0u; gmmHelper.reset(new GmmHelper(nullptr, defaultHwInfo.get())); EXPECT_EQ(expectedFileDescriptor, passedInputArgs.FileDescriptor); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/linux/test_mode.h000066400000000000000000000004011363734646600251200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/tests_configuration.h" namespace NEO { constexpr TestMode defaultTestMode = TestMode::NotSpecified; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/linux/va_tests.cpp000066400000000000000000000022021363734646600253210ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/va/va_sharing_functions.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "test.h" using namespace NEO; TEST(VaTests, whenLibvaSo2IsNotInstalledThenFail) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { if (!strncmp(filename, "libva.so.2", 10)) { return (void *)0xdeadbeef; } else return 0; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { return 0; }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> 
void * { return nullptr; }; VADisplay vaDisplay = nullptr; VASharingFunctions va(vaDisplay); EXPECT_EQ(true, va.isVaLibraryAvailable()); } compute-runtime-20.13.16352/opencl/test/unit_test/main.cpp000066400000000000000000000364651363734646600233000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_interface.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/utilities/debug_settings_reader.h" #include "shared/test/unit_test/helpers/default_hw_info.inl" #include "shared/test/unit_test/helpers/memory_leak_listener.h" #include "shared/test/unit_test/helpers/ult_hw_config.inl" #include "shared/test/unit_test/tests_configuration.h" #include "opencl/source/os_interface/ocl_reg_path.h" #include "opencl/test/unit_test/custom_event_listener.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_sip.h" #include "opencl/test/unit_test/ult_config_listener.h" #include "gmock/gmock.h" #include "mock_gmm_client_context.h" #include #include #include #include #include #include #ifdef WIN32 const char *fSeparator = "\\"; #else const char *fSeparator = "/"; #endif namespace NEO { extern const char *hardwarePrefix[]; extern const HardwareInfo *hardwareInfoTable[IGFX_MAX_PRODUCT]; extern const unsigned int ultIterationMaxTime; extern bool useMockGmm; extern TestMode testMode; extern const char *executionDirectorySuffix; std::thread::id tempThreadID; namespace MockSipData { extern std::unique_ptr mockSipKernel; } namespace 
PagaFaultManagerTestConfig { bool disabled = false; } } // namespace NEO using namespace NEO; TestEnvironment *gEnvironment; PRODUCT_FAMILY productFamily = DEFAULT_TEST_PLATFORM::hwInfo.platform.eProductFamily; GFXCORE_FAMILY renderCoreFamily = DEFAULT_TEST_PLATFORM::hwInfo.platform.eRenderCoreFamily; extern std::string lastTest; bool generateRandomInput = false; void applyWorkarounds() { platformsImpl.reserve(1); { std::ofstream f; const std::string fileName("_tmp_"); f.open(fileName, std::ofstream::binary); f.close(); } { std::mutex mtx; std::unique_lock stateLock(mtx); } { std::stringstream ss("1"); int val; ss >> val; } { class BaseClass { public: int method(int param) { return 1; } }; class MockClass : public BaseClass { public: MOCK_METHOD1(method, int(int param)); }; ::testing::NiceMock mockObj; EXPECT_CALL(mockObj, method(::testing::_)) .Times(1); mockObj.method(2); } //intialize rand srand(static_cast(time(nullptr))); //Create at least on thread to prevent false memory leaks in tests using threads std::thread t([&]() { }); tempThreadID = t.get_id(); t.join(); } #ifdef __linux__ void handle_SIGALRM(int signal) { std::cout << "Tests timeout on: " << lastTest << std::endl; abort(); } void handle_SIGSEGV(int signal) { std::cout << "SIGSEGV on: " << lastTest << std::endl; abort(); } struct sigaction oldSigAbrt; void handle_SIGABRT(int signal) { std::cout << "SIGABRT on: " << lastTest << std::endl; // restore signal handler to abort if (sigaction(SIGABRT, &oldSigAbrt, nullptr) == -1) { std::cout << "FATAL: cannot fatal SIGABRT handler" << std::endl; std::cout << "FATAL: try SEGV" << std::endl; uint8_t *ptr = nullptr; *ptr = 0; std::cout << "FATAL: still alive, call exit()" << std::endl; exit(-1); } raise(signal); } #else LONG WINAPI UltExceptionFilter( _In_ struct _EXCEPTION_POINTERS *exceptionInfo) { std::cout << "UnhandledException: 0x" << std::hex << exceptionInfo->ExceptionRecord->ExceptionCode << std::dec << " on test: " << lastTest << std::endl; return 
EXCEPTION_CONTINUE_SEARCH; } #endif void initializeTestHelpers() { GlobalMockSipProgram::initSipProgram(); MockSipData::mockSipKernel.reset(new MockSipKernel()); } void cleanTestHelpers() { GlobalMockSipProgram::shutDownSipProgram(); } std::string getHardwarePrefix() { std::string s = hardwarePrefix[defaultHwInfo->platform.eProductFamily]; return s; } std::string getRunPath(char *argv0) { std::string res(argv0); auto pos = res.rfind(fSeparator); if (pos != std::string::npos) res = res.substr(0, pos); if (res == "." || pos == std::string::npos) { #if defined(__linux__) res = getcwd(nullptr, 0); #else res = _getcwd(nullptr, 0); #endif } return res; } int main(int argc, char **argv) { int retVal = 0; bool useDefaultListener = false; bool enable_alarm = true; bool setupFeatureTableAndWorkaroundTable = testMode == TestMode::AubTests ? true : false; applyWorkarounds(); #if defined(__linux__) bool enable_segv = true; bool enable_abrt = true; if (getenv("IGDRCL_TEST_SELF_EXEC") == nullptr) { std::string wd = getRunPath(argv[0]); setenv("LD_LIBRARY_PATH", wd.c_str(), 1); setenv("IGDRCL_TEST_SELF_EXEC", wd.c_str(), 1); execv(argv[0], argv); printf("FATAL ERROR: cannot self-exec test: %s!, errno: %d\n", argv[0], errno); return -1; } else { } #endif ::testing::InitGoogleMock(&argc, argv); HardwareInfo hwInfoForTests = DEFAULT_TEST_PLATFORM::hwInfo; uint32_t euPerSubSlice = 0; uint32_t sliceCount = 0; uint32_t subSlicePerSliceCount = 0; int32_t revId = -1; int dieRecovery = 0; for (int i = 1; i < argc; ++i) { if (!strcmp("--disable_default_listener", argv[i])) { useDefaultListener = false; } else if (!strcmp("--enable_default_listener", argv[i])) { useDefaultListener = true; } else if (!strcmp("--disable_alarm", argv[i])) { enable_alarm = false; } else if (!strcmp("--disable_pagefaulting_tests", argv[i])) { //disable tests which raise page fault signal during execution NEO::PagaFaultManagerTestConfig::disabled = true; } else if (!strcmp("--tbx", argv[i])) { if (testMode == 
TestMode::AubTests) { testMode = TestMode::AubTestsWithTbx; } initialHardwareTag = 0; } else if (!strcmp("--rev_id", argv[i])) { ++i; if (i < argc) { revId = atoi(argv[i]); } } else if (!strcmp("--product", argv[i])) { ++i; if (i < argc) { if (::isdigit(argv[i][0])) { int productValue = atoi(argv[i]); if (productValue > 0 && productValue < IGFX_MAX_PRODUCT && hardwarePrefix[productValue] != nullptr) { productFamily = static_cast(productValue); } else { productFamily = IGFX_UNKNOWN; } } else { productFamily = IGFX_UNKNOWN; for (int j = 0; j < IGFX_MAX_PRODUCT; j++) { if (hardwarePrefix[j] == nullptr) continue; if (strcmp(hardwarePrefix[j], argv[i]) == 0) { productFamily = static_cast(j); break; } } } if (productFamily == IGFX_UNKNOWN) { std::cout << "unknown or unsupported product family has been set: " << argv[i] << std::endl; return -1; } else { std::cout << "product family: " << hardwarePrefix[productFamily] << " (" << productFamily << ")" << std::endl; } hwInfoForTests = *hardwareInfoTable[productFamily]; } } else if (!strcmp("--slices", argv[i])) { ++i; if (i < argc) { sliceCount = atoi(argv[i]); } } else if (!strcmp("--subslices", argv[i])) { ++i; if (i < argc) { subSlicePerSliceCount = atoi(argv[i]); } } else if (!strcmp("--eu_per_ss", argv[i])) { ++i; if (i < argc) { euPerSubSlice = atoi(argv[i]); } } else if (!strcmp("--die_recovery", argv[i])) { ++i; if (i < argc) { dieRecovery = atoi(argv[i]) ? 
1 : 0; } } else if (!strcmp("--generate_random_inputs", argv[i])) { generateRandomInput = true; } else if (!strcmp("--read-config", argv[i]) && testMode == TestMode::AubTests) { if (DebugManager.registryReadAvailable()) { DebugManager.setReaderImpl(SettingsReader::create(oclRegPath)); DebugManager.injectSettingsFromReader(); } } else if (!strcmp("--dump_buffer_format", argv[i]) && testMode == TestMode::AubTests) { ++i; std::string dumpBufferFormat(argv[i]); std::transform(dumpBufferFormat.begin(), dumpBufferFormat.end(), dumpBufferFormat.begin(), ::toupper); DebugManager.flags.AUBDumpBufferFormat.set(dumpBufferFormat); } else if (!strcmp("--dump_image_format", argv[i]) && testMode == TestMode::AubTests) { ++i; std::string dumpImageFormat(argv[i]); std::transform(dumpImageFormat.begin(), dumpImageFormat.end(), dumpImageFormat.begin(), ::toupper); DebugManager.flags.AUBDumpImageFormat.set(dumpImageFormat); } } productFamily = hwInfoForTests.platform.eProductFamily; renderCoreFamily = hwInfoForTests.platform.eRenderCoreFamily; uint32_t threadsPerEu = hwInfoConfigFactory[productFamily]->threadsPerEu; PLATFORM &platform = hwInfoForTests.platform; if (revId != -1) { platform.usRevId = revId; } uint64_t hwInfoConfig = defaultHardwareInfoConfigTable[productFamily]; setHwInfoValuesFromConfig(hwInfoConfig, hwInfoForTests); // set Gt and FeatureTable to initial state hardwareInfoSetup[productFamily](&hwInfoForTests, setupFeatureTableAndWorkaroundTable, hwInfoConfig); GT_SYSTEM_INFO >SystemInfo = hwInfoForTests.gtSystemInfo; // and adjust dynamic values if not secified sliceCount = sliceCount > 0 ? sliceCount : gtSystemInfo.SliceCount; subSlicePerSliceCount = subSlicePerSliceCount > 0 ? subSlicePerSliceCount : (gtSystemInfo.SubSliceCount / sliceCount); euPerSubSlice = euPerSubSlice > 0 ? 
euPerSubSlice : gtSystemInfo.MaxEuPerSubSlice; // clang-format off gtSystemInfo.SliceCount = sliceCount; gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount; gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery; gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu; gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice); gtSystemInfo.MaxSlicesSupported = std::max(gtSystemInfo.MaxSlicesSupported, gtSystemInfo.SliceCount); gtSystemInfo.MaxSubSlicesSupported = std::max(gtSystemInfo.MaxSubSlicesSupported, gtSystemInfo.SubSliceCount); gtSystemInfo.IsDynamicallyPopulated = false; // clang-format on binaryNameSuffix.append(familyName[hwInfoForTests.platform.eRenderCoreFamily]); binaryNameSuffix.append(hwInfoForTests.capabilityTable.platformType); std::string nBinaryKernelFiles = getRunPath(argv[0]); nBinaryKernelFiles.append("/"); nBinaryKernelFiles.append(binaryNameSuffix); nBinaryKernelFiles.append("/"); nBinaryKernelFiles.append(testFiles); testFiles = nBinaryKernelFiles; std::string nClFiles = getRunPath(argv[0]); nClFiles.append("/"); nClFiles.append(hardwarePrefix[productFamily]); nClFiles.append("/"); nClFiles.append(clFiles); clFiles = nClFiles; std::string executionDirectory(hardwarePrefix[productFamily]); executionDirectory += NEO::executionDirectorySuffix; // _aub for aub_tests, empty otherwise #ifdef WIN32 #include if (_chdir(executionDirectory.c_str())) { std::cout << "chdir into " << executionDirectory << " directory failed.\nThis might cause test failures." << std::endl; } #elif defined(__linux__) #include if (chdir(executionDirectory.c_str()) != 0) { std::cout << "chdir into " << executionDirectory << " directory failed.\nThis might cause test failures." 
<< std::endl; } #endif defaultHwInfo = std::make_unique(); *defaultHwInfo = hwInfoForTests; auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); if (useDefaultListener == false) { auto defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener, hardwarePrefix[productFamily]); listeners.Release(defaultListener); listeners.Append(customEventListener); } listeners.Append(new MemoryLeakListener); listeners.Append(new UltConfigListener); gEnvironment = reinterpret_cast(::testing::AddGlobalTestEnvironment(new TestEnvironment)); MockCompilerDebugVars fclDebugVars; MockCompilerDebugVars igcDebugVars; retrieveBinaryKernelFilename(fclDebugVars.fileName, KernelBinaryHelper::BUILT_INS + "_", ".bc"); retrieveBinaryKernelFilename(igcDebugVars.fileName, KernelBinaryHelper::BUILT_INS + "_", ".gen"); gEnvironment->setMockFileNames(fclDebugVars.fileName, igcDebugVars.fileName); gEnvironment->setDefaultDebugVars(fclDebugVars, igcDebugVars, hwInfoForTests); #if defined(__linux__) //ULTs timeout if (enable_alarm) { unsigned int alarmTime = NEO::ultIterationMaxTime * ::testing::GTEST_FLAG(repeat); struct sigaction sa; sa.sa_handler = &handle_SIGALRM; sa.sa_flags = SA_RESTART; sigfillset(&sa.sa_mask); if (sigaction(SIGALRM, &sa, NULL) == -1) { printf("FATAL ERROR: cannot intercept SIGALRM\n"); return -2; } alarm(alarmTime); std::cout << "set timeout to: " << alarmTime << std::endl; } if (enable_segv) { struct sigaction sa; sa.sa_handler = &handle_SIGSEGV; sa.sa_flags = SA_RESTART; sigfillset(&sa.sa_mask); if (sigaction(SIGSEGV, &sa, NULL) == -1) { printf("FATAL ERROR: cannot intercept SIGSEGV\n"); return -2; } } if (enable_abrt) { struct sigaction sa; sa.sa_handler = &handle_SIGABRT; sa.sa_flags = SA_RESTART; sigfillset(&sa.sa_mask); if (sigaction(SIGABRT, &sa, &oldSigAbrt) == -1) { printf("FATAL ERROR: cannot intercept SIGABRT\n"); return -2; } } #else SetUnhandledExceptionFilter(&UltExceptionFilter); #endif if 
(useMockGmm) { GmmHelper::createGmmContextWrapperFunc = GmmClientContextBase::create; } else { GmmInterface::initialize(nullptr, nullptr); } initializeTestHelpers(); retVal = RUN_ALL_TESTS(); cleanTestHelpers(); return retVal; } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/000077500000000000000000000000001363734646600232425ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/CMakeLists.txt000066400000000000000000000040071363734646600260030ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_mem_obj ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pin_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer_set_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_image_format_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/destructor_callback_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_mem_object_info_subbuffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_mem_object_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_from_subbuffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image1d_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image2d_from_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image2d_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image3d_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_array_size_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_compression_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/image_format_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_redescribe_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_release_mapped_ptr_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_set_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_snorm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_tgllp_plus.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tiled_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_transfer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_validate_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_destruction_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nv12_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/packed_yuv_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipe_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sub_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/zero_copy_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_mem_obj}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/buffer_pin_tests.cpp000066400000000000000000000065151363734646600273160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; class TestedMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override { EXPECT_NE(0u, expectedSize); if (expectedSize == allocationData.size) { EXPECT_TRUE(allocationData.flags.forcePin); allocCount++; } return OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment(allocationData); }; GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override { return nullptr; }; GraphicsAllocation *allocateGraphicsMemoryWithHostPtr(const AllocationData &properties) 
override { EXPECT_NE(0u, HPExpectedSize); if (HPExpectedSize == properties.size) { EXPECT_TRUE(properties.flags.forcePin); HPAllocCount++; } return OsAgnosticMemoryManager::allocateGraphicsMemoryWithHostPtr(properties); } size_t expectedSize = 0; uint32_t allocCount = 0; size_t HPExpectedSize = 0; uint32_t HPAllocCount = 0; }; TEST(BufferTests, doPinIsSet) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); std::unique_ptr mm(new MemoryManagerCreate(false, false, executionEnvironment)); { MockContext context; auto size = MemoryConstants::pageSize * 32; auto retVal = CL_INVALID_OPERATION; mm->expectedSize = size; mm->HPExpectedSize = 0u; context.memoryManager = mm.get(); auto buffer = Buffer::create( &context, 0, size, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mm->allocCount); delete buffer; } } TEST(BufferTests, doPinIsSetForHostPtr) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); std::unique_ptr mm(new TestedMemoryManager(executionEnvironment)); { MockContext context; auto retVal = CL_INVALID_OPERATION; auto size = MemoryConstants::pageSize * 32; mm->expectedSize = 0u; mm->HPExpectedSize = size; context.memoryManager = mm.get(); // memory must be aligned to use zero-copy void *bff = alignedMalloc(size, MemoryConstants::pageSize); auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR | CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL, size, bff, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, mm->HPAllocCount); delete buffer; alignedFree(bff); } } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp000066400000000000000000000336621363734646600301570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/surface.h" #include 
"shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; class BufferSetArgTest : public ContextFixture, public DeviceFixture, public testing::Test { using ContextFixture::SetUp; public: BufferSetArgTest() { } protected: void SetUp() override { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); pKernelInfo = std::make_unique(); ASSERT_NE(nullptr, pKernelInfo); // define kernel info // setup kernel arg offsets KernelArgPatchInfo kernelArgPatchInfo; pKernelInfo->kernelArgInfo.resize(3); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); uint32_t sizeOfPointer = sizeof(void *); pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset = 0x10; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0x20; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0x30; pKernelInfo->kernelArgInfo[2].kernelArgPatchInfoVector[0].size = sizeOfPointer; pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].size = sizeOfPointer; pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeOfPointer; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; pProgram = new 
MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); pKernel = new MockKernel(pProgram, *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(1, &Kernel::setArgBuffer); pKernel->setKernelArgHandler(2, &Kernel::setArgBuffer); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); BufferDefaults::context = new MockContext(pClDevice); buffer = BufferHelper<>::create(BufferDefaults::context); } void TearDown() override { delete buffer; delete BufferDefaults::context; delete pKernel; delete pProgram; ContextFixture::TearDown(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockProgram *pProgram; MockKernel *pKernel = nullptr; std::unique_ptr pKernelInfo; SKernelBinaryHeaderCommon kernelHeader; char surfaceStateHeap[0x80]; char pCrossThreadData[64]; Buffer *buffer = nullptr; }; TEST_F(BufferSetArgTest, setKernelArgBuffer) { auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size; buffer->setArgStateless(pKernelArg, tokenSize); EXPECT_EQ((void *)((uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress()), *pKernelArg); } TEST_F(BufferSetArgTest, givenInvalidSizeWhenSettingKernelArgBufferThenReturnClInvalidArgSize) { cl_mem arg = buffer; cl_int err = pKernel->setArgBuffer(0, sizeof(cl_mem) + 1, arg); EXPECT_EQ(CL_INVALID_ARG_SIZE, err); } HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSurfaceState) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); pKernelInfo->requiresSshForBuffers = 
true; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr); EXPECT_EQ(CL_SUCCESS, ret); auto surfaceFormat = surfaceState->getSurfaceType(); auto surfacetype = surfaceState->getSurfaceFormat(); EXPECT_EQ(surfaceFormat, RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL); EXPECT_EQ(surfacetype, SURFACE_FORMAT::SURFACE_FORMAT_RAW); } HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhenSurfaceStateIsSetThenCachingIsOn) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); pKernelInfo->requiresSshForBuffers = true; pKernelInfo->kernelArgInfo[0].isReadOnly = true; auto graphicsAllocation = castToObject(buffer)->getGraphicsAllocation(); graphicsAllocation->setSize(graphicsAllocation->getUnderlyingBufferSize() - 1); cl_mem clMemBuffer = buffer; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMemBuffer); EXPECT_EQ(CL_SUCCESS, ret); auto mocs = surfaceState->getMemoryObjectControlState(); auto gmmHelper = pDevice->getGmmHelper(); auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_TRUE(expectedMocs == mocs || expectedMocs2 == mocs); } HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramNullSurfaceState) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; char sshOriginal[sizeof(surfaceStateHeap)]; memcpy(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap)); pKernelInfo->requiresSshForBuffers = false; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), nullptr); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(memcmp(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap)), 0); } HWTEST_F(BufferSetArgTest, 
givenNonPureStatefulArgWhenRenderCompressedBufferIsSetThenSetNonAuxMode) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); buffer->getGraphicsAllocation()->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), buffer->getGraphicsAllocation()->getUnderlyingBuffer(), buffer->getSize(), false)); buffer->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed = true; pKernelInfo->requiresSshForBuffers = true; cl_mem clMem = buffer; pKernelInfo->kernelArgInfo.at(0).pureStatefulBufferAccess = false; cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMem); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState->getAuxiliarySurfaceMode()); pKernelInfo->kernelArgInfo.at(0).pureStatefulBufferAccess = true; ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMem); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_TRUE(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState->getAuxiliarySurfaceMode()); } TEST_F(BufferSetArgTest, setKernelArgBufferFor32BitAddressing) { auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size; uintptr_t gpuBase = (uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress() >> 2; buffer->getGraphicsAllocation()->setGpuBaseAddress(gpuBase); buffer->setArgStateless(pKernelArg, tokenSize, true); EXPECT_EQ((uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress() - gpuBase, (uintptr_t)*pKernelArg); } TEST_F(BufferSetArgTest, givenBufferWhenOffsetedSubbufferIsPassedToSetKernelArgThenCorrectGpuVAIsPatched) { cl_buffer_region region; 
region.origin = 0xc0; region.size = 32; cl_int error = 0; auto subBuffer = buffer->createSubBuffer(buffer->getMemoryPropertiesFlags(), buffer->getMemoryPropertiesFlagsIntel(), ®ion, error); ASSERT_NE(nullptr, subBuffer); EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), region.origin), subBuffer->getCpuAddress()); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); auto tokenSize = pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size; subBuffer->setArgStateless(pKernelArg, tokenSize); EXPECT_EQ((void *)((uintptr_t)subBuffer->getGraphicsAllocation()->getGpuAddress() + region.origin), *pKernelArg); delete subBuffer; } TEST_F(BufferSetArgTest, givenCurbeTokenThatSizeIs4BytesWhenStatelessArgIsPatchedThenOnly4BytesArePatchedInCurbe) { auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); //fill 8 bytes with 0xffffffffffffffff; uint64_t fillValue = -1; uint64_t *pointer64bytes = (uint64_t *)pKernelArg; *pointer64bytes = fillValue; uint32_t sizeOf4Bytes = sizeof(uint32_t); pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeOf4Bytes; buffer->setArgStateless(pKernelArg, sizeOf4Bytes); //make sure only 4 bytes are patched uintptr_t bufferAddress = (uintptr_t)buffer->getGraphicsAllocation()->getGpuAddress(); uint32_t address32bits = (uint32_t)bufferAddress; uint64_t curbeValue = *pointer64bytes; uint32_t higherPart = curbeValue >> 32; uint32_t lowerPart = (curbeValue & 0xffffffff); EXPECT_EQ(0xffffffff, higherPart); EXPECT_EQ(address32bits, lowerPart); } TEST_F(BufferSetArgTest, clSetKernelArgBuffer) { cl_mem memObj = buffer; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ((void 
*)buffer->getGraphicsAllocation()->getGpuAddressToPatch(), *pKernelArg); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } TEST_F(BufferSetArgTest, clSetKernelArgSVMPointer) { if (!pDevice->getHardwareInfo().capabilityTable.ftrSvm) { GTEST_SKIP(); } void *ptrSVM = pContext->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 256, {}); EXPECT_NE(nullptr, ptrSVM); auto svmData = pContext->getSVMAllocsManager()->getSVMAlloc(ptrSVM); ASSERT_NE(nullptr, svmData); GraphicsAllocation *pSvmAlloc = svmData->gpuAllocation; EXPECT_NE(nullptr, pSvmAlloc); retVal = pKernel->setArgSvmAlloc( 0, ptrSVM, pSvmAlloc); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(ptrSVM, *pKernelArg); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } pContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM); } TEST_F(BufferSetArgTest, getKernelArgShouldReturnBuffer) { cl_mem memObj = buffer; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memObj, pKernel->getKernelArg(0)); } TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsSetThenPatchInfoDataIsCollected) { DebugManagerStateRestore dbgRestore; DebugManager.flags.AddPatchInfoCommentsForAUBDump.set(true); cl_mem memObj = buffer; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1u, pKernel->getPatchInfoDataList().size()); EXPECT_EQ(PatchInfoAllocationType::KernelArg, pKernel->getPatchInfoDataList()[0].sourceType); EXPECT_EQ(PatchInfoAllocationType::IndirectObjectHeap, pKernel->getPatchInfoDataList()[0].targetType); EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddressToPatch(), 
pKernel->getPatchInfoDataList()[0].sourceAllocation); EXPECT_EQ(reinterpret_cast(pKernel->getCrossThreadData()), pKernel->getPatchInfoDataList()[0].targetAllocation); EXPECT_EQ(0u, pKernel->getPatchInfoDataList()[0].sourceAllocationOffset); } TEST_F(BufferSetArgTest, givenKernelArgBufferWhenAddPathInfoDataIsNotSetThenPatchInfoDataIsNotCollected) { cl_mem memObj = buffer; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, pKernel->getPatchInfoDataList().size()); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/buffer_tests.cpp000066400000000000000000003557221363734646600264570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/vec.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/extensions/public/cl_ext_private.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include 
"opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/gen_common/matchers.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "test.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using namespace NEO; static const unsigned int g_scTestBufferSizeInBytes = 16; TEST(Buffer, giveBufferWhenAskedForPtrOffsetForMappingThenReturnCorrectValue) { MockContext ctx; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); MemObjOffsetArray offset = {{4, 5, 6}}; auto retOffset = buffer->calculateOffsetForMapping(offset); EXPECT_EQ(offset[0], retOffset); } TEST(Buffer, giveBufferCreateWithHostPtrButWithoutProperFlagsWhenCreatedThenErrorIsReturned) { MockContext ctx; cl_int retVal; auto hostPtr = reinterpret_cast(0x1774); std::unique_ptr buffer(Buffer::create(&ctx, CL_MEM_READ_WRITE, 1, hostPtr, retVal)); EXPECT_EQ(retVal, CL_INVALID_HOST_PTR); } TEST(Buffer, givenBufferWhenAskedForPtrLengthThenReturnCorrectValue) { MockContext ctx; cl_int retVal; std::unique_ptr buffer(Buffer::create(&ctx, 0, 1, nullptr, retVal)); MemObjSizeArray size = {{4, 5, 6}}; auto retOffset = buffer->calculateMappedPtrLength(size); EXPECT_EQ(size[0], retOffset); } TEST(Buffer, givenReadOnlySetOfInputFlagsWhenPassedToisReadOnlyMemoryPermittedByFlagsThenTrueIsReturned) { 
class MockBuffer : public Buffer { public: using Buffer::isReadOnlyMemoryPermittedByFlags; }; cl_mem_flags flags = CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); EXPECT_TRUE(MockBuffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)); flags = CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); EXPECT_TRUE(MockBuffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)); } class BufferReadOnlyTest : public testing::TestWithParam { }; TEST_P(BufferReadOnlyTest, givenNonReadOnlySetOfInputFlagsWhenPassedToisReadOnlyMemoryPermittedByFlagsThenFalseIsReturned) { class MockBuffer : public Buffer { public: using Buffer::isReadOnlyMemoryPermittedByFlags; }; cl_mem_flags flags = GetParam() | CL_MEM_USE_HOST_PTR; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); EXPECT_FALSE(MockBuffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)); } static cl_mem_flags nonReadOnlyFlags[] = { CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS, CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, 0}; INSTANTIATE_TEST_CASE_P( nonReadOnlyFlags, BufferReadOnlyTest, testing::ValuesIn(nonReadOnlyFlags)); TEST(Buffer, givenReadOnlyHostPtrMemoryWhenBufferIsCreatedWithReadOnlyFlagsThenBufferHasAllocatedNewMemoryStorageAndBufferIsNotZeroCopy) { void *memory = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, memory); memset(memory, 0xAA, MemoryConstants::pageSize); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ::testing::NiceMock *memoryManager = new ::testing::NiceMock(*device->getExecutionEnvironment()); 
device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); // First fail simulates error for read only memory allocation EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillOnce(::testing::Return(nullptr)) .WillRepeatedly(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::baseAllocateGraphicsMemoryInDevicePool)); cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, (void *)memory, retVal)); EXPECT_FALSE(buffer->isMemObjZeroCopy()); void *memoryStorage = buffer->getCpuAddressForMemoryTransfer(); EXPECT_NE((void *)memory, memoryStorage); EXPECT_THAT(buffer->getCpuAddressForMemoryTransfer(), MemCompare(memory, MemoryConstants::pageSize)); alignedFree(memory); } TEST(Buffer, givenReadOnlyHostPtrMemoryWhenBufferIsCreatedWithReadOnlyFlagsAndSecondAllocationFailsThenNullptrIsReturned) { void *memory = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, memory); memset(memory, 0xAA, MemoryConstants::pageSize); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ::testing::NiceMock *memoryManager = new ::testing::NiceMock(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); // First fail simulates error for read only memory allocation // Second fail returns nullptr EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillRepeatedly(::testing::Return(nullptr)); cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, (void *)memory, retVal)); EXPECT_EQ(nullptr, buffer.get()); alignedFree(memory); } TEST(Buffer, 
givenReadOnlyHostPtrMemoryWhenBufferIsCreatedWithKernelWriteFlagThenBufferAllocationFailsAndReturnsNullptr) { void *memory = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); ASSERT_NE(nullptr, memory); memset(memory, 0xAA, MemoryConstants::pageSize); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ::testing::NiceMock *memoryManager = new ::testing::NiceMock(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); // First fail simulates error for read only memory allocation EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillOnce(::testing::Return(nullptr)); cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, (void *)memory, retVal)); EXPECT_EQ(nullptr, buffer.get()); alignedFree(memory); } TEST(Buffer, givenNullPtrWhenBufferIsCreatedWithKernelReadOnlyFlagsThenBufferAllocationFailsAndReturnsNullptr) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ::testing::NiceMock *memoryManager = new ::testing::NiceMock(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); // First fail simulates error for read only memory allocation EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillOnce(::testing::Return(nullptr)); cl_int retVal; cl_mem_flags flags = CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(nullptr, buffer.get()); } TEST(Buffer, givenNullptrPassedToBufferCreateWhenAllocationIsNotSystemMemoryPoolThenBufferIsNotZeroCopy) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ::testing::NiceMock *memoryManager = 
new ::testing::NiceMock(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillOnce(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::allocateNonSystemGraphicsMemoryInDevicePool)); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); EXPECT_FALSE(buffer->isMemObjZeroCopy()); } TEST(Buffer, givenNullptrPassedToBufferCreateWhenAllocationIsNotSystemMemoryPoolThenAllocationIsNotAddedToHostPtrManager) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); ::testing::NiceMock *memoryManager = new ::testing::NiceMock(*device->getExecutionEnvironment()); device->injectMemoryManager(memoryManager); MockContext ctx(device.get()); EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillOnce(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::allocateNonSystemGraphicsMemoryInDevicePool)); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto hostPtrAllocationCountBefore = hostPtrManager->getFragmentCount(); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); auto hostPtrAllocationCountAfter = hostPtrManager->getFragmentCount(); EXPECT_EQ(hostPtrAllocationCountBefore, hostPtrAllocationCountAfter); } TEST(Buffer, givenNullptrPassedToBufferCreateWhenNoSharedContextOrRenderCompressedBuffersThenBuffersAllocationTypeIsBufferOrBufferHostMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; 
std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); if (MemoryPool::isSystemMemoryPool(buffer->getGraphicsAllocation()->getMemoryPool())) { EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, buffer->getGraphicsAllocation()->getAllocationType()); } else { EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, buffer->getGraphicsAllocation()->getAllocationType()); } } TEST(Buffer, givenHostPtrPassedToBufferCreateWhenMemUseHostPtrFlagisSetAndBufferIsNotZeroCopyThenCreateMapAllocationWithHostPtr) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; auto size = MemoryConstants::pageSize; void *ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); auto ptrOffset = 1; void *offsetedPtr = (void *)((uintptr_t)ptr + ptrOffset); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, offsetedPtr, retVal)); ASSERT_NE(nullptr, buffer.get()); auto mapAllocation = buffer->getMapAllocation(); EXPECT_NE(nullptr, mapAllocation); EXPECT_EQ(offsetedPtr, mapAllocation->getUnderlyingBuffer()); EXPECT_EQ(GraphicsAllocation::AllocationType::MAP_ALLOCATION, mapAllocation->getAllocationType()); alignedFree(ptr); } TEST(Buffer, givenAlignedHostPtrPassedToBufferCreateWhenNoSharedContextOrRenderCompressedBuffersThenBuffersAllocationTypeIsBufferHostMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_USE_HOST_PTR; void *hostPtr = reinterpret_cast(0x3000); std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, hostPtr, retVal)); ASSERT_NE(nullptr, buffer.get()); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, buffer->getGraphicsAllocation()->getAllocationType()); } TEST(Buffer, 
givenAllocHostPtrFlagPassedToBufferCreateWhenNoSharedContextOrRenderCompressedBuffersThenBuffersAllocationTypeIsBufferHostMemory) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext ctx(device.get()); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR; std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, buffer->getGraphicsAllocation()->getAllocationType()); } TEST(Buffer, givenRenderCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferCompressedTypeIsReturnedIn64Bit) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, true, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, type); } TEST(Buffer, givenRenderCompressedBuffersDisabledLocalMemoryEnabledWhenAllocationTypeIsQueriedThenBufferTypeIsReturnedIn64Bit) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, false, true, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, type); } TEST(Buffer, givenSharedContextWhenAllocationTypeIsQueriedThenBufferHostMemoryTypeIsReturned) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = true; auto type = 
MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, false, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenSharedContextAndRenderCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferHostMemoryTypeIsReturned) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = true; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, true, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryDisabledWhenAllocationTypeIsQueriedThenBufferHostMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, false, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryEnabledWhenAllocationTypeIsQueriedThenBufferTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, false, true, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, type); } TEST(Buffer, givenAllocHostPtrFlagWhenAllocationTypeIsQueriedThenBufferTypeIsReturned) { cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR; MemoryPropertiesFlags 
memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, false, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryDisabledAndRenderCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, true, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenUseHostPtrFlagAndLocalMemoryEnabledAndRenderCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, true, true, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, type); } TEST(Buffer, givenUseHostPointerFlagAndForceSharedPhysicalStorageWhenLocalMemoryIsEnabledThenBufferHostMemoryTypeIsReturned) { cl_mem_flags flags = CL_MEM_USE_HOST_PTR | CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; 
context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, true, true, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, type); } TEST(Buffer, givenAllocHostPtrFlagAndRenderCompressedBuffersEnabledWhenAllocationTypeIsQueriedThenBufferCompressedTypeIsReturned) { cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, true, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, type); } TEST(Buffer, givenZeroFlagsNoSharedContextAndRenderCompressedBuffersDisabledWhenAllocationTypeIsQueriedThenBufferTypeIsReturned) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, 0, 0); MockContext context; context.contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; context.isSharedContext = false; auto type = MockPublicAccessBuffer::getGraphicsAllocationType(memoryProperties, context, false, false, true); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, type); } TEST(Buffer, givenClMemCopyHostPointerPassedToBufferCreateWhenAllocationIsNotInSystemMemoryPoolThenAllocationIsWrittenByEnqueueWriteBuffer) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto *memoryManager = new ::testing::NiceMock(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillRepeatedly(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::baseAllocateGraphicsMemoryInDevicePool)); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0)); MockContext 
ctx(device.get()); EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillOnce(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::allocateNonSystemGraphicsMemoryInDevicePool)) .WillRepeatedly(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::baseAllocateGraphicsMemoryInDevicePool)); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; char memory[] = {1, 2, 3, 4, 5, 6, 7, 8}; auto taskCount = device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); std::unique_ptr buffer(Buffer::create(&ctx, flags, sizeof(memory), memory, retVal)); ASSERT_NE(nullptr, buffer.get()); auto taskCountSent = device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); if (is64bit) { EXPECT_LT(taskCount, taskCountSent); } } struct RenderCompressedBuffersTests : public ::testing::Test { void SetUp() override { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); context = std::make_unique(device.get(), true); context->contextType = ContextType::CONTEXT_TYPE_UNRESTRICTIVE; } cl_int retVal = CL_SUCCESS; HardwareInfo *hwInfo = nullptr; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; uint32_t hostPtr[2048]; size_t bufferSize = sizeof(hostPtr); }; TEST_F(RenderCompressedBuffersTests, givenBufferCompressedAllocationAndZeroCopyHostPtrWhenCheckingMemoryPropertiesThenUseHostPtrAndDontAllocateStorage) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; void *cacheAlignedHostPtr = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL | CL_MEM_USE_HOST_PTR, 
MemoryConstants::cacheLineSize, cacheAlignedHostPtr, retVal)); EXPECT_EQ(cacheAlignedHostPtr, buffer->getGraphicsAllocation()->getUnderlyingBuffer()); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); uint32_t pattern[2] = {0, 0}; pattern[0] = 0xdeadbeef; pattern[1] = 0xdeadbeef; static_assert(sizeof(pattern) <= MemoryConstants::cacheLineSize, "Incorrect pattern size"); uint32_t *dest = reinterpret_cast(cacheAlignedHostPtr); for (size_t i = 0; i < arrayCount(pattern); i++) { dest[i] = pattern[i]; } hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL | CL_MEM_USE_HOST_PTR, MemoryConstants::cacheLineSize, cacheAlignedHostPtr, retVal)); EXPECT_EQ(cacheAlignedHostPtr, buffer->getGraphicsAllocation()->getUnderlyingBuffer()); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); EXPECT_THAT(buffer->getGraphicsAllocation()->getUnderlyingBuffer(), MemCompare(&pattern[0], sizeof(pattern))); alignedFree(cacheAlignedHostPtr); } TEST_F(RenderCompressedBuffersTests, givenAllocationCreatedWithForceSharedPhysicalMemoryWhenItIsCreatedItIsZeroCopy) { buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL, 1u, nullptr, retVal)); EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(1u, buffer->getSize()); } TEST_F(RenderCompressedBuffersTests, givenRenderCompressedBuffersAndAllocationCreatedWithForceSharedPhysicalMemoryWhenItIsCreatedItIsZeroCopy) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL, 1u, nullptr, retVal)); 
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(1u, buffer->getSize()); } TEST_F(RenderCompressedBuffersTests, givenBufferNotCompressedAllocationAndNoHostPtrWhenCheckingMemoryPropertiesThenForceDisableZeroCopy) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); EXPECT_TRUE(buffer->isMemObjZeroCopy()); if (MemoryPool::isSystemMemoryPool(buffer->getGraphicsAllocation()->getMemoryPool())) { EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); } else { EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER); } hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(), bufferSize)) { EXPECT_FALSE(buffer->isMemObjZeroCopy()); EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); } else { EXPECT_TRUE(buffer->isMemObjZeroCopy()); EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); } } TEST_F(RenderCompressedBuffersTests, givenBufferCompressedAllocationWhenSharedContextIsUsedThenForceDisableCompression) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; context->isSharedContext = false; buffer.reset(Buffer::create(context.get(), CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(), bufferSize)) { 
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); } else { EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); } context->isSharedContext = true; buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, bufferSize, hostPtr, retVal)); EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); } TEST_F(RenderCompressedBuffersTests, givenDebugVariableSetWhenHwFlagIsNotSetThenSelectOptionFromDebugFlag) { DebugManagerStateRestore restore; hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; DebugManager.flags.RenderCompressedBuffersEnabled.set(1); buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(), bufferSize)) { EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); } else { EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); } DebugManager.flags.RenderCompressedBuffersEnabled.set(0); buffer.reset(Buffer::create(context.get(), 0, bufferSize, nullptr, retVal)); EXPECT_NE(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); } struct RenderCompressedBuffersSvmTests : public RenderCompressedBuffersTests { void SetUp() override { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1u); hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); hwInfo->capabilityTable.gpuAddressSpace = MemoryConstants::max48BitAddress; RenderCompressedBuffersTests::SetUp(); } }; 
TEST_F(RenderCompressedBuffersSvmTests, givenSvmAllocationWhenCreatingBufferThenForceDisableCompression) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; auto svmPtr = context->getSVMAllocsManager()->createSVMAlloc(device->getRootDeviceIndex(), sizeof(uint32_t), {}); auto expectedAllocationType = context->getSVMAllocsManager()->getSVMAlloc(svmPtr)->gpuAllocation->getAllocationType(); buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, sizeof(uint32_t), svmPtr, retVal)); EXPECT_EQ(expectedAllocationType, buffer->getGraphicsAllocation()->getAllocationType()); context->getSVMAllocsManager()->freeSVMAlloc(svmPtr); } struct RenderCompressedBuffersCopyHostMemoryTests : public RenderCompressedBuffersTests { void SetUp() override { RenderCompressedBuffersTests::SetUp(); device->injectMemoryManager(new MockMemoryManager(true, false, *platform()->peekExecutionEnvironment())); context->memoryManager = device->getMemoryManager(); mockCmdQ = new MockCommandQueue(); context->setSpecialQueue(mockCmdQ); } MockCommandQueue *mockCmdQ = nullptr; }; TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenCallWriteBuffer) { if (is32bit) { return; } hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, bufferSize, hostPtr, retVal)); if (HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(), bufferSize)) { EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); EXPECT_EQ(1u, mockCmdQ->writeBufferCounter); EXPECT_TRUE(mockCmdQ->writeBufferBlocking); EXPECT_EQ(0u, mockCmdQ->writeBufferOffset); EXPECT_EQ(bufferSize, mockCmdQ->writeBufferSize); EXPECT_EQ(hostPtr, mockCmdQ->writeBufferPtr); } else { EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), 
GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); EXPECT_EQ(0u, mockCmdQ->writeBufferCounter); EXPECT_FALSE(mockCmdQ->writeBufferBlocking); EXPECT_EQ(0u, mockCmdQ->writeBufferOffset); EXPECT_EQ(0u, mockCmdQ->writeBufferSize); EXPECT_EQ(nullptr, mockCmdQ->writeBufferPtr); } EXPECT_EQ(CL_SUCCESS, retVal); } struct BcsBufferTests : public ::testing::Test { class BcsMockContext : public MockContext { public: BcsMockContext(ClDevice *device) : MockContext(device) { bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex())); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); } BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const override { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, hostPtr, memory->getGpuAddress(), 0, 0, 0, size, 0, 0, 0, 0); BlitPropertiesContainer container; container.push_back(blitProperties); bcsCsr->blitBuffer(container, true); return BlitOperationResult::Success; } std::unique_ptr bcsOsContext; std::unique_ptr bcsCsr; }; template class MyMockCsr : public UltCommandStreamReceiver { public: using UltCommandStreamReceiver::UltCommandStreamReceiver; void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override { EXPECT_EQ(this->latestFlushedTaskCount, taskCountToWait); EXPECT_EQ(0u, flushStampToWait); EXPECT_FALSE(useQuickKmdSleep); EXPECT_FALSE(forcePowerSavingMode); waitForTaskCountWithKmdNotifyFallbackCalled++; } void waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { EXPECT_EQ(1u, waitForTaskCountWithKmdNotifyFallbackCalled); 
EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount); waitForTaskCountAndCleanAllocationListCalled++; } uint32_t waitForTaskCountAndCleanAllocationListCalled = 0; uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0; CommandStreamReceiver *gpgpuCsr = nullptr; }; template void SetUpT() { if (is32bit) { GTEST_SKIP(); } DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; bool createBcsEngine = !capabilityTable.blitterOperationsSupported; capabilityTable.blitterOperationsSupported = true; if (createBcsEngine) { auto &engine = device->getEngine(HwHelperHw::lowPriorityEngineType, true); bcsOsContext.reset(OsContext::create(nullptr, 1, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); engine.osContext = bcsOsContext.get(); engine.commandStreamReceiver->setupContext(*bcsOsContext); } bcsMockContext = std::make_unique(device.get()); commandQueue.reset(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); } template void TearDownT() {} template void waitForCacheFlushFromBcsTest(MockCommandQueueHw &commandQueue); DebugManagerStateRestore restore; std::unique_ptr bcsOsContext; std::unique_ptr device; std::unique_ptr bcsMockContext; std::unique_ptr commandQueue; uint32_t hostPtr = 0; cl_int retVal = CL_SUCCESS; }; HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferWithInitializationDataAndBcsCsrWhenCreatingThenUseBlitOperation) { auto bcsCsr = static_cast *>(bcsMockContext->bcsCsr.get()); auto newMemoryManager = new MockMemoryManager(true, true, *device->getExecutionEnvironment()); device->getExecutionEnvironment()->memoryManager.reset(newMemoryManager); bcsMockContext->memoryManager = newMemoryManager; EXPECT_EQ(0u, bcsCsr->blitBufferCalled); auto bufferForBlt = 
clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_COPY_HOST_PTR, 2000, &hostPtr, retVal)); EXPECT_EQ(1u, bcsCsr->blitBufferCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIsCalledThenUseBcsCsr) { DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0); auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; auto *hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0); hwInfo->capabilityTable.blitterOperationsSupported = false; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); hwInfo->capabilityTable.blitterOperationsSupported = false; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), 
bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0); hwInfo->capabilityTable.blitterOperationsSupported = true; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(-1); hwInfo->capabilityTable.blitterOperationsSupported = true; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(4u, bcsCsr->blitBufferCalled); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); hwInfo->capabilityTable.blitterOperationsSupported = true; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, 
nullptr, nullptr); EXPECT_EQ(5u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(6u, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(7u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(8u, bcsCsr->blitBufferCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispatchBlitWhenUnblocked) { auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; UserEvent userEvent(bcsMockContext.get()); cl_event waitlist = &userEvent; size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt1.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, bcsCsr->blitBufferCalled); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(4u, 
bcsCsr->blitBufferCalled); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(5u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(6u, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); EXPECT_EQ(7u, bcsCsr->blitBufferCalled); commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferCalledThenUseBcs) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; cmdQ->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 0, 1, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(commandQueue->getBcsCommandStreamReceiver()->getCS(0)); auto commandItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), commandItor); auto copyBltCmd = genCmdCast(*commandItor); EXPECT_EQ(bufferForBlt0->getGraphicsAllocation()->getGpuAddress(), copyBltCmd->getSourceBaseAddress()); EXPECT_EQ(bufferForBlt1->getGraphicsAllocation()->getGpuAddress(), copyBltCmd->getDestinationBaseAddress()); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMakeResidentAllTimestampPackets) { auto bcsCsr = static_cast 
*>(commandQueue->getBcsCommandStreamReceiver()); bcsCsr->storeMakeResidentAllocations = true; auto mockCmdQ = static_cast *>(commandQueue.get()); auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); bufferForBlt->forceDisallowCPUCopy = true; TimestampPacketContainer previousTimestampPackets; mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPackets, false); auto dependencyFromPreviousEnqueue = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto event = make_releaseable(mockCmdQ, CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); auto userEvent = make_releaseable(bcsMockContext.get()); cl_event waitlist[] = {userEvent.get(), event.get()}; commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 2, waitlist, nullptr); auto outputDependency = mockCmdQ->timestampPacketContainer->peekNodes()[0]; EXPECT_NE(outputDependency, dependencyFromPreviousEnqueue); EXPECT_FALSE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation())); EXPECT_FALSE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation())); EXPECT_FALSE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation())); userEvent->setStatus(CL_COMPLETE); EXPECT_TRUE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation(), bcsCsr->taskCount)); EXPECT_TRUE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation(), bcsCsr->taskCount)); EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation(), bcsCsr->taskCount)); } HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) { DebugManager.flags.DisableZeroCopyForBuffers.set(true); auto mockCmdQ = static_cast 
*>(commandQueue.get()); uint8_t hostPtr[64] = {}; auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_USE_HOST_PTR, 1, hostPtr, retVal)); bufferForBlt->forceDisallowCPUCopy = true; auto mapAllocation = bufferForBlt->getMapAllocation(); EXPECT_NE(nullptr, mapAllocation); mockCmdQ->kernelParams.transferAllocation = nullptr; auto mapPtr = clEnqueueMapBuffer(mockCmdQ, bufferForBlt.get(), true, 0, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mapAllocation, mockCmdQ->kernelParams.transferAllocation); mockCmdQ->kernelParams.transferAllocation = nullptr; retVal = clEnqueueUnmapMemObject(mockCmdQ, bufferForBlt.get(), mapPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mapAllocation, mockCmdQ->kernelParams.transferAllocation); } HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver; auto initialTaskCount = queueCsr->peekTaskCount(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); auto timestampPacketNode = cmdQ->timestampPacketContainer->peekNodes().at(0); HardwareParse hwParser; hwParser.parseCommands(*cmdQ->peekCommandStream()); uint32_t semaphoresCount = 0; uint32_t miAtomicsCount = 0; for (auto &cmd : hwParser.cmdList) { if (auto semaphoreCmd = genCmdCast(cmd)) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreCmd)) { continue; } semaphoresCount++; auto dataAddress = timestampPacketNode->getGpuAddress() + 
offsetof(TimestampPacketStorage, packets[0].contextEnd); EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(0u, miAtomicsCount); } else if (auto miAtomicCmd = genCmdCast(cmd)) { miAtomicsCount++; auto dataAddress = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, implicitDependenciesCount); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT, miAtomicCmd->getAtomicOpcode()); EXPECT_EQ(dataAddress, UnitTestHelper::getMemoryAddress(*miAtomicCmd)); EXPECT_EQ(1u, semaphoresCount); } } EXPECT_EQ(1u, semaphoresCount); EXPECT_EQ(1u, miAtomicsCount); EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount()); } HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto queueCsr = cmdQ->gpgpuEngine->commandStreamReceiver; auto initialTaskCount = queueCsr->peekTaskCount(); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); auto timestampPacketNode = cmdQ->timestampPacketContainer->peekNodes().at(0); HardwareParse hwParser; hwParser.parseCommands(*cmdQ->peekCommandStream()); uint32_t semaphoresCount = 0; uint32_t miAtomicsCount = 0; for (auto &cmd : hwParser.cmdList) { if (auto semaphoreCmd = genCmdCast(cmd)) { if (UnitTestHelper::isAdditionalMiSemaphoreWait(*semaphoreCmd)) { continue; } semaphoresCount++; auto dataAddress = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(0u, 
miAtomicsCount); } else if (auto miAtomicCmd = genCmdCast(cmd)) { miAtomicsCount++; auto dataAddress = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, implicitDependenciesCount); EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_DECREMENT, miAtomicCmd->getAtomicOpcode()); EXPECT_EQ(dataAddress, UnitTestHelper::getMemoryAddress(*miAtomicCmd)); EXPECT_EQ(1u, semaphoresCount); } } EXPECT_EQ(1u, semaphoresCount); EXPECT_EQ(1u, miAtomicsCount); EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount()); } template void BcsBufferTests::waitForCacheFlushFromBcsTest(MockCommandQueueHw &commandQueue) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; bool isCacheFlushForBcsRequired = commandQueue.isCacheFlushForBcsRequired(); auto bcsCsr = static_cast *>(commandQueue.getBcsCommandStreamReceiver()); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); commandQueue.enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParserGpGpu; HardwareParse hwParserBcs; hwParserGpGpu.parseCommands(*commandQueue.peekCommandStream()); hwParserBcs.parseCommands(bcsCsr->commandStream); auto gpgpuPipeControls = findAll(hwParserGpGpu.cmdList.begin(), hwParserGpGpu.cmdList.end()); uint64_t cacheFlushWriteAddress = 0; for (auto &pipeControl : gpgpuPipeControls) { auto pipeControlCmd = genCmdCast(*pipeControl); uint64_t addressHigh = static_cast(pipeControlCmd->getAddressHigh()) << 32; uint64_t addressLow = pipeControlCmd->getAddress(); cacheFlushWriteAddress = addressHigh | addressLow; if (cacheFlushWriteAddress != 0) { EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); 
EXPECT_EQ(isCacheFlushForBcsRequired, 0u == pipeControlCmd->getImmediateData()); break; } } auto bcsSemaphores = findAll(hwParserBcs.cmdList.begin(), hwParserBcs.cmdList.end()); size_t additionalSemaphores = UnitTestHelper::isSynchronizationWArequired(device->getHardwareInfo()) ? 2 : 0; if (isCacheFlushForBcsRequired) { EXPECT_NE(0u, cacheFlushWriteAddress); EXPECT_EQ(1u + additionalSemaphores, bcsSemaphores.size()); auto semaphoreCmd = genCmdCast(*bcsSemaphores[0]); EXPECT_EQ(cacheFlushWriteAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); } else { EXPECT_EQ(additionalSemaphores, bcsSemaphores.size()); } } HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) { auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true; waitForCacheFlushFromBcsTest(*cmdQ); } HWTEST_TEMPLATED_F(BcsBufferTests, givenCommandQueueWithoutCacheFlushRequirementWhenProgrammingCmdBufferThenWaitForCacheFlushFromBcs) { auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; cmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false; waitForCacheFlushFromBcsTest(*cmdQ); } HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnqueueThenWaitPipeControlOnBcsEngine) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); auto bcsCsr = static_cast *>(cmdQ->getBcsCommandStreamReceiver()); auto queueCsr = static_cast *>(cmdQ->gpgpuEngine->commandStreamReceiver); queueCsr->stallingPipeControlOnNextFlushRequired = true; cl_int retVal = CL_SUCCESS; auto buffer = 
clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; hwParser.parseCommands(queueCsr->commandStream); uint64_t pipeControlWriteAddress = 0; for (auto &cmd : hwParser.cmdList) { if (auto pipeControlCmd = genCmdCast(cmd)) { if (pipeControlCmd->getPostSyncOperation() != PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { continue; } EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); auto addressLow = static_cast(pipeControlCmd->getAddress()); auto addressHigh = static_cast(pipeControlCmd->getAddressHigh()); pipeControlWriteAddress = (addressHigh << 32) | addressLow; break; } } EXPECT_NE(0u, pipeControlWriteAddress); HardwareParse bcsHwParser; bcsHwParser.parseCommands(bcsCsr->commandStream); auto semaphores = findAll(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end()); if (cmdQ->isCacheFlushForBcsRequired()) { EXPECT_EQ(UnitTestHelper::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size()); EXPECT_EQ(pipeControlWriteAddress, genCmdCast(*(semaphores[1]))->getSemaphoreGraphicsAddress()); } else { EXPECT_EQ(UnitTestHelper::isSynchronizationWArequired(device->getHardwareInfo()) ? 
3u : 1u, semaphores.size());
        EXPECT_EQ(pipeControlWriteAddress, genCmdCast(*(semaphores[0]))->getSemaphoreGraphicsAddress());
    }
}

// A barrier followed by two writes, each blocked on its own user event:
// releasing the first event must program the stalling pipe control in the
// CSR stream; releasing the second must not program another one.
HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    buffer->forceDisallowCPUCopy = true;
    void *hostPtr = reinterpret_cast(0x12340000);

    UserEvent userEvent0;
    UserEvent userEvent1;
    cl_event waitlist0[] = {&userEvent0};
    cl_event waitlist1[] = {&userEvent1};

    cmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr);
    cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist0, nullptr);
    cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist1, nullptr);

    // Reports whether `stream`, parsed from `offset`, contains a PIPE_CONTROL
    // with an immediate-data post-sync write; the first such command must
    // also stall the command streamer and flush the data cache.
    auto pipeControlLookup = [](LinearStream &stream, size_t offset) {
        HardwareParse hwParser;
        hwParser.parseCommands(stream, offset);

        bool stallingPipeControlFound = false;
        for (auto &cmd : hwParser.cmdList) {
            if (auto pipeControlCmd = genCmdCast(cmd)) {
                if (pipeControlCmd->getPostSyncOperation() != PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
                    continue;
                }

                stallingPipeControlFound = true;
                EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
                EXPECT_TRUE(pipeControlCmd->getDcFlushEnable());
                break;
            }
        }
        return stallingPipeControlFound;
    };

    auto &csrStream = cmdQ->getGpgpuCommandStreamReceiver().getCS(0);

    // The request stays pending until the first blocked enqueue is released.
    EXPECT_TRUE(cmdQ->getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired());
    userEvent0.setStatus(CL_COMPLETE);
    EXPECT_FALSE(cmdQ->getGpgpuCommandStreamReceiver().isStallingPipeControlOnNextFlushRequired());
    EXPECT_TRUE(pipeControlLookup(csrStream, 0));

    // Only inspect what the second release appends to the stream.
    auto csrOffset = csrStream.getUsed();
    userEvent1.setStatus(CL_COMPLETE);
    EXPECT_FALSE(pipeControlLookup(csrStream,
csrOffset));

    // The return value is not used here.
    cmdQ->isQueueBlocked();
}

// Same scenario as the non-blocked pipe-control test, but the write is first
// blocked on a user event and only dispatched once that event completes; the
// number of semaphores in the BCS stream must be the same as for a direct
// dispatch.
HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlockedBlitEnqueueThenWaitPipeControlOnBcsEngine) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
    auto bcsCsr = static_cast *>(cmdQ->getBcsCommandStreamReceiver());
    auto queueCsr = static_cast *>(cmdQ->gpgpuEngine->commandStreamReceiver);
    queueCsr->stallingPipeControlOnNextFlushRequired = true;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    buffer->forceDisallowCPUCopy = true;
    void *hostPtr = reinterpret_cast(0x12340000);

    // Block the enqueue on a user event, then release it.
    UserEvent userEvent;
    cl_event waitlist = &userEvent;
    cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, &waitlist, nullptr);
    userEvent.setStatus(CL_COMPLETE);

    HardwareParse bcsHwParser;
    bcsHwParser.parseCommands(bcsCsr->commandStream);
    auto semaphores = findAll(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());

    if (cmdQ->isCacheFlushForBcsRequired()) {
        EXPECT_EQ(UnitTestHelper::isSynchronizationWArequired(device->getHardwareInfo()) ? 4u : 2u, semaphores.size());
    } else {
        EXPECT_EQ(UnitTestHelper::isSynchronizationWArequired(device->getHardwareInfo()) ?
3u : 1u, semaphores.size()); } cmdQ->isQueueBlocked(); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimatingCommandsSizeThenReturnCorrectValue) { auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); CsrDependencies csrDependencies; MultiDispatchInfo multiDispatchInfo; auto &hwInfo = cmdQ->getDevice().getHardwareInfo(); auto readBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, csrDependencies, false, false, true, *cmdQ, multiDispatchInfo); auto writeBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, csrDependencies, false, false, true, *cmdQ, multiDispatchInfo); auto copyBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false, true, *cmdQ, multiDispatchInfo); auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (cmdQ->isCacheFlushForBcsRequired()) { expectedSize += MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); } EXPECT_EQ(expectedSize, readBufferCmdsSize); EXPECT_EQ(expectedSize, writeBufferCmdsSize); EXPECT_EQ(expectedSize, copyBufferCmdsSize); } HWTEST_TEMPLATED_F(BcsBufferTests, givenOutputTimestampPacketWhenBlitCalledThenProgramMiFlushDwWithDataWrite) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto csr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); auto outputTimestampPacket = cmdQ->timestampPacketContainer->peekNodes().at(0); auto timestampPacketGpuWriteAddress = 
outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); uint32_t miFlushDwCmdsCount = 0; bool blitCmdFound = false; for (auto &cmd : hwParser.cmdList) { if (auto miFlushDwCmd = genCmdCast(cmd)) { EXPECT_TRUE(blitCmdFound); if (UnitTestHelper::additionalMiFlushDwRequired) { miFlushDwCmd++; if (miFlushDwCmdsCount % 2 == 0) { EXPECT_EQ(miFlushDwCmdsCount == 0, timestampPacketGpuWriteAddress == miFlushDwCmd->getDestinationAddress()); EXPECT_EQ(miFlushDwCmdsCount == 0, 0u == miFlushDwCmd->getImmediateData()); } } else { EXPECT_EQ(miFlushDwCmdsCount == 0, timestampPacketGpuWriteAddress == miFlushDwCmd->getDestinationAddress()); EXPECT_EQ(miFlushDwCmdsCount == 0, 0u == miFlushDwCmd->getImmediateData()); } miFlushDwCmdsCount++; } else if (genCmdCast(cmd)) { blitCmdFound = true; EXPECT_EQ(0u, miFlushDwCmdsCount); } } if (UnitTestHelper::additionalMiFlushDwRequired) { EXPECT_EQ(4u, miFlushDwCmdsCount); } else { EXPECT_EQ(2u, miFlushDwCmdsCount); } EXPECT_TRUE(blitCmdFound); } HWTEST_TEMPLATED_F(BcsBufferTests, givenInputAndOutputTimestampPacketWhenBlitCalledThenMakeThemResident) { auto bcsCsr = static_cast *>(commandQueue->getBcsCommandStreamReceiver()); auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cl_int retVal = CL_SUCCESS; auto &cmdQueueCsr = static_cast &>(cmdQ->getGpgpuCommandStreamReceiver()); auto memoryManager = cmdQueueCsr.getMemoryManager(); cmdQueueCsr.timestampPacketAllocator = std::make_unique>(device->getRootDeviceIndex(), memoryManager, 1, MemoryConstants::cacheLineSize, sizeof(TimestampPacketStorage), false); auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); // first enqueue to create IOQ dependency cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 
0, nullptr, nullptr); auto inputTimestampPacketAllocation = cmdQ->timestampPacketContainer->peekNodes().at(0)->getBaseGraphicsAllocation(); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); auto outputTimestampPacketAllocation = cmdQ->timestampPacketContainer->peekNodes().at(0)->getBaseGraphicsAllocation(); EXPECT_NE(outputTimestampPacketAllocation, inputTimestampPacketAllocation); EXPECT_EQ(cmdQ->taskCount, inputTimestampPacketAllocation->getTaskCount(bcsCsr->getOsContext().getContextId())); EXPECT_EQ(cmdQ->taskCount, outputTimestampPacketAllocation->getTaskCount(bcsCsr->getOsContext().getContextId())); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { 
tempAllocationFound = true; break; } tempAllocation = tempAllocation->next; } EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); myMockCsr->taskCount = 1234; myMockCsr->initializeTagAllocation(); myMockCsr->setupContext(*bcsMockContext->bcsOsContext); bcsMockContext->bcsCsr.reset(myMockCsr); EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); size_t bufferOrigin[] = {0, 0, 0}; size_t hostOrigin[] = {0, 0, 0}; size_t region[] = {1, 2, 1}; cmdQ->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; break; } tempAllocation = tempAllocation->next; } EXPECT_TRUE(tempAllocationFound); 
cmdQ->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region,
                                MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
                                MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
                                hostPtr, 0, nullptr, nullptr);
    // The blocking variant has to wait on the BCS CSR exactly once.
    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}

// Read counterpart of the blocking-write test: the non-blocking read must not
// wait and must park the host-pointer allocation in the BCS CSR's temporary
// list; the blocking read must wait exactly once.
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) {
    // Install a mock BCS CSR into the context and point the queue at it.
    auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex());
    myMockCsr->taskCount = 1234;
    myMockCsr->initializeTagAllocation();
    myMockCsr->setupContext(*bcsMockContext->bcsOsContext);
    bcsMockContext->bcsCsr.reset(myMockCsr);

    EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()};

    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
    cmdQ->bcsEngine = &bcsEngineControl;
    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
    myMockCsr->gpgpuCsr = &gpgpuCsr;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    buffer->forceDisallowCPUCopy = true;
    void *hostPtr = reinterpret_cast(0x12340000);

    cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);

    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());

    // Walk the BCS CSR's temporary allocations looking for the host pointer.
    bool tempAllocationFound = false;
    for (auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); tempAllocation; tempAllocation = tempAllocation->next) {
        if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
            tempAllocationFound = true;
            break;
        }
    }
    EXPECT_TRUE(tempAllocationFound);

    // The blocking variant has to wait on the BCS CSR exactly once.
    cmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}

HWTEST_TEMPLATED_F(BcsBufferTests,
givenBlockedEnqueueWhenUsingBcsThenWaitForValidTaskCountOnBlockingCall) {
    // A write blocked on a user event and then released must not wait on the
    // BCS CSR by itself; the following finish() must wait exactly once.

    // Install a mock BCS CSR into the context and point the queue at it.
    auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex());
    myMockCsr->taskCount = 1234;
    myMockCsr->initializeTagAllocation();
    myMockCsr->setupContext(*bcsMockContext->bcsOsContext);
    bcsMockContext->bcsCsr.reset(myMockCsr);

    EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()};

    auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr));
    cmdQ->bcsEngine = &bcsEngineControl;
    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
    myMockCsr->gpgpuCsr = &gpgpuCsr;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    buffer->forceDisallowCPUCopy = true;
    void *hostPtr = reinterpret_cast(0x12340000);

    UserEvent userEvent;
    cl_event waitlist = &userEvent;

    cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, &waitlist, nullptr);
    userEvent.setStatus(CL_COMPLETE);
    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);

    cmdQ->finish();
    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}

// With render-compressed buffers disabled in the capability table, creating a
// CL_MEM_COPY_HOST_PTR buffer must not yield a BUFFER_COMPRESSED allocation
// and must not go through the queue's write-buffer path.
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) {
    hwInfo->capabilityTable.ftrRenderCompressedBuffers = false;

    buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, sizeof(uint32_t), &hostPtr, retVal));
    EXPECT_NE(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, mockCmdQ->writeBufferCounter);
}

// When the mocked write-buffer call fails, buffer creation must report
// CL_OUT_OF_RESOURCES and return no buffer. The test returns early on 32-bit
// builds or when the HW helper does not prefer compression for this size.
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenRenderCompressedBufferWhenWriteBufferFailsThenReturnErrorCode) {
    if (is32bit || !HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(),
bufferSize)) { return; } hwInfo->capabilityTable.ftrRenderCompressedBuffers = true; mockCmdQ->writeBufferRetValue = CL_INVALID_VALUE; buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, bufferSize, hostPtr, retVal)); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); EXPECT_EQ(nullptr, buffer.get()); } class BufferTest : public DeviceFixture, public testing::TestWithParam { public: BufferTest() { } protected: void SetUp() override { flags = GetParam(); DeviceFixture::SetUp(); context.reset(new MockContext(pClDevice)); } void TearDown() override { context.reset(); DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; std::unique_ptr context; MemoryManager *contextMemoryManager; cl_mem_flags flags = 0; unsigned char pHostPtr[g_scTestBufferSizeInBytes]; }; typedef BufferTest NoHostPtr; TEST_P(NoHostPtr, ValidFlags) { auto buffer = Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); auto address = buffer->getCpuAddress(); EXPECT_NE(nullptr, address); delete buffer; } TEST_P(NoHostPtr, GivenNoHostPtrWhenHwBufferCreationFailsThenReturnNullptr) { BufferFuncs BufferFuncsBackup[IGFX_MAX_CORE]; for (uint32_t i = 0; i < IGFX_MAX_CORE; i++) { BufferFuncsBackup[i] = bufferFactory[i]; bufferFactory[i].createBufferFunction = [](Context *, MemoryPropertiesFlags, cl_mem_flags, cl_mem_flags_intel, size_t, void *, void *, GraphicsAllocation *, bool, bool, bool) -> NEO::Buffer * { return nullptr; }; } auto buffer = Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, nullptr, retVal); EXPECT_EQ(nullptr, buffer); for (uint32_t i = 0; i < IGFX_MAX_CORE; i++) { bufferFactory[i] = BufferFuncsBackup[i]; } } TEST_P(NoHostPtr, WithUseHostPtr_returnsError) { auto buffer = Buffer::create( context.get(), flags | CL_MEM_USE_HOST_PTR, g_scTestBufferSizeInBytes, nullptr, retVal); EXPECT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, buffer); delete buffer; } TEST_P(NoHostPtr, 
WithCopyHostPtr_returnsError) { auto buffer = Buffer::create( context.get(), flags | CL_MEM_COPY_HOST_PTR, g_scTestBufferSizeInBytes, nullptr, retVal); EXPECT_EQ(CL_INVALID_HOST_PTR, retVal); EXPECT_EQ(nullptr, buffer); delete buffer; } TEST_P(NoHostPtr, withBufferGraphicsAllocationReportsBufferType) { auto buffer = Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); auto allocation = buffer->getGraphicsAllocation(); if (MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())) { EXPECT_EQ(allocation->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); } else { EXPECT_EQ(allocation->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER); } auto isBufferWritable = !(flags & (CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)); EXPECT_EQ(isBufferWritable, allocation->isMemObjectsAllocationWithWritableFlags()); delete buffer; } // Parameterized test that tests buffer creation with all flags // that should be valid with a nullptr host ptr cl_mem_flags NoHostPtrFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( BufferTest_Create, NoHostPtr, testing::ValuesIn(NoHostPtrFlags)); struct ValidHostPtr : public BufferTest, public MemoryManagementFixture { typedef BufferTest BaseClass; using BufferTest::SetUp; using MemoryManagementFixture::SetUp; ValidHostPtr() { } void SetUp() override { MemoryManagementFixture::SetUp(); BaseClass::SetUp(); ASSERT_NE(nullptr, pDevice); } void TearDown() override { delete buffer; BaseClass::TearDown(); platformsImpl.clear(); MemoryManagementFixture::TearDown(); } Buffer *createBuffer() { return Buffer::create( context.get(), flags, g_scTestBufferSizeInBytes, pHostPtr, retVal); } cl_int retVal = CL_INVALID_VALUE; Buffer *buffer = nullptr; }; TEST_P(ValidHostPtr, 
isResident_defaultsToFalseAfterCreate) {
    // A freshly created buffer's allocation must not be resident for the
    // default engine's OS context.
    buffer = createBuffer();
    ASSERT_NE(nullptr, buffer);

    EXPECT_FALSE(buffer->getGraphicsAllocation()->isResident(pDevice->getDefaultEngine().osContext->getContextId()));
}

// Checks where the buffer's CPU storage lives relative to the host pointer:
// a zero-copy CL_MEM_USE_HOST_PTR buffer must alias it, anything else must
// not, and a CL_MEM_COPY_HOST_PTR buffer must hold a byte-for-byte copy.
TEST_P(ValidHostPtr, getAddress) {
    buffer = createBuffer();
    ASSERT_NE(nullptr, buffer);

    auto address = buffer->getCpuAddress();
    EXPECT_NE(nullptr, address);
    if ((flags & CL_MEM_USE_HOST_PTR) && buffer->isMemObjZeroCopy()) {
        // Buffer should use host ptr
        EXPECT_EQ(pHostPtr, address);
        EXPECT_EQ(pHostPtr, buffer->getHostPtr());
    } else {
        // Buffer should have a different ptr
        EXPECT_NE(pHostPtr, address);
    }

    if (flags & CL_MEM_COPY_HOST_PTR) {
        // Buffer should contain a copy of host memory.
        // Compare the whole buffer: sizeof(g_scTestBufferSizeInBytes) would
        // only cover the size of the constant's type, not the buffer.
        EXPECT_EQ(0, memcmp(pHostPtr, address, g_scTestBufferSizeInBytes));
        EXPECT_EQ(nullptr, buffer->getHostPtr());
    }
}

// The created buffer must report the requested size.
TEST_P(ValidHostPtr, getSize) {
    buffer = createBuffer();
    ASSERT_NE(nullptr, buffer);

    EXPECT_EQ(g_scTestBufferSizeInBytes, buffer->getSize());
}

// A sub-buffer covering the whole parent, created with flags == 0, must be
// created successfully for every parent flag combination.
TEST_P(ValidHostPtr, givenValidHostPtrParentFlagsWhenSubBufferIsCreatedWithZeroFlagsThenItCreatesSuccesfuly) {
    auto retVal = CL_SUCCESS;
    auto clBuffer = clCreateBuffer(context.get(),
                                   flags,
                                   g_scTestBufferSizeInBytes,
                                   pHostPtr,
                                   &retVal);

    ASSERT_NE(nullptr, clBuffer);

    cl_buffer_region region = {0, g_scTestBufferSizeInBytes};

    auto subBuffer = clCreateSubBuffer(clBuffer,
                                       0,
                                       CL_BUFFER_CREATE_TYPE_REGION,
                                       &region,
                                       &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(subBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(clBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A sub-buffer created with the parent's own access flags (host-pointer
// flags masked out) must be created successfully.
TEST_P(ValidHostPtr, givenValidHostPtrParentFlagsWhenSubBufferIsCreatedWithParentFlagsThenItIsCreatedSuccesfuly) {
    auto retVal = CL_SUCCESS;
    auto clBuffer = clCreateBuffer(context.get(),
                                   flags,
                                   g_scTestBufferSizeInBytes,
                                   pHostPtr,
                                   &retVal);

    ASSERT_NE(nullptr, clBuffer);

    cl_buffer_region region = {0, g_scTestBufferSizeInBytes};

    const cl_mem_flags allValidFlags =
        CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; cl_mem_flags unionFlags = flags & allValidFlags; auto subBuffer = clCreateSubBuffer(clBuffer, unionFlags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, subBuffer); retVal = clReleaseMemObject(subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(clBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(ValidHostPtr, givenValidHostPtrParentFlagsWhenSubBufferIsCreatedWithInvalidParentFlagsThenCreationFails) { auto retVal = CL_SUCCESS; cl_mem_flags invalidFlags = 0; if (flags & CL_MEM_READ_ONLY) { invalidFlags |= CL_MEM_WRITE_ONLY; } if (flags & CL_MEM_WRITE_ONLY) { invalidFlags |= CL_MEM_READ_ONLY; } if (flags & CL_MEM_HOST_NO_ACCESS) { invalidFlags |= CL_MEM_HOST_READ_ONLY; } if (flags & CL_MEM_HOST_READ_ONLY) { invalidFlags |= CL_MEM_HOST_WRITE_ONLY; } if (flags & CL_MEM_HOST_WRITE_ONLY) { invalidFlags |= CL_MEM_HOST_READ_ONLY; } if (invalidFlags == 0) { return; } auto clBuffer = clCreateBuffer(context.get(), flags, g_scTestBufferSizeInBytes, pHostPtr, &retVal); ASSERT_NE(nullptr, clBuffer); cl_buffer_region region = {0, g_scTestBufferSizeInBytes}; auto subBuffer = clCreateSubBuffer(clBuffer, invalidFlags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); EXPECT_NE(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, subBuffer); retVal = clReleaseMemObject(clBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(ValidHostPtr, failedAllocationInjection) { InjectedFunction method = [this](size_t failureIndex) { delete buffer; buffer = nullptr; // System under test buffer = createBuffer(); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); } }; injectFailures(method); } TEST_P(ValidHostPtr, SvmHostPtr) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto ptr = context->getSVMAllocsManager()->createSVMAlloc(pDevice->getRootDeviceIndex(), 64, {}); 
auto bufferSvm = Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 64, ptr, retVal); EXPECT_NE(nullptr, bufferSvm); EXPECT_TRUE(bufferSvm->isMemObjWithHostPtrSVM()); auto svmData = context->getSVMAllocsManager()->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); EXPECT_EQ(svmData->gpuAllocation, bufferSvm->getGraphicsAllocation()); EXPECT_EQ(CL_SUCCESS, retVal); context->getSVMAllocsManager()->freeSVMAlloc(ptr); delete bufferSvm; } } // Parameterized test that tests buffer creation with all flags that should be // valid with a valid host ptr cl_mem_flags ValidHostPtrFlags[] = { 0 | CL_MEM_USE_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR, 0 | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR}; INSTANTIATE_TEST_CASE_P( BufferTest_Create, ValidHostPtr, testing::ValuesIn(ValidHostPtrFlags)); class BufferCalculateHostPtrSize : public testing::TestWithParam> { public: BufferCalculateHostPtrSize(){}; protected: void SetUp() override { std::tie(origin[0], origin[1], origin[2], region[0], region[1], region[2], rowPitch, slicePitch, hostPtrSize) = GetParam(); } void TearDown() override { } size_t origin[3]; size_t region[3]; size_t rowPitch; size_t slicePitch; size_t hostPtrSize; }; /* origin, region, rowPitch, slicePitch, hostPtrSize*/ static std::tuple Inputs[] = {std::make_tuple(0, 0, 0, 1, 1, 1, 10, 1, 1), std::make_tuple(0, 0, 0, 7, 1, 1, 10, 1, 7), std::make_tuple(0, 0, 0, 7, 3, 1, 10, 1, 27), std::make_tuple(0, 0, 0, 7, 1, 3, 10, 10, 27), std::make_tuple(0, 0, 0, 7, 2, 3, 10, 20, 57), 
std::make_tuple(0, 0, 0, 7, 1, 3, 10, 30, 67),
    std::make_tuple(0, 0, 0, 7, 2, 3, 10, 30, 77),
    std::make_tuple(9, 0, 0, 1, 1, 1, 10, 1, 10),
    std::make_tuple(0, 2, 0, 7, 3, 1, 10, 1, 27 + 20),
    std::make_tuple(0, 0, 1, 7, 1, 3, 10, 10, 27 + 10),
    std::make_tuple(0, 2, 1, 7, 2, 3, 10, 20, 57 + 40),
    std::make_tuple(1, 1, 1, 7, 1, 3, 10, 30, 67 + 41),
    std::make_tuple(2, 0, 2, 7, 2, 3, 10, 30, 77 + 62)};

// Checks that calculateHostPtrSize returns the size stored as the last element of the parameter tuple.
TEST_P(BufferCalculateHostPtrSize, CheckReturnedSize) {
    EXPECT_EQ(hostPtrSize, Buffer::calculateHostPtrSize(origin, region, rowPitch, slicePitch));
}

INSTANTIATE_TEST_CASE_P(
    BufferCalculateHostPtrSizes,
    BufferCalculateHostPtrSize,
    testing::ValuesIn(Inputs));

// With Force32bitAddressing set, a zero-copy USE_HOST_PTR buffer must hand back the
// caller's host pointer both for mapping and for memory transfers.
TEST(Buffers64on32Tests, given32BitBufferCreatedWithUseHostPtrFlagThatIsZeroCopyWhenAskedForStorageThenHostPtrIsReturned) {
    DebugManagerStateRestore dbgRestorer;
    {
        DebugManager.flags.Force32bitAddressing.set(true);
        MockContext context;

        auto bufferSize = MemoryConstants::pageSize;
        void *baseAddress = reinterpret_cast<void *>(0x1000);
        auto cacheLineOffset = MemoryConstants::cacheLineSize;
        // Host pointer sits one cache line past the base address.
        uintptr_t hostAddress = reinterpret_cast<uintptr_t>(baseAddress) + cacheLineOffset;
        void *hostPtr = reinterpret_cast<void *>(hostAddress);

        auto retVal = CL_SUCCESS;
        auto buffer = Buffer::create(&context, CL_MEM_USE_HOST_PTR, bufferSize, hostPtr, retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_TRUE(buffer->isMemObjZeroCopy());
        EXPECT_EQ(hostPtr, buffer->getCpuAddressForMapping());
        EXPECT_EQ(hostPtr, buffer->getCpuAddressForMemoryTransfer());

        delete buffer;
        DebugManager.flags.Force32bitAddressing.set(false);
    }
}

// With Force32bitAddressing set, a zero-copy ALLOC_HOST_PTR buffer must use its own CPU
// address for mapping and transfers (the checks continue on the next line of the dump).
TEST(Buffers64on32Tests, given32BitBufferCreatedWithAllocHostPtrFlagThatIsZeroCopyWhenAskedForStorageThenStorageIsEqualToMemoryStorage) {
    DebugManagerStateRestore dbgRestorer;
    {
        DebugManager.flags.Force32bitAddressing.set(true);
        MockContext context;
        auto size = MemoryConstants::pageSize;
        auto retVal = CL_SUCCESS;

        auto buffer = Buffer::create(
            &context,
            CL_MEM_ALLOC_HOST_PTR,
            size,
            nullptr,
            retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);

        EXPECT_TRUE(buffer->isMemObjZeroCopy());
EXPECT_EQ(buffer->getCpuAddress(), buffer->getCpuAddressForMapping()); EXPECT_EQ(buffer->getCpuAddress(), buffer->getCpuAddressForMemoryTransfer()); delete buffer; DebugManager.flags.Force32bitAddressing.set(false); } } TEST(Buffers64on32Tests, given32BitBufferThatIsCreatedWithUseHostPtrButIsNotZeroCopyThenProperPointersAreReturned) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.Force32bitAddressing.set(true); MockContext context; auto size = MemoryConstants::pageSize; void *ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); auto ptrOffset = 1; uintptr_t offsetedPtr = (uintptr_t)ptr + ptrOffset; auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, (void *)offsetedPtr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->isMemObjZeroCopy()); EXPECT_EQ((void *)offsetedPtr, buffer->getCpuAddressForMapping()); EXPECT_EQ(buffer->getCpuAddress(), buffer->getCpuAddressForMemoryTransfer()); delete buffer; DebugManager.flags.Force32bitAddressing.set(false); alignedFree(ptr); } } TEST(SharedBuffersTest, whenBuffersIsCreatedWithSharingHandlerThenItIsSharedBuffer) { MockContext context; auto memoryManager = context.getDevice(0)->getMemoryManager(); auto handler = new SharingHandler(); auto graphicsAlloaction = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); auto buffer = Buffer::createSharedBuffer(&context, CL_MEM_READ_ONLY, handler, graphicsAlloaction); ASSERT_NE(nullptr, buffer); EXPECT_EQ(handler, buffer->peekSharingHandler()); buffer->release(); } TEST(ResidencyTests, whenBuffersIsCreatedWithMakeResidentFlagThenItSuccessfulyCreates) { VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.MakeAllBuffersResident.set(true); initPlatform(); auto device = platform()->getClDevice(0u); MockContext 
context(device, false); auto retValue = CL_SUCCESS; auto clBuffer = clCreateBuffer(&context, 0u, 4096u, nullptr, &retValue); ASSERT_EQ(retValue, CL_SUCCESS); clReleaseMemObject(clBuffer); } class BufferTests : public ::testing::Test { protected: void SetUp() override { device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); } void TearDown() override { } std::unique_ptr device; }; typedef BufferTests BufferSetSurfaceTests; HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrAndSizeIsAlignedToCachelineThenL3CacheShouldBeOn) { auto size = MemoryConstants::pageSize; auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, size, ptr, 0, nullptr, 0, 0); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), mocs); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsUnalignedToCachelineThenL3CacheShouldBeOff) { auto size = MemoryConstants::pageSize; auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize); auto ptrOffset = 1; auto offsetedPtr = (void *)((uintptr_t)ptr + ptrOffset); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, size, offsetedPtr, 0, nullptr, 0, 0); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedToCachelineThenL3CacheShouldBeOff) { auto size = MemoryConstants::pageSize; auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize); auto 
sizeOffset = 1; auto offsetedSize = size + sizeOffset; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, offsetedSize, ptr, 0, nullptr, 0, 0); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn) { auto size = MemoryConstants::pageSize; auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize); auto sizeOffset = 1; auto offsetedSize = size + sizeOffset; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), mocs); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedThenSurfaceSizeShouldBeAlignedToFour) { auto size = MemoryConstants::pageSize; auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize); auto sizeOffset = 1; auto offsetedSize = size + sizeOffset; using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, offsetedSize, ptr, 0, nullptr, 0, 0); auto width = surfaceState.getWidth(); EXPECT_EQ(alignUp(width, 4), width); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceWhenOffsetIsSpecifiedForSvmAllocationThenSetSurfaceAddressWithOffsetedPointer) { auto size = 2 * MemoryConstants::pageSize; auto ptr = alignedMalloc(size, MemoryConstants::pageSize); auto offset = 4; 
MockGraphicsAllocation svmAlloc(ptr, size); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, size, ptr, offset, &svmAlloc, 0, 0); auto baseAddress = surfaceState.getSurfaceBaseAddress(); EXPECT_EQ(svmAlloc.getGpuAddress() + offset, baseAddress); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNotNullThenBufferSurfaceShouldBeUsed) { auto size = MemoryConstants::pageSize; auto ptr = alignedMalloc(size * 2, MemoryConstants::pageSize); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, size, ptr, 0, nullptr, 0, 0); auto surfType = surfaceState.getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType); alignedFree(ptr); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNullThenNullSurfaceShouldBeUsed) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; Buffer::setSurfaceState(device.get(), &surfaceState, 0, nullptr, 0, nullptr, 0, 0); auto surfType = surfaceState.getSurfaceType(); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfType); } HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitWhenSetArgStatefulIsCalledThenSurfaceBaseAddressIsPopulatedWithGpuAddress) { DebugManagerStateRestore dbgRestorer; { DebugManager.flags.Force32bitAddressing.set(true); MockContext context; auto size = MemoryConstants::pageSize; auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, ptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(is64bit ? 
buffer->getGraphicsAllocation()->is32BitAllocation() : true); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); EXPECT_EQ(bufferAddress, surfBaseAddress); delete buffer; alignedFree(ptr); DebugManager.flags.Force32bitAddressing.set(false); } } HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledThenSurfaceBaseAddressIsProperlyOffseted) { MockContext context; auto size = MemoryConstants::pageSize; auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); auto retVal = CL_SUCCESS; auto buffer = Buffer::create( &context, CL_MEM_USE_HOST_PTR, size, ptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); cl_buffer_region region = {4, 8}; retVal = -1; auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); ASSERT_EQ(CL_SUCCESS, retVal); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; subBuffer->setArgStateful(&surfaceState, false, false, false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); EXPECT_EQ(bufferAddress + region.origin, surfBaseAddress); subBuffer->release(); delete buffer; alignedFree(ptr); DebugManager.flags.Force32bitAddressing.set(false); } HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisabledIsCalledThenL3CacheShouldBeOffAndSizeIsAlignedTo512) { MockContext context; auto size = 128; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_READ_WRITE, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE 
surfaceState = {}; buffer->setArgStateful(&surfaceState, false, true, true, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs); EXPECT_EQ(128u, surfaceState.getWidth()); EXPECT_EQ(4u, surfaceState.getHeight()); } HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumentWhenSurfaceStateIsSetThenL3IsOn) { MockContext context; auto size = 128; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, CL_MEM_READ_WRITE, size, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; buffer->getGraphicsAllocation()->setSize(127); buffer->setArgStateful(&surfaceState, false, false, false, true); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_TRUE(expectedMocs == mocs || expectedMocs2 == mocs); } HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; const auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); const auto flags = CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY; auto retVal = CL_SUCCESS; auto buffer = std::unique_ptr(Buffer::create( &context, flags, size, ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; buffer->setArgStateful(&surfaceState, false, false, false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); EXPECT_EQ(expectedMocs, actualMocs); alignedFree(ptr); } 
// Page-aligned USE_HOST_PTR buffer without READ_ONLY: the OCL_BUFFER MOCS is expected.
HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableNonReadOnlyBufferThenChooseOclBufferPolicy) {
    MockContext context;
    const auto size = MemoryConstants::pageSize;
    const auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize);
    const auto flags = CL_MEM_USE_HOST_PTR;
    auto retVal = CL_SUCCESS;

    // <Buffer> restored: the template argument was stripped from this dump.
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(
        &context,
        flags,
        size,
        ptr,
        retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);

    typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
    buffer->setArgStateful(&surfaceState, false, false, false, false);

    const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    const auto actualMocs = surfaceState.getMemoryObjectControlState();
    EXPECT_EQ(expectedMocs, actualMocs);

    alignedFree(ptr);
}

// BUFFER_COMPRESSED allocation with a render-compressed Gmm: expects AUX_CCS_E mode and
// GPU-coherent type; after switching the type back to BUFFER, AUX_NONE is checked on the next line.
HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceStateIsProgrammedThenSetAuxParams) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    RENDER_SURFACE_STATE surfaceState = {};
    MockContext context;
    auto retVal = CL_SUCCESS;

    // <Buffer> restored: the template argument was stripped from this dump.
    std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal));
    buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
    // NOTE(review): the Gmm is handed to the allocation via setDefaultGmm and is not deleted
    // here; presumably the allocation takes ownership - confirm.
    auto gmm = new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, false);
    buffer->getGraphicsAllocation()->setDefaultGmm(gmm);
    gmm->isRenderCompressed = true;

    buffer->setArgStateful(&surfaceState, false, false, false, false);

    EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
    EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode());
    EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType());

    buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
    buffer->setArgStateful(&surfaceState, false, false, false, false);
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); } HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceStateIsProgrammedThenDontSetAuxParams) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE surfaceState = {}; MockContext context; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto gmm = new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, false); buffer->getGraphicsAllocation()->setDefaultGmm(gmm); gmm->isRenderCompressed = false; buffer->setArgStateful(&surfaceState, false, false, false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT == surfaceState.getCoherencyType()); } HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgrammedThenBaseAddressAndLengthAreAlignedToDword) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE surfaceState = {}; MockContext context; uintptr_t ptr = 0xfffff000; void *svmPtr = reinterpret_cast(ptr); Buffer::setSurfaceState(device.get(), &surfaceState, 5, svmPtr, 0, nullptr, 0, 0); EXPECT_EQ(castToUint64(svmPtr), surfaceState.getSurfaceBaseAddress()); SURFACE_STATE_BUFFER_LENGTH length = {}; length.SurfaceState.Width = surfaceState.getWidth() - 1; length.SurfaceState.Height = surfaceState.getHeight() - 1; length.SurfaceState.Depth = surfaceState.getDepth() - 1; EXPECT_EQ(alignUp(5u, 4u), length.Length + 1); } HWTEST_F(BufferSetSurfaceTests, 
givenBufferThatIsMisalignedWhenSurfaceStateIsBeingProgrammedThenL3CacheIsOff) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; MockContext context; void *svmPtr = reinterpret_cast(0x1005); Buffer::setSurfaceState(device.get(), &surfaceState, 5, svmPtr, 0, nullptr, 0, 0); EXPECT_EQ(0u, surfaceState.getMemoryObjectControlState()); } class BufferL3CacheTests : public ::testing::TestWithParam { public: void SetUp() override { hostPtr = reinterpret_cast(GetParam()); } MockContext ctx; const size_t region[3] = {3, 3, 1}; const size_t origin[3] = {0, 0, 0}; void *hostPtr; }; HWTEST_P(BufferL3CacheTests, givenMisalignedAndAlignedBufferWhenClEnqueueWriteImageThenL3CacheIsOn) { if (ctx.getDevice(0)->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; CommandQueueHw cmdQ(&ctx, ctx.getDevice(0), nullptr, false); auto surfaceState = reinterpret_cast(cmdQ.getGpgpuCommandStreamReceiver().getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0).getSpace(0)); cl_image_format imageFormat; cl_image_desc imageDesc; imageFormat.image_channel_order = CL_RGBA; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 3; imageDesc.image_height = 3; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = nullptr; auto image = clCreateImage(&ctx, CL_MEM_READ_WRITE, &imageFormat, &imageDesc, nullptr, nullptr); clEnqueueWriteImage(&cmdQ, image, false, origin, region, 0, 0, hostPtr, 0, nullptr, nullptr); auto expect = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expect2 = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_NE(NULL, surfaceState->getMemoryObjectControlState()); 
EXPECT_TRUE(expect == surfaceState->getMemoryObjectControlState() || expect2 == surfaceState->getMemoryObjectControlState()); clReleaseMemObject(image); } HWTEST_P(BufferL3CacheTests, givenMisalignedAndAlignedBufferWhenClEnqueueWriteBufferRectThenL3CacheIsOn) { if (ctx.getDevice(0)->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; CommandQueueHw cmdQ(&ctx, ctx.getDevice(0), nullptr, false); auto surfaceState = reinterpret_cast(cmdQ.getGpgpuCommandStreamReceiver().getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 0).getSpace(0)); auto buffer = clCreateBuffer(&ctx, CL_MEM_READ_WRITE, 36, nullptr, nullptr); clEnqueueWriteBufferRect(&cmdQ, buffer, false, origin, origin, region, 0, 0, 0, 0, hostPtr, 0, nullptr, nullptr); auto expect = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto expect2 = ctx.getDevice(0)->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); EXPECT_NE(NULL, surfaceState->getMemoryObjectControlState()); EXPECT_TRUE(expect == surfaceState->getMemoryObjectControlState() || expect2 == surfaceState->getMemoryObjectControlState()); clReleaseMemObject(buffer); } static uint64_t pointers[] = { 0x1005, 0x2000}; INSTANTIATE_TEST_CASE_P( pointers, BufferL3CacheTests, testing::ValuesIn(pointers)); struct BufferUnmapTest : public DeviceFixture, public ::testing::Test { void SetUp() override { DeviceFixture::SetUp(); } void TearDown() override { DeviceFixture::TearDown(); } }; HWTEST_F(BufferUnmapTest, givenBufferWithSharingHandlerWhenUnmappingThenUseNonBlockingEnqueueWriteBuffer) { MockContext context(pClDevice); MockCommandQueueHw cmdQ(&context, pClDevice, nullptr); auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_ALLOC_HOST_PTR, 123, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); buffer->setSharingHandler(new SharingHandler()); EXPECT_NE(nullptr, buffer->peekSharingHandler()); auto mappedPtr = 
clEnqueueMapBuffer(&cmdQ, buffer.get(), CL_TRUE, CL_MAP_WRITE, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, cmdQ.EnqueueWriteBufferCounter); retVal = clEnqueueUnmapMemObject(&cmdQ, buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, cmdQ.EnqueueWriteBufferCounter); EXPECT_FALSE(cmdQ.blockingWriteBuffer); } HWTEST_F(BufferUnmapTest, givenBufferWithoutSharingHandlerWhenUnmappingThenDontUseEnqueueWriteBuffer) { MockContext context(pClDevice); MockCommandQueueHw cmdQ(&context, pClDevice, nullptr); auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_ALLOC_HOST_PTR, 123, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, buffer->peekSharingHandler()); auto mappedPtr = clEnqueueMapBuffer(&cmdQ, buffer.get(), CL_TRUE, CL_MAP_READ, 0, 1, 0, nullptr, nullptr, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(&cmdQ, buffer.get(), mappedPtr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, cmdQ.EnqueueWriteBufferCounter); } using BufferTransferTests = BufferUnmapTest; TEST_F(BufferTransferTests, givenBufferWhenTransferToHostPtrCalledThenCopyRequestedSizeAndOffsetOnly) { MockContext context(pClDevice); auto retVal = CL_SUCCESS; const size_t bufferSize = 100; size_t ignoredParam = 123; MemObjOffsetArray copyOffset = {{20, ignoredParam, ignoredParam}}; MemObjSizeArray copySize = {{10, ignoredParam, ignoredParam}}; uint8_t hostPtr[bufferSize] = {}; uint8_t expectedHostPtr[bufferSize] = {}; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, bufferSize, hostPtr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto srcPtr = buffer->getCpuAddress(); EXPECT_NE(srcPtr, hostPtr); memset(srcPtr, 123, bufferSize); memset(ptrOffset(expectedHostPtr, copyOffset[0]), 123, copySize[0]); buffer->transferDataToHostPtr(copySize, copyOffset); EXPECT_TRUE(memcmp(hostPtr, expectedHostPtr, copySize[0]) == 0); } 
TEST_F(BufferTransferTests, givenBufferWhenTransferFromHostPtrCalledThenCopyRequestedSizeAndOffsetOnly) { MockContext context(pClDevice); auto retVal = CL_SUCCESS; const size_t bufferSize = 100; size_t ignoredParam = 123; MemObjOffsetArray copyOffset = {{20, ignoredParam, ignoredParam}}; MemObjSizeArray copySize = {{10, ignoredParam, ignoredParam}}; uint8_t hostPtr[bufferSize] = {}; uint8_t expectedBufferMemory[bufferSize] = {}; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, bufferSize, hostPtr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(buffer->getCpuAddress(), hostPtr); memset(hostPtr, 123, bufferSize); memset(ptrOffset(expectedBufferMemory, copyOffset[0]), 123, copySize[0]); buffer->transferDataFromHostPtr(copySize, copyOffset); EXPECT_TRUE(memcmp(expectedBufferMemory, buffer->getCpuAddress(), copySize[0]) == 0); } using MultiRootDeviceBufferTest = MultiRootDeviceFixture; TEST_F(MultiRootDeviceBufferTest, bufferGraphicsAllocationHasCorrectRootDeviceIndex) { cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE; std::unique_ptr buffer(Buffer::create(context.get(), flags, MemoryConstants::pageSize, nullptr, retVal)); auto graphicsAllocation = buffer->getGraphicsAllocation(); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex()); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/create_image_format_tests.cpp000066400000000000000000000070041363734646600311460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; static 
const unsigned int testImageDimensions = 32; template class CreateImageFormatTest : public testing::TestWithParam { public: CreateImageFormatTest() : flags(_flags) { } protected: void SetUp() override { indexImageFormat = GetParam(); ArrayRef surfaceFormatTable = SurfaceFormats::surfaceFormats(flags, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_GT(surfaceFormatTable.size(), indexImageFormat); surfaceFormat = &surfaceFormatTable[indexImageFormat]; // clang-format off imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { } const ClSurfaceFormatInfo *surfaceFormat; size_t indexImageFormat; cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext context; cl_mem_flags flags; }; typedef CreateImageFormatTest ReadWriteFormatTest; TEST_P(ReadWriteFormatTest, returnsSuccess) { auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); delete image; } static const size_t zero = 0; INSTANTIATE_TEST_CASE_P( CreateImage, ReadWriteFormatTest, testing::Range(zero, SurfaceFormats::readWrite().size())); typedef CreateImageFormatTest ReadOnlyFormatTest; TEST_P(ReadOnlyFormatTest, returnsSuccess) { auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); delete image; } INSTANTIATE_TEST_CASE_P( CreateImage, ReadOnlyFormatTest, testing::Range(zero, 
SurfaceFormats::readOnly12().size())); typedef CreateImageFormatTest WriteOnlyFormatTest; TEST_P(WriteOnlyFormatTest, returnsSuccess) { auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); delete image; } INSTANTIATE_TEST_CASE_P( CreateImage, WriteOnlyFormatTest, testing::Range(zero, SurfaceFormats::writeOnly().size())); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/destructor_callback_tests.cpp000066400000000000000000000060241363734646600312040ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; class DestructorCallbackFixture : public MemoryManagementFixture { public: DestructorCallbackFixture() { } void SetUp() override { MemoryManagementFixture::SetUp(); BufferDefaults::context = new MockContext; } void TearDown() override { delete BufferDefaults::context; platformsImpl.clear(); MemoryManagementFixture::TearDown(); } protected: cl_int retVal = CL_SUCCESS; }; typedef Test DestructorCallbackTest; static std::vector calls(32); void CL_CALLBACK callBack1(cl_mem memObj, void *userData) { calls.push_back(1); } void CL_CALLBACK callBack2(cl_mem memObj, void *userData) { calls.push_back(2); } void CL_CALLBACK callBack3(cl_mem memObj, void *userData) { calls.push_back(3); } TEST_F(DestructorCallbackTest, checkCallOrder) { auto buffer = BufferHelper>::create(); auto address = buffer->getCpuAddress(); EXPECT_NE(nullptr, address); calls.clear(); retVal = buffer->setDestructorCallback(callBack1, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); retVal = buffer->setDestructorCallback(callBack2, nullptr); 
ASSERT_EQ(CL_SUCCESS, retVal); retVal = buffer->setDestructorCallback(callBack3, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); delete buffer; ASSERT_EQ(3u, calls.size()); EXPECT_EQ(3, calls[0]); EXPECT_EQ(2, calls[1]); EXPECT_EQ(1, calls[2]); calls.clear(); } TEST_F(DestructorCallbackTest, doFailAllocations) { std::shared_ptr context(new MockContext); InjectedFunction method = [this, context](size_t failureIndex) { char hostPtr[42]; auto buffer = Buffer::create( context.get(), CL_MEM_USE_HOST_PTR, sizeof(hostPtr), hostPtr, retVal); // if failures are injected into Buffer::create, we ignore them // we are only interested in setDestructorCallback if (retVal == CL_SUCCESS && buffer != nullptr) { auto address = buffer->getCpuAddress(); EXPECT_NE(nullptr, address); calls.clear(); retVal = buffer->setDestructorCallback(callBack1, nullptr); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; } delete buffer; if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(1u, calls.size()); } else { EXPECT_EQ(0u, calls.size()); } } }; injectFailures(method); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/get_mem_object_info_subbuffer_tests.cpp000066400000000000000000000101421363734646600332070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; struct GetMemObjectSubBufferInfo : public ::testing::Test { GetMemObjectSubBufferInfo() { } void SetUp() override { bufferStorage = alignedMalloc(4096, MemoryConstants::preferredAlignment); region.origin = 4; region.size = 12; } void TearDown() override { delete subBuffer; delete buffer; 
alignedFree(bufferStorage); } void createBuffer(cl_mem_flags flags = CL_MEM_READ_WRITE) { auto retVal = CL_INVALID_VALUE; buffer = Buffer::create(&context, flags, bufferSize, nullptr, retVal); ASSERT_NE(nullptr, buffer); } void createSubBuffer(cl_mem_flags flags = CL_MEM_READ_WRITE) { cl_int retVal; subBuffer = buffer->createSubBuffer(flags, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); } void createHostPtrBuffer(cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR) { auto retVal = CL_INVALID_VALUE; buffer = Buffer::create(&context, flags, bufferSize, bufferStorage, retVal); ASSERT_NE(nullptr, buffer); } MockContext context; Buffer *buffer = nullptr; Buffer *subBuffer = nullptr; void *bufferStorage; static const size_t bufferSize = 256; cl_buffer_region region; cl_int retVal; size_t sizeReturned = 0; }; TEST_F(GetMemObjectSubBufferInfo, MEM_ASSOCIATED_MEMOBJECT) { createBuffer(); createSubBuffer(); cl_mem object = nullptr; retVal = subBuffer->getMemObjectInfo(CL_MEM_ASSOCIATED_MEMOBJECT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(object), sizeReturned); retVal = subBuffer->getMemObjectInfo(CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(object), &object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_mem clBuffer = (cl_mem)buffer; EXPECT_EQ(clBuffer, object); } TEST_F(GetMemObjectSubBufferInfo, MEM_OFFSET) { createBuffer(); createSubBuffer(); size_t offset = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_OFFSET, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(offset), sizeReturned); retVal = subBuffer->getMemObjectInfo(CL_MEM_OFFSET, sizeof(offset), &offset, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(region.origin, offset); } TEST_F(GetMemObjectSubBufferInfo, MEM_FLAGS) { createBuffer(); createSubBuffer(); cl_mem_flags flags = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_FLAGS, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(flags), sizeReturned); retVal = 
subBuffer->getMemObjectInfo(CL_MEM_FLAGS, sizeof(flags), &flags, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_MEM_READ_WRITE), flags); } TEST_F(GetMemObjectSubBufferInfo, MEM_FLAGS_empty) { createBuffer(CL_MEM_READ_ONLY); createSubBuffer(0); cl_mem_flags flags = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_FLAGS, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(flags), sizeReturned); retVal = subBuffer->getMemObjectInfo( CL_MEM_FLAGS, sizeof(flags), &flags, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(0), flags); } TEST_F(GetMemObjectSubBufferInfo, MEM_HOST_PTR) { createHostPtrBuffer(); createSubBuffer(); void *hostPtr = 0; retVal = subBuffer->getMemObjectInfo(CL_MEM_HOST_PTR, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(hostPtr), sizeReturned); retVal = subBuffer->getMemObjectInfo(CL_MEM_HOST_PTR, sizeof(hostPtr), &hostPtr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto expected = ptrOffset(this->bufferStorage, region.origin); EXPECT_EQ(expected, hostPtr); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/get_mem_object_info_tests.cpp000066400000000000000000000246001363734646600311500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" #include using namespace NEO; class GetMemObjectInfo : public ::testing::Test, public PlatformFixture, public DeviceFixture { using DeviceFixture::SetUp; using PlatformFixture::SetUp; 
public: void SetUp() override { PlatformFixture::SetUp(); DeviceFixture::SetUp(); BufferDefaults::context = new MockContext; } void TearDown() override { delete BufferDefaults::context; DeviceFixture::TearDown(); PlatformFixture::TearDown(); } }; TEST_F(GetMemObjectInfo, InvalidFlags_returnsError) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; auto retVal = buffer->getMemObjectInfo( 0, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(GetMemObjectInfo, MEM_TYPE) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; auto retVal = buffer->getMemObjectInfo( CL_MEM_TYPE, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(cl_mem_object_type), sizeReturned); cl_mem_object_type object_type = 0; retVal = buffer->getMemObjectInfo( CL_MEM_TYPE, sizeof(cl_mem_object_type), &object_type, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_MEM_OBJECT_BUFFER), object_type); } TEST_F(GetMemObjectInfo, MEM_FLAGS) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_mem_flags mem_flags = 0; auto retVal = buffer->getMemObjectInfo( CL_MEM_FLAGS, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mem_flags), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_MEM_READ_WRITE), mem_flags); } TEST_F(GetMemObjectInfo, MEM_SIZE) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; size_t mem_size = 0; auto retVal = buffer->getMemObjectInfo( CL_MEM_SIZE, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mem_size), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_SIZE, sizeof(mem_size), &mem_size, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(buffer->getSize(), mem_size); } TEST_F(GetMemObjectInfo, MEM_HOST_PTR) { auto buffer = 
std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; void *host_ptr = nullptr; auto retVal = buffer->getMemObjectInfo( CL_MEM_HOST_PTR, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(host_ptr), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_HOST_PTR, sizeof(host_ptr), &host_ptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(buffer->getHostPtr(), host_ptr); } TEST_F(GetMemObjectInfo, MEM_CONTEXT) { MockContext context; auto buffer = std::unique_ptr(BufferHelper<>::create(&context)); size_t sizeReturned = 0; cl_context contextReturned = nullptr; auto retVal = buffer->getMemObjectInfo( CL_MEM_CONTEXT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(contextReturned), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_CONTEXT, sizeof(contextReturned), &contextReturned, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(&context, contextReturned); } TEST_F(GetMemObjectInfo, MEM_USES_SVM_POINTER_FALSE) { auto buffer = std::unique_ptr(BufferHelper>::create()); size_t sizeReturned = 0; cl_bool usesSVMPointer = false; auto retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(usesSVMPointer), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, sizeof(usesSVMPointer), &usesSVMPointer, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_FALSE), usesSVMPointer); } TEST_F(GetMemObjectInfo, MEM_USES_SVM_POINTER_TRUE) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto hostPtr = clSVMAlloc(BufferDefaults::context, CL_MEM_READ_WRITE, BufferUseHostPtr<>::sizeInBytes, 64); ASSERT_NE(nullptr, hostPtr); cl_int retVal; auto buffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, BufferUseHostPtr<>::sizeInBytes, hostPtr, retVal); size_t sizeReturned = 0; cl_bool usesSVMPointer = false; retVal = 
buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(usesSVMPointer), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, sizeof(usesSVMPointer), &usesSVMPointer, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_TRUE), usesSVMPointer); delete buffer; clSVMFree(BufferDefaults::context, hostPtr); } } TEST_F(GetMemObjectInfo, MEM_OFFSET) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; size_t offset = false; auto retVal = buffer->getMemObjectInfo( CL_MEM_OFFSET, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(offset), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_OFFSET, sizeof(offset), &offset, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, offset); } TEST_F(GetMemObjectInfo, MEM_ASSOCIATED_MEMOBJECT) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_mem object = nullptr; auto retVal = buffer->getMemObjectInfo( CL_MEM_ASSOCIATED_MEMOBJECT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(object), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(object), &object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, object); } TEST_F(GetMemObjectInfo, MEM_MAP_COUNT) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_uint mapCount = static_cast(-1); auto retVal = buffer->getMemObjectInfo( CL_MEM_MAP_COUNT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mapCount), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_MAP_COUNT, sizeof(mapCount), &mapCount, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(mapCount), sizeReturned); } TEST_F(GetMemObjectInfo, MEM_REFERENCE_COUNT) { auto buffer = std::unique_ptr(BufferHelper<>::create()); size_t sizeReturned = 0; cl_uint refCount = 
static_cast(-1); auto retVal = buffer->getMemObjectInfo( CL_MEM_REFERENCE_COUNT, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(refCount), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_REFERENCE_COUNT, sizeof(refCount), &refCount, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(refCount), sizeReturned); } class GetMemObjectInfoLocalMemory : public GetMemObjectInfo { using GetMemObjectInfo::SetUp; public: void SetUp() override { dbgRestore = std::make_unique(); DebugManager.flags.EnableLocalMemory.set(1); GetMemObjectInfo::SetUp(); delete BufferDefaults::context; BufferDefaults::context = new MockContext(pClDevice, true); } std::unique_ptr dbgRestore; }; TEST_F(GetMemObjectInfoLocalMemory, givenLocalMemoryEnabledWhenNoZeroCopySvmAllocationUsedThenBufferAllocationInheritsZeroCopyFlag) { const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto hostPtr = clSVMAlloc(BufferDefaults::context, CL_MEM_READ_WRITE, BufferUseHostPtr<>::sizeInBytes, 64); ASSERT_NE(nullptr, hostPtr); cl_int retVal; auto buffer = Buffer::create( BufferDefaults::context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, BufferUseHostPtr<>::sizeInBytes, hostPtr, retVal); size_t sizeReturned = 0; cl_bool usesSVMPointer = false; retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, 0, nullptr, &sizeReturned); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(sizeof(usesSVMPointer), sizeReturned); retVal = buffer->getMemObjectInfo( CL_MEM_USES_SVM_POINTER, sizeof(usesSVMPointer), &usesSVMPointer, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(static_cast(CL_TRUE), usesSVMPointer); EXPECT_TRUE(buffer->isMemObjWithHostPtrSVM()); EXPECT_FALSE(buffer->isMemObjZeroCopy()); delete buffer; clSVMFree(BufferDefaults::context, hostPtr); } } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image1d_tests.cpp000066400000000000000000000107321363734646600265020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; static const unsigned int testImageDimensions = 32; class CreateImage1DTest : public DeviceFixture, public testing::TestWithParam { public: CreateImage1DTest() { } protected: void SetUp() override { DeviceFixture::SetUp(); types = GetParam(); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = types; imageDesc.image_width = testImageDimensions; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on if (types == CL_MEM_OBJECT_IMAGE1D_ARRAY) { imageDesc.image_array_size = 10; } context = new MockContext(pClDevice); if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { imageDesc.mem_object = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, testImageDimensions, nullptr, nullptr); } } void TearDown() override { if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { clReleaseMemObject(imageDesc.mem_object); } delete context; DeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; typedef CreateImage1DTest CreateImage1DType; HWTEST_P(CreateImage1DType, validTypes) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), 
flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto imgDesc = image->getImageDesc(); EXPECT_NE(0u, imgDesc.image_width); EXPECT_EQ(0u, imgDesc.image_height); EXPECT_EQ(0u, imgDesc.image_depth); EXPECT_NE(0u, imgDesc.image_row_pitch); EXPECT_GE(imgDesc.image_slice_pitch, imgDesc.image_row_pitch); size_t ImageInfoHeight = 0; retVal = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(size_t), &ImageInfoHeight, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(0u, ImageInfoHeight); if ((types == CL_MEM_OBJECT_IMAGE1D) || (types == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { EXPECT_EQ(0u, imgDesc.image_array_size); } else if (types == CL_MEM_OBJECT_IMAGE1D_ARRAY) { EXPECT_NE(0u, imgDesc.image_array_size); } else { ASSERT_TRUE(false); } EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); ASSERT_EQ(true, image->isMemObjZeroCopy()); EXPECT_FALSE(image->isImageFromImage()); auto address = image->getCpuAddress(); EXPECT_NE(nullptr, address); if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { Buffer *inputBuffer = castToObject(imageDesc.buffer); EXPECT_NE(nullptr, inputBuffer->getCpuAddress()); EXPECT_EQ(inputBuffer->getCpuAddress(), image->getCpuAddress()); EXPECT_FALSE(image->getIsObjectRedescribed()); EXPECT_GE(2, inputBuffer->getRefInternalCount()); EXPECT_TRUE(image->isImageFromBuffer()); } else { EXPECT_FALSE(image->isImageFromBuffer()); } typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; auto imageHw = static_cast *>(image); EXPECT_EQ(SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D, imageHw->surfaceType); delete image; } static cl_mem_object_type Image1DTypes[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY}; INSTANTIATE_TEST_CASE_P( CreateImage1DTest_Create, CreateImage1DType, testing::ValuesIn(Image1DTypes)); 
compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image2d_from_buffer_tests.cpp000066400000000000000000000504751363734646600310670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/cl_device/cl_device_info_map.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/extensions.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "test.h" using namespace NEO; namespace NEO { extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; } // Tests for cl_khr_image2d_from_buffer class Image2dFromBufferTest : public DeviceFixture, public ::testing::Test { public: Image2dFromBufferTest() {} protected: void SetUp() override { imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_array_size = 0; imageDesc.image_depth = 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128; imageDesc.image_width = 256; imageDesc.num_mip_levels = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_samples = 0; size = 128 * 256 * 4; hostPtr = alignedMalloc(size, 16); ASSERT_NE(nullptr, hostPtr); imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_USE_HOST_PTR, size, hostPtr, &retVal); ASSERT_NE(nullptr, imageDesc.mem_object); } void TearDown() override { clReleaseMemObject(imageDesc.mem_object); alignedFree(hostPtr); } Image *createImage() { cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = (ClSurfaceFormatInfo *)Image::getSurfaceFormatFromTable(flags, &imageFormat, 
context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); return Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, NULL, retVal); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext context; void *hostPtr; size_t size; }; TEST_F(Image2dFromBufferTest, CreateImage2dFromBuffer) { auto buffer = castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); auto imageFromBuffer = createImage(); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, buffer->getRefInternalCount()); EXPECT_NE(nullptr, imageFromBuffer); EXPECT_FALSE(imageFromBuffer->isTiledAllocation()); EXPECT_EQ(imageFromBuffer->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); delete imageFromBuffer; EXPECT_EQ(1, buffer->getRefInternalCount()); } TEST_F(Image2dFromBufferTest, givenBufferWhenCreateImage2dArrayFromBufferThenImageDescriptorIsInvalid) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = (ClSurfaceFormatInfo *)Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, CalculateRowPitch) { auto imageFromBuffer = createImage(); ASSERT_NE(nullptr, imageFromBuffer); EXPECT_NE(0u, imageFromBuffer->getImageDesc().image_row_pitch); EXPECT_EQ(1024u, imageFromBuffer->getImageDesc().image_row_pitch); delete imageFromBuffer; } TEST_F(Image2dFromBufferTest, givenInvalidRowPitchWhenCreateImage2dFromBufferThenReturnsError) { char ptr[10]; imageDesc.image_row_pitch = 255; cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = (ClSurfaceFormatInfo 
*)Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, ptr); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenRowPitchThatIsGreaterThenComputedWhenImageIsCreatedThenPassedRowPitchIsUsedInsteadOfComputed) { auto computedSize = imageDesc.image_width * 4; auto passedSize = computedSize * 2; imageDesc.image_row_pitch = passedSize; auto imageFromBuffer = createImage(); EXPECT_EQ(passedSize, imageFromBuffer->getHostPtrRowPitch()); delete imageFromBuffer; } TEST_F(Image2dFromBufferTest, InvalidHostPtrAlignment) { std::unique_ptr myHostPtr(malloc(size + 1), free); ASSERT_NE(nullptr, myHostPtr); void *nonAlignedHostPtr = myHostPtr.get(); if ((reinterpret_cast(myHostPtr.get()) % 4) == 0) { nonAlignedHostPtr = reinterpret_cast((reinterpret_cast(myHostPtr.get()) + 1)); } cl_mem origBuffer = imageDesc.mem_object; imageDesc.mem_object = clCreateBuffer(&context, CL_MEM_USE_HOST_PTR, size, nonAlignedHostPtr, &retVal); ASSERT_NE(nullptr, imageDesc.mem_object); cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = (ClSurfaceFormatInfo *)Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); clReleaseMemObject(imageDesc.mem_object); imageDesc.mem_object = origBuffer; } TEST_F(Image2dFromBufferTest, givenInvalidFlagsWhenValidateIsCalledThenReturnError) { cl_mem_flags flags[] = {CL_MEM_USE_HOST_PTR, CL_MEM_COPY_HOST_PTR}; for (auto flag : flags) { const auto surfaceFormat = Image::getSurfaceFormatFromTable(flag, &imageFormat, 
context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flag, 0, 0), surfaceFormat, &imageDesc, reinterpret_cast(0x12345)); EXPECT_EQ(CL_INVALID_VALUE, retVal); } } TEST_F(Image2dFromBufferTest, givenOneChannel8BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; const auto surfaceFormat = static_cast(Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport)); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenOneChannel16BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width / 2; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT16; imageFormat.image_channel_order = CL_R; const auto surfaceFormat = static_cast(Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport)); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel8BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width / 4; 
cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; const auto surfaceFormat = static_cast(Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport)); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel16BitColorsNoRowPitchSpecifiedAndTooLargeImageWhenValidatingSurfaceFormatThenReturnError) { imageDesc.image_height = 1 + castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width / 8; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT16; imageFormat.image_channel_order = CL_RGBA; const auto surfaceFormat = static_cast(Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport)); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel8BitColorsAndNotTooLargeRowPitchSpecifiedWhenValidatingSurfaceFormatThenDoNotReturnError) { imageDesc.image_height = castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; imageDesc.image_row_pitch = imageDesc.image_width; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; const auto surfaceFormat = static_cast(Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport)); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_SUCCESS, 
retVal); } TEST_F(Image2dFromBufferTest, givenFourChannel8BitColorsAndTooLargeRowPitchSpecifiedWhenValidatingSurfaceFormatThenReturnError) { const auto pitchAlignment = &ClDeviceInfoTable::Map::getValue(*context.getDevice(0u)); imageDesc.image_height = castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; imageDesc.image_row_pitch = imageDesc.image_width + *pitchAlignment; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; const auto surfaceFormat = static_cast(Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport)); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, givenUnalignedImageWidthAndNoSpaceInBufferForAlignmentWhenValidatingSurfaceFormatThenReturnError) { static_cast(context.getDevice(0))->deviceInfo.imagePitchAlignment = 128; imageDesc.image_width = 64; imageDesc.image_height = castToObject(imageDesc.mem_object)->getSize() / imageDesc.image_width; cl_mem_flags flags = CL_MEM_READ_ONLY; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; const auto surfaceFormat = static_cast(Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport)); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, NULL); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_F(Image2dFromBufferTest, ExtensionString) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto hwInfo = device->getHardwareInfo(); const auto &caps = device->getDeviceInfo(); std::string extensions = 
caps.deviceExtensions; size_t found = extensions.find("cl_khr_image2d_from_buffer"); if (hwInfo.capabilityTable.supportsImages) { EXPECT_NE(std::string::npos, found); } else { EXPECT_EQ(std::string::npos, found); } } TEST_F(Image2dFromBufferTest, InterceptBuffersHostPtr) { auto buffer = castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); auto imageFromBuffer = createImage(); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(buffer->getHostPtr(), imageFromBuffer->getHostPtr()); EXPECT_EQ(true, imageFromBuffer->isMemObjZeroCopy()); delete imageFromBuffer; } TEST_F(Image2dFromBufferTest, givenImageFromBufferWhenItIsRedescribedThenItReturnsProperImageFromBufferValue) { std::unique_ptr imageFromBuffer(createImage()); EXPECT_TRUE(imageFromBuffer->isImageFromBuffer()); std::unique_ptr redescribedImage(imageFromBuffer->redescribe()); EXPECT_TRUE(redescribedImage->isImageFromBuffer()); std::unique_ptr redescribedfillImage(imageFromBuffer->redescribeFillImage()); EXPECT_TRUE(redescribedfillImage->isImageFromBuffer()); } TEST_F(Image2dFromBufferTest, givenMemoryManagerNotSupportingVirtualPaddingWhenImageIsCreatedThenPaddingIsNotApplied) { auto memoryManager = context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(false); auto buffer = castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation for image and buffer is the same auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(); EXPECT_EQ(bufferGraphicsAllocation, imageGraphicsAllocation); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupportingVirtualPaddingWhenImageIsCreatedThatFitsInTheBufferThenPaddingIsNotApplied) { auto memoryManager = context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(true); auto buffer = 
castToObject(imageDesc.mem_object); ASSERT_NE(nullptr, buffer); EXPECT_EQ(1, buffer->getRefInternalCount()); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation for image and buffer is the same auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(); EXPECT_EQ(this->size, bufferGraphicsAllocation->getUnderlyingBufferSize()); auto imgInfo = MockGmm::initImgInfo(imageDesc, 0, &imageFromBuffer->getSurfaceFormatInfo()); auto queryGmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), imgInfo); EXPECT_TRUE(queryGmm->gmmResourceInfo->getSizeAllocation() >= this->size); EXPECT_EQ(bufferGraphicsAllocation, imageGraphicsAllocation); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupportingVirtualPaddingWhenImageIsCreatedThatDoesntFitInTheBufferThenPaddingIsApplied) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_width = 29; imageDesc.image_height = 29; imageDesc.image_row_pitch = 512; //application calcualted buffer size auto bufferSize = imageDesc.image_row_pitch * imageDesc.image_height; auto buffer2 = clCreateBuffer(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, nullptr); auto storeMem = imageDesc.mem_object; imageDesc.mem_object = buffer2; auto memoryManager = context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(true); auto buffer = castToObject(imageDesc.mem_object); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation for image and buffer is the same auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(); EXPECT_EQ(bufferSize, bufferGraphicsAllocation->getUnderlyingBufferSize()); auto imgInfo = MockGmm::initImgInfo(imageDesc, 0, &imageFromBuffer->getSurfaceFormatInfo()); auto queryGmm = 
MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), imgInfo); EXPECT_GT(queryGmm->gmmResourceInfo->getSizeAllocation(), bufferSize); EXPECT_NE(bufferGraphicsAllocation, imageGraphicsAllocation); EXPECT_EQ(queryGmm->gmmResourceInfo->getSizeAllocation(), imageFromBuffer->getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(bufferSize, imageFromBuffer->getSize()); imageDesc.mem_object = storeMem; clReleaseMemObject(buffer2); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupportingVirtualPaddingWhen1DImageFromBufferImageIsCreatedThenVirtualPaddingIsNotApplied) { imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_width = 1024; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; //application calcualted buffer size auto bufferSize = imageDesc.image_width * 16; auto buffer2 = clCreateBuffer(&context, CL_MEM_READ_WRITE, bufferSize, nullptr, nullptr); auto storeMem = imageDesc.mem_object; imageDesc.mem_object = buffer2; auto memoryManager = context.getMemoryManager(); memoryManager->setVirtualPaddingSupport(true); auto buffer = castToObject(imageDesc.mem_object); std::unique_ptr imageFromBuffer(createImage()); ASSERT_EQ(CL_SUCCESS, retVal); //graphics allocation match auto bufferGraphicsAllocation = buffer->getGraphicsAllocation(); auto imageGraphicsAllocation = imageFromBuffer->getGraphicsAllocation(); EXPECT_EQ(bufferGraphicsAllocation, imageGraphicsAllocation); imageDesc.mem_object = storeMem; clReleaseMemObject(buffer2); } TEST_F(Image2dFromBufferTest, givenMemoryManagerSupporting1DImageFromBufferWhenNoBufferThenCreatesImage) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; auto storeMem = imageDesc.mem_object; imageDesc.mem_object = nullptr; std::unique_ptr imageFromBuffer(createImage()); EXPECT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = storeMem; } TEST_F(Image2dFromBufferTest, givenBufferWhenImageFromBufferThenIsImageFromBufferSetAndAllocationTypeIsBuffer) { cl_int 
errCode = 0; auto buffer = Buffer::create(&context, 0, 1, nullptr, errCode); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto memObj = imageDesc.mem_object; imageDesc.mem_object = buffer; std::unique_ptr imageFromBuffer(createImage()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(imageFromBuffer.get()->isImageFromBuffer()); EXPECT_TRUE(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY == imageFromBuffer.get()->getGraphicsAllocation()->getAllocationType()); buffer->release(); imageDesc.mem_object = memObj; } HWTEST_F(Image2dFromBufferTest, givenBufferWhenImageFromBufferThenIsImageFromBufferSetAndAllocationTypeIsBufferNullptr) { class MockHwHelperHw : public HwHelperHw { public: bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override { return false; } }; auto raiiFactory = RAIIHwHelperFactory(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); cl_int errCode = CL_SUCCESS; auto buffer = Buffer::create(&context, 0, 1, nullptr, errCode); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto memObj = imageDesc.mem_object; imageDesc.mem_object = buffer; Image *imageFromBuffer = createImage(); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); EXPECT_EQ(imageFromBuffer, nullptr); buffer->release(); imageDesc.mem_object = memObj; } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image2d_tests.cpp000066400000000000000000000072531363734646600265070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; static const unsigned int testImageDimensions = 32; class CreateImage2DTest : public DeviceFixture, public testing::TestWithParam { public: CreateImage2DTest() { 
} protected: void SetUp() override { DeviceFixture::SetUp(); types = GetParam(); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = types; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on if (types == CL_MEM_OBJECT_IMAGE2D_ARRAY) { imageDesc.image_array_size = 10; } context = new MockContext(pClDevice); } void TearDown() override { delete context; DeviceFixture::TearDown(); } Image *createImageWithFlags(cl_mem_flags flags) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); return Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; typedef CreateImage2DTest CreateImage2DType; HWTEST_P(CreateImage2DType, validTypes) { auto image = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto imgDesc = image->getImageDesc(); EXPECT_NE(0u, imgDesc.image_width); EXPECT_NE(0u, imgDesc.image_height); EXPECT_EQ(0u, imgDesc.image_depth); EXPECT_NE(0u, imgDesc.image_row_pitch); EXPECT_GE(imgDesc.image_slice_pitch, imgDesc.image_row_pitch); if (types == CL_MEM_OBJECT_IMAGE2D) { EXPECT_EQ(0u, imgDesc.image_array_size); } else if (types == CL_MEM_OBJECT_IMAGE2D_ARRAY) { EXPECT_NE(0u, imgDesc.image_array_size); } else { ASSERT_TRUE(false); } EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); 
EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; auto imageHw = static_cast *>(image); EXPECT_EQ(SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, imageHw->surfaceType); SurfaceOffsets surfaceOffsets; image->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(0u, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_EQ(0u, surfaceOffsets.yOffsetForUVplane); delete image; } static cl_mem_object_type Image2DTypes[] = { CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( CreateImage2DTest_Create, CreateImage2DType, testing::ValuesIn(Image2DTypes)); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image3d_tests.cpp000066400000000000000000000114101363734646600264760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "test.h" using namespace NEO; static const unsigned int testImageDimensions = 31; class CreateImage3DTest : public DeviceFixture, public testing::TestWithParam { public: CreateImage3DTest() {} protected: void SetUp() override { DeviceFixture::SetUp(); context = new MockContext(pClDevice); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = testImageDimensions; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; 
imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { delete context; DeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; HWTEST_F(CreateImage3DTest, validTypes) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto imgDesc = image->getImageDesc(); EXPECT_NE(0u, imgDesc.image_width); EXPECT_NE(0u, imgDesc.image_height); EXPECT_NE(0u, imgDesc.image_depth); EXPECT_NE(0u, imgDesc.image_slice_pitch); EXPECT_EQ(0u, imgDesc.image_array_size); EXPECT_NE(0u, imgDesc.image_row_pitch); EXPECT_EQ(image->getCubeFaceIndex(), static_cast(__GMM_NO_CUBE_MAP)); EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; auto imageHw = static_cast *>(image); EXPECT_EQ(SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, imageHw->surfaceType); delete image; } HWTEST_F(CreateImage3DTest, calculate3dImageQpitchTiledAndLinear) { bool defaultTiling = DebugManager.flags.ForceLinearImages.get(); imageDesc.image_height = 1; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto imgInfo = MockGmm::initImgInfo(imageDesc, 0, surfaceFormat); MockGmm::queryImgParams(context->getDevice(0)->getGmmClientContext(), imgInfo); auto image = Image::create( context, {}, 0, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); 
EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); delete image; DebugManager.flags.ForceLinearImages.set(!defaultTiling); // query again surfaceFormat = Image::getSurfaceFormatFromTable(0, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); MockGmm::queryImgParams(context->getDevice(0)->getGmmClientContext(), imgInfo); image = Image::create( context, {}, 0, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(image->getSize(), imgInfo.size); EXPECT_EQ(image->getImageDesc().image_slice_pitch, imgInfo.slicePitch); EXPECT_EQ(image->getImageDesc().image_row_pitch, imgInfo.rowPitch); EXPECT_GE(image->getImageDesc().image_slice_pitch, image->getImageDesc().image_row_pitch); EXPECT_EQ(image->getQPitch(), imgInfo.qPitch); delete image; DebugManager.flags.ForceLinearImages.set(defaultTiling); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_array_size_tests.cpp000066400000000000000000000211141363734646600305010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; static const unsigned int testImageDimensions = 17; class ImageArraySizeTest : public DeviceFixture, public testing::TestWithParam { public: ImageArraySizeTest() { } protected: void SetUp() override { DeviceFixture::SetUp(); types = 
GetParam(); // clang-format off imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_type = types; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 10; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on context = new MockContext(pClDevice); if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { imageDesc.mem_object = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, testImageDimensions, nullptr, nullptr); } } void TearDown() override { if (types == CL_MEM_OBJECT_IMAGE1D_BUFFER) { clReleaseMemObject(imageDesc.mem_object); } delete context; DeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext *context; cl_mem_object_type types = 0; }; typedef ImageArraySizeTest CreateImageArraySize; HWTEST_P(CreateImageArraySize, arrayTypes) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); if (types == CL_MEM_OBJECT_IMAGE1D_ARRAY) { EXPECT_TRUE(image->isMemObjZeroCopy()); auto address = image->getCpuAddress(); EXPECT_NE(nullptr, address); } else if (types == CL_MEM_OBJECT_IMAGE2D_ARRAY) { EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); } ASSERT_EQ(10u, image->getImageDesc().image_array_size); delete image; } static cl_mem_object_type ArrayImageTypes[] = { CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTest_Create, 
CreateImageArraySize, testing::ValuesIn(ArrayImageTypes)); typedef ImageArraySizeTest CreateImageNonArraySize; HWTEST_P(CreateImageNonArraySize, NonArrayTypes) { cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); if (types == CL_MEM_OBJECT_IMAGE2D || types == CL_MEM_OBJECT_IMAGE3D) { EXPECT_EQ(!UnitTestHelper::tiledImagesSupported, image->isMemObjZeroCopy()); } else { EXPECT_TRUE(image->isMemObjZeroCopy()); auto address = image->getCpuAddress(); EXPECT_NE(nullptr, address); } ASSERT_EQ(0u, image->getImageDesc().image_array_size); delete image; } static cl_mem_object_type NonArrayImageTypes[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTest_Create, CreateImageNonArraySize, testing::ValuesIn(NonArrayImageTypes)); typedef ImageArraySizeTest CreateImageSize; HWTEST_P(CreateImageSize, GivenImageTypeAndRegionWhenAskedForHostPtrSizeThenProperSizeIsBeingReturned) { size_t region[3] = {100, 200, 300}; auto rowPitch = 1000; auto slicePitch = 4000; auto pixelSize = 4; auto imageType = GetParam(); auto size = Image::calculateHostPtrSize(region, rowPitch, slicePitch, pixelSize, imageType); if ((imageType == CL_MEM_OBJECT_IMAGE1D) || (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { EXPECT_EQ(region[0] * pixelSize, size); } else if (imageType == CL_MEM_OBJECT_IMAGE2D) { EXPECT_EQ((region[1] - 1) * rowPitch + region[0] * pixelSize, size); } else if (imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) { EXPECT_EQ((region[1] - 1) * slicePitch + region[0] * pixelSize, size); } else if ((imageType == CL_MEM_OBJECT_IMAGE3D) || (imageType == 
CL_MEM_OBJECT_IMAGE2D_ARRAY)) { EXPECT_EQ((region[2] - 1) * slicePitch + (region[1] - 1) * rowPitch + region[0] * pixelSize, size); } else { EXPECT_EQ(0u, size); } } typedef ImageArraySizeTest CreateImageOffset; HWTEST_P(CreateImageOffset, GivenImageTypeAndRegionWhenAskedForHostPtrOffsetThenProperOffsetIsBeingReturned) { size_t region[3] = {100, 1, 1}; size_t origin[3] = {0, 0, 0}; auto rowPitch = 1000; auto slicePitch = 0; auto pixelSize = 4; size_t imageOffset; auto imageType = GetParam(); switch (imageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_IMAGE2D: region[1] = 200; Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[1] * rowPitch + origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: slicePitch = 4000; Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[1] * slicePitch + origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_IMAGE3D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: region[2] = 300; Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(origin[2] * slicePitch + origin[1] * rowPitch + origin[0] * pixelSize, imageOffset); break; case CL_MEM_OBJECT_BUFFER: Image::calculateHostPtrOffset(&imageOffset, origin, region, rowPitch, slicePitch, imageType, pixelSize); EXPECT_EQ(0u, imageOffset); break; } } typedef ImageArraySizeTest CheckImageType; TEST_P(CheckImageType, GivenImageTypeWhenImageTypeIsCheckedThenProperValueIsReturned) { auto imageType = GetParam(); switch (imageType) { case CL_MEM_OBJECT_IMAGE2D: EXPECT_TRUE(Image::isImage2d(imageType)); EXPECT_TRUE(Image::isImage2dOr2dArray(imageType)); break; case 
CL_MEM_OBJECT_IMAGE2D_ARRAY: EXPECT_FALSE(Image::isImage2d(imageType)); EXPECT_TRUE(Image::isImage2dOr2dArray(imageType)); break; default: EXPECT_FALSE(Image::isImage2d(imageType)); EXPECT_FALSE(Image::isImage2dOr2dArray(imageType)); break; } } static cl_mem_object_type AllImageTypes[] = { 0, //negative scenario CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTest_Create, CreateImageSize, testing::ValuesIn(AllImageTypes)); static cl_mem_object_type AllImageTypesWithBadOne[] = { 0, //negative scenario CL_MEM_OBJECT_BUFFER, CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D_ARRAY}; INSTANTIATE_TEST_CASE_P( ImageArraySizeTest_Create, CreateImageOffset, testing::ValuesIn(AllImageTypesWithBadOne)); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_compression_fixture.h000066400000000000000000000033221363734646600306640ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" using namespace NEO; class ImageCompressionTests : public ::testing::Test { public: class MyMemoryManager : public MockMemoryManager { public: using MockMemoryManager::MockMemoryManager; GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData) override { mockMethodCalled = true; capturedImgInfo = *allocationData.imgInfo; return OsAgnosticMemoryManager::allocateGraphicsMemoryForImage(allocationData); } 
ImageInfo capturedImgInfo = {}; bool mockMethodCalled = false; }; void SetUp() override { mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); myMemoryManager = new MyMemoryManager(*mockDevice->getExecutionEnvironment()); mockDevice->injectMemoryManager(myMemoryManager); mockContext = make_releaseable(mockDevice.get()); } std::unique_ptr mockDevice; ReleaseableObjectPtr mockContext; MyMemoryManager *myMemoryManager = nullptr; cl_image_desc imageDesc = {}; cl_image_format imageFormat{CL_R, CL_UNSIGNED_INT8}; cl_mem_flags flags = CL_MEM_READ_WRITE; cl_int retVal = CL_SUCCESS; }; compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_format_tests.cpp000066400000000000000000000042641363734646600276300ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/image.h" #include "gtest/gtest.h" using namespace NEO; struct MockImage : public Image { using Image::hasAlphaChannel; }; typedef ::testing::TestWithParam> HasAlphaChannelTest; TEST_P(HasAlphaChannelTest, imageFormatHasAlphaChannel) { cl_image_format imageFormat; bool expectedValue; std::tie(imageFormat.image_channel_order, expectedValue) = GetParam(); EXPECT_EQ(expectedValue, MockImage::hasAlphaChannel(&imageFormat)); } std::tuple paramsForAlphaChannelTests[] = { std::make_tuple(CL_R, false), std::make_tuple(CL_A, true), std::make_tuple(CL_RG, false), std::make_tuple(CL_RA, true), std::make_tuple(CL_RGB, false), std::make_tuple(CL_RGBA, true), std::make_tuple(CL_BGRA, true), std::make_tuple(CL_ARGB, true), std::make_tuple(CL_INTENSITY, true), std::make_tuple(CL_LUMINANCE, false), std::make_tuple(CL_Rx, true), std::make_tuple(CL_RGx, true), std::make_tuple(CL_RGBx, true), std::make_tuple(CL_DEPTH, false), std::make_tuple(CL_DEPTH_STENCIL, false), std::make_tuple(CL_sRGB, false), std::make_tuple(CL_sRGBx, true), std::make_tuple(CL_sRGBA, true), std::make_tuple(CL_sBGRA, true), 
std::make_tuple(CL_ABGR, true), std::make_tuple(CL_NV12_INTEL, false), std::make_tuple(CL_YUYV_INTEL, false), std::make_tuple(CL_UYVY_INTEL, false), std::make_tuple(CL_YVYU_INTEL, false), std::make_tuple(CL_VYUY_INTEL, false)}; INSTANTIATE_TEST_CASE_P( ImageFormatTests, HasAlphaChannelTest, ::testing::ValuesIn(paramsForAlphaChannelTests)); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_from_subbuffer_tests.cpp000066400000000000000000000100101363734646600313300ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include using namespace NEO; // Tests for cl_khr_image2d_from_buffer class ImageFromSubBufferTest : public DeviceFixture, public ::testing::Test { public: ImageFromSubBufferTest() {} protected: void SetUp() override { imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; imageDesc.image_array_size = 0; imageDesc.image_depth = 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_height = 128 / 2; imageDesc.image_width = 256 / 2; imageDesc.num_mip_levels = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_samples = 0; size = 128 * 256 * 4; hostPtr = alignedMalloc(size, 16); ASSERT_NE(nullptr, hostPtr); parentBuffer = clCreateBuffer(&context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size, hostPtr, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); const cl_buffer_region region = {size / 2, size / 2}; subBuffer = clCreateSubBuffer(parentBuffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, reinterpret_cast(®ion), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); imageDesc.mem_object = 
subBuffer; ASSERT_NE(nullptr, imageDesc.mem_object); } void TearDown() override { clReleaseMemObject(subBuffer); clReleaseMemObject(parentBuffer); alignedFree(hostPtr); } Image *createImage() { cl_mem_flags flags = CL_MEM_READ_ONLY; auto surfaceFormat = (ClSurfaceFormatInfo *)Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); return Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, NULL, retVal); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; MockContext context; void *hostPtr; size_t size; cl_mem parentBuffer; cl_mem subBuffer; }; TEST_F(ImageFromSubBufferTest, CreateImage2dFromSubBufferWithOffset) { std::unique_ptr imageFromSubBuffer(createImage()); EXPECT_NE(nullptr, imageFromSubBuffer); SurfaceOffsets surfaceOffsets = {0}; imageFromSubBuffer->getSurfaceOffsets(surfaceOffsets); uint32_t offsetExpected = static_cast(size) / 2; EXPECT_EQ(offsetExpected, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_EQ(0u, surfaceOffsets.yOffsetForUVplane); } TEST_F(ImageFromSubBufferTest, givenSubbufferWithOffsetGreaterThan4GBWhenImageIsCreatedThenSurfaceOffsetsOffsetHasCorrectValue) { Buffer *buffer = castToObject(parentBuffer); uint64_t offsetExpected = 0; cl_buffer_region region = {0, size / 2}; if (is64bit) { offsetExpected = 8 * GB; region = {static_cast(offsetExpected), size / 2}; } Buffer *subBufferWithBigOffset = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); imageDesc.mem_object = subBufferWithBigOffset; std::unique_ptr imageFromSubBuffer(createImage()); EXPECT_NE(nullptr, imageFromSubBuffer); SurfaceOffsets surfaceOffsets = {0}; imageFromSubBuffer->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(offsetExpected, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, 
surfaceOffsets.yOffset); EXPECT_EQ(0u, surfaceOffsets.yOffsetForUVplane); subBufferWithBigOffset->release(); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_redescribe_tests.cpp000066400000000000000000000223011363734646600304370ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" #include "igfxfmid.h" extern GFXCORE_FAMILY renderCoreFamily; using namespace NEO; class ImageRedescribeTest : public testing::TestWithParam> { protected: void SetUp() override { cl_image_format imageFormat; cl_image_desc imageDesc; std::tie(indexImageFormat, ImageType) = this->GetParam(); ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); auto &surfaceFormatInfo = readWriteSurfaceFormats[indexImageFormat]; imageFormat = surfaceFormatInfo.OCLImageFormat; auto imageHeight = ImageType == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 0 : 32; auto imageArrays = ImageType == CL_MEM_OBJECT_IMAGE1D_ARRAY || ImageType == CL_MEM_OBJECT_IMAGE2D_ARRAY ? 
7 : 1; imageDesc.image_type = ImageType; imageDesc.image_width = 32; imageDesc.image_height = imageHeight; imageDesc.image_depth = 1; imageDesc.image_array_size = imageArrays; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; retVal = CL_INVALID_VALUE; cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); image.reset(Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); } cl_int retVal = CL_SUCCESS; MockContext context; std::unique_ptr image; size_t indexImageFormat = 0; uint32_t ImageType; }; TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenItContainsProperFormatFlagsAddressAndSameElementSizeInBytes) { std::unique_ptr imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); ASSERT_NE(image, imageNew); EXPECT_EQ(static_cast(CL_MEM_USE_HOST_PTR), imageNew->getMemoryPropertiesFlags() & CL_MEM_USE_HOST_PTR); EXPECT_EQ(image->getCpuAddress(), imageNew->getCpuAddress()); EXPECT_NE(static_cast(CL_FLOAT), imageNew->getSurfaceFormatInfo().OCLImageFormat.image_channel_data_type); EXPECT_NE(static_cast(CL_HALF_FLOAT), imageNew->getSurfaceFormatInfo().OCLImageFormat.image_channel_data_type); EXPECT_EQ(imageNew->getSurfaceFormatInfo().surfaceFormat.NumChannels * imageNew->getSurfaceFormatInfo().surfaceFormat.PerChannelSizeInBytes, imageNew->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); EXPECT_EQ(image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes, imageNew->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); } TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenNewImageFormatHasNumberOfChannelsDependingOnBytesPerPixel) { std::unique_ptr 
imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); size_t bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.NumChannels * image->getSurfaceFormatInfo().surfaceFormat.PerChannelSizeInBytes; size_t channelsExpected = 0; switch (bytesPerPixel) { case 1: case 2: case 4: channelsExpected = 1; break; case 8: channelsExpected = 2; break; case 16: channelsExpected = 4; break; } EXPECT_EQ(channelsExpected, imageNew->getSurfaceFormatInfo().surfaceFormat.NumChannels); } TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenNewImageDimensionsAreMatchingTheRedescribedImage) { std::unique_ptr imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); auto bytesWide = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * image->getImageDesc().image_width; auto bytesWideNew = imageNew->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * imageNew->getImageDesc().image_width; EXPECT_EQ(bytesWide, bytesWideNew); EXPECT_EQ(imageNew->getImageDesc().image_height, image->getImageDesc().image_height); EXPECT_EQ(imageNew->getImageDesc().image_array_size, image->getImageDesc().image_array_size); EXPECT_EQ(imageNew->getImageDesc().image_depth, image->getImageDesc().image_depth); EXPECT_EQ(imageNew->getImageDesc().image_type, image->getImageDesc().image_type); EXPECT_EQ(imageNew->getQPitch(), image->getQPitch()); EXPECT_EQ(imageNew->getImageDesc().image_width, image->getImageDesc().image_width); } TEST_P(ImageRedescribeTest, givenImageWhenItIsRedescribedThenCubeFaceIndexIsProperlySet) { std::unique_ptr imageNew(image->redescribe()); ASSERT_NE(nullptr, imageNew); ASSERT_EQ(imageNew->getCubeFaceIndex(), __GMM_NO_CUBE_MAP); for (uint32_t n = __GMM_CUBE_FACE_POS_X; n < __GMM_MAX_CUBE_FACE; n++) { image->setCubeFaceIndex(n); imageNew.reset(image->redescribe()); ASSERT_NE(nullptr, imageNew); ASSERT_EQ(imageNew->getCubeFaceIndex(), n); imageNew.reset(image->redescribeFillImage()); ASSERT_NE(nullptr, imageNew); 
ASSERT_EQ(imageNew->getCubeFaceIndex(), n); } } TEST_P(ImageRedescribeTest, givenImageWithMaxSizesWhenItIsRedescribedThenNewImageDoesNotExceedMaxSizes) { cl_image_format imageFormat; cl_image_desc imageDesc; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); const auto &caps = device->getDeviceInfo(); const auto &sharedCaps = device->getSharedDeviceInfo(); auto memoryManager = (OsAgnosticMemoryManager *)context.getMemoryManager(); memoryManager->turnOnFakingBigAllocations(); ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); auto &surfaceFormatInfo = readWriteSurfaceFormats[indexImageFormat]; imageFormat = surfaceFormatInfo.OCLImageFormat; auto imageWidth = 1; auto imageHeight = 1; auto imageArrays = ImageType == CL_MEM_OBJECT_IMAGE1D_ARRAY || ImageType == CL_MEM_OBJECT_IMAGE2D_ARRAY ? 7 : 1; size_t maxImageWidth = 0; size_t maxImageHeight = 0; switch (ImageType) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: imageWidth = 16384; maxImageWidth = static_cast(sharedCaps.maxMemAllocSize); maxImageHeight = 1; break; case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: imageHeight = 16384; maxImageWidth = sharedCaps.image2DMaxWidth; maxImageHeight = sharedCaps.image2DMaxHeight; break; case CL_MEM_OBJECT_IMAGE3D: imageHeight = 16384; maxImageWidth = caps.image3DMaxWidth; maxImageHeight = caps.image3DMaxHeight; break; } imageDesc.image_type = ImageType; imageDesc.image_width = imageWidth; imageDesc.image_height = imageHeight; imageDesc.image_depth = 1; imageDesc.image_array_size = imageArrays; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto bigImage = std::unique_ptr(Image::create(&context, 
MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); std::unique_ptr imageNew(bigImage->redescribe()); ASSERT_NE(nullptr, imageNew); EXPECT_GE(maxImageWidth, imageNew->getImageDesc().image_width); EXPECT_GE(maxImageHeight, imageNew->getImageDesc().image_height); } static uint32_t ImageTypes[] = { CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; decltype(SurfaceFormats::readWrite().size()) readWriteSurfaceFormatsStart = 0u; INSTANTIATE_TEST_CASE_P( Redescribe, ImageRedescribeTest, testing::Combine( ::testing::Range(readWriteSurfaceFormatsStart, SurfaceFormats::readWrite().size()), ::testing::ValuesIn(ImageTypes))); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_release_mapped_ptr_tests.cpp000066400000000000000000000105361363734646600321720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "test.h" using namespace NEO; template class MyMockCommandQueue : public CommandQueueHw { public: MyMockCommandQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { passedBlockingWrite = blockingWrite; passedPtr = (void 
*)ptr; enqueueWriteImageCalled++; return CL_SUCCESS; } cl_int finish() override { finishCalled++; return CL_SUCCESS; } void *passedPtr = nullptr; cl_bool passedBlockingWrite = CL_INVALID_VALUE; unsigned int enqueueWriteImageCalled = 0; unsigned int finishCalled = 0; }; class ImageUnmapTest : public ::testing::Test { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context = std::make_unique(device.get()); image.reset(ImageHelper>::create(context.get())); } std::unique_ptr device; std::unique_ptr context; std::unique_ptr image; }; HWTEST_F(ImageUnmapTest, givenImageWhenUnmapMemObjIsCalledThenEnqueueNonBlockingMapImage) { std::unique_ptr> commandQueue(new MyMockCommandQueue(context.get(), device.get())); void *ptr = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); MemObjOffsetArray origin = {{0, 0, 0}}; MemObjSizeArray region = {{1, 1, 1}}; image->setAllocatedMapPtr(ptr); cl_map_flags mapFlags = CL_MAP_WRITE; image->addMappedPtr(ptr, 1, mapFlags, region, origin, 0); AllocationProperties properties{0, false, MemoryConstants::cacheLineSize, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false}; auto allocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties, ptr); image->setMapAllocation(allocation); commandQueue->enqueueUnmapMemObject(image.get(), ptr, 0, nullptr, nullptr); if (UnitTestHelper::tiledImagesSupported) { EXPECT_EQ(ptr, commandQueue->passedPtr); EXPECT_EQ((cl_bool)CL_FALSE, commandQueue->passedBlockingWrite); EXPECT_EQ(1u, commandQueue->enqueueWriteImageCalled); } else { EXPECT_EQ(0u, commandQueue->enqueueWriteImageCalled); } } HWTEST_F(ImageUnmapTest, givenImageWhenEnqueueMapImageIsCalledTwiceThenAllocatedMemoryPtrIsNotOverridden) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } cl_int retVal; size_t origin[] = {0, 0, 0}; size_t region[] = {1, 1, 1}; auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr commandQueue(CommandQueue::create(context.get(), device.get(), nullptr, false, retVal)); commandQueue->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_NE(nullptr, image->getAllocatedMapPtr()); void *ptr = image->getAllocatedMapPtr(); EXPECT_EQ(alignUp(ptr, MemoryConstants::pageSize), ptr); commandQueue->enqueueMapImage(image.get(), CL_FALSE, 0, origin, region, nullptr, nullptr, 0, nullptr, nullptr, retVal); EXPECT_EQ(ptr, image->getAllocatedMapPtr()); commandQueue->enqueueUnmapMemObject(image.get(), ptr, 0, nullptr, nullptr); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp000066400000000000000000001473671363734646600300000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gmock/gmock.h" using namespace NEO; using namespace ::testing; class ImageSetArgTest : 
public DeviceFixture, public testing::Test { public: ImageSetArgTest() { memset(&kernelHeader, 0, sizeof(kernelHeader)); } protected: template void SetupChannels(int imgChannelOrder) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; expectedChannelRed = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; expectedChannelGreen = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; if (imgChannelOrder == CL_A) { expectedChannelRed = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; expectedChannelGreen = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } else if (imgChannelOrder == CL_RA || imgChannelOrder == CL_R || imgChannelOrder == CL_Rx) { expectedChannelGreen = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } else if (imgChannelOrder == CL_RG || imgChannelOrder == CL_RGx) { expectedChannelBlue = RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO; } } void SetUp() override { DeviceFixture::SetUp(); pKernelInfo = std::make_unique(); // define kernel info kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; // setup kernel arg offsets pKernelInfo->kernelArgInfo.resize(2); pKernelInfo->kernelArgInfo[1].offsetHeap = 0x00; pKernelInfo->kernelArgInfo[0].offsetHeap = 0x40; pKernelInfo->kernelArgInfo[1].isImage = true; pKernelInfo->kernelArgInfo[0].isImage = true; program = std::make_unique(*pDevice->getExecutionEnvironment()); pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); context = new 
MockContext(pClDevice); srcImage = Image3dHelper<>::create(context); ASSERT_NE(nullptr, srcImage); expectedChannelRed = 0; expectedChannelGreen = 0; expectedChannelBlue = 0; } void TearDown() override { delete srcImage; delete pKernel; delete context; DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockContext *context; std::unique_ptr program; MockKernel *pKernel = nullptr; SKernelBinaryHeaderCommon kernelHeader; std::unique_ptr pKernelInfo; char surfaceStateHeap[0x80]; Image *srcImage = nullptr; int expectedChannelRed; int expectedChannelGreen; int expectedChannelBlue; }; HWTEST_F(ImageSetArgTest, setKernelArgImage) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); srcImage->setImageArg(const_cast(surfaceState), false, 0); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(ImageSetArgTest, setKernelArgImageUsingMediaBlockImage) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; srcImage->setImageArg(&surfaceState, true, 0); auto computedWidth = surfaceState.getWidth(); auto expectedWidth = (srcImage->getImageDesc().image_width * srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes) / sizeof(uint32_t); EXPECT_EQ(expectedWidth, computedWidth); } HWTEST_F(ImageSetArgTest, setKernelArgImageUsingNormalImage) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; srcImage->setImageArg(&surfaceState, true, 0); auto computedWidth = surfaceState.getWidth(); EXPECT_EQ(srcImage->getImageDesc().image_width, computedWidth); EXPECT_EQ(0u, surfaceState.getMipCountLod()); } HWTEST_F(ImageSetArgTest, 
givenImageWhenSettingMipTailStartLodThenProgramValueFromGmmResourceinfo) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; if (pDevice->getHardwareInfo().platform.eRenderCoreFamily == IGFX_GEN8_CORE) { GTEST_SKIP(); } RENDER_SURFACE_STATE surfaceState = {}; const uint32_t mipTailStartLod = 4; auto gmm = srcImage->getGraphicsAllocation()->getDefaultGmm(); EXPECT_NE(nullptr, gmm); auto mockGmmResourceInfo = static_cast(gmm->gmmResourceInfo.get()); mockGmmResourceInfo->setMipTailStartLod(mipTailStartLod); srcImage->setImageArg(&surfaceState, false, 0); EXPECT_EQ(mipTailStartLod, surfaceState.getMipTailStartLod()); // default value delete gmm; srcImage->getGraphicsAllocation()->setDefaultGmm(nullptr); srcImage->setImageArg(&surfaceState, false, 0); EXPECT_EQ(0u, surfaceState.getMipTailStartLod()); } HWTEST_F(ImageSetArgTest, givenCubeMapIndexWhenSetKernelArgImageIsCalledThenModifySurfaceState) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; uint32_t cubeFaceIndex = 2; Image *src2dImage = Image2dHelper<>::create(context); src2dImage->setCubeFaceIndex(cubeFaceIndex); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); src2dImage->setImageArg(const_cast(surfaceState), false, 0); auto renderTargetViewExtent = surfaceState->getRenderTargetViewExtent(); auto minimumArrayElement = surfaceState->getMinimumArrayElement(); auto isImageArray = surfaceState->getSurfaceArray(); auto depth = surfaceState->getDepth(); EXPECT_EQ(renderTargetViewExtent, 1u); EXPECT_EQ(minimumArrayElement, cubeFaceIndex); EXPECT_EQ(depth, (__GMM_MAX_CUBE_FACE - cubeFaceIndex)); EXPECT_TRUE(isImageArray); delete src2dImage; } struct ImageSetArgSurfaceArrayTest : ImageSetArgTest { template void testSurfaceArrayProgramming(cl_mem_object_type imageType, size_t imageArraySize, bool expectedSurfaceArray) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; 
RENDER_SURFACE_STATE surfaceState; cl_image_desc imageDesc = Image2dDefaults::imageDesc; imageDesc.image_array_size = imageArraySize; imageDesc.image_type = imageType; std::unique_ptr image{Image2dHelper<>::create(context, &imageDesc)}; image->setCubeFaceIndex(__GMM_NO_CUBE_MAP); image->setImageArg(&surfaceState, false, 0); EXPECT_EQ(expectedSurfaceArray, surfaceState.getSurfaceArray()); } }; HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage1DArrayAndImageArraySizeIsZeroWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_ARRAY, 0u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage2DArrayAndImageArraySizeIsZeroWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE2D_ARRAY, 0u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage1DArrayAndImageArraySizeIsOneWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_ARRAY, 1u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage2DArrayAndImageArraySizeIsOneWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE2D_ARRAY, 1u, false); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage1DArrayAndImageArraySizeIsGreaterThanOneWhenCallingSetImageArgThenProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_ARRAY, 2u, true); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenImage2DArrayAndImageArraySizeIsGreaterThanOneWhenCallingSetImageArgThenProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE2D_ARRAY, 2u, true); } HWTEST_F(ImageSetArgSurfaceArrayTest, givenNonArrayImageWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { testSurfaceArrayProgramming(CL_MEM_OBJECT_IMAGE1D_BUFFER, 2u, false); } HWTEST_F(ImageSetArgTest, givenImageArraySizeGreaterThanOneButTypeIsNotImageArrayWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { MockContext context; McsSurfaceInfo mcsSurfaceInfo = {}; 
MockGraphicsAllocation *allocation = new MockGraphicsAllocation(0, 0x1000); ImageInfo imageInfo = {}; ClSurfaceFormatInfo surfaceFormatInfo{}; surfaceFormatInfo.surfaceFormat.GMMSurfaceFormat = GMM_FORMAT_B8G8R8A8_UNORM; imageInfo.surfaceFormat = &surfaceFormatInfo.surfaceFormat; cl_image_desc imageDesc = Image2dDefaults::imageDesc; imageDesc.image_array_size = 3u; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; imageInfo.imgDesc = Image::convertDescriptor(imageDesc); imageInfo.plane = GMM_NO_PLANE; auto gmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), imageInfo); allocation->setDefaultGmm(gmm.release()); auto image = std::unique_ptr{Image::createSharedImage( &context, nullptr, mcsSurfaceInfo, allocation, nullptr, CL_MEM_READ_WRITE, &surfaceFormatInfo, imageInfo, 0, 0, 0)}; image->setCubeFaceIndex(__GMM_NO_CUBE_MAP); typename FamilyType::RENDER_SURFACE_STATE surfaceState{}; image->setImageArg(&surfaceState, false, 0); EXPECT_FALSE(surfaceState.getSurfaceArray()); } HWTEST_F(ImageSetArgTest, givenNonCubeMapIndexWhenSetKernelArgImageIsCalledThenDontModifySurfaceState) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(srcImage->getCubeFaceIndex(), __GMM_NO_CUBE_MAP); srcImage->setImageArg(const_cast(surfaceState), false, 0); auto renderTargetViewExtent = surfaceState->getRenderTargetViewExtent(); auto minimumArrayElement = surfaceState->getMinimumArrayElement(); auto isImageArray = surfaceState->getSurfaceArray(); auto depth = surfaceState->getDepth(); auto hAlign = static_cast(surfaceState->getSurfaceHorizontalAlignment()); auto vAlign = static_cast(surfaceState->getSurfaceVerticalAlignment()); auto expectedHAlign = static_cast(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4); auto expectedVAlign = static_cast(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4); 
// 3D image EXPECT_EQ(renderTargetViewExtent, srcImage->getImageDesc().image_depth); EXPECT_EQ(minimumArrayElement, 0u); EXPECT_EQ(depth, srcImage->getImageDesc().image_depth); EXPECT_EQ(expectedHAlign, hAlign); EXPECT_EQ(expectedVAlign, vAlign); EXPECT_FALSE(isImageArray); } HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFullGPuPointerIsPatched) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto graphicsAllocation = srcImage->getGraphicsAllocation(); graphicsAllocation->setGpuBaseAddress(12345u); srcImage->setImageArg(const_cast(surfaceState), false, 0); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(ImageSetArgTest, clSetKernelArgImage) { auto gmmHelper = pDevice->getGmmHelper(); auto imageMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; cl_mem memObj = srcImage; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; SetupChannels(srcImage->getImageFormat().image_channel_order); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); EXPECT_EQ(srcImage->getImageDesc().image_width, surfaceState->getWidth()); EXPECT_EQ(srcImage->getImageDesc().image_height, surfaceState->getHeight()); EXPECT_EQ(srcImage->getImageDesc().image_depth, surfaceState->getDepth()); EXPECT_EQ(srcImage->getImageDesc().image_depth, 
surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(rPitch, surfaceState->getSurfacePitch()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(srcImage->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, surfaceState->getSurfaceType()); EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); EXPECT_EQ(imageMocs, surfaceState->getMemoryObjectControlState()); EXPECT_EQ(0u, surfaceState->getCoherencyType()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } HWTEST_F(ImageSetArgTest, givenImage2DWithMipMapsWhenSetKernelArgIsCalledThenMipLevelAndMipCountIsSet) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; cl_mem memObj = srcImage; int mipLevel = 2; uint32_t mipCount = 3; srcImage->setBaseMipLevel(mipLevel); srcImage->setMipCount(mipCount); EXPECT_EQ(mipLevel, srcImage->peekBaseMipLevel()); EXPECT_EQ(3u, srcImage->peekMipCount()); retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ((uint32_t)mipLevel, surfaceState->getSurfaceMinLod()); EXPECT_EQ((uint32_t)mipCount, surfaceState->getMipCountLod() + 1); } HWTEST_F(ImageSetArgTest, clSetKernelArgImage2Darray) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; Image *image2Darray = Image2dArrayHelper<>::create(context); cl_mem memObj = image2Darray; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), 
&memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); size_t rPitch = srcImage->getImageDesc().image_row_pitch; SetupChannels(image2Darray->getImageFormat().image_channel_order); EXPECT_EQ(image2Darray->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); EXPECT_EQ(image2Darray->getImageDesc().image_width, surfaceState->getWidth()); EXPECT_EQ(image2Darray->getImageDesc().image_height, surfaceState->getHeight()); EXPECT_EQ(image2Darray->getImageDesc().image_array_size, surfaceState->getDepth()); EXPECT_EQ(image2Darray->getImageDesc().image_array_size, surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(rPitch, surfaceState->getSurfacePitch()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(image2Darray->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D, surfaceState->getSurfaceType()); EXPECT_TRUE((GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceArray()); EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } delete image2Darray; } HWTEST_F(ImageSetArgTest, clSetKernelArgImage1Darray) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; Image *image1Darray = Image1dArrayHelper<>::create(context); cl_mem memObj = image1Darray; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); 
auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); SetupChannels(image1Darray->getImageFormat().image_channel_order); EXPECT_EQ(image1Darray->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); EXPECT_EQ(image1Darray->getImageDesc().image_width, surfaceState->getWidth()); EXPECT_EQ(1u, surfaceState->getHeight()); EXPECT_EQ(image1Darray->getImageDesc().image_array_size, surfaceState->getDepth()); EXPECT_EQ(image1Darray->getImageDesc().image_array_size, surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(image1Darray->getImageDesc().image_row_pitch, surfaceState->getSurfacePitch()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(image1Darray->getGraphicsAllocation()->getDefaultGmm()->queryQPitch(GMM_RESOURCE_TYPE::RESOURCE_1D), surfaceState->getSurfaceQpitch()); EXPECT_EQ(image1Darray->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D, surfaceState->getSurfaceType()); EXPECT_TRUE((GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceArray()); EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } delete image1Darray; } HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithoutUnifiedAuxCapabilityThenProgramAuxFieldsForMultisamples) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = 
context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = Image2dHelper<>::create(context, &imgDesc); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image; EXPECT_FALSE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_FALSE(Image::isDepthFormat(image->getImageFormat())); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == (typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE)1); EXPECT_EQ(msi.pitch, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(msi.qPitch, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); EXPECT_EQ(mcsAlloc->getGpuAddress(), surfaceState->getAuxiliarySurfaceBaseAddress()); delete image; } HWTEST_F(ImageSetArgTest, givenDepthFormatWhenSetArgIsCalledThenProgramAuxFields) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {0, 0, 3}; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; cl_image_format imgFormat = {CL_DEPTH, CL_FLOAT}; auto image = Image2dHelper<>::create(context, &imgDesc, &imgFormat); image->setMcsSurfaceInfo(msi); cl_mem memObj = image; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); 
ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(1u, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); delete image; } HWTEST_F(ImageSetArgTest, givenMultisampledR32Floatx8x24DepthStencilFormatWhenSetArgIsCalledThenSetMssSurfaceStateStorageParam) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; McsSurfaceInfo msi = {0, 0, 3}; cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; cl_image_format imgFormat = {CL_DEPTH_STENCIL, CL_FLOAT}; std::unique_ptr image(ImageHelper>::create(context, &imgDesc, &imgFormat)); image->setMcsSurfaceInfo(msi); cl_mem memObj = image.get(); retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); EXPECT_TRUE(surfaceState->getSurfaceFormat() == SURFACE_FORMAT::SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS); } HWTEST_F(ImageSetArgTest, 
givenMcsAllocationAndRenderCompressionWhenSetArgOnMultisampledImgIsCalledThenProgramAuxFieldsWithMcsParams) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); image->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed = true; image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == (typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE)1); EXPECT_EQ(msi.pitch, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(msi.qPitch, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); EXPECT_EQ(mcsAlloc->getGpuAddress(), surfaceState->getAuxiliarySurfaceBaseAddress()); } HWTEST_F(ImageSetArgTest, givenDepthFormatAndRenderCompressionWhenSetArgOnMultisampledImgIsCalledThenDontProgramAuxFields) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {0, 0, 3}; cl_image_desc imgDesc = Image2dDefaults::imageDesc; cl_image_format imgFormat = {CL_DEPTH, 
CL_FLOAT}; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc, &imgFormat)); image->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed = true; image->setMcsSurfaceInfo(msi); cl_mem memObj = image.get(); retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(1u, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceQpitch()); EXPECT_EQ(msi.multisampleCount, static_cast(surfaceState->getNumberOfMultisamples())); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); } HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityThenProgramAuxFieldsForCcs) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); auto mockMcsGmmResInfo = reinterpret_cast *>(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); 
mockMcsGmmResInfo->setUnifiedAuxTranslationCapable(); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EXPECT_EQ(1u, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState->getAuxiliarySurfaceQpitch()); } HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityAndMcsThenAuxBaseAddressIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); auto mockMcsGmmResInfo = reinterpret_cast *>(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockMcsGmmResInfo->setUnifiedAuxTranslationCapable(); mockMcsGmmResInfo->setMultisampleControlSurface(); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_NE(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); } HWTEST_F(ImageSetArgTest, 
givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapabilityAndMcsThenAuxSurfPitchAndQPitchIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), nullptr, 1, false)); cl_image_desc imgDesc = Image2dDefaults::imageDesc; imgDesc.num_samples = 8; auto image = std::unique_ptr(Image2dHelper<>::create(context, &imgDesc)); image->setMcsSurfaceInfo(msi); image->setMcsAllocation(mcsAlloc); cl_mem memObj = image.get(); auto mockMcsGmmResInfo = reinterpret_cast *>(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockMcsGmmResInfo->setUnifiedAuxTranslationCapable(); mockMcsGmmResInfo->setMultisampleControlSurface(); uint32_t pitchValue = 4u; uint32_t qPitchValue = 12u; mockMcsGmmResInfo->setUnifiedAuxPitchTiles(pitchValue); mockMcsGmmResInfo->setAuxQPitch(qPitchValue); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(pitchValue, surfaceState->getAuxiliarySurfacePitch()); EXPECT_EQ(qPitchValue, surfaceState->getAuxiliarySurfaceQpitch()); } HWTEST_F(ImageSetArgTest, clSetKernelArgImage1Dbuffer) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto buffer = clCreateBuffer(context, 0, 4096 * 10, nullptr, nullptr); ASSERT_NE(nullptr, buffer); cl_image_desc imageDesc = {0}; imageDesc.buffer = buffer; imageDesc.image_width = 6400; // 2 * (1 << 21) + 5 * (1 << 7) + 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; cl_image_format 
imageFormat = {0}; imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; auto imageFromBuffer = clCreateImage(context, 0, &imageFormat, &imageDesc, nullptr, nullptr); ASSERT_NE(nullptr, imageFromBuffer); retVal = clSetKernelArg( pKernel, 0, sizeof(imageFromBuffer), &imageFromBuffer); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); auto image = castToObject(imageFromBuffer); EXPECT_EQ(image->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); // Width is 7 bits EXPECT_EQ(128u, surfaceState->getWidth()); // Height is 14 bits EXPECT_EQ(50u, surfaceState->getHeight()); // Depth is 11 bits EXPECT_EQ(1u, surfaceState->getDepth()); EXPECT_EQ(1u, surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch()); EXPECT_EQ(image->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfaceState->getSurfaceType()); EXPECT_FALSE((GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceArray()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); clReleaseMemObject(imageFromBuffer); clReleaseMemObject(buffer); } HWTEST_F(ImageSetArgTest, clSetKernelArgImageWithCLLuminanceFormat) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; Image *luminanceImage = Image3dHelper::create(context); 
cl_mem memObj = luminanceImage; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); //for CL_LUMINANCE format we override channels to RED to be spec complaint. EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectBlue()); EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } delete luminanceImage; } HWTEST_F(ImageSetArgTest, getKernelArgShouldReturnImage) { cl_mem memObj = srcImage; retVal = pKernel->setArg( 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memObj, pKernel->getKernelArg(0)); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } HWTEST_F(ImageSetArgTest, givenRenderCompressedResourceWhenSettingImgArgThenSetCorrectAuxParams) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; srcImage->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed = true; srcImage->setImageArg(&surfaceState, false, 0); EXPECT_TRUE(surfaceState.getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); EXPECT_EQ(1u, surfaceState.getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceQpitch()); } HWTEST_F(ImageSetArgTest, 
givenNonRenderCompressedResourceWhenSettingImgArgThenDontSetAuxParams) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; typedef typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE AUXILIARY_SURFACE_MODE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto gmm = srcImage->getGraphicsAllocation()->getDefaultGmm(); auto mockGmmResInfo = reinterpret_cast *>(gmm->gmmResourceInfo.get()); gmm->isRenderCompressed = false; EXPECT_CALL(*mockGmmResInfo, getUnifiedAuxSurfaceOffset(_)).Times(0); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceQpitch()); EXPECT_EQ(1u, surfaceState.getAuxiliarySurfacePitch()); srcImage->setImageArg(&surfaceState, false, 0); EXPECT_TRUE(surfaceState.getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); EXPECT_EQ(1u, surfaceState.getAuxiliarySurfacePitch()); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceQpitch()); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); } /* cl_intel_media_block_io */ class ImageMediaBlockSetArgTest : public ImageSetArgTest { protected: void SetUp() override { DeviceFixture::SetUp(); pKernelInfo = std::make_unique(); // define kernel info kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); pKernelInfo->heapInfo.pSsh = surfaceStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; pKernelInfo->usesSsh = true; // setup kernel arg offsets pKernelInfo->kernelArgInfo.resize(2); pKernelInfo->kernelArgInfo[1].offsetHeap = 0x00; pKernelInfo->kernelArgInfo[0].offsetHeap = 0x40; pKernelInfo->kernelArgInfo[1].isImage = true; pKernelInfo->kernelArgInfo[0].isImage = true; pKernelInfo->kernelArgInfo[1].isMediaBlockImage = true; pKernelInfo->kernelArgInfo[0].isMediaBlockImage = true; program = std::make_unique(*pDevice->getExecutionEnvironment()); pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, 
&Kernel::setArgImage); pKernel->setKernelArgHandler(1, &Kernel::setArgImage); context = new MockContext(pClDevice); srcImage = Image3dHelper<>::create(context); ASSERT_NE(nullptr, srcImage); } }; HWTEST_F(ImageMediaBlockSetArgTest, clSetKernelArgImage) { auto gmmHelper = pDevice->getGmmHelper(); auto imageMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; cl_mem memObj = srcImage; retVal = clSetKernelArg( pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(srcImage->getGraphicsAllocation()->getGpuAddress(), surfaceAddress); uint32_t element_size = static_cast(srcImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); SetupChannels(srcImage->getImageFormat().image_channel_order); EXPECT_EQ(srcImage->getImageDesc().image_width * element_size / sizeof(uint32_t), surfaceState->getWidth()); EXPECT_EQ(srcImage->getImageDesc().image_height, surfaceState->getHeight()); EXPECT_EQ(srcImage->getImageDesc().image_depth, surfaceState->getDepth()); EXPECT_EQ(srcImage->getImageDesc().image_depth, surfaceState->getRenderTargetViewExtent()); EXPECT_EQ(rPitch, surfaceState->getSurfacePitch()); EXPECT_EQ(0u, surfaceState->getSurfaceQpitch() % 4); EXPECT_EQ(srcImage->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, (GFX3DSTATE_SURFACEFORMAT)surfaceState->getSurfaceFormat()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D, surfaceState->getSurfaceType()); EXPECT_EQ(expectedChannelRed, surfaceState->getShaderChannelSelectRed()); EXPECT_EQ(expectedChannelGreen, surfaceState->getShaderChannelSelectGreen()); EXPECT_EQ(expectedChannelBlue, surfaceState->getShaderChannelSelectBlue()); 
EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); EXPECT_EQ(imageMocs, surfaceState->getMemoryObjectControlState()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; } } typedef ImageSetArgTest ImageShaderChanelValueTest; HWTEST_F(ImageShaderChanelValueTest, ChannelA) { typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_A); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); } HWTEST_F(ImageShaderChanelValueTest, ChannelRA) { typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_R); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_R); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_R); 
EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_R); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_Rx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); } HWTEST_F(ImageShaderChanelValueTest, ChannelRGA) { typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; 
inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RG); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGx); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, outputChannel); } HWTEST_F(ImageShaderChanelValueTest, ChannelRGBA) { typedef typename FamilyType::RENDER_SURFACE_STATE SURFACE_STATE; int outputChannel = 0; int inputChannel = 0; inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_RED; 
outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, outputChannel); inputChannel = SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE; outputChannel = ImageHw::getShaderChannelValue(inputChannel, CL_RGBA); EXPECT_EQ(SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, outputChannel); } HWTEST_F(ImageSetArgTest, givenImageWithOffsetGreaterThan4GBWhenSurfaceStateIsProgrammedThenCorrectStataBaseAddressIsSet) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; uint64_t surfaceOffset = 8 * GB; srcImage->setSurfaceOffsets(surfaceOffset, 0, 0, 0); srcImage->setImageArg(&surfaceState, false, 0); auto expectedAddress = srcImage->getGraphicsAllocation()->getGpuAddress() + surfaceOffset; auto surfaceAddress = surfaceState.getSurfaceBaseAddress(); EXPECT_EQ(expectedAddress, surfaceAddress); } HWTEST_F(ImageSetArgTest, givenMediaCompressedResourceSurfaceModeIsNone) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; RENDER_SURFACE_STATE surfaceState; auto gmm = srcImage->getGraphicsAllocation()->getDefaultGmm(); gmm->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed = true; gmm->isRenderCompressed = true; srcImage->setImageArg(&surfaceState, false, 0); EXPECT_EQ(surfaceState.getAuxiliarySurfaceMode(), AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_snorm_tests.cpp000066400000000000000000000064751363734646600275040ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/surface_formats.h" #include 
"opencl/source/mem_obj/image.h" #include "gtest/gtest.h" #include using namespace NEO; const cl_mem_flags flagsForTests[] = {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE}; const ArrayRef paramsForSnormTests[] = { SurfaceFormats::readOnly12(), SurfaceFormats::readOnly20(), SurfaceFormats::writeOnly(), SurfaceFormats::readWrite()}; const std::array referenceSnormSurfaceFormats = {{ // clang-format off {{CL_R, CL_SNORM_INT8}, {GMM_FORMAT_R8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8_SNORM, 0, 1, 1, 1}}, {{CL_R, CL_SNORM_INT16}, {GMM_FORMAT_R16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16_SNORM, 0, 1, 2, 2}}, {{CL_RG, CL_SNORM_INT8}, {GMM_FORMAT_R8G8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8_SNORM, 0, 2, 1, 2}}, {{CL_RG, CL_SNORM_INT16}, {GMM_FORMAT_R16G16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16_SNORM, 0, 2, 2, 4}}, {{CL_RGBA, CL_SNORM_INT8}, {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R8G8B8A8_SNORM, 0, 4, 1, 4}}, {{CL_RGBA, CL_SNORM_INT16}, {GMM_FORMAT_R16G16B16A16_SNORM_TYPE, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_SNORM, 0, 4, 2, 8}}, // clang-format on }}; using SnormSurfaceFormatAccessFlagsTests = ::testing::TestWithParam; TEST_P(SnormSurfaceFormatAccessFlagsTests, givenSnormFormatWhenGetSurfaceFormatFromTableIsCalledThenReturnsCorrectFormat) { EXPECT_EQ(6u, referenceSnormSurfaceFormats.size()); cl_mem_flags flags = GetParam(); for (const auto &snormSurfaceFormat : referenceSnormSurfaceFormats) { auto format = Image::getSurfaceFormatFromTable(flags, &snormSurfaceFormat.OCLImageFormat, 12); EXPECT_NE(nullptr, format); EXPECT_TRUE(memcmp(&snormSurfaceFormat, format, sizeof(ClSurfaceFormatInfo)) == 0); } for (const auto &snormSurfaceFormat : referenceSnormSurfaceFormats) { auto format = Image::getSurfaceFormatFromTable(flags, &snormSurfaceFormat.OCLImageFormat, 20); EXPECT_NE(nullptr, format); EXPECT_TRUE(memcmp(&snormSurfaceFormat, format, sizeof(ClSurfaceFormatInfo)) == 0); } } using SnormSurfaceFormatTests = ::testing::TestWithParam>; 
TEST_P(SnormSurfaceFormatTests, givenSnormOclFormatWhenCheckingrReadOnlySurfaceFormatsThenFindExactCount) { ArrayRef formatsTable = GetParam(); size_t snormFormatsFound = 0; for (const auto &format : formatsTable) { auto oclFormat = format.OCLImageFormat; if (CL_SNORM_INT8 == oclFormat.image_channel_data_type || CL_SNORM_INT16 == oclFormat.image_channel_data_type) { EXPECT_TRUE(oclFormat.image_channel_order == CL_R || oclFormat.image_channel_order == CL_RG || oclFormat.image_channel_order == CL_RGBA); snormFormatsFound++; } } EXPECT_EQ(6u, snormFormatsFound); } INSTANTIATE_TEST_CASE_P( ImageSnormTests, SnormSurfaceFormatAccessFlagsTests, ::testing::ValuesIn(flagsForTests)); INSTANTIATE_TEST_CASE_P( ImageSnormTests, SnormSurfaceFormatTests, ::testing::ValuesIn(paramsForSnormTests)); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_tests.cpp000066400000000000000000001636111363734646600262620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/image/image_surface_state.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include 
"opencl/test/unit_test/mem_obj/image_compression_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; static const unsigned int testImageDimensions = 45; auto channelType = CL_UNORM_INT8; auto channelOrder = CL_RGBA; auto const elementSize = 4; //sizeof CL_RGBA * CL_UNORM_INT8 class CreateImageTest : public DeviceFixture, public testing::TestWithParam, public CommandQueueHwFixture { typedef CommandQueueHwFixture CommandQueueFixture; public: CreateImageTest() { } Image *createImageWithFlags(cl_mem_flags flags) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); return Image::create(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); } protected: void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); flags = GetParam(); // clang-format off imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = testImageDimensions; imageDesc.image_height = testImageDimensions; imageDesc.image_depth = 0; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; cl_mem_flags flags = 0; unsigned char pHostPtr[testImageDimensions * testImageDimensions * elementSize * 4]; 
}; typedef CreateImageTest CreateImageNoHostPtr; TEST(TestSliceAndRowPitch, ForDifferentDescriptorsGetHostPtrSlicePitchAndRowPitchReturnsProperValues) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 1D image with 0 row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(width * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); delete image; // 1D image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, 
image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); delete image; // 2D image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); delete image; // 1D ARRAY image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrSlicePitch()); delete image; // 2D ARRAY image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize * height, 
image->getHostPtrSlicePitch()); delete image; // 2D ARRAY image with zero row_pitch and non-zero slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = (width + 1) * elementSize * height; image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(width * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize * height, image->getHostPtrSlicePitch()); delete image; // 2D ARRAY image with non-zero row_pitch and non-zero slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = (width + 1) * elementSize * height; image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize * height, image->getHostPtrSlicePitch()); delete image; // 2D ARRAY image with non-zero row_pitch and non-zero slice_pitch > row_pitch * height imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 2; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = (width + 1) * elementSize * (height + 1); image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, 
image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ((width + 1) * elementSize * (height + 1), image->getHostPtrSlicePitch()); delete image; alignedFree(hostPtr); } TEST(TestCreateImage, UseSharedContextToCreateImage) { cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; context.isSharedContext = true; const size_t width = 5; const size_t height = 3; const size_t depth = 2; char *hostPtr = (char *)alignedMalloc(width * height * depth * elementSize * 2, 64); imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D image with non-zero row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = (width + 1) * elementSize; imageDesc.image_slice_pitch = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ((width + 1) * elementSize, image->getHostPtrRowPitch()); EXPECT_EQ(0u, image->getHostPtrSlicePitch()); EXPECT_TRUE(image->isMemObjZeroCopy()); delete image; alignedFree(hostPtr); } TEST(TestCreateImageUseHostPtr, CheckMemoryAllocationForDifferenHostPtrAlignments) { KernelBinaryHelper kbHelper(KernelBinaryHelper::BUILT_INS); cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal; MockContext context; const size_t width = 4; const size_t height = 32; imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = 
channelOrder; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 2D image with 0 row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = width; imageDesc.image_height = height; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = alignUp(alignUp(width, 4) * 4, 0x80); //row pitch for tiled img imageDesc.image_slice_pitch = 0; void *pageAlignedPointer = alignedMalloc(imageDesc.image_row_pitch * height * 1 * 4 + 256, 4096); void *hostPtr[] = {ptrOffset(pageAlignedPointer, 16), // 16 - byte alignment ptrOffset(pageAlignedPointer, 32), // 32 - byte alignment ptrOffset(pageAlignedPointer, 64), // 64 - byte alignment ptrOffset(pageAlignedPointer, 128)}; // 128 - byte alignment bool result[] = {false, false, true, true}; cl_mem_flags flags = CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); for (int i = 0; i < 4; i++) { auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr[i], retVal); ASSERT_NE(nullptr, image); auto address = image->getCpuAddress(); if (result[i] && image->isMemObjZeroCopy()) { EXPECT_EQ(hostPtr[i], address); } else { EXPECT_NE(hostPtr[i], address); } delete image; } alignedFree(pageAlignedPointer); } TEST(TestCreateImageUseHostPtr, givenZeroCopyImageValuesWhenUsingHostPtrThenZeroCopyImageIsCreated) { cl_int retVal = CL_SUCCESS; MockContext context; cl_image_desc imageDesc = {}; imageDesc.image_width = 4096; imageDesc.image_height = 1; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = 
Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto hostPtr = alignedMalloc(imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes, MemoryConstants::cacheLineSize); auto image = std::unique_ptr(Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal)); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(image->isMemObjZeroCopy()); EXPECT_EQ(hostPtr, image->getGraphicsAllocation()->getUnderlyingBuffer()); EXPECT_NE(nullptr, image->getMapAllocation()); alignedFree(hostPtr); } TEST(TestRedescribableFormatCheck, givenVariousOclFormatsWhenCheckingIfRedescribableThenReturnCorrectResults) { static const cl_image_format redescribeFormats[] = { {CL_R, CL_UNSIGNED_INT8}, {CL_R, CL_UNSIGNED_INT16}, {CL_R, CL_UNSIGNED_INT32}, {CL_RG, CL_UNSIGNED_INT32}, {CL_RGBA, CL_UNSIGNED_INT32}, }; const ArrayRef formats = SurfaceFormats::readWrite(); for (const auto &format : formats) { const cl_image_format oclFormat = format.OCLImageFormat; bool expectedResult = true; for (const auto &nonRedescribableFormat : redescribeFormats) { expectedResult &= (memcmp(&oclFormat, &nonRedescribableFormat, sizeof(cl_image_format)) != 0); } EXPECT_EQ(expectedResult, Image::isFormatRedescribable(oclFormat)); } } TEST_P(CreateImageNoHostPtr, getImageDesc) { auto image = createImageWithFlags(flags); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); const auto &imageDesc = image->getImageDesc(); // Sometimes the user doesn't pass image_row/slice_pitch during a create. // Ensure the driver fills in the missing data. 
EXPECT_NE(0u, imageDesc.image_row_pitch); EXPECT_GE(imageDesc.image_slice_pitch, imageDesc.image_row_pitch); delete image; } TEST_P(CreateImageNoHostPtr, whenImageIsCreatedThenItHasProperAccessAndCacheProperties) { auto image = createImageWithFlags(flags); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, image); auto allocation = image->getGraphicsAllocation(); EXPECT_TRUE(allocation->getAllocationType() == GraphicsAllocation::AllocationType::IMAGE); auto isImageWritable = !(flags & (CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)); EXPECT_EQ(isImageWritable, allocation->isMemObjectsAllocationWithWritableFlags()); auto isReadOnly = isValueSet(flags, CL_MEM_READ_ONLY); EXPECT_NE(isReadOnly, allocation->isFlushL3Required()); delete image; } // Parameterized test that tests image creation with all flags that should be // valid with a nullptr host ptr static cl_mem_flags NoHostPtrFlags[] = { CL_MEM_READ_WRITE, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY, CL_MEM_HOST_READ_ONLY, CL_MEM_HOST_WRITE_ONLY, CL_MEM_HOST_NO_ACCESS}; INSTANTIATE_TEST_CASE_P( CreateImageTest_Create, CreateImageNoHostPtr, testing::ValuesIn(NoHostPtrFlags)); struct CreateImageHostPtr : public CreateImageTest, public MemoryManagementFixture { typedef CreateImageTest BaseClass; CreateImageHostPtr() { } void SetUp() override { MemoryManagementFixture::SetUp(); BaseClass::SetUp(); } void TearDown() override { delete image; BaseClass::TearDown(); platformsImpl.clear(); MemoryManagementFixture::TearDown(); } Image *createImage(cl_int &retVal) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context->getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); return Image::create( context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, pHostPtr, retVal); } cl_int retVal = CL_INVALID_VALUE; Image *image = nullptr; }; TEST_P(CreateImageHostPtr, isResidentDefaultsToFalseAfterCreate) { image = 
createImage(retVal);
    ASSERT_NE(nullptr, image);

    EXPECT_FALSE(image->getGraphicsAllocation()->isResident(pDevice->getDefaultEngine().osContext->getContextId()));
}

// Checks which pointer the image exposes for mapping: with CL_MEM_USE_HOST_PTR it
// must be the host pointer, otherwise a different address and no host pointer.
TEST_P(CreateImageHostPtr, getAddress) {
    image = createImage(retVal);
    ASSERT_NE(nullptr, image);

    auto address = image->getBasePtrForMap(0);
    EXPECT_NE(nullptr, address);

    const bool usesHostPtr = (flags & CL_MEM_USE_HOST_PTR) != 0;
    if (!usesHostPtr) {
        EXPECT_EQ(nullptr, image->getHostPtr());
        EXPECT_NE(pHostPtr, address);
    } else {
        //if size fits within a page then zero copy can be applied, if not RT needs to do a copy of image
        auto computedSize = imageDesc.image_width * elementSize * alignUp(imageDesc.image_height, 4) * imageDesc.image_array_size;
        auto ptrSize = imageDesc.image_width * elementSize * imageDesc.image_height * imageDesc.image_array_size;
        // NOTE(review): both static_cast expressions below have no target type in this copy
        // of the file; the template argument looks lost — restore from the upstream source.
        auto alignedRequiredSize = alignSizeWholePage(static_cast(pHostPtr), computedSize);
        auto alignedPtrSize = alignSizeWholePage(static_cast(pHostPtr), ptrSize);

        // 1D arrays use a 64-element horizontal alignment here, everything else 1.
        size_t horizontalAlign = imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ?
64 : 1;
        auto rowPitch = imageDesc.image_width * elementSize;
        auto slicePitch = rowPitch * imageDesc.image_height;
        auto requiredRowPitch = alignUp(imageDesc.image_width, horizontalAlign) * elementSize;
        auto requiredSlicePitch = requiredRowPitch * alignUp(imageDesc.image_height, 4);

        const bool sizeGrows = alignedRequiredSize > alignedPtrSize;
        const bool rowPitchDiffers = requiredRowPitch != rowPitch;
        const bool slicePitchDiffers = slicePitch != requiredSlicePitch;
        bool copyRequired = sizeGrows || rowPitchDiffers || slicePitchDiffers;

        EXPECT_EQ(pHostPtr, address);
        EXPECT_EQ(pHostPtr, image->getHostPtr());

        if (copyRequired) {
            EXPECT_FALSE(image->isMemObjZeroCopy());
        }
    }

    if ((flags & CL_MEM_COPY_HOST_PTR) && image->isMemObjZeroCopy()) {
        // Buffer should contain a copy of host memory
        // NOTE(review): only sizeof(testImageDimensions) bytes are compared, i.e. the size of
        // that constant's type rather than of the image data — confirm this is intended.
        EXPECT_EQ(0, memcmp(pHostPtr, image->getGraphicsAllocation()->getUnderlyingBuffer(), sizeof(testImageDimensions)));
    }
}

TEST_P(CreateImageHostPtr, getImageDesc) {
    image = createImage(retVal);
    ASSERT_NE(nullptr, image);

    const auto &imageDesc = image->getImageDesc();
    // clang-format off
    EXPECT_EQ(this->imageDesc.image_type,        imageDesc.image_type);
    EXPECT_EQ(this->imageDesc.image_width,       imageDesc.image_width);
    EXPECT_EQ(this->imageDesc.image_height,      imageDesc.image_height);
    EXPECT_EQ(this->imageDesc.image_depth,       imageDesc.image_depth);
    EXPECT_EQ(0u,                                imageDesc.image_array_size);
    EXPECT_NE(0u,                                imageDesc.image_row_pitch);
    EXPECT_GE(imageDesc.image_slice_pitch,       imageDesc.image_row_pitch);
    EXPECT_EQ(this->imageDesc.num_mip_levels,    imageDesc.num_mip_levels);
    EXPECT_EQ(this->imageDesc.num_samples,       imageDesc.num_samples);
    EXPECT_EQ(this->imageDesc.buffer,            imageDesc.buffer);
    EXPECT_EQ(this->imageDesc.mem_object,        imageDesc.mem_object);
    // clang-format on

    // NOTE(review): static_cast has no target type in this copy of the file.
    EXPECT_EQ(image->getHostPtrRowPitch(), static_cast(imageDesc.image_width * elementSize));
    // Only 3D, and array images can have slice pitch
    // NOTE(review): CL_MEM_OBJECT_IMAGE2D_ARRAY is tested twice below; given the comment
    // above, one operand was presumably meant to be CL_MEM_OBJECT_IMAGE1D_ARRAY — confirm.
    int isArrayOr3DType = 0;
    if (this->imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D ||
        this->imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
        this->imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
isArrayOr3DType = 1; } EXPECT_EQ(image->getHostPtrSlicePitch(), static_cast(imageDesc.image_width * elementSize * imageDesc.image_height) * isArrayOr3DType); EXPECT_EQ(image->getImageCount(), 1u); EXPECT_NE(0u, image->getSize()); EXPECT_NE(nullptr, image->getGraphicsAllocation()); } TEST_P(CreateImageHostPtr, failedAllocationInjection) { InjectedFunction method = [this](size_t failureIndex) { // System under test image = createImage(retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, image); } delete image; image = nullptr; }; injectFailures(method, 4); // check only first 5 allocations - avoid checks on writeImg call allocations for tiled imgs } TEST_P(CreateImageHostPtr, givenLinearImageWhenFailedAtCreationThenReturnError) { DebugManagerStateRestore restore; DebugManager.flags.ForceLinearImages.set(true); InjectedFunction method = [this](size_t failureIndex) { // System under test image = createImage(retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, image); } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, image); } delete image; image = nullptr; }; injectFailures(method, 4); // check only first 5 allocations - avoid checks on writeImg call allocations for tiled imgs } TEST_P(CreateImageHostPtr, checkWritingOutsideAllocatedMemoryWhileCreatingImage) { auto mockMemoryManager = new MockMemoryManager(*pDevice->executionEnvironment); pDevice->injectMemoryManager(mockMemoryManager); context->memoryManager = mockMemoryManager; mockMemoryManager->redundancyRatio = 2; memset(pHostPtr, 1, testImageDimensions * testImageDimensions * elementSize * 4); imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_height = 1; imageDesc.image_row_pitch = elementSize 
* imageDesc.image_width + 1; image = createImage(retVal); char *memory = (char *)image->getGraphicsAllocation()->getUnderlyingBuffer(); auto memorySize = image->getGraphicsAllocation()->getUnderlyingBufferSize() / 2; for (size_t i = 0; i < image->getHostPtrSlicePitch(); ++i) { if (i < imageDesc.image_width * elementSize) { EXPECT_EQ(1, memory[i]); } else { EXPECT_EQ(0, memory[i]); } } for (size_t i = 0; i < memorySize; ++i) { EXPECT_EQ(0, memory[memorySize + i]); } mockMemoryManager->redundancyRatio = 1; } struct ModifyableImage { enum { flags = 0 }; static cl_image_format imageFormat; static cl_image_desc imageDesc; static void *hostPtr; static NEO::Context *context; }; void *ModifyableImage::hostPtr = nullptr; NEO::Context *ModifyableImage::context = nullptr; cl_image_format ModifyableImage::imageFormat; cl_image_desc ModifyableImage::imageDesc; class ImageTransfer : public ::testing::Test { public: void SetUp() override { context = new MockContext(); ASSERT_NE(context, nullptr); ModifyableImage::context = context; ModifyableImage::hostPtr = nullptr; ModifyableImage::imageFormat = {CL_R, CL_FLOAT}; ModifyableImage::imageDesc = {CL_MEM_OBJECT_IMAGE1D, 512, 0, 0, 0, 0, 0, 0, 0, {nullptr}}; hostPtr = nullptr; unalignedHostPtr = nullptr; } void TearDown() override { if (context) delete context; if (hostPtr) alignedFree(hostPtr); } void createHostPtrs(size_t imageSize) { hostPtr = alignedMalloc(imageSize + 100, 4096); unalignedHostPtr = (char *)hostPtr + 4; memset(hostPtr, 0, imageSize + 100); memset(unalignedHostPtr, 1, imageSize); } MockContext *context; void *hostPtr; void *unalignedHostPtr; }; TEST_F(ImageTransfer, GivenNonZeroCopyImageWhenDataTransferedFromHostPtrToMemStorageThenNoOverflowOfHostPtr) { size_t imageSize = 512 * 4; createHostPtrs(imageSize); ModifyableImage::imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; ModifyableImage::imageDesc.image_width = 512; ModifyableImage::imageDesc.image_height = 0; ModifyableImage::imageDesc.image_row_pitch = 0; 
ModifyableImage::imageDesc.image_array_size = 0; ModifyableImage::imageFormat.image_channel_order = CL_R; ModifyableImage::imageFormat.image_channel_data_type = CL_FLOAT; ModifyableImage::hostPtr = unalignedHostPtr; Image *imageNonZeroCopy = ImageHelper>::create(); ASSERT_NE(nullptr, imageNonZeroCopy); void *memoryStorage = imageNonZeroCopy->getCpuAddress(); size_t memoryStorageSize = imageNonZeroCopy->getSize(); ASSERT_NE(memoryStorage, unalignedHostPtr); int result = memcmp(memoryStorage, unalignedHostPtr, imageSize); EXPECT_EQ(0, result); memset(memoryStorage, 0, memoryStorageSize); memset((char *)unalignedHostPtr + imageSize, 2, 100 - 4); auto &imgDesc = imageNonZeroCopy->getImageDesc(); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataFromHostPtr(copySize, copyOffset); void *foundData = memchr(memoryStorage, 2, memoryStorageSize); EXPECT_EQ(0, foundData); delete imageNonZeroCopy; } TEST_F(ImageTransfer, GivenNonZeroCopyNonZeroRowPitchImageWhenDataIsTransferedFromHostPtrToMemStorageThenDestinationIsNotOverflowed) { ModifyableImage::imageDesc.image_width = 16; ModifyableImage::imageDesc.image_row_pitch = 65; ModifyableImage::imageFormat.image_channel_data_type = CL_UNORM_INT8; size_t imageSize = ModifyableImage::imageDesc.image_row_pitch; size_t imageWidth = ModifyableImage::imageDesc.image_width; createHostPtrs(imageSize); ModifyableImage::hostPtr = unalignedHostPtr; Image *imageNonZeroCopy = ImageHelper>::create(); ASSERT_NE(nullptr, imageNonZeroCopy); void *memoryStorage = imageNonZeroCopy->getCpuAddress(); size_t memoryStorageSize = imageNonZeroCopy->getSize(); ASSERT_NE(memoryStorage, unalignedHostPtr); int result = memcmp(memoryStorage, unalignedHostPtr, imageWidth); EXPECT_EQ(0, result); memset(memoryStorage, 0, memoryStorageSize); memset((char *)unalignedHostPtr + imageSize, 2, 100 - 4); auto &imgDesc = imageNonZeroCopy->getImageDesc(); 
MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataFromHostPtr(copySize, copyOffset); void *foundData = memchr(memoryStorage, 2, memoryStorageSize); EXPECT_EQ(0, foundData); delete imageNonZeroCopy; } TEST_F(ImageTransfer, GivenNonZeroCopyNonZeroRowPitchWithExtraBytes1DArrayImageWhenDataIsTransferedForthAndBackThenDataValidates) { ModifyableImage::imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; ModifyableImage::imageDesc.image_width = 5; ModifyableImage::imageDesc.image_row_pitch = 28; // == (4 * 5) row bytes + (4 * 2) extra bytes ModifyableImage::imageDesc.image_array_size = 3; ModifyableImage::imageFormat.image_channel_order = CL_RGBA; ModifyableImage::imageFormat.image_channel_data_type = CL_UNORM_INT8; const size_t imageWidth = ModifyableImage::imageDesc.image_width; const size_t imageRowPitchInPixels = ModifyableImage::imageDesc.image_row_pitch / 4; const size_t imageHeight = 1; const size_t imageCount = ModifyableImage::imageDesc.image_array_size; size_t imageSize = ModifyableImage::imageDesc.image_row_pitch * imageHeight * imageCount; createHostPtrs(imageSize); uint32_t *row = static_cast(unalignedHostPtr); for (uint32_t arrayIndex = 0; arrayIndex < imageCount; ++arrayIndex) { for (uint32_t pixelInRow = 0; pixelInRow < imageRowPitchInPixels; ++pixelInRow) { if (pixelInRow < imageWidth) { row[pixelInRow] = pixelInRow; } else { row[pixelInRow] = 66; } } row = row + imageRowPitchInPixels; } ModifyableImage::hostPtr = unalignedHostPtr; Image *imageNonZeroCopy = ImageHelper>::create(); ASSERT_NE(nullptr, imageNonZeroCopy); void *memoryStorage = imageNonZeroCopy->getCpuAddress(); ASSERT_NE(memoryStorage, unalignedHostPtr); size_t internalSlicePitch = imageNonZeroCopy->getImageDesc().image_slice_pitch; // Check twice, once after image create, and second time after transfer from HostPtrToMemoryStorage // when these paths are unified, only one 
check will be enough for (size_t run = 0; run < 2; ++run) { row = static_cast(unalignedHostPtr); unsigned char *internalRow = static_cast(memoryStorage); if (run == 1) { auto &imgDesc = imageNonZeroCopy->getImageDesc(); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataFromHostPtr(copySize, copyOffset); } for (size_t arrayIndex = 0; arrayIndex < imageCount; ++arrayIndex) { for (size_t pixelInRow = 0; pixelInRow < imageRowPitchInPixels; ++pixelInRow) { if (pixelInRow < imageWidth) { if (memcmp(&row[pixelInRow], &internalRow[pixelInRow * 4], 4)) { EXPECT_FALSE(1) << "Data in memory storage did not validate, row: " << pixelInRow << " array: " << arrayIndex << "\n"; } } else { // Change extra bytes pattern row[pixelInRow] = 55; } } row = row + imageRowPitchInPixels; internalRow = internalRow + internalSlicePitch; } } auto &imgDesc = imageNonZeroCopy->getImageDesc(); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{imgDesc.image_width, imgDesc.image_height, imgDesc.image_depth}}; imageNonZeroCopy->transferDataToHostPtr(copySize, copyOffset); row = static_cast(unalignedHostPtr); for (size_t arrayIndex = 0; arrayIndex < imageCount; ++arrayIndex) { for (size_t pixelInRow = 0; pixelInRow < imageRowPitchInPixels; ++pixelInRow) { if (pixelInRow < imageWidth) { if (row[pixelInRow] != pixelInRow) { EXPECT_FALSE(1) << "Data under host_ptr did not validate, row: " << pixelInRow << " array: " << arrayIndex << "\n"; } } else { if (row[pixelInRow] != 55) { EXPECT_FALSE(1) << "Data under host_ptr corrupted in extra bytes, row: " << pixelInRow << " array: " << arrayIndex << "\n"; } } } row = row + imageRowPitchInPixels; } delete imageNonZeroCopy; } // Parameterized test that tests image creation with all flags that should be // valid with a valid host ptr static cl_mem_flags ValidHostPtrFlags[] = { 0 | CL_MEM_USE_HOST_PTR, CL_MEM_READ_WRITE | 
CL_MEM_USE_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_USE_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_USE_HOST_PTR, 0 | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, CL_MEM_HOST_NO_ACCESS | CL_MEM_COPY_HOST_PTR}; INSTANTIATE_TEST_CASE_P( CreateImageTest_Create, CreateImageHostPtr, testing::ValuesIn(ValidHostPtrFlags)); TEST(ImageGetSurfaceFormatInfoTest, givenNullptrFormatWhenGetSurfaceFormatInfoIsCalledThenReturnsNullptr) { MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable(0, nullptr, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); EXPECT_EQ(nullptr, surfaceFormat); } HWTEST_F(ImageCompressionTests, givenTiledImageWhenCreatingAllocationThenPreferRenderCompression) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 5; imageDesc.image_height = 5; MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = std::unique_ptr(Image::create(mockContext.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, myMemoryManager->capturedImgInfo.preferRenderCompression); } TEST_F(ImageCompressionTests, givenNonTiledImageWhenCreatingAllocationThenDontPreferRenderCompression) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 5; MockContext context; auto surfaceFormat = 
Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = std::unique_ptr(Image::create(mockContext.get(), MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isTiledAllocation()); EXPECT_TRUE(myMemoryManager->mockMethodCalled); EXPECT_FALSE(myMemoryManager->capturedImgInfo.preferRenderCompression); } using ImageTests = ::testing::Test; HWTEST_F(ImageTests, givenImageWhenAskedForPtrOffsetForGpuMappingThenReturnCorrectValue) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{4, 5, 6}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenImageWhenAskedForMcsInfoThenDefaultValuesAreReturned) { MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); auto mcsInfo = image->getMcsSurfaceInfo(); EXPECT_EQ(0u, mcsInfo.multisampleCount); EXPECT_EQ(0u, mcsInfo.qPitch); EXPECT_EQ(0u, mcsInfo.pitch); } TEST(ImageTest, givenImageWhenAskedForPtrOffsetForCpuMappingThenReturnCorrectValue) { DebugManagerStateRestore restore; DebugManager.flags.ForceLinearImages.set(true); MockContext ctx; std::unique_ptr image(ImageHelper::create(&ctx)); EXPECT_TRUE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin = {{4, 5, 6}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getImageDesc().image_row_pitch * origin[1] + 
image->getImageDesc().image_slice_pitch * origin[2];

    EXPECT_EQ(expectedOffset, retOffset);
}

// NOTE(review): in the tests below std::unique_ptr, ImageHelper and UnitTestHelper are
// written without template argument lists; they appear to have been lost in this copy
// of the file and need to be restored from the upstream source.

// For a 1D array image the second origin coordinate is scaled by the image slice pitch.
TEST(ImageTest, given1DArrayImageWhenAskedForPtrOffsetForMappingThenReturnCorrectValue) {
    MockContext ctx;
    std::unique_ptr image(ImageHelper::create(&ctx));

    MemObjOffsetArray origin = {{4, 5, 0}};

    auto retOffset = image->calculateOffsetForMapping(origin);
    size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] +
                            image->getImageDesc().image_slice_pitch * origin[1];

    EXPECT_EQ(expectedOffset, retOffset);
}

// When CPU mapping is not allowed, the mapped length is computed from the element
// size and the host-pointer row/slice pitches. Skipped if tiled images are unsupported.
HWTEST_F(ImageTests, givenImageWhenAskedForPtrLengthForGpuMappingThenReturnCorrectValue) {
    if (!UnitTestHelper::tiledImagesSupported) {
        GTEST_SKIP();
    }
    MockContext ctx;
    std::unique_ptr image(ImageHelper::create(&ctx));
    EXPECT_FALSE(image->mappingOnCpuAllowed());

    MemObjSizeArray region = {{4, 5, 6}};

    auto retLength = image->calculateMappedPtrLength(region);
    size_t expectedLength = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * region[0] +
                            image->getHostPtrRowPitch() * region[1] + image->getHostPtrSlicePitch() * region[2];

    EXPECT_EQ(expectedLength, retLength);
}

// With ForceLinearImages set, CPU mapping is allowed and the mapped length is computed
// from the image descriptor's row/slice pitches instead.
TEST(ImageTest, givenImageWhenAskedForPtrLengthForCpuMappingThenReturnCorrectValue) {
    DebugManagerStateRestore restore;
    DebugManager.flags.ForceLinearImages.set(true);
    MockContext ctx;
    std::unique_ptr image(ImageHelper::create(&ctx));
    EXPECT_TRUE(image->mappingOnCpuAllowed());

    MemObjSizeArray region = {{4, 5, 6}};

    auto retLength = image->calculateMappedPtrLength(region);
    size_t expectedLength = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * region[0] +
                            image->getImageDesc().image_row_pitch * region[1] +
                            image->getImageDesc().image_slice_pitch * region[2];

    EXPECT_EQ(expectedLength, retLength);
}

// Mip-mapped 3D image: the third origin coordinate is scaled by the host slice pitch.
TEST(ImageTest, givenMipMapImage3DWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) {
    MockContext ctx;
    cl_image_desc imageDesc{};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D;
    imageDesc.image_width = 5;
imageDesc.image_height = 5;
    imageDesc.image_depth = 5;
    imageDesc.num_mip_levels = 2;

    std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc));
    EXPECT_FALSE(image->mappingOnCpuAllowed());

    MemObjOffsetArray origin{{1, 1, 1}};

    auto retOffset = image->calculateOffsetForMapping(origin);
    size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] +
                            image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2];

    EXPECT_EQ(expectedOffset, retOffset);
}

// Mip-mapped 2D array image: same expectation as for the 3D case above.
TEST(ImageTest, givenMipMapImage2DArrayWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) {
    MockContext ctx;
    cl_image_desc imageDesc{};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    imageDesc.image_width = 5;
    imageDesc.image_height = 5;
    imageDesc.image_array_size = 5;
    imageDesc.num_mip_levels = 2;

    std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc));
    EXPECT_FALSE(image->mappingOnCpuAllowed());

    MemObjOffsetArray origin{{1, 1, 1}};

    auto retOffset = image->calculateOffsetForMapping(origin);
    size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] +
                            image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2];

    EXPECT_EQ(expectedOffset, retOffset);
}

// 2D array image with a single mip level: the offset formula is the same.
TEST(ImageTest, givenNonMipMapImage2DArrayWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) {
    MockContext ctx;
    cl_image_desc imageDesc{};
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    imageDesc.image_width = 5;
    imageDesc.image_height = 5;
    imageDesc.image_array_size = 5;
    imageDesc.num_mip_levels = 1;

    std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc));
    EXPECT_FALSE(image->mappingOnCpuAllowed());

    MemObjOffsetArray origin{{1, 1, 1}};

    auto retOffset = image->calculateOffsetForMapping(origin);
    size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] +
                            image->getHostPtrRowPitch() * origin[1] + image->getHostPtrSlicePitch() * origin[2];
EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenMipMapImage1DArrayWhenAskedForPtrOffsetForGpuMappingThenReturnOffsetWithSlicePitch) { MockContext ctx; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; imageDesc.image_width = 5; imageDesc.image_array_size = 5; imageDesc.num_mip_levels = 2; std::unique_ptr image(ImageHelper::create(&ctx, &imageDesc)); EXPECT_FALSE(image->mappingOnCpuAllowed()); MemObjOffsetArray origin{{1, 1, 0}}; auto retOffset = image->calculateOffsetForMapping(origin); size_t expectedOffset = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes * origin[0] + image->getHostPtrSlicePitch() * origin[1]; EXPECT_EQ(expectedOffset, retOffset); } TEST(ImageTest, givenClMemForceLinearStorageSetWhenCreateImageThenDisallowTiling) { cl_int retVal = CL_SUCCESS; MockContext context; cl_image_desc imageDesc = {}; imageDesc.image_width = 4096; imageDesc.image_height = 1; imageDesc.image_depth = 1; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_FORCE_LINEAR_STORAGE_INTEL; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = std::unique_ptr(Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_FALSE(image->isTiledAllocation()); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(ImageTest, givenClMemCopyHostPointerPassedToImageCreateWhenAllocationIsNotInSystemMemoryPoolThenAllocationIsWrittenByEnqueueWriteImage) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto *memoryManager = new ::testing::NiceMock(*executionEnvironment); 
executionEnvironment->memoryManager.reset(memoryManager); EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillRepeatedly(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::baseAllocateGraphicsMemoryInDevicePool)); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0)); MockContext ctx(device.get()); EXPECT_CALL(*memoryManager, allocateGraphicsMemoryInDevicePool(::testing::_, ::testing::_)) .WillOnce(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::allocateNonSystemGraphicsMemoryInDevicePool)) .WillRepeatedly(::testing::Invoke(memoryManager, &GMockMemoryManagerFailFirstAllocation::baseAllocateGraphicsMemoryInDevicePool)); char memory[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; auto taskCount = device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); cl_int retVal = 0; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1; imageDesc.image_height = 1; imageDesc.image_row_pitch = sizeof(memory); cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; MockContext context; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr image(Image::create(&ctx, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, memory, retVal)); EXPECT_NE(nullptr, image); auto taskCountSent = device->getGpgpuCommandStreamReceiver().peekLatestFlushedTaskCount(); EXPECT_LT(taskCount, taskCountSent); } struct ImageConvertTypeTest : public ::testing::Test { void SetUp() override { } void TearDown() override { } std::array, 7> types = {{std::make_pair<>(CL_MEM_OBJECT_IMAGE1D, ImageType::Image1D), std::make_pair<>(CL_MEM_OBJECT_IMAGE2D, 
ImageType::Image2D), std::make_pair<>(CL_MEM_OBJECT_IMAGE3D, ImageType::Image3D), std::make_pair<>(CL_MEM_OBJECT_IMAGE1D_ARRAY, ImageType::Image1DArray), std::make_pair<>(CL_MEM_OBJECT_IMAGE2D_ARRAY, ImageType::Image2DArray), std::make_pair<>(CL_MEM_OBJECT_IMAGE1D_BUFFER, ImageType::Image1DBuffer), std::make_pair<>(0, ImageType::Invalid)}}; }; TEST_F(ImageConvertTypeTest, givenClMemObjectTypeWhenConvertedThenCorrectImageTypeIsReturned) { for (size_t i = 0; i < types.size(); i++) { EXPECT_EQ(types[i].second, Image::convertType(static_cast(types[i].first))); } } TEST_F(ImageConvertTypeTest, givenImageTypeWhenConvertedThenCorrectClMemObjectTypeIsReturned) { for (size_t i = 0; i < types.size(); i++) { EXPECT_EQ(static_cast(types[i].first), Image::convertType(types[i].second)); } } TEST(ImageConvertDescriptorTest, givenClImageDescWhenConvertedThenCorrectImageDescriptorIsReturned) { cl_image_desc clDesc = {CL_MEM_OBJECT_IMAGE1D, 16, 24, 1, 1, 1024, 2048, 1, 3, {nullptr}}; auto desc = Image::convertDescriptor(clDesc); EXPECT_EQ(ImageType::Image1D, desc.imageType); EXPECT_EQ(clDesc.image_array_size, desc.imageArraySize); EXPECT_EQ(clDesc.image_depth, desc.imageDepth); EXPECT_EQ(clDesc.image_height, desc.imageHeight); EXPECT_EQ(clDesc.image_row_pitch, desc.imageRowPitch); EXPECT_EQ(clDesc.image_slice_pitch, desc.imageSlicePitch); EXPECT_EQ(clDesc.image_width, desc.imageWidth); EXPECT_EQ(clDesc.num_mip_levels, desc.numMipLevels); EXPECT_EQ(clDesc.num_samples, desc.numSamples); EXPECT_FALSE(desc.fromParent); cl_mem temporary = reinterpret_cast(0x1234); clDesc.mem_object = temporary; desc = Image::convertDescriptor(clDesc); EXPECT_TRUE(desc.fromParent); } TEST(ImageConvertDescriptorTest, givenImageDescriptorWhenConvertedThenCorrectClImageDescIsReturned) { ImageDescriptor desc = {ImageType::Image2D, 16, 24, 1, 1, 1024, 2048, 1, 3, false}; auto clDesc = Image::convertDescriptor(desc); EXPECT_EQ(clDesc.image_type, static_cast(CL_MEM_OBJECT_IMAGE2D)); 
EXPECT_EQ(clDesc.image_array_size, desc.imageArraySize); EXPECT_EQ(clDesc.image_depth, desc.imageDepth); EXPECT_EQ(clDesc.image_height, desc.imageHeight); EXPECT_EQ(clDesc.image_row_pitch, desc.imageRowPitch); EXPECT_EQ(clDesc.image_slice_pitch, desc.imageSlicePitch); EXPECT_EQ(clDesc.image_width, desc.imageWidth); EXPECT_EQ(clDesc.num_mip_levels, desc.numMipLevels); EXPECT_EQ(clDesc.num_samples, desc.numSamples); EXPECT_EQ(nullptr, clDesc.mem_object); } typedef ::testing::TestWithParam MipLevelCoordinateTest; TEST_P(MipLevelCoordinateTest, givenMipmappedImageWhenValidateRegionAndOriginIsCalledThenAdditionalOriginCoordinateIsAnalyzed) { size_t origin[4]{}; size_t region[3]{1, 1, 1}; cl_image_desc desc = {}; desc.image_type = GetParam(); desc.num_mip_levels = 2; origin[getMipLevelOriginIdx(desc.image_type)] = 1; EXPECT_EQ(CL_SUCCESS, Image::validateRegionAndOrigin(origin, region, desc)); origin[getMipLevelOriginIdx(desc.image_type)] = 2; EXPECT_EQ(CL_INVALID_MIP_LEVEL, Image::validateRegionAndOrigin(origin, region, desc)); } INSTANTIATE_TEST_CASE_P(MipLevelCoordinate, MipLevelCoordinateTest, ::testing::Values(CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D)); typedef ::testing::TestWithParam> HasSlicesTest; TEST_P(HasSlicesTest, givenMemObjectTypeWhenHasSlicesIsCalledThenReturnsTrueIfTypeDefinesObjectWithSlicePitch) { auto pair = GetParam(); EXPECT_EQ(pair.second, Image::hasSlices(pair.first)); } INSTANTIATE_TEST_CASE_P(HasSlices, HasSlicesTest, ::testing::Values(std::make_pair(CL_MEM_OBJECT_IMAGE1D, false), std::make_pair(CL_MEM_OBJECT_IMAGE1D_ARRAY, true), std::make_pair(CL_MEM_OBJECT_IMAGE2D, false), std::make_pair(CL_MEM_OBJECT_IMAGE2D_ARRAY, true), std::make_pair(CL_MEM_OBJECT_IMAGE3D, true), std::make_pair(CL_MEM_OBJECT_BUFFER, false), std::make_pair(CL_MEM_OBJECT_PIPE, false))); typedef ::testing::Test ImageTransformTest; HWTEST_F(ImageTransformTest, 
givenSurfaceStateWhenTransformImage3dTo2dArrayIsCalledThenSurface2dArrayIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); surfaceState.setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D); surfaceState.setSurfaceArray(false); imageHw->transformImage3dTo2dArray(reinterpret_cast(&surfaceState)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, surfaceState.getSurfaceType()); EXPECT_TRUE(surfaceState.getSurfaceArray()); } HWTEST_F(ImageTransformTest, givenSurfaceStateWhenTransformImage2dArrayTo3dIsCalledThenSurface3dIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto imageHw = static_cast *>(image.get()); surfaceState.setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); surfaceState.setSurfaceArray(true); imageHw->transformImage2dArrayTo3d(reinterpret_cast(&surfaceState)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, surfaceState.getSurfaceType()); EXPECT_FALSE(surfaceState.getSurfaceArray()); } HWTEST_F(ImageTransformTest, givenSurfaceBaseAddressAndUnifiedSurfaceWhenSetUnifiedAuxAddressCalledThenAddressIsSet) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE; MockContext context; auto image = std::unique_ptr(ImageHelper::create(&context)); auto surfaceState = FamilyType::cmdInitRenderSurfaceState; auto gmm = std::unique_ptr(new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, false)); uint64_t surfBsaseAddress = 0xABCDEF1000; 
surfaceState.setSurfaceBaseAddress(surfBsaseAddress); auto mockResource = reinterpret_cast(gmm->gmmResourceInfo.get()); mockResource->setUnifiedAuxTranslationCapable(); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); setUnifiedAuxBaseAddress(&surfaceState, gmm.get()); uint64_t offset = gmm->gmmResourceInfo->getUnifiedAuxSurfaceOffset(GMM_UNIFIED_AUX_TYPE::GMM_AUX_SURF); EXPECT_EQ(surfBsaseAddress + offset, surfaceState.getAuxiliarySurfaceBaseAddress()); } template class MockImageHw : public ImageHw { public: MockImageHw(Context *context, const cl_image_format &format, const cl_image_desc &desc, ClSurfaceFormatInfo &surfaceFormatInfo, GraphicsAllocation *graphicsAllocation) : ImageHw(context, {}, 0, 0, 0, nullptr, format, desc, false, graphicsAllocation, false, 0, 0, surfaceFormatInfo) { } void setAuxParamsForMCSCCS(typename FamilyName::RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) override; bool setAuxParamsForMCSCCSCalled = false; }; template void MockImageHw::setAuxParamsForMCSCCS(typename FamilyName::RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) { this->setAuxParamsForMCSCCSCalled = true; } using HwImageTest = ::testing::Test; HWTEST_F(HwImageTest, givenImageHwWithUnifiedSurfaceAndMcsWhenSettingParamsForMultisampleImageThenSetParamsForCcsMcsIsCalled) { MockContext context; OsAgnosticMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; cl_image_desc imgDesc = {}; imgDesc.image_height = 1; imgDesc.image_width = 4; imgDesc.image_depth = 1; imgDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imgDesc.num_samples = 8; cl_image_format format = {}; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(0, imgInfo, true, {}, context.getDevice(0)->getHardwareInfo()); auto graphicsAllocation = memoryManager.allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); ClSurfaceFormatInfo formatInfo = {}; 
std::unique_ptr> mockImage(new MockImageHw(&context, format, imgDesc, formatInfo, graphicsAllocation)); McsSurfaceInfo msi = {10, 20, 3}; auto mcsAlloc = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); mcsAlloc->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, false)); auto mockMcsGmmResInfo = reinterpret_cast<::testing::NiceMock *>(mcsAlloc->getDefaultGmm()->gmmResourceInfo.get()); mockMcsGmmResInfo->setUnifiedAuxTranslationCapable(); mockMcsGmmResInfo->setMultisampleControlSurface(); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->unifiedAuxTranslationCapable()); EXPECT_TRUE(mcsAlloc->getDefaultGmm()->hasMultisampleControlSurface()); mockImage->setMcsSurfaceInfo(msi); mockImage->setMcsAllocation(mcsAlloc); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = FamilyType::cmdInitRenderSurfaceState; EXPECT_FALSE(mockImage->setAuxParamsForMCSCCSCalled); mockImage->setAuxParamsForMultisamples(&surfaceState); EXPECT_TRUE(mockImage->setAuxParamsForMCSCCSCalled); } using ImageMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(ImageMultiRootDeviceTests, imageAllocationHasCorrectRootDeviceIndex) { std::unique_ptr image(ImageHelper::create(context.get())); auto graphicsAllocation = image->getGraphicsAllocation(); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex()); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_tests_tgllp_plus.cpp000066400000000000000000000027411363734646600305230ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" 
using namespace NEO; struct ImageTestsTgllPlus : DeviceFixture, testing::Test { void SetUp() override { DeviceFixture::SetUp(); context = std::make_unique(pClDevice); srcImage = std::unique_ptr(Image3dHelper<>::create(context.get())); } void TearDown() override { srcImage.reset(); context.reset(); DeviceFixture::TearDown(); } std::unique_ptr context{}; std::unique_ptr srcImage{}; }; using TgllpPlusMatcher = IsAtLeastProduct; HWTEST2_F(ImageTestsTgllPlus, givenDepthResourceWhenSettingImageArgThenSetDepthStencilResourceField, TgllpPlusMatcher) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState{}; auto &gpuFlags = srcImage->getGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo->getResourceFlags()->Gpu; gpuFlags.Depth = 0; srcImage->setImageArg(&surfaceState, false, 0); EXPECT_FALSE(surfaceState.getDepthStencilResource()); gpuFlags.Depth = 1; srcImage->setImageArg(&surfaceState, false, 0); EXPECT_TRUE(surfaceState.getDepthStencilResource()); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_tiled_tests.cpp000066400000000000000000000120041363734646600274300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_helper.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "gtest/gtest.h" using namespace NEO; static const auto dimension = 16; static auto channelType = CL_UNORM_INT8; static auto channelOrder = CL_RGBA; class CreateTiledImageTest : public DeviceFixture, public 
testing::TestWithParam, public CommandQueueHwFixture { typedef CommandQueueHwFixture CommandQueueFixture; public: CreateTiledImageTest() { } protected: void SetUp() override { DeviceFixture::SetUp(); CommandQueueFixture::SetUp(pClDevice, 0); type = GetParam(); // clang-format off imageFormat.image_channel_data_type = channelType; imageFormat.image_channel_order = channelOrder; imageDesc.image_type = type; imageDesc.image_width = dimension; imageDesc.image_height = dimension; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // clang-format on } void TearDown() override { CommandQueueFixture::TearDown(); DeviceFixture::TearDown(); } cl_image_format imageFormat; cl_image_desc imageDesc; cl_int retVal = CL_SUCCESS; cl_mem_object_type type = 0; }; HWTEST_P(CreateTiledImageTest, isTiledImageIsSetForTiledImages) { MockContext context; cl_mem_flags flags = CL_MEM_READ_WRITE; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, pClDevice->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); EXPECT_EQ(UnitTestHelper::tiledImagesSupported, image->isTiledAllocation()); delete image; } TEST_P(CreateTiledImageTest, isTiledImageIsSetForSharedImages) { MockContext context; MockGraphicsAllocation *alloc = new MockGraphicsAllocation(0, 0x1000); ImageInfo info = {}; McsSurfaceInfo msi = {}; ClSurfaceFormatInfo surfaceFormat; surfaceFormat.surfaceFormat.GMMSurfaceFormat = GMM_FORMAT_B8G8R8A8_UNORM; info.surfaceFormat = &surfaceFormat.surfaceFormat; info.imgDesc = Image::convertDescriptor(imageDesc); info.plane = GMM_NO_PLANE; auto gmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), info); 
alloc->setDefaultGmm(gmm.release()); auto image = Image::createSharedImage( &context, nullptr, msi, alloc, nullptr, CL_MEM_READ_WRITE, &surfaceFormat, info, 0, 0, 0); ASSERT_NE(nullptr, image); EXPECT_TRUE(image->isTiledAllocation()); delete image; } typedef CreateTiledImageTest CreateNonTiledImageTest; TEST_P(CreateNonTiledImageTest, isTiledImageIsNotSetForNonTiledSharedImage) { MockContext context; MockGraphicsAllocation *alloc = new MockGraphicsAllocation(0, 0x1000); ImageInfo info = {}; McsSurfaceInfo msi = {}; ClSurfaceFormatInfo surfaceFormat; imageDesc.image_height = 1; surfaceFormat.surfaceFormat.GMMSurfaceFormat = GMM_FORMAT_B8G8R8A8_UNORM; info.surfaceFormat = &surfaceFormat.surfaceFormat; info.imgDesc = Image::convertDescriptor(imageDesc); info.plane = GMM_NO_PLANE; auto gmm = MockGmm::queryImgParams(context.getDevice(0)->getGmmClientContext(), info); alloc->setDefaultGmm(gmm.release()); auto image = Image::createSharedImage( &context, nullptr, msi, alloc, nullptr, CL_MEM_READ_WRITE, &surfaceFormat, info, 0, 0, 0); ASSERT_NE(nullptr, image); EXPECT_FALSE(image->isTiledAllocation()); delete image; } // Parameterized test that tests image creation with tiled types static uint32_t TiledImageTypes[] = { CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_OBJECT_IMAGE3D}; static uint32_t NonTiledImageTypes[] = { CL_MEM_OBJECT_IMAGE1D}; INSTANTIATE_TEST_CASE_P(CreateTiledImageTest, CreateTiledImageTest, testing::ValuesIn(TiledImageTypes)); INSTANTIATE_TEST_CASE_P(CreateNonTiledImageTest, CreateNonTiledImageTest, testing::ValuesIn(NonTiledImageTypes)); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_transfer_tests.cpp000066400000000000000000000223651363734646600301660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gtest/gtest.h" using namespace NEO; class ImageHostPtrTransferTests : public testing::Test { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); context.reset(new MockContext(device.get())); } template void createImageAndSetTestParams() { image.reset(ImageHelper>::create(context.get())); imgDesc = &image->getImageDesc(); hostPtrSlicePitch = image->getHostPtrSlicePitch(); hostPtrRowPitch = image->getHostPtrRowPitch(); imageSlicePitch = image->getImageDesc().image_slice_pitch; imageRowPitch = image->getImageDesc().image_row_pitch; pixelSize = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; } void setExpectedData(uint8_t *dstPtr, size_t slicePitch, size_t rowPitch, std::array copyOrigin, std::array copyRegion) { if (image->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { // For 1DArray type, array region and origin are stored on 2nd position. For 2Darray its on 3rd position. 
std::swap(copyOrigin[1], copyOrigin[2]); std::swap(copyRegion[1], copyRegion[2]); } for (size_t slice = copyOrigin[2]; slice < (copyOrigin[2] + copyRegion[2]); slice++) { auto sliceOffset = ptrOffset(dstPtr, slicePitch * slice); for (size_t height = copyOrigin[1]; height < (copyOrigin[1] + copyRegion[1]); height++) { auto rowOffset = ptrOffset(sliceOffset, rowPitch * height); memset(ptrOffset(rowOffset, copyOrigin[0] * pixelSize), 123, copyRegion[0] * pixelSize); } } } std::unique_ptr device; std::unique_ptr context; std::unique_ptr image; const cl_image_desc *imgDesc = nullptr; size_t hostPtrSlicePitch, hostPtrRowPitch, imageSlicePitch, imageRowPitch, pixelSize; }; TEST_F(ImageHostPtrTransferTests, given3dImageWithoutTilingWhenTransferToHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_depth / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedHostPtr(new uint8_t[hostPtrSlicePitch * imgDesc->image_depth]); memset(image->getHostPtr(), 0, hostPtrSlicePitch * imgDesc->image_depth); memset(expectedHostPtr.get(), 0, hostPtrSlicePitch * imgDesc->image_depth); memset(image->getCpuAddress(), 123, imageSlicePitch * imgDesc->image_depth); setExpectedData(expectedHostPtr.get(), hostPtrSlicePitch, hostPtrRowPitch, copyOrigin, copyRegion); image->transferDataToHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getHostPtr(), expectedHostPtr.get(), hostPtrSlicePitch * imgDesc->image_depth) == 0); } TEST_F(ImageHostPtrTransferTests, given3dImageWithoutTilingWhenTransferFromHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); 
createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_depth / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedImageData(new uint8_t[imageSlicePitch * imgDesc->image_depth]); memset(image->getHostPtr(), 123, hostPtrSlicePitch * imgDesc->image_depth); memset(expectedImageData.get(), 0, imageSlicePitch * imgDesc->image_depth); memset(image->getCpuAddress(), 0, imageSlicePitch * imgDesc->image_depth); setExpectedData(expectedImageData.get(), imageSlicePitch, imageRowPitch, copyOrigin, copyRegion); image->transferDataFromHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getCpuAddress(), expectedImageData.get(), imageSlicePitch * imgDesc->image_depth) == 0); } TEST_F(ImageHostPtrTransferTests, given2dArrayImageWithoutTilingWhenTransferToHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_array_size / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedHostPtr(new uint8_t[hostPtrSlicePitch * imgDesc->image_array_size]); memset(image->getHostPtr(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedHostPtr.get(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 123, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedHostPtr.get(), hostPtrSlicePitch, hostPtrRowPitch, copyOrigin, copyRegion); image->transferDataToHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getHostPtr(), expectedHostPtr.get(), 
hostPtrSlicePitch * imgDesc->image_array_size) == 0); } TEST_F(ImageHostPtrTransferTests, given2dArrayImageWithoutTilingWhenTransferFromHostPtrCalledThenCopyRequestedRegionAndOriginOnly) { DebugManagerStateRestore restorer; DebugManager.flags.ForceLinearImages.set(true); createImageAndSetTestParams(); EXPECT_NE(hostPtrSlicePitch, imageSlicePitch); EXPECT_NE(hostPtrRowPitch, imageRowPitch); EXPECT_NE(image->getCpuAddress(), image->getHostPtr()); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_height / 2, imgDesc->image_array_size / 2}}; std::array copyRegion = copyOrigin; std::unique_ptr expectedImageData(new uint8_t[imageSlicePitch * imgDesc->image_array_size]); memset(image->getHostPtr(), 123, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedImageData.get(), 0, imageSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 0, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedImageData.get(), imageSlicePitch, imageRowPitch, copyOrigin, copyRegion); image->transferDataFromHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getCpuAddress(), expectedImageData.get(), imageSlicePitch * imgDesc->image_array_size) == 0); } TEST_F(ImageHostPtrTransferTests, given1dArrayImageWhenTransferToHostPtrCalledThenUseSecondCoordinateAsSlice) { createImageAndSetTestParams(); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_array_size / 2, 0}}; std::array copyRegion = {{imgDesc->image_width / 2, imgDesc->image_array_size / 2, 1}}; std::unique_ptr expectedHostPtr(new uint8_t[hostPtrSlicePitch * imgDesc->image_array_size]); memset(image->getHostPtr(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedHostPtr.get(), 0, hostPtrSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 123, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedHostPtr.get(), hostPtrSlicePitch, hostPtrRowPitch, copyOrigin, copyRegion); 
image->transferDataToHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getHostPtr(), expectedHostPtr.get(), hostPtrSlicePitch * imgDesc->image_array_size) == 0); } TEST_F(ImageHostPtrTransferTests, given1dArrayImageWhenTransferFromHostPtrCalledThenUseSecondCoordinateAsSlice) { createImageAndSetTestParams(); std::array copyOrigin = {{imgDesc->image_width / 2, imgDesc->image_array_size / 2, 0}}; std::array copyRegion = {{imgDesc->image_width / 2, imgDesc->image_array_size / 2, 1}}; std::unique_ptr expectedImageData(new uint8_t[imageSlicePitch * imgDesc->image_array_size]); memset(image->getHostPtr(), 123, hostPtrSlicePitch * imgDesc->image_array_size); memset(expectedImageData.get(), 0, imageSlicePitch * imgDesc->image_array_size); memset(image->getCpuAddress(), 0, imageSlicePitch * imgDesc->image_array_size); setExpectedData(expectedImageData.get(), imageSlicePitch, imageRowPitch, copyOrigin, copyRegion); image->transferDataFromHostPtr(copyRegion, copyOrigin); EXPECT_TRUE(memcmp(image->getCpuAddress(), expectedImageData.get(), imageSlicePitch * imgDesc->image_array_size) == 0); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/image_validate_tests.cpp000066400000000000000000001161021363734646600301240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "gtest/gtest.h" using namespace NEO; typedef decltype(&Image::redescribe) RedescribeMethod; class ImageValidateTest : public testing::TestWithParam { public: ImageValidateTest() { imageFormat = 
&surfaceFormat.OCLImageFormat; imageFormat->image_channel_data_type = CL_UNSIGNED_INT8; imageFormat->image_channel_order = CL_RGBA; } protected: void SetUp() override { } void TearDown() override { } cl_int retVal = CL_SUCCESS; MockContext context; ClSurfaceFormatInfo surfaceFormat; cl_image_format *imageFormat; cl_image_desc imageDesc; }; typedef ImageValidateTest ValidDescriptor; typedef ImageValidateTest InvalidDescriptor; typedef ImageValidateTest InvalidSize; TEST_P(ValidDescriptor, validSizePassedToValidateReturnsSuccess) { imageDesc = GetParam(); retVal = Image::validate(&context, {}, &surfaceFormat, &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(InvalidDescriptor, zeroSizePassedToValidateReturnsError) { imageDesc = GetParam(); retVal = Image::validate(&context, {}, &surfaceFormat, &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_P(InvalidSize, invalidSizePassedToValidateReturnsError) { imageDesc = GetParam(); retVal = Image::validate(&context, {}, &surfaceFormat, &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, retVal); } TEST_P(ValidDescriptor, given3dImageFormatWhenGetSupportedFormatIsCalledThenDontReturnDepthFormats) { imageDesc = GetParam(); uint32_t readOnlyformatCount; uint32_t writeOnlyformatCount; uint32_t readWriteOnlyformatCount; context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_ONLY, imageDesc.image_type, 0, nullptr, &readOnlyformatCount); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_WRITE_ONLY, imageDesc.image_type, 0, nullptr, &writeOnlyformatCount); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_WRITE, imageDesc.image_type, 0, nullptr, &readWriteOnlyformatCount); auto readOnlyImgFormats = new cl_image_format[readOnlyformatCount]; auto writeOnlyImgFormats = new cl_image_format[writeOnlyformatCount]; auto readWriteOnlyImgFormats = new cl_image_format[readWriteOnlyformatCount]; 
context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_ONLY, imageDesc.image_type, readOnlyformatCount, readOnlyImgFormats, 0); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_WRITE_ONLY, imageDesc.image_type, writeOnlyformatCount, writeOnlyImgFormats, 0); context.getSupportedImageFormats(&context.getDevice(0)->getDevice(), CL_MEM_READ_WRITE, imageDesc.image_type, readWriteOnlyformatCount, readWriteOnlyImgFormats, 0); bool depthFound = false; for (uint32_t i = 0; i < readOnlyformatCount; i++) { if (readOnlyImgFormats[i].image_channel_order == CL_DEPTH || readOnlyImgFormats[i].image_channel_order == CL_DEPTH_STENCIL) depthFound = true; } for (uint32_t i = 0; i < readOnlyformatCount; i++) { if (readOnlyImgFormats[i].image_channel_order == CL_DEPTH || readOnlyImgFormats[i].image_channel_order == CL_DEPTH_STENCIL) depthFound = true; } for (uint32_t i = 0; i < readOnlyformatCount; i++) { if (readOnlyImgFormats[i].image_channel_order == CL_DEPTH || readOnlyImgFormats[i].image_channel_order == CL_DEPTH_STENCIL) depthFound = true; } if (!Image::isImage2dOr2dArray(imageDesc.image_type)) { EXPECT_FALSE(depthFound); } else { EXPECT_TRUE(depthFound); } delete[] readOnlyImgFormats; delete[] writeOnlyImgFormats; delete[] readWriteOnlyImgFormats; } TEST(ImageDepthFormatTest, returnSurfaceFormatForDepthFormats) { cl_image_format imgFormat = {}; imgFormat.image_channel_order = CL_DEPTH; imgFormat.image_channel_data_type = CL_FLOAT; auto surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imgFormat, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R32_FLOAT_TYPE); imgFormat.image_channel_data_type = CL_UNORM_INT16; surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_WRITE, &imgFormat, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_NE(surfaceFormatInfo, nullptr); 
EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R16_UNORM_TYPE); } TEST(ImageDepthFormatTest, returnSurfaceFormatForWriteOnlyDepthFormats) { cl_image_format imgFormat = {}; imgFormat.image_channel_order = CL_DEPTH; imgFormat.image_channel_data_type = CL_FLOAT; auto surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_WRITE_ONLY, &imgFormat, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R32_FLOAT_TYPE); imgFormat.image_channel_data_type = CL_UNORM_INT16; surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_WRITE_ONLY, &imgFormat, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R16_UNORM_TYPE); } TEST(ImageDepthFormatTest, returnSurfaceFormatForDepthStencilFormats) { cl_image_format imgFormat = {}; imgFormat.image_channel_order = CL_DEPTH_STENCIL; imgFormat.image_channel_data_type = CL_UNORM_INT24; auto surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_ONLY, &imgFormat, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_GENERIC_32BIT); imgFormat.image_channel_order = CL_DEPTH_STENCIL; imgFormat.image_channel_data_type = CL_FLOAT; surfaceFormatInfo = Image::getSurfaceFormatFromTable(CL_MEM_READ_ONLY, &imgFormat, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_NE(surfaceFormatInfo, nullptr); EXPECT_TRUE(surfaceFormatInfo->surfaceFormat.GMMSurfaceFormat == GMM_FORMAT_R32G32_FLOAT_TYPE); } static cl_image_desc validImageDesc[] = { {CL_MEM_OBJECT_IMAGE1D, /*image_type*/ 16384, /*image_width*/ 1, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ 
{CL_MEM_OBJECT_IMAGE1D_ARRAY, /*image_type*/ 16384, /*image_width*/ 1, /*image_height*/ 1, /*image_depth*/ 2, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 512, /*image_width*/ 512, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 16384, /*image_width*/ 16384, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D_ARRAY, /*image_type*/ 16384, /*image_width*/ 16384, /*image_height*/ 1, /*image_depth*/ 1, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D_ARRAY, /*image_type*/ 16384, /*image_width*/ 16384, /*image_height*/ 1, /*image_depth*/ 2, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE3D, /*image_type*/ 16384, /*image_width*/ 16384, /*image_height*/ 3, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE3D, /*image_type*/ 2, /*image_width*/ 2, /*image_height*/ 2, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ }; static cl_image_desc invalidImageDesc[] = { {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 0, /*image_width*/ 512, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, 
/*image_type*/ 512, /*image_width*/ 0, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ }; static cl_image_desc invalidImageSize[] = { {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 16384 + 10, /*image_width*/ 512, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ {CL_MEM_OBJECT_IMAGE2D, /*image_type*/ 1, /*image_width*/ 16384 + 10, /*image_height*/ 1, /*image_depth*/ 0, /*image_array_size*/ 0, /*image_row_pitch*/ 0, /*image_slice_pitch*/ 0, /*num_mip_levels*/ 0, /*num_samples*/ {0}}, /*mem_object */ }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDescriptor, ::testing::ValuesIn(validImageDesc)); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDescriptor, ::testing::ValuesIn(invalidImageDesc)); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidSize, ::testing::ValuesIn(invalidImageSize)); class ValidImageFormatTest : public ::testing::TestWithParam> { public: void validateFormat() { cl_image_format imageFormat; cl_int retVal; std::tie(imageFormat.image_channel_order, imageFormat.image_channel_data_type) = GetParam(); retVal = Image::validateImageFormat(&imageFormat); EXPECT_EQ(CL_SUCCESS, retVal); } }; class InvalidImageFormatTest : public ::testing::TestWithParam> { public: void validateFormat() { cl_image_format imageFormat; cl_int retVal; std::tie(imageFormat.image_channel_order, imageFormat.image_channel_data_type) = GetParam(); retVal = Image::validateImageFormat(&imageFormat); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } }; typedef ValidImageFormatTest ValidSingleChannelFormat; typedef InvalidImageFormatTest InvalidSingleChannelFormat; cl_channel_order validSingleChannelOrder[] = {CL_R, CL_A, CL_Rx}; cl_channel_type validSingleChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, 
CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidSingleChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidSingleChannelFormat, givenValidSingleChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidSingleChannelFormat, givenInvalidSingleChannelChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidSingleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSingleChannelOrder), ::testing::ValuesIn(validSingleChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidSingleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSingleChannelOrder), ::testing::ValuesIn(invalidSingleChannelDataTypes))); typedef ValidImageFormatTest ValidIntensityFormat; typedef InvalidImageFormatTest InvalidIntensityFormat; cl_channel_order validIntensityChannelOrders[] = {CL_INTENSITY}; cl_channel_type validIntensityChannelDataTypes[] = {CL_UNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT8, CL_SNORM_INT16, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidIntensityChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidIntensityFormat, givenValidIntensityImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidIntensityFormat, givenInvalidIntensityChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidIntensityFormat, ::testing::Combine( ::testing::ValuesIn(validIntensityChannelOrders), ::testing::ValuesIn(validIntensityChannelDataTypes))); INSTANTIATE_TEST_CASE_P( 
ImageValidate, InvalidIntensityFormat, ::testing::Combine( ::testing::ValuesIn(validIntensityChannelOrders), ::testing::ValuesIn(invalidIntensityChannelDataTypes))); typedef ValidImageFormatTest ValidLuminanceFormat; typedef InvalidImageFormatTest InvalidLuminanceFormat; cl_channel_order validLuminanceChannelOrders[] = {CL_LUMINANCE}; cl_channel_type validLuminanceChannelDataTypes[] = {CL_UNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT8, CL_SNORM_INT16, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidLuminanceChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidLuminanceFormat, givenValidLuminanceImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidLuminanceFormat, givenInvalidLuminanceChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidLuminanceFormat, ::testing::Combine( ::testing::ValuesIn(validLuminanceChannelOrders), ::testing::ValuesIn(validLuminanceChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidLuminanceFormat, ::testing::Combine( ::testing::ValuesIn(validLuminanceChannelOrders), ::testing::ValuesIn(invalidLuminanceChannelDataTypes))); typedef ValidImageFormatTest ValidDepthFormat; typedef InvalidImageFormatTest InvalidDepthFormat; cl_channel_order validDepthChannelOrders[] = {CL_DEPTH}; cl_channel_type validDepthChannelDataTypes[] = {CL_UNORM_INT16, CL_FLOAT}; cl_channel_type invalidDepthChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidDepthFormat, 
givenValidDepthImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidDepthFormat, givenInvalidDepthChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDepthFormat, ::testing::Combine( ::testing::ValuesIn(validDepthChannelOrders), ::testing::ValuesIn(validDepthChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDepthFormat, ::testing::Combine( ::testing::ValuesIn(validDepthChannelOrders), ::testing::ValuesIn(invalidDepthChannelDataTypes))); typedef ValidImageFormatTest ValidDoubleChannelFormat; typedef InvalidImageFormatTest InvalidDoubleChannelFormat; cl_channel_order validDoubleChannelOrders[] = {CL_RG, CL_RGx, CL_RA}; cl_channel_type validDoubleChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidDoubleChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidDoubleChannelFormat, givenValidDoubleChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidDoubleChannelFormat, givenInvalidDoubleChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDoubleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDoubleChannelOrders), ::testing::ValuesIn(validDoubleChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDoubleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDoubleChannelOrders), ::testing::ValuesIn(invalidDoubleChannelDataTypes))); typedef ValidImageFormatTest ValidTripleChannelFormat; typedef InvalidImageFormatTest InvalidTripleChannelFormat; cl_channel_order validTripleChannelOrders[] = {CL_RGB, CL_RGBx}; 
cl_channel_type validTripleChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010}; cl_channel_type invalidTripleChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidTripleChannelFormat, givenValidTripleChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidTripleChannelFormat, givenInvalidTripleChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidTripleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validTripleChannelOrders), ::testing::ValuesIn(validTripleChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidTripleChannelFormat, ::testing::Combine( ::testing::ValuesIn(validTripleChannelOrders), ::testing::ValuesIn(invalidTripleChannelDataTypes))); typedef ValidImageFormatTest ValidRGBAChannelFormat; typedef InvalidImageFormatTest InvalidRGBAChannelFormat; cl_channel_order validRGBAChannelOrders[] = {CL_RGBA}; cl_channel_type validRGBAChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; cl_channel_type invalidRGBAChannelDataTypes[] = {CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidRGBAChannelFormat, givenValidRGBAChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidRGBAChannelFormat, givenInvalidRGBAChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidRGBAChannelFormat, ::testing::Combine( 
::testing::ValuesIn(validRGBAChannelOrders), ::testing::ValuesIn(validRGBAChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidRGBAChannelFormat, ::testing::Combine( ::testing::ValuesIn(validRGBAChannelOrders), ::testing::ValuesIn(invalidRGBAChannelDataTypes))); typedef ValidImageFormatTest ValidSRGBChannelFormat; typedef InvalidImageFormatTest InvalidSRGBChannelFormat; cl_channel_order validSRGBChannelOrders[] = {CL_sRGB, CL_sRGBx, CL_sRGBA, CL_sBGRA}; cl_channel_type validSRGBChannelDataTypes[] = {CL_UNORM_INT8}; cl_channel_type invalidSRGBChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidSRGBChannelFormat, givenValidSRGBChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidSRGBChannelFormat, givenInvalidSRGBChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidSRGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSRGBChannelOrders), ::testing::ValuesIn(validSRGBChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidSRGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validSRGBChannelOrders), ::testing::ValuesIn(invalidSRGBChannelDataTypes))); typedef ValidImageFormatTest ValidARGBChannelFormat; typedef InvalidImageFormatTest InvalidARGBChannelFormat; cl_channel_order validARGBChannelOrders[] = {CL_ARGB, CL_BGRA, CL_ABGR}; cl_channel_type validARGBChannelDataTypes[] = {CL_UNORM_INT8, CL_SNORM_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT8}; cl_channel_type invalidARGBChannelDataTypes[] = {CL_SNORM_INT16, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT16, 
CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidARGBChannelFormat, givenValidARGBChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidARGBChannelFormat, givenInvalidARGBChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidARGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validARGBChannelOrders), ::testing::ValuesIn(validARGBChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidARGBChannelFormat, ::testing::Combine( ::testing::ValuesIn(validARGBChannelOrders), ::testing::ValuesIn(invalidARGBChannelDataTypes))); typedef ValidImageFormatTest ValidDepthStencilChannelFormat; typedef InvalidImageFormatTest InvalidDepthStencilChannelFormat; cl_channel_order validDepthStencilChannelOrders[] = {CL_DEPTH_STENCIL}; cl_channel_type validDepthStencilChannelDataTypes[] = {CL_UNORM_INT24, CL_FLOAT}; cl_channel_type invalidDepthStencilChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_UNORM_INT_101010_2}; TEST_P(ValidDepthStencilChannelFormat, givenValidDepthStencilChannelImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidDepthStencilChannelFormat, givenInvalidDepthStencilChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidDepthStencilChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDepthStencilChannelOrders), ::testing::ValuesIn(validDepthStencilChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidDepthStencilChannelFormat, ::testing::Combine( ::testing::ValuesIn(validDepthStencilChannelOrders), 
::testing::ValuesIn(invalidDepthStencilChannelDataTypes))); typedef ValidImageFormatTest ValidYUVImageFormat; typedef InvalidImageFormatTest InvalidYUVImageFormat; cl_channel_order validYUVChannelOrders[] = {CL_NV12_INTEL, CL_YUYV_INTEL, CL_UYVY_INTEL, CL_YVYU_INTEL, CL_VYUY_INTEL}; cl_channel_type validYUVChannelDataTypes[] = {CL_UNORM_INT8}; cl_channel_type invalidYUVChannelDataTypes[] = {CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT, CL_UNORM_INT24, CL_UNORM_INT_101010_2}; TEST_P(ValidYUVImageFormat, givenValidYUVImageFormatWhenValidateImageFormatIsCalledThenReturnsSuccess) { validateFormat(); }; TEST_P(InvalidYUVImageFormat, givenInvalidYUVChannelDataTypeWhenValidateImageFormatIsCalledThenReturnsError) { validateFormat(); }; INSTANTIATE_TEST_CASE_P( ImageValidate, ValidYUVImageFormat, ::testing::Combine( ::testing::ValuesIn(validYUVChannelOrders), ::testing::ValuesIn(validYUVChannelDataTypes))); INSTANTIATE_TEST_CASE_P( ImageValidate, InvalidYUVImageFormat, ::testing::Combine( ::testing::ValuesIn(validYUVChannelOrders), ::testing::ValuesIn(invalidYUVChannelDataTypes))); TEST(ImageFormat, givenNullptrImageFormatWhenValidateImageFormatIsCalledThenReturnsError) { auto retVal = Image::validateImageFormat(nullptr); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST(validateAndCreateImage, givenInvalidImageFormatWhenValidateAndCreateImageIsCalledThenReturnsInvalidDescriptorError) { MockContext context; cl_image_format imageFormat; cl_int retVal = CL_SUCCESS; Image *image; imageFormat.image_channel_order = 0; imageFormat.image_channel_data_type = 0; image = Image::validateAndCreateImage(&context, {}, 0, 0, &imageFormat, &Image1dDefaults::imageDesc, nullptr, retVal); EXPECT_EQ(nullptr, image); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } 
TEST(validateAndCreateImage, givenNotSupportedImageFormatWhenValidateAndCreateImageIsCalledThenReturnsNotSupportedFormatError) { MockContext context; cl_image_format imageFormat = {CL_INTENSITY, CL_UNORM_INT8}; cl_int retVal = CL_SUCCESS; Image *image; cl_mem_flags flags = CL_MEM_READ_WRITE; image = Image::validateAndCreateImage(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, &imageFormat, &Image1dDefaults::imageDesc, nullptr, retVal); EXPECT_EQ(nullptr, image); EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); } TEST(validateAndCreateImage, givenValidImageParamsWhenValidateAndCreateImageIsCalledThenReturnsSuccess) { MockContext context; cl_image_desc imageDesc; cl_mem_flags flags = CL_MEM_READ_ONLY; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; imageDesc.mem_object = NULL; // 1D image with 0 row_pitch and 0 slice_pitch imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 10; imageDesc.image_height = 0; imageDesc.image_depth = 0; imageDesc.image_array_size = 0; imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; cl_image_format imageFormat = {CL_INTENSITY, CL_UNORM_INT8}; cl_int retVal = CL_SUCCESS; std::unique_ptr image = nullptr; image.reset(Image::validateAndCreateImage( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, &imageFormat, &imageDesc, nullptr, retVal)); EXPECT_NE(nullptr, image); EXPECT_EQ(CL_SUCCESS, retVal); } std::tuple normalizingFactorValues[] = { std::make_tuple(CL_SNORM_INT8, 0x7F), std::make_tuple(CL_SNORM_INT16, 0x7fFF), std::make_tuple(CL_UNORM_INT8, 0xFF), std::make_tuple(CL_UNORM_INT16, 0xFFFF), std::make_tuple(CL_UNORM_SHORT_565, 0), std::make_tuple(CL_UNORM_SHORT_555, 0), std::make_tuple(CL_UNORM_INT_101010, 0), std::make_tuple(CL_SIGNED_INT8, 0), std::make_tuple(CL_SIGNED_INT16, 0), std::make_tuple(CL_SIGNED_INT32, 0), std::make_tuple(CL_UNSIGNED_INT8, 0), std::make_tuple(CL_UNSIGNED_INT16, 0), 
std::make_tuple(CL_UNSIGNED_INT32, 0), std::make_tuple(CL_HALF_FLOAT, 0), std::make_tuple(CL_FLOAT, 0), std::make_tuple(CL_UNORM_INT24, 0), std::make_tuple(CL_UNORM_INT_101010_2, 0), }; using NormalizingFactorTests = ::testing::TestWithParam>; TEST_P(NormalizingFactorTests, givenChannelTypeWhenAskingForFactorThenReturnValidValue) { auto factor = selectNormalizingFactor(std::get<0>(GetParam())); EXPECT_EQ(std::get<1>(GetParam()), factor); }; INSTANTIATE_TEST_CASE_P( NormalizingFactorTests, NormalizingFactorTests, ::testing::ValuesIn(normalizingFactorValues)); using ValidParentImageFormatTest = ::testing::TestWithParam>; cl_channel_order allChannelOrders[] = {CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB, CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx, CL_DEPTH, CL_DEPTH_STENCIL, CL_sRGB, CL_sRGBx, CL_sRGBA, CL_sBGRA, CL_ABGR, CL_NV12_INTEL}; struct NullImage : public Image { using Image::imageDesc; using Image::imageFormat; NullImage() : Image(nullptr, MemoryPropertiesFlags(), cl_mem_flags{}, 0, 0, nullptr, cl_image_format{}, cl_image_desc{}, false, new MockGraphicsAllocation(nullptr, 0), false, 0, 0, ClSurfaceFormatInfo{}, nullptr) { } ~NullImage() override { delete this->graphicsAllocation; } void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel) override {} void setMediaImageArg(void *memory) override {} void setMediaSurfaceRotation(void *memory) override {} void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override {} void transformImage2dArrayTo3d(void *memory) override {} void transformImage3dTo2dArray(void *memory) override {} }; TEST_P(ValidParentImageFormatTest, givenParentChannelOrderWhenTestWithAllChannelOrdersThenReturnTrueForValidChannelOrder) { cl_image_format parentImageFormat; cl_image_format imageFormat; cl_channel_order validChannelOrder; NullImage image; std::tie(parentImageFormat.image_channel_order, validChannelOrder) = GetParam(); parentImageFormat.image_channel_data_type 
= CL_UNORM_INT8; imageFormat.image_channel_data_type = CL_UNORM_INT8; image.imageFormat = parentImageFormat; for (auto channelOrder : allChannelOrders) { imageFormat.image_channel_order = channelOrder; bool retVal = image.hasValidParentImageFormat(imageFormat); EXPECT_EQ(imageFormat.image_channel_order == validChannelOrder, retVal); } }; std::tuple imageFromImageValidChannelOrderPairs[] = { std::make_tuple(CL_BGRA, CL_sBGRA), std::make_tuple(CL_sBGRA, CL_BGRA), std::make_tuple(CL_RGBA, CL_sRGBA), std::make_tuple(CL_sRGBA, CL_RGBA), std::make_tuple(CL_RGB, CL_sRGB), std::make_tuple(CL_sRGB, CL_RGB), std::make_tuple(CL_RGBx, CL_sRGBx), std::make_tuple(CL_sRGBx, CL_RGBx), std::make_tuple(CL_R, CL_DEPTH), std::make_tuple(CL_A, 0), std::make_tuple(CL_RG, 0), std::make_tuple(CL_RA, 0), std::make_tuple(CL_ARGB, 0), std::make_tuple(CL_INTENSITY, 0), std::make_tuple(CL_LUMINANCE, 0), std::make_tuple(CL_Rx, 0), std::make_tuple(CL_RGx, 0), std::make_tuple(CL_DEPTH, 0), std::make_tuple(CL_DEPTH_STENCIL, 0), std::make_tuple(CL_ABGR, 0), std::make_tuple(CL_NV12_INTEL, 0)}; INSTANTIATE_TEST_CASE_P( ValidParentImageFormatTests, ValidParentImageFormatTest, ::testing::ValuesIn(imageFromImageValidChannelOrderPairs)); TEST(ImageDescriptorComparatorTest, givenImageWhenCallHasSameDescriptorWithSameDescriptorThenReturnTrueOtherwiseFalse) { NullImage image; cl_image_desc descriptor = image.imageDesc; image.imageDesc.image_row_pitch = image.getHostPtrRowPitch() + 10; // to make sure we compare host ptr row/slice pitches image.imageDesc.image_slice_pitch = image.getHostPtrSlicePitch() + 10; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_type++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_type--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_width++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_width--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_height++; 
EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_height--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_depth++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_depth--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_array_size++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_array_size--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_row_pitch++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_row_pitch--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.image_slice_pitch++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.image_slice_pitch--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.num_mip_levels++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); descriptor.num_mip_levels--; EXPECT_TRUE(image.hasSameDescriptor(descriptor)); descriptor.num_samples++; EXPECT_FALSE(image.hasSameDescriptor(descriptor)); }; TEST(ImageFormatValidatorTest, givenValidParentChannelOrderAndChannelOrderWhenFormatsHaveDifferentDataTypeThenHasValidParentImageFormatReturnsFalse) { cl_image_format imageFormat; NullImage image; image.imageFormat.image_channel_data_type = CL_UNORM_INT8; image.imageFormat.image_channel_order = CL_BGRA; imageFormat.image_channel_data_type = CL_UNORM_INT16; imageFormat.image_channel_order = CL_sBGRA; EXPECT_FALSE(image.hasValidParentImageFormat(imageFormat)); }; TEST(ImageValidatorTest, givenInvalidImage2dSizesWithoutParentObjectWhenValidateImageThenReturnsError) { MockContext context; cl_image_desc descriptor; void *dummyPtr = reinterpret_cast(0x17); ClSurfaceFormatInfo surfaceFormat; descriptor.image_type = CL_MEM_OBJECT_IMAGE2D; descriptor.image_row_pitch = 0; descriptor.image_height = 1; descriptor.image_width = 0; descriptor.mem_object = nullptr; EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); 
descriptor.image_height = 0; descriptor.image_width = 1; EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); }; TEST(ImageValidatorTest, givenNV12Image2dAsParentImageWhenValidateImageZeroSizedThenReturnsSuccess) { NullImage image; cl_image_desc descriptor; MockContext context; void *dummyPtr = reinterpret_cast(0x17); ClSurfaceFormatInfo surfaceFormat = {}; image.imageFormat.image_channel_order = CL_NV12_INTEL; descriptor.image_type = CL_MEM_OBJECT_IMAGE2D; descriptor.image_height = 0; descriptor.image_width = 0; descriptor.image_row_pitch = 0; descriptor.mem_object = ℑ EXPECT_EQ(CL_SUCCESS, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); }; TEST(ImageValidatorTest, givenNonNV12Image2dAsParentImageWhenValidateImageZeroSizedThenReturnsError) { NullImage image; cl_image_desc descriptor; MockContext context; void *dummyPtr = reinterpret_cast(0x17); ClSurfaceFormatInfo surfaceFormat; image.imageFormat.image_channel_order = CL_BGRA; image.imageFormat.image_channel_data_type = CL_UNORM_INT8; surfaceFormat.OCLImageFormat.image_channel_order = CL_sBGRA; surfaceFormat.OCLImageFormat.image_channel_data_type = CL_UNORM_INT8; descriptor.image_type = CL_MEM_OBJECT_IMAGE2D; descriptor.image_height = 0; descriptor.image_width = 0; descriptor.image_row_pitch = image.getHostPtrRowPitch(); descriptor.image_slice_pitch = image.getHostPtrSlicePitch(); image.imageDesc = descriptor; descriptor.mem_object = ℑ EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, Image::validate(&context, {}, &surfaceFormat, &descriptor, dummyPtr)); }; compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/map_operations_handler_tests.cpp000066400000000000000000000166211363734646600317130ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/mem_obj/map_operations_handler.h" #include "test.h" #include using 
namespace NEO; struct MockMapOperationsHandler : public MapOperationsHandler { using MapOperationsHandler::isOverlapping; using MapOperationsHandler::mappedPointers; }; struct MapOperationsHandlerTests : public ::testing::Test { MockMapOperationsHandler mockHandler; MapInfo mappedPtrs[3] = { {(void *)0x1000, 1, {{1, 2, 3}}, {{4, 5, 6}}, 0}, {(void *)0x2000, 1, {{7, 8, 9}}, {{10, 11, 12}}, 0}, {(void *)0x3000, 1, {{13, 14, 15}}, {{16, 17, 18}}, 0}, }; cl_map_flags mapFlags = CL_MAP_READ; }; TEST_F(MapOperationsHandlerTests, givenMapInfoWhenFindingThenReturnCorrectvalues) { for (size_t i = 0; i < 3; i++) { EXPECT_TRUE(mockHandler.add(mappedPtrs[i].ptr, mappedPtrs[i].ptrLength, mapFlags, mappedPtrs[i].size, mappedPtrs[i].offset, 0)); } EXPECT_EQ(3u, mockHandler.size()); for (int i = 2; i >= 0; i--) { MapInfo receivedMapInfo; EXPECT_TRUE(mockHandler.find(mappedPtrs[i].ptr, receivedMapInfo)); EXPECT_EQ(receivedMapInfo.ptr, mappedPtrs[i].ptr); EXPECT_EQ(receivedMapInfo.size, mappedPtrs[i].size); EXPECT_EQ(receivedMapInfo.offset, mappedPtrs[i].offset); } } TEST_F(MapOperationsHandlerTests, givenMapInfoWhenRemovingThenRemoveCorrectPointers) { for (size_t i = 0; i < 3; i++) { mockHandler.add(mappedPtrs[i].ptr, mappedPtrs[i].ptrLength, mapFlags, mappedPtrs[i].size, mappedPtrs[i].offset, 0); } for (int i = 2; i >= 0; i--) { mockHandler.remove(mappedPtrs[i].ptr); MapInfo receivedMapInfo; EXPECT_FALSE(mockHandler.find(mappedPtrs[i].ptr, receivedMapInfo)); } EXPECT_EQ(0u, mockHandler.size()); } TEST_F(MapOperationsHandlerTests, givenMappedPtrsWhenDoubleRemovedThenDoNothing) { mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); mockHandler.add(mappedPtrs[1].ptr, mappedPtrs[1].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_EQ(2u, mockHandler.size()); mockHandler.remove(mappedPtrs[1].ptr); mockHandler.remove(mappedPtrs[1].ptr); EXPECT_EQ(1u, mockHandler.size()); MapInfo receivedMapInfo; 
EXPECT_FALSE(mockHandler.find(mappedPtrs[1].ptr, receivedMapInfo)); EXPECT_TRUE(mockHandler.find(mappedPtrs[0].ptr, receivedMapInfo)); } TEST_F(MapOperationsHandlerTests, givenMapInfoWhenAddedThenSetReadOnlyFlag) { mapFlags = CL_MAP_READ; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_TRUE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); mapFlags = CL_MAP_WRITE; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); mapFlags = CL_MAP_WRITE_INVALIDATE_REGION; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); mapFlags = CL_MAP_READ | CL_MAP_WRITE; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); mapFlags = CL_MAP_READ | CL_MAP_WRITE_INVALIDATE_REGION; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); mockHandler.remove(mappedPtrs[0].ptr); } TEST_F(MapOperationsHandlerTests, givenNonReadOnlyOverlappingPtrWhenAddingThenReturnFalseAndDontAdd) { mapFlags = CL_MAP_WRITE; mappedPtrs->readOnly = false; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_EQ(1u, mockHandler.size()); EXPECT_FALSE(mockHandler.mappedPointers.back().readOnly); EXPECT_TRUE(mockHandler.isOverlapping(mappedPtrs[0])); EXPECT_FALSE(mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, 
mappedPtrs[0].offset, 0)); EXPECT_EQ(1u, mockHandler.size()); } TEST_F(MapOperationsHandlerTests, givenReadOnlyOverlappingPtrWhenAddingThenReturnTrueAndAdd) { mapFlags = CL_MAP_READ; mappedPtrs->readOnly = true; mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0); EXPECT_EQ(1u, mockHandler.size()); EXPECT_TRUE(mockHandler.mappedPointers.back().readOnly); EXPECT_FALSE(mockHandler.isOverlapping(mappedPtrs[0])); EXPECT_TRUE(mockHandler.add(mappedPtrs[0].ptr, mappedPtrs[0].ptrLength, mapFlags, mappedPtrs[0].size, mappedPtrs[0].offset, 0)); EXPECT_EQ(2u, mockHandler.size()); EXPECT_TRUE(mockHandler.mappedPointers.back().readOnly); } const std::tuple overlappingCombinations[] = { // mappedPtrStart, mappedPtrLength, requestPtrStart, requestPtrLength, expectOverlap std::make_tuple((void *)5000, 50, (void *)4000, 1, false), //requested before, non-overlapping std::make_tuple((void *)5000, 50, (void *)4999, 10, true), //requested before, overlapping inside std::make_tuple((void *)5000, 50, (void *)4999, 100, true), //requested before, overlapping outside std::make_tuple((void *)5000, 50, (void *)5001, 1, true), //requested inside, overlapping inside std::make_tuple((void *)5000, 50, (void *)5001, 100, true), //requested inside, overlapping outside std::make_tuple((void *)5000, 50, (void *)6000, 1, false), //requested after, non-overlapping std::make_tuple((void *)5000, 50, (void *)5000, 1, true), //requested on start, overlapping inside std::make_tuple((void *)5000, 50, (void *)5000, 100, true), //requested on start, overlapping outside }; struct MapOperationsHandlerOverlapTests : public ::testing::WithParamInterface>, public ::testing::Test {}; TEST_P(MapOperationsHandlerOverlapTests, givenAlreadyMappedPtrWhenAskingForOverlapThenReturnCorrectValue) { cl_map_flags mapFlags = CL_MAP_WRITE; void *mappedPtr = std::get<0>(GetParam()); size_t mappedPtrLength = std::get<1>(GetParam()); void *requestedPtr = 
std::get<2>(GetParam()); size_t requestedPtrLength = std::get<3>(GetParam()); bool expectOverlap = std::get<4>(GetParam()); // size and offset arrays are ignored MapInfo mappedInfo(mappedPtr, mappedPtrLength, {{0, 0, 0}}, {{0, 0, 0}}, 0); MapInfo requestedInfo(requestedPtr, requestedPtrLength, {{0, 0, 0}}, {{0, 0, 0}}, 0); requestedInfo.readOnly = false; MockMapOperationsHandler mockHandler; mockHandler.add(mappedInfo.ptr, mappedInfo.ptrLength, mapFlags, mappedInfo.size, mappedInfo.offset, 0); EXPECT_EQ(expectOverlap, mockHandler.isOverlapping(requestedInfo)); } INSTANTIATE_TEST_CASE_P(MapOperationsHandlerOverlapTests, MapOperationsHandlerOverlapTests, ::testing::ValuesIn(overlappingCombinations)); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp000066400000000000000000000420211363734646600312220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/api/api.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; template class MyCsr : public UltCommandStreamReceiver { public: MyCsr(const ExecutionEnvironment &executionEnvironment) : UltCommandStreamReceiver(const_cast(executionEnvironment), 0) {} 
MOCK_METHOD3(waitForCompletionWithTimeout, bool(bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait)); }; void CL_CALLBACK emptyDestructorCallback(cl_mem memObj, void *userData) { } class MemObjDestructionTest : public ::testing::TestWithParam { public: void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); device = std::make_unique(MockDevice::create(executionEnvironment, 0)); context.reset(new MockContext(device.get())); allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), size}); memObj = new MemObj(context.get(), CL_MEM_OBJECT_BUFFER, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0), CL_MEM_READ_WRITE, 0, size, nullptr, nullptr, allocation, true, false, false); csr = device->getDefaultEngine().commandStreamReceiver; *csr->getTagAddress() = 0; contextId = device->getDefaultEngine().osContext->getContextId(); } void TearDown() override { context.reset(); } void makeMemObjUsed() { memObj->getGraphicsAllocation()->updateTaskCount(taskCountReady, contextId); } void makeMemObjNotReady() { makeMemObjUsed(); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountReady - 1; } void makeMemObjReady() { makeMemObjUsed(); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = taskCountReady; } constexpr static uint32_t taskCountReady = 3u; ExecutionEnvironment *executionEnvironment = nullptr; std::unique_ptr device; uint32_t contextId = 0; MockMemoryManager *memoryManager = nullptr; std::unique_ptr context; GraphicsAllocation *allocation = nullptr; MemObj *memObj = nullptr; CommandStreamReceiver *csr = nullptr; size_t size = MemoryConstants::pageSize; }; class MemObjAsyncDestructionTest : public MemObjDestructionTest { public: void SetUp() override { 
DebugManager.flags.EnableAsyncDestroyAllocations.set(true); MemObjDestructionTest::SetUp(); } void TearDown() override { MemObjDestructionTest::TearDown(); DebugManager.flags.EnableAsyncDestroyAllocations.set(defaultFlag); } bool defaultFlag = DebugManager.flags.EnableAsyncDestroyAllocations.get(); }; class MemObjSyncDestructionTest : public MemObjDestructionTest { public: void SetUp() override { DebugManager.flags.EnableAsyncDestroyAllocations.set(false); MemObjDestructionTest::SetUp(); } void TearDown() override { MemObjDestructionTest::TearDown(); DebugManager.flags.EnableAsyncDestroyAllocations.set(defaultFlag); } bool defaultFlag = DebugManager.flags.EnableAsyncDestroyAllocations.get(); }; TEST_P(MemObjAsyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsyncDestructionsAreEnabledAndAllocationIsNotReadyAndMemObjectIsDestructedThenAllocationIsDeferred) { bool isMemObjReady; bool expectedDeferration; isMemObjReady = GetParam(); expectedDeferration = !isMemObjReady; if (isMemObjReady) { makeMemObjReady(); } else { makeMemObjNotReady(); } auto &allocationList = csr->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); delete memObj; EXPECT_EQ(!expectedDeferration, allocationList.peekIsEmpty()); if (expectedDeferration) { EXPECT_EQ(allocation, allocationList.peekHead()); } } HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabledThatHasDestructorCallbacksWhenItIsDestroyedThenDestructorWaitsOnTaskCount) { bool hasCallbacks = GetParam(); if (hasCallbacks) { memObj->setDestructorCallback(emptyDestructorCallback, nullptr); } auto mockCsr0 = new ::testing::NiceMock>(*device->executionEnvironment); auto mockCsr1 = new ::testing::NiceMock>(*device->executionEnvironment); device->resetCommandStreamReceiver(mockCsr0, 0); device->resetCommandStreamReceiver(mockCsr1, 1); *mockCsr0->getTagAddress() = 0; *mockCsr1->getTagAddress() = 0; auto waitForCompletionWithTimeoutMock0 = [&mockCsr0](bool enableTimeout, int64_t timeoutMs, 
uint32_t taskCountToWait) -> bool { *mockCsr0->getTagAddress() = taskCountReady; return true; }; auto waitForCompletionWithTimeoutMock1 = [&mockCsr1](bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) -> bool { *mockCsr1->getTagAddress() = taskCountReady; return true; }; auto osContextId0 = mockCsr0->getOsContext().getContextId(); auto osContextId1 = mockCsr1->getOsContext().getContextId(); memObj->getGraphicsAllocation()->updateTaskCount(taskCountReady, osContextId0); memObj->getGraphicsAllocation()->updateTaskCount(taskCountReady, osContextId1); ON_CALL(*mockCsr0, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock0)); ON_CALL(*mockCsr1, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock1)); if (hasCallbacks) { EXPECT_CALL(*mockCsr0, waitForCompletionWithTimeout(::testing::_, TimeoutControls::maxTimeout, allocation->getTaskCount(osContextId0))) .Times(1); EXPECT_CALL(*mockCsr1, waitForCompletionWithTimeout(::testing::_, TimeoutControls::maxTimeout, allocation->getTaskCount(osContextId1))) .Times(1); } else { *mockCsr0->getTagAddress() = taskCountReady; *mockCsr1->getTagAddress() = taskCountReady; EXPECT_CALL(*mockCsr0, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .Times(0); EXPECT_CALL(*mockCsr1, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .Times(0); } delete memObj; } HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabledThatHasAllocatedMappedPtrWhenItIsDestroyedThenDestructorWaitsOnTaskCount) { makeMemObjUsed(); bool hasAllocatedMappedPtr = GetParam(); if (hasAllocatedMappedPtr) { auto allocatedPtr = alignedMalloc(size, MemoryConstants::pageSize); memObj->setAllocatedMapPtr(allocatedPtr); } auto mockCsr = new ::testing::NiceMock>(*device->executionEnvironment); 
device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; auto osContextId = mockCsr->getOsContext().getContextId(); bool desired = true; auto waitForCompletionWithTimeoutMock = [=](bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) -> bool { return desired; }; ON_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock)); if (hasAllocatedMappedPtr) { EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, TimeoutControls::maxTimeout, allocation->getTaskCount(osContextId))) .Times(1); } else { EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .Times(0); } delete memObj; } HWTEST_P(MemObjAsyncDestructionTest, givenUsedMemObjWithAsyncDestructionsEnabledThatHasDestructableMappedPtrWhenItIsDestroyedThenDestructorWaitsOnTaskCount) { auto storage = alignedMalloc(size, MemoryConstants::pageSize); bool hasAllocatedMappedPtr = GetParam(); if (!hasAllocatedMappedPtr) { delete memObj; allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context->getDevice(0)->getRootDeviceIndex(), size}); MemObjOffsetArray origin = {{0, 0, 0}}; MemObjSizeArray region = {{1, 1, 1}}; cl_map_flags mapFlags = CL_MAP_READ; memObj = new MemObj(context.get(), CL_MEM_OBJECT_BUFFER, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0), CL_MEM_READ_WRITE, 0, size, storage, nullptr, allocation, true, false, false); memObj->addMappedPtr(storage, 1, mapFlags, region, origin, 0); } else { memObj->setAllocatedMapPtr(storage); } makeMemObjUsed(); auto mockCsr = new ::testing::NiceMock>(*device->executionEnvironment); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; bool desired = true; auto waitForCompletionWithTimeoutMock = [=](bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) -> bool { return desired; }; auto osContextId = 
mockCsr->getOsContext().getContextId(); ON_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock)); if (hasAllocatedMappedPtr) { EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, TimeoutControls::maxTimeout, allocation->getTaskCount(osContextId))) .Times(1); } else { EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .Times(0); } delete memObj; if (!hasAllocatedMappedPtr) { alignedFree(storage); } } HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsyncDestructionsAreDisabledThenDestructorWaitsOnTaskCount) { bool isMemObjReady; isMemObjReady = GetParam(); if (isMemObjReady) { makeMemObjReady(); } else { makeMemObjNotReady(); } auto mockCsr = new ::testing::NiceMock>(*device->executionEnvironment); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; bool desired = true; auto waitForCompletionWithTimeoutMock = [=](bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) -> bool { return desired; }; auto osContextId = mockCsr->getOsContext().getContextId(); ON_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock)); EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, TimeoutControls::maxTimeout, allocation->getTaskCount(osContextId))) .Times(1); delete memObj; } HWTEST_P(MemObjSyncDestructionTest, givenMemObjWithDestructableAllocationWhenAsyncDestructionsAreDisabledThenAllocationIsNotDeferred) { bool isMemObjReady; isMemObjReady = GetParam(); if (isMemObjReady) { makeMemObjReady(); } else { makeMemObjNotReady(); } auto mockCsr = new ::testing::NiceMock>(*device->executionEnvironment); device->resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 0; bool desired = true; auto waitForCompletionWithTimeoutMock = [=](bool enableTimeout, 
int64_t timeoutMs, uint32_t taskCountToWait) -> bool { return desired; }; ON_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock)); delete memObj; auto &allocationList = mockCsr->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); } INSTANTIATE_TEST_CASE_P( MemObjTests, MemObjAsyncDestructionTest, testing::Bool()); INSTANTIATE_TEST_CASE_P( MemObjTests, MemObjSyncDestructionTest, testing::Bool()); using UsmDestructionTests = ::testing::Test; HWTEST_F(UsmDestructionTests, givenSharedUsmAllocationWhenBlockingFreeIsCalledThenWaitForCompletionIsCalled) { MockDevice mockDevice; mockDevice.incRefInternal(); MockClDevice mockClDevice(&mockDevice); MockContext mockContext(&mockClDevice, false); if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.clVersionSupport < 20) { GTEST_SKIP(); } auto mockCsr = new ::testing::NiceMock>(*mockDevice.executionEnvironment); auto osContext = mockDevice.executionEnvironment->memoryManager->createAndRegisterOsContext(mockDevice.engines[0].commandStreamReceiver, aub_stream::ENGINE_RCS, {}, PreemptionMode::Disabled, false, false, false); mockDevice.engines[0].osContext = osContext; mockDevice.resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 5u; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY); auto svmAllocationsManager = mockContext.getSVMAllocsManager(); auto sharedMemory = svmAllocationsManager->createUnifiedAllocationWithDeviceStorage(0u, 4096u, {}, unifiedMemoryProperties); ASSERT_NE(nullptr, sharedMemory); auto svmEntry = svmAllocationsManager->getSVMAlloc(sharedMemory); auto waitForCompletionWithTimeoutMock = [=](bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) -> bool { return true; }; ON_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) 
.WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock)); svmEntry->gpuAllocation->updateTaskCount(6u, 0u); svmEntry->cpuAllocation->updateTaskCount(6u, 0u); EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, TimeoutControls::maxTimeout, 6u)) .Times(2); clMemBlockingFreeINTEL(&mockContext, sharedMemory); } HWTEST_F(UsmDestructionTests, givenUsmAllocationWhenBlockingFreeIsCalledThenWaitForCompletionIsCalled) { MockDevice mockDevice; mockDevice.incRefInternal(); MockClDevice mockClDevice(&mockDevice); MockContext mockContext(&mockClDevice, false); if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.clVersionSupport < 20) { GTEST_SKIP(); } auto mockCsr = new ::testing::NiceMock>(*mockDevice.executionEnvironment); auto osContext = mockDevice.executionEnvironment->memoryManager->createAndRegisterOsContext(mockDevice.engines[0].commandStreamReceiver, aub_stream::ENGINE_RCS, {}, PreemptionMode::Disabled, false, false, false); mockDevice.engines[0].osContext = osContext; mockDevice.resetCommandStreamReceiver(mockCsr); *mockCsr->getTagAddress() = 5u; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY); auto svmAllocationsManager = mockContext.getSVMAllocsManager(); auto hostMemory = svmAllocationsManager->createUnifiedMemoryAllocation(0u, 4096u, unifiedMemoryProperties); ASSERT_NE(nullptr, hostMemory); auto svmEntry = svmAllocationsManager->getSVMAlloc(hostMemory); auto waitForCompletionWithTimeoutMock = [=](bool enableTimeout, int64_t timeoutMs, uint32_t taskCountToWait) -> bool { return true; }; ON_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Invoke(waitForCompletionWithTimeoutMock)); svmEntry->gpuAllocation->updateTaskCount(6u, 0u); EXPECT_CALL(*mockCsr, waitForCompletionWithTimeout(::testing::_, TimeoutControls::maxTimeout, 6u)) .Times(1); clMemBlockingFreeINTEL(&mockContext, hostMemory); } 
compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/mem_obj_helper_tests.cpp000066400000000000000000000175241363734646600301500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(MemObjHelper, givenValidMemFlagsForSubBufferWhenFlagsAreCheckedThenTrueIsReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; EXPECT_TRUE(MemObjHelper::checkMemFlagsForSubBuffer(flags)); } TEST(MemObjHelper, givenInvalidMemFlagsForSubBufferWhenFlagsAreCheckedThenTrueIsReturned) { cl_mem_flags flags = CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR; EXPECT_FALSE(MemObjHelper::checkMemFlagsForSubBuffer(flags)); } TEST(MemObjHelper, givenClMemForceLinearStorageFlagWhenCheckForLinearStorageForceThenReturnProperValue) { MemoryPropertiesFlags memoryProperties; cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; flagsIntel = 0; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags = 0; flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; flagsIntel |= CL_MEM_FORCE_LINEAR_STORAGE_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); 
EXPECT_TRUE(memoryProperties.flags.forceLinearStorage); flags = 0; flagsIntel = 0; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); EXPECT_FALSE(memoryProperties.flags.forceLinearStorage); } TEST(MemObjHelper, givenValidPropertiesWhenValidatingMemoryPropertiesThenTrueIsReturned) { cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); MockContext context; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flags = CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_NO_ACCESS_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_NO_ACCESS_INTEL; flagsIntel = 0; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flags = CL_MEM_NO_ACCESS_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = CL_MEM_NO_ACCESS_INTEL; flagsIntel = 0; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flags = CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_NO_ACCESS; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_NO_ACCESS; flagsIntel = 0; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flags = CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY; 
memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY; flagsIntel = 0; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS; flagsIntel = 0; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flagsIntel = CL_MEM_LOCALLY_UNCACHED_RESOURCE; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = 0; flagsIntel = CL_MEM_LOCALLY_UNCACHED_RESOURCE; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flagsIntel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = 0; flagsIntel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, context)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); flags = 0; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = 0; flagsIntel = 0; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, 
flags, flagsIntel, context)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, flagsIntel, nullptr, context)); } struct Image1dWithAccessFlagsUnrestricted : public Image1dDefaults { enum { flags = CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL }; }; TEST(MemObjHelper, givenParentMemObjAndHostPtrFlagsWhenValidatingMemoryPropertiesForImageThenFalseIsReturned) { cl_mem_flags flags = 0; cl_mem_flags_intel flagsIntel = 0; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); MockContext context; auto image = clUniquePtr(Image1dHelper<>::create(&context)); auto imageWithAccessFlagsUnrestricted = clUniquePtr(ImageHelper::create(&context)); cl_mem_flags hostPtrFlags[] = {CL_MEM_USE_HOST_PTR, CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR}; for (auto hostPtrFlag : hostPtrFlags) { flags = hostPtrFlag; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags = hostPtrFlag; EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, image.get(), context)); EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, imageWithAccessFlagsUnrestricted.get(), context)); flags |= CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL; memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0); flags |= CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL; EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, image.get(), context)); EXPECT_FALSE(MemObjHelper::validateMemoryPropertiesForImage(memoryProperties, flags, 0, imageWithAccessFlagsUnrestricted.get(), context)); } }compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/mem_obj_tests.cpp000066400000000000000000000640211363734646600266030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/properties_helper.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_deferred_deleter.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; struct MySharingHandler : public SharingHandler { MySharingHandler(MemObj *memObj) : memObj(memObj) { auto alloc = getAllocation(); if (alloc) { alloc->incReuseCount(); } } MySharingHandler(GraphicsAllocation *allocation) : allocation(allocation) { auto alloc = getAllocation(); if (alloc) { alloc->incReuseCount(); } } void releaseReusedGraphicsAllocation() override { auto alloc = getAllocation(); if (alloc) { alloc->decReuseCount(); } } GraphicsAllocation *getAllocation() { if (memObj) { return memObj->getGraphicsAllocation(); } return allocation; } MemObj *memObj = nullptr; GraphicsAllocation *allocation = nullptr; }; TEST(MemObj, GivenMemObjWhenInititalizedFromHostPtrThenInitializeFields) { const size_t size = 64; char buffer[size]; MockContext context; MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(buffer, sizeof(buffer)); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj 
memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, sizeof(buffer), buffer, buffer, mockAllocation, true, false, false); EXPECT_EQ(&buffer, memObj.getCpuAddress()); EXPECT_EQ(&buffer, memObj.getHostPtr()); EXPECT_EQ(size, memObj.getSize()); EXPECT_EQ(static_cast(CL_MEM_USE_HOST_PTR), memObj.getMemoryPropertiesFlags()); } TEST(MemObj, givenMemObjectWhenAskedForTransferToHostPtrThenDoNothing) { const size_t size = 64; uint8_t hostPtr[size] = {}; uint8_t expectedHostPtr[size] = {}; MockContext context; MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(hostPtr, sizeof(hostPtr)); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, size, hostPtr, hostPtr, mockAllocation, true, false, false); memset(memObj.getCpuAddress(), 123, size); memset(hostPtr, 0, size); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{size, 0, 0}}; EXPECT_THROW(memObj.transferDataToHostPtr(copySize, copyOffset), std::exception); EXPECT_TRUE(memcmp(hostPtr, expectedHostPtr, size) == 0); } TEST(MemObj, givenMemObjectWhenAskedForTransferFromHostPtrThenDoNothing) { const size_t size = 64; uint8_t hostPtr[size] = {}; uint8_t expectedBufferPtr[size] = {}; MockContext context; MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(hostPtr, sizeof(hostPtr)); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_PIPE, memoryProperties, CL_MEM_USE_HOST_PTR, 0, size, hostPtr, hostPtr, mockAllocation, true, false, false); memset(memObj.getCpuAddress(), 123, size); memset(expectedBufferPtr, 123, size); MemObjOffsetArray copyOffset = {{0, 0, 0}}; MemObjSizeArray copySize = {{size, 0, 0}}; EXPECT_THROW(memObj.transferDataFromHostPtr(copySize, copyOffset), 
std::exception);

    EXPECT_TRUE(memcmp(memObj.getCpuAddress(), expectedBufferPtr, size) == 0);
}

// With CL_MEM_USE_HOST_PTR the base pointer for mapping is the caller's host
// pointer itself.
TEST(MemObj, givenHostPtrAndUseHostPtrFlagWhenAskingForBaseMapPtrThenReturnHostPtr) {
    uint8_t hostPtr = 0;
    MockContext context;
    MemoryPropertiesFlags properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0);
    MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, properties, CL_MEM_USE_HOST_PTR, 0,
                  1, nullptr, &hostPtr, nullptr, true, false, false);

    const auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
    EXPECT_EQ(&hostPtr, memObj.getBasePtrForMap(rootDeviceIndex));
}

// Without CL_MEM_USE_HOST_PTR the base pointer for mapping must differ from
// the host pointer and match the MemObj's allocated map pointer.
TEST(MemObj, givenHostPtrWithoutUseHostPtrFlagWhenAskingForBaseMapPtrThenReturnAllocatedPtr) {
    uint8_t hostPtr = 0;
    MockContext context;
    MemoryPropertiesFlags properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0);
    MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, properties, CL_MEM_COPY_HOST_PTR, 0,
                  1, nullptr, &hostPtr, nullptr, true, false, false);

    const auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
    EXPECT_NE(&hostPtr, memObj.getBasePtrForMap(rootDeviceIndex));
    EXPECT_EQ(memObj.getAllocatedMapPtr(), memObj.getBasePtrForMap(rootDeviceIndex));
}

// Releasing the allocated map pointer a second time must be harmless: the
// pointer reads back as nullptr after each call.
TEST(MemObj, givenMemObjWhenReleaseAllocatedPtrIsCalledTwiceThenItDoesntCrash) {
    void *mapStorage = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);

    MockContext context;
    MemoryPropertiesFlags properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0);
    MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, properties, CL_MEM_USE_HOST_PTR, 0,
                  1, nullptr, nullptr, nullptr, true, false, false);

    memObj.setAllocatedMapPtr(mapStorage);

    memObj.releaseAllocatedMapPtr();
    EXPECT_EQ(nullptr, memObj.getAllocatedMapPtr());

    memObj.releaseAllocatedMapPtr();
    EXPECT_EQ(nullptr, memObj.getAllocatedMapPtr());
}

TEST(MemObj, givenNotReadyGraphicsAllocationWhenMemObjDestroysAllocationAsyncThenAllocationIsAddedToMemoryManagerAllocationList) {
MockContext context; auto memoryManager = context.getDevice(0)->getExecutionEnvironment()->memoryManager.get(); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); auto defaultEngine = context.getDevice(0)->getDefaultEngine(); allocation->updateTaskCount(2, defaultEngine.osContext->getContextId()); *(defaultEngine.commandStreamReceiver->getTagAddress()) = 1; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); auto &allocationList = defaultEngine.commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_FALSE(allocationList.peekIsEmpty()); } TEST(MemObj, givenReadyGraphicsAllocationWhenMemObjDestroysAllocationAsyncThenAllocationIsNotAddedToMemoryManagerAllocationList) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0)); MockContext context(device.get()); auto memoryManager = executionEnvironment->memoryManager.get(); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->updateTaskCount(1, device->getDefaultEngine().osContext->getContextId()); *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = 1; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); auto 
&allocationList = device->getDefaultEngine().commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_TRUE(allocationList.peekIsEmpty()); } TEST(MemObj, givenNotUsedGraphicsAllocationWhenMemObjDestroysAllocationAsyncThenAllocationIsNotAddedToMemoryManagerAllocationList) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); auto &allocationList = context.getDevice(0)->getDefaultEngine().commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_TRUE(allocationList.peekIsEmpty()); } TEST(MemObj, givenMemoryManagerWithoutDeviceWhenMemObjDestroysAllocationAsyncThenAllocationIsNotAddedToMemoryManagerAllocationList) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); auto &allocationList = 
context.getDevice(0)->getDefaultEngine().commandStreamReceiver->getTemporaryAllocations(); EXPECT_TRUE(allocationList.peekIsEmpty()); memObj.destroyGraphicsAllocation(allocation, true); EXPECT_TRUE(allocationList.peekIsEmpty()); } TEST(MemObj, givenMemObjAndPointerToObjStorageWithProperCommandWhenCheckIfMemTransferRequiredThenReturnFalse) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); void *ptr = memObj.getCpuAddressForMemoryTransfer(); bool isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_BUFFER); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_READ_BUFFER); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_BUFFER_RECT); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_READ_BUFFER_RECT); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_IMAGE); EXPECT_FALSE(isMemTransferNeeded); isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_READ_IMAGE); EXPECT_FALSE(isMemTransferNeeded); } TEST(MemObj, givenMemObjAndPointerToObjStorageBadCommandWhenCheckIfMemTransferRequiredThenReturnTrue) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MemoryPropertiesFlags memoryProperties = 
MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); void *ptr = memObj.getCpuAddressForMemoryTransfer(); bool isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_FILL_BUFFER); EXPECT_TRUE(isMemTransferNeeded); } TEST(MemObj, givenMemObjAndPointerToDiffrentStorageAndProperCommandWhenCheckIfMemTransferRequiredThenReturnTrue) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); void *ptr = (void *)0x1234; bool isMemTransferNeeded = memObj.checkIfMemoryTransferIsRequired(0, 0, ptr, CL_COMMAND_WRITE_BUFFER); EXPECT_TRUE(isMemTransferNeeded); } TEST(MemObj, givenSharingHandlerWhenAskedForCpuMappingThenReturnFalse) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, allocation, true, false, false); memObj.setSharingHandler(new SharingHandler()); EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenTiledObjectWhenAskedForCpuMappingThenReturnFalse) { struct MyMemObj : public MemObj { using 
MemObj::MemObj; bool isTiledAllocation() const override { return true; } }; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MyMemObj memObj(nullptr, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, MemoryConstants::pageSize, nullptr, nullptr, nullptr, true, false, false); EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenRenderCompressedGmmWhenAskingForMappingOnCpuThenDisallow) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, false)); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, allocation->getUnderlyingBuffer(), nullptr, allocation, false, false, false); allocation->getDefaultGmm()->isRenderCompressed = false; EXPECT_TRUE(memObj.mappingOnCpuAllowed()); allocation->getDefaultGmm()->isRenderCompressed = true; EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenDefaultWhenAskedForCpuMappingThenReturnTrue) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_COPY_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0, 64, 
allocation->getUnderlyingBuffer(), nullptr, allocation, true, false, false); EXPECT_FALSE(memObj.isTiledAllocation()); EXPECT_FALSE(memObj.peekSharingHandler()); EXPECT_TRUE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenNonCpuAccessibleMemoryWhenAskingForMappingOnCpuThenDisallow) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, false)); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, allocation->getUnderlyingBuffer(), nullptr, allocation, false, false, false); EXPECT_TRUE(memObj.mappingOnCpuAllowed()); reinterpret_cast(allocation)->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); EXPECT_FALSE(memObj.mappingOnCpuAllowed()); } TEST(MemObj, givenMultipleMemObjectsWithReusedGraphicsAllocationWhenDestroyedThenFreeAllocationOnce) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize}); std::unique_ptr memObj1(new MemObj(&context, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 1, nullptr, nullptr, allocation, true, false, false)); memObj1->setSharingHandler(new MySharingHandler(allocation)); std::unique_ptr memObj2(new MemObj(&context, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 1, nullptr, nullptr, allocation, true, false, false)); memObj2->setSharingHandler(new MySharingHandler(allocation)); std::unique_ptr memObj3(new 
MemObj(&context, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 1, nullptr, nullptr, allocation, true, false, false)); memObj3->setSharingHandler(new MySharingHandler(allocation)); EXPECT_EQ(3u, allocation->peekReuseCount()); memObj3.reset(nullptr); EXPECT_EQ(2u, allocation->peekReuseCount()); memObj1.reset(nullptr); EXPECT_EQ(1u, allocation->peekReuseCount()); memObj2.reset(nullptr); } TEST(MemObj, givenMemObjectWhenContextIsNotNullThenContextOutlivesMemobjects) { MockContext context; EXPECT_EQ(1, context.getRefInternalCount()); { MemObj memObj(&context, 0, {}, 0, 0, 0, nullptr, nullptr, nullptr, false, false, false); EXPECT_EQ(2, context.getRefInternalCount()); } EXPECT_EQ(1, context.getRefInternalCount()); } TEST(MemObj, givenSharedMemObjectWithNullGfxAllocationWhenSettingGfxAllocationThenSucceed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *gfxAllocation = new MockGraphicsAllocation(nullptr, 0); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, nullptr, true, false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); memObj.resetGraphicsAllocation(gfxAllocation); gfxAllocation->incReuseCount(); ASSERT_EQ(1u, gfxAllocation->peekReuseCount()); EXPECT_EQ(gfxAllocation, memObj.getGraphicsAllocation()); } TEST(MemObj, givenSharedMemObjectAndNullGfxAllocationProvidedWhenSettingGfxAllocationThenSucceed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *graphicsAllocation = new MockGraphicsAllocation(nullptr, 0); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj 
memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, graphicsAllocation, true, false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); graphicsAllocation->decReuseCount(); memObj.resetGraphicsAllocation(nullptr); EXPECT_EQ(nullptr, memObj.getGraphicsAllocation()); } TEST(MemObj, givenSharedMemObjectAndZeroReuseCountWhenChangingGfxAllocationThenOldAllocationIsDestroyed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *oldGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MockGraphicsAllocation *newGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, oldGfxAllocation, true, false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); oldGfxAllocation->decReuseCount(); memObj.resetGraphicsAllocation(newGfxAllocation); newGfxAllocation->incReuseCount(); ASSERT_EQ(1u, newGfxAllocation->peekReuseCount()); EXPECT_EQ(newGfxAllocation, memObj.getGraphicsAllocation()); } TEST(MemObj, givenSharedMemObjectAndNonZeroReuseCountWhenChangingGfxAllocationThenOldAllocationIsNotDestroyed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *oldGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MockGraphicsAllocation *newGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, oldGfxAllocation, true, 
false, false); memObj.setSharingHandler(new MySharingHandler(&memObj)); memObj.resetGraphicsAllocation(newGfxAllocation); newGfxAllocation->incReuseCount(); ASSERT_EQ(1u, newGfxAllocation->peekReuseCount()); EXPECT_EQ(newGfxAllocation, memObj.getGraphicsAllocation()); memoryManager.checkGpuUsageAndDestroyGraphicsAllocations(oldGfxAllocation); } TEST(MemObj, givenNotSharedMemObjectWhenChangingGfxAllocationThenOldAllocationIsDestroyed) { MockContext context; MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment()); context.memoryManager = &memoryManager; MockGraphicsAllocation *oldGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MockGraphicsAllocation *newGfxAllocation = new MockGraphicsAllocation(nullptr, 0); MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, oldGfxAllocation, true, false, false); memObj.resetGraphicsAllocation(newGfxAllocation); EXPECT_EQ(newGfxAllocation, memObj.getGraphicsAllocation()); } TEST(MemObj, givenGraphicsAllocationWhenCallingIsAllocDumpableThenItReturnsTheCorrectValue) { MockGraphicsAllocation gfxAllocation(nullptr, 0); EXPECT_FALSE(gfxAllocation.isAllocDumpable()); gfxAllocation.setAllocDumpable(true); EXPECT_TRUE(gfxAllocation.isAllocDumpable()); } TEST(MemObj, givenMemObjNotUsingHostPtrWhenGettingBasePtrTwiceReturnSameMapPtr) { MockContext context; MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, nullptr, nullptr, nullptr, true, false, false); void *mapPtr = memObj.getBasePtrForMap(context.getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, mapPtr); auto mapAllocation = memObj.getMapAllocation(); ASSERT_NE(nullptr, mapAllocation); EXPECT_EQ(mapPtr, 
mapAllocation->getUnderlyingBuffer()); EXPECT_EQ(mapPtr, memObj.getAllocatedMapPtr()); } using MemObjMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(MemObjMultiRootDeviceTests, memObjMapAllocationHasCorrectRootDeviceIndex) { MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0); MemObj memObj(context.get(), CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_READ_WRITE, 0, 1, nullptr, nullptr, nullptr, true, false, false); void *mapPtr = memObj.getBasePtrForMap(device->getRootDeviceIndex()); EXPECT_NE(nullptr, mapPtr); auto mapAllocation = memObj.getMapAllocation(); ASSERT_NE(nullptr, mapAllocation); EXPECT_EQ(expectedRootDeviceIndex, mapAllocation->getRootDeviceIndex()); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/nv12_image_tests.cpp000066400000000000000000000523471363734646600271330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/cl_device/cl_device_get_cap.inl" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; class Nv12ImageTest : public testing::Test { public: void computeExpectedOffsets(Image *image) { SurfaceOffsets expectedSurfaceOffsets = {0}; 
GMM_REQ_OFFSET_INFO reqOffsetInfo = {}; SurfaceOffsets requestedOffsets = {0}; auto mockResInfo = reinterpret_cast<::testing::NiceMock *>(image->getGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo.get()); mockResInfo->getOffset(reqOffsetInfo); if (image->getImageDesc().mem_object) { expectedSurfaceOffsets.offset = reqOffsetInfo.Render.Offset; expectedSurfaceOffsets.xOffset = reqOffsetInfo.Render.XOffset / (mockResInfo->getBitsPerPixel() / 8); expectedSurfaceOffsets.yOffset = reqOffsetInfo.Render.YOffset; } expectedSurfaceOffsets.yOffsetForUVplane = reqOffsetInfo.Lock.Offset / reqOffsetInfo.Lock.Pitch; image->getSurfaceOffsets(requestedOffsets); EXPECT_EQ(expectedSurfaceOffsets.offset, requestedOffsets.offset); EXPECT_EQ(expectedSurfaceOffsets.xOffset, requestedOffsets.xOffset); EXPECT_EQ(expectedSurfaceOffsets.yOffset, requestedOffsets.yOffset); EXPECT_EQ(expectedSurfaceOffsets.yOffsetForUVplane, requestedOffsets.yOffsetForUVplane); } protected: void SetUp() override { imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_NV12_INTEL; imageDesc.mem_object = NULL; imageDesc.image_array_size = 0; imageDesc.image_depth = 1; imageDesc.image_height = 4 * 4; // Valid values multiple of 4 imageDesc.image_width = 4 * 4; // Valid values multiple of 4 imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; flags = CL_MEM_HOST_NO_ACCESS; } void validateImageWithFlags(cl_mem_flags flags) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, nullptr); } Image *createImageWithFlags(cl_mem_flags flags) { auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, 
context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); return Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); } cl_int retVal = CL_SUCCESS; MockContext context; cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem_flags flags; }; TEST_F(Nv12ImageTest, isNV12ImageReturnsTrue) { auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); EXPECT_TRUE(IsNV12Image(&image->getImageFormat())); delete image; } TEST_F(Nv12ImageTest, validNV12ImageFormatAndDescriptor) { validateImageWithFlags(flags); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(Nv12ImageTest, invalidNV12ImageFormat) { imageFormat.image_channel_data_type = CL_SNORM_INT16; validateImageWithFlags(flags); EXPECT_EQ(CL_IMAGE_FORMAT_NOT_SUPPORTED, retVal); } TEST_F(Nv12ImageTest, invalidNV12ImageType) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; validateImageWithFlags(flags); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_F(Nv12ImageTest, DISABLED_invalidNV12ImageDepth) { imageDesc.image_depth = 2; validateImageWithFlags(flags); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_F(Nv12ImageTest, invalidNV12ImageHeigth) { imageDesc.image_height = 17; validateImageWithFlags(flags); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_F(Nv12ImageTest, invalidNV12ImageWidth) { imageDesc.image_width = 17; validateImageWithFlags(flags); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_F(Nv12ImageTest, invalidNV12ImageFlag) { flags &= ~(CL_MEM_HOST_NO_ACCESS); validateImageWithFlags(flags); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(Nv12ImageTest, validateNV12YPlane) { auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); imageDesc.mem_object = image; imageDesc.image_depth = 0; // Plane Y of NV12 image validateImageWithFlags(CL_MEM_READ_WRITE); 
EXPECT_EQ(CL_SUCCESS, retVal); delete image; } TEST_F(Nv12ImageTest, validateNV12YUVPlane) { auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); imageDesc.mem_object = image; imageDesc.image_depth = 1; // Plane UV of NV12 image validateImageWithFlags(CL_MEM_READ_WRITE); EXPECT_EQ(CL_SUCCESS, retVal); delete image; } TEST_F(Nv12ImageTest, givenNV12ImageWhenInvalidDepthIsPassedThenValidateFails) { auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); imageDesc.mem_object = image; imageDesc.image_depth = 3; // Invalid Plane of NV12 image validateImageWithFlags(CL_MEM_READ_WRITE); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); delete image; } TEST_F(Nv12ImageTest, given2DImageWhenPassedToValidateImageTraitsThenValidateReturnsSuccess) { auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); imageDesc.mem_object = image; imageDesc.image_depth = 0; retVal = Image::validateImageTraits(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0), &imageFormat, &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete image; } TEST_F(Nv12ImageTest, given1DImageWhenPassedAsParentImageThenValidateImageTraitsReturnsSuccess) { imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); imageDesc.mem_object = image; imageDesc.image_depth = 0; retVal = Image::validateImageTraits(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0), &imageFormat, &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete image; } TEST_F(Nv12ImageTest, givenBufferWhenPassedAsNV12ParentImageThenValidateImageTraitsReturnsInvalidDesriptor) { MockBuffer Buffer; imageDesc.mem_object = &Buffer; imageDesc.image_depth = 0; // Plane 
of NV12 image retVal = Image::validateImageTraits(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_READ_WRITE, 0, 0), &imageFormat, &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, retVal); } TEST_F(Nv12ImageTest, createNV12Image) { auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); auto rowPitch = image->getHostPtrRowPitch(); EXPECT_NE(0u, rowPitch); SurfaceOffsets surfaceOffsets; image->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(0u, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.xOffset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_NE(0u, surfaceOffsets.yOffsetForUVplane); delete image; } TEST_F(Nv12ImageTest, createNV12YPlaneImage) { // Create Parent NV12 image auto imageNV12 = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, imageNV12); imageDesc.mem_object = imageNV12; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 0; // Create NV12 Y Plane image auto imageYPlane = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_NE(nullptr, imageYPlane); EXPECT_EQ(true, imageYPlane->isImageFromImage()); EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageYPlane->getGraphicsAllocation()); cl_image_desc parentDimensions, planeDimensions; parentDimensions = imageNV12->getImageDesc(); planeDimensions = imageYPlane->getImageDesc(); EXPECT_EQ(parentDimensions.image_height, planeDimensions.image_height); EXPECT_EQ(parentDimensions.image_width, planeDimensions.image_width); EXPECT_EQ(0u, planeDimensions.image_depth); EXPECT_NE(0u, planeDimensions.image_row_pitch); EXPECT_EQ(parentDimensions.image_slice_pitch, planeDimensions.image_slice_pitch); EXPECT_EQ(parentDimensions.image_type, planeDimensions.image_type); EXPECT_EQ(parentDimensions.image_array_size, planeDimensions.image_array_size); 
computeExpectedOffsets(imageYPlane); computeExpectedOffsets(imageNV12); delete imageYPlane; delete imageNV12; } TEST_F(Nv12ImageTest, createNV12UVPlaneImage) { // Create Parent NV12 image auto imageNV12 = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, imageNV12); imageDesc.mem_object = imageNV12; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane // Create NV12 UV Plane image auto imageUVPlane = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_NE(nullptr, imageUVPlane); EXPECT_EQ(true, imageUVPlane->isImageFromImage()); EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation()); cl_image_desc parentDimensions, planeDimensions; parentDimensions = imageNV12->getImageDesc(); planeDimensions = imageUVPlane->getImageDesc(); EXPECT_EQ(parentDimensions.image_height / 2, planeDimensions.image_height); EXPECT_EQ(parentDimensions.image_width / 2, planeDimensions.image_width); EXPECT_EQ(0u, planeDimensions.image_depth); EXPECT_EQ(parentDimensions.image_row_pitch, planeDimensions.image_row_pitch); EXPECT_NE(0u, planeDimensions.image_row_pitch); EXPECT_EQ(parentDimensions.image_slice_pitch, planeDimensions.image_slice_pitch); EXPECT_EQ(parentDimensions.image_type, planeDimensions.image_type); EXPECT_EQ(parentDimensions.image_array_size, planeDimensions.image_array_size); computeExpectedOffsets(imageUVPlane); computeExpectedOffsets(imageNV12); delete imageUVPlane; delete imageNV12; } TEST_F(Nv12ImageTest, createNV12UVPlaneImageWithOffsetOfUVPlane) { // This size returns offset of UV plane, and 0 yOffset imageDesc.image_height = 64; // Valid values multiple of 4 imageDesc.image_width = 64; // Valid values multiple of 4 // Create Parent NV12 image auto imageNV12 = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, 
imageNV12); imageDesc.mem_object = imageNV12; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane // Create NV12 UV Plane image auto imageUVPlane = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_NE(nullptr, imageUVPlane); EXPECT_EQ(true, imageUVPlane->isImageFromImage()); EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation()); cl_image_desc parentDimensions, planeDimensions; parentDimensions = imageNV12->getImageDesc(); planeDimensions = imageUVPlane->getImageDesc(); EXPECT_EQ(parentDimensions.image_height / 2, planeDimensions.image_height); EXPECT_EQ(parentDimensions.image_width / 2, planeDimensions.image_width); EXPECT_EQ(0u, planeDimensions.image_depth); EXPECT_EQ(parentDimensions.image_row_pitch, planeDimensions.image_row_pitch); EXPECT_NE(0u, planeDimensions.image_row_pitch); EXPECT_EQ(parentDimensions.image_slice_pitch, planeDimensions.image_slice_pitch); EXPECT_EQ(parentDimensions.image_type, planeDimensions.image_type); EXPECT_EQ(parentDimensions.image_array_size, planeDimensions.image_array_size); computeExpectedOffsets(imageUVPlane); computeExpectedOffsets(imageNV12); delete imageUVPlane; delete imageNV12; } HWTEST_F(Nv12ImageTest, checkIfPlanesAreWritten) { KernelBinaryHelper kbHelper(KernelBinaryHelper::BUILT_INS); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); char hostPtr[16 * 16 * 16]; auto contextWithMockCmdQ = new MockContext(device.get(), true); auto cmdQ = new MockCommandQueueHw(contextWithMockCmdQ, device.get(), 0); contextWithMockCmdQ->overrideSpecialQueueAndDecrementRefCount(cmdQ); // Create Parent NV12 image cl_mem_flags flags = CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, 
context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto imageNV12 = Image::create(contextWithMockCmdQ, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, hostPtr, retVal); EXPECT_EQ(imageNV12->isTiledAllocation() ? 2u : 0u, cmdQ->EnqueueWriteImageCounter); ASSERT_NE(nullptr, imageNV12); contextWithMockCmdQ->release(); delete imageNV12; } HWTEST_F(Nv12ImageTest, setImageArg) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); SurfaceOffsets surfaceOffsets; image->getSurfaceOffsets(surfaceOffsets); image->setImageArg(&surfaceState, false, 0); EXPECT_EQ(surfaceOffsets.xOffset, surfaceState.getXOffset()); EXPECT_EQ(surfaceOffsets.yOffset, surfaceState.getYOffset()); EXPECT_EQ(surfaceOffsets.yOffsetForUVplane, surfaceState.getYOffsetForUOrUvPlane()); // NV 12 image has correct alpha channel == one EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ONE, surfaceState.getShaderChannelSelectAlpha()); delete image; } HWTEST_F(Nv12ImageTest, givenNv12ImageArrayAndImageArraySizeIsZeroWhenCallingSetImageArgThenDoNotProgramSurfaceArray) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; cl_image_desc imageDesc = Image2dDefaults::imageDesc; imageDesc.image_array_size = 1; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; cl_image_format imageFormat = Image2dDefaults::imageFormat; imageFormat.image_channel_order = CL_NV12_INTEL; imageFormat.image_channel_data_type = CL_UNORM_INT8; std::unique_ptr image{Image2dHelper<>::create(&context, &imageDesc, &imageFormat)}; image->setCubeFaceIndex(__GMM_NO_CUBE_MAP); image->setImageArg(&surfaceState, false, 0); EXPECT_FALSE(surfaceState.getSurfaceArray()); } HWTEST_F(Nv12ImageTest, 
setImageArgUVPlaneImageSetsOffsetedSurfaceBaseAddressAndSetsCorrectTileMode) { typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState; // Create Parent NV12 image auto imageNV12 = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, imageNV12); imageDesc.mem_object = imageNV12; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane // Create NV12 UV Plane image auto imageUVPlane = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_NE(nullptr, imageUVPlane); EXPECT_EQ(imageNV12->getGraphicsAllocation(), imageUVPlane->getGraphicsAllocation()); SurfaceOffsets surfaceOffsets; imageUVPlane->getSurfaceOffsets(surfaceOffsets); imageUVPlane->setImageArg(&surfaceState, false, 0); EXPECT_EQ(imageUVPlane->getGraphicsAllocation()->getGpuAddress() + surfaceOffsets.offset, surfaceState.getSurfaceBaseAddress()); auto tileMode = RENDER_SURFACE_STATE::TILE_MODE_LINEAR; if (imageNV12->isTiledAllocation()) { tileMode = RENDER_SURFACE_STATE::TILE_MODE_YMAJOR; } EXPECT_EQ(tileMode, surfaceState.getTileMode()); delete imageUVPlane; delete imageNV12; } HWTEST_F(Nv12ImageTest, setMediaImageArg) { using MEDIA_SURFACE_STATE = typename FamilyType::MEDIA_SURFACE_STATE; MEDIA_SURFACE_STATE surfaceState; auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); SurfaceOffsets surfaceOffsets; image->getSurfaceOffsets(surfaceOffsets); image->setMediaImageArg(&surfaceState); EXPECT_EQ(surfaceOffsets.xOffset, surfaceState.getXOffsetForUCb()); EXPECT_EQ(surfaceOffsets.yOffset, surfaceState.getXOffsetForUCb()); EXPECT_EQ(surfaceOffsets.yOffsetForUVplane, surfaceState.getYOffsetForUCb()); EXPECT_EQ(image->getGraphicsAllocation()->getGpuAddress() + surfaceOffsets.offset, surfaceState.getSurfaceBaseAddress()); delete 
image; } TEST_F(Nv12ImageTest, redescribedNV12ImageAndUVPlaneImageHasCorrectOffsets) { auto image = createImageWithFlags(CL_MEM_READ_ONLY | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL); ASSERT_NE(nullptr, image); auto imageRedescribed = image->redescribe(); ASSERT_NE(nullptr, imageRedescribed); SurfaceOffsets imageOffsets, redescribedOffsets; image->getSurfaceOffsets(imageOffsets); imageRedescribed->getSurfaceOffsets(redescribedOffsets); EXPECT_EQ(imageOffsets.xOffset, redescribedOffsets.xOffset); EXPECT_EQ(imageOffsets.yOffset, redescribedOffsets.yOffset); EXPECT_EQ(imageOffsets.yOffsetForUVplane, redescribedOffsets.yOffsetForUVplane); delete imageRedescribed; imageDesc.mem_object = image; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; imageDesc.image_width = 0; imageDesc.image_height = 0; imageDesc.image_depth = 1; // UV plane // Create NV12 UV Plane image auto imageUVPlane = createImageWithFlags(CL_MEM_READ_WRITE); ASSERT_NE(nullptr, imageUVPlane); imageRedescribed = imageUVPlane->redescribe(); ASSERT_NE(nullptr, imageRedescribed); imageUVPlane->getSurfaceOffsets(imageOffsets); imageRedescribed->getSurfaceOffsets(redescribedOffsets); EXPECT_EQ(imageOffsets.xOffset, redescribedOffsets.xOffset); EXPECT_EQ(imageOffsets.yOffset, redescribedOffsets.yOffset); EXPECT_EQ(imageOffsets.yOffsetForUVplane, redescribedOffsets.yOffsetForUVplane); delete imageRedescribed; delete imageUVPlane; delete image; } TEST_F(Nv12ImageTest, invalidPlanarYUVImageHeight) { auto pDevice = context.getDevice(0); const size_t *maxHeight = nullptr; size_t srcSize = 0; size_t retSize = 0; ASSERT_NE(nullptr, pDevice); pDevice->getCap(reinterpret_cast(maxHeight), srcSize, retSize); imageDesc.image_height = *maxHeight + 12; retVal = Image::validatePlanarYUV(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, retVal); } TEST_F(Nv12ImageTest, invalidPlanarYUVImageWidth) { auto 
pDevice = context.getDevice(0); const size_t *maxWidth = nullptr; size_t srcSize = 0; size_t retSize = 0; ASSERT_NE(nullptr, pDevice); pDevice->getCap(reinterpret_cast(maxWidth), srcSize, retSize); imageDesc.image_width = *maxWidth + 12; retVal = Image::validatePlanarYUV(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), &imageDesc, nullptr); EXPECT_EQ(CL_INVALID_IMAGE_SIZE, retVal); } TEST_F(Nv12ImageTest, validPlanarYUVImageHeight) { retVal = Image::validatePlanarYUV(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(Nv12ImageTest, validPlanarYUVImageWidth) { retVal = Image::validatePlanarYUV(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), &imageDesc, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/packed_yuv_image_tests.cpp000066400000000000000000000066431363734646600304750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; typedef decltype(&Image::redescribe) RedescribeMethod; class PackedYuvImageTest : public testing::Test, public testing::WithParamInterface { public: PackedYuvImageTest() { } protected: void SetUp() override { imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = GetParam(); imageDesc.mem_object = nullptr; 
imageDesc.image_array_size = 0; imageDesc.image_depth = 1; imageDesc.image_height = 13; imageDesc.image_width = 16; // Valid values multiple of 2 imageDesc.image_row_pitch = 0; imageDesc.image_slice_pitch = 0; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.num_mip_levels = 0; imageDesc.num_samples = 0; } void TearDown() override { } void validateFormat() { retVal = Image::validateImageFormat(&imageFormat); if (retVal != CL_SUCCESS) return; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); retVal = Image::validate(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), surfaceFormat, &imageDesc, nullptr); } cl_int retVal = CL_SUCCESS; MockContext context; cl_image_format imageFormat; cl_image_desc imageDesc; cl_mem_flags flags; }; cl_channel_order packedYuvChannels[] = {CL_YUYV_INTEL, CL_UYVY_INTEL, CL_YVYU_INTEL, CL_VYUY_INTEL}; TEST_P(PackedYuvImageTest, isPackedYuvImageReturnsTrue) { flags = CL_MEM_READ_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); auto image = Image::create( &context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal); ASSERT_NE(nullptr, image); EXPECT_TRUE(IsPackedYuvImage(&image->getImageFormat())); delete image; } TEST_P(PackedYuvImageTest, validPackedYuvImageFormatAndDescriptor) { flags = CL_MEM_READ_ONLY; validateFormat(); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(PackedYuvImageTest, invalidPackedYuvImageFormat) { imageFormat.image_channel_data_type = CL_SNORM_INT16; flags = CL_MEM_READ_ONLY; validateFormat(); EXPECT_EQ(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, retVal); } TEST_P(PackedYuvImageTest, invalidPackedYuvImageWidth) { imageDesc.image_width = 17; flags = CL_MEM_READ_ONLY; validateFormat(); EXPECT_EQ(CL_INVALID_IMAGE_DESCRIPTOR, 
retVal); } INSTANTIATE_TEST_CASE_P( PackedYuvImageTests, PackedYuvImageTest, testing::ValuesIn(packedYuvChannels)); compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/pipe_tests.cpp000066400000000000000000000100721363734646600261250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" using namespace NEO; //Tests for pipes class PipeTest : public DeviceFixture, public ::testing::Test, public MemoryManagementFixture { public: PipeTest() {} protected: void SetUp() override { } void TearDown() override { } cl_int retVal = CL_SUCCESS; MockContext context; size_t size; }; TEST_F(PipeTest, CreatePipe) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); EXPECT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); delete pipe; } TEST_F(PipeTest, PipeCheckReservedHeaderSizeAddition) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ((1 * (20 + 1)) + Pipe::intelPipeHeaderReservedSpace, pipe->getSize()); delete pipe; } TEST_F(PipeTest, PipeCheckHeaderinitialization) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(21u, *reinterpret_cast(pipe->getCpuAddress())); delete pipe; } TEST_F(PipeTest, FailedAllocationInjection) { InjectedFunction method = [this](size_t failureIndex) { auto retVal = 
CL_INVALID_VALUE; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, retVal); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pipe); delete pipe; } else { EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal) << "for allocation " << failureIndex; EXPECT_EQ(nullptr, pipe); } }; injectFailures(method); } TEST_F(PipeTest, givenPipeWhenEnqueueWriteForUnmapIsCalledThenReturnError) { int errCode = CL_SUCCESS; std::unique_ptr pipe(Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode)); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); MockCommandQueue cmdQ; errCode = clEnqueueUnmapMemObject(&cmdQ, pipe.get(), nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_MEM_OBJECT, errCode); } TEST_F(PipeTest, givenPipeWithDifferentCpuAndGpuAddressesWhenSetArgPipeThenUseGpuAddress) { int errCode = CL_SUCCESS; auto pipe = Pipe::create(&context, CL_MEM_READ_ONLY, 1, 20, nullptr, errCode); ASSERT_NE(nullptr, pipe); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(21u, *reinterpret_cast(pipe->getCpuAddress())); uint64_t gpuAddress = 0x12345; auto pipeAllocation = pipe->getGraphicsAllocation(); pipeAllocation->setCpuPtrAndGpuAddress(pipeAllocation->getUnderlyingBuffer(), gpuAddress); EXPECT_NE(reinterpret_cast(pipeAllocation->getUnderlyingBuffer()), pipeAllocation->getGpuAddress()); uint64_t valueToPatch; pipe->setPipeArg(&valueToPatch, sizeof(valueToPatch)); EXPECT_EQ(valueToPatch, pipeAllocation->getGpuAddressToPatch()); delete pipe; } using MultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(MultiRootDeviceTests, pipeGraphicsAllocationHasCorrectRootDeviceIndex) { int errCode = CL_SUCCESS; std::unique_ptr pipe(Pipe::create(context.get(), CL_MEM_READ_ONLY, 1, 20, nullptr, errCode)); EXPECT_EQ(CL_SUCCESS, errCode); ASSERT_NE(nullptr, pipe.get()); auto graphicsAllocation = pipe->getGraphicsAllocation(); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(expectedRootDeviceIndex, 
graphicsAllocation->getRootDeviceIndex()); } compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp000066400000000000000000000164111363734646600273150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; namespace ULT { static const unsigned int sizeTestBufferInBytes = 32; class SubBufferTest : public DeviceFixture, public ::testing::Test { public: SubBufferTest() { } protected: void SetUp() override { buffer = Buffer::create(&context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeTestBufferInBytes, pHostPtr, retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); } void TearDown() override { delete buffer; DeviceFixture::TearDown(); } cl_int retVal = CL_SUCCESS; MockContext context; unsigned char pHostPtr[sizeTestBufferInBytes]; Buffer *buffer = nullptr; }; TEST_F(SubBufferTest, createSubBuffer) { cl_buffer_region region = {2, 12}; EXPECT_EQ(1, buffer->getRefInternalCount()); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_EQ(2, buffer->getRefInternalCount()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, subBuffer); delete subBuffer; EXPECT_EQ(1, buffer->getRefInternalCount()); } TEST_F(SubBufferTest, GivenUnalignedHostPtrBufferWhenSubBufferIsCreatedThenItIsNonZeroCopy) { cl_buffer_region region = {2, 2}; cl_int retVal = 0; void *pUnalignedHostPtr = alignUp(&pHostPtr, 4); Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeTestBufferInBytes, pUnalignedHostPtr, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); auto subBuffer = 
buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(subBuffer->isMemObjZeroCopy()); subBuffer->release(); buffer->release(); } TEST_F(SubBufferTest, GivenAlignmentThatIsHigherThen4BytesWhenCheckedForValidityThenTrueIsReturned) { cl_buffer_region region = {2, 2}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region.origin)); cl_buffer_region region2 = {4, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region2.origin)); cl_buffer_region region3 = {8, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region3.origin)); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); EXPECT_FALSE(buffer->isValidSubBufferOffset(region.origin)); EXPECT_FALSE(buffer->isValidSubBufferOffset(region2.origin)); cl_buffer_region region4 = {1025, 4}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region4.origin)); cl_buffer_region region5 = {1024, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region5.origin)); cl_buffer_region region6 = {127, 4}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region6.origin)); cl_buffer_region region7 = {128, 4}; EXPECT_TRUE(buffer->isValidSubBufferOffset(region7.origin)); cl_buffer_region region8 = {129, 4}; EXPECT_FALSE(buffer->isValidSubBufferOffset(region8.origin)); } TEST_F(SubBufferTest, givenSharingHandlerFromParentBufferWhenCreateThenShareHandler) { cl_buffer_region region = {2, 12}; auto handler = new SharingHandler(); buffer->setSharingHandler(handler); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); EXPECT_EQ(subBuffer->getSharingHandler().get(), handler); delete subBuffer; EXPECT_EQ(1, buffer->getRefInternalCount()); } TEST_F(SubBufferTest, GivenBufferWithAlignedHostPtrAndSameMemoryStorageWhenSubBufferIsCreatedThenHostPtrAndMemoryStorageAreOffseted) { cl_buffer_region region = {2, 2}; cl_int retVal = 0; void *alignedPointer = 
alignedMalloc(MemoryConstants::pageSize, MemoryConstants::preferredAlignment); Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL, MemoryConstants::pageSize, alignedPointer, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(alignedPointer, buffer->getHostPtr()); EXPECT_EQ(alignedPointer, buffer->getCpuAddress()); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ptrOffset(alignedPointer, 2), subBuffer->getHostPtr()); EXPECT_EQ(ptrOffset(alignedPointer, 2), subBuffer->getCpuAddress()); subBuffer->release(); buffer->release(); alignedFree(alignedPointer); } TEST_F(SubBufferTest, GivenBufferWithMemoryStorageAndNullHostPtrWhenSubBufferIsCreatedThenMemoryStorageIsOffsetedAndHostPtrIsNull) { cl_buffer_region region = {2, 2}; cl_int retVal = 0; Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, buffer->getHostPtr()); EXPECT_NE(nullptr, buffer->getCpuAddress()); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion, retVal); EXPECT_NE(nullptr, subBuffer); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, subBuffer->getHostPtr()); EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), 2), subBuffer->getCpuAddress()); subBuffer->release(); buffer->release(); } TEST_F(SubBufferTest, givenBufferWithHostPtrWhenSubbufferGetsMapPtrThenExpectBufferHostPtr) { cl_buffer_region region = {0, 16}; auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); ASSERT_EQ(CL_SUCCESS, retVal); void *mapPtr = subBuffer->getBasePtrForMap(0); EXPECT_EQ(pHostPtr, mapPtr); mapPtr = subBuffer->getBasePtrForMap(0); EXPECT_EQ(pHostPtr, mapPtr); subBuffer->release(); } TEST_F(SubBufferTest, 
givenBufferWithNoHostPtrWhenSubbufferGetsMapPtrThenExpectBufferMap) { cl_buffer_region region = {0, 16}; Buffer *buffer = Buffer::create(&context, CL_MEM_READ_WRITE, MemoryConstants::pageSize, nullptr, retVal); ASSERT_NE(nullptr, buffer); ASSERT_EQ(CL_SUCCESS, retVal); auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_WRITE, 0, ®ion, retVal); ASSERT_NE(nullptr, subBuffer); ASSERT_EQ(CL_SUCCESS, retVal); void *mapPtr = subBuffer->getBasePtrForMap(0); void *bufferMapPtr = buffer->getBasePtrForMap(0); EXPECT_EQ(bufferMapPtr, mapPtr); auto mapAllocation = subBuffer->getMapAllocation(); auto bufferMapAllocation = buffer->getMapAllocation(); ASSERT_NE(nullptr, bufferMapAllocation); EXPECT_EQ(bufferMapAllocation, mapAllocation); EXPECT_EQ(bufferMapPtr, mapAllocation->getUnderlyingBuffer()); mapPtr = subBuffer->getBasePtrForMap(0); EXPECT_EQ(bufferMapPtr, mapPtr); subBuffer->release(); buffer->release(); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/mem_obj/zero_copy_tests.cpp000066400000000000000000000216201363734646600272020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; class ZeroCopyBufferTest : public DeviceFixture, public testing::TestWithParam> { public: ZeroCopyBufferTest() { } protected: void SetUp() override { size_t sizeToAlloc; size_t alignment; host_ptr = nullptr; std::tie(flags, sizeToAlloc, alignment, size, ShouldBeZeroCopy, MisalignPointer) = GetParam(); if (sizeToAlloc > 0) { host_ptr = (void *)alignedMalloc(sizeToAlloc, alignment); } DeviceFixture::SetUp(); } void TearDown() 
override { DeviceFixture::TearDown(); alignedFree(host_ptr); } cl_int retVal = CL_SUCCESS; MockContext context; cl_mem_flags flags = 0; void *host_ptr; bool ShouldBeZeroCopy; cl_int size; bool MisalignPointer; }; static const int Multiplier = 1000; static const int CacheLinedAlignedSize = MemoryConstants::cacheLineSize * Multiplier; static const int CacheLinedMisAlignedSize = CacheLinedAlignedSize - 1; static const int PageAlignSize = MemoryConstants::preferredAlignment * Multiplier; // clang-format off //flags, size to alloc, alignment, size, ZeroCopy, MisalignPointer std::tuple Inputs[] = {std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, false, true), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedAlignedSize, MemoryConstants::preferredAlignment, CacheLinedAlignedSize, false, true), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedAlignedSize, MemoryConstants::preferredAlignment, CacheLinedAlignedSize, true, false), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, false, false), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, PageAlignSize, MemoryConstants::preferredAlignment, PageAlignSize, true, false), std::make_tuple((cl_mem_flags)CL_MEM_USE_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::cacheLineSize, CacheLinedAlignedSize, true, false), std::make_tuple((cl_mem_flags)CL_MEM_COPY_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, true, true), std::make_tuple((cl_mem_flags)CL_MEM_COPY_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, true, false), std::make_tuple((cl_mem_flags)NULL, 0, 0, CacheLinedMisAlignedSize, true, false), std::make_tuple((cl_mem_flags)NULL, 0, 0, CacheLinedAlignedSize, true, true)}; // clang-format on TEST_P(ZeroCopyBufferTest, 
CheckCacheAlignedPointerResultsInZeroCopy) { char *PassedPtr = (char *)host_ptr; //misalign the pointer if (MisalignPointer && PassedPtr) { PassedPtr += 1; } auto buffer = Buffer::create( &context, flags, size, PassedPtr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(ShouldBeZeroCopy, buffer->isMemObjZeroCopy()) << "Zero Copy not handled properly"; if (!ShouldBeZeroCopy && flags & CL_MEM_USE_HOST_PTR) { EXPECT_NE(buffer->getCpuAddress(), host_ptr); } EXPECT_NE(nullptr, buffer->getCpuAddress()); //check if buffer always have properly aligned storage ( PAGE ) EXPECT_EQ(alignUp(buffer->getCpuAddress(), MemoryConstants::cacheLineSize), buffer->getCpuAddress()); delete buffer; } INSTANTIATE_TEST_CASE_P( ZeroCopyBufferTests, ZeroCopyBufferTest, testing::ValuesIn(Inputs)); TEST(ZeroCopyBufferTestWithSharedContext, GivenContextThatIsSharedWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) { MockContext context; auto host_ptr = reinterpret_cast(0x1001); auto size = 64; auto retVal = CL_SUCCESS; context.isSharedContext = true; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()) << "Zero Copy not handled properly"; if (buffer->getGraphicsAllocation()->is32BitAllocation() == false) { EXPECT_EQ(host_ptr, buffer->getGraphicsAllocation()->getUnderlyingBuffer()); } } TEST(ZeroCopyBufferTestWithSharedContext, GivenContextThatIsSharedAndDisableZeroCopyFlagWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) { DebugManagerStateRestore stateRestore; DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true); MockContext context; auto host_ptr = reinterpret_cast(0x1001); auto size = 64; auto retVal = CL_SUCCESS; context.isSharedContext = true; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); } TEST(ZeroCopyWithDebugFlag, 
GivenInputsThatWouldResultInZeroCopyAndUseHostptrDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true); MockContext context; auto host_ptr = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); auto size = MemoryConstants::pageSize; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->isMemObjZeroCopy()); alignedFree(host_ptr); } TEST(ZeroCopyWithDebugFlag, GivenInputsThatWouldResultInZeroCopyAndDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.DisableZeroCopyForBuffers.set(true); MockContext context; auto host_ptr = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); auto size = MemoryConstants::pageSize; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->isMemObjZeroCopy()); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); alignedFree(host_ptr); } TEST(ZeroCopyWithDebugFlag, GivenBufferInputsThatWouldResultInZeroCopyAndDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.DisableZeroCopyForBuffers.set(true); MockContext context; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_ALLOC_HOST_PTR, MemoryConstants::pageSize, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(buffer->isMemObjZeroCopy()); EXPECT_FALSE(buffer->mappingOnCpuAllowed()); EXPECT_EQ(nullptr, buffer->getHostPtr()); EXPECT_EQ(nullptr, buffer->getAllocatedMapPtr()); auto bufferAllocation = buffer->getGraphicsAllocation()->getUnderlyingBuffer(); auto mapAllocation = buffer->getBasePtrForMap(0); EXPECT_EQ(mapAllocation, 
buffer->getAllocatedMapPtr()); EXPECT_NE(mapAllocation, bufferAllocation); } TEST(ZeroCopyBufferWith32BitAddressing, GivenDeviceSupporting32BitAddressingWhenAskedForBufferCreationFromHostPtrThenNonZeroCopyBufferIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.Force32bitAddressing.set(true); MockContext context; auto host_ptr = (void *)alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize); auto size = MemoryConstants::pageSize; auto retVal = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(buffer->isMemObjZeroCopy()); if (is64bit) { EXPECT_TRUE(buffer->getGraphicsAllocation()->is32BitAllocation()); } alignedFree(host_ptr); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/000077500000000000000000000000001363734646600246345ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/CMakeLists.txt000066400000000000000000000047471363734646600274100ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_memory_manager ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/address_mapper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/deferrable_allocation_deletion_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_mt_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/gfx_partition_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gfx_partition_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/graphics_allocation_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/internal_allocation_storage_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_memory_usage_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_multi_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/memory_manager_allocate_in_device_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_allocate_in_device_pool_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/memory_manager_allocate_in_preferred_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_allocate_in_preferred_pool_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/memory_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/physical_address_allocator_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_token_tests.cpp ) get_property(NEO_CORE_UNIFIED_MEMORY_TESTS GLOBAL PROPERTY NEO_CORE_UNIFIED_MEMORY_TESTS) list(APPEND IGDRCL_SRCS_tests_memory_manager ${NEO_CORE_UNIFIED_MEMORY_TESTS}) get_property(NEO_CORE_CPU_PAGE_FAULT_MANAGER_TESTS GLOBAL PROPERTY NEO_CORE_CPU_PAGE_FAULT_MANAGER_TESTS) list(APPEND IGDRCL_SRCS_tests_memory_manager ${NEO_CORE_CPU_PAGE_FAULT_MANAGER_TESTS}) if(WIN32) get_property(NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS_TESTS GLOBAL PROPERTY NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS_TESTS) list(APPEND IGDRCL_SRCS_tests_memory_manager ${NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS_TESTS}) endif() if(UNIX) get_property(NEO_CORE_PAGE_FAULT_MANAGER_LINUX_TESTS GLOBAL PROPERTY NEO_CORE_PAGE_FAULT_MANAGER_LINUX_TESTS) list(APPEND IGDRCL_SRCS_tests_memory_manager ${NEO_CORE_PAGE_FAULT_MANAGER_LINUX_TESTS}) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_memory_manager}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/address_mapper_tests.cpp000066400000000000000000000036261363734646600315620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/memory_manager/address_mapper.h" #include "test.h" #include "gtest/gtest.h" #include using 
namespace NEO; class AddressMapperFixture { public: void SetUp() { mapper = new AddressMapper(); } void TearDown() { delete mapper; } AddressMapper *mapper; }; typedef Test AddressMapperTests; TEST_F(AddressMapperTests, mapAlignedPointers) { uint32_t m1 = mapper->map((void *)0x1000, MemoryConstants::pageSize); EXPECT_EQ(0x1000u, m1); uint32_t m2 = mapper->map((void *)0x3000, MemoryConstants::pageSize); EXPECT_EQ(0x2000u, m2); uint32_t m3 = mapper->map((void *)0x1000, MemoryConstants::pageSize); EXPECT_EQ(0x1000u, m3); } TEST_F(AddressMapperTests, mapNotAlignedPointers) { void *vm1 = (void *)(0x1100); void *vm2 = (void *)(0x4100); uint32_t m1 = mapper->map(vm1, MemoryConstants::pageSize); EXPECT_EQ(0x1000u, m1); uint32_t m2 = mapper->map(vm2, MemoryConstants::pageSize); EXPECT_EQ(0x3000u, m2); uint32_t m3 = mapper->map(vm1, MemoryConstants::pageSize); EXPECT_EQ(0x1000u, m3); } TEST_F(AddressMapperTests, mapThenResize) { uint32_t m1 = mapper->map((void *)0x1000, MemoryConstants::pageSize); EXPECT_EQ(0x1000u, m1); uint32_t m2 = mapper->map((void *)0x1000, 2 * MemoryConstants::pageSize); EXPECT_EQ(0x2000u, m2); } TEST_F(AddressMapperTests, unmapNotMapped) { mapper->unmap((void *)0x1000); uint32_t m1 = mapper->map((void *)0x2000, MemoryConstants::pageSize); EXPECT_EQ(0x1000u, m1); // no crash expected mapper->unmap((void *)0x1000); mapper->unmap((void *)0x2000); uint32_t m2 = mapper->map((void *)0x2000, MemoryConstants::pageSize); EXPECT_NE(m1, m2); EXPECT_EQ(0x2000u, m2); } cpu_page_fault_manager_memory_sync_tests.cpp000066400000000000000000000034701363734646600356030ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests_fixture.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "gtest/gtest.h" 
using namespace NEO; struct CommandQueueMock : public MockCommandQueue { cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { transferToGpuCalled++; return CL_SUCCESS; } cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { transferToCpuCalled++; passedMapFlags = mapFlags; return CL_SUCCESS; } cl_int finish() override { finishCalled++; return CL_SUCCESS; } int transferToCpuCalled = 0; int transferToGpuCalled = 0; int finishCalled = 0; uint64_t passedMapFlags = 0; }; TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenSynchronizeMemoryThenEnqueueProperCalls) { void *alloc = reinterpret_cast(0x1); auto cmdQ = std::make_unique(); pageFaultManager->baseCpuTransfer(alloc, 10, cmdQ.get()); EXPECT_EQ(cmdQ->transferToCpuCalled, 1); EXPECT_EQ(cmdQ->transferToGpuCalled, 0); EXPECT_EQ(cmdQ->finishCalled, 0); pageFaultManager->baseGpuTransfer(alloc, cmdQ.get()); EXPECT_EQ(cmdQ->transferToCpuCalled, 1); EXPECT_EQ(cmdQ->transferToGpuCalled, 1); EXPECT_EQ(cmdQ->finishCalled, 1); } deferrable_allocation_deletion_tests.cpp000066400000000000000000000212471363734646600346740ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/memory_manager/deferrable_allocation_deletion.h" #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/os_interface/os_context.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include 
"opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; struct DeferredDeleterPublic : DeferredDeleter { public: using DeferredDeleter::doWorkInBackground; using DeferredDeleter::queue; using DeferredDeleter::queueMutex; bool shouldStopReached = false; bool allowExit = false; bool shouldStop() override { if (allowExit) { EXPECT_TRUE(queue.peekIsEmpty()); } shouldStopReached = allowExit; return allowExit; } }; struct DeferrableAllocationDeletionTest : ::testing::Test { void SetUp() override { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); memoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); device.reset(Device::create(executionEnvironment, 0u)); hwTag = device->getDefaultEngine().commandStreamReceiver->getTagAddress(); defaultOsContextId = device->getDefaultEngine().osContext->getContextId(); asyncDeleter = std::make_unique(); asyncDeleter->addClient(); } void TearDown() override { asyncDeleter->allowExit = true; asyncDeleter->removeClient(); } std::unique_ptr asyncDeleter; MockMemoryManager *memoryManager = nullptr; std::unique_ptr device; uint32_t defaultOsContextId = 0; volatile uint32_t *hwTag = nullptr; }; TEST_F(DeferrableAllocationDeletionTest, givenDeferrableAllocationWhenApplyThenWaitForEachTaskCount) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->updateTaskCount(1u, defaultOsContextId); *hwTag = 0u; asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation)); while (!asyncDeleter->queue.peekIsEmpty()) // wait for async thread to get allocation from queue std::this_thread::yield(); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); 
EXPECT_TRUE(allocation->isUsedByOsContext(defaultOsContextId)); // let async thread exit asyncDeleter->allowExit = true; *hwTag = 1u; // allow to destroy allocation while (!asyncDeleter->shouldStopReached) std::this_thread::yield(); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled); } HWTEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByTwoOsContextsWhenApplyDeletionThenWaitForBothContextsAndFlushNotReadyCsr) { auto &nonDefaultCommandStreamReceiver = static_cast &>(*device->commandStreamReceivers[1]); auto nonDefaultOsContextId = nonDefaultCommandStreamReceiver.getOsContext().getContextId(); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); *hwTag = 0u; *nonDefaultCommandStreamReceiver.getTagAddress() = 1u; allocation->updateTaskCount(1u, defaultOsContextId); allocation->updateTaskCount(1u, nonDefaultOsContextId); EXPECT_TRUE(allocation->isUsedByOsContext(defaultOsContextId)); EXPECT_TRUE(allocation->isUsedByOsContext(nonDefaultOsContextId)); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); EXPECT_FALSE(device->getUltCommandStreamReceiver().flushBatchedSubmissionsCalled); EXPECT_FALSE(nonDefaultCommandStreamReceiver.flushBatchedSubmissionsCalled); asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation)); while (allocation->isUsedByOsContext(nonDefaultOsContextId) && !device->getUltCommandStreamReceiver().flushBatchedSubmissionsCalled) // wait for second context completion signal std::this_thread::yield(); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); EXPECT_FALSE(nonDefaultCommandStreamReceiver.flushBatchedSubmissionsCalled); asyncDeleter->allowExit = true; *hwTag = 1u; } TEST_F(DeferrableAllocationDeletionTest, givenNotUsedAllocationWhenApplyDeletionThenDontWait) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), 
MemoryConstants::pageSize}); EXPECT_FALSE(allocation->isUsed()); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); while (!asyncDeleter->doWorkInBackground) std::this_thread::yield(); //wait for start async thread work std::unique_lock lock(asyncDeleter->queueMutex); asyncDeleter->allowExit = true; lock.unlock(); asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation)); while (!asyncDeleter->shouldStopReached) // wait async thread job end std::this_thread::yield(); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled); } TEST_F(DeferrableAllocationDeletionTest, givenTwoAllocationsUsedByOneOsContextsEnqueuedToAsyncDeleterWhenOneAllocationIsCompletedThenReleaseThatAllocation) { auto allocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); *hwTag = 1u; allocation1->updateTaskCount(2u, defaultOsContextId); allocation2->updateTaskCount(1u, defaultOsContextId); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); EXPECT_TRUE(allocation1->isUsed()); EXPECT_TRUE(allocation2->isUsed()); asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation1)); asyncDeleter->deferDeletion(new DeferrableAllocationDeletion(*memoryManager, *allocation2)); while (0u == memoryManager->freeGraphicsMemoryCalled) // wait for delete second allocation std::this_thread::yield(); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled); asyncDeleter->allowExit = true; *hwTag = 2u; } TEST_F(DeferrableAllocationDeletionTest, givenNotCompletedAllocationWhenDeletionIsAppliedThenReturnFalse) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); *hwTag = 0u; allocation->updateTaskCount(1u, 
defaultOsContextId); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); DeferrableAllocationDeletion deletion{*memoryManager, *allocation}; EXPECT_FALSE(deletion.apply()); EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryCalled); *hwTag = 1u; // complete allocation EXPECT_TRUE(deletion.apply()); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled); } TEST_F(DeferrableAllocationDeletionTest, givenNotUsedAllocationWhenDeletionIsAppliedThenReturnTrue) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_FALSE(allocation->isUsed()); DeferrableAllocationDeletion deletion{*memoryManager, *allocation}; EXPECT_TRUE(deletion.apply()); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled); } TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByUnregisteredEngineWhenDeletionIsAppliedThenReturnTrue) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->updateTaskCount(2u, defaultOsContextId); EXPECT_TRUE(allocation->isUsed()); DeferrableAllocationDeletion deletion{*memoryManager, *allocation}; device.reset(); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->rootDeviceEnvironments.clear(); EXPECT_EQ(0u, memoryManager->registeredEngines.size()); EXPECT_TRUE(allocation->isUsed()); memoryManager->freeGraphicsMemoryCalled = 0u; EXPECT_TRUE(deletion.apply()); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryCalled); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/deferred_deleter_mt_tests.cpp000066400000000000000000000167541363734646600325630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include 
"opencl/test/unit_test/mocks/mock_deferrable_deletion.h" #include "opencl/test/unit_test/mocks/mock_deferred_deleter.h" #include "gtest/gtest.h" using namespace NEO; TEST(DeferredDeleter, NonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(DeferredDeleter, NonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } struct DeferredDeleterTest : public ::testing::Test { void SetUp() override { deleter.reset(new MockDeferredDeleter()); } void TearDown() override { EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_EQ(0, deleter->getElementsToRelease()); } void waitForAsyncThread() { while (!deleter->isWorking()) { std::this_thread::yield(); } } MockDeferrableDeletion *createDeletion() { return new MockDeferrableDeletion(); } std::unique_ptr deleter; }; TEST_F(DeferredDeleterTest, initialValues) { EXPECT_EQ(0, deleter->getClientsNum()); EXPECT_FALSE(deleter->isWorking()); EXPECT_FALSE(deleter->isThreadRunning()); EXPECT_EQ(0, deleter->drainCalled); EXPECT_EQ(0, deleter->clearCalled); EXPECT_EQ(0, deleter->areElementsReleasedCalled); EXPECT_EQ(0, deleter->shouldStopCalled); EXPECT_EQ(0, deleter->getElementsToRelease()); EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_FALSE(deleter->expectDrainCalled); EXPECT_FALSE(deleter->expectedDrainValue); } TEST_F(DeferredDeleterTest, clearQueueWithOnePair) { auto deletion = createDeletion(); deleter->DeferredDeleter::deferDeletion(deletion); EXPECT_FALSE(deleter->isQueueEmpty()); deleter->drain(); EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_EQ(1, deleter->drainCalled); EXPECT_EQ(1, deleter->clearCalled); } TEST_F(DeferredDeleterTest, addTwoClients) { deleter->DeferredDeleter::addClient(); waitForAsyncThread(); EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); EXPECT_EQ(1, deleter->getClientsNum()); auto threadHandle = deleter->getThreadHandle(); EXPECT_NE(nullptr, threadHandle); 
deleter->DeferredDeleter::addClient(); EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); EXPECT_EQ(2, deleter->getClientsNum()); EXPECT_EQ(threadHandle, deleter->getThreadHandle()); deleter->forceStop(); EXPECT_FALSE(deleter->isWorking()); EXPECT_FALSE(deleter->isThreadRunning()); } TEST_F(DeferredDeleterTest, addAndRemoveTwoClients) { deleter->DeferredDeleter::addClient(); deleter->DeferredDeleter::addClient(); waitForAsyncThread(); EXPECT_EQ(2, deleter->getClientsNum()); deleter->DeferredDeleter::removeClient(); EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); EXPECT_EQ(1, deleter->getClientsNum()); deleter->allowEarlyStopThread(); deleter->DeferredDeleter::removeClient(); EXPECT_FALSE(deleter->isThreadRunning()); EXPECT_FALSE(deleter->isWorking()); EXPECT_EQ(0, deleter->getClientsNum()); deleter->forceStop(); EXPECT_FALSE(deleter->isThreadRunning()); } TEST_F(DeferredDeleterTest, drainWhenNotWorking) { EXPECT_FALSE(deleter->isWorking()); deleter->drain(); EXPECT_EQ(1, deleter->drainCalled); EXPECT_EQ(1, deleter->clearCalled); EXPECT_EQ(2, deleter->areElementsReleasedCalled); } TEST_F(DeferredDeleterTest, drainWhenWorking) { deleter->DeferredDeleter::addClient(); waitForAsyncThread(); EXPECT_TRUE(deleter->isWorking()); deleter->drain(); EXPECT_EQ(1, deleter->drainCalled); EXPECT_EQ(2, deleter->areElementsReleasedCalled); deleter->forceStop(); } TEST_F(DeferredDeleterTest, stopWhenThreadNotRunning) { auto deletion = createDeletion(); deleter->DeferredDeleter::deferDeletion(deletion); EXPECT_FALSE(deleter->isQueueEmpty()); EXPECT_FALSE(deleter->isThreadRunning()); deleter->expectDrainBlockingValue(false); deleter->forceStop(); EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_NE(0, deleter->drainCalled); } TEST_F(DeferredDeleterTest, stopWhenThreadRunning) { deleter->DeferredDeleter::addClient(); auto deletion = createDeletion(); deleter->DeferredDeleter::deferDeletion(deletion); 
EXPECT_TRUE(deleter->isThreadRunning()); deleter->allowEarlyStopThread(); deleter->expectDrainBlockingValue(false); deleter->DeferredDeleter::removeClient(); EXPECT_FALSE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_NE(0, deleter->drainCalled); } TEST_F(DeferredDeleterTest, asyncThreadWaitsForQueueItem) { deleter->DeferredDeleter::addClient(); waitForAsyncThread(); auto deletion = createDeletion(); deleter->DeferredDeleter::deferDeletion(deletion); EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); deleter->allowEarlyStopThread(); deleter->DeferredDeleter::removeClient(); EXPECT_TRUE(deleter->isQueueEmpty()); } TEST_F(DeferredDeleterTest, asyncThreadClearQueueWithoutWaitingForQueueItem) { auto deletion = createDeletion(); deleter->DeferredDeleter::deferDeletion(deletion); EXPECT_FALSE(deleter->isQueueEmpty()); EXPECT_FALSE(deleter->isThreadRunning()); deleter->DeferredDeleter::addClient(); waitForAsyncThread(); EXPECT_TRUE(deleter->isThreadRunning()); deleter->allowEarlyStopThread(); deleter->DeferredDeleter::removeClient(); EXPECT_TRUE(deleter->isQueueEmpty()); } TEST_F(DeferredDeleterTest, asyncThreadWaitsForQueueItemTwice) { deleter->DeferredDeleter::addClient(); waitForAsyncThread(); auto deletion = createDeletion(); deleter->DeferredDeleter::deferDeletion(deletion); EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); while (deleter->shouldStopCalled == 0) ; EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); auto secondDeletion = createDeletion(); deleter->DeferredDeleter::deferDeletion(secondDeletion); deleter->allowEarlyStopThread(); deleter->DeferredDeleter::removeClient(); EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_EQ(0, deleter->getElementsToRelease()); } TEST_F(DeferredDeleterTest, checkIfAllElementsAreReleased) { deleter->setElementsToRelease(1); EXPECT_EQ(1, deleter->getElementsToRelease()); EXPECT_FALSE(deleter->baseAreElementsReleased()); 
deleter->setElementsToRelease(0); EXPECT_EQ(0, deleter->getElementsToRelease()); EXPECT_TRUE(deleter->baseAreElementsReleased()); } TEST_F(DeferredDeleterTest, checkIfThreadShouldStop) { deleter->setDoWorkInBackgroundValue(false); EXPECT_TRUE(deleter->baseShouldStop()); deleter->setDoWorkInBackgroundValue(true); EXPECT_FALSE(deleter->baseShouldStop()); } TEST_F(DeferredDeleterTest, givenDeferredDeleterWhenBlockingDrainIsCalledThenArElementsReleasedIsCalled) { deleter->drain(true); EXPECT_NE(0, deleter->areElementsReleasedCalled); EXPECT_EQ(1, deleter->drainCalled); } TEST_F(DeferredDeleterTest, givenDeferredDeleterWhenNonBlockingDrainIsCalledThenArElementsReleasedIsNotCalled) { deleter->drain(false); EXPECT_EQ(0, deleter->areElementsReleasedCalled); EXPECT_EQ(1, deleter->drainCalled); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/gfx_partition_tests.cpp000066400000000000000000000002431363734646600314360ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/memory_manager/gfx_partition_tests.inl" compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/gfx_partition_tests.inl000066400000000000000000000160021363734646600314360ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/os_memory.h" #include "opencl/test/unit_test/mocks/mock_gfx_partition.h" #include "gtest/gtest.h" using namespace NEO; constexpr size_t reservedCpuAddressRangeSize = is64bit ? 
(6 * 4 * GB) : 0; constexpr uint64_t sizeHeap32 = 4 * MemoryConstants::gigaByte; void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t gfxTop, uint64_t svmTop) { if (svmTop) { // SVM should be initialized EXPECT_TRUE(gfxPartition.heapInitialized(HeapIndex::HEAP_SVM)); EXPECT_EQ(gfxPartition.getHeapBase(HeapIndex::HEAP_SVM), 0ull); EXPECT_EQ(gfxPartition.getHeapSize(HeapIndex::HEAP_SVM), svmTop); EXPECT_EQ(gfxPartition.getHeapLimit(HeapIndex::HEAP_SVM), svmTop - 1); } else { // Limited range EXPECT_FALSE(gfxPartition.heapInitialized(HeapIndex::HEAP_SVM)); } for (auto heap32 : GfxPartition::heap32Names) { EXPECT_TRUE(gfxPartition.heapInitialized(heap32)); EXPECT_TRUE(isAligned(gfxPartition.getHeapBase(heap32))); EXPECT_EQ(gfxPartition.getHeapBase(heap32), gfxBase); EXPECT_EQ(gfxPartition.getHeapSize(heap32), sizeHeap32); gfxBase += sizeHeap32; } uint64_t sizeStandard = (gfxTop - gfxBase) >> 1; EXPECT_TRUE(gfxPartition.heapInitialized(HeapIndex::HEAP_STANDARD)); auto heapStandardBase = gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD); auto heapStandardSize = gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD); EXPECT_TRUE(isAligned(heapStandardBase)); EXPECT_EQ(heapStandardBase, gfxBase); EXPECT_EQ(heapStandardSize, sizeStandard); gfxBase += sizeStandard; EXPECT_TRUE(gfxPartition.heapInitialized(HeapIndex::HEAP_STANDARD64KB)); auto heapStandard64KbBase = gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB); auto heapStandard64KbSize = gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB); EXPECT_TRUE(isAligned(heapStandard64KbBase)); EXPECT_EQ(heapStandard64KbBase, heapStandardBase + heapStandardSize); EXPECT_EQ(heapStandard64KbSize, heapStandardSize); EXPECT_EQ(heapStandard64KbBase + heapStandard64KbSize, gfxTop); EXPECT_EQ(gfxBase + sizeStandard, gfxTop); size_t sizeSmall = MemoryConstants::pageSize; size_t sizeBig = 4 * MemoryConstants::megaByte + MemoryConstants::pageSize; for (auto heap : MockGfxPartition::allHeapNames) { if 
(!gfxPartition.heapInitialized(heap)) { EXPECT_TRUE(heap == HeapIndex::HEAP_SVM || heap == HeapIndex::HEAP_EXTENDED); continue; } EXPECT_GT(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap)); EXPECT_EQ(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap) + GfxPartition::heapGranularity); auto ptrBig = gfxPartition.heapAllocate(heap, sizeBig); EXPECT_NE(ptrBig, 0ull); EXPECT_LT(gfxPartition.getHeapBase(heap), ptrBig); EXPECT_EQ(ptrBig, gfxPartition.getHeapBase(heap) + GfxPartition::heapGranularity); gfxPartition.heapFree(heap, ptrBig, sizeBig); auto ptrSmall = gfxPartition.heapAllocate(heap, sizeSmall); EXPECT_NE(ptrSmall, 0ull); EXPECT_LT(gfxPartition.getHeapBase(heap), ptrSmall); EXPECT_GT(gfxPartition.getHeapLimit(heap), ptrSmall); EXPECT_EQ(ptrSmall, gfxPartition.getHeapBase(heap) + gfxPartition.getHeapSize(heap) - GfxPartition::heapGranularity - sizeSmall); gfxPartition.heapFree(heap, ptrSmall, sizeSmall); } } TEST(GfxPartitionTest, testGfxPartitionFullRange48BitSVM) { MockGfxPartition gfxPartition; gfxPartition.init(maxNBitValue(48), reservedCpuAddressRangeSize, 0, 1); uint64_t gfxTop = maxNBitValue(48) + 1; uint64_t gfxBase = MemoryConstants::maxSvmAddress + 1; testGfxPartition(gfxPartition, gfxBase, gfxTop, gfxBase); } TEST(GfxPartitionTest, testGfxPartitionFullRange47BitSVM) { MockGfxPartition gfxPartition; gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, 0, 1); uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : (uint64_t)gfxPartition.getReservedCpuAddressRange(); uint64_t gfxTop = is32bit ? maxNBitValue(47) + 1 : gfxBase + gfxPartition.getReservedCpuAddressRangeSize(); uint64_t svmTop = MemoryConstants::maxSvmAddress + 1; testGfxPartition(gfxPartition, gfxBase, gfxTop, svmTop); } TEST(GfxPartitionTest, testGfxPartitionLimitedRange) { MockGfxPartition gfxPartition; gfxPartition.init(maxNBitValue(47 - 1), reservedCpuAddressRangeSize, 0, 1); uint64_t gfxBase = is32bit ? 
MemoryConstants::maxSvmAddress + 1 : 0ull; uint64_t gfxTop = maxNBitValue(47 - 1) + 1; uint64_t svmTop = gfxBase; testGfxPartition(gfxPartition, gfxBase, gfxTop, svmTop); } TEST(GfxPartitionTest, testGfxPartitionUnsupportedRange) { if (is32bit) { GTEST_SKIP(); } MockGfxPartition gfxPartition; EXPECT_THROW(gfxPartition.init(maxNBitValue(48 + 1), reservedCpuAddressRangeSize, 0, 1), std::exception); } TEST(GfxPartitionTest, testGfxPartitionFullRange48BitSVMHeap64KBSplit) { uint32_t rootDeviceIndex = 3; size_t numRootDevices = 5; MockGfxPartition gfxPartition; gfxPartition.init(maxNBitValue(48), reservedCpuAddressRangeSize, rootDeviceIndex, numRootDevices); uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : maxNBitValue(48 - 1) + 1; uint64_t gfxTop = maxNBitValue(48) + 1; auto heapStandardSize = (gfxTop - gfxBase - 4 * sizeHeap32) / 2; auto heapStandard64KBSize = alignDown(heapStandardSize / numRootDevices, GfxPartition::heapGranularity); EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); EXPECT_EQ(gfxBase + 4 * sizeHeap32 + heapStandardSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); } TEST(GfxPartitionTest, testGfxPartitionFullRange47BitSVMHeap64KBSplit) { uint32_t rootDeviceIndex = 3; size_t numRootDevices = 5; MockGfxPartition gfxPartition; gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, rootDeviceIndex, numRootDevices); uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : (uint64_t)gfxPartition.getReservedCpuAddressRange(); uint64_t gfxTop = is32bit ? 
maxNBitValue(47) + 1 : gfxBase + gfxPartition.getReservedCpuAddressRangeSize(); auto heapStandardSize = ((gfxTop - gfxBase) - 4 * sizeHeap32) / 2; auto heapStandard64KBSize = alignDown(heapStandardSize / numRootDevices, GfxPartition::heapGranularity); EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); EXPECT_EQ(gfxBase + 4 * sizeHeap32 + heapStandardSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/graphics_allocation_tests.cpp000066400000000000000000000217571363734646600326030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "gtest/gtest.h" using namespace NEO; TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenIsCreatedThenAllInspectionIdsAreSetToZero) { MockGraphicsAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0u, 0u, 0, MemoryPool::MemoryNull); for (auto i = 0u; i < MemoryManager::maxOsContextCount; i++) { EXPECT_EQ(0u, graphicsAllocation.getInspectionId(i)); } } TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenIsCreatedThenTaskCountsAreInitializedProperly) { GraphicsAllocation graphicsAllocation1(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0u, 0u, 0, MemoryPool::MemoryNull); GraphicsAllocation graphicsAllocation2(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0u, 0u, 0, MemoryPool::MemoryNull); for (auto i = 0u; i < MemoryManager::maxOsContextCount; i++) { EXPECT_EQ(MockGraphicsAllocation::objectNotUsed, graphicsAllocation1.getTaskCount(i)); EXPECT_EQ(MockGraphicsAllocation::objectNotUsed, graphicsAllocation2.getTaskCount(i)); EXPECT_EQ(MockGraphicsAllocation::objectNotResident, graphicsAllocation1.getResidencyTaskCount(i)); EXPECT_EQ(MockGraphicsAllocation::objectNotResident, 
graphicsAllocation2.getResidencyTaskCount(i)); } } TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenUpdatedTaskCountThenAllocationWasUsed) { MockGraphicsAllocation graphicsAllocation; EXPECT_FALSE(graphicsAllocation.isUsed()); graphicsAllocation.updateTaskCount(0u, 0u); EXPECT_TRUE(graphicsAllocation.isUsed()); } TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenUpdatedTaskCountThenOnlyOneTaskCountIsUpdated) { MockGraphicsAllocation graphicsAllocation; graphicsAllocation.updateTaskCount(1u, 0u); EXPECT_EQ(1u, graphicsAllocation.getTaskCount(0u)); for (auto i = 1u; i < MemoryManager::maxOsContextCount; i++) { EXPECT_EQ(MockGraphicsAllocation::objectNotUsed, graphicsAllocation.getTaskCount(i)); } graphicsAllocation.updateTaskCount(2u, 1u); EXPECT_EQ(1u, graphicsAllocation.getTaskCount(0u)); EXPECT_EQ(2u, graphicsAllocation.getTaskCount(1u)); for (auto i = 2u; i < MemoryManager::maxOsContextCount; i++) { EXPECT_EQ(MockGraphicsAllocation::objectNotUsed, graphicsAllocation.getTaskCount(i)); } } TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenUpdatedTaskCountToobjectNotUsedValueThenUnregisterContext) { MockGraphicsAllocation graphicsAllocation; EXPECT_FALSE(graphicsAllocation.isUsed()); graphicsAllocation.updateTaskCount(0u, 0u); EXPECT_TRUE(graphicsAllocation.isUsed()); graphicsAllocation.updateTaskCount(MockGraphicsAllocation::objectNotUsed, 0u); EXPECT_FALSE(graphicsAllocation.isUsed()); } TEST(GraphicsAllocationTest, whenTwoContextsUpdatedTaskCountAndOneOfThemUnregisteredThenOneContextUsageRemains) { MockGraphicsAllocation graphicsAllocation; EXPECT_FALSE(graphicsAllocation.isUsed()); graphicsAllocation.updateTaskCount(0u, 0u); graphicsAllocation.updateTaskCount(0u, 1u); EXPECT_TRUE(graphicsAllocation.isUsed()); graphicsAllocation.updateTaskCount(MockGraphicsAllocation::objectNotUsed, 0u); EXPECT_TRUE(graphicsAllocation.isUsed()); graphicsAllocation.updateTaskCount(MockGraphicsAllocation::objectNotUsed, 0u); 
EXPECT_TRUE(graphicsAllocation.isUsed()); graphicsAllocation.updateTaskCount(MockGraphicsAllocation::objectNotUsed, 1u); EXPECT_FALSE(graphicsAllocation.isUsed()); } TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenUpdatedResidencyTaskCountToNonDefaultValueThenAllocationIsResident) { MockGraphicsAllocation graphicsAllocation; EXPECT_FALSE(graphicsAllocation.isResident(0u)); uint32_t residencyTaskCount = 1u; graphicsAllocation.updateResidencyTaskCount(residencyTaskCount, 0u); EXPECT_EQ(residencyTaskCount, graphicsAllocation.getResidencyTaskCount(0u)); EXPECT_TRUE(graphicsAllocation.isResident(0u)); graphicsAllocation.updateResidencyTaskCount(MockGraphicsAllocation::objectNotResident, 0u); EXPECT_EQ(MockGraphicsAllocation::objectNotResident, graphicsAllocation.getResidencyTaskCount(0u)); EXPECT_FALSE(graphicsAllocation.isResident(0u)); } TEST(GraphicsAllocationTest, givenResidentGraphicsAllocationWhenResetResidencyTaskCountThenAllocationIsNotResident) { MockGraphicsAllocation graphicsAllocation; graphicsAllocation.updateResidencyTaskCount(1u, 0u); EXPECT_TRUE(graphicsAllocation.isResident(0u)); graphicsAllocation.releaseResidencyInOsContext(0u); EXPECT_FALSE(graphicsAllocation.isResident(0u)); } TEST(GraphicsAllocationTest, givenNonResidentGraphicsAllocationWhenCheckIfResidencyTaskCountIsBelowAnyValueThenReturnTrue) { MockGraphicsAllocation graphicsAllocation; EXPECT_FALSE(graphicsAllocation.isResident(0u)); EXPECT_TRUE(graphicsAllocation.isResidencyTaskCountBelow(0u, 0u)); } TEST(GraphicsAllocationTest, givenResidentGraphicsAllocationWhenCheckIfResidencyTaskCountIsBelowCurrentResidencyTaskCountThenReturnFalse) { MockGraphicsAllocation graphicsAllocation; auto currentResidencyTaskCount = 1u; graphicsAllocation.updateResidencyTaskCount(currentResidencyTaskCount, 0u); EXPECT_TRUE(graphicsAllocation.isResident(0u)); EXPECT_FALSE(graphicsAllocation.isResidencyTaskCountBelow(currentResidencyTaskCount, 0u)); } TEST(GraphicsAllocationTest, 
givenResidentGraphicsAllocationWhenCheckIfResidencyTaskCountIsBelowHigherThanCurrentResidencyTaskCountThenReturnTrue) { MockGraphicsAllocation graphicsAllocation; auto currentResidencyTaskCount = 1u; graphicsAllocation.updateResidencyTaskCount(currentResidencyTaskCount, 0u); EXPECT_TRUE(graphicsAllocation.isResident(0u)); EXPECT_TRUE(graphicsAllocation.isResidencyTaskCountBelow(currentResidencyTaskCount + 1u, 0u)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsCommandBufferThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::COMMAND_BUFFER)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsConstantSurfaceThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::CONSTANT_SURFACE)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsGlobalSurfaceThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::GLOBAL_SURFACE)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsInternalHeapThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::INTERNAL_HEAP)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsKernelIsaThenCpuAccessIsNotRequired) { EXPECT_FALSE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::KERNEL_ISA)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsLinearStreamThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::LINEAR_STREAM)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsPipeThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::PIPE)); } TEST(GraphicsAllocationTest, whenAllocationTypeIsTimestampPacketThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER)); } 
TEST(GraphicsAllocationTest, givenDefaultAllocationWhenGettingNumHandlesThenOneIsReturned) { MockGraphicsAllocation graphicsAllocation; EXPECT_EQ(1u, graphicsAllocation.getNumHandles()); } TEST(GraphicsAllocationTest, givenDefaultGraphicsAllocationWhenInternalHandleIsBeingObtainedThenZeroIsReturned) { MockGraphicsAllocation graphicsAllocation; EXPECT_EQ(0llu, graphicsAllocation.peekInternalHandle(nullptr)); } TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenQueryingUsedPageSizeThenCorrectSizeForMemoryPoolUsedIsReturned) { MemoryPool::Type page4kPools[] = {MemoryPool::MemoryNull, MemoryPool::System4KBPages, MemoryPool::System4KBPagesWith32BitGpuAddressing, MemoryPool::SystemCpuInaccessible}; for (auto pool : page4kPools) { MockGraphicsAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0u, 0u, (osHandle)1, pool); EXPECT_EQ(MemoryConstants::pageSize, graphicsAllocation.getUsedPageSize()); } MemoryPool::Type page64kPools[] = {MemoryPool::System64KBPages, MemoryPool::System64KBPagesWith32BitGpuAddressing, MemoryPool::LocalMemory}; for (auto pool : page64kPools) { MockGraphicsAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0u, 0u, 0, pool); EXPECT_EQ(MemoryConstants::pageSize64k, graphicsAllocation.getUsedPageSize()); } } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/host_ptr_manager_tests.cpp000066400000000000000000001323141363734646600321220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/memory_constants.h" #include "opencl/test/unit_test/fixtures/memory_manager_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include 
"opencl/test/unit_test/mocks/mock_host_ptr_manager.h" #include "opencl/test/unit_test/mocks/mock_internal_allocation_storage.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" using namespace NEO; TEST(HostPtrManager, AlignedPointerAndAlignedSizeAskedForAllocationCountReturnsOne) { auto size = MemoryConstants::pageSize * 10; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(1u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.totalRequiredSize, size); EXPECT_EQ(ptr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(size, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); } TEST(HostPtrManager, AlignedPointerAndNotAlignedSizeAskedForAllocationCountReturnsTwo) { auto size = MemoryConstants::pageSize * 10 - 1; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(2u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::TRAILING); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.totalRequiredSize, alignUp(size, MemoryConstants::pageSize)); EXPECT_EQ(ptr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(9 * MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); auto trailingPtr = alignDown(ptrOffset(ptr, size), MemoryConstants::pageSize); EXPECT_EQ(trailingPtr, reqs.allocationFragments[1].allocationPtr); 
EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST(HostPtrManager, NotAlignedPointerAndNotAlignedSizeAskedForAllocationCountReturnsThree) { auto size = MemoryConstants::pageSize * 10 - 1; void *ptr = (void *)0x1045; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(3u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::TRAILING); auto leadingPtr = (void *)0x1000; auto middlePtr = (void *)0x2000; auto trailingPtr = (void *)0xb000; EXPECT_EQ(reqs.totalRequiredSize, 11 * MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(9 * MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(trailingPtr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[2].allocationSize); } TEST(HostPtrManager, NotAlignedPointerAndNotAlignedSizeWithinOnePageAskedForAllocationCountReturnsOne) { auto size = 200; void *ptr = (void *)0x1045; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(1u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto leadingPtr = (void *)0x1000; EXPECT_EQ(reqs.totalRequiredSize, 
MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST(HostPtrManager, NotAlignedPointerAndNotAlignedSizeWithinTwoPagesAskedForAllocationCountReturnsTwo) { auto size = MemoryConstants::pageSize; void *ptr = (void *)0x1045; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(2u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::TRAILING); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto leadingPtr = (void *)0x1000; auto trailingPtr = (void *)0x2000; EXPECT_EQ(reqs.totalRequiredSize, 2 * MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(trailingPtr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST(HostPtrManager, AlignedPointerAndAlignedSizeOfOnePageAskedForAllocationCountReturnsMiddleOnly) { auto size = MemoryConstants::pageSize * 10; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(1u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, 
FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto middlePtr = (void *)0x1000; EXPECT_EQ(reqs.totalRequiredSize, 10 * MemoryConstants::pageSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(10 * MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST(HostPtrManager, NotAlignedPointerAndSizeThatFitsToPageAskedForAllocationCountReturnsMiddleAndLeading) { auto size = MemoryConstants::pageSize * 10 - 1; void *ptr = (void *)0x1001; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(2u, reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::LEADING); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto leadingPtr = (void *)0x1000; auto middlePtr = (void *)0x2000; EXPECT_EQ(reqs.totalRequiredSize, 10 * MemoryConstants::pageSize); EXPECT_EQ(leadingPtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(9 * MemoryConstants::pageSize, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST(HostPtrManager, AlignedPointerAndPageSizeAskedForAllocationCountRetrunsMiddle) { auto size = MemoryConstants::pageSize; void *ptr = (void *)0x1000; AllocationRequirements reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); EXPECT_EQ(1u, 
reqs.requiredFragmentsCount); EXPECT_EQ(reqs.allocationFragments[0].fragmentPosition, FragmentPosition::MIDDLE); EXPECT_EQ(reqs.allocationFragments[1].fragmentPosition, FragmentPosition::NONE); EXPECT_EQ(reqs.allocationFragments[2].fragmentPosition, FragmentPosition::NONE); auto middlePtr = (void *)0x1000; EXPECT_EQ(reqs.totalRequiredSize, MemoryConstants::pageSize); EXPECT_EQ(middlePtr, reqs.allocationFragments[0].allocationPtr); EXPECT_EQ(MemoryConstants::pageSize, reqs.allocationFragments[0].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[1].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[1].allocationSize); EXPECT_EQ(nullptr, reqs.allocationFragments[2].allocationPtr); EXPECT_EQ(0u, reqs.allocationFragments[2].allocationSize); } TEST(HostPtrManager, AllocationRequirementsForMiddleAllocationThatIsNotStoredInManagerAskedForGraphicsAllocationReturnsNotAvailable) { auto size = MemoryConstants::pageSize; void *ptr = (void *)0x1000; auto reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); MockHostPtrManager hostPtrManager; auto gpuAllocationFragments = hostPtrManager.populateAlreadyAllocatedFragments(reqs); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(ptr, gpuAllocationFragments.fragmentStorageData[0].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].cpuPtr); } TEST(HostPtrManager, AllocationRequirementsForMiddleAllocationThatIsStoredInManagerAskedForGraphicsAllocationReturnsProperAllocationAndIncreasesRefCount) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; auto cpuPtr = (void *)0x1000; auto ptrSize = MemoryConstants::pageSize; auto osInternalStorage = (OsHandle *)0x12312; 
allocationFragment.fragmentCpuPointer = cpuPtr; allocationFragment.fragmentSize = ptrSize; allocationFragment.osInternalStorage = osInternalStorage; hostPtrManager.storeFragment(allocationFragment); auto reqs = MockHostPtrManager::getAllocationRequirements(cpuPtr, ptrSize); auto gpuAllocationFragments = hostPtrManager.populateAlreadyAllocatedFragments(reqs); EXPECT_EQ(osInternalStorage, gpuAllocationFragments.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(cpuPtr, gpuAllocationFragments.fragmentStorageData[0].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].cpuPtr); auto fragment = hostPtrManager.getFragment(cpuPtr); EXPECT_EQ(2, fragment->refCount); } TEST(HostPtrManager, AllocationRequirementsForAllocationWithinSizeOfStoredAllocationInManagerAskedForGraphicsAllocationReturnsProperAllocation) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; auto cpuPtr = (void *)0x1000; auto ptrSize = MemoryConstants::pageSize * 10; auto osInternalStorage = (OsHandle *)0x12312; allocationFragment.fragmentCpuPointer = cpuPtr; allocationFragment.fragmentSize = ptrSize; allocationFragment.osInternalStorage = osInternalStorage; hostPtrManager.storeFragment(allocationFragment); auto reqs = MockHostPtrManager::getAllocationRequirements(cpuPtr, MemoryConstants::pageSize); auto gpuAllocationFragments = hostPtrManager.populateAlreadyAllocatedFragments(reqs); EXPECT_EQ(osInternalStorage, gpuAllocationFragments.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(cpuPtr, gpuAllocationFragments.fragmentStorageData[0].cpuPtr); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, 
gpuAllocationFragments.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(nullptr, gpuAllocationFragments.fragmentStorageData[2].cpuPtr); auto fragment = hostPtrManager.getFragment(cpuPtr); EXPECT_EQ(2, fragment->refCount); } TEST(HostPtrManager, HostPtrAndSizeStoredToHostPtrManagerIncreasesTheContainerCount) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; EXPECT_EQ(allocationFragment.fragmentCpuPointer, nullptr); EXPECT_EQ(allocationFragment.fragmentSize, 0u); EXPECT_EQ(allocationFragment.refCount, 0); hostPtrManager.storeFragment(allocationFragment); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, HostPtrAndSizeStoredToHostPtrManagerTwiceReturnsOneAsFragmentCount) { MockHostPtrManager hostPtrManager; FragmentStorage allocationFragment; hostPtrManager.storeFragment(allocationFragment); hostPtrManager.storeFragment(allocationFragment); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, EmptyHostPtrManagerAskedForFragmentReturnsNullptr) { MockHostPtrManager hostPtrManager; auto fragment = hostPtrManager.getFragment((void *)0x10121); EXPECT_EQ(nullptr, fragment); EXPECT_EQ(0u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, NonEmptyHostPtrManagerAskedForFragmentReturnsProperFragmentWithRefCountOne) { MockHostPtrManager hostPtrManager; FragmentStorage fragment; void *cpuPtr = (void *)0x10121; auto fragmentSize = 101u; fragment.fragmentCpuPointer = cpuPtr; fragment.fragmentSize = fragmentSize; fragment.refCount = 0; hostPtrManager.storeFragment(fragment); auto retFragment = hostPtrManager.getFragment(cpuPtr); EXPECT_NE(retFragment, &fragment); EXPECT_EQ(1, retFragment->refCount); EXPECT_EQ(cpuPtr, retFragment->fragmentCpuPointer); EXPECT_EQ(fragmentSize, retFragment->fragmentSize); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, HostPtrManagerFilledTwiceWithTheSamePointerWhenAskedForFragmentReturnsItWithRefCountSetToTwo) { MockHostPtrManager hostPtrManager; 
FragmentStorage fragment; void *cpuPtr = (void *)0x10121; auto fragmentSize = 101u; fragment.fragmentCpuPointer = cpuPtr; fragment.fragmentSize = fragmentSize; fragment.refCount = 0; hostPtrManager.storeFragment(fragment); hostPtrManager.storeFragment(fragment); auto retFragment = hostPtrManager.getFragment(cpuPtr); EXPECT_NE(retFragment, &fragment); EXPECT_EQ(2, retFragment->refCount); EXPECT_EQ(cpuPtr, retFragment->fragmentCpuPointer); EXPECT_EQ(fragmentSize, retFragment->fragmentSize); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, GivenHostPtrManagerFilledWithFragmentsWhenFragmentIsBeingReleasedThenManagerMaintainsProperRefferenceCount) { MockHostPtrManager hostPtrManager; FragmentStorage fragment; void *cpuPtr = (void *)0x1000; auto fragmentSize = MemoryConstants::pageSize; fragment.fragmentCpuPointer = cpuPtr; fragment.fragmentSize = fragmentSize; hostPtrManager.storeFragment(fragment); hostPtrManager.storeFragment(fragment); ASSERT_EQ(1u, hostPtrManager.getFragmentCount()); auto fragmentReadyForRelease = hostPtrManager.releaseHostPtr(cpuPtr); EXPECT_FALSE(fragmentReadyForRelease); auto retFragment = hostPtrManager.getFragment(cpuPtr); EXPECT_EQ(1, retFragment->refCount); fragmentReadyForRelease = hostPtrManager.releaseHostPtr(cpuPtr); EXPECT_TRUE(fragmentReadyForRelease); retFragment = hostPtrManager.getFragment(cpuPtr); EXPECT_EQ(nullptr, retFragment); EXPECT_EQ(0u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, GivenOsHandleStorageWhenAskedToStoreTheFragmentThenFragmentIsStoredProperly) { OsHandleStorage storage; void *cpu1 = (void *)0x1000; void *cpu2 = (void *)0x2000; auto size1 = MemoryConstants::pageSize; auto size2 = MemoryConstants::pageSize * 2; storage.fragmentStorageData[0].cpuPtr = cpu1; storage.fragmentStorageData[0].fragmentSize = size1; storage.fragmentStorageData[1].cpuPtr = cpu2; storage.fragmentStorageData[1].fragmentSize = size2; MockHostPtrManager hostPtrManager; EXPECT_EQ(0u, 
hostPtrManager.getFragmentCount()); hostPtrManager.storeFragment(storage.fragmentStorageData[0]); hostPtrManager.storeFragment(storage.fragmentStorageData[1]); EXPECT_EQ(2u, hostPtrManager.getFragmentCount()); hostPtrManager.releaseHandleStorage(storage); EXPECT_EQ(0u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, GivenHostPtrFilledWith3TripleFragmentsWhenAskedForPopulationThenAllFragmentsAreResued) { void *cpuPtr = (void *)0x1001; auto fragmentSize = MemoryConstants::pageSize * 10; MockHostPtrManager hostPtrManager; auto reqs = hostPtrManager.getAllocationRequirements(cpuPtr, fragmentSize); ASSERT_EQ(3u, reqs.requiredFragmentsCount); FragmentStorage fragments[maxFragmentsCount]; //check all fragments for (int i = 0; i < maxFragmentsCount; i++) { fragments[i].fragmentCpuPointer = const_cast(reqs.allocationFragments[i].allocationPtr); fragments[i].fragmentSize = reqs.allocationFragments[i].allocationSize; hostPtrManager.storeFragment(fragments[i]); } EXPECT_EQ(3u, hostPtrManager.getFragmentCount()); auto OsHandles = hostPtrManager.populateAlreadyAllocatedFragments(reqs); EXPECT_EQ(3u, hostPtrManager.getFragmentCount()); for (int i = 0; i < maxFragmentsCount; i++) { EXPECT_EQ(OsHandles.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); EXPECT_EQ(OsHandles.fragmentStorageData[i].fragmentSize, reqs.allocationFragments[i].allocationSize); auto fragment = hostPtrManager.getFragment(const_cast(reqs.allocationFragments[i].allocationPtr)); ASSERT_NE(nullptr, fragment); EXPECT_EQ(2, fragment->refCount); EXPECT_EQ(OsHandles.fragmentStorageData[i].cpuPtr, fragment->fragmentCpuPointer); } for (int i = 0; i < maxFragmentsCount; i++) { hostPtrManager.releaseHostPtr(fragments[i].fragmentCpuPointer); } EXPECT_EQ(3u, hostPtrManager.getFragmentCount()); for (int i = 0; i < maxFragmentsCount; i++) { auto fragment = hostPtrManager.getFragment(const_cast(reqs.allocationFragments[i].allocationPtr)); ASSERT_NE(nullptr, fragment); EXPECT_EQ(1, 
fragment->refCount); } for (int i = 0; i < maxFragmentsCount; i++) { hostPtrManager.releaseHostPtr(fragments[i].fragmentCpuPointer); } EXPECT_EQ(0u, hostPtrManager.getFragmentCount()); } TEST(HostPtrManager, FragmentFindWhenFragmentSizeIsZero) { HostPtrManager hostPtrManager; auto ptr1 = (void *)0x010000; FragmentStorage fragment1; fragment1.fragmentCpuPointer = ptr1; fragment1.fragmentSize = 0; hostPtrManager.storeFragment(fragment1); auto ptr2 = (void *)0x040000; FragmentStorage fragment2; fragment2.fragmentCpuPointer = ptr2; fragment2.fragmentSize = 0; hostPtrManager.storeFragment(fragment2); auto cptr1 = (void *)0x00F000; auto frag1 = hostPtrManager.getFragment(cptr1); EXPECT_EQ(frag1, nullptr); auto cptr2 = (void *)0x010000; auto frag2 = hostPtrManager.getFragment(cptr2); EXPECT_NE(frag2, nullptr); auto cptr3 = (void *)0x010001; auto frag3 = hostPtrManager.getFragment(cptr3); EXPECT_EQ(frag3, nullptr); auto cptr4 = (void *)0x020000; auto frag4 = hostPtrManager.getFragment(cptr4); EXPECT_EQ(frag4, nullptr); auto cptr5 = (void *)0x040000; auto frag5 = hostPtrManager.getFragment(cptr5); EXPECT_NE(frag5, nullptr); auto cptr6 = (void *)0x040001; auto frag6 = hostPtrManager.getFragment(cptr6); EXPECT_EQ(frag6, nullptr); auto cptr7 = (void *)0x060000; auto frag7 = hostPtrManager.getFragment(cptr7); EXPECT_EQ(frag7, nullptr); } TEST(HostPtrManager, FragmentFindWhenFragmentSizeIsNotZero) { MockHostPtrManager hostPtrManager; auto size1 = MemoryConstants::pageSize; auto ptr1 = (void *)0x010000; FragmentStorage fragment1; fragment1.fragmentCpuPointer = ptr1; fragment1.fragmentSize = size1; hostPtrManager.storeFragment(fragment1); auto ptr2 = (void *)0x040000; FragmentStorage fragment2; fragment2.fragmentCpuPointer = ptr2; fragment2.fragmentSize = size1; hostPtrManager.storeFragment(fragment2); auto cptr1 = (void *)0x010060; auto frag1 = hostPtrManager.getFragment(cptr1); EXPECT_NE(frag1, nullptr); auto cptr2 = (void *)0x020000; auto frag2 = 
hostPtrManager.getFragment(cptr2); EXPECT_EQ(frag2, nullptr); auto cptr3 = (void *)0x040060; auto frag3 = hostPtrManager.getFragment(cptr3); EXPECT_NE(frag3, nullptr); auto cptr4 = (void *)0x060000; auto frag4 = hostPtrManager.getFragment(cptr4); EXPECT_EQ(frag4, nullptr); AllocationRequirements requiredAllocations; auto ptr3 = (void *)0x040000; auto size3 = MemoryConstants::pageSize * 2; requiredAllocations = hostPtrManager.getAllocationRequirements(ptr3, size3); auto catchme = false; try { OsHandleStorage st = hostPtrManager.populateAlreadyAllocatedFragments(requiredAllocations); EXPECT_EQ(st.fragmentCount, 0u); } catch (...) { catchme = true; } EXPECT_TRUE(catchme); } TEST(HostPtrManager, FragmentCheck) { MockHostPtrManager hostPtrManager; auto size1 = MemoryConstants::pageSize; auto ptr1 = (void *)0x010000; FragmentStorage fragment1; fragment1.fragmentCpuPointer = ptr1; fragment1.fragmentSize = size1; hostPtrManager.storeFragment(fragment1); auto ptr2 = (void *)0x040000; FragmentStorage fragment2; fragment2.fragmentCpuPointer = ptr2; fragment2.fragmentSize = size1; hostPtrManager.storeFragment(fragment2); OverlapStatus overlappingStatus; auto cptr1 = (void *)0x010060; auto frag1 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr1, 1u, overlappingStatus); EXPECT_NE(frag1, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT); frag1 = hostPtrManager.getFragmentAndCheckForOverlaps(ptr1, size1, overlappingStatus); EXPECT_NE(frag1, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT); frag1 = hostPtrManager.getFragmentAndCheckForOverlaps(ptr1, size1 - 1, overlappingStatus); EXPECT_NE(frag1, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT); auto cptr2 = (void *)0x020000; auto frag2 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr2, 1u, overlappingStatus); EXPECT_EQ(frag2, nullptr); EXPECT_EQ(overlappingStatus, 
OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER); auto cptr3 = (void *)0x040060; auto frag3 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr3, 1u, overlappingStatus); EXPECT_NE(frag3, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT); auto cptr4 = (void *)0x060000; auto frag4 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr4, 1u, overlappingStatus); EXPECT_EQ(frag4, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER); auto cptr5 = (void *)0x040000; auto frag5 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr5, size1 - 1, overlappingStatus); EXPECT_NE(frag5, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT); auto cptr6 = (void *)0x040000; auto frag6 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr6, size1 + 1, overlappingStatus); EXPECT_EQ(frag6, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT); auto cptr7 = (void *)0x03FFF0; auto frag7 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr7, 2 * size1, overlappingStatus); EXPECT_EQ(frag7, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT); auto cptr8 = (void *)0x040000; auto frag8 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr8, size1, overlappingStatus); EXPECT_NE(frag8, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT); auto cptr9 = (void *)0x010060; auto frag9 = hostPtrManager.getFragmentAndCheckForOverlaps(cptr9, 2 * size1, overlappingStatus); EXPECT_EQ(frag9, nullptr); EXPECT_EQ(overlappingStatus, OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT); } TEST(HostPtrManager, GivenHostPtrManagerFilledWithBigFragmentWhenAskedForFragmnetInTheMiddleOfBigFragmentThenBigFragmentIsReturned) { auto bigSize = 10 * MemoryConstants::pageSize; auto bigPtr = (void *)0x01000; FragmentStorage fragment; 
fragment.fragmentCpuPointer = bigPtr; fragment.fragmentSize = bigSize; MockHostPtrManager hostPtrManager; hostPtrManager.storeFragment(fragment); EXPECT_EQ(1u, hostPtrManager.getFragmentCount()); auto ptrInTheMiddle = (void *)0x2000; auto smallSize = MemoryConstants::pageSize; auto storedBigFragment = hostPtrManager.getFragment(bigPtr); auto fragment2 = hostPtrManager.getFragment(ptrInTheMiddle); EXPECT_EQ(storedBigFragment, fragment2); OverlapStatus overlapStatus; auto fragment3 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrInTheMiddle, smallSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT, overlapStatus); EXPECT_EQ(fragment3, storedBigFragment); auto ptrOutside = (void *)0x1000000; auto outsideSize = 1; auto perfectMatchFragment = hostPtrManager.getFragmentAndCheckForOverlaps(bigPtr, bigSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_WITH_EXACT_SIZE_AS_STORED_FRAGMENT, overlapStatus); EXPECT_EQ(perfectMatchFragment, storedBigFragment); auto oustideFragment = hostPtrManager.getFragmentAndCheckForOverlaps(ptrOutside, outsideSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, oustideFragment); //partialOverlap auto ptrPartial = (void *)(((uintptr_t)bigPtr + bigSize) - 100); auto partialBigSize = MemoryConstants::pageSize * 100; auto partialFragment = hostPtrManager.getFragmentAndCheckForOverlaps(ptrPartial, partialBigSize, overlapStatus); EXPECT_EQ(nullptr, partialFragment); EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus); } TEST(HostPtrManager, GivenHostPtrManagerFilledWithFragmentsWhenCheckedForOverlappingThenProperOverlappingStatusIsReturned) { auto bigPtr = (void *)0x04000; auto bigSize = 10 * MemoryConstants::pageSize; FragmentStorage fragment; fragment.fragmentCpuPointer = bigPtr; fragment.fragmentSize = bigSize; MockHostPtrManager hostPtrManager; hostPtrManager.storeFragment(fragment); EXPECT_EQ(1u, 
hostPtrManager.getFragmentCount()); auto ptrNonOverlapingPriorToBigPtr = (void *)0x2000; auto smallSize = MemoryConstants::pageSize; OverlapStatus overlapStatus; auto fragment2 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrNonOverlapingPriorToBigPtr, smallSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment2); auto ptrNonOverlapingPriorToBigPtrByPage = (void *)0x3000; auto checkMatch = (uintptr_t)ptrNonOverlapingPriorToBigPtrByPage + smallSize; EXPECT_EQ(checkMatch, (uintptr_t)bigPtr); auto fragment3 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrNonOverlapingPriorToBigPtrByPage, smallSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment3); auto fragment4 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrNonOverlapingPriorToBigPtrByPage, smallSize + 1, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus); EXPECT_EQ(nullptr, fragment4); } TEST(HostPtrManager, GivenEmptyHostPtrManagerWhenAskedForOverlapingThenNoOverlappingIsReturned) { MockHostPtrManager hostPtrManager; auto bigPtr = (void *)0x04000; auto bigSize = 10 * MemoryConstants::pageSize; OverlapStatus overlapStatus; auto fragment3 = hostPtrManager.getFragmentAndCheckForOverlaps(bigPtr, bigSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment3); } TEST(HostPtrManager, GivenHostPtrManagerFilledWithFragmentsWhenAskedForOverlpaingThenProperStatusIsReturned) { auto bigPtr1 = (void *)0x01000; auto bigPtr2 = (void *)0x03000; auto bigSize = MemoryConstants::pageSize; FragmentStorage fragment; fragment.fragmentCpuPointer = bigPtr1; fragment.fragmentSize = bigSize; MockHostPtrManager hostPtrManager; hostPtrManager.storeFragment(fragment); fragment.fragmentCpuPointer = bigPtr2; hostPtrManager.storeFragment(fragment); 
EXPECT_EQ(2u, hostPtrManager.getFragmentCount()); auto ptrNonOverlapingInTheMiddleOfBigPtrs = (void *)0x2000; auto ptrNonOverlapingAfterBigPtr = (void *)0x4000; auto ptrNonOverlapingBeforeBigPtr = (void *)0; OverlapStatus overlapStatus; auto fragment1 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrNonOverlapingInTheMiddleOfBigPtrs, bigSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment1); auto fragment2 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrNonOverlapingInTheMiddleOfBigPtrs, bigSize * 5, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus); EXPECT_EQ(nullptr, fragment2); auto fragment3 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrNonOverlapingAfterBigPtr, bigSize * 5, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment3); auto fragment4 = hostPtrManager.getFragmentAndCheckForOverlaps(ptrNonOverlapingBeforeBigPtr, bigSize, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment4); } TEST(HostPtrManager, GivenHostPtrManagerFilledWithFragmentsWhenAskedForOverlapingThenProperOverlapingStatusIsReturned) { auto bigPtr1 = (void *)0x10000; auto bigPtr2 = (void *)0x03000; auto bigPtr3 = (void *)0x11000; auto bigSize1 = MemoryConstants::pageSize; auto bigSize2 = MemoryConstants::pageSize * 4; auto bigSize3 = MemoryConstants::pageSize * 10; FragmentStorage fragment; fragment.fragmentCpuPointer = bigPtr1; fragment.fragmentSize = bigSize1; MockHostPtrManager hostPtrManager; hostPtrManager.storeFragment(fragment); fragment.fragmentCpuPointer = bigPtr2; fragment.fragmentSize = bigSize2; hostPtrManager.storeFragment(fragment); fragment.fragmentCpuPointer = bigPtr3; fragment.fragmentSize = bigSize3; hostPtrManager.storeFragment(fragment); EXPECT_EQ(3u, hostPtrManager.getFragmentCount()); 
OverlapStatus overlapStatus; auto fragment1 = hostPtrManager.getFragmentAndCheckForOverlaps(bigPtr1, bigSize1 + 1, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_OVERLAPING_AND_BIGGER_THEN_STORED_FRAGMENT, overlapStatus); EXPECT_EQ(nullptr, fragment1); auto priorToBig1 = (void *)0x9999; auto fragment2 = hostPtrManager.getFragmentAndCheckForOverlaps(priorToBig1, 1, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_NOT_OVERLAPING_WITH_ANY_OTHER, overlapStatus); EXPECT_EQ(nullptr, fragment2); auto middleOfBig3 = (void *)0x11111; auto fragment3 = hostPtrManager.getFragmentAndCheckForOverlaps(middleOfBig3, 1, overlapStatus); EXPECT_EQ(OverlapStatus::FRAGMENT_WITHIN_STORED_FRAGMENT, overlapStatus); EXPECT_NE(nullptr, fragment3); } using HostPtrAllocationTest = Test; TEST_F(HostPtrAllocationTest, givenTwoAllocationsThatSharesOneFragmentWhenOneIsDestroyedThenFragmentRemains) { void *cpuPtr1 = reinterpret_cast(0x100001); void *cpuPtr2 = ptrOffset(cpuPtr1, MemoryConstants::pageSize); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize - 1}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); auto graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr2); EXPECT_EQ(3u, hostPtrManager->getFragmentCount()); memoryManager->freeGraphicsMemory(graphicsAllocation1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); memoryManager->freeGraphicsMemory(graphicsAllocation2); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } TEST_F(HostPtrAllocationTest, whenPrepareOsHandlesForAllocationThenPopulateAsManyFragmentsAsRequired) { auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); void *cpuPtr = reinterpret_cast(0x100001); size_t allocationSize = 
MemoryConstants::pageSize / 2; for (uint32_t expectedFragmentCount = 1; expectedFragmentCount <= 3; expectedFragmentCount++, allocationSize += MemoryConstants::pageSize) { auto requirements = hostPtrManager->getAllocationRequirements(cpuPtr, allocationSize); EXPECT_EQ(expectedFragmentCount, requirements.requiredFragmentsCount); auto osStorage = hostPtrManager->prepareOsStorageForAllocation(*memoryManager, allocationSize, cpuPtr, 0); EXPECT_EQ(expectedFragmentCount, osStorage.fragmentCount); EXPECT_EQ(expectedFragmentCount, hostPtrManager->getFragmentCount()); hostPtrManager->releaseHandleStorage(osStorage); memoryManager->cleanOsHandles(osStorage, 0); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } } TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentIsDestroyedDuringSecondCleaningThenCheckForOverlappingReturnsSuccess) { void *cpuPtr1 = (void *)0x100004; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); auto fragment1 = hostPtrManager->getFragment(alignDown(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment(alignUp(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment2); uint32_t taskCountReady = 2; auto storage = new MockInternalAllocationStorage(*csr); csr->internalAllocationStorage.reset(storage); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); storage->updateCompletionAfterCleaningList(taskCountReady); // All fragments ready for release currentGpuTag = 1; csr->latestSentTaskCount = taskCountReady - 1; AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; 
requirements.totalRequiredSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::SUCCESS, status); } HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsWhenCheckIsCalledThenWaitAndCleanOnAllEngines) { uint32_t taskCountReady = 2; uint32_t taskCountNotReady = 1; auto &engines = memoryManager->getRegisteredEngines(); EXPECT_EQ(1u, engines.size()); auto csr0 = static_cast(engines[0].commandStreamReceiver); auto csr1 = std::make_unique(executionEnvironment, 0); uint32_t csr0GpuTag = taskCountNotReady; uint32_t csr1GpuTag = taskCountNotReady; csr0->tagAddress = &csr0GpuTag; csr1->tagAddress = &csr1GpuTag; auto osContext = memoryManager->createAndRegisterOsContext(csr1.get(), aub_stream::EngineType::ENGINE_RCS, 0, PreemptionMode::Disabled, true, false, false); csr1->setupContext(*osContext); void *cpuPtr = reinterpret_cast(0x100004); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation0 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr); auto storage0 = new MockInternalAllocationStorage(*csr0); auto storage1 = new MockInternalAllocationStorage(*csr1); csr0->internalAllocationStorage.reset(storage0); storage0->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation0), TEMPORARY_ALLOCATION, taskCountReady); storage0->updateCompletionAfterCleaningList(taskCountReady); 
csr1->internalAllocationStorage.reset(storage1); storage1->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); storage1->updateCompletionAfterCleaningList(taskCountReady); csr0->setLatestSentTaskCount(taskCountNotReady); csr1->setLatestSentTaskCount(taskCountNotReady); AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; requirements.totalRequiredSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(1u, csr0->waitForCompletionWithTimeoutCalled); EXPECT_EQ(1u, csr1->waitForCompletionWithTimeoutCalled); EXPECT_EQ(2u, storage0->cleanAllocationsCalled); EXPECT_EQ(2u, storage0->lastCleanAllocationsTaskCount); EXPECT_EQ(2u, storage1->cleanAllocationsCalled); EXPECT_EQ(2u, storage1->lastCleanAllocationsTaskCount); } TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentCannotBeDestroyedThenCheckForOverlappingReturnsError) { void *cpuPtr1 = (void *)0x100004; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); auto fragment1 = hostPtrManager->getFragment(alignDown(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment(alignUp(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment2); uint32_t taskCountReady = 2; auto storage = csr->getInternalAllocationStorage(); 
storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); // All fragments ready for release currentGpuTag = taskCountReady - 1; csr->latestSentTaskCount = taskCountReady - 1; AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; requirements.totalRequiredSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::FATAL, status); } TEST_F(HostPtrAllocationTest, checkAllocationsForOverlappingWithoutBiggerOverlap) { void *cpuPtr1 = (void *)0x100004; void *cpuPtr2 = (void *)0x101008; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); auto graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 3}, cpuPtr2); EXPECT_EQ(4u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); EXPECT_NE(nullptr, graphicsAllocation2); auto fragment1 = hostPtrManager->getFragment(alignDown(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment(alignUp(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment2); auto fragment3 = hostPtrManager->getFragment(alignDown(cpuPtr2, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment3); auto fragment4 = hostPtrManager->getFragment(alignUp(cpuPtr2, 
MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment4); AllocationRequirements requirements; requirements.requiredFragmentsCount = 2; requirements.totalRequiredSize = MemoryConstants::pageSize * 2; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::LEADING; requirements.allocationFragments[1].allocationPtr = alignUp(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[1].allocationSize = MemoryConstants::pageSize; requirements.allocationFragments[1].fragmentPosition = FragmentPosition::TRAILING; RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::SUCCESS, status); memoryManager->freeGraphicsMemory(graphicsAllocation1); memoryManager->freeGraphicsMemory(graphicsAllocation2); } TEST_F(HostPtrAllocationTest, checkAllocationsForOverlappingWithBiggerOverlapUntilFirstClean) { void *cpuPtr1 = (void *)0x100004; auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); auto fragment1 = hostPtrManager->getFragment(alignDown(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment(alignUp(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment2); uint32_t taskCountReady = 1; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); // All fragments ready for release taskCount = taskCountReady; csr->latestSentTaskCount = 
taskCountReady; AllocationRequirements requirements; requirements.requiredFragmentsCount = 1; requirements.totalRequiredSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr1, MemoryConstants::pageSize); requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; requirements.allocationFragments[0].fragmentPosition = FragmentPosition::NONE; RequirementsStatus status = hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(RequirementsStatus::SUCCESS, status); } internal_allocation_storage_tests.cpp000066400000000000000000000271651363734646600342630ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/containers_tests_helpers.h" #include "opencl/test/unit_test/fixtures/memory_allocator_fixture.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" struct InternalAllocationStorageTest : public MemoryAllocatorFixture, public ::testing::Test { using MemoryAllocatorFixture::TearDown; void SetUp() override { MemoryAllocatorFixture::SetUp(); storage = csr->getInternalAllocationStorage(); } InternalAllocationStorage *storage; }; TEST_F(InternalAllocationStorageTest, givenDebugFlagThatDisablesAllocationReuseWhenStoreReusableAllocationIsCalledThenAllocationIsReleased) { DebugManagerStateRestore stateRestorer; DebugManager.flags.DisableResourceRecycling.set(true); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); 
storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_NE(allocation, csr->getAllocationsForReuse().peekHead()); EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); } TEST_F(InternalAllocationStorageTest, whenCleanAllocationListThenRemoveOnlyCompletedAllocations) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation3 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation->updateTaskCount(10, csr->getOsContext().getContextId()); allocation2->updateTaskCount(5, csr->getOsContext().getContextId()); allocation3->updateTaskCount(15, csr->getOsContext().getContextId()); storage->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); storage->storeAllocation(std::unique_ptr(allocation2), TEMPORARY_ALLOCATION); storage->storeAllocation(std::unique_ptr(allocation3), TEMPORARY_ALLOCATION); //head point to alloc 2, tail points to alloc3 EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation2)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation3)); EXPECT_EQ(-1, verifyDListOrder(csr->getTemporaryAllocations().peekHead(), allocation, allocation2, allocation3)); //now remove element form the middle storage->cleanAllocationList(6, TEMPORARY_ALLOCATION); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation)); EXPECT_FALSE(csr->getTemporaryAllocations().peekContains(*allocation2)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation3)); EXPECT_EQ(-1, verifyDListOrder(csr->getTemporaryAllocations().peekHead(), allocation, allocation3)); //now remove head 
storage->cleanAllocationList(11, TEMPORARY_ALLOCATION); EXPECT_FALSE(csr->getTemporaryAllocations().peekContains(*allocation)); EXPECT_FALSE(csr->getTemporaryAllocations().peekContains(*allocation2)); EXPECT_TRUE(csr->getTemporaryAllocations().peekContains(*allocation3)); //now remove tail storage->cleanAllocationList(16, TEMPORARY_ALLOCATION); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } TEST_F(InternalAllocationStorageTest, whenAllocationIsStoredAsReusableButIsStillUsedThenCannotBeObtained) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); storage->storeAllocationWithTaskCount(std::unique_ptr(allocation), REUSABLE_ALLOCATION, 2u); auto *hwTag = csr->getTagAddress(); *hwTag = 1u; auto newAllocation = storage->obtainReusableAllocation(1, GraphicsAllocation::AllocationType::BUFFER); EXPECT_EQ(nullptr, newAllocation); storage->cleanAllocationList(2u, REUSABLE_ALLOCATION); } TEST_F(InternalAllocationStorageTest, whenObtainAllocationFromEmptyReuseListThenReturnNullptr) { auto allocation2 = storage->obtainReusableAllocation(1, GraphicsAllocation::AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation2); } TEST_F(InternalAllocationStorageTest, whenCompletedAllocationIsStoredAsReusableAndThenCanBeObtained) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); EXPECT_NE(nullptr, allocation); storage->storeAllocationWithTaskCount(std::unique_ptr(allocation), REUSABLE_ALLOCATION, 2u); EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); auto *hwTag = csr->getTagAddress(); *hwTag = 2u; auto reusedAllocation = storage->obtainReusableAllocation(1, GraphicsAllocation::AllocationType::BUFFER).release(); EXPECT_EQ(allocation, reusedAllocation); EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); 
memoryManager->freeGraphicsMemory(allocation); } TEST_F(InternalAllocationStorageTest, whenNotUsedAllocationIsStoredAsReusableAndThenCanBeObtained) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); EXPECT_NE(nullptr, allocation); EXPECT_FALSE(allocation->isUsed()); EXPECT_EQ(0u, csr->peekTaskCount()); *csr->getTagAddress() = 0; // initial hw tag for dll storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_EQ(0u, allocation->getTaskCount(csr->getOsContext().getContextId())); EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); auto reusedAllocation = storage->obtainReusableAllocation(1, GraphicsAllocation::AllocationType::BUFFER).release(); EXPECT_EQ(allocation, reusedAllocation); EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(InternalAllocationStorageTest, whenObtainAllocationFromMidlleOfReusableListThenItIsDetachedFromLinkedList) { auto &reusableAllocations = csr->getAllocationsForReuse(); EXPECT_TRUE(reusableAllocations.peekIsEmpty()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, 1, GraphicsAllocation::AllocationType::BUFFER}); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, 10000, GraphicsAllocation::AllocationType::BUFFER}); auto allocation3 = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, 1, GraphicsAllocation::AllocationType::BUFFER}); EXPECT_TRUE(reusableAllocations.peekIsEmpty()); EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(nullptr, allocation2->prev); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation2)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation3)); 
EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(nullptr, allocation2->prev); storage->storeAllocation(std::unique_ptr(allocation2), REUSABLE_ALLOCATION); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation2)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation3)); EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(allocation, allocation2->prev); storage->storeAllocation(std::unique_ptr(allocation3), REUSABLE_ALLOCATION); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation2)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation3)); EXPECT_EQ(allocation3, allocation2->next); EXPECT_EQ(allocation, allocation2->prev); auto reusableAllocation = storage->obtainReusableAllocation(10000, GraphicsAllocation::AllocationType::BUFFER).release(); EXPECT_EQ(reusableAllocation, allocation2); EXPECT_EQ(nullptr, allocation2->next); EXPECT_EQ(nullptr, allocation2->prev); EXPECT_EQ(nullptr, reusableAllocation->next); EXPECT_EQ(nullptr, reusableAllocation->prev); EXPECT_FALSE(reusableAllocations.peekContains(*reusableAllocation)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation)); EXPECT_FALSE(reusableAllocations.peekContains(*allocation2)); EXPECT_TRUE(reusableAllocations.peekContains(*allocation3)); memoryManager->freeGraphicsMemory(allocation2); allocation->updateTaskCount(0u, csr->getOsContext().getContextId()); allocation3->updateTaskCount(0u, csr->getOsContext().getContextId()); } TEST_F(InternalAllocationStorageTest, givenAllocationWhenItIsPutOnReusableListWhenOtherAllocationTypeIsRequestedThenNullIsReturned) { EXPECT_TRUE(csr->getAllocationsForReuse().peekIsEmpty()); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(AllocationProperties{0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); 
EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); auto internalAllocation = storage->obtainReusableAllocation(1, GraphicsAllocation::AllocationType::INTERNAL_HEAP); EXPECT_EQ(nullptr, internalAllocation); } class WaitAtDeletionAllocation : public MockGraphicsAllocation { public: WaitAtDeletionAllocation(void *buffer, size_t sizeIn) : MockGraphicsAllocation(buffer, sizeIn) { inDestructor = false; } std::mutex mutex; std::atomic inDestructor; ~WaitAtDeletionAllocation() override { inDestructor = true; std::lock_guard lock(mutex); } }; TEST_F(InternalAllocationStorageTest, givenAllocationListWhenTwoThreadsCleanConcurrentlyThenBothThreadsCanAccessTheList) { auto allocation1 = new WaitAtDeletionAllocation(nullptr, 0); allocation1->updateTaskCount(1, csr->getOsContext().getContextId()); storage->storeAllocation(std::unique_ptr(allocation1), TEMPORARY_ALLOCATION); std::unique_lock allocationDeletionLock(allocation1->mutex); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); allocation2->updateTaskCount(2, csr->getOsContext().getContextId()); storage->storeAllocation(std::unique_ptr(allocation2), TEMPORARY_ALLOCATION); std::mutex mutex; std::unique_lock lock(mutex); std::thread thread1([&] { storage->cleanAllocationList(1, TEMPORARY_ALLOCATION); }); std::thread thread2([&] { std::lock_guard lock(mutex); storage->cleanAllocationList(2, TEMPORARY_ALLOCATION); }); while (!allocation1->inDestructor) ; lock.unlock(); allocationDeletionLock.unlock(); thread1.join(); thread2.join(); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/local_memory_usage_tests.cpp000066400000000000000000000130431363734646600324310ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include 
"shared/source/memory_manager/local_memory_usage.h" #include "third_party/gtest/gtest/gtest.h" namespace NEO { struct MockLocalMemoryUsageBankSelector : public LocalMemoryUsageBankSelector { using LocalMemoryUsageBankSelector::banksCount; using LocalMemoryUsageBankSelector::freeOnBank; using LocalMemoryUsageBankSelector::LocalMemoryUsageBankSelector; using LocalMemoryUsageBankSelector::reserveOnBank; using LocalMemoryUsageBankSelector::updateUsageInfo; std::atomic *getMemorySizes() { return memorySizes.get(); } }; TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenItsCreatedAllValuesAreZero) { MockLocalMemoryUsageBankSelector selector(2u); for (uint32_t i = 0; i < selector.banksCount; i++) { EXPECT_EQ(0u, selector.getMemorySizes()[i].load()); } } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReservedOnGivenBankThenValueStoredInTheArrayIsCorrect) { MockLocalMemoryUsageBankSelector selector(4u); uint64_t allocationSize = 1024u; auto bankIndex = selector.getLeastOccupiedBank(); selector.reserveOnBank(bankIndex, allocationSize); EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(bankIndex)); } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryIsReleasedThenValueIsCorrectlyAllocated) { MockLocalMemoryUsageBankSelector selector(1u); uint64_t allocationSize = 1024u; auto bankIndex = selector.getLeastOccupiedBank(); EXPECT_EQ(0u, bankIndex); selector.reserveOnBank(bankIndex, allocationSize); bankIndex = selector.getLeastOccupiedBank(); EXPECT_EQ(0u, bankIndex); selector.reserveOnBank(bankIndex, allocationSize); selector.freeOnBank(bankIndex, allocationSize); EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(bankIndex)); } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMemoryAllocatedSeveralTimesItIsStoredOnDifferentBanks) { MockLocalMemoryUsageBankSelector selector(5u); uint64_t allocationSize = 1024u; auto bankIndex = selector.getLeastOccupiedBank(); 
selector.reserveOnBank(bankIndex, allocationSize); bankIndex = selector.getLeastOccupiedBank(); selector.reserveOnBank(bankIndex, allocationSize); bankIndex = selector.getLeastOccupiedBank(); selector.reserveOnBank(bankIndex, allocationSize); bankIndex = selector.getLeastOccupiedBank(); selector.reserveOnBank(bankIndex, allocationSize); bankIndex = selector.getLeastOccupiedBank(); selector.reserveOnBank(bankIndex, allocationSize); for (uint32_t i = 0; i < selector.banksCount; i++) { EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(i)); } } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenIndexIsInvalidThenErrorIsReturned) { MockLocalMemoryUsageBankSelector selector(3u); EXPECT_THROW(selector.reserveOnBank(8u, 1024u), std::exception); EXPECT_THROW(selector.freeOnBank(8u, 1024u), std::exception); EXPECT_THROW(selector.getOccupiedMemorySizeForBank(8u), std::exception); } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenItsCreatedWithZeroBanksThenErrorIsReturned) { EXPECT_THROW(LocalMemoryUsageBankSelector(0u), std::exception); } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMultipleBanksAreUsedThenMemoryIsReservedOnEachOfThem) { MockLocalMemoryUsageBankSelector selector(6u); uint32_t banks = 5u; uint64_t allocationSize = 1024u; selector.reserveOnBanks(banks, allocationSize); EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(0u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(1u)); EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(2u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(3u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(4u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(5u)); } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenMultipleBanksAreUsedThenMemoryIsReleasedOnEachOfThem) { MockLocalMemoryUsageBankSelector selector(6u); uint32_t banks = 5u; uint64_t allocationSize = 1024u; selector.reserveOnBanks(banks, allocationSize); 
selector.reserveOnBanks(banks, allocationSize); EXPECT_EQ(2 * allocationSize, selector.getOccupiedMemorySizeForBank(0u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(1u)); EXPECT_EQ(2 * allocationSize, selector.getOccupiedMemorySizeForBank(2u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(3)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(4)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(5)); selector.freeOnBanks(banks, allocationSize); EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(0u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(1u)); EXPECT_EQ(allocationSize, selector.getOccupiedMemorySizeForBank(2u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(3u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(4u)); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(5u)); } TEST(localMemoryUsageTest, givenLocalMemoryUsageBankSelectorWhenThereAreMoreThan32BanksThenOnly32AreUpdated) { MockLocalMemoryUsageBankSelector selector(33u); uint32_t banks = ~0u; uint64_t allocationSize = 1024u; selector.reserveOnBanks(banks, allocationSize); EXPECT_EQ(0u, selector.getOccupiedMemorySizeForBank(32)); } } // namespace NEO memory_manager_allocate_in_device_pool_tests.cpp000066400000000000000000000101541363734646600364100ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/memory_manager/memory_manager_allocate_in_device_pool_tests.inl" #include "shared/source/helpers/array_count.h" TEST(MemoryManagerTest, givenNotSetUseSystemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedThenAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.size = 
MemoryConstants::pageSize; allocData.flags.allocateMemory = true; auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenImageOrSharedResourceCopyWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; GraphicsAllocation::AllocationType types[] = {GraphicsAllocation::AllocationType::IMAGE, GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY}; for (auto type : types) { allocData.type = type; auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } } TEST(MemoryManagerTest, givenSvmGpuAllocationTypeWhenAllocationSystemMemoryFailsThenReturnNull) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.allFlags = 0; allocData.size = MemoryConstants::pageSize; allocData.type = GraphicsAllocation::AllocationType::SVM_GPU; allocData.hostPtr = reinterpret_cast(0x1000); memoryManager.failAllocateSystemMemory = true; auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Error, status); } TEST(MemoryManagerTest, givenSvmGpuAllocationTypeWhenAllocationSucceedThenReturnGpuAddressAsHostPtr) { if 
(defaultHwInfo->capabilityTable.gpuAddressSpace != maxNBitValue(48) && defaultHwInfo->capabilityTable.gpuAddressSpace != maxNBitValue(47)) { return; } MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.size = MemoryConstants::pageSize; allocData.type = GraphicsAllocation::AllocationType::SVM_GPU; allocData.hostPtr = reinterpret_cast(0x1000); auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(reinterpret_cast(allocData.hostPtr), allocation->getGpuAddress()); EXPECT_NE(reinterpret_cast(allocation->getUnderlyingBuffer()), allocation->getGpuAddress()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenOsAgnosticMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); EXPECT_EQ(0 * GB, memoryManager.getLocalMemorySize(0u)); } memory_manager_allocate_in_device_pool_tests.inl000066400000000000000000000110211363734646600364020ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; 
// A request flagged with useSystemMemory gets no device-pool allocation and
// the status reports RetryInNonDevicePool.
TEST(MemoryManagerTest, givenSetUseSytemMemoryWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);

    AllocationData allocationRequest;
    allocationRequest.flags.allocateMemory = true;
    allocationRequest.flags.useSystemMemory = true;
    allocationRequest.size = MemoryConstants::pageSize;

    // Starts as Success; the final expectation requires the call to change it.
    MemoryManager::AllocationStatus allocationStatus = MemoryManager::AllocationStatus::Success;
    auto devicePoolAllocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocationRequest, allocationStatus);

    EXPECT_EQ(nullptr, devicePoolAllocation);
    EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, allocationStatus);
}

// With 32-bit allocations forced on the manager, a request that sets
// allow32Bit gets no device-pool allocation and the status reports
// RetryInNonDevicePool.
TEST(MemoryManagerTest, givenAllowed32BitAndFroce32BitWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);
    memoryManager.setForce32BitAllocations(true);

    AllocationData allocationRequest;
    allocationRequest.flags.allocateMemory = true;
    allocationRequest.flags.allow32Bit = true;
    allocationRequest.size = MemoryConstants::pageSize;

    // Starts as Success; the final expectation requires the call to change it.
    MemoryManager::AllocationStatus allocationStatus = MemoryManager::AllocationStatus::Success;
    auto devicePoolAllocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocationRequest, allocationStatus);

    EXPECT_EQ(nullptr, devicePoolAllocation);
    EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, allocationStatus);
}

// The third argument of getAllocationProperties is mirrored into
// flags.allocateMemory of the returned properties.
TEST(AllocationFlagsTest, givenAllocateMemoryFlagWhenGetAllocationFlagsIsCalledThenAllocateFlagIsCorrectlySet) {
    HardwareInfo hardwareInfo(*defaultHwInfo);

    // Third argument true -> allocateMemory is expected to be set.
    auto propertiesWithAllocate = MemoryPropertiesParser::getAllocationProperties(
        0, {}, true, 0, GraphicsAllocation::AllocationType::BUFFER, false, hardwareInfo);
    EXPECT_TRUE(propertiesWithAllocate.flags.allocateMemory);

    // Third argument false -> allocateMemory is expected to be clear.
    auto propertiesWithoutAllocate = MemoryPropertiesParser::getAllocationProperties(
        0, {}, false, 0, GraphicsAllocation::AllocationType::BUFFER, false, hardwareInfo);
    EXPECT_FALSE(propertiesWithoutAllocate.flags.allocateMemory);
}
// CL_MEM_LOCALLY_UNCACHED_RESOURCE in the Intel flags yields
// flags.uncacheable == true; zero Intel flags yield false.
TEST(UncacheableFlagsTest, givenUncachedResourceFlagWhenGetAllocationFlagsIsCalledThenUncacheableFlagIsCorrectlySet) {
    HardwareInfo hardwareInfo(*defaultHwInfo);

    // Case 1: the locally-uncached Intel flag is present.
    cl_mem_flags_intel uncachedIntelFlags = CL_MEM_LOCALLY_UNCACHED_RESOURCE;
    MemoryPropertiesFlags uncachedProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, uncachedIntelFlags, 0);
    auto uncachedAllocationProperties = MemoryPropertiesParser::getAllocationProperties(
        0, uncachedProperties, false, 0, GraphicsAllocation::AllocationType::BUFFER, false, hardwareInfo);
    EXPECT_TRUE(uncachedAllocationProperties.flags.uncacheable);

    // Case 2: no Intel flags at all.
    cl_mem_flags_intel noIntelFlags = 0;
    MemoryPropertiesFlags plainProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(0, noIntelFlags, 0);
    auto plainAllocationProperties = MemoryPropertiesParser::getAllocationProperties(
        0, plainProperties, false, 0, GraphicsAllocation::AllocationType::BUFFER, false, hardwareInfo);
    EXPECT_FALSE(plainAllocationProperties.flags.uncacheable);
}

// CL_MEM_READ_ONLY clears both flushL3Required flags; default-constructed
// memory properties leave both set.
TEST(AllocationFlagsTest, givenReadOnlyResourceFlagWhenGetAllocationFlagsIsCalledThenFlushL3FlagsAreCorrectlySet) {
    HardwareInfo hardwareInfo(*defaultHwInfo);

    cl_mem_flags readOnlyFlags = CL_MEM_READ_ONLY;
    MemoryPropertiesFlags readOnlyProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(readOnlyFlags, 0, 0);

    auto readOnlyAllocationProperties = MemoryPropertiesParser::getAllocationProperties(
        0, readOnlyProperties, true, 0, GraphicsAllocation::AllocationType::BUFFER, false, hardwareInfo);
    EXPECT_FALSE(readOnlyAllocationProperties.flags.flushL3RequiredForRead);
    EXPECT_FALSE(readOnlyAllocationProperties.flags.flushL3RequiredForWrite);

    // Same query with default-constructed memory properties ({}).
    auto defaultAllocationProperties = MemoryPropertiesParser::getAllocationProperties(
        0, {}, true, 0, GraphicsAllocation::AllocationType::BUFFER, false, hardwareInfo);
    EXPECT_TRUE(defaultAllocationProperties.flags.flushL3RequiredForRead);
    EXPECT_TRUE(defaultAllocationProperties.flags.flushL3RequiredForWrite);
}

// A default-constructed StorageInfo reports exactly one handle.
TEST(StorageInfoTest, whenStorageInfoIsCreatedWithDefaultConstructorThenReturnsOneHandle) {
    StorageInfo defaultStorageInfo;
    EXPECT_EQ(1u, defaultStorageInfo.getNumHandles());
}
memory_manager_allocate_in_preferred_pool_tests.cpp000066400000000000000000000002771363734646600371340ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.inl" memory_manager_allocate_in_preferred_pool_tests.inl000066400000000000000000001352151363734646600371350ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; class MemoryManagerGetAlloctionDataTest : public testing::TestWithParam { public: void SetUp() override {} void TearDown() override {} }; TEST(MemoryManagerGetAlloctionDataTest, givenHostMemoryAllocationTypeAndAllocateMemoryFlagAndNullptrWhenAllocationDataIsQueriedThenCorrectFlagsAndSizeAreSet) { AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_EQ(10u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); } TEST(MemoryManagerGetAlloctionDataTest, 
givenNonHostMemoryAllocatoinTypeWhenAllocationDataIsQueriedThenUseSystemMemoryFlagsIsNotSet) { AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_EQ(10u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); } TEST(MemoryManagerGetAlloctionDataTest, givenAllocateMemoryFlagTrueWhenHostPtrIsNotNullThenAllocationDataHasHostPtrNulled) { AllocationData allocData; char memory = 0; AllocationProperties properties(0, true, sizeof(memory), GraphicsAllocation::AllocationType::BUFFER, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, &memory, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(sizeof(memory), allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); } TEST(MemoryManagerGetAlloctionDataTest, givenBufferTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST(MemoryManagerGetAlloctionDataTest, givenBufferHostMemoryTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST(MemoryManagerGetAlloctionDataTest, 
givenBufferCompressedTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST(MemoryManagerGetAlloctionDataTest, givenWriteCombinedTypeWhenAllocationDataIsQueriedThenForcePinFlagIsSet) { AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::WRITE_COMBINED, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.forcePin); } TEST(MemoryManagerGetAlloctionDataTest, givenDefaultAllocationFlagsWhenAllocationDataIsQueriedThenAllocateMemoryIsFalse) { AllocationData allocData; AllocationProperties properties(0, false, 0, GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, false); char memory; MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, &memory, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.allocateMemory); } typedef MemoryManagerGetAlloctionDataTest MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest; TEST_P(MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest, givenAllocationTypesWith32BitAnd64kbPagesAllowedWhenAllocationDataIsQueriedThenProperFlagsAreSet) { AllocationData allocData; auto allocType = GetParam(); AllocationProperties properties(0, true, 0, allocType, false); MockMemoryManager mockMemoryManager; mockMemoryManager.mockExecutionEnvironment->initGmm(); MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); 
EXPECT_TRUE(allocData.flags.allow32Bit); EXPECT_TRUE(allocData.flags.allow64kbPages); EXPECT_EQ(allocType, allocData.type); } TEST_P(MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest, given64kbAllowedAllocationTypeWhenAllocatingThenPreferRenderCompressionOnlyForSpecificTypes) { auto allocType = GetParam(); AllocationData allocData; AllocationProperties properties(0, true, 10, allocType, false); MockMemoryManager mockMemoryManager(true, false); mockMemoryManager.mockExecutionEnvironment->initGmm(); MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); bool bufferCompressedType = (allocType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); EXPECT_TRUE(allocData.flags.allow64kbPages); auto allocation = mockMemoryManager.allocateGraphicsMemory(allocData); EXPECT_TRUE(mockMemoryManager.allocation64kbPageCreated); EXPECT_EQ(mockMemoryManager.preferRenderCompressedFlagPassed, bufferCompressedType); mockMemoryManager.freeGraphicsMemory(allocation); } typedef MemoryManagerGetAlloctionDataTest MemoryManagerGetAlloctionData32BitAnd64kbPagesNotAllowedTest; TEST_P(MemoryManagerGetAlloctionData32BitAnd64kbPagesNotAllowedTest, givenAllocationTypesWith32BitAnd64kbPagesDisallowedWhenAllocationDataIsQueriedThenFlagsAreNotSet) { AllocationData allocData; auto allocType = GetParam(); AllocationProperties properties(0, true, 0, allocType, false); MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(allocType, allocData.type); } static const GraphicsAllocation::AllocationType allocationTypesWith32BitAnd64KbPagesAllowed[] = {GraphicsAllocation::AllocationType::BUFFER, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, 
GraphicsAllocation::AllocationType::PIPE, GraphicsAllocation::AllocationType::SCRATCH_SURFACE, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, GraphicsAllocation::AllocationType::PRINTF_SURFACE, GraphicsAllocation::AllocationType::CONSTANT_SURFACE, GraphicsAllocation::AllocationType::GLOBAL_SURFACE, GraphicsAllocation::AllocationType::WRITE_COMBINED};

// Run the "allowed" suite once per entry of allocationTypesWith32BitAnd64KbPagesAllowed.
INSTANTIATE_TEST_CASE_P(Allow32BitAnd64kbPagesTypes,
                        MemoryManagerGetAlloctionData32BitAnd64kbPagesAllowedTest,
                        ::testing::ValuesIn(allocationTypesWith32BitAnd64KbPagesAllowed));

// Allocation types for which the "not allowed" suite expects allow32Bit and allow64kbPages to be cleared.
static const GraphicsAllocation::AllocationType allocationTypesWith32BitAnd64KbPagesNotAllowed[] = {
    GraphicsAllocation::AllocationType::COMMAND_BUFFER,
    GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER,
    GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER,
    GraphicsAllocation::AllocationType::IMAGE,
    GraphicsAllocation::AllocationType::INSTRUCTION_HEAP,
    GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY};

// Run the "not allowed" suite once per entry of the table above.
INSTANTIATE_TEST_CASE_P(Disallow32BitAnd64kbPagesTypes,
                        MemoryManagerGetAlloctionData32BitAnd64kbPagesNotAllowedTest,
                        ::testing::ValuesIn(allocationTypesWith32BitAnd64KbPagesNotAllowed));

TEST(MemoryManagerTest, givenForced32BitSetWhenGraphicsMemoryFor32BitAllowedTypeIsAllocatedThen32BitAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); memoryManager.setForce32BitAllocations(true); AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false); MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); if (is64bit) { EXPECT_TRUE(allocation->is32BitAllocation()); EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool()); } else {
EXPECT_FALSE(allocation->is32BitAllocation()); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); } memoryManager.freeGraphicsMemory(allocation); }

// A BUFFER request flagged shareable must carry the flag into AllocationData and still allocate.
TEST(MemoryManagerTest, givenEnabledShareableWhenGraphicsAllocationIsAllocatedThenAllocationIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.initGmm();
    MockMemoryManager memoryManager(false, false, executionEnvironment);

    AllocationData allocationData;
    AllocationProperties allocProperties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false);
    allocProperties.flags.shareable = true;
    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));
    EXPECT_EQ(allocationData.flags.shareable, 1u);

    auto gfxAllocation = memoryManager.allocateGraphicsMemory(allocationData);
    ASSERT_NE(nullptr, gfxAllocation);

    memoryManager.freeGraphicsMemory(gfxAllocation);
}

// Same shareable request, but with the mock told to fail system-memory allocation: the result
// must be nullptr.
TEST(MemoryManagerTest, givenEnabledShareableWhenGraphicsAllocationIsCalledAndSystemMemoryFailsThenNullAllocationIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.initGmm();
    MockMemoryManager memoryManager(false, false, executionEnvironment);

    AllocationData allocationData;
    AllocationProperties allocProperties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false);
    allocProperties.flags.shareable = true;
    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));
    EXPECT_EQ(allocationData.flags.shareable, 1u);

    memoryManager.failAllocateSystemMemory = true;
    auto gfxAllocation = memoryManager.allocateGraphicsMemory(allocationData);
    ASSERT_EQ(nullptr, gfxAllocation);

    // The pointer is known to be null here; the call is kept exactly as in the original test.
    memoryManager.freeGraphicsMemory(gfxAllocation);
}

TEST(MemoryManagerTest, givenForced32BitEnabledWhenGraphicsMemoryWihtoutAllow32BitFlagIsAllocatedThenNon32BitAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment);
memoryManager.setForce32BitAllocations(true); AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false); MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); allocData.flags.allow32Bit = false; auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(allocation->is32BitAllocation()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenForced32BitDisabledWhenGraphicsMemoryWith32BitFlagFor32BitAllowedTypeIsAllocatedThenNon32BitAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); memoryManager.setForce32BitAllocations(false); AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false); MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(allocation->is32BitAllocation()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabled64kbPagesWhenGraphicsMemoryMustBeHostMemoryAndIsAllocatedWithNullptrForBufferThen64kbAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false); MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_EQ(0u, 
reinterpret_cast(allocation->getUnderlyingBuffer()) & MemoryConstants::page64kMask); EXPECT_EQ(0u, allocation->getGpuAddress() & MemoryConstants::page64kMask); EXPECT_EQ(0u, allocation->getUnderlyingBufferSize() & MemoryConstants::page64kMask); EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenEnabled64kbPagesWhenGraphicsMemoryWithoutAllow64kbPagesFlagsIsAllocatedThenNon64kbAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER, false); MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); allocData.flags.allow64kbPages = false; auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(memoryManager.allocation64kbPageCreated); EXPECT_TRUE(memoryManager.allocationCreated); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenDisabled64kbPagesWhenGraphicsMemoryMustBeHostMemoryAndIsAllocatedWithNullptrForBufferThenNon64kbAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationData allocData; AllocationProperties properties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false); MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); auto allocation = memoryManager.allocateGraphicsMemory(allocData); ASSERT_NE(nullptr, allocation); EXPECT_FALSE(memoryManager.allocation64kbPageCreated); EXPECT_TRUE(memoryManager.allocationCreated); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); 
memoryManager.freeGraphicsMemory(allocation); }

// With forced 32-bit allocations on, a BUFFER_HOST_MEMORY allocation must be 32-bit on 64-bit
// builds and non-32-bit otherwise.
// NOTE(review): the test name mentions "Enabled64kbPages", yet the manager is built with
// (false, false, ...) like the "Disabled64kbPages" tests in this file — confirm this is intended.
TEST(MemoryManagerTest, givenForced32BitAndEnabled64kbPagesWhenGraphicsMemoryMustBeHostMemoryAndIsAllocatedWithNullptrForBufferThen32BitAllocationOver64kbIsChosen) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);
    memoryManager.setForce32BitAllocations(true);

    AllocationData allocationData;
    AllocationProperties allocProperties(0, true, 10, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false);
    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    auto gfxAllocation = memoryManager.allocateGraphicsMemory(allocationData);
    ASSERT_NE(nullptr, gfxAllocation);
    if (is64bit) {
        EXPECT_TRUE(gfxAllocation->is32BitAllocation());
    } else {
        EXPECT_FALSE(gfxAllocation->is32BitAllocation());
    }

    memoryManager.freeGraphicsMemory(gfxAllocation);
}

// When a host pointer is supplied for BUFFER_HOST_MEMORY, the allocation must wrap that memory:
// exactly one storage fragment, reported in the System4KBPages pool.
TEST(MemoryManagerTest, givenEnabled64kbPagesWhenGraphicsMemoryIsAllocatedWithHostPtrForBufferThenExistingMemoryIsUsedForAllocation) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(true, false, executionEnvironment);

    AllocationData allocationData;
    AllocationProperties allocProperties(0, false, 1, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, false);
    char hostMemory[1];
    MockMemoryManager::getAllocationData(allocationData, allocProperties, &hostMemory,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    auto gfxAllocation = memoryManager.allocateGraphicsMemory(allocationData);
    ASSERT_NE(nullptr, gfxAllocation);
    EXPECT_EQ(1u, gfxAllocation->fragmentsStorage.fragmentCount);
    EXPECT_EQ(MemoryPool::System4KBPages, gfxAllocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(gfxAllocation);
}

TEST(MemoryManagerTest, givenMemoryManagerWhenGraphicsMemoryAllocationInDevicePoolFailsThenFallbackAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true,
executionEnvironment); memoryManager.failInDevicePool = true; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); ASSERT_NE(nullptr, allocation); EXPECT_TRUE(memoryManager.allocationCreated); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenBufferTypeIsPassedThenAllocateGraphicsMemoryInPreferredPoolCanAllocateInDevicePool) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); EXPECT_NE(nullptr, allocation); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenBufferTypeIsPassedAndAllocateInDevicePoolFailsWithErrorThenAllocateGraphicsMemoryInPreferredPoolReturnsNullptr) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, true, executionEnvironment); memoryManager.failInDevicePoolWithError = true; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); ASSERT_EQ(nullptr, allocation); EXPECT_FALSE(memoryManager.allocationInDevicePoolCreated); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenSvmAllocationTypeWhenGetAllocationDataIsCalledThenAllocatingMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::SVM_ZERO_COPY}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.allocateMemory); } TEST(MemoryManagerTest, 
givenSvmAllocationTypeWhenGetAllocationDataIsCalledThen64kbPagesAreAllowedAnd32BitAllocationIsDisallowed) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::SVM_ZERO_COPY}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.allow64kbPages); EXPECT_FALSE(allocData.flags.allow32Bit); }

// TAG_BUFFER requests must be flagged for system memory.
TEST(MemoryManagerTest, givenTagBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::TAG_BUFFER};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_TRUE(allocationData.flags.useSystemMemory);
}

// GLOBAL_FENCE requests must be flagged for system memory.
TEST(MemoryManagerTest, givenGlobalFenceTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::GLOBAL_FENCE};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_TRUE(allocationData.flags.useSystemMemory);
}

// PREEMPTION requests must be flagged for system memory.
TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::PREEMPTION};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_TRUE(allocationData.flags.useSystemMemory);
}

TEST(MemoryManagerTest, givenSharedContextImageTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1,
GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); }

// MCS requests must be flagged for system memory.
TEST(MemoryManagerTest, givenMCSTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::MCS};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_TRUE(allocationData.flags.useSystemMemory);
}

// PIPE requests must not be flagged for system memory.
TEST(MemoryManagerTest, givenPipeTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::PIPE};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_FALSE(allocationData.flags.useSystemMemory);
}

// GLOBAL_SURFACE requests must not be flagged for system memory.
TEST(MemoryManagerTest, givenGlobalSurfaceTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::GLOBAL_SURFACE};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_FALSE(allocationData.flags.useSystemMemory);
}

// WRITE_COMBINED requests must not be flagged for system memory.
TEST(MemoryManagerTest, givenWriteCombinedTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::WRITE_COMBINED};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_FALSE(allocationData.flags.useSystemMemory);
}

TEST(MemoryManagerTest,
givenDeviceQueueBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); }

// INTERNAL_HOST_MEMORY requests must be flagged for system memory.
TEST(MemoryManagerTest, givenInternalHostMemoryTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_TRUE(allocationData.flags.useSystemMemory);
}

// FILL_PATTERN requests must be flagged for system memory.
TEST(MemoryManagerTest, givenFillPatternTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::FILL_PATTERN};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_TRUE(allocationData.flags.useSystemMemory);
}

// LINEAR_STREAM requests must not use system memory but must require CPU access.
TEST(MemoryManagerTest, givenLinearStreamTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::LINEAR_STREAM};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_FALSE(allocationData.flags.useSystemMemory);
    EXPECT_TRUE(allocationData.flags.requiresCpuAccess);
}

TEST(MemoryManagerTest, givenTimestampPacketTagBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequestedAndRequireCpuAccess) { AllocationData allocData; MockMemoryManager
mockMemoryManager; AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } TEST(MemoryManagerTest, givenProfilingTagBufferTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.flags.requiresCpuAccess); } TEST(MemoryManagerTest, givenAllocationPropertiesWithMultiOsContextCapableFlagEnabledWhenAllocateMemoryThenAllocationDataIsMultiOsContextCapable) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationProperties properties{0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}; properties.flags.multiOsContextCapable = true; AllocationData allocData; MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.multiOsContextCapable); } TEST(MemoryManagerTest, givenAllocationPropertiesWithMultiOsContextCapableFlagDisabledWhenAllocateMemoryThenAllocationDataIsNotMultiOsContextCapable) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); AllocationProperties properties{0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}; properties.flags.multiOsContextCapable = false; AllocationData allocData; 
MockMemoryManager::getAllocationData(allocData, properties, nullptr, memoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.multiOsContextCapable); }

// CONSTANT_SURFACE requests must not be flagged for system memory.
TEST(MemoryManagerTest, givenConstantSurfaceTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::CONSTANT_SURFACE};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_FALSE(allocationData.flags.useSystemMemory);
}

// useInternal32BitAllocator() must return true for INTERNAL_HEAP.
TEST(MemoryManagerTest, givenInternalHeapTypeThenUseInternal32BitAllocator) {
    EXPECT_TRUE(MockMemoryManager::useInternal32BitAllocator(GraphicsAllocation::AllocationType::INTERNAL_HEAP));
}

// INTERNAL_HEAP requests must not use system memory but must require CPU access.
TEST(MemoryManagerTest, givenInternalHeapTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::INTERNAL_HEAP};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_FALSE(allocationData.flags.useSystemMemory);
    EXPECT_TRUE(allocationData.flags.requiresCpuAccess);
}

// KERNEL_ISA requests must not be flagged for system memory.
TEST(MemoryManagerTest, givenKernelIsaTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties allocProperties{0, 1, GraphicsAllocation::AllocationType::KERNEL_ISA};

    MockMemoryManager::getAllocationData(allocationData, allocProperties, nullptr,
                                         memoryManager.createStorageInfoFromProperties(allocProperties));

    EXPECT_FALSE(allocationData.flags.useSystemMemory);
}

TEST(MemoryManagerTest, givenLinearStreamWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1,
GraphicsAllocation::AllocationType::LINEAR_STREAM}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.requiresCpuAccess); }

// PRINTF_SURFACE requests must not use system memory but must require CPU access.
TEST(MemoryManagerTest, givenPrintfAllocationWhenGetAllocationDataIsCalledThenDontUseSystemMemoryAndRequireCpuAccess) {
    AllocationData allocData;
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::PRINTF_SURFACE};

    MockMemoryManager::getAllocationData(allocData, properties, nullptr,
                                         mockMemoryManager.createStorageInfoFromProperties(properties));

    EXPECT_FALSE(allocData.flags.useSystemMemory);
    EXPECT_TRUE(allocData.flags.requiresCpuAccess);
}

// useInternal32BitAllocator() must return true for KERNEL_ISA.
TEST(MemoryManagerTest, givenKernelIsaTypeThenUseInternal32BitAllocator) {
    EXPECT_TRUE(MockMemoryManager::useInternal32BitAllocator(GraphicsAllocation::AllocationType::KERNEL_ISA));
}

// EXTERNAL_HOST_PTR requests: system memory is used, nothing is allocated, 32-bit and 64KB pages
// are disallowed, and the caller's pointer is stored unchanged.
TEST(MemoryManagerTest, givenExternalHostMemoryWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) {
    AllocationData allocData;
    // Arbitrary non-null address used only as an opaque value; it is never dereferenced here.
    // reinterpret_cast needs an explicit target type to turn the integer into a pointer.
    auto hostPtr = reinterpret_cast<void *>(0x1234);
    MockMemoryManager mockMemoryManager;
    AllocationProperties properties{0, false, 1, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};

    MockMemoryManager::getAllocationData(allocData, properties, hostPtr,
                                         mockMemoryManager.createStorageInfoFromProperties(properties));

    EXPECT_TRUE(allocData.flags.useSystemMemory);
    EXPECT_FALSE(allocData.flags.allocateMemory);
    EXPECT_FALSE(allocData.flags.allow32Bit);
    EXPECT_FALSE(allocData.flags.allow64kbPages);
    EXPECT_EQ(allocData.hostPtr, hostPtr);
}

TEST(MemoryManagerTest, getAllocationDataProperlyHandlesRootDeviceIndexFromAllcationProperties) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, 1, GraphicsAllocation::AllocationType::BUFFER}; MockMemoryManager::getAllocationData(allocData, properties, nullptr,
mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(allocData.rootDeviceIndex, 0u); AllocationProperties properties2{100, 1, GraphicsAllocation::AllocationType::BUFFER}; MockMemoryManager::getAllocationData(allocData, properties2, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(allocData.rootDeviceIndex, properties2.rootDeviceIndex); } TEST(MemoryManagerTest, givenMapAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) { AllocationData allocData; auto hostPtr = reinterpret_cast(0x1234); MockMemoryManager mockMemoryManager; AllocationProperties properties{0, false, 1, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false}; MockMemoryManager::getAllocationData(allocData, properties, hostPtr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.flags.allocateMemory); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(allocData.hostPtr, hostPtr); } TEST(MemoryManagerTest, givenRingBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, true, 0x10000u, GraphicsAllocation::AllocationType::RING_BUFFER, false}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.allocateMemory); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(0x10000u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); EXPECT_TRUE(allocData.flags.requiresCpuAccess); } TEST(MemoryManagerTest, givenSemaphoreBufferAllocationWhenGetAllocationDataIsCalledThenItHasProperFieldsSet) { AllocationData allocData; MockMemoryManager mockMemoryManager; AllocationProperties properties{0, true, 0x1000u, 
GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER, false}; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_TRUE(allocData.flags.allocateMemory); EXPECT_FALSE(allocData.flags.allow32Bit); EXPECT_FALSE(allocData.flags.allow64kbPages); EXPECT_EQ(0x1000u, allocData.size); EXPECT_EQ(nullptr, allocData.hostPtr); EXPECT_TRUE(allocData.flags.requiresCpuAccess); }

// Without touching DirectSubmissionBufferPlacement, overrideAllocationData() must leave
// requiresCpuAccess and useSystemMemory at 0 for a RING_BUFFER.
TEST(MemoryManagerTest, givenDirectBufferPlacementSetWhenDefaultIsUsedThenExpectNoFlagsChanged) {
    AllocationData allocData;
    AllocationProperties allocProperties(0, 0x1000, GraphicsAllocation::AllocationType::RING_BUFFER);

    MockMemoryManager::overrideAllocationData(allocData, allocProperties);

    EXPECT_EQ(0u, allocData.flags.requiresCpuAccess);
    EXPECT_EQ(0u, allocData.flags.useSystemMemory);
}

// DirectSubmissionBufferPlacement == 0: requiresCpuAccess becomes 1, useSystemMemory stays 0.
TEST(MemoryManagerTest, givenDirectBufferPlacementSetWhenOverrideToNonSystemThenExpectNonSystemFlags) {
    DebugManagerStateRestore flagRestorer;
    DebugManager.flags.DirectSubmissionBufferPlacement.set(0);

    AllocationData allocData;
    AllocationProperties allocProperties(0, 0x1000, GraphicsAllocation::AllocationType::RING_BUFFER);
    MockMemoryManager::overrideAllocationData(allocData, allocProperties);

    EXPECT_EQ(1u, allocData.flags.requiresCpuAccess);
    EXPECT_EQ(0u, allocData.flags.useSystemMemory);
}

// DirectSubmissionBufferPlacement == 1: requiresCpuAccess stays 0, useSystemMemory becomes 1.
TEST(MemoryManagerTest, givenDirectBufferPlacementSetWhenOverrideToSystemThenExpectNonFlags) {
    DebugManagerStateRestore flagRestorer;
    DebugManager.flags.DirectSubmissionBufferPlacement.set(1);

    AllocationData allocData;
    AllocationProperties allocProperties(0, 0x1000, GraphicsAllocation::AllocationType::RING_BUFFER);
    MockMemoryManager::overrideAllocationData(allocData, allocProperties);

    EXPECT_EQ(0u, allocData.flags.requiresCpuAccess);
    EXPECT_EQ(1u, allocData.flags.useSystemMemory);
}

TEST(MemoryManagerTest, givenDirectSemaphorePlacementSetWhenDefaultIsUsedThenExpectNoFlagsChanged)
{ AllocationData allocationData; AllocationProperties properties(0, 0x1000, GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER); MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.requiresCpuAccess); EXPECT_EQ(0u, allocationData.flags.useSystemMemory); }

// DirectSubmissionSemaphorePlacement == 0: requiresCpuAccess becomes 1, useSystemMemory stays 0.
TEST(MemoryManagerTest, givenDirectSemaphorePlacementSetWhenOverrideToNonSystemThenExpectNonSystemFlags) {
    DebugManagerStateRestore flagRestorer;
    DebugManager.flags.DirectSubmissionSemaphorePlacement.set(0);

    AllocationData allocData;
    AllocationProperties allocProperties(0, 0x1000, GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER);
    MockMemoryManager::overrideAllocationData(allocData, allocProperties);

    EXPECT_EQ(1u, allocData.flags.requiresCpuAccess);
    EXPECT_EQ(0u, allocData.flags.useSystemMemory);
}

// DirectSubmissionSemaphorePlacement == 1: requiresCpuAccess stays 0, useSystemMemory becomes 1.
TEST(MemoryManagerTest, givenDirectSemaphorePlacementSetWhenOverrideToSystemThenExpectNonFlags) {
    DebugManagerStateRestore flagRestorer;
    DebugManager.flags.DirectSubmissionSemaphorePlacement.set(1);

    AllocationData allocData;
    AllocationProperties allocProperties(0, 0x1000, GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER);
    MockMemoryManager::overrideAllocationData(allocData, allocProperties);

    EXPECT_EQ(0u, allocData.flags.requiresCpuAccess);
    EXPECT_EQ(1u, allocData.flags.useSystemMemory);
}

// DirectSubmissionBufferAddressing == 0 must clear a pre-set resource48Bit flag on a RING_BUFFER.
TEST(MemoryManagerTest, givenDirectBufferAddressingWhenOverrideToNo48BitThenExpect48BitFlagFalse) {
    DebugManagerStateRestore flagRestorer;
    DebugManager.flags.DirectSubmissionBufferAddressing.set(0);

    AllocationData allocData;
    AllocationProperties allocProperties(0, 0x1000, GraphicsAllocation::AllocationType::RING_BUFFER);
    allocData.flags.resource48Bit = 1;
    MockMemoryManager::overrideAllocationData(allocData, allocProperties);

    EXPECT_EQ(0u, allocData.flags.resource48Bit);
}

TEST(MemoryManagerTest, givenDirectBufferAddressingWhenOverrideTo48BitThenExpect48BitFlagTrue) { DebugManagerStateRestore restorer;
DebugManager.flags.DirectSubmissionBufferAddressing.set(1); AllocationData allocationData; AllocationProperties properties(0, 0x1000, GraphicsAllocation::AllocationType::RING_BUFFER); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectBufferAddressingDefaultWhenNoOverrideThenExpect48BitFlagSame) { AllocationData allocationData; AllocationProperties properties(0, 0x1000, GraphicsAllocation::AllocationType::RING_BUFFER); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.resource48Bit); allocationData.flags.resource48Bit = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectSemaphoreAddressingWhenOverrideToNo48BitThenExpect48BitFlagFalse) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionSemaphoreAddressing.set(0); AllocationData allocationData; AllocationProperties properties(0, 0x1000, GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER); allocationData.flags.resource48Bit = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectSemaphoreAddressingWhenOverrideTo48BitThenExpect48BitFlagTrue) { DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionSemaphoreAddressing.set(1); AllocationData allocationData; AllocationProperties properties(0, 0x1000, GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } TEST(MemoryManagerTest, givenDirectSemaphoreAddressingDefaultWhenNoOverrideThenExpect48BitFlagSame) { AllocationData 
allocationData; AllocationProperties properties(0, 0x1000, GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER); allocationData.flags.resource48Bit = 0; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(0u, allocationData.flags.resource48Bit); allocationData.flags.resource48Bit = 1; MockMemoryManager::overrideAllocationData(allocationData, properties); EXPECT_EQ(1u, allocationData.flags.resource48Bit); } using MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest = testing::TestWithParam>; TEST_P(MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest, givenAllocationTypesHaveToBeForcedTo48BitThenAllocationDataResource48BitIsSet) { GraphicsAllocation::AllocationType allocationType; bool propertiesFlag48Bit; std::tie(allocationType, propertiesFlag48Bit) = GetParam(); AllocationProperties properties(0, true, 0, allocationType, false); properties.flags.resource48Bit = propertiesFlag48Bit; AllocationData allocationData; MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocationData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocationData.flags.resource48Bit); } using MemoryManagerGetAlloctionDataHaveNotToBeForcedTo48BitTest = testing::TestWithParam>; TEST_P(MemoryManagerGetAlloctionDataHaveNotToBeForcedTo48BitTest, givenAllocationTypesHaveNotToBeForcedTo48BitThenAllocationDataResource48BitIsSetProperly) { GraphicsAllocation::AllocationType allocationType; bool propertiesFlag48Bit; std::tie(allocationType, propertiesFlag48Bit) = GetParam(); AllocationProperties properties(0, true, 0, allocationType, false); properties.flags.resource48Bit = propertiesFlag48Bit; AllocationData allocationData; MockMemoryManager mockMemoryManager; MockMemoryManager::getAllocationData(allocationData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_EQ(allocationData.flags.resource48Bit, propertiesFlag48Bit); } static const 
GraphicsAllocation::AllocationType allocationHaveToBeForcedTo48Bit[] = {
    // Values fed to the ForceTo48Bit instantiation below.
    GraphicsAllocation::AllocationType::COMMAND_BUFFER,
    GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER,
    GraphicsAllocation::AllocationType::IMAGE,
    GraphicsAllocation::AllocationType::INDIRECT_OBJECT_HEAP,
    GraphicsAllocation::AllocationType::INSTRUCTION_HEAP,
    GraphicsAllocation::AllocationType::INTERNAL_HEAP,
    GraphicsAllocation::AllocationType::KERNEL_ISA,
    GraphicsAllocation::AllocationType::LINEAR_STREAM,
    GraphicsAllocation::AllocationType::MCS,
    GraphicsAllocation::AllocationType::SCRATCH_SURFACE,
    GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE,
    GraphicsAllocation::AllocationType::SHARED_IMAGE,
    GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY,
    GraphicsAllocation::AllocationType::SURFACE_STATE_HEAP,
    GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER,
};

static const GraphicsAllocation::AllocationType allocationHaveNotToBeForcedTo48Bit[] = {
    // Values fed to the NotForceTo48Bit instantiation below.
    GraphicsAllocation::AllocationType::BUFFER,
    GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
    GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY,
    GraphicsAllocation::AllocationType::CONSTANT_SURFACE,
    GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR,
    GraphicsAllocation::AllocationType::FILL_PATTERN,
    GraphicsAllocation::AllocationType::GLOBAL_SURFACE,
    GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
    GraphicsAllocation::AllocationType::MAP_ALLOCATION,
    GraphicsAllocation::AllocationType::PIPE,
    GraphicsAllocation::AllocationType::PREEMPTION,
    GraphicsAllocation::AllocationType::PRINTF_SURFACE,
    GraphicsAllocation::AllocationType::PRIVATE_SURFACE,
    GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER,
    GraphicsAllocation::AllocationType::SHARED_BUFFER,
    GraphicsAllocation::AllocationType::SVM_CPU,
    GraphicsAllocation::AllocationType::SVM_GPU,
    GraphicsAllocation::AllocationType::SVM_ZERO_COPY,
    GraphicsAllocation::AllocationType::TAG_BUFFER,
    GraphicsAllocation::AllocationType::GLOBAL_FENCE,
    GraphicsAllocation::AllocationType::WRITE_COMBINED,
    GraphicsAllocation::AllocationType::RING_BUFFER,
    GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER,
};

// Each instantiation pairs every allocation type of its table with both bool values.
INSTANTIATE_TEST_CASE_P(ForceTo48Bit,
                        MemoryManagerGetAlloctionDataHaveToBeForcedTo48BitTest,
                        ::testing::Combine(
                            ::testing::ValuesIn(allocationHaveToBeForcedTo48Bit),
                            ::testing::Bool()));

INSTANTIATE_TEST_CASE_P(NotForceTo48Bit,
                        MemoryManagerGetAlloctionDataHaveNotToBeForcedTo48BitTest,
                        ::testing::Combine(
                            ::testing::ValuesIn(allocationHaveNotToBeForcedTo48Bit),
                            ::testing::Bool()));

// NOTE(review): the next line is archive-header residue fused with the license comment of the
// following file (memory_manager_multi_device_tests.cpp); it is not C++ and is kept verbatim.
memory_manager_multi_device_tests.cpp000066400000000000000000000057141363734646600342450ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_constants.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "opencl/test/unit_test/fixtures/memory_allocator_multi_device_fixture.h"
#include "test.h"

using namespace NEO;

using MemoryManagerMultiDeviceTest = MemoryAllocatorMultiDeviceFixture<10>;

// For BUFFER and KERNEL_ISA, and for every root device index below getNumRootDevices(), each
// allocation entry point exercised here must return a non-null allocation whose
// getRootDeviceIndex() equals the index passed in the properties. Entry points covered:
// allocateGraphicsMemoryWithProperties and allocateGraphicsMemoryInPreferredPool (each without and
// with a host pointer) and createGraphicsAllocationFromSharedHandle (trailing bool false and true).
TEST_P(MemoryManagerMultiDeviceTest, givenRootDeviceIndexSpecifiedWhenAllocateGraphicsMemoryIsCalledThenGraphicsAllocationHasTheSameRootDeviceIndex) {
    // NOTE(review): the vector's element type appears to have been lost in this copy
    // (presumably GraphicsAllocation::AllocationType) - confirm against upstream.
    std::vector allocationTypes{GraphicsAllocation::AllocationType::BUFFER,
                                GraphicsAllocation::AllocationType::KERNEL_ISA};
    for (auto allocationType : allocationTypes) {
        for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < getNumRootDevices(); ++rootDeviceIndex) {
            AllocationProperties properties{rootDeviceIndex, true, MemoryConstants::pageSize, allocationType, false, false, 0};

            auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
            ASSERT_NE(gfxAllocation, nullptr);
            EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex());
            memoryManager->freeGraphicsMemory(gfxAllocation);

            gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, (void *)0x1234);
            ASSERT_NE(gfxAllocation, nullptr);
            EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex());
            memoryManager->freeGraphicsMemory(gfxAllocation);

            gfxAllocation = memoryManager->allocateGraphicsMemoryInPreferredPool(properties, nullptr);
            ASSERT_NE(gfxAllocation, nullptr);
            EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex());
            memoryManager->freeGraphicsMemory(gfxAllocation);

            gfxAllocation = memoryManager->allocateGraphicsMemoryInPreferredPool(properties, (void *)0x1234);
            ASSERT_NE(gfxAllocation, nullptr);
            EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex());
            memoryManager->freeGraphicsMemory(gfxAllocation);

            gfxAllocation = memoryManager->createGraphicsAllocationFromSharedHandle((osHandle)0u, properties, false);
            ASSERT_NE(gfxAllocation, nullptr);
            EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex());
            memoryManager->freeGraphicsMemory(gfxAllocation);

            gfxAllocation = memoryManager->createGraphicsAllocationFromSharedHandle((osHandle)0u, properties, true);
            ASSERT_NE(gfxAllocation, nullptr);
            EXPECT_EQ(rootDeviceIndex, gfxAllocation->getRootDeviceIndex());
            memoryManager->freeGraphicsMemory(gfxAllocation);
        }
    }
}

INSTANTIATE_TEST_CASE_P(MemoryManagerType, MemoryManagerMultiDeviceTest, ::testing::Bool());

// NOTE(review): archive-header residue for the next file (memory_manager_tests.cpp), fused with
// that file's license comment; not C++, kept verbatim.
compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp000066400000000000000000003450751363734646600316020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
#include "shared/source/command_stream/preemption.h"
#include "shared/source/gmm_helper/page_table_mngr.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_constants.h"
#include "shared/source/memory_manager/residency.h"
#include
"shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/program/program_initialization.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/printf_handler.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_allocator_fixture.h" #include "opencl/test/unit_test/fixtures/memory_manager_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_deferrable_deletion.h" #include "opencl/test/unit_test/mocks/mock_deferred_deleter.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_os_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include #include using namespace NEO; typedef Test MemoryAllocatorTest; TEST(MemoryManagerTest, 
whenCreatingOsAgnosticMemoryManagerThenSupportsMultiStorageResourcesFlagIsSetToTrue) {
    // A default-constructed MockMemoryManager must report supportsMultiStorageResources as true.
    MockMemoryManager memoryManager;
    EXPECT_TRUE(memoryManager.supportsMultiStorageResources);
}

// Constructing AllocationProperties with this seven-argument form must leave
// multiStorageResource set.
TEST(MemoryManagerTest, whenCreatingAllocPropertiesForMultiStorageResourceThenMultiStorageResourcesFlagIsSetToTrue) {
    AllocationProperties properties{0, false, 0u, GraphicsAllocation::AllocationType::SCRATCH_SURFACE, false, true, 0};
    EXPECT_TRUE(properties.multiStorageResource);
}

// MemoryBanks::getBank must return MainBank for each of the ordinals tried (0, 1 and 100).
TEST(MemoryBank, givenDifferentDeviceOrdinalsWhenGettingBankThenCorrectBanksAreReturned) {
    auto bank = MemoryBanks::getBank(0);
    EXPECT_EQ(MemoryBanks::MainBank, bank);

    bank = MemoryBanks::getBank(1);
    EXPECT_EQ(MemoryBanks::MainBank, bank);

    bank = MemoryBanks::getBank(100);
    EXPECT_EQ(MemoryBanks::MainBank, bank);
}

// The checked type must be neither copy constructible nor copy assignable.
// NOTE(review): the trait's template argument appears to have been lost in this copy
// (presumably GraphicsAllocation, given the test suite name) - confirm against upstream.
TEST(GraphicsAllocationTest, defaultTypeTraits) {
    EXPECT_FALSE(std::is_copy_constructible::value);
    EXPECT_FALSE(std::is_copy_assignable::value);
}

// A MockGraphicsAllocation built from (cpuPtr, size) must report a GPU address equal to its
// underlying buffer address and a GPU base address of 0.
TEST(GraphicsAllocationTest, Ctor) {
    void *cpuPtr = (void *)0x30000;
    size_t size = 0x1000;

    MockGraphicsAllocation gfxAllocation(cpuPtr, size);
    // NOTE(review): the target types of both casts appear to have been lost in this copy - confirm.
    uint64_t expectedGpuAddr = static_cast(reinterpret_cast(gfxAllocation.getUnderlyingBuffer()));

    EXPECT_EQ(expectedGpuAddr, gfxAllocation.getGpuAddress());
    EXPECT_EQ(0u, gfxAllocation.getGpuBaseAddress());
}

// Same expectations as Ctor for the constructor that takes a shared handle; additionally the
// handle passed in must be returned by peekSharedHandle().
TEST(GraphicsAllocationTest, Ctor2) {
    void *cpuPtr = (void *)0x30000;
    size_t size = 0x1000;
    osHandle sharedHandle = Sharing::nonSharedResource;

    GraphicsAllocation gfxAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuPtr, size, sharedHandle, MemoryPool::MemoryNull);
    // NOTE(review): cast target types missing here as well - confirm.
    uint64_t expectedGpuAddr = static_cast(reinterpret_cast(gfxAllocation.getUnderlyingBuffer()));

    EXPECT_EQ(expectedGpuAddr, gfxAllocation.getGpuAddress());
    EXPECT_EQ(0u, gfxAllocation.getGpuBaseAddress());
    EXPECT_EQ(sharedHandle, gfxAllocation.peekSharedHandle());
}

// getGpuAddress() must return the address given at construction and, after
// setCpuPtrAndGpuAddress(), the newly supplied address; the CPU pointer must follow as well.
TEST(GraphicsAllocationTest, getGpuAddress) {
    void *cpuPtr = (void *)0x30000;
    uint64_t gpuAddr = 0x30000;
    uint64_t gpuBaseAddr = 0x10000;
    size_t size = 0x1000;

    GraphicsAllocation gfxAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuPtr, gpuAddr, gpuBaseAddr, size, MemoryPool::MemoryNull);

    EXPECT_EQ(gpuAddr, gfxAllocation.getGpuAddress());

    cpuPtr = (void *)65535;
    gpuAddr = 1ULL;
    gfxAllocation.setCpuPtrAndGpuAddress(cpuPtr, gpuAddr);
    EXPECT_EQ(gpuAddr, gfxAllocation.getGpuAddress());
    EXPECT_EQ(cpuPtr, gfxAllocation.getUnderlyingBuffer());
}

// getGpuAddressToPatch() must equal the GPU address minus the GPU base address.
TEST(GraphicsAllocationTest, getGpuAddressToPatch) {
    void *cpuPtr = (void *)0x30000;
    uint64_t gpuAddr = 0x30000;
    uint64_t gpuBaseAddr = 0x10000;
    size_t size = 0x1000;

    GraphicsAllocation gfxAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuPtr, gpuAddr, gpuBaseAddr, size, MemoryPool::MemoryNull);

    EXPECT_EQ(gpuAddr - gpuBaseAddr, gfxAllocation.getGpuAddressToPatch());
}

// getUnderlyingBufferSize() must reflect the constructor size and then the value given to setSize().
TEST(GraphicsAllocationTest, setSize) {
    void *cpuPtr = (void *)0x30000;
    uint64_t gpuAddr = 0x30000;
    uint64_t gpuBaseAddr = 0x10000;
    size_t size = 0x2000;

    GraphicsAllocation gfxAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuPtr, gpuAddr, gpuBaseAddr, size, MemoryPool::MemoryNull);
    EXPECT_EQ(size, gfxAllocation.getUnderlyingBufferSize());

    size = 0x3000;
    gfxAllocation.setSize(size);
    EXPECT_EQ(size, gfxAllocation.getUnderlyingBufferSize());
}

// allocateSystemMemory with an alignment argument of 0 must still return a non-null pointer.
TEST_F(MemoryAllocatorTest, allocateSystem) {
    auto ptr = memoryManager->allocateSystemMemory(sizeof(char), 0);
    EXPECT_NE(nullptr, ptr);
    memoryManager->freeSystemMemory(ptr);
}

// Walks a host-pointer fragment through add / store / remove and checks refCount and the
// fragment fields after each step (the test continues past this point).
TEST_F(MemoryAllocatorTest, GivenGraphicsAllocationWhenAddAndRemoveAllocationToHostPtrManagerThenfragmentHasCorrectValues) {
    void *cpuPtr = (void *)0x30000;
    size_t size = 0x1000;

    MockGraphicsAllocation gfxAllocation(cpuPtr, size);
    memoryManager->addAllocationToHostPtrManager(&gfxAllocation);
    auto fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer());
    // NOTE(review): fragment is dereferenced below after a non-fatal EXPECT_NE.
    EXPECT_NE(fragment, nullptr);
    EXPECT_TRUE(fragment->driverAllocation);
    EXPECT_EQ(fragment->refCount, 1);
    EXPECT_EQ(fragment->fragmentCpuPointer, cpuPtr);
    EXPECT_EQ(fragment->fragmentSize, size);
EXPECT_NE(fragment->osInternalStorage, nullptr); EXPECT_NE(fragment->residency, nullptr); FragmentStorage fragmentStorage = {}; fragmentStorage.fragmentCpuPointer = cpuPtr; memoryManager->getHostPtrManager()->storeFragment(fragmentStorage); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = false; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = true; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment->refCount, 1); memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment, nullptr); } TEST_F(MemoryAllocatorTest, allocateSystemAligned) { unsigned int alignment = 0x100; auto ptr = memoryManager->allocateSystemMemory(sizeof(char), alignment); EXPECT_NE(nullptr, ptr); EXPECT_EQ(0u, reinterpret_cast(ptr) & (alignment - 1)); memoryManager->freeSystemMemory(ptr); } TEST_F(MemoryAllocatorTest, allocateGraphics) { unsigned int alignment = 4096; memoryManager->createAndRegisterOsContext(csr, HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); // initial taskCount must be -1. 
if not, we may kill allocation before it will be used EXPECT_EQ((uint32_t)-1, allocation->getTaskCount(csr->getOsContext().getContextId())); // We know we want graphics memory to be page aligned EXPECT_EQ(0u, reinterpret_cast(allocation->getUnderlyingBuffer()) & (alignment - 1)); EXPECT_EQ(Sharing::nonSharedResource, allocation->peekSharedHandle()); // Gpu address equal to cpu address if (defaultHwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { EXPECT_EQ(reinterpret_cast(allocation->getUnderlyingBuffer()), allocation->getGpuAddress()); } memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, allocateGraphicsPageAligned) { unsigned int alignment = 4096; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0u, reinterpret_cast(allocation->getUnderlyingBuffer()) & (alignment - 1)); memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, AlignedHostPtrWithAlignedSizeWhenAskedForGraphicsAllocationReturnsNullStorageFromHostPtrManager) { auto ptr = (void *)0x1000; MockMemoryManager mockMemoryManager(*executionEnvironment); auto hostPtrManager = static_cast(mockMemoryManager.getHostPtrManager()); auto graphicsAllocation = mockMemoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, 4096}, ptr); EXPECT_NE(nullptr, graphicsAllocation); EXPECT_EQ(1u, hostPtrManager->getFragmentCount()); auto fragmentData = hostPtrManager->getFragment(ptr); ASSERT_NE(nullptr, fragmentData); EXPECT_NE(nullptr, fragmentData->osInternalStorage); mockMemoryManager.freeGraphicsMemory(graphicsAllocation); } TEST_F(MemoryAllocatorTest, GivenAlignedHostPtrAndCacheAlignedSizeWhenAskedForL3AllowanceThenTrueIsReturned) { auto ptr = (void *)0x1000; auto alignedSize = MemoryConstants::cacheLineSize; auto graphicsAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize}, ptr);

    EXPECT_TRUE(isL3Capable(*graphicsAllocation));

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Page-aligned host pointer, size one byte short of a cache line: isL3Capable must be false.
TEST_F(MemoryAllocatorTest, GivenAlignedHostPtrAndNotCacheAlignedSizeWhenAskedForL3AllowanceThenFalseIsReturned) {
    auto ptr = (void *)0x1000;
    auto alignedSize = MemoryConstants::cacheLineSize - 1;
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize}, ptr);

    EXPECT_FALSE(isL3Capable(*graphicsAllocation));

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Host pointer at 0x1001 and size one byte short of a cache line: isL3Capable must be false.
TEST_F(MemoryAllocatorTest, GivenMisAlignedHostPtrAndNotCacheAlignedSizeWhenAskedForL3AllowanceThenFalseIsReturned) {
    auto ptr = (void *)0x1001;
    auto alignedSize = MemoryConstants::cacheLineSize - 1;
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize}, ptr);

    EXPECT_FALSE(isL3Capable(*graphicsAllocation));

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Host pointer at 0x1040 with a size of one cache line: isL3Capable must be true.
TEST_F(MemoryAllocatorTest, GivenHostPtrAlignedToCacheLineWhenAskedForL3AllowanceThenTrueIsReturned) {
    auto ptr = (void *)0x1040;
    auto alignedSize = MemoryConstants::cacheLineSize;
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, alignedSize}, ptr);

    EXPECT_TRUE(isL3Capable(*graphicsAllocation));

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// populateOsHandles must fill osHandleStorage only for the fragment that has a CPU pointer
// (index 0); indices 1 and 2 must stay null.
TEST_F(MemoryAllocatorTest, NullOsHandleStorageAskedForPopulationReturnsFilledPointer) {
    OsHandleStorage storage;
    storage.fragmentStorageData[0].cpuPtr = (void *)0x1000;
    memoryManager->populateOsHandles(storage, 0);
    EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage);
    EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage);
    EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage);
    memoryManager->getHostPtrManager()->releaseHandleStorage(storage);
    memoryManager->cleanOsHandles(storage, 0);
}

// Runs cleanOsHandles while the AUB center holds no AUB manager; the only explicit check is
// that aubManager is still null afterwards.
TEST_F(MemoryAllocatorTest, givenOsHandleStorageWhenOsHandlesAreCleanedAndAubManagerIsNotAvailableThenFreeMemoryIsNotCalledOnAubManager) {
    MockExecutionEnvironment mockExecutionEnvironment(defaultHwInfo.get());
    MockMemoryManager mockMemoryManager(mockExecutionEnvironment);

    // Handed to aubCenter.reset() below, which takes over the pointer.
    auto mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "aubfile", CommandStreamReceiverType::CSR_AUB);
    mockAubCenter->aubManager.reset(nullptr);
    mockExecutionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter);

    OsHandleStorage storage;
    storage.fragmentStorageData[0].cpuPtr = (void *)0x1000;
    mockMemoryManager.populateOsHandles(storage, 0);
    mockMemoryManager.getHostPtrManager()->releaseHandleStorage(storage);
    mockMemoryManager.cleanOsHandles(storage, 0);

    EXPECT_EQ(nullptr, mockAubCenter->aubManager);
}

// With EnableFreeMemory set and AUB managers installed on root devices 0 and 1, cleaning the
// handles for root device 1 must call freeMemory on device 1's manager only.
TEST_F(MemoryAllocatorTest, givenOsHandleStorageAndFreeMemoryEnabledWhenOsHandlesAreCleanedAndAubManagerIsAvailableThenFreeMemoryIsCalledOnAubManager) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableFreeMemory.set(true);
    const uint32_t rootDeviceIndex = 1u;
    MockExecutionEnvironment mockExecutionEnvironment(defaultHwInfo.get(), true, 3);
    MockMemoryManager mockMemoryManager(mockExecutionEnvironment);

    auto mockManager0 = new MockAubManager();
    auto mockAubCenter0 = new MockAubCenter(defaultHwInfo.get(), false, "aubfile", CommandStreamReceiverType::CSR_AUB);
    mockAubCenter0->aubManager.reset(mockManager0);
    mockExecutionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter0);

    auto mockManager1 = new MockAubManager();
    auto mockAubCenter1 = new MockAubCenter(defaultHwInfo.get(), false, "aubfile", CommandStreamReceiverType::CSR_AUB);
    mockAubCenter1->aubManager.reset(mockManager1);
    mockExecutionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->aubCenter.reset(mockAubCenter1);

    OsHandleStorage storage;
    // NOTE(review): reinterpret_cast target type missing in this copy - confirm against upstream.
    storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    // Handles are populated with index 0 but cleaned with rootDeviceIndex (1).
    mockMemoryManager.populateOsHandles(storage, 0);
    mockMemoryManager.getHostPtrManager()->releaseHandleStorage(storage);
    mockMemoryManager.cleanOsHandles(storage, rootDeviceIndex);

    EXPECT_FALSE(mockManager0->freeMemoryCalled);
    EXPECT_TRUE(mockManager1->freeMemoryCalled);
}

// A host pointer at 0x1005 spanning just under ten pages needs three fragments; every fragment
// slot of the resulting allocation must match the computed requirements.
TEST_F(MemoryAllocatorTest, GivenEmptyMemoryManagerAndMisalingedHostPtrWithHugeSizeWhenAskedForHostPtrAllocationThenGraphicsAllocationIsBeignCreatedWithAllFragmentsPresent) {
    void *cpuPtr = (void *)0x1005;
    auto size = MemoryConstants::pageSize * 10 - 1;

    MockMemoryManager mockMemoryManager(*executionEnvironment);
    // NOTE(review): static_cast target type missing in this copy - confirm against upstream.
    auto hostPtrManager = static_cast(mockMemoryManager.getHostPtrManager());
    auto reqs = MockHostPtrManager::getAllocationRequirements(cpuPtr, size);

    ASSERT_EQ(3u, reqs.requiredFragmentsCount);

    // NOTE(review): graphicsAllocation is dereferenced in the loop without a null check.
    auto graphicsAllocation = mockMemoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size}, cpuPtr);
    for (int i = 0; i < maxFragmentsCount; i++) {
        EXPECT_NE(nullptr, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage);
        EXPECT_EQ(reqs.allocationFragments[i].allocationPtr, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].cpuPtr);
        EXPECT_EQ(reqs.allocationFragments[i].allocationSize, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].fragmentSize);
    }

    EXPECT_EQ(3u, hostPtrManager->getFragmentCount());
    EXPECT_EQ(Sharing::nonSharedResource, graphicsAllocation->peekSharedHandle());
    mockMemoryManager.freeGraphicsMemory(graphicsAllocation);
}

// createGraphicsAllocation(handleStorage, allocationData) must copy the fragment data into the
// new allocation (the test continues past this point).
TEST_F(MemoryAllocatorTest, GivenPointerAndSizeWhenAskedToCreateGrahicsAllocationThenGraphicsAllocationIsCreated) {
    OsHandleStorage handleStorage;

    auto ptr = (void *)0x1000;
    auto ptr2 = (void *)0x1001;
    auto size = MemoryConstants::pageSize;

    handleStorage.fragmentStorageData[0].cpuPtr = ptr;
    handleStorage.fragmentStorageData[1].cpuPtr = ptr2;
handleStorage.fragmentStorageData[2].cpuPtr = nullptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[1].fragmentSize = size * 2; handleStorage.fragmentStorageData[2].fragmentSize = size * 3; MockMemoryManager::AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = ptr; auto allocation = std::unique_ptr(memoryManager->createGraphicsAllocation(handleStorage, allocationData)); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(ptr, allocation->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ(ptr2, allocation->fragmentsStorage.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, allocation->fragmentsStorage.fragmentStorageData[2].cpuPtr); EXPECT_EQ(size, allocation->fragmentsStorage.fragmentStorageData[0].fragmentSize); EXPECT_EQ(size * 2, allocation->fragmentsStorage.fragmentStorageData[1].fragmentSize); EXPECT_EQ(size * 3, allocation->fragmentsStorage.fragmentStorageData[2].fragmentSize); EXPECT_NE(&allocation->fragmentsStorage, &handleStorage); } TEST_F(MemoryAllocatorTest, givenMemoryManagerWhenAskedFor32bitAllocationThen32bitGraphicsAllocationIsReturned) { size_t size = 10; auto allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, nullptr, GraphicsAllocation::AllocationType::BUFFER); EXPECT_NE(nullptr, allocation); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_TRUE(allocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, givenNotEnoughSpaceInAllocatorWhenAskedFor32bitAllocationNullptrIsReturned) { size_t size = 0xfffff000; auto allocationFirst = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), 0x5000, nullptr, GraphicsAllocation::AllocationType::BUFFER); auto 
allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, nullptr, GraphicsAllocation::AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation); memoryManager->freeGraphicsMemory(allocationFirst); } TEST_F(MemoryAllocatorTest, givenNotEnoughSpaceInAllocatorWhenAskedFor32bitAllocationWithHostPtrThenNullptrIsReturned) { size_t size = 0xfffff000; void *ptr = (void *)0x10000; auto allocationFirst = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), 0x5000, nullptr, GraphicsAllocation::AllocationType::BUFFER); auto allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, ptr, GraphicsAllocation::AllocationType::BUFFER); EXPECT_EQ(nullptr, allocation); memoryManager->freeGraphicsMemory(allocationFirst); } TEST_F(MemoryAllocatorTest, givenMemoryManagerWhenAskedFor32bitAllocationWithPtrThen32bitGraphicsAllocationWithGpuAddressIsReturned) { size_t size = 10; void *ptr = (void *)0x1000; auto allocation = memoryManager->allocate32BitGraphicsMemory(device->getRootDeviceIndex(), size, ptr, GraphicsAllocation::AllocationType::BUFFER); EXPECT_NE(nullptr, allocation); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_TRUE(allocation->is32BitAllocation()); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_NE(0u, allocation->getGpuAddress()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(MemoryAllocatorTest, givenAllocationWithFragmentsWhenCallingFreeGraphicsMemoryThenDoNotCallHandleFenceCompletion) { auto size = 3u * MemoryConstants::pageSize; auto *ptr = reinterpret_cast(0xbeef1); AllocationProperties properties{0, false, size, GraphicsAllocation::AllocationType::BUFFER, false}; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, ptr); EXPECT_EQ(3u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); 
memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); } TEST_F(MemoryAllocatorTest, GivenShareableEnabledAndDisabledWhenAskedToCreateGrahicsAllocationThenValidAllocationIsReturned) { MockMemoryManager::AllocationData allocationData; allocationData.type = GraphicsAllocation::AllocationType::BUFFER; allocationData.flags.shareable = 1u; auto shareableAllocation = memoryManager->allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, shareableAllocation); allocationData.flags.shareable = 0u; auto nonShareableAllocation = memoryManager->allocateGraphicsMemory(allocationData); EXPECT_NE(nullptr, nonShareableAllocation); memoryManager->freeGraphicsMemory(shareableAllocation); memoryManager->freeGraphicsMemory(nonShareableAllocation); } TEST_F(MemoryAllocatorTest, givenAllocationWithoutFragmentsWhenCallingFreeGraphicsMemoryThenCallHandleFenceCompletion) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}); EXPECT_EQ(0u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(1u, memoryManager->handleFenceCompletionCalled); } class MockPrintfHandler : public PrintfHandler { public: static MockPrintfHandler *create(const MultiDispatchInfo &multiDispatchInfo, ClDevice &deviceArg) { return (MockPrintfHandler *)PrintfHandler::create(multiDispatchInfo, deviceArg); } }; TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCreatedThenPrintfSurfaceIsPatchedWithBaseAddressOffset) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); MockMultiDispatchInfo multiDispatchInfo(kernel.mockKernel); SPatchAllocateStatelessPrintfSurface printfSurface; printfSurface.Token = iOpenCL::PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE; 
printfSurface.Size = static_cast(sizeof(SPatchAllocateStatelessPrintfSurface)); printfSurface.PrintfSurfaceIndex = 11; printfSurface.SurfaceStateHeapOffset = 0; printfSurface.DataParamOffset = 8; printfSurface.DataParamSize = sizeof(void *); kernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &printfSurface; // define stateless path kernel.kernelInfo.usesSsh = false; kernel.kernelInfo.requiresSshForBuffers = false; auto printfHandler = MockPrintfHandler::create(multiDispatchInfo, *device.get()); printfHandler->prepareDispatch(multiDispatchInfo); auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddressToPatch(); auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel.mockKernel->getCrossThreadData()), kernel.mockKernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset); EXPECT_EQ(allocationAddress, *(uintptr_t *)printfPatchAddress); EXPECT_EQ(0u, kernel.mockKernel->getSurfaceStateHeapSize()); delete printfHandler; } HWTEST_F(MemoryAllocatorTest, givenStatefulKernelWithPrintfWhenPrintfSurfaceIsCreatedThenPrintfSurfaceIsPatchedWithCpuAddress) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); MockMultiDispatchInfo multiDispatchInfo(kernel.mockKernel); SPatchAllocateStatelessPrintfSurface printfSurface; printfSurface.Token = iOpenCL::PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE; printfSurface.Size = static_cast(sizeof(SPatchAllocateStatelessPrintfSurface)); printfSurface.PrintfSurfaceIndex = 22; printfSurface.SurfaceStateHeapOffset = 16; printfSurface.DataParamOffset = 8; printfSurface.DataParamSize = sizeof(void *); kernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &printfSurface; // define stateful path kernel.kernelInfo.usesSsh = true; kernel.kernelInfo.requiresSshForBuffers = true; auto printfHandler = MockPrintfHandler::create(multiDispatchInfo, *device.get()); 
printfHandler->prepareDispatch(multiDispatchInfo); auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddress(); EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize()); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(kernel.mockKernel->getSurfaceStateHeap(), kernel.mockKernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(allocationAddress, surfaceAddress); delete printfHandler; } TEST_F(MemoryAllocatorTest, given32BitDeviceWhenPrintfSurfaceIsCreatedThen32BitAllocationsIsMade) { DebugManagerStateRestore dbgRestorer; if (is64bit) { DebugManager.flags.Force32bitAddressing.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); MockMultiDispatchInfo multiDispatchInfo(kernel.mockKernel); SPatchAllocateStatelessPrintfSurface printfSurface; printfSurface.Token = iOpenCL::PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE; printfSurface.Size = static_cast(sizeof(SPatchAllocateStatelessPrintfSurface)); printfSurface.PrintfSurfaceIndex = 33; printfSurface.SurfaceStateHeapOffset = 0x1FF0; printfSurface.DataParamOffset = 0; printfSurface.DataParamSize = 4; kernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &printfSurface; auto printfHandler = MockPrintfHandler::create(multiDispatchInfo, *device.get()); for (int i = 0; i < 8; i++) { kernel.mockKernel->mockCrossThreadData[i] = 50; } printfHandler->prepareDispatch(multiDispatchInfo); uint32_t *ptr32Bit = (uint32_t *)kernel.mockKernel->mockCrossThreadData.data(); auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddressToPatch(); uint32_t allocationAddress32bit = (uint32_t)(uintptr_t)allocationAddress; 
EXPECT_TRUE(printfAllocation->is32BitAllocation()); EXPECT_EQ(allocationAddress32bit, *ptr32Bit); for (int i = 4; i < 8; i++) { EXPECT_EQ(50, kernel.mockKernel->mockCrossThreadData[i]); } delete printfHandler; DebugManager.flags.Force32bitAddressing.set(false); } } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenItIsCreatedThenForce32BitAllocationsIsFalse) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); EXPECT_FALSE(memoryManager.peekForce32BitAllocations()); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenForce32bitallocationIsCalledWithTrueThenMemoryManagerForces32BitAlloactions) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); OsAgnosticMemoryManager memoryManager(executionEnvironment); memoryManager.setForce32BitAllocations(true); EXPECT_TRUE(memoryManager.peekForce32BitAllocations()); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAskedFor32BitAllocationWhenLimitedAllocationIsEnabledThenGpuRangeFromExternalHeapIsAllocatiedAndBaseAddressIsSet) { if (is32bit) { GTEST_SKIP(); } ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); MockMemoryManager memoryManager(*executionEnvironment); memoryManager.setForce32BitAllocations(true); memoryManager.forceLimitedRangeAllocator(0, 0xFFFFFFFFF); AllocationData allocationData; MockMemoryManager::getAllocationData(allocationData, {0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}, nullptr, StorageInfo{}); auto gfxAllocation = memoryManager.allocateGraphicsMemoryWithAlignment(allocationData); ASSERT_NE(gfxAllocation, nullptr); EXPECT_NE(gfxAllocation->getGpuBaseAddress(), 0ull); EXPECT_EQ(gfxAllocation->getGpuBaseAddress(), memoryManager.getExternalHeapBaseAddress(gfxAllocation->getRootDeviceIndex())); memoryManager.freeGraphicsMemory(gfxAllocation); } TEST(OsAgnosticMemoryManager, 
givenMemoryManagerWhenAskedForNon32BitAllocationWhenLimitedAllocationIsEnabledThenGpuRangeFromiStandardHeapIsAllocatiedAndBaseAddressIsNotSet) {
    // Skipped when is32bit. Without forced 32-bit allocations the GPU base address must be 0 and the
    // GPU address must match the expression computed from the HEAP_STANDARD limit below.
    if (is32bit) {
        GTEST_SKIP();
    }

    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    MockMemoryManager memoryManager(*executionEnvironment);
    memoryManager.forceLimitedRangeAllocator(0, 0xFFFFFFFFF);

    AllocationData allocationData;
    MockMemoryManager::getAllocationData(allocationData, {0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}, nullptr, StorageInfo{});
    auto gfxAllocation = memoryManager.allocateGraphicsMemoryWithAlignment(allocationData);
    ASSERT_NE(gfxAllocation, nullptr);
    EXPECT_EQ(gfxAllocation->getGpuBaseAddress(), 0ull);
    EXPECT_EQ(gfxAllocation->getGpuAddress(), memoryManager.getGfxPartition(allocationData.rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD) + 1 - GfxPartition::heapGranularity - MemoryConstants::pageSize);

    memoryManager.freeGraphicsMemory(gfxAllocation);
}

// allocateGraphicsMemoryForImage() for a 512x1 2D image must return an allocation whose default Gmm
// has OCL_IMAGE usage and useSystemMemoryPool set.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenAllocateGraphicsMemoryForImageIsCalledThenGraphicsAllocationIsReturned) {
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    MockMemoryManager memoryManager(*executionEnvironment);
    cl_image_desc imgDesc = {};
    imgDesc.image_width = 512;
    imgDesc.image_height = 1;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);

    MockMemoryManager::AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;

    auto imageAllocation = memoryManager.allocateGraphicsMemoryForImage(allocationData);
    ASSERT_NE(nullptr, imageAllocation);
    EXPECT_TRUE(imageAllocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE);
    EXPECT_TRUE(imageAllocation->getDefaultGmm()->useSystemMemoryPool);
    memoryManager.freeGraphicsMemory(imageAllocation);
}

TEST(OsAgnosticMemoryManager,
givenEnabledLocalMemoryWhenAllocateGraphicsMemoryForImageIsCalledThenUseLocalMemoryIsNotSet) {
    // MockMemoryManager is built with (false, true, ...); after allocating a 1x1 2D image,
    // imgInfo.useLocalMemory must still be false.
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    MockMemoryManager memoryManager(false, true, *executionEnvironment);
    cl_image_desc imgDesc = {};
    imgDesc.image_width = 1;
    imgDesc.image_height = 1;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);

    MockMemoryManager::AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;

    auto imageAllocation = memoryManager.allocateGraphicsMemoryForImage(allocationData);
    ASSERT_NE(nullptr, imageAllocation);
    EXPECT_FALSE(imgInfo.useLocalMemory);
    memoryManager.freeGraphicsMemory(imageAllocation);
}

// Linear 4x1 RGBA8 1D image backed by a page-aligned host pointer: isCopyRequired() must be false.
TEST(OsAgnosticMemoryManager, givenHostPointerNotRequiringCopyWhenAllocateGraphicsMemoryForImageFromHostPtrIsCalledThenGraphicsAllocationIsReturned) {
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    MockMemoryManager memoryManager(false, false, *executionEnvironment);

    cl_image_desc imgDesc = {};
    imgDesc.image_width = 4;
    imgDesc.image_height = 1;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_image_format imageFormat = {};
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    imageFormat.image_channel_order = CL_RGBA;

    cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport);

    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, surfaceFormat);
    // Pitches are set by hand: 4 bytes per pixel, one row per slice.
    imgInfo.rowPitch = imgDesc.image_width * 4;
    imgInfo.slicePitch = imgInfo.rowPitch * imgDesc.image_height;
    imgInfo.size = imgInfo.slicePitch;
    imgInfo.linearStorage = true;

    auto hostPtr = alignedMalloc(imgDesc.image_width * imgDesc.image_height * 4, MemoryConstants::pageSize);

    bool copyRequired = MockMemoryManager::isCopyRequired(imgInfo, hostPtr);
    EXPECT_FALSE(copyRequired);

    MockMemoryManager::AllocationData allocationData;
    allocationData.imgInfo =
&imgInfo;
    allocationData.hostPtr = hostPtr;
    allocationData.size = imgInfo.size;

    auto imageAllocation = memoryManager.allocateGraphicsMemoryForImageFromHostPtr(allocationData);
    ASSERT_NE(nullptr, imageAllocation);
    // The returned allocation must use the host pointer itself as its underlying buffer.
    EXPECT_EQ(hostPtr, imageAllocation->getUnderlyingBuffer());

    memoryManager.freeGraphicsMemory(imageAllocation);
    alignedFree(hostPtr);
}

// 4x4 RGBA8 2D image (linearStorage not set here): isCopyRequired() must be true and
// allocateGraphicsMemoryForImageFromHostPtr() must return nullptr.
TEST(OsAgnosticMemoryManager, givenHostPointerRequiringCopyWhenAllocateGraphicsMemoryForImageFromHostPtrIsCalledThenNullptrIsReturned) {
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    MockMemoryManager memoryManager(false, false, *executionEnvironment);

    cl_image_desc imgDesc = {};
    imgDesc.image_width = 4;
    imgDesc.image_height = 4;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;

    cl_image_format imageFormat = {};
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    imageFormat.image_channel_order = CL_RGBA;

    cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport);

    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, surfaceFormat);
    imgInfo.rowPitch = imgDesc.image_width * 4;
    imgInfo.slicePitch = imgInfo.rowPitch * imgDesc.image_height;
    imgInfo.size = imgInfo.slicePitch;

    auto hostPtr = alignedMalloc(imgDesc.image_width * imgDesc.image_height * 4, MemoryConstants::pageSize);

    bool copyRequired = MockMemoryManager::isCopyRequired(imgInfo, hostPtr);
    EXPECT_TRUE(copyRequired);

    MockMemoryManager::AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;
    allocationData.hostPtr = hostPtr;

    auto imageAllocation = memoryManager.allocateGraphicsMemoryForImageFromHostPtr(allocationData);
    EXPECT_EQ(nullptr, imageAllocation);

    alignedFree(hostPtr);
}

// Start of the unified-aux mapping test; it continues past this line.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerAndUnifiedAuxCapableAllocationWhenMappingThenReturnFalse) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.initGmm();
OsAgnosticMemoryManager memoryManager(executionEnvironment); auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 123, false); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); allocation->setDefaultGmm(gmm); auto mockGmmRes = reinterpret_cast(gmm->gmmResourceInfo.get()); mockGmmRes->setUnifiedAuxTranslationCapable(); EXPECT_FALSE(memoryManager.mapAuxGpuVA(allocation)); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledThenMemoryPoolIsSystem4KBPages) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); auto size = 4096u; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, size}); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemory64kbIsCalledThenMemoryPoolIsSystem64KBPages) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.initGmm(); MockMemoryManager memoryManager(true, false, executionEnvironment); AllocationData allocationData; allocationData.size = 4096u; auto allocation = memoryManager.allocateGraphicsMemory64kb(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool()); memoryManager.freeGraphicsMemory(allocation); } TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemoryFailsThenNullptrIsReturned) { class MockOsAgnosticManagerWithFailingAllocate : public MemoryManagerCreate { public: using OsAgnosticMemoryManager::allocateGraphicsMemory64kb; MockOsAgnosticManagerWithFailingAllocate(bool enable64kbPages, 
ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(enable64kbPages, false, executionEnvironment) {}
        // Always fails, so the 64kb allocation path has nothing to build on.
        GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override {
            return nullptr;
        }
    };
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockOsAgnosticManagerWithFailingAllocate memoryManager(true, executionEnvironment);
    AllocationData allocationData;
    allocationData.size = 4096u;
    auto allocation = memoryManager.allocateGraphicsMemory64kb(allocationData);
    EXPECT_EQ(nullptr, allocation);
    // freeGraphicsMemory() is called with the (expected null) allocation.
    memoryManager.freeGraphicsMemory(allocation);
}

// An allocation created for the host pointer value 0x1001 must report MemoryPool::System4KBPages.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPages) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(false, false, executionEnvironment);
    // NOTE(review): cast target type is missing in the text as given - presumably stripped by extraction.
    void *ptr = reinterpret_cast(0x1001);
    auto size = MemoryConstants::pageSize;

    auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, false, size}, ptr);

    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(allocation);
}

// allocate32BitGraphicsMemory() with a host pointer must report
// MemoryPool::System4KBPagesWith32BitGpuAddressing.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);
    void *ptr = reinterpret_cast(0x1001);
    auto size = MemoryConstants::pageSize;

    auto allocation = memoryManager.allocate32BitGraphicsMemory(0, size, ptr, GraphicsAllocation::AllocationType::BUFFER);

    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(allocation);
}

TEST(OsAgnosticMemoryManager,
givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithoutPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) {
    // Same check as the with-pointer variant, but the host pointer passed in is nullptr.
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);
    void *ptr = nullptr;
    auto size = MemoryConstants::pageSize;

    auto allocation = memoryManager.allocate32BitGraphicsMemory(0, size, ptr, GraphicsAllocation::AllocationType::BUFFER);

    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(allocation);
}

// With the first MemoryManagerCreate flag set to true, an SVM_ZERO_COPY allocation must report
// MemoryPool::System64KBPages.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemoryThenMemoryPoolIsSystem64KBPages) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(true, false, executionEnvironment);
    auto svmAllocation = memoryManager.allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SVM_ZERO_COPY});
    // Fatal assertion: the allocation is dereferenced on the next line.
    ASSERT_NE(nullptr, svmAllocation);
    EXPECT_EQ(MemoryPool::System64KBPages, svmAllocation->getMemoryPool());
    memoryManager.freeGraphicsMemory(svmAllocation);
}

// With the first MemoryManagerCreate flag set to false, the same allocation must report
// MemoryPool::System4KBPages.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWith64KBPagesDisabledWhenAllocateGraphicsMemoryThen4KBGraphicsAllocationIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(false, false, executionEnvironment);
    auto svmAllocation = memoryManager.allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SVM_ZERO_COPY});
    // Guard added: the allocation was previously dereferenced without any null check.
    ASSERT_NE(nullptr, svmAllocation);
    EXPECT_EQ(MemoryPool::System4KBPages, svmAllocation->getMemoryPool());
    memoryManager.freeGraphicsMemory(svmAllocation);
}

// Start of the shared-handle test; it continues past this line.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenCreateGraphicsAllocationFromSharedObjectIsCalledThenGraphicsAllocationIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(false, false,
executionEnvironment); osHandle handle = 1; auto size = 4096u; AllocationProperties properties(0, false, size, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto sharedAllocation = memoryManager.createGraphicsAllocationFromSharedHandle(handle, properties, false); EXPECT_NE(nullptr, sharedAllocation); EXPECT_FALSE(sharedAllocation->isCoherent()); EXPECT_NE(nullptr, sharedAllocation->getUnderlyingBuffer()); EXPECT_EQ(size, sharedAllocation->getUnderlyingBufferSize()); EXPECT_EQ(MemoryPool::SystemCpuInaccessible, sharedAllocation->getMemoryPool()); memoryManager.freeGraphicsMemory(sharedAllocation); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenCreateGraphicsAllocationFromSharedObjectIsCalledAndRootDeviceIndexIsSpecifiedThenGraphicsAllocationIsReturnedWithCorrectRootDeviceIndex) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, executionEnvironment); osHandle handle = 1; auto size = 4096u; AllocationProperties properties(0u, false, size, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0u); EXPECT_TRUE(properties.subDevicesBitfield.none()); EXPECT_EQ(properties.rootDeviceIndex, 0u); auto sharedAllocation = memoryManager.createGraphicsAllocationFromSharedHandle(handle, properties, false); EXPECT_NE(nullptr, sharedAllocation); EXPECT_EQ(0u, sharedAllocation->getRootDeviceIndex()); EXPECT_FALSE(sharedAllocation->isCoherent()); EXPECT_NE(nullptr, sharedAllocation->getUnderlyingBuffer()); EXPECT_EQ(size, sharedAllocation->getUnderlyingBufferSize()); EXPECT_EQ(MemoryPool::SystemCpuInaccessible, sharedAllocation->getMemoryPool()); memoryManager.freeGraphicsMemory(sharedAllocation); } TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenCreateGraphicsAllocationFromSharedObjectIsCalledWithSpecificBitnessThen32BitGraphicsAllocationIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MemoryManagerCreate memoryManager(false, false, 
executionEnvironment);
    osHandle handle = 1;
    auto size = 4096u;
    AllocationProperties properties(0, false, size, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);
    // The last argument is true here, and the result is expected to be a 32-bit allocation.
    auto sharedAllocation = memoryManager.createGraphicsAllocationFromSharedHandle(handle, properties, true);
    EXPECT_NE(nullptr, sharedAllocation);
    EXPECT_TRUE(sharedAllocation->is32BitAllocation());
    EXPECT_FALSE(sharedAllocation->isCoherent());
    EXPECT_NE(nullptr, sharedAllocation->getUnderlyingBuffer());
    EXPECT_EQ(size, sharedAllocation->getUnderlyingBufferSize());
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, sharedAllocation->getMemoryPool());

    memoryManager.freeGraphicsMemory(sharedAllocation);
}

// createGraphicsAllocationFromNTHandle() on OsAgnosticMemoryManager must return nullptr.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenCreateAllocationFromNtHandleIsCalledThenReturnNullptr) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    auto graphicsAllocation = memoryManager.createGraphicsAllocationFromNTHandle((void *)1, 0);
    EXPECT_EQ(nullptr, graphicsAllocation);
}

// lockResource() must return the underlying buffer plus the allocation offset and toggle isLocked();
// unlockResource() must clear isLocked() again.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenLockUnlockCalledThenReturnCpuPtr) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize});
    ASSERT_NE(nullptr, allocation);

    EXPECT_FALSE(allocation->isLocked());
    auto ptr = memoryManager.lockResource(allocation);
    // NOTE(review): cast target type is missing in the text as given - presumably stripped by extraction.
    EXPECT_EQ(ptrOffset(allocation->getUnderlyingBuffer(), static_cast(allocation->getAllocationOffset())), ptr);
    EXPECT_TRUE(allocation->isLocked());
    memoryManager.unlockResource(allocation);
    EXPECT_FALSE(allocation->isLocked());

    memoryManager.freeGraphicsMemory(allocation);
}

// Start of the allocation-offset test; it continues past this line.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenGraphicsAllocationContainsOffsetWhenAddressIsObtainedThenOffsetIsAdded) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
MemoryManagerCreate memoryManager(false, false, executionEnvironment);

    auto graphicsAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize});

    // Addresses sampled before the offset is set ...
    auto graphicsAddress = graphicsAllocation->getGpuAddress();
    auto graphicsAddressToPatch = graphicsAllocation->getGpuAddressToPatch();

    graphicsAllocation->setAllocationOffset(4);

    // ... and after; both getters must have advanced by the allocation offset.
    auto offsetedGraphicsAddress = graphicsAllocation->getGpuAddress();
    auto offsetedGraphicsAddressToPatch = graphicsAllocation->getGpuAddressToPatch();

    EXPECT_EQ(offsetedGraphicsAddress, graphicsAddress + graphicsAllocation->getAllocationOffset());
    EXPECT_EQ(offsetedGraphicsAddressToPatch, graphicsAddressToPatch + graphicsAllocation->getAllocationOffset());

    memoryManager.freeGraphicsMemory(graphicsAllocation);
}

// createGraphicsAllocationWithPadding() must return a non-null allocation distinct from the input one.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenGraphicsAllocationIsPaddedThenNewGraphicsAllocationIsCreated) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(false, false, executionEnvironment);
    auto graphicsAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize});

    auto sizeWithPadding = 8192;
    auto paddedGraphicsAllocation = memoryManager.createGraphicsAllocationWithPadding(graphicsAllocation, sizeWithPadding);
    ASSERT_NE(nullptr, paddedGraphicsAllocation);
    EXPECT_NE(paddedGraphicsAllocation, graphicsAllocation);

    // Both allocations are freed explicitly, the padded one first.
    memoryManager.freeGraphicsMemory(paddedGraphicsAllocation);
    memoryManager.freeGraphicsMemory(graphicsAllocation);
}

// Allocates an int[10] that is never deleted and sets fastLeaksDetectionMode to EXPECT_TO_LEAK.
// NOTE(review): the leak looks deliberate (it exercises the leak detector) - confirm before "fixing".
TEST(OsAgnosticMemoryManager, pleaseDetectLeak) {
    void *ptr = new int[10];
    EXPECT_NE(nullptr, ptr);
    MemoryManagement::fastLeaksDetectionMode = MemoryManagement::LeakDetectionMode::EXPECT_TO_LEAK;
}

// Start of the no-alignment test; it continues past this line.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateMemoryWithNoAlignmentProvidedThenAllocationIsAlignedToPageSize) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(false,
false, executionEnvironment);
    MockAllocationProperties properties(0, MemoryConstants::pageSize >> 1);
    properties.alignment = 0;
    auto ga = memoryManager.allocateGraphicsMemoryWithProperties(properties);
    // NOTE(review): cast target type is missing in the text as given - presumably stripped by extraction.
    uintptr_t ptr = reinterpret_cast(ga->getUnderlyingBuffer());
    // Keep only the bits below allocationAlignment; they must all be zero.
    ptr &= (MemoryConstants::allocationAlignment - 1);
    EXPECT_EQ(ptr, 0u);

    memoryManager.freeGraphicsMemory(ga);
}

// With properties.alignment = pageSize - 1, the underlying buffer must still be aligned to
// MemoryConstants::allocationAlignment.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenAllocateMemoryWithAlignmentNotAlignedToPageSizeThenAlignmentIsAlignedUp) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(false, false, executionEnvironment);
    MockAllocationProperties properties(0, MemoryConstants::pageSize >> 1);
    properties.alignment = MemoryConstants::pageSize - 1;
    auto ga = memoryManager.allocateGraphicsMemoryWithProperties(properties);
    uintptr_t ptr = reinterpret_cast(ga->getUnderlyingBuffer());
    ptr &= (MemoryConstants::allocationAlignment - 1);
    EXPECT_EQ(ptr, 0u);

    memoryManager.freeGraphicsMemory(ga);
}

// isMemoryBudgetExhausted() on OsAgnosticMemoryManager must return false.
TEST(OsAgnosticMemoryManager, givenCommonMemoryManagerWhenIsAskedIfApplicationMemoryBudgetIsExhaustedThenFalseIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    EXPECT_FALSE(memoryManager.isMemoryBudgetExhausted());
}

// Fixture owning a MockMemoryManager(false, false); SetUp() calls overrideAsyncDeleterFlag(true).
class MemoryManagerWithAsyncDeleterTest : public ::testing::Test {
  public:
    MemoryManagerWithAsyncDeleterTest() : memoryManager(false, false){};
    void SetUp() override {
        memoryManager.overrideAsyncDeleterFlag(true);
    }
    const uint32_t rootDeviceIndex = 0u;
    MockMemoryManager memoryManager;
};

// Installs a MockDeferredDeleter and calls waitForDeletions(); the test continues past this line.
TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenWaitForDeletionsIsCalledThenDeferredDeleterIsNullptr) {
    auto deleter = new MockDeferredDeleter();
    memoryManager.setDeferredDeleter(deleter);
    deleter->expectDrainBlockingValue(false);
    EXPECT_EQ(deleter, memoryManager.getDeferredDeleter());
    EXPECT_EQ(0, deleter->drainCalled);
    memoryManager.waitForDeletions();
EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
}

// A second waitForDeletions() call, made when the deferred deleter is already nullptr, must be harmless.
TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenWaitForDeletionsIsCalledTwiceThenItDoesntCrash) {
    EXPECT_NE(nullptr, memoryManager.getDeferredDeleter());
    memoryManager.waitForDeletions();
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
    memoryManager.waitForDeletions();
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
}

// With one deletion queued in a MockDeferredDeleter, allocating for a host pointer through
// MemoryManager::allocateGraphicsMemoryWithProperties must leave the deleter's queue empty.
TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledWithPtrAndDeleterIsNotNullptrThenDeletersQueueIsReleased) {
    MockDeferredDeleter *deleter = new MockDeferredDeleter();
    memoryManager.setDeferredDeleter(deleter);
    EXPECT_NE(nullptr, memoryManager.getDeferredDeleter());
    auto deletion = new MockDeferrableDeletion();
    // Calls the DeferredDeleter base implementation explicitly.
    deleter->DeferredDeleter::deferDeletion(deletion);
    EXPECT_FALSE(deleter->isQueueEmpty());

    char ptr[128];
    EXPECT_EQ(0, deleter->drainCalled);
    deleter->expectDrainBlockingValue(true);
    auto allocation = memoryManager.MemoryManager::allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, sizeof(char)}, ptr);
    EXPECT_TRUE(deleter->isQueueEmpty());
    memoryManager.freeGraphicsMemoryImpl(allocation);
}

// The same allocation path must also work when the deferred deleter has been set to nullptr.
TEST_F(MemoryManagerWithAsyncDeleterTest, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledWithPtrAndDeleterIsNullptrThenItDoesntCrash) {
    memoryManager.setDeferredDeleter(nullptr);
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
    char ptr[128];
    auto allocation = memoryManager.MemoryManager::allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, sizeof(char)}, ptr);
    memoryManager.freeGraphicsMemoryImpl(allocation);
}

// isAsyncDeleterEnabled() must follow overrideAsyncDeleterFlag(); the test continues past this line.
TEST(OsAgnosticMemoryManager, givenMemoryManagerWhenIsAsyncDeleterEnabledCalledThenReturnsValueOfFlag) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(executionEnvironment);
    memoryManager.overrideAsyncDeleterFlag(false);
    EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled());
memoryManager.overrideAsyncDeleterFlag(true);
    EXPECT_TRUE(memoryManager.isAsyncDeleterEnabled());
}

// A default OsAgnosticMemoryManager must have the async deleter disabled and no deferred deleter.
TEST(OsAgnosticMemoryManager, givenDefaultMemoryManagerWhenItIsCreatedThenAsyncDeleterEnabledIsFalse) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
}

// Even with the EnableDeferredDeleter debug flag set to true, OsAgnosticMemoryManager must report
// the async deleter as disabled and hold no deferred deleter.
TEST(OsAgnosticMemoryManager, givenEnabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) {
    // The scoped restorer replaces the manual save/restore of the flag, matching the other tests
    // in this file that modify DebugManager flags.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableDeferredDeleter.set(true);
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
}

// Same expectations with the EnableDeferredDeleter debug flag set to false.
TEST(OsAgnosticMemoryManager, givenDisabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) {
    // Scoped restorer instead of a manual save/restore, as above.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableDeferredDeleter.set(false);
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
}

// Start of the 64kb host-memory test; it continues past this line.
TEST(OsAgnosticMemoryManager, GivenEnabled64kbPagesWhenHostMemoryAllocationIsCreatedThenAlignedto64KbAllocationIsReturned) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.Enable64kbpages.set(true);
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MemoryManagerCreate memoryManager(true, false, executionEnvironment);
GraphicsAllocation *galloc = memoryManager.allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize64k, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY});
    EXPECT_NE(nullptr, galloc);
    memoryManager.freeGraphicsMemory(galloc);

    // Allocate a second time; this allocation's CPU buffer and GPU address must be 64KB-aligned.
    galloc = memoryManager.allocateGraphicsMemoryWithProperties({0, MemoryConstants::pageSize64k, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY});
    EXPECT_NE(nullptr, galloc);
    EXPECT_NE(nullptr, galloc->getUnderlyingBuffer());
    EXPECT_EQ(0u, (uintptr_t)galloc->getUnderlyingBuffer() % MemoryConstants::pageSize64k);

    EXPECT_NE(0u, galloc->getGpuAddress());
    EXPECT_EQ(0u, (uintptr_t)galloc->getGpuAddress() % MemoryConstants::pageSize64k);
    memoryManager.freeGraphicsMemory(galloc);
}

// allocate32BitGraphicsMemory() with INTERNAL_HEAP type must keep the given pointer and size as the
// underlying buffer and buffer size.
TEST(OsAgnosticMemoryManager, givenPointerAndSizeWhenCreateInternalAllocationIsCalledThenGraphicsAllocationIsReturned) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(false, false, executionEnvironment);
    auto ptr = (void *)0x100000;
    size_t allocationSize = 4096;
    auto graphicsAllocation = memoryManager.allocate32BitGraphicsMemory(0, allocationSize, ptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP);
    // NOTE(review): graphicsAllocation is dereferenced without a null check.
    EXPECT_EQ(ptr, graphicsAllocation->getUnderlyingBuffer());
    EXPECT_EQ(allocationSize, graphicsAllocation->getUnderlyingBufferSize());
    memoryManager.freeGraphicsMemory(graphicsAllocation);
}

// allocateGraphicsMemoryForNonSvmHostPtr() for 13 bytes at host pointer value 0x5001 must return an
// allocation of size 13 with allocation offset 1.
TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledThenAllocationIsCreated) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    MockMemoryManager memoryManager(executionEnvironment);
    AllocationData allocationData;
    allocationData.size = 13;
    // NOTE(review): cast target type is missing in the text as given - presumably stripped by extraction.
    allocationData.hostPtr = reinterpret_cast(0x5001);
    auto allocation = memoryManager.allocateGraphicsMemoryForNonSvmHostPtr(allocationData);
    EXPECT_NE(nullptr, allocation);
    EXPECT_EQ(13u, allocation->getUnderlyingBufferSize());
    EXPECT_EQ(1u, allocation->getAllocationOffset());
memoryManager.freeGraphicsMemory(allocation);
}

// Parameterized fixture; the parameter is read as a bool (requiresL3Flush) by the tests below.
// NOTE(review): TestWithParam has no template argument in the text as given - presumably stripped
// by extraction; confirm against the real file.
using OsAgnosticMemoryManagerWithParams = ::testing::TestWithParam;

// Only meaningful when the root device environment is not full-range SVM on a 64-bit build:
// an EXTERNAL_HOST_PTR allocation must have no fragments and must carry the requested L3-flush setting.
TEST_P(OsAgnosticMemoryManagerWithParams, givenReducedGpuAddressSpaceWhenAllocateGraphicsMemoryForHostPtrIsCalledThenAllocationWithoutFragmentsIsCreated) {
    bool requiresL3Flush = GetParam();
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());
    if (executionEnvironment.rootDeviceEnvironments[0]->isFullRangeSvm() || is32bit) {
        // Report the case as skipped instead of silently passing, as the sibling
        // full-address-space test already does.
        GTEST_SKIP();
    }
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    auto hostPtr = reinterpret_cast(0x5001);
    AllocationProperties properties{0, false, 13, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush;
    auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(properties, hostPtr);
    // Fatal assertion: the allocation is dereferenced right below.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(0u, allocation->fragmentsStorage.fragmentCount);
    EXPECT_EQ(requiresL3Flush, allocation->isFlushL3Required());

    memoryManager.freeGraphicsMemory(allocation);
}

// Start of the full-address-space counterpart; it continues past this line.
TEST_P(OsAgnosticMemoryManagerWithParams, givenFullGpuAddressSpaceWhenAllocateGraphicsMemoryForHostPtrIsCalledThenAllocationWithFragmentsIsCreated) {
    bool requiresL3Flush = GetParam();
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get());
    if ((!executionEnvironment.rootDeviceEnvironments[0]->isFullRangeSvm() && !is32bit) || !defaultHwInfo->capabilityTable.hostPtrTrackingEnabled) {
        GTEST_SKIP();
    }
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    auto hostPtr = reinterpret_cast(0x5001);
    AllocationProperties properties{0, false, 13, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false};
    properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush;
    auto allocation =
memoryManager.allocateGraphicsMemoryWithProperties(properties, hostPtr); EXPECT_NE(nullptr, allocation); EXPECT_EQ(1u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(requiresL3Flush, allocation->isFlushL3Required()); EXPECT_EQ(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, allocation->getAllocationType()); memoryManager.freeGraphicsMemory(allocation); } TEST_P(OsAgnosticMemoryManagerWithParams, givenDisabledHostPtrTrackingWhenAllocateGraphicsMemoryForHostPtrIsCalledThenAllocationWithoutFragmentsIsCreated) { if (is32bit) { GTEST_SKIP(); } DebugManagerStateRestore restore; DebugManager.flags.EnableHostPtrTracking.set(0); bool requiresL3Flush = GetParam(); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); if (!executionEnvironment.rootDeviceEnvironments[0]->isFullRangeSvm()) { return; } OsAgnosticMemoryManager memoryManager(executionEnvironment); auto hostPtr = reinterpret_cast(0x5001); AllocationProperties properties{0, false, 13, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(properties, hostPtr); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0u, allocation->fragmentsStorage.fragmentCount); EXPECT_EQ(requiresL3Flush, allocation->isFlushL3Required()); memoryManager.freeGraphicsMemory(allocation); } INSTANTIATE_TEST_CASE_P(OsAgnosticMemoryManagerWithParams, OsAgnosticMemoryManagerWithParams, ::testing::Values(false, true)); TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerAndFreeMemoryEnabledWhenGraphicsAllocationIsDestroyedThenFreeMemoryOnAubManagerShouldBeCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableFreeMemory.set(true); MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); 
MockAubManager *mockManager = new MockAubManager();
    MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "file_name.aub", CommandStreamReceiverType::CSR_AUB);
    // NOTE(review): unique_ptr has no template argument in the text as given - presumably stripped by extraction.
    mockAubCenter->aubManager = std::unique_ptr(mockManager);
    executionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter);

    auto gfxAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize});

    // freeMemoryCalled must flip from false to true across freeGraphicsMemory().
    EXPECT_FALSE(mockManager->freeMemoryCalled);
    memoryManager.freeGraphicsMemory(gfxAllocation);
    EXPECT_TRUE(mockManager->freeMemoryCalled);
}

// With the EnableFreeMemory debug flag set to false, freeing an allocation must leave the mock AUB
// manager's freeMemoryCalled at false.
// NOTE(review): the test name ends in "...ShouldBeCalled" while the final expectation is
// EXPECT_FALSE - the name looks like it was meant to read "ShouldNotBeCalled"; confirm.
TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerAndFreeMemoryDisabledWhenGraphicsAllocationIsDestroyedThenFreeMemoryOnAubManagerShouldBeCalled) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableFreeMemory.set(false);
    MockExecutionEnvironment executionEnvironment;
    OsAgnosticMemoryManager memoryManager(executionEnvironment);
    MockAubManager *mockManager = new MockAubManager();
    MockAubCenter *mockAubCenter = new MockAubCenter(defaultHwInfo.get(), false, "file_name.aub", CommandStreamReceiverType::CSR_AUB);
    mockAubCenter->aubManager = std::unique_ptr(mockManager);
    executionEnvironment.rootDeviceEnvironments[0]->aubCenter.reset(mockAubCenter);

    auto gfxAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize});

    EXPECT_FALSE(mockManager->freeMemoryCalled);
    memoryManager.freeGraphicsMemory(gfxAllocation);
    EXPECT_FALSE(mockManager->freeMemoryCalled);
}

// Start of the SHARED_RESOURCE_COPY test; it continues past this line.
TEST(MemoryManager, givenSharedResourceCopyWhenAllocatingGraphicsMemoryThenAllocateGraphicsMemoryForImageIsCalled) {
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    MockMemoryManager memoryManager(false, true, *executionEnvironment);
    cl_image_desc imgDesc = {};
    imgDesc.image_width = 1;
    imgDesc.image_height = 1;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
MockMemoryManager::AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;
    allocationData.type = GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY;

    auto imageAllocation = memoryManager.allocateGraphicsMemory(allocationData);
    EXPECT_NE(nullptr, imageAllocation);
    // The mock records that the image allocation path was taken.
    EXPECT_TRUE(memoryManager.allocateForImageCalled);

    memoryManager.freeGraphicsMemory(imageAllocation);
}

// A BUFFER allocation with flags.shareable set must take the mock's shareable allocation path
// (allocateForShareableCalled becomes true).
TEST(MemoryManager, givenShareableWhenAllocatingGraphicsMemoryThenAllocateShareableIsCalled) {
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    MockMemoryManager memoryManager(false, true, *executionEnvironment);

    MockMemoryManager::AllocationData allocationData;
    allocationData.size = 4096u;
    allocationData.type = GraphicsAllocation::AllocationType::BUFFER;
    allocationData.flags.shareable = true;

    auto allocation = memoryManager.allocateGraphicsMemory(allocationData);
    EXPECT_NE(nullptr, allocation);
    EXPECT_TRUE(memoryManager.allocateForShareableCalled);

    memoryManager.freeGraphicsMemory(allocation);
}

// Constructs a Gmm with a null pointer and size 65536 on the fixture device's client context, then deletes it.
TEST_F(MemoryAllocatorTest, GivenSizeWhenGmmIsCreatedThenSuccess) {
    Gmm *gmm = new Gmm(device->getGmmClientContext(), nullptr, 65536, false);
    EXPECT_NE(nullptr, gmm);
    delete gmm;
}

// NOTE(review): Test has no template argument in the text as given - presumably stripped by
// extraction; the tests below use fixture members `memoryManager` and `csr`.
typedef Test MemoryManagerWithCsrTest;

// Two host-pointer allocations are created first (fragment count 2, then 4); the test continues
// past this line.
TEST_F(MemoryManagerWithCsrTest, GivenAllocationsInHostPtrManagerWhenBiggerOverllapingAllcoationIsCreatedAndNothingToCleanThenAbortExecution) {
    void *cpuPtr1 = (void *)0x100004;
    void *cpuPtr2 = (void *)0x101008;
    void *cpuPtr3 = (void *)0x100000;

    // NOTE(review): cast target type is missing in the text as given - presumably stripped by extraction.
    auto hostPtrManager = static_cast(memoryManager->getHostPtrManager());
    auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1);
    EXPECT_EQ(2u, hostPtrManager->getFragmentCount());

    auto graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 3}, cpuPtr2);
    EXPECT_EQ(4u,
/*
 * Contents of this physical line, in order:
 *  1. Rest of the "...NothingToCleanThenAbortExecution" test: a third, 10-page
 *     allocation at 0x100000 is attempted inside try/catch(...). The test
 *     expects the catch block to run (catchMe), graphicsAllocation3 to stay
 *     null, and the recorded fragment cpuPtr values of the first two
 *     allocations to equal the page-masked host pointers. All three pointers
 *     are then passed to freeGraphicsMemory (the third one is null here).
 *  2. Start of the "...ReadyForCleaning...TemporaryAllocationsAreCleaned" test:
 *     the same two host-pointer allocations are created and checked; the
 *     fragment lookups continue on the next line.
 */
hostPtrManager->getFragmentCount()); GraphicsAllocation *graphicsAllocation3 = nullptr; bool catchMe = false; try { graphicsAllocation3 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 10}, cpuPtr3); } catch (...) { catchMe = true; } EXPECT_NE(nullptr, graphicsAllocation1); EXPECT_NE(nullptr, graphicsAllocation2); EXPECT_EQ(nullptr, graphicsAllocation3); EXPECT_TRUE(catchMe); EXPECT_EQ((uintptr_t)cpuPtr1 & ~MemoryConstants::pageMask, (uintptr_t)graphicsAllocation1->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ((uintptr_t)cpuPtr2 & ~MemoryConstants::pageMask, (uintptr_t)graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ(((uintptr_t)cpuPtr2 + MemoryConstants::pageSize) & ~MemoryConstants::pageMask, (uintptr_t)graphicsAllocation2->fragmentsStorage.fragmentStorageData[1].cpuPtr); memoryManager->freeGraphicsMemory(graphicsAllocation1); memoryManager->freeGraphicsMemory(graphicsAllocation2); memoryManager->freeGraphicsMemory(graphicsAllocation3); } TEST_F(MemoryManagerWithCsrTest, GivenAllocationsInHostPtrManagerReadyForCleaningWhenBiggerOverllapingAllcoationIsCreatedThenTemporaryAllocationsAreCleaned) { void *cpuPtr1 = (void *)0x100004; void *cpuPtr2 = (void *)0x101008; void *cpuPtr3 = (void *)0x100000; auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, cpuPtr1); EXPECT_EQ(2u, hostPtrManager->getFragmentCount()); auto graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 3}, cpuPtr2); EXPECT_EQ(4u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, graphicsAllocation1); EXPECT_NE(nullptr, graphicsAllocation2); auto fragment1 = 
/*
 * Contents of this physical line, in order:
 *  1. Rest of the "...ReadyForCleaning..." test: the four fragments (page
 *     aligned down/up from cpuPtr1 and cpuPtr2) must exist; both allocations
 *     are handed to the CSR's internal allocation storage as
 *     TEMPORARY_ALLOCATION with task count 1; taskCount and
 *     csr->latestSentTaskCount are set to that value; a third 10-page
 *     allocation at cpuPtr3 must then succeed with exactly one fragment whose
 *     cpuPtr is cpuPtr3.
 *  2. TEST_F(..., givenAllocationThatWasNotUsed...): an allocation that was
 *     never given a task count is passed to
 *     checkGpuUsageAndDestroyGraphicsAllocations and the CSR's temporary
 *     allocation list is expected to be empty afterwards.
 *  3. The opening "TEST_F(MemoryManagerWithCsrTest," of the next test.
 *
 * NOTE(review): this line contains two "//" comments. Because the text has no
 * line breaks, everything after the first "//" is lexically part of that
 * comment; the original file presumably had a newline after each comment.
 */
hostPtrManager->getFragment(alignDown(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment1); auto fragment2 = hostPtrManager->getFragment(alignUp(cpuPtr1, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment2); auto fragment3 = hostPtrManager->getFragment(alignDown(cpuPtr2, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment3); auto fragment4 = hostPtrManager->getFragment(alignUp(cpuPtr2, MemoryConstants::pageSize)); EXPECT_NE(nullptr, fragment4); uint32_t taskCountReady = 1; auto storage = csr->getInternalAllocationStorage(); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation2), TEMPORARY_ALLOCATION, taskCountReady); EXPECT_EQ(4u, hostPtrManager->getFragmentCount()); // All fragments ready for release taskCount = taskCountReady; csr->latestSentTaskCount = taskCountReady; auto graphicsAllocation3 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize * 10}, cpuPtr3); EXPECT_NE(nullptr, graphicsAllocation3); // no more overlapping allocation, previous allocations cleaned EXPECT_EQ(1u, graphicsAllocation3->fragmentsStorage.fragmentCount); EXPECT_EQ(cpuPtr3, graphicsAllocation3->fragmentsStorage.fragmentStorageData[0].cpuPtr); memoryManager->freeGraphicsMemory(graphicsAllocation3); } TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasNotUsedWhencheckGpuUsageAndDestroyGraphicsAllocationsIsCalledThenItIsDestroyedInPlace) { auto notUsedAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(notUsedAllocation); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } TEST_F(MemoryManagerWithCsrTest, 
givenAllocationThatWasUsedAndIsCompletedWhencheckGpuUsageAndDestroyGraphicsAllocationsIsCalledThenItIsDestroyedInPlace) { auto usedAllocationButGpuCompleted = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto tagAddress = csr->getTagAddress(); ASSERT_NE(0u, *tagAddress); usedAllocationButGpuCompleted->updateTaskCount(*tagAddress - 1, csr->getOsContext().getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(usedAllocationButGpuCompleted); EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty()); } TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasUsedAndIsNotCompletedWhencheckGpuUsageAndDestroyGraphicsAllocationsIsCalledThenItIsAddedToTemporaryAllocationList) { memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); auto usedAllocationAndNotGpuCompleted = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto tagAddress = csr->getTagAddress(); usedAllocationAndNotGpuCompleted->updateTaskCount(*tagAddress + 1, csr->getOsContext().getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(usedAllocationAndNotGpuCompleted); EXPECT_FALSE(csr->getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(csr->getTemporaryAllocations().peekHead(), usedAllocationAndNotGpuCompleted); //change task count so cleanup will not clear alloc in use usedAllocationAndNotGpuCompleted->updateTaskCount(csr->peekLatestFlushedTaskCount(), csr->getOsContext().getContextId()); } class MockAlignMallocMemoryManager : public MockMemoryManager { public: MockAlignMallocMemoryManager(ExecutionEnvironment &executionEnvironment) : MockMemoryManager(executionEnvironment) { testMallocRestrictions.minAddress = 0; 
alignMallocRestrictions = nullptr; alignMallocCount = 0; alignMallocMaxIter = 3; returnNullBad = false; returnNullGood = false; } AlignedMallocRestrictions testMallocRestrictions; AlignedMallocRestrictions *alignMallocRestrictions; static const uintptr_t alignMallocMinAddress = 0x100000; static const uintptr_t alignMallocStep = 10; int alignMallocMaxIter; int alignMallocCount; bool returnNullBad; bool returnNullGood; void *alignedMallocWrapper(size_t size, size_t align) override { if (alignMallocCount < alignMallocMaxIter) { alignMallocCount++; if (!returnNullBad) { return reinterpret_cast(alignMallocMinAddress - alignMallocStep); } else { return nullptr; } } alignMallocCount = 0; if (!returnNullGood) { return reinterpret_cast(alignMallocMinAddress + alignMallocStep); } else { return nullptr; } }; void alignedFreeWrapper(void *) override { alignMallocCount = 0; } AlignedMallocRestrictions *getAlignedMallocRestrictions() override { return alignMallocRestrictions; } }; class MockAlignMallocMemoryManagerTest : public MemoryAllocatorTest { public: MockAlignMallocMemoryManager *alignedMemoryManager = nullptr; void SetUp() override { MemoryAllocatorTest::SetUp(); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); alignedMemoryManager = new (std::nothrow) MockAlignMallocMemoryManager(executionEnvironment); //assert we have memory manager ASSERT_NE(nullptr, memoryManager); } void TearDown() override { alignedMemoryManager->alignedFreeWrapper(nullptr); delete alignedMemoryManager; MemoryAllocatorTest::TearDown(); } }; TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWhenNullAlignRestrictionsThenNotUseRestrictions) { EXPECT_EQ(nullptr, memoryManager->getAlignedMallocRestrictions()); EXPECT_EQ(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress - MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = 
/*
 * Contents of this physical line, in order:
 *  1. Tail of the "NullAlignRestrictions" test: the value returned by
 *     allocateSystemMemory(0x1000, 0x1000) must equal expectedVal.
 *  2. "ZeroAlignRestrictions": restrictions point at testMallocRestrictions
 *     without changing minAddress here. With alignMallocCount = 0 the result
 *     must be minAddress - step; with alignMallocCount = maxIter + 1 it must
 *     be minAddress + step.
 *  3. "NonZero...FirstGoodAddress": minAddress is set to alignMallocMinAddress
 *     and the counter to maxIter + 1; minAddress + step is expected.
 *  4. Start of "NonZero...FirstNullAddress" (continues on the next line).
 *
 * NOTE(review): every reinterpret_cast on this line lacks a target type; the
 * template argument appears to have been stripped from this text (the results
 * are assigned to uintptr_t) - confirm against the original.
 */
reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWhenZeroAlignRestrictionsThenNotUseRestrictions) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = 0; uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress - MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); alignedMemoryManager->alignMallocCount = alignedMemoryManager->alignMallocMaxIter + 1; expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress + MockAlignMallocMemoryManager::alignMallocStep; memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstGoodAddressThenUseRestrictionsAndReturnFirst) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = alignedMemoryManager->alignMallocMaxIter + 1; uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress + MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstNullAddressThenUseRestrictionsAndReturnFirstNull) { alignedMemoryManager->alignMallocRestrictions = 
/*
 * Contents of this physical line, in order (all tests install
 * testMallocRestrictions with minAddress = alignMallocMinAddress):
 *  1. Rest of "FirstNullAddress": counter = maxIter + 1 and returnNullGood set,
 *     so the wrapper's first answer is nullptr; 0 is expected.
 *  2. "FirstBadAnotherGoodAddress": counter = 0, so the wrapper first yields
 *     addresses below minAddress; the test expects allocateSystemMemory to
 *     end up returning minAddress + step.
 *  3. "FirstBadAnotherNullAddress": as above but with returnNullGood set;
 *     0 is expected.
 *  4. The opening "TEST(GraphicsAllocation," of the next test.
 */
&alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = alignedMemoryManager->alignMallocMaxIter + 1; alignedMemoryManager->returnNullGood = true; uintptr_t expectedVal = 0; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstBadAnotherGoodAddressThenUseRestrictionsAndReturnAnother) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = 0; uintptr_t expectedVal = MockAlignMallocMemoryManager::alignMallocMinAddress + MockAlignMallocMemoryManager::alignMallocStep; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST_F(MockAlignMallocMemoryManagerTest, givenMemoryManagerWitNonZeroAlignRestrictionsWhenFirstBadAnotherNullAddressThenUseRestrictionsAndReturnNull) { alignedMemoryManager->alignMallocRestrictions = &alignedMemoryManager->testMallocRestrictions; alignedMemoryManager->testMallocRestrictions.minAddress = MockAlignMallocMemoryManager::alignMallocMinAddress; EXPECT_NE(nullptr, alignedMemoryManager->getAlignedMallocRestrictions()); alignedMemoryManager->alignMallocCount = 0; alignedMemoryManager->returnNullGood = true; uintptr_t expectedVal = 0; uintptr_t memVal = reinterpret_cast(alignedMemoryManager->allocateSystemMemory(0x1000, 0x1000)); EXPECT_EQ(expectedVal, memVal); } TEST(GraphicsAllocation, 
/*
 * Contents of this physical line, in order:
 *  1. Name and body of the TEST(GraphicsAllocation, ...) opened on the previous
 *     line: a MockGraphicsAllocation built from the pointer 0xf0000000 must
 *     report 0xf0000000 from getGpuAddress().
 *  2. Alias GraphicsAllocationTests = ::testing::Test.
 *  3. HWTEST_F "...UsedOnlyByNonDefaultCsr...": on a fresh MockDevice, the
 *     engine at HwHelper::lowPriorityGpgpuEngineIndex gets its taskCount and
 *     latestFlushedTaskCount set to tag + 1, the allocation records tag + 1 for
 *     that engine's context, and after
 *     checkGpuUsageAndDestroyGraphicsAllocations the head of that CSR's
 *     temporary allocation list must be non-null. The tag value is then
 *     incremented.
 *  4. Start of HWTEST_F "...UsedOnlyByNonDefaultDevice..." (continues on the
 *     next line).
 *
 * NOTE(review): the "// no need to call freeGraphicsAllocation" comment is not
 * followed by a line break in this text, so the rest of the line is lexically
 * inside that comment.
 */
givenCpuPointerBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) { uintptr_t address = 0xf0000000; void *addressWithTrailingBitSet = reinterpret_cast(address); uint64_t expectedGpuAddress = 0xf0000000; MockGraphicsAllocation graphicsAllocation(addressWithTrailingBitSet, 1u); EXPECT_EQ(expectedGpuAddress, graphicsAllocation.getGpuAddress()); } using GraphicsAllocationTests = ::testing::Test; HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultCsrWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto nonDefaultOsContext = device->engines[HwHelper::lowPriorityGpgpuEngineIndex].osContext; auto nonDefaultCsr = static_cast *>(device->engines[HwHelper::lowPriorityGpgpuEngineIndex].commandStreamReceiver); auto memoryManager = device->getExecutionEnvironment()->memoryManager.get(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress() + 1; nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress() + 1; graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress() + 1, nonDefaultOsContext->getContextId()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); EXPECT_NE(nullptr, nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekHead()); (*nonDefaultCsr->getTagAddress())++; // no need to call freeGraphicsAllocation } HWTEST_F(GraphicsAllocationTests, givenAllocationUsedOnlyByNonDefaultDeviceWhenCheckingUsageBeforeDestroyThenStoreItAsTemporaryAllocation) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto &defaultCommandStreamReceiver = device->getGpgpuCommandStreamReceiver(); 
/*
 * Contents of this physical line, in order:
 *  1. Rest of "...UsedOnlyByNonDefaultDevice...": the receiver at
 *     device->commandStreamReceivers[1] must have a context id different from
 *     the default receiver's. Its taskCount and latestFlushedTaskCount are set
 *     to tag + 1 and the allocation records the same value for that context.
 *     That receiver's temporary allocation list must be empty before
 *     checkGpuUsageAndDestroyGraphicsAllocations and non-empty after it. The
 *     tag value is then incremented.
 *  2. Start of HWTEST_F "...UsedByManyOsContexts...": a MockMemoryManager and a
 *     MockDeferredDeleter (expectDrainBlockingValue(false)) are installed
 *     before the MockDevice is created; the engine lookups continue on the
 *     next line.
 *
 * NOTE(review): the "// no need to call freeGraphicsAllocation" comment is not
 * followed by a line break in this text, so the rest of the line is lexically
 * inside that comment.
 */
auto &nonDefaultCommandStreamReceiver = static_cast &>(*device->commandStreamReceivers[1]); auto memoryManager = executionEnvironment->memoryManager.get(); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); auto notReadyTaskCount = *nonDefaultCommandStreamReceiver.getTagAddress() + 1; EXPECT_NE(defaultCommandStreamReceiver.getOsContext().getContextId(), nonDefaultCommandStreamReceiver.getOsContext().getContextId()); nonDefaultCommandStreamReceiver.taskCount = notReadyTaskCount; nonDefaultCommandStreamReceiver.latestFlushedTaskCount = notReadyTaskCount; graphicsAllocation->updateTaskCount(notReadyTaskCount, nonDefaultCommandStreamReceiver.getOsContext().getContextId()); EXPECT_TRUE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); EXPECT_FALSE(nonDefaultCommandStreamReceiver.getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); (*nonDefaultCommandStreamReceiver.getTagAddress())++; // no need to call freeGraphicsAllocation } HWTEST_F(GraphicsAllocationTests, givenAllocationUsedByManyOsContextsWhenCheckingUsageBeforeDestroyThenMultiContextDestructorIsUsedForWaitingForAllOsContexts) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); auto multiContextDestructor = new MockDeferredDeleter(); multiContextDestructor->expectDrainBlockingValue(false); memoryManager->multiContextResourceDestructor.reset(multiContextDestructor); auto device = std::unique_ptr(MockDevice::create(executionEnvironment, 0u)); auto nonDefaultOsContext = device->engines[HwHelper::lowPriorityGpgpuEngineIndex].osContext; auto nonDefaultCsr = static_cast 
/*
 * Contents of this physical line, in order:
 *  1. Rest of "...UsedByManyOsContexts...": the default context must not be low
 *     priority and the non-default one must be. The allocation records the
 *     current tag value for the non-default context and 0 for the default one,
 *     so isUsedByManyOsContexts() is expected to be true. After
 *     checkGpuUsageAndDestroyGraphicsAllocations the mock deleter must have
 *     seen exactly one deferDeletion call and both CSRs' temporary lists must
 *     be empty.
 *  2. TEST(GraphicsAllocation, givenSharedHandleBasedConstructor...): a
 *     GraphicsAllocation built with a shared handle and pointer 0xf0000000
 *     must report 0xf0000000 from getGpuAddress().
 *  3. Start of TEST(ResidencyDataTest, givenOsContextWhenItIsRegistered...)
 *     (continues on the next line).
 *
 * NOTE(review): the "// used and ready" comment is not followed by a line
 * break in this text, so the rest of the line is lexically inside it.
 */
*>(device->engines[HwHelper::lowPriorityGpgpuEngineIndex].commandStreamReceiver); auto defaultCsr = static_cast *>(device->getDefaultEngine().commandStreamReceiver); auto defaultOsContext = device->getDefaultEngine().osContext; EXPECT_FALSE(defaultOsContext->isLowPriority()); EXPECT_TRUE(nonDefaultOsContext->isLowPriority()); auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); nonDefaultCsr->taskCount = *nonDefaultCsr->getTagAddress(); nonDefaultCsr->latestFlushedTaskCount = *nonDefaultCsr->getTagAddress(); graphicsAllocation->updateTaskCount(*nonDefaultCsr->getTagAddress(), nonDefaultOsContext->getContextId()); graphicsAllocation->updateTaskCount(0, defaultOsContext->getContextId()); // used and ready EXPECT_TRUE(graphicsAllocation->isUsedByManyOsContexts()); memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation); EXPECT_EQ(1, multiContextDestructor->deferDeletionCalled); EXPECT_TRUE(nonDefaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); EXPECT_TRUE(defaultCsr->getInternalAllocationStorage()->getTemporaryAllocations().peekIsEmpty()); } TEST(GraphicsAllocation, givenSharedHandleBasedConstructorWhenGraphicsAllocationIsCreatedThenGpuAddressHasCorrectValue) { uintptr_t address = 0xf0000000; void *addressWithTrailingBitSet = reinterpret_cast(address); uint64_t expectedGpuAddress = 0xf0000000; osHandle sharedHandle{}; GraphicsAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, addressWithTrailingBitSet, 1u, sharedHandle, MemoryPool::MemoryNull); EXPECT_EQ(expectedGpuAddress, graphicsAllocation.getGpuAddress()); } TEST(ResidencyDataTest, givenOsContextWhenItIsRegisteredToMemoryManagerThenRefCountIncreases) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto memoryManager = new MockMemoryManager(false, false, executionEnvironment); 
/*
 * Contents of this physical line, in order:
 *  1. Rest of "givenOsContextWhenItIsRegistered...": after one
 *     createAndRegisterOsContext call the manager must report one registered
 *     engine whose OS context has an internal ref count of 1.
 *  2. TEST(MemoryManagerRegisteredEnginesTest, ...Unregistered...): the default
 *     engine's OS context is given an extra internal reference (count 2);
 *     unregisterEngineForCsr must bring the count to 1; the extra reference is
 *     then dropped by the test.
 *  3. TEST(ResidencyDataTest, givenDeviceBitfield...): registers a context
 *     with deviceBitfield 0b11 and PreemptionMode::MidThread and expects
 *     getNumSupportedDevices() == 2 and the same bitfield back; the
 *     preemption-mode check continues on the next line.
 */
executionEnvironment.memoryManager.reset(memoryManager); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u)); memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); EXPECT_EQ(1u, memoryManager->getRegisteredEnginesCount()); EXPECT_EQ(1, memoryManager->registeredEngines[0].osContext->getRefInternalCount()); } TEST(MemoryManagerRegisteredEnginesTest, givenOsContextWhenItIsUnregisteredFromMemoryManagerThenRefCountDecreases) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); auto memoryManager = device->getMemoryManager(); auto &engine = device->getDefaultEngine(); engine.osContext->incRefInternal(); EXPECT_EQ(2, engine.osContext->getRefInternalCount()); memoryManager->unregisterEngineForCsr(engine.commandStreamReceiver); EXPECT_EQ(1, engine.osContext->getRefInternalCount()); engine.osContext->decRefInternal(); } TEST(ResidencyDataTest, givenDeviceBitfieldWhenCreatingOsContextThenSetValidValue) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); auto memoryManager = new MockMemoryManager(false, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u)); DeviceBitfield deviceBitfield = 0b11; PreemptionMode preemptionMode = PreemptionMode::MidThread; memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], deviceBitfield, preemptionMode, false, false, false); EXPECT_EQ(2u, memoryManager->registeredEngines[0].osContext->getNumSupportedDevices()); EXPECT_EQ(deviceBitfield, memoryManager->registeredEngines[0].osContext->getDeviceBitfield()); EXPECT_EQ(preemptionMode, 
/*
 * Contents of this physical line, in order:
 *  1. Closing expectation of the device-bitfield test: the registered context
 *     must report the requested preemption mode.
 *  2. TEST(ResidencyDataTest, givenTwoOsContexts...): with a two-root-device
 *     MockExecutionEnvironment, two CSRs are registered with engine instances
 *     [0] and [1]; the manager must report two registered engines, each with
 *     an internal ref count of 1.
 *  3. TEST(ResidencyDataTest, givenGpgpuEngines...): maxOsContextCount must be
 *     at least the number of GPGPU engine instances.
 *  4. Start of "...UpdateCompletionDataIsCalled...": a local MockResidencyData
 *     exposes lastFenceValues and two MockOsContext objects are created with
 *     ids 0 and 1 (the second constructor call continues on the next line).
 */
memoryManager->registeredEngines[0].osContext->getPreemptionMode()); } TEST(ResidencyDataTest, givenTwoOsContextsWhenTheyAreRegisteredFromHigherToLowerThenProperSizeIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, 2u); auto memoryManager = new MockMemoryManager(false, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); std::unique_ptr csr(createCommandStream(executionEnvironment, 0u)); std::unique_ptr csr1(createCommandStream(executionEnvironment, 1u)); memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); memoryManager->createAndRegisterOsContext(csr1.get(), HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[1], 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); EXPECT_EQ(2u, memoryManager->getRegisteredEnginesCount()); EXPECT_EQ(1, memoryManager->registeredEngines[0].osContext->getRefInternalCount()); EXPECT_EQ(1, memoryManager->registeredEngines[1].osContext->getRefInternalCount()); } TEST(ResidencyDataTest, givenGpgpuEnginesWhenAskedForMaxOsContextCountThenValueIsGreaterOrEqual) { auto &engines = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo); EXPECT_TRUE(MemoryManager::maxOsContextCount >= engines.size()); } TEST(ResidencyDataTest, givenResidencyDataWhenUpdateCompletionDataIsCalledThenItIsProperlyUpdated) { struct MockResidencyData : public ResidencyData { using ResidencyData::lastFenceValues; }; MockResidencyData residency; MockOsContext osContext(0u, 1, HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); MockOsContext osContext2(1u, 1, 
/*
 * Contents of this physical line, in order:
 *  1. Rest of "...UpdateCompletionDataIsCalled...": lastFenceValues must keep
 *     the size MemoryManager::maxOsContextCount throughout. Fence value 45 is
 *     stored for the first context (slot 0), 23 and then 373 for the second
 *     context (slot 1); each update must be visible both through the exposed
 *     array and through getFenceValueForContextId.
 *  2. Start of TEST(MemoryManagerTest, ...LockIsCalledOnLockedResource...):
 *     the base-class MemoryManager::lockResource is called twice on the same
 *     allocation; lockResourceCalled must stay at 1 and unlockResourceCalled
 *     at 0 after the second call. The pointer comparison and cleanup continue
 *     on the next line.
 */
HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[1], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); auto lastFenceValue = 45llu; auto lastFenceValue2 = 23llu; auto lastFenceValue3 = 373llu; EXPECT_EQ(MemoryManager::maxOsContextCount, residency.lastFenceValues.size()); residency.updateCompletionData(lastFenceValue, osContext.getContextId()); EXPECT_EQ(MemoryManager::maxOsContextCount, residency.lastFenceValues.size()); EXPECT_EQ(lastFenceValue, residency.lastFenceValues[0]); EXPECT_EQ(lastFenceValue, residency.getFenceValueForContextId(osContext.getContextId())); residency.updateCompletionData(lastFenceValue2, osContext2.getContextId()); EXPECT_EQ(MemoryManager::maxOsContextCount, residency.lastFenceValues.size()); EXPECT_EQ(lastFenceValue2, residency.lastFenceValues[1]); EXPECT_EQ(lastFenceValue2, residency.getFenceValueForContextId(osContext2.getContextId())); residency.updateCompletionData(lastFenceValue3, osContext2.getContextId()); EXPECT_EQ(lastFenceValue3, residency.lastFenceValues[1]); EXPECT_EQ(lastFenceValue3, residency.getFenceValueForContextId(osContext2.getContextId())); } TEST(MemoryManagerTest, givenMemoryManagerWhenLockIsCalledOnLockedResourceThenDoesNothing) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_FALSE(allocation->isLocked()); auto ptr = memoryManager.MemoryManager::lockResource(allocation); EXPECT_TRUE(allocation->isLocked()); EXPECT_EQ(1u, memoryManager.lockResourceCalled); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); auto ptr2 = memoryManager.MemoryManager::lockResource(allocation); EXPECT_TRUE(allocation->isLocked()); EXPECT_EQ(1u, memoryManager.lockResourceCalled); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); 
/*
 * Contents of this physical line, in order:
 *  1. End of the lock-on-locked-resource test: both lockResource calls must
 *     have returned the same pointer; the allocation is then freed.
 *  2. TEST(MemoryManagerTest, ...NotUnlocked...): an allocation is locked and
 *     freed without an explicit unlock; unlockResourceCalled must be 1 after
 *     freeGraphicsMemory.
 *  3. TEST(MemoryManagerTest, ...CleanedRootDeviceExecutions...):
 *     rootDeviceEnvironments is cleared before the allocation is freed and the
 *     free is wrapped in EXPECT_NO_THROW.
 *  4. Start of the L3-flush test: both flushL3RequiredForRead/Write property
 *     flags are set to 1 and the list of allocation types under test begins
 *     (the list continues on the next line).
 */
EXPECT_EQ(ptr, ptr2); memoryManager.freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenAllocationWasNotUnlockedThenItIsUnlockedDuringDestruction) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_FALSE(allocation->isLocked()); memoryManager.MemoryManager::lockResource(allocation); EXPECT_TRUE(allocation->isLocked()); EXPECT_EQ(1u, memoryManager.lockResourceCalled); EXPECT_EQ(0u, memoryManager.unlockResourceCalled); memoryManager.freeGraphicsMemory(allocation); EXPECT_EQ(1u, memoryManager.unlockResourceCalled); } TEST(MemoryManagerTest, givenExecutionEnvrionmentWithCleanedRootDeviceExecutionsWhenFreeGraphicsMemoryIsCalledThenMemoryManagerDoesntCrash) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); EXPECT_NE(nullptr, allocation); executionEnvironment.rootDeviceEnvironments.clear(); EXPECT_NO_THROW(memoryManager.freeGraphicsMemory(allocation)); } TEST(MemoryManagerTest, givenAllocationTypesThatMayNeedL3FlushWhenCallingGetAllocationDataThenFlushL3FlagIsCorrectlySet) { AllocationData allocData; AllocationProperties properties(0, 1, GraphicsAllocation::AllocationType::UNKNOWN); properties.flags.flushL3RequiredForRead = 1; properties.flags.flushL3RequiredForWrite = 1; GraphicsAllocation::AllocationType allocationTypesThatMayNeedL3Flush[] = { GraphicsAllocation::AllocationType::BUFFER, GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, GraphicsAllocation::AllocationType::GLOBAL_SURFACE, 
/*
 * Contents of this physical line, in order:
 *  1. Rest of the L3-flush test: for every listed allocation type,
 *     getAllocationData must set allocData.flags.flushL3 while both
 *     flushL3Required* property flags are 1, and must clear it once both
 *     flags are reset to 0.
 *  2. TEST(MemoryManagerTest, givenNullHostPtr...): isCopyRequired on a
 *     value-initialized ImageInfo with a null host pointer must be false.
 *  3. Start of "givenAllowedTiling...": a 1x1 CL_MEM_OBJECT_IMAGE2D descriptor
 *     with a CL_R / CL_UNSIGNED_INT8 format is converted into imgInfo; the
 *     remaining field setup and the expectation continue on the next line.
 */
GraphicsAllocation::AllocationType::IMAGE, GraphicsAllocation::AllocationType::PIPE, GraphicsAllocation::AllocationType::SHARED_IMAGE, GraphicsAllocation::AllocationType::SHARED_BUFFER, GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY, GraphicsAllocation::AllocationType::SVM_ZERO_COPY, GraphicsAllocation::AllocationType::SVM_GPU, GraphicsAllocation::AllocationType::SVM_CPU, GraphicsAllocation::AllocationType::WRITE_COMBINED, GraphicsAllocation::AllocationType::MAP_ALLOCATION}; MockMemoryManager mockMemoryManager; for (auto allocationType : allocationTypesThatMayNeedL3Flush) { properties.allocationType = allocationType; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.flushL3); } properties.flags.flushL3RequiredForRead = 0; properties.flags.flushL3RequiredForWrite = 0; for (auto allocationType : allocationTypesThatMayNeedL3Flush) { properties.allocationType = allocationType; MockMemoryManager::getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_FALSE(allocData.flags.flushL3); } } TEST(MemoryManagerTest, givenNullHostPtrWhenIsCopyRequiredIsCalledThenFalseIsReturned) { ImageInfo imgInfo{}; EXPECT_FALSE(MockMemoryManager::isCopyRequired(imgInfo, nullptr)); } TEST(MemoryManagerTest, givenAllowedTilingWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 1; imageDesc.image_height = 1; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport); imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = 
/*
 * Contents of this physical line, in order:
 *  1. Rest of "givenAllowedTiling...": rowPitch, slicePitch and size are
 *     derived from the 1x1 descriptor (linearStorage is left at its
 *     value-initialized state) and isCopyRequired must be true for the address
 *     of a local char.
 *  2. "givenDifferentRowPitch...": a 1D descriptor declares image_row_pitch =
 *     10 while imgInfo.rowPitch is computed as width * element size;
 *     isCopyRequired must be true.
 *  3. Start of "givenDifferentSlicePitchAndTilingNotAllowed...": a 1D 4x2
 *     descriptor whose image_slice_pitch is computed from (height + 3) rows;
 *     the multiplication by the element size continues on the next line.
 */
&surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, &memory)); } TEST(MemoryManagerTest, givenDifferentRowPitchWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 1; imageDesc.image_height = 1; imageDesc.image_row_pitch = 10; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport); imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory[10]; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, memory)); } TEST(MemoryManagerTest, givenDifferentSlicePitchAndTilingNotAllowedWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 4; imageDesc.image_height = 2; imageDesc.image_slice_pitch = imageDesc.image_width * (imageDesc.image_height + 3) * 
/*
 * Contents of this physical line, in order:
 *  1. Rest of the different-slice-pitch test: imgInfo.slicePitch is computed
 *     from the real height, so it differs from the descriptor's
 *     image_slice_pitch; isCopyRequired must be true.
 *  2. "givenNotCachelinAlignedPointer...": a 1D image of width 4096 is checked
 *     against &memory[1] of a local 8-byte array; isCopyRequired must be true.
 *     Only the pointer value is passed here; whether isCopyRequired reads
 *     through it is not visible from this text.
 *  3. Start of "givenCachelineAlignedPointerAndProperDescriptorValues...": a 1D
 *     descriptor of width 2 and height 1 is prepared; the imgInfo setup
 *     continues on the next line.
 */
surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory[8]; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, memory)); } TEST(MemoryManagerTest, givenNotCachelinAlignedPointerWhenIsCopyRequiredIsCalledThenTrueIsReturned) { ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 4096; imageDesc.image_height = 1; imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; char memory[8]; EXPECT_TRUE(MockMemoryManager::isCopyRequired(imgInfo, &memory[1])); } TEST(MemoryManagerTest, givenCachelineAlignedPointerAndProperDescriptorValuesWhenIsCopyRequiredIsCalledThenFalseIsReturned) { ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 2; imageDesc.image_height = 1; 
/*
 * Contents of this physical line, in order:
 *  1. Rest of the cacheline-aligned test: linearStorage is set to true, the
 *     host pointer comes from alignedMalloc(size, cacheLineSize), and
 *     isCopyRequired must be false; the pointer is released with alignedFree.
 *  2. "givenForcedLinearImages3DImage...": with the ForceLinearImages debug
 *     flag set (restored by DebugManagerStateRestore), linearStorage is taken
 *     from !hwHelper.tilingAllowed(...) for a 2x2x2 3D image and
 *     isCopyRequired must be false for a cacheline-aligned pointer.
 *     NOTE(review): imgInfo.size is set to slicePitch although image_depth is
 *     2 - confirm this is intentional.
 *  3. Start of the first HeapSelectorTest (its allocation object is
 *     initialized on the next line).
 */
imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; imgInfo.linearStorage = true; auto hostPtr = alignedMalloc(imgInfo.size, MemoryConstants::cacheLineSize); EXPECT_FALSE(MockMemoryManager::isCopyRequired(imgInfo, hostPtr)); alignedFree(hostPtr); } TEST(MemoryManagerTest, givenForcedLinearImages3DImageAndProperDescriptorValuesWhenIsCopyRequiredIsCalledThenFalseIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceLinearImages.set(true); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); ImageInfo imgInfo{}; cl_image_format imageFormat = {}; imageFormat.image_channel_data_type = CL_UNSIGNED_INT8; imageFormat.image_channel_order = CL_R; cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport); cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; imageDesc.image_width = 2; imageDesc.image_height = 2; imageDesc.image_depth = 2; imgInfo.imgDesc = Image::convertDescriptor(imageDesc); imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; imgInfo.rowPitch = imageDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes; imgInfo.slicePitch = imgInfo.rowPitch * imageDesc.image_height; imgInfo.size = imgInfo.slicePitch; imgInfo.linearStorage = !hwHelper.tilingAllowed(false, Image::isImage1d(Image::convertDescriptor(imgInfo.imgDesc)), false); auto hostPtr = alignedMalloc(imgInfo.size, MemoryConstants::cacheLineSize); EXPECT_FALSE(MockMemoryManager::isCopyRequired(imgInfo, hostPtr)); alignedFree(hostPtr); } TEST(HeapSelectorTest, given32bitInternalAllocationWhenSelectingHeapThenInternalHeapIsUsed) { GraphicsAllocation 
allocation{0, GraphicsAllocation::AllocationType::KERNEL_ISA, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(true); EXPECT_EQ(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, MemoryManager::selectHeap(&allocation, false, false)); } TEST(HeapSelectorTest, givenNon32bitInternalAllocationWhenSelectingHeapThenInternalHeapIsUsed) { GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::KERNEL_ISA, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(false); EXPECT_EQ(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, MemoryManager::selectHeap(&allocation, false, false)); } TEST(HeapSelectorTest, given32bitExternalAllocationWhenSelectingHeapThenExternalHeapIsUsed) { GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(true); EXPECT_EQ(HeapIndex::HEAP_EXTERNAL, MemoryManager::selectHeap(&allocation, false, false)); } TEST(HeapSelectorTest, givenLimitedAddressSpaceWhenSelectingHeapForExternalAllocationThenStandardHeapIsUsed) { GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; EXPECT_EQ(HeapIndex::HEAP_STANDARD, MemoryManager::selectHeap(&allocation, true, false)); } TEST(HeapSelectorTest, givenFullAddressSpaceWhenSelectingHeapForExternalAllocationWithPtrThenSvmHeapIsUsed) { GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; EXPECT_EQ(HeapIndex::HEAP_SVM, MemoryManager::selectHeap(&allocation, true, true)); } TEST(HeapSelectorTest, givenFullAddressSpaceWhenSelectingHeapForExternalAllocationWithoutPtrAndResourceIs64KSuitableThenStandard64kHeapIsUsed) { GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); auto gmm = 
std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, false); auto resourceInfo = static_cast(gmm->gmmResourceInfo.get()); resourceInfo->is64KBPageSuitableValue = true; allocation.setDefaultGmm(gmm.get()); EXPECT_EQ(HeapIndex::HEAP_STANDARD64KB, MemoryManager::selectHeap(&allocation, false, true)); } TEST(HeapSelectorTest, givenFullAddressSpaceWhenSelectingHeapForExternalAllocationWithoutPtrAndResourceIsNot64KSuitableThenStandardHeapIsUsed) { GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, false); auto resourceInfo = static_cast(gmm->gmmResourceInfo.get()); resourceInfo->is64KBPageSuitableValue = false; allocation.setDefaultGmm(gmm.get()); EXPECT_EQ(HeapIndex::HEAP_STANDARD, MemoryManager::selectHeap(&allocation, false, true)); } TEST(HeapSelectorTest, givenFullAddressSpaceWhenSelectingHeapForNullAllocationWithoutPtrThenStandardHeapIsUsed) { EXPECT_EQ(HeapIndex::HEAP_STANDARD, MemoryManager::selectHeap(nullptr, false, true)); } TEST(HeapSelectorTest, givenLimitedAddressSpaceWhenSelectingHeapForNullAllocationWithoutPtrThenStandardHeapIsUsed) { EXPECT_EQ(HeapIndex::HEAP_STANDARD, MemoryManager::selectHeap(nullptr, false, false)); } TEST(MemoryAllocationTest, givenAllocationTypeWhenPassedToMemoryAllocationConstructorThenAllocationTypeIsStored) { MemoryAllocation allocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, nullptr, 0, 0, 0, MemoryPool::MemoryNull, false, false}; EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, allocation.getAllocationType()); } TEST(MemoryAllocationTest, givenMemoryPoolWhenPassedToMemoryAllocationConstructorThenMemoryPoolIsStored) { MemoryAllocation allocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, nullptr, 
0, 0, 0, MemoryPool::System64KBPages, false, false}; EXPECT_EQ(MemoryPool::System64KBPages, allocation.getMemoryPool()); } TEST_F(MemoryAllocatorTest, whenCommandStreamerIsRegisteredThenReturnAssociatedEngineControl) { auto engineControl = memoryManager->getRegisteredEngineForCsr(csr); ASSERT_NE(nullptr, engineControl); EXPECT_EQ(csr, engineControl->commandStreamReceiver); } TEST_F(MemoryAllocatorTest, whenCommandStreamerIsNotRegisteredThenReturnNullEngineControl) { CommandStreamReceiver *dummyCsr = reinterpret_cast(0x1); auto engineControl = memoryManager->getRegisteredEngineForCsr(dummyCsr); EXPECT_EQ(nullptr, engineControl); } TEST(MemoryManagerCopyMemoryTest, givenAllocationWithNoStorageWhenCopyMemoryToAllocationThenReturnFalse) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); uint8_t memory = 1; MockGraphicsAllocation invalidAllocation{nullptr, 0u}; EXPECT_FALSE(memoryManager.copyMemoryToAllocation(&invalidAllocation, &memory, sizeof(memory))); } TEST(MemoryManagerCopyMemoryTest, givenValidAllocationAndMemoryWhenCopyMemoryToAllocationThenDataIsCopied) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); constexpr uint8_t allocationSize = 10; uint8_t allocationStorage[allocationSize] = {0}; MockGraphicsAllocation allocation{allocationStorage, allocationSize}; uint8_t memory = 1u; EXPECT_EQ(0u, allocationStorage[0]); EXPECT_TRUE(memoryManager.copyMemoryToAllocation(&allocation, &memory, sizeof(memory))); EXPECT_EQ(memory, allocationStorage[0]); } TEST_F(MemoryAllocatorTest, whenReservingAddressRangeThenExpectProperAddressAndReleaseWhenFreeing) { size_t size = 0x1000; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), size}); ASSERT_NE(nullptr, allocation); void *reserve = memoryManager->reserveCpuAddressRange(size, 0); 
EXPECT_NE(nullptr, reserve); allocation->setReservedAddressRange(reserve, size); EXPECT_EQ(reserve, allocation->getReservedAddressPtr()); EXPECT_EQ(size, allocation->getReservedAddressSize()); memoryManager->freeGraphicsMemory(allocation); } TEST(MemoryManagerTest, givenMemoryManagerWhenGettingReservedMemoryThenAllocateIt) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); EXPECT_EQ(nullptr, memoryManager.reservedMemory); memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); EXPECT_NE(nullptr, memoryManager.reservedMemory); } TEST(MemoryManagerTest, givenMemoryManagerWhenGetReservedMemoryIsCalledManyTimesThenReuseSameMemory) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(false, false, executionEnvironment); auto reservedMemory = memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); memoryManager.getReservedMemory(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); EXPECT_EQ(reservedMemory, memoryManager.reservedMemory); } class MemoryManagerWithFailure : public MockMemoryManager { public: GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override { return nullptr; } }; TEST(MemoryManagerTest, whenMemoryManagerReturnsNullptrThenAllocateGlobalsSurfaceAlsoReturnsNullptr) { MockClDevice device{new MockDevice}; std::unique_ptr memoryManager(new MemoryManagerWithFailure()); device.injectMemoryManager(memoryManager.release()); WhiteBox linkerInput; linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; GraphicsAllocation *allocation = allocateGlobalsSurface(nullptr, device.getDevice(), 1024, false, &linkerInput, nullptr); EXPECT_EQ(nullptr, 
allocation); auto svmAllocsManager = std::make_unique(device.getMemoryManager()); allocation = allocateGlobalsSurface(svmAllocsManager.get(), device.getDevice(), 1024, false, &linkerInput, nullptr); EXPECT_EQ(nullptr, allocation); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenEnableHostPtrTrackingFlagIsSetTo0ThenHostPointerTrackingIsDisabled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(0); EXPECT_FALSE(memoryManager->isHostPointerTrackingEnabled(0u)); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenEnableHostPtrTrackingFlagIsNotSetTo1ThenHostPointerTrackingIsEnabled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(1); EXPECT_TRUE(memoryManager->isHostPointerTrackingEnabled(0u)); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenEnableHostPtrTrackingFlagIsSetNotSetThenHostPointerTrackingDependsOnCapabilityTable) { if (is32bit) { EXPECT_TRUE(memoryManager->isHostPointerTrackingEnabled(0u)); } else { EXPECT_EQ(device->getHardwareInfo().capabilityTable.hostPtrTrackingEnabled, memoryManager->isHostPointerTrackingEnabled(0u)); } } using MemoryManagerMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(MemoryManagerMultiRootDeviceTests, globalsSurfaceHasCorrectRootDeviceIndex) { if (device->getMemoryManager()->isLimitedRange(expectedRootDeviceIndex)) { delete context->svmAllocsManager; context->svmAllocsManager = nullptr; } std::vector initData(1024, 0x5B); WhiteBox linkerInput; linkerInput.traits.exportsGlobalConstants = true; linkerInput.traits.exportsGlobalVariables = true; GraphicsAllocation *allocation = allocateGlobalsSurface(context->svmAllocsManager, device->getDevice(), initData.size(), false, &linkerInput, initData.data()); ASSERT_NE(nullptr, allocation); EXPECT_EQ(expectedRootDeviceIndex, allocation->getRootDeviceIndex()); if (device->getMemoryManager()->isLimitedRange(expectedRootDeviceIndex)) { device->getMemoryManager()->freeGraphicsMemory(allocation); } else { 
context->getSVMAllocsManager()->freeSVMAlloc(allocation->getUnderlyingBuffer()); } } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhen64BitAndHostPtrTrackingDisabledThenNonSvmHostPtrUsageIsSet) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(0); bool expectedValue = !is32bit; auto result = memoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, 0u); EXPECT_EQ(expectedValue, result); result = memoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::MAP_ALLOCATION, 0u); EXPECT_EQ(expectedValue, result); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenHostPtrTrackingModeThenNonSvmHostPtrUsageIsSet) { memoryManager->setForceNonSvmForExternalHostPtr(true); auto result = memoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, 0u); EXPECT_EQ(true, result); result = memoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, 0u); EXPECT_EQ(false, result); } HWTEST_F(MemoryAllocatorTest, givenMemoryManagerWhenHostPtrTrackingEnabledThenNonSvmHostPtrUsageDependsOnFullRangeSvm) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableHostPtrTracking.set(1); auto result = memoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, 0u); EXPECT_EQ(!executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm() && !is32bit, result); result = memoryManager->useNonSvmHostPtrAlloc(GraphicsAllocation::AllocationType::MAP_ALLOCATION, 0u); EXPECT_EQ(!executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm() && !is32bit, result); } using PageTableManagerTest = ::testing::Test; HWTEST_F(PageTableManagerTest, givenMemoryManagerThatSupportsPageTableManagerWhenMapAuxGpuVAIsCalledThenItReturnsTrue) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < 
executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto memoryManager = new MockMemoryManager(false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); MockGraphicsAllocation allocation(1u, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, 0, 0, MemoryPool::MemoryNull); MockGmm gmm; allocation.setDefaultGmm(&gmm); bool mapped = memoryManager->mapAuxGpuVA(&allocation); auto hwInfo = executionEnvironment->rootDeviceEnvironments[allocation.getRootDeviceIndex()]->getHardwareInfo(); EXPECT_EQ(HwHelper::get(hwInfo->platform.eRenderCoreFamily).isPageTableManagerSupported(*hwInfo), mapped); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/memory_pool_tests.cpp000066400000000000000000000016771363734646600311360ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_pool.h" #include "gtest/gtest.h" TEST(MemoryPool, givenSystemMemoryPoolTypesWhenIsSystemMemoryPoolIsCalledThenTrueIsReturned) { EXPECT_TRUE(MemoryPool::isSystemMemoryPool(MemoryPool::System4KBPages)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(MemoryPool::System4KBPagesWith32BitGpuAddressing)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(MemoryPool::System64KBPages)); EXPECT_TRUE(MemoryPool::isSystemMemoryPool(MemoryPool::System64KBPagesWith32BitGpuAddressing)); } TEST(MemoryPool, givenNonSystemMemoryPoolTypesWhenIsSystemMemoryPoolIsCalledThenFalseIsReturned) { EXPECT_FALSE(MemoryPool::isSystemMemoryPool(MemoryPool::MemoryNull)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(MemoryPool::SystemCpuInaccessible)); EXPECT_FALSE(MemoryPool::isSystemMemoryPool(MemoryPool::LocalMemory)); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/page_table_tests.cpp000066400000000000000000000507741363734646600306620ustar00rootroot00000000000000/* * Copyright 
(C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "opencl/source/aub_mem_dump/page_table_entry_bits.h" #include "opencl/source/memory_manager/memory_banks.h" #include "opencl/source/memory_manager/page_table.h" #include "opencl/source/memory_manager/page_table.inl" #include "opencl/test/unit_test/mocks/mock_physical_address_allocator.h" #include "test.h" #include "gtest/gtest.h" #include using namespace NEO; static const bool is64Bit = (sizeof(void *) == 8); template class MockPageTable : public PageTable { public: using PageTable::PageTable; using PageTable::entries; }; class MockPTE : public PTE { public: using PTE::entries; MockPTE(PhysicalAddressAllocator *physicalAddressAllocator) : PTE(physicalAddressAllocator) {} uintptr_t map(uintptr_t vm, size_t size, uint64_t entryBits, uint32_t memoryBank) override { return PTE::map(vm, size, entryBits, memoryBank); } void pageWalk(uintptr_t vm, size_t size, size_t offset, uint64_t entryBits, PageWalker &pageWalker, uint32_t memoryBank) override { return PTE::pageWalk(vm, size, offset, entryBits, pageWalker, memoryBank); } }; class MockPDE : public MockPageTable { public: using MockPageTable::entries; MockPDE(PhysicalAddressAllocator *physicalAddressAllocator) : MockPageTable(physicalAddressAllocator) { } }; class MockPDP : public MockPageTable { public: using MockPageTable::entries; MockPDP(PhysicalAddressAllocator *physicalAddressAllocator) : MockPageTable(physicalAddressAllocator) { } }; class MockPML4 : public MockPageTable { public: using MockPageTable::entries; using PageTable::allocator; MockPML4(PhysicalAddressAllocator *physicalAddressAllocator) : MockPageTable(physicalAddressAllocator) { } }; class MockPDPE : public MockPageTable { public: using MockPageTable::entries; using PageTable::allocator; MockPDPE(PhysicalAddressAllocator *physicalAddressAllocator) : MockPageTable(physicalAddressAllocator) { } }; class PPGTTPageTable : public 
std::conditional::type { public: const size_t ppgttEntries = is64bit ? 512u : 4u; PPGTTPageTable(PhysicalAddressAllocator *allocator) : std::conditional::type(allocator) { EXPECT_EQ(ppgttEntries, entries.size()); } bool isEmpty() { for (const auto &e : entries) if (e != nullptr) return false; return true; } }; class GGTTPageTable : public PDPE { public: GGTTPageTable(PhysicalAddressAllocator *allocator) : PDPE(allocator) { EXPECT_EQ(4u, entries.size()); } bool isEmpty() { for (const auto &e : entries) if (e != nullptr) return false; return true; } }; class PageTableFixture { protected: const size_t pageSize = 1 << 12; const uintptr_t refAddr = uintptr_t(1) << (is64Bit ? 46 : 31); MockPhysicalAddressAllocator allocator; uint64_t startAddress = 0x1000; public: void SetUp() { startAddress = 0x1000; } void TearDown() { } }; class PageTableEntryChecker { public: template static void testEntry(T *pageTable, uint32_t pteIndex, uintptr_t expectedValue) { } }; template <> void PageTableEntryChecker::testEntry(MockPML4 *pageTable, uint32_t pteIndex, uintptr_t expectedValue) { ASSERT_NE(nullptr, pageTable->entries[0]); ASSERT_NE(nullptr, pageTable->entries[0]->entries[0]); ASSERT_NE(nullptr, pageTable->entries[0]->entries[0]->entries[0]); EXPECT_EQ(reinterpret_cast(expectedValue), pageTable->entries[0]->entries[0]->entries[0]->entries[pteIndex]); } template <> void PageTableEntryChecker::testEntry(MockPDPE *pageTable, uint32_t pteIndex, uintptr_t expectedValue) { ASSERT_NE(nullptr, pageTable->entries[0]); EXPECT_EQ(reinterpret_cast(expectedValue), pageTable->entries[0]->entries[0]->entries[pteIndex]); } typedef Test PageTableTests32; typedef Test PageTableTests48; typedef Test PageTableTestsGPU; TEST_F(PageTableTests48, dummy) { PageTable pt(&allocator); PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { }; pt.pageWalk(0, pageSize, 0, 0, walker, MemoryBanks::MainBank); } TEST_F(PageTableTests48, newIsEmpty) { std::unique_ptr 
pageTable(new PPGTTPageTable(&allocator)); EXPECT_TRUE(pageTable->isEmpty()); } TEST_F(PageTableTests48, DISABLED_mapSizeZero) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); EXPECT_TRUE(pageTable->isEmpty()); auto phys1 = pageTable->map(0x0, 0x0, 0, MemoryBanks::MainBank); std::cerr << phys1 << std::endl; } TEST_F(PageTableTests48, pageWalkSimple) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); uintptr_t addr1 = refAddr + (510 * pageSize) + 0x10; size_t lSize = 8 * pageSize; size_t walked = 0u; size_t lastOffset = 0; PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { EXPECT_EQ(lastOffset, offset); EXPECT_GE(pageSize, size); walked += size; lastOffset += size; }; pageTable->pageWalk(addr1, lSize, 0, 0, walker, MemoryBanks::MainBank); EXPECT_EQ(lSize, walked); } TEST_F(PageTableTests48, givenReservedPhysicalAddressWhenPageWalkIsCalledThenPageTablesAreFilledWithProperAddresses) { if (is64Bit) { std::unique_ptr pageTable(std::make_unique(&allocator)); int shiftPML4 = is64Bit ? (9 + 9 + 9 + 12) : 0; int shiftPDP = is64Bit ? 
(9 + 9 + 12) : 0; uintptr_t gpuVa = (uintptr_t(0x1) << (shiftPML4)) | (uintptr_t(0x1) << (shiftPDP)) | (uintptr_t(0x1) << (9 + 12)) | 0x100; size_t size = 10 * pageSize; size_t walked = 0u; auto address = allocator.mainAllocator.load(); PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { walked += size; }; pageTable->pageWalk(gpuVa, size, 0, 0, walker, MemoryBanks::MainBank); EXPECT_EQ(size, walked); ASSERT_NE(nullptr, pageTable->entries[1]); ASSERT_NE(nullptr, pageTable->entries[1]->entries[1]); ASSERT_NE(nullptr, pageTable->entries[1]->entries[1]->entries[1]); for (uint32_t i = 0; i < 10; i++) { EXPECT_EQ(reinterpret_cast(address | 0x1), pageTable->entries[1]->entries[1]->entries[1]->entries[i]); address += pageSize; } } } TEST_F(PageTableTests48, givenBigGpuAddressWhenPageWalkIsCalledThenPageTablesAreFilledWithProperAddresses) { if (is64Bit) { std::unique_ptr pageTable(std::make_unique(&allocator)); int shiftPML4 = is64Bit ? (47) : 0; int shiftPDP = is64Bit ? 
(9 + 9 + 12) : 0; uintptr_t gpuVa = (uintptr_t(0x1) << (shiftPML4)) | (uintptr_t(0x1) << (shiftPDP)) | (uintptr_t(0x1) << (9 + 12)) | 0x100; size_t size = 10 * pageSize; size_t walked = 0u; auto address = allocator.mainAllocator.load(); PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { walked += size; }; pageTable->pageWalk(gpuVa, size, 0, 0, walker, MemoryBanks::MainBank); EXPECT_EQ(size, walked); ASSERT_NE(nullptr, pageTable->entries[0x100]); ASSERT_NE(nullptr, pageTable->entries[0x100]->entries[1]); ASSERT_NE(nullptr, pageTable->entries[0x100]->entries[1]->entries[1]); for (uint32_t i = 0; i < 10; i++) { EXPECT_EQ(reinterpret_cast(address | 0x1), pageTable->entries[0x100]->entries[1]->entries[1]->entries[i]); address += pageSize; } } } TEST_F(PageTableTests48, givenZeroEntryBitsWhenPageWalkIsCalledThenPageTableEntryHasPresentBitSet) { std::unique_ptr::type> pageTable(std::make_unique::type>(&allocator)); uintptr_t gpuVa = 0x1000; size_t size = pageSize; size_t walked = 0u; auto address = allocator.mainAllocator.load(); PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { walked += size; }; pageTable->pageWalk(gpuVa, size, 0, 0, walker, MemoryBanks::MainBank); EXPECT_EQ(size, walked); ASSERT_NE(nullptr, pageTable->entries[0]); PageTableEntryChecker::testEntry::type>(pageTable.get(), 1, static_cast(address | 0x1)); } TEST_F(PageTableTests48, givenZeroEntryBitsWhenMapIsCalledThenPageTableEntryHasPresentBitSet) { std::unique_ptr::type> pageTable(std::make_unique::type>(&allocator)); uintptr_t gpuVa = 0x1000; size_t size = pageSize; auto address = allocator.mainAllocator.load(); pageTable->map(gpuVa, size, 0, MemoryBanks::MainBank); ASSERT_NE(nullptr, pageTable->entries[0]); PageTableEntryChecker::testEntry::type>(pageTable.get(), 1, static_cast(address | 0x1)); } TEST_F(PageTableTests48, givenEntryBitsWhenPageWalkIsCalledThenEntryBitsArePassedToPageWalker) { 
std::unique_ptr::type> pageTable(std::make_unique::type>(&allocator)); uintptr_t gpuVa = 0x1000; size_t size = pageSize; size_t walked = 0u; uint64_t ppgttBits = 0xabc; uint64_t entryBitsPassed = 0u; PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { walked += size; entryBitsPassed = entryBits; }; pageTable->pageWalk(gpuVa, size, 0, ppgttBits, walker, MemoryBanks::MainBank); ppgttBits |= 0x1; EXPECT_EQ(ppgttBits, entryBitsPassed); } TEST_F(PageTableTests48, givenTwoPageWalksWhenSecondWalkHasDifferentEntryBitsThenEntryIsUpdated) { std::unique_ptr::type> pageTable(std::make_unique::type>(&allocator)); uintptr_t gpuVa = 0x1000; size_t size = pageSize; size_t walked = 0u; uint64_t ppgttBits = 0xabc; uint64_t entryBitsPassed = 0u; PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { walked += size; entryBitsPassed = entryBits; }; pageTable->pageWalk(gpuVa, size, 0, ppgttBits, walker, MemoryBanks::MainBank); ppgttBits |= 0x1; EXPECT_EQ(ppgttBits, entryBitsPassed); ppgttBits = 0x345; pageTable->pageWalk(gpuVa, size, 0, ppgttBits, walker, MemoryBanks::MainBank); EXPECT_EQ(ppgttBits, entryBitsPassed); } TEST_F(PageTableTests48, givenTwoPageWalksWhenSecondWalkHasNonValidEntryBitsThenEntryIsNotUpdated) { std::unique_ptr::type> pageTable(std::make_unique::type>(&allocator)); uintptr_t gpuVa = 0x1000; size_t size = pageSize; size_t walked = 0u; uint64_t ppgttBits = 0xabc; uint64_t entryBitsPassed = 0u; uint64_t entryBitsPassedFirstTime = 0u; uint64_t entryBitsPassedSecondTime = 0u; PageWalker walker = [&](uint64_t physAddress, size_t size, size_t offset, uint64_t entryBits) { walked += size; entryBitsPassed = entryBits; }; pageTable->pageWalk(gpuVa, size, 0, ppgttBits, walker, 0); ppgttBits |= 0x1; EXPECT_EQ(ppgttBits, entryBitsPassed); entryBitsPassedFirstTime = entryBitsPassed; ppgttBits = PageTableEntry::nonValidBits; pageTable->pageWalk(gpuVa, size, 0, ppgttBits, walker, 0); 
entryBitsPassedSecondTime = entryBitsPassed; EXPECT_EQ(entryBitsPassedFirstTime, entryBitsPassedSecondTime); } TEST_F(PageTableTests48, givenTwoMapsWhenSecondMapHasDifferentEntryBitsThenEntryIsUpdated) { std::unique_ptr::type> pageTable(std::make_unique::type>(&allocator)); uintptr_t gpuVa = 0x1000; size_t size = pageSize; uint64_t ppgttBits = 0xabc; auto address = allocator.mainAllocator.load(); pageTable->map(gpuVa, size, ppgttBits, 0); ASSERT_NE(nullptr, pageTable->entries[0]); PageTableEntryChecker::testEntry::type>(pageTable.get(), 1, static_cast(address | ppgttBits | 0x1)); ppgttBits = 0x345; pageTable->map(gpuVa, size, ppgttBits, 0); PageTableEntryChecker::testEntry::type>(pageTable.get(), 1, static_cast(address | ppgttBits | 0x1)); } TEST_F(PageTableTests48, givenTwoMapsWhenSecondMapHasNonValidEntryBitsThenEntryIsNotUpdated) { std::unique_ptr::type> pageTable(std::make_unique::type>(&allocator)); uintptr_t gpuVa = 0x1000; size_t size = pageSize; uint64_t ppgttBits = 0xabc; auto address = allocator.mainAllocator.load(); pageTable->map(gpuVa, size, ppgttBits, 0); ASSERT_NE(nullptr, pageTable->entries[0]); PageTableEntryChecker::testEntry::type>(pageTable.get(), 1, static_cast(address | ppgttBits | 0x1)); uint64_t nonValidPpgttBits = PageTableEntry::nonValidBits; pageTable->map(gpuVa, size, nonValidPpgttBits, 0); PageTableEntryChecker::testEntry::type>(pageTable.get(), 1, static_cast(address | ppgttBits | 0x1)); } TEST_F(PageTableTests48, givenPageTableWhenMappingTheSameAddressMultipleTimesThenNumberOfPagesReservedInAllocatorMatchPagesMapped) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); uintptr_t address = refAddr; auto initialAddress = allocator.initialPageAddress; auto phys1 = pageTable->map(address, pageSize, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1); auto phys1_1 = pageTable->map(address, 1, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1_1); auto phys2 = pageTable->map(address, pageSize, 0, MemoryBanks::MainBank); 
EXPECT_EQ(phys1, phys2); address = ptrOffset(address, pageSize); auto phys3 = pageTable->map(address, pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(phys1, phys3); address = ptrOffset(address, pageSize); auto phys4 = pageTable->map(address, pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(phys3, phys4); auto nextFreeAddress = initialAddress + ptrDiff(phys4 + pageSize, initialAddress); EXPECT_EQ(nextFreeAddress, allocator.mainAllocator.load()); } TEST_F(PageTableTests48, physicalAddressesInAUBCantStartAt0) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); uintptr_t addr1 = refAddr; auto phys1 = pageTable->map(addr1, pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(0u, phys1); } TEST_F(PageTableTests48, mapPageMapByteInMapped) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); uintptr_t addr1 = refAddr; auto phys1 = pageTable->map(addr1, pageSize, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1); EXPECT_EQ(allocator.initialPageAddress + pageSize, allocator.mainAllocator); auto phys1_1 = pageTable->map(addr1, 1, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1_1); EXPECT_EQ(allocator.initialPageAddress + pageSize, allocator.mainAllocator); } TEST_F(PageTableTests48, mapsCorrectlyEvenMultipleCalls) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); uintptr_t addr1 = refAddr; auto phys1 = pageTable->map(addr1, pageSize, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1); EXPECT_EQ(allocator.initialPageAddress + pageSize, allocator.mainAllocator); auto phys1_1 = pageTable->map(addr1, 1, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1_1); EXPECT_EQ(allocator.initialPageAddress + pageSize, allocator.mainAllocator); auto phys2 = pageTable->map(addr1, pageSize, 0, MemoryBanks::MainBank); EXPECT_EQ(phys1, phys2); EXPECT_EQ(allocator.initialPageAddress + pageSize, allocator.mainAllocator); auto phys3 = pageTable->map(addr1 + pageSize, pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(phys1, phys3); 
EXPECT_EQ(allocator.initialPageAddress + 2 * pageSize, allocator.mainAllocator); auto phys4 = pageTable->map(addr1 + pageSize, pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(phys1, phys3); EXPECT_EQ(phys3, phys4); auto addr2 = addr1 + pageSize + pageSize; auto phys5 = pageTable->map(addr2, 2 * pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(phys1, phys5); EXPECT_NE(phys3, phys5); auto phys6 = pageTable->map(addr2, 2 * pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(phys1, phys6); EXPECT_NE(phys3, phys6); EXPECT_EQ(phys5, phys6); auto phys7 = pageTable->map(addr2 + pageSize, pageSize, 0, MemoryBanks::MainBank); EXPECT_NE(0u, phys7); EXPECT_NE(phys1, phys7); EXPECT_NE(phys3, phys7); EXPECT_NE(phys5, phys7); EXPECT_NE(phys6, phys7); EXPECT_EQ(phys6 + pageSize, phys7); } TEST_F(PageTableTests48, mapsPagesOnTableBoundary) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); uintptr_t addr1 = refAddr + pageSize * 16; size_t pages = (1 << 9) * 2; size_t size = pages * pageSize; auto phys1 = pageTable->map(addr1, size, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1); } TEST_F(PageTableTests48, mapsPagesOnTableBoundary2ndAllocation) { std::unique_ptr pageTable(new PPGTTPageTable(&allocator)); uintptr_t addr1 = refAddr + pageSize * 16; size_t pages = (1 << 9) * 2; size_t size = pages * pageSize; auto phys1 = pageTable->map(0x0, pageSize, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1); auto phys2 = pageTable->map(addr1, size, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress + pageSize, phys2); } TEST_F(PageTableTestsGPU, mapsPagesOnTableBoundary) { std::unique_ptr ggtt(new GGTTPageTable(&allocator)); std::unique_ptr ppgtt(new PPGTTPageTable(&allocator)); uintptr_t addrGGTT = 0x70000000 + pageSize * 16; uintptr_t addrPPGTT = refAddr + pageSize * 16; size_t pages = (1 << 9) * 2; size_t size = pages * pageSize; auto phys32 = ggtt->map(addrGGTT, size, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys32); auto phys48 = ppgtt->map(addrPPGTT, size, 
0, MemoryBanks::MainBank); EXPECT_EQ(startAddress + size, phys48); } TEST_F(PageTableTestsGPU, newIsEmpty) { std::unique_ptr ggtt(new GGTTPageTable(&allocator)); EXPECT_TRUE(ggtt->isEmpty()); std::unique_ptr ppgtt(new PPGTTPageTable(&allocator)); EXPECT_TRUE(ppgtt->isEmpty()); } TEST_F(PageTableTests32, level0) { std::unique_ptr> pt(new PageTable(&allocator)); auto phys = pt->map(0x10000, pageSize, 0, MemoryBanks::MainBank); EXPECT_EQ(0u, phys); } TEST_F(PageTableTests32, newIsEmpty) { std::unique_ptr pageTable(new GGTTPageTable(&allocator)); EXPECT_TRUE(pageTable->isEmpty()); } TEST_F(PageTableTests32, mapsPagesOnTableBoundary) { std::unique_ptr pageTable(new GGTTPageTable(&allocator)); uintptr_t addr1 = 0x70000000 + pageSize * 16; size_t pages = (1 << 9) * 2; size_t size = pages * pageSize; auto phys1 = pageTable->map(addr1, size, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1); } TEST_F(PageTableTests32, mapsPagesOnTableBoundary2ndAllocation) { std::unique_ptr pageTable(new GGTTPageTable(&allocator)); uintptr_t addr1 = 0x70000000 + pageSize * 16; size_t pages = (1 << 9) * 2; size_t size = pages * pageSize; auto phys1 = pageTable->map(0x0, pageSize, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress, phys1); auto phys2 = pageTable->map(addr1, size, 0, MemoryBanks::MainBank); EXPECT_EQ(startAddress + pageSize, phys2); } physical_address_allocator_tests.cpp000066400000000000000000000062371363734646600340740ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/memory_manager/memory_banks.h" #include "opencl/source/memory_manager/page_table.h" #include "opencl/test/unit_test/mocks/mock_physical_address_allocator.h" #include "gtest/gtest.h" using namespace NEO; TEST(PhysicalAddressAllocator, givenPhysicalAddressesAllocatorWhenReservingFirstPageThenNonZeroAddressIsReturned) { MockPhysicalAddressAllocator allocator; 
auto physAddress = allocator.reserve4kPage(MemoryBanks::MainBank); EXPECT_NE(0u, physAddress); } TEST(PhysicalAddressAllocator, givenPhysicalAddressesAllocatorWhenReservingConsecutive4kPagesThenReturnedAddressesAreDifferentAndAligned) { MockPhysicalAddressAllocator allocator; auto physAddress = allocator.reserve4kPage(MemoryBanks::MainBank); EXPECT_NE(0u, physAddress); EXPECT_EQ(0u, physAddress & MemoryConstants::pageMask); auto physAddress1 = allocator.reserve4kPage(MemoryBanks::MainBank); EXPECT_NE(physAddress, physAddress1); EXPECT_EQ(0u, physAddress1 & MemoryConstants::pageMask); auto physAddress2 = allocator.reserve4kPage(MemoryBanks::MainBank); EXPECT_NE(physAddress1, physAddress2); EXPECT_EQ(0u, physAddress2 & MemoryConstants::pageMask); } TEST(PhysicalAddressAllocator, givenPhysicalAddressesAllocatorWhenReservingFirst64kPageThen64kAlignedIsReturned) { MockPhysicalAddressAllocator allocator; auto physAddress = allocator.reserve64kPage(MemoryBanks::MainBank); EXPECT_NE(0u, physAddress); EXPECT_EQ(0u, physAddress & MemoryConstants::page64kMask); } TEST(PhysicalAddressAllocator, givenPhysicalAddressesAllocatorWhenReservingConsecutive64kPagesThenReturnedAddressesAreDifferentAndAligned) { MockPhysicalAddressAllocator allocator; auto physAddress = allocator.reserve64kPage(MemoryBanks::MainBank); EXPECT_NE(0u, physAddress); EXPECT_EQ(0u, physAddress & MemoryConstants::page64kMask); auto physAddress1 = allocator.reserve64kPage(MemoryBanks::MainBank); EXPECT_NE(physAddress, physAddress1); EXPECT_EQ(0u, physAddress & MemoryConstants::page64kMask); auto physAddress2 = allocator.reserve64kPage(MemoryBanks::MainBank); EXPECT_NE(physAddress1, physAddress2); EXPECT_EQ(0u, physAddress & MemoryConstants::page64kMask); } TEST(PhysicalAddressAllocator, givenPhysicalAddressesAllocatorWhenReservingInterleaving4kPagesAnd64kPagesThenReturnedAddressesAreCorrectlyAligned) { MockPhysicalAddressAllocator allocator; auto physAddress = allocator.reserve4kPage(MemoryBanks::MainBank); 
EXPECT_NE(0u, physAddress); EXPECT_EQ(0u, physAddress & MemoryConstants::pageMask); auto physAddress1 = allocator.reserve64kPage(MemoryBanks::MainBank); EXPECT_NE(physAddress, physAddress1); EXPECT_EQ(0u, physAddress1 & MemoryConstants::page64kMask); auto physAddress2 = allocator.reserve4kPage(MemoryBanks::MainBank); EXPECT_NE(physAddress1, physAddress2); EXPECT_EQ(0u, physAddress2 & MemoryConstants::pageMask); auto physAddress3 = allocator.reserve64kPage(MemoryBanks::MainBank); EXPECT_NE(physAddress, physAddress1); EXPECT_EQ(0u, physAddress3 & MemoryConstants::page64kMask); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/surface_tests.cpp000066400000000000000000000106351363734646600302170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/memory_manager/mem_obj_surface.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "gtest/gtest.h" #include using namespace NEO; typedef ::testing::Types SurfaceTypes; namespace createSurface { template Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation); template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new NullSurface; } template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new HostPtrSurface(data, 10, gfxAllocation); } template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new MemObjSurface(buffer); } template <> Surface *Create(char *data, MockBuffer *buffer, GraphicsAllocation *gfxAllocation) { return new 
GeneralSurface(gfxAllocation); } } // namespace createSurface template class SurfaceTest : public ::testing::Test { public: char data[10]; MockBuffer buffer; MockGraphicsAllocation gfxAllocation{nullptr, 0}; }; TYPED_TEST_CASE(SurfaceTest, SurfaceTypes); HWTEST_TYPED_TEST(SurfaceTest, GivenSurfaceWhenInterfaceIsUsedThenSurfaceBehavesCorrectly) { int32_t execStamp; ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->initializeMemoryManager(); auto csr = std::make_unique>(execStamp, *executionEnvironment, 0); auto hwInfo = *defaultHwInfo; auto engine = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo)[0]; auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), engine, 1, PreemptionHelper::getDefaultPreemptionMode(hwInfo), false, false, false); csr->setupContext(*osContext); Surface *surface = createSurface::Create(this->data, &this->buffer, &this->gfxAllocation); ASSERT_NE(nullptr, surface); Surface *duplicatedSurface = surface->duplicate(); ASSERT_NE(nullptr, duplicatedSurface); surface->makeResident(*csr); if (std::is_same::value || std::is_same::value || std::is_same::value) { EXPECT_EQ(1u, csr->madeResidentGfxAllocations.size()); } delete duplicatedSurface; delete surface; } class CoherentMemObjSurface : public SurfaceTest { public: CoherentMemObjSurface() { this->buffer.getGraphicsAllocation()->setCoherent(true); } }; TEST_F(CoherentMemObjSurface, BufferFromCoherentSvm) { Surface *surface = createSurface::Create(this->data, &this->buffer, &this->gfxAllocation); EXPECT_TRUE(surface->IsCoherent); delete surface; } TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithoutSpecifyingPtrCopyAllowanceThenPtrCopyIsNotAllowed) { char memory[2]; HostPtrSurface surface(memory, sizeof(memory)); EXPECT_FALSE(surface.peekIsPtrCopyAllowed()); } TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyAllowedThenQueryReturnsTrue) { char 
memory[2]; HostPtrSurface surface(memory, sizeof(memory), true); EXPECT_TRUE(surface.peekIsPtrCopyAllowed()); } TEST(HostPtrSurfaceTest, givenHostPtrSurfaceWhenCreatedWithPtrCopyNotAllowedThenQueryReturnsFalse) { char memory[2]; HostPtrSurface surface(memory, sizeof(memory), false); EXPECT_FALSE(surface.peekIsPtrCopyAllowed()); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp000066400000000000000000001171171363734646600332770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/page_fault_manager/mock_cpu_page_fault_manager.h" #include "opencl/source/api/api.h" #include "opencl/source/mem_obj/mem_obj_helper.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_svm_manager.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; template struct SVMMemoryAllocatorFixture { SVMMemoryAllocatorFixture() : executionEnvironment(defaultHwInfo.get()) {} virtual void SetUp() { bool svmSupported = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.ftrSvm; if (!svmSupported) { GTEST_SKIP(); } executionEnvironment.initGmm(); memoryManager = std::make_unique(false, enableLocalMemory, executionEnvironment); svmManager = std::make_unique(memoryManager.get()); if (enableLocalMemory) { memoryManager->pageFaultManager.reset(new MockPageFaultManager); } } virtual void TearDown() { } MockExecutionEnvironment 
executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr svmManager; }; using SVMMemoryAllocatorTest = Test>; using SVMLocalMemoryAllocatorTest = Test>; TEST_F(SVMMemoryAllocatorTest, whenCreateZeroSizedSVMAllocationThenReturnNullptr) { auto ptr = svmManager->createSVMAlloc(0, 0, {}); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); EXPECT_EQ(ptr, nullptr); } TEST_F(SVMMemoryAllocatorTest, whenRequestSVMAllocsThenReturnNonNullptr) { auto svmAllocs = svmManager->getSVMAllocs(); EXPECT_NE(svmAllocs, nullptr); } TEST_F(SVMMemoryAllocatorTest, whenSVMAllocationIsFreedThenCannotBeGotAgain) { auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_NE(nullptr, ptr); auto svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); EXPECT_NE(nullptr, svmData->gpuAllocation); svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); EXPECT_NE(nullptr, svmData->gpuAllocation); EXPECT_EQ(1u, svmManager->SVMAllocs.getNumAllocs()); auto svmAllocation = svmManager->getSVMAlloc(ptr)->gpuAllocation; EXPECT_FALSE(svmAllocation->isCoherent()); svmManager->freeSVMAlloc(ptr); EXPECT_EQ(nullptr, svmManager->getSVMAlloc(ptr)); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); } TEST_F(SVMMemoryAllocatorTest, whenGetSVMAllocationFromReturnedPointerAreaThenReturnSameAllocation) { auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_NE(ptr, nullptr); auto svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); GraphicsAllocation *graphicsAllocation = svmData->gpuAllocation; EXPECT_NE(nullptr, graphicsAllocation); auto ptrInRange = ptrOffset(ptr, MemoryConstants::pageSize - 4); svmData = svmManager->getSVMAlloc(ptrInRange); ASSERT_NE(nullptr, svmData); GraphicsAllocation *graphicsAllocationInRange = svmData->gpuAllocation; EXPECT_NE(nullptr, graphicsAllocationInRange); EXPECT_EQ(graphicsAllocation, graphicsAllocationInRange); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, 
whenGetSVMAllocationFromOutsideOfReturnedPointerAreaThenDontReturnThisAllocation) { auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_NE(ptr, nullptr); auto svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); GraphicsAllocation *graphicsAllocation = svmData->gpuAllocation; EXPECT_NE(nullptr, graphicsAllocation); auto ptrBefore = ptrOffset(ptr, -4); svmData = svmManager->getSVMAlloc(ptrBefore); EXPECT_EQ(nullptr, svmData); auto ptrAfter = ptrOffset(ptr, MemoryConstants::pageSize); svmData = svmManager->getSVMAlloc(ptrAfter); EXPECT_EQ(nullptr, svmData); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenCouldNotAllocateInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) { FailMemoryManager failMemoryManager(executionEnvironment); svmManager->memoryManager = &failMemoryManager; auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenCouldNotAllocateInMemoryManagerThenCreateUnifiedMemoryAllocationReturnsNullAndDoesNotChangeAllocsMap) { FailMemoryManager failMemoryManager(executionEnvironment); svmManager->memoryManager = &failMemoryManager; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, given64kbAllowedWhenAllocatingSvmMemoryThenDontPreferRenderCompression) { MockMemoryManager memoryManager64Kb(true, false, executionEnvironment); svmManager->memoryManager = &memoryManager64Kb; auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_FALSE(memoryManager64Kb.preferRenderCompressedFlagPassed); 
svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, given64kbAllowedwhenAllocatingSvmMemoryThenAllocationIsIn64kbPagePool) { MockMemoryManager memoryManager64Kb(true, false, executionEnvironment); svmManager->memoryManager = &memoryManager64Kb; auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_EQ(MemoryPool::System64KBPages, svmManager->getSVMAlloc(ptr)->gpuAllocation->getMemoryPool()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, given64kbDisallowedWhenAllocatingSvmMemoryThenAllocationIsIn4kbPagePool) { auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_EQ(MemoryPool::System4KBPages, svmManager->getSVMAlloc(ptr)->gpuAllocation->getMemoryPool()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenCoherentFlagIsPassedThenAllocationIsCoherent) { SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.coherent = true; auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, svmProperties); EXPECT_TRUE(svmManager->getSVMAlloc(ptr)->gpuAllocation->isCoherent()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenDeviceAllocationIsCreatedThenItIsStoredWithWriteCombinedTypeInAllocationMap) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined = true; auto allocationSize = 4000u; auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4000u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, allocation->gpuAllocation); EXPECT_EQ(InternalMemoryType::DEVICE_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(allocation->gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); 
EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), allocation->gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(GraphicsAllocation::AllocationType::WRITE_COMBINED, allocation->gpuAllocation->getAllocationType()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, givenNoWriteCombinedFlagwhenDeviceAllocationIsCreatedThenItIsStoredWithProperTypeInAllocationMap) { if (is32bit) { GTEST_SKIP(); } SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined = false; auto allocationSize = 4096u; auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, allocation->gpuAllocation); EXPECT_EQ(InternalMemoryType::DEVICE_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), allocation->gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, allocation->gpuAllocation->getAllocationType()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenHostAllocationIsCreatedThenItIsStoredWithProperTypeInAllocationMap) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::HOST_UNIFIED_MEMORY; auto allocationSize = 4096u; auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, allocation->gpuAllocation); EXPECT_EQ(InternalMemoryType::HOST_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); 
EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), allocation->gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, allocation->gpuAllocation->getAllocationType()); EXPECT_NE(allocation->gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, allocation->gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenCouldNotAllocateInMemoryManagerThenCreateSharedUnifiedMemoryAllocationReturnsNullAndDoesNotChangeAllocsMap) { MockCommandQueue cmdQ; DebugManagerStateRestore restore; DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true); FailMemoryManager failMemoryManager(executionEnvironment); svmManager->memoryManager = &failMemoryManager; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties, &cmdQ); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, whenSharedAllocationIsCreatedThenItIsStoredWithProperTypeInAllocationMap) { MockCommandQueue cmdQ; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; auto allocationSize = 4096u; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties, &cmdQ); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, allocation->gpuAllocation); EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), allocation->gpuAllocation->getUnderlyingBufferSize()); 
EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, allocation->gpuAllocation->getAllocationType()); EXPECT_NE(allocation->gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, allocation->gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenSharedAllocationIsCreatedWithDebugFlagSetThenItIsStoredWithProperTypeInAllocationMapAndHasCpuAndGpuStorage) { MockCommandQueue cmdQ; MockContext mockContext; DebugManagerStateRestore restore; DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; unifiedMemoryProperties.device = mockContext.getDevice(0u); auto allocationSize = 4096u; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties, &cmdQ); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_NE(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, allocation->gpuAllocation); EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(mockContext.getDevice(0u), allocation->device); EXPECT_EQ(alignUp(allocationSize, 2u * MB), allocation->gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(alignUp(allocationSize, 2u * MB), allocation->cpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_GPU, allocation->gpuAllocation->getAllocationType()); EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_CPU, allocation->cpuAllocation->getAllocationType()); EXPECT_EQ(allocation->gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(allocation->cpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, allocation->gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, 
whenSharedAllocationIsCreatedWithLocalMemoryAndRegisteredPageFaultHandlerThenItIsStoredWithProperTypeInAllocationMapAndHasCpuAndGpuStorage) { MockCommandQueue cmdQ; DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(1); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; auto allocationSize = 4096u; auto ptr = svmManager->createSharedUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties, &cmdQ); EXPECT_NE(nullptr, ptr); auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_NE(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, allocation->gpuAllocation); EXPECT_EQ(InternalMemoryType::SHARED_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, 2u * MB), allocation->gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(alignUp(allocationSize, 2u * MB), allocation->cpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_GPU, allocation->gpuAllocation->getAllocationType()); EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_CPU, allocation->cpuAllocation->getAllocationType()); EXPECT_EQ(allocation->gpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(allocation->cpuAllocation->getMemoryPool(), MemoryPool::LocalMemory); EXPECT_NE(nullptr, allocation->gpuAllocation->getUnderlyingBuffer()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMMemoryAllocatorTest, givenSharedAllocationsDebugFlagWhenDeviceMemoryIsAllocatedThenOneStorageIsProduced) { DebugManagerStateRestore restore; DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(true); SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; auto allocationSize = 4096u; auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); 
auto allocation = svmManager->getSVMAlloc(ptr); EXPECT_EQ(nullptr, allocation->cpuAllocation); EXPECT_NE(nullptr, allocation->gpuAllocation); EXPECT_EQ(InternalMemoryType::DEVICE_UNIFIED_MEMORY, allocation->memoryType); EXPECT_EQ(allocationSize, allocation->size); EXPECT_EQ(alignUp(allocationSize, MemoryConstants::pageSize64k), allocation->gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, allocation->gpuAllocation->getAllocationType()); svmManager->freeSVMAlloc(ptr); } TEST(SvmAllocationPropertiesTests, givenDifferentMemFlagsWhenGettingSvmAllocationPropertiesThenPropertiesAreCorrectlySet) { SVMAllocsManager::SvmAllocationProperties allocationProperties = MemObjHelper::getSvmAllocationProperties(0); EXPECT_FALSE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_SVM_FINE_GRAIN_BUFFER); EXPECT_TRUE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_HOST_READ_ONLY); EXPECT_FALSE(allocationProperties.coherent); EXPECT_TRUE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_HOST_NO_ACCESS); EXPECT_FALSE(allocationProperties.coherent); EXPECT_TRUE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_READ_ONLY); EXPECT_FALSE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_TRUE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_HOST_READ_ONLY); EXPECT_TRUE(allocationProperties.coherent); 
EXPECT_TRUE(allocationProperties.hostPtrReadOnly); EXPECT_FALSE(allocationProperties.readOnly); allocationProperties = MemObjHelper::getSvmAllocationProperties(CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_ONLY); EXPECT_TRUE(allocationProperties.coherent); EXPECT_FALSE(allocationProperties.hostPtrReadOnly); EXPECT_TRUE(allocationProperties.readOnly); } TEST_F(SVMMemoryAllocatorTest, whenReadOnlySvmAllocationCreatedThenGraphicsAllocationHasWriteableFlagFalse) { SVMAllocsManager::SvmAllocationProperties svmProperties; svmProperties.readOnly = true; void *svm = svmManager->createSVMAlloc(0, 4096, svmProperties); EXPECT_NE(nullptr, svm); auto svmData = svmManager->getSVMAlloc(svm); ASSERT_NE(nullptr, svmData); GraphicsAllocation *svmAllocation = svmData->gpuAllocation; EXPECT_NE(nullptr, svmAllocation); EXPECT_FALSE(svmAllocation->isMemObjectsAllocationWithWritableFlags()); svmManager->freeSVMAlloc(svm); } TEST_F(SVMLocalMemoryAllocatorTest, whenAllocatingSvmThenExpectCpuAllocationWithPointerAndGpuAllocationWithSameGpuAddress) { auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_NE(ptr, nullptr); auto svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); GraphicsAllocation *cpuAllocation = svmData->cpuAllocation; EXPECT_NE(nullptr, cpuAllocation); EXPECT_EQ(ptr, cpuAllocation->getUnderlyingBuffer()); GraphicsAllocation *gpuAllocation = svmData->gpuAllocation; EXPECT_NE(nullptr, gpuAllocation); EXPECT_EQ(reinterpret_cast(ptr), gpuAllocation->getGpuAddress()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenGetSVMAllocationFromOutsideOfReturnedPointerAreaThenDontReturnThisAllocation) { auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_NE(ptr, nullptr); auto svmData = svmManager->getSVMAlloc(ptr); ASSERT_NE(nullptr, svmData); GraphicsAllocation *graphicsAllocation = svmData->gpuAllocation; EXPECT_NE(nullptr, graphicsAllocation); auto ptrBefore = ptrOffset(ptr, -4); svmData = 
svmManager->getSVMAlloc(ptrBefore); EXPECT_EQ(nullptr, svmData); auto ptrAfter = ptrOffset(ptr, MemoryConstants::pageSize); svmData = svmManager->getSVMAlloc(ptrAfter); EXPECT_EQ(nullptr, svmData); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenCouldNotAllocateCpuAllocationInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) { FailMemoryManager failMemoryManager(false, true, executionEnvironment); svmManager->memoryManager = &failMemoryManager; auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenCouldNotAllocateGpuAllocationInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) { FailMemoryManager failMemoryManager(1, executionEnvironment, true); svmManager->memoryManager = &failMemoryManager; auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); svmManager->freeSVMAlloc(ptr); } TEST_F(SVMLocalMemoryAllocatorTest, whenCouldNotReserveCpuAddressRangeInMemoryManagerThenReturnsNullAndDoesNotChangeAllocsMap) { memoryManager->failReserveAddress = true; auto ptr = svmManager->createSVMAlloc(0, MemoryConstants::pageSize, {}); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(0u, svmManager->SVMAllocs.getNumAllocs()); } struct MemoryManagerPropertiesCheck : public MockMemoryManager { using MockMemoryManager::MockMemoryManager; GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override { return this->allocateGraphicsMemoryWithProperties(properties, nullptr); } GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) override { this->multiOsContextCapablePassed = properties.flags.multiOsContextCapable; this->multiStorageResourcePassed = properties.multiStorageResource; 
this->subDevicesBitfieldPassed = properties.subDevicesBitfield; this->shareablePassed = properties.flags.shareable; return MockMemoryManager::allocateGraphicsMemoryWithProperties(properties, ptr); } bool multiOsContextCapablePassed; bool multiStorageResourcePassed; bool shareablePassed; DeviceBitfield subDevicesBitfieldPassed; }; struct UnifiedMemoryManagerPropertiesTest : public ::testing::Test { void SetUp() override { bool svmSupported = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.ftrSvm; if (!svmSupported) { GTEST_SKIP(); } memoryManager = std::make_unique(false, true, executionEnvironment); svmManager = std::make_unique(memoryManager.get()); memoryManager->pageFaultManager.reset(new MockPageFaultManager); } MockExecutionEnvironment executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr svmManager; }; TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithMultipleBitsSetWhenSharedUnifiedMemoryAllocationIsCreatedThenProperPropertiesArePassedToMemoryManager) { MockCommandQueue cmdQ; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; unifiedMemoryProperties.subdeviceBitfield = DeviceBitfield(0xf); auto ptr = svmManager->createSharedUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties, &cmdQ); EXPECT_TRUE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfield, memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithSingleBitSetWhenSharedUnifiedMemoryAllocationIsCreatedThenProperPropertiesArePassedToMemoryManager) { MockCommandQueue cmdQ; SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; unifiedMemoryProperties.subdeviceBitfield 
= DeviceBitfield(0x8); auto ptr = svmManager->createSharedUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties, &cmdQ); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfield, memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithMultipleBitsSetWhenDeviceUnifiedMemoryAllocationIsCreatedThenProperPropertiesArePassedToMemoryManager) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; unifiedMemoryProperties.subdeviceBitfield = DeviceBitfield(0xf); auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties); EXPECT_TRUE(memoryManager->multiOsContextCapablePassed); EXPECT_TRUE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfield, memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } TEST_F(UnifiedMemoryManagerPropertiesTest, givenDeviceBitfieldWithSingleBitSetWhenDeviceUnifiedMemoryAllocationIsCreatedThenProperPropertiesArePassedToMemoryManager) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::SHARED_UNIFIED_MEMORY; unifiedMemoryProperties.subdeviceBitfield = DeviceBitfield(0x8); auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties); EXPECT_FALSE(memoryManager->multiOsContextCapablePassed); EXPECT_FALSE(memoryManager->multiStorageResourcePassed); EXPECT_EQ(unifiedMemoryProperties.subdeviceBitfield, memoryManager->subDevicesBitfieldPassed); svmManager->freeSVMAlloc(ptr); } struct ShareableUnifiedMemoryManagerPropertiesTest : public ::testing::Test { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); bool svmSupported = 
executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.ftrSvm; if (!svmSupported) { GTEST_SKIP(); } memoryManager = std::make_unique(false, true, *executionEnvironment); svmManager = std::make_unique(memoryManager.get()); memoryManager->pageFaultManager.reset(new MockPageFaultManager); } ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr svmManager; }; TEST_F(ShareableUnifiedMemoryManagerPropertiesTest, givenShareableUnifiedPropertyFlagThenShareableAllocationPropertyFlagIsSet) { SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties; unifiedMemoryProperties.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY; unifiedMemoryProperties.allocationFlags.flags.shareable = 1; auto ptr = svmManager->createUnifiedMemoryAllocation(0, 4096u, unifiedMemoryProperties); EXPECT_TRUE(memoryManager->shareablePassed); svmManager->freeSVMAlloc(ptr); } TEST(UnfiedSharedMemoryTransferCalls, givenHostUSMllocationWhenPointerIsUsedForTransferCallsThenUSMAllocationIsReused) { MockContext mockContext; cl_context clContext = &mockContext; auto status = CL_SUCCESS; auto hostMemory = clHostMemAllocINTEL(clContext, nullptr, 4096u, 0u, &status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(hostMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); cl_device_id clDevice = mockContext.getDevice(0u); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = 
neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(2u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, hostMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTransferCallsThenUsmAllocationIsReused) { MockContext mockContext; cl_context clContext = &mockContext; auto status = CL_SUCCESS; cl_device_id clDevice = mockContext.getDevice(0u); auto deviceMemory = clDeviceMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_EQ(2u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = 
clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, deviceMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnfiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForTransferCallsThenCPUPathIsNotChoosen) { MockContext mockContext; cl_context clContext = &mockContext; auto status = CL_SUCCESS; cl_device_id clDevice = mockContext.getDevice(0u); auto deviceMemory = clDeviceMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(deviceMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, true, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, true, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_EQ(2u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, deviceMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnfiedSharedMemoryTransferCalls, givenHostUsmAllocationWhenPtrIsUsedForTransferCallsThenCPUPathIsChoosen) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(false); MockContext mockContext; cl_context clContext = 
&mockContext; if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.clVersionSupport < 20) { GTEST_SKIP(); } auto status = CL_SUCCESS; cl_device_id clDevice = mockContext.getDevice(0u); auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, true, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(GraphicsAllocation::objectNotUsed, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, true, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_EQ(GraphicsAllocation::objectNotUsed, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, sharedMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnfiedSharedMemoryTransferCalls, givenHostAllocationThatIsSmallerThenTransferRequirementsThenErrorIsReturned) { MockContext mockContext; cl_context clContext = &mockContext; auto status = CL_SUCCESS; auto hostMemory = clHostMemAllocINTEL(clContext, nullptr, 4u, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, 
status); cl_device_id clDevice = mockContext.getDevice(0u); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, status); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_INVALID_OPERATION, status); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, hostMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnfiedSharedMemoryTransferCalls, givenSharedUSMllocationWithoutLocalMemoryWhenPointerIsUsedAsTranfserParameterThenUSMAllocationIsReused) { DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(0); MockContext mockContext; cl_context clContext = &mockContext; cl_device_id clDevice = mockContext.getDevice(0u); auto status = CL_SUCCESS; auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, 
sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(2u, svmAllocation->gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, sharedMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } TEST(UnfiedSharedMemoryTransferCalls, givenSharedUSMllocationWithLocalMemoryWhenPointerIsUsedAsTransferParameterThenUSMAllocationIsReused) { DebugManagerStateRestore restore; DebugManager.flags.EnableLocalMemory.set(1); MockContext mockContext; cl_context clContext = &mockContext; cl_device_id clDevice = mockContext.getDevice(0u); auto status = CL_SUCCESS; auto sharedMemory = clSharedMemAllocINTEL(clContext, clDevice, nullptr, 4096u, 0u, &status); auto svmAllocation = mockContext.getSVMAllocsManager()->getSVMAlloc(sharedMemory); ASSERT_EQ(CL_SUCCESS, status); auto buffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, 4096u, nullptr, &status); ASSERT_EQ(CL_SUCCESS, status); auto commandQueue = clCreateCommandQueue(clContext, clDevice, 0u, &status); ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); EXPECT_EQ(1u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = clEnqueueWriteBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(2u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); EXPECT_EQ(3u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = 
clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); status = clMemFreeINTEL(clContext, sharedMemory); ASSERT_EQ(CL_SUCCESS, status); clReleaseCommandQueue(commandQueue); } compute-runtime-20.13.16352/opencl/test/unit_test/memory_manager/unified_memory_token_tests.cpp000066400000000000000000000016451363734646600330030ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/unified_memory/unified_memory.h" #include "shared/source/unified_memory/usm_memory_support.h" #include "opencl/extensions/public/cl_ext_private.h" #include "gtest/gtest.h" TEST(UnifiedMemoryTests, givenCLUSMMemorySupportFlagsWhenUsingUnifiedMemorySupportFlagsThenEverythingMatch) { static_assert(CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_ACCESS, "Flags value difference"); static_assert(CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS, "Flags value difference"); static_assert(CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS, "Flags value difference"); static_assert(CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL == UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS, "Flags value difference"); } compute-runtime-20.13.16352/opencl/test/unit_test/mock_gdi/000077500000000000000000000000001363734646600234065ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mock_gdi/CMakeLists.txt000066400000000000000000000022271363734646600261510ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) project(mock_gdi) set(DEF_FILE "${CMAKE_CURRENT_SOURCE_DIR}/gdi32_mock.def") # Setting up our local list of test files set(IGDRCL_SRCS_tests_mock_gdi ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_gdi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gdi.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mock_gdi_gfx_partition.cpp 
${DEF_FILE} ) # this is a DLL add_library(mock_gdi SHARED ${IGDRCL_SRCS_tests_mock_gdi}) add_definitions(-DINSIDE_PLUGIN) target_include_directories(mock_gdi PRIVATE ${WDK_INCLUDE_PATHS} ${NEO__GMM_INCLUDE_DIR} ) set_target_properties( mock_gdi PROPERTIES DEBUG_OUTPUT_NAME "gdi32_mock" RELEASE_OUTPUT_NAME "gdi32_mock" RELEASEINTERNAL_OUTPUT_NAME "gdi32_mock" OUTPUT_NAME "gdi32_mock" ) create_project_source_tree(mock_gdi) set_target_properties(mock_gdi PROPERTIES FOLDER "test mocks") target_compile_definitions(mock_gdi PUBLIC MOCKABLE_VIRTUAL=virtual) add_dependencies(unit_tests mock_gdi) add_dependencies(igdrcl_tests mock_gdi) endif() compute-runtime-20.13.16352/opencl/test/unit_test/mock_gdi/gdi32_mock.def000066400000000000000000000033521363734646600260120ustar00rootroot00000000000000; ; Copyright (C) 2017-2019 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ; LIBRARY "gdi32_mock" EXPORTS D3DKMTOpenAdapterFromHdc D3DKMTOpenAdapterFromLuid D3DKMTCreateAllocation D3DKMTDestroyAllocation D3DKMTDestroyAllocation2 D3DKMTQueryAdapterInfo D3DKMTCloseAdapter D3DKMTCreateDevice D3DKMTDestroyDevice D3DKMTEscape D3DKMTCreateContextVirtual D3DKMTDestroyContext D3DKMTOpenResource D3DKMTOpenResourceFromNtHandle D3DKMTQueryResourceInfo D3DKMTQueryResourceInfoFromNtHandle D3DKMTLock D3DKMTUnlock D3DKMTRender D3DKMTCreateSynchronizationObject D3DKMTCreateSynchronizationObject2 D3DKMTDestroySynchronizationObject D3DKMTSignalSynchronizationObject D3DKMTWaitForSynchronizationObject D3DKMTWaitForSynchronizationObjectFromCpu D3DKMTSignalSynchronizationObjectFromCpu D3DKMTWaitForSynchronizationObjectFromGpu D3DKMTSignalSynchronizationObjectFromGpu D3DKMTCreatePagingQueue D3DKMTDestroyPagingQueue D3DKMTLock2 D3DKMTUnlock2 D3DKMTMapGpuVirtualAddress D3DKMTReserveGpuVirtualAddress D3DKMTFreeGpuVirtualAddress D3DKMTUpdateGpuVirtualAddress D3DKMTSubmitCommand D3DKMTMakeResident D3DKMTEvict D3DKMTGetDeviceState D3DKMTRegisterTrimNotification D3DKMTUnregisterTrimNotification 
D3DKMTCreateHwQueue D3DKMTDestroyHwQueue D3DKMTSubmitCommandToHwQueue MockSetAdapterInfo MockSetSizes GetMockSizes GetMockLastDestroyedResHandle SetMockLastDestroyedResHandle GetMockCreateDeviceParams SetMockCreateDeviceParams getMockAllocation getAdapterInfoAddress getLastCallMapGpuVaArg getLastCallReserveGpuVaArg setMapGpuVaFailConfig getCreateContextData getCreateHwQueueData getDestroyHwQueueData getSubmitCommandToHwQueueData getDestroySynchronizationObjectData getMonitorFenceCpuFenceAddress getCreateSynchronizationObject2FailCall getRegisterTrimNotificationFailCall compute-runtime-20.13.16352/opencl/test/unit_test/mock_gdi/mock_gdi.cpp000066400000000000000000000436031363734646600256740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mock_gdi/mock_gdi.h" #include "shared/source/memory_manager/memory_constants.h" ADAPTER_INFO gAdapterInfo = {0}; D3DDDI_MAPGPUVIRTUALADDRESS gLastCallMapGpuVaArg = {0}; D3DDDI_RESERVEGPUVIRTUALADDRESS gLastCallReserveGpuVaArg = {0}; uint32_t gMapGpuVaFailConfigCount = 0; uint32_t gMapGpuVaFailConfigMax = 0; uint64_t gGpuAddressSpace = 0ull; #ifdef __cplusplus // If used by C++ code, extern "C" { // we need to export the C interface #endif BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason, IN LPVOID Reserved) { return TRUE; } NTSTATUS __stdcall D3DKMTEscape(IN CONST D3DKMT_ESCAPE *pData) { static int PerfTicks = 0; ((NEO::TimeStampDataHeader *)pData->pPrivateDriverData)->m_Data.m_Out.RetCode = NEO::GTDI_RET_OK; ((NEO::TimeStampDataHeader *)pData->pPrivateDriverData)->m_Data.m_Out.gpuPerfTicks = ++PerfTicks; ((NEO::TimeStampDataHeader *)pData->pPrivateDriverData)->m_Data.m_Out.cpuPerfTicks = PerfTicks; ((NEO::TimeStampDataHeader *)pData->pPrivateDriverData)->m_Data.m_Out.gpuPerfFreq = 1; ((NEO::TimeStampDataHeader *)pData->pPrivateDriverData)->m_Data.m_Out.cpuPerfFreq = 1; return STATUS_SUCCESS; } DECL_FUNCTIONS() UINT64 
PagingFence = 0; void __stdcall MockSetAdapterInfo(const void *pGfxPlatform, const void *pGTSystemInfo, uint64_t gpuAddressSpace) { if (pGfxPlatform != NULL) { gAdapterInfo.GfxPlatform = *(PLATFORM *)pGfxPlatform; } if (pGTSystemInfo != NULL) { gAdapterInfo.SystemInfo = *(GT_SYSTEM_INFO *)pGTSystemInfo; } gGpuAddressSpace = gpuAddressSpace; InitGfxPartition(); } NTSTATUS __stdcall D3DKMTOpenAdapterFromLuid(IN OUT CONST D3DKMT_OPENADAPTERFROMLUID *openAdapter) { if (openAdapter == nullptr || (openAdapter->AdapterLuid.HighPart == 0 && openAdapter->AdapterLuid.LowPart == 0)) { return STATUS_INVALID_PARAMETER; } D3DKMT_OPENADAPTERFROMLUID *openAdapterNonConst = const_cast(openAdapter); openAdapterNonConst->hAdapter = ADAPTER_HANDLE; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTCreateDevice(IN OUT D3DKMT_CREATEDEVICE *createDevice) { if (createDevice == nullptr || createDevice->hAdapter != ADAPTER_HANDLE) { return STATUS_INVALID_PARAMETER; } createDevice->hDevice = DEVICE_HANDLE; SetMockCreateDeviceParams(*createDevice); return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTDestroyDevice(IN CONST D3DKMT_DESTROYDEVICE *destoryDevice) { if (destoryDevice == nullptr || destoryDevice->hDevice != DEVICE_HANDLE) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTCreatePagingQueue(IN OUT D3DKMT_CREATEPAGINGQUEUE *createQueue) { if (createQueue == nullptr || (createQueue->hDevice != DEVICE_HANDLE)) { return STATUS_INVALID_PARAMETER; } createQueue->hPagingQueue = PAGINGQUEUE_HANDLE; createQueue->hSyncObject = PAGINGQUEUE_SYNCOBJECT_HANDLE; createQueue->FenceValueCPUVirtualAddress = &PagingFence; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTDestroyPagingQueue(IN OUT D3DDDI_DESTROYPAGINGQUEUE *destoryQueue) { if (destoryQueue == nullptr || destoryQueue->hPagingQueue != PAGINGQUEUE_HANDLE) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } static D3DKMT_CREATECONTEXTVIRTUAL createContextData = {0}; static CREATECONTEXT_PVTDATA 
createContextPrivateData = {{0}}; NTSTATUS __stdcall D3DKMTCreateContextVirtual(IN D3DKMT_CREATECONTEXTVIRTUAL *createContext) { if (createContext == nullptr || createContext->hDevice != DEVICE_HANDLE) { return STATUS_INVALID_PARAMETER; } createContextData = *createContext; if (createContext->pPrivateDriverData) { createContextPrivateData = *((CREATECONTEXT_PVTDATA *)createContext->pPrivateDriverData); createContextData.pPrivateDriverData = &createContextPrivateData; } if ((createContext->PrivateDriverDataSize != 0 && createContext->pPrivateDriverData == nullptr) || (createContext->PrivateDriverDataSize == 0 && createContext->pPrivateDriverData != nullptr)) { return STATUS_INVALID_PARAMETER; } createContext->hContext = CONTEXT_HANDLE; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTDestroyContext(IN CONST D3DKMT_DESTROYCONTEXT *destroyContext) { if (destroyContext == nullptr || destroyContext->hContext != CONTEXT_HANDLE) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } static D3DKMT_CREATEALLOCATION pallocation{}; NTSTATUS __stdcall D3DKMTCreateAllocation(IN OUT D3DKMT_CREATEALLOCATION *allocation) { D3DDDI_ALLOCATIONINFO *allocationInfo; int numOfAllocations; bool createResource; bool globalShare; if (allocation == nullptr || allocation->hDevice != DEVICE_HANDLE) { return STATUS_INVALID_PARAMETER; } pallocation = *allocation; allocationInfo = allocation->pAllocationInfo; if (allocationInfo == NULL) { return STATUS_INVALID_PARAMETER; } numOfAllocations = allocation->NumAllocations; createResource = allocation->Flags.CreateResource; globalShare = allocation->Flags.CreateShared; if (createResource) { allocation->hResource = RESOURCE_HANDLE; } if (globalShare) { allocation->hGlobalShare = RESOURCE_HANDLE; } for (int i = 0; i < numOfAllocations; ++i) { if (allocationInfo != NULL) { allocationInfo->hAllocation = ALLOCATION_HANDLE; } allocationInfo++; } return STATUS_SUCCESS; } static unsigned int DestroyAllocationWithResourceHandleCalled = 0u; static 
D3DKMT_DESTROYALLOCATION2 destroyalloc2 = {0}; static D3DKMT_HANDLE LastDestroyedResourceHandle = 0; static D3DKMT_CREATEDEVICE CreateDeviceParams = {{0}}; NTSTATUS __stdcall D3DKMTDestroyAllocation2(IN CONST D3DKMT_DESTROYALLOCATION2 *destroyAllocation) { int numOfAllocations; const D3DKMT_HANDLE *allocationList; LastDestroyedResourceHandle = 0; if (destroyAllocation == nullptr || destroyAllocation->hDevice != DEVICE_HANDLE) { return STATUS_INVALID_PARAMETER; } destroyalloc2 = *destroyAllocation; numOfAllocations = destroyAllocation->AllocationCount; allocationList = destroyAllocation->phAllocationList; for (int i = 0; i < numOfAllocations; ++i) { if (allocationList != NULL) { if (*allocationList != ALLOCATION_HANDLE) { return STATUS_UNSUCCESSFUL; } } allocationList++; } if (numOfAllocations == 0 && destroyAllocation->hResource == 0u) { return STATUS_UNSUCCESSFUL; } if (destroyAllocation->hResource) { DestroyAllocationWithResourceHandleCalled = 1; LastDestroyedResourceHandle = destroyAllocation->hResource; } return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTMapGpuVirtualAddress(IN OUT D3DDDI_MAPGPUVIRTUALADDRESS *mapGpuVA) { if (mapGpuVA == nullptr) { memset(&gLastCallMapGpuVaArg, 0, sizeof(gLastCallMapGpuVaArg)); return STATUS_INVALID_PARAMETER; } memcpy(&gLastCallMapGpuVaArg, mapGpuVA, sizeof(gLastCallMapGpuVaArg)); if (mapGpuVA->hPagingQueue != PAGINGQUEUE_HANDLE) { return STATUS_INVALID_PARAMETER; } if (mapGpuVA->hAllocation != ALLOCATION_HANDLE && mapGpuVA->hAllocation != NT_ALLOCATION_HANDLE) { return STATUS_INVALID_PARAMETER; } if (mapGpuVA->MinimumAddress != 0) { if (mapGpuVA->BaseAddress != 0 && mapGpuVA->BaseAddress < mapGpuVA->MinimumAddress) { return STATUS_INVALID_PARAMETER; } } if (mapGpuVA->MaximumAddress != 0) { if (mapGpuVA->BaseAddress != 0 && mapGpuVA->BaseAddress > mapGpuVA->MaximumAddress) { return STATUS_INVALID_PARAMETER; } } if (mapGpuVA->BaseAddress == 0) { if (mapGpuVA->MinimumAddress) { mapGpuVA->VirtualAddress = 
mapGpuVA->MinimumAddress; } else { mapGpuVA->VirtualAddress = MemoryConstants::pageSize64k; } } else { if (MemoryConstants::maxSvmAddress != mapGpuVA->MaximumAddress && gLastCallReserveGpuVaArg.MinimumAddress != mapGpuVA->BaseAddress) { return STATUS_INVALID_PARAMETER; } mapGpuVA->VirtualAddress = mapGpuVA->BaseAddress; } if (gMapGpuVaFailConfigMax != 0) { if (gMapGpuVaFailConfigMax > gMapGpuVaFailConfigCount) { gMapGpuVaFailConfigCount++; return STATUS_UNSUCCESSFUL; } } mapGpuVA->PagingFenceValue = 1; return STATUS_PENDING; } NTSTATUS __stdcall D3DKMTReserveGpuVirtualAddress(IN OUT D3DDDI_RESERVEGPUVIRTUALADDRESS *reserveGpuVirtualAddress) { gLastCallReserveGpuVaArg = *reserveGpuVirtualAddress; reserveGpuVirtualAddress->VirtualAddress = reserveGpuVirtualAddress->MinimumAddress; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTQueryAdapterInfo(IN CONST D3DKMT_QUERYADAPTERINFO *queryAdapterInfo) { if (queryAdapterInfo == nullptr || queryAdapterInfo->hAdapter != ADAPTER_HANDLE) { return STATUS_INVALID_PARAMETER; } if (queryAdapterInfo->Type == KMTQAITYPE_UMDRIVERPRIVATE) { if (queryAdapterInfo->pPrivateDriverData == NULL) { return STATUS_INVALID_PARAMETER; } if (queryAdapterInfo->PrivateDriverDataSize == 0) { return STATUS_INVALID_PARAMETER; } } ADAPTER_INFO *adapterInfo = reinterpret_cast(queryAdapterInfo->pPrivateDriverData); adapterInfo->GfxPlatform = gAdapterInfo.GfxPlatform; adapterInfo->SystemInfo = gAdapterInfo.SystemInfo; adapterInfo->SkuTable = gAdapterInfo.SkuTable; adapterInfo->WaTable = gAdapterInfo.WaTable; adapterInfo->CacheLineSize = 64; adapterInfo->MinRenderFreq = 350; adapterInfo->MaxRenderFreq = 1150; adapterInfo->SizeOfDmaBuffer = 32768; adapterInfo->GfxMemorySize = 2181038080; adapterInfo->SystemSharedMemory = 4249540608; adapterInfo->SystemVideoMemory = 0; adapterInfo->GfxPartition.Standard.Base = gAdapterInfo.GfxPartition.Standard.Base; adapterInfo->GfxPartition.Standard.Limit = gAdapterInfo.GfxPartition.Standard.Limit; 
adapterInfo->GfxPartition.Standard64KB.Base = gAdapterInfo.GfxPartition.Standard64KB.Base; adapterInfo->GfxPartition.Standard64KB.Limit = gAdapterInfo.GfxPartition.Standard64KB.Limit; adapterInfo->GfxPartition.SVM.Base = gAdapterInfo.GfxPartition.SVM.Base; adapterInfo->GfxPartition.SVM.Limit = gAdapterInfo.GfxPartition.SVM.Limit; adapterInfo->GfxPartition.Heap32[0].Base = gAdapterInfo.GfxPartition.Heap32[0].Base; adapterInfo->GfxPartition.Heap32[0].Limit = gAdapterInfo.GfxPartition.Heap32[0].Limit; adapterInfo->GfxPartition.Heap32[1].Base = gAdapterInfo.GfxPartition.Heap32[1].Base; adapterInfo->GfxPartition.Heap32[1].Limit = gAdapterInfo.GfxPartition.Heap32[1].Limit; adapterInfo->GfxPartition.Heap32[2].Base = gAdapterInfo.GfxPartition.Heap32[2].Base; adapterInfo->GfxPartition.Heap32[2].Limit = gAdapterInfo.GfxPartition.Heap32[2].Limit; adapterInfo->GfxPartition.Heap32[3].Base = gAdapterInfo.GfxPartition.Heap32[3].Base; adapterInfo->GfxPartition.Heap32[3].Limit = gAdapterInfo.GfxPartition.Heap32[3].Limit; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTMakeResident(IN OUT D3DDDI_MAKERESIDENT *makeResident) { if (makeResident == nullptr || makeResident->hPagingQueue != PAGINGQUEUE_HANDLE) { return STATUS_INVALID_PARAMETER; } makeResident->PagingFenceValue = 0; return STATUS_PENDING; } static UINT totalPrivateSize = 0u; static UINT gmmSize = 0u; static void *gmmPtr = nullptr; static UINT numberOfAllocsToReturn = 0u; NTSTATUS __stdcall D3DKMTOpenResource(IN OUT D3DKMT_OPENRESOURCE *openResurce) { openResurce->hResource = RESOURCE_HANDLE; openResurce->pOpenAllocationInfo[0].hAllocation = ALLOCATION_HANDLE; openResurce->pOpenAllocationInfo[0].pPrivateDriverData = gmmPtr; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTOpenResourceFromNtHandle(IN OUT D3DKMT_OPENRESOURCEFROMNTHANDLE *openResurce) { openResurce->hResource = NT_RESOURCE_HANDLE; openResurce->pOpenAllocationInfo2[0].hAllocation = NT_ALLOCATION_HANDLE; openResurce->pOpenAllocationInfo2[0].pPrivateDriverData 
= gmmPtr; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTQueryResourceInfo(IN OUT D3DKMT_QUERYRESOURCEINFO *queryResourceInfo) { if (queryResourceInfo->hDevice != DEVICE_HANDLE || queryResourceInfo->hGlobalShare == INVALID_HANDLE) { return STATUS_INVALID_PARAMETER; } queryResourceInfo->TotalPrivateDriverDataSize = totalPrivateSize; queryResourceInfo->PrivateRuntimeDataSize = gmmSize; queryResourceInfo->NumAllocations = numberOfAllocsToReturn; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTQueryResourceInfoFromNtHandle(IN OUT D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *queryResourceInfo) { if (queryResourceInfo->hDevice != DEVICE_HANDLE || queryResourceInfo->hNtHandle == INVALID_HANDLE) { return STATUS_INVALID_PARAMETER; } queryResourceInfo->TotalPrivateDriverDataSize = totalPrivateSize; queryResourceInfo->PrivateRuntimeDataSize = gmmSize; queryResourceInfo->NumAllocations = numberOfAllocsToReturn; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTLock2(IN OUT D3DKMT_LOCK2 *lock2) { if (lock2->hAllocation == 0 || lock2->hDevice == 0) { return STATUS_INVALID_PARAMETER; } lock2->pData = (void *)65536; return STATUS_SUCCESS; } NTSTATUS __stdcall D3DKMTUnlock2(IN CONST D3DKMT_UNLOCK2 *unlock2) { if (unlock2->hAllocation == 0 || unlock2->hDevice == 0) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } static size_t cpuFence = 0; static bool createSynchronizationObject2FailCall = false; NTSTATUS __stdcall D3DKMTCreateSynchronizationObject2(IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *synchObject) { if (synchObject == nullptr) { return STATUS_INVALID_PARAMETER; } if (createSynchronizationObject2FailCall) { return STATUS_INVALID_PARAMETER; } synchObject->Info.MonitoredFence.FenceValueCPUVirtualAddress = &cpuFence; synchObject->Info.MonitoredFence.FenceValueGPUVirtualAddress = 3; synchObject->hSyncObject = 4; return STATUS_SUCCESS; } static D3DKMT_CREATEHWQUEUE createHwQueueData = {}; NTSTATUS __stdcall D3DKMTCreateHwQueue(IN OUT D3DKMT_CREATEHWQUEUE 
*createHwQueue) { createHwQueue->hHwQueueProgressFence = 1; createHwQueue->HwQueueProgressFenceCPUVirtualAddress = reinterpret_cast(2); createHwQueue->HwQueueProgressFenceGPUVirtualAddress = 3; createHwQueue->hHwQueue = 4; createHwQueueData = *createHwQueue; return STATUS_SUCCESS; } static D3DKMT_DESTROYHWQUEUE destroyHwQueueData = {}; NTSTATUS __stdcall D3DKMTDestroyHwQueue(IN CONST D3DKMT_DESTROYHWQUEUE *destroyHwQueue) { destroyHwQueueData = *destroyHwQueue; return STATUS_SUCCESS; } static D3DKMT_SUBMITCOMMANDTOHWQUEUE submitCommandToHwQueueData = {}; NTSTATUS __stdcall D3DKMTSubmitCommandToHwQueue(IN CONST D3DKMT_SUBMITCOMMANDTOHWQUEUE *submitCommandToHwQueue) { submitCommandToHwQueueData = *submitCommandToHwQueue; return STATUS_SUCCESS; } static D3DKMT_DESTROYSYNCHRONIZATIONOBJECT destroySynchronizationObjectData = {}; NTSTATUS __stdcall D3DKMTDestroySynchronizationObject(IN CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *destroySynchronizationObject) { destroySynchronizationObjectData = *destroySynchronizationObject; return STATUS_SUCCESS; } static bool registerTrimNotificationFailCall = false; NTSTATUS __stdcall D3DKMTRegisterTrimNotification(IN D3DKMT_REGISTERTRIMNOTIFICATION *registerTrimNotification) { if (registerTrimNotificationFailCall) { return STATUS_INVALID_PARAMETER; } registerTrimNotification->Handle = TRIM_CALLBACK_HANDLE; return STATUS_SUCCESS; } #ifdef __cplusplus } #endif NTSTATUS MockSetSizes(void *inGmmPtr, UINT inNumAllocsToReturn, UINT inGmmSize, UINT inTotalPrivateSize) { gmmSize = inGmmSize; gmmPtr = inGmmPtr; totalPrivateSize = inTotalPrivateSize; numberOfAllocsToReturn = inNumAllocsToReturn; return STATUS_SUCCESS; } NTSTATUS GetMockSizes(UINT &destroyAlloactionWithResourceHandleCalled, D3DKMT_DESTROYALLOCATION2 *&ptrDestroyAlloc) { destroyAlloactionWithResourceHandleCalled = DestroyAllocationWithResourceHandleCalled; ptrDestroyAlloc = &destroyalloc2; return NTSTATUS(); } D3DKMT_HANDLE GetMockLastDestroyedResHandle() { return 
LastDestroyedResourceHandle; } void SetMockLastDestroyedResHandle(D3DKMT_HANDLE handle) { LastDestroyedResourceHandle = handle; } D3DKMT_CREATEDEVICE GetMockCreateDeviceParams() { return CreateDeviceParams; } void SetMockCreateDeviceParams(D3DKMT_CREATEDEVICE params) { CreateDeviceParams = params; } D3DKMT_CREATEALLOCATION *getMockAllocation() { return &pallocation; } ADAPTER_INFO *getAdapterInfoAddress() { return &gAdapterInfo; } D3DDDI_MAPGPUVIRTUALADDRESS *getLastCallMapGpuVaArg() { return &gLastCallMapGpuVaArg; } D3DDDI_RESERVEGPUVIRTUALADDRESS *getLastCallReserveGpuVaArg() { return &gLastCallReserveGpuVaArg; } void setMapGpuVaFailConfig(uint32_t count, uint32_t max) { gMapGpuVaFailConfigCount = count; gMapGpuVaFailConfigMax = max; } D3DKMT_CREATECONTEXTVIRTUAL *getCreateContextData() { return &createContextData; } D3DKMT_CREATEHWQUEUE *getCreateHwQueueData() { return &createHwQueueData; } D3DKMT_DESTROYHWQUEUE *getDestroyHwQueueData() { return &destroyHwQueueData; } D3DKMT_SUBMITCOMMANDTOHWQUEUE *getSubmitCommandToHwQueueData() { return &submitCommandToHwQueueData; } D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *getDestroySynchronizationObjectData() { return &destroySynchronizationObjectData; } VOID *getMonitorFenceCpuFenceAddress() { return &cpuFence; } bool *getCreateSynchronizationObject2FailCall() { return &createSynchronizationObject2FailCall; } bool *getRegisterTrimNotificationFailCall() { return ®isterTrimNotificationFailCall; } compute-runtime-20.13.16352/opencl/test/unit_test/mock_gdi/mock_gdi.h000066400000000000000000000107661363734646600253450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/os_interface/windows/os_time_win.h" #include #include #include "Windows.h" #include "umKmInc/sharedata.h" #include #include #define DECL_FUNCTIONS() \ FUNCTION(OpenAdapterFromHdc, IN OUT D3DKMT_OPENADAPTERFROMHDC *) \ 
FUNCTION(DestroyAllocation, IN CONST D3DKMT_DESTROYALLOCATION *) \ FUNCTION(CloseAdapter, IN CONST D3DKMT_CLOSEADAPTER *) \ FUNCTION(Lock, IN OUT D3DKMT_LOCK *) \ FUNCTION(Unlock, IN CONST D3DKMT_UNLOCK *) \ FUNCTION(Render, IN OUT D3DKMT_RENDER *) \ FUNCTION(CreateSynchronizationObject, IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT *) \ FUNCTION(SignalSynchronizationObject, IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *) \ FUNCTION(WaitForSynchronizationObject, IN OUT CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *) \ FUNCTION(WaitForSynchronizationObjectFromCpu, IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *) \ FUNCTION(SignalSynchronizationObjectFromCpu, IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU *) \ FUNCTION(WaitForSynchronizationObjectFromGpu, IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU *) \ FUNCTION(SignalSynchronizationObjectFromGpu, IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU *) \ FUNCTION(FreeGpuVirtualAddress, IN CONST D3DKMT_FREEGPUVIRTUALADDRESS *) \ FUNCTION(UpdateGpuVirtualAddress, IN CONST D3DKMT_UPDATEGPUVIRTUALADDRESS *) \ FUNCTION(SubmitCommand, IN CONST D3DKMT_SUBMITCOMMAND *) \ FUNCTION(Evict, IN OUT D3DKMT_EVICT *) \ FUNCTION(GetDeviceState, IN OUT D3DKMT_GETDEVICESTATE *) \ FUNCTION(UnregisterTrimNotification, IN D3DKMT_UNREGISTERTRIMNOTIFICATION *) #define STR(X) #X #define FUNCTION(FUNC_NAME, FUNC_ARG) \ NTSTATUS __stdcall D3DKMT##FUNC_NAME(FUNC_ARG) { \ return STATUS_SUCCESS; \ } #define ADAPTER_HANDLE (static_cast(0x40001234)) #define DEVICE_HANDLE (static_cast(0x40004321)) #define PAGINGQUEUE_HANDLE (static_cast(0x40005678)) #define PAGINGQUEUE_SYNCOBJECT_HANDLE (static_cast(0x40008765)) #define CONTEXT_HANDLE (static_cast(0x40001111)) #define INVALID_HANDLE (static_cast(0)) #define RESOURCE_HANDLE (static_cast(0x80000000)) #define ALLOCATION_HANDLE (static_cast(0x80000008)) #define NT_RESOURCE_HANDLE (static_cast(0x80000001)) #define NT_ALLOCATION_HANDLE (static_cast(0x80000009)) #define TRIM_CALLBACK_HANDLE 
(reinterpret_cast(0x80123000010)) #define GPUVA (static_cast(0x80123000000)) NTSTATUS MockSetSizes(void *gmmPtr, UINT numAllocsToReturn, UINT gmmSize, UINT totalPrivateSize); NTSTATUS GetMockSizes(UINT &destroyAlloactionWithResourceHandleCalled, D3DKMT_DESTROYALLOCATION2 *&ptrDestroyAlloc); D3DKMT_HANDLE GetMockLastDestroyedResHandle(); void SetMockLastDestroyedResHandle(D3DKMT_HANDLE handle); D3DKMT_CREATEDEVICE GetMockCreateDeviceParams(); void SetMockCreateDeviceParams(D3DKMT_CREATEDEVICE params); D3DKMT_CREATEALLOCATION *getMockAllocation(); ADAPTER_INFO *getAdapterInfoAddress(); D3DDDI_MAPGPUVIRTUALADDRESS *getLastCallMapGpuVaArg(); D3DDDI_RESERVEGPUVIRTUALADDRESS *getLastCallReserveGpuVaArg(); void setMapGpuVaFailConfig(uint32_t count, uint32_t max); D3DKMT_CREATECONTEXTVIRTUAL *getCreateContextData(); D3DKMT_CREATEHWQUEUE *getCreateHwQueueData(); D3DKMT_DESTROYHWQUEUE *getDestroyHwQueueData(); D3DKMT_SUBMITCOMMANDTOHWQUEUE *getSubmitCommandToHwQueueData(); D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *getDestroySynchronizationObjectData(); void InitGfxPartition(); VOID *getMonitorFenceCpuFenceAddress(); bool *getCreateSynchronizationObject2FailCall(); bool *getRegisterTrimNotificationFailCall(); compute-runtime-20.13.16352/opencl/test/unit_test/mock_gdi/mock_gdi_gfx_partition.cpp000066400000000000000000000022211363734646600306200ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_constants.h" #include "mock_gdi.h" extern ADAPTER_INFO gAdapterInfo; void InitGfxPartition() { gAdapterInfo.GfxPartition.Standard.Base = 0x0000800400000000; gAdapterInfo.GfxPartition.Standard.Limit = 0x0000eeffffffffff; gAdapterInfo.GfxPartition.Standard64KB.Base = 0x0000b80200000000; gAdapterInfo.GfxPartition.Standard64KB.Limit = 0x0000efffffffffff; gAdapterInfo.GfxPartition.SVM.Base = 0; gAdapterInfo.GfxPartition.SVM.Limit = MemoryConstants::maxSvmAddress; 
gAdapterInfo.GfxPartition.Heap32[0].Base = 0x0000800000000000; gAdapterInfo.GfxPartition.Heap32[0].Limit = 0x00008000ffffefff; gAdapterInfo.GfxPartition.Heap32[1].Base = 0x0000800100000000; gAdapterInfo.GfxPartition.Heap32[1].Limit = 0x00008001ffffefff; gAdapterInfo.GfxPartition.Heap32[2].Base = 0x0000800200000000; gAdapterInfo.GfxPartition.Heap32[2].Limit = 0x00008002ffffefff; gAdapterInfo.GfxPartition.Heap32[3].Base = 0x0000800300000000; gAdapterInfo.GfxPartition.Heap32[3].Limit = 0x00008003ffffefff; } compute-runtime-20.13.16352/opencl/test/unit_test/mock_gl/000077500000000000000000000000001363734646600232455ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mock_gl/CMakeLists.txt000066400000000000000000000001421363734646600260020ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/mock_gl/windows/000077500000000000000000000000001363734646600247375ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mock_gl/windows/CMakeLists.txt000066400000000000000000000014331363734646600275000ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_mock_opengl32 ${CMAKE_CURRENT_SOURCE_DIR}/mock_opengl32.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_opengl32.def ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/windows/mock_function.cpp ) add_library(mock_opengl32 SHARED ${IGDRCL_SRCS_mock_opengl32}) add_dependencies(unit_tests mock_opengl32) add_dependencies(igdrcl_tests mock_opengl32) set_target_properties(mock_opengl32 PROPERTIES FOLDER "test mocks") target_include_directories(mock_opengl32 PRIVATE $) target_compile_definitions(mock_opengl32 PRIVATE $) endif() compute-runtime-20.13.16352/opencl/test/unit_test/mock_gl/windows/mock_opengl32.cpp000066400000000000000000000332201363734646600301050ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #define _GDI32_ //It causes that definitions of functions are not loaded from dll in file wingdi.h because they are in this file. #include "opencl/test/unit_test/helpers/windows/mock_function.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "GL/gl.h" #include extern "C" { const char *glString = "Intel"; const char *glVersion = "4.0"; const char *arrayStringi[2]{"GL_OES_framebuffer_object", "GL_EXT_framebuffer_object"}; int GLAcquireSharedBufferCalled = 0; int GLAcquireSharedRenderBufferCalled = 0; int GLAcquireSharedTextureCalled = 0; int GLDeleteContextCalled = 0; int GLGetCurrentContextCalled = 0; int GLGetCurrentDisplayCalled = 0; int GLGetSyncivCalled = 0; int GLMakeCurrentCalled = 0; int GLReleaseSharedBufferCalled = 0; int GLReleaseSharedRenderBufferCalled = 0; int GLReleaseSharedTextureCalled = 0; int GLReleaseSyncCalled = 0; int GLRetainSyncCalled = 0; int WGLCreateContextCalled = 0; int WGLDeleteContextCalled = 0; int WGLShareListsCalled = 0; CL_GL_BUFFER_INFO bufferInfoInput = {0}; CL_GL_BUFFER_INFO bufferInfoOutput = {0}; CL_GL_RESOURCE_INFO textureInfoInput = {0}; CL_GL_RESOURCE_INFO textureInfoOutput = {0}; NEO::GLMockReturnedValues glMockReturnedValues = {0}; GLboolean GLSetSharedOCLContextStateReturnedValue = 1u; const unsigned char *WINAPI glGetString(unsigned int name) { if (name == GL_VENDOR) return reinterpret_cast(glString); if (name == GL_VERSION) return reinterpret_cast(glVersion); return reinterpret_cast(""); }; GLboolean WINAPI wglSetSharedOCLContextStateINTELMock(HDC HDCHandle, HGLRC ContextHandle, unsigned char State, void *pContextInfo) { ((NEO::ContextInfo *)pContextInfo)->ContextHandle = 1; ((NEO::ContextInfo *)pContextInfo)->DeviceHandle = 2; return GLSetSharedOCLContextStateReturnedValue; }; GLboolean WINAPI mockGLAcquireSharedBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pBufferInfo = 
(CL_GL_BUFFER_INFO *)pResourceInfo; bufferInfoInput = *pBufferInfo; pBufferInfo->bufferSize = bufferInfoOutput.bufferSize; pBufferInfo->globalShareHandle = bufferInfoOutput.globalShareHandle; pBufferInfo->pGmmResInfo = bufferInfoOutput.pGmmResInfo; pBufferInfo->bufferOffset = bufferInfoOutput.bufferOffset; GLAcquireSharedBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLReleaseSharedBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { bufferInfoInput = *static_cast(pResourceInfo); GLReleaseSharedBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLAcquireSharedRenderBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pTextureInfo = (CL_GL_RESOURCE_INFO *)pResourceInfo; textureInfoInput = *pTextureInfo; pTextureInfo->globalShareHandle = textureInfoOutput.globalShareHandle; pTextureInfo->pGmmResInfo = textureInfoOutput.pGmmResInfo; pTextureInfo->glInternalFormat = GL_RGBA8; GLAcquireSharedRenderBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLReleaseSharedRenderBuffer(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { textureInfoInput = *static_cast(pResourceInfo); GLReleaseSharedRenderBufferCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLAcquireSharedTexture(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pTextureInfo = (CL_GL_RESOURCE_INFO *)pResourceInfo; textureInfoInput = *pTextureInfo; pTextureInfo->globalShareHandle = textureInfoOutput.globalShareHandle; pTextureInfo->globalShareHandleMCS = textureInfoOutput.globalShareHandleMCS; if (pTextureInfo->target == GL_TEXTURE_BUFFER) { // size and width for texture buffer are queried from textureInfo - not from gmm pTextureInfo->textureBufferSize = textureInfoOutput.textureBufferSize; pTextureInfo->textureBufferWidth = textureInfoOutput.textureBufferWidth; } pTextureInfo->pGmmResInfo = textureInfoOutput.pGmmResInfo; pTextureInfo->glInternalFormat = textureInfoOutput.glInternalFormat ? 
textureInfoOutput.glInternalFormat : GL_RGBA8; pTextureInfo->glHWFormat = textureInfoOutput.glHWFormat; pTextureInfo->textureBufferOffset = textureInfoOutput.textureBufferOffset; pTextureInfo->numberOfSamples = textureInfoOutput.numberOfSamples; GLAcquireSharedTextureCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGLReleaseSharedTexture(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { textureInfoInput = *static_cast(pResourceInfo); GLReleaseSharedTextureCalled++; return (GLboolean)1; }; GLboolean WINAPI mockGlRetainSync(GLDisplay HDCHandle, GLContext ContextHandle, GLContext BackupContextHandle, GLvoid *pSyncInfo) { GLRetainSyncCalled++; GL_CL_SYNC_INFO *syncInfo = (GL_CL_SYNC_INFO *)(pSyncInfo); syncInfo->pSync = (void *)0x123; return GL_TRUE; }; GLboolean WINAPI mockGlReleaseSync(GLDisplay HDCHandle, GLContext ContextHandle, GLContext BackupContextHandle, GLvoid *pSync) { GLReleaseSyncCalled++; return GL_TRUE; }; void WINAPI mockGlGetSynciv(GLvoid *pSync, GLenum pname, GLint *value) { GLGetSyncivCalled++; *value = glMockReturnedValues.syncivRetVal; }; const unsigned char *_stdcall glGetStringiMock(unsigned int name, unsigned int index) { return reinterpret_cast(arrayStringi[index]); }; GLDisplay WINAPI mockGLGetCurrentDisplay() { GLGetCurrentDisplayCalled++; return glMockReturnedValues.currentDisplay; }; PROC WINAPI wglGetProcAddress(LPCSTR name) { if (strcmp(name, "wglSetSharedOCLContextStateINTEL") == 0) { return reinterpret_cast(*wglSetSharedOCLContextStateINTELMock); } if (strcmp(name, "wglAcquireSharedBufferINTEL") == 0) { return reinterpret_cast(*mockGLAcquireSharedBuffer); } if (strcmp(name, "wglReleaseSharedBufferINTEL") == 0) { return reinterpret_cast(*mockGLReleaseSharedBuffer); } if (strcmp(name, "wglAcquireSharedRenderBufferINTEL") == 0) { return reinterpret_cast(*mockGLAcquireSharedRenderBuffer); } if (strcmp(name, "wglReleaseSharedRenderBufferINTEL") == 0) { return reinterpret_cast(*mockGLReleaseSharedRenderBuffer); } if 
(strcmp(name, "wglAcquireSharedTextureINTEL") == 0) { return reinterpret_cast(*mockGLAcquireSharedTexture); } if (strcmp(name, "wglReleaseSharedTextureINTEL") == 0) { return reinterpret_cast(*mockGLReleaseSharedTexture); } if (strcmp(name, "wglRetainSyncINTEL") == 0) { return reinterpret_cast(*mockGlRetainSync); } if (strcmp(name, "wglReleaseSyncINTEL") == 0) { return reinterpret_cast(*mockGlReleaseSync); } if (strcmp(name, "wglGetSyncivINTEL") == 0) { return reinterpret_cast(*mockGlGetSynciv); } if (strcmp(name, "glGetStringi") == 0) { return reinterpret_cast(*glGetStringiMock); } return nullptr; } HGLRC WINAPI wglGetCurrentContext() { GLGetCurrentContextCalled++; return glMockReturnedValues.currentContext; }; HDC WINAPI wglGetCurrentDC() { return mockGLGetCurrentDisplay(); }; HGLRC WINAPI wglCreateContext(HDC Arg1) { WGLCreateContextCalled++; return (GLContext)0x101; }; BOOL WINAPI wglDeleteContext(HGLRC Arg1) { WGLDeleteContextCalled++; GLDeleteContextCalled++; return (GLboolean)1; }; void WINAPI glGetIntegerv(GLenum pname, GLint *params) { return NEO::MockGLSharingFunctions::glGetIntegervTest(pname, params); }; BOOL WINAPI wglShareLists(HGLRC arg1, HGLRC arg2) { WGLShareListsCalled++; return 1; }; BOOL WINAPI wglMakeCurrent(HDC arg1, HGLRC arg2) { GLMakeCurrentCalled++; glMockReturnedValues.madeCurrentContext = arg2; if (glMockReturnedValues.forceMakeCurrentCallFail) { if (glMockReturnedValues.failsCounter < glMockReturnedValues.numberOfCallFails) { glMockReturnedValues.failsCounter++; return GL_FALSE; } } return (GLboolean)1; }; void *WINAPI mockLoader(const char *name) { if (strcmp(name, "realFunction") == 0) { return *realFunction; } return nullptr; }; void resetParam(const char *name) { if (strcmp(name, "GLAcquireSharedBufferCalled") == 0) { GLAcquireSharedBufferCalled = 0; } if (strcmp(name, "GLAcquireSharedRenderBufferCalled") == 0) { GLAcquireSharedRenderBufferCalled = 0; } if (strcmp(name, "GLAcquireSharedTextureCalled") == 0) { 
GLAcquireSharedTextureCalled = 0; } if (strcmp(name, "GLDeleteContextCalled") == 0) { GLDeleteContextCalled = 0; } if (strcmp(name, "GLGetCurrentContextCalled") == 0) { GLGetCurrentContextCalled = 0; } if (strcmp(name, "GLGetCurrentDisplayCalled") == 0) { GLGetCurrentDisplayCalled = 0; } if (strcmp(name, "GLGetSyncivCalled") == 0) { GLGetSyncivCalled = 0; } if (strcmp(name, "GLMakeCurrentCalled") == 0) { GLMakeCurrentCalled = 0; } if (strcmp(name, "GLReleaseSharedBufferCalled") == 0) { GLReleaseSharedBufferCalled = 0; } if (strcmp(name, "GLReleaseSharedRenderBufferCalled") == 0) { GLReleaseSharedRenderBufferCalled = 0; } if (strcmp(name, "GLReleaseSharedTextureCalled") == 0) { GLReleaseSharedTextureCalled = 0; } if (strcmp(name, "GLReleaseSyncCalled") == 0) { GLReleaseSyncCalled = 0; } if (strcmp(name, "GLRetainSyncCalled") == 0) { GLRetainSyncCalled = 0; } if (strcmp(name, "WGLCreateContextCalled") == 0) { WGLCreateContextCalled = 0; } if (strcmp(name, "WGLDeleteContextCalled") == 0) { WGLDeleteContextCalled = 0; } if (strcmp(name, "WGLShareListsCalled") == 0) { WGLShareListsCalled = 0; } if (strcmp(name, "") == 0) { GLAcquireSharedBufferCalled = 0; GLAcquireSharedRenderBufferCalled = 0; GLAcquireSharedTextureCalled = 0; GLDeleteContextCalled = 0; GLGetCurrentContextCalled = 0; GLGetCurrentDisplayCalled = 0; GLGetSyncivCalled = 0; GLMakeCurrentCalled = 0; GLReleaseSharedBufferCalled = 0; GLReleaseSharedRenderBufferCalled = 0; GLReleaseSharedTextureCalled = 0; GLReleaseSyncCalled = 0; GLRetainSyncCalled = 0; WGLCreateContextCalled = 0; WGLDeleteContextCalled = 0; WGLShareListsCalled = 0; } }; int getParam(const char *name) { if (strcmp(name, "GLAcquireSharedBufferCalled") == 0) { return GLAcquireSharedBufferCalled; } if (strcmp(name, "GLAcquireSharedRenderBufferCalled") == 0) { return GLAcquireSharedRenderBufferCalled; } if (strcmp(name, "GLAcquireSharedTextureCalled") == 0) { return GLAcquireSharedTextureCalled; } if (strcmp(name, "GLDeleteContextCalled") == 0) { 
return GLDeleteContextCalled; } if (strcmp(name, "GLGetCurrentContextCalled") == 0) { return GLGetCurrentContextCalled; } if (strcmp(name, "GLGetCurrentDisplayCalled") == 0) { return GLGetCurrentDisplayCalled; } if (strcmp(name, "GLGetSyncivCalled") == 0) { return GLGetSyncivCalled; } if (strcmp(name, "GLMakeCurrentCalled") == 0) { return GLMakeCurrentCalled; } if (strcmp(name, "GLReleaseSharedBufferCalled") == 0) { return GLReleaseSharedBufferCalled; } if (strcmp(name, "GLReleaseSharedRenderBufferCalled") == 0) { return GLReleaseSharedRenderBufferCalled; } if (strcmp(name, "GLReleaseSharedTextureCalled") == 0) { return GLReleaseSharedTextureCalled; } if (strcmp(name, "GLReleaseSyncCalled") == 0) { return GLReleaseSyncCalled; } if (strcmp(name, "GLRetainSyncCalled") == 0) { return GLRetainSyncCalled; } if (strcmp(name, "WGLCreateContextCalled") == 0) { return WGLCreateContextCalled; } if (strcmp(name, "WGLDeleteContextCalled") == 0) { return WGLDeleteContextCalled; } if (strcmp(name, "WGLShareListsCalled") == 0) { return WGLShareListsCalled; } return 0; }; CL_GL_BUFFER_INFO getBufferInfo() { return bufferInfoInput; }; CL_GL_RESOURCE_INFO getTextureInfo() { return textureInfoInput; }; void memParam() { memset(&bufferInfoInput, 0, sizeof(CL_GL_BUFFER_INFO)); memset(&bufferInfoOutput, 0, sizeof(CL_GL_BUFFER_INFO)); memset(&textureInfoInput, 0, sizeof(CL_GL_RESOURCE_INFO)); memset(&textureInfoOutput, 0, sizeof(CL_GL_RESOURCE_INFO)); memset(&glMockReturnedValues, 0, sizeof(GLMockReturnedValues)); }; void loadBuffer(CL_GL_BUFFER_INFO buff) { bufferInfoOutput = buff; }; void loadTexture(CL_GL_RESOURCE_INFO texture) { textureInfoOutput = texture; }; NEO::GLMockReturnedValues getGlMockReturnedValues() { return glMockReturnedValues; }; void setGlMockReturnedValues(NEO::GLMockReturnedValues value) { glMockReturnedValues = value; }; void setGetSyncivReturnValue(int val) { glMockReturnedValues.syncivRetVal = val; } void glSetString(const char *name, unsigned int var) { if (var 
== GL_VENDOR) { glString = name; } else if (var == GL_VERSION) { glVersion = name; } }; void glSetStringi(const char *name, unsigned int index) { arrayStringi[index] = name; }; void setGLSetSharedOCLContextStateReturnedValue(GLboolean value) { GLSetSharedOCLContextStateReturnedValue = static_cast(value); }; GLboolean getGLSetSharedOCLContextStateReturnedValue() { return GLSetSharedOCLContextStateReturnedValue; }; } compute-runtime-20.13.16352/opencl/test/unit_test/mock_gl/windows/mock_opengl32.def000066400000000000000000000034631363734646600300670ustar00rootroot00000000000000; Copyright (c) 2017-2020, Intel Corporation ; ; Permission is hereby granted, free of charge, to any person obtaining a ; copy of this software and associated documentation files (the "Software"), ; to deal in the Software without restriction, including without limitation ; the rights to use, copy, modify, merge, publish, distribute, sublicense, ; and/or sell copies of the Software, and to permit persons to whom the ; Software is furnished to do so, subject to the following conditions: ; ; The above copyright notice and this permission notice shall be included ; in all copies or substantial portions of the Software. ; ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR ; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ; OTHER DEALINGS IN THE SOFTWARE. 
LIBRARY "mock_opengl32" EXPORTS glGetString wglGetProcAddress glSetString mockLoader glGetStringiMock wglSetSharedOCLContextStateINTELMock wglGetCurrentContext wglGetCurrentDC glGetIntegerv wglCreateContext wglDeleteContext wglShareLists wglMakeCurrent glSetString glSetStringi mockGLAcquireSharedBuffer resetParam getParam loadBuffer getBufferInfo memParam setGLSetSharedOCLContextStateReturnedValue getGLSetSharedOCLContextStateReturnedValue mockGLAcquireSharedRenderBuffer mockGLReleaseSharedBuffer mockGLReleaseSharedRenderBuffer mockGLReleaseSharedTexture mockGLAcquireSharedTexture loadTexture getTextureInfo mockGLGetCurrentDisplay setGlMockReturnedValues getGlMockReturnedValues mockGlRetainSync mockGlReleaseSync mockGlGetSyncivcompute-runtime-20.13.16352/opencl/test/unit_test/mock_gmm/000077500000000000000000000000001363734646600234235ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mock_gmm/CMakeLists.txt000066400000000000000000000012041363734646600261600ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(target_name mock_gmm) project(${target_name}) # Setting up our local list of test files set(IGDRCL_SRCS_tests_mock_gmm ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm.cpp ) add_library(${target_name} EXCLUDE_FROM_ALL OBJECT ${IGDRCL_SRCS_tests_mock_gmm}) target_include_directories(${target_name} PRIVATE ${WDK_INCLUDE_PATHS} ${NEO__GMM_INCLUDE_DIR} ) create_project_source_tree(${target_name}) set_target_properties(${target_name} PROPERTIES FOLDER "test mocks") target_compile_definitions(${target_name} PUBLIC) compute-runtime-20.13.16352/opencl/test/unit_test/mock_gmm/mock_gmm.cpp000066400000000000000000000021261363734646600257210ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm_interface.h" namespace NEO { GMM_INIT_IN_ARGS passedInputArgs = 
{}; SKU_FEATURE_TABLE passedFtrTable = {}; WA_TABLE passedWaTable = {}; bool copyInputArgs = false; namespace GmmInterface { GMM_STATUS initialize(GMM_INIT_IN_ARGS *pInArgs, GMM_INIT_OUT_ARGS *pOutArgs) { pOutArgs->pGmmClientContext = reinterpret_cast(0x01); if (pInArgs) { if (pInArgs->Platform.eProductFamily == PRODUCT_FAMILY::IGFX_UNKNOWN && pInArgs->Platform.ePCHProductFamily == PCH_PRODUCT_FAMILY::PCH_UNKNOWN) { return GMM_ERROR; } if (copyInputArgs) { passedInputArgs = *pInArgs; passedFtrTable = *reinterpret_cast(pInArgs->pSkuTable); passedWaTable = *reinterpret_cast(pInArgs->pWaTable); } return GMM_SUCCESS; } return GMM_INVALIDPARAM; } void destroy(GMM_INIT_OUT_ARGS *pInArgs) { } } // namespace GmmInterface } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/000077500000000000000000000000001363734646600227465ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mocks/CMakeLists.txt000066400000000000000000000153511363734646600255130ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_compiler_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_cif.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_cif.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_compilers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_compilers.h ) if(WIN32) list(APPEND IGDRCL_SRCS_tests_compiler_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_compilers_windows.cpp ) else() list(APPEND IGDRCL_SRCS_tests_compiler_mocks ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mock_compilers_linux.cpp ) endif() set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_compiler_mocks ${IGDRCL_SRCS_tests_compiler_mocks}) set(IGDRCL_SRCS_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_allocation_properties.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_async_event_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_async_event_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_center.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_csr.h 
${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_file_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_aub_subcapture_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_block_kernel_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_builtins.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_builtins.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_cl_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_cl_device.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_command_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_csr.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_csr.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_deferrable_deletion.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_deferrable_deletion.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_deferred_deleter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_deferred_deleter.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_device.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_execution_environment.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_experimental_command_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gfx_partition.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gfx_partition.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_client_context_base.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_client_context_base.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_page_table_mngr.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_page_table_mngr.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_resource_info.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_resource_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_graphics_allocation.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mock_gmm_client_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mock_gmm_client_context.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_host_ptr_manager.h 
${CMAKE_CURRENT_SOURCE_DIR}/mock_internal_allocation_storage.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_image.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_lrca_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_operations_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_context.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_ostime.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_physical_address_allocator.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_platform.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_program.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sampler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sharing_factory.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_sip.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_source_level_debugger.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_submissions_aggregator.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_svm_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h ${NEO_SHARED_DIRECTORY}/gmm_helper/page_table_mngr_impl.cpp ${IGDRCL_SRCS_tests_compiler_mocks} ) if(WIN32) file(GLOB IGDRCL_SRC_tests_mock_wddm "${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm2[0-9]\.*") list(APPEND IGDRCL_SRCS_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_d3d_objects.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_ostime_win.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_memory_base.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gmm_memory_base.h ${CMAKE_CURRENT_SOURCE_DIR}/gmm_memory${BRANCH_DIR_SUFFIX}/mock_gmm_memory.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_interface20.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_interface23.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_residency_allocations_container.h 
${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_residency_logger.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_residency_logger_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm_mock_helpers.h ${IGDRCL_SRC_tests_mock_wddm} ) else() list(APPEND IGDRCL_SRCS_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_allocation.h ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_command_stream_receiver.h ${NEO_SHARED_DIRECTORY}/os_interface/linux/page_table_manager_functions.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/drm_mock.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/drm_mock.h ) endif() add_library(igdrcl_mocks STATIC EXCLUDE_FROM_ALL ${IGDRCL_SRCS_tests_mocks}) add_subdirectories() if(WIN32) target_include_directories(igdrcl_mocks PUBLIC ${NEO_SOURCE_DIR}/opencl/test/unit_test/mocks/gmm_memory${BRANCH_DIR_SUFFIX} ) endif() # add_dependencies(igdrcl_mocks gmock-gtest) get_property(NEO_CORE_tests_mocks GLOBAL PROPERTY NEO_CORE_tests_mocks) list(APPEND NEO_CORE_tests_mocks ${NEO_CORE_tests_mocks} ) target_sources(igdrcl_mocks PRIVATE ${NEO_CORE_tests_mocks}) target_include_directories(igdrcl_mocks PRIVATE $ $ ) target_compile_definitions(igdrcl_mocks PRIVATE MOCKABLE_VIRTUAL=virtual $) set_target_properties(igdrcl_mocks PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(igdrcl_mocks PROPERTIES FOLDER "test mocks") create_project_source_tree(igdrcl_mocks) compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gl/000077500000000000000000000000001363734646600233505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gl/CMakeLists.txt000066400000000000000000000001471363734646600261120ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectories() 
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gl/windows/000077500000000000000000000000001363734646600250425ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gl/windows/CMakeLists.txt000066400000000000000000000007101363734646600276000ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) list (APPEND IGDRCL_SRCS_tests_mocks_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/mock_gl_arb_sync_event_windows.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_gl_sharing_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gl_sharing_windows.h ) target_sources(igdrcl_mocks PRIVATE ${IGDRCL_SRCS_tests_mocks_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gl/windows/mock_gl_arb_sync_event_windows.h000066400000000000000000000024421363734646600334630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/gl/gl_arb_sync_event.h" template struct DummyArbEvent : NEO::GlArbSyncEvent { DummyArbEvent(NEO::Context &ctx) : GlArbSyncEvent(ctx) { } using GlArbSyncEvent::baseEvent; using GlArbSyncEvent::glSyncInfo; using GlArbSyncEvent::osInterface; bool useBaseSetEvent = false; bool setBaseEvent(Event &ev) override { return GlArbSyncEvent::setBaseEvent(ev); } ~DummyArbEvent() override { GlArbSyncEvent::glSyncInfo.reset(); } static GlArbSyncEvent *create(Event &baseEv) { if (FailCreation) { return nullptr; } auto syncEv = new DummyArbEvent(*baseEv.getContext()); syncEv->baseEvent = &baseEv; return syncEv; } }; inline void glArbSyncObjectCleanupMockDoNothing(NEO::OSInterface &osInterface, CL_GL_SYNC_INFO *glSyncInfo) { } inline void glArbSyncObjectSignalMockDoNothing(NEO::OsContext &osContext, CL_GL_SYNC_INFO &glSyncInfo) { } template inline bool mockGlArbSyncObjectSetup(NEO::GLSharingFunctions &sharing, NEO::OSInterface &osInterface, 
CL_GL_SYNC_INFO &glSyncInfo) { return (Fail == false); } compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.cpp000066400000000000000000000027711363734646600324550ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "config.h" namespace NEO { int EGLCreateContextCalled = 0; int EGLChooseConfigCalled = 0; int EGLDeleteContextCalled = 0; int GlxChooseFBConfigCalled = 0; int GlxQueryContextCalled = 0; int GlxCreateNewContextCalled = 0; int GlxDeleteContextCalled = 0; int GlxIsDirectCalled = 0; EGLBkpContextParams eglBkpContextParams = {0}; GLXBkpContextParams glxBkpContextParams = {0}; void GlSharingFunctionsMock::initMembers() { GLSharingFunctionsWindows::initGLFunctions(); glDllHelper dllParam; dllParam.setGLSetSharedOCLContextStateReturnedValue(1u); dllParam.resetParam(""); dllParam.loadTexture({0}); dllParam.loadBuffer({0}); EGLChooseConfigCalled = 0; EGLCreateContextCalled = 0; EGLDeleteContextCalled = 0; GlxChooseFBConfigCalled = 0; GlxQueryContextCalled = 0; GlxCreateNewContextCalled = 0; GlxDeleteContextCalled = 0; GlxIsDirectCalled = 0; memset(&eglBkpContextParams, 0, sizeof(EGLBkpContextParams)); memset(&glxBkpContextParams, 0, sizeof(GLXBkpContextParams)); } GlSharingFunctionsMock::GlSharingFunctionsMock() { initMembers(); } MockGlSharing::MockGlSharing(GLType glhdcType, GLContext glhglrcHandle, GLContext glhglrcHandleBkpCtx, GLDisplay glhdcHandle) { sharingFunctions->setHandles(glhdcType, glhglrcHandle, glhglrcHandleBkpCtx, glhdcHandle); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h000066400000000000000000000165021363734646600321170ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/test/unit_test/os_interface/windows/gl/gl_dll_helper.h" #include "config.h" #include namespace NEO { struct EGLBkpContextParams { int32_t configAttrs; int32_t contextAttrs[3]; int32_t numConfigs; }; struct GLXBkpContextParams { int FBConfigAttrs; int queryAttribute; int renderType; }; struct GLMockReturnedValues { GLContext currentContext; GLDisplay currentDisplay; GLContext madeCurrentContext; bool forceMakeCurrentCallFail; int numberOfCallFails; int failsCounter; int syncivRetVal; }; extern int GLSetSharedOCLContextStateCalled; extern int EGLCreateContextCalled; extern int EGLDeleteContextCalled; extern int EGLChooseConfigCalled; extern int GlxChooseFBConfigCalled; extern int GlxQueryContextCalled; extern int GlxCreateNewContextCalled; extern int GlxDeleteContextCalled; extern int GlxIsDirectCalled; extern EGLBkpContextParams eglBkpContextParams; extern GLXBkpContextParams glxBkpContextParams; namespace glTextureTargets { static const unsigned int supportedTargets[] = { GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, GL_TEXTURE_BUFFER, GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_RECTANGLE, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_3D, GL_RENDERBUFFER_EXT, GL_TEXTURE_2D_MULTISAMPLE, GL_TEXTURE_2D_MULTISAMPLE_ARRAY, }; } class GlSharingFunctionsMock : public GLSharingFunctionsWindows { void initMembers(); public: static GLboolean OSAPI mockGLSetSharedOCLContextState(GLDisplay, GLContext, GLboolean, GLvoid *pBufferInfo) { GLSetSharedOCLContextStateCalled++; return (GLboolean)1; }; ~GlSharingFunctionsMock() override = default; using GLSharingFunctionsWindows::GLAcquireSharedBuffer; using GLSharingFunctionsWindows::GLAcquireSharedRenderBuffer; using 
GLSharingFunctionsWindows::GLAcquireSharedTexture; using GLSharingFunctionsWindows::GLGetCurrentContext; using GLSharingFunctionsWindows::GLGetCurrentDisplay; using GLSharingFunctionsWindows::glGetIntegerv; using GLSharingFunctionsWindows::glGetString; using GLSharingFunctionsWindows::glGetStringi; using GLSharingFunctionsWindows::GLGetSynciv; using GLSharingFunctionsWindows::GLReleaseSharedBuffer; using GLSharingFunctionsWindows::GLReleaseSharedRenderBuffer; using GLSharingFunctionsWindows::GLReleaseSharedTexture; using GLSharingFunctionsWindows::GLReleaseSync; using GLSharingFunctionsWindows::GLRetainSync; using GLSharingFunctionsWindows::GLSetSharedOCLContextState; using GLSharingFunctionsWindows::isOpenGlExtensionSupported; using GLSharingFunctionsWindows::pfnWglCreateContext; using GLSharingFunctionsWindows::pfnWglDeleteContext; using GLSharingFunctionsWindows::pfnWglShareLists; using GLSharingFunctionsWindows::setSharedOCLContextState; using GLSharingFunctionsWindows::wglMakeCurrent; using GLSharingFunctionsWindows::glArbEventMapping; using GLSharingFunctionsWindows::GLContextHandle; using GLSharingFunctionsWindows::GLDeviceHandle; using GLSharingFunctionsWindows::getSupportedFormats; using GLSharingFunctionsWindows::pfnGlArbSyncObjectCleanup; using GLSharingFunctionsWindows::pfnGlArbSyncObjectSetup; using GLSharingFunctionsWindows::pfnGlArbSyncObjectSignal; using GLSharingFunctionsWindows::pfnGlArbSyncObjectWaitServer; GlSharingFunctionsMock(GLType GLHDCType, GLContext GLHGLRCHandle, GLContext GLHGLRCHandleBkpCtx, GLDisplay GLHDCHandle) : GLSharingFunctionsWindows(GLHDCType, GLHGLRCHandle, GLHGLRCHandleBkpCtx, GLHDCHandle) { initMembers(); updateOpenGLContext(); createBackupContext(); } GlSharingFunctionsMock(); void setHandles(GLType GLHDCType, GLContext GLHGLRCHandle, GLContext GLHGLRCHandleBkpCtx, GLDisplay GLHDCHandle) { this->GLHDCType = GLHDCType; this->GLHGLRCHandle = GLHGLRCHandle; this->GLHGLRCHandleBkpCtx = GLHGLRCHandleBkpCtx; this->GLHDCHandle = 
GLHDCHandle; } void setGLAcquireSharedBufferMock(PFNOGLAcquireSharedBufferINTEL mock) { GLAcquireSharedBuffer = mock; } void setGLAcquireSharedTextureMock(PFNOGLAcquireSharedTextureINTEL mock) { GLAcquireSharedTexture = mock; } }; class MockGlSharing { public: MockGlSharing() {} MockGlSharing(GLType GLHDCType, GLContext GLHGLRCHandle, GLContext GLHGLRCHandleBkpCtx, GLDisplay GLHDCHandle); void uploadDataToBufferInfo() { dllParam->loadBuffer(m_bufferInfoOutput); } void uploadDataToBufferInfo(unsigned int sharedHandle, int bufferOffset) { m_bufferInfoOutput.globalShareHandle = sharedHandle; m_bufferInfoOutput.bufferOffset = bufferOffset; dllParam->loadBuffer(m_bufferInfoOutput); } void uploadDataToTextureInfo() { dllParam->loadTexture(m_textureInfoOutput); } void uploadDataToTextureInfo(unsigned int sharedHandle) { m_textureInfoOutput.globalShareHandle = sharedHandle; dllParam->loadTexture(m_textureInfoOutput); } void uploadTextureBufferOffsetToTextureInfo(int texBufOffset) { m_textureInfoOutput.textureBufferOffset = texBufOffset; dllParam->loadTexture(m_textureInfoOutput); } void overrideGetCurrentValues(GLContext ctx, GLDisplay display, bool forceMakeCurrentFail = false, int numberOfFails = 0) { glMockReturnedValues.currentContext = ctx; glMockReturnedValues.currentDisplay = display; glMockReturnedValues.forceMakeCurrentCallFail = forceMakeCurrentFail; glMockReturnedValues.numberOfCallFails = numberOfFails; glMockReturnedValues.failsCounter = 0; dllParam->setGlMockReturnedValues(glMockReturnedValues); } void setGetSyncivReturnValue(int val) { glMockReturnedValues.syncivRetVal = val; dllParam->setGlMockReturnedValues(glMockReturnedValues); } std::unique_ptr sharingFunctions = std::make_unique(); std::unique_ptr dllParam = std::make_unique(); CL_GL_RESOURCE_INFO m_clGlResourceInfo = {0}; GL_CL_RESOURCE_INFO m_glClResourceInfo = {0}; CL_GL_BUFFER_INFO m_bufferInfoOutput = {0}; CL_GL_RESOURCE_INFO m_textureInfoOutput = {0}; GLMockReturnedValues glMockReturnedValues = 
{0}; }; class MockGLSharingFunctions : public GLSharingFunctionsWindows { public: using GLSharingFunctionsWindows::isOpenGlExtensionSupported; using GLSharingFunctionsWindows::setSharedOCLContextState; static bool SharingEnabled; static void OSAPI glGetIntegervTest(GLenum pname, GLint *data) { if (pname == GL_NUM_EXTENSIONS) *data = 2; }; using GLSharingFunctionsWindows::glGetIntegerv; using GLSharingFunctionsWindows::glGetString; std::unique_ptr dllParam = std::make_unique(); MockGLSharingFunctions() { GLSharingFunctionsWindows::initGLFunctions(); MockGLSharingFunctions::SharingEnabled = 1; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gmm_memory/000077500000000000000000000000001363734646600251165ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mocks/gmm_memory/mock_gmm_memory.h000066400000000000000000000006301363734646600304470ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/mocks/mock_gmm_memory_base.h" namespace NEO { class MockGmmMemory : public MockGmmMemoryBase { using MockGmmMemoryBase::MockGmmMemoryBase; }; class GmockGmmMemory : public GmockGmmMemoryBase { using GmockGmmMemoryBase::GmockGmmMemoryBase; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/linux/000077500000000000000000000000001363734646600241055ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mocks/linux/mock_drm_allocation.h000066400000000000000000000013341363734646600302570ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/drm_allocation.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" namespace NEO { class MockBufferObject : public BufferObject { public: using BufferObject::handle; MockBufferObject() : BufferObject(nullptr, 0, 
0) { } }; class MockDrmAllocation : public DrmAllocation { public: using DrmAllocation::bufferObjects; using DrmAllocation::memoryPool; MockDrmAllocation(AllocationType allocationType, MemoryPool::Type pool) : DrmAllocation(0, allocationType, nullptr, nullptr, 0, static_cast(0), pool) { } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h000066400000000000000000000036721363734646600330160ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/os_interface/linux/drm_command_stream.h" using namespace NEO; template class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver { public: using CommandStreamReceiver::commandStream; using CommandStreamReceiver::makeResident; using DrmCommandStreamReceiver::makeResidentBufferObjects; using DrmCommandStreamReceiver::residency; using CommandStreamReceiverHw::CommandStreamReceiver::lastSentSliceCount; TestedDrmCommandStreamReceiver(gemCloseWorkerMode mode, ExecutionEnvironment &executionEnvironment) : DrmCommandStreamReceiver(executionEnvironment, 0, mode) { } TestedDrmCommandStreamReceiver(ExecutionEnvironment &executionEnvironment) : DrmCommandStreamReceiver(executionEnvironment, 0, gemCloseWorkerMode::gemCloseWorkerInactive) { } void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; } void makeNonResident(GraphicsAllocation &gfxAllocation) override { makeNonResidentResult.called = true; makeNonResidentResult.allocation = &gfxAllocation; DrmCommandStreamReceiver::makeNonResident(gfxAllocation); } struct MakeResidentNonResidentResult { bool called = false; GraphicsAllocation *allocation = nullptr; }; MakeResidentNonResidentResult makeNonResidentResult; SubmissionAggregator *peekSubmissionAggregator() const { return this->submissionAggregator.get(); } void overrideSubmissionAggregator(SubmissionAggregator 
*newSubmissionsAggregator) { this->submissionAggregator.reset(newSubmissionsAggregator); } std::vector &getExecStorage() { return this->execObjectsStorage; } }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.cpp000066400000000000000000000067371363734646600315030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_host_ptr_manager.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include namespace NEO { off_t lseekReturn = 4096u; std::atomic lseekCalledCount(0); TestedDrmMemoryManager::TestedDrmMemoryManager(ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) { this->lseekFunction = &lseekMock; this->closeFunction = &closeMock; lseekReturn = 4096; lseekCalledCount = 0; hostPtrManager.reset(new MockHostPtrManager); }; TestedDrmMemoryManager::TestedDrmMemoryManager(bool enableLocalMemory, bool allowForcePin, bool validateHostPtrMemory, ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(false, enableLocalMemory, gemCloseWorkerMode::gemCloseWorkerInactive, allowForcePin, validateHostPtrMemory, executionEnvironment) { this->lseekFunction = &lseekMock; this->closeFunction = &closeMock; lseekReturn = 4096; lseekCalledCount = 0; } void TestedDrmMemoryManager::injectPinBB(BufferObject *newPinBB) { BufferObject *currentPinBB = pinBBs[0u]; pinBBs[0u] = nullptr; DrmMemoryManager::unreference(currentPinBB, true); pinBBs[0u] = newPinBB; } DrmGemCloseWorker *TestedDrmMemoryManager::getgemCloseWorker() { return this->gemCloseWorker.get(); } void 
TestedDrmMemoryManager::forceLimitedRangeAllocator(uint64_t range) { getGfxPartition(0)->init(range, getSizeToReserve(), 0, 1); } void TestedDrmMemoryManager::overrideGfxPartition(GfxPartition *newGfxPartition) { gfxPartitions[0].reset(newGfxPartition); } DrmAllocation *TestedDrmMemoryManager::allocate32BitGraphicsMemory(uint32_t rootDeviceIndex, size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType) { bool allocateMemory = ptr == nullptr; AllocationData allocationData; MockAllocationProperties properties(rootDeviceIndex, allocateMemory, size, allocationType); getAllocationData(allocationData, properties, ptr, createStorageInfoFromProperties(properties)); return allocate32BitGraphicsMemoryImpl(allocationData); } TestedDrmMemoryManager::~TestedDrmMemoryManager() { DrmMemoryManager::commonCleanup(); } }; // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h000066400000000000000000000054721363734646600311430ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include namespace NEO { extern off_t lseekReturn; extern std::atomic lseekCalledCount; inline off_t lseekMock(int fd, off_t offset, int whence) noexcept { lseekCalledCount++; return lseekReturn; } inline int closeMock(int) { return 0; } class ExecutionEnvironment; class BufferObject; class DrmGemCloseWorker; class TestedDrmMemoryManager : public MemoryManagerCreate { public: using DrmMemoryManager::acquireGpuRange; using DrmMemoryManager::allocateGraphicsMemory; using DrmMemoryManager::allocateGraphicsMemory64kb; using DrmMemoryManager::allocateGraphicsMemoryForImage; using DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr; using DrmMemoryManager::allocateGraphicsMemoryWithAlignment; using 
DrmMemoryManager::allocateGraphicsMemoryWithHostPtr; using DrmMemoryManager::allocateShareableMemory; using DrmMemoryManager::AllocationData; using DrmMemoryManager::allocUserptr; using DrmMemoryManager::createGraphicsAllocation; using DrmMemoryManager::createSharedBufferObject; using DrmMemoryManager::eraseSharedBufferObject; using DrmMemoryManager::getDefaultDrmContextId; using DrmMemoryManager::getDrm; using DrmMemoryManager::getRootDeviceIndex; using DrmMemoryManager::gfxPartitions; using DrmMemoryManager::lockResourceInLocalMemoryImpl; using DrmMemoryManager::pinBBs; using DrmMemoryManager::pinThreshold; using DrmMemoryManager::pushSharedBufferObject; using DrmMemoryManager::releaseGpuRange; using DrmMemoryManager::setDomainCpu; using DrmMemoryManager::sharingBufferObjects; using DrmMemoryManager::supportsMultiStorageResources; using DrmMemoryManager::unlockResourceInLocalMemoryImpl; using MemoryManager::allocateGraphicsMemoryInDevicePool; using MemoryManager::registeredEngines; using MemoryManager::useInternal32BitAllocator; TestedDrmMemoryManager(ExecutionEnvironment &executionEnvironment); TestedDrmMemoryManager(bool enableLocalMemory, bool allowForcePin, bool validateHostPtrMemory, ExecutionEnvironment &executionEnvironment); void injectPinBB(BufferObject *newPinBB); DrmGemCloseWorker *getgemCloseWorker(); void forceLimitedRangeAllocator(uint64_t range); void overrideGfxPartition(GfxPartition *newGfxPartition); DrmAllocation *allocate32BitGraphicsMemory(uint32_t rootDeviceIndex, size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType); ~TestedDrmMemoryManager() override; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_allocation_properties.h000066400000000000000000000015661363734646600305410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_manager.h" namespace NEO { struct 
MockAllocationProperties : public AllocationProperties { MockAllocationProperties(uint32_t rootDeviceIndex, size_t size) : AllocationProperties(rootDeviceIndex, size, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY) {} MockAllocationProperties(uint32_t rootDeviceIndex, bool allocateMemory, size_t size) : AllocationProperties(rootDeviceIndex, allocateMemory, size, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, false) {} MockAllocationProperties(uint32_t rootDeviceIndex, bool allocateMemory, size_t size, GraphicsAllocation::AllocationType allocationType) : AllocationProperties(rootDeviceIndex, allocateMemory, size, allocationType, false) {} }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_async_event_handler.cpp000066400000000000000000000003461363734646600305010ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" namespace MockAsyncEventHandlerGlobals { bool destructorCalled = false; }compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_async_event_handler.h000066400000000000000000000033261363734646600301470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/async_events_handler.h" #include #include #include #include #include using namespace NEO; namespace MockAsyncEventHandlerGlobals { extern bool destructorCalled; } class MockHandler : public AsyncEventsHandler { public: using AsyncEventsHandler::allowAsyncProcess; using AsyncEventsHandler::asyncMtx; using AsyncEventsHandler::asyncProcess; using AsyncEventsHandler::openThread; using AsyncEventsHandler::thread; ~MockHandler() override { if (!allowThreadCreating) { asyncProcess(this); // process once for cleanup } MockAsyncEventHandlerGlobals::destructorCalled = true; } MockHandler(bool allowAsync = false) : AsyncEventsHandler() 
{ allowThreadCreating = allowAsync; transferCounter.store(0); MockAsyncEventHandlerGlobals::destructorCalled = false; } Event *process() { std::move(registerList.begin(), registerList.end(), std::back_inserter(list)); registerList.clear(); return processList(); } void transferRegisterList() override { transferCounter++; AsyncEventsHandler::transferRegisterList(); } void openThread() override { if (allowThreadCreating) { AsyncEventsHandler::openThread(); } openThreadCalled = true; } bool peekIsListEmpty() { return list.size() == 0; } bool peekIsRegisterListEmpty() { return registerList.size() == 0; } std::atomic transferCounter; bool openThreadCalled = false; bool allowThreadCreating = false; }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_aub_center.h000066400000000000000000000014271363734646600262430ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub/aub_center.h" #include "opencl/source/command_stream/aub_stream_provider.h" #include "opencl/test/unit_test/mocks/mock_aub_file_stream.h" namespace NEO { class MockAubStreamProvider : public AubStreamProvider { public: AubMemDump::AubFileStream *getStream() override { return &stream; } protected: MockAubFileStream stream; }; class MockAubCenter : public AubCenter { public: using AubCenter::AubCenter; using AubCenter::aubManager; using AubCenter::aubStreamMode; MockAubCenter() { streamProvider.reset(new MockAubStreamProvider()); } ~MockAubCenter() override = default; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_aub_csr.h000066400000000000000000000205101363734646600255440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include 
"shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "gmock/gmock.h" #include #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace NEO { struct MockAubFileStreamMockMmioWrite : public AubMemDump::AubFileStream { void writeMMIOImpl(uint32_t offset, uint32_t value) override { mmioList.push_back(std::make_pair(offset, value)); } bool isOnMmioList(const MMIOPair &mmio) { bool mmioFound = false; for (auto &mmioPair : mmioList) { if (mmioPair.first == mmio.first && mmioPair.second == mmio.second) { mmioFound = true; break; } } return mmioFound; } std::vector> mmioList; }; template struct MockAubCsrToTestDumpContext : public AUBCommandStreamReceiverHw { using AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; void addContextToken(uint32_t dumpHandle) override { handle = dumpHandle; } uint32_t handle = 0; }; template struct MockAubCsr : public AUBCommandStreamReceiverHw { using CommandStreamReceiverHw::defaultSshSize; using AUBCommandStreamReceiverHw::taskCount; using AUBCommandStreamReceiverHw::latestSentTaskCount; using AUBCommandStreamReceiverHw::pollForCompletionTaskCount; using AUBCommandStreamReceiverHw::writeMemory; using AUBCommandStreamReceiverHw::AUBCommandStreamReceiverHw; DispatchMode peekDispatchMode() const { return this->dispatchMode; } GraphicsAllocation *getTagAllocation() const { return this->tagAllocation; } void setLatestSentTaskCount(uint32_t latestSentTaskCount) { this->latestSentTaskCount = latestSentTaskCount; } bool flushBatchedSubmissions() override { flushBatchedSubmissionsCalled = true; return true; } void initProgrammingFlags() override { 
initProgrammingFlagsCalled = true; } void initializeEngine() override { AUBCommandStreamReceiverHw::initializeEngine(); initializeEngineCalled = true; } void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override { AUBCommandStreamReceiverHw::writeMemory(gpuAddress, cpuAddress, size, memoryBank, entryBits); writeMemoryCalled = true; } void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits) override { AUBCommandStreamReceiverHw::submitBatchBuffer(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, entryBits); submitBatchBufferCalled = true; } void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override { CommandStreamReceiverSimulatedHw::writeMemoryWithAubManager(graphicsAllocation); writeMemoryWithAubManagerCalled = true; } void pollForCompletion() override { AUBCommandStreamReceiverHw::pollForCompletion(); pollForCompletionCalled = true; } void expectMemoryEqual(void *gfxAddress, const void *srcAddress, size_t length) override { AUBCommandStreamReceiverHw::expectMemoryEqual(gfxAddress, srcAddress, length); expectMemoryEqualCalled = true; } void expectMemoryNotEqual(void *gfxAddress, const void *srcAddress, size_t length) override { AUBCommandStreamReceiverHw::expectMemoryNotEqual(gfxAddress, srcAddress, length); expectMemoryNotEqualCalled = true; } bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override { return true; } void addAubComment(const char *message) override { AUBCommandStreamReceiverHw::addAubComment(message); addAubCommentCalled = true; } void dumpAllocation(GraphicsAllocation &gfxAllocation) override { AUBCommandStreamReceiverHw::dumpAllocation(gfxAllocation); dumpAllocationCalled = true; } bool isMultiOsContextCapable() const override { return multiOsContextCapable; } bool multiOsContextCapable = false; bool 
flushBatchedSubmissionsCalled = false; bool initProgrammingFlagsCalled = false; bool initializeEngineCalled = false; bool writeMemoryCalled = false; bool writeMemoryWithAubManagerCalled = false; bool submitBatchBufferCalled = false; bool pollForCompletionCalled = false; bool expectMemoryEqualCalled = false; bool expectMemoryNotEqualCalled = false; bool addAubCommentCalled = false; bool dumpAllocationCalled = false; void initFile(const std::string &fileName) override { fileIsOpen = true; openFileName = fileName; } void closeFile() override { fileIsOpen = false; openFileName = ""; } bool isFileOpen() const override { return fileIsOpen; } const std::string getFileName() override { return openFileName; } bool fileIsOpen = false; std::string openFileName = ""; MOCK_METHOD0(addPatchInfoComments, bool(void)); using CommandStreamReceiverHw::localMemoryEnabled; }; struct AubExecutionEnvironment { std::unique_ptr executionEnvironment; GraphicsAllocation *commandBuffer = nullptr; std::unique_ptr commandStreamReceiver; template CsrType *getCsr() { return static_cast(commandStreamReceiver.get()); } ~AubExecutionEnvironment() { if (commandBuffer) { executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer); } } }; template std::unique_ptr getEnvironment(bool createTagAllocation, bool allocateCommandBuffer, bool standalone) { std::unique_ptr executionEnvironment(new ExecutionEnvironment); executionEnvironment->prepareRootDeviceEnvironments(1); uint32_t rootDeviceIndex = 0u; executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(defaultHwInfo.get()); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->aubCenter.reset(new AubCenter()); executionEnvironment->initializeMemoryManager(); auto commandStreamReceiver = std::make_unique("", standalone, *executionEnvironment, rootDeviceIndex); if (createTagAllocation) { commandStreamReceiver->initializeTagAllocation(); } auto osContext = 
executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), getChosenEngineType(*defaultHwInfo), 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); commandStreamReceiver->setupContext(*osContext); std::unique_ptr aubExecutionEnvironment(new AubExecutionEnvironment); if (allocateCommandBuffer) { aubExecutionEnvironment->commandBuffer = executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); } aubExecutionEnvironment->executionEnvironment = std::move(executionEnvironment); aubExecutionEnvironment->commandStreamReceiver = std::move(commandStreamReceiver); return aubExecutionEnvironment; } } // namespace NEO #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_aub_file_stream.h000066400000000000000000000062241363734646600272550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "gmock/gmock.h" #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace NEO { struct MockAubFileStream : public AUBCommandStreamReceiver::AubFileStream { bool init(uint32_t stepping, uint32_t device) override { initCalledCnt++; return true; } void open(const char *filePath) override { fileName.assign(filePath); openCalledCnt++; } void close() override { fileName.clear(); closeCalled = true; } bool isOpen() const override { isOpenCalled = true; return !fileName.empty(); } const std::string &getFileName() const override { getFileNameCalled = true; return fileName; } void flush() override { flushCalled = true; } std::unique_lock lockStream() override { lockStreamCalled = true; return AUBCommandStreamReceiver::AubFileStream::lockStream(); } void 
expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) override { mmioRegisterFromExpectMMIO = mmioRegister; expectedValueFromExpectMMIO = expectedValue; } void expectMemory(uint64_t physAddress, const void *memory, size_t size, uint32_t addressSpace, uint32_t compareOperation) override { physAddressCapturedFromExpectMemory = physAddress; memoryCapturedFromExpectMemory = reinterpret_cast(memory); sizeCapturedFromExpectMemory = size; addressSpaceCapturedFromExpectMemory = addressSpace; compareOperationFromExpectMemory = compareOperation; } bool addComment(const char *message) override { receivedComment.assign(message); addCommentCalled = true; return true; } void registerPoll(uint32_t registerOffset, uint32_t mask, uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override { registerPollCalled = true; AUBCommandStreamReceiver::AubFileStream::registerPoll(registerOffset, mask, value, pollNotEqual, timeoutAction); } uint32_t openCalledCnt = 0; std::string fileName = ""; bool closeCalled = false; uint32_t initCalledCnt = 0; mutable bool isOpenCalled = false; mutable bool getFileNameCalled = false; bool registerPollCalled = false; bool addCommentCalled = false; std::string receivedComment = ""; bool flushCalled = false; bool lockStreamCalled = false; uint32_t mmioRegisterFromExpectMMIO = 0; uint32_t expectedValueFromExpectMMIO = 0; uint64_t physAddressCapturedFromExpectMemory = 0; uintptr_t memoryCapturedFromExpectMemory = 0; size_t sizeCapturedFromExpectMemory = 0; uint32_t addressSpaceCapturedFromExpectMemory = 0; uint32_t compareOperationFromExpectMemory = 0; }; struct GmockAubFileStream : public AUBCommandStreamReceiver::AubFileStream { MOCK_METHOD1(addComment, bool(const char *message)); }; } // namespace NEO #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_aub_manager.h000066400000000000000000000127771363734646600264070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "third_party/aub_stream/headers/aub_manager.h" #include "third_party/aub_stream/headers/aubstream.h" #include "third_party/aub_stream/headers/hardware_context.h" struct MockHardwareContext : public aub_stream::HardwareContext { using SurfaceInfo = aub_stream::SurfaceInfo; MockHardwareContext(uint32_t deviceIndex) : deviceIndex(deviceIndex) {} ~MockHardwareContext() override {} void initialize() override { initializeCalled = true; } void pollForCompletion() override { pollForCompletionCalled = true; } void writeAndSubmitBatchBuffer(uint64_t gfxAddress, const void *batchBuffer, size_t size, uint32_t memoryBank, size_t pageSize) override { writeAndSubmitCalled = true; } void submitBatchBuffer(uint64_t gfxAddress, bool overrideRingHead) override { submitCalled = true; } void writeMemory(uint64_t gfxAddress, const void *memory, size_t size, uint32_t memoryBanks, int hint, size_t pageSize) override { writeMemoryCalled = true; writeMemoryPageSizePassed = pageSize; memoryBanksPassed = memoryBanks; } void freeMemory(uint64_t gfxAddress, size_t size) override { freeMemoryCalled = true; } void expectMemory(uint64_t gfxAddress, const void *memory, size_t size, uint32_t compareOperation) override { expectMemoryCalled = true; } void readMemory(uint64_t gfxAddress, void *memory, size_t size, uint32_t memoryBank, size_t pageSize) override { readMemoryCalled = true; } void dumpBufferBIN(uint64_t gfxAddress, size_t size) override { dumpBufferBINCalled = true; } void dumpSurface(const SurfaceInfo &surfaceInfo) override { dumpSurfaceCalled = true; } bool initializeCalled = false; bool pollForCompletionCalled = false; bool writeAndSubmitCalled = false; bool submitCalled = false; bool writeMemoryCalled = false; bool freeMemoryCalled = false; bool expectMemoryCalled = false; bool readMemoryCalled = false; bool dumpBufferBINCalled = false; bool dumpSurfaceCalled = false; size_t writeMemoryPageSizePassed = 0; uint32_t 
memoryBanksPassed = 0; const uint32_t deviceIndex; }; class MockAubManager : public aub_stream::AubManager { using HardwareContext = aub_stream::HardwareContext; using PageInfo = aub_stream::PageInfo; public: MockAubManager(){}; MockAubManager(uint32_t productFamily, uint32_t devicesCount, uint64_t memoryBankSize, bool localMemorySupported, uint32_t streamMode, uint64_t gpuAddressSpace) { mockAubManagerParams.productFamily = productFamily; mockAubManagerParams.devicesCount = devicesCount; mockAubManagerParams.memoryBankSize = memoryBankSize; mockAubManagerParams.localMemorySupported = localMemorySupported; mockAubManagerParams.streamMode = streamMode; mockAubManagerParams.gpuAddressSpace = gpuAddressSpace; } ~MockAubManager() override {} HardwareContext *createHardwareContext(uint32_t device, uint32_t engine) { return createHardwareContext(device, engine, 0); } HardwareContext *createHardwareContext(uint32_t device, uint32_t engine, uint32_t flags) override { contextFlags = flags; return new MockHardwareContext(device); } void open(const std::string &aubFileName) override { fileName.assign(aubFileName); openCalledCnt++; } void close() override { fileName.clear(); closeCalled = true; } bool isOpen() override { isOpenCalled = true; return !fileName.empty(); } const std::string getFileName() override { getFileNameCalled = true; return fileName; } void pause(bool onoff) override { isPaused = onoff; } void addComment(const char *message) override { receivedComment.assign(message); addCommentCalled = true; } void writeMemory(uint64_t gfxAddress, const void *memory, size_t size, uint32_t memoryBanks, int hint, size_t pageSize) override { writeMemoryCalled = true; hintToWriteMemory = hint; writeMemoryPageSizePassed = pageSize; } void writePageTableEntries(uint64_t gfxAddress, size_t size, uint32_t memoryBanks, int hint, std::vector &lastLevelPages, size_t pageSize) override { writePageTableEntriesCalled = true; } void writePhysicalMemoryPages(const void *memory, 
std::vector &pages, size_t size, int hint) override { writePhysicalMemoryPagesCalled = true; } void freeMemory(uint64_t gfxAddress, size_t size) override { freeMemoryCalled = true; } uint32_t openCalledCnt = 0; std::string fileName = ""; bool closeCalled = false; bool isOpenCalled = false; bool getFileNameCalled = false; bool isPaused = false; bool addCommentCalled = false; std::string receivedComment = ""; bool writeMemoryCalled = false; bool writePageTableEntriesCalled = false; bool writePhysicalMemoryPagesCalled = false; bool freeMemoryCalled = false; uint32_t contextFlags = 0; int hintToWriteMemory = 0; size_t writeMemoryPageSizePassed = 0; struct MockAubManagerParams { uint32_t productFamily = 0; int32_t devicesCount = 0; uint64_t memoryBankSize = 0; bool localMemorySupported = false; uint32_t streamMode = 0xFFFFFFFF; uint64_t gpuAddressSpace = 0xFFFFFFFFFFFF; } mockAubManagerParams; protected: HardwareContext *hardwareContext = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_aub_stream.h000066400000000000000000000027611363734646600262600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub_mem_dump/aub_mem_dump.h" #include "opencl/source/gen_common/aub_mapper_base.h" namespace NEO { struct MockAubStreamMockMmioWrite : AubMemDump::AubStream { void open(const char *filePath) override{}; void close() override{}; bool init(uint32_t stepping, uint32_t device) override { return true; }; void writeMemory(uint64_t physAddress, const void *memory, size_t sizeToDumpThisIteration, uint32_t addressSpace, uint32_t hint) override{}; void writeMemoryWriteHeader(uint64_t physAddress, size_t size, uint32_t addressSpace, uint32_t hint) override{}; void writePTE(uint64_t physAddress, uint64_t entry, uint32_t addressSpace) override{}; void writeGTT(uint32_t offset, uint64_t entry) override{}; void registerPoll(uint32_t registerOffset, uint32_t mask, 
uint32_t value, bool pollNotEqual, uint32_t timeoutAction) override{}; void writeMMIOImpl(uint32_t offset, uint32_t value) override { mmioList.push_back(std::make_pair(offset, value)); } bool isOnMmioList(const MMIOPair &mmio) { bool mmioFound = false; for (auto &mmioPair : mmioList) { if (mmioPair.first == mmio.first && mmioPair.second == mmio.second) { mmioFound = true; break; } } return mmioFound; } std::vector> mmioList; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_aub_subcapture_manager.h000066400000000000000000000037331363734646600306340ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_stream/aub_subcapture.h" using namespace NEO; class AubSubCaptureManagerMock : public AubSubCaptureManager { public: using AubSubCaptureManager::AubSubCaptureManager; void setSubCaptureIsActive(bool on) { subCaptureIsActive = on; } bool isSubCaptureActive() const { return subCaptureIsActive; } void setSubCaptureWasActiveInPreviousEnqueue(bool on) { subCaptureWasActiveInPreviousEnqueue = on; } bool wasSubCaptureActiveInPreviousEnqueue() const { return subCaptureWasActiveInPreviousEnqueue; } void setKernelCurrentIndex(uint32_t index) { kernelCurrentIdx = index; } uint32_t getKernelCurrentIndex() const { return kernelCurrentIdx; } bool getUseToggleFileName() const { return useToggleFileName; } const std::string &getInitialFileName() const { return initialFileName; } const std::string &getCurrentFileName() const { return currentFileName; } SettingsReader *getSettingsReader() const { return settingsReader.get(); } void setSubCaptureToggleActive(bool on) { isToggledOn = on; } bool isSubCaptureToggleActive() const override { return isToggledOn; } void setToggleFileName(const std::string &fileName) { toggleFileName = fileName; } std::string getToggleFileName() const override { return toggleFileName; } std::unique_lock lock() const override { 
isLocked = true; return std::unique_lock{mutex}; } using AubSubCaptureManager::generateFilterFileName; using AubSubCaptureManager::generateToggleFileName; using AubSubCaptureManager::getAubCaptureFileName; mutable bool isLocked = false; protected: bool isToggledOn = false; std::string toggleFileName = ""; }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_block_kernel_manager.h000066400000000000000000000006411363734646600302550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/program/block_kernel_manager.h" namespace NEO { class MockBlockKernelManager : public BlockKernelManager { public: MockBlockKernelManager() = default; using BlockKernelManager::blockKernelInfoArray; using BlockKernelManager::blockPrivateSurfaceArray; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_buffer.h000066400000000000000000000103651363734646600254060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" using namespace NEO; class MockBufferStorage { public: MockBufferStorage() : mockGfxAllocation(data, sizeof(data) / 2) { } MockBufferStorage(bool unaligned) : mockGfxAllocation(unaligned ? 
alignUp(&data, 4) : alignUp(&data, 64), sizeof(data) / 2) { } char data[128]; MockGraphicsAllocation mockGfxAllocation; std::unique_ptr device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(nullptr)); }; class MockBuffer : public MockBufferStorage, public Buffer { public: using Buffer::magic; using Buffer::offset; using Buffer::size; using MemObj::isZeroCopy; using MockBufferStorage::device; MockBuffer(GraphicsAllocation &alloc) : MockBufferStorage(), Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, alloc.getUnderlyingBufferSize(), alloc.getUnderlyingBuffer(), alloc.getUnderlyingBuffer(), &alloc, true, false, false), externalAlloc(&alloc) { } MockBuffer() : MockBufferStorage(), Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, &mockGfxAllocation, true, false, false) { } ~MockBuffer() override { if (externalAlloc != nullptr) { // no ownership over graphics allocation // return to mock allocations this->graphicsAllocation = &this->mockGfxAllocation; } } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? 
externalAlloc : &mockGfxAllocation, 0, 0); } GraphicsAllocation *externalAlloc = nullptr; }; class AlignedBuffer : public MockBufferStorage, public Buffer { public: using MockBufferStorage::device; AlignedBuffer() : MockBufferStorage(false), Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), &mockGfxAllocation, true, false, false) { } AlignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(), Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), gfxAllocation, true, false, false) { } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0); } }; class UnalignedBuffer : public MockBufferStorage, public Buffer { public: using MockBufferStorage::device; UnalignedBuffer() : MockBufferStorage(true), Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), &mockGfxAllocation, false, false, false) { } UnalignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(true), Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), gfxAllocation, false, false, false) { } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0); } }; class MockPublicAccessBuffer : public Buffer { public: using 
Buffer::getGraphicsAllocationType; }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_builtin_dispatch_info_builder.h000066400000000000000000000032251363734646600322000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "opencl/source/helpers/dispatch_info.h" using namespace NEO; class MockBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { public: MockBuiltinDispatchInfoBuilder(BuiltIns &kernelLib, BuiltinDispatchInfoBuilder *origBuilder) : BuiltinDispatchInfoBuilder(kernelLib), originalBuilder(origBuilder) { } virtual void validateInput(const BuiltinOpParams &conf) const {}; bool buildDispatchInfos(MultiDispatchInfo &mdi, const BuiltinOpParams &conf) const override { validateInput(conf); builtinOpParams = conf; originalBuilder->buildDispatchInfos(mdi, conf); for (auto &di : mdi) { multiDispatchInfo.push(di); } return true; } const BuiltinOpParams *getBuiltinOpParams() const { return &builtinOpParams; }; const MultiDispatchInfo *getMultiDispatchInfo() const { return &multiDispatchInfo; }; void setFailingArgIndex(uint32_t index) { withFailureInjection = true; failingArgIndex = index; } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { err = (withFailureInjection && argIndex == failingArgIndex) ? 
CL_INVALID_ARG_VALUE : CL_SUCCESS; return false; } protected: mutable BuiltinOpParams builtinOpParams; mutable MultiDispatchInfo multiDispatchInfo; BuiltinDispatchInfoBuilder *originalBuilder; bool withFailureInjection = false; uint32_t failingArgIndex = 0; }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_builtins.cpp000066400000000000000000000012651363734646600263200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" namespace NEO { std::unique_ptr MockBuiltins::setBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, Context &context, Device &device, std::unique_ptr builder) { uint32_t operationId = static_cast(operation); auto &operationBuilder = BuiltinOpsBuilders[operationId]; std::call_once(operationBuilder.second, [] {}); operationBuilder.first.swap(builder); return builder; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_builtins.h000066400000000000000000000024201363734646600257570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/program/program.h" #include namespace NEO { class MockBuiltins : public BuiltIns { public: const SipKernel &getSipKernel(SipKernelType type, Device &device) override { if (sipKernelsOverride.find(type) != sipKernelsOverride.end()) { return *sipKernelsOverride[type]; } getSipKernelCalled = true; getSipKernelType = type; return BuiltIns::getSipKernel(type, device); } void overrideSipKernel(std::unique_ptr kernel) { sipKernelsOverride[kernel->getType()] = std::move(kernel); } std::unique_ptr setBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, Context 
&context, Device &device, std::unique_ptr builder); BuiltIns *originalGlobalBuiltins = nullptr; std::map> sipKernelsOverride; bool getSipKernelCalled = false; SipKernelType getSipKernelType = SipKernelType::COUNT; }; } // namespace NEOcompute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_cif.cpp000066400000000000000000000052441363734646600252310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_cif.h" #include "cif/builtins/memory/buffer/buffer.h" #include "cif/export/library_api.h" namespace NEO { bool failCreateCifMain = false; } namespace CIF { namespace Builtins { template Buffer<0>::Buffer(ArgsT &&... args) { } Buffer<0>::~Buffer() { } void BufferSimple::SetAllocator(CIF::Builtins::AllocatorT allocator, CIF::Builtins::DeallocatorT deallocator, CIF::Builtins::ReallocatorT reallocator) { } void BufferSimple::SetUnderlyingStorage(void *memory, size_t size, CIF::Builtins::DeallocatorT deallocator) { } void BufferSimple::SetUnderlyingStorage(const void *memory, size_t size) { } void *BufferSimple::DetachAllocation() { return nullptr; } const void *BufferSimple::GetMemoryRaw() const { return nullptr; } void *BufferSimple::GetMemoryRawWriteable() { return nullptr; } size_t BufferSimple::GetSizeRaw() const { return 0; } size_t BufferSimple::GetCapacityRaw() const { return 0; } bool BufferSimple::Resize(size_t newSize) { return false; } bool BufferSimple::Reserve(size_t newCapacity) { return false; } void BufferSimple::Clear() { } void BufferSimple::Deallocate() { } bool BufferSimple::AlignUp(uint32_t alignment) { return false; } bool BufferSimple::PushBackRawBytes(const void *newData, size_t size) { return false; } bool BufferSimple::IsConst() const { return false; } } // namespace Builtins } // namespace CIF namespace NEO { std::map MockCIFMain::globalCreators; bool MockCIFBuffer::failAllocations = false; CIF::ICIF *MockCIFBuffer::Create(CIF::InterfaceId_t 
intId, CIF::Version_t version) { if (failAllocations) { return nullptr; } if (version != CIF::Builtins::BufferSimple::GetVersion()) { return nullptr; } return new MockCIFBuffer(); } MockCIFBuffer::MockCIFBuffer() { } MockCIFMain::MockCIFMain() { defaultCreators[CIF::Builtins::BufferSimple::GetInterfaceId()] = MockCIFBuffer::Create; } CIF::ICIF *MockCIFMain::CreateInterfaceImpl(CIF::InterfaceId_t intId, CIF::Version_t version) { auto it = globalCreators.find(intId); if ((it == globalCreators.end()) || (it->second == nullptr)) { it = defaultCreators.find(intId); if ((it == defaultCreators.end()) || (it->second == nullptr)) { return nullptr; } } return it->second(intId, version); } } // namespace NEO extern CIF::CIFMain *CreateCIFMainImpl() { if (NEO::failCreateCifMain) { return nullptr; } return new NEO::MockCIFMain; } compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_cif.h000066400000000000000000000135141363734646600246750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "cif/builtins/memory/buffer/buffer.h" #include "cif/common/cif.h" #include "cif/common/cif_main.h" #include "cif/common/id.h" #include namespace NEO { extern bool failCreateCifMain; using CreatorFuncT = CIF::ICIF *(*)(CIF::InterfaceId_t intId, CIF::Version_t version); template struct MockCIF : BaseType { void Release() override { auto prev = refCount--; assert(prev >= 1); if (prev == 1) { delete this; } } void Retain() override { ++refCount; } uint32_t GetRefCount() const override { return refCount; } CIF::Version_t GetEnabledVersion() const override { return 1; } bool GetSupportedVersions(CIF::InterfaceId_t intId, CIF::Version_t &verMin, CIF::Version_t &verMax) const override { verMin = 1; verMax = CIF::MaxVersion; return true; // by default : no sub-interface are supported } uint32_t refCount = 1u; }; struct MockCIFMain : MockCIF { template static void setGlobalCreatorFunc(CreatorFuncT func) { 
globalCreators[InterfaceT::GetInterfaceId()] = func; } template void setDefaultCreatorFunc(CreatorFuncT func) { defaultCreators[InterfaceT::GetInterfaceId()] = func; } template static CreatorFuncT getGlobalCreatorFunc() { auto it = globalCreators.find(InterfaceT::GetInterfaceId()); if (it == globalCreators.end()) { return nullptr; } return it->second; } template static void removeGlobalCreatorFunc() { auto it = globalCreators.find(InterfaceT::GetInterfaceId()); if (it == globalCreators.end()) { return; } globalCreators.erase(it); } static void clearGlobalCreatorFuncs() { decltype(globalCreators) emptyCreators; globalCreators.swap(emptyCreators); } static std::map globalCreators; MockCIFMain(); CIF::Version_t GetBinaryVersion() const override { return 1; } CIF::ICIF *CreateInterfaceImpl(CIF::InterfaceId_t intId, CIF::Version_t version) override; CIF::InterfaceId_t FindIncompatibleImpl(CIF::InterfaceId_t entryPointInterface, CIF::CompatibilityDataHandle handle) const override { if (globalCreators.find(entryPointInterface) != globalCreators.end()) { return CIF::InvalidInterface; } if (defaultCreators.find(entryPointInterface) != defaultCreators.end()) { return CIF::InvalidInterface; } return entryPointInterface; } bool FindSupportedVersionsImpl(CIF::InterfaceId_t entryPointInterface, CIF::InterfaceId_t interfaceToFind, CIF::Version_t &verMin, CIF::Version_t &verMax) const override { if (globalCreators.find(entryPointInterface) != globalCreators.end()) { verMin = verMax = 1; return true; } if (defaultCreators.find(entryPointInterface) != defaultCreators.end()) { verMin = verMax = 1; return true; } return false; } std::map defaultCreators; }; struct MockCIFBuffer : MockCIF { MockCIFBuffer(); static CIF::ICIF *Create(CIF::InterfaceId_t intId, CIF::Version_t version); static bool failAllocations; void SetAllocator(CIF::Builtins::AllocatorT allocator, CIF::Builtins::DeallocatorT deallocator, CIF::Builtins::ReallocatorT reallocator) override { // unsupported in mock } void 
SetUnderlyingStorage(void *memory, size_t size, CIF::Builtins::DeallocatorT deallocator) override { if ((memory == nullptr) || (size == 0)) { data.clear(); return; } data.assign((char *)memory, ((char *)memory) + size); } void SetUnderlyingStorage(const void *memory, size_t size) override { if ((memory == nullptr) || (size == 0)) { data.clear(); return; } data.assign((char *)memory, ((char *)memory) + size); } void *DetachAllocation() override { // unsupported in mock return nullptr; } const void *GetMemoryRaw() const override { if (data.size() > 0) { return data.data(); } else { return nullptr; } } void *GetMemoryRawWriteable() override { if (data.size() > 0) { return data.data(); } else { return nullptr; } } size_t GetSizeRaw() const override { return data.size(); } size_t GetCapacityRaw() const override { return data.capacity(); } bool Resize(size_t newSize) override { data.resize(newSize); return true; } bool Reserve(size_t newCapacity) override { data.reserve(newCapacity); return true; } void Clear() override { data.clear(); } void Deallocate() override { std::vector rhs; rhs.swap(data); } bool AlignUp(uint32_t alignment) override { auto rest = data.size() & alignment; if (rest != 0) { data.resize(data.size() + alignment - rest); } return true; } bool PushBackRawBytes(const void *newData, size_t size) override { if ((newData == nullptr) || (size == 0)) { return true; } data.insert(data.end(), (char *)newData, ((char *)newData) + size); return true; } bool IsConst() const override { return false; } std::vector data; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_cl_device.cpp000066400000000000000000000014221363734646600263770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" using namespace NEO; bool 
&MockClDevice::createSingleDevice = MockDevice::createSingleDevice; decltype(&createCommandStream) &MockClDevice::createCommandStreamReceiverFunc = MockDevice::createCommandStreamReceiverFunc; MockClDevice::MockClDevice(MockDevice *pMockDevice) : ClDevice(*pMockDevice, platform()), device(*pMockDevice), sharedDeviceInfo(device.deviceInfo), executionEnvironment(pMockDevice->executionEnvironment), mockMemoryManager(pMockDevice->mockMemoryManager), engines(pMockDevice->engines) { } compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_cl_device.h000066400000000000000000000067061363734646600260560ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" namespace NEO { class FailMemoryManager; class OSTime; class SubDevice; template class UltCommandStreamReceiver; struct HardwareInfo; extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); class MockClDevice : public ClDevice { public: using ClDevice::ClDevice; using ClDevice::deviceExtensions; using ClDevice::deviceInfo; using ClDevice::driverInfo; using ClDevice::enabledClVersion; using ClDevice::initializeCaps; using ClDevice::name; using ClDevice::simultaneousInterops; using ClDevice::subDevices; explicit MockClDevice(MockDevice *pMockDevice); bool createEngines() { return device.createEngines(); } void setOSTime(OSTime *osTime) { device.setOSTime(osTime); } bool getCpuTime(uint64_t *timeStamp) { return device.getCpuTime(timeStamp); } void setPreemptionMode(PreemptionMode mode) { device.setPreemptionMode(mode); } void injectMemoryManager(MemoryManager *pMemoryManager) { device.injectMemoryManager(pMemoryManager); } void setPerfCounters(PerformanceCounters *perfCounters) { device.setPerfCounters(perfCounters); } const char *getProductAbbrev() const { return 
device.getProductAbbrev(); } template UltCommandStreamReceiver &getUltCommandStreamReceiver() { return device.getUltCommandStreamReceiver(); } template UltCommandStreamReceiver &getUltCommandStreamReceiverFromIndex(uint32_t index) { return device.getUltCommandStreamReceiverFromIndex(index); } CommandStreamReceiver &getGpgpuCommandStreamReceiver() const { return device.getGpgpuCommandStreamReceiver(); } void resetCommandStreamReceiver(CommandStreamReceiver *newCsr) { device.resetCommandStreamReceiver(newCsr); } void resetCommandStreamReceiver(CommandStreamReceiver *newCsr, uint32_t engineIndex) { device.resetCommandStreamReceiver(newCsr, engineIndex); } void setSourceLevelDebuggerActive(bool active) { device.setDebuggerActive(active); } template static T *createWithExecutionEnvironment(const HardwareInfo *pHwInfo, ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) { return MockDevice::createWithExecutionEnvironment(pHwInfo, executionEnvironment, rootDeviceIndex); } template static T *createWithNewExecutionEnvironment(const HardwareInfo *pHwInfo, uint32_t rootDeviceIndex = 0) { return MockDevice::createWithNewExecutionEnvironment(pHwInfo, rootDeviceIndex); } SubDevice *createSubDevice(uint32_t subDeviceIndex) { return device.createSubDevice(subDeviceIndex); } std::unique_ptr createCommandStreamReceiver() const { return device.createCommandStreamReceiver(); } BuiltIns *getBuiltIns() const { return getDevice().getBuiltIns(); } void setDebuggerActive(bool active) { sharedDeviceInfo.debuggerActive = active; } MockDevice &device; DeviceInfo &sharedDeviceInfo; ExecutionEnvironment *&executionEnvironment; static bool &createSingleDevice; static decltype(&createCommandStream) &createCommandStreamReceiverFunc; std::unique_ptr &mockMemoryManager; std::vector &engines; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_command_queue.h000066400000000000000000000372111363734646600267560ustar00rootroot00000000000000/* * Copyright 
(C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" //////////////////////////////////////////////////////////////////////////////// // MockCommandQueue - Core implementation //////////////////////////////////////////////////////////////////////////////// namespace NEO { class MockCommandQueue : public CommandQueue { public: using CommandQueue::bufferCpuCopyAllowed; using CommandQueue::device; using CommandQueue::gpgpuEngine; using CommandQueue::obtainNewTimestampPacketNodes; using CommandQueue::requiresCacheFlushAfterWalker; using CommandQueue::throttle; using CommandQueue::timestampPacketContainer; void setProfilingEnabled() { commandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; } void setOoqEnabled() { commandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } MockCommandQueue() : CommandQueue(nullptr, nullptr, 0) {} MockCommandQueue(Context &context) : MockCommandQueue(&context, context.getDevice(0), nullptr) {} MockCommandQueue(Context *context, ClDevice *device, const cl_queue_properties *props) : CommandQueue(context, device, props) { } LinearStream &getCS(size_t minRequiredSize) override { requestedCmdStreamSize = minRequiredSize; return CommandQueue::getCS(minRequiredSize); } void releaseIndirectHeap(IndirectHeap::Type heap) override { releaseIndirectHeapCalled = true; CommandQueue::releaseIndirectHeap(heap); } cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t size, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { writeBufferCounter++; writeBufferBlocking = (CL_TRUE == blockingWrite); writeBufferOffset = offset; writeBufferSize = size; writeBufferPtr = const_cast(ptr); return writeBufferRetValue; } void 
waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = taskCountToWait; return CommandQueue::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); } cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t srcOrigin[3], const size_t dstOrigin[3], const size_t region[3], cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueFillImage(Image *image, const void *fillColor, const size_t *origin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueFillBuffer(Buffer *buffer, const void *pattern, size_t patternSize, size_t offset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueKernel(cl_kernel kernel, cl_uint workDim, const size_t *globalWorkOffset, const size_t *globalWorkSize, const size_t *localWorkSize, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueBarrierWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueSVMMap(cl_bool blockingMap, cl_map_flags mapFlags, void *svmPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { return CL_SUCCESS; } cl_int enqueueSVMUnmap(void *svmPtr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool externalAppCall) override { return CL_SUCCESS; } cl_int enqueueSVMFree(cl_uint numSvmPointers, void *svmPointers[], void(CL_CALLBACK *pfnFreeFunc)(cl_command_queue queue, cl_uint numSvmPointers, void *svmPointers[], void *userData), void *userData, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { 
return CL_SUCCESS; } cl_int enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, const void *srcPtr, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueSVMMemFill(void *svmPtr, const void *pattern, size_t patternSize, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueMarkerWithWaitList(cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueMigrateMemObjects(cl_uint numMemObjects, const cl_mem *memObjects, cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueSVMMigrateMem(cl_uint numSvmPointers, const void **svmPointers, const size_t *sizes, const cl_mem_migration_flags flags, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyBuffer(Buffer *srcBuffer, Buffer *dstBuffer, size_t srcOffset, size_t dstOffset, size_t size, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueReadBuffer(Buffer *buffer, cl_bool blockingRead, size_t offset, size_t size, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueReadImage(Image *srcImage, cl_bool blockingRead, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation 
*mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyBufferRect(Buffer *srcBuffer, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *dstOrigin, const size_t *region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueWriteBufferRect(Buffer *buffer, cl_bool blockingWrite, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, const void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueReadBufferRect(Buffer *buffer, cl_bool blockingRead, const size_t *bufferOrigin, const size_t *hostOrigin, const size_t *region, size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyBufferToImage(Buffer *srcBuffer, Image *dstImage, size_t srcOffset, const size_t *dstOrigin, const size_t *region, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueCopyImageToBuffer(Image *srcImage, Buffer *dstBuffer, const size_t *srcOrigin, const size_t *region, size_t dstOffset, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int finish() override { return CL_SUCCESS; } cl_int enqueueInitDispatchGlobals(DispatchGlobalsArgs *dispatchGlobalsArgs, cl_uint numEventsInWaitList, const cl_event 
*eventWaitList, cl_event *event) override { return CL_SUCCESS; } cl_int flush() override { return CL_SUCCESS; } bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; } bool releaseIndirectHeapCalled = false; cl_int writeBufferRetValue = CL_SUCCESS; uint32_t writeBufferCounter = 0; bool writeBufferBlocking = false; size_t writeBufferOffset = 0; size_t writeBufferSize = 0; void *writeBufferPtr = nullptr; size_t requestedCmdStreamSize = 0; std::atomic latestTaskCountWaited{std::numeric_limits::max()}; }; template class MockCommandQueueHw : public CommandQueueHw { typedef CommandQueueHw BaseClass; public: using BaseClass::bcsEngine; using BaseClass::bcsTaskCount; using BaseClass::commandQueueProperties; using BaseClass::commandStream; using BaseClass::gpgpuEngine; using BaseClass::obtainCommandStream; using BaseClass::obtainNewTimestampPacketNodes; using BaseClass::requiresCacheFlushAfterWalker; using BaseClass::throttle; using BaseClass::timestampPacketContainer; MockCommandQueueHw(Context *context, ClDevice *device, cl_queue_properties *properties) : BaseClass(context, device, properties, false) { } void setOoqEnabled() { commandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; } LinearStream &getCS(size_t minRequiredSize) override { requestedCmdStreamSize = minRequiredSize; return CommandQueue::getCS(minRequiredSize); } UltCommandStreamReceiver &getUltCommandStreamReceiver() { return reinterpret_cast &>(*BaseClass::gpgpuEngine->commandStreamReceiver); } cl_int enqueueWriteImage(Image *dstImage, cl_bool blockingWrite, const size_t *origin, const size_t *region, size_t inputRowPitch, size_t inputSlicePitch, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EnqueueWriteImageCounter++; return BaseClass::enqueueWriteImage(dstImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, mapAllocation, 
numEventsInWaitList, eventWaitList, event); } void *cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) override { cpuDataTransferHandlerCalled = true; return BaseClass::cpuDataTransferHandler(transferProperties, eventsRequest, retVal); } cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t size, const void *ptr, GraphicsAllocation *mapAllocation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { EnqueueWriteBufferCounter++; blockingWriteBuffer = blockingWrite == CL_TRUE; return BaseClass::enqueueWriteBuffer(buffer, blockingWrite, offset, size, ptr, mapAllocation, numEventsInWaitList, eventWaitList, event); } void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { kernelParams = dispatchInfo.peekBuiltinOpParams(); lastCommandType = commandType; for (auto &di : dispatchInfo) { lastEnqueuedKernels.push_back(di.getKernel()); if (storeMultiDispatchInfo) { storedMultiDispatchInfo.push(di); } } } void notifyEnqueueReadBuffer(Buffer *buffer, bool blockingRead) override { notifyEnqueueReadBufferCalled = true; } void notifyEnqueueReadImage(Image *image, bool blockingRead) override { notifyEnqueueReadImageCalled = true; } void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = taskCountToWait; return BaseClass::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); } bool isCacheFlushForBcsRequired() const override { if (overrideIsCacheFlushForBcsRequired.enabled) { return overrideIsCacheFlushForBcsRequired.returnValue; } return BaseClass::isCacheFlushForBcsRequired(); } unsigned int lastCommandType; std::vector lastEnqueuedKernels; MultiDispatchInfo storedMultiDispatchInfo; size_t EnqueueWriteImageCounter = 0; size_t EnqueueWriteBufferCounter = 0; size_t requestedCmdStreamSize = 0; bool 
blockingWriteBuffer = false; bool storeMultiDispatchInfo = false; bool notifyEnqueueReadBufferCalled = false; bool notifyEnqueueReadImageCalled = false; bool cpuDataTransferHandlerCalled = false; struct OverrideReturnValue { bool enabled = false; bool returnValue = false; } overrideIsCacheFlushForBcsRequired; BuiltinOpParams kernelParams; std::atomic latestTaskCountWaited{std::numeric_limits::max()}; LinearStream *peekCommandStream() { return this->commandStream; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_compilers.cpp000066400000000000000000000544741363734646600264760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_compilers.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/os_interface/os_inc_base.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "opencl/test/unit_test/mocks/mock_sip.h" #include "cif/macros/enable.h" #include "compiler_options.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include #include namespace NEO { std::unique_ptr fclDebugVars; std::unique_ptr igcDebugVars; void setFclDebugVars(MockCompilerDebugVars &dbgv) { fclDebugVars.reset(new MockCompilerDebugVars(dbgv)); } void setIgcDebugVars(MockCompilerDebugVars &dbgv) { igcDebugVars.reset(new MockCompilerDebugVars(dbgv)); } MockCompilerDebugVars getFclDebugVars() { if (fclDebugVars == nullptr) { fclDebugVars.reset(new MockCompilerDebugVars()); } return *fclDebugVars.get(); } MockCompilerDebugVars getIgcDebugVars() { if (igcDebugVars == nullptr) { igcDebugVars.reset(new MockCompilerDebugVars()); } return *igcDebugVars.get(); } void clearFclDebugVars() { fclDebugVars.reset(); } void clearIgcDebugVars() { igcDebugVars.reset(); } MockCompilerEnableGuard::MockCompilerEnableGuard(bool 
autoEnable) { if (autoEnable) { Enable(); } } MockCompilerEnableGuard::~MockCompilerEnableGuard() { Disable(); } void MockCompilerEnableGuard::Enable() { if (enabled == false) { // load mock from self (don't load dynamic libraries) this->oldFclDllName = Os::frontEndDllName; this->oldIgcDllName = Os::igcDllName; Os::frontEndDllName = ""; Os::igcDllName = ""; MockCIFMain::setGlobalCreatorFunc(NEO::MockIgcOclDeviceCtx::Create); MockCIFMain::setGlobalCreatorFunc(NEO::MockFclOclDeviceCtx::Create); if (fclDebugVars == nullptr) { fclDebugVars.reset(new MockCompilerDebugVars); } if (fclDebugVars == nullptr) { igcDebugVars.reset(new MockCompilerDebugVars); } enabled = true; } } void MockCompilerEnableGuard::Disable() { if (enabled) { Os::frontEndDllName = this->oldFclDllName; Os::igcDllName = this->oldIgcDllName; MockCIFMain::removeGlobalCreatorFunc(); MockCIFMain::removeGlobalCreatorFunc(); clearFclDebugVars(); clearIgcDebugVars(); enabled = false; } } } // namespace NEO namespace IGC { // Stub versions - overridable in mocks // IgcOclDeviceCtx stubs IgcOclDeviceCtx<0>::~IgcOclDeviceCtx() {} template IgcOclDeviceCtx<0>::IgcOclDeviceCtx(ArgsT &&... args) {} void CIF_GET_INTERFACE_CLASS(IgcOclDeviceCtx, 1)::SetProfilingTimerResolution(float v) {} PlatformBase *CIF_GET_INTERFACE_CLASS(IgcOclDeviceCtx, 1)::GetPlatformHandleImpl(CIF::Version_t ver) { return nullptr; } GTSystemInfoBase *CIF_GET_INTERFACE_CLASS(IgcOclDeviceCtx, 1)::GetGTSystemInfoHandleImpl(CIF::Version_t ver) { return nullptr; } IgcFeaturesAndWorkaroundsBase *CIF_GET_INTERFACE_CLASS(IgcOclDeviceCtx, 1)::GetIgcFeaturesAndWorkaroundsHandleImpl(CIF::Version_t ver) { return nullptr; } IgcOclTranslationCtxBase *CIF_GET_INTERFACE_CLASS(IgcOclDeviceCtx, 1)::CreateTranslationCtxImpl(CIF::Version_t ver, CodeType::CodeType_t inType, CodeType::CodeType_t outType) { return nullptr; } // Platform stubs Platform<0>::~Platform() {} template Platform<0>::Platform(ArgsT &&... 
args) {} #define DEFINE_GET_SET_PREFIX(INTERFACE, VERSION, NAME, TYPE, PREFIX) \ TYPE CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Get##NAME() const { return (TYPE)0; } \ void CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Set##NAME(TYPE v) {} DEFINE_GET_SET_PREFIX(Platform, 1, ProductFamily, TypeErasedEnum, e); DEFINE_GET_SET_PREFIX(Platform, 1, PCHProductFamily, TypeErasedEnum, e); DEFINE_GET_SET_PREFIX(Platform, 1, DisplayCoreFamily, TypeErasedEnum, e); DEFINE_GET_SET_PREFIX(Platform, 1, RenderCoreFamily, TypeErasedEnum, e); DEFINE_GET_SET_PREFIX(Platform, 1, PlatformType, TypeErasedEnum, e); DEFINE_GET_SET_PREFIX(Platform, 1, DeviceID, unsigned short, us); DEFINE_GET_SET_PREFIX(Platform, 1, RevId, unsigned short, us); DEFINE_GET_SET_PREFIX(Platform, 1, DeviceID_PCH, unsigned short, us); DEFINE_GET_SET_PREFIX(Platform, 1, RevId_PCH, unsigned short, us); DEFINE_GET_SET_PREFIX(Platform, 1, GTType, TypeErasedEnum, e); #undef DEFINE_GET_SET_PREFIX // GtSystemInfo stubs GTSystemInfo<0>::~GTSystemInfo() {} template GTSystemInfo<0>::GTSystemInfo(ArgsT &&... 
args) {} #define DEFINE_GET_SET(INTERFACE, VERSION, NAME, TYPE) \ TYPE CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Get##NAME() const { return (TYPE)0; } \ void CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Set##NAME(TYPE v) {} DEFINE_GET_SET(GTSystemInfo, 1, EUCount, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, ThreadCount, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, SliceCount, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, SubSliceCount, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, L3CacheSizeInKb, uint64_t); DEFINE_GET_SET(GTSystemInfo, 1, LLCCacheSizeInKb, uint64_t); DEFINE_GET_SET(GTSystemInfo, 1, EdramSizeInKb, uint64_t); DEFINE_GET_SET(GTSystemInfo, 1, L3BankCount, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, MaxFillRate, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, EuCountPerPoolMax, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, EuCountPerPoolMin, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, TotalVsThreads, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, TotalHsThreads, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, TotalDsThreads, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, TotalGsThreads, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, TotalPsThreadsWindowerRange, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, CsrSizeInMb, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, MaxEuPerSubSlice, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, MaxSlicesSupported, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, MaxSubSlicesSupported, uint32_t); DEFINE_GET_SET(GTSystemInfo, 1, IsL3HashModeEnabled, bool); DEFINE_GET_SET(GTSystemInfo, 1, IsDynamicallyPopulated, bool); #undef DEFINE_GET_SET // IgcFeaturesAndWorkarounds stubs IgcFeaturesAndWorkarounds<0>::~IgcFeaturesAndWorkarounds() {} template IgcFeaturesAndWorkarounds<0>::IgcFeaturesAndWorkarounds(ArgsT &&... 
args) {} #define DEFINE_GET_SET(INTERFACE, VERSION, NAME, TYPE) \ TYPE CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Get##NAME() const { return (TYPE)0; } \ void CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Set##NAME(TYPE v) {} DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrDesktop, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrChannelSwizzlingXOREnabled, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGtBigDie, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGtMediumDie, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGtSmallDie, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGT1, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGT1_5, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGT2, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGT3, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGT4, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrIVBM0M1Platform, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGTL, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGTM, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGTH, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrSGTPVSKUStrapPresent, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGTA, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGTC, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGTX, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, Ftr5Slice, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrGpGpuMidThreadLevelPreempt, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrIoMmuPageFaulting, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrWddm2Svm, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrPooledEuEnabled, bool); DEFINE_GET_SET(IgcFeaturesAndWorkarounds, 1, FtrResourceStreamer, bool); #undef DEFINE_GET_SET // IgcOclTranslationCtx IgcOclTranslationCtx<0>::~IgcOclTranslationCtx() {} template IgcOclTranslationCtx<0>::IgcOclTranslationCtx(ArgsT &&... 
args) {} OclTranslationOutputBase *CIF_GET_INTERFACE_CLASS(IgcOclTranslationCtx, 1)::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount) { return nullptr; } OclTranslationOutputBase *CIF_GET_INTERFACE_CLASS(IgcOclTranslationCtx, 2)::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount, void *gtPinInput) { return nullptr; } bool CIF_GET_INTERFACE_CLASS(IgcOclTranslationCtx, 3)::GetSpecConstantsInfoImpl( CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *outSpecConstantsIds, CIF::Builtins::BufferSimple *outSpecConstantsSizes) { return true; } OclTranslationOutputBase *CIF_GET_INTERFACE_CLASS(IgcOclTranslationCtx, 3)::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *specConstantsIds, CIF::Builtins::BufferSimple *specConstantsValues, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount, void *gtPinInput) { return nullptr; } // OclTranslationOutput OclTranslationOutput<0>::~OclTranslationOutput() {} template OclTranslationOutput<0>::OclTranslationOutput(ArgsT &&... 
args) {} bool CIF_GET_INTERFACE_CLASS(OclTranslationOutput, 1)::Successful() const { return true; } bool CIF_GET_INTERFACE_CLASS(OclTranslationOutput, 1)::HasWarnings() const { return false; } CIF::Builtins::BufferBase *CIF_GET_INTERFACE_CLASS(OclTranslationOutput, 1)::GetBuildLogImpl(CIF::Version_t bufferVersion) { return nullptr; } CIF::Builtins::BufferBase *CIF_GET_INTERFACE_CLASS(OclTranslationOutput, 1)::GetOutputImpl(CIF::Version_t bufferVersion) { return nullptr; } CIF::Builtins::BufferBase *CIF_GET_INTERFACE_CLASS(OclTranslationOutput, 1)::GetDebugDataImpl(CIF::Version_t bufferVersion) { return nullptr; } CodeType::CodeType_t CIF_GET_INTERFACE_CLASS(OclTranslationOutput, 1)::GetOutputType() const { return IGC::CodeType::undefined; } // FclOclTranslationCtx FclOclTranslationCtx<0>::~FclOclTranslationCtx() {} template FclOclTranslationCtx<0>::FclOclTranslationCtx(ArgsT &&... args) {} IGC::OclTranslationOutputBase *CIF_GET_INTERFACE_CLASS(FclOclTranslationCtx, 1)::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount) { return nullptr; } // MockFclOclDeviceCtx FclOclDeviceCtx<0>::~FclOclDeviceCtx() {} template FclOclDeviceCtx<0>::FclOclDeviceCtx(ArgsT &&... 
args) {} void CIF_GET_INTERFACE_CLASS(FclOclDeviceCtx, 1)::SetOclApiVersion(uint32_t version) {} IGC::FclOclTranslationCtxBase *CIF_GET_INTERFACE_CLASS(FclOclDeviceCtx, 1)::CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) { return nullptr; } CodeType::CodeType_t CIF_GET_INTERFACE_CLASS(FclOclDeviceCtx, 2)::GetPreferredIntermediateRepresentation() { return CodeType::spirV; } IGC::FclOclTranslationCtxBase *CIF_GET_INTERFACE_CLASS(FclOclDeviceCtx, 3)::CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType, CIF::Builtins::BufferSimple *err) { return nullptr; } } // namespace IGC #include "cif/macros/disable.h" namespace NEO { template std::unique_ptr loadBinaryFile(StrT &&fileName, size_t &fileSize) { std::ifstream f{fileName, std::ios::binary | std::ios::in | std::ios::ate}; auto end = f.tellg(); f.seekg(0, std::ios::beg); auto beg = f.tellg(); auto s = static_cast(end - beg); if (s == 0) { fileSize = 0; return nullptr; } std::unique_ptr data{new unsigned char[s]}; f.read(reinterpret_cast(data.get()), s); fileSize = s; return data; }; void translate(bool usingIgc, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, MockOclTranslationOutput *out) { MockCompilerDebugVars &debugVars = (usingIgc) ? 
*NEO::igcDebugVars : *fclDebugVars;

    // Hand the received source back to the test when it asked to capture it.
    if (debugVars.receivedInput != nullptr) {
        if (src != nullptr) {
            debugVars.receivedInput->assign(src->GetMemory(), src->GetMemory() + src->GetSizeRaw());
        }
    }

    // Same capture mechanism for the internal options buffer.
    if (debugVars.receivedInternalOptionsOutput != nullptr) {
        if (internalOptions != nullptr) {
            debugVars.receivedInternalOptionsOutput->assign(internalOptions->GetMemory(), internalOptions->GetMemory() + internalOptions->GetSizeRaw());
        }
    }

    // A forced failure, a missing output object or an empty source takes the error branch below.
    if ((debugVars.forceBuildFailure == false) &&
        (out && src && src->GetMemoryRaw() && src->GetSizeRaw())) {

        if (debugVars.internalOptionsExpected) {
            // A null buffer is treated like an empty one instead of being dereferenced.
            // `out` needs no re-check: the enclosing condition already established it is non-null.
            if (internalOptions == nullptr || internalOptions->GetSizeRaw() < 1 || internalOptions->GetMemoryRaw() == nullptr) {
                out->setError();
            }
        }

        std::string inputFile = "";
        inputFile.append(debugVars.fileName);

        // Debug-data file name: the input name with everything from the last '.' replaced by ".dbg".
        std::string debugFile;
        auto pos = inputFile.rfind(".");
        debugFile = inputFile.substr(0, pos);
        debugFile.append(".dbg");

        // Optionally suffix the file names with the build options (spaces turned into '_').
        // A null options buffer is skipped rather than dereferenced.
        if (debugVars.appendOptionsToFileName && options != nullptr && options->GetSizeRaw()) {
            std::string opts(options->GetMemory(), options->GetMemory() + options->GetSize());
            // handle special option "-create-library" - just erase it
            size_t createLibraryPos = opts.find(CompilerOptions::createLibrary, 0);
            if (createLibraryPos != std::string::npos) {
                opts.erase(createLibraryPos, CompilerOptions::createLibrary.length());
            }
            std::replace(opts.begin(), opts.end(), ' ', '_');
            inputFile.append(opts);

            // The debug file name only gets the suffix when it will actually be read from disk.
            if (debugVars.debugDataToReturn == nullptr) {
                debugFile.append(opts);
            }
        }

        // Output binary: the buffer supplied through the debug vars wins; otherwise load the file.
        if ((debugVars.binaryToReturn != nullptr) || (debugVars.binaryToReturnSize != 0)) {
            out->setOutput(debugVars.binaryToReturn, debugVars.binaryToReturnSize);
        } else {
            size_t fileSize = 0;
            auto fileData = loadBinaryFile(inputFile, fileSize);

            out->setOutput(fileData.get(), fileSize);
            if (fileSize == 0) {
                out->setError("error: Mock compiler could not find cached input file: " + inputFile);
            }
        }

        // Debug data follows the same rule: supplied buffer first, file otherwise.
        if (debugVars.debugDataToReturn != nullptr) {
            out->setDebugData(debugVars.debugDataToReturn, debugVars.debugDataToReturnSize);
        } else {
            size_t fileSize = 0;
            auto fileData = loadBinaryFile(debugFile, fileSize);
out->setDebugData(fileData.get(), fileSize); } } else { out->setError(); } } MockIgcOclDeviceCtx::MockIgcOclDeviceCtx() { platform = new MockCIFPlatform; gtSystemInfo = new MockGTSystemInfo; igcFeWa = new MockIgcFeaturesAndWorkarounds; } MockIgcOclDeviceCtx::~MockIgcOclDeviceCtx() { if (platform != nullptr) { platform->Release(); } if (gtSystemInfo != nullptr) { gtSystemInfo->Release(); } if (igcFeWa != nullptr) { igcFeWa->Release(); } } CIF::ICIF *MockIgcOclDeviceCtx::Create(CIF::InterfaceId_t intId, CIF::Version_t version) { return new MockIgcOclDeviceCtx; } IGC::IgcOclTranslationCtxBase *MockIgcOclDeviceCtx::CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) { requestedTranslationCtxs.emplace_back(inType, outType); return new MockIgcOclTranslationCtx; } MockIgcOclTranslationCtx::MockIgcOclTranslationCtx() = default; MockIgcOclTranslationCtx::~MockIgcOclTranslationCtx() = default; IGC::OclTranslationOutputBase *MockIgcOclTranslationCtx::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount) { auto out = new MockOclTranslationOutput(); translate(true, src, options, internalOptions, out); return out; } IGC::OclTranslationOutputBase *MockIgcOclTranslationCtx::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount, void *gtpinInput) { auto out = new MockOclTranslationOutput(); translate(true, src, options, internalOptions, out); return out; } bool MockIgcOclTranslationCtx::GetSpecConstantsInfoImpl( CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *outSpecConstantsIds, CIF::Builtins::BufferSimple *outSpecConstantsSizes) { return true; } 
IGC::OclTranslationOutputBase *MockIgcOclTranslationCtx::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *specConstantsIds, CIF::Builtins::BufferSimple *specConstantsValues, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount, void *gtPinInput) { auto out = new MockOclTranslationOutput(); translate(true, src, options, internalOptions, out); return out; } MockOclTranslationOutput::MockOclTranslationOutput() { this->log = new MockCIFBuffer(); this->output = new MockCIFBuffer(); this->debugData = new MockCIFBuffer(); } MockOclTranslationOutput::~MockOclTranslationOutput() { if (this->log != nullptr) { this->log->Release(); } if (this->output != nullptr) { this->output->Release(); } if (this->debugData != nullptr) { this->debugData->Release(); } } void MockOclTranslationOutput::setError(const std::string &message) { failed = true; this->log->SetUnderlyingStorage(message.c_str(), message.size()); } void MockOclTranslationOutput::setOutput(const void *data, size_t dataLen) { this->output->SetUnderlyingStorage(data, dataLen); } void MockOclTranslationOutput::setDebugData(const void *data, size_t dataLen) { this->debugData->SetUnderlyingStorage(data, dataLen); } CIF::Builtins::BufferBase *MockOclTranslationOutput::GetBuildLogImpl(CIF::Version_t bufferVersion) { return log; } CIF::Builtins::BufferBase *MockOclTranslationOutput::GetOutputImpl(CIF::Version_t bufferVersion) { return output; } CIF::Builtins::BufferBase *MockOclTranslationOutput::GetDebugDataImpl(CIF::Version_t bufferVersion) { return debugData; } MockFclOclTranslationCtx::MockFclOclTranslationCtx() { } MockFclOclTranslationCtx::~MockFclOclTranslationCtx() { } IGC::OclTranslationOutputBase *MockFclOclTranslationCtx::TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, 
CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount) { auto out = new MockOclTranslationOutput(); translate(false, src, options, internalOptions, out); return out; } MockFclOclDeviceCtx::MockFclOclDeviceCtx() = default; MockFclOclDeviceCtx::~MockFclOclDeviceCtx() = default; CIF::ICIF *MockFclOclDeviceCtx::Create(CIF::InterfaceId_t intId, CIF::Version_t version) { return new MockFclOclDeviceCtx; } IGC::FclOclTranslationCtxBase *MockFclOclDeviceCtx::CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) { return new MockFclOclTranslationCtx; } IGC::FclOclTranslationCtxBase *MockFclOclDeviceCtx::CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType, CIF::Builtins::BufferSimple *err) { return new MockFclOclTranslationCtx; } std::vector MockCompilerInterface::getDummyGenBinary() { return MockSipKernel::getDummyGenBinary(); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_compilers.h000066400000000000000000000317651363734646600261410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_interface.h" #include "opencl/test/unit_test/mocks/mock_cif.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include #include #include namespace NEO { struct MockCompilerDebugVars { bool forceBuildFailure = false; bool forceCreateFailure = false; bool forceRegisterFail = false; bool internalOptionsExpected = false; bool appendOptionsToFileName = true; void *debugDataToReturn = nullptr; size_t debugDataToReturnSize = 0; void *binaryToReturn = nullptr; size_t binaryToReturnSize = 0; bool failCreatePlatformInterface = false; bool failCreateGtSystemInfoInterface = false; bool failCreateIgcFeWaInterface 
= false; std::string *receivedInternalOptionsOutput = nullptr; std::string *receivedInput = nullptr; std::string fileName; }; struct MockCompilerEnableGuard { MockCompilerEnableGuard(bool autoEnable = false); ~MockCompilerEnableGuard(); void Enable(); void Disable(); const char *oldFclDllName; const char *oldIgcDllName; bool enabled = false; }; void setFclDebugVars(MockCompilerDebugVars &dbgv); void setIgcDebugVars(MockCompilerDebugVars &dbgv); void clearFclDebugVars(); void clearIgcDebugVars(); MockCompilerDebugVars getFclDebugVars(); MockCompilerDebugVars getIgcDebugVars(); struct MockCIFPlatform : MockCIF { IGC::TypeErasedEnum GetProductFamily() const override { return productFamily; } void SetProductFamily(IGC::TypeErasedEnum v) override { productFamily = v; } IGC::TypeErasedEnum GetRenderCoreFamily() const override { return renderCoreFamily; } void SetRenderCoreFamily(IGC::TypeErasedEnum v) override { renderCoreFamily = v; } protected: IGC::TypeErasedEnum productFamily; IGC::TypeErasedEnum renderCoreFamily; }; struct MockGTSystemInfo : MockCIF { uint32_t GetEUCount() const override { return this->euCount; } void SetEUCount(uint32_t v) override { euCount = v; } uint32_t GetThreadCount() const override { return this->threadCount; } void SetThreadCount(uint32_t v) override { threadCount = v; } uint32_t GetSliceCount() const override { return this->sliceCount; } void SetSliceCount(uint32_t v) override { sliceCount = v; } uint32_t GetSubSliceCount() const override { return this->subsliceCount; } void SetSubSliceCount(uint32_t v) override { subsliceCount = v; } protected: uint32_t euCount; uint32_t threadCount; uint32_t sliceCount; uint32_t subsliceCount; }; struct MockIgcFeaturesAndWorkarounds : MockCIF { }; struct MockIgcOclTranslationCtx : MockCIF { using MockCIF::TranslateImpl; MockIgcOclTranslationCtx(); ~MockIgcOclTranslationCtx() override; IGC::OclTranslationOutputBase *TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, 
CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount) override; IGC::OclTranslationOutputBase *TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount, void *gtpinInput) override; bool GetSpecConstantsInfoImpl( CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *outSpecConstantsIds, CIF::Builtins::BufferSimple *outSpecConstantsSizes) override; IGC::OclTranslationOutputBase *TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *specConstantsIds, CIF::Builtins::BufferSimple *specConstantsValues, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount, void *gtPinInput) override; }; struct MockOclTranslationOutput : MockCIF { MockOclTranslationOutput(); ~MockOclTranslationOutput() override; CIF::Builtins::BufferBase *GetBuildLogImpl(CIF::Version_t bufferVersion) override; CIF::Builtins::BufferBase *GetOutputImpl(CIF::Version_t bufferVersion) override; CIF::Builtins::BufferBase *GetDebugDataImpl(CIF::Version_t bufferVersion) override; bool Successful() const override { return failed == false; } void setError() { setError(""); } void setError(const std::string &message); void setOutput(const void *data, size_t dataLen); void setDebugData(const void *data, size_t dataLen); bool failed = false; MockCIFBuffer *output = nullptr; MockCIFBuffer *log = nullptr; MockCIFBuffer *debugData = nullptr; }; struct MockIgcOclDeviceCtx : MockCIF { static CIF::ICIF *Create(CIF::InterfaceId_t intId, CIF::Version_t version); MockIgcOclDeviceCtx(); ~MockIgcOclDeviceCtx() override; IGC::PlatformBase *GetPlatformHandleImpl(CIF::Version_t ver) override { 
if (getIgcDebugVars().failCreatePlatformInterface) { return nullptr; } return platform; } IGC::GTSystemInfoBase *GetGTSystemInfoHandleImpl(CIF::Version_t ver) override { if (getIgcDebugVars().failCreateGtSystemInfoInterface) { return nullptr; } return gtSystemInfo; } IGC::IgcFeaturesAndWorkaroundsBase *GetIgcFeaturesAndWorkaroundsHandleImpl(CIF::Version_t ver) override { if (getIgcDebugVars().failCreateIgcFeWaInterface) { return nullptr; } return igcFeWa; } IGC::IgcOclTranslationCtxBase *CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) override; void SetDebugVars(MockCompilerDebugVars &debugVars) { this->debugVars = debugVars; } MockCIFPlatform *platform = nullptr; MockGTSystemInfo *gtSystemInfo = nullptr; MockIgcFeaturesAndWorkarounds *igcFeWa = nullptr; MockCompilerDebugVars debugVars; using TranslationOpT = std::pair; std::vector requestedTranslationCtxs; }; struct MockFclOclTranslationCtx : MockCIF { MockFclOclTranslationCtx(); ~MockFclOclTranslationCtx() override; IGC::OclTranslationOutputBase *TranslateImpl( CIF::Version_t outVersion, CIF::Builtins::BufferSimple *src, CIF::Builtins::BufferSimple *options, CIF::Builtins::BufferSimple *internalOptions, CIF::Builtins::BufferSimple *tracingOptions, uint32_t tracingOptionsCount) override; }; struct MockFclOclDeviceCtx : MockCIF> { MockFclOclDeviceCtx(); ~MockFclOclDeviceCtx() override; static CIF::ICIF *Create(CIF::InterfaceId_t intId, CIF::Version_t version); void SetOclApiVersion(uint32_t version) override { oclApiVersion = version; } IGC::FclOclTranslationCtxBase *CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) override; IGC::FclOclTranslationCtxBase *CreateTranslationCtxImpl(CIF::Version_t ver, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType, CIF::Builtins::BufferSimple *err) override; uint32_t oclApiVersion = 120; }; class MockCompilerInterface : public 
CompilerInterface { public: using CompilerInterface::initialize; using CompilerInterface::isCompilerAvailable; using CompilerInterface::isFclAvailable; using CompilerInterface::isIgcAvailable; using CompilerInterface::fclMain; using CompilerInterface::igcMain; bool loadFcl() override { if (failLoadFcl) { return false; } return CompilerInterface::loadFcl(); } bool loadIgc() override { if (failLoadIgc) { return false; } return CompilerInterface::loadIgc(); } void setFclDeviceCtx(const Device &d, IGC::FclOclDeviceCtxTagOCL *ctx) { this->fclDeviceContexts[&d] = CIF::RAII::RetainAndPack(ctx); } std::map &getFclDeviceContexts() { return this->fclDeviceContexts; } void setIgcDeviceCtx(const Device &d, IGC::IgcOclDeviceCtxTagOCL *ctx) { this->igcDeviceContexts[&d] = CIF::RAII::RetainAndPack(ctx); } std::map &getIgcDeviceContexts() { return this->igcDeviceContexts; } void setDeviceCtx(const Device &d, IGC::IgcOclDeviceCtxTagOCL *ctx) { setIgcDeviceCtx(d, ctx); } void setDeviceCtx(const Device &d, IGC::FclOclDeviceCtxTagOCL *ctx) { setFclDeviceCtx(d, ctx); } template std::map> &getDeviceContexts(); std::unique_lock lock() override { if (lockListener != nullptr) { lockListener(*this); } return std::unique_lock(spinlock); } void SetIgcMain(CIF::CIFMain *main) { this->igcMain.release(); this->igcMain.reset(main); } void SetFclMain(CIF::CIFMain *main) { this->fclMain.release(); this->fclMain.reset(main); } CIF::RAII::UPtr_t createFclTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) override { requestedTranslationCtxs.emplace_back(inType, outType); if (failCreateFclTranslationCtx) { return nullptr; } return CompilerInterface::createFclTranslationCtx(device, inType, outType); } CIF::RAII::UPtr_t createIgcTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) override { requestedTranslationCtxs.emplace_back(inType, outType); if (failCreateIgcTranslationCtx) { return nullptr; } 
return CompilerInterface::createIgcTranslationCtx(device, inType, outType); } IGC::FclOclTranslationCtxTagOCL *getFclBaseTranslationCtx() { return this->fclBaseTranslationCtx.get(); } TranslationOutput::ErrorCode getSipKernelBinary(NEO::Device &device, SipKernelType type, std::vector &retBinary) override { if (this->sipKernelBinaryOverride.size() > 0) { retBinary = this->sipKernelBinaryOverride; this->requestedSipKernel = type; return TranslationOutput::ErrorCode::Success; } else { return CompilerInterface::getSipKernelBinary(device, type, retBinary); } } static std::vector getDummyGenBinary(); void (*lockListener)(MockCompilerInterface &compInt) = nullptr; void *lockListenerData = nullptr; bool failCreateFclTranslationCtx = false; bool failCreateIgcTranslationCtx = false; bool failLoadFcl = false; bool failLoadIgc = false; using TranslationOpT = std::pair; std::vector requestedTranslationCtxs; std::vector sipKernelBinaryOverride; SipKernelType requestedSipKernel = SipKernelType::COUNT; IGC::IgcOclDeviceCtxTagOCL *peekIgcDeviceCtx(Device *device) { return igcDeviceContexts[device].get(); } }; template <> inline std::map &MockCompilerInterface::getDeviceContexts() { return getIgcDeviceContexts(); } template <> inline std::map &MockCompilerInterface::getDeviceContexts() { return getFclDeviceContexts(); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_compilers_linux.cpp000066400000000000000000000001251363734646600276750ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_compilers_windows.cpp000066400000000000000000000011421363734646600302300ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "cif/macros/enable.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" namespace IGC { #define 
DEFINE_GET_SET(INTERFACE, VERSION, NAME, TYPE) \ TYPE CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Get##NAME() const { return (TYPE)0; } \ void CIF_GET_INTERFACE_CLASS(INTERFACE, VERSION)::Set##NAME(TYPE v) {} DEFINE_GET_SET(GTSystemInfo, 2, DualSubSliceCount, uint32_t); #undef DEFINE_GET_SET } // namespace IGC compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_context.cpp000066400000000000000000000057141363734646600261560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_context.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "d3d_sharing_functions.h" namespace NEO { MockContext::MockContext(ClDevice *device, bool noSpecialQueue) { memoryManager = device->getMemoryManager(); devices.push_back(device); svmAllocsManager = new SVMAllocsManager(memoryManager); cl_int retVal; if (!specialQueue && !noSpecialQueue) { auto commandQueue = CommandQueue::create(this, device, nullptr, false, retVal); assert(retVal == CL_SUCCESS); overrideSpecialQueueAndDecrementRefCount(commandQueue); } device->incRefInternal(); } MockContext::MockContext( void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data) { device = nullptr; properties = nullptr; numProperties = 0; contextCallback = funcNotify; userData = data; memoryManager = nullptr; specialQueue = nullptr; defaultDeviceQueue = nullptr; driverDiagnostics = nullptr; } MockContext::~MockContext() { if (specialQueue) { specialQueue->release(); specialQueue = nullptr; } if 
(memoryManager->isAsyncDeleterEnabled()) { memoryManager->getDeferredDeleter()->removeClient(); } memoryManager = nullptr; } MockContext::MockContext() { device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; devices.push_back(device); memoryManager = device->getMemoryManager(); svmAllocsManager = new SVMAllocsManager(memoryManager); cl_int retVal; if (!specialQueue) { auto commandQueue = CommandQueue::create(this, device, nullptr, false, retVal); assert(retVal == CL_SUCCESS); overrideSpecialQueueAndDecrementRefCount(commandQueue); } } void MockContext::setSharingFunctions(SharingFunctions *sharingFunctions) { this->sharingFunctions[sharingFunctions->getId()].reset(sharingFunctions); } void MockContext::releaseSharingFunctions(SharingType sharing) { this->sharingFunctions[sharing].release(); } void MockContext::resetSharingFunctions(SharingType sharing) { this->sharingFunctions[sharing].reset(); } void MockContext::registerSharingWithId(SharingFunctions *sharing, SharingType sharingId) { this->sharingFunctions[sharingId].reset(sharing); } void MockContext::clearSharingFunctions() { std::vectorsharingFunctions)::value_type> v; this->sharingFunctions.swap(v); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_context.h000066400000000000000000000021071363734646600256140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/context/context.h" #include "opencl/source/sharings/sharing_factory.h" #include namespace NEO { class MockContext : public Context { public: using Context::contextType; using Context::driverDiagnostics; using Context::memoryManager; using Context::preferD3dSharedResources; using Context::sharingFunctions; using Context::svmAllocsManager; MockContext(ClDevice *device, bool noSpecialQueue = false); MockContext( void(CL_CALLBACK *funcNotify)(const char *, const void *, size_t, void *), void *data); 
MockContext(); ~MockContext() override; void clearSharingFunctions(); void setSharingFunctions(SharingFunctions *sharingFunctions); void releaseSharingFunctions(SharingType sharing); void resetSharingFunctions(SharingType sharing); void registerSharingWithId(SharingFunctions *sharing, SharingType sharingId); private: ClDevice *device = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_csr.cpp000066400000000000000000000012401363734646600252470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_csr.h" bool MockCommandStreamReceiver::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { return true; } CompletionStamp MockCommandStreamReceiver::flushTask( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) { ++taskCount; CompletionStamp stamp = {taskCount, taskLevel, flushStamp->peekStamp()}; return stamp; } compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_csr.h000066400000000000000000000266511363734646600247310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/flat_batch_buffer_helper_hw.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/os_context.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "gmock/gmock.h" #include #if 
defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif using namespace NEO; template class MockCsrBase : public UltCommandStreamReceiver { public: using BaseUltCsrClass = UltCommandStreamReceiver; using BaseUltCsrClass::BaseUltCsrClass; MockCsrBase() = delete; MockCsrBase(int32_t &execStamp, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseUltCsrClass(executionEnvironment, rootDeviceIndex), executionStamp(&execStamp), flushTaskStamp(-1) { } void makeResident(GraphicsAllocation &gfxAllocation) override { madeResidentGfxAllocations.push_back(&gfxAllocation); if (this->getMemoryManager()) { this->getResidencyAllocations().push_back(&gfxAllocation); } gfxAllocation.updateResidencyTaskCount(this->taskCount, this->osContext->getContextId()); } void makeNonResident(GraphicsAllocation &gfxAllocation) override { madeNonResidentGfxAllocations.push_back(&gfxAllocation); } uint32_t peekThreadArbitrationPolicy() { return this->requiredThreadArbitrationPolicy; } bool isMadeResident(GraphicsAllocation *gfxAllocation) { for (GraphicsAllocation *gfxAlloc : madeResidentGfxAllocations) { if (gfxAlloc == gfxAllocation) return true; } return false; } bool isMadeNonResident(GraphicsAllocation *gfxAllocation) { for (GraphicsAllocation *gfxAlloc : madeNonResidentGfxAllocations) { if (gfxAlloc == gfxAllocation) return true; } return false; } bool getGSBAFor32BitProgrammed() { return this->GSBAFor32BitProgrammed; } void processEviction() override { processEvictionCalled = true; } ResidencyContainer madeResidentGfxAllocations; ResidencyContainer madeNonResidentGfxAllocations; int32_t *executionStamp; int32_t flushTaskStamp; bool processEvictionCalled = false; }; template using MockCsrHw = MockCsrBase; template class MockCsrAub : public MockCsrBase { public: MockCsrAub(int32_t &execStamp, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : MockCsrBase(execStamp, 
executionEnvironment, rootDeviceIndex) {} CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_AUB; } }; template class MockCsr : public MockCsrBase { public: using BaseClass = MockCsrBase; using CommandStreamReceiver::mediaVfeStateDirty; MockCsr() = delete; MockCsr(const HardwareInfo &hwInfoIn) = delete; MockCsr(int32_t &execStamp, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(execStamp, executionEnvironment, rootDeviceIndex) { } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return true; } CompletionStamp flushTask( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { this->flushTaskStamp = *this->executionStamp; (*this->executionStamp)++; slmUsedInLastFlushTask = dispatchFlags.useSLM; this->latestSentTaskCount = ++this->taskCount; lastTaskLevelToFlushTask = taskLevel; return CommandStreamReceiverHw::flushTask( commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; } bool slmUsedInLastFlushTask = false; uint32_t lastTaskLevelToFlushTask = 0; }; template class MockCsrHw2 : public CommandStreamReceiverHw { public: using CommandStreamReceiverHw::CommandStreamReceiverHw; using CommandStreamReceiverHw::csrSizeRequestFlags; using CommandStreamReceiverHw::flushStamp; using CommandStreamReceiverHw::programL3; using CommandStreamReceiverHw::programVFEState; using CommandStreamReceiver::commandStream; using CommandStreamReceiver::dispatchMode; using CommandStreamReceiver::isPreambleSent; using CommandStreamReceiver::lastSentCoherencyRequest; using CommandStreamReceiver::mediaVfeStateDirty; using CommandStreamReceiver::nTo1SubmissionModelEnabled; using CommandStreamReceiver::pageTableManagerInitialized; using 
CommandStreamReceiver::requiredScratchSize; using CommandStreamReceiver::requiredThreadArbitrationPolicy; using CommandStreamReceiver::taskCount; using CommandStreamReceiver::taskLevel; using CommandStreamReceiver::timestampPacketWriteEnabled; MockCsrHw2(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : CommandStreamReceiverHw::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex) {} SubmissionAggregator *peekSubmissionAggregator() { return this->submissionAggregator.get(); } void overrideSubmissionAggregator(SubmissionAggregator *newSubmissionsAggregator) { this->submissionAggregator.reset(newSubmissionsAggregator); } uint64_t peekTotalMemoryUsed() { return this->totalMemoryUsed; } bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { flushCalledCount++; recordedCommandBuffer->batchBuffer = batchBuffer; copyOfAllocations = allocationsForResidency; flushStamp->setStamp(flushStamp->peekStamp() + 1); return true; } CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; recordedCommandBuffer = std::unique_ptr(new CommandBuffer(device)); auto completionStamp = CommandStreamReceiverHw::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); if (storeFlushedTaskStream && commandStream.getUsed() > commandStreamStart) { storedTaskStreamSize = commandStream.getUsed() - commandStreamStart; // Overfetch to allow command parser verify if "big" command is programmed at the end of allocation auto overfetchedSize = storedTaskStreamSize + MemoryConstants::cacheLineSize; storedTaskStream.reset(new uint8_t[overfetchedSize]); memset(storedTaskStream.get(), 0, overfetchedSize); 
memcpy_s(storedTaskStream.get(), storedTaskStreamSize, ptrOffset(commandStream.getCpuBase(), commandStreamStart), storedTaskStreamSize); } return completionStamp; } uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { if (!skipBlitCalls) { return CommandStreamReceiverHw::blitBuffer(blitPropertiesContainer, blocking); } return taskCount; } bool skipBlitCalls = false; bool storeFlushedTaskStream = false; std::unique_ptr storedTaskStream; size_t storedTaskStreamSize = 0; int flushCalledCount = 0; std::unique_ptr recordedCommandBuffer = nullptr; ResidencyContainer copyOfAllocations; DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); }; template class MockFlatBatchBufferHelper : public FlatBatchBufferHelperHw { public: using FlatBatchBufferHelperHw::FlatBatchBufferHelperHw; MOCK_METHOD1(setPatchInfoData, bool(const PatchInfoData &)); MOCK_METHOD1(removePatchInfoData, bool(uint64_t)); MOCK_METHOD1(registerCommandChunk, bool(CommandChunk &)); MOCK_METHOD2(registerBatchBufferStartAddress, bool(uint64_t, uint64_t)); MOCK_METHOD4(flattenBatchBuffer, GraphicsAllocation *(uint32_t rootDeviceIndex, BatchBuffer &batchBuffer, size_t &sizeBatchBuffer, DispatchMode dispatchMode)); }; class MockCommandStreamReceiver : public CommandStreamReceiver { public: using CommandStreamReceiver::CommandStreamReceiver; using CommandStreamReceiver::globalFenceAllocation; using CommandStreamReceiver::internalAllocationStorage; using CommandStreamReceiver::latestFlushedTaskCount; using CommandStreamReceiver::latestSentTaskCount; using CommandStreamReceiver::requiredThreadArbitrationPolicy; using CommandStreamReceiver::tagAddress; std::vector instructionHeapReserveredData; int *flushBatchedSubmissionsCallCounter = nullptr; uint32_t waitForCompletionWithTimeoutCalled = 0; bool multiOsContextCapable = false; bool downloadAllocationCalled = false; bool waitForCompletionWithTimeout(bool enableTimeout, int64_t 
timeoutMicroseconds, uint32_t taskCountToWait) override { waitForCompletionWithTimeoutCalled++; return true; } bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; bool isMultiOsContextCapable() const override { return multiOsContextCapable; } CompletionStamp flushTask( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override; bool flushBatchedSubmissions() override { if (flushBatchedSubmissionsCallCounter) { (*flushBatchedSubmissionsCallCounter)++; } return true; } void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override { } void downloadAllocation(GraphicsAllocation &gfxAllocation) override { downloadAllocationCalled = true; } uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override { return taskCount; }; CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_HW; } }; #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_d3d_objects.h000066400000000000000000000071411363734646600263160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/d3d/d3d_sharing.h" #include "gmock/gmock.h" using ::testing::_; using ::testing::NiceMock; using ::testing::SetArgPointee; namespace NEO { template class MockD3DSharingFunctions : public D3DSharingFunctions { typedef typename D3D::D3DDevice D3DDevice; typedef typename D3D::D3DQuery D3DQuery; typedef typename D3D::D3DQueryDesc D3DQueryDesc; typedef typename D3D::D3DResource D3DResource; typedef typename D3D::D3DBufferDesc D3DBufferDesc; typedef typename D3D::D3DBufferObj D3DBufferObj; typedef typename 
D3D::D3DTexture2dDesc D3DTexture2dDesc; typedef typename D3D::D3DTexture3dDesc D3DTexture3dDesc; typedef typename D3D::D3DTexture2d D3DTexture2d; typedef typename D3D::D3DTexture3d D3DTexture3d; public: MockD3DSharingFunctions() : D3DSharingFunctions((D3DDevice *)1) { memset(&mockDxgiDesc, 0, sizeof(DXGI_ADAPTER_DESC)); mockDxgiDesc.VendorId = INTEL_VENDOR_ID; getDxgiDescFcn = &this->mockGetDxgiDesc; getDxgiDescCalled = 0; getDxgiDescAdapterRequested = nullptr; } MOCK_METHOD1_T(createQuery, void(D3DQuery **query)); MOCK_METHOD2_T(createBuffer, void(D3DBufferObj **buffer, unsigned int width)); MOCK_METHOD3_T(createTexture2d, void(D3DTexture2d **texture, D3DTexture2dDesc *desc, cl_uint subresource)); MOCK_METHOD3_T(createTexture3d, void(D3DTexture3d **texture, D3DTexture3dDesc *desc, cl_uint subresource)); MOCK_METHOD2_T(getBufferDesc, void(D3DBufferDesc *bufferDesc, D3DBufferObj *buffer)); MOCK_METHOD2_T(getTexture2dDesc, void(D3DTexture2dDesc *textureDesc, D3DTexture2d *texture)); MOCK_METHOD2_T(getTexture3dDesc, void(D3DTexture3dDesc *textureDesc, D3DTexture3d *texture)); MOCK_METHOD2_T(getSharedHandle, void(D3DResource *resource, void **handle)); MOCK_METHOD2_T(getSharedNTHandle, void(D3DResource *resource, void **handle)); MOCK_METHOD1_T(addRef, void(D3DResource *resource)); MOCK_METHOD1_T(release, void(IUnknown *resource)); MOCK_METHOD1_T(getDeviceContext, void(D3DQuery *query)); MOCK_METHOD1_T(releaseDeviceContext, void(D3DQuery *query)); MOCK_METHOD4_T(copySubresourceRegion, void(D3DResource *dst, cl_uint dstSubresource, D3DResource *src, cl_uint srcSubresource)); MOCK_METHOD1_T(flushAndWait, void(D3DQuery *query)); MOCK_METHOD3_T(lockRect, void(D3DTexture2d *d3dResource, D3DLOCKED_RECT *lockedRect, uint32_t flags)); MOCK_METHOD1_T(unlockRect, void(D3DTexture2d *d3dResource)); MOCK_METHOD2_T(getRenderTargetData, void(D3DTexture2d *renderTarget, D3DTexture2d *dstSurface)); MOCK_METHOD2_T(updateSurface, void(D3DTexture2d *src, D3DTexture2d *dst)); 
MOCK_METHOD1_T(updateDevice, void(D3DResource *resource)); MOCK_METHOD2_T(checkFormatSupport, void(DXGI_FORMAT format, UINT *pFormat)); MOCK_METHOD2_T(memObjectFormatSupport, bool(cl_mem_object_type object, UINT format)); std::vector> *getTrackedResourcesVector() { return &this->trackedResources; } D3DBufferDesc mockBufferDesc = {}; D3DTexture2dDesc mockTexture2dDesc = {}; D3DTexture3dDesc mockTexture3dDesc = {}; static DXGI_ADAPTER_DESC mockDxgiDesc; static IDXGIAdapter *getDxgiDescAdapterRequested; static uint32_t getDxgiDescCalled; static void mockGetDxgiDesc(DXGI_ADAPTER_DESC *dxgiDesc, IDXGIAdapter *adapter, D3DDevice *device) { getDxgiDescCalled++; getDxgiDescAdapterRequested = adapter; *dxgiDesc = mockDxgiDesc; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_deferrable_deletion.cpp000066400000000000000000000005421363734646600304420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_deferrable_deletion.h" namespace NEO { bool MockDeferrableDeletion::apply() { applyCalled++; return true; } MockDeferrableDeletion::~MockDeferrableDeletion() { EXPECT_EQ(1, applyCalled); } } // namespace NEOcompute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_deferrable_deletion.h000066400000000000000000000006111363734646600301040ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/deferrable_deletion.h" #include "gtest/gtest.h" namespace NEO { class MockDeferrableDeletion : public DeferrableDeletion { public: bool apply() override; ~MockDeferrableDeletion() override; int applyCalled = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_deferred_deleter.cpp000066400000000000000000000054761363734646600277630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_deferred_deleter.h" #include "shared/source/memory_manager/deferrable_deletion.h" #include "shared/source/os_interface/os_thread.h" #include "gtest/gtest.h" namespace NEO { MockDeferredDeleter::MockDeferredDeleter() { shouldStopCalled = 0; clearCalled = 0; } void MockDeferredDeleter::deferDeletion(DeferrableDeletion *deletion) { deferDeletionCalled++; deletion->apply(); delete deletion; } void MockDeferredDeleter::addClient() { ++numClients; } void MockDeferredDeleter::removeClient() { --numClients; } void MockDeferredDeleter::drain(bool blocking) { if (expectDrainCalled) { EXPECT_EQ(expectedDrainValue, blocking); } DeferredDeleter::drain(blocking); drainCalled++; } void MockDeferredDeleter::drain() { return drain(true); } bool MockDeferredDeleter::areElementsReleased() { areElementsReleasedCalled++; return areElementsReleasedCalled != 1; } bool MockDeferredDeleter::shouldStop() { shouldStopCalled++; return shouldStopCalled > 1; } void MockDeferredDeleter::clearQueue() { DeferredDeleter::clearQueue(); clearCalled++; } int MockDeferredDeleter::getClientsNum() { return numClients; } int MockDeferredDeleter::getElementsToRelease() { return elementsToRelease; } bool MockDeferredDeleter::isWorking() { return doWorkInBackground; } bool MockDeferredDeleter::isThreadRunning() { return worker != nullptr; } bool MockDeferredDeleter::isQueueEmpty() { std::lock_guard lock(queueMutex); return queue.peekIsEmpty(); } void MockDeferredDeleter::setElementsToRelease(int elementsNum) { elementsToRelease = elementsNum; } void MockDeferredDeleter::setDoWorkInBackgroundValue(bool value) { doWorkInBackground = value; } bool MockDeferredDeleter::baseAreElementsReleased() { return DeferredDeleter::areElementsReleased(); } bool MockDeferredDeleter::baseShouldStop() { return DeferredDeleter::shouldStop(); } Thread *MockDeferredDeleter::getThreadHandle() { return worker.get(); } std::unique_ptr 
createDeferredDeleter() { return std::unique_ptr(new MockDeferredDeleter()); } void MockDeferredDeleter::runThread() { worker = Thread::create(run, reinterpret_cast(this)); } void MockDeferredDeleter::forceStop() { allowEarlyStopThread(); stop(); } void MockDeferredDeleter::allowEarlyStopThread() { shouldStopCalled = 2; } MockDeferredDeleter::~MockDeferredDeleter() { allowEarlyStopThread(); if (expectDrainCalled) { EXPECT_NE(0, drainCalled); } } void MockDeferredDeleter::expectDrainBlockingValue(bool value) { expectedDrainValue = value; expectDrainCalled = true; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_deferred_deleter.h000066400000000000000000000024741363734646600274230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/deferred_deleter.h" namespace NEO { class MockDeferredDeleter : public DeferredDeleter { public: MockDeferredDeleter(); ~MockDeferredDeleter() override; void deferDeletion(DeferrableDeletion *deletion) override; void addClient() override; void removeClient() override; void drain(bool blocking) override; bool areElementsReleased() override; bool shouldStop() override; void drain(); int getClientsNum(); int getElementsToRelease(); bool isWorking(); bool isThreadRunning(); bool isQueueEmpty(); void setElementsToRelease(int elementsNum); void setDoWorkInBackgroundValue(bool value); bool baseAreElementsReleased(); bool baseShouldStop(); Thread *getThreadHandle(); void runThread(); int drainCalled = 0; int areElementsReleasedCalled = 0; std::atomic shouldStopCalled; std::atomic clearCalled; int deferDeletionCalled = 0; void forceStop(); void allowEarlyStopThread(); void expectDrainBlockingValue(bool value); bool expectedDrainValue = false; bool expectDrainCalled = false; void clearQueue() override; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_device.cpp000066400000000000000000000106451363734646600257300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */
/*
 * Out-of-line definitions for the MockDevice test double and the
 * MockAlignedMallocManagerDevice variant: static member storage, constructors,
 * and helpers that swap the OS time source, memory manager or command stream
 * receiver of an already constructed device.
 */
#include "opencl/test/unit_test/mocks/mock_device.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/unit_test/tests_configuration.h"
#include "opencl/test/unit_test/mocks/mock_execution_environment.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "opencl/test/unit_test/mocks/mock_ostime.h"
using namespace NEO;

// Selects which base createDeviceImpl() MockDevice::createDeviceImpl() forwards to
// (see below); starts out as true.
bool MockDevice::createSingleDevice = true;

// Factory hooks for command stream receivers; both start out pointing at
// createCommandStream.
decltype(&createCommandStream) MockSubDevice::createCommandStreamReceiverFunc = createCommandStream;
decltype(&createCommandStream) MockDevice::createCommandStreamReceiverFunc = createCommandStream;

// Default constructor: delegates to the two-argument constructor with a freshly
// allocated MockExecutionEnvironment and root device index 0, then wires up exactly
// one command stream receiver / engine entry by hand.
MockDevice::MockDevice() : MockDevice(new MockExecutionEnvironment(), 0u) {
    CommandStreamReceiver *commandStreamReceiver = createCommandStream(*this->executionEnvironment, this->getRootDeviceIndex());
    commandStreamReceivers.resize(1);
    commandStreamReceivers[0].reset(commandStreamReceiver);
    // Hand the memory manager built by the delegated constructor over to the
    // execution environment; mockMemoryManager is moved-from afterwards.
    this->executionEnvironment->memoryManager = std::move(this->mockMemoryManager);
    this->engines.resize(1);
    // Second member is left null here; resetCommandStreamReceiver() reads it as
    // engines[i].osContext.
    this->engines[0] = {commandStreamReceiver, nullptr};
    // Called again although the delegated constructor already called it once.
    initializeCaps();
}

// Returns the hardwarePrefix table entry for this device's product family.
const char *MockDevice::getProductAbbrev() const { return hardwarePrefix[getHardwareInfo().platform.eProductFamily]; }

// Main constructor: builds the mock memory manager and mock OS time, records the
// hardware info on the root device environment, initializes the execution
// environment's memory manager and caps, and picks the default preemption mode.
MockDevice::MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : RootDevice(executionEnvironment, rootDeviceIndex) {
    auto &hwInfo = getHardwareInfo();
    bool enableLocalMemory = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getEnableLocalMemory(hwInfo);
    // True for either of the two AUB test modes.
    bool aubUsage = (testMode == TestMode::AubTests) || (testMode == TestMode::AubTestsWithTbx);
    // NOTE(review): other declarations in this dump have visibly lost their
    // template argument lists; MemoryManagerCreate may be missing one here too -
    // confirm against the upstream file.
    this->mockMemoryManager.reset(new MemoryManagerCreate(false,
enableLocalMemory, aubUsage, *executionEnvironment));
    this->osTime = MockOSTime::create();
    executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(&hwInfo);
    executionEnvironment->initializeMemoryManager();
    initializeCaps();
    preemptionMode = PreemptionHelper::getDefaultPreemptionMode(hwInfo);
}

// Forwards to Device::createDeviceImpl() when createSingleDevice is set, otherwise
// to RootDevice::createDeviceImpl().
bool MockDevice::createDeviceImpl() {
    if (MockDevice::createSingleDevice) {
        return Device::createDeviceImpl();
    }
    return RootDevice::createDeviceImpl();
}

// Replaces the held OS time object with osTime via reset().
// (The ';' after the closing brace is a stray empty declaration.)
void MockDevice::setOSTime(OSTime *osTime) { this->osTime.reset(osTime); };

// Replaces the execution environment's memory manager with memoryManager via reset().
void MockDevice::injectMemoryManager(MemoryManager *memoryManager) { executionEnvironment->memoryManager.reset(memoryManager); }

// Convenience overload: replaces the receiver of the default engine.
void MockDevice::resetCommandStreamReceiver(CommandStreamReceiver *newCsr) { resetCommandStreamReceiver(newCsr, defaultEngineIndex); }

// Installs newCsr as the command stream receiver of engine engineIndex, reusing
// that engine's existing OS context, and creates the receiver's allocations.
void MockDevice::resetCommandStreamReceiver(CommandStreamReceiver *newCsr, uint32_t engineIndex) {
    auto osContext = this->engines[engineIndex].osContext;
    auto memoryManager = executionEnvironment->memoryManager.get();
    // Copy of the record registered for the current receiver; the lookup result is
    // dereferenced without a null check.
    auto registeredEngine = *memoryManager->getRegisteredEngineForCsr(engines[engineIndex].commandStreamReceiver);
    registeredEngine.commandStreamReceiver = newCsr;
    engines[engineIndex].commandStreamReceiver = newCsr;
    // The copy pointing at newCsr is appended; no existing entry is removed here.
    memoryManager->getRegisteredEngines().emplace_back(registeredEngine);
    // Extra internal reference on the OS context before newCsr is attached to it.
    // NOTE(review): presumably balances a release done when the old receiver is
    // destroyed by the reset() below - confirm.
    osContext->incRefInternal();
    newCsr->setupContext(*osContext);
    commandStreamReceivers[engineIndex].reset(newCsr);
    commandStreamReceivers[engineIndex]->initializeTagAllocation();
    commandStreamReceivers[engineIndex]->createGlobalFenceAllocation();
    // Preemption allocation is only created for mid-thread preemption or when the
    // debugger is reported active.
    if (preemptionMode == PreemptionMode::MidThread || isDebuggerActive()) {
        commandStreamReceivers[engineIndex]->createPreemptionAllocation();
    }
}

// MockDevice variant whose mockMemoryManager is replaced, after the base constructor
// has run, with a MockAllocSysMemAgnosticMemoryManager.
MockAlignedMallocManagerDevice::MockAlignedMallocManagerDevice(ExecutionEnvironment *executionEnvironment, uint32_t internalDeviceIndex) : MockDevice(executionEnvironment, internalDeviceIndex) {
    this->mockMemoryManager.reset(new MockAllocSysMemAgnosticMemoryManager(*executionEnvironment));
}
FailDevice::FailDevice(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex) : MockDevice(executionEnvironment, deviceIndex) { this->mockMemoryManager.reset(new FailMemoryManager(*executionEnvironment)); } FailDeviceAfterOne::FailDeviceAfterOne(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex) : MockDevice(executionEnvironment, deviceIndex) { this->mockMemoryManager.reset(new FailMemoryManager(1, *executionEnvironment)); } compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_device.h000066400000000000000000000152161363734646600253740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/root_device.h" #include "shared/source/device/sub_device.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/helpers/variable_backup.h" namespace NEO { class CommandStreamReceiver; class DriverInfo; class OSTime; template class UltCommandStreamReceiver; extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); struct MockSubDevice : public SubDevice { using SubDevice::SubDevice; std::unique_ptr createCommandStreamReceiver() const override { return std::unique_ptr(createCommandStreamReceiverFunc(*executionEnvironment, getRootDeviceIndex())); } static decltype(&createCommandStream) createCommandStreamReceiverFunc; }; class MockDevice : public RootDevice { public: using Device::commandStreamReceivers; using Device::createDeviceInternals; using Device::createEngine; using Device::deviceInfo; using Device::engines; using Device::executionEnvironment; using Device::initializeCaps; using RootDevice::createEngines; using RootDevice::subdevices; void setOSTime(OSTime *osTime); void setDriverInfo(DriverInfo *driverInfo); 
static bool createSingleDevice; bool createDeviceImpl() override; bool getCpuTime(uint64_t *timeStamp) { return true; }; MockDevice(); MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex); void setPreemptionMode(PreemptionMode mode) { preemptionMode = mode; } void injectMemoryManager(MemoryManager *); void setPerfCounters(PerformanceCounters *perfCounters) { if (perfCounters) { performanceCounters = std::unique_ptr(perfCounters); } else { performanceCounters.release(); } } const char *getProductAbbrev() const; template UltCommandStreamReceiver &getUltCommandStreamReceiver() { return reinterpret_cast &>(*engines[defaultEngineIndex].commandStreamReceiver); } template UltCommandStreamReceiver &getUltCommandStreamReceiverFromIndex(uint32_t index) { return reinterpret_cast &>(*engines[index].commandStreamReceiver); } CommandStreamReceiver &getGpgpuCommandStreamReceiver() const { return *engines[defaultEngineIndex].commandStreamReceiver; } void resetCommandStreamReceiver(CommandStreamReceiver *newCsr); void resetCommandStreamReceiver(CommandStreamReceiver *newCsr, uint32_t engineIndex); void setDebuggerActive(bool active) { this->deviceInfo.debuggerActive = active; } template static T *createWithExecutionEnvironment(const HardwareInfo *pHwInfo, ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) { pHwInfo = pHwInfo ? pHwInfo : defaultHwInfo.get(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(pHwInfo); T *device = new T(executionEnvironment, rootDeviceIndex); executionEnvironment->memoryManager = std::move(device->mockMemoryManager); return createDeviceInternals(device); } template static T *createWithNewExecutionEnvironment(const HardwareInfo *pHwInfo, uint32_t rootDeviceIndex = 0) { ExecutionEnvironment *executionEnvironment = new ExecutionEnvironment(); auto numRootDevices = DebugManager.flags.CreateMultipleRootDevices.get() ? 
DebugManager.flags.CreateMultipleRootDevices.get() : 1u; executionEnvironment->prepareRootDeviceEnvironments(numRootDevices); pHwInfo = pHwInfo ? pHwInfo : defaultHwInfo.get(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(pHwInfo); } return createWithExecutionEnvironment(pHwInfo, executionEnvironment, rootDeviceIndex); } SubDevice *createSubDevice(uint32_t subDeviceIndex) override { return Device::create(executionEnvironment, subDeviceIndex, *this); } std::unique_ptr createCommandStreamReceiver() const override { return std::unique_ptr(createCommandStreamReceiverFunc(*executionEnvironment, getRootDeviceIndex())); } static decltype(&createCommandStream) createCommandStreamReceiverFunc; std::unique_ptr mockMemoryManager; }; template <> inline Device *MockDevice::createWithNewExecutionEnvironment(const HardwareInfo *pHwInfo, uint32_t rootDeviceIndex) { auto executionEnvironment = new ExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); MockAubCenterFixture::setMockAubCenter(*executionEnvironment->rootDeviceEnvironments[0]); auto hwInfo = pHwInfo ? 
pHwInfo : defaultHwInfo.get(); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo); executionEnvironment->initializeMemoryManager(); return Device::create(executionEnvironment, 0u); } class FailDevice : public MockDevice { public: FailDevice(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex); }; class FailDeviceAfterOne : public MockDevice { public: FailDeviceAfterOne(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex); }; class MockAlignedMallocManagerDevice : public MockDevice { public: MockAlignedMallocManagerDevice(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex); }; struct EnvironmentWithCsrWrapper { template void setCsrType() { createSubDeviceCsrFuncBackup = EnvironmentWithCsrWrapper::createCommandStreamReceiver; createRootDeviceCsrFuncBackup = EnvironmentWithCsrWrapper::createCommandStreamReceiver; } template static CommandStreamReceiver *createCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { return new CsrType(executionEnvironment, 0); } VariableBackup createSubDeviceCsrFuncBackup{&MockSubDevice::createCommandStreamReceiverFunc}; VariableBackup createRootDeviceCsrFuncBackup{&MockDevice::createCommandStreamReceiverFunc}; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_device_queue.h000066400000000000000000000155601363734646600266020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" namespace NEO { template class MockDeviceQueueHw : public DeviceQueueHw { using BaseClass = DeviceQueueHw; using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using MI_ARB_CHECK = 
typename GfxFamily::MI_ARB_CHECK; using MEDIA_STATE_FLUSH = typename GfxFamily::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename GfxFamily::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; public: using BaseClass::addArbCheckCmdWa; using BaseClass::addLriCmd; using BaseClass::addLriCmdWa; using BaseClass::addMediaStateClearCmds; using BaseClass::addMiAtomicCmdWa; using BaseClass::addPipeControlCmdWa; using BaseClass::addProfilingEndCmds; using BaseClass::buildSlbDummyCommands; using BaseClass::getBlockKernelStartPointer; using BaseClass::getCSPrefetchSize; using BaseClass::getExecutionModelCleanupSectionSize; using BaseClass::getMediaStateClearCmdsSize; using BaseClass::getMinimumSlbSize; using BaseClass::getProfilingEndCmdsSize; using BaseClass::getSlbCS; using BaseClass::getWaCommandsSize; using BaseClass::offsetDsh; bool arbCheckWa; bool miAtomicWa; bool lriWa; bool pipeControlWa; struct ExpectedCmds { MEDIA_STATE_FLUSH mediaStateFlush; MI_ARB_CHECK arbCheck; MI_ATOMIC miAtomic; MEDIA_INTERFACE_DESCRIPTOR_LOAD mediaIdLoad; MI_LOAD_REGISTER_IMM lriTrue; MI_LOAD_REGISTER_IMM lriFalse; PIPE_CONTROL pipeControl; PIPE_CONTROL noopedPipeControl; GPGPU_WALKER gpgpuWalker; uint8_t *prefetch; MI_BATCH_BUFFER_START bbStart; } expectedCmds; MockDeviceQueueHw(Context *context, ClDevice *device, cl_queue_properties &properties) : BaseClass(context, device, properties) { auto slb = this->getSlbBuffer(); LinearStream *slbCS = getSlbCS(); slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); size_t size = slbCS->getUsed(); lriWa = false; addLriCmdWa(true); if (slbCS->getUsed() > size) { size = slbCS->getUsed(); lriWa = true; } pipeControlWa = false; addPipeControlCmdWa(); if (slbCS->getUsed() > size) { size = slbCS->getUsed(); pipeControlWa = 
true; } arbCheckWa = false; addArbCheckCmdWa(); if (slbCS->getUsed() > size) { size = slbCS->getUsed(); arbCheckWa = true; } miAtomicWa = false; addMiAtomicCmdWa(0); if (slbCS->getUsed() > size) { size = slbCS->getUsed(); miAtomicWa = true; } slbCS->replaceBuffer(slb->getUnderlyingBuffer(), slb->getUnderlyingBufferSize()); // reset setupExpectedCmds(); }; ~MockDeviceQueueHw() override { if (expectedCmds.prefetch) delete expectedCmds.prefetch; } MI_ATOMIC getExpectedMiAtomicCmd() { auto igilCmdQueue = reinterpret_cast(this->queueBuffer->getUnderlyingBuffer()); auto placeholder = (uint64_t)&igilCmdQueue->m_controls.m_DummyAtomicOperationPlaceholder; MI_ATOMIC miAtomic = GfxFamily::cmdInitAtomic; miAtomic.setReturnDataControl(0x1); miAtomic.setCsStall(0x1); HardwareCommandsHelper::programMiAtomic(miAtomic, placeholder, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT, MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD); return miAtomic; }; MI_LOAD_REGISTER_IMM getExpectedLriCmd(bool arbCheck) { MI_LOAD_REGISTER_IMM lri = GfxFamily::cmdInitLoadRegisterImm; lri.setRegisterOffset(0x2248); // CTXT_PREMP_DBG offset if (arbCheck) lri.setDataDword(0x00000100); // set only bit 8 (Preempt On MI_ARB_CHK Only) else lri.setDataDword(0x0); // default value return lri; } PIPE_CONTROL getExpectedPipeControlCmd() { PIPE_CONTROL pc; this->initPipeControl(&pc); return pc; } MI_ARB_CHECK getExpectedArbCheckCmd() { return GfxFamily::cmdInitArbCheck; } void setupExpectedCmds() { expectedCmds.mediaStateFlush = GfxFamily::cmdInitMediaStateFlush; expectedCmds.arbCheck = getExpectedArbCheckCmd(); expectedCmds.miAtomic = getExpectedMiAtomicCmd(); expectedCmds.mediaIdLoad = GfxFamily::cmdInitMediaInterfaceDescriptorLoad; expectedCmds.mediaIdLoad.setInterfaceDescriptorTotalLength(2048); auto dataStartAddress = DeviceQueue::colorCalcStateSize; // add shift to second table ( 62 index of first ID table with scheduler ) dataStartAddress += sizeof(INTERFACE_DESCRIPTOR_DATA) * DeviceQueue::schedulerIDIndex; 
expectedCmds.mediaIdLoad.setInterfaceDescriptorDataStartAddress(dataStartAddress); expectedCmds.lriTrue = getExpectedLriCmd(true); expectedCmds.lriFalse = getExpectedLriCmd(false); expectedCmds.pipeControl = getExpectedPipeControlCmd(); memset(&expectedCmds.noopedPipeControl, 0x0, sizeof(PIPE_CONTROL)); expectedCmds.gpgpuWalker = GfxFamily::cmdInitGpgpuWalker; expectedCmds.gpgpuWalker.setSimdSize(GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16); expectedCmds.gpgpuWalker.setThreadGroupIdXDimension(1); expectedCmds.gpgpuWalker.setThreadGroupIdYDimension(1); expectedCmds.gpgpuWalker.setThreadGroupIdZDimension(1); expectedCmds.gpgpuWalker.setRightExecutionMask(0xFFFFFFFF); expectedCmds.gpgpuWalker.setBottomExecutionMask(0xFFFFFFFF); expectedCmds.prefetch = new uint8_t[DeviceQueueHw::getCSPrefetchSize()]; memset(expectedCmds.prefetch, 0x0, DeviceQueueHw::getCSPrefetchSize()); expectedCmds.bbStart = GfxFamily::cmdInitBatchBufferStart; auto slbPtr = reinterpret_cast(this->getSlbBuffer()->getUnderlyingBuffer()); expectedCmds.bbStart.setBatchBufferStartAddressGraphicsaddress472(slbPtr); } IGIL_CommandQueue *getIgilQueue() { auto igilCmdQueue = reinterpret_cast(DeviceQueue::queueBuffer->getUnderlyingBuffer()); return igilCmdQueue; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_event.h000066400000000000000000000032451363734646600252550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" namespace NEO { #define FORWARD_CONSTRUCTOR(THIS_CLASS, BASE_CLASS) \ template \ THIS_CLASS(ArgsT &&... args) : BASE_CLASS(std::forward(args)...) { \ } #define FORWARD_FUNC(FUNC_NAME, BASE_CLASS) \ template \ void FUNC_NAME(ArgsT &&... 
args) { \ BASE_CLASS::FUNC_NAME(std::forward(args)...); \ } template struct MockEvent : public BaseEventType { FORWARD_CONSTRUCTOR(MockEvent, BaseEventType); // make some protected members public : FORWARD_FUNC(submitCommand, BaseEventType); using BaseEventType::timeStampNode; using Event::calcProfilingData; using Event::magic; using Event::queueTimeStamp; using Event::submitTimeStamp; using Event::timestampPacketContainer; }; #undef FORWARD_CONSTRUCTOR #undef FORWARD_FUNC struct MockEventBuilder : EventBuilder { MockEventBuilder() = default; MockEventBuilder(Event *ev) { setEvent(ev); } void setEvent(Event *ev) { this->event = ev; } template static EventType *createAndFinalize(ArgsT &&... args) { MockEventBuilder mb; mb.create(std::forward(args)...); return static_cast(mb.finalizeAndRelease()); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_execution_environment.h000066400000000000000000000045531363734646600305660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" namespace NEO { struct MockRootDeviceEnvironment : public RootDeviceEnvironment { using RootDeviceEnvironment::RootDeviceEnvironment; ~MockRootDeviceEnvironment() override = default; void initAubCenter(bool localMemoryEnabled, const std::string &aubFileName, CommandStreamReceiverType csrType) override { if (!initAubCenterCalled) { initAubCenterCalled = true; localMemoryEnabledReceived = localMemoryEnabled; aubFileNameReceived = aubFileName; } if (useMockAubCenter) { MockAubCenterFixture::setMockAubCenter(*this); } RootDeviceEnvironment::initAubCenter(localMemoryEnabled, aubFileName, csrType); } bool initAubCenterCalled = false; bool localMemoryEnabledReceived = false; std::string aubFileNameReceived = ""; bool 
useMockAubCenter = true; }; struct MockExecutionEnvironment : ExecutionEnvironment { ~MockExecutionEnvironment() override = default; MockExecutionEnvironment() : MockExecutionEnvironment(defaultHwInfo.get()) {} MockExecutionEnvironment(const HardwareInfo *hwInfo) : MockExecutionEnvironment(hwInfo, true, 1u) { } MockExecutionEnvironment(const HardwareInfo *hwInfo, bool useMockAubCenter, uint32_t numRootDevices) { prepareRootDeviceEnvironments(numRootDevices); for (auto rootDeviceIndex = 0u; rootDeviceIndex < numRootDevices; rootDeviceIndex++) { auto rootDeviceEnvironment = new MockRootDeviceEnvironment(*this); rootDeviceEnvironment->useMockAubCenter = useMockAubCenter; rootDeviceEnvironments[rootDeviceIndex].reset(rootDeviceEnvironment); if (hwInfo) { rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(hwInfo); } else { rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(defaultHwInfo.get()); } } calculateMaxOsContextCount(); } void initGmm() { for (auto &rootDeviceEnvironment : rootDeviceEnvironments) { rootDeviceEnvironment->initGmm(); } } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_experimental_command_buffer.h000066400000000000000000000012641363734646600316570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/experimental_command_buffer.h" namespace NEO { class MockExperimentalCommandBuffer : public ExperimentalCommandBuffer { using BaseClass = ExperimentalCommandBuffer; public: using BaseClass::currentStream; using BaseClass::experimentalAllocation; using BaseClass::experimentalAllocationOffset; using BaseClass::timestamps; using BaseClass::timestampsOffset; MockExperimentalCommandBuffer(CommandStreamReceiver *csr) : ExperimentalCommandBuffer(csr, 80.0) { defaultPrint = false; } }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gfx_partition.cpp000066400000000000000000000013021363734646600273340ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_gfx_partition.h" using namespace NEO; std::array(HeapIndex::TOTAL_HEAPS)> MockGfxPartition::allHeapNames{{HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, HeapIndex::HEAP_INTERNAL, HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY, HeapIndex::HEAP_EXTERNAL, HeapIndex::HEAP_STANDARD, HeapIndex::HEAP_STANDARD64KB, HeapIndex::HEAP_SVM}}; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gfx_partition.h000066400000000000000000000015201363734646600270030ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/gfx_partition.h" #include "gmock/gmock.h" using namespace NEO; class MockGfxPartition : public GfxPartition { public: using GfxPartition::osMemory; uint64_t getHeapSize(HeapIndex heapIndex) { return getHeap(heapIndex).getSize(); } bool heapInitialized(HeapIndex heapIndex) { return getHeapSize(heapIndex) > 0; } void *getReservedCpuAddressRange() { return reservedCpuAddressRange; } size_t getReservedCpuAddressRangeSize() { return reservedCpuAddressRangeSize; } MOCK_METHOD2(freeGpuAddressRange, void(uint64_t gpuAddress, size_t size)); static std::array(HeapIndex::TOTAL_HEAPS)> allHeapNames; }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm.h000066400000000000000000000041171363734646600247130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm.h" #include "shared/source/memory_manager/memory_manager.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include 
"opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "opencl/test/unit_test/mocks/mock_platform.h" namespace NEO { namespace MockGmmParams { static ClSurfaceFormatInfo mockSurfaceFormat; } class MockGmm : public Gmm { public: using Gmm::Gmm; using Gmm::setupImageResourceParams; MockGmm() : Gmm(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, false){}; static std::unique_ptr queryImgParams(GmmClientContext *clientContext, ImageInfo &imgInfo) { return std::unique_ptr(new Gmm(clientContext, imgInfo, {})); } static ImageInfo initImgInfo(cl_image_desc &imgDesc, int baseMipLevel, const ClSurfaceFormatInfo *surfaceFormat) { ImageInfo imgInfo = {}; imgInfo.baseMipLevel = baseMipLevel; imgInfo.imgDesc = Image::convertDescriptor(imgDesc); if (!surfaceFormat) { ArrayRef readWriteSurfaceFormats = SurfaceFormats::readWrite(); MockGmmParams::mockSurfaceFormat = readWriteSurfaceFormats[0]; // any valid format imgInfo.surfaceFormat = &MockGmmParams::mockSurfaceFormat.surfaceFormat; } else { imgInfo.surfaceFormat = &surfaceFormat->surfaceFormat; } return imgInfo; } static GraphicsAllocation *allocateImage2d(MemoryManager &memoryManager) { cl_image_desc imgDesc{}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 5; imgDesc.image_height = 5; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); return memoryManager.allocateGraphicsMemoryWithProperties({0, true, imgInfo, GraphicsAllocation::AllocationType::IMAGE}); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_client_context.cpp000066400000000000000000000004631363734646600303500ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_gmm_client_context.h" namespace NEO { MockGmmClientContext::MockGmmClientContext(OSInterface *osInterface, HardwareInfo *hwInfo) : MockGmmClientContextBase(osInterface, hwInfo) { } } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_client_context.h000066400000000000000000000005501363734646600300120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/mocks/mock_gmm_client_context_base.h" namespace NEO { class MockGmmClientContext : public MockGmmClientContextBase { public: MockGmmClientContext(OSInterface *osInterface, HardwareInfo *hwInfo); }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_client_context_base.cpp000066400000000000000000000036201363734646600313400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_gmm_client_context.h" namespace NEO { MEMORY_OBJECT_CONTROL_STATE MockGmmClientContextBase::cachePolicyGetMemoryObject(GMM_RESOURCE_INFO *pResInfo, GMM_RESOURCE_USAGE_TYPE usage) { MEMORY_OBJECT_CONTROL_STATE retVal = {}; memset(&retVal, 0, sizeof(MEMORY_OBJECT_CONTROL_STATE)); switch (usage) { case GMM_RESOURCE_USAGE_OCL_INLINE_CONST_HDC: retVal.DwordValue = 32u; break; case GMM_RESOURCE_USAGE_OCL_BUFFER: retVal.DwordValue = 16u; break; case GMM_RESOURCE_USAGE_OCL_BUFFER_CONST: retVal.DwordValue = 8u; break; case GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED: retVal.DwordValue = 0u; break; case GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER: retVal.DwordValue = 2u; break; default: retVal.DwordValue = 4u; break; } return retVal; } GMM_RESOURCE_INFO *MockGmmClientContextBase::createResInfoObject(GMM_RESCREATE_PARAMS *pCreateParams) { return reinterpret_cast(new char[1]); } GMM_RESOURCE_INFO *MockGmmClientContextBase::copyResInfoObject(GMM_RESOURCE_INFO *pSrcRes) { return reinterpret_cast(new char[1]); } void MockGmmClientContextBase::destroyResInfoObject(GMM_RESOURCE_INFO *pResInfo) { delete[] reinterpret_cast(pResInfo); } uint8_t 
MockGmmClientContextBase::getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT format) { capturedFormat = format; getSurfaceStateCompressionFormatCalled++; return compressionFormatToReturn; } uint8_t MockGmmClientContextBase::getMediaSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT format) { capturedFormat = format; getMediaSurfaceStateCompressionFormatCalled++; return compressionFormatToReturn; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_client_context_base.h000066400000000000000000000020541363734646600310050ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "gmm_client_context.h" namespace NEO { class MockGmmClientContextBase : public GmmClientContext { public: MEMORY_OBJECT_CONTROL_STATE cachePolicyGetMemoryObject(GMM_RESOURCE_INFO *pResInfo, GMM_RESOURCE_USAGE_TYPE usage) override; GMM_RESOURCE_INFO *createResInfoObject(GMM_RESCREATE_PARAMS *pCreateParams) override; GMM_RESOURCE_INFO *copyResInfoObject(GMM_RESOURCE_INFO *pSrcRes) override; void destroyResInfoObject(GMM_RESOURCE_INFO *pResInfo) override; uint8_t getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT format) override; uint8_t getMediaSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT format) override; GMM_RESOURCE_FORMAT capturedFormat = GMM_FORMAT_INVALID; uint8_t compressionFormatToReturn = 1; uint32_t getSurfaceStateCompressionFormatCalled = 0u; uint32_t getMediaSurfaceStateCompressionFormatCalled = 0u; protected: using GmmClientContext::GmmClientContext; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_memory_base.cpp000066400000000000000000000004211363734646600276220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_gmm_memory.h" namespace NEO { GmmMemory *GmmMemory::create(GmmClientContext *gmmClientContext) { return new MockGmmMemory(gmmClientContext); 
} } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_memory_base.h000066400000000000000000000040251363734646600272730ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/windows_defs.h" #include "gmm_memory.h" #include "gmock/gmock.h" namespace NEO { class MockGmmMemoryBase : public GmmMemory { public: ~MockGmmMemoryBase() = default; MockGmmMemoryBase(GmmClientContext *gmmClientContext) : GmmMemory(gmmClientContext){}; bool configureDeviceAddressSpace(GMM_ESCAPE_HANDLE hAdapter, GMM_ESCAPE_HANDLE hDevice, GMM_ESCAPE_FUNC_TYPE pfnEscape, GMM_GFX_SIZE_T SvmSize, BOOLEAN BDWL3Coherency) override { return true; } uintptr_t getInternalGpuVaRangeLimit() override { return NEO::windowsMinAddress; } bool setDeviceInfo(GMM_DEVICE_INFO *deviceInfo) override { partition = deviceInfo->pGfxPartition; deviceCallbacks = *deviceInfo->pDeviceCb; return setDeviceInfoValue; } GMM_GFX_PARTITIONING *partition = nullptr; bool setDeviceInfoValue = true; GMM_DEVICE_CALLBACKS_INT deviceCallbacks{}; }; class GmockGmmMemoryBase : public GmmMemory { public: ~GmockGmmMemoryBase() = default; GmockGmmMemoryBase(GmmClientContext *gmmClientContext) : GmmMemory(gmmClientContext) { ON_CALL(*this, getInternalGpuVaRangeLimit()) .WillByDefault(::testing::Return(NEO::windowsMinAddress)); ON_CALL(*this, setDeviceInfo(::testing::_)) .WillByDefault(::testing::Return(true)); } MOCK_METHOD0(getInternalGpuVaRangeLimit, uintptr_t()); MOCK_METHOD1(setDeviceInfo, bool(GMM_DEVICE_INFO *)); MOCK_METHOD5(configureDeviceAddressSpace, bool(GMM_ESCAPE_HANDLE hAdapter, GMM_ESCAPE_HANDLE hDevice, GMM_ESCAPE_FUNC_TYPE pfnEscape, GMM_GFX_SIZE_T SvmSize, BOOLEAN BDWL3Coherency)); }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.cpp000066400000000000000000000014771363734646600304420ustar00rootroot00000000000000/* * 
Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.h" namespace NEO { using namespace ::testing; GmmPageTableMngr *GmmPageTableMngr::create(GmmClientContext *clientContext, unsigned int translationTableFlags, GMM_TRANSLATIONTABLE_CALLBACKS *translationTableCb) { auto pageTableMngr = new ::testing::NiceMock(translationTableFlags, translationTableCb); ON_CALL(*pageTableMngr, initContextAuxTableRegister(_, _)).WillByDefault(Return(GMM_SUCCESS)); ON_CALL(*pageTableMngr, updateAuxTable(_)).WillByDefault(Return(GMM_SUCCESS)); return pageTableMngr; } void MockGmmPageTableMngr::setCsrHandle(void *csrHandle) { passedCsrHandle = csrHandle; setCsrHanleCalled++; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.h000066400000000000000000000023231363734646600300760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/page_table_mngr.h" #include "gmock/gmock.h" #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace NEO { class MockGmmPageTableMngr : public GmmPageTableMngr { public: MockGmmPageTableMngr() = default; MockGmmPageTableMngr(unsigned int translationTableFlags, GMM_TRANSLATIONTABLE_CALLBACKS *translationTableCb) : translationTableFlags(translationTableFlags) { if (translationTableCb) { this->translationTableCb = *translationTableCb; } }; MOCK_METHOD2(initContextAuxTableRegister, GMM_STATUS(HANDLE initialBBHandle, GMM_ENGINE_TYPE engineType)); MOCK_METHOD1(updateAuxTable, GMM_STATUS(const GMM_DDI_UPDATEAUXTABLE *ddiUpdateAuxTable)); void setCsrHandle(void *csrHandle) override; uint32_t setCsrHanleCalled = 0; void *passedCsrHandle = nullptr; unsigned int translationTableFlags = 0; GMM_TRANSLATIONTABLE_CALLBACKS translationTableCb = {}; }; } // 
namespace NEO #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_resource_info.cpp000066400000000000000000000120371363734646600301700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/surface_formats.h" using namespace ::testing; namespace NEO { GmmResourceInfo *GmmResourceInfo::create(GmmClientContext *clientContext, GMM_RESCREATE_PARAMS *resourceCreateParams) { if (resourceCreateParams->Type == GMM_RESOURCE_TYPE::RESOURCE_INVALID) { return nullptr; } return new ::testing::NiceMock(resourceCreateParams); } GmmResourceInfo *GmmResourceInfo::create(GmmClientContext *clientContext, GMM_RESOURCE_INFO *inputGmmResourceInfo) { return new ::testing::NiceMock(inputGmmResourceInfo); } MockGmmResourceInfo::MockGmmResourceInfo(GMM_RESCREATE_PARAMS *resourceCreateParams) { mockResourceCreateParams = *resourceCreateParams; setupDefaultActions(); } MockGmmResourceInfo::MockGmmResourceInfo(GMM_RESOURCE_INFO *inputGmmResourceInfo) { mockResourceCreateParams = reinterpret_cast(inputGmmResourceInfo)->mockResourceCreateParams; setupDefaultActions(); }; // Simulate GMM behaviour. We dont want to test 3rd party lib void MockGmmResourceInfo::setupDefaultActions() { setSurfaceFormat(); computeRowPitch(); size = rowPitch; size *= static_cast(mockResourceCreateParams.BaseHeight); qPitch = alignUp((uint32_t)size, 64); size *= mockResourceCreateParams.Depth ? static_cast(mockResourceCreateParams.Depth) : 1; size *= mockResourceCreateParams.ArraySize ? 
static_cast(mockResourceCreateParams.ArraySize) : 1; size = alignUp(size, MemoryConstants::pageSize); } GMM_STATUS MockGmmResourceInfo::getOffset(GMM_REQ_OFFSET_INFO &reqOffsetInfo) { arrayIndexPassedToGetOffset = reqOffsetInfo.ArrayIndex; getOffsetCalled++; reqOffsetInfo.Lock.Offset = 16; reqOffsetInfo.Lock.Pitch = 2; reqOffsetInfo.Render.YOffset = 1; if (mockResourceCreateParams.Format == GMM_RESOURCE_FORMAT::GMM_FORMAT_NV12) { reqOffsetInfo.Render.XOffset = 32; reqOffsetInfo.Render.Offset = 64; } if (reqOffsetInfo.Slice == 1) { reqOffsetInfo.Render.YOffset = mockResourceCreateParams.BaseHeight; } if (reqOffsetInfo.MipLevel > 0) { reqOffsetInfo.Lock.Offset = 32; } return GMM_SUCCESS; } void MockGmmResourceInfo::computeRowPitch() { if (mockResourceCreateParams.OverridePitch) { rowPitch = mockResourceCreateParams.OverridePitch; } else { rowPitch = static_cast(mockResourceCreateParams.BaseWidth64 * (surfaceFormatInfo->PerChannelSizeInBytes * surfaceFormatInfo->NumChannels)); rowPitch = alignUp(rowPitch, 64); } } void MockGmmResourceInfo::setSurfaceFormat() { auto iterate = [&](ArrayRef formats) { if (!surfaceFormatInfo) { for (auto &format : formats) { if (mockResourceCreateParams.Format == format.surfaceFormat.GMMSurfaceFormat) { surfaceFormatInfo = &format.surfaceFormat; break; } } } }; if (mockResourceCreateParams.Format == GMM_RESOURCE_FORMAT::GMM_FORMAT_P010) { tempSurface.GMMSurfaceFormat = GMM_RESOURCE_FORMAT::GMM_FORMAT_P010; tempSurface.NumChannels = 1; tempSurface.ImageElementSizeInBytes = 16; tempSurface.PerChannelSizeInBytes = 16; surfaceFormatInfo = &tempSurface; } iterate(SurfaceFormats::readOnly12()); iterate(SurfaceFormats::readOnly20()); iterate(SurfaceFormats::writeOnly()); iterate(SurfaceFormats::readWrite()); iterate(SurfaceFormats::packedYuv()); iterate(SurfaceFormats::planarYuv()); iterate(SurfaceFormats::readOnlyDepth()); iterate(SurfaceFormats::readWriteDepth()); ASSERT_NE(nullptr, surfaceFormatInfo); } uint32_t 
MockGmmResourceInfo::getBitsPerPixel() { return (surfaceFormatInfo->PerChannelSizeInBytes << 3) * surfaceFormatInfo->NumChannels; } void MockGmmResourceInfo::setUnifiedAuxTranslationCapable() { mockResourceCreateParams.Flags.Gpu.CCS = 1; mockResourceCreateParams.Flags.Gpu.UnifiedAuxSurface = 1; mockResourceCreateParams.Flags.Info.RenderCompressed = 1; } void MockGmmResourceInfo::setMultisampleControlSurface() { mockResourceCreateParams.Flags.Gpu.MCS = 1; } void MockGmmResourceInfo::setUnifiedAuxPitchTiles(uint32_t value) { this->unifiedAuxPitch = value; } void MockGmmResourceInfo::setAuxQPitch(uint32_t value) { this->auxQPitch = value; } uint32_t MockGmmResourceInfo::getTileModeSurfaceState() { if (mockResourceCreateParams.Flags.Info.Linear == 1) { return 0; } if (mockResourceCreateParams.Type == GMM_RESOURCE_TYPE::RESOURCE_2D || mockResourceCreateParams.Type == GMM_RESOURCE_TYPE::RESOURCE_3D) { return 3; } else { return 0; } } MockGmmResourceInfo::MockGmmResourceInfo() {} MockGmmResourceInfo::~MockGmmResourceInfo() {} } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_gmm_resource_info.h000066400000000000000000000074751363734646600276470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/resource_info.h" #include "opencl/source/helpers/surface_formats.h" #include "gmock/gmock.h" #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif namespace NEO { struct SurfaceFormatInfo; class MockGmmResourceInfo : public GmmResourceInfo { public: ~MockGmmResourceInfo() override; MockGmmResourceInfo(GMM_RESCREATE_PARAMS *resourceCreateParams); MockGmmResourceInfo(GMM_RESOURCE_INFO *inputGmmResourceInfo); size_t getSizeAllocation() override { return size; } size_t getBaseWidth() override { return static_cast(mockResourceCreateParams.BaseWidth); } size_t getBaseHeight() 
override { return static_cast(mockResourceCreateParams.BaseHeight); } size_t getBaseDepth() override { return static_cast(mockResourceCreateParams.Depth); } size_t getArraySize() override { return static_cast(mockResourceCreateParams.ArraySize); } size_t getRenderPitch() override { return rowPitch; } uint32_t getNumSamples() override { return mockResourceCreateParams.MSAA.NumSamples; } uint32_t getQPitch() override { return qPitch; } uint32_t getBitsPerPixel() override; uint32_t getHAlign() override { return 4u; } uint32_t getHAlignSurfaceState() override { return 1u; } uint32_t getVAlignSurfaceState() override { return 1u; } uint32_t getMaxLod() override { return 7u; } uint32_t getTileModeSurfaceState() override; uint32_t getRenderAuxPitchTiles() override { return unifiedAuxPitch; }; uint32_t getAuxQPitch() override { return auxQPitch; } uint32_t getMipTailStartLodSurfaceState() override { return mipTailStartLod; } GMM_RESOURCE_FORMAT getResourceFormat() override { return mockResourceCreateParams.Format; } GMM_SURFACESTATE_FORMAT getResourceFormatSurfaceState() override { return (GMM_SURFACESTATE_FORMAT)0; } GMM_RESOURCE_TYPE getResourceType() override { return mockResourceCreateParams.Type; } GMM_RESOURCE_FLAG *getResourceFlags() override { return &mockResourceCreateParams.Flags; } GMM_STATUS getOffset(GMM_REQ_OFFSET_INFO &reqOffsetInfo) override; MOCK_METHOD1(cpuBlt, uint8_t(GMM_RES_COPY_BLT *resCopyBlt)); void *getSystemMemPointer() override { return (void *)mockResourceCreateParams.pExistingSysMem; } MOCK_METHOD1(getUnifiedAuxSurfaceOffset, uint64_t(GMM_UNIFIED_AUX_TYPE auxType)); bool is64KBPageSuitable() const override { return is64KBPageSuitableValue; } GMM_RESOURCE_INFO *peekHandle() const override { return mockResourceInfoHandle; } GMM_RESOURCE_INFO *mockResourceInfoHandle = (GMM_RESOURCE_INFO *)this; GMM_RESCREATE_PARAMS mockResourceCreateParams = {}; void overrideReturnedRenderPitch(size_t newPitch) { rowPitch = newPitch; } void 
overrideReturnedSize(size_t newSize) { size = newSize; } void setUnifiedAuxTranslationCapable(); void setMultisampleControlSurface(); void setUnifiedAuxPitchTiles(uint32_t value); void setAuxQPitch(uint32_t value); void setMipTailStartLod(uint32_t newMipTailStartLod) { mipTailStartLod = newMipTailStartLod; } uint32_t getOffsetCalled = 0u; uint32_t arrayIndexPassedToGetOffset = 0; SurfaceFormatInfo tempSurface{}; bool is64KBPageSuitableValue = true; protected: MockGmmResourceInfo(); void computeRowPitch(); void setupDefaultActions(); void setSurfaceFormat(); const SurfaceFormatInfo *surfaceFormatInfo = nullptr; size_t size = 0; size_t rowPitch = 0; uint32_t qPitch = 0; uint32_t unifiedAuxPitch = 0; uint32_t auxQPitch = 0; uint32_t mipTailStartLod = 0; }; } // namespace NEO #if defined(__clang__) #pragma clang diagnostic pop #endif compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_graphics_allocation.h000066400000000000000000000024021363734646600301330ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" namespace NEO { class MockGraphicsAllocation : public MemoryAllocation { public: using MemoryAllocation::aubInfo; using MemoryAllocation::MemoryAllocation; using MemoryAllocation::objectNotResident; using MemoryAllocation::objectNotUsed; using MemoryAllocation::usageInfos; MockGraphicsAllocation() : MemoryAllocation(0, AllocationType::UNKNOWN, nullptr, 0u, 0, MemoryPool::MemoryNull) {} MockGraphicsAllocation(void *buffer, size_t sizeIn) : MemoryAllocation(0, AllocationType::UNKNOWN, buffer, castToUint64(buffer), 0llu, sizeIn, MemoryPool::MemoryNull) {} MockGraphicsAllocation(void *buffer, uint64_t gpuAddr, size_t sizeIn) : MemoryAllocation(0, AllocationType::UNKNOWN, buffer, gpuAddr, 0llu, sizeIn, MemoryPool::MemoryNull) {} void resetInspectionIds() { for 
(auto &usageInfo : usageInfos) { usageInfo.inspectionId = 0u; } } void overrideMemoryPool(MemoryPool::Type pool) { this->memoryPool = pool; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_host_ptr_manager.h000066400000000000000000000010611363734646600274620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/host_ptr_manager.h" namespace NEO { class MockHostPtrManager : public HostPtrManager { public: using HostPtrManager::checkAllocationsForOverlapping; using HostPtrManager::getAllocationRequirements; using HostPtrManager::getFragmentAndCheckForOverlaps; using HostPtrManager::populateAlreadyAllocatedFragments; size_t getFragmentCount() { return partialAllocations.size(); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_hw_helper.h000066400000000000000000000012221363734646600261020ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" namespace NEO { template class MockHwHelperWithFenceAllocation : public HwHelperHw { public: bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override { return true; } }; template class MockHwHelperWithLocalMemory : public HwHelperHw { public: bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override { return true; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_image.h000066400000000000000000000024671363734646600252230ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "memory_properties_flags.h" namespace NEO { struct MockImageBase : public Image { 
using Image::graphicsAllocation; using Image::imageDesc; MockImageBase() : Image( nullptr, MemoryPropertiesFlags(), cl_mem_flags{}, 0, 0, nullptr, cl_image_format{}, cl_image_desc{}, false, new MockGraphicsAllocation(nullptr, 0), false, 0, 0, ClSurfaceFormatInfo{}, nullptr) { } ~MockImageBase() override { delete this->graphicsAllocation; } MockGraphicsAllocation *getAllocation() { return static_cast(graphicsAllocation); } void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel) override {} void setMediaImageArg(void *memory) override {} void setMediaSurfaceRotation(void *memory) override {} void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override {} void transformImage2dArrayTo3d(void *memory) override {} void transformImage3dTo2dArray(void *memory) override {} }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_internal_allocation_storage.h000066400000000000000000000023651363734646600317030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/memory_manager/internal_allocation_storage.h" namespace NEO { class MockInternalAllocationStorage : public InternalAllocationStorage { public: using InternalAllocationStorage::InternalAllocationStorage; void cleanAllocationList(uint32_t waitTaskCount, uint32_t allocationUsage) override { cleanAllocationsCalled++; lastCleanAllocationsTaskCount = waitTaskCount; lastCleanAllocationUsage = allocationUsage; InternalAllocationStorage::cleanAllocationList(waitTaskCount, allocationUsage); if (doUpdateCompletion) { *commandStreamReceiver.getTagAddress() = valueToUpdateCompletion; doUpdateCompletion = false; } } void updateCompletionAfterCleaningList(uint32_t newValue) { doUpdateCompletion = true; valueToUpdateCompletion = newValue; } bool doUpdateCompletion = false; uint32_t 
valueToUpdateCompletion; uint32_t lastCleanAllocationUsage = 0; uint32_t lastCleanAllocationsTaskCount = 0; uint32_t cleanAllocationsCalled = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_kernel.cpp000066400000000000000000000057151363734646600257530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/source/kernel/kernel.inl" #include "opencl/source/program/printf_handler.h" namespace NEO { MockKernel::BlockPatchValues MockKernel::ReflectionSurfaceHelperPublic::devQueue; MockKernel::BlockPatchValues MockKernel::ReflectionSurfaceHelperPublic::defaultQueue; MockKernel::BlockPatchValues MockKernel::ReflectionSurfaceHelperPublic::eventPool; MockKernel::BlockPatchValues MockKernel::ReflectionSurfaceHelperPublic::printfBuffer; const uint32_t MockDebugKernel::perThreadSystemThreadSurfaceSize = 0x100; template <> void Kernel::ReflectionSurfaceHelper::patchBlocksCurbe(void *reflectionSurface, uint32_t blockID, uint64_t defaultDeviceQueueCurbeOffset, uint32_t patchSizeDefaultQueue, uint64_t defaultDeviceQueueGpuAddress, uint64_t eventPoolCurbeOffset, uint32_t patchSizeEventPool, uint64_t eventPoolGpuAddress, uint64_t deviceQueueCurbeOffset, uint32_t patchSizeDeviceQueue, uint64_t deviceQueueGpuAddress, uint64_t printfBufferOffset, uint32_t patchSizePrintfBuffer, uint64_t printfBufferGpuAddress, uint64_t privateSurfaceOffset, uint32_t privateSurfaceSize, uint64_t privateSurfaceGpuAddress) { MockKernel::ReflectionSurfaceHelperPublic::patchBlocksCurbeMock(reflectionSurface, blockID, defaultDeviceQueueCurbeOffset, patchSizeDefaultQueue, defaultDeviceQueueGpuAddress, eventPoolCurbeOffset, patchSizeEventPool, eventPoolGpuAddress, deviceQueueCurbeOffset, patchSizeDeviceQueue, deviceQueueGpuAddress, printfBufferOffset, patchSizePrintfBuffer, printfBufferGpuAddress); } template void 
Kernel::patchReflectionSurface(DeviceQueue *, PrintfHandler *); bool MockKernel::isPatched() const { return isPatchedOverride; } bool MockKernel::canTransformImages() const { return canKernelTransformImages; } void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { makeResidentCalls++; Kernel::makeResident(commandStreamReceiver); } void MockKernel::getResidency(std::vector &dst) { getResidencyCalls++; Kernel::getResidency(dst); } bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (DebugManager.flags.EnableCacheFlushAfterWalker.get() != -1) { return !!DebugManager.flags.EnableCacheFlushAfterWalker.get(); } return false; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_kernel.h000066400000000000000000000661421363734646600254210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/string.h" #include "shared/source/kernel/grf_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/scheduler/scheduler_kernel.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include namespace NEO { //////////////////////////////////////////////////////////////////////////////// // Kernel - Core implementation //////////////////////////////////////////////////////////////////////////////// class MockKernel : public Kernel { public: using Kernel::addAllocationToCacheFlushVector; using Kernel::allBufferArgsStateful; using Kernel::auxTranslationRequired; using Kernel::containsStatelessWrites; using Kernel::executionType; using Kernel::isSchedulerKernel; using Kernel::kernelArgHandlers; using Kernel::kernelArgRequiresCacheFlush; using 
Kernel::kernelArguments; using Kernel::kernelSvmGfxAllocations; using Kernel::kernelUnifiedMemoryGfxAllocations; using Kernel::numberOfBindingTableStates; using Kernel::svmAllocationsRequireCacheFlush; using Kernel::threadArbitrationPolicy; using Kernel::unifiedMemoryControls; struct BlockPatchValues { uint64_t offset; uint32_t size; uint64_t address; }; class ReflectionSurfaceHelperPublic : public Kernel::ReflectionSurfaceHelper { public: static BlockPatchValues devQueue; static BlockPatchValues defaultQueue; static BlockPatchValues eventPool; static BlockPatchValues printfBuffer; static const uint64_t undefinedOffset = (uint64_t)-1; static void patchBlocksCurbeMock(void *reflectionSurface, uint32_t blockID, uint64_t defaultDeviceQueueCurbeOffset, uint32_t patchSizeDefaultQueue, uint64_t defaultDeviceQueueGpuAddress, uint64_t eventPoolCurbeOffset, uint32_t patchSizeEventPool, uint64_t eventPoolGpuAddress, uint64_t deviceQueueCurbeOffset, uint32_t patchSizeDeviceQueue, uint64_t deviceQueueGpuAddress, uint64_t printfBufferOffset, uint32_t patchSizePrintfBuffer, uint64_t printfBufferGpuAddress) { defaultQueue.address = defaultDeviceQueueGpuAddress; defaultQueue.offset = defaultDeviceQueueCurbeOffset; defaultQueue.size = patchSizeDefaultQueue; devQueue.address = deviceQueueGpuAddress; devQueue.offset = deviceQueueCurbeOffset; devQueue.size = patchSizeDeviceQueue; eventPool.address = eventPoolGpuAddress; eventPool.offset = eventPoolCurbeOffset; eventPool.size = patchSizeEventPool; printfBuffer.address = printfBufferGpuAddress; printfBuffer.offset = printfBufferOffset; printfBuffer.size = patchSizePrintfBuffer; } static uint32_t getConstantBufferOffset(void *reflectionSurface, uint32_t blockID) { IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface); assert(blockID < pKernelHeader->m_numberOfKernels); IGIL_KernelAddressData *addressData = pKernelHeader->m_data; assert(addressData[blockID].m_ConstantBufferOffset != 0); return 
addressData[blockID].m_ConstantBufferOffset; } }; MockKernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg, bool scheduler = false) : Kernel(programArg, kernelInfoArg, deviceArg, scheduler) { } ~MockKernel() override { // prevent double deletion if (Kernel::crossThreadData == mockCrossThreadData.data()) { Kernel::crossThreadData = nullptr; } if (kernelInfoAllocated) { delete kernelInfoAllocated->heapInfo.pKernelHeader; delete kernelInfoAllocated->patchInfo.executionEnvironment; delete kernelInfoAllocated->patchInfo.threadPayload; delete kernelInfoAllocated; } } template static KernelType *create(Device &device, Program *program) { return create(device, program, GrfConfig::DefaultGrfNumber); } template static KernelType *create(Device &device, Program *program, uint32_t grfNumber) { auto info = new KernelInfo(); const size_t crossThreadSize = 160; auto pClDevice = device.getSpecializedDevice(); SKernelBinaryHeaderCommon *header = new SKernelBinaryHeaderCommon; header->DynamicStateHeapSize = 0; header->GeneralStateHeapSize = 0; header->KernelHeapSize = 0; header->KernelNameSize = 0; header->PatchListSize = 0; header->SurfaceStateHeapSize = 0; info->heapInfo.pKernelHeader = header; SPatchThreadPayload *threadPayload = new SPatchThreadPayload; threadPayload->LocalIDXPresent = 0; threadPayload->LocalIDYPresent = 0; threadPayload->LocalIDZPresent = 0; threadPayload->HeaderPresent = 0; threadPayload->Size = 128; info->patchInfo.threadPayload = threadPayload; SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment; memset(executionEnvironment, 0, sizeof(SPatchExecutionEnvironment)); executionEnvironment->HasDeviceEnqueue = 0; executionEnvironment->NumGRFRequired = grfNumber; executionEnvironment->CompiledSIMD32 = 1; info->patchInfo.executionEnvironment = executionEnvironment; info->crossThreadData = new char[crossThreadSize]; auto kernel = new KernelType(program, *info, *pClDevice); kernel->crossThreadData = new 
char[crossThreadSize]; memset(kernel->crossThreadData, 0, crossThreadSize); kernel->crossThreadDataSize = crossThreadSize; kernel->kernelInfoAllocated = info; return kernel; } uint32_t getPatchedArgumentsNum() const { return patchedArgumentsNum; } bool isPatched() const override; bool canTransformImages() const override; //////////////////////////////////////////////////////////////////////////////// void setCrossThreadData(const void *crossThreadDataPattern, uint32_t newCrossThreadDataSize) { if ((Kernel::crossThreadData != nullptr) && (Kernel::crossThreadData != mockCrossThreadData.data())) { delete[] Kernel::crossThreadData; Kernel::crossThreadData = nullptr; Kernel::crossThreadDataSize = 0; } if (crossThreadDataPattern && (newCrossThreadDataSize > 0)) { mockCrossThreadData.clear(); mockCrossThreadData.insert(mockCrossThreadData.begin(), (char *)crossThreadDataPattern, ((char *)crossThreadDataPattern) + newCrossThreadDataSize); } else { mockCrossThreadData.resize(newCrossThreadDataSize, 0); } if (newCrossThreadDataSize == 0) { return; } Kernel::crossThreadData = mockCrossThreadData.data(); Kernel::crossThreadDataSize = static_cast(mockCrossThreadData.size()); } void setSshLocal(const void *sshPattern, uint32_t newSshSize) { sshLocalSize = newSshSize; if (newSshSize == 0) { pSshLocal.reset(nullptr); } else { pSshLocal = std::make_unique(newSshSize); if (sshPattern) { memcpy_s(pSshLocal.get(), newSshSize, sshPattern, newSshSize); } } } void setPrivateSurface(GraphicsAllocation *gfxAllocation, uint32_t size) { privateSurface = gfxAllocation; privateSurfaceSize = size; } GraphicsAllocation *getPrivateSurface() const { return privateSurface; } void setTotalSLMSize(uint32_t size) { slmTotalSize = size; } void setKernelArguments(std::vector kernelArguments) { this->kernelArguments = kernelArguments; } template void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const PatchTokenT &patch) { 
Kernel::patchWithImplicitSurface(ptrToPatchInCrossThreadData, allocation, patch); } void *patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc) { return Kernel::patchBufferOffset(argInfo, svmPtr, svmAlloc); } KernelInfo *getAllocatedKernelInfo() { return kernelInfoAllocated; } std::vector mockCrossThreadData; std::vector mockSshLocal; void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; } void makeResident(CommandStreamReceiver &commandStreamReceiver) override; void getResidency(std::vector &dst) override; void takeOwnership() const override { Kernel::takeOwnership(); takeOwnershipCalls++; } void releaseOwnership() const override { releaseOwnershipCalls++; Kernel::releaseOwnership(); } void setSpecialPipelineSelectMode(bool value) { specialPipelineSelectMode = value; } bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const override; uint32_t makeResidentCalls = 0; uint32_t getResidencyCalls = 0; mutable uint32_t takeOwnershipCalls = 0; mutable uint32_t releaseOwnershipCalls = 0; bool canKernelTransformImages = true; bool isPatchedOverride = true; protected: KernelInfo *kernelInfoAllocated = nullptr; }; //class below have enough internals to service Enqueue operation. 
class MockKernelWithInternals { public: MockKernelWithInternals(const ClDevice &deviceArg, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment newExecutionEnvironment = {}) { memset(&kernelHeader, 0, sizeof(SKernelBinaryHeaderCommon)); memset(&threadPayload, 0, sizeof(SPatchThreadPayload)); memcpy(&executionEnvironment, &newExecutionEnvironment, sizeof(SPatchExecutionEnvironment)); memset(&executionEnvironmentBlock, 0, sizeof(SPatchExecutionEnvironment)); memset(&dataParameterStream, 0, sizeof(SPatchDataParameterStream)); memset(&mediaVfeState, 0, sizeof(SPatchMediaVFEState)); memset(&mediaVfeStateSlot1, 0, sizeof(SPatchMediaVFEState)); executionEnvironment.NumGRFRequired = GrfConfig::DefaultGrfNumber; executionEnvironmentBlock.NumGRFRequired = GrfConfig::DefaultGrfNumber; executionEnvironment.CompiledSIMD32 = 1; kernelHeader.SurfaceStateHeapSize = sizeof(sshLocal); threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.pSsh = sshLocal; kernelInfo.heapInfo.pDsh = dshLocal; kernelInfo.heapInfo.pKernelHeader = &kernelHeader; kernelInfo.patchInfo.dataParameterStream = &dataParameterStream; kernelInfo.patchInfo.executionEnvironment = &executionEnvironment; kernelInfo.patchInfo.threadPayload = &threadPayload; kernelInfo.patchInfo.mediavfestate = &mediaVfeState; kernelInfo.patchInfo.mediaVfeStateSlot1 = &mediaVfeStateSlot1; if (context == nullptr) { mockContext = new MockContext; context = mockContext; } else { context->incRefInternal(); mockContext = context; } mockProgram = new MockProgram(*deviceArg.getExecutionEnvironment(), context, false, nullptr); mockKernel = new MockKernel(mockProgram, kernelInfo, deviceArg); mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData)); mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal)); if (addDefaultArg) { defaultKernelArguments.resize(2); defaultKernelArguments[0] = 
{}; defaultKernelArguments[1] = {}; kernelInfo.resizeKernelArgInfoAndRegisterParameter(2); kernelInfo.kernelArgInfo.resize(2); kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1); kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0; kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t); kernelInfo.kernelArgInfo[0].metadata.addressQualifier = NEO::KernelArgMetadata::AddrGlobal; kernelInfo.kernelArgInfo[0].metadata.accessQualifier = NEO::KernelArgMetadata::AccessReadWrite; kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector.resize(1); kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0; kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t); kernelInfo.kernelArgInfo[1].metadata.addressQualifier = NEO::KernelArgMetadata::AddrGlobal; kernelInfo.kernelArgInfo[1].metadata.accessQualifier = NEO::KernelArgMetadata::AccessReadWrite; mockKernel->setKernelArguments(defaultKernelArguments); mockKernel->kernelArgRequiresCacheFlush.resize(2); mockKernel->kernelArgHandlers.resize(2); mockKernel->kernelArgHandlers[0] = &Kernel::setArgBuffer; mockKernel->kernelArgHandlers[1] = &Kernel::setArgBuffer; kernelInfo.kernelArgInfo[1].offsetHeap = 64; kernelInfo.kernelArgInfo[0].offsetHeap = 64; } } MockKernelWithInternals(const ClDevice &deviceArg, SPatchExecutionEnvironment newExecutionEnvironment) : MockKernelWithInternals(deviceArg, nullptr, false, newExecutionEnvironment) { mockKernel->initialize(); } ~MockKernelWithInternals() { mockKernel->decRefInternal(); mockProgram->decRefInternal(); mockContext->decRefInternal(); } operator MockKernel *() { return mockKernel; } MockKernel *mockKernel; MockProgram *mockProgram; Context *mockContext; KernelInfo kernelInfo; SKernelBinaryHeaderCommon kernelHeader = {}; SPatchThreadPayload threadPayload = {}; SPatchMediaVFEState mediaVfeState = {}; SPatchMediaVFEState mediaVfeStateSlot1 = {}; SPatchDataParameterStream 
dataParameterStream = {}; SPatchExecutionEnvironment executionEnvironment = {}; SPatchExecutionEnvironment executionEnvironmentBlock = {}; uint32_t kernelIsa[32]; char crossThreadData[256]; char sshLocal[128]; char dshLocal[128]; std::vector defaultKernelArguments; }; class MockParentKernel : public Kernel { public: using Kernel::auxTranslationRequired; using Kernel::patchBlocksCurbeWithConstantValues; static MockParentKernel *create(Context &context, bool addChildSimdSize = false, bool addChildGlobalMemory = false, bool addChildConstantMemory = false, bool addPrintfForParent = true, bool addPrintfForBlock = true) { Device &device = context.getDevice(0)->getDevice(); auto info = new KernelInfo(); const size_t crossThreadSize = 160; uint32_t crossThreadOffset = 0; uint32_t crossThreadOffsetBlock = 0; SKernelBinaryHeaderCommon *header = new SKernelBinaryHeaderCommon; header->DynamicStateHeapSize = 0; header->GeneralStateHeapSize = 0; header->KernelHeapSize = 0; header->KernelNameSize = 0; header->PatchListSize = 0; header->SurfaceStateHeapSize = 0; info->heapInfo.pKernelHeader = header; SPatchThreadPayload *threadPayload = new SPatchThreadPayload; threadPayload->LocalIDXPresent = 0; threadPayload->LocalIDYPresent = 0; threadPayload->LocalIDZPresent = 0; threadPayload->HeaderPresent = 0; threadPayload->Size = 128; info->patchInfo.threadPayload = threadPayload; SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment; *executionEnvironment = {}; executionEnvironment->HasDeviceEnqueue = 1; executionEnvironment->NumGRFRequired = GrfConfig::DefaultGrfNumber; executionEnvironment->CompiledSIMD32 = 1; info->patchInfo.executionEnvironment = executionEnvironment; SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueue = new SPatchAllocateStatelessDefaultDeviceQueueSurface; allocateDeviceQueue->DataParamOffset = crossThreadOffset; allocateDeviceQueue->DataParamSize = 8; allocateDeviceQueue->SurfaceStateHeapOffset = 0; 
allocateDeviceQueue->Size = 8; info->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = allocateDeviceQueue; crossThreadOffset += 8; SPatchAllocateStatelessEventPoolSurface *eventPool = new SPatchAllocateStatelessEventPoolSurface; eventPool->DataParamOffset = crossThreadOffset; eventPool->DataParamSize = 8; eventPool->EventPoolSurfaceIndex = 0; eventPool->Size = 8; info->patchInfo.pAllocateStatelessEventPoolSurface = eventPool; crossThreadOffset += 8; if (addPrintfForParent) { SPatchAllocateStatelessPrintfSurface *printfBuffer = new SPatchAllocateStatelessPrintfSurface; printfBuffer->DataParamOffset = crossThreadOffset; printfBuffer->DataParamSize = 8; printfBuffer->PrintfSurfaceIndex = 0; printfBuffer->Size = 8; printfBuffer->SurfaceStateHeapOffset = 0; printfBuffer->Token = 0; info->patchInfo.pAllocateStatelessPrintfSurface = printfBuffer; crossThreadOffset += 8; } MockProgram *mockProgram = new MockProgram(*device.getExecutionEnvironment()); mockProgram->setContext(&context); mockProgram->setDevice(&device); if (addChildSimdSize) { info->childrenKernelsIdOffset.push_back({0, crossThreadOffset}); } UNRECOVERABLE_IF(crossThreadSize < crossThreadOffset + 8); info->crossThreadData = new char[crossThreadSize]; auto clDevice = device.getSpecializedDevice(); DEBUG_BREAK_IF(clDevice == nullptr); auto parent = new MockParentKernel(mockProgram, *info, *clDevice); parent->crossThreadData = new char[crossThreadSize]; memset(parent->crossThreadData, 0, crossThreadSize); parent->crossThreadDataSize = crossThreadSize; parent->mockKernelInfo = info; auto infoBlock = new KernelInfo(); SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueueBlock = new SPatchAllocateStatelessDefaultDeviceQueueSurface; allocateDeviceQueueBlock->DataParamOffset = crossThreadOffsetBlock; allocateDeviceQueueBlock->DataParamSize = 8; allocateDeviceQueueBlock->SurfaceStateHeapOffset = 0; allocateDeviceQueueBlock->Size = 8; 
infoBlock->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface = allocateDeviceQueueBlock; crossThreadOffsetBlock += 8; SPatchAllocateStatelessEventPoolSurface *eventPoolBlock = new SPatchAllocateStatelessEventPoolSurface; eventPoolBlock->DataParamOffset = crossThreadOffsetBlock; eventPoolBlock->DataParamSize = 8; eventPoolBlock->EventPoolSurfaceIndex = 0; eventPoolBlock->Size = 8; infoBlock->patchInfo.pAllocateStatelessEventPoolSurface = eventPoolBlock; crossThreadOffsetBlock += 8; if (addPrintfForBlock) { SPatchAllocateStatelessPrintfSurface *printfBufferBlock = new SPatchAllocateStatelessPrintfSurface; printfBufferBlock->DataParamOffset = crossThreadOffsetBlock; printfBufferBlock->DataParamSize = 8; printfBufferBlock->PrintfSurfaceIndex = 0; printfBufferBlock->Size = 8; printfBufferBlock->SurfaceStateHeapOffset = 0; printfBufferBlock->Token = 0; infoBlock->patchInfo.pAllocateStatelessPrintfSurface = printfBufferBlock; crossThreadOffsetBlock += 8; } infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = nullptr; infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = nullptr; if (addChildGlobalMemory) { SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *globalMemoryBlock = new SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization; globalMemoryBlock->DataParamOffset = crossThreadOffsetBlock; globalMemoryBlock->DataParamSize = 8; globalMemoryBlock->Size = 8; globalMemoryBlock->SurfaceStateHeapOffset = 0; globalMemoryBlock->Token = 0; infoBlock->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization = globalMemoryBlock; crossThreadOffsetBlock += 8; } if (addChildConstantMemory) { SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *constantMemoryBlock = new SPatchAllocateStatelessConstantMemorySurfaceWithInitialization; constantMemoryBlock->DataParamOffset = crossThreadOffsetBlock; constantMemoryBlock->DataParamSize = 8; constantMemoryBlock->Size = 8; 
constantMemoryBlock->SurfaceStateHeapOffset = 0; constantMemoryBlock->Token = 0; infoBlock->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization = constantMemoryBlock; crossThreadOffsetBlock += 8; } SKernelBinaryHeaderCommon *headerBlock = new SKernelBinaryHeaderCommon; headerBlock->DynamicStateHeapSize = 0; headerBlock->GeneralStateHeapSize = 0; headerBlock->KernelHeapSize = 0; headerBlock->KernelNameSize = 0; headerBlock->PatchListSize = 0; headerBlock->SurfaceStateHeapSize = 0; infoBlock->heapInfo.pKernelHeader = headerBlock; SPatchThreadPayload *threadPayloadBlock = new SPatchThreadPayload; threadPayloadBlock->LocalIDXPresent = 0; threadPayloadBlock->LocalIDYPresent = 0; threadPayloadBlock->LocalIDZPresent = 0; threadPayloadBlock->HeaderPresent = 0; threadPayloadBlock->Size = 128; infoBlock->patchInfo.threadPayload = threadPayloadBlock; SPatchExecutionEnvironment *executionEnvironmentBlock = new SPatchExecutionEnvironment; executionEnvironmentBlock->HasDeviceEnqueue = 1; executionEnvironmentBlock->NumGRFRequired = GrfConfig::DefaultGrfNumber; executionEnvironmentBlock->CompiledSIMD32 = 1; infoBlock->patchInfo.executionEnvironment = executionEnvironmentBlock; SPatchDataParameterStream *streamBlock = new SPatchDataParameterStream; streamBlock->DataParameterStreamSize = 0; streamBlock->Size = 0; infoBlock->patchInfo.dataParameterStream = streamBlock; SPatchBindingTableState *bindingTable = new SPatchBindingTableState; bindingTable->Count = 0; bindingTable->Offset = 0; bindingTable->Size = 0; bindingTable->SurfaceStateOffset = 0; infoBlock->patchInfo.bindingTableState = bindingTable; SPatchInterfaceDescriptorData *idData = new SPatchInterfaceDescriptorData; idData->BindingTableOffset = 0; idData->KernelOffset = 0; idData->Offset = 0; idData->SamplerStateOffset = 0; idData->Size = 0; infoBlock->patchInfo.interfaceDescriptorData = idData; infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; infoBlock->crossThreadData = new char[crossThreadOffsetBlock > 
crossThreadSize ? crossThreadOffsetBlock : crossThreadSize]; mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock); parent->mockProgram = mockProgram; return parent; } MockParentKernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg) : Kernel(programArg, kernelInfoArg, deviceArg) { } ~MockParentKernel() override { delete kernelInfo.patchInfo.executionEnvironment; delete kernelInfo.patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; delete kernelInfo.patchInfo.pAllocateStatelessEventPoolSurface; delete kernelInfo.patchInfo.pAllocateStatelessPrintfSurface; delete kernelInfo.patchInfo.threadPayload; delete kernelInfo.heapInfo.pKernelHeader; delete &kernelInfo; BlockKernelManager *blockManager = program->getBlockKernelManager(); for (uint32_t i = 0; i < blockManager->getCount(); i++) { const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); delete blockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; delete blockInfo->patchInfo.pAllocateStatelessEventPoolSurface; delete blockInfo->patchInfo.pAllocateStatelessPrintfSurface; delete blockInfo->heapInfo.pKernelHeader; delete blockInfo->patchInfo.threadPayload; delete blockInfo->patchInfo.executionEnvironment; delete blockInfo->patchInfo.dataParameterStream; delete blockInfo->patchInfo.bindingTableState; delete blockInfo->patchInfo.interfaceDescriptorData; delete blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization; delete blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization; delete[](uint64_t *) blockInfo->heapInfo.pDsh; } if (mockProgram) { mockProgram->decRefInternal(); } } Context *getContext() { return &mockProgram->getContext(); } void setReflectionSurface(GraphicsAllocation *reflectionSurface) { kernelReflectionSurface = reflectionSurface; } MockProgram *mockProgram; KernelInfo *mockKernelInfo = nullptr; }; class MockSchedulerKernel : public SchedulerKernel { public: MockSchedulerKernel(Program 
*programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg) : SchedulerKernel(programArg, kernelInfoArg, deviceArg){}; }; class MockDebugKernel : public MockKernel { public: MockDebugKernel(Program *program, KernelInfo &kernelInfo, const ClDevice &device) : MockKernel(program, kernelInfo, device) { if (!kernelInfo.patchInfo.pAllocateSystemThreadSurface) { SPatchAllocateSystemThreadSurface *patchToken = new SPatchAllocateSystemThreadSurface; patchToken->BTI = 0; patchToken->Offset = 0; patchToken->PerThreadSystemThreadSurfaceSize = MockDebugKernel::perThreadSystemThreadSurfaceSize; patchToken->Size = sizeof(SPatchAllocateSystemThreadSurface); patchToken->Token = iOpenCL::PATCH_TOKEN_ALLOCATE_SIP_SURFACE; kernelInfo.patchInfo.pAllocateSystemThreadSurface = patchToken; systemThreadSurfaceAllocated = true; } } ~MockDebugKernel() override { if (systemThreadSurfaceAllocated) { delete kernelInfo.patchInfo.pAllocateSystemThreadSurface; } } static const uint32_t perThreadSystemThreadSurfaceSize; bool systemThreadSurfaceAllocated = false; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_lrca_helper.h000066400000000000000000000010141363734646600264040ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/aub_mem_dump/aub_mem_dump.h" struct MockLrcaHelper : AubMemDump::LrcaHelper { mutable uint32_t setContextSaveRestoreFlagsCalled = 0; MockLrcaHelper(uint32_t base) : AubMemDump::LrcaHelper(base) {} void setContextSaveRestoreFlags(uint32_t &value) const override { setContextSaveRestoreFlagsCalled++; AubMemDump::LrcaHelper::setContextSaveRestoreFlags(value); } };compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_mdi.h000066400000000000000000000015321363734646600247020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"opencl/source/helpers/dispatch_info.h" using namespace NEO; class MockMultiDispatchInfo : public MultiDispatchInfo { public: using MultiDispatchInfo::dispatchInfos; MockMultiDispatchInfo(Kernel *kernel) : MultiDispatchInfo(kernel) { DispatchInfo di(kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}); dispatchInfos.push_back(di); } MockMultiDispatchInfo(std::vector kernels) { for (auto kernel : kernels) { DispatchInfo di(kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}); dispatchInfos.push_back(di); } } MockMultiDispatchInfo(std::vector dis) { for (auto di : dis) { dispatchInfos.push_back(*di); } } }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_memory_manager.cpp000066400000000000000000000117361363734646600274750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/memory_manager/deferred_deleter.h" #include "opencl/source/helpers/surface_formats.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include namespace NEO { void MockMemoryManager::setDeferredDeleter(DeferredDeleter *deleter) { deferredDeleter.reset(deleter); } void MockMemoryManager::overrideAsyncDeleterFlag(bool newValue) { asyncDeleterEnabled = newValue; if (asyncDeleterEnabled && deferredDeleter == nullptr) { deferredDeleter = createDeferredDeleter(); } } void *MockMemoryManager::allocateSystemMemory(size_t size, size_t alignment) { if (failAllocateSystemMemory) { return nullptr; } return OsAgnosticMemoryManager::allocateSystemMemory(redundancyRatio * size, alignment); } GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) { AllocationProperties adjustedProperties(properties); adjustedProperties.size = redundancyRatio * properties.size; return 
OsAgnosticMemoryManager::allocateGraphicsMemoryWithProperties(adjustedProperties);
}

// Records that an image allocation was requested, forwards to the base
// MemoryManager and, when redundancyRatio != 1, zero-fills
// imgInfo->size * redundancyRatio bytes of the returned buffer.
// Returns whatever the base call returned.
GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryForImage(const AllocationData &allocationData) {
    allocateForImageCalled = true;
    auto *allocation = MemoryManager::allocateGraphicsMemoryForImage(allocationData);
    // Guard against a null result before touching the buffer, as the other
    // allocation overrides in this file do.
    if (allocation != nullptr && redundancyRatio != 1) {
        memset((unsigned char *)allocation->getUnderlyingBuffer(), 0, allocationData.imgInfo->size * redundancyRatio);
    }
    return allocation;
}

// Records that a shareable allocation was requested and forwards to the base implementation.
GraphicsAllocation *MockMemoryManager::allocateShareableMemory(const AllocationData &allocationData) {
    allocateForShareableCalled = true;
    return OsAgnosticMemoryManager::allocateShareableMemory(allocationData);
}

// Records the 64kb request and the preferRenderCompressed flag that came with it,
// forwards to the base implementation and, on success, attaches a new default Gmm
// whose isRenderCompressed mirrors the recorded flag.
GraphicsAllocation *MockMemoryManager::allocateGraphicsMemory64kb(const AllocationData &allocationData) {
    allocation64kbPageCreated = true;
    preferRenderCompressedFlagPassed = allocationData.flags.preferRenderCompressed;

    auto allocation = OsAgnosticMemoryManager::allocateGraphicsMemory64kb(allocationData);
    if (allocation) {
        allocation->setDefaultGmm(new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocation->getUnderlyingBuffer(), allocationData.size, false, preferRenderCompressedFlagPassed, true, {}));
        allocation->getDefaultGmm()->isRenderCompressed = preferRenderCompressedFlagPassed;
    }
    return allocation;
}

// Device-pool allocation with two injectable failure modes:
//   failInDevicePool          -> status = RetryInNonDevicePool, returns nullptr
//   failInDevicePoolWithError -> status = Error, returns nullptr
// Otherwise forwards to the base implementation; on success it records the call and,
// if local memory is enabled for the allocation's root device, overrides the memory
// pool to LocalMemory.
GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) {
    if (failInDevicePool) {
        status = AllocationStatus::RetryInNonDevicePool;
        return nullptr;
    }
    if (failInDevicePoolWithError) {
        status = AllocationStatus::Error;
        return nullptr;
    }

    auto allocation = OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(allocationData, status);
    if (allocation) {
        allocationInDevicePoolCreated = true;
        if (localMemorySupported[allocation->getRootDeviceIndex()]) {
            // NOTE(review): the static_cast target type is not visible in this copy of the file - confirm against upstream.
            static_cast(allocation)->overrideMemoryPool(MemoryPool::LocalMemory);
        }
    }
    return allocation;
}
GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) { if (failInAllocateWithSizeAndAlignment) { return nullptr; } allocationCreated = true; return OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment(allocationData); } GraphicsAllocation *MockMemoryManager::allocate32BitGraphicsMemory(uint32_t rootDeviceIndex, size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType) { bool allocateMemory = ptr == nullptr; AllocationData allocationData{}; MockAllocationProperties properties(rootDeviceIndex, allocateMemory, size, allocationType); getAllocationData(allocationData, properties, ptr, createStorageInfoFromProperties(properties)); return allocate32BitGraphicsMemoryImpl(allocationData); } GraphicsAllocation *MockMemoryManager::allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) { if (failAllocate32Bit) { return nullptr; } return OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(allocationData); } FailMemoryManager::FailMemoryManager(int32_t failedAllocationsCount, ExecutionEnvironment &executionEnvironment) : MockMemoryManager(executionEnvironment) { this->failedAllocationsCount = failedAllocationsCount; } FailMemoryManager::FailMemoryManager(int32_t failedAllocationsCount, ExecutionEnvironment &executionEnvironment, bool enableLocalMemory) : MockMemoryManager(enableLocalMemory, executionEnvironment) { this->failedAllocationsCount = failedAllocationsCount; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_memory_manager.h000066400000000000000000000264241363734646600271420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include 
"opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_host_ptr_manager.h" #include "gmock/gmock.h" namespace NEO { template class MemoryManagerCreate : public T { public: using T::T; template MemoryManagerCreate(bool enable64kbPages, bool enableLocalMemory, U &&... args) : T(std::forward(args)...) { std::fill(this->enable64kbpages.begin(), this->enable64kbpages.end(), enable64kbPages); std::fill(this->localMemorySupported.begin(), this->localMemorySupported.end(), enableLocalMemory); } }; class MockMemoryManager : public MemoryManagerCreate { public: using MemoryManager::allocateGraphicsMemoryForNonSvmHostPtr; using MemoryManager::allocateGraphicsMemoryInPreferredPool; using MemoryManager::allocateGraphicsMemoryWithAlignment; using MemoryManager::allocateGraphicsMemoryWithProperties; using MemoryManager::AllocationData; using MemoryManager::createGraphicsAllocation; using MemoryManager::createStorageInfoFromProperties; using MemoryManager::getAllocationData; using MemoryManager::gfxPartitions; using MemoryManager::localMemoryUsageBankSelector; using MemoryManager::multiContextResourceDestructor; using MemoryManager::overrideAllocationData; using MemoryManager::pageFaultManager; using MemoryManager::registeredEngines; using MemoryManager::supportsMultiStorageResources; using MemoryManager::useInternal32BitAllocator; using MemoryManager::useNonSvmHostPtrAlloc; using OsAgnosticMemoryManager::allocateGraphicsMemoryForImageFromHostPtr; using MemoryManagerCreate::MemoryManagerCreate; using MemoryManager::isCopyRequired; using MemoryManager::reservedMemory; MockMemoryManager(ExecutionEnvironment &executionEnvironment) : MockMemoryManager(false, executionEnvironment) {} MockMemoryManager(bool enableLocalMemory, ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(false, enableLocalMemory, executionEnvironment) { hostPtrManager.reset(new MockHostPtrManager); }; MockMemoryManager() : MockMemoryManager(*(new 
MockExecutionEnvironment(defaultHwInfo.get()))) { mockExecutionEnvironment.reset(static_cast(&executionEnvironment)); }; MockMemoryManager(bool enable64pages, bool enableLocalMemory) : MemoryManagerCreate(enable64pages, enableLocalMemory, *(new MockExecutionEnvironment(defaultHwInfo.get()))) { mockExecutionEnvironment.reset(static_cast(&executionEnvironment)); } GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override; void setDeferredDeleter(DeferredDeleter *deleter); void overrideAsyncDeleterFlag(bool newValue); GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData) override; GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) override; int redundancyRatio = 1; GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override; GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override; GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties) override; void *allocateSystemMemory(size_t size, size_t alignment) override; void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override { freeGraphicsMemoryCalled++; OsAgnosticMemoryManager::freeGraphicsMemoryImpl(gfxAllocation); }; void *lockResourceImpl(GraphicsAllocation &gfxAllocation) override { lockResourceCalled++; return OsAgnosticMemoryManager::lockResourceImpl(gfxAllocation); } void unlockResourceImpl(GraphicsAllocation &gfxAllocation) override { unlockResourceCalled++; OsAgnosticMemoryManager::unlockResourceImpl(gfxAllocation); } void handleFenceCompletion(GraphicsAllocation *graphicsAllocation) override { handleFenceCompletionCalled++; OsAgnosticMemoryManager::handleFenceCompletion(graphicsAllocation); } void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override { if (failReserveAddress) { return nullptr; } return 
OsAgnosticMemoryManager::reserveCpuAddressRange(size, rootDeviceIndex); } bool isCpuCopyRequired(const void *ptr) override { return cpuCopyRequired; } GraphicsAllocation *allocate32BitGraphicsMemory(uint32_t rootDeviceIndex, size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType); GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) override; void forceLimitedRangeAllocator(uint32_t rootDeviceIndex, uint64_t range) { getGfxPartition(rootDeviceIndex)->init(range, 0, 0, gfxPartitions.size()); } uint32_t freeGraphicsMemoryCalled = 0u; uint32_t unlockResourceCalled = 0u; uint32_t lockResourceCalled = 0u; uint32_t handleFenceCompletionCalled = 0u; bool allocationCreated = false; bool allocation64kbPageCreated = false; bool allocationInDevicePoolCreated = false; bool failInDevicePool = false; bool failInDevicePoolWithError = false; bool failInAllocateWithSizeAndAlignment = false; bool preferRenderCompressedFlagPassed = false; bool allocateForImageCalled = false; bool allocateForShareableCalled = false; bool failReserveAddress = false; bool failAllocateSystemMemory = false; bool failAllocate32Bit = false; bool cpuCopyRequired = false; std::unique_ptr mockExecutionEnvironment; }; using AllocationData = MockMemoryManager::AllocationData; class GMockMemoryManager : public MockMemoryManager { public: GMockMemoryManager(const ExecutionEnvironment &executionEnvironment) : MockMemoryManager(const_cast(executionEnvironment)){}; MOCK_METHOD2(populateOsHandles, MemoryManager::AllocationStatus(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex)); MOCK_METHOD1(allocateGraphicsMemoryForNonSvmHostPtr, GraphicsAllocation *(const AllocationData &)); MemoryManager::AllocationStatus MemoryManagerPopulateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) { return OsAgnosticMemoryManager::populateOsHandles(handleStorage, rootDeviceIndex); } }; class MockAllocSysMemAgnosticMemoryManager : public 
OsAgnosticMemoryManager { public: MockAllocSysMemAgnosticMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(executionEnvironment) { ptrRestrictions = nullptr; testRestrictions.minAddress = 0; } AlignedMallocRestrictions *getAlignedMallocRestrictions() override { return ptrRestrictions; } void *allocateSystemMemory(size_t size, size_t alignment) override { constexpr size_t minAlignment = 16; alignment = std::max(alignment, minAlignment); return alignedMalloc(size, alignment); } AlignedMallocRestrictions testRestrictions; AlignedMallocRestrictions *ptrRestrictions; }; class FailMemoryManager : public MockMemoryManager { public: using MemoryManager::allocateGraphicsMemoryWithProperties; using MockMemoryManager::MockMemoryManager; FailMemoryManager(int32_t failedAllocationsCount, ExecutionEnvironment &executionEnvironment); FailMemoryManager(int32_t failedAllocationsCount, ExecutionEnvironment &executionEnvironment, bool localMemory); GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override { if (failedAllocationsCount <= 0) { return nullptr; } failedAllocationsCount--; return OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment(allocationData); } GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) override { return nullptr; } GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override { return nullptr; } GraphicsAllocation *allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) override { return nullptr; } GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) override { return nullptr; } GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { return nullptr; } GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, 
uint32_t rootDeviceIndex) override { return nullptr; } void *lockResourceImpl(GraphicsAllocation &gfxAllocation) override { return nullptr; }; void unlockResourceImpl(GraphicsAllocation &gfxAllocation) override{}; MemoryManager::AllocationStatus populateOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override { return AllocationStatus::Error; }; void cleanOsHandles(OsHandleStorage &handleStorage, uint32_t rootDeviceIndex) override{}; uint64_t getSystemSharedMemory(uint32_t rootDeviceIndex) override { return 0; }; GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override { return nullptr; }; GraphicsAllocation *allocateGraphicsMemoryForImage(const AllocationData &allocationData) override { return nullptr; } GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) override { return nullptr; } int32_t failedAllocationsCount = 0; }; class GMockMemoryManagerFailFirstAllocation : public MockMemoryManager { public: GMockMemoryManagerFailFirstAllocation(bool enableLocalMemory, const ExecutionEnvironment &executionEnvironment) : MockMemoryManager(enableLocalMemory, const_cast(executionEnvironment)){}; GMockMemoryManagerFailFirstAllocation(const ExecutionEnvironment &executionEnvironment) : GMockMemoryManagerFailFirstAllocation(false, executionEnvironment){}; MOCK_METHOD2(allocateGraphicsMemoryInDevicePool, GraphicsAllocation *(const AllocationData &, AllocationStatus &)); GraphicsAllocation *baseAllocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) { return OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(allocationData, status); } GraphicsAllocation *allocateNonSystemGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) { auto allocation = baseAllocateGraphicsMemoryInDevicePool(allocationData, status); if (!allocation) { allocation = allocateGraphicsMemory(allocationData); 
} static_cast(allocation)->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); return allocation; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_memory_operations_handler.h000066400000000000000000000013621363734646600314020ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_operations_handler.h" namespace NEO { class GraphicsAllocation; class MockMemoryOperationsHandler : public MemoryOperationsHandler { public: MockMemoryOperationsHandler() {} MemoryOperationsStatus makeResident(ArrayRef gfxAllocations) override { return MemoryOperationsStatus::UNSUPPORTED; } MemoryOperationsStatus evict(GraphicsAllocation &gfxAllocation) override { return MemoryOperationsStatus::UNSUPPORTED; } MemoryOperationsStatus isResident(GraphicsAllocation &gfxAllocation) override { return MemoryOperationsStatus::UNSUPPORTED; } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_os_context.h000066400000000000000000000011431363734646600263140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_context.h" namespace NEO { class MockOsContext : public OsContext { public: MockOsContext(uint32_t contextId, DeviceBitfield deviceBitfield, aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority, bool internalEngine, bool rootDevice) : OsContext(contextId, deviceBitfield, engineType, preemptionMode, lowPriority, internalEngine, rootDevice) {} }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_ostime.h000066400000000000000000000017611363734646600254350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_time.h" namespace NEO { 
class MockOSTime : public OSTime { public: bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override { static int PerfTicks = 0; pGpuCpuTime->GPUTimeStamp = ++PerfTicks; pGpuCpuTime->CPUTimeinNS = PerfTicks; return true; } bool getCpuTime(uint64_t *timeStamp) override { static int PerfTicks = 0; *timeStamp = ++PerfTicks; return true; }; double getHostTimerResolution() const override { return 0; } double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { return OSTime::getDeviceTimerResolution(hwInfo); } uint64_t getCpuRawTimestamp() override { return 0; } static std::unique_ptr create() { return std::unique_ptr(new MockOSTime()); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_ostime_win.h000066400000000000000000000007401363734646600263060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/os_time_win.h" namespace NEO { class MockOSTimeWin : public OSTimeWin { public: MockOSTimeWin(Wddm *inWddm) { wddm = inWddm; } double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { return OSTimeWin::getDynamicDeviceTimerResolution(hwInfo); }; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_physical_address_allocator.h000066400000000000000000000007011363734646600315070ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/memory_manager/physical_address_allocator.h" using namespace NEO; class MockPhysicalAddressAllocator : public PhysicalAddressAllocator { public: using PhysicalAddressAllocator::initialPageAddress; using PhysicalAddressAllocator::mainAllocator; using PhysicalAddressAllocator::PhysicalAddressAllocator; }; 
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_pipe.h000066400000000000000000000016621363734646600250720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/mem_obj/pipe.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" using namespace NEO; class MockPipeStorage { public: MockPipeStorage() { mockGfxAllocation = new MockGraphicsAllocation(data, sizeof(data) / 2); } MockPipeStorage(bool unaligned) { mockGfxAllocation = new MockGraphicsAllocation(alignUp(&data, 4), sizeof(data) / 2); } char data[256]{}; MockGraphicsAllocation *mockGfxAllocation = nullptr; }; class MockPipe : public MockPipeStorage, public Pipe { public: MockPipe(Context *context) : MockPipeStorage(), Pipe(context, 0, 1, 128, nullptr, &data, mockGfxAllocation) { } ~MockPipe() override { if (!getContext()) { delete mockGfxAllocation; } } }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_platform.cpp000066400000000000000000000020071363734646600263060ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_platform.h" #include "shared/source/device/device.h" #include "shared/source/os_interface/device_factory.h" namespace NEO { bool initPlatform() { auto pPlatform = platform(); return pPlatform->initialize(DeviceFactory::createDevices(*pPlatform->peekExecutionEnvironment())); } bool MockPlatform::initializeWithNewDevices() { executionEnvironment.prepareRootDeviceEnvironments(1u); return Platform::initialize(DeviceFactory::createDevices(executionEnvironment)); } Platform *platform() { if (platformsImpl.empty()) { return nullptr; } return platformsImpl[0].get(); } Platform *constructPlatform() { static std::mutex mutex; std::unique_lock lock(mutex); if (platformsImpl.empty()) { 
platformsImpl.push_back(std::make_unique(*(new ExecutionEnvironment()))); } return platformsImpl[0].get(); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_platform.h000066400000000000000000000012431363734646600257540ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "opencl/source/platform/platform.h" namespace NEO { class MockPlatform : public Platform { public: using Platform::fillGlobalDispatchTable; using Platform::initializationLoopHelper; MockPlatform() : MockPlatform(*(new ExecutionEnvironment())) {} MockPlatform(ExecutionEnvironment &executionEnvironment) : Platform(executionEnvironment) {} bool initializeWithNewDevices(); }; Platform *platform(); Platform *constructPlatform(); bool initPlatform(); } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_program.cpp000066400000000000000000000114461363734646600261400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_program.h" #include "shared/source/compiler_interface/compiler_cache.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/helpers/hash.h" #include "shared/source/program/program_info_from_patchtokens.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/source/context/context.h" #include "opencl/source/program/create.inl" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/helpers/ult_limits.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" namespace NEO { GlobalMockSipProgram *GlobalMockSipProgram::sipProgram; ExecutionEnvironment GlobalMockSipProgram::executionEnvironment; Device *MockProgram::getDevicePtr() { return 
this->pDevice; } std::string MockProgram::getCachedFileName() const { auto hwInfo = this->context->getDevice(0)->getHardwareInfo(); auto input = ArrayRef(this->sourceCode.c_str(), this->sourceCode.size()); auto opts = ArrayRef(this->options.c_str(), this->options.size()); auto internalOpts = ArrayRef(this->internalOptions.c_str(), this->internalOptions.size()); return CompilerCache::getCachedFileName(hwInfo, input, opts, internalOpts); } cl_int GlobalMockSipProgram::processGenBinary() { return CL_SUCCESS; } cl_int GlobalMockSipProgram::processGenBinaryOnce() { cl_int ret = Program::processGenBinary(); sipAllocationStorage = alignedMalloc(this->kernelInfoArray[0]->heapInfo.pKernelHeader->KernelHeapSize, MemoryConstants::pageSize); this->kernelInfoArray[0]->kernelAllocation = new MockGraphicsAllocation(sipAllocationStorage, this->kernelInfoArray[0]->heapInfo.pKernelHeader->KernelHeapSize); return ret; } void GlobalMockSipProgram::resetAllocationState() { auto allocation = static_cast(this->kernelInfoArray[0]->kernelAllocation); for (uint32_t index = 0u; index < allocation->usageInfos.size(); index++) { this->kernelInfoArray[0]->kernelAllocation->releaseResidencyInOsContext(index); } allocation->resetInspectionIds(); } void GlobalMockSipProgram::initSipProgram() { cl_int retVal = 0; std::vector binary = MockCompilerInterface::getDummyGenBinary(); executionEnvironment.prepareRootDeviceEnvironments(maxRootDeviceCount); for (auto i = 0u; i < executionEnvironment.rootDeviceEnvironments.size(); i++) { executionEnvironment.rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } executionEnvironment.calculateMaxOsContextCount(); sipProgram = Program::createFromGenBinary(executionEnvironment, nullptr, binary.data(), binary.size(), true, &retVal, nullptr); DEBUG_BREAK_IF(retVal != 0); sipProgram->processGenBinaryOnce(); } void GlobalMockSipProgram::resetAllocation(GraphicsAllocation *allocation) { this->kernelInfoArray[0]->kernelAllocation = allocation; } 
GraphicsAllocation *GlobalMockSipProgram::getAllocation() { return this->kernelInfoArray[0]->kernelAllocation; } void GlobalMockSipProgram::deleteAllocation() { delete this->kernelInfoArray[0]->kernelAllocation; alignedFree(sipAllocationStorage); this->kernelInfoArray[0]->kernelAllocation = nullptr; } void GlobalMockSipProgram::shutDownSipProgram() { sipProgram->deleteAllocation(); delete sipProgram; } Program *GlobalMockSipProgram::getSipProgramWithCustomBinary() { NEO::PatchTokenBinary::ProgramFromPatchtokens programTokens; programTokens.kernels.resize(1); const uint8_t isa[] = "kernel morphEUs()"; const char name[] = "sip"; SProgramBinaryHeader progHeader = {}; progHeader.NumberOfKernels = 1; SKernelBinaryHeaderCommon kernHeader = {}; kernHeader.KernelNameSize = sizeof(name); kernHeader.KernelHeapSize = sizeof(isa); kernHeader.KernelUnpaddedSize = sizeof(isa); programTokens.header = &progHeader; programTokens.kernels[0].header = &kernHeader; programTokens.kernels[0].isa = isa; programTokens.kernels[0].name = name; NEO::ProgramInfo programInfo; NEO::populateProgramInfo(programInfo, programTokens); Program *ret = new Program(executionEnvironment, nullptr, false, nullptr); ret->processProgramInfo(programInfo); return ret; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_program.h000066400000000000000000000132761363734646600256100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/string.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" #include "gmock/gmock.h" #include namespace NEO { class GraphicsAllocation; //////////////////////////////////////////////////////////////////////////////// // Program - Core implementation 
//////////////////////////////////////////////////////////////////////////////// class MockProgram : public Program { public: using Program::createProgramFromBinary; using Program::internalOptionsToExtract; using Program::isKernelDebugEnabled; using Program::linkBinary; using Program::separateBlockKernels; using Program::updateNonUniformFlag; using Program::applyAdditionalOptions; using Program::areSpecializationConstantsInitialized; using Program::blockKernelManager; using Program::constantSurface; using Program::context; using Program::createdFrom; using Program::debugData; using Program::debugDataSize; using Program::exportedFunctionsSurface; using Program::extractInternalOptions; using Program::getKernelInfo; using Program::globalSurface; using Program::irBinary; using Program::irBinarySize; using Program::isSpirV; using Program::linkerInput; using Program::options; using Program::packDeviceBinary; using Program::packedDeviceBinary; using Program::packedDeviceBinarySize; using Program::pDevice; using Program::programBinaryType; using Program::sourceCode; using Program::specConstantsIds; using Program::specConstantsSizes; using Program::specConstantsValues; using Program::symbols; using Program::unpackedDeviceBinary; using Program::unpackedDeviceBinarySize; template MockProgram(T &&... args) : Program(std::forward(args)...) { } ~MockProgram() override { if (contextSet) context = nullptr; } KernelInfo mockKernelInfo; void setBuildOptions(const char *buildOptions) { options = buildOptions != nullptr ? 
buildOptions : ""; } std::string &getInternalOptions() { return internalOptions; }; void setConstantSurface(GraphicsAllocation *gfxAllocation) { constantSurface = gfxAllocation; } void setGlobalSurface(GraphicsAllocation *gfxAllocation) { globalSurface = gfxAllocation; } void setDevice(Device *device) { this->pDevice = device; }; std::vector &getKernelInfoArray() { return kernelInfoArray; } void addKernelInfo(KernelInfo *inInfo) { kernelInfoArray.push_back(inInfo); } std::vector &getParentKernelInfoArray() { return parentKernelInfoArray; } std::vector &getSubgroupKernelInfoArray() { return subgroupKernelInfoArray; } void setContext(Context *context) { this->context = context; contextSet = true; } void SetBuildStatus(cl_build_status st) { buildStatus = st; } void SetSourceCode(const char *ptr) { sourceCode = ptr; } void ClearOptions() { options = ""; } void SetCreatedFromBinary(bool createdFromBin) { isCreatedFromBinary = createdFromBin; } void ClearLog() { buildLog.clear(); } void SetGlobalVariableTotalSize(size_t globalVarSize) { globalVarTotalSize = globalVarSize; } void SetDevice(Device *pDev) { pDevice = pDev; } void SetIrBinary(char *ptr, bool isSpirv) { irBinary.reset(ptr); this->isSpirV = isSpirV; } void SetIrBinarySize(size_t bsz, bool isSpirv) { irBinarySize = bsz; this->isSpirV = isSpirV; } std::string getCachedFileName() const; void setAllowNonUniform(bool allow) { allowNonUniform = allow; } Device *getDevicePtr(); bool isFlagOption(ConstStringRef option) override { if (isFlagOptionOverride != -1) { return (isFlagOptionOverride > 0); } return Program::isFlagOption(option); } bool isOptionValueValid(ConstStringRef option, ConstStringRef value) override { if (isOptionValueValidOverride != -1) { return (isOptionValueValidOverride > 0); } return Program::isOptionValueValid(option, value); } cl_int rebuildProgramFromIr() { this->isCreatedFromBinary = false; this->buildStatus = CL_BUILD_NONE; std::unordered_map builtins; auto &device = this->getDevice(); 
return this->build(&device, this->options.c_str(), false, builtins); } bool contextSet = false; int isFlagOptionOverride = -1; int isOptionValueValidOverride = -1; }; class GlobalMockSipProgram : public Program { public: using Program::Program; GlobalMockSipProgram(ExecutionEnvironment &executionEnvironment) : Program(executionEnvironment) { } cl_int processGenBinary() override; cl_int processGenBinaryOnce(); void resetAllocationState(); void resetAllocation(GraphicsAllocation *allocation); void deleteAllocation(); GraphicsAllocation *getAllocation(); static void initSipProgram(); static void shutDownSipProgram(); static GlobalMockSipProgram *sipProgram; static Program *getSipProgramWithCustomBinary(); protected: void *sipAllocationStorage; static ExecutionEnvironment executionEnvironment; }; class GMockProgram : public Program { public: using Program::Program; MOCK_METHOD0(appendKernelDebugOptions, bool(void)); }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_sampler.h000066400000000000000000000020671363734646600256000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sampler/sampler.h" namespace NEO { struct MockSampler : public Sampler { public: MockSampler(Context *context, cl_bool normalizedCoordinates, cl_addressing_mode addressingMode, cl_filter_mode filterMode, cl_filter_mode mipFilterMode = CL_FILTER_NEAREST, float lodMin = 0.0f, float lodMax = 0.0f) : Sampler(context, normalizedCoordinates, addressingMode, filterMode, mipFilterMode, lodMin, lodMax) { } cl_context getContext() const { return context; } cl_bool getNormalizedCoordinates() const { return normalizedCoordinates; } cl_addressing_mode getAddressingMode() const { return addressingMode; } cl_filter_mode getFilterMode() const { return filterMode; } void setArg(void *memory) override { } }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_sharing_factory.h000066400000000000000000000005101363734646600273060ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/sharing_factory.h" class SharingFactoryMock : public NEO::SharingFactory { public: using NEO::SharingFactory::sharings; SharingFactoryMock() = default; ~SharingFactoryMock() = default; }; compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_sip.cpp000066400000000000000000000054111363734646600252570ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_sip.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/os_interface/os_inc_base.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "cif/macros/enable.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include #include namespace NEO { MockSipKernel::MockSipKernel(SipKernelType type, Program *sipProgram) : SipKernel(type, sipProgram) { this->mockSipMemoryAllocation = std::make_unique(0u, GraphicsAllocation::AllocationType::KERNEL_ISA, nullptr, MemoryConstants::pageSize * 10u, 0u, MemoryConstants::pageSize, MemoryPool::System4KBPages); } MockSipKernel::MockSipKernel() : SipKernel(SipKernelType::Csr, nullptr) { this->mockSipMemoryAllocation = std::make_unique(0u, GraphicsAllocation::AllocationType::KERNEL_ISA, nullptr, MemoryConstants::pageSize * 10u, 0u, MemoryConstants::pageSize, MemoryPool::System4KBPages); this->program = new MockProgram(this->executionEnvironment, nullptr, false, nullptr); } MockSipKernel::~MockSipKernel() = default; std::vector MockSipKernel::dummyBinaryForSip; 
// Returns the cached dummy binary, loading it on first use.
std::vector MockSipKernel::getDummyGenBinary() {
    if (dummyBinaryForSip.empty()) {
        dummyBinaryForSip = getBinary();
    }
    return dummyBinaryForSip;
}

// Loads the "CopyBuffer_simd16_" ".gen" test kernel via loadDataFromFile() and
// returns its bytes; aborts through UNRECOVERABLE_IF when nothing was loaded.
std::vector MockSipKernel::getBinary() {
    std::string testFile;
    retrieveBinaryKernelFilename(testFile, "CopyBuffer_simd16_", ".gen");

    size_t binarySize = 0;
    auto binary = loadDataFromFile(testFile.c_str(), binarySize);
    UNRECOVERABLE_IF(binary == nullptr);

    std::vector ret{binary.get(), binary.get() + binarySize};

    return ret;
}

// Unconditionally (re)loads the cached dummy binary.
void MockSipKernel::initDummyBinary() {
    dummyBinaryForSip = getBinary();
}

// Drops the cached dummy binary.
void MockSipKernel::shutDown() {
    MockSipKernel::dummyBinaryForSip.clear();
}

// Returns the mock allocation created in the constructor (non-owning).
GraphicsAllocation *MockSipKernel::getSipAllocation() const {
    return mockSipMemoryAllocation.get();
}
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_sip.h000066400000000000000000000016241363734646600247260ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/built_ins/sip.h"

#include "opencl/test/unit_test/mocks/mock_execution_environment.h"

#include
#include

namespace NEO {

class MemoryAllocation;

// SipKernel test double that owns its own mock allocation and execution
// environment, and shares one lazily loaded dummy binary between instances.
class MockSipKernel : public SipKernel {
  public:
    using SipKernel::program;
    using SipKernel::type;

    MockSipKernel(SipKernelType type, Program *sipProgram);
    MockSipKernel();
    ~MockSipKernel() override;

    // Process-wide cache filled by getDummyGenBinary()/initDummyBinary().
    static std::vector dummyBinaryForSip;
    static std::vector getDummyGenBinary();
    static std::vector getBinary();
    static void initDummyBinary();
    static void shutDown();

    GraphicsAllocation *getSipAllocation() const override;

    std::unique_ptr mockSipMemoryAllocation;
    MockExecutionEnvironment executionEnvironment;
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_source_level_debugger.h000066400000000000000000000057751363734646600305010ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/source_level_debugger/source_level_debugger.h"

#include "gmock/gmock.h"

#include

// NOTE(review): the diagnostic state is pushed here but no matching
// "#pragma clang diagnostic pop" appears in the visible part of this header,
// so the suppression seems to extend into every includer - confirm this is intended.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
#endif

using namespace NEO;

// Debugger mock that keeps the real SourceLevelDebugger behaviour but starts
// with a fixed device handle and lets tests toggle isActive.
class MockSourceLevelDebugger : public SourceLevelDebugger {
  public:
    using SourceLevelDebugger::debuggerLibrary;
    using SourceLevelDebugger::deviceHandle;

    MockSourceLevelDebugger() : SourceLevelDebugger(SourceLevelDebugger::loadDebugger()) {
        this->deviceHandle = mockDeviceHandle;
    }

    MockSourceLevelDebugger(OsLibrary *library) : SourceLevelDebugger(library) {
        this->deviceHandle = mockDeviceHandle;
    }

    void setActive(bool active) {
        isActive = active;
    }
    static const uint32_t mockDeviceHandle = 23;
};

// Debugger mock that is active from construction and answers every
// notification with a fixed value instead of calling into a debugger library.
class MockActiveSourceLevelDebugger : public SourceLevelDebugger {
  public:
    using SourceLevelDebugger::debuggerLibrary;
    using SourceLevelDebugger::deviceHandle;

    MockActiveSourceLevelDebugger() : SourceLevelDebugger(nullptr) {
        this->deviceHandle = mockDeviceHandle;
        isActive = true;
    }

    MockActiveSourceLevelDebugger(OsLibrary *library) : SourceLevelDebugger(library) {
        this->deviceHandle = mockDeviceHandle;
        isActive = true;
    }

    void setActive(bool active) {
        isActive = active;
    }
    bool isOptimizationDisabled() const override {
        return isOptDisabled;
    }
    bool isDebuggerActive() override {
        return isActive;
    }
    bool notifyNewDevice(uint32_t deviceHandle) override {
        return false;
    }
    bool notifyDeviceDestruction() override {
        return true;
    }
    // Reports the preconfigured sourceCodeFilename back to the caller.
    bool notifySourceCode(const char *sourceCode, size_t size, std::string &filename) const override {
        filename = sourceCodeFilename;
        return true;
    }
    bool notifyKernelDebugData(const DebugData *debugData, const std::string &name, const void *isa, size_t isaSize) const override {
        return false;
    }
    bool initialize(bool useLocalMemory) override {
        return false;
    }

    static const uint32_t mockDeviceHandle = 23;
    bool isOptDisabled = false;
    std::string sourceCodeFilename;
};

// gmock variant: every notification is a mock method; only the active flag is
// implemented directly.
class GMockSourceLevelDebugger : public SourceLevelDebugger {
  public:
    GMockSourceLevelDebugger(OsLibrary *library) : SourceLevelDebugger(library) {
    }
    void setActive(bool active) {
        isActive = active;
    }

    bool isDebuggerActive() override {
        return isActive;
    }

    MOCK_METHOD0(notifyDeviceDestruction, bool(void));
    MOCK_CONST_METHOD4(notifyKernelDebugData, bool(const DebugData *debugData, const std::string &name, const void *isa, size_t isaSize));
    MOCK_CONST_METHOD0(isOptimizationDisabled, bool());
    MOCK_METHOD1(notifyNewDevice, bool(uint32_t));
    MOCK_CONST_METHOD3(notifySourceCode, bool(const char *, size_t, std::string &));
    MOCK_METHOD1(initialize, bool(bool));
};
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_submissions_aggregator.h000066400000000000000000000006621363734646600307140ustar00rootroot00000000000000/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/command_stream/submissions_aggregator.h"

namespace NEO {
// Gives tests read access to the aggregator's command buffer list and inspection id.
struct mockSubmissionsAggregator : public SubmissionAggregator {
    CommandBufferList &peekCommandBuffers() {
        return this->cmdBuffers;
    }
    uint32_t peekInspectionId() {
        return this->inspectionId;
    }
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_svm_manager.h000066400000000000000000000006561363734646600264360ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/memory_manager/unified_memory_manager.h"
namespace NEO {
// Re-exports SVMAllocsManager internals and inherits its constructors.
struct MockSVMAllocsManager : SVMAllocsManager {
    using SVMAllocsManager::memoryManager;
    using SVMAllocsManager::SVMAllocs;
    using SVMAllocsManager::SVMAllocsManager;
    using SVMAllocsManager::svmMapOperations;
};
} // namespace NEOcompute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_tbx_csr.h000066400000000000000000000051671363734646600256050ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/command_stream/preemption.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/hw_info.h"

#include "opencl/source/aub/aub_center.h"
#include "opencl/source/command_stream/tbx_command_stream_receiver_hw.h"

#include "gmock/gmock.h"

#include

namespace NEO {

// TBX command stream receiver spy: each wrapper calls the base implementation
// and then sets a "...Called" flag that tests can inspect.
template class MockTbxCsr : public TbxCommandStreamReceiverHw {
  public:
    using TbxCommandStreamReceiverHw::writeMemory;
    using TbxCommandStreamReceiverHw::allocationsForDownload;
    // Always constructed for root device index 0.
    MockTbxCsr(ExecutionEnvironment &executionEnvironment)
        : TbxCommandStreamReceiverHw(executionEnvironment, 0) {}

    // Not marked override in the original; presumably hides or overrides the
    // base function depending on whether it is virtual - TODO confirm.
    void initializeEngine() {
        TbxCommandStreamReceiverHw::initializeEngine();
        initializeEngineCalled = true;
    }

    void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override {
        CommandStreamReceiverSimulatedHw::writeMemoryWithAubManager(graphicsAllocation);
        writeMemoryWithAubManagerCalled = true;
    }

    void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override {
        TbxCommandStreamReceiverHw::writeMemory(gpuAddress, cpuAddress, size, memoryBank, entryBits);
        writeMemoryCalled = true;
    }
    // Also records the overrideRingHead argument of the latest submission.
    void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) override {
        TbxCommandStreamReceiverHw::submitBatchBuffer(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, entryBits, overrideRingHead);
        overrideRingHeadPassed = overrideRingHead;
        submitBatchBufferCalled = true;
    }
    void pollForCompletion() override {
        TbxCommandStreamReceiverHw::pollForCompletion();
        pollForCompletionCalled = true;
    }
    // Note: this sets makeCoherentCalled, not a download-specific flag.
    void downloadAllocation(GraphicsAllocation &gfxAllocation) override {
        TbxCommandStreamReceiverHw::downloadAllocation(gfxAllocation);
        makeCoherentCalled = true;
    }
    bool initializeEngineCalled = false;
    bool writeMemoryWithAubManagerCalled = false;
    bool writeMemoryCalled = false;
    bool submitBatchBufferCalled = false;
    bool overrideRingHeadPassed = false;
    bool pollForCompletionCalled = false;
    // The next two flags are never written in this class.
    bool expectMemoryEqualCalled = false;
    bool expectMemoryNotEqualCalled = false;
    bool makeCoherentCalled = false;
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_tbx_sockets.h000066400000000000000000000017101363734646600264570ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "opencl/source/tbx/tbx_sockets.h"

namespace NEO {

// Socket stub: every operation reports success without doing any I/O;
// writeMemory additionally captures the "type" argument.
class MockTbxSockets : public TbxSockets {
  public:
    MockTbxSockets(){};
    ~MockTbxSockets() override = default;

    bool init(const std::string &hostNameOrIp, uint16_t port) override { return true; };
    void close() override{};

    bool writeGTT(uint32_t gttOffset, uint64_t entry) override { return true; };

    bool readMemory(uint64_t offset, void *data, size_t size) override { return true; };
    bool writeMemory(uint64_t offset, const void *data, size_t size, uint32_t type) override {
        typeCapturedFromWriteMemory = type;
        return true;
    };

    bool readMMIO(uint32_t offset, uint32_t *data) override { return true; };
    bool writeMMIO(uint32_t offset, uint32_t data) override { return true; };

    uint32_t typeCapturedFromWriteMemory = 0;
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_tbx_stream.h000066400000000000000000000005221363734646600262770ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "opencl/source/command_stream/tbx_command_stream_receiver.h"

namespace NEO {

// Exposes TbxStream::socket to tests.
struct MockTbxStream : public TbxCommandStreamReceiver::TbxStream {
    using TbxCommandStreamReceiver::TbxStream::socket;
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_timestamp_container.h000066400000000000000000000030761363734646600302030ustar00rootroot00000000000000/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include "shared/source/helpers/timestamp_packet.h"

namespace NEO {

// Tag allocator spy: records every node handed to returnTag() and
// returnTagToFreePool() before forwarding to the base class.
template class MockTagAllocator : public TagAllocator {
  public:
    using BaseClass = TagAllocator;
    using BaseClass::freeTags;
    using BaseClass::usedTags;
    using NodeType = typename BaseClass::NodeType;

    MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount = 10)
        : BaseClass(rootDeviceIndex, memoryManager, tagCount, MemoryConstants::cacheLineSize, sizeof(TagType), false) {}

    void returnTag(NodeType *node) override {
        releaseReferenceNodes.push_back(node);
        BaseClass::returnTag(node);
    }

    void returnTagToFreePool(NodeType *node) override {
        returnedToFreePoolNodes.push_back(node);
        BaseClass::returnTagToFreePool(node);
    }

    std::vector releaseReferenceNodes;
    std::vector returnedToFreePoolNodes;
};

// Container pre-filled with numberOfPreallocatedTags tags taken from the
// given allocator.
class MockTimestampPacketContainer : public TimestampPacketContainer {
  public:
    using TimestampPacketContainer::timestampPacketNodes;

    MockTimestampPacketContainer(TagAllocator &tagAllocator, size_t numberOfPreallocatedTags) {
        for (size_t i = 0; i < numberOfPreallocatedTags; i++) {
            add(tagAllocator.getTag());
        }
    }

    // Bounds-checked access (uses at()).
    TagNode *getNode(size_t position) {
        return timestampPacketNodes.at(position);
    }
};
} // namespace NEO
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_wddm.cpp000066400000000000000000000301451363734646600254210ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/test/unit_test/mocks/mock_wddm.h"

#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/os_interface/windows/gdi_interface.h"
#include "shared/source/os_interface/windows/os_environment_win.h"
#include "shared/source/os_interface/windows/wddm_allocation.h"

#include "opencl/test/unit_test/mock_gdi/mock_gdi.h"
#include "opencl/test/unit_test/mocks/mock_wddm_residency_allocations_container.h"
#include "opencl/test/unit_test/mocks/mock_wddm_residency_logger.h"

#include "gtest/gtest.h"

using namespace NEO;

// Opens up HwDeviceId::osEnvironment so the constructor below can patch it.
struct mockHwDeviceId : public HwDeviceId {
    using HwDeviceId::osEnvironment;
};

// The hw device id is first built with whatever osEnvironment pointer the
// execution environment currently holds (possibly null). If none exists yet,
// one is created, and the device id is then re-pointed at it.
WddmMock::WddmMock(RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(std::make_unique(ADAPTER_HANDLE, LUID{}, rootDeviceEnvironment.executionEnvironment.osEnvironment.get()), rootDeviceEnvironment) {
    if (!rootDeviceEnvironment.executionEnvironment.osEnvironment.get()) {
        rootDeviceEnvironment.executionEnvironment.osEnvironment = std::make_unique();
    }
    static_cast(this->hwDeviceId.get())->osEnvironment = rootDeviceEnvironment.executionEnvironment.osEnvironment.get();
    this->temporaryResources = std::make_unique(this);
};

// Fails the running test if any reserved address was not released.
WddmMock::~WddmMock() {
    EXPECT_EQ(0, reservedAddresses.size());
}

// Records call count, handle count and every handle, then either forwards to
// Wddm or returns the preset makeResidentStatus.
bool WddmMock::makeResident(const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t totalSize) {
    makeResidentResult.called++;
    makeResidentResult.handleCount = count;
    for (auto i = 0u; i < count; i++) {
        makeResidentResult.handlePack.push_back(handles[i]);
    }
    if (callBaseMakeResident) {
        return makeResidentResult.success = Wddm::makeResident(handles, count, cantTrimFurther, numberOfBytesToTrim, totalSize);
    } else {
        makeResidentResult.success = makeResidentStatus;
        return makeResidentStatus;
    }
}

// Counts the call and forwards to Wddm::evict.
bool WddmMock::evict(const D3DKMT_HANDLE *handles, uint32_t num, uint64_t &sizeToTrim) {
    makeNonResidentResult.called++;
    return makeNonResidentResult.success = Wddm::evict(handles, num, sizeToTrim);
}

// Convenience overload: uses the Standard partition range unless the
// allocation has an aligned CPU pointer, in which case the range is
// [0, maxSvmAddress].
bool WddmMock::mapGpuVirtualAddress(WddmAllocation *allocation) {
    D3DGPU_VIRTUAL_ADDRESS minimumAddress = gfxPartition.Standard.Base;
    D3DGPU_VIRTUAL_ADDRESS maximumAddress = gfxPartition.Standard.Limit;
    if (allocation->getAlignedCpuPtr()) {
        minimumAddress = 0u;
        maximumAddress = MemoryConstants::maxSvmAddress;
    }
    return mapGpuVirtualAddress(allocation->getDefaultGmm(), allocation->getDefaultHandle(), minimumAddress, maximumAddress,
reinterpret_cast(allocation->getAlignedCpuPtr()), allocation->getGpuAddressToModify()); } bool WddmMock::mapGpuVirtualAddress(Gmm *gmm, D3DKMT_HANDLE handle, D3DGPU_VIRTUAL_ADDRESS minimumAddress, D3DGPU_VIRTUAL_ADDRESS maximumAddress, D3DGPU_VIRTUAL_ADDRESS preferredAddress, D3DGPU_VIRTUAL_ADDRESS &gpuPtr) { mapGpuVirtualAddressResult.called++; mapGpuVirtualAddressResult.cpuPtrPassed = reinterpret_cast(preferredAddress); mapGpuVirtualAddressResult.uint64ParamPassed = preferredAddress; if (callBaseMapGpuVa) { return mapGpuVirtualAddressResult.success = Wddm::mapGpuVirtualAddress(gmm, handle, minimumAddress, maximumAddress, preferredAddress, gpuPtr); } else { gpuPtr = preferredAddress; return mapGpuVaStatus; } } bool WddmMock::freeGpuVirtualAddress(D3DGPU_VIRTUAL_ADDRESS &gpuPtr, uint64_t size) { freeGpuVirtualAddressResult.called++; freeGpuVirtualAddressResult.uint64ParamPassed = gpuPtr; freeGpuVirtualAddressResult.sizePassed = size; return freeGpuVirtualAddressResult.success = Wddm::freeGpuVirtualAddress(gpuPtr, size); } NTSTATUS WddmMock::createAllocation(WddmAllocation *wddmAllocation) { if (wddmAllocation) { return createAllocation(wddmAllocation->getAlignedCpuPtr(), wddmAllocation->getDefaultGmm(), wddmAllocation->getHandleToModify(0u), wddmAllocation->resourceHandle, wddmAllocation->getSharedHandleToModify()); } return false; } NTSTATUS WddmMock::createAllocation(const void *alignedCpuPtr, const Gmm *gmm, D3DKMT_HANDLE &outHandle, D3DKMT_HANDLE &outResourceHandle, D3DKMT_HANDLE *outSharedHandle) { createAllocationResult.called++; if (callBaseDestroyAllocations) { createAllocationStatus = Wddm::createAllocation(alignedCpuPtr, gmm, outHandle, outResourceHandle, outSharedHandle); createAllocationResult.success = createAllocationStatus == STATUS_SUCCESS; } else { createAllocationResult.success = true; outHandle = ALLOCATION_HANDLE; return createAllocationStatus; } return createAllocationStatus; } bool WddmMock::createAllocation64k(WddmAllocation *wddmAllocation) 
{ if (wddmAllocation) { return createAllocation64k(wddmAllocation->getDefaultGmm(), wddmAllocation->getHandleToModify(0u)); } return false; } bool WddmMock::createAllocation64k(const Gmm *gmm, D3DKMT_HANDLE &outHandle) { createAllocationResult.called++; return createAllocationResult.success = Wddm::createAllocation64k(gmm, outHandle); } bool WddmMock::destroyAllocations(const D3DKMT_HANDLE *handles, uint32_t allocationCount, D3DKMT_HANDLE resourceHandle) { destroyAllocationResult.called++; if (callBaseDestroyAllocations) { return destroyAllocationResult.success = Wddm::destroyAllocations(handles, allocationCount, resourceHandle); } else { return true; } } bool WddmMock::destroyAllocation(WddmAllocation *alloc, OsContextWin *osContext) { const D3DKMT_HANDLE *allocationHandles = nullptr; uint32_t allocationCount = 0; D3DKMT_HANDLE resourceHandle = 0; void *reserveAddress = alloc->getReservedAddressPtr(); if (alloc->peekSharedHandle()) { resourceHandle = alloc->resourceHandle; } else { allocationHandles = alloc->getHandles().data(); allocationCount = 1; } auto success = destroyAllocations(allocationHandles, allocationCount, resourceHandle); ::alignedFree(alloc->getDriverAllocatedCpuPtr()); releaseReservedAddress(reserveAddress); return success; } bool WddmMock::openSharedHandle(D3DKMT_HANDLE handle, WddmAllocation *alloc) { if (failOpenSharedHandle) { return false; } else { return Wddm::openSharedHandle(handle, alloc); } } bool WddmMock::createContext(OsContextWin &osContext) { createContextResult.called++; return createContextResult.success = Wddm::createContext(osContext); } void WddmMock::applyAdditionalContextFlags(CREATECONTEXT_PVTDATA &privateData, OsContextWin &osContext) { applyAdditionalContextFlagsResult.called++; Wddm::applyAdditionalContextFlags(privateData, osContext); } bool WddmMock::destroyContext(D3DKMT_HANDLE context) { destroyContextResult.called++; return destroyContextResult.success = Wddm::destroyContext(context); } bool 
WddmMock::queryAdapterInfo() {
    // Count the call, forward to Wddm and remember the outcome.
    queryAdapterInfoResult.called++;
    return queryAdapterInfoResult.success = Wddm::queryAdapterInfo();
}

// Records every submission argument before forwarding to Wddm::submit.
bool WddmMock::submit(uint64_t commandBuffer, size_t size, void *commandHeader, WddmSubmitArguments &submitArguments) {
    submitResult.called++;
    submitResult.commandBufferSubmitted = commandBuffer;
    submitResult.size = size;
    submitResult.commandHeaderSubmitted = commandHeader;
    submitResult.submitArgs = submitArguments;
    return submitResult.success = Wddm::submit(commandBuffer, size, commandHeader, submitArguments);
}

// Counts the call and forwards to Wddm.
bool WddmMock::waitOnGPU(D3DKMT_HANDLE context) {
    waitOnGPUResult.called++;
    return waitOnGPUResult.success = Wddm::waitOnGPU(context);
}

// Forwards to Wddm; success means a non-null pointer came back. The
// applyMakeResidentPriorToLock flag is stored in uint64ParamPassed.
void *WddmMock::lockResource(const D3DKMT_HANDLE &handle, bool applyMakeResidentPriorToLock, size_t size) {
    lockResult.called++;
    auto ptr = Wddm::lockResource(handle, applyMakeResidentPriorToLock, size);
    lockResult.success = ptr != nullptr;
    lockResult.uint64ParamPassed = applyMakeResidentPriorToLock;
    return ptr;
}

// Always recorded as successful, then forwarded.
void WddmMock::unlockResource(const D3DKMT_HANDLE &handle) {
    unlockResult.called++;
    unlockResult.success = true;
    Wddm::unlockResource(handle);
}

// Remembers every handle passed in, then forwards.
void WddmMock::kmDafLock(D3DKMT_HANDLE handle) {
    kmDafLockResult.called++;
    kmDafLockResult.success = true;
    kmDafLockResult.lockedAllocations.push_back(handle);
    Wddm::kmDafLock(handle);
}

// Returns the value set through setKmDafEnabled().
bool WddmMock::isKmDafEnabled() const {
    return kmDafEnabled;
}

void WddmMock::setKmDafEnabled(bool state) {
    kmDafEnabled = state;
}

void WddmMock::setHwContextId(unsigned long hwContextId) {
    this->hwContextId = hwContextId;
}

// Replaces the Gdi object held by the OS environment; reset() takes ownership of gdi.
void WddmMock::resetGdi(Gdi *gdi) {
    static_cast(this->rootDeviceEnvironment.executionEnvironment.osEnvironment.get())->gdi.reset(gdi);
}

// Overwrites Heap32 slot 3 with [base, base + size].
void WddmMock::setHeap32(uint64_t base, uint64_t size) {
    gfxPartition.Heap32[3].Base = base;
    gfxPartition.Heap32[3].Limit = base + size;
}

GMM_GFX_PARTITIONING *WddmMock::getGfxPartitionPtr() {
    return &gfxPartition;
}

bool WddmMock::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence
&monitoredFence) { waitFromCpuResult.called++; waitFromCpuResult.uint64ParamPassed = lastFenceValue; waitFromCpuResult.monitoredFence = &monitoredFence; return waitFromCpuResult.success = Wddm::waitFromCpu(lastFenceValue, monitoredFence); } void *WddmMock::virtualAlloc(void *inPtr, size_t size, unsigned long flags, unsigned long type) { void *address = Wddm::virtualAlloc(inPtr, size, flags, type); virtualAllocAddress = reinterpret_cast(address); return address; } int WddmMock::virtualFree(void *ptr, size_t size, unsigned long flags) { int success = Wddm::virtualFree(ptr, size, flags); return success; } void WddmMock::releaseReservedAddress(void *reservedAddress) { releaseReservedAddressResult.called++; if (reservedAddress != nullptr) { std::set::iterator it; it = reservedAddresses.find(reservedAddress); EXPECT_NE(reservedAddresses.end(), it); reservedAddresses.erase(it); } Wddm::releaseReservedAddress(reservedAddress); } bool WddmMock::reserveValidAddressRange(size_t size, void *&reservedMem) { reserveValidAddressRangeResult.called++; bool ret = Wddm::reserveValidAddressRange(size, reservedMem); if (reservedMem != nullptr) { std::set::iterator it; it = reservedAddresses.find(reservedMem); EXPECT_EQ(reservedAddresses.end(), it); reservedAddresses.insert(reservedMem); } return ret; } VOID *WddmMock::registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmResidencyController &residencyController) { registerTrimCallbackResult.called++; return Wddm::registerTrimCallback(callback, residencyController); } D3DGPU_VIRTUAL_ADDRESS WddmMock::reserveGpuVirtualAddress(D3DGPU_VIRTUAL_ADDRESS minimumAddress, D3DGPU_VIRTUAL_ADDRESS maximumAddress, D3DGPU_SIZE_T size) { reserveGpuVirtualAddressResult.called++; return Wddm::reserveGpuVirtualAddress(minimumAddress, maximumAddress, size); } uint64_t *WddmMock::getPagingFenceAddress() { if (NEO::residencyLoggingAvailable) { getPagingFenceAddressResult.called++; } mockPagingFence++; return &mockPagingFence; } void 
WddmMock::waitOnPagingFenceFromCpu() {
    // Count the call and forward to Wddm.
    waitOnPagingFenceFromCpuResult.called++;
    Wddm::waitOnPagingFenceFromCpu();
}

// Either defers to Wddm or, when the base call is disabled, creates the
// residency logger directly if the WddmResidencyLogger debug flag is set.
void WddmMock::createPagingFenceLogger() {
    if (callBaseCreatePagingLogger) {
        Wddm::createPagingFenceLogger();
    } else {
        if (DebugManager.flags.WddmResidencyLogger.get()) {
            residencyLogger = std::make_unique(device, pagingFenceAddress);
        }
    }
}

// Hands out the current virtualAllocAddress and advances it without
// allocating anything. The size is bumped by one page and then truncated to
// a page multiple, so an already page-aligned size advances by one extra page.
void *GmockWddm::virtualAllocWrapper(void *inPtr, size_t size, uint32_t flags, uint32_t type) {
    void *tmp = reinterpret_cast(virtualAllocAddress);
    size += MemoryConstants::pageSize;
    size -= size % MemoryConstants::pageSize;
    virtualAllocAddress += size;
    return tmp;
}

// Starts handing out addresses from NEO::windowsMinAddress.
GmockWddm::GmockWddm(RootDeviceEnvironment &rootDeviceEnvironment) : WddmMock(rootDeviceEnvironment) {
    virtualAllocAddress = NEO::windowsMinAddress;
}
compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_wddm.h000066400000000000000000000164611363734646600250730ustar00rootroot00000000000000/*
 * Copyright (C) 2018-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/memory_manager/host_ptr_defines.h"
#include "shared/source/memory_manager/memory_constants.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"
#include "shared/source/os_interface/windows/wddm_residency_allocations_container.h"
#include "shared/source/os_interface/windows/windows_defs.h"

#include "opencl/test/unit_test/mocks/wddm_mock_helpers.h"

#include "gmock/gmock.h"

#include
#include

namespace NEO {
class GraphicsAllocation;

// Namespace-scope constant; WddmMock and GmockWddm each declare a member of
// the same name.
constexpr auto virtualAllocAddress = is64bit ?
0x7FFFF0000000 : 0xFF000000; class WddmMock : public Wddm { public: using Wddm::adapterBDF; using Wddm::createPagingFenceLogger; using Wddm::currentPagingFenceValue; using Wddm::dedicatedVideoMemory; using Wddm::device; using Wddm::featureTable; using Wddm::getSystemInfo; using Wddm::gmmMemory; using Wddm::hwDeviceId; using Wddm::mapGpuVirtualAddress; using Wddm::minAddress; using Wddm::pagingFenceAddress; using Wddm::pagingQueue; using Wddm::residencyLogger; using Wddm::temporaryResources; using Wddm::wddmInterface; WddmMock(RootDeviceEnvironment &rootDeviceEnvironment); ~WddmMock(); bool makeResident(const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t totalSize) override; bool evict(const D3DKMT_HANDLE *handles, uint32_t num, uint64_t &sizeToTrim) override; bool mapGpuVirtualAddress(Gmm *gmm, D3DKMT_HANDLE handle, D3DGPU_VIRTUAL_ADDRESS minimumAddress, D3DGPU_VIRTUAL_ADDRESS maximumAddress, D3DGPU_VIRTUAL_ADDRESS preferredAddress, D3DGPU_VIRTUAL_ADDRESS &gpuPtr) override; bool mapGpuVirtualAddress(WddmAllocation *allocation); bool freeGpuVirtualAddress(D3DGPU_VIRTUAL_ADDRESS &gpuPtr, uint64_t size) override; NTSTATUS createAllocation(const void *alignedCpuPtr, const Gmm *gmm, D3DKMT_HANDLE &outHandle, D3DKMT_HANDLE &outResource, D3DKMT_HANDLE *outSharedHandle) override; bool createAllocation64k(const Gmm *gmm, D3DKMT_HANDLE &outHandle) override; bool destroyAllocations(const D3DKMT_HANDLE *handles, uint32_t allocationCount, D3DKMT_HANDLE resourceHandle) override; NTSTATUS createAllocation(WddmAllocation *wddmAllocation); bool createAllocation64k(WddmAllocation *wddmAllocation); bool destroyAllocation(WddmAllocation *alloc, OsContextWin *osContext); bool openSharedHandle(D3DKMT_HANDLE handle, WddmAllocation *alloc) override; bool createContext(OsContextWin &osContext) override; void applyAdditionalContextFlags(CREATECONTEXT_PVTDATA &privateData, OsContextWin &osContext) override; bool 
destroyContext(D3DKMT_HANDLE context) override; bool queryAdapterInfo() override; bool submit(uint64_t commandBuffer, size_t size, void *commandHeader, WddmSubmitArguments &submitArguments) override; bool waitOnGPU(D3DKMT_HANDLE context) override; void *lockResource(const D3DKMT_HANDLE &handle, bool applyMakeResidentPriorToLock, size_t size) override; void unlockResource(const D3DKMT_HANDLE &handle) override; void kmDafLock(D3DKMT_HANDLE handle) override; bool isKmDafEnabled() const override; void setKmDafEnabled(bool state); void setHwContextId(unsigned long hwContextId); void setHeap32(uint64_t base, uint64_t size); GMM_GFX_PARTITIONING *getGfxPartitionPtr(); bool waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredFence) override; void *virtualAlloc(void *inPtr, size_t size, unsigned long flags, unsigned long type) override; int virtualFree(void *ptr, size_t size, unsigned long flags) override; void releaseReservedAddress(void *reservedAddress) override; VOID *registerTrimCallback(PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback, WddmResidencyController &residencyController) override; D3DGPU_VIRTUAL_ADDRESS reserveGpuVirtualAddress(D3DGPU_VIRTUAL_ADDRESS minimumAddress, D3DGPU_VIRTUAL_ADDRESS maximumAddress, D3DGPU_SIZE_T size) override; bool reserveValidAddressRange(size_t size, void *&reservedMem); PLATFORM *getGfxPlatform() { return gfxPlatform.get(); } uint64_t *getPagingFenceAddress() override; void waitOnPagingFenceFromCpu() override; void createPagingFenceLogger() override; bool configureDeviceAddressSpace() { configureDeviceAddressSpaceResult.called++; //create context cant be called before configureDeviceAddressSpace if (createContextResult.called > 0) { return configureDeviceAddressSpaceResult.success = false; } else { return configureDeviceAddressSpaceResult.success = Wddm::configureDeviceAddressSpace(); } } void resetGdi(Gdi *gdi); WddmMockHelpers::MakeResidentCall makeResidentResult; WddmMockHelpers::CallResult makeNonResidentResult; 
WddmMockHelpers::CallResult mapGpuVirtualAddressResult; WddmMockHelpers::FreeGpuVirtualAddressCall freeGpuVirtualAddressResult; WddmMockHelpers::CallResult createAllocationResult; WddmMockHelpers::CallResult destroyAllocationResult; WddmMockHelpers::CallResult destroyContextResult; WddmMockHelpers::CallResult queryAdapterInfoResult; WddmMockHelpers::SubmitResult submitResult; WddmMockHelpers::CallResult waitOnGPUResult; WddmMockHelpers::CallResult configureDeviceAddressSpaceResult; WddmMockHelpers::CallResult createContextResult; WddmMockHelpers::CallResult applyAdditionalContextFlagsResult; WddmMockHelpers::CallResult lockResult; WddmMockHelpers::CallResult unlockResult; WddmMockHelpers::KmDafLockCall kmDafLockResult; WddmMockHelpers::WaitFromCpuResult waitFromCpuResult; WddmMockHelpers::CallResult releaseReservedAddressResult; WddmMockHelpers::CallResult reserveValidAddressRangeResult; WddmMockHelpers::CallResult registerTrimCallbackResult; WddmMockHelpers::CallResult getPagingFenceAddressResult; WddmMockHelpers::CallResult reserveGpuVirtualAddressResult; WddmMockHelpers::CallResult waitOnPagingFenceFromCpuResult; NTSTATUS createAllocationStatus = STATUS_SUCCESS; bool mapGpuVaStatus = true; bool callBaseDestroyAllocations = true; bool failOpenSharedHandle = false; bool callBaseMapGpuVa = true; std::set reservedAddresses; uintptr_t virtualAllocAddress = NEO::windowsMinAddress; bool kmDafEnabled = false; uint64_t mockPagingFence = 0u; bool makeResidentStatus = true; bool callBaseMakeResident = true; bool callBaseCreatePagingLogger = true; }; struct GmockWddm : WddmMock { GmockWddm(RootDeviceEnvironment &rootDeviceEnvironment); ~GmockWddm() = default; bool virtualFreeWrapper(void *ptr, size_t size, uint32_t flags) { return true; } void *virtualAllocWrapper(void *inPtr, size_t size, uint32_t flags, uint32_t type); uintptr_t virtualAllocAddress; MOCK_METHOD5(makeResident, bool(const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t 
*numberOfBytesToTrim, size_t totalSize)); MOCK_METHOD3(evict, bool(const D3DKMT_HANDLE *handles, uint32_t num, uint64_t &sizeToTrim)); MOCK_METHOD1(createAllocationsAndMapGpuVa, NTSTATUS(OsHandleStorage &osHandles)); NTSTATUS baseCreateAllocationAndMapGpuVa(OsHandleStorage &osHandles) { return Wddm::createAllocationsAndMapGpuVa(osHandles); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_wddm_interface20.h000066400000000000000000000016571363734646600272560ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/wddm/wddm_interface.h" namespace NEO { class WddmMockInterface20 : public WddmInterface20 { public: using WddmInterface::createMonitoredFence; using WddmInterface20::WddmInterface20; void destroyMonitorFence(MonitoredFence &monitorFence) override { destroyMonitorFenceCalled++; WddmInterface20::destroyMonitorFence(monitorFence); } bool createMonitoredFence(MonitoredFence &monitorFence) override { createMonitoredFenceCalled++; if (createMonitoredFenceCalledFail) { return false; } return WddmInterface::createMonitoredFence(monitorFence); } uint32_t destroyMonitorFenceCalled = 0; uint32_t createMonitoredFenceCalled = 0; bool createMonitoredFenceCalledFail = false; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_wddm_interface23.h000066400000000000000000000015771363734646600272620ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/wddm/wddm_interface.h" namespace NEO { class WddmMockInterface23 : public WddmInterface23 { public: using WddmInterface23::WddmInterface23; bool createHwQueue(OsContextWin &osContext) override { createHwQueueCalled++; createHwQueueResult = forceCreateHwQueueFail ? 
false : WddmInterface23::createHwQueue(osContext); return createHwQueueResult; } void destroyMonitorFence(MonitoredFence &monitorFence) override { destroyMonitorFenceCalled++; WddmInterface23::destroyMonitorFence(monitorFence); } uint32_t createHwQueueCalled = 0; bool forceCreateHwQueueFail = false; bool createHwQueueResult = false; uint32_t destroyMonitorFenceCalled = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_wddm_residency_allocations_container.h000066400000000000000000000045061363734646600335670ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/memory_operations_status.h" #include "shared/source/os_interface/windows/wddm_residency_allocations_container.h" #include "opencl/test/unit_test/mocks/wddm_mock_helpers.h" namespace NEO { class Wddm; class MockWddmResidentAllocationsContainer : public WddmResidentAllocationsContainer { public: using WddmResidentAllocationsContainer::resourceHandles; using WddmResidentAllocationsContainer::resourcesLock; MockWddmResidentAllocationsContainer(Wddm *wddm) : WddmResidentAllocationsContainer(wddm) {} virtual ~MockWddmResidentAllocationsContainer() = default; MemoryOperationsStatus makeResidentResource(const D3DKMT_HANDLE &handle, size_t size) override { makeResidentResult.called++; makeResidentResult.operationSuccess = WddmResidentAllocationsContainer::makeResidentResource(handle, size); return makeResidentResult.operationSuccess; } MemoryOperationsStatus evictAllResources() override { evictAllResourcesResult.called++; evictAllResourcesResult.operationSuccess = WddmResidentAllocationsContainer::evictAllResources(); return evictAllResourcesResult.operationSuccess; } MemoryOperationsStatus evictResource(const D3DKMT_HANDLE &handle) override { evictResourceResult.called++; evictResourceResult.operationSuccess = WddmResidentAllocationsContainer::evictResource(handle); return 
evictResourceResult.operationSuccess; } std::unique_lock acquireLock(SpinLock &lock) override { acquireLockResult.called++; acquireLockResult.uint64ParamPassed = reinterpret_cast(&lock); return WddmResidentAllocationsContainer::acquireLock(lock); } void removeResource(const D3DKMT_HANDLE &handle) override { removeResourceResult.called++; WddmResidentAllocationsContainer::removeResource(handle); } WddmMockHelpers::MemoryOperationResult makeResidentResult; WddmMockHelpers::MemoryOperationResult acquireLockResult; WddmMockHelpers::MemoryOperationResult removeResourceResult; WddmMockHelpers::MemoryOperationResult evictAllResourcesResult; WddmMockHelpers::MemoryOperationResult evictResourceResult; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_wddm_residency_logger.h000066400000000000000000000017431363734646600304740ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/wddm/wddm_residency_logger.h" namespace NEO { struct MockWddmResidencyLogger : public WddmResidencyLogger { using WddmResidencyLogger::endTime; using WddmResidencyLogger::enterWait; using WddmResidencyLogger::makeResidentCall; using WddmResidencyLogger::makeResidentPagingFence; using WddmResidencyLogger::pagingLog; using WddmResidencyLogger::pendingMakeResident; using WddmResidencyLogger::pendingTime; using WddmResidencyLogger::startWaitPagingFence; using WddmResidencyLogger::waitStartTime; using WddmResidencyLogger::WddmResidencyLogger; void startWaitTime(UINT64 startWaitPagingFence) override { WddmResidencyLogger::startWaitTime(startWaitPagingFence); startWaitPagingFenceSave = this->startWaitPagingFence; } UINT64 startWaitPagingFenceSave = 0ull; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/mock_wddm_residency_logger_functions.h000066400000000000000000000012631363734646600325610ustar00rootroot00000000000000/* * Copyright (C) 
2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/wddm/wddm_residency_logger_defs.h" namespace NEO { namespace ResLog { extern uint32_t mockFopenCalled; extern uint32_t mockVfptrinfCalled; extern uint32_t mockFcloseCalled; inline FILE *mockFopen(const char *filename, const char *mode) { mockFopenCalled++; return reinterpret_cast(0x40); } inline int mockVfptrinf(FILE *stream, const char *format, va_list arg) { mockVfptrinfCalled++; return 0x10; } inline int mockFclose(FILE *stream) { mockFcloseCalled++; return 0; } } // namespace ResLog } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mocks/wddm_mock_helpers.h000066400000000000000000000023421363734646600266060ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/eviction_status.h" #include "shared/source/os_interface/windows/wddm/wddm_defs.h" #include "shared/source/os_interface/windows/windows_defs.h" #include namespace NEO { namespace WddmMockHelpers { struct CallResult { uint32_t called = 0; uint64_t uint64ParamPassed = -1; bool success = false; void *cpuPtrPassed = nullptr; }; struct MakeResidentCall : CallResult { std::vector handlePack; uint32_t handleCount = 0; }; struct KmDafLockCall : CallResult { std::vector lockedAllocations; }; struct WaitFromCpuResult : CallResult { const MonitoredFence *monitoredFence = nullptr; }; struct FreeGpuVirtualAddressCall : CallResult { uint64_t sizePassed = -1; }; struct MemoryOperationResult : CallResult { MemoryOperationsStatus operationSuccess = MemoryOperationsStatus::UNSUPPORTED; }; struct SubmitResult : CallResult { uint64_t commandBufferSubmitted = 0ull; void *commandHeaderSubmitted = nullptr; size_t size = 0u; WddmSubmitArguments submitArgs = {0}; }; } // namespace WddmMockHelpers } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/000077500000000000000000000000001363734646600234745ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/CMakeLists.txt000066400000000000000000000043371363734646600262430ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # project(igdrcl_mt_tests) set(OPENCL_MT_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) option(SHOW_VERBOSE_UTESTS_RESULTS "Use the default/verbose test output" ON) if(NOT SHOW_VERBOSE_UTESTS_RESULTS) set(igdrcl_mt_tests_LISTENER_OPTION "--disable_default_listener") endif() add_custom_target(run_mt_unit_tests) add_executable(igdrcl_mt_tests EXCLUDE_FROM_ALL ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${NEO_SOURCE_DIR}/opencl/test/unit_test/libult/os_interface.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/ult_configuration.cpp ${NEO_SOURCE_DIR}/opencl/source/aub/aub_stream_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_mode.h $ $ $ $ $ ) target_include_directories(igdrcl_mt_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/opencl/source/gen_common ) add_subdirectories() target_link_libraries(igdrcl_mt_tests ${TSAN_LIB}) target_link_libraries(igdrcl_mt_tests ${NEO_STATICALLY_LINKED_LIBRARIES_MOCKABLE}) target_link_libraries(igdrcl_mt_tests gmock-gtest) target_link_libraries(igdrcl_mt_tests igdrcl_mocks ${IGDRCL_EXTRA_LIBS}) if(WIN32) target_sources(igdrcl_mt_tests PRIVATE ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/windows/wddm_create.cpp ) else() target_sources(igdrcl_mt_tests PRIVATE ${NEO_SOURCE_DIR}/opencl/test/unit_test/os_interface/linux/drm_neo_create.cpp ) endif() if(WIN32) add_dependencies(igdrcl_mt_tests mock_gdi igdrcl_tests) endif() add_dependencies(igdrcl_mt_tests test_dynamic_lib) create_project_source_tree(igdrcl_mt_tests ${NEO_SOURCE_DIR}/runtime ${NEO_SOURCE_DIR}/unit_tests) 
set_target_properties(igdrcl_mt_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_property(TARGET igdrcl_mt_tests APPEND_STRING PROPERTY COMPILE_FLAGS ${TSAN_FLAGS}) if(NOT WIN32) set_property(TARGET igdrcl_mt_tests APPEND_STRING PROPERTY COMPILE_FLAGS " -g") endif() set_target_properties(run_mt_unit_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/api/000077500000000000000000000000001363734646600242455ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/api/CMakeLists.txt000066400000000000000000000012531363734646600270060ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_api # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/api/cl_api_tests.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/api/cl_create_user_event_tests_mt.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/api/cl_get_platform_ids_tests_mt.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/api/cl_intel_tracing_tests_mt.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/api/cl_set_mem_object_destructor_callback_tests_mt.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_api}) compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/cmake/000077500000000000000000000000001363734646600245545ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/cmake/run_mt_test_target.cmake000066400000000000000000000021031363734646600314630ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # string(REPLACE "/" ";" mt_test_config ${mt_test_config}) list(GET mt_test_config 0 product) list(GET mt_test_config 1 slices) list(GET mt_test_config 2 subslices) list(GET mt_test_config 3 eu_per_ss) add_custom_target(run_${product}_mt_unit_tests DEPENDS igdrcl_mt_tests) 
if(NOT WIN32) add_dependencies(run_${product}_mt_unit_tests copy_test_files_${product}) endif() add_dependencies(run_mt_unit_tests run_${product}_mt_unit_tests) set_target_properties(run_${product}_mt_unit_tests PROPERTIES FOLDER "${PLATFORM_SPECIFIC_TEST_TARGETS_FOLDER}/${product}") add_custom_command( TARGET run_${product}_mt_unit_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo "Running igdrcl_mt_tests ${product} ${slices}x${subslices}x${eu_per_ss}" COMMAND igdrcl_mt_tests --product ${product} --slices ${slices} --subslices ${subslices} --eu_per_ss ${eu_per_ss} --gtest_repeat=${GTEST_REPEAT} ${igdrcl_mt_tests_LISTENER_OPTION} ) add_dependencies(run_${product}_mt_unit_tests prepare_test_kernels)compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/command_queue/000077500000000000000000000000001363734646600263165ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/command_queue/CMakeLists.txt000066400000000000000000000011601363734646600310540ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_command_queue # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/enqueue_kernel_mt_tests.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/enqueue_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/ooq_task_tests_mt.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/ioq_task_tests_mt.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_command_queue}) compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/device_queue/000077500000000000000000000000001363734646600261375ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/device_queue/CMakeLists.txt000066400000000000000000000004741363734646600307040ustar00rootroot00000000000000# # Copyright (C) 2017-2020 
Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_device_queue # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_mt_tests.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_device_queue}) compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/device_queue/device_queue_mt_tests.cpp000066400000000000000000000031201363734646600332240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "test.h" using namespace NEO; typedef ::testing::Test DeviceQueueHwMtTest; HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwMtTest, givenTakenIgilCriticalSectionWhenSecondThreadIsWaitingThenDontHang) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = std::unique_ptr(new MockContext()); cl_queue_properties properties[3] = {0}; MockDeviceQueueHw mockDevQueue(context.get(), device.get(), properties[0]); auto igilCmdQueue = mockDevQueue.getIgilQueue(); auto igilCriticalSection = const_cast(&igilCmdQueue->m_controls.m_CriticalSection); *igilCriticalSection = DeviceQueue::ExecutionModelCriticalSection::Taken; EXPECT_FALSE(mockDevQueue.isEMCriticalSectionFree()); std::mutex mtx; auto thread = std::thread([&] { std::unique_lock inThreadLock(mtx); while (!mockDevQueue.isEMCriticalSectionFree()) { inThreadLock.unlock(); inThreadLock.lock(); } }); std::unique_lock lock(mtx); *igilCriticalSection = DeviceQueue::ExecutionModelCriticalSection::Free; lock.unlock(); thread.join(); EXPECT_TRUE(mockDevQueue.isEMCriticalSectionFree()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/event/000077500000000000000000000000001363734646600246155ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/event/CMakeLists.txt000066400000000000000000000005551363734646600273620ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_event # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/event/user_events_tests_mt.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_event}) compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/fixtures/000077500000000000000000000000001363734646600253455ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/fixtures/CMakeLists.txt000066400000000000000000000006671363734646600301160ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_fixtures # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/image_fixture.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/fixtures/platform_fixture.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_fixtures}) compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen11/000077500000000000000000000000001363734646600244075ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen11/CMakeLists.txt000066400000000000000000000001741363734646600271510ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN11) add_subdirectories() endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen11/icllp/000077500000000000000000000000001363734646600255125ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen11/icllp/CMakeLists.txt000066400000000000000000000003111363734646600302450ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_ICLLP) set(mt_test_config "icllp/1/8/8") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen12lp/000077500000000000000000000000001363734646600247445ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen12lp/CMakeLists.txt000066400000000000000000000001761363734646600275100ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN12LP) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen12lp/tgllp/000077500000000000000000000000001363734646600260665ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen12lp/tgllp/CMakeLists.txt000066400000000000000000000003121363734646600306220ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_TGLLP) set(mt_test_config "tgllp/1/6/16") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen8/000077500000000000000000000000001363734646600243355ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen8/CMakeLists.txt000066400000000000000000000001731363734646600270760ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN8) add_subdirectories() endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen8/bdw/000077500000000000000000000000001363734646600251115ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen8/bdw/CMakeLists.txt000066400000000000000000000003051363734646600276470ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_BDW) set(mt_test_config "bdw/1/3/8") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen9/000077500000000000000000000000001363734646600243365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen9/CMakeLists.txt000066400000000000000000000001731363734646600270770ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_GEN9) add_subdirectories() endif() compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen9/skl/000077500000000000000000000000001363734646600251275ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/gen9/skl/CMakeLists.txt000066400000000000000000000003051363734646600276650ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(TESTS_SKL) set(mt_test_config "skl/1/3/8") include(${OPENCL_MT_TEST_DIR}/cmake/run_mt_test_target.cmake) endif() compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/helpers/000077500000000000000000000000001363734646600251365ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/helpers/CMakeLists.txt000066400000000000000000000006561363734646600277050ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_helpers # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/interlocked_max_mt_tests.cpp # necessary dependencies from 
igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/base_object_tests_mt.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_helpers}) compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/helpers/interlocked_max_mt_tests.cpp000066400000000000000000000017141363734646600327370ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/interlocked_max.h" #include "gtest/gtest.h" #include TEST(MtTestInterlockedMaxFixture, givenCurrentPagingFenceValueWhenValueChangedThenValueIsSet) { std::atomic currentPagingFenceValue; std::atomic testCount; std::atomic maxValue; currentPagingFenceValue.store(0); testCount.store(100); maxValue.store(0); int threadsCount = 8; std::thread threads[8]; for (int i = 0; i < threadsCount; i++) { threads[i] = std::thread([&]() { while (testCount-- > 0) { uint64_t newVal = ++maxValue; interlockedMax(currentPagingFenceValue, newVal); } }); } for (int i = 0; i < threadsCount; i++) { threads[i].join(); } uint64_t endValue = currentPagingFenceValue.load(); EXPECT_EQ(endValue, 100u); }compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/memory_manager/000077500000000000000000000000001363734646600264765ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/memory_manager/CMakeLists.txt000066400000000000000000000007251363734646600312420ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_memory_manager # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter_clear_queue_mt_tests.cpp # necessary dependencies from igdrcl_tests ${NEO_SOURCE_DIR}/opencl/test/unit_test/memory_manager/deferred_deleter_mt_tests.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_memory_manager}) 
deferred_deleter_clear_queue_mt_tests.cpp000066400000000000000000000071551363734646600367130ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/memory_manager/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_deferrable_deletion.h" #include "opencl/test/unit_test/mocks/mock_deferred_deleter.h" #include "gtest/gtest.h" using namespace NEO; const int threadCount = 4; struct ClearQueueTest : public ::testing::Test, public ::testing::WithParamInterface { void SetUp() override { threadStopped = 0; startClear = false; deleter.reset(new MockDeferredDeleter()); } void TearDown() override { EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_EQ(0, deleter->getElementsToRelease()); } static void threadMethod(MockDeferredDeleter *deleter) { while (!startClear) ; deleter->clearQueue(); threadStopped++; } MockDeferrableDeletion *createDeletion() { return new MockDeferrableDeletion(); } std::unique_ptr deleter; static std::atomic startClear; static std::atomic threadStopped; }; std::atomic ClearQueueTest::startClear; std::atomic ClearQueueTest::threadStopped; TEST_P(ClearQueueTest, clearQueueAfterFeed) { auto elementsInQueue = GetParam(); EXPECT_EQ(0, deleter->clearCalled); for (int i = 0; i < elementsInQueue; i++) { deleter->DeferredDeleter::deferDeletion(createDeletion()); } std::thread threads[threadCount]; for (int i = 0; i < threadCount; i++) { threads[i] = std::thread(threadMethod, deleter.get()); } EXPECT_EQ(0, deleter->clearCalled); EXPECT_EQ(elementsInQueue, deleter->getElementsToRelease()); startClear = true; for (int i = 0; i < threadCount; i++) { threads[i].join(); } EXPECT_EQ(threadCount, deleter->clearCalled); EXPECT_EQ(0, deleter->getElementsToRelease()); } int paramsForClearQueueTest[] = {1, 10, 20, 50, 100}; INSTANTIATE_TEST_CASE_P(DeferredDeleterMtTests, ClearQueueTest, ::testing::ValuesIn(paramsForClearQueueTest)); class MyDeferredDeleter : public 
DeferredDeleter { public: bool isQueueEmpty() { std::lock_guard lock(queueMutex); return queue.peekIsEmpty(); } int getElementsToRelease() { return elementsToRelease; } bool isWorking() { return doWorkInBackground; } bool isThreadRunning() { return worker != nullptr; } int getClientsNum() { return numClients; } void forceSafeStop() { safeStop(); } }; struct DeferredDeleterMtTest : public ::testing::Test { void SetUp() override { deleter.reset(new MyDeferredDeleter()); } void TearDown() override { EXPECT_TRUE(deleter->isQueueEmpty()); EXPECT_EQ(0, deleter->getElementsToRelease()); } void waitForAsyncThread() { while (!deleter->isWorking()) { std::this_thread::yield(); } } std::unique_ptr deleter; }; TEST_F(DeferredDeleterMtTest, asyncThreadsStopDeferredDeleter) { deleter->addClient(); waitForAsyncThread(); EXPECT_TRUE(deleter->isThreadRunning()); EXPECT_TRUE(deleter->isWorking()); // Start worker thread std::thread t([&]() { deleter->forceSafeStop(); EXPECT_FALSE(deleter->isThreadRunning()); EXPECT_FALSE(deleter->isWorking()); }); deleter->forceSafeStop(); EXPECT_FALSE(deleter->isThreadRunning()); EXPECT_FALSE(deleter->isWorking()); t.join(); deleter->removeClient(); EXPECT_EQ(0, deleter->getClientsNum()); } compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/os_interface/000077500000000000000000000000001363734646600261355ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/os_interface/CMakeLists.txt000066400000000000000000000003261363734646600306760ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(UNIX) target_sources(igdrcl_mt_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/linux/drm_memory_manager_mt_tests.cpp ) endif() 
compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/os_interface/linux/000077500000000000000000000000001363734646600272745ustar00rootroot00000000000000drm_memory_manager_mt_tests.cpp000066400000000000000000000116751363734646600355210ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/os_interface/linux/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "gtest/gtest.h" #include #include #include using namespace NEO; using namespace std; TEST(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSharedAllocationIsCreatedFromMultipleThreadsThenSingleBoIsReused) { class MockDrm : public Drm { public: MockDrm(int fd, RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(fd), rootDeviceEnvironment) {} int ioctl(unsigned long request, void *arg) override { if (request == DRM_IOCTL_PRIME_FD_TO_HANDLE) { auto *primeToHandleParams = (drm_prime_handle *)arg; primeToHandleParams->handle = 10; } return 0; } }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto mock = new MockDrm(0, *executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(mock); auto memoryManager = make_unique(executionEnvironment); osHandle handle = 3; constexpr size_t maxThreads = 10; GraphicsAllocation *createdAllocations[maxThreads]; thread threads[maxThreads]; atomic index(0); auto createFunction = [&]() { size_t indexFree = index++; 
AllocationProperties properties(0, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); createdAllocations[indexFree] = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false); EXPECT_NE(nullptr, createdAllocations[indexFree]); }; for (size_t i = 0; i < maxThreads; i++) { threads[i] = std::thread(createFunction); } while (index < maxThreads) { EXPECT_GE(1u, memoryManager->sharingBufferObjects.size()); } for (size_t i = 0; i < maxThreads; i++) { threads[i].join(); memoryManager->freeGraphicsMemory(createdAllocations[i]); } } TEST(DrmMemoryManagerTest, givenMultipleThreadsWhenSharedAllocationIsCreatedThenPrimeFdToHandleDoesNotRaceWithClose) { class MockDrm : public Drm { public: MockDrm(int fd, RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(fd), rootDeviceEnvironment) { primeFdHandle = 1; closeHandle = 1; } atomic primeFdHandle; atomic closeHandle; int ioctl(unsigned long request, void *arg) override { if (request == DRM_IOCTL_PRIME_FD_TO_HANDLE) { auto *primeToHandleParams = (drm_prime_handle *)arg; primeToHandleParams->handle = primeFdHandle; // PrimeFdHandle should not be lower than closeHandle // GemClose shouldn't be executed concurrently with primtFdToHandle EXPECT_EQ(closeHandle.load(), primeFdHandle.load()); } else if (request == DRM_IOCTL_GEM_CLOSE) { closeHandle++; this_thread::yield(); primeFdHandle.store(closeHandle.load()); } return 0; } }; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); auto mock = new MockDrm(0, *executionEnvironment.rootDeviceEnvironments[0]); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(mock); auto memoryManager = make_unique(executionEnvironment); osHandle handle = 3; constexpr size_t maxThreads = 10; GraphicsAllocation *createdAllocations[maxThreads]; thread threads[maxThreads]; atomic index(0); auto createFunction 
= [&]() { size_t indexFree = index++; AllocationProperties properties(0, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); createdAllocations[indexFree] = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false); EXPECT_NE(nullptr, createdAllocations[indexFree]); this_thread::yield(); memoryManager->freeGraphicsMemory(createdAllocations[indexFree]); }; for (size_t i = 0; i < maxThreads; i++) { threads[i] = std::thread(createFunction); } for (size_t i = 0; i < maxThreads; i++) { threads[i].join(); } } compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/test_mode.h000066400000000000000000000003761363734646600256360ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/tests_configuration.h" namespace NEO { constexpr TestMode defaultTestMode = TestMode::UnitTests; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/utilities/000077500000000000000000000000001363734646600255075ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/utilities/CMakeLists.txt000066400000000000000000000005601363734646600302500ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_mt_tests_utilities # local files ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt # necessary dependencies from igdrcl_tests ${CMAKE_CURRENT_SOURCE_DIR}/reference_tracked_object_tests_mt.cpp ) target_sources(igdrcl_mt_tests PRIVATE ${IGDRCL_SRCS_mt_tests_utilities}) reference_tracked_object_tests_mt.cpp000066400000000000000000000106151363734646600350420ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/mt_tests/utilities/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/reference_tracked_object.h" #include "gtest/gtest.h" 
#include #include namespace NEO { struct MockReferenceTrackedObject : ReferenceTrackedObject { MockReferenceTrackedObject(std::atomic &marker, std::atomic &flagInsideCustomDeleter, std::atomic &flagUseCustomDeleter, std::atomic &flagAfterBgDecRefCount) : marker(marker), flagInsideCustomDeleter(flagInsideCustomDeleter), flagUseCustomDeleter(flagUseCustomDeleter), flagAfterBgDecRefCount(flagAfterBgDecRefCount) { } using DeleterFuncType = void (*)(MockReferenceTrackedObject *); DeleterFuncType getCustomDeleter() const { if (flagUseCustomDeleter == false) { return nullptr; } flagInsideCustomDeleter = true; while (flagAfterBgDecRefCount == false) { } const_cast(this)->SetMarker(marker); return nullptr; } virtual void SetMarker(std::atomic &marker) { marker = GetMarker(); } static int GetMarker() { return 1; } std::atomic ▮ std::atomic &flagInsideCustomDeleter; std::atomic &flagUseCustomDeleter; std::atomic &flagAfterBgDecRefCount; }; struct MockReferenceTrackedObjectDerivative : MockReferenceTrackedObject { using MockReferenceTrackedObject::MockReferenceTrackedObject; void SetMarker(std::atomic &marker) override { marker = GetMarker(); } static int GetMarker() { return 2; } }; void DecRefCount(MockReferenceTrackedObject *obj, bool useInternalRefCount, std::atomic *flagInsideCustomDeleter, std::atomic *flagUseCustomDeleter, std::atomic *flagAfterBgDecRefCount) { while (*flagInsideCustomDeleter == false) { } *flagUseCustomDeleter = false; if (useInternalRefCount) { obj->decRefInternal(); } else { obj->decRefApi(); } *flagAfterBgDecRefCount = true; } TEST(ReferenceTrackedObject, whenDecreasingApiRefcountSimultaneouslyWillRetrieveProperCustomDeleterWhileObjectIsStillAlive) { ASSERT_NE(MockReferenceTrackedObjectDerivative::GetMarker(), MockReferenceTrackedObject::GetMarker()); std::atomic marker; std::atomic flagInsideCustomDeleter; std::atomic flagUseCustomDeleter; std::atomic flagAfterBgDecRefCount; marker = 0; flagInsideCustomDeleter = false; flagUseCustomDeleter = true; 
flagAfterBgDecRefCount = false; MockReferenceTrackedObjectDerivative *obj = new MockReferenceTrackedObjectDerivative(marker, flagInsideCustomDeleter, flagUseCustomDeleter, flagAfterBgDecRefCount); obj->incRefApi(); obj->incRefApi(); ASSERT_EQ(2, obj->getRefApiCount()); ASSERT_EQ(2, obj->getRefInternalCount()); ASSERT_EQ(0, marker); std::thread bgThread(DecRefCount, obj, false, &flagInsideCustomDeleter, &flagUseCustomDeleter, &flagAfterBgDecRefCount); obj->decRefApi(); bgThread.join(); EXPECT_EQ(MockReferenceTrackedObjectDerivative::GetMarker(), marker); } TEST(ReferenceTrackedObject, whenDecreasingInternalRefcountSimultaneouslyWillRetrieveProperCustomDeleterWhileObjectIsStillAlive) { ASSERT_NE(MockReferenceTrackedObjectDerivative::GetMarker(), MockReferenceTrackedObject::GetMarker()); std::atomic marker; std::atomic flagInsideCustomDeleter; std::atomic flagUseCustomDeleter; std::atomic flagAfterBgDecRefCount; marker = 0; flagInsideCustomDeleter = false; flagUseCustomDeleter = true; flagAfterBgDecRefCount = false; MockReferenceTrackedObjectDerivative *obj = new MockReferenceTrackedObjectDerivative(marker, flagInsideCustomDeleter, flagUseCustomDeleter, flagAfterBgDecRefCount); obj->incRefInternal(); obj->incRefInternal(); ASSERT_EQ(2, obj->getRefInternalCount()); ASSERT_EQ(0, obj->getRefApiCount()); ASSERT_EQ(0, marker); std::thread bgThread(DecRefCount, obj, true, &flagInsideCustomDeleter, &flagUseCustomDeleter, &flagAfterBgDecRefCount); obj->decRefInternal(); bgThread.join(); EXPECT_EQ(MockReferenceTrackedObjectDerivative::GetMarker(), marker); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/000077500000000000000000000000001363734646600251465ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/CMakeLists.txt000066400000000000000000000136221363734646600277120ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
project(ocloc_tests) set(IGDRCL_SRCS_cloc ${OCLOC_DIRECTORY}/source/decoder/binary_decoder.cpp ${OCLOC_DIRECTORY}/source/decoder/binary_encoder.cpp ${OCLOC_DIRECTORY}/source/offline_compiler.cpp ${OCLOC_DIRECTORY}/source/ocloc_fatbinary.cpp ) set(IGDRCL_SRCS_offline_compiler_mock ${CMAKE_CURRENT_SOURCE_DIR}/decoder/mock/mock_decoder.h ${CMAKE_CURRENT_SOURCE_DIR}/decoder/mock/mock_encoder.h ${CMAKE_CURRENT_SOURCE_DIR}/decoder/mock/mock_iga_wrapper.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_argument_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_offline_compiler.h ${CMAKE_CURRENT_SOURCE_DIR}/mock/mock_sip_ocloc_tests.cpp ) set(CLOC_LIB_SRCS_UTILITIES ${OCLOC_DIRECTORY}/source/utilities/safety_caller.h ${OCLOC_DIRECTORY}/source/utilities//get_current_dir.h ) if(WIN32) list(APPEND CLOC_LIB_SRCS_UTILITIES ${OCLOC_DIRECTORY}/source/utilities/windows/safety_caller_windows.cpp ${OCLOC_DIRECTORY}/source/utilities/windows/safety_guard_windows.h ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.cpp ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.h ${OCLOC_DIRECTORY}/source/utilities/windows/get_current_dir_windows.cpp ) else() list(APPEND CLOC_LIB_SRCS_UTILITIES ${OCLOC_DIRECTORY}/source/utilities/linux/safety_caller_linux.cpp ${OCLOC_DIRECTORY}/source/utilities/linux/safety_guard_linux.h ${OCLOC_DIRECTORY}/source/utilities/linux/get_current_dir_linux.cpp ) endif() get_property(IGDRCL_SRCS_tests_compiler_mocks GLOBAL PROPERTY IGDRCL_SRCS_tests_compiler_mocks) set(IGDRCL_SRCS_offline_compiler_tests ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/decoder/decoder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/decoder/encoder_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/environment.h ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_fatbinary_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ocloc_fatbinary_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/offline_compiler_tests.h 
${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/helpers/file_io.cpp ${NEO_SHARED_DIRECTORY}/memory_manager/deferred_deleter.cpp ${NEO_SHARED_DIRECTORY}/memory_manager/deferred_deleter.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/test_files.cpp ${IGDRCL_SRCS_cloc} ${IGDRCL_SRCS_offline_compiler_mock} ${IGDRCL_SRCS_tests_compiler_mocks} ${CLOC_LIB_SRCS_LIB} ${CLOC_LIB_SRCS_UTILITIES} ) if(WIN32) list(APPEND IGDRCL_SRCS_offline_compiler_tests ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_thread_win.cpp ) else() list(APPEND IGDRCL_SRCS_offline_compiler_tests ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_thread_linux.cpp ) endif() link_directories(${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) add_executable(ocloc_tests ${IGDRCL_SRCS_offline_compiler_tests}) target_include_directories(ocloc_tests PRIVATE $ ) target_compile_definitions(ocloc_tests PUBLIC MOCKABLE_VIRTUAL=virtual $) target_link_libraries(ocloc_tests gmock-gtest) if(WIN32) target_link_libraries(ocloc_tests dbghelp) endif() if(UNIX) target_link_libraries(ocloc_tests dl pthread) endif() get_property(CLOC_LIB_FOLDER TARGET ocloc_lib PROPERTY FOLDER) get_property(CLOC_LIB_COMPILE_FLAGS TARGET ocloc_lib PROPERTY COMPILE_FLAGS) set_property(TARGET ocloc_tests PROPERTY FOLDER ${CLOC_LIB_FOLDER}) set_property(TARGET ocloc_tests APPEND_STRING PROPERTY COMPILE_FLAGS ${CLOC_LIB_COMPILE_FLAGS}) string(TOLOWER ${DEFAULT_TESTED_PLATFORM} CLOC_LIB_DEFAULT_DEVICE) add_custom_target(run_ocloc_tests ALL DEPENDS ocloc_tests ) macro(macro_for_each_platform) if("${PLATFORM_IT_LOWER}" STREQUAL "${CLOC_LIB_DEFAULT_DEVICE}") foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) if(${PLATFORM_IT}_IS_${PLATFORM_TYPE}) get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE}) add_dependencies(run_ocloc_tests prepare_test_kernels_${family_name_with_type}) neo_copy_test_files(copy_test_files_${family_name_with_type} ${family_name_with_type}) add_dependencies(run_ocloc_tests copy_test_files_${family_name_with_type}) set(run_tests_cmd 
ocloc_tests --device ${CLOC_LIB_DEFAULT_DEVICE} --family_type ${family_name_with_type}) endif() endforeach() endif() endmacro() macro(macro_for_each_gen) apply_macro_for_each_platform() endmacro() apply_macro_for_each_gen("TESTED") set_property(TARGET run_ocloc_tests PROPERTY FOLDER ${CLOC_LIB_FOLDER}) if(WIN32) add_custom_command( TARGET run_ocloc_tests POST_BUILD COMMAND echo deleting offline compiler files and directories... COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/copybuffer_${CLOC_LIB_DEFAULT_DEVICE}.bc COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/copybuffer_${CLOC_LIB_DEFAULT_DEVICE}.gen COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/copybuffer_${CLOC_LIB_DEFAULT_DEVICE}.bin COMMAND ${CMAKE_COMMAND} -E remove_directory ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/offline_compiler_test COMMAND ${run_tests_cmd} WORKING_DIRECTORY ${TargetDir} ) else() add_custom_command( TARGET run_ocloc_tests POST_BUILD COMMAND echo deleting offline compiler files and directories... 
COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.bc COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.gen COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.ll COMMAND ${CMAKE_COMMAND} -E remove ${TargetDir}/${CLOC_LIB_DEFAULT_DEVICE}/*.bin COMMAND ${CMAKE_COMMAND} -E remove_directory "${TargetDir}/offline_compiler_test" COMMAND ${run_tests_cmd} WORKING_DIRECTORY ${TargetDir} ) endif() add_subdirectories() create_project_source_tree(ocloc_tests) compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/decoder/000077500000000000000000000000001363734646600265535ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/decoder/decoder_tests.cpp000066400000000000000000000344271363734646600321200ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/array_count.h" #include "opencl/test/unit_test/test_files/patch_list.h" #include "gmock/gmock.h" #include "mock/mock_decoder.h" #include SProgramBinaryHeader createProgramBinaryHeader(const uint32_t numberOfKernels, const uint32_t patchListSize) { return SProgramBinaryHeader{MAGIC_CL, 0, 0, 0, numberOfKernels, 0, patchListSize}; } SKernelBinaryHeaderCommon createKernelBinaryHeaderCommon(const uint32_t kernelNameSize, const uint32_t patchListSize) { SKernelBinaryHeaderCommon kernelHeader = {}; kernelHeader.CheckSum = 0xFFFFFFFF; kernelHeader.ShaderHashCode = 0xFFFFFFFFFFFFFFFF; kernelHeader.KernelNameSize = kernelNameSize; kernelHeader.PatchListSize = patchListSize; return kernelHeader; } namespace NEO { TEST(DecoderTests, WhenParsingValidListOfParametersThenReturnValueIsZero) { std::vector args = { "ocloc", "decoder", "-file", "test_files/binary.bin", "-patch", "test_files/patch", "-dump", "test_files/created"}; MockDecoder decoder; EXPECT_EQ(0, decoder.validateInput(args)); } TEST(DecoderTests, 
WhenMissingParametersThenValidateInputReturnsErrorCode) { std::vector args = { "ocloc", "decoder", "-patch", "test_files"}; MockDecoder decoder; EXPECT_NE(0, decoder.validateInput(args)); } TEST(DecoderTests, GivenWrongParametersWhenParsingParametersThenValidateInputReturnsErrorCode) { std::vector args = { "cloc", "decoder", "-file", "test_files/no_extension", "-patch", "test_files", "-dump", "test_files/created"}; MockDecoder decoder; EXPECT_NE(0, decoder.validateInput(args)); } TEST(DecoderTests, GivenValidSizeStringWhenGettingSizeThenProperOutcomeIsExpectedAndExceptionIsNotThrown) { MockDecoder decoder; EXPECT_EQ(static_cast(1), decoder.getSize("uint8_t")); EXPECT_EQ(static_cast(2), decoder.getSize("uint16_t")); EXPECT_EQ(static_cast(4), decoder.getSize("uint32_t")); EXPECT_EQ(static_cast(8), decoder.getSize("uint64_t")); } TEST(DecoderTests, GivenProperStructWhenReadingStructFieldsThenFieldsVectorGetsPopulatedCorrectly) { std::vector lines; lines.push_back("/* */"); lines.push_back("struct SPatchSamplerStateArray :"); lines.push_back(" SPatchItemHeader"); lines.push_back("{"); lines.push_back(" uint64_t SomeField;"); lines.push_back(" uint32_t Offset;"); lines.push_back(""); lines.push_back(" uint16_t Count;"); lines.push_back(" uint8_t BorderColorOffset;"); lines.push_back("};"); std::vector fields; MockDecoder decoder; size_t pos = 4; uint32_t full_size = decoder.readStructFields(lines, pos, fields); EXPECT_EQ(static_cast(15), full_size); EXPECT_EQ(static_cast(8), fields[0].size); EXPECT_EQ("SomeField", fields[0].name); EXPECT_EQ(static_cast(4), fields[1].size); EXPECT_EQ("Offset", fields[1].name); EXPECT_EQ(static_cast(2), fields[2].size); EXPECT_EQ("Count", fields[2].name); EXPECT_EQ(static_cast(1), fields[3].size); EXPECT_EQ("BorderColorOffset", fields[3].name); } TEST(DecoderTests, GivenProperPatchListFileWhenParsingTokensThenFileIsParsedCorrectly) { MockDecoder decoder; decoder.pathToPatch = "test_files/"; decoder.parseTokens(); 
EXPECT_EQ(static_cast(28), (decoder.programHeader.size)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[0].size)); EXPECT_EQ("Magic", (decoder.programHeader.fields[0].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[1].size)); EXPECT_EQ("Version", (decoder.programHeader.fields[1].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[2].size)); EXPECT_EQ("Device", (decoder.programHeader.fields[2].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[3].size)); EXPECT_EQ("GPUPointerSizeInBytes", (decoder.programHeader.fields[3].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[4].size)); EXPECT_EQ("NumberOfKernels", (decoder.programHeader.fields[4].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[5].size)); EXPECT_EQ("SteppingId", (decoder.programHeader.fields[5].name)); EXPECT_EQ(static_cast(4), (decoder.programHeader.fields[6].size)); EXPECT_EQ("PatchListSize", (decoder.programHeader.fields[6].name)); EXPECT_EQ(static_cast(40), (decoder.kernelHeader.size)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[0].size)); EXPECT_EQ("CheckSum", (decoder.kernelHeader.fields[0].name)); EXPECT_EQ(static_cast(8), (decoder.kernelHeader.fields[1].size)); EXPECT_EQ("ShaderHashCode", (decoder.kernelHeader.fields[1].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[2].size)); EXPECT_EQ("KernelNameSize", (decoder.kernelHeader.fields[2].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[3].size)); EXPECT_EQ("PatchListSize", (decoder.kernelHeader.fields[3].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[4].size)); EXPECT_EQ("KernelHeapSize", (decoder.kernelHeader.fields[4].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[5].size)); EXPECT_EQ("GeneralStateHeapSize", (decoder.kernelHeader.fields[5].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[6].size)); EXPECT_EQ("DynamicStateHeapSize", (decoder.kernelHeader.fields[6].name)); 
EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[7].size)); EXPECT_EQ("SurfaceStateHeapSize", (decoder.kernelHeader.fields[7].name)); EXPECT_EQ(static_cast(4), (decoder.kernelHeader.fields[8].size)); EXPECT_EQ("KernelUnpaddedSize", (decoder.kernelHeader.fields[8].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[2]->size)); EXPECT_EQ("PATCH_TOKEN_STATE_SIP", (decoder.patchTokens[2]->name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[2]->fields[0].size)); EXPECT_EQ("SystemKernelOffset", (decoder.patchTokens[2]->fields[0].name)); EXPECT_EQ(static_cast(12), decoder.patchTokens[5]->size); EXPECT_EQ("PATCH_TOKEN_SAMPLER_STATE_ARRAY", decoder.patchTokens[5]->name); EXPECT_EQ(static_cast(4), (decoder.patchTokens[5]->fields[0].size)); EXPECT_EQ("Offset", (decoder.patchTokens[5]->fields[0].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[5]->fields[1].size)); EXPECT_EQ("Count", (decoder.patchTokens[5]->fields[1].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[5]->fields[2].size)); EXPECT_EQ("BorderColorOffset", (decoder.patchTokens[5]->fields[2].name)); EXPECT_EQ(static_cast(8), decoder.patchTokens[42]->size); EXPECT_EQ("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO", decoder.patchTokens[42]->name); EXPECT_EQ(static_cast(4), (decoder.patchTokens[42]->fields[0].size)); EXPECT_EQ("ConstantBufferIndex", (decoder.patchTokens[42]->fields[0].name)); EXPECT_EQ(static_cast(4), (decoder.patchTokens[42]->fields[1].size)); EXPECT_EQ("InlineDataSize", (decoder.patchTokens[42]->fields[1].name)); EXPECT_EQ(static_cast(4), decoder.patchTokens[19]->size); EXPECT_EQ("PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD", decoder.patchTokens[19]->name); EXPECT_EQ(static_cast(4), (decoder.patchTokens[19]->fields[0].size)); EXPECT_EQ("InterfaceDescriptorDataOffset", (decoder.patchTokens[19]->fields[0].name)); } TEST(DecoderTests, WhenPathToPatchTokensNotProvidedThenUseDefaults) { MockDecoder decoder; decoder.pathToPatch = ""; decoder.parseTokens(); 
EXPECT_NE(0U, decoder.programHeader.size); EXPECT_NE(0U, decoder.kernelHeader.size); } TEST(DecoderTests, GivenValidBinaryWhenReadingPatchTokensFromBinaryThenBinaryIsReadCorrectly) { std::string binaryString; std::stringstream binarySS; uint8_t byte; uint32_t byte4; byte4 = 4; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1234; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 5678; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; binarySS.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 255; for (auto i = 0; i < 4; ++i) { binarySS.write(reinterpret_cast(&byte), sizeof(uint8_t)); } binaryString = binarySS.str(); std::vector binary(binaryString.begin(), binaryString.end()); MockDecoder decoder; std::stringstream out; auto PTptr = std::make_unique(); PTptr->size = 20; PTptr->name = "Example patchtoken"; PTptr->fields.push_back(PTField{4, "First"}); PTptr->fields.push_back(PTField{4, "Second"}); decoder.patchTokens.insert(std::pair>(4, std::move(PTptr))); const void *ptr = reinterpret_cast(binary.data()); decoder.readPatchTokens(ptr, 28, out); std::string s = "Example patchtoken:\n\t4 Token 4\n\t4 Size 16\n\t4 First 1234\n\t4 Second 5678\nUnidentified PatchToken:\n\t4 Token 2\n\t4 Size 12\n\tHex ff ff ff ff\n"; EXPECT_EQ(s, out.str()); } TEST(DecoderTests, GivenValidBinaryWithoutPatchTokensWhenProcessingBinaryThenBinaryIsReadCorrectly) { auto programHeader = createProgramBinaryHeader(1, 0); std::string kernelName("ExampleKernel"); auto kernelHeader = createKernelBinaryHeaderCommon(static_cast(kernelName.size() + 1), 0); std::stringstream binarySS; binarySS.write(reinterpret_cast(&programHeader), sizeof(SProgramBinaryHeader)); binarySS.write(reinterpret_cast(&kernelHeader), sizeof(SKernelBinaryHeaderCommon)); binarySS.write(kernelName.c_str(), 
kernelHeader.KernelNameSize); std::stringstream ptmFile; MockDecoder decoder; decoder.pathToPatch = "test_files/"; decoder.pathToDump = "non_existing_folder/"; decoder.parseTokens(); std::string binaryString = binarySS.str(); std::vector binary(binaryString.begin(), binaryString.end()); const void *ptr = reinterpret_cast(binary.data()); int retVal = decoder.processBinary(ptr, ptmFile); EXPECT_EQ(0, retVal); std::string expectedOutput = "ProgramBinaryHeader:\n\t4 Magic 1229870147\n\t4 Version 0\n\t4 Device 0\n\t4 GPUPointerSizeInBytes 0\n\t4 NumberOfKernels 1\n\t4 SteppingId 0\n\t4 PatchListSize 0\nKernel #0\nKernelBinaryHeader:\n\t4 CheckSum 4294967295\n\t8 ShaderHashCode 18446744073709551615\n\t4 KernelNameSize 14\n\t4 PatchListSize 0\n\t4 KernelHeapSize 0\n\t4 GeneralStateHeapSize 0\n\t4 DynamicStateHeapSize 0\n\t4 SurfaceStateHeapSize 0\n\t4 KernelUnpaddedSize 0\n\tKernelName ExampleKernel\n"; EXPECT_EQ(expectedOutput, ptmFile.str()); } TEST(DecoderTests, GivenValidBinaryWhenProcessingBinaryThenProgramAndKernelAndPatchTokensAreReadCorrectly) { std::stringstream binarySS; //ProgramBinaryHeader auto programHeader = createProgramBinaryHeader(1, 30); binarySS.write(reinterpret_cast(&programHeader), sizeof(SProgramBinaryHeader)); //PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO SPatchAllocateConstantMemorySurfaceProgramBinaryInfo patchAllocateConstantMemory; patchAllocateConstantMemory.Token = 42; patchAllocateConstantMemory.Size = 16; patchAllocateConstantMemory.ConstantBufferIndex = 0; patchAllocateConstantMemory.InlineDataSize = 14; binarySS.write(reinterpret_cast(&patchAllocateConstantMemory), sizeof(patchAllocateConstantMemory)); //InlineData for (uint8_t i = 0; i < 14; ++i) { binarySS.write(reinterpret_cast(&i), sizeof(uint8_t)); } //KernelBinaryHeader std::string kernelName("ExampleKernel"); auto kernelHeader = createKernelBinaryHeaderCommon(static_cast(kernelName.size() + 1), 12); binarySS.write(reinterpret_cast(&kernelHeader), 
sizeof(SKernelBinaryHeaderCommon)); binarySS.write(kernelName.c_str(), kernelHeader.KernelNameSize); //PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD SPatchMediaInterfaceDescriptorLoad patchMediaInterfaceDescriptorLoad; patchMediaInterfaceDescriptorLoad.Token = 19; patchMediaInterfaceDescriptorLoad.Size = 12; patchMediaInterfaceDescriptorLoad.InterfaceDescriptorDataOffset = 0; binarySS.write(reinterpret_cast(&patchMediaInterfaceDescriptorLoad), sizeof(SPatchMediaInterfaceDescriptorLoad)); std::string binaryString = binarySS.str(); std::vector binary(binaryString.begin(), binaryString.end()); std::stringstream ptmFile; MockDecoder decoder; decoder.pathToPatch = "test_files/"; decoder.pathToDump = "non_existing_folder/"; decoder.parseTokens(); const void *ptr = reinterpret_cast(binary.data()); int retVal = decoder.processBinary(ptr, ptmFile); EXPECT_EQ(0, retVal); std::string expectedOutput = "ProgramBinaryHeader:\n\t4 Magic 1229870147\n\t4 Version 0\n\t4 Device 0\n\t4 GPUPointerSizeInBytes 0\n\t4 NumberOfKernels 1\n\t4 SteppingId 0\n\t4 PatchListSize 30\nPATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:\n\t4 Token 42\n\t4 Size 16\n\t4 ConstantBufferIndex 0\n\t4 InlineDataSize 14\n\tHex 0 1 2 3 4 5 6 7 8 9 a b c d\nKernel #0\nKernelBinaryHeader:\n\t4 CheckSum 4294967295\n\t8 ShaderHashCode 18446744073709551615\n\t4 KernelNameSize 14\n\t4 PatchListSize 12\n\t4 KernelHeapSize 0\n\t4 GeneralStateHeapSize 0\n\t4 DynamicStateHeapSize 0\n\t4 SurfaceStateHeapSize 0\n\t4 KernelUnpaddedSize 0\n\tKernelName ExampleKernel\nPATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD:\n\t4 Token 19\n\t4 Size 12\n\t4 InterfaceDescriptorDataOffset 0\n"; EXPECT_EQ(expectedOutput, ptmFile.str()); EXPECT_TRUE(decoder.getMockIga()->disasmWasCalled); EXPECT_FALSE(decoder.getMockIga()->asmWasCalled); } } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/decoder/encoder_tests.cpp000066400000000000000000000461251363734646600321300ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/binary_decoder.h" #include "shared/source/helpers/array_count.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "gmock/gmock.h" #include "mock/mock_encoder.h" #include namespace NEO { TEST(EncoderTests, WhenParsingValidListOfParametersThenReturnValueIsZero) { std::vector args = { "ocloc", "asm", "-dump", "test_files/dump", "-out", "test_files/binary_gen.bin"}; MockEncoder encoder; EXPECT_EQ(0, encoder.validateInput(args)); } TEST(EncoderTests, WhenMissingParametersThenErrorCodeIsReturned) { std::vector args = { "ocloc", "asm", "-dump", "test_files/dump", "-out"}; MockEncoder encoder; EXPECT_NE(0, encoder.validateInput(args)); } TEST(EncoderTests, GivenWrongParametersWhenParsingParametersThenErrorCodeIsReturned) { std::vector args = { "ocloc", "asm", "-dump", "", "-out", "rasputin"}; MockEncoder encoder; EXPECT_NE(0, encoder.validateInput(args)); } TEST(EncoderTests, WhenTryingToCopyNonExistingFileThenErrorCodeIsReturned) { MockEncoder encoder; std::stringstream ss; auto retVal = encoder.copyBinaryToBinary("test_files/non_existing.bin", ss); EXPECT_FALSE(retVal); } TEST(EncoderTests, WhenWritingValuesToBinaryThenValuesAreWrittenCorrectly) { MockEncoder encoder; std::stringstream in; std::stringstream out; in.str("255 255 255 255"); std::string s = in.str(); encoder.write(in, out); uint8_t val1; out.read(reinterpret_cast(&val1), sizeof(uint8_t)); ASSERT_EQ(static_cast(255), val1); encoder.write(in, out); uint16_t val2; out.read(reinterpret_cast(&val2), sizeof(uint16_t)); ASSERT_EQ(static_cast(255), val2); encoder.write(in, out); uint32_t val3; out.read(reinterpret_cast(&val3), sizeof(uint32_t)); ASSERT_EQ(static_cast(255), val3); encoder.write(in, 
out); uint64_t val4; out.read(reinterpret_cast(&val4), sizeof(uint64_t)); ASSERT_EQ(static_cast(255), val4); } TEST(EncoderTests, GivenProperPTMFileFormatWhenWritingToBinaryThenValuesAreWrittenCorrectly) { MockEncoder encoder; std::stringstream out; out.str(""); std::string s = "ProgramBinaryHeader:"; int retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); ASSERT_EQ("", out.str()); s = "Hex 48 65 6c 6c 6f 20 77 6f 72 6c 64"; retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); ASSERT_EQ("Hello world", out.str()); s = "1 CheckOne 220"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); uint8_t val1; out.read(reinterpret_cast(&val1), sizeof(uint8_t)); ASSERT_EQ(static_cast(220), val1); s = "2 CheckTwo 2428"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(0, retVal); uint16_t val2; out.read(reinterpret_cast(&val2), sizeof(uint16_t)); ASSERT_EQ(static_cast(2428), val2); s = "4 CheckThree 242806820"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(retVal, 0); uint32_t val3; out.read(reinterpret_cast(&val3), sizeof(uint32_t)); ASSERT_EQ(static_cast(242806820), val3); s = "8 CheckFour 242806820"; out.str(""); retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(retVal, 0); uint64_t val4; out.read(reinterpret_cast(&val4), sizeof(uint64_t)); ASSERT_EQ(static_cast(242806820), val4); } TEST(EncoderTests, GivenImproperPTMFIleFormatWhenWritingToBinaryThenErrorCodeIsReturned) { std::string s = "3 UnknownSize 41243"; std::stringstream out(""); MockEncoder encoder; int retVal = encoder.writeDeviceBinary(s, out); ASSERT_EQ(-1, retVal); } TEST(EncoderTests, GivenIncorrectPatchListSizeWhileCalculatingPatchListSizeThenPatchListSizeIsSetToCorrectValue) { std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); ptmFile.push_back("\t4 PatchListSize 14"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); 
ptmFile.push_back("\t8 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t1 ConstantBufferIndex 0"); ptmFile.push_back("\t2 InlineDataSize 14"); ptmFile.push_back("\tHex 48 65 6c 6c 6f 20 77 6f 72 6c 64 21 a 0"); ptmFile.push_back("Kernel #0"); MockEncoder encoder; encoder.calculatePatchListSizes(ptmFile); EXPECT_EQ("\t4 PatchListSize 29", ptmFile[2]); } TEST(EncoderTests, GivenCorrectPTMFileWhileProcessingThenCorrectProgramHeaderExpected) { std::stringstream expectedBinary; uint8_t byte; uint32_t byte4; byte4 = 1229870147; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1042; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 4; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 18; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 42; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 0x48; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte = 0x65; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); ptmFile.push_back("\t4 Version 1042"); ptmFile.push_back("\t4 Device 12"); ptmFile.push_back("\t4 GPUPointerSizeInBytes 4"); ptmFile.push_back("\t4 NumberOfKernels 1"); ptmFile.push_back("\t4 SteppingId 2"); ptmFile.push_back("\t4 PatchListSize 18"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); 
ptmFile.push_back("\t4 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t4 ConstantBufferIndex 0"); ptmFile.push_back("\t4 InlineDataSize 2"); ptmFile.push_back("\tHex 48 65"); ptmFile.push_back("Kernel #0"); ptmFile.push_back("KernelBinaryHeader:"); ptmFile.push_back("\t4 CheckSum 2316678223"); ptmFile.push_back("\t8 ShaderHashCode 4988534869940066475"); ptmFile.push_back("\t4 KernelNameSize 12"); ptmFile.push_back("\t4 PatchListSize 0"); ptmFile.push_back("\t4 KernelHeapSize 0"); ptmFile.push_back("\t4 GeneralStateHeapSize 0"); ptmFile.push_back("\t4 DynamicStateHeapSize 0"); ptmFile.push_back("\t4 KernelUnpaddedSize 520"); ptmFile.push_back("\tKernelName kernel"); std::stringstream binary; MockEncoder().processBinary(ptmFile, binary); EXPECT_EQ(expectedBinary.str(), binary.str()); } TEST(EncoderTests, WhenAddPaddingIsCalledThenProperNumberOfZerosIsAdded) { std::stringstream stream; stream << "aa"; MockEncoder().addPadding(stream, 8); std::string asString = stream.str(); ASSERT_EQ(10U, asString.size()); char expected[] = {'a', 'a', 0, 0, 0, 0, 0, 0, 0, 0}; EXPECT_EQ(0, memcmp(asString.c_str(), expected, 10U)); } TEST(EncoderTests, WhenProcessingDeviceBinaryThenProperChecksumIsCalculated) { std::stringstream expectedBinary; uint8_t byte; uint32_t byte4; uint64_t byte8; MockEncoder encoder; std::string kernelName = "kernel"; encoder.filesMap["kernel_DynamicStateHeap.bin"] = std::string(16, 2); encoder.filesMap["kernel_KernelHeap.dat"] = std::string(16, 4); encoder.filesMap["kernel_SurfaceStateHeap.bin"] = std::string(16, 8); std::stringstream kernelBlob; kernelBlob << kernelName; kernelBlob.write(encoder.filesMap["kernel_KernelHeap.dat"].data(), encoder.filesMap["kernel_KernelHeap.dat"].size()); encoder.addPadding(kernelBlob, 128); // isa prefetch padding encoder.addPadding(kernelBlob, 64 - (encoder.filesMap["kernel_KernelHeap.dat"].size() + 128) % 64); // isa alignment size_t kernelHeapSize = encoder.filesMap["kernel_KernelHeap.dat"].size(); 
kernelHeapSize = alignUp(kernelHeapSize + 128, 64); kernelBlob.write(encoder.filesMap["kernel_DynamicStateHeap.bin"].data(), encoder.filesMap["kernel_DynamicStateHeap.bin"].size()); kernelBlob.write(encoder.filesMap["kernel_SurfaceStateHeap.bin"].data(), encoder.filesMap["kernel_SurfaceStateHeap.bin"].size()); auto kernelBlobData = kernelBlob.str(); uint64_t hashValue = NEO::Hash::hash(reinterpret_cast(kernelBlobData.data()), kernelBlobData.size()); uint32_t checksum = hashValue & 0xFFFFFFFF; byte4 = 1229870147; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1042; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 4; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 18; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 42; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 0x48; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte = 0x65; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte4 = checksum; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte8 = 4988534869940066475; expectedBinary.write(reinterpret_cast(&byte8), sizeof(uint64_t)); byte4 = static_cast(kernelName.size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(kernelHeapSize); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; 
expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(encoder.filesMap["kernel_KernelHeap.dat"].size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); expectedBinary.write(kernelName.c_str(), kernelName.length()); expectedBinary.write(encoder.filesMap["kernel_KernelHeap.dat"].data(), encoder.filesMap["kernel_KernelHeap.dat"].size()); std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); ptmFile.push_back("\t4 Version 1042"); ptmFile.push_back("\t4 Device 12"); ptmFile.push_back("\t4 GPUPointerSizeInBytes 4"); ptmFile.push_back("\t4 NumberOfKernels 1"); ptmFile.push_back("\t4 SteppingId 2"); ptmFile.push_back("\t4 PatchListSize 18"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); ptmFile.push_back("\t4 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t4 ConstantBufferIndex 0"); ptmFile.push_back("\t4 InlineDataSize 2"); ptmFile.push_back("\tHex 48 65"); ptmFile.push_back("Kernel #0"); ptmFile.push_back("KernelBinaryHeader:"); ptmFile.push_back("\t4 CheckSum 0"); ptmFile.push_back("\t8 ShaderHashCode 4988534869940066475"); ptmFile.push_back("\t4 KernelNameSize " + std::to_string(kernelName.size())); ptmFile.push_back("\t4 PatchListSize 0"); ptmFile.push_back("\t4 KernelHeapSize 16"); ptmFile.push_back("\t4 GeneralStateHeapSize 0"); ptmFile.push_back("\t4 DynamicStateHeapSize 16"); ptmFile.push_back("\t4 KernelUnpaddedSize 16"); ptmFile.push_back("\tKernelName " + kernelName); std::stringstream result; auto ret = encoder.processBinary(ptmFile, result); auto resultAsString = result.str(); EXPECT_EQ(0, ret); auto expectedBinaryAsString = expectedBinary.str(); resultAsString.resize(expectedBinaryAsString.size()); // don't test beyond kernel header EXPECT_EQ(expectedBinaryAsString, resultAsString); 
EXPECT_FALSE(encoder.getMockIga()->disasmWasCalled); EXPECT_FALSE(encoder.getMockIga()->asmWasCalled); } TEST(EncoderTests, WhenProcessingDeviceBinaryAndAsmIsAvailableThenAseembleItWithIga) { std::stringstream expectedBinary; uint8_t byte; uint32_t byte4; uint64_t byte8; MockEncoder encoder; encoder.getMockIga()->binaryToReturn = std::string(32, 13); std::string kernelName = "kernel"; encoder.filesMap["kernel_DynamicStateHeap.bin"] = std::string(16, 2); encoder.filesMap["kernel_KernelHeap.dat"] = std::string(16, 4); encoder.filesMap["kernel_KernelHeap.asm"] = std::string(16, 7); encoder.filesMap["kernel_SurfaceStateHeap.bin"] = std::string(16, 8); std::stringstream kernelBlob; kernelBlob << kernelName; kernelBlob.write(encoder.getMockIga()->binaryToReturn.c_str(), encoder.getMockIga()->binaryToReturn.size()); encoder.addPadding(kernelBlob, 128); // isa prefetch padding encoder.addPadding(kernelBlob, 64 - (encoder.getMockIga()->binaryToReturn.size() + 128) % 64); // isa alignment size_t kernelHeapSize = encoder.getMockIga()->binaryToReturn.size(); kernelHeapSize = alignUp(kernelHeapSize + 128, 64); kernelBlob.write(encoder.filesMap["kernel_DynamicStateHeap.bin"].data(), encoder.filesMap["kernel_DynamicStateHeap.bin"].size()); kernelBlob.write(encoder.filesMap["kernel_SurfaceStateHeap.bin"].data(), encoder.filesMap["kernel_SurfaceStateHeap.bin"].size()); auto kernelBlobData = kernelBlob.str(); uint64_t hashValue = NEO::Hash::hash(reinterpret_cast(kernelBlobData.data()), kernelBlobData.size()); uint32_t checksum = hashValue & 0xFFFFFFFF; byte4 = 1229870147; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1042; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 12; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 4; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 1; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; 
expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 18; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 42; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 16; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 2; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte = 0x48; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte = 0x65; expectedBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); byte4 = checksum; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte8 = 4988534869940066475; expectedBinary.write(reinterpret_cast(&byte8), sizeof(uint64_t)); byte4 = static_cast(kernelName.size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(kernelHeapSize); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = 0; expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(16); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); byte4 = static_cast(encoder.getMockIga()->binaryToReturn.size()); expectedBinary.write(reinterpret_cast(&byte4), sizeof(uint32_t)); expectedBinary.write(kernelName.c_str(), kernelName.length()); expectedBinary.write(encoder.getMockIga()->binaryToReturn.data(), encoder.getMockIga()->binaryToReturn.size()); std::vector ptmFile; ptmFile.push_back("ProgramBinaryHeader:"); ptmFile.push_back("\t4 Magic 1229870147"); ptmFile.push_back("\t4 Version 1042"); ptmFile.push_back("\t4 Device 12"); ptmFile.push_back("\t4 GPUPointerSizeInBytes 4"); ptmFile.push_back("\t4 NumberOfKernels 1"); ptmFile.push_back("\t4 SteppingId 2"); ptmFile.push_back("\t4 PatchListSize 18"); ptmFile.push_back("PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO:"); 
ptmFile.push_back("\t4 Token 42"); ptmFile.push_back("\t4 Size 16"); ptmFile.push_back("\t4 ConstantBufferIndex 0"); ptmFile.push_back("\t4 InlineDataSize 2"); ptmFile.push_back("\tHex 48 65"); ptmFile.push_back("Kernel #0"); ptmFile.push_back("KernelBinaryHeader:"); ptmFile.push_back("\t4 CheckSum 0"); ptmFile.push_back("\t8 ShaderHashCode 4988534869940066475"); ptmFile.push_back("\t4 KernelNameSize " + std::to_string(kernelName.size())); ptmFile.push_back("\t4 PatchListSize 0"); ptmFile.push_back("\t4 KernelHeapSize 16"); ptmFile.push_back("\t4 GeneralStateHeapSize 0"); ptmFile.push_back("\t4 DynamicStateHeapSize 16"); ptmFile.push_back("\t4 KernelUnpaddedSize 16"); ptmFile.push_back("\tKernelName " + kernelName); std::stringstream result; auto ret = encoder.processBinary(ptmFile, result); auto resultAsString = result.str(); EXPECT_EQ(0, ret); auto expectedBinaryAsString = expectedBinary.str(); resultAsString.resize(expectedBinaryAsString.size()); // don't test beyond kernel header EXPECT_EQ(expectedBinaryAsString, resultAsString); EXPECT_FALSE(encoder.getMockIga()->disasmWasCalled); EXPECT_TRUE(encoder.getMockIga()->asmWasCalled); EXPECT_EQ(encoder.filesMap["kernel_KernelHeap.asm"], encoder.getMockIga()->receivedAsm); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/decoder/mock/000077500000000000000000000000001363734646600275045ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/decoder/mock/mock_decoder.h000066400000000000000000000025401363734646600322740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/binary_decoder.h" #include "mock_iga_wrapper.h" struct MockDecoder : public BinaryDecoder { MockDecoder() : MockDecoder("", "", "") { } MockDecoder(const std::string &file, const std::string &patch, const std::string &dump) : BinaryDecoder(file, patch, 
dump) { this->iga.reset(new MockIgaWrapper); oclocArgHelperWithoutInput = std::make_unique(); argHelper = oclocArgHelperWithoutInput.get(); argHelper->getPrinterRef() = MessagePrinter(true); }; using BinaryDecoder::binaryFile; using BinaryDecoder::decode; using BinaryDecoder::getSize; using BinaryDecoder::iga; using BinaryDecoder::kernelHeader; using BinaryDecoder::parseTokens; using BinaryDecoder::patchTokens; using BinaryDecoder::pathToDump; using BinaryDecoder::pathToPatch; using BinaryDecoder::processBinary; using BinaryDecoder::processKernel; using BinaryDecoder::programHeader; using BinaryDecoder::readPatchTokens; using BinaryDecoder::readStructFields; std::unique_ptr oclocArgHelperWithoutInput; MockIgaWrapper *getMockIga() const { return static_cast(iga.get()); } }; compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/decoder/mock/mock_encoder.h000066400000000000000000000036051363734646600323110ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/binary_encoder.h" #include "shared/source/helpers/hash.h" #include "opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h" #include "mock_iga_wrapper.h" #include #include struct MockEncoder : public BinaryEncoder { MockEncoder() : MockEncoder("", ""){}; MockEncoder(const std::string &dump, const std::string &elf) : BinaryEncoder(dump, elf) { this->iga.reset(new MockIgaWrapper); oclocArgHelperWithoutInput = std::make_unique(filesMap); argHelper = oclocArgHelperWithoutInput.get(); argHelper->getPrinterRef() = MessagePrinter(true); }; std::map filesMap; bool copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary, uint32_t *binaryLength) override { auto it = filesMap.find(srcFileName); if (it == filesMap.end()) { return false; } outBinary.write(it->second.c_str(), it->second.size()); if (binaryLength != nullptr) { *binaryLength = 
static_cast(it->second.size()); } return true; } using BinaryEncoder::addPadding; using BinaryEncoder::calculatePatchListSizes; using BinaryEncoder::copyBinaryToBinary; using BinaryEncoder::createElf; using BinaryEncoder::elfName; using BinaryEncoder::encode; using BinaryEncoder::iga; using BinaryEncoder::pathToDump; using BinaryEncoder::processBinary; using BinaryEncoder::processKernel; using BinaryEncoder::write; using BinaryEncoder::writeDeviceBinary; std::unique_ptr oclocArgHelperWithoutInput; MockIgaWrapper *getMockIga() const { return static_cast(iga.get()); } }; compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/decoder/mock/mock_iga_wrapper.h000066400000000000000000000023051363734646600331660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/iga_wrapper.h" #include #include struct MockIgaWrapper : public IgaWrapper { bool tryDisassembleGenISA(const void *kernelPtr, uint32_t kernelSize, std::string &out) override { out = asmToReturn; disasmWasCalled = true; receivedBinary.assign(reinterpret_cast(kernelPtr), kernelSize); return asmToReturn.size() != 0; } bool tryAssembleGenISA(const std::string &inAsm, std::string &outBinary) override { outBinary = binaryToReturn; asmWasCalled = true; receivedAsm = inAsm; return outBinary.size() != 0; } void setGfxCore(GFXCORE_FAMILY core) override { } void setProductFamily(PRODUCT_FAMILY product) override { } bool isKnownPlatform() const override { return false; } bool tryLoadIga() override { return true; } std::string asmToReturn; std::string binaryToReturn; std::string receivedAsm; std::string receivedBinary; bool disasmWasCalled = false; bool asmWasCalled = false; }; compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/environment.h000066400000000000000000000031671363734646600276720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/offline_compiler.h" #include "opencl/source/os_interface/os_inc_base.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "gtest/gtest.h" class Environment : public ::testing::Environment { public: Environment(const std::string &devicePrefix, const std::string &familyNameWithType) : libraryFrontEnd(nullptr), libraryIGC(nullptr), devicePrefix(devicePrefix), familyNameWithType(familyNameWithType) { } void SetInputFileName( const std::string filename) { retrieveBinaryKernelFilename(igcDebugVars.fileName, filename + "_", ".gen"); retrieveBinaryKernelFilename(fclDebugVars.fileName, filename + "_", ".bc"); NEO::setIgcDebugVars(igcDebugVars); NEO::setFclDebugVars(fclDebugVars); } void SetUp() override { mockCompilerGuard.Enable(); SetInputFileName("copybuffer"); } void TearDown() override { delete libraryFrontEnd; delete libraryIGC; mockCompilerGuard.Disable(); } NEO::OsLibrary *libraryFrontEnd; NEO::OsLibrary *libraryIGC; NEO::MockCompilerDebugVars igcDebugVars; NEO::MockCompilerDebugVars fclDebugVars; void (*igcSetDebugVarsFPtr)(NEO::MockCompilerDebugVars &debugVars); void (*fclSetDebugVarsFPtr)(NEO::MockCompilerDebugVars &debugVars); NEO::MockCompilerEnableGuard mockCompilerGuard; const std::string devicePrefix; const std::string familyNameWithType; }; compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/main.cpp000066400000000000000000000065651363734646600266120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "opencl/test/unit_test/custom_event_listener.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "environment.h" #include "limits.h" #ifdef WIN32 const char *fSeparator = "\\"; #elif defined(__linux__) const char *fSeparator = "/"; #endif Environment 
// Returns the current working directory, or an empty string when it cannot
// be determined.
//
// getcwd(nullptr, 0) hands back a malloc-allocated buffer owned by the
// caller, so it is copied into the result and then freed. A failed call
// returns a null pointer, which must never be used to build a std::string.
std::string getRunPath() {
    std::string res;
#if defined(__linux__)
    char *cwd = getcwd(nullptr, 0);
#else
    char *cwd = _getcwd(nullptr, 0);
#endif
    if (cwd != nullptr) {
        res = cwd;
        free(cwd);
    }
    return res;
}
<< std::endl; } #endif if (useDefaultListener == false) { ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); ::testing::TestEventListener *defaultListener = listeners.default_result_printer(); auto customEventListener = new CCustomEventListener(defaultListener); listeners.Release(listeners.default_result_printer()); listeners.Append(customEventListener); } gEnvironment = reinterpret_cast(::testing::AddGlobalTestEnvironment(new Environment(devicePrefix, familyNameWithType))); retVal = RUN_ALL_TESTS(); return retVal; } compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/mock/000077500000000000000000000000001363734646600260775ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/mock/mock_argument_helper.h000066400000000000000000000017201363734646600324420ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include #include class MockOclocArgHelper : public OclocArgHelper { public: std::map &filesMap; MockOclocArgHelper(std::map &filesMap) : OclocArgHelper( 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr), filesMap(filesMap){}; protected: bool fileExists(const std::string &filename) const override { return filesMap.find(filename) != filesMap.end(); } std::vector readBinaryFile(const std::string &filename) override { auto file = filesMap[filename]; return std::vector(file.begin(), file.end()); } }; compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/mock/mock_offline_compiler.h000066400000000000000000000052051363734646600325770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/offline_compiler.h" #include namespace NEO { class MockOfflineCompiler : public OfflineCompiler { public: 
using OfflineCompiler::deviceName; using OfflineCompiler::elfBinary; using OfflineCompiler::fclDeviceCtx; using OfflineCompiler::generateFilePathForIr; using OfflineCompiler::generateOptsSuffix; using OfflineCompiler::igcDeviceCtx; using OfflineCompiler::inputFileLlvm; using OfflineCompiler::inputFileSpirV; using OfflineCompiler::isSpirV; using OfflineCompiler::options; using OfflineCompiler::outputDirectory; using OfflineCompiler::outputFile; using OfflineCompiler::sourceCode; using OfflineCompiler::useLlvmText; using OfflineCompiler::useOptionsSuffix; MockOfflineCompiler() : OfflineCompiler() { uniqueHelper = std::make_unique(); argHelper = uniqueHelper.get(); } int initialize(size_t numArgs, const std::vector &argv) { return OfflineCompiler::initialize(numArgs, argv, true); } int parseCommandLine(size_t numArgs, const std::vector &argv) { return OfflineCompiler::parseCommandLine(numArgs, argv); } void parseDebugSettings() { return OfflineCompiler::parseDebugSettings(); } std::string &getOptions() { return options; } std::string &getInternalOptions() { return internalOptions; } std::string getStringWithinDelimiters(const std::string &src) { return OfflineCompiler::getStringWithinDelimiters(src); } void updateBuildLog(const char *pErrorString, const size_t errorStringSize) { OfflineCompiler::updateBuildLog(pErrorString, errorStringSize); } int getHardwareInfo(const char *pDeviceName) { return OfflineCompiler::getHardwareInfo(pDeviceName); } void storeBinary(char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize) { OfflineCompiler::storeBinary(pDst, dstSize, pSrc, srcSize); } void storeGenBinary(const void *pSrc, const size_t srcSize) { OfflineCompiler::storeBinary(genBinary, genBinarySize, pSrc, srcSize); } int buildSourceCode() { return OfflineCompiler::buildSourceCode(); } bool generateElfBinary() { return OfflineCompiler::generateElfBinary(); } char *getGenBinary() { return genBinary; } size_t getGenBinarySize() { return genBinarySize; } 
std::unique_ptr uniqueHelper; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/mock/mock_sip_ocloc_tests.cpp000066400000000000000000000010551363734646600330110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_sip.h" #include static std::vector dummyBinaryForSip; using namespace NEO; std::vector MockSipKernel::dummyBinaryForSip; std::vector MockSipKernel::getDummyGenBinary() { return MockSipKernel::dummyBinaryForSip; } std::vector MockSipKernel::getBinary() { return MockSipKernel::dummyBinaryForSip; } void MockSipKernel::initDummyBinary() { } void MockSipKernel::shutDown() { } compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/ocloc_fatbinary_tests.cpp000066400000000000000000000410771363734646600322430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/offline_compiler/ocloc_fatbinary_tests.h" #include "shared/source/helpers/hw_helper.h" #include namespace NEO { TEST(OclocFatBinaryRequestedFatBinary, WhenDeviceArgMissingThenReturnsFalse) { const char *args[] = {"ocloc", "-aaa", "*", "-device", "*"}; EXPECT_FALSE(NEO::requestedFatBinary(0, nullptr)); EXPECT_FALSE(NEO::requestedFatBinary(1, args)); EXPECT_FALSE(NEO::requestedFatBinary(2, args)); EXPECT_FALSE(NEO::requestedFatBinary(3, args)); EXPECT_FALSE(NEO::requestedFatBinary(4, args)); } TEST(OclocFatBinaryRequestedFatBinary, WhenDeviceArgProvidedAndContainsFatbinaryArgFormatThenReturnsTrue) { const char *allPlatforms[] = {"ocloc", "-device", "*"}; const char *manyPlatforms[] = {"ocloc", "-device", "a,b"}; const char *manyGens[] = {"ocloc", "-device", "gen0,gen1"}; const char *gen[] = {"ocloc", "-device", "gen0"}; const char *rangePlatformFrom[] = {"ocloc", "-device", "skl-"}; const char *rangePlatformTo[] = {"ocloc", "-device", "-skl"}; const char 
*rangePlatformBounds[] = {"ocloc", "-device", "skl-icllp"}; const char *rangeGenFrom[] = {"ocloc", "-device", "gen0-"}; const char *rangeGenTo[] = {"ocloc", "-device", "-gen5"}; const char *rangeGenBounds[] = {"ocloc", "-device", "gen0-gen5"}; EXPECT_TRUE(NEO::requestedFatBinary(3, allPlatforms)); EXPECT_TRUE(NEO::requestedFatBinary(3, manyPlatforms)); EXPECT_TRUE(NEO::requestedFatBinary(3, manyGens)); EXPECT_TRUE(NEO::requestedFatBinary(3, gen)); EXPECT_TRUE(NEO::requestedFatBinary(3, rangePlatformFrom)); EXPECT_TRUE(NEO::requestedFatBinary(3, rangePlatformTo)); EXPECT_TRUE(NEO::requestedFatBinary(3, rangePlatformBounds)); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeGenFrom)); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeGenTo)); EXPECT_TRUE(NEO::requestedFatBinary(3, rangeGenBounds)); } TEST(OclocFatBinaryRequestedFatBinary, WhenDeviceArgProvidedButDoesnNotContainFatbinaryArgFormatThenReturnsFalse) { const char *skl[] = {"ocloc", "-device", "skl"}; EXPECT_FALSE(NEO::requestedFatBinary(3, skl)); } TEST(OclocFatBinaryGetAllSupportedTargetPlatforms, WhenRequestedThenReturnsAllPlatformsWithNonNullHardwarePrefixes) { auto platforms = NEO::getAllSupportedTargetPlatforms(); std::unordered_set platformsSet(platforms.begin(), platforms.end()); for (unsigned int productId = 0; productId < IGFX_MAX_PRODUCT; ++productId) { if (nullptr != NEO::hardwarePrefix[productId]) { EXPECT_EQ(1U, platformsSet.count(static_cast(productId))) << productId; } else { EXPECT_EQ(0U, platformsSet.count(static_cast(productId))) << productId; } } } TEST(OclocFatBinaryToProductNames, GivenListOfProductIdsThenReturnsListOfHardwarePrefixes) { auto platforms = NEO::getAllSupportedTargetPlatforms(); auto names = NEO::toProductNames(platforms); EXPECT_EQ(names.size(), platforms.size()); } TEST(OclocFatBinaryAsProductId, GivenEnabledPlatformNameThenReturnsProperPlatformId) { auto platforms = NEO::getAllSupportedTargetPlatforms(); auto names = NEO::toProductNames(platforms); for (size_t i = 0; i < 
// With an empty list of enabled platforms, no product name may resolve to an id.
//
// The loop iterates over the names captured before the platform list was
// cleared, and the assertion checks the looked-up id. Iterating over the
// cleared list would execute zero times and verify nothing.
TEST(OclocFatBinaryAsProductId, GivenDisabledPlatformNameThenReturnsUnknownPlatformId) {
    auto platforms = NEO::getAllSupportedTargetPlatforms();
    auto names = NEO::toProductNames(platforms);
    platforms.clear();
    for (size_t i = 0; i < names.size(); ++i) {
        auto idByName = NEO::asProductId(names[i], platforms);
        EXPECT_EQ(IGFX_UNKNOWN, idByName) << names[i] << " : IGFX_UNKNOWN != " << idByName;
    }
}

// Every core with a family name resolves to a known core id, both with the
// name as stored and with its first character replaced by a lower-case 'g'.
TEST(OclocFatBinaryAsGfxCoreId, GivenEnabledGfxCoreNameThenReturnsProperGfxCoreId) {
    for (unsigned int coreId = 0; coreId < IGFX_MAX_CORE; ++coreId) {
        if (nullptr != NEO::familyName[coreId]) {
            EXPECT_NE(IGFX_UNKNOWN_CORE, NEO::asGfxCoreId(ConstStringRef(NEO::familyName[coreId], strlen(NEO::familyName[coreId]))));
            std::string lowerCaseName = NEO::familyName[coreId];
            lowerCaseName[0] = 'g';
            EXPECT_NE(IGFX_UNKNOWN_CORE, NEO::asGfxCoreId(lowerCaseName));
        }
    }
}

// Gen names that are not enabled resolve to the unknown core id.
TEST(OclocFatBinaryAsGfxCoreId, GivenDisabledGfxCoreNameThenReturnsProperGfxCoreId) {
    EXPECT_EQ(IGFX_UNKNOWN_CORE, NEO::asGfxCoreId(ConstStringRef("genA")));
    EXPECT_EQ(IGFX_UNKNOWN_CORE, NEO::asGfxCoreId(ConstStringRef("gen0")));
    EXPECT_EQ(IGFX_UNKNOWN_CORE, NEO::asGfxCoreId(ConstStringRef("gen1")));
    EXPECT_EQ(IGFX_UNKNOWN_CORE, NEO::asGfxCoreId(ConstStringRef("gen2")));
}
// "*" expands to the names of all supported platforms, in the order reported
// by getAllSupportedTargetPlatforms().
TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenAsterixThenReturnAllEnabledPlatforms) {
    const auto supportedIds = NEO::getAllSupportedTargetPlatforms();
    const auto expectedNames = NEO::toProductNames(supportedIds);

    const auto actualNames = NEO::getTargetPlatformsForFatbinary("*", oclocArgHelperWithoutInput.get());

    EXPECT_EQ(expectedNames, actualNames);
}

// A gen name expands to every supported platform that belongs to that render
// core family.
TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenGenThenReturnAllEnabledPlatformsThatMatch) {
    const auto supportedIds = NEO::getAllSupportedTargetPlatforms();
    const auto firstPlatform = supportedIds[0];
    const auto firstCore = NEO::hardwareInfoTable[firstPlatform]->platform.eRenderCoreFamily;

    std::string genArgument = NEO::familyName[firstCore];
    genArgument[0] = 'g'; // ocloc uses lower case

    std::vector<PRODUCT_FAMILY> platformsOfCore;
    NEO::appendPlatformsForGfxCore(firstCore, supportedIds, platformsOfCore);
    const auto expectedNames = NEO::toProductNames(platformsOfCore);

    const auto actualNames = NEO::getTargetPlatformsForFatbinary(genArgument, oclocArgHelperWithoutInput.get());

    EXPECT_EQ(expectedNames, actualNames);
}
// "name-" selects the named platform and everything after it in the
// supported-platform list. The test returns early (passing without checking
// anything) when fewer than three platforms are available.
TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenPlatformOpenRangeFromThenReturnAllEnabledPlatformsThatMatch) {
    const auto supportedIds = NEO::getAllSupportedTargetPlatforms();
    if (supportedIds.size() < 3) {
        return;
    }

    const auto pivot = supportedIds[supportedIds.size() / 2];
    const std::string pivotName = NEO::hardwarePrefix[pivot];

    const auto rangeBegin = std::find(supportedIds.begin(), supportedIds.end(), pivot);
    const std::vector<PRODUCT_FAMILY> platformsInRange(rangeBegin, supportedIds.end());
    const auto expectedNames = NEO::toProductNames(platformsInRange);

    const auto actualNames = NEO::getTargetPlatformsForFatbinary(pivotName + "-", oclocArgHelperWithoutInput.get());

    EXPECT_EQ(expectedNames, actualNames);
}

// "-name" selects everything from the start of the supported-platform list up
// to and including the named platform. The test returns early when fewer than
// three platforms are available.
TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenPlatformOpenRangeToThenReturnAllEnabledPlatformsThatMatch) {
    const auto supportedIds = NEO::getAllSupportedTargetPlatforms();
    if (supportedIds.size() < 3) {
        return;
    }

    const auto pivot = supportedIds[supportedIds.size() / 2];
    const std::string pivotName = NEO::hardwarePrefix[pivot];

    const auto rangeLast = std::find(supportedIds.begin(), supportedIds.end(), pivot);
    // +1 turns the inclusive upper bound into a past-the-end iterator.
    const std::vector<PRODUCT_FAMILY> platformsInRange(supportedIds.begin(), rangeLast + 1);
    const auto expectedNames = NEO::toProductNames(platformsInRange);

    const auto actualNames = NEO::getTargetPlatformsForFatbinary("-" + pivotName, oclocArgHelperWithoutInput.get());

    EXPECT_EQ(expectedNames, actualNames);
}
NEO::hardwarePrefix[platformFrom]; std::string platformNameTo = NEO::hardwarePrefix[platformTo]; std::vector expectedPlatforms; expectedPlatforms.insert(expectedPlatforms.end(), allEnabledPlatforms.begin() + 1, allEnabledPlatforms.begin() + allEnabledPlatforms.size() - 1); auto expected = NEO::toProductNames(expectedPlatforms); auto got = NEO::getTargetPlatformsForFatbinary(platformNameFrom + "-" + platformNameTo, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); got = NEO::getTargetPlatformsForFatbinary(platformNameTo + "-" + platformNameFrom, oclocArgHelperWithoutInput.get()); // swap min with max implicitly EXPECT_EQ(expected, got); } std::vector getEnabledCores() { std::vector ret; for (unsigned int coreId = 0; coreId < IGFX_MAX_CORE; ++coreId) { if (nullptr != NEO::familyName[coreId]) { ret.push_back(static_cast(coreId)); } } return ret; } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenGenOpenRangeFromThenReturnAllEnabledPlatformsThatMatch) { auto allSupportedPlatforms = NEO::getAllSupportedTargetPlatforms(); auto allEnabledCores = getEnabledCores(); if (allEnabledCores.size() < 3) { return; } auto core0 = allEnabledCores[allEnabledCores.size() / 2]; std::string genName = NEO::familyName[core0]; genName[0] = 'g'; // ocloc uses lower case std::vector expectedPlatforms; unsigned int coreIt = core0; while (coreIt < static_cast(IGFX_MAX_CORE)) { NEO::appendPlatformsForGfxCore(static_cast(coreIt), allSupportedPlatforms, expectedPlatforms); ++coreIt; } auto expected = NEO::toProductNames(expectedPlatforms); auto got = NEO::getTargetPlatformsForFatbinary(genName + "-", oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenGenOpenRangeToThenReturnAllEnabledPlatformsThatMatch) { auto allSupportedPlatforms = NEO::getAllSupportedTargetPlatforms(); auto allEnabledCores = getEnabledCores(); if (allEnabledCores.size() < 3) { return; } auto core0 = allEnabledCores[allEnabledCores.size() 
/ 2]; std::string genName = NEO::familyName[core0]; genName[0] = 'g'; // ocloc uses lower case std::vector expectedPlatforms; unsigned int coreIt = IGFX_UNKNOWN_CORE; ++coreIt; while (coreIt <= static_cast(core0)) { NEO::appendPlatformsForGfxCore(static_cast(coreIt), allSupportedPlatforms, expectedPlatforms); ++coreIt; } auto expected = NEO::toProductNames(expectedPlatforms); auto got = NEO::getTargetPlatformsForFatbinary("-" + genName, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenGenClosedRangeThenReturnAllEnabledPlatformsThatMatch) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto allEnabledCores = getEnabledCores(); if (allEnabledCores.size() < 4) { return; } auto genFrom = allEnabledCores[1]; auto genTo = allEnabledCores[allEnabledCores.size() - 2]; std::string genNameFrom = NEO::familyName[genFrom]; genNameFrom[0] = 'g'; std::string genNameTo = NEO::familyName[genTo]; genNameTo[0] = 'g'; std::vector expectedPlatforms; auto genIt = genFrom; while (genIt <= genTo) { NEO::appendPlatformsForGfxCore(static_cast(genIt), allEnabledPlatforms, expectedPlatforms); genIt = static_cast(static_cast(genIt) + 1); } auto expected = NEO::toProductNames(expectedPlatforms); auto got = NEO::getTargetPlatformsForFatbinary(genNameFrom + "-" + genNameTo, oclocArgHelperWithoutInput.get()); EXPECT_EQ(expected, got); got = NEO::getTargetPlatformsForFatbinary(genNameTo + "-" + genNameFrom, oclocArgHelperWithoutInput.get()); // swap min with max implicitly EXPECT_EQ(expected, got); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenUnkownGenThenReturnEmptyList) { auto got = NEO::getTargetPlatformsForFatbinary("gen0", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenMutiplePlatformWhenAnyOfPlatformsIsUnknownThenReturnEmptyList) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto platform0 = 
allEnabledPlatforms[0]; std::string platform0Name = NEO::hardwarePrefix[platform0]; auto got = NEO::getTargetPlatformsForFatbinary(platform0Name + ",unk", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenPlatformOpenRangeFromWhenPlatformsIsUnkownThenReturnEmptyList) { auto got = NEO::getTargetPlatformsForFatbinary("unk-", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenPlatformOpenRangeToWhenPlatformsIsUnkownThenReturnEmptyList) { auto got = NEO::getTargetPlatformsForFatbinary("-unk", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenPlatformClosedRangeWhenAnyOfPlatformsIsUnkownThenReturnEmptyList) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto platform0 = allEnabledPlatforms[0]; std::string platform0Name = NEO::hardwarePrefix[platform0]; auto got = NEO::getTargetPlatformsForFatbinary("unk-" + platform0Name, oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetPlatformsForFatbinary(platform0Name + "-unk", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenGenOpenRangeFromWhenGenIsUnknownTheReturnEmptyList) { auto got = NEO::getTargetPlatformsForFatbinary("gen2-", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenGenOpenRangeToWhenGenIsUnknownTheReturnEmptyList) { auto got = NEO::getTargetPlatformsForFatbinary("-gen2", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } TEST_F(OclocFatBinaryGetTargetPlatformsForFatbinary, GivenGenClosedRangeWhenAnyOfGensIsUnknownThenReturnEmptyList) { auto allEnabledPlatforms = NEO::getAllSupportedTargetPlatforms(); auto platform0 = allEnabledPlatforms[0]; auto gfxCore0 = 
NEO::hardwareInfoTable[platform0]->platform.eRenderCoreFamily; std::string genName = NEO::familyName[gfxCore0]; genName[0] = 'g'; // ocloc uses lower case auto got = NEO::getTargetPlatformsForFatbinary("gen2-" + genName, oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); got = NEO::getTargetPlatformsForFatbinary(genName + "-gen2", oclocArgHelperWithoutInput.get()); EXPECT_TRUE(got.empty()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/ocloc_fatbinary_tests.h000066400000000000000000000012311363734646600316740ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/offline_compiler/source/ocloc_fatbinary.h" #include "gtest/gtest.h" #include namespace NEO { class OclocFatBinaryGetTargetPlatformsForFatbinary : public ::testing::Test { public: OclocFatBinaryGetTargetPlatformsForFatbinary() { oclocArgHelperWithoutInput = std::make_unique(); oclocArgHelperWithoutInput->getPrinterRef() = MessagePrinter{true}; } std::unique_ptr oclocArgHelperWithoutInput; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/offline_compiler_tests.cpp000066400000000000000000001240221363734646600324110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "offline_compiler_tests.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "compiler_options.h" #include "environment.h" #include "gmock/gmock.h" #include "mock/mock_offline_compiler.h" #include #include extern Environment *gEnvironment; namespace NEO { std::string 
getCompilerOutputFileName(const std::string &fileName, const std::string &type) { std::string fName(fileName); fName.append("_"); fName.append(gEnvironment->familyNameWithType); fName.append("."); fName.append(type); return fName; } bool compilerOutputExists(const std::string &fileName, const std::string &type) { return fileExists(getCompilerOutputFileName(fileName, type)); } void compilerOutputRemove(const std::string &fileName, const std::string &type) { std::remove(getCompilerOutputFileName(fileName, type).c_str()); } TEST_F(MultiCommandTests, MultiCommandSuccessfulBuildTest) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "-multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); deleteFileWithArgs(); } TEST_F(MultiCommandTests, MultiCommandSuccessfulBuildWithOutputFileTest) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "-multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); for (int i = 0; i < numOfBuild; i++) { std::string outFileName = pMultiCommand->outDirForBuilds + "/build_no_" + std::to_string(i + 1); EXPECT_TRUE(compilerOutputExists(outFileName, "bc") || compilerOutputExists(outFileName, "spv")); EXPECT_TRUE(compilerOutputExists(outFileName, "gen")); 
EXPECT_TRUE(compilerOutputExists(outFileName, "bin")); } deleteFileWithArgs(); } TEST_F(MultiCommandTests, GoodMultiBuildTestWithspecifiedOutputDir) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "-multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-out_dir", "offline_compiler_test"}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); pMultiCommand = MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); for (int i = 0; i < numOfBuild; i++) { std::string outFileName = "offline_compiler_test/build_no_" + std::to_string(i + 1); EXPECT_TRUE(compilerOutputExists(outFileName, "bc") || compilerOutputExists(outFileName, "spv")); EXPECT_TRUE(compilerOutputExists(outFileName, "gen")); EXPECT_TRUE(compilerOutputExists(outFileName, "bin")); } deleteFileWithArgs(); delete pMultiCommand; } TEST_F(MultiCommandTests, LackOfTxtFileWithArgsMultiTest) { nameOfFileWithArgs = "test_files/ImANotExistedComandFile.txt"; std::vector argv = { "ocloc", "-multi", "test_files/ImANaughtyFile.txt", "-q", }; testing::internal::CaptureStdout(); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pMultiCommand); EXPECT_EQ(INVALID_FILE, retVal); DebugManager.flags.PrintDebugMessages.set(false); } TEST_F(MultiCommandTests, LackOfClFilePointedInTxtFileMultiTest) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "-multi", nameOfFileWithArgs.c_str(), "-q", }; std::vector singleArgs = { "-file", "test_files/ImANaughtyFile.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; createFileWithArgs(singleArgs, 
numOfBuild); testing::internal::CaptureStdout(); auto pMultiCommand = std::unique_ptr(MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get())); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pMultiCommand); EXPECT_EQ(INVALID_FILE, retVal); DebugManager.flags.PrintDebugMessages.set(false); deleteFileWithArgs(); } TEST_F(MultiCommandTests, GoodMultiBuildTestWithOutputFileListFlag) { nameOfFileWithArgs = "test_files/ImAMulitiComandMinimalGoodFile.txt"; std::vector argv = { "ocloc", "-multi", nameOfFileWithArgs.c_str(), "-q", "-output_file_list", "outFileList.txt", }; std::vector singleArgs = { "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int numOfBuild = 4; createFileWithArgs(singleArgs, numOfBuild); pMultiCommand = MultiCommand::create(argv, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pMultiCommand); EXPECT_EQ(CL_SUCCESS, retVal); outFileList = pMultiCommand->outputFileList; EXPECT_TRUE(fileExists(outFileList)); for (int i = 0; i < numOfBuild; i++) { std::string outFileName = pMultiCommand->outDirForBuilds + "/build_no_" + std::to_string(i + 1); EXPECT_TRUE(compilerOutputExists(outFileName, "bc") || compilerOutputExists(outFileName, "spv")); EXPECT_TRUE(compilerOutputExists(outFileName, "gen")); EXPECT_TRUE(compilerOutputExists(outFileName, "bin")); } deleteFileWithArgs(); deleteOutFileList(); delete pMultiCommand; } TEST_F(OfflineCompilerTests, GoodArgTest) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, TestExtensions) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; auto mockOfflineCompiler = 
std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string internalOptions = mockOfflineCompiler->getInternalOptions(); EXPECT_THAT(internalOptions, ::testing::HasSubstr(std::string("cl_khr_3d_image_writes"))); } TEST_F(OfflineCompilerTests, GoodBuildTest) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); testing::internal::CaptureStdout(); retVal = pOfflineCompiler->build(); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("copybuffer", "bc") || compilerOutputExists("copybuffer", "spv")); EXPECT_TRUE(compilerOutputExists("copybuffer", "gen")); EXPECT_TRUE(compilerOutputExists("copybuffer", "bin")); std::string buildLog = pOfflineCompiler->getBuildLog(); EXPECT_STREQ(buildLog.c_str(), ""); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GoodBuildTestWithLlvmText) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-llvm_text"}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("copybuffer", "ll")); EXPECT_TRUE(compilerOutputExists("copybuffer", "gen")); EXPECT_TRUE(compilerOutputExists("copybuffer", "bin")); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, WhenFclNotNeededDontLoadIt) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-spirv_input"}; MockOfflineCompiler offlineCompiler; 
auto ret = offlineCompiler.initialize(argv.size(), argv); EXPECT_EQ(0, ret); EXPECT_EQ(nullptr, offlineCompiler.fclDeviceCtx); EXPECT_NE(nullptr, offlineCompiler.igcDeviceCtx); } TEST_F(OfflineCompilerTests, GoodParseBinToCharArray) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); // clang-format off uint8_t binary[] = { 0x02, 0x23, 0x3, 0x40, 0x56, 0x7, 0x80, 0x90, 0x1, 0x03, 0x34, 0x5, 0x60, 0x78, 0x9, 0x66, 0xff, 0x10, 0x10, 0x10, 0x02, 0x23, 0x3, 0x40, 0x56, 0x7, 0x80, 0x90, 0x1, 0x03, 0x34, 0x5, 0x60, 0x78, 0x9, 0x66, 0xff, }; // clang-format on std::string familyNameWithType = gEnvironment->familyNameWithType; std::string fileName = "scheduler"; std::string retArray = pOfflineCompiler->parseBinAsCharArray(binary, sizeof(binary), fileName); std::string target = "#include \n" "#include \n\n" "size_t SchedulerBinarySize_" + familyNameWithType + " = 37;\n" "uint32_t SchedulerBinary_" + familyNameWithType + "[10] = {\n" " 0x40032302, 0x90800756, 0x05340301, 0x66097860, 0x101010ff, 0x40032302, 0x90800756, 0x05340301, \n" " 0x66097860, 0xff000000};\n\n" "#include \"shared/source/built_ins/registry/built_ins_registry.h\"\n\n" "namespace NEO {\n" "static RegisterEmbeddedResource registerSchedulerBin(\n" " \"" + gEnvironment->familyNameWithType + "_0_scheduler.builtin_kernel.bin\",\n" " (const char *)SchedulerBinary_" + familyNameWithType + ",\n" " SchedulerBinarySize_" + familyNameWithType + ");\n" "}\n"; EXPECT_EQ(retArray, target); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GoodBuildTestWithCppFile) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-cpp_file"}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); 
EXPECT_EQ(CL_SUCCESS, retVal); retVal = pOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("copybuffer", "cpp")); EXPECT_TRUE(compilerOutputExists("copybuffer", "bc") || compilerOutputExists("copybuffer", "spv")); EXPECT_TRUE(compilerOutputExists("copybuffer", "gen")); EXPECT_TRUE(compilerOutputExists("copybuffer", "bin")); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GoodBuildTestWithOutputDir) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str(), "-out_dir", "offline_compiler_test"}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/copybuffer", "bc") || compilerOutputExists("offline_compiler_test/copybuffer", "spv")); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/copybuffer", "gen")); EXPECT_TRUE(compilerOutputExists("offline_compiler_test/copybuffer", "bin")); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, PrintUsage) { std::vector argv = { "ocloc", "--help"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_STRNE("", output.c_str()); EXPECT_EQ(PRINT_USAGE, retVal); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, NaughtyArgTest_File) { DebugManager.flags.PrintDebugMessages.set(true); std::vector argv = { "ocloc", "-file", "test_files/ImANaughtyFile.cl", "-device", gEnvironment->devicePrefix.c_str()}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = 
testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(INVALID_FILE, retVal); DebugManager.flags.PrintDebugMessages.set(false); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, NaughtyArgTest_Flag) { std::vector argv = { "ocloc", "-n", "test_files/ImANaughtyFile.cl", "-device", gEnvironment->devicePrefix.c_str()}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(INVALID_COMMAND_LINE, retVal); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, NaughtyArgTest_NumArgs) { std::vector argvA = { "ocloc", "-file", }; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argvA.size(), argvA, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(INVALID_COMMAND_LINE, retVal); delete pOfflineCompiler; std::vector argvB = { "ocloc", "-file", "test_files/ImANaughtyFile.cl", "-device"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argvB.size(), argvB, true, retVal, oclocArgHelperWithoutInput.get()); output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(INVALID_COMMAND_LINE, retVal); delete pOfflineCompiler; } TEST_F(OfflineCompilerTests, GivenNonexistantDeviceWhenCompilingThenExitWithErrorMsg) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", "foobar"}; testing::internal::CaptureStdout(); pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); std::string output = testing::internal::GetCapturedStdout(); 
EXPECT_STREQ(output.c_str(), "Error: Cannot get HW Info for device foobar.\n"); EXPECT_EQ(nullptr, pOfflineCompiler); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_F(OfflineCompilerTests, NaughtyKernelTest) { std::vector argv = { "ocloc", "-file", "test_files/shouldfail.cl", "-device", gEnvironment->devicePrefix.c_str()}; pOfflineCompiler = OfflineCompiler::create(argv.size(), argv, true, retVal, oclocArgHelperWithoutInput.get()); EXPECT_NE(nullptr, pOfflineCompiler); EXPECT_EQ(CL_SUCCESS, retVal); gEnvironment->SetInputFileName("invalid_file_name"); testing::internal::CaptureStdout(); retVal = pOfflineCompiler->build(); EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, retVal); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ(output.c_str(), ""); std::string buildLog = pOfflineCompiler->getBuildLog(); EXPECT_STRNE(buildLog.c_str(), ""); gEnvironment->SetInputFileName("copybuffer"); delete pOfflineCompiler; } TEST(OfflineCompilerTest, parseCmdLine) { std::vector argv = { "ocloc", NEO::CompilerOptions::greaterThan4gbBuffersRequired.data()}; MockOfflineCompiler *mockOfflineCompiler = new MockOfflineCompiler(); ASSERT_NE(nullptr, mockOfflineCompiler); testing::internal::CaptureStdout(); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string output = testing::internal::GetCapturedStdout(); std::string internalOptions = mockOfflineCompiler->getInternalOptions(); size_t found = internalOptions.find(argv.begin()[1]); EXPECT_NE(std::string::npos, found); delete mockOfflineCompiler; } TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDebugSettingsAreParsedThenOptimizationStringIsPresent) { DebugManagerStateRestore stateRestore; MockOfflineCompiler mockOfflineCompiler; DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1); mockOfflineCompiler.parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.getInternalOptions(); size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg); 
EXPECT_NE(std::string::npos, found); } TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDebugSettingsAreParsedThenOptimizationStringIsSetToDefault) { DebugManagerStateRestore stateRestore; MockOfflineCompiler mockOfflineCompiler; DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(-1); mockOfflineCompiler.parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.getInternalOptions(); size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg); EXPECT_NE(std::string::npos, found); } TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationDisableddWhenDeviceNameIsSetToBDW) { DebugManagerStateRestore stateRestore; MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "bdw"; mockOfflineCompiler.parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.getInternalOptions(); size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg); EXPECT_EQ(std::string::npos, found); } TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationEnabledWhenDeviceNameIsSetToSKL) { DebugManagerStateRestore stateRestore; MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "skl"; mockOfflineCompiler.parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.getInternalOptions(); size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg); EXPECT_NE(std::string::npos, found); } TEST(OfflineCompilerTest, givenStatelessToStatefullOptimizationDisabledWhenDeviceNameIsSetToSKLAndDebugSettingsAreDisabled) { DebugManagerStateRestore stateRestore; MockOfflineCompiler mockOfflineCompiler; mockOfflineCompiler.deviceName = "skl"; DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0); mockOfflineCompiler.parseDebugSettings(); std::string internalOptions = mockOfflineCompiler.getInternalOptions(); size_t found = internalOptions.find(NEO::CompilerOptions::hasBufferOffsetArg); EXPECT_EQ(std::string::npos, 
found); } TEST(OfflineCompilerTest, getStringWithinDelimiters) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); size_t srcSize = 0; auto ptrSrc = loadDataFromFile("test_files/copy_buffer_to_buffer.builtin_kernel", srcSize); const std::string src = ptrSrc.get(); ASSERT_EQ(srcSize, src.size()); // assert that pattern was found ASSERT_NE(std::string::npos, src.find("R\"===(")); ASSERT_NE(std::string::npos, src.find(")===\"")); auto dst = mockOfflineCompiler->getStringWithinDelimiters(src); size_t size = dst.size(); char nullChar = '\0'; EXPECT_EQ(nullChar, dst[size - 1]); // expect that pattern was not found EXPECT_EQ(std::string::npos, dst.find("R\"===(")); EXPECT_EQ(std::string::npos, dst.find(")===\"")); } TEST(OfflineCompilerTest, convertToPascalCase) { EXPECT_EQ(0, strcmp("AuxTranslation", convertToPascalCase("aux_translation").c_str())); EXPECT_EQ(0, strcmp("CopyBufferToBuffer", convertToPascalCase("copy_buffer_to_buffer").c_str())); EXPECT_EQ(0, strcmp("CopyBufferRect", convertToPascalCase("copy_buffer_rect").c_str())); EXPECT_EQ(0, strcmp("FillBuffer", convertToPascalCase("fill_buffer").c_str())); EXPECT_EQ(0, strcmp("CopyBufferToImage3d", convertToPascalCase("copy_buffer_to_image3d").c_str())); EXPECT_EQ(0, strcmp("CopyImage3dToBuffer", convertToPascalCase("copy_image3d_to_buffer").c_str())); EXPECT_EQ(0, strcmp("CopyImageToImage1d", convertToPascalCase("copy_image_to_image1d").c_str())); EXPECT_EQ(0, strcmp("CopyImageToImage2d", convertToPascalCase("copy_image_to_image2d").c_str())); EXPECT_EQ(0, strcmp("CopyImageToImage3d", convertToPascalCase("copy_image_to_image3d").c_str())); EXPECT_EQ(0, strcmp("FillImage1d", convertToPascalCase("fill_image1d").c_str())); EXPECT_EQ(0, strcmp("FillImage2d", convertToPascalCase("fill_image2d").c_str())); EXPECT_EQ(0, strcmp("FillImage3d", convertToPascalCase("fill_image3d").c_str())); EXPECT_EQ(0, strcmp("VmeBlockMotionEstimateIntel", 
convertToPascalCase("vme_block_motion_estimate_intel").c_str())); EXPECT_EQ(0, strcmp("VmeBlockAdvancedMotionEstimateCheckIntel", convertToPascalCase("vme_block_advanced_motion_estimate_check_intel").c_str())); EXPECT_EQ(0, strcmp("VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel", convertToPascalCase("vme_block_advanced_motion_estimate_bidirectional_check_intel").c_str())); EXPECT_EQ(0, strcmp("Scheduler", convertToPascalCase("scheduler").c_str())); EXPECT_EQ(0, strcmp("", convertToPascalCase("").c_str())); } TEST(OfflineCompilerTest, getHardwareInfo) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); EXPECT_EQ(CL_INVALID_DEVICE, mockOfflineCompiler->getHardwareInfo("invalid")); EXPECT_EQ(CL_SUCCESS, mockOfflineCompiler->getHardwareInfo(gEnvironment->devicePrefix.c_str())); } TEST(OfflineCompilerTest, storeBinary) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); const char pSrcBinary[] = {0x01, 0x02, 0x03, 0x04, 0x05}; const size_t srcBinarySize = sizeof(pSrcBinary); char *pDstBinary = new char[srcBinarySize]; size_t dstBinarySize = srcBinarySize; mockOfflineCompiler->storeBinary(pDstBinary, dstBinarySize, pSrcBinary, srcBinarySize); EXPECT_EQ(0, memcmp(pDstBinary, pSrcBinary, srcBinarySize)); delete[] pDstBinary; } TEST(OfflineCompilerTest, updateBuildLog) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); std::string ErrorString = "Error: undefined variable"; mockOfflineCompiler->updateBuildLog(ErrorString.c_str(), ErrorString.length()); EXPECT_EQ(0, ErrorString.compare(mockOfflineCompiler->getBuildLog())); std::string FinalString = "Build failure"; mockOfflineCompiler->updateBuildLog(FinalString.c_str(), FinalString.length()); EXPECT_EQ(0, (ErrorString + "\n" + FinalString).compare(mockOfflineCompiler->getBuildLog().c_str())); } TEST(OfflineCompilerTest, 
buildSourceCode) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); auto retVal = mockOfflineCompiler->buildSourceCode(); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, mockOfflineCompiler->getGenBinary()); EXPECT_EQ(0u, mockOfflineCompiler->getGenBinarySize()); retVal = mockOfflineCompiler->buildSourceCode(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, mockOfflineCompiler->getGenBinary()); EXPECT_NE(0u, mockOfflineCompiler->getGenBinarySize()); } TEST(OfflineCompilerTest, GivenKernelWhenNoCharAfterKernelSourceThenBuildWithSuccess) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); auto retVal = mockOfflineCompiler->buildSourceCode(); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); std::vector argv = { "ocloc", "-file", "test_files/emptykernel.cl", "-device", gEnvironment->devicePrefix.c_str()}; retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); retVal = mockOfflineCompiler->buildSourceCode(); EXPECT_EQ(CL_SUCCESS, retVal); } TEST(OfflineCompilerTest, generateElfBinary) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); auto retVal = mockOfflineCompiler->generateElfBinary(); EXPECT_FALSE(retVal); iOpenCL::SProgramBinaryHeader binHeader; memset(&binHeader, 0, sizeof(binHeader)); binHeader.Magic = iOpenCL::MAGIC_CL; binHeader.Version = iOpenCL::CURRENT_ICBE_VERSION - 3; binHeader.Device = DEFAULT_PLATFORM::hwInfo.platform.eRenderCoreFamily; binHeader.GPUPointerSizeInBytes = 8; binHeader.NumberOfKernels = 0; binHeader.SteppingId = 0; binHeader.PatchListSize = 0; size_t binSize = sizeof(iOpenCL::SProgramBinaryHeader); 
mockOfflineCompiler->storeGenBinary(&binHeader, binSize); EXPECT_TRUE(mockOfflineCompiler->elfBinary.empty()); retVal = mockOfflineCompiler->generateElfBinary(); EXPECT_TRUE(retVal); EXPECT_FALSE(mockOfflineCompiler->elfBinary.empty()); } TEST(OfflineCompilerTest, givenLlvmInputOptionPassedWhenCmdLineParsedThenInputFileLlvmIsSetTrue) { std::vector argv = { "ocloc", "-llvm_input"}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); testing::internal::CaptureStdout(); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); bool llvmFileOption = mockOfflineCompiler->inputFileLlvm; EXPECT_TRUE(llvmFileOption); } TEST(OfflineCompilerTest, givenDefaultOfflineCompilerObjectWhenNoOptionsAreChangedThenLlvmInputFileIsFalse) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); bool llvmFileOption = mockOfflineCompiler->inputFileLlvm; EXPECT_FALSE(llvmFileOption); } TEST(OfflineCompilerTest, givenSpirvInputOptionPassedWhenCmdLineParsedThenInputFileSpirvIsSetTrue) { std::vector argv = {"ocloc", "-spirv_input"}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); testing::internal::CaptureStdout(); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); EXPECT_TRUE(mockOfflineCompiler->inputFileSpirV); } TEST(OfflineCompilerTest, givenDefaultOfflineCompilerObjectWhenNoOptionsAreChangedThenSpirvInputFileIsFalse) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); EXPECT_FALSE(mockOfflineCompiler->inputFileSpirV); } TEST(OfflineCompilerTest, givenIntermediatedRepresentationInputWhenBuildSourceCodeIsCalledThenProperTranslationContextIsUsed) { MockOfflineCompiler mockOfflineCompiler; std::vector argv = { "ocloc", "-file", 
"test_files/emptykernel.cl", "-device", gEnvironment->devicePrefix.c_str()}; auto retVal = mockOfflineCompiler.initialize(argv.size(), argv); auto mockIgcOclDeviceCtx = new NEO::MockIgcOclDeviceCtx(); mockOfflineCompiler.igcDeviceCtx = CIF::RAII::Pack(mockIgcOclDeviceCtx); ASSERT_EQ(CL_SUCCESS, retVal); mockOfflineCompiler.inputFileSpirV = true; retVal = mockOfflineCompiler.buildSourceCode(); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1U, mockIgcOclDeviceCtx->requestedTranslationCtxs.size()); NEO::MockIgcOclDeviceCtx::TranslationOpT expectedTranslation = {IGC::CodeType::spirV, IGC::CodeType::oclGenBin}; ASSERT_EQ(expectedTranslation, mockIgcOclDeviceCtx->requestedTranslationCtxs[0]); mockOfflineCompiler.inputFileSpirV = false; mockOfflineCompiler.inputFileLlvm = true; mockIgcOclDeviceCtx->requestedTranslationCtxs.clear(); retVal = mockOfflineCompiler.buildSourceCode(); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(1U, mockIgcOclDeviceCtx->requestedTranslationCtxs.size()); expectedTranslation = {IGC::CodeType::llvmBc, IGC::CodeType::oclGenBin}; ASSERT_EQ(expectedTranslation, mockIgcOclDeviceCtx->requestedTranslationCtxs[0]); } TEST(OfflineCompilerTest, givenBinaryInputThenDontTruncateSourceAtFirstZero) { std::vector argvLlvm = {"ocloc", "-llvm_input", "-file", "test_files/binary_with_zeroes", "-device", gEnvironment->devicePrefix.c_str()}; auto mockOfflineCompiler = std::make_unique(); mockOfflineCompiler->initialize(argvLlvm.size(), argvLlvm); EXPECT_LT(0U, mockOfflineCompiler->sourceCode.size()); std::vector argvSpirV = {"ocloc", "-spirv_input", "-file", "test_files/binary_with_zeroes", "-device", gEnvironment->devicePrefix.c_str()}; mockOfflineCompiler = std::make_unique(); mockOfflineCompiler->initialize(argvSpirV.size(), argvSpirV); EXPECT_LT(0U, mockOfflineCompiler->sourceCode.size()); } TEST(OfflineCompilerTest, givenSpirvInputFileWhenCmdLineHasOptionsThenCorrectOptionsArePassedToCompiler) { char data[] = {1, 2, 3, 4, 5, 6, 7, 8}; MockCompilerDebugVars 
igcDebugVars(gEnvironment->igcDebugVars); igcDebugVars.binaryToReturn = data; igcDebugVars.binaryToReturnSize = sizeof(data); NEO::setIgcDebugVars(igcDebugVars); MockOfflineCompiler mockOfflineCompiler; std::vector argv = { "ocloc", "-file", "test_files/emptykernel.cl", "-spirv_input", "-device", gEnvironment->devicePrefix.c_str(), "-options", "test_options_passed"}; auto retVal = mockOfflineCompiler.initialize(argv.size(), argv); auto mockIgcOclDeviceCtx = new NEO::MockIgcOclDeviceCtx(); mockOfflineCompiler.igcDeviceCtx = CIF::RAII::Pack(mockIgcOclDeviceCtx); ASSERT_EQ(CL_SUCCESS, retVal); mockOfflineCompiler.inputFileSpirV = true; retVal = mockOfflineCompiler.buildSourceCode(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("test_options_passed", mockOfflineCompiler.options.c_str()); NEO::setIgcDebugVars(gEnvironment->igcDebugVars); } TEST(OfflineCompilerTest, givenOutputFileOptionWhenSourceIsCompiledThenOutputFileHasCorrectName) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-output", "myOutputFileName", "-device", gEnvironment->devicePrefix.c_str()}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "gen")); retVal = mockOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "gen")); compilerOutputRemove("myOutputFileName", "bc"); compilerOutputRemove("myOutputFileName", "spv"); compilerOutputRemove("myOutputFileName", "bin"); 
compilerOutputRemove("myOutputFileName", "gen"); } TEST(OfflineCompilerTest, givenDebugDataAvailableWhenSourceIsBuiltThenDebugDataFileIsCreated) { std::vector argv = { "ocloc", "-file", "test_files/copybuffer.cl", "-output", "myOutputFileName", "-device", gEnvironment->devicePrefix.c_str()}; char debugData[10]; MockCompilerDebugVars igcDebugVars(gEnvironment->igcDebugVars); igcDebugVars.debugDataToReturn = debugData; igcDebugVars.debugDataToReturnSize = sizeof(debugData); NEO::setIgcDebugVars(igcDebugVars); auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "gen")); EXPECT_FALSE(compilerOutputExists("myOutputFileName", "dbg")); retVal = mockOfflineCompiler->build(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bc") || compilerOutputExists("myOutputFileName", "spv")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "bin")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "gen")); EXPECT_TRUE(compilerOutputExists("myOutputFileName", "dbg")); compilerOutputRemove("myOutputFileName", "bc"); compilerOutputRemove("myOutputFileName", "spv"); compilerOutputRemove("myOutputFileName", "bin"); compilerOutputRemove("myOutputFileName", "gen"); compilerOutputRemove("myOutputFileName", "dbg"); NEO::setIgcDebugVars(gEnvironment->igcDebugVars); } TEST(OfflineCompilerTest, givenInternalOptionsWhenCmdLineParsedThenOptionsAreAppendedToInternalOptionsString) { std::vector argv = { "ocloc", "-internal_options", "myInternalOptions"}; auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); 
testing::internal::CaptureStdout(); mockOfflineCompiler->parseCommandLine(argv.size(), argv); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); std::string internalOptions = mockOfflineCompiler->getInternalOptions(); EXPECT_THAT(internalOptions, ::testing::HasSubstr(std::string("myInternalOptions"))); } TEST(OfflineCompilerTest, givenInputOptionsAndInternalOptionsFilesWhenOfflineCompilerIsInitializedThenCorrectOptionsAreSetAndRemainAfterBuild) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); ASSERT_TRUE(fileExists("test_files/shouldfail_options.txt")); ASSERT_TRUE(fileExists("test_files/shouldfail_internal_options.txt")); std::vector argv = { "ocloc", "-q", "-file", "test_files/shouldfail.cl", "-device", gEnvironment->devicePrefix.c_str()}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); auto &options = mockOfflineCompiler->getOptions(); auto &internalOptions = mockOfflineCompiler->getInternalOptions(); EXPECT_STREQ(options.c_str(), "-shouldfailOptions"); EXPECT_TRUE(internalOptions.find("-shouldfailInternalOptions") != std::string::npos); mockOfflineCompiler->build(); EXPECT_STREQ(options.c_str(), "-shouldfailOptions"); EXPECT_TRUE(internalOptions.find("-shouldfailInternalOptions") != std::string::npos); } TEST(OfflineCompilerTest, givenInputOptionsAndOclockOptionsFileWithForceStosOptWhenOfflineCompilerIsInitializedThenCompilerOptionGreaterThan4gbBuffersRequiredIsNotApplied) { auto mockOfflineCompiler = std::unique_ptr(new MockOfflineCompiler()); ASSERT_NE(nullptr, mockOfflineCompiler); ASSERT_TRUE(fileExists("test_files/stateful_copy_buffer_ocloc_options.txt")); std::vector argv = { "ocloc", "-q", "-file", "test_files/stateful_copy_buffer.cl", "-device", gEnvironment->devicePrefix.c_str()}; int retVal = mockOfflineCompiler->initialize(argv.size(), argv); EXPECT_EQ(CL_SUCCESS, retVal); 
mockOfflineCompiler->build(); auto &internalOptions = mockOfflineCompiler->getInternalOptions(); size_t found = internalOptions.find(NEO::CompilerOptions::greaterThan4gbBuffersRequired); EXPECT_EQ(std::string::npos, found); } TEST(OfflineCompilerTest, givenNonExistingFilenameWhenUsedToReadOptionsThenReadOptionsFromFileReturnsFalse) { std::string options; std::string file("non_existing_file"); ASSERT_FALSE(fileExists(file.c_str())); auto helper = std::make_unique(); bool result = OfflineCompiler::readOptionsFromFile(options, file, helper.get()); EXPECT_FALSE(result); } TEST(OfflineCompilerTest, givenEmptyDirectoryWhenGenerateFilePathIsCalledThenTrailingSlashIsNotAppended) { std::string path = generateFilePath("", "a", "b"); EXPECT_STREQ("ab", path.c_str()); } TEST(OfflineCompilerTest, givenNonEmptyDirectoryWithTrailingSlashWhenGenerateFilePathIsCalledThenAdditionalTrailingSlashIsNotAppended) { std::string path = generateFilePath("d/", "a", "b"); EXPECT_STREQ("d/ab", path.c_str()); } TEST(OfflineCompilerTest, givenNonEmptyDirectoryWithoutTrailingSlashWhenGenerateFilePathIsCalledThenTrailingSlashIsAppended) { std::string path = generateFilePath("d", "a", "b"); EXPECT_STREQ("d/ab", path.c_str()); } TEST(OfflineCompilerTest, givenSpirvPathWhenGenerateFilePathForIrIsCalledThenProperExtensionIsReturned) { MockOfflineCompiler compiler; compiler.isSpirV = true; compiler.outputDirectory = "d"; std::string path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.spv", path.c_str()); } TEST(OfflineCompilerTest, givenLlvmBcPathWhenGenerateFilePathForIrIsCalledThenProperExtensionIsReturned) { MockOfflineCompiler compiler; compiler.isSpirV = false; compiler.outputDirectory = "d"; std::string path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.bc", path.c_str()); } TEST(OfflineCompilerTest, givenLlvmTextPathWhenGenerateFilePathForIrIsCalledThenProperExtensionIsReturned) { MockOfflineCompiler compiler; compiler.isSpirV = false; compiler.useLlvmText = true; 
compiler.outputDirectory = "d"; std::string path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.ll", path.c_str()); compiler.isSpirV = true; path = compiler.generateFilePathForIr("a"); EXPECT_STREQ("d/a.ll", path.c_str()); } TEST(OfflineCompilerTest, givenDisabledOptsSuffixWhenGenerateOptsSuffixIsCalledThenEmptyStringIsReturned) { MockOfflineCompiler compiler; compiler.options = "A B C"; compiler.useOptionsSuffix = false; std::string suffix = compiler.generateOptsSuffix(); EXPECT_STREQ("", suffix.c_str()); } TEST(OfflineCompilerTest, givenEnabledOptsSuffixWhenGenerateOptsSuffixIsCalledThenEscapedStringIsReturned) { MockOfflineCompiler compiler; compiler.options = "A B C"; compiler.useOptionsSuffix = true; std::string suffix = compiler.generateOptsSuffix(); EXPECT_STREQ("A_B_C", suffix.c_str()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/offline_compiler_tests.h000066400000000000000000000036751363734646600320700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/multi_command.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "gtest/gtest.h" #include #include namespace NEO { class OfflineCompilerTests : public ::testing::Test { public: OfflineCompilerTests() : pOfflineCompiler(nullptr), retVal(SUCCESS) { oclocArgHelperWithoutInput = std::make_unique(); // ctor } OfflineCompiler *pOfflineCompiler; int retVal; std::unique_ptr oclocArgHelperWithoutInput; }; class MultiCommandTests : public ::testing::Test { public: MultiCommandTests() : pMultiCommand(nullptr), retVal(SUCCESS) { oclocArgHelperWithoutInput = std::make_unique(); } void createFileWithArgs(const std::vector &, int numOfBuild); void deleteFileWithArgs(); void deleteOutFileList(); MultiCommand *pMultiCommand = nullptr; std::string nameOfFileWithArgs; std::string outFileList; int retVal; std::unique_ptr 
oclocArgHelperWithoutInput; }; void MultiCommandTests::createFileWithArgs(const std::vector &singleArgs, int numOfBuild) { std::ofstream myfile(nameOfFileWithArgs); if (myfile.is_open()) { for (int i = 0; i < numOfBuild; i++) { for (auto singleArg : singleArgs) myfile << singleArg + " "; myfile << std::endl; } myfile.close(); } else printf("Unable to open file\n"); } void MultiCommandTests::deleteFileWithArgs() { if (remove(nameOfFileWithArgs.c_str()) != 0) perror("Error deleting file"); } void MultiCommandTests::deleteOutFileList() { if (remove(outFileList.c_str()) != 0) perror("Error deleting file"); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/000077500000000000000000000000001363734646600300175ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/CMakeLists.txt000066400000000000000000000045641363734646600325700ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(CLOC_SEGFAULT_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/segfault_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/os_interface/os_library.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/helpers/debug_helpers.cpp ) if(WIN32) list(APPEND CLOC_SEGFAULT_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/windows/safety_guard_caller_windows.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.h ${OCLOC_DIRECTORY}/source/utilities/windows/safety_guard_windows.h ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.cpp ${OCLOC_DIRECTORY}/source/utilities/windows/seh_exception.h ) else() list(APPEND CLOC_SEGFAULT_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/linux/safety_guard_caller_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.cpp 
${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.h ) endif() add_executable(ocloc_segfault_test ${CLOC_SEGFAULT_TEST_SOURCES}) target_link_libraries(ocloc_segfault_test gmock-gtest) if(MSVC) target_compile_options(ocloc_segfault_test PRIVATE /Zi) set_property(TARGET ocloc_segfault_test APPEND PROPERTY LINK_FLAGS /DEBUG) target_link_libraries(ocloc_segfault_test dbghelp) endif() if(UNIX) target_link_libraries(ocloc_segfault_test dl pthread) endif() set(CLOC_SEGFAULT_TEST_INCLUDES ${NEO_SOURCE_DIR} ${THIRD_PARTY_DIR} ) if(CMAKE_COMPILER_IS_GNUCC) target_compile_definitions(ocloc_segfault_test PRIVATE SKIP_SEGFAULT_TEST=1) endif() get_property(CLOC_FOLDER TARGET ocloc PROPERTY FOLDER) set_property(TARGET ocloc_segfault_test PROPERTY FOLDER ${CLOC_FOLDER}) target_include_directories(ocloc_segfault_test BEFORE PRIVATE ${CLOC_SEGFAULT_TEST_INCLUDES}) create_project_source_tree(ocloc_segfault_test ${NEO_SOURCE_DIR}/offline_compiler ${NEO_SOURCE_DIR}/runtime ${NEO_SOURCE_DIR}/unit_tests) add_custom_target(run_ocloc_segfault_test ALL DEPENDS ocloc_segfault_test ) add_custom_command( TARGET run_ocloc_segfault_test POST_BUILD COMMAND echo Running ocloc_segfault_test COMMAND ocloc_segfault_test WORKING_DIRECTORY ${TargetDir} ) set_property(TARGET run_ocloc_segfault_test PROPERTY FOLDER ${CLOC_FOLDER}) compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/linux/000077500000000000000000000000001363734646600311565ustar00rootroot00000000000000safety_guard_caller_linux.cpp000066400000000000000000000010441363734646600370200ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/linux/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/utilities/linux/safety_guard_linux.h" #include "../segfault_helper.h" void generateSegfaultWithSafetyGuard(SegfaultHelper *segfaultHelper) { SafetyGuardLinux safetyGuard; 
safetyGuard.onSigSegv = segfaultHelper->segfaultHandlerCallback; int retVal = 0; safetyGuard.call(segfaultHelper, &SegfaultHelper::generateSegfault, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/main.cpp000066400000000000000000000021161363734646600314470ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "segfault_helper.h" #include using namespace std; extern void generateSegfaultWithSafetyGuard(SegfaultHelper *segfaultHelper); int main(int argc, char **argv) { int retVal = 0; ::testing::InitGoogleTest(&argc, argv); retVal = RUN_ALL_TESTS(); return retVal; } void captureAndCheckStdOut() { string callstack = ::testing::internal::GetCapturedStdout(); EXPECT_THAT(callstack, ::testing::HasSubstr(string("Callstack"))); EXPECT_THAT(callstack, ::testing::HasSubstr(string("cloc_segfault_test"))); EXPECT_THAT(callstack, ::testing::HasSubstr(string("generateSegfaultWithSafetyGuard"))); } TEST(SegFault, givenCallWithSafetyGuardWhenSegfaultHappensThenCallstackIsPrintedToStdOut) { #if !defined(SKIP_SEGFAULT_TEST) ::testing::internal::CaptureStdout(); SegfaultHelper segfault; segfault.segfaultHandlerCallback = captureAndCheckStdOut; generateSegfaultWithSafetyGuard(&segfault); #endif } compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/segfault_helper.h000066400000000000000000000011131363734646600333350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #if defined(__clang__) #define NO_SANITIZE __attribute__((no_sanitize("address", "undefined"))) #elif defined(__GNUC__) #define NO_SANITIZE __attribute__((no_sanitize_address)) #else #define NO_SANITIZE #endif class SegfaultHelper { public: int NO_SANITIZE generateSegfault() { int *pointer = reinterpret_cast(0); *pointer = 0; return 0; } typedef void 
(*callbackFunction)(); callbackFunction segfaultHandlerCallback = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/windows/000077500000000000000000000000001363734646600315115ustar00rootroot00000000000000safety_guard_caller_windows.cpp000066400000000000000000000011331363734646600377050ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/offline_compiler/segfault_test/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/utilities/windows/safety_guard_windows.h" #include "opencl/test/unit_test/offline_compiler/segfault_test/segfault_helper.h" void generateSegfaultWithSafetyGuard(SegfaultHelper *segfaultHelper) { SafetyGuardWindows safetyGuard; safetyGuard.onExcept = segfaultHelper->segfaultHandlerCallback; int retVal = 0; safetyGuard.call(segfaultHelper, &SegfaultHelper::generateSegfault, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/options_unit_tests.cpp000066400000000000000000000003671363734646600263200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include namespace NEO { const char *folderAUB = "aub_out"; uint32_t initialHardwareTag = static_cast(0xFFFFFF00); } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/000077500000000000000000000000001363734646600242735ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/CMakeLists.txt000066400000000000000000000022131363734646600270310ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_os_interface_base ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/device_factory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_tests.h 
${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters.h ${CMAKE_CURRENT_SOURCE_DIR}/os_context_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_library_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_memory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_gen_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_tests.cpp ) get_property(NEO_CORE_OS_INTERFACE_AUB_TESTS GLOBAL PROPERTY NEO_CORE_OS_INTERFACE_AUB_TESTS) list(APPEND IGDRCL_SRCS_tests_os_interface_base ${NEO_CORE_OS_INTERFACE_AUB_TESTS}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_base}) set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_os_interface_base ${IGDRCL_SRCS_tests_os_interface_base}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/device_factory_tests.cpp000066400000000000000000000256711363734646600312220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_library.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" #include "hw_device_id.h" #include using namespace NEO; OsLibrary *setAdapterInfo(const PLATFORM *platform, const GT_SYSTEM_INFO *gtSystemInfo, uint64_t gpuAddressSpace); struct DeviceFactoryTest : public ::testing::Test { public: void SetUp() override { const HardwareInfo *hwInfo = defaultHwInfo.get(); executionEnvironment = 
platform()->peekExecutionEnvironment(); mockGdiDll = setAdapterInfo(&hwInfo->platform, &hwInfo->gtSystemInfo, hwInfo->capabilityTable.gpuAddressSpace); } void TearDown() override { delete mockGdiDll; } protected: OsLibrary *mockGdiDll; ExecutionEnvironment *executionEnvironment; }; TEST_F(DeviceFactoryTest, PrepareDeviceEnvironments_Check_HwInfo_Platform) { const HardwareInfo *refHwinfo = defaultHwInfo.get(); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); EXPECT_TRUE(success); const HardwareInfo *hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); EXPECT_EQ(refHwinfo->platform.eDisplayCoreFamily, hwInfo->platform.eDisplayCoreFamily); } TEST_F(DeviceFactoryTest, overrideKmdNotifySettings) { DebugManagerStateRestore stateRestore; bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); ASSERT_TRUE(success); auto refEnableKmdNotify = hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify; auto refDelayKmdNotifyMicroseconds = hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds; auto refEnableQuickKmdSleep = hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep; auto refDelayQuickKmdSleepMicroseconds = hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds; auto refEnableQuickKmdSleepForSporadicWaits = hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits; auto refDelayQuickKmdSleepForSporadicWaitsMicroseconds = hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds; DebugManager.flags.OverrideEnableKmdNotify.set(!refEnableKmdNotify); DebugManager.flags.OverrideKmdNotifyDelayMicroseconds.set(static_cast(refDelayKmdNotifyMicroseconds) + 10); DebugManager.flags.OverrideEnableQuickKmdSleep.set(!refEnableQuickKmdSleep); 
DebugManager.flags.OverrideQuickKmdSleepDelayMicroseconds.set(static_cast(refDelayQuickKmdSleepMicroseconds) + 11); DebugManager.flags.OverrideEnableQuickKmdSleepForSporadicWaits.set(!refEnableQuickKmdSleepForSporadicWaits); DebugManager.flags.OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds.set(static_cast(refDelayQuickKmdSleepForSporadicWaitsMicroseconds) + 12); platformsImpl.clear(); executionEnvironment = constructPlatform()->peekExecutionEnvironment(); success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); EXPECT_EQ(!refEnableKmdNotify, hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(refDelayKmdNotifyMicroseconds + 10, hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_EQ(!refEnableQuickKmdSleep, hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(refDelayQuickKmdSleepMicroseconds + 11, hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); EXPECT_EQ(!refEnableQuickKmdSleepForSporadicWaits, hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleepForSporadicWaits); EXPECT_EQ(refDelayQuickKmdSleepForSporadicWaitsMicroseconds + 12, hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepForSporadicWaitsMicroseconds); } TEST_F(DeviceFactoryTest, getEngineTypeDebugOverride) { DebugManagerStateRestore dbgRestorer; int32_t debugEngineType = 2; DebugManager.flags.NodeOrdinal.set(debugEngineType); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); int32_t actualEngineType = static_cast(hwInfo->capabilityTable.defaultEngineType); EXPECT_EQ(debugEngineType, actualEngineType); } TEST_F(DeviceFactoryTest, givenPointerToHwInfoWhenGetDevicedCalledThenRequiedSurfaceSizeIsSettedProperly) { bool success = 
DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); EXPECT_EQ(hwInfo->gtSystemInfo.CsrSizeInMb * MemoryConstants::megaByte, hwInfo->capabilityTable.requiredPreemptionSurfaceSize); } TEST_F(DeviceFactoryTest, givenCreateMultipleRootDevicesDebugFlagWhenPrepareDeviceEnvironmentsIsCalledThenNumberOfReturnedDevicesIsEqualToDebugVariable) { DebugManagerStateRestore stateRestore; auto requiredDeviceCount = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requiredDeviceCount); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); ASSERT_TRUE(success); EXPECT_EQ(requiredDeviceCount, executionEnvironment->rootDeviceEnvironments.size()); } TEST_F(DeviceFactoryTest, givenDebugFlagSetWhenPrepareDeviceEnvironmentsIsCalledThenOverrideGpuAddressSpace) { DebugManagerStateRestore restore; DebugManager.flags.OverrideGpuAddressSpace.set(12); bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(maxNBitValue(12), executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.gpuAddressSpace); } TEST_F(DeviceFactoryTest, givenDebugFlagSetWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenOverrideGpuAddressSpace) { DebugManagerStateRestore restore; DebugManager.flags.OverrideGpuAddressSpace.set(12); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(*executionEnvironment); EXPECT_TRUE(success); EXPECT_EQ(maxNBitValue(12), executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo()->capabilityTable.gpuAddressSpace); } TEST_F(DeviceFactoryTest, whenPrepareDeviceEnvironmentsIsCalledThenAllRootDeviceEnvironmentMembersAreInitialized) { DebugManagerStateRestore stateRestore; auto requiredDeviceCount = 2u; DebugManager.flags.CreateMultipleRootDevices.set(requiredDeviceCount); MockExecutionEnvironment 
executionEnvironment(defaultHwInfo.get(), true, requiredDeviceCount); bool success = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); ASSERT_TRUE(success); std::set memoryOperationHandlers; std::set osInterfaces; for (auto rootDeviceIndex = 0u; rootDeviceIndex < requiredDeviceCount; rootDeviceIndex++) { auto rootDeviceEnvironment = static_cast(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex].get()); auto memoryOperationInterface = rootDeviceEnvironment->memoryOperationsInterface.get(); EXPECT_NE(nullptr, memoryOperationInterface); EXPECT_EQ(memoryOperationHandlers.end(), memoryOperationHandlers.find(memoryOperationInterface)); memoryOperationHandlers.insert(memoryOperationInterface); auto osInterface = rootDeviceEnvironment->osInterface.get(); EXPECT_NE(nullptr, osInterface); EXPECT_EQ(osInterfaces.end(), osInterfaces.find(osInterface)); osInterfaces.insert(osInterface); } } TEST_F(DeviceFactoryTest, givenInvalidHwConfigStringPrepareDeviceEnvironmentsForProductFamilyOverrideReturnsFalse) { DebugManagerStateRestore stateRestore; DebugManager.flags.HardwareInfoOverride.set("1x3"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); bool success = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment); EXPECT_FALSE(success); } TEST_F(DeviceFactoryTest, givenValidHwConfigStringPrepareDeviceEnvironmentsForProductFamilyOverrideReturnsTrue) { DebugManagerStateRestore stateRestore; DebugManager.flags.HardwareInfoOverride.set("1x1x1"); MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); EXPECT_ANY_THROW(DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment)); } TEST_F(DeviceFactoryTest, givenPrepareDeviceEnvironmentsCallWhenItIsDoneThenOsInterfaceIsAllocated) { bool success = DeviceFactory::prepareDeviceEnvironments(*executionEnvironment); EXPECT_TRUE(success); EXPECT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->osInterface); } 
TEST(DeviceFactory, givenHwModeSelectedWhenIsHwModeSelectedIsCalledThenTrueIsReturned) { DebugManagerStateRestore stateRestore; constexpr int32_t hwModes[] = {-1, CommandStreamReceiverType::CSR_HW, CommandStreamReceiverType::CSR_HW_WITH_AUB}; for (const auto &hwMode : hwModes) { DebugManager.flags.SetCommandStreamReceiver.set(hwMode); EXPECT_TRUE(DeviceFactory::isHwModeSelected()); } } TEST(DeviceFactory, givenNonHwModeSelectedWhenIsHwModeSelectedIsCalledThenFalseIsReturned) { DebugManagerStateRestore stateRestore; constexpr int32_t nonHwModes[] = {CommandStreamReceiverType::CSR_AUB, CommandStreamReceiverType::CSR_TBX, CommandStreamReceiverType::CSR_TBX_WITH_AUB}; for (const auto &nonHwMode : nonHwModes) { DebugManager.flags.SetCommandStreamReceiver.set(nonHwMode); EXPECT_FALSE(DeviceFactory::isHwModeSelected()); } } TEST(DiscoverDevices, whenDiscoverDevicesAndForceDeviceIdIsDifferentFromTheExistingDeviceThenReturnNullptr) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("invalid"); ExecutionEnvironment executionEnviornment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnviornment); EXPECT_TRUE(hwDeviceIds.empty()); } TEST(DiscoverDevices, whenDiscoverDevicesAndForceDeviceIdIsDifferentFromTheExistingDeviceThenPrepareDeviceEnvironmentsReturnsFalse) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("invalid"); ExecutionEnvironment executionEnviornment; auto result = DeviceFactory::prepareDeviceEnvironments(executionEnviornment); EXPECT_FALSE(result); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/hw_info_config_tests.cpp000066400000000000000000000064351363734646600312070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/hw_info_config_tests.h" #include "shared/source/helpers/hw_helper.h" #include "opencl/source/cl_device/cl_device.h" using namespace NEO; using namespace std; 
void HwInfoConfigTest::SetUp() { PlatformFixture::SetUp(); pInHwInfo = pPlatform->getClDevice(0)->getHardwareInfo(); testPlatform = &pInHwInfo.platform; testSkuTable = &pInHwInfo.featureTable; testWaTable = &pInHwInfo.workaroundTable; testSysInfo = &pInHwInfo.gtSystemInfo; outHwInfo = {}; } void HwInfoConfigTest::TearDown() { PlatformFixture::TearDown(); } TEST_F(HwInfoConfigTest, givenHwInfoConfigSetHwInfoValuesFromConfigStringReturnsSetsProperValues) { uint64_t hwInfoConfig = 0x0; bool success = parseHwInfoConfigString("1x1x1", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x100010001u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 1u); success = parseHwInfoConfigString("7x1x1", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x700010001u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 7u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 7u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 7u); success = parseHwInfoConfigString("1x7x1", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x100070001u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 7u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 7u); success = parseHwInfoConfigString("1x1x7", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(hwInfoConfig, 0x100010007u); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 1u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 7u); success = parseHwInfoConfigString("2x4x16", hwInfoConfig); EXPECT_TRUE(success); EXPECT_EQ(0x200040010u, hwInfoConfig); setHwInfoValuesFromConfig(hwInfoConfig, outHwInfo); EXPECT_EQ(outHwInfo.gtSystemInfo.SliceCount, 2u); 
EXPECT_EQ(outHwInfo.gtSystemInfo.SubSliceCount, 8u); EXPECT_EQ(outHwInfo.gtSystemInfo.EUCount, 128u); } TEST_F(HwInfoConfigTest, givenInvalidHwInfoSetHwInfoValuesFromConfigString) { uint64_t hwInfoConfig = 0x0; bool success = parseHwInfoConfigString("1", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x3", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("65536x3x8", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x65536x8", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x3x65536", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("65535x65535x8", hwInfoConfig); EXPECT_FALSE(success); success = parseHwInfoConfigString("1x65535x65535", hwInfoConfig); EXPECT_FALSE(success); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/hw_info_config_tests.h000066400000000000000000000012521363734646600306440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; using namespace std; struct HwInfoConfigTest : public ::testing::Test, public PlatformFixture { void SetUp() override; void TearDown() override; HardwareInfo pInHwInfo; HardwareInfo outHwInfo; PLATFORM *testPlatform = nullptr; FeatureTable *testSkuTable = nullptr; WorkaroundTable *testWaTable = nullptr; GT_SYSTEM_INFO *testSysInfo = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/000077500000000000000000000000001363734646600254325ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/.clang-tidy000066400000000000000000000031001363734646600274600ustar00rootroot00000000000000--- Checks: 
'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-optin.performance.Padding,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-cplusplus.NewDeleteLeaks,-clang-analyzer-optin.cplusplus.VirtualCall' # WarningsAsErrors: '.*' HeaderFilterRegex: '/runtime/|/core/|/offline_compiler/' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... 
compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/CMakeLists.txt000066400000000000000000000044271363734646600302010ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_os_interface_linux ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/allocator_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_env_reader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_factory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_factory_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/device_os_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_buffer_object_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_mm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_engine_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_gem_close_worker_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_mapper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_memory_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_tests.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_memory_manager_allocate_in_device_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_neo_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_os_memory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_residency_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_linux_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/linux_create_command_queue_with_properties_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_time_linux.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_linux.h 
${CMAKE_CURRENT_SOURCE_DIR}/os_interface_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_time_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/self_lib_lin.cpp ) if(UNIX) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_linux}) endif() set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_os_interface_linux ${IGDRCL_SRCS_tests_os_interface_linux}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/allocator_helper.cpp000066400000000000000000000005531363734646600314600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/helpers/basic_math.h" namespace NEO { size_t getSizeToReserve() { // 4 x sizeof(Heap32) + 2 x sizeof(Standard/Standard64k) return (4 * 4 + 2 * 4) * GB; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/allocator_helper_tests.cpp000066400000000000000000000006121363734646600326760ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/basic_math.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "gtest/gtest.h" TEST(AllocatorHelper, givenExpectedSizeToReserveWhenGetSizeToReserveCalledThenExpectedValueReturned) { EXPECT_EQ((4 * 4 + 2 * 4) * GB, NEO::getSizeToReserve()); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/create_drm_memory_manager.cpp000066400000000000000000000020351363734646600333250ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/os_interface.h" #include 
"shared/source/os_interface/os_interface.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" namespace NEO { std::unique_ptr MemoryManager::createMemoryManager(ExecutionEnvironment &executionEnvironment) { if (ultHwConfig.forceOsAgnosticMemoryManager) { return std::make_unique(executionEnvironment); } return std::make_unique(gemCloseWorkerMode::gemCloseWorkerInactive, DebugManager.flags.EnableForcePin.get(), true, executionEnvironment); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/debug_env_reader.cpp000066400000000000000000000045151363734646600314230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/debug_env_reader.h" #include "test.h" #include namespace NEO { class DebugEnvReaderTests : public ::testing::Test { public: void SetUp() override { evr = SettingsReader::createOsReader(false, ""); EXPECT_NE(nullptr, evr); } void TearDown() override { delete evr; } SettingsReader *evr = nullptr; }; TEST_F(DebugEnvReaderTests, IfVariableIsSetReturnSetValue) { int32_t ret; std::string retString; std::string defaultString = "Default Value"; std::string setString = "Expected Value"; const char *testingVariableValue = "1234"; setenv("TestingVariable", testingVariableValue, 0); ret = evr->getSetting("TestingVariable", 1); EXPECT_EQ(1234, ret); setenv("TestingVariable", setString.c_str(), 1); retString = evr->getSetting("TestingVariable", defaultString); EXPECT_EQ(0, retString.compare(setString)); unsetenv("TestingVariable"); ret = evr->getSetting("TestingVariable", 1); EXPECT_EQ(1, ret); } TEST_F(DebugEnvReaderTests, IfVariableIsNotSetReturnDefaultValue) { int32_t ret; std::string retString; std::string defaultString = "Default Value"; unsetenv("TestingVariable"); ret = evr->getSetting("TestingVariable", 1); EXPECT_EQ(1, ret); retString = 
evr->getSetting("TestingVariable", defaultString); EXPECT_EQ(0, retString.compare(defaultString)); } TEST_F(DebugEnvReaderTests, CheckBoolEnvVariable) { bool ret; bool defaultValue = true; bool expectedValue = false; setenv("TestingVariable", "0", 0); ret = evr->getSetting("TestingVariable", defaultValue); EXPECT_EQ(expectedValue, ret); unsetenv("TestingVariable"); ret = evr->getSetting("TestingVariable", defaultValue); EXPECT_EQ(defaultValue, ret); } TEST_F(DebugEnvReaderTests, appSpecificLacationReturnClCacheLocation) { std::string appSpecific; appSpecific = "cl_cache_dir"; EXPECT_EQ(appSpecific, evr->appSpecificLocation(appSpecific)); } TEST_F(DebugEnvReaderTests, givenEnvironmentVariableReaderWhenCreateOsReaderWithStringThenNotNullPointer) { std::unique_ptr evr(SettingsReader::createOsReader(false, "")); EXPECT_NE(nullptr, evr); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h000066400000000000000000000265571363734646600337000ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "drm/i915_drm.h" #include "engine_node.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include #include #include #define RENDER_DEVICE_NAME_MATCHER ::testing::StrEq("/dev/dri/renderD128") using NEO::constructPlatform; using NEO::Drm; using NEO::HwDeviceId; using NEO::RootDeviceEnvironment; static const int mockFd = 33; class DrmMockImpl : public Drm { public: DrmMockImpl(int fd) : Drm(std::make_unique(fd), 
*constructPlatform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]){}; MOCK_METHOD2(ioctl, int(unsigned long request, void *arg)); }; class DrmMockSuccess : public Drm { public: DrmMockSuccess() : DrmMockSuccess(*constructPlatform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]) {} DrmMockSuccess(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd), rootDeviceEnvironment) {} int ioctl(unsigned long request, void *arg) override { return 0; }; }; class DrmMockFail : public Drm { public: DrmMockFail() : Drm(std::make_unique(mockFd), *constructPlatform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]) {} int ioctl(unsigned long request, void *arg) override { return -1; }; }; class DrmMockTime : public DrmMockSuccess { public: int ioctl(unsigned long request, void *arg) override { drm_i915_reg_read *reg = reinterpret_cast(arg); reg->val = getVal() << 32; return 0; }; uint64_t getVal() { static uint64_t val = 0; return ++val; } }; class DrmMockCustom : public Drm { public: struct IoctlResExt { int32_t no; int32_t res; IoctlResExt(int32_t no, int32_t res) : no(no), res(res) {} }; class Ioctls { public: void reset() { total = 0; execbuffer2 = 0; gemUserptr = 0; gemCreate = 0; gemSetTiling = 0; gemGetTiling = 0; primeFdToHandle = 0; handleToPrimeFd = 0; gemMmap = 0; gemSetDomain = 0; gemWait = 0; gemClose = 0; regRead = 0; getParam = 0; contextGetParam = 0; contextCreate = 0; contextDestroy = 0; } std::atomic total; std::atomic execbuffer2; std::atomic gemUserptr; std::atomic gemCreate; std::atomic gemSetTiling; std::atomic gemGetTiling; std::atomic primeFdToHandle; std::atomic handleToPrimeFd; std::atomic gemMmap; std::atomic gemSetDomain; std::atomic gemWait; std::atomic gemClose; std::atomic regRead; std::atomic getParam; std::atomic contextGetParam; std::atomic contextCreate; std::atomic contextDestroy; }; std::atomic ioctl_res; Ioctls ioctl_cnt; Ioctls ioctl_expected; std::atomic ioctl_res_ext; void testIoctls() { if 
(this->ioctl_expected.total == -1) return; #define NEO_IOCTL_EXPECT_EQ(PARAM) \ if (this->ioctl_expected.PARAM >= 0) { \ EXPECT_EQ(this->ioctl_expected.PARAM, this->ioctl_cnt.PARAM); \ } NEO_IOCTL_EXPECT_EQ(execbuffer2); NEO_IOCTL_EXPECT_EQ(gemUserptr); NEO_IOCTL_EXPECT_EQ(gemCreate); NEO_IOCTL_EXPECT_EQ(gemSetTiling); NEO_IOCTL_EXPECT_EQ(gemGetTiling); NEO_IOCTL_EXPECT_EQ(primeFdToHandle); NEO_IOCTL_EXPECT_EQ(handleToPrimeFd); NEO_IOCTL_EXPECT_EQ(gemMmap); NEO_IOCTL_EXPECT_EQ(gemSetDomain); NEO_IOCTL_EXPECT_EQ(gemWait); NEO_IOCTL_EXPECT_EQ(gemClose); NEO_IOCTL_EXPECT_EQ(regRead); NEO_IOCTL_EXPECT_EQ(getParam); NEO_IOCTL_EXPECT_EQ(contextGetParam); NEO_IOCTL_EXPECT_EQ(contextCreate); NEO_IOCTL_EXPECT_EQ(contextDestroy); #undef NEO_IOCTL_EXPECT_EQ } //DRM_IOCTL_I915_GEM_EXECBUFFER2 drm_i915_gem_execbuffer2 execBuffer = {0}; //First exec object drm_i915_gem_exec_object2 execBufferBufferObjects = {0}; //DRM_IOCTL_I915_GEM_CREATE __u64 createParamsSize = 0; __u32 createParamsHandle = 0; //DRM_IOCTL_I915_GEM_SET_TILING __u32 setTilingMode = 0; __u32 setTilingHandle = 0; __u32 setTilingStride = 0; //DRM_IOCTL_I915_GEM_GET_TILING __u32 getTilingModeOut = I915_TILING_NONE; __u32 getTilingHandleIn = 0; //DRM_IOCTL_PRIME_FD_TO_HANDLE __u32 outputHandle = 0; __s32 inputFd = 0; //DRM_IOCTL_PRIME_HANDLE_TO_FD __u32 inputHandle = 0; __s32 outputFd = 0; __s32 inputFlags = 0; //DRM_IOCTL_I915_GEM_USERPTR __u32 returnHandle = 0; //DRM_IOCTL_I915_GEM_MMAP __u32 mmapHandle = 0; __u32 mmapPad = 0; __u64 mmapOffset = 0; __u64 mmapSize = 0; __u64 mmapAddrPtr = 0x7F4000001000; __u64 mmapFlags = 0; //DRM_IOCTL_I915_GEM_SET_DOMAIN __u32 setDomainHandle = 0; __u32 setDomainReadDomains = 0; __u32 setDomainWriteDomain = 0; //DRM_IOCTL_I915_GETPARAM drm_i915_getparam_t recordedGetParam = {0}; int getParamRetValue = 0; //DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM drm_i915_gem_context_param recordedGetContextParam = {0}; __u64 getContextParamRetValue = 0; int errnoValue = 0; int ioctl(unsigned long 
request, void *arg) override { auto ext = ioctl_res_ext.load(); //store flags switch (request) { case DRM_IOCTL_I915_GEM_EXECBUFFER2: { drm_i915_gem_execbuffer2 *execbuf = (drm_i915_gem_execbuffer2 *)arg; this->execBuffer = *execbuf; this->execBufferBufferObjects = *reinterpret_cast(this->execBuffer.buffers_ptr); ioctl_cnt.execbuffer2++; } break; case DRM_IOCTL_I915_GEM_USERPTR: { auto *userPtrParams = (drm_i915_gem_userptr *)arg; userPtrParams->handle = returnHandle; returnHandle++; ioctl_cnt.gemUserptr++; } break; case DRM_IOCTL_I915_GEM_CREATE: { auto *createParams = (drm_i915_gem_create *)arg; this->createParamsSize = createParams->size; this->createParamsHandle = createParams->handle = 1u; ioctl_cnt.gemCreate++; } break; case DRM_IOCTL_I915_GEM_SET_TILING: { auto *setTilingParams = (drm_i915_gem_set_tiling *)arg; setTilingMode = setTilingParams->tiling_mode; setTilingHandle = setTilingParams->handle; setTilingStride = setTilingParams->stride; ioctl_cnt.gemSetTiling++; } break; case DRM_IOCTL_I915_GEM_GET_TILING: { auto *getTilingParams = (drm_i915_gem_get_tiling *)arg; getTilingParams->tiling_mode = getTilingModeOut; getTilingHandleIn = getTilingParams->handle; ioctl_cnt.gemGetTiling++; } break; case DRM_IOCTL_PRIME_FD_TO_HANDLE: { auto *primeToHandleParams = (drm_prime_handle *)arg; //return BO primeToHandleParams->handle = outputHandle; inputFd = primeToHandleParams->fd; ioctl_cnt.primeFdToHandle++; } break; case DRM_IOCTL_PRIME_HANDLE_TO_FD: { auto *handleToPrimeParams = (drm_prime_handle *)arg; //return FD inputHandle = handleToPrimeParams->handle; inputFlags = handleToPrimeParams->flags; handleToPrimeParams->fd = outputFd; ioctl_cnt.handleToPrimeFd++; } break; case DRM_IOCTL_I915_GEM_MMAP: { auto mmapParams = (drm_i915_gem_mmap *)arg; mmapHandle = mmapParams->handle; mmapPad = mmapParams->pad; mmapOffset = mmapParams->offset; mmapSize = mmapParams->size; mmapFlags = mmapParams->flags; mmapParams->addr_ptr = mmapAddrPtr; ioctl_cnt.gemMmap++; } break; case 
DRM_IOCTL_I915_GEM_SET_DOMAIN: { auto setDomainParams = (drm_i915_gem_set_domain *)arg; setDomainHandle = setDomainParams->handle; setDomainReadDomains = setDomainParams->read_domains; setDomainWriteDomain = setDomainParams->write_domain; ioctl_cnt.gemSetDomain++; } break; case DRM_IOCTL_I915_GEM_WAIT: ioctl_cnt.gemWait++; break; case DRM_IOCTL_GEM_CLOSE: ioctl_cnt.gemClose++; break; case DRM_IOCTL_I915_REG_READ: ioctl_cnt.regRead++; break; case DRM_IOCTL_I915_GETPARAM: { ioctl_cnt.contextGetParam++; auto getParam = (drm_i915_getparam_t *)arg; recordedGetParam = *getParam; *getParam->value = getParamRetValue; } break; case DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM: { } break; case DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM: { ioctl_cnt.contextGetParam++; auto getContextParam = (drm_i915_gem_context_param *)arg; recordedGetContextParam = *getContextParam; getContextParam->value = getContextParamRetValue; } break; case DRM_IOCTL_I915_GEM_CONTEXT_CREATE: { auto contextCreateParam = reinterpret_cast(arg); contextCreateParam->ctx_id = ++ioctl_cnt.contextCreate; } break; case DRM_IOCTL_I915_GEM_CONTEXT_DESTROY: { ioctl_cnt.contextDestroy++; } break; default: ioctlExtra(request, arg); } if (ext->no != -1 && ext->no == ioctl_cnt.total.load()) { ioctl_cnt.total.fetch_add(1); return ext->res; } ioctl_cnt.total.fetch_add(1); return ioctl_res.load(); }; virtual int ioctlExtra(unsigned long request, void *arg) { switch (request) { default: std::cout << "unexpected IOCTL: " << std::hex << request << std::endl; UNRECOVERABLE_IF(true); break; } return 0; } IoctlResExt NONE = {-1, 0}; void reset() { ioctl_res = 0; ioctl_cnt.reset(); ioctl_expected.reset(); ioctl_res_ext = &NONE; } DrmMockCustom() : Drm(std::make_unique(mockFd), *constructPlatform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]) { reset(); ioctl_expected.contextCreate = static_cast(NEO::HwHelper::get(NEO::defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*NEO::defaultHwInfo).size()); 
ioctl_expected.contextDestroy = ioctl_expected.contextCreate.load(); } int getErrno() override { return errnoValue; } }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/device_command_stream_tests.cpp000066400000000000000000000060211363734646600336670ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/os_interface/linux/device_command_stream.inl" #include "opencl/source/os_interface/linux/drm_command_stream.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "test.h" #include "gtest/gtest.h" #include using namespace NEO; struct DeviceCommandStreamLeaksTest : ::testing::Test { void SetUp() override { HardwareInfo *hwInfo = nullptr; executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); MockAubCenterFixture::setMockAubCenter(*executionEnvironment->rootDeviceEnvironments[0]); } ExecutionEnvironment *executionEnvironment; }; HWTEST_F(DeviceCommandStreamLeaksTest, Create) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0)); DrmMockSuccess mockDrm; EXPECT_NE(nullptr, ptr); } HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultDrmCsrWhenItIsCreatedThenGemCloseWorkerInactiveModeIsSelected) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); 
EXPECT_EQ(drmCsr->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerActive); } HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultDrmCsrWithAubDumWhenItIsCreatedThenGemCloseWorkerInactiveModeIsSelected) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(true, *executionEnvironment, 0)); auto drmCsrWithAubDump = (CommandStreamReceiverWithAUBDump> *)ptr.get(); EXPECT_EQ(drmCsrWithAubDump->peekGemCloseWorkerOperationMode(), gemCloseWorkerMode::gemCloseWorkerActive); auto aubCSR = static_cast> *>(ptr.get())->aubCSR.get(); EXPECT_NE(nullptr, aubCSR); } HWTEST_F(DeviceCommandStreamLeaksTest, givenDefaultDrmCsrWhenOsInterfaceIsNullptrThenValidateDrm) { std::unique_ptr ptr(DeviceCommandStreamReceiver::create(false, *executionEnvironment, 0)); auto drmCsr = (DrmCommandStreamReceiver *)ptr.get(); EXPECT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->osInterface); EXPECT_EQ(drmCsr->getOSInterface()->get()->getDrm(), executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getDrm()); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/device_factory_tests.cpp000066400000000000000000000040271363734646600323510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/device_factory_tests.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/unit_test/helpers/default_hw_info.h" TEST_F(DeviceFactoryLinuxTest, PrepareDeviceEnvironmentsCheckEUCntSSCnt) { const HardwareInfo *refHwinfo = defaultHwInfo.get(); pDrm->StoredEUVal = 11; pDrm->StoredSSVal = 8; bool success = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); EXPECT_TRUE(success); EXPECT_NE(hwInfo, 
nullptr); EXPECT_EQ(refHwinfo->platform.eDisplayCoreFamily, hwInfo->platform.eDisplayCoreFamily); EXPECT_EQ((int)hwInfo->gtSystemInfo.EUCount, 11); EXPECT_EQ((int)hwInfo->gtSystemInfo.SubSliceCount, 8); //temporararily return GT2. EXPECT_EQ(1u, hwInfo->featureTable.ftrGT2); } TEST_F(DeviceFactoryLinuxTest, PrepareDeviceEnvironmentsDrmCreateFailedConfigureHwInfo) { pDrm->StoredRetValForDeviceID = -1; bool success = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); EXPECT_FALSE(success); pDrm->StoredRetValForDeviceID = 0; } TEST_F(DeviceFactoryLinuxTest, givenGetDeviceCallWhenItIsDoneThenOsInterfaceIsAllocatedAndItContainDrm) { bool success = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); EXPECT_TRUE(success); EXPECT_NE(nullptr, executionEnvironment.rootDeviceEnvironments[0]->osInterface); EXPECT_NE(nullptr, pDrm); EXPECT_EQ(pDrm, executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->getDrm()); } TEST_F(DeviceFactoryLinuxTest, whenDrmIsNotCretedThenPrepareDeviceEnvironmentsFails) { delete pDrm; pDrm = nullptr; bool success = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); EXPECT_FALSE(success); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/device_factory_tests.h000066400000000000000000000017641363734646600320230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/device_factory.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "test.h" #include "gtest/gtest.h" namespace NEO { extern Drm **pDrmToReturnFromCreateFunc; }; // namespace NEO using namespace NEO; struct DeviceFactoryLinuxTest : public ::testing::Test { 
void SetUp() override { pDrm = new DrmMock; pDrmToReturnFromCreateFunc = reinterpret_cast(&pDrm); pDrm->setGtType(GTTYPE_GT2); } void TearDown() override { } VariableBackup drmBackup{&pDrmToReturnFromCreateFunc}; DrmMock *pDrm; MockExecutionEnvironment executionEnvironment; }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/device_os_tests.cpp000066400000000000000000000113151363734646600313210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using namespace ::testing; namespace NEO { TEST(DeviceOsTest, GivenDefaultClDeviceWhenCheckingForOsSpecificExtensionsThenCorrectExtensionsAreSet) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); auto pClDevice = new ClDevice{*pDevice, platform()}; std::string extensionString(pClDevice->getDeviceInfo().deviceExtensions); EXPECT_THAT(extensionString, Not(testing::HasSubstr(std::string("cl_intel_dx9_media_sharing ")))); EXPECT_THAT(extensionString, Not(testing::HasSubstr(std::string("cl_khr_dx9_media_sharing ")))); EXPECT_THAT(extensionString, Not(testing::HasSubstr(std::string("cl_khr_d3d10_sharing ")))); EXPECT_THAT(extensionString, Not(testing::HasSubstr(std::string("cl_khr_d3d11_sharing ")))); EXPECT_THAT(extensionString, Not(testing::HasSubstr(std::string("cl_intel_d3d11_nv12_media_sharing ")))); EXPECT_THAT(extensionString, 
Not(testing::HasSubstr(std::string("cl_intel_simultaneous_sharing ")))); delete pClDevice; } TEST(DeviceOsTest, supportedSimultaneousInterops) { auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector expected = {0}; EXPECT_TRUE(pDevice->simultaneousInterops == expected); } TEST(DeviceOsTest, DeviceCreationFail) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); EXPECT_THAT(pDevice, nullptr); } TEST(ApiOsTest, notSupportedApiTokens) { MockContext context; MockBuffer buffer; cl_bool boolVal; size_t size; auto retVal = context.getInfo(CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR, sizeof(cl_bool), &boolVal, &size); EXPECT_EQ(CL_INVALID_VALUE, retVal); void *paramVal = nullptr; retVal = buffer.getMemObjectInfo(CL_MEM_D3D10_RESOURCE_KHR, sizeof(void *), paramVal, &size); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST(ApiOsTest, notSupportedApiList) { MockContext context; EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clGetDeviceIDsFromDX9INTEL); EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clCreateFromDX9MediaSurfaceINTEL); EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clEnqueueAcquireDX9ObjectsINTEL); EXPECT_EQ(nullptr, context.dispatch.crtDispatch->clEnqueueReleaseDX9ObjectsINTEL); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clGetDeviceIDsFromDX9MediaAdapterKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromDX9MediaSurfaceKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueAcquireDX9MediaSurfacesKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueReleaseDX9MediaSurfacesKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clGetDeviceIDsFromD3D10KHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D10BufferKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D10Texture2DKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D10Texture3DKHR); EXPECT_EQ(nullptr, 
context.dispatch.icdDispatch->clEnqueueAcquireD3D10ObjectsKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueReleaseD3D10ObjectsKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clGetDeviceIDsFromD3D11KHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D11BufferKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D11Texture2DKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clCreateFromD3D11Texture3DKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueAcquireD3D11ObjectsKHR); EXPECT_EQ(nullptr, context.dispatch.icdDispatch->clEnqueueReleaseD3D11ObjectsKHR); } TEST(DeviceOsTest, DeviceCreationFailMidThreadPreemption) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); auto pDevice = MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()); EXPECT_THAT(pDevice, nullptr); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/driver_info_tests.cpp000066400000000000000000000021521363734646600316660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/driver_info.h" #include "gtest/gtest.h" #include #include namespace NEO { TEST(DriverInfo, GivenCreateDriverInfoWhenLinuxThenReturnNewInstance) { std::unique_ptr driverInfo(DriverInfo::create(nullptr)); EXPECT_NE(nullptr, driverInfo.get()); } TEST(DriverInfo, GivenDriverInfoWhenLinuxThenReturnDefault) { std::unique_ptr driverInfo(DriverInfo::create(nullptr)); std::string defaultName = "testName"; std::string defaultVersion = "testVersion"; auto resultName = driverInfo.get()->getDeviceName(defaultName); auto resultVersion = driverInfo.get()->getVersion(defaultVersion); EXPECT_STREQ(defaultName.c_str(), resultName.c_str()); EXPECT_STREQ(defaultVersion.c_str(), resultVersion.c_str()); } TEST(DriverInfo, 
givenGetMediaSharingSupportWhenLinuxThenReturnTrue) { std::unique_ptr driverInfo(DriverInfo::create(nullptr)); EXPECT_TRUE(driverInfo->getMediaSharingSupport()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_buffer_object_tests.cpp000066400000000000000000000146471363734646600330350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "test.h" #include "drm/i915_drm.h" #include using namespace NEO; class TestedBufferObject : public BufferObject { public: TestedBufferObject(Drm *drm) : BufferObject(drm, 1, 0) { } void tileBy(uint32_t mode) { this->tiling_mode = mode; } void fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_t drmContextId) override { BufferObject::fillExecObject(execObject, drmContextId); execObjectPointerFilled = &execObject; } void setSize(size_t size) { this->size = size; } drm_i915_gem_exec_object2 *execObjectPointerFilled = nullptr; }; class DrmBufferObjectFixture { public: std::unique_ptr mock; TestedBufferObject *bo; drm_i915_gem_exec_object2 execObjectsStorage[256]; void SetUp() { this->mock = std::make_unique(); ASSERT_NE(nullptr, this->mock); bo = new TestedBufferObject(this->mock.get()); ASSERT_NE(nullptr, bo); } void TearDown() { delete bo; if (this->mock->ioctl_expected.total >= 0) { EXPECT_EQ(this->mock->ioctl_expected.total, this->mock->ioctl_cnt.total); } } }; typedef Test DrmBufferObjectTest; TEST_F(DrmBufferObjectTest, exec) { mock->ioctl_expected.total = 1; mock->ioctl_res = 0; drm_i915_gem_exec_object2 execObjectsStorage = {}; auto ret = bo->exec(0, 0, 0, false, 1, nullptr, 0u, &execObjectsStorage); EXPECT_EQ(mock->ioctl_res, ret); EXPECT_EQ(0u, mock->execBuffer.flags); } TEST_F(DrmBufferObjectTest, exec_ioctlFailed) { mock->ioctl_expected.total = 1; 
mock->ioctl_res = -1; mock->errnoValue = EFAULT; drm_i915_gem_exec_object2 execObjectsStorage = {}; EXPECT_EQ(EFAULT, bo->exec(0, 0, 0, false, 1, nullptr, 0u, &execObjectsStorage)); } TEST_F(DrmBufferObjectTest, setTiling_success) { mock->ioctl_expected.total = 1; //set_tiling auto ret = bo->setTiling(I915_TILING_X, 0); EXPECT_TRUE(ret); } TEST_F(DrmBufferObjectTest, setTiling_theSameTiling) { mock->ioctl_expected.total = 0; //set_tiling bo->tileBy(I915_TILING_X); auto ret = bo->setTiling(I915_TILING_X, 0); EXPECT_TRUE(ret); } TEST_F(DrmBufferObjectTest, setTiling_ioctlFailed) { mock->ioctl_expected.total = 1; //set_tiling mock->ioctl_res = -1; auto ret = bo->setTiling(I915_TILING_X, 0); EXPECT_FALSE(ret); } TEST_F(DrmBufferObjectTest, givenAddressThatWhenSizeIsAddedCrosses32BitBoundaryWhenExecIsCalledThen48BitFlagIsSet) { drm_i915_gem_exec_object2 execObject; memset(&execObject, 0, sizeof(execObject)); bo->setAddress(((uint64_t)1u << 32) - 0x1000u); bo->setSize(0x1000); bo->fillExecObject(execObject, 1); //base address + size > size of 32bit address space EXPECT_TRUE(execObject.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); } TEST_F(DrmBufferObjectTest, givenAddressThatWhenSizeIsAddedWithin32BitBoundaryWhenExecIsCalledThen48BitFlagSet) { drm_i915_gem_exec_object2 execObject; memset(&execObject, 0, sizeof(execObject)); bo->setAddress(((uint64_t)1u << 32) - 0x1000u); bo->setSize(0xFFF); bo->fillExecObject(execObject, 1); //base address + size < size of 32bit address space EXPECT_TRUE(execObject.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); } TEST_F(DrmBufferObjectTest, onPinIoctlFailed) { std::unique_ptr buff(new uint32_t[1024]); mock->ioctl_expected.total = 1; mock->ioctl_res = -1; this->mock->errnoValue = EINVAL; std::unique_ptr boToPin(new TestedBufferObject(this->mock.get())); ASSERT_NE(nullptr, boToPin.get()); bo->setAddress(reinterpret_cast(buff.get())); BufferObject *boArray[1] = {boToPin.get()}; auto ret = bo->pin(boArray, 1, 1); EXPECT_EQ(EINVAL, ret); } 
TEST(DrmBufferObjectSimpleTest, givenInvalidBoWhenPinIsCalledThenErrorIsReturned) { std::unique_ptr buff(new uint32_t[256]); std::unique_ptr mock(new DrmMockCustom); ASSERT_NE(nullptr, mock.get()); std::unique_ptr bo(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, bo.get()); // fail DRM_IOCTL_I915_GEM_EXECBUFFER2 in pin mock->ioctl_res = -1; std::unique_ptr boToPin(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, boToPin.get()); bo->setAddress(reinterpret_cast(buff.get())); mock->errnoValue = EFAULT; BufferObject *boArray[1] = {boToPin.get()}; auto ret = bo->pin(boArray, 1, 1); EXPECT_EQ(EFAULT, ret); } TEST(DrmBufferObjectSimpleTest, givenBufferObjectWhenConstructedWithASizeThenTheSizeIsInitialized) { std::unique_ptr drmMock(new DrmMockCustom); std::unique_ptr bo(new BufferObject(drmMock.get(), 1, 0x1000)); EXPECT_EQ(0x1000u, bo->peekSize()); } TEST(DrmBufferObjectSimpleTest, givenArrayOfBosWhenPinnedThenAllBosArePinned) { std::unique_ptr buff(new uint32_t[256]); std::unique_ptr mock(new DrmMockCustom); ASSERT_NE(nullptr, mock.get()); std::unique_ptr bo(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, bo.get()); mock->ioctl_res = 0; std::unique_ptr boToPin(new TestedBufferObject(mock.get())); std::unique_ptr boToPin2(new TestedBufferObject(mock.get())); std::unique_ptr boToPin3(new TestedBufferObject(mock.get())); ASSERT_NE(nullptr, boToPin.get()); ASSERT_NE(nullptr, boToPin2.get()); ASSERT_NE(nullptr, boToPin3.get()); BufferObject *array[3] = {boToPin.get(), boToPin2.get(), boToPin3.get()}; bo->setAddress(reinterpret_cast(buff.get())); auto ret = bo->pin(array, 3, 1); EXPECT_EQ(mock->ioctl_res, ret); uint32_t bb_end = 0x05000000; EXPECT_EQ(buff[0], bb_end); EXPECT_LT(0u, mock->execBuffer.batch_len); EXPECT_EQ(4u, mock->execBuffer.buffer_count); // 3 bos to pin plus 1 exec bo EXPECT_EQ(reinterpret_cast(boToPin->execObjectPointerFilled), mock->execBuffer.buffers_ptr); EXPECT_NE(nullptr, boToPin2->execObjectPointerFilled); EXPECT_NE(nullptr, 
boToPin3->execObjectPointerFilled); bo->setAddress(0llu); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_command_stream_fixture.h000066400000000000000000000137141363734646600332120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "test.h" #include "gmock/gmock.h" #include class DrmCommandStreamTest : public ::testing::Test { public: template void SetUpT() { //make sure this is disabled, we don't want to test this now DebugManager.flags.EnableForcePin.set(false); mock = new ::testing::NiceMock(mockFd); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(mock); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo(); osContext = std::make_unique(*mock, 0u, 1, HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false); csr = new DrmCommandStreamReceiver(executionEnvironment, 0, gemCloseWorkerMode::gemCloseWorkerActive); ASSERT_NE(nullptr, csr); csr->setupContext(*osContext); // Memory manager creates pinBB with ioctl, expect one call EXPECT_CALL(*mock, ioctl(::testing::_, ::testing::_)) .Times(1); memoryManager = new 
DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerActive, DebugManager.flags.EnableForcePin.get(), true, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); ::testing::Mock::VerifyAndClearExpectations(mock); //assert we have memory manager ASSERT_NE(nullptr, memoryManager); } template void TearDownT() { memoryManager->waitForDeletions(); memoryManager->peekGemCloseWorker()->close(true); delete csr; ::testing::Mock::VerifyAndClearExpectations(mock); // Memory manager closes pinBB with ioctl, expect one call EXPECT_CALL(*mock, ioctl(::testing::_, ::testing::_)) .Times(::testing::AtLeast(1)); } CommandStreamReceiver *csr = nullptr; DrmMemoryManager *memoryManager = nullptr; ::testing::NiceMock *mock; const int mockFd = 33; static const uint64_t alignment = MemoryConstants::allocationAlignment; DebugManagerStateRestore dbgState; MockExecutionEnvironment executionEnvironment; std::unique_ptr osContext; }; class DrmCommandStreamEnhancedTest : public ::testing::Test { public: std::unique_ptr dbgState; MockExecutionEnvironment *executionEnvironment; DrmMockCustom *mock; CommandStreamReceiver *csr = nullptr; DrmMemoryManager *mm = nullptr; std::unique_ptr device; template void SetUpT() { executionEnvironment = new MockExecutionEnvironment(); executionEnvironment->incRefInternal(); executionEnvironment->initGmm(); this->dbgState = std::make_unique(); //make sure this is disabled, we don't want to test this now DebugManager.flags.EnableForcePin.set(false); mock = new DrmMockCustom(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setDrm(mock); csr = new TestedDrmCommandStreamReceiver(*executionEnvironment); ASSERT_NE(nullptr, csr); mm = new DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, DebugManager.flags.EnableForcePin.get(), true, *executionEnvironment); ASSERT_NE(nullptr, mm); executionEnvironment->memoryManager.reset(mm); 
device.reset(MockDevice::create(executionEnvironment, 0u)); device->resetCommandStreamReceiver(csr); ASSERT_NE(nullptr, device); } template void TearDownT() { executionEnvironment->decRefInternal(); } template void makeResidentBufferObjects(const DrmAllocation *drmAllocation) { static_cast *>(csr)->makeResidentBufferObjects(drmAllocation, 0u); } template bool isResident(BufferObject *bo) const { auto &residency = this->getResidencyVector(); return std::find(residency.begin(), residency.end(), bo) != residency.end(); } template const std::vector &getResidencyVector() const { return static_cast *>(csr)->residency; } protected: class MockBufferObject : public BufferObject { friend DrmCommandStreamEnhancedTest; protected: MockBufferObject(Drm *drm, size_t size) : BufferObject(drm, 1, 0) { this->size = alignUp(size, 4096); } }; MockBufferObject *createBO(size_t size) { return new MockBufferObject(this->mock, size); } }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_command_stream_mm_tests.cpp000066400000000000000000000075751363734646600337220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "test.h" using namespace NEO; class DrmCommandStreamMMTest : public ::testing::Test { }; 
HWTEST_F(DrmCommandStreamMMTest, MMwithPinBB) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableForcePin.set(true); auto drm = new DrmMockCustom(); MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(drm); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); DrmCommandStreamReceiver csr(executionEnvironment, 0, gemCloseWorkerMode::gemCloseWorkerInactive); auto memoryManager = new TestedDrmMemoryManager(false, true, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); ASSERT_NE(nullptr, memoryManager); EXPECT_NE(nullptr, memoryManager->pinBBs[0]); } HWTEST_F(DrmCommandStreamMMTest, givenForcePinDisabledWhenMemoryManagerIsCreatedThenPinBBIsCreated) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableForcePin.set(false); auto drm = new DrmMockCustom(); MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(drm); DrmCommandStreamReceiver csr(executionEnvironment, 0, gemCloseWorkerMode::gemCloseWorkerInactive); auto memoryManager = new TestedDrmMemoryManager(false, true, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); ASSERT_NE(nullptr, memoryManager); EXPECT_NE(nullptr, memoryManager->pinBBs[0]); } HWTEST_F(DrmCommandStreamMMTest, givenExecutionEnvironmentWithMoreThanOneRootDeviceEnvWhenCreatingDrmMemoryManagerThenCreateAsManyPinBBs) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(2); for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < executionEnvironment.rootDeviceEnvironments.size(); 
rootDeviceIndex++) { executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->setHwInfo(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->get()->setDrm(new DrmMockCustom()); executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = std::make_unique(); } auto memoryManager = new TestedDrmMemoryManager(false, true, false, executionEnvironment); executionEnvironment.memoryManager.reset(memoryManager); ASSERT_NE(nullptr, memoryManager); for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < executionEnvironment.rootDeviceEnvironments.size(); rootDeviceIndex++) { EXPECT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); } } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests.cpp000066400000000000000000002062731363734646600332250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" #include 
"opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.h" #include "opencl/test/unit_test/mocks/mock_host_ptr_manager.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" #include "opencl/test/unit_test/os_interface/linux/drm_command_stream_fixture.h" #include "test.h" #include "drm/i915_drm.h" #include "gmock/gmock.h" using namespace NEO; ACTION_P(copyIoctlParam, dstValue) { *dstValue = *static_cast(arg1); return 0; }; HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenFlushStampWhenWaitCalledThenWaitForSpecifiedBoHandle) { FlushStamp handleToWait = 123; drm_i915_gem_wait expectedWait = {}; drm_i915_gem_wait calledWait = {}; expectedWait.bo_handle = static_cast(handleToWait); expectedWait.timeout_ns = -1; EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(1) .WillRepeatedly(copyIoctlParam(&calledWait)); csr->waitForFlushStamp(handleToWait); EXPECT_TRUE(memcmp(&expectedWait, &calledWait, sizeof(drm_i915_gem_wait)) == 0); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, makeResident) { EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(0); DrmAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 1024, (osHandle)1u, MemoryPool::MemoryNull); 
csr->makeResident(graphicsAllocation); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, makeResidentTwiceTheSame) { EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(0); DrmAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 1024, (osHandle)1u, MemoryPool::MemoryNull); csr->makeResident(graphicsAllocation); csr->makeResident(graphicsAllocation); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, makeResidentSizeZero) { EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(0); DrmAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0, (osHandle)1u, MemoryPool::MemoryNull); csr->makeResident(graphicsAllocation); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, makeResidentResized) { EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(0); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(0); DrmAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 1024, (osHandle)1u, MemoryPool::MemoryNull); DrmAllocation graphicsAllocation2(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 8192, (osHandle)1u, MemoryPool::MemoryNull); csr->makeResident(graphicsAllocation); csr->makeResident(graphicsAllocation2); } // matcher to check batch buffer offset and len on 
execbuffer2 call MATCHER_P2(BoExecFlushEq, batch_start_offset, batch_len, "") { drm_i915_gem_execbuffer2 *exec2 = (drm_i915_gem_execbuffer2 *)arg; return (exec2->batch_start_offset == batch_start_offset) && (exec2->batch_len == batch_len); } // matcher to check DrmContextId MATCHER_P2(BoExecFlushContextEq, drmContextId, numExecs, "") { auto execBuff2 = reinterpret_cast(arg); bool allExecsWithTheSameId = (execBuff2->buffer_count == numExecs); allExecsWithTheSameId &= (execBuff2->rsvd1 == drmContextId); auto execObjects = reinterpret_cast(execBuff2->buffers_ptr); for (uint32_t i = 0; i < execBuff2->buffer_count - 1; i++) { allExecsWithTheSameId &= (execObjects[i].rsvd1 == drmContextId); } return allExecsWithTheSameId; } HWTEST_TEMPLATED_F(DrmCommandStreamTest, Flush) { auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd int boHandle = 123; auto setBoHandle = [&](unsigned long request, void *arg) { auto userptr = static_cast(arg); userptr->handle = boHandle; return 0; }; ::testing::InSequence inSequence; EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(1) .WillRepeatedly(::testing::Invoke(setBoHandle)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushEq(0u, expectedSize))) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(2) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(1) .RetiresOnSaturation(); auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); ASSERT_NE(nullptr, commandBuffer); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); EXPECT_EQ(boHandle, commandBuffer->getBO()->peekHandle()); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, 
QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; auto availableSpacePriorToFlush = cs.getAvailableSpace(); csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(static_cast(boHandle), csr->obtainCurrentFlushStamp()); EXPECT_NE(cs.getCpuBase(), nullptr); EXPECT_EQ(availableSpacePriorToFlush, cs.getAvailableSpace()); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenDrmContextIdWhenFlushingThenSetIdToAllExecBuffersAndObjects) { uint32_t expectedDrmContextId = 321; uint32_t numAllocations = 3; auto createdContextId = [&expectedDrmContextId](unsigned long request, void *arg) { auto contextCreate = static_cast(arg); contextCreate->ctx_id = expectedDrmContextId; return 0; }; auto allocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); csr->makeResident(*allocation1); csr->makeResident(*allocation2); EXPECT_CALL(*mock, ioctl(::testing::_, ::testing::_)).WillRepeatedly(::testing::Return(0)).RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_CONTEXT_CREATE, ::testing::_)) .Times(1) .WillRepeatedly(::testing::Invoke(createdContextId)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushContextEq(expectedDrmContextId, numAllocations))) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); osContext = std::make_unique(*mock, 1, 1, HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); csr->setupContext(*osContext); auto &cs = csr->getCS(); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer 
batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); memoryManager->freeGraphicsMemory(allocation1); memoryManager->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, FlushWithLowPriorityContext) { auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd ::testing::InSequence inSequence; EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushEq(0u, expectedSize))) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(2) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(1) .RetiresOnSaturation(); auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); ASSERT_NE(nullptr, commandBuffer); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, true, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_NE(cs.getCpuBase(), nullptr); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, FlushInvalidAddress) { ::testing::InSequence inSequence; EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(0) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushEq(0u, 8u))) .Times(0) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(0) .RetiresOnSaturation(); EXPECT_CALL(*mock, 
ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(0) .RetiresOnSaturation(); //allocate command buffer manually char *commandBuffer = new (std::nothrow) char[1024]; ASSERT_NE(nullptr, commandBuffer); DrmAllocation commandBufferAllocation(0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, commandBuffer, 1024, (osHandle)1u, MemoryPool::MemoryNull); LinearStream cs(&commandBufferAllocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); delete[] commandBuffer; } HWTEST_TEMPLATED_F(DrmCommandStreamTest, FlushNotEmptyBB) { uint32_t bbUsed = 16 * sizeof(uint32_t); auto expectedSize = alignUp(bbUsed + 8, MemoryConstants::cacheLineSize); // bbUsed + bbEnd ::testing::InSequence inSequence; EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushEq(0u, expectedSize))) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(2) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(1) .RetiresOnSaturation(); auto &cs = csr->getCS(); cs.getSpace(bbUsed); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, FlushNotEmptyNotPaddedBB) { uint32_t bbUsed = 15 * sizeof(uint32_t); ::testing::InSequence 
inSequence; EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushEq(0u, bbUsed + 4))) .Times(1) .WillRepeatedly(::testing::Return(0)) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(2) .RetiresOnSaturation(); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(1) .RetiresOnSaturation(); auto &cs = csr->getCS(); cs.getSpace(bbUsed); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, FlushNotAligned) { EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_)) .Times(1) .WillRepeatedly(::testing::Return(0)); auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1)); ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd EXPECT_CALL(*mock, ioctl( DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushEq((reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1), expectedSize))) .Times(1) .WillRepeatedly(::testing::Return(0)); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_)) .Times(1); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(2); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, 
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
}

// gmock action: reinterprets the second ioctl argument (arg1) as a
// drm_i915_gem_userptr and writes the supplied handle into it.
ACTION_P(UserptrSetHandle, _set_handle) {
    struct drm_i915_gem_userptr *userptr = reinterpret_cast(arg1);
    userptr->handle = _set_handle;
}

// gmock matcher: accepts a drm_gem_close argument whose handle equals `handle`.
MATCHER_P(GemCloseEq, handle, "") {
    drm_gem_close *gemClose = (drm_gem_close *)arg;

    return (gemClose->handle == handle);
}

// gmock matcher over a drm_i915_gem_execbuffer2 argument: checks with EXPECT_TRUE
// that every exec object's flags equal
// EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
// The matcher itself always returns true, so a mismatch is reported by the
// EXPECT_TRUE inside the loop rather than as an unmatched call.
MATCHER(BoExecFlushCheckFlags, "") {
    drm_i915_gem_execbuffer2 *exec2 = (drm_i915_gem_execbuffer2 *)arg;
    drm_i915_gem_exec_object2 *exec_objects = (drm_i915_gem_exec_object2 *)exec2->buffers_ptr;

    for (unsigned int i = 0; i < exec2->buffer_count; i++) {
        EXPECT_TRUE(exec_objects[i].flags == (EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS));
    }

    return true;
}

// Makes two stack-constructed DrmAllocations resident (CPU pointers 0x7FFFFFFF
// and 0x307FFFFFFF), flushes, and expects exactly one EXECBUFFER2 ioctl whose
// exec objects are checked by BoExecFlushCheckFlags. The USERPTR, GEM_CLOSE and
// GEM_WAIT ioctls are stubbed to return 0 without a call-count constraint.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, FlushCheckFlags) {
    EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_))
        .WillRepeatedly(::testing::Return(0));
    EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, ::testing::_))
        .WillRepeatedly(::testing::Return(0));
    EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_))
        .WillRepeatedly(::testing::Return(0));

    auto &cs = csr->getCS();

    EXPECT_CALL(*mock, ioctl(
                           DRM_IOCTL_I915_GEM_EXECBUFFER2,
                           BoExecFlushCheckFlags()))
        .Times(1)
        .WillRepeatedly(::testing::Return(0));

    DrmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, (void *)0x7FFFFFFF, 1024, (osHandle)0u, MemoryPool::MemoryNull);
    DrmAllocation allocation2(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, (void *)0x307FFFFFFF, 1024, (osHandle)0u, MemoryPool::MemoryNull);
    csr->makeResident(allocation);
    csr->makeResident(allocation2);

    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    CommandStreamReceiverHw::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer,
csr->getResidencyAllocations());
}

// The USERPTR ioctl is mocked to hand back handle 17; after a flush with start
// initializer 4 the test expects GEM_CLOSE to be called exactly once with that
// same handle (GemCloseEq(17u)), plus one EXECBUFFER2 and two GEM_WAIT ioctls.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, CheckDrmFree) {
    EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_))
        .Times(1)
        .WillOnce(::testing::DoAll(UserptrSetHandle(17), ::testing::Return(0)));

    auto &cs = csr->getCS();
    auto commandBuffer = static_cast(cs.getGraphicsAllocation());

    //make sure command buffer with offset is not page aligned
    ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1));
    ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F);

    auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd

    EXPECT_CALL(*mock, ioctl(
                           DRM_IOCTL_I915_GEM_EXECBUFFER2,
                           BoExecFlushEq((reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1), expectedSize)))
        .Times(1)
        .WillRepeatedly(::testing::Return(0));
    EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, GemCloseEq(17u)))
        .Times(1);
    EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_))
        .Times(2);

    DrmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 1024, (osHandle)0u, MemoryPool::MemoryNull);

    csr->makeResident(allocation);
    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    CommandStreamReceiverHw::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
}

// Asserts that a Drm object can be reached through the CSR's OS interface.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, GIVENCSRWHENgetDMTHENNotNull) {
    Drm *pDrm = nullptr;
    if (csr->getOSInterface()) {
        pDrm = csr->getOSInterface()->get()->getDrm();
    }
    ASSERT_NE(nullptr, pDrm);
}

// Variant of CheckDrmFree in which the mocked GEM_CLOSE ioctl reports failure.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, CheckDrmFreeCloseFailed) {
    EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_USERPTR, ::testing::_))
        .Times(1)
        .WillOnce(::testing::DoAll(UserptrSetHandle(17), ::testing::Return(0)));

    auto &cs = csr->getCS();
    auto commandBuffer =
static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1)); ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); auto expectedSize = alignUp(8u, MemoryConstants::cacheLineSize); // bbEnd EXPECT_CALL(*mock, ioctl( DRM_IOCTL_I915_GEM_EXECBUFFER2, BoExecFlushEq((reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & (this->alignment - 1), expectedSize))) .Times(1) .WillRepeatedly(::testing::Return(0)); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_GEM_CLOSE, GemCloseEq(17u))) .Times(1) .WillOnce(::testing::Return(-1)); EXPECT_CALL(*mock, ioctl(DRM_IOCTL_I915_GEM_WAIT, ::testing::_)) .Times(2); DrmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 1024, (osHandle)0u, MemoryPool::MemoryNull); csr->makeResident(allocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenDefaultDrmCSRWhenItIsCreatedThenGemCloseWorkerModeIsInactive) { EXPECT_EQ(gemCloseWorkerMode::gemCloseWorkerInactive, static_cast *>(csr)->peekGemCloseWorkerOperationMode()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenCommandStreamWhenItIsFlushedWithGemCloseWorkerInDefaultModeThenWorkerDecreasesTheRefCount) { auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); auto storedBase = cs.getCpuBase(); auto 
storedGraphicsAllocation = cs.getGraphicsAllocation(); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(cs.getCpuBase(), storedBase); EXPECT_EQ(cs.getGraphicsAllocation(), storedGraphicsAllocation); auto drmAllocation = static_cast(storedGraphicsAllocation); auto bo = drmAllocation->getBO(); //spin until gem close worker finishes execution while (bo->getRefCount() > 1) ; mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenTaskThatRequiresLargeResourceCountWhenItIsFlushedThenExecStorageIsResized) { std::vector graphicsAllocations; auto &execStorage = static_cast *>(csr)->getExecStorage(); execStorage.resize(0); for (auto id = 0; id < 10; id++) { auto graphicsAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); csr->makeResident(*graphicsAllocation); graphicsAllocations.push_back(graphicsAllocation); } auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); LinearStream cs(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(11u, this->mock->execBuffer.buffer_count); mm->freeGraphicsMemory(commandBuffer); for (auto graphicsAllocation : graphicsAllocations) { mm->freeGraphicsMemory(graphicsAllocation); } EXPECT_EQ(11u, execStorage.size()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenGemCloseWorkerInactiveModeWhenMakeResidentIsCalledThenRefCountsAreNotUpdated) { auto 
dummyAllocation = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));

    auto bo = dummyAllocation->getBO();
    EXPECT_EQ(1u, bo->getRefCount());

    // the reference count is expected to remain 1 after each residency step
    csr->makeResident(*dummyAllocation);
    EXPECT_EQ(1u, bo->getRefCount());

    csr->processResidency(csr->getResidencyAllocations(), 0u);

    csr->makeNonResident(*dummyAllocation);
    EXPECT_EQ(1u, bo->getRefCount());

    mm->freeGraphicsMemory(dummyAllocation);
}

// Two separately allocated pages are made resident: both must report resident
// for the CSR's OS context and the residency vector must hold 2 entries; after
// makeNonResident on both, neither is resident and the vector is empty.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenTwoAllocationsWhenBackingStorageIsDifferentThenMakeResidentShouldAddTwoLocations) {
    auto allocation = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    auto allocation2 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));

    csr->makeResident(*allocation);
    csr->makeResident(*allocation2);

    auto osContextId = csr->getOsContext().getContextId();

    EXPECT_TRUE(allocation->isResident(osContextId));
    EXPECT_TRUE(allocation2->isResident(osContextId));

    csr->processResidency(csr->getResidencyAllocations(), 0u);

    EXPECT_TRUE(allocation->isResident(osContextId));
    EXPECT_TRUE(allocation2->isResident(osContextId));

    EXPECT_EQ(getResidencyVector().size(), 2u);

    csr->makeNonResident(*allocation);
    csr->makeNonResident(*allocation2);

    EXPECT_FALSE(allocation->isResident(osContextId));
    EXPECT_FALSE(allocation2->isResident(osContextId));

    EXPECT_EQ(getResidencyVector().size(), 0u);

    mm->freeGraphicsMemory(allocation);
    mm->freeGraphicsMemory(allocation2);
}

// Makes the same allocation resident twice before flushing and checks that the
// stream keeps its CPU base and graphics allocation after the flush.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenCommandStreamWithDuplicatesWhenItIsFlushedWithGemCloseWorkerInactiveModeThenCsIsNotNulled) {
    auto commandBuffer = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    auto dummyAllocation =
static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
    ASSERT_NE(nullptr, commandBuffer);
    // the command buffer must start on a 4 KiB boundary (low 12 address bits clear)
    ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF);
    LinearStream cs(commandBuffer);

    // the same allocation is deliberately registered twice
    csr->makeResident(*dummyAllocation);
    csr->makeResident(*dummyAllocation);

    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    CommandStreamReceiverHw::alignToCacheLine(cs);

    auto storedBase = cs.getCpuBase();
    auto storedGraphicsAllocation = cs.getGraphicsAllocation();
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    EXPECT_EQ(cs.getCpuBase(), storedBase);
    EXPECT_EQ(cs.getGraphicsAllocation(), storedGraphicsAllocation);

    mm->freeGraphicsMemory(dummyAllocation);
    mm->freeGraphicsMemory(commandBuffer);
}

// Constructs a TestedDrmCommandStreamReceiver with gemCloseWorkerInactive and
// checks that it reports that same operation mode.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenDrmCsrCreatedWithInactiveGemCloseWorkerPolicyThenThreadIsNotCreated) {
    TestedDrmCommandStreamReceiver testedCsr(gemCloseWorkerMode::gemCloseWorkerInactive,
                                             *this->executionEnvironment);
    EXPECT_EQ(gemCloseWorkerMode::gemCloseWorkerInactive, testedCsr.peekGemCloseWorkerOperationMode());
}

// Fixture for the batching tests, layered on DrmCommandStreamEnhancedTest.
// It caches the default engine CSR's tag and preemption allocations and, when
// the default preemption mode is MidThread, temporarily swaps the global mock
// SIP program's allocation for a freshly allocated page.
// NOTE(review): the `template` headers and the casts below have no template
// arguments in this copy of the file; verify against the upstream source.
class DrmCommandStreamBatchingTests : public DrmCommandStreamEnhancedTest {
  public:
    DrmAllocation *tagAllocation;
    DrmAllocation *preemptionAllocation;
    // original SIP allocation, kept so TearDownT can put it back
    GraphicsAllocation *tmpAllocation;

    template 
    void SetUpT() {
        DrmCommandStreamEnhancedTest::SetUpT();
        if (PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo) == PreemptionMode::MidThread) {
            tmpAllocation = GlobalMockSipProgram::sipProgram->getAllocation();
            GlobalMockSipProgram::sipProgram->resetAllocation(device->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}));
        }
        tagAllocation =
static_cast(device->getDefaultEngine().commandStreamReceiver->getTagAllocation());
        preemptionAllocation = static_cast(device->getDefaultEngine().commandStreamReceiver->getPreemptionAllocation());
    }

    // Undoes the SIP allocation swap (MidThread preemption only): frees the
    // program's current allocation and reinstates the one saved in tmpAllocation,
    // then runs the base fixture's tear-down.
    template 
    void TearDownT() {
        if (PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo) == PreemptionMode::MidThread) {
            device->getMemoryManager()->freeGraphicsMemory((GlobalMockSipProgram::sipProgram)->getAllocation());
            GlobalMockSipProgram::sipProgram->resetAllocation(tmpAllocation);
        }
        DrmCommandStreamEnhancedTest::TearDownT();
    }
};

// After resetting the mock's counters, flushes a command buffer with one extra
// resident allocation. Expects 1 execbuffer2 and 2 userptr ioctls (3 in total)
// and execbuffer flags equal to the OS context's engine flag OR-ed with
// I915_EXEC_NO_RELOC.
HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenCSRWhenFlushIsCalledThenProperFlagsArePassed) {
    mock->reset();
    auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    auto dummyAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF);
    LinearStream cs(commandBuffer);

    csr->makeResident(*dummyAllocation);

    CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr);
    CommandStreamReceiverHw::alignToCacheLine(cs);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());

    int ioctlExecCnt = 1;
    int ioctlUserPtrCnt = 2;

    auto engineFlag = static_cast(csr->getOsContext()).getEngineFlag();

    EXPECT_EQ(ioctlExecCnt + ioctlUserPtrCnt, this->mock->ioctl_cnt.total);
    EXPECT_EQ(ioctlExecCnt, this->mock->ioctl_cnt.execbuffer2);
    EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr);
    uint64_t flags = engineFlag | I915_EXEC_NO_RELOC;
    EXPECT_EQ(flags, this->mock->execBuffer.flags);

    mm->freeGraphicsMemory(dummyAllocation);
    mm->freeGraphicsMemory(commandBuffer);
}

// With batched dispatch, flushTask is expected to record the command buffer in
// the submissions aggregator instead of submitting it: only 3 userptr ioctls are
// counted, the mocked execbuffer flags stay 0, and the recorded surfaces contain
// the dummy, command-buffer and tag allocations.
HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests,
givenCsrWhenDispatchPolicyIsSetToBatchingThenCommandBufferIsNotSubmitted) {
    // Switch the CSR to batched dispatch and install a mock aggregator so the
    // recorded command buffers can be inspected.
    mock->reset();
    csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
    static_cast *>(csr)->overrideSubmissionAggregator(mockedSubmissionsAggregator);

    auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    auto dummyAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF);
    IndirectHeap cs(commandBuffer);

    csr->makeResident(*dummyAllocation);

    csr->setTagAllocation(tagAllocation);

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(device->getHardwareInfo());

    csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *device);

    //make sure command buffer is recorded
    auto &cmdBuffers = mockedSubmissionsAggregator->peekCommandBuffers();
    EXPECT_FALSE(cmdBuffers.peekIsEmpty());

    EXPECT_NE(nullptr, cmdBuffers.peekHead());

    //preemption allocation
    size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ?
2 : 0; auto recordedCmdBuffer = cmdBuffers.peekHead(); EXPECT_EQ(3u + csrSurfaceCount, recordedCmdBuffer->surfaces.size()); //try to find all allocations auto elementInVector = std::find(recordedCmdBuffer->surfaces.begin(), recordedCmdBuffer->surfaces.end(), dummyAllocation); EXPECT_NE(elementInVector, recordedCmdBuffer->surfaces.end()); elementInVector = std::find(recordedCmdBuffer->surfaces.begin(), recordedCmdBuffer->surfaces.end(), commandBuffer); EXPECT_NE(elementInVector, recordedCmdBuffer->surfaces.end()); elementInVector = std::find(recordedCmdBuffer->surfaces.begin(), recordedCmdBuffer->surfaces.end(), tagAllocation); EXPECT_NE(elementInVector, recordedCmdBuffer->surfaces.end()); EXPECT_EQ(static_cast *>(csr)->commandStream.getGraphicsAllocation(), recordedCmdBuffer->batchBuffer.commandBufferAllocation); int ioctlUserPtrCnt = 3; EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.total); EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr); EXPECT_EQ(0u, this->mock->execBuffer.flags); csr->flushBatchedSubmissions(); mm->freeGraphicsMemory(dummyAllocation); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamBatchingTests, givenRecordedCommandBufferWhenItIsSubmittedThenFlushTaskIsProperlyCalled) { mock->reset(); csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); static_cast *>(csr)->overrideSubmissionAggregator(mockedSubmissionsAggregator); auto commandBuffer = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); IndirectHeap cs(commandBuffer); csr->setTagAllocation(tagAllocation); auto &submittedCommandBuffer = csr->getCS(1024); //use some bytes submittedCommandBuffer.getSpace(4); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(device->getHardwareInfo()); 
dispatchFlags.guardCommandBufferWithPipeControl = true; csr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *device); auto &cmdBuffers = mockedSubmissionsAggregator->peekCommandBuffers(); auto storedCommandBuffer = cmdBuffers.peekHead(); ResidencyContainer copyOfResidency = storedCommandBuffer->surfaces; copyOfResidency.push_back(storedCommandBuffer->batchBuffer.commandBufferAllocation); csr->flushBatchedSubmissions(); EXPECT_TRUE(cmdBuffers.peekIsEmpty()); auto commandBufferGraphicsAllocation = submittedCommandBuffer.getGraphicsAllocation(); EXPECT_FALSE(commandBufferGraphicsAllocation->isResident(csr->getOsContext().getContextId())); //preemption allocation size_t csrSurfaceCount = (device->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0; //validate that submited command buffer has what we want EXPECT_EQ(3u + csrSurfaceCount, this->mock->execBuffer.buffer_count); EXPECT_EQ(4u, this->mock->execBuffer.batch_start_offset); EXPECT_EQ(submittedCommandBuffer.getUsed(), this->mock->execBuffer.batch_len); drm_i915_gem_exec_object2 *exec_objects = (drm_i915_gem_exec_object2 *)this->mock->execBuffer.buffers_ptr; for (unsigned int i = 0; i < this->mock->execBuffer.buffer_count; i++) { int handle = exec_objects[i].handle; auto handleFound = false; for (auto &graphicsAllocation : copyOfResidency) { auto bo = static_cast(graphicsAllocation)->getBO(); if (bo->peekHandle() == handle) { handleFound = true; } } EXPECT_TRUE(handleFound); } int ioctlExecCnt = 1; int ioctlUserPtrCnt = 2; EXPECT_EQ(ioctlExecCnt, this->mock->ioctl_cnt.execbuffer2); EXPECT_EQ(ioctlUserPtrCnt, this->mock->ioctl_cnt.gemUserptr); EXPECT_EQ(ioctlExecCnt + ioctlUserPtrCnt, this->mock->ioctl_cnt.total); mm->freeGraphicsMemory(commandBuffer); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenDrmAllocationWhenGetBufferObjectToModifyIsCalledForAGivenHandleIdThenTheCorrespondingBufferObjectGetsModified) { auto size = 1024u; auto allocation = new DrmAllocation(0, 
GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, size, (osHandle)0u, MemoryPool::MemoryNull);

    // all slots start out empty
    auto &bos = allocation->getBOs();
    for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) {
        EXPECT_EQ(nullptr, bos[handleId]);
    }

    // assign a new buffer object to every slot through the modifying accessor
    for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) {
        allocation->getBufferObjectToModify(handleId) = this->createBO(size);
    }

    // the assignments must be visible through the previously obtained reference
    for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) {
        EXPECT_NE(nullptr, bos[handleId]);
    }

    mm->freeGraphicsMemory(allocation);
}

// A single BO-backed allocation becomes resident after makeResident +
// processResidency and non-resident after makeNonResident; the buffer object's
// reference count is expected to be 1 at both points.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, makeResident) {
    auto buffer = this->createBO(1024);
    auto allocation = new DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, nullptr, buffer->peekSize(), (osHandle)0u, MemoryPool::MemoryNull);
    EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer());

    csr->makeResident(*allocation);
    csr->processResidency(csr->getResidencyAllocations(), 0u);

    EXPECT_TRUE(isResident(buffer));
    EXPECT_EQ(1u, buffer->getRefCount());

    csr->makeNonResident(*allocation);
    EXPECT_FALSE(isResident(buffer));
    EXPECT_EQ(1u, buffer->getRefCount());
    mm->freeGraphicsMemory(allocation);
}

// Two BO-backed allocations are made resident and processed; both buffers must
// be resident with a reference count of 1. makeNonResident is then called on
// the first allocation only.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, makeResidentOnly) {
    BufferObject *buffer1 = this->createBO(4096);
    BufferObject *buffer2 = this->createBO(4096);
    auto allocation1 = new DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer1, nullptr, buffer1->peekSize(), (osHandle)0u, MemoryPool::MemoryNull);
    auto allocation2 = new DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer2, nullptr, buffer2->peekSize(), (osHandle)0u, MemoryPool::MemoryNull);
    EXPECT_EQ(nullptr, allocation1->getUnderlyingBuffer());
    EXPECT_EQ(nullptr, allocation2->getUnderlyingBuffer());

    csr->makeResident(*allocation1);
    csr->makeResident(*allocation2);
    csr->processResidency(csr->getResidencyAllocations(), 0u);

    EXPECT_TRUE(isResident(buffer1));
    EXPECT_TRUE(isResident(buffer2));
    EXPECT_EQ(1u,
buffer1->getRefCount()); EXPECT_EQ(1u, buffer2->getRefCount()); // dont call makeNonResident on allocation2, any other makeNonResident call will clean this // we want to keep all makeResident calls before flush and makeNonResident everyting after flush csr->makeNonResident(*allocation1); mm->freeGraphicsMemory(allocation1); mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, makeResidentTwice) { auto buffer = this->createBO(1024); auto allocation = new DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, buffer, nullptr, buffer->peekSize(), (osHandle)0u, MemoryPool::MemoryNull); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(isResident(buffer)); EXPECT_EQ(1u, buffer->getRefCount()); csr->getResidencyAllocations().clear(); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(isResident(buffer)); EXPECT_EQ(1u, buffer->getRefCount()); csr->makeNonResident(*allocation); EXPECT_FALSE(isResident(buffer)); EXPECT_EQ(1u, buffer->getRefCount()); mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, makeResidentTwiceWhenFragmentStorage) { auto ptr = (void *)0x1001; auto size = MemoryConstants::pageSize * 10; auto reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); ASSERT_EQ(3u, allocation->fragmentsStorage.fragmentCount); csr->makeResident(*allocation); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); for (int i = 0; i < maxFragmentsCount; i++) { ASSERT_EQ(allocation->fragmentsStorage.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); auto bo = allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_TRUE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } 
csr->makeNonResident(*allocation); for (int i = 0; i < maxFragmentsCount; i++) { auto bo = allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_FALSE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenFragmentedAllocationsWithResuedFragmentsWhenTheyAreMadeResidentThenFragmentsDoNotDuplicate) { mock->ioctl_expected.total = 9; //3 fragments auto ptr = (void *)0x1001; auto size = MemoryConstants::pageSize * 10; auto graphicsAllocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); auto offsetedPtr = (void *)((uintptr_t)ptr + size); auto size2 = MemoryConstants::pageSize - 1; auto graphicsAllocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size2}, offsetedPtr); //graphicsAllocation2 reuses one fragment from graphicsAllocation EXPECT_EQ(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency, graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency); csr->makeResident(*graphicsAllocation); csr->makeResident(*graphicsAllocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); auto &osContext = csr->getOsContext(); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]); EXPECT_TRUE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]); auto &residency = getResidencyVector(); EXPECT_EQ(3u, residency.size()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); //check that each packet is not resident 
EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]);
    EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]);
    EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]);
    EXPECT_FALSE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]);

    EXPECT_EQ(0u, residency.size());

    // second round: the same make-resident / make-non-resident cycle must give
    // the same results
    csr->makeResident(*graphicsAllocation);
    csr->makeResident(*graphicsAllocation2);

    csr->processResidency(csr->getResidencyAllocations(), 0u);

    EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]);
    EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]);
    EXPECT_TRUE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]);
    EXPECT_TRUE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]);

    EXPECT_EQ(3u, residency.size());

    csr->makeSurfacePackNonResident(csr->getResidencyAllocations());

    EXPECT_EQ(0u, residency.size());

    EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]);
    EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[1].residency->resident[osContext.getContextId()]);
    EXPECT_FALSE(graphicsAllocation->fragmentsStorage.fragmentStorageData[2].residency->resident[osContext.getContextId()]);
    EXPECT_FALSE(graphicsAllocation2->fragmentsStorage.fragmentStorageData[0].residency->resident[osContext.getContextId()]);

    mm->freeGraphicsMemory(graphicsAllocation);
    mm->freeGraphicsMemory(graphicsAllocation2);
}

// A host-pointer allocation at 0x1001 spanning 10 pages is asserted to have 3
// fragments; after one makeResident + processResidency every fragment's buffer
// object must be resident, and non-resident again after makeNonResident.
HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenAllocationCreatedFromThreeFragmentsWhenMakeResidentIsBeingCalledThenAllFragmentsAreMadeResident) {
auto ptr = (void *)0x1001; auto size = MemoryConstants::pageSize * 10; auto reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); ASSERT_EQ(3u, allocation->fragmentsStorage.fragmentCount); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); for (int i = 0; i < maxFragmentsCount; i++) { ASSERT_EQ(allocation->fragmentsStorage.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); auto bo = allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_TRUE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } csr->makeNonResident(*allocation); for (int i = 0; i < maxFragmentsCount; i++) { auto bo = allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_FALSE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } mm->freeGraphicsMemory(allocation); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenAllocationsContainingDifferentCountOfFragmentsWhenAllocationIsMadeResidentThenAllFragmentsAreMadeResident) { auto ptr = (void *)0x1001; auto size = MemoryConstants::pageSize; auto size2 = 100u; auto reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); ASSERT_EQ(2u, allocation->fragmentsStorage.fragmentCount); ASSERT_EQ(2u, reqs.requiredFragmentsCount); csr->makeResident(*allocation); csr->processResidency(csr->getResidencyAllocations(), 0u); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { ASSERT_EQ(allocation->fragmentsStorage.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); auto bo = allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_TRUE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } 
csr->makeNonResident(*allocation); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { auto bo = allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_FALSE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } mm->freeGraphicsMemory(allocation); csr->getResidencyAllocations().clear(); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size2}, ptr); reqs = MockHostPtrManager::getAllocationRequirements(ptr, size2); ASSERT_EQ(1u, allocation2->fragmentsStorage.fragmentCount); ASSERT_EQ(1u, reqs.requiredFragmentsCount); csr->makeResident(*allocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { ASSERT_EQ(allocation2->fragmentsStorage.fragmentStorageData[i].cpuPtr, reqs.allocationFragments[i].allocationPtr); auto bo = allocation2->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_TRUE(isResident(bo)); EXPECT_EQ(1u, bo->getRefCount()); } csr->makeNonResident(*allocation2); for (unsigned int i = 0; i < reqs.requiredFragmentsCount; i++) { auto bo = allocation2->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo; EXPECT_FALSE(isResident(bo)); EXPECT_EQ(1u, allocation2->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo->getRefCount()); } mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, GivenTwoAllocationsWhenBackingStorageIsTheSameThenMakeResidentShouldAddOnlyOneLocation) { auto ptr = (void *)0x1000; auto size = MemoryConstants::pageSize; auto ptr2 = (void *)0x1000; auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr2); csr->makeResident(*allocation); csr->makeResident(*allocation2); 
csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_EQ(getResidencyVector().size(), 1u); csr->makeNonResident(*allocation); csr->makeNonResident(*allocation2); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(allocation2); csr->getResidencyAllocations().clear(); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, Flush) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); ASSERT_EQ(0u, reinterpret_cast(commandBuffer->getUnderlyingBuffer()) & 0xFFF); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_NE(cs.getCpuBase(), nullptr); EXPECT_NE(cs.getGraphicsAllocation(), nullptr); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, ClearResidencyWhenFlushNotCalled) { auto allocation1 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); auto allocation2 = static_cast(mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); ASSERT_NE(nullptr, allocation1); ASSERT_NE(nullptr, allocation2); EXPECT_EQ(getResidencyVector().size(), 0u); csr->makeResident(*allocation1); csr->makeResident(*allocation2); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(isResident(allocation1->getBO())); EXPECT_TRUE(isResident(allocation2->getBO())); EXPECT_EQ(getResidencyVector().size(), 2u); EXPECT_EQ(allocation1->getBO()->getRefCount(), 1u); EXPECT_EQ(allocation2->getBO()->getRefCount(), 1u); // makeNonResident without flush csr->makeNonResident(*allocation1); EXPECT_EQ(getResidencyVector().size(), 0u); // everything is nonResident after first call 
EXPECT_FALSE(isResident(allocation1->getBO())); EXPECT_FALSE(isResident(allocation2->getBO())); EXPECT_EQ(allocation1->getBO()->getRefCount(), 1u); EXPECT_EQ(allocation2->getBO()->getRefCount(), 1u); mm->freeGraphicsMemory(allocation1); mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, FlushMultipleTimes) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); cs.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize()); cs.replaceGraphicsAllocation(commandBuffer); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer2{cs.getGraphicsAllocation(), 8, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer2, csr->getResidencyAllocations()); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation2); csr->makeResident(*allocation); csr->makeResident(*allocation2); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(commandBuffer), REUSABLE_ALLOCATION); auto commandBuffer2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer2); cs.replaceBuffer(commandBuffer2->getUnderlyingBuffer(), 
commandBuffer2->getUnderlyingBufferSize()); cs.replaceGraphicsAllocation(commandBuffer2); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer3{cs.getGraphicsAllocation(), 16, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer3, csr->getResidencyAllocations()); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); mm->freeGraphicsMemory(allocation); mm->freeGraphicsMemory(allocation2); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(commandBuffer2), REUSABLE_ALLOCATION); commandBuffer2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer2); cs.replaceBuffer(commandBuffer2->getUnderlyingBuffer(), commandBuffer2->getUnderlyingBufferSize()); cs.replaceGraphicsAllocation(commandBuffer2); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer4{cs.getGraphicsAllocation(), 24, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer4, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, FlushNotEmptyBB) { int bbUsed = 16 * sizeof(uint32_t); auto &cs = csr->getCS(); cs.getSpace(bbUsed); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, FlushNotEmptyNotPaddedBB) { int bbUsed = 15 * sizeof(uint32_t); auto &cs = csr->getCS(); cs.getSpace(bbUsed); 
CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, FlushNotAligned) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0xFFF); ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, CheckDrmFree) { auto &cs = csr->getCS(); auto commandBuffer = static_cast(cs.getGraphicsAllocation()); //make sure command buffer with offset is not page aligned ASSERT_NE(0u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0xFFF); ASSERT_EQ(4u, (reinterpret_cast(commandBuffer->getUnderlyingBuffer()) + 4) & 0x7F); auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); csr->makeResident(*allocation); CommandStreamReceiverHw::addBatchBufferEnd(cs, nullptr); CommandStreamReceiverHw::alignToCacheLine(cs); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 4, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); csr->makeNonResident(*allocation); mm->freeGraphicsMemory(allocation); 
} HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, MakeResidentClearResidencyAllocationsInCommandStreamReceiver) { auto allocation1 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto allocation2 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation1); ASSERT_NE(nullptr, allocation2); csr->makeResident(*allocation1); csr->makeResident(*allocation2); EXPECT_NE(0u, csr->getResidencyAllocations().size()); csr->processResidency(csr->getResidencyAllocations(), 0u); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->getResidencyAllocations().size()); mm->freeGraphicsMemory(allocation1); mm->freeGraphicsMemory(allocation2); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenMultipleMakeResidentWhenMakeNonResidentIsCalledOnlyOnceThenSurfaceIsMadeNonResident) { auto allocation1 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation1); csr->makeResident(*allocation1); csr->makeResident(*allocation1); EXPECT_NE(0u, csr->getResidencyAllocations().size()); csr->processResidency(csr->getResidencyAllocations(), 0u); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->getResidencyAllocations().size()); EXPECT_FALSE(allocation1->isResident(csr->getOsContext().getContextId())); mm->freeGraphicsMemory(allocation1); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, makeNonResidentOnMemObjectCallsDrmCSMakeNonResidentWithGraphicsAllocation) { auto allocation1 = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), 0x1000}); ASSERT_NE(nullptr, allocation1); auto &makeNonResidentResult = static_cast *>(csr)->makeNonResidentResult; csr->makeResident(*allocation1); makeNonResidentResult.called = false; 
makeNonResidentResult.allocation = nullptr; csr->makeNonResident(*allocation1); EXPECT_TRUE(makeNonResidentResult.called); EXPECT_EQ(allocation1, makeNonResidentResult.allocation); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); mm->freeGraphicsMemory(allocation1); } class DrmMockBuffer : public Buffer { public: static DrmMockBuffer *create() { char *data = static_cast(::alignedMalloc(128, 64)); DrmAllocation *alloc = new (std::nothrow) DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, &data, sizeof(data), (osHandle)0, MemoryPool::MemoryNull); return new DrmMockBuffer(data, 128, alloc); } ~DrmMockBuffer() override { ::alignedFree(data); delete gfxAllocation; } DrmMockBuffer(char *data, size_t size, DrmAllocation *alloc) : Buffer(nullptr, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, size, data, data, alloc, true, false, false), data(data), gfxAllocation(alloc) { } void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) override { } protected: char *data; DrmAllocation *gfxAllocation; }; HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, BufferResidency) { std::unique_ptr buffer(DrmMockBuffer::create()); auto osContextId = csr->getOsContext().getContextId(); ASSERT_FALSE(buffer->getGraphicsAllocation()->isResident(osContextId)); ASSERT_GT(buffer->getSize(), 0u); //make it resident 8 times for (int c = 0; c < 8; c++) { csr->makeResident(*buffer->getGraphicsAllocation()); csr->processResidency(csr->getResidencyAllocations(), 0u); EXPECT_TRUE(buffer->getGraphicsAllocation()->isResident(osContextId)); EXPECT_EQ(buffer->getGraphicsAllocation()->getResidencyTaskCount(osContextId), csr->peekTaskCount() + 1); } csr->makeNonResident(*buffer->getGraphicsAllocation()); EXPECT_FALSE(buffer->getGraphicsAllocation()->isResident(osContextId)); csr->makeNonResident(*buffer->getGraphicsAllocation()); 
EXPECT_FALSE(buffer->getGraphicsAllocation()->isResident(osContextId)); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenDrmCommandStreamReceiverWhenMemoryManagerIsCreatedThenItHasHostMemoryValidationEnabledByDefault) { EXPECT_TRUE(mm->isValidateHostMemoryEnabled()); } HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTest, givenAllocationWithSingleBufferObjectWhenMakeResidentBufferObjectsIsCalledThenTheBufferObjectIsMadeResident) { auto size = 1024u; auto bo = this->createBO(size); BufferObjects bos{{bo}}; auto allocation = new DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, bos, nullptr, 0u, size, MemoryPool::LocalMemory); EXPECT_EQ(bo, allocation->getBO()); makeResidentBufferObjects(allocation); EXPECT_TRUE(isResident(bo)); mm->freeGraphicsMemory(allocation); } template struct MockDrmCsr : public DrmCommandStreamReceiver { using DrmCommandStreamReceiver::DrmCommandStreamReceiver; }; HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenDrmCommandStreamReceiverWhenCreatePageTableMngrIsCalledThenCreatePageTableManager) { executionEnvironment.prepareRootDeviceEnvironments(2); executionEnvironment.rootDeviceEnvironments[1]->setHwInfo(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[1]->initGmm(); executionEnvironment.rootDeviceEnvironments[1]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[1]->osInterface->get()->setDrm(new DrmMockCustom()); auto csr = std::make_unique>(executionEnvironment, 1, gemCloseWorkerMode::gemCloseWorkerActive); auto pageTableManager = csr->createPageTableManager(); EXPECT_EQ(executionEnvironment.rootDeviceEnvironments[1]->pageTableManager.get(), pageTableManager); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenPageTableManagerAndMapTrueWhenUpdateAuxTableIsCalledThenItReturnsTrue) { auto mockMngr = new MockGmmPageTableMngr(); executionEnvironment.rootDeviceEnvironments[0]->pageTableManager.reset(mockMngr); auto gmm = std::make_unique(); GMM_DDI_UPDATEAUXTABLE ddiUpdateAuxTable = {}; 
EXPECT_CALL(*mockMngr, updateAuxTable(::testing::_)).Times(1).WillOnce(::testing::Invoke([&](const GMM_DDI_UPDATEAUXTABLE *arg) {ddiUpdateAuxTable = *arg; return GMM_SUCCESS; })); auto result = executionEnvironment.rootDeviceEnvironments[0]->pageTableManager->updateAuxTable(0, gmm.get(), true); EXPECT_EQ(ddiUpdateAuxTable.BaseGpuVA, 0ull); EXPECT_EQ(ddiUpdateAuxTable.BaseResInfo, gmm->gmmResourceInfo->peekHandle()); EXPECT_EQ(ddiUpdateAuxTable.DoNotWait, true); EXPECT_EQ(ddiUpdateAuxTable.Map, 1u); EXPECT_TRUE(result); } HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenPageTableManagerAndMapFalseWhenUpdateAuxTableIsCalledThenItReturnsTrue) { auto mockMngr = new MockGmmPageTableMngr(); executionEnvironment.rootDeviceEnvironments[0]->pageTableManager.reset(mockMngr); auto gmm = std::make_unique(); GMM_DDI_UPDATEAUXTABLE ddiUpdateAuxTable = {}; EXPECT_CALL(*mockMngr, updateAuxTable(::testing::_)).Times(1).WillOnce(::testing::Invoke([&](const GMM_DDI_UPDATEAUXTABLE *arg) {ddiUpdateAuxTable = *arg; return GMM_SUCCESS; })); auto result = executionEnvironment.rootDeviceEnvironments[0]->pageTableManager->updateAuxTable(0, gmm.get(), false); EXPECT_EQ(ddiUpdateAuxTable.BaseGpuVA, 0ull); EXPECT_EQ(ddiUpdateAuxTable.BaseResInfo, gmm->gmmResourceInfo->peekHandle()); EXPECT_EQ(ddiUpdateAuxTable.DoNotWait, true); EXPECT_EQ(ddiUpdateAuxTable.Map, 0u); EXPECT_TRUE(result); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_engine_info_tests.cpp000066400000000000000000000015741363734646600325110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "gtest/gtest.h" using namespace NEO; TEST(DrmTest, whenQueryingDrmThenNullIsReturnedAndNoIoctlIsCalled) { std::unique_ptr drm = std::make_unique(); EXPECT_NE(nullptr, drm); EXPECT_EQ(nullptr, drm->query(1)); EXPECT_EQ(0u, drm->ioctlCallsCount); } TEST(DrmTest, 
whenQueryingEngineInfoThenNoIoctlIsCalled) { std::unique_ptr drm = std::make_unique(); EXPECT_NE(nullptr, drm); drm->queryEngineInfo(); EXPECT_EQ(0u, drm->ioctlCallsCount); } TEST(EngineInfoTest, givenEngineInfoImplementationWhenDestructingThenDestructorIsCalled) { struct EngineInfoImpl : EngineInfo { ~EngineInfoImpl() override = default; }; EngineInfoImpl engineInfo; } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_gem_close_worker_tests.cpp000066400000000000000000000140171363734646600335530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/drm_gem_close_worker.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "test.h" #include "drm/i915_drm.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include #include #include #include using namespace NEO; class DrmMockForWorker : public Drm { public: std::mutex mutex; std::atomic gem_close_cnt; std::atomic gem_close_expected; std::atomic ioctl_caller_thread_id; DrmMockForWorker() : Drm(std::make_unique(33), *platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]) { } int ioctl(unsigned long request, void *arg) override { if (_IOC_TYPE(request) == DRM_IOCTL_BASE) { //when drm ioctl is called, try acquire mutex //main thread 
can hold mutex, to prevent ioctl handling std::lock_guard lock(mutex); } if (request == DRM_IOCTL_GEM_CLOSE) gem_close_cnt++; ioctl_caller_thread_id = std::this_thread::get_id(); return 0; }; }; class DrmGemCloseWorkerFixture { public: DrmGemCloseWorkerFixture() : executionEnvironment(defaultHwInfo.get()){}; //max loop count for while static const uint32_t deadCntInit = 10 * 1000 * 1000; DrmMemoryManager *mm; DrmMockForWorker *drmMock; uint32_t deadCnt = deadCntInit; void SetUp() { this->drmMock = new DrmMockForWorker; this->drmMock->gem_close_cnt = 0; this->drmMock->gem_close_expected = 0; executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(drmMock); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); this->mm = new DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment); } void TearDown() { if (this->drmMock->gem_close_expected >= 0) { EXPECT_EQ(this->drmMock->gem_close_expected, this->drmMock->gem_close_cnt); } delete this->mm; } protected: class DrmAllocationWrapper : public DrmAllocation { public: DrmAllocationWrapper(BufferObject *bo) : DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, bo, nullptr, 0, (osHandle)0u, MemoryPool::MemoryNull) { } }; MockExecutionEnvironment executionEnvironment; }; typedef Test DrmGemCloseWorkerTests; TEST_F(DrmGemCloseWorkerTests, gemClose) { this->drmMock->gem_close_expected = 1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new BufferObject(this->drmMock, 1, 0); worker->push(bo); delete worker; } TEST_F(DrmGemCloseWorkerTests, gemCloseExit) { this->drmMock->gem_close_expected = -1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new BufferObject(this->drmMock, 1, 0); worker->push(bo); //wait for worker to complete or deadCnt drops while (!worker->isEmpty() && (deadCnt-- > 0)) pthread_yield(); //yield to another threads 
worker->close(false); //and check if GEM was closed EXPECT_EQ(1, this->drmMock->gem_close_cnt.load()); delete worker; } TEST_F(DrmGemCloseWorkerTests, close) { this->drmMock->gem_close_expected = -1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new BufferObject(this->drmMock, 1, 0); worker->push(bo); worker->close(false); //wait for worker to complete or deadCnt drops while (!worker->isEmpty() && (deadCnt-- > 0)) pthread_yield(); //yield to another threads //and check if GEM was closed EXPECT_EQ(1, this->drmMock->gem_close_cnt.load()); delete worker; } TEST_F(DrmGemCloseWorkerTests, givenAllocationWhenAskedForUnreferenceWithForceFlagSetThenAllocationIsReleasedFromCallingThread) { this->drmMock->gem_close_expected = 1; auto worker = new DrmGemCloseWorker(*mm); auto bo = new BufferObject(this->drmMock, 1, 0); bo->reference(); worker->push(bo); auto r = mm->unreference(bo, true); EXPECT_EQ(1u, r); EXPECT_EQ(drmMock->ioctl_caller_thread_id, std::this_thread::get_id()); delete worker; } TEST_F(DrmGemCloseWorkerTests, givenDrmGemCloseWorkerWhenCloseIsCalledWithBlockingFlagThenThreadIsClosed) { struct mockDrmGemCloseWorker : DrmGemCloseWorker { using DrmGemCloseWorker::DrmGemCloseWorker; using DrmGemCloseWorker::thread; }; std::unique_ptr worker(new mockDrmGemCloseWorker(*mm)); EXPECT_NE(nullptr, worker->thread); worker->close(true); EXPECT_EQ(nullptr, worker->thread); } TEST_F(DrmGemCloseWorkerTests, givenDrmGemCloseWorkerWhenCloseIsCalledMultipleTimeWithBlockingFlagThenThreadIsClosed) { struct mockDrmGemCloseWorker : DrmGemCloseWorker { using DrmGemCloseWorker::DrmGemCloseWorker; using DrmGemCloseWorker::thread; }; std::unique_ptr worker(new mockDrmGemCloseWorker(*mm)); worker->close(true); worker->close(true); worker->close(true); EXPECT_EQ(nullptr, worker->thread); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_mapper_tests.cpp000066400000000000000000000016501363734646600315100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_engine_mapper.h" #include "test.h" #include "drm/i915_drm.h" using namespace NEO; TEST(DrmMapperTests, engineNodeMapPass) { unsigned int rcsFlag = DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_RCS); unsigned int bcsFlag = DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_BCS); unsigned int ccsFlag = DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS); unsigned int expectedRcsFlag = I915_EXEC_RENDER; unsigned int expectedBcsFlag = I915_EXEC_BLT; unsigned int expectedCcsFlag = I915_EXEC_COMPUTE; EXPECT_EQ(expectedRcsFlag, rcsFlag); EXPECT_EQ(expectedBcsFlag, bcsFlag); EXPECT_EQ(expectedCcsFlag, ccsFlag); } TEST(DrmMapperTests, engineNodeMapNegative) { EXPECT_THROW(DrmEngineMapper::engineNodeMap(aub_stream::ENGINE_VCS), std::exception); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_memory_info_tests.cpp000066400000000000000000000047411363734646600325530ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/memory_info.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "gtest/gtest.h" using namespace NEO; struct MemoryInfoImpl : public NEO::MemoryInfo { MemoryInfoImpl() {} ~MemoryInfoImpl() override{}; }; TEST(DrmTest, whenQueryingEngineInfoThenEngineInfoIsNotCreatedAndNoIoctlsAreCalled) { std::unique_ptr drm = std::make_unique(); EXPECT_NE(nullptr, drm); EXPECT_TRUE(drm->queryEngineInfo()); EXPECT_EQ(nullptr, drm->engineInfo.get()); EXPECT_EQ(0u, drm->ioctlCallsCount); } TEST(DrmTest, whenQueryingMemoryInfoThenMemoryInfoIsNotCreatedAndNoIoctlsAreCalled) { std::unique_ptr drm = std::make_unique(); EXPECT_NE(nullptr, drm); EXPECT_TRUE(drm->queryMemoryInfo()); EXPECT_EQ(nullptr, drm->memoryInfo.get()); EXPECT_EQ(0u, drm->ioctlCallsCount); } TEST(DrmTest, 
givenMemoryInfoWhenGetMemoryInfoIsCalledThenValidPtrIsReturned) { std::unique_ptr drm = std::make_unique(); EXPECT_NE(nullptr, drm); drm->memoryInfo.reset(new MemoryInfoImpl); EXPECT_EQ(drm->memoryInfo.get(), drm->getMemoryInfo()); } TEST(MemoryInfo, givenMemoryInfoImplementationWhenDestructingThenDestructorIsCalled) { MemoryInfoImpl memoryInfoImpl; } TEST(MemoryInfo, givenMemoryRegionIdWhenGetMemoryTypeFromRegionAndGetInstanceFromRegionAreCalledThenMemoryTypeAndInstanceAreReturned) { std::unique_ptr drm = std::make_unique(); EXPECT_NE(nullptr, drm); auto regionSmem = drm->createMemoryRegionId(0, 0); EXPECT_EQ(0u, drm->getMemoryTypeFromRegion(regionSmem)); EXPECT_EQ(0u, drm->getInstanceFromRegion(regionSmem)); auto regionLmem = drm->createMemoryRegionId(1, 0); EXPECT_EQ(1u, drm->getMemoryTypeFromRegion(regionLmem)); EXPECT_EQ(0u, drm->getInstanceFromRegion(regionLmem)); auto regionLmem1 = drm->createMemoryRegionId(1, 1); EXPECT_EQ(1u, drm->getMemoryTypeFromRegion(regionLmem1)); EXPECT_EQ(1u, drm->getInstanceFromRegion(regionLmem1)); auto regionLmem2 = drm->createMemoryRegionId(1, 2); EXPECT_EQ(1u, drm->getMemoryTypeFromRegion(regionLmem2)); EXPECT_EQ(2u, drm->getInstanceFromRegion(regionLmem2)); auto regionLmem3 = drm->createMemoryRegionId(1, 3); EXPECT_EQ(1u, drm->getMemoryTypeFromRegion(regionLmem3)); EXPECT_EQ(3u, drm->getInstanceFromRegion(regionLmem3)); } drm_memory_manager_allocate_in_device_pool_tests.cpp000066400000000000000000000105521363734646600400520ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h" #include 
"opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; using AllocationData = TestedDrmMemoryManager::AllocationData; TEST(DrmMemoryManagerSimpleTest, givenDrmMemoryManagerWhenAllocateInDevicePoolIsCalledThenNullptrAndStatusRetryIsReturned) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0])); TestedDrmMemoryManager memoryManager(executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.size = MemoryConstants::pageSize; allocData.flags.useSystemMemory = true; allocData.flags.allocateMemory = true; auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } TEST(DrmMemoryManagerSimpleTest, givenDrmMemoryManagerWhenLockResourceIsCalledOnNullBufferObjectThenReturnNullPtr) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); TestedDrmMemoryManager memoryManager(executionEnvironment); DrmAllocation drmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0u, 0u, MemoryPool::LocalMemory); auto ptr = memoryManager.lockResourceInLocalMemoryImpl(drmAllocation.getBO()); EXPECT_EQ(nullptr, ptr); memoryManager.unlockResourceInLocalMemoryImpl(drmAllocation.getBO()); } TEST(DrmMemoryManagerSimpleTest, givenDrmMemoryManagerWhenFreeGraphicsMemoryIsCalledOnAllocationWithNullBufferObjectThenEarlyReturn) { MockExecutionEnvironment 
executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); TestedDrmMemoryManager memoryManager(executionEnvironment); auto drmAllocation = new DrmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0u, 0u, MemoryPool::LocalMemory); EXPECT_NE(nullptr, drmAllocation); memoryManager.freeGraphicsMemoryImpl(drmAllocation); } using DrmMemoryManagerWithLocalMemoryTest = Test; TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmMemoryManagerWithLocalMemoryWhenLockResourceIsCalledOnAllocationInLocalMemoryThenReturnNullPtr) { DrmAllocation drmAllocation(rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0u, 0u, MemoryPool::LocalMemory); auto ptr = memoryManager->lockResource(&drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(&drmAllocation); } using DrmMemoryManagerTest = Test; TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationThenAllocationIsFilledWithCorrectData) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; std::vector dataToCopy(MemoryConstants::pageSize, 1u); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties({rootDeviceIndex, dataToCopy.size(), GraphicsAllocation::AllocationType::BUFFER}); ASSERT_NE(nullptr, allocation); auto ret = memoryManager->copyMemoryToAllocation(allocation, dataToCopy.data(), dataToCopy.size()); EXPECT_TRUE(ret); EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), dataToCopy.data(), dataToCopy.size())); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) { EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(rootDeviceIndex)); } 
compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp000066400000000000000000004756611363734646600332470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "drm_memory_manager_tests.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/host_ptr_manager.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/os_interface/linux/drm_allocation.h" #include "shared/source/os_interface/linux/drm_buffer_object.h" #include "shared/source/os_interface/linux/drm_memory_manager.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/os_interface/linux/drm_command_stream.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gfx_partition.h" #include 
"opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "drm/i915_drm.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include #include namespace NEO { AllocationProperties createAllocationProperties(uint32_t rootDeviceIndex, size_t size, bool forcePin) { MockAllocationProperties properties(rootDeviceIndex, size); properties.alignment = MemoryConstants::preferredAlignment; properties.flags.forcePin = forcePin; return properties; } typedef Test DrmMemoryManagerTest; typedef Test DrmMemoryManagerWithExplicitExpectationsTest; TEST_F(DrmMemoryManagerTest, whenCreatingDrmMemoryManagerThenSupportsMultiStorageResourcesFlagIsSetToFalse) { EXPECT_TRUE(memoryManager->supportsMultiStorageResources); } TEST_F(DrmMemoryManagerTest, GivenGraphicsAllocationWhenAddAndRemoveAllocationToHostPtrManagerThenfragmentHasCorrectValues) { void *cpuPtr = (void *)0x30000; size_t size = 0x1000; DrmAllocation gfxAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, cpuPtr, size, (osHandle)1u, MemoryPool::MemoryNull); memoryManager->addAllocationToHostPtrManager(&gfxAllocation); auto fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_NE(fragment, nullptr); EXPECT_TRUE(fragment->driverAllocation); EXPECT_EQ(fragment->refCount, 1); EXPECT_EQ(fragment->refCount, 1); EXPECT_EQ(fragment->fragmentCpuPointer, cpuPtr); EXPECT_EQ(fragment->fragmentSize, size); EXPECT_NE(fragment->osInternalStorage, nullptr); EXPECT_EQ(fragment->osInternalStorage->bo, gfxAllocation.getBO()); EXPECT_NE(fragment->residency, nullptr); FragmentStorage fragmentStorage = {}; fragmentStorage.fragmentCpuPointer = cpuPtr; memoryManager->getHostPtrManager()->storeFragment(fragmentStorage); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = false; 
memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation);
    fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer());
    // Removal while driverAllocation is false leaves the count untouched.
    EXPECT_EQ(fragment->refCount, 2);
    fragment->driverAllocation = true;

    memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation);
    fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer());
    EXPECT_EQ(fragment->refCount, 1);

    // The last removal drops the fragment from the manager entirely.
    memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation);
    fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer());
    EXPECT_EQ(fragment, nullptr);
}

// NOTE(review): template arguments appear stripped (`std::make_unique(...)`,
// `std::unique_ptr memoryManager`); judging by the test names, the second boolean
// constructor argument enables force-pin - confirm against the class declaration.

// With the second constructor flag set, a pin buffer object exists for the device.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenforcePinAllowedWhenMemoryManagerIsCreatedThenPinBbIsCreated) {
    auto memoryManager = std::make_unique(false, true, false, *executionEnvironment);
    EXPECT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]);
}

// Same check through the fixture with explicit ioctl counts (one userptr, one close).
TEST_F(DrmMemoryManagerTest, pinBBisCreated) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemClose = 1;

    auto memoryManager = std::make_unique(false, true, false, *executionEnvironment);
    EXPECT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]);
}

// With the second constructor flag cleared, no pin buffer object is created.
TEST_F(DrmMemoryManagerTest, givenNotAllowedForcePinWhenMemoryManagerIsCreatedThenPinBBIsNotCreated) {
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, *executionEnvironment));
    EXPECT_EQ(nullptr, memoryManager->pinBBs[rootDeviceIndex]);
}

// Forces the mocked ioctl to return -1 during construction and expects no pin BB.
TEST_F(DrmMemoryManagerTest, pinBBnotCreatedWhenIoctlFailed) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_res = -1;
    auto memoryManager = new (std::nothrow) TestedDrmMemoryManager(false, true, false, *executionEnvironment);
    EXPECT_EQ(nullptr, memoryManager->pinBBs[rootDeviceIndex]);
    // Restore the mock result before the manager is destroyed.
    mock->ioctl_res = 0;
    delete memoryManager;
}

// 10 MB force-pinned allocation: expects one execbuffer2 ioctl in addition to
// two userptr/close pairs and one wait.
TEST_F(DrmMemoryManagerTest, pinAfterAllocateWhenAskedAndAllowedAndBigAllocation) {
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.execbuffer2 = 1;
    mock->ioctl_expected.gemWait = 1;
mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, 10 * MemoryConstants::megaByte, true))); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); } TEST_F(DrmMemoryManagerTest, whenPeekInternalHandleIsCalledThenBoIsReturend) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; mock->ioctl_expected.handleToPrimeFd = 1; mock->outputFd = 1337; auto allocation = static_cast(this->memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, 10 * MemoryConstants::pageSize, true))); ASSERT_NE(allocation->getBO(), nullptr); ASSERT_EQ(allocation->peekInternalHandle(this->memoryManager), static_cast(1337)); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmContextIdWhenAllocationIsCreatedThenPinWithPassedDrmContextId) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.execbuffer2 = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } auto drmContextId = memoryManager->getDefaultDrmContextId(); ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); EXPECT_NE(0u, drmContextId); auto alloc = memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, 
memoryManager->pinThreshold, true));
    EXPECT_EQ(drmContextId, mock->execBuffer.rsvd1);

    memoryManager->freeGraphicsMemory(alloc);
}

// NOTE(review): template arguments appear stripped in this text
// (`std::make_unique(...)`, `static_cast(...)`); confirm against upstream.

// Force-pin is requested, but no execbuffer2 ioctl is expected for a single page.
TEST_F(DrmMemoryManagerTest, doNotPinAfterAllocateWhenAskedAndAllowedButSmallAllocation) {
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 2;

    auto memoryManager = std::make_unique(false, true, false, *executionEnvironment);
    ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]);

    // one page is too small for early pinning
    // (kept on its own line so the comment cannot swallow the code that follows)
    auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, true)));
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    memoryManager->freeGraphicsMemory(alloc);
}

// Pinning is allowed by the manager but not requested by the allocation properties.
TEST_F(DrmMemoryManagerTest, doNotPinAfterAllocateWhenNotAskedButAllowed) {
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.gemClose = 2;
    mock->ioctl_expected.gemWait = 1;

    auto memoryManager = std::make_unique(false, true, false, *executionEnvironment);
    memoryManager->registeredEngines = EngineControlContainer{this->device->engines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }
    ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]);

    auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, false)));
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    memoryManager->freeGraphicsMemory(alloc);
}

// Pinning is requested by the allocation but the manager is created with it disabled.
TEST_F(DrmMemoryManagerTest, doNotPinAfterAllocateWhenAskedButNotAllowed) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);
    memoryManager->registeredEngines = EngineControlContainer{this->device->engines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }

    auto alloc =
static_cast(memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, true))); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); } // ---- HostPtr TEST_F(DrmMemoryManagerTest, pinAfterAllocateWhenAskedAndAllowedAndBigAllocationHostPtr) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemClose = 2; mock->ioctl_expected.execbuffer2 = 1; mock->ioctl_expected.gemWait = 1; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); allocationData.size = 10 * MB; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); allocationData.flags.forcePin = true; auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(const_cast(allocationData.hostPtr)); } TEST_F(DrmMemoryManagerTest, givenSmallAllocationHostPtrAllocationWhenForcePinIsTrueThenBufferObjectIsNotPinned) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 2; auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); // one page is too small for early pinning allocationData.size = 4 * 1024; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); allocationData.flags.forcePin = true; auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); 
ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    memoryManager->freeGraphicsMemory(alloc);
    // The host buffer was allocated by the test, so the test releases it.
    ::alignedFree(const_cast(allocationData.hostPtr));
}

// NOTE(review): template arguments appear stripped in this text
// (`std::make_unique(...)`, `const_cast(...)`); confirm against upstream.

// Host-pointer allocation without forcePin on a manager that allows pinning.
TEST_F(DrmMemoryManagerTest, doNotPinAfterAllocateWhenNotAskedButAllowedHostPtr) {
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 2;

    auto memoryManager = std::make_unique(false, true, false, *executionEnvironment);
    ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]);

    allocationData.size = 4 * 1024;
    allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096);
    auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData);
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    memoryManager->freeGraphicsMemory(alloc);
    ::alignedFree(const_cast(allocationData.hostPtr));
}

// Host-pointer allocation with forcePin on a manager created with pinning disabled.
TEST_F(DrmMemoryManagerTest, doNotPinAfterAllocateWhenAskedButNotAllowedHostPtr) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);

    allocationData.size = 4 * 1024;
    allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096);
    allocationData.flags.forcePin = true;
    auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData);
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    memoryManager->freeGraphicsMemory(alloc);
    ::alignedFree(const_cast(allocationData.hostPtr));
}

// Creates a userptr buffer object and releases it; expects one userptr and one close ioctl.
TEST_F(DrmMemoryManagerTest, unreference) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemClose = 1;
    BufferObject *bo = memoryManager->allocUserptr(0, (size_t)1024, 0ul, rootDeviceIndex);
    ASSERT_NE(nullptr, bo);
    memoryManager->unreference(bo, false);
}

// unreference must accept a null buffer object.
TEST_F(DrmMemoryManagerTest, UnreferenceNullPtr) {
    memoryManager->unreference(nullptr, false);
}

TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerCreatedWithGemCloseWorkerModeInactiveThenGemCloseWorkerIsNotCreated) {
DrmMemoryManager drmMemoryManger(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, *executionEnvironment);
    EXPECT_EQ(nullptr, drmMemoryManger.peekGemCloseWorker());
}

// A manager created in gemCloseWorkerActive mode exposes a gem close worker.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerCreatedWithGemCloseWorkerActiveThenGemCloseWorkerIsCreated) {
    DrmMemoryManager drmMemoryManger(gemCloseWorkerMode::gemCloseWorkerActive, false, false, *executionEnvironment);
    EXPECT_NE(nullptr, drmMemoryManger.peekGemCloseWorker());
}

// NOTE(review): the template argument of `static_cast(...)` below appears stripped;
// confirm against upstream.

// Basic allocate/free round trip; the allocation must not report a shared handle.
TEST_F(DrmMemoryManagerTest, AllocateThenFree) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}));
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getBO());

    EXPECT_EQ(Sharing::nonSharedResource, alloc->peekSharedHandle());

    memoryManager->freeGraphicsMemory(alloc);
}

// Runs the allocation under failure injection: every injected failure must yield
// nullptr, and the non-failing run must yield a valid allocation.
TEST_F(DrmMemoryManagerTest, AllocateNewFail) {
    // don't care about ioctl counts here (comment moved to its own line so it
    // cannot swallow the code that follows)
    mock->ioctl_expected.total = -1;

    InjectedFunction method = [this](size_t failureIndex) {
        auto ptr = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});

        if (MemoryManagement::nonfailingAllocation != failureIndex) {
            EXPECT_EQ(nullptr, ptr);
        } else {
            EXPECT_NE(nullptr, ptr);
            memoryManager->freeGraphicsMemory(ptr);
        }
    };
    injectFailures(method);
}

// A zero-byte request still produces an allocation with a non-null underlying buffer.
TEST_F(DrmMemoryManagerTest, Allocate0Bytes) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto ptr = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 0u});
    ASSERT_NE(nullptr, ptr);
    EXPECT_NE(nullptr, ptr->getUnderlyingBuffer());

    memoryManager->freeGraphicsMemory(ptr);
}

TEST_F(DrmMemoryManagerTest, Allocate3Bytes) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto ptr =
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});
    // NOTE(review): despite the test name, the request above is one page, not 3 bytes.
    ASSERT_NE(nullptr, ptr);
    EXPECT_NE(nullptr, ptr->getUnderlyingBuffer());

    memoryManager->freeGraphicsMemory(ptr);
}

// A failing userptr ioctl (mock returns -1) must make the allocation fail.
TEST_F(DrmMemoryManagerTest, AllocateUserptrFail) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_res = -1;

    auto ptr = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});
    EXPECT_EQ(nullptr, ptr);
    mock->ioctl_res = 0;
}

// freeGraphicsMemory must accept nullptr.
TEST_F(DrmMemoryManagerTest, FreeNullPtr) {
    memoryManager->freeGraphicsMemory(nullptr);
}

// NOTE(review): template arguments of `static_cast(...)` / `reinterpret_cast(...)`
// appear stripped in this text; confirm against upstream.

// Allocation wrapping an aligned host pointer: the allocation and its buffer
// object must both refer to that pointer.
TEST_F(DrmMemoryManagerTest, Allocate_HostPtr) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    void *ptr = ::alignedMalloc(1024, 4096);
    ASSERT_NE(nullptr, ptr);

    auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 1024}, ptr));
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getUnderlyingBuffer());
    EXPECT_EQ(ptr, alloc->getUnderlyingBuffer());

    auto bo = alloc->getBO();
    ASSERT_NE(nullptr, bo);
    EXPECT_EQ(ptr, reinterpret_cast(bo->peekAddress()));
    EXPECT_EQ(Sharing::nonSharedResource, alloc->peekSharedHandle());
    memoryManager->freeGraphicsMemory(alloc);
    ::alignedFree(ptr);
}

// Host-pointer path with a null host pointer: the allocation is still created and
// both the underlying buffer and the BO address are expected to be null.
TEST_F(DrmMemoryManagerTest, Allocate_HostPtr_Nullptr) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    void *ptr = nullptr;

    allocationData.hostPtr = nullptr;
    allocationData.size = MemoryConstants::pageSize;
    auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData));
    ASSERT_NE(nullptr, alloc);
    EXPECT_EQ(ptr, alloc->getUnderlyingBuffer());

    auto bo = alloc->getBO();
    ASSERT_NE(nullptr, bo);
    EXPECT_EQ(ptr, reinterpret_cast(bo->peekAddress()));

    memoryManager->freeGraphicsMemory(alloc);
    // `ptr` is nullptr here, so this call releases nothing.
    ::alignedFree(ptr);
}

TEST_F(DrmMemoryManagerTest, Allocate_HostPtr_MisAligned) {
mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    // NOTE(review): template arguments of `static_cast(...)`, `reinterpret_cast(...)`
    // and `std::make_unique(...)` appear stripped in this text; confirm against upstream.
    void *ptrT = ::alignedMalloc(1024, 4096);
    ASSERT_NE(nullptr, ptrT);

    // Offset the aligned block by 128 bytes to obtain a misaligned host pointer.
    void *ptr = ptrOffset(ptrT, 128);

    auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 1024}, ptr));
    ASSERT_NE(nullptr, alloc);
    EXPECT_NE(nullptr, alloc->getUnderlyingBuffer());
    EXPECT_EQ(ptr, alloc->getUnderlyingBuffer());

    auto bo = alloc->getBO();
    ASSERT_NE(nullptr, bo);
    // The buffer object address is expected to be the aligned base, not the offset pointer.
    EXPECT_EQ(ptrT, reinterpret_cast(bo->peekAddress()));

    memoryManager->freeGraphicsMemory(alloc);
    ::alignedFree(ptrT);
}

// A failing userptr ioctl must make the host-pointer allocation fail.
TEST_F(DrmMemoryManagerTest, Allocate_HostPtr_UserptrFail) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_res = -1;

    void *ptrT = ::alignedMalloc(1024, 4096);
    ASSERT_NE(nullptr, ptrT);

    auto alloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 1024}, ptrT);
    EXPECT_EQ(nullptr, alloc);

    ::alignedFree(ptrT);
    mock->ioctl_res = 0;
}

// handleFenceCompletion is expected to issue one gem wait; the ioctl counts are
// verified before the free, which is expected to add a second wait and one close.
TEST_F(DrmMemoryManagerTest, givenDrmAllocationWhenHandleFenceCompletionThenCallBufferObjectWait) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.contextDestroy = 0;

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024});
    memoryManager->handleFenceCompletion(allocation);
    mock->testIoctls();

    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemWait = 2;
    memoryManager->freeGraphicsMemory(allocation);
}

// For each of four root devices, installs a fresh DrmMockCustom and queries the
// system shared memory size.
TEST(DrmMemoryManagerTest2, givenDrmMemoryManagerWhengetSystemSharedMemoryIsCalledThenContextGetParamIsCalled) {
    auto executionEnvironment = std::make_unique();
    executionEnvironment->prepareRootDeviceEnvironments(4u);
    for (auto i = 0u; i < 4u; i++) {
        executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get());
    }
    auto memoryManager = std::make_unique(false, false, false, *executionEnvironment);
    for (auto i = 0u; i < 4u; i++) {
auto mock = new DrmMockCustom(); executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->setDrm(mock); mock->getContextParamRetValue = 16 * MemoryConstants::gigaByte; uint64_t mem = memoryManager->getSystemSharedMemory(i); mock->ioctl_expected.contextGetParam = 1; EXPECT_EQ(mock->recordedGetContextParam.param, static_cast<__u64>(I915_CONTEXT_PARAM_GTT_SIZE)); EXPECT_GT(mem, 0u); executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(); } } TEST_F(DrmMemoryManagerTest, getMaxApplicationAddress) { uint64_t maxAddr = memoryManager->getMaxApplicationAddress(); if (is64bit) { EXPECT_EQ(maxAddr, MemoryConstants::max64BitAppAddress); } else { EXPECT_EQ(maxAddr, MemoryConstants::max32BitAppAddress); } } TEST(DrmMemoryManagerTest2, getMinimumSystemSharedMemory) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(4u); for (auto i = 0u; i < 4u; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto memoryManager = std::make_unique(false, false, false, *executionEnvironment); for (auto i = 0u; i < 4u; i++) { auto mock = new DrmMockCustom(); executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->setDrm(mock); auto hostMemorySize = MemoryConstants::pageSize * (uint64_t)(sysconf(_SC_PHYS_PAGES)); // gpuMemSize < hostMemSize auto gpuMemorySize = hostMemorySize - 1u; mock->getContextParamRetValue = gpuMemorySize; uint64_t systemSharedMemorySize = memoryManager->getSystemSharedMemory(i); mock->ioctl_expected.contextGetParam = 1; EXPECT_EQ(gpuMemorySize, systemSharedMemorySize); mock->ioctl_expected.contextDestroy = 0; mock->ioctl_expected.contextCreate = 0; mock->testIoctls(); // gpuMemSize > hostMemSize gpuMemorySize = hostMemorySize + 1u; mock->getContextParamRetValue = gpuMemorySize; 
systemSharedMemorySize = memoryManager->getSystemSharedMemory(i);
        // Second query on the same mock, hence a cumulative count of two.
        mock->ioctl_expected.contextGetParam = 2;
        EXPECT_EQ(hostMemorySize, systemSharedMemorySize);
        mock->testIoctls();

        executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset();
    }
}

// A wait ioctl that returns -EIO is expected to make BufferObject::wait throw.
TEST_F(DrmMemoryManagerTest, BoWaitFailure) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;
    BufferObject *bo = memoryManager->allocUserptr(0, (size_t)1024, 0ul, rootDeviceIndex);
    ASSERT_NE(nullptr, bo);
    mock->ioctl_res = -EIO;
    EXPECT_THROW(bo->wait(-1), std::exception);
    mock->ioctl_res = 1;

    memoryManager->unreference(bo, false);
    mock->ioctl_res = 0;
}

// populateOsHandles on the non-default root device fills only the first fragment;
// all ioctls are expected on nonDefaultDrm and none on the default mock.
TEST_F(DrmMemoryManagerTest, NullOsHandleStorageAskedForPopulationReturnsFilledPointer) {
    mock->ioctl_expected.gemUserptr = 0;
    mock->ioctl_expected.gemWait = 0;
    mock->ioctl_expected.gemClose = 0;
    nonDefaultDrm->ioctl_expected.gemUserptr = 1;
    nonDefaultDrm->ioctl_expected.gemWait = 1;
    nonDefaultDrm->ioctl_expected.gemClose = 1;

    // NOTE(review): the template argument of `reinterpret_cast(...)` appears stripped
    // in this text; confirm against upstream.
    OsHandleStorage storage;
    storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    storage.fragmentStorageData[0].fragmentSize = 1;
    memoryManager->populateOsHandles(storage, nonDefaultRootDeviceIndex);
    EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage);
    EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage);
    EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage);
    // Mark the populated fragment for release before cleaning up.
    storage.fragmentStorageData[0].freeTheFragment = true;
    memoryManager->cleanOsHandles(storage, nonDefaultRootDeviceIndex);
}

// Third constructor flag set (host memory validation, judging by the test name).
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenReadOnlyPointerCausesPinningFailWithEfaultThenPopulateOsHandlesReturnsInvalidHostPointerError) {
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment));

    memoryManager->registeredEngines = EngineControlContainer{this->device->engines};
    for (auto engine : memoryManager->registeredEngines) {
engine.osContext->incRefInternal();
    }

    // NOTE(review): template arguments of `reinterpret_cast(...)` and `std::unique_ptr`
    // appear stripped in this text; confirm against upstream.
    OsHandleStorage storage;
    storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000);
    storage.fragmentStorageData[0].fragmentSize = 1;

    mock->reset();

    // Extended ioctl result {1, -1} with errno EFAULT; presumably this makes the
    // second ioctl (the execbuffer2 pin) fail - verify against DrmMockCustom.
    DrmMockCustom::IoctlResExt ioctlResExt = {1, -1};
    mock->ioctl_res_ext = &ioctlResExt;
    mock->errnoValue = EFAULT;
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 1;

    MemoryManager::AllocationStatus result = memoryManager->populateOsHandles(storage, 0u);

    EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result);
    mock->testIoctls();

    EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage);
    EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage);
    EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage);

    storage.fragmentStorageData[0].freeTheFragment = true;
    memoryManager->cleanOsHandles(storage, 0);
    // Detach the stack-allocated extended result from the mock before it goes out of scope.
    mock->ioctl_res_ext = &mock->NONE;
}

// Same EFAULT setup, exercised through allocateGraphicsMemoryForNonSvmHostPtr,
// which is expected to return nullptr.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenReadOnlyPointerCausesPinningFailWithEfaultThenAlocateMemoryForNonSvmHostPtrReturnsNullptr) {
    std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment));

    memoryManager->registeredEngines = EngineControlContainer{this->device->engines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }

    mock->reset();

    size_t dummySize = 13u;

    DrmMockCustom::IoctlResExt ioctlResExt = {1, -1};
    mock->ioctl_res_ext = &ioctlResExt;
    mock->errnoValue = EFAULT;
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.execbuffer2 = 1;
    mock->ioctl_expected.gemClose = 1;

    AllocationData allocationData;
    allocationData.size = dummySize;
    allocationData.hostPtr = reinterpret_cast(0x5001);
    allocationData.rootDeviceIndex = device->getRootDeviceIndex();

    // Probe the standard heap: remember the address the next allocation would get.
    auto gfxPartition = memoryManager->getGfxPartition(device->getRootDeviceIndex());

    auto allocatedPointer = gfxPartition->heapAllocate(HeapIndex::HEAP_STANDARD, dummySize);
gfxPartition->freeGpuAddressRange(allocatedPointer, dummySize); auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_EQ(nullptr, allocation); mock->testIoctls(); mock->ioctl_res_ext = &mock->NONE; //make sure that partition is free size_t dummySize2 = 13u; auto allocatedPointer2 = gfxPartition->heapAllocate(HeapIndex::HEAP_STANDARD, dummySize2); EXPECT_EQ(allocatedPointer2, allocatedPointer); gfxPartition->freeGpuAddressRange(allocatedPointer, dummySize2); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenHostPtrDoesntCausePinningFailThenAlocateMemoryForNonSvmHostPtrReturnsAllocation) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = SUCCESS; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; AllocationData allocationData; allocationData.size = 13u; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = device->getRootDeviceIndex(); auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); mock->testIoctls(); mock->ioctl_res_ext = &mock->NONE; memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenHostPtrPinningWithGpuRangeAsOffsetInAlocateMemoryForNonSvmHostPtr) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { 
engine.osContext->incRefInternal(); } mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = SUCCESS; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; AllocationData allocationData; allocationData.size = 13u; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = device->getRootDeviceIndex(); auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(allocation->getGpuAddress() - allocation->getAllocationOffset(), mock->execBufferBufferObjects.offset); mock->testIoctls(); mock->ioctl_res_ext = &mock->NONE; memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenPinningFailWithErrorDifferentThanEfaultThenPopulateOsHandlesReturnsError) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = 1; mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {1, -1}; mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = ENOMEM; mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; MemoryManager::AllocationStatus result = memoryManager->populateOsHandles(storage, 0u); EXPECT_EQ(MemoryManager::AllocationStatus::Error, result); mock->testIoctls(); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; 
memoryManager->cleanOsHandles(storage, 0);
    // Detach the stack-allocated extended result from the mock before it goes out of scope.
    mock->ioctl_res_ext = &mock->NONE;
}

// NOTE(review): template arguments of `std::unique_ptr` and `reinterpret_cast(...)`
// appear stripped in this text; confirm against upstream.

// A freshly constructed OsHandle, on the stack or on the heap, has a null `bo`.
TEST_F(DrmMemoryManagerTest, GivenNoInputsWhenOsHandleIsCreatedThenAllBoHandlesAreInitializedAsNullPtrs) {
    OsHandle boHandle;
    EXPECT_EQ(nullptr, boHandle.bo);

    std::unique_ptr boHandle2(new OsHandle);
    EXPECT_EQ(nullptr, boHandle2->bo);
}

// createGraphicsAllocation must copy the handle storage into the allocation
// (same pointers and sizes, different storage object) and use the host pointer
// as both CPU and GPU address.
TEST_F(DrmMemoryManagerTest, GivenPointerAndSizeWhenAskedToCreateGrahicsAllocationThenGraphicsAllocationIsCreated) {
    OsHandleStorage handleStorage;
    auto ptr = reinterpret_cast(0x1000);
    auto ptr2 = reinterpret_cast(0x1001);
    auto size = MemoryConstants::pageSize;

    handleStorage.fragmentStorageData[0].cpuPtr = ptr;
    handleStorage.fragmentStorageData[1].cpuPtr = ptr2;
    handleStorage.fragmentStorageData[2].cpuPtr = nullptr;

    handleStorage.fragmentStorageData[0].fragmentSize = size;
    handleStorage.fragmentStorageData[1].fragmentSize = size * 2;
    handleStorage.fragmentStorageData[2].fragmentSize = size * 3;

    allocationData.size = size;
    allocationData.hostPtr = ptr;
    auto allocation = std::unique_ptr(memoryManager->createGraphicsAllocation(handleStorage, allocationData));

    EXPECT_EQ(reinterpret_cast(allocation->getGpuAddress()), ptr);
    EXPECT_EQ(ptr, allocation->getUnderlyingBuffer());
    EXPECT_EQ(size, allocation->getUnderlyingBufferSize());

    EXPECT_EQ(ptr, allocation->fragmentsStorage.fragmentStorageData[0].cpuPtr);
    EXPECT_EQ(ptr2, allocation->fragmentsStorage.fragmentStorageData[1].cpuPtr);
    EXPECT_EQ(nullptr, allocation->fragmentsStorage.fragmentStorageData[2].cpuPtr);

    EXPECT_EQ(size, allocation->fragmentsStorage.fragmentStorageData[0].fragmentSize);
    EXPECT_EQ(size * 2, allocation->fragmentsStorage.fragmentStorageData[1].fragmentSize);
    EXPECT_EQ(size * 3, allocation->fragmentsStorage.fragmentStorageData[2].fragmentSize);

    // The allocation holds its own copy of the storage, not a reference to ours.
    EXPECT_NE(&allocation->fragmentsStorage, &handleStorage);
}

// 64 KB allocations are expected to yield nullptr from this memory manager.
TEST_F(DrmMemoryManagerTest, GivenPointerAndSizeWhenAskedToCreateGrahicsAllocation64kThenNullPtr) {
    allocationData.size = MemoryConstants::pageSize64k;

    auto allocation =
memoryManager->allocateGraphicsMemory64kb(allocationData);
    EXPECT_EQ(nullptr, allocation);
}

// acquireGpuRange with the last argument false must return an address strictly
// inside the canonized HEAP_STANDARD range.
TEST_F(DrmMemoryManagerTest, givenRequiresStandard64KBHeapSetToFalseThenStandardHeapIsUsed) {
    const uint32_t rootDeviceIndex = 0;
    size_t bufferSize = 4096u;
    uint64_t range = memoryManager->acquireGpuRange(bufferSize, false, rootDeviceIndex, false);

    EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD)), range);
    EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD)), range);
}

// acquireGpuRange with the last argument true must return an address strictly
// inside the canonized HEAP_STANDARD64KB range.
TEST_F(DrmMemoryManagerTest, givenRequiresStandard64KBHeapSetToTrueThenStandard64KBHeapIsUsed) {
    const uint32_t rootDeviceIndex = 0;
    size_t bufferSize = 4096u;
    uint64_t range = memoryManager->acquireGpuRange(bufferSize, false, rootDeviceIndex, true);

    EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD64KB)), range);
    EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD64KB)), range);
}

// A shareable allocation is created through gemCreate and must receive a GPU
// address inside the HEAP_STANDARD64KB range.
TEST_F(DrmMemoryManagerTest, GivenShareableEnabledWhenAskedToCreateGraphicsAllocationThenValidAllocationIsReturnedAndStandard64KBHeapIsUsed) {
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemCreate = 1;
    mock->ioctl_expected.gemClose = 1;

    allocationData.size = MemoryConstants::pageSize;
    allocationData.flags.shareable = true;

    auto allocation = memoryManager->allocateShareableMemory(allocationData);
    // Fatal check: every assertion below dereferences `allocation`.
    ASSERT_NE(nullptr, allocation);
    EXPECT_NE(0u, allocation->getGpuAddress());

    EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(allocation->getRootDeviceIndex())->getHeapBase(HeapIndex::HEAP_STANDARD64KB)), allocation->getGpuAddress());
    EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(allocation->getRootDeviceIndex())->getHeapLimit(HeapIndex::HEAP_STANDARD64KB)), allocation->getGpuAddress());

    memoryManager->freeGraphicsMemory(allocation);
}
// A misaligned host pointer spanning ten pages is split into three fragments;
// each fragment's buffer object must match the size and address computed by
// MockHostPtrManager::getAllocationRequirements().
TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAskedForGraphicsAllcoationThenItContainsAllFragmentsWithProperGpuAdrresses) {
    mock->ioctl_expected.gemUserptr = 3;
    mock->ioctl_expected.gemWait = 3;
    mock->ioctl_expected.gemClose = 3;

    auto ptr = reinterpret_cast<void *>(0x1001);
    auto size = MemoryConstants::pageSize * 10;
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, size}, ptr);
    // Fatal check: the loop below dereferences the allocation.
    ASSERT_NE(nullptr, graphicsAllocation);

    auto hostPtrManager = static_cast<MockHostPtrManager *>(memoryManager->getHostPtrManager());
    ASSERT_EQ(3u, hostPtrManager->getFragmentCount());

    auto reqs = MockHostPtrManager::getAllocationRequirements(ptr, size);

    for (int i = 0; i < maxFragmentsCount; i++) {
        ASSERT_NE(nullptr, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo);
        EXPECT_EQ(reqs.allocationFragments[i].allocationSize, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo->peekSize());
        EXPECT_EQ(reqs.allocationFragments[i].allocationPtr, reinterpret_cast<void *>(graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->bo->peekAddress()));
    }
    memoryManager->freeGraphicsMemory(graphicsAllocation);
    // Freeing the allocation must release every fragment.
    EXPECT_EQ(0u, hostPtrManager->getFragmentCount());
}

// A 32-bit allocation has a CPU buffer of at least the requested size, a
// patchable address below max32BitAddress, and the external heap base as its
// GPU base address.
TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationThen32BitDrmAllocationIsBeingReturned) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    auto size = 10u;
    memoryManager->setForce32BitAllocations(true);
    auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, GraphicsAllocation::AllocationType::BUFFER);
    // Fatal check: the allocation is dereferenced by every expectation below.
    ASSERT_NE(nullptr, allocation);
    EXPECT_NE(nullptr, allocation->getUnderlyingBuffer());
    EXPECT_GE(allocation->getUnderlyingBufferSize(), size);

    auto address64bit = allocation->getGpuAddressToPatch();

    EXPECT_LT(address64bit, MemoryConstants::max32BitAddress);

    EXPECT_TRUE(allocation->is32BitAllocation());

    EXPECT_EQ(GmmHelper::canonize(memoryManager->getExternalHeapBaseAddress(allocation->getRootDeviceIndex())), allocation->getGpuBaseAddress());

    memoryManager->freeGraphicsMemory(allocation);
}

// With the limited range allocator forced, a 32-bit allocation's GPU address
// must differ from its CPU address.
TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationWhenLimitedAllocationEnabledThen32BitDrmAllocationWithGpuAddrDifferentFromCpuAddrIsBeingReturned) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);

    auto size = 10u;
    memoryManager->setForce32BitAllocations(true);
    auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, GraphicsAllocation::AllocationType::BUFFER);
    // Fatal check: the allocation is dereferenced by every expectation below.
    ASSERT_NE(nullptr, allocation);
    EXPECT_NE(nullptr, allocation->getUnderlyingBuffer());
    EXPECT_GE(allocation->getUnderlyingBufferSize(), size);

    EXPECT_NE((uint64_t)allocation->getGpuAddress(), (uint64_t)allocation->getUnderlyingBuffer());

    memoryManager->freeGraphicsMemory(allocation);
}

// With 32-bit addressing forced, a CL_MEM_ALLOC_HOST_PTR buffer is zero-copy
// and its GPU address stays within 32 bits of the GPU base address.
TEST_F(DrmMemoryManagerTest, Given32bitAllocatorWhenAskedForBufferAllocationThen32BitBufferIsReturned) {
    DebugManagerStateRestore dbgRestorer;
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    DebugManager.flags.Force32bitAddressing.set(true);
    MockContext context(device);
    memoryManager->setForce32BitAllocations(true);

    auto size = MemoryConstants::pageSize;
    auto retVal = CL_SUCCESS;

    auto buffer = Buffer::create(
        &context,
        CL_MEM_ALLOC_HOST_PTR,
        size,
        nullptr,
        retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Fatal check: the buffer is dereferenced below.
    ASSERT_NE(nullptr, buffer);

    EXPECT_TRUE(buffer->isMemObjZeroCopy());
    auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress();
    auto baseAddress = buffer->getGraphicsAllocation()->getGpuBaseAddress();

    EXPECT_LT(ptrDiff(bufferAddress, baseAddress), MemoryConstants::max32BitAddress);

    delete buffer;
}

// With 32-bit addressing forced, a CL_MEM_USE_HOST_PTR buffer created from a
// cache-line-offset pointer keeps that in-page offset on both the CPU and GPU
// side, while the buffer object address itself is page aligned.
TEST_F(DrmMemoryManagerTest, Given32bitAllocatorWhenAskedForBufferCreatedFromHostPtrThen32BitBufferIsReturned) {
    DebugManagerStateRestore dbgRestorer;
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    DebugManager.flags.Force32bitAddressing.set(true);
    MockContext context(device);
    memoryManager->setForce32BitAllocations(true);

    auto size = MemoryConstants::pageSize;
    void *ptr = reinterpret_cast<void *>(0x1000);
    auto ptrOffset = MemoryConstants::cacheLineSize;
    uintptr_t offsetedPtr = (uintptr_t)ptr + ptrOffset;
    auto retVal = CL_SUCCESS;

    auto buffer = Buffer::create(
        &context,
        CL_MEM_USE_HOST_PTR,
        size,
        reinterpret_cast<void *>(offsetedPtr),
        retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Fatal check: the buffer is dereferenced below.
    ASSERT_NE(nullptr, buffer);

    EXPECT_TRUE(buffer->isMemObjZeroCopy());
    auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress();
    auto drmAllocation = static_cast<DrmAllocation *>(buffer->getGraphicsAllocation());

    auto baseAddress = buffer->getGraphicsAllocation()->getGpuBaseAddress();
    EXPECT_LT(ptrDiff(bufferAddress, baseAddress), MemoryConstants::max32BitAddress);

    EXPECT_TRUE(drmAllocation->is32BitAllocation());

    auto allocationCpuPtr = drmAllocation->getUnderlyingBuffer();
    auto allocationPageOffset = ptrDiff(allocationCpuPtr, alignDown(allocationCpuPtr, MemoryConstants::pageSize));

    auto allocationGpuPtr = drmAllocation->getGpuAddress();
    auto allocationGpuOffset = ptrDiff(allocationGpuPtr, alignDown(allocationGpuPtr, MemoryConstants::pageSize));

    auto bufferObject = drmAllocation->getBO();

    EXPECT_EQ(drmAllocation->getUnderlyingBuffer(), reinterpret_cast<void *>(offsetedPtr));

    // Gpu address should be different
    EXPECT_NE(offsetedPtr, drmAllocation->getGpuAddress());
    // Gpu address offset equal to cpu offset
    EXPECT_EQ(allocationGpuOffset, ptrOffset);

    EXPECT_EQ(allocationPageOffset, ptrOffset);

    auto boAddress = bufferObject->peekAddress();
    EXPECT_EQ(alignDown(boAddress, MemoryConstants::pageSize), boAddress);

    delete buffer;
}

// Same as the host-pointer test above, but with a host pointer above 4 GB.
// On 32-bit builds the body is skipped and ioctl counting is disabled.
TEST_F(DrmMemoryManagerTest, Given32bitAllocatorWhenAskedForBufferCreatedFrom64BitHostPtrThen32BitBufferIsReturned) {
    DebugManagerStateRestore dbgRestorer;
    if (is32bit) {
        mock->ioctl_expected.total = -1;
    } else {
        mock->ioctl_expected.gemUserptr = 1;
        mock->ioctl_expected.gemWait = 1;
        mock->ioctl_expected.gemClose = 1;

        DebugManager.flags.Force32bitAddressing.set(true);
        MockContext context(device);
        memoryManager->setForce32BitAllocations(true);

        auto size = MemoryConstants::pageSize;
        void *ptr = reinterpret_cast<void *>(0x100000000000);
        auto ptrOffset = MemoryConstants::cacheLineSize;
        uintptr_t offsetedPtr = (uintptr_t)ptr + ptrOffset;
        auto retVal = CL_SUCCESS;

        auto buffer = Buffer::create(
            &context,
            CL_MEM_USE_HOST_PTR,
            size,
            reinterpret_cast<void *>(offsetedPtr),
            retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        // Fatal check: the buffer is dereferenced below.
        ASSERT_NE(nullptr, buffer);

        EXPECT_TRUE(buffer->isMemObjZeroCopy());
        auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress();

        auto baseAddress = buffer->getGraphicsAllocation()->getGpuBaseAddress();
        EXPECT_LT(ptrDiff(bufferAddress, baseAddress), MemoryConstants::max32BitAddress);

        auto drmAllocation = static_cast<DrmAllocation *>(buffer->getGraphicsAllocation());

        EXPECT_TRUE(drmAllocation->is32BitAllocation());

        auto allocationCpuPtr = drmAllocation->getUnderlyingBuffer();
        auto allocationPageOffset = ptrDiff(allocationCpuPtr, alignDown(allocationCpuPtr, MemoryConstants::pageSize));
        auto bufferObject = drmAllocation->getBO();

        EXPECT_EQ(allocationPageOffset, ptrOffset);

        auto boAddress = bufferObject->peekAddress();
        EXPECT_EQ(alignDown(boAddress, MemoryConstants::pageSize), boAddress);

        delete buffer;
        DebugManager.flags.Force32bitAddressing.set(false);
    }
}

// With the limited range allocator forced, the first allocation from
// HEAP_STANDARD and from HEAP_INTERNAL_DEVICE_MEMORY starts at the heap's
// minimal address and fits strictly between heap base and heap limit.
TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenLimitedRangeAllocatorSetThenHeapSizeAndEndAddrCorrectlySetForGivenGpuRange) {
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);

    uint64_t sizeBig = 4 * MemoryConstants::megaByte + MemoryConstants::pageSize;
    auto gpuAddressLimitedRange = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_STANDARD, sizeBig);
    EXPECT_LT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD), gpuAddressLimitedRange);
    EXPECT_GT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD), gpuAddressLimitedRange + sizeBig);
    EXPECT_EQ(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapMinimalAddress(HeapIndex::HEAP_STANDARD), gpuAddressLimitedRange);

    auto gpuInternal32BitAlloc = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, sizeBig);
    EXPECT_LT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), gpuInternal32BitAlloc);
    EXPECT_GT(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), gpuInternal32BitAlloc + sizeBig);
    EXPECT_EQ(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), gpuInternal32BitAlloc);
}

// When HEAP_STANDARD is re-initialised to 0x10000 bytes and 0x20000 bytes are
// requested, allocateGraphicsMemoryWithAlignment() must return nullptr.
TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedForAllocationWithAlignmentAndLimitedRangeAllocatorSetAndAcquireGpuRangeFailsThenNullIsReturned) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemClose = 1;

    TestedDrmMemoryManager::AllocationData allocationData;

    // emulate GPU address space exhaust
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);
    memoryManager->getGfxPartition(rootDeviceIndex)->heapInit(HeapIndex::HEAP_STANDARD, 0x0, 0x10000);

    // set size to something bigger than allowed space
    allocationData.size = 0x20000;
    allocationData.rootDeviceIndex = rootDeviceIndex;
    EXPECT_EQ(nullptr, memoryManager->allocateGraphicsMemoryWithAlignment(allocationData));
}

// The next ioctl is forced to return -1 through ioctl_res_ext; a 32-bit
// allocation from a host pointer must then fail.
TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationWithHostPtrAndAllocUserptrFailsThenFails) {
    mock->ioctl_expected.gemUserptr = 1;

    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &ioctlResExt;

    auto size = 10u;
    void *host_ptr = reinterpret_cast<void *>(0x1000);
    memoryManager->setForce32BitAllocations(true);
    auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, host_ptr, GraphicsAllocation::AllocationType::BUFFER);

    EXPECT_EQ(nullptr, allocation);
    // Restore the default (non-failing) ioctl result override.
    mock->ioctl_res_ext = &mock->NONE;
}

// Same failure injection as above, without a host pointer.
TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedFor32BitAllocationAndAllocUserptrFailsThenFails) {
    mock->ioctl_expected.gemUserptr = 1;

    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &ioctlResExt;

    auto size = 10u;
    memoryManager->setForce32BitAllocations(true);
    auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, GraphicsAllocation::AllocationType::BUFFER);

    EXPECT_EQ(nullptr, allocation);
    mock->ioctl_res_ext = &mock->NONE;
}

// Same failure injection, for an INTERNAL_HEAP allocation with the limited
// range allocator forced.
TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternal32BitAllocationAndAllocUserptrFailsThenFails) {
    mock->ioctl_expected.gemUserptr = 1;

    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &ioctlResExt;

    auto size = 10u;
    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);
    auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, nullptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP);

    EXPECT_EQ(nullptr, allocation);
    mock->ioctl_res_ext = &mock->NONE;
}

// With 32-bit addressing forced, a 2 GB USE_HOST_PTR buffer is created first;
// a following (4 GB - page) ALLOC_HOST_PTR buffer must fail.
TEST_F(DrmMemoryManagerTest, GivenSizeAbove2GBWhenUseHostPtrAndAllocHostPtrAreCreatedThenFirstSucceedsAndSecondFails) {
    DebugManagerStateRestore dbgRestorer;
    mock->ioctl_expected.total = -1;

    DebugManager.flags.Force32bitAddressing.set(true);
    MockContext context(device);
    memoryManager->setForce32BitAllocations(true);

    size_t size = 2 * GB;
    void *ptr = reinterpret_cast<void *>(0x100000000000);
    auto retVal = CL_SUCCESS;

    auto buffer = Buffer::create(
        &context,
        CL_MEM_USE_HOST_PTR,
        size,
        ptr,
        retVal);

    size_t size2 = 4 * GB - MemoryConstants::pageSize; // Keep size aligned

    auto buffer2 = Buffer::create(
        &context,
        CL_MEM_ALLOC_HOST_PTR,
        size2,
        nullptr,
        retVal);

    EXPECT_NE(retVal, CL_SUCCESS);
    EXPECT_EQ(nullptr, buffer2);

    // The first buffer is only inspected when its creation succeeded.
    if (buffer) {
        auto bufferPtr = buffer->getGraphicsAllocation()->getGpuAddress();

        EXPECT_TRUE(buffer->getGraphicsAllocation()->is32BitAllocation());
        auto baseAddress = buffer->getGraphicsAllocation()->getGpuBaseAddress();
        EXPECT_LT(ptrDiff(bufferPtr, baseAddress), MemoryConstants::max32BitAddress);
    }

    delete buffer;
}

// Mirror of the previous test: the 2 GB ALLOC_HOST_PTR buffer is created first
// and the following (4 GB - page) USE_HOST_PTR buffer must fail.
TEST_F(DrmMemoryManagerTest, GivenSizeAbove2GBWhenAllocHostPtrAndUseHostPtrAreCreatedThenFirstSucceedsAndSecondFails) {
    DebugManagerStateRestore dbgRestorer;
    mock->ioctl_expected.total = -1;

    DebugManager.flags.Force32bitAddressing.set(true);
    MockContext context(device);
    memoryManager->setForce32BitAllocations(true);

    size_t size = 2 * GB;
    void *ptr = reinterpret_cast<void *>(0x100000000000);
    auto retVal = CL_SUCCESS;

    auto buffer = Buffer::create(
        &context,
        CL_MEM_ALLOC_HOST_PTR,
        size,
        nullptr,
        retVal);

    size_t size2 = 4 * GB - MemoryConstants::pageSize; // Keep size aligned

    auto buffer2 = Buffer::create(
        &context,
        CL_MEM_USE_HOST_PTR,
        size2,
        ptr,
        retVal);

    EXPECT_NE(retVal, CL_SUCCESS);
    EXPECT_EQ(nullptr, buffer2);

    // The first buffer is only inspected when its creation succeeded.
    if (buffer) {
        auto bufferPtr = buffer->getGraphicsAllocation()->getGpuAddress();

        EXPECT_TRUE(buffer->getGraphicsAllocation()->is32BitAllocation());
        auto baseAddress = buffer->getGraphicsAllocation()->getGpuBaseAddress();
        EXPECT_LT(ptrDiff(bufferPtr, baseAddress), MemoryConstants::max32BitAddress);
    }

    delete buffer;
}

// Querying CL_MEM_ALLOCATION_HANDLE_INTEL on a buffer returns the value the
// mock was told to hand out through outputFd.
TEST_F(DrmMemoryManagerTest, givenDrmBufferWhenItIsQueriedForInternalAllocationThenBoIsReturned) {
    mock->ioctl_expected.total = -1;
    mock->outputFd = 1337;
    MockContext context(device);

    size_t size = 1u;
    auto retVal = CL_SUCCESS;

    auto buffer = Buffer::create(
        &context,
        CL_MEM_ALLOC_HOST_PTR,
        size,
        nullptr,
        retVal);

    uint64_t handle = 0llu;

    retVal = clGetMemObjectInfo(buffer, CL_MEM_ALLOCATION_HANDLE_INTEL, sizeof(handle), &handle, nullptr);
    EXPECT_EQ(retVal, CL_SUCCESS);

    EXPECT_EQ(static_cast<uint64_t>(1337), handle);

    clReleaseMemObject(buffer);
}

// After one 64 KB allocation from HEAP_INTERNAL_DEVICE_MEMORY, a 4 GB
// INTERNAL_HEAP request must return nullptr.
TEST_F(DrmMemoryManagerTest, Given32BitDeviceWithMemoryManagerWhenInternalHeapIsExhaustedAndNewAllocationsIsMadeThenNullIsReturned) {
    DebugManagerStateRestore dbgStateRestore;
    DebugManager.flags.Force32bitAddressing.set(true);
    memoryManager->setForce32BitAllocations(true);
    // NOTE(review): the template arguments on this line were reconstructed from
    // usage (pDevice only needs getDeviceInfo()) — confirm the intended type.
    std::unique_ptr<Device> pDevice(MockDevice::createWithNewExecutionEnvironment<Device>(nullptr));

    size_t size = MemoryConstants::pageSize64k;
    auto alloc = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, size);
    EXPECT_NE(0llu, alloc);

    size_t allocationSize = 4 * GB;
    auto graphicsAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, allocationSize, nullptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP);
    EXPECT_EQ(nullptr, graphicsAllocation);
    EXPECT_TRUE(pDevice->getDeviceInfo().force32BitAddressess);
}

// allocateGraphicsMemoryForImage() for a 2D image issues create + set-tiling
// ioctls; the mock must have recorded the image size, Y tiling and row pitch.
TEST_F(DrmMemoryManagerTest, GivenMemoryManagerWhenAllocateGraphicsMemoryForImageIsCalledThenProperIoctlsAreCalledAndUnmapSizeIsNonZero) {
    mock->ioctl_expected.gemCreate = 1;
    mock->ioctl_expected.gemSetTiling = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    cl_image_desc imgDesc = {};
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; // tiled
    imgDesc.image_width = 512;
    imgDesc.image_height = 512;
    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr);
    imgInfo.imgDesc = Image::convertDescriptor(imgDesc);
    imgInfo.size = 4096u;
    imgInfo.rowPitch = 512u;

    TestedDrmMemoryManager::AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;
    allocationData.rootDeviceIndex = rootDeviceIndex;

    auto imageGraphicsAllocation = memoryManager->allocateGraphicsMemoryForImage(allocationData);

    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_NE(0u, imageGraphicsAllocation->getGpuAddress());
    EXPECT_EQ(nullptr, imageGraphicsAllocation->getUnderlyingBuffer());

    EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage ==
                GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE);

    EXPECT_EQ(1u, this->mock->createParamsHandle);
    EXPECT_EQ(imgInfo.size, this->mock->createParamsSize);
    __u32 tilingMode = I915_TILING_Y;
    EXPECT_EQ(tilingMode, this->mock->setTilingMode);
    EXPECT_EQ(imgInfo.rowPitch, this->mock->setTilingStride);
    EXPECT_EQ(1u, this->mock->setTilingHandle);

    memoryManager->freeGraphicsMemory(imageGraphicsAllocation);
}

// A 2D image created through Image::create() with no mip levels goes through
// the create + Y-tiling path; skipped when tiled images are unsupported.
HWTEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageWithMipCountZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) {
    if (!UnitTestHelper<FamilyType>::tiledImagesSupported) {
        GTEST_SKIP();
    }
    mock->ioctl_expected.gemCreate = 1;
    mock->ioctl_expected.gemSetTiling = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 64u;
    imageDesc.image_height = 64u;

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation();
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage ==
                GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE);

    DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(imageGraphicsAllocation);
    auto imageSize = drmAllocation->getUnderlyingBufferSize();
    auto rowPitch = dstImage->getImageDesc().image_row_pitch;

    EXPECT_EQ(1u, this->mock->createParamsHandle);
    EXPECT_EQ(imageSize, this->mock->createParamsSize);
    __u32 tilingMode = I915_TILING_Y;
    EXPECT_EQ(tilingMode, this->mock->setTilingMode);
    EXPECT_EQ(rowPitch, this->mock->setTilingStride);
    EXPECT_EQ(1u, this->mock->setTilingHandle);
}

// Same as the previous test with num_mip_levels = 1; additionally checks that
// the image reports that mip count.
HWTEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageWithMipCountNonZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) {
    if (!UnitTestHelper<FamilyType>::tiledImagesSupported) {
        GTEST_SKIP();
    }
    mock->ioctl_expected.gemCreate = 1;
    mock->ioctl_expected.gemSetTiling = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 64u;
    imageDesc.image_height = 64u;
    imageDesc.num_mip_levels = 1u;

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    EXPECT_EQ(static_cast<uint32_t>(imageDesc.num_mip_levels), dstImage->peekMipCount());
    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation();
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage ==
                GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE);

    DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(imageGraphicsAllocation);
    auto imageSize = drmAllocation->getUnderlyingBufferSize();
    auto rowPitch = dstImage->getImageDesc().image_row_pitch;

    EXPECT_EQ(1u, this->mock->createParamsHandle);
    EXPECT_EQ(imageSize, this->mock->createParamsSize);
    __u32 tilingMode = I915_TILING_Y;
    EXPECT_EQ(tilingMode, this->mock->setTilingMode);
    EXPECT_EQ(rowPitch, this->mock->setTilingStride);
    EXPECT_EQ(1u, this->mock->setTilingHandle);
}

// Runs image creation under injectFailures(): creation must succeed only for
// the non-failing iteration and return nullptr for every injected failure.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageIsBeingCreatedAndAllocationFailsThenReturnNullptr) {
    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 64u;
    imageDesc.image_height = 64u;

    auto retVal = CL_SUCCESS;

    InjectedFunction method = [&](size_t failureIndex) {
        cl_mem_flags flags = CL_MEM_WRITE_ONLY;
        auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
        std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal));
        if (MemoryManagement::nonfailingAllocation == failureIndex) {
            EXPECT_NE(nullptr, dstImage.get());
        } else {
            EXPECT_EQ(nullptr, dstImage.get());
        }
    };

    injectFailures(method);
    mock->reset();
}

// A tiled 2D image created with CL_MEM_USE_HOST_PTR still goes through the
// create + Y-tiling path. The expected ioctl counts include the extra userptr
// allocations listed in the comments below.
HWTEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenTiledImageIsBeingCreatedFromHostPtrThenAllocateGraphicsMemoryForImageIsUsed) {
    if (!UnitTestHelper<FamilyType>::tiledImagesSupported) {
        GTEST_SKIP();
    }
    mock->ioctl_expected.gemCreate = 1;
    mock->ioctl_expected.gemSetTiling = 1;
    mock->ioctl_expected.gemWait = 2;
    mock->ioctl_expected.gemClose = 2;
    mock->ioctl_expected.gemUserptr = 1;

    // builtins kernels
    mock->ioctl_expected.gemUserptr += 5;

    // command buffers
    mock->ioctl_expected.gemUserptr += 2;
    additionalDestroyDeviceIoctls.gemClose += 2;
    additionalDestroyDeviceIoctls.gemWait += 2;

    // indirect heaps
    mock->ioctl_expected.gemUserptr += 3;
    additionalDestroyDeviceIoctls.gemClose += 3;
    additionalDestroyDeviceIoctls.gemWait += 3;

    if (device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) {
        mock->ioctl_expected.gemUserptr++;
        additionalDestroyDeviceIoctls.gemClose++;
        additionalDestroyDeviceIoctls.gemWait++;
    }

    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 64u;
    imageDesc.image_height = 64u;

    auto data = alignedMalloc(64u * 64u * 4 * 8, MemoryConstants::pageSize);

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, data, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation();
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage ==
                GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE);

    DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(imageGraphicsAllocation);
    auto imageSize = drmAllocation->getUnderlyingBufferSize();
    auto rowPitch = dstImage->getImageDesc().image_row_pitch;

    EXPECT_EQ(1u, this->mock->createParamsHandle);
    EXPECT_EQ(imageSize, this->mock->createParamsSize);
    __u32 tilingMode = I915_TILING_Y;
    EXPECT_EQ(tilingMode, this->mock->setTilingMode);
    EXPECT_EQ(rowPitch, this->mock->setTilingStride);
    EXPECT_EQ(1u, this->mock->setTilingHandle);

    alignedFree(data);
}

// A 1D image created from a host pointer yields a non-null graphics allocation
// using two userptr allocations.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenMemoryAllocatedForImageThenUnmapSizeCorrectlySetWhenLimitedRangeAllocationUsedOrNotUsed) {
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.gemWait = 2;
    mock->ioctl_expected.gemClose = 2;

    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 64u;

    auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize);

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, data, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation();
    ASSERT_NE(nullptr, imageGraphicsAllocation);

    alignedFree(data);
}

// A 1D image without mip levels does not use the create/set-tiling path: the
// mock's create and tiling parameters stay at zero / I915_TILING_NONE and the
// allocation is not shared.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenNonTiledImgWithMipCountZeroisBeingCreatedThenAllocateGraphicsMemoryIsUsed) {
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.gemWait = 2;
    mock->ioctl_expected.gemClose = 2;

    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 64u;

    auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize);

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, data, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation();
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage ==
                GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE);

    EXPECT_EQ(0u, this->mock->createParamsHandle);
    EXPECT_EQ(0u, this->mock->createParamsSize);
    __u32 tilingMode = I915_TILING_NONE;
    EXPECT_EQ(tilingMode, this->mock->setTilingMode);
    EXPECT_EQ(0u, this->mock->setTilingStride);
    EXPECT_EQ(0u, this->mock->setTilingHandle);

    EXPECT_EQ(Sharing::nonSharedResource, imageGraphicsAllocation->peekSharedHandle());

    alignedFree(data);
}

// Same as the previous test with num_mip_levels = 1 and without
// CL_MEM_USE_HOST_PTR; additionally checks the reported mip count.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenNonTiledImgWithMipCountNonZeroisBeingCreatedThenAllocateGraphicsMemoryIsUsed) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 64u;
    imageDesc.num_mip_levels = 1u;

    auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize);

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, data, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, dstImage);
    EXPECT_EQ(static_cast<uint32_t>(imageDesc.num_mip_levels), dstImage->peekMipCount());
    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation();
    ASSERT_NE(nullptr, imageGraphicsAllocation);
    EXPECT_TRUE(imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage ==
                GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE);

    EXPECT_EQ(0u, this->mock->createParamsHandle);
    EXPECT_EQ(0u, this->mock->createParamsSize);
    __u32 tilingMode = I915_TILING_NONE;
    EXPECT_EQ(tilingMode, this->mock->setTilingMode);
    EXPECT_EQ(0u, this->mock->setTilingStride);
    EXPECT_EQ(0u, this->mock->setTilingHandle);

    EXPECT_EQ(Sharing::nonSharedResource, imageGraphicsAllocation->peekSharedHandle());

    alignedFree(data);
}
// A 1D image created from a host pointer does not use the create/set-tiling
// path: the mock's create and tiling parameters stay at zero /
// I915_TILING_NONE and the allocation is not shared.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhen1DarrayImageIsBeingCreatedFromHostPtrThenTilingIsNotCalled) {
    mock->ioctl_expected.gemUserptr = 2;
    mock->ioctl_expected.gemWait = 2;
    mock->ioctl_expected.gemClose = 2;

    MockContext context(device);

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_image_desc imageDesc = {};

    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D;
    imageDesc.image_width = 64u;

    auto data = alignedMalloc(64u * 4 * 8, MemoryConstants::pageSize);

    auto retVal = CL_SUCCESS;

    cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    std::unique_ptr<Image> dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, data, retVal));
    // Fatal check: the image is dereferenced on the next line.
    ASSERT_NE(nullptr, dstImage);
    auto imageGraphicsAllocation = dstImage->getGraphicsAllocation();
    ASSERT_NE(nullptr, imageGraphicsAllocation);

    EXPECT_EQ(0u, this->mock->createParamsHandle);
    EXPECT_EQ(0u, this->mock->createParamsSize);
    __u32 tilingMode = I915_TILING_NONE;
    EXPECT_EQ(tilingMode, this->mock->setTilingMode);
    EXPECT_EQ(0u, this->mock->setTilingStride);
    EXPECT_EQ(0u, this->mock->setTilingHandle);

    EXPECT_EQ(Sharing::nonSharedResource, imageGraphicsAllocation->peekSharedHandle());

    alignedFree(data);
}

// When MockMemoryManager::isCopyRequired() reports that no copy is needed for
// a linear 1D image, allocateGraphicsMemoryForImage() wraps the host pointer
// directly.
TEST_F(DrmMemoryManagerTest, givenHostPointerNotRequiringCopyWhenAllocateGraphicsMemoryForImageIsCalledThenGraphicsAllocationIsReturned) {
    mock->ioctl_expected.gemUserptr = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    cl_image_desc imgDesc = {};
    imgDesc.image_width = MemoryConstants::pageSize;
    imgDesc.image_height = 1;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE1D;

    cl_image_format imageFormat = {};
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    imageFormat.image_channel_order = CL_R;

    cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
    MockContext context(device);
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);

    auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, surfaceFormat);
    imgInfo.rowPitch = imgDesc.image_width * surfaceFormat->surfaceFormat.ImageElementSizeInBytes;
    imgInfo.slicePitch = imgInfo.rowPitch * imgDesc.image_height;
    imgInfo.size = imgInfo.slicePitch;
    imgInfo.linearStorage = true;

    auto hostPtr = alignedMalloc(imgDesc.image_width * imgDesc.image_height * 4, MemoryConstants::pageSize);
    bool copyRequired = MockMemoryManager::isCopyRequired(imgInfo, hostPtr);
    EXPECT_FALSE(copyRequired);

    TestedDrmMemoryManager::AllocationData allocationData;
    allocationData.imgInfo = &imgInfo;
    allocationData.hostPtr = hostPtr;
    allocationData.rootDeviceIndex = rootDeviceIndex;

    auto imageAllocation = memoryManager->allocateGraphicsMemoryForImage(allocationData);
    ASSERT_NE(nullptr, imageAllocation);
    EXPECT_EQ(hostPtr, imageAllocation->getUnderlyingBuffer());

    memoryManager->freeGraphicsMemory(imageAllocation);
    alignedFree(hostPtr);
}

// Importing a shared handle yields an allocation whose buffer object carries
// the handle returned by the mock, a non-zero address, a single reference and
// the requested size. No tiling is set.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledThenGraphicsAllocationIsReturned) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    size_t size = 4096u;
    AllocationProperties properties(rootDeviceIndex, false, size, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false);
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer());
    EXPECT_EQ(size, graphicsAllocation->getUnderlyingBufferSize());
    EXPECT_EQ(this->mock->inputFd, (int)handle);
    EXPECT_EQ(this->mock->setTilingHandle, 0u);

    DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
    auto bo = drmAllocation->getBO();
    EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle);
    EXPECT_NE(0llu, bo->peekAddress());
    EXPECT_EQ(1u, bo->getRefCount());
    EXPECT_EQ(size, bo->peekSize());

    EXPECT_EQ(handle, graphicsAllocation->peekSharedHandle());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Same as the previous test, using the AllocationProperties constructor with
// extra trailing arguments; additionally checks that the allocation reports
// the requested root device index.
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledAndRootDeviceIndexIsSpecifiedThenGraphicsAllocationIsReturnedWithCorrectRootDeviceIndex) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    size_t size = 4096u;
    AllocationProperties properties(rootDeviceIndex, false, size, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0u);
    ASSERT_TRUE(properties.subDevicesBitfield.none());

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false);
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_EQ(rootDeviceIndex, graphicsAllocation->getRootDeviceIndex());
    EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer());
    EXPECT_EQ(size, graphicsAllocation->getUnderlyingBufferSize());
    EXPECT_EQ(this->mock->inputFd, (int)handle);
    EXPECT_EQ(this->mock->setTilingHandle, 0u);

    DrmAllocation *drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
    auto bo = drmAllocation->getBO();
    EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle);
    EXPECT_NE(0llu, bo->peekAddress());
    EXPECT_EQ(1u, bo->getRefCount());
    EXPECT_EQ(size, bo->peekSize());

    EXPECT_EQ(handle, graphicsAllocation->peekSharedHandle());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// When the mock reports I915_TILING_NONE for an imported image handle, the
// resulting allocation's Gmm is flagged Linear and not TiledY.
TEST_F(DrmMemoryManagerTest, givenOsHandleWithNonTiledObjectWhenCreateFromSharedHandleIsCalledThenNonTiledGmmIsCreatedAndSetInAllocation) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemGetTiling = 1;
    mock->getTilingModeOut = I915_TILING_NONE;

    osHandle handle = 1u;
    uint32_t boHandle = 2u;
    mock->outputHandle = boHandle;

    cl_mem_flags flags = CL_MEM_READ_ONLY;
    cl_image_desc imgDesc = {};
    cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8};
    const ClSurfaceFormatInfo *gmmSurfaceFormat = nullptr;
    ImageInfo imgInfo = {};

    imgDesc.image_width = 4;
    imgDesc.image_height = 4;
    imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D;

    imgInfo.imgDesc = Image::convertDescriptor(imgDesc);
    MockContext context(device);
    gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport);
    imgInfo.surfaceFormat = &gmmSurfaceFormat->surfaceFormat;
    imgInfo.plane = GMM_PLANE_Y;

    AllocationProperties properties(rootDeviceIndex, false, imgInfo, GraphicsAllocation::AllocationType::SHARED_IMAGE);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false);
    ASSERT_NE(nullptr, graphicsAllocation);
    // The tiling query must have been issued for the imported buffer object.
    EXPECT_EQ(boHandle, mock->getTilingHandleIn);
    EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType());

    auto gmm = graphicsAllocation->getDefaultGmm();
    ASSERT_NE(nullptr, gmm);
    EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.Linear);
    EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.TiledY);

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

TEST_F(DrmMemoryManagerTest, givenOsHandleWithTileYObjectWhenCreateFromSharedHandleIsCalledThenTileYGmmIsCreatedAndSetInAllocation) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;
    mock->ioctl_expected.gemGetTiling = 1;
    mock->getTilingModeOut = I915_TILING_Y;

    osHandle handle = 1u;
    uint32_t boHandle = 2u;
    mock->outputHandle = boHandle;

    cl_mem_flags flags = CL_MEM_READ_ONLY;
    cl_image_desc imgDesc = {};
    cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8};
    const ClSurfaceFormatInfo *gmmSurfaceFormat = nullptr;
    ImageInfo imgInfo = {};

    imgDesc.image_width = 4;
    imgDesc.image_height =
4; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgInfo.imgDesc = Image::convertDescriptor(imgDesc); MockContext context(device); gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); imgInfo.surfaceFormat = &gmmSurfaceFormat->surfaceFormat; imgInfo.plane = GMM_PLANE_Y; AllocationProperties properties(rootDeviceIndex, false, imgInfo, GraphicsAllocation::AllocationType::SHARED_IMAGE); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(boHandle, mock->getTilingHandleIn); EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType()); auto gmm = graphicsAllocation->getDefaultGmm(); ASSERT_NE(nullptr, gmm); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.Linear); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCreateFromSharedHandleFailsToCallGetTilingThenNonLinearStorageIsAssumed) { mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemGetTiling = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; this->ioctlResExt = {mock->ioctl_cnt.total + 1, -1}; mock->ioctl_res_ext = &ioctlResExt; osHandle handle = 1u; uint32_t boHandle = 2u; mock->outputHandle = boHandle; cl_mem_flags flags = CL_MEM_READ_ONLY; cl_image_desc imgDesc = {}; cl_image_format gmmImgFormat = {CL_NV12_INTEL, CL_UNORM_INT8}; const ClSurfaceFormatInfo *gmmSurfaceFormat = nullptr; ImageInfo imgInfo = {}; imgDesc.image_width = 4; imgDesc.image_height = 4; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgInfo.imgDesc = Image::convertDescriptor(imgDesc); MockContext context(device); gmmSurfaceFormat = Image::getSurfaceFormatFromTable(flags, &gmmImgFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); imgInfo.surfaceFormat = 
&gmmSurfaceFormat->surfaceFormat; imgInfo.plane = GMM_PLANE_Y; AllocationProperties properties(rootDeviceIndex, false, imgInfo, GraphicsAllocation::AllocationType::SHARED_IMAGE); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_EQ(boHandle, mock->getTilingHandleIn); EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, graphicsAllocation->getAllocationType()); auto gmm = graphicsAllocation->getDefaultGmm(); ASSERT_NE(nullptr, gmm); EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.Linear); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenAllocationFailsThenReturnNullPtr) { osHandle handle = 1u; InjectedFunction method = [this, &handle](size_t failureIndex) { AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_NE(nullptr, graphicsAllocation); memoryManager->freeGraphicsMemory(graphicsAllocation); } else { EXPECT_EQ(nullptr, graphicsAllocation); } }; injectFailures(method); mock->reset(); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndThreeOsHandlesWhenReuseCreatesAreCalledThenGraphicsAllocationsAreReturned) { mock->ioctl_expected.primeFdToHandle = 3; mock->ioctl_expected.gemWait = 3; mock->ioctl_expected.gemClose = 2; osHandle handles[] = {1u, 2u, 3u}; size_t size = 4096u; GraphicsAllocation *graphicsAllocations[3]; DrmAllocation *drmAllocation; BufferObject *bo; unsigned int expectedRefCount; this->mock->outputHandle = 2u; for (unsigned int i = 0; i < 3; ++i) { expectedRefCount = i < 2 ? 
i + 1 : 1; if (i == 2) this->mock->outputHandle = 3u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); graphicsAllocations[i] = memoryManager->createGraphicsAllocationFromSharedHandle(handles[i], properties, false); //Clang-tidy false positive WA if (graphicsAllocations[i] == nullptr) { ASSERT_FALSE(true); continue; } ASSERT_NE(nullptr, graphicsAllocations[i]); EXPECT_NE(nullptr, graphicsAllocations[i]->getUnderlyingBuffer()); EXPECT_EQ(size, graphicsAllocations[i]->getUnderlyingBufferSize()); EXPECT_EQ(this->mock->inputFd, (int)handles[i]); EXPECT_EQ(this->mock->setTilingHandle, 0u); drmAllocation = static_cast(graphicsAllocations[i]); bo = drmAllocation->getBO(); EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle); EXPECT_NE(0llu, bo->peekAddress()); EXPECT_EQ(expectedRefCount, bo->getRefCount()); EXPECT_EQ(size, bo->peekSize()); EXPECT_EQ(handles[i], graphicsAllocations[i]->peekSharedHandle()); } for (const auto &it : graphicsAllocations) { //Clang-tidy false positive WA if (it != nullptr) memoryManager->freeGraphicsMemory(it); } } TEST_F(DrmMemoryManagerTest, given32BitAddressingWhenBufferFromSharedHandleAndBitnessRequiredIsCreatedThenItis32BitAllocation) { mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; memoryManager->setForce32BitAllocations(true); osHandle handle = 1u; this->mock->outputHandle = 2u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, true); auto drmAllocation = static_cast(graphicsAllocation); EXPECT_TRUE(graphicsAllocation->is32BitAllocation()); EXPECT_EQ(1, lseekCalledCount); EXPECT_EQ(GmmHelper::canonize(memoryManager->getExternalHeapBaseAddress(graphicsAllocation->getRootDeviceIndex())), 
drmAllocation->getGpuBaseAddress()); memoryManager->freeGraphicsMemory(graphicsAllocation); }

// Forced 32-bit addressing is on, but the shared buffer is created with the
// bitness requirement switched off (last argument false): the allocation must
// not be flagged as 32-bit and must report a zero GPU base address.
TEST_F(DrmMemoryManagerTest, given32BitAddressingWhenBufferFromSharedHandleIsCreatedAndDoesntRequireBitnessThenItIsNot32BitAllocation) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    memoryManager->setForce32BitAllocations(true);
    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false);
    // Abort the test on failure rather than dereferencing a null allocation below.
    ASSERT_NE(nullptr, graphicsAllocation);
    auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);

    EXPECT_FALSE(graphicsAllocation->is32BitAllocation());
    EXPECT_EQ(1, lseekCalledCount);
    EXPECT_EQ(0llu, drmAllocation->getGpuBaseAddress());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Same scenario with the limited-range allocator forced on: the shared buffer
// must not be a 32-bit allocation and must report a zero GPU base address.
TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenBufferFromSharedHandleIsCreatedThenItIsLimitedRangeAllocation) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF);
    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false);
    // Abort the test on failure rather than dereferencing a null allocation below.
    ASSERT_NE(nullptr, graphicsAllocation);
    EXPECT_FALSE(graphicsAllocation->is32BitAllocation());
    auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
    EXPECT_EQ(0llu, drmAllocation->getGpuBaseAddress());
    EXPECT_EQ(1, lseekCalledCount);

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Forced 32-bit addressing is off; even when the caller requires bitness
// (last argument true) the shared buffer must not become a 32-bit allocation.
TEST_F(DrmMemoryManagerTest, givenNon32BitAddressingWhenBufferFromSharedHandleIsCreatedAndDRequireBitnessThenItIsNot32BitAllocation) {
    mock->ioctl_expected.primeFdToHandle = 1;
    mock->ioctl_expected.gemWait = 1;
    mock->ioctl_expected.gemClose = 1;

    memoryManager->setForce32BitAllocations(false);
    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, true);
    // Abort the test on failure rather than dereferencing a null allocation below.
    ASSERT_NE(nullptr, graphicsAllocation);
    auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);

    EXPECT_FALSE(graphicsAllocation->is32BitAllocation());
    EXPECT_EQ(1, lseekCalledCount);
    EXPECT_EQ(0llu, drmAllocation->getGpuBaseAddress());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// The next ioctl issued through the mock is made to return -1, so importing the
// shared handle must fail and yield no allocation.
TEST_F(DrmMemoryManagerTest, givenSharedHandleWhenAllocationIsCreatedAndIoctlPrimeFdToHandleFailsThenNullPtrIsReturned) {
    mock->ioctl_expected.primeFdToHandle = 1;
    this->ioctlResExt = {mock->ioctl_cnt.total, -1};
    mock->ioctl_res_ext = &this->ioctlResExt;

    osHandle handle = 1u;
    this->mock->outputHandle = 2u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);

    auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(handle, properties, false);
    EXPECT_EQ(nullptr, graphicsAllocation);
    // Releases the allocation should creation unexpectedly succeed; on the
    // expected path this is handed a null pointer.
    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

TEST_F(DrmMemoryManagerTest, givenTwoGraphicsAllocationsThatShareTheSameBufferObjectWhenTheyAreMadeResidentThenOnlyOneBoIsPassedToExec) { auto testedCsr = new TestedDrmCommandStreamReceiver(*executionEnvironment); device->resetCommandStreamReceiver(testedCsr); mock->reset(); mock->ioctl_expected.primeFdToHandle = 2; mock->ioctl_expected.gemClose = 1; mock->ioctl_expected.gemWait = 2; osHandle sharedHandle = 1u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false); auto graphicsAllocation2 = 
memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false); testedCsr->makeResident(*graphicsAllocation); testedCsr->makeResident(*graphicsAllocation2); EXPECT_EQ(2u, testedCsr->getResidencyAllocations().size()); testedCsr->processResidency(testedCsr->getResidencyAllocations(), 0u); EXPECT_EQ(1u, testedCsr->residency.size()); memoryManager->freeGraphicsMemory(graphicsAllocation); memoryManager->freeGraphicsMemory(graphicsAllocation2); } TEST_F(DrmMemoryManagerTest, givenTwoGraphicsAllocationsThatDoesnShareTheSameBufferObjectWhenTheyAreMadeResidentThenTwoBoIsPassedToExec) { mock->ioctl_expected.primeFdToHandle = 2; osHandle sharedHandle = 1u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false); mock->outputHandle++; auto graphicsAllocation2 = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false); mock->testIoctls(); auto testedCsr = new TestedDrmCommandStreamReceiver(*executionEnvironment); device->resetCommandStreamReceiver(testedCsr); mock->reset(); mock->ioctl_expected.gemClose = 2; mock->ioctl_expected.gemWait = 2; testedCsr->makeResident(*graphicsAllocation); testedCsr->makeResident(*graphicsAllocation2); EXPECT_EQ(2u, testedCsr->getResidencyAllocations().size()); testedCsr->processResidency(testedCsr->getResidencyAllocations(), 0u); EXPECT_EQ(2u, testedCsr->residency.size()); memoryManager->freeGraphicsMemory(graphicsAllocation); memoryManager->freeGraphicsMemory(graphicsAllocation2); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenCreateAllocationFromNtHandleIsCalledThenReturnNullptr) { auto graphicsAllocation = memoryManager->createGraphicsAllocationFromNTHandle(reinterpret_cast(1), 0); EXPECT_EQ(nullptr, graphicsAllocation); } TEST_F(DrmMemoryManagerTest, 
givenDrmMemoryManagerWhenLockUnlockIsCalledThenReturnPtr) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemSetDomain = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); auto ptr = memoryManager->lockResource(allocation); EXPECT_NE(nullptr, ptr); memoryManager->unlockResource(allocation); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithCpuPtrThenReturnCpuPtrAndSetCpuDomain) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemSetDomain = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); auto ptr = memoryManager->lockResource(allocation); EXPECT_EQ(allocation->getUnderlyingBuffer(), ptr); //check DRM_IOCTL_I915_GEM_SET_DOMAIN input params auto drmAllocation = static_cast(allocation); EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->setDomainHandle); EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains); EXPECT_EQ(0u, mock->setDomainWriteDomain); memoryManager->unlockResource(allocation); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithoutCpuPtrThenReturnLockedPtrAndSetCpuDomain) { mock->ioctl_expected.gemCreate = 1; mock->ioctl_expected.gemMmap = 1; mock->ioctl_expected.gemSetDomain = 1; mock->ioctl_expected.gemSetTiling = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imgDesc.image_width = 512; 
imgDesc.image_height = 512; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); imgInfo.imgDesc = Image::convertDescriptor(imgDesc); imgInfo.size = 4096u; imgInfo.rowPitch = 512u; TestedDrmMemoryManager::AllocationData allocationData; allocationData.imgInfo = &imgInfo; allocationData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryForImage(allocationData); ASSERT_NE(nullptr, allocation); EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer()); auto ptr = memoryManager->lockResource(allocation); EXPECT_NE(nullptr, ptr); auto drmAllocation = static_cast(allocation); EXPECT_NE(nullptr, drmAllocation->getBO()->peekLockedAddress()); //check DRM_IOCTL_I915_GEM_MMAP input params EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->mmapHandle); EXPECT_EQ(0u, mock->mmapPad); EXPECT_EQ(0u, mock->mmapOffset); EXPECT_EQ(drmAllocation->getBO()->peekSize(), mock->mmapSize); EXPECT_EQ(0u, mock->mmapFlags); //check DRM_IOCTL_I915_GEM_SET_DOMAIN input params EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->setDomainHandle); EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains); EXPECT_EQ(0u, mock->setDomainWriteDomain); memoryManager->unlockResource(allocation); EXPECT_EQ(nullptr, drmAllocation->getBO()->peekLockedAddress()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnNullAllocationThenReturnNullPtr) { GraphicsAllocation *allocation = nullptr; auto ptr = memoryManager->lockResource(allocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(allocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithoutBufferObjectThenReturnNullPtr) { DrmAllocation drmAllocation(rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0, (osHandle)0u, MemoryPool::MemoryNull); EXPECT_EQ(nullptr, drmAllocation.getBO()); auto ptr = 
memoryManager->lockResource(&drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(&drmAllocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledButFailsOnIoctlMmapThenReturnNullPtr) { mock->ioctl_expected.gemMmap = 1; this->ioctlResExt = {mock->ioctl_cnt.total, -1}; mock->ioctl_res_ext = &ioctlResExt; DrmMockCustom drmMock; struct BufferObjectMock : public BufferObject { BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0) {} }; BufferObjectMock bo(&drmMock); DrmAllocation drmAllocation(rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, &bo, nullptr, 0u, (osHandle)0u, MemoryPool::MemoryNull); EXPECT_NE(nullptr, drmAllocation.getBO()); auto ptr = memoryManager->lockResource(&drmAllocation); EXPECT_EQ(nullptr, ptr); memoryManager->unlockResource(&drmAllocation); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenUnlockResourceIsCalledOnAllocationInLocalMemoryThenRedirectToUnlockResourceInLocalMemory) { struct DrmMemoryManagerToTestUnlockResource : public DrmMemoryManager { using DrmMemoryManager::unlockResourceImpl; DrmMemoryManagerToTestUnlockResource(ExecutionEnvironment &executionEnvironment, bool localMemoryEnabled, size_t lockableLocalMemorySize) : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) { } void unlockResourceInLocalMemoryImpl(BufferObject *bo) override { unlockResourceInLocalMemoryImplParam.bo = bo; unlockResourceInLocalMemoryImplParam.called = true; } struct unlockResourceInLocalMemoryImplParamType { BufferObject *bo = nullptr; bool called = false; } unlockResourceInLocalMemoryImplParam; }; DrmMemoryManagerToTestUnlockResource drmMemoryManager(*executionEnvironment, true, MemoryConstants::pageSize); DrmMockCustom drmMock; struct BufferObjectMock : public BufferObject { BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0) {} }; auto bo = new BufferObjectMock(&drmMock); auto drmAllocation = new 
DrmAllocation(rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, bo, nullptr, 0u, (osHandle)0u, MemoryPool::LocalMemory); drmMemoryManager.unlockResourceImpl(*drmAllocation); EXPECT_TRUE(drmMemoryManager.unlockResourceInLocalMemoryImplParam.called); EXPECT_EQ(bo, drmMemoryManager.unlockResourceInLocalMemoryImplParam.bo); drmMemoryManager.freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledOnAllocationWithoutBufferObjectThenReturnFalse) { DrmAllocation drmAllocation(rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0, (osHandle)0u, MemoryPool::MemoryNull); EXPECT_EQ(nullptr, drmAllocation.getBO()); auto success = memoryManager->setDomainCpu(drmAllocation, false); EXPECT_FALSE(success); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledButFailsOnIoctlSetDomainThenReturnFalse) { mock->ioctl_expected.gemSetDomain = 1; this->ioctlResExt = {mock->ioctl_cnt.total, -1}; mock->ioctl_res_ext = &ioctlResExt; DrmMockCustom drmMock; struct BufferObjectMock : public BufferObject { BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0) {} }; BufferObjectMock bo(&drmMock); DrmAllocation drmAllocation(rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, &bo, nullptr, 0u, (osHandle)0u, MemoryPool::MemoryNull); EXPECT_NE(nullptr, drmAllocation.getBO()); auto success = memoryManager->setDomainCpu(drmAllocation, false); EXPECT_FALSE(success); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledOnAllocationThenReturnSetWriteDomain) { mock->ioctl_expected.gemSetDomain = 1; DrmMockCustom drmMock; struct BufferObjectMock : public BufferObject { BufferObjectMock(Drm *drm) : BufferObject(drm, 1, 0) {} }; BufferObjectMock bo(&drmMock); DrmAllocation drmAllocation(rootDeviceIndex, GraphicsAllocation::AllocationType::UNKNOWN, &bo, nullptr, 0u, (osHandle)0u, MemoryPool::MemoryNull); EXPECT_NE(nullptr, 
drmAllocation.getBO()); auto success = memoryManager->setDomainCpu(drmAllocation, true); EXPECT_TRUE(success); //check DRM_IOCTL_I915_GEM_SET_DOMAIN input params EXPECT_EQ((uint32_t)drmAllocation.getBO()->peekHandle(), mock->setDomainHandle); EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains); EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainWriteDomain); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndUnifiedAuxCapableAllocationWhenMappingThenReturnFalse) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 123, false); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); allocation->setDefaultGmm(gmm); auto mockGmmRes = static_cast(gmm->gmmResourceInfo.get()); mockGmmRes->setUnifiedAuxTranslationCapable(); EXPECT_FALSE(memoryManager->mapAuxGpuVA(allocation)); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, given32BitAllocatorWithHeapAllocatorWhenLargerFragmentIsReusedThenOnlyUnmapSizeIsLargerWhileSizeStaysTheSame) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; DebugManagerStateRestore dbgFlagsKeeper; memoryManager->setForce32BitAllocations(true); size_t allocationSize = 4 * MemoryConstants::pageSize; auto ptr = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_EXTERNAL, allocationSize); size_t smallAllocationSize = MemoryConstants::pageSize; memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_EXTERNAL, smallAllocationSize); //now free first allocation , this will move it to chunks memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(HeapIndex::HEAP_EXTERNAL, ptr, allocationSize); //now ask for 3 pages, this will give ptr from chunks size_t pages3size = 3 * 
MemoryConstants::pageSize; void *host_ptr = reinterpret_cast(0x1000); DrmAllocation *graphicsAlloaction = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, pages3size, host_ptr, GraphicsAllocation::AllocationType::BUFFER); auto bo = graphicsAlloaction->getBO(); EXPECT_EQ(pages3size, bo->peekSize()); EXPECT_EQ(GmmHelper::canonize(ptr), graphicsAlloaction->getGpuAddress()); memoryManager->freeGraphicsMemory(graphicsAlloaction); } TEST_F(DrmMemoryManagerTest, givenSharedAllocationWithSmallerThenRealSizeWhenCreateIsCalledThenRealSizeIsUsed) { unsigned int realSize = 64 * 1024; lseekReturn = realSize; mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; osHandle sharedHandle = 1u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto graphicsAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(sharedHandle, properties, false); EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer()); EXPECT_EQ(realSize, graphicsAllocation->getUnderlyingBufferSize()); EXPECT_EQ(this->mock->inputFd, (int)sharedHandle); DrmAllocation *drmAllocation = static_cast(graphicsAllocation); auto bo = drmAllocation->getBO(); EXPECT_EQ(bo->peekHandle(), (int)this->mock->outputHandle); EXPECT_NE(0llu, bo->peekAddress()); EXPECT_EQ(1u, bo->getRefCount()); EXPECT_EQ(realSize, bo->peekSize()); EXPECT_EQ(1, lseekCalledCount); memoryManager->freeGraphicsMemory(graphicsAllocation); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerSupportingVirutalPaddingWhenItIsRequiredThenNewGraphicsAllocationIsCreated) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 2; mock->ioctl_expected.gemClose = 2; //first let's create normal buffer auto bufferSize = MemoryConstants::pageSize; auto buffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, bufferSize}); //buffer should 
have size 16 EXPECT_EQ(bufferSize, buffer->getUnderlyingBufferSize()); auto bufferWithPaddingSize = 8192u; auto paddedAllocation = memoryManager->createGraphicsAllocationWithPadding(buffer, 8192u); EXPECT_NE(nullptr, paddedAllocation); EXPECT_NE(0u, paddedAllocation->getGpuAddress()); EXPECT_NE(0u, paddedAllocation->getGpuAddressToPatch()); EXPECT_NE(buffer->getGpuAddress(), paddedAllocation->getGpuAddress()); EXPECT_NE(buffer->getGpuAddressToPatch(), paddedAllocation->getGpuAddressToPatch()); EXPECT_EQ(buffer->getUnderlyingBuffer(), paddedAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferWithPaddingSize, paddedAllocation->getUnderlyingBufferSize()); EXPECT_FALSE(paddedAllocation->isCoherent()); EXPECT_EQ(0u, paddedAllocation->fragmentsStorage.fragmentCount); auto bufferbo = static_cast(buffer)->getBO(); auto bo = static_cast(paddedAllocation)->getBO(); EXPECT_NE(nullptr, bo); EXPECT_NE(bufferbo->peekHandle(), bo->peekHandle()); memoryManager->freeGraphicsMemory(paddedAllocation); memoryManager->freeGraphicsMemory(buffer); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedForInternalAllocationWithNoPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto bufferSize = MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferSize, drmAllocation->getUnderlyingBufferSize()); EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto gpuPtr = drmAllocation->getGpuAddress(); auto heapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(drmAllocation->getRootDeviceIndex())); auto heapSize = 4 * GB; EXPECT_GE(gpuPtr, heapBase); EXPECT_LE(gpuPtr, heapBase + heapSize); 
EXPECT_EQ(drmAllocation->getGpuBaseAddress(), heapBase); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternalAllocationWithNoPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto bufferSize = MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferSize, drmAllocation->getUnderlyingBufferSize()); ASSERT_NE(nullptr, drmAllocation->getDriverAllocatedCpuPtr()); EXPECT_EQ(drmAllocation->getDriverAllocatedCpuPtr(), drmAllocation->getUnderlyingBuffer()); EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto gpuPtr = drmAllocation->getGpuAddress(); auto heapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(drmAllocation->getRootDeviceIndex())); auto heapSize = 4 * GB; EXPECT_GE(gpuPtr, heapBase); EXPECT_LE(gpuPtr, heapBase + heapSize); EXPECT_EQ(drmAllocation->getGpuBaseAddress(), heapBase); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForExternalAllocationWithNoPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; memoryManager->setForce32BitAllocations(true); memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto bufferSize = MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, GraphicsAllocation::AllocationType::BUFFER)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, 
drmAllocation->getUnderlyingBuffer()); EXPECT_TRUE(drmAllocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternalAllocationWithNoPointerAndHugeBufferSizeThenAllocationFromInternalHeapFailed) { memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto bufferSize = 128 * MemoryConstants::megaByte + 4 * MemoryConstants::pageSize; void *ptr = nullptr; auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP)); ASSERT_EQ(nullptr, drmAllocation); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerWhenAskedForInternalAllocationWithPointerThenAllocationFromInternalHeapIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; auto bufferSize = MemoryConstants::pageSize; void *ptr = reinterpret_cast(0x100000); auto drmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, bufferSize, ptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, drmAllocation); EXPECT_NE(nullptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(ptr, drmAllocation->getUnderlyingBuffer()); EXPECT_EQ(bufferSize, drmAllocation->getUnderlyingBufferSize()); EXPECT_TRUE(drmAllocation->is32BitAllocation()); auto gpuPtr = drmAllocation->getGpuAddress(); auto heapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(drmAllocation->getRootDeviceIndex())); auto heapSize = 4 * GB; EXPECT_GE(gpuPtr, heapBase); EXPECT_LE(gpuPtr, heapBase + heapSize); EXPECT_EQ(drmAllocation->getGpuBaseAddress(), heapBase); memoryManager->freeGraphicsMemory(drmAllocation); } TEST_F(DrmMemoryManagerTest, givenMemoryManagerSupportingVirutalPaddingWhenAllocUserptrFailsThenReturnsNullptr) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; 
this->ioctlResExt = {mock->ioctl_cnt.total + 1, -1}; mock->ioctl_res_ext = &ioctlResExt; //first let's create normal buffer auto bufferSize = MemoryConstants::pageSize; auto buffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, bufferSize}); //buffer should have size 16 EXPECT_EQ(bufferSize, buffer->getUnderlyingBufferSize()); auto bufferWithPaddingSize = 8192u; auto paddedAllocation = memoryManager->createGraphicsAllocationWithPadding(buffer, bufferWithPaddingSize); EXPECT_EQ(nullptr, paddedAllocation); memoryManager->freeGraphicsMemory(buffer); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDefaultDrmMemoryManagerWhenAskedForVirtualPaddingSupportThenTrueIsReturned) { EXPECT_TRUE(memoryManager->peekVirtualPaddingSupport()); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDefaultDrmMemoryManagerWhenAskedForAlignedMallocRestrictionsThenNullPtrIsReturned) { EXPECT_EQ(nullptr, memoryManager->getAlignedMallocRestrictions()); } #include #include TEST(MmapFlags, givenVariousMmapParametersGetTimeDeltaForTheOperation) { //disabling this test in CI. return; typedef std::chrono::high_resolution_clock Time; typedef std::chrono::nanoseconds ns; typedef std::chrono::duration fsec; std::vector pointersForFree; //allocate 4GB. 
auto size = 4 * GB; unsigned int maxTime = 0; unsigned int minTime = -1; unsigned int totalTime = 0; auto iterCount = 10; for (int i = 0; i < iterCount; i++) { auto t0 = Time::now(); auto gpuRange = mmap(nullptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); auto t1 = Time::now(); pointersForFree.push_back(gpuRange); fsec fs = t1 - t0; ns d = std::chrono::duration_cast(fs); unsigned int duration = (unsigned int)d.count(); totalTime += duration; minTime = std::min(duration, minTime); maxTime = std::max(duration, maxTime); } std::cout << "\n" << "min = " << minTime << "\nmax = " << maxTime << "\naverage = " << totalTime / iterCount << std::endl; for (auto &ptr : pointersForFree) { auto t0 = Time::now(); munmap(ptr, size); auto t1 = Time::now(); fsec fs = t1 - t0; ns d = std::chrono::duration_cast(fs); unsigned int duration = (unsigned int)d.count(); std::cout << "\nfreeing ptr " << ptr << " of size " << size << "time " << duration; } } TEST_F(DrmMemoryManagerBasic, givenDefaultMemoryManagerWhenItIsCreatedThenAsyncDeleterEnabledIsTrue) { DrmMemoryManager memoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, true, executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); memoryManager.commonCleanup(); } TEST_F(DrmMemoryManagerBasic, givenEnabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) { DebugManagerStateRestore dbgStateRestore; DebugManager.flags.EnableDeferredDeleter.set(true); DrmMemoryManager memoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, true, executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); memoryManager.commonCleanup(); } TEST_F(DrmMemoryManagerBasic, givenDisabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) { DebugManagerStateRestore dbgStateRestore; 
DebugManager.flags.EnableDeferredDeleter.set(false); DrmMemoryManager memoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, true, executionEnvironment); EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled()); EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); memoryManager.commonCleanup(); } TEST_F(DrmMemoryManagerBasic, givenDefaultDrmMemoryManagerWhenItIsQueriedForInternalHeapBaseThenInternalHeapBaseIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, executionEnvironment)); auto heapBase = memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY); EXPECT_EQ(heapBase, memoryManager->getInternalHeapBaseAddress(rootDeviceIndex)); } TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWithEnabledHostMemoryValidationWhenFeatureIsQueriedThenTrueIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment)); ASSERT_NE(nullptr, memoryManager.get()); EXPECT_TRUE(memoryManager->isValidateHostMemoryEnabled()); } TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWithDisabledHostMemoryValidationWhenFeatureIsQueriedThenFalseIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); ASSERT_NE(nullptr, memoryManager.get()); EXPECT_FALSE(memoryManager->isValidateHostMemoryEnabled()); } TEST_F(DrmMemoryManagerBasic, givenEnabledHostMemoryValidationWhenMemoryManagerIsCreatedThenPinBBIsCreated) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment)); ASSERT_NE(nullptr, memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); } TEST_F(DrmMemoryManagerBasic, givenEnabledHostMemoryValidationAndForcePinWhenMemoryManagerIsCreatedThenPinBBIsCreated) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, executionEnvironment)); 
ASSERT_NE(nullptr, memoryManager.get());
    ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]);
}

// An allocation created through allocateGraphicsMemoryWithProperties reports the System4KBPages pool.
TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledThenMemoryPoolIsSystem4KBPages) {
    std::unique_ptr<TestedDrmMemoryManager> memoryManager(new (std::nothrow) TestedDrmMemoryManager(
        false, false, true, executionEnvironment));
    // nothrow new may return null; stop before the manager is dereferenced.
    ASSERT_NE(nullptr, memoryManager.get());

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, false));

    // Fatal check: the allocation is dereferenced on the next line.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(allocation);
}

// Same pool expectation when the allocation wraps a caller-supplied (unaligned) host pointer.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenMemoryManagerWhenAllocateGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPages) {
    std::unique_ptr<TestedDrmMemoryManager> memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment));
    ASSERT_NE(nullptr, memoryManager.get());

    // Share the device's engines with this manager and take an internal reference on each OS context.
    memoryManager->registeredEngines = EngineControlContainer{this->device->engines};
    for (auto engine : memoryManager->registeredEngines) {
        engine.osContext->incRefInternal();
    }

    void *ptr = reinterpret_cast<void *>(0x1001);
    auto size = 4096u;
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size}, ptr);

    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(allocation);
}

TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) {
    std::unique_ptr<TestedDrmMemoryManager> memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment));
    memoryManager->setForce32BitAllocations(true);

    void *ptr = reinterpret_cast<void *>(0x1001);
    auto size = MemoryConstants::pageSize;
    auto allocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, ptr, GraphicsAllocation::AllocationType::BUFFER);

    ASSERT_NE(nullptr, allocation);
EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(allocation);
}

// An allocation imported from a shared handle reports the SystemCpuInaccessible pool.
TEST_F(DrmMemoryManagerBasic, givenMemoryManagerWhenCreateAllocationFromHandleIsCalledThenMemoryPoolIsSystemCpuInaccessible) {
    std::unique_ptr<TestedDrmMemoryManager> memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment));
    // nothrow new may return null; stop before the manager is dereferenced.
    ASSERT_NE(nullptr, memoryManager.get());
    auto osHandle = 1u;
    AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false);

    auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false);

    // Fatal check: the allocation is dereferenced on the next line.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, allocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(allocation);
}

// Disabled via the DISABLED_ prefix: 64kb allocation path is expected to report the System64KBPages pool.
TEST_F(DrmMemoryManagerBasic, DISABLED_givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemory64kbIsCalledThenMemoryPoolIsSystem64KBPages) {
    std::unique_ptr<TestedDrmMemoryManager> memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, executionEnvironment));
    ASSERT_NE(nullptr, memoryManager.get());
    AllocationData allocationData;
    allocationData.size = 4096u;
    allocationData.rootDeviceIndex = rootDeviceIndex;

    auto allocation = memoryManager->allocateGraphicsMemory64kb(allocationData);

    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(allocation);
}

// With every ioctl forced to return -1, constructing the manager with host memory
// validation enabled is expected to throw.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndEnabledValidateHostMemoryWhenPinBBAllocationFailsThenUnrecoverableIsCalled) {
    this->mock->ioctl_res = -1;
    this->mock->ioctl_expected.gemUserptr = 1;
    EXPECT_THROW(
        {
            std::unique_ptr<TestedDrmMemoryManager> memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment));
            EXPECT_NE(nullptr, memoryManager.get());
        },
        std::exception);
    // Restore the default ioctl result before verifying the recorded ioctl counts.
    this->mock->ioctl_res = 0;
    this->mock->ioctl_expected.contextDestroy = 0;
    this->mock->testIoctls();
}

TEST_F(DrmMemoryManagerWithExplicitExpectationsTest,
givenDisabledForcePinAndEnabledValidateHostMemoryWhenPopulateOsHandlesIsCalledThenHostMemoryIsValidated) { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; // for pinning - host memory validation OsHandleStorage handleStorage; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 4096; auto result = memoryManager->populateOsHandles(handleStorage, 0u); EXPECT_EQ(MemoryManager::AllocationStatus::Success, result); mock->testIoctls(); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[0].osHandleStorage); handleStorage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, 0); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndEnabledValidateHostMemoryWhenPopulateOsHandlesIsCalledWithFirstFragmentAlreadyAllocatedThenNewBosAreValidated) { class PinBufferObject : public BufferObject { public: PinBufferObject(Drm *drm) : BufferObject(drm, 1, 0) { } int pin(BufferObject *const boToPin[], size_t numberOfBos, uint32_t drmContextId) override { for (size_t i = 0; i < numberOfBos; i++) { pinnedBoArray[i] = boToPin[i]; } numberOfBosPinned = numberOfBos; return 0; } BufferObject *pinnedBoArray[5]; size_t numberOfBosPinned; }; std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, 
memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); PinBufferObject *pinBB = new PinBufferObject(this->mock); memoryManager->injectPinBB(pinBB); mock->reset(); mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.execbuffer2 = 0; // pinning for host memory validation is mocked OsHandleStorage handleStorage; OsHandle handle1; handleStorage.fragmentStorageData[0].osHandleStorage = &handle1; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 4096; handleStorage.fragmentStorageData[1].osHandleStorage = nullptr; handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000); handleStorage.fragmentStorageData[1].fragmentSize = 8192; handleStorage.fragmentStorageData[2].osHandleStorage = nullptr; handleStorage.fragmentStorageData[2].cpuPtr = reinterpret_cast(0x4000); handleStorage.fragmentStorageData[2].fragmentSize = 4096; auto result = memoryManager->populateOsHandles(handleStorage, 0u); EXPECT_EQ(MemoryManager::AllocationStatus::Success, result); mock->testIoctls(); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[0].osHandleStorage); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[1].osHandleStorage); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[2].osHandleStorage); EXPECT_EQ(handleStorage.fragmentStorageData[1].osHandleStorage->bo, pinBB->pinnedBoArray[0]); EXPECT_EQ(handleStorage.fragmentStorageData[2].osHandleStorage->bo, pinBB->pinnedBoArray[1]); handleStorage.fragmentStorageData[0].freeTheFragment = false; handleStorage.fragmentStorageData[1].freeTheFragment = true; handleStorage.fragmentStorageData[2].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, 0); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenValidateHostPtrMemoryEnabledWhenHostPtrAllocationIsCreatedWithoutForcingPinThenBufferObjectIsPinned) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; 
mock->ioctl_expected.gemClose = 2; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); size_t size = 10 * MB; void *ptr = ::alignedMalloc(size, 4096); auto alloc = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), false, size}, ptr)); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(ptr); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledHostMemoryValidationWhenValidHostPointerIsPassedToPopulateThenSuccessIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = 1; auto result = memoryManager->populateOsHandles(storage, 0u); EXPECT_EQ(MemoryManager::AllocationStatus::Success, result); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, 0); } TEST_F(DrmMemoryManagerTest, givenForcePinAndHostMemoryValidationEnabledWhenSmallAllocationIsCreatedThenBufferObjectIsPinned) { mock->ioctl_expected.gemUserptr = 2; // 1 pinBB, 1 small allocation mock->ioctl_expected.execbuffer2 = 1; // pinning mock->ioctl_expected.gemWait = 1; // in freeGraphicsAllocation mock->ioctl_expected.gemClose = 2; // 1 pinBB, 1 small allocation 
std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[rootDeviceIndex]); // one page is too small for early pinning but pinning is used for host memory validation allocationData.size = 4 * 1024; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); ::alignedFree(const_cast(allocationData.hostPtr)); } TEST_F(DrmMemoryManagerTest, givenForcePinAllowedAndNoPinBBInMemoryManagerWhenAllocationWithForcePinFlagTrueIsCreatedThenAllocationIsNotPinned) { mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; mock->ioctl_res = -1; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, true, false, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } EXPECT_EQ(nullptr, memoryManager->pinBBs[rootDeviceIndex]); mock->ioctl_res = 0; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(createAllocationProperties(rootDeviceIndex, MemoryConstants::pageSize, true)); EXPECT_NE(nullptr, allocation); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenNullptrOrZeroSizeWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledThenAllocationIsNotCreated) { allocationData.size = 0; allocationData.hostPtr = nullptr; EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); allocationData.size = 100; allocationData.hostPtr = nullptr; 
EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); allocationData.size = 0; allocationData.hostPtr = reinterpret_cast(0x12345); EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); } TEST_F(DrmMemoryManagerBasic, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithNotAlignedPtrIsPassedThenAllocationIsCreated) { AllocationData allocationData; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); allocationData.size = 13; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0x5001u, reinterpret_cast(allocation->getUnderlyingBuffer())); EXPECT_EQ(13u, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerBasic, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrObjectAlignedSizeIsUsedByAllocUserPtrWhenBiggerSizeAllocatedInHeap) { AllocationData allocationData; allocationData.rootDeviceIndex = rootDeviceIndex; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); allocationData.size = 4 * MB + 16 * 1024; allocationData.hostPtr = reinterpret_cast(0x10000000); auto allocation0 = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); allocationData.hostPtr = reinterpret_cast(0x20000000); auto allocation1 = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); memoryManager->freeGraphicsMemory(allocation0); allocationData.size = 4 * MB + 12 * 1024; allocationData.hostPtr = reinterpret_cast(0x30000000); allocation0 = 
memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_EQ((uint64_t)(allocation0->getBO()->peekSize()), 4 * MB + 12 * 1024); memoryManager->freeGraphicsMemory(allocation0); memoryManager->freeGraphicsMemory(allocation1); } TEST_F(DrmMemoryManagerBasic, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledButAllocationFailedThenNullPtrReturned) { AllocationData allocationData; allocationData.rootDeviceIndex = rootDeviceIndex; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); allocationData.size = 64 * GB; allocationData.hostPtr = reinterpret_cast(0x100000000000); EXPECT_FALSE(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); } TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithHostPtrIsPassedAndWhenAllocUserptrFailsThenFails) { memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); mock->ioctl_expected.gemUserptr = 1; this->ioctlResExt = {mock->ioctl_cnt.total, -1}; mock->ioctl_res_ext = &ioctlResExt; allocationData.size = 10; allocationData.hostPtr = reinterpret_cast(0x1000); auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_EQ(nullptr, allocation); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenForcePinNotAllowedAndHostMemoryValidationEnabledWhenAllocationIsCreatedThenBufferObjectIsPinnedOnlyOnce) { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; mock->ioctl_expected.gemClose = 1; mock->ioctl_expected.gemWait = 1; 
AllocationData allocationData; allocationData.size = 4 * 1024; allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); allocationData.flags.forcePin = true; allocationData.rootDeviceIndex = device->getRootDeviceIndex(); auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); mock->testIoctls(); ::alignedFree(const_cast(allocationData.hostPtr)); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenForcePinNotAllowedAndHostMemoryValidationDisabledWhenAllocationIsCreatedThenBufferObjectIsNotPinned) { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, false, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemClose = 1; mock->ioctl_expected.gemWait = 1; AllocationData allocationData; allocationData.size = 10 * MB; // bigger than threshold allocationData.hostPtr = ::alignedMalloc(allocationData.size, 4096); allocationData.flags.forcePin = true; allocationData.rootDeviceIndex = device->getRootDeviceIndex(); auto alloc = memoryManager->allocateGraphicsMemoryWithHostPtr(allocationData); ASSERT_NE(nullptr, alloc); EXPECT_NE(nullptr, alloc->getBO()); memoryManager->freeGraphicsMemory(alloc); mock->testIoctls(); ::alignedFree(const_cast(allocationData.hostPtr)); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledValidateHostMemoryWhenReadOnlyPointerCausesPinningFailWithEfaultThenPopulateOsHandlesMarksFragmentsToFree) { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { 
engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager.get()); ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {2, -1}; mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = EFAULT; mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.execbuffer2 = 1; OsHandleStorage handleStorage; OsHandle handle1; handleStorage.fragmentStorageData[0].osHandleStorage = &handle1; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 4096; handleStorage.fragmentStorageData[1].osHandleStorage = nullptr; handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000); handleStorage.fragmentStorageData[1].fragmentSize = 8192; handleStorage.fragmentStorageData[2].osHandleStorage = nullptr; handleStorage.fragmentStorageData[2].cpuPtr = reinterpret_cast(0x4000); handleStorage.fragmentStorageData[2].fragmentSize = 4096; auto result = memoryManager->populateOsHandles(handleStorage, 0u); EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result); mock->testIoctls(); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[0].osHandleStorage); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[1].osHandleStorage); EXPECT_NE(nullptr, handleStorage.fragmentStorageData[2].osHandleStorage); EXPECT_TRUE(handleStorage.fragmentStorageData[1].freeTheFragment); EXPECT_TRUE(handleStorage.fragmentStorageData[2].freeTheFragment); handleStorage.fragmentStorageData[0].freeTheFragment = false; handleStorage.fragmentStorageData[1].freeTheFragment = true; handleStorage.fragmentStorageData[2].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, 0u); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenEnabledValidateHostMemoryWhenReadOnlyPointerCausesPinningFailWithEfaultThenPopulateOsHandlesDoesNotStoreTheFragments) { std::unique_ptr memoryManager(new 
TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); mock->reset(); DrmMockCustom::IoctlResExt ioctlResExt = {2, -1}; mock->ioctl_res_ext = &ioctlResExt; mock->errnoValue = EFAULT; mock->ioctl_expected.gemUserptr = 2; mock->ioctl_expected.execbuffer2 = 1; OsHandleStorage handleStorage; OsHandle handle1; handleStorage.fragmentStorageData[0].osHandleStorage = &handle1; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 4096; handleStorage.fragmentStorageData[1].osHandleStorage = nullptr; handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000); handleStorage.fragmentStorageData[1].fragmentSize = 8192; handleStorage.fragmentStorageData[2].osHandleStorage = nullptr; handleStorage.fragmentStorageData[2].cpuPtr = reinterpret_cast(0x4000); handleStorage.fragmentStorageData[2].fragmentSize = 4096; auto result = memoryManager->populateOsHandles(handleStorage, 0u); EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result); mock->testIoctls(); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); EXPECT_EQ(nullptr, hostPtrManager->getFragment(handleStorage.fragmentStorageData[1].cpuPtr)); EXPECT_EQ(nullptr, hostPtrManager->getFragment(handleStorage.fragmentStorageData[2].cpuPtr)); handleStorage.fragmentStorageData[0].freeTheFragment = false; handleStorage.fragmentStorageData[1].freeTheFragment = true; handleStorage.fragmentStorageData[2].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, 0); mock->ioctl_res_ext = &mock->NONE; } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, 
givenEnabledValidateHostMemoryWhenPopulateOsHandlesSucceedsThenFragmentIsStoredInHostPtrManager) { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); memoryManager->registeredEngines = EngineControlContainer{this->device->engines}; for (auto engine : memoryManager->registeredEngines) { engine.osContext->incRefInternal(); } ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); mock->reset(); mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.execbuffer2 = 1; OsHandleStorage handleStorage; handleStorage.fragmentStorageData[0].osHandleStorage = nullptr; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 4096; auto result = memoryManager->populateOsHandles(handleStorage, 0u); EXPECT_EQ(MemoryManager::AllocationStatus::Success, result); mock->testIoctls(); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); EXPECT_EQ(1u, hostPtrManager->getFragmentCount()); EXPECT_NE(nullptr, hostPtrManager->getFragment(handleStorage.fragmentStorageData[0].cpuPtr)); handleStorage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, 0); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenCleanOsHandlesDeletesHandleDataThenOsHandleStorageAndResidencyIsSetToNullptr) { std::unique_ptr memoryManager(new TestedDrmMemoryManager(false, false, true, *executionEnvironment)); ASSERT_NE(nullptr, memoryManager->pinBBs[device->getRootDeviceIndex()]); OsHandleStorage handleStorage; handleStorage.fragmentStorageData[0].osHandleStorage = new OsHandle(); handleStorage.fragmentStorageData[0].residency = new ResidencyData(); handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 4096; handleStorage.fragmentStorageData[1].osHandleStorage = new OsHandle(); handleStorage.fragmentStorageData[1].residency = new 
ResidencyData(); handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[1].fragmentSize = 4096; handleStorage.fragmentStorageData[0].freeTheFragment = true; handleStorage.fragmentStorageData[1].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, 0); for (uint32_t i = 0; i < 2; i++) { EXPECT_EQ(nullptr, handleStorage.fragmentStorageData[i].osHandleStorage); EXPECT_EQ(nullptr, handleStorage.fragmentStorageData[i].residency); } } TEST_F(DrmMemoryManagerBasic, ifLimitedRangeAllocatorAvailableWhenAskedForAllocationThenLimitedRangePointerIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); size_t size = 100u; auto ptr = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::HEAP_STANDARD, size); auto address64bit = ptrDiff(ptr, memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD)); EXPECT_LT(address64bit, defaultHwInfo->capabilityTable.gpuAddressSpace); EXPECT_LT(0u, address64bit); memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(HeapIndex::HEAP_STANDARD, ptr, size); } TEST_F(DrmMemoryManagerBasic, givenSpecificAddressSpaceWhenInitializingMemoryManagerThenSetCorrectHeaps) { executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo()->capabilityTable.gpuAddressSpace = maxNBitValue(48); TestedDrmMemoryManager memoryManager(false, false, false, executionEnvironment); auto gfxPartition = memoryManager.getGfxPartition(rootDeviceIndex); auto limit = gfxPartition->getHeapLimit(HeapIndex::HEAP_SVM); EXPECT_EQ(maxNBitValue(48 - 1), limit); } TEST_F(DrmMemoryManagerBasic, givenDisabledHostPtrTrackingWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithNotAlignedPtrIsPassedThenAllocationIsCreated) { DebugManagerStateRestore restore; DebugManager.flags.EnableHostPtrTracking.set(false); 
AllocationData allocationData; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); memoryManager->forceLimitedRangeAllocator(MemoryConstants::max48BitAddress); allocationData.size = 13; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0x5001u, reinterpret_cast(allocation->getUnderlyingBuffer())); EXPECT_EQ(13u, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerBasic, givenImageOrSharedResourceCopyWhenGraphicsAllocationInDevicePoolIsAllocatedThenNullptrIsReturned) { std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Error; AllocationData allocData; allocData.size = MemoryConstants::pageSize; allocData.flags.allocateMemory = true; allocData.rootDeviceIndex = rootDeviceIndex; GraphicsAllocation::AllocationType types[] = {GraphicsAllocation::AllocationType::IMAGE, GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY}; for (auto type : types) { allocData.type = type; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } } TEST_F(DrmMemoryManagerBasic, givenLocalMemoryDisabledWhenAllocateInDevicePoolIsCalledThenNullptrAndStatusRetryIsReturned) { const bool localMemoryEnabled = false; TestedDrmMemoryManager memoryManager(localMemoryEnabled, false, false, executionEnvironment); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.size = MemoryConstants::pageSize; 
allocData.flags.useSystemMemory = false; allocData.flags.allocateMemory = true; allocData.rootDeviceIndex = rootDeviceIndex; auto allocation = memoryManager.allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } TEST(DrmAllocationTest, givenAllocationTypeWhenPassedToDrmAllocationConstructorThenAllocationTypeIsStored) { DrmAllocation allocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, nullptr, static_cast(0), 0u, MemoryPool::MemoryNull}; EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, allocation.getAllocationType()); DrmAllocation allocation2{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0ULL, static_cast(0), MemoryPool::MemoryNull}; EXPECT_EQ(GraphicsAllocation::AllocationType::UNKNOWN, allocation2.getAllocationType()); } TEST(DrmAllocationTest, givenMemoryPoolWhenPassedToDrmAllocationConstructorThenMemoryPoolIsStored) { DrmAllocation allocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, nullptr, static_cast(0), 0u, MemoryPool::System64KBPages}; EXPECT_EQ(MemoryPool::System64KBPages, allocation.getMemoryPool()); DrmAllocation allocation2{0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, nullptr, 0ULL, static_cast(0), MemoryPool::SystemCpuInaccessible}; EXPECT_EQ(MemoryPool::SystemCpuInaccessible, allocation2.getMemoryPool()); } TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, whenReservingAddressRangeThenExpectProperAddressAndReleaseWhenFreeing) { constexpr size_t size = 0x1000; auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), size}); ASSERT_NE(nullptr, allocation); void *reserve = memoryManager->reserveCpuAddressRange(size, 0u); EXPECT_EQ(nullptr, reserve); allocation->setReservedAddressRange(reserve, size); EXPECT_EQ(reserve, allocation->getReservedAddressPtr()); EXPECT_EQ(size, 
allocation->getReservedAddressSize()); memoryManager->freeGraphicsMemory(allocation); } TEST(DrmMemoryManagerWithExplicitExpectationsTest2, whenObtainFdFromHandleIsCalledThenProperFdHandleIsReturned) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(4u); for (auto i = 0u; i < 4u; i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto memoryManager = std::make_unique(false, false, false, *executionEnvironment); for (auto i = 0u; i < 4u; i++) { auto mock = new DrmMockCustom(); executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->setDrm(mock); int boHandle = 3; mock->outputFd = 1337; mock->ioctl_expected.handleToPrimeFd = 1; auto fdHandle = memoryManager->obtainFdFromHandle(boHandle, i); EXPECT_EQ(mock->inputHandle, static_cast(boHandle)); EXPECT_EQ(mock->inputFlags, DRM_CLOEXEC | DRM_RDWR); EXPECT_EQ(1337, fdHandle); } } TEST_F(DrmMemoryManagerTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedThenAllocateMemoryAndReserveGpuVa) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 1; mock->ioctl_expected.gemClose = 1; TestedDrmMemoryManager::AllocationData allocationData; allocationData.size = 2 * MemoryConstants::megaByte; allocationData.alignment = 2 * MemoryConstants::megaByte; allocationData.type = GraphicsAllocation::AllocationType::SVM_CPU; allocationData.rootDeviceIndex = rootDeviceIndex; DrmAllocation *allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); ASSERT_NE(nullptr, allocation); EXPECT_EQ(GraphicsAllocation::AllocationType::SVM_CPU, allocation->getAllocationType()); EXPECT_EQ(allocationData.size, allocation->getUnderlyingBufferSize()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(allocation->getUnderlyingBuffer(), allocation->getDriverAllocatedCpuPtr()); EXPECT_NE(0llu, 
allocation->getGpuAddress()); EXPECT_NE(reinterpret_cast(allocation->getUnderlyingBuffer()), allocation->getGpuAddress()); auto bo = allocation->getBO(); ASSERT_NE(nullptr, bo); EXPECT_NE(0llu, bo->peekAddress()); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_STANDARD)), bo->peekAddress()); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_STANDARD)), bo->peekAddress()); EXPECT_EQ(reinterpret_cast(allocation->getGpuAddress()), alignUp(allocation->getReservedAddressPtr(), allocationData.alignment)); EXPECT_EQ(alignUp(allocationData.size, allocationData.alignment) + allocationData.alignment, allocation->getReservedAddressSize()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(DrmMemoryManagerTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedButFailsToReserveGpuVaThenNullAllocationIsReturned) { mock->ioctl_expected.gemUserptr = 1; mock->ioctl_expected.gemWait = 0; mock->ioctl_expected.gemClose = 1; memoryManager->getGfxPartition(rootDeviceIndex)->heapInit(HeapIndex::HEAP_STANDARD, 0, 0); TestedDrmMemoryManager::AllocationData allocationData; allocationData.size = 2 * MemoryConstants::megaByte; allocationData.alignment = 2 * MemoryConstants::megaByte; allocationData.type = GraphicsAllocation::AllocationType::SVM_CPU; allocationData.rootDeviceIndex = rootDeviceIndex; DrmAllocation *allocation = memoryManager->allocateGraphicsMemoryWithAlignment(allocationData); EXPECT_EQ(nullptr, allocation); } TEST_F(DrmMemoryManagerTest, DISABLED_givenDrmMemoryManagerAndReleaseGpuRangeIsCalledThenGpuAddressIsDecanonized) { auto mockGfxPartition = std::make_unique(); mockGfxPartition->init(maxNBitValue(48), 0, 0, 1); auto size = 2 * MemoryConstants::megaByte; auto gpuAddress = mockGfxPartition->heapAllocate(HeapIndex::HEAP_STANDARD, size); auto gpuAddressCanonized = GmmHelper::canonize(gpuAddress); EXPECT_NE(gpuAddress, gpuAddressCanonized); 
EXPECT_LE(gpuAddress, gpuAddressCanonized); EXPECT_CALL(*mockGfxPartition.get(), freeGpuAddressRange(gpuAddress, size)); memoryManager->overrideGfxPartition(mockGfxPartition.release()); memoryManager->releaseGpuRange(reinterpret_cast(gpuAddressCanonized), size, 0); } class GMockDrmMemoryManager : public TestedDrmMemoryManager { public: GMockDrmMemoryManager(ExecutionEnvironment &executionEnvironment) : TestedDrmMemoryManager(executionEnvironment) { ON_CALL(*this, unreference).WillByDefault([this](BufferObject *bo, bool synchronousDestroy) { return this->baseUnreference(bo, synchronousDestroy); }); ON_CALL(*this, releaseGpuRange).WillByDefault([this](void *ptr, size_t size, uint32_t rootDeviceIndex) { return this->baseReleaseGpuRange(ptr, size, rootDeviceIndex); }); ON_CALL(*this, alignedFreeWrapper).WillByDefault([this](void *ptr) { return this->baseAlignedFreeWrapper(ptr); }); } MOCK_METHOD2(unreference, uint32_t(BufferObject *, bool)); MOCK_METHOD3(releaseGpuRange, void(void *, size_t, uint32_t)); MOCK_METHOD1(alignedFreeWrapper, void(void *)); uint32_t baseUnreference(BufferObject *bo, bool synchronousDestroy) { return TestedDrmMemoryManager::unreference(bo, synchronousDestroy); } void baseReleaseGpuRange(void *ptr, size_t size, uint32_t rootDeviceIndex) { TestedDrmMemoryManager::releaseGpuRange(ptr, size, rootDeviceIndex); } void baseAlignedFreeWrapper(void *ptr) { TestedDrmMemoryManager::alignedFreeWrapper(ptr); } }; TEST(DrmMemoryManagerFreeGraphicsMemoryCallSequenceTest, givenDrmMemoryManagerAndFreeGraphicsMemoryIsCalledThenUnreferenceBufferObjectIsCalledFirstWithSynchronousDestroySetToTrue) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0])); GMockDrmMemoryManager gmockDrmMemoryManager(executionEnvironment); 
AllocationProperties properties{0, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::BUFFER}; auto allocation = gmockDrmMemoryManager.allocateGraphicsMemoryWithProperties(properties); ASSERT_NE(allocation, nullptr); { ::testing::InSequence inSequence; EXPECT_CALL(gmockDrmMemoryManager, unreference(::testing::_, true)).Times(EngineLimits::maxHandleCount); EXPECT_CALL(gmockDrmMemoryManager, releaseGpuRange(::testing::_, ::testing::_, ::testing::_)).Times(1); EXPECT_CALL(gmockDrmMemoryManager, alignedFreeWrapper(::testing::_)).Times(1); } gmockDrmMemoryManager.freeGraphicsMemory(allocation); } TEST(DrmMemoryManagerFreeGraphicsMemoryUnreferenceTest, givenDrmMemoryManagerAndFreeGraphicsMemoryIsCalledForSharedAllocationThenUnreferenceBufferObjectIsCalledWithSynchronousDestroySetToFalse) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); const uint32_t rootDeviceIndex = 0u; executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->get()->setDrm(Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex])); ::testing::NiceMock gmockDrmMemoryManager(executionEnvironment); osHandle handle = 1u; AllocationProperties properties(rootDeviceIndex, false, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto allocation = gmockDrmMemoryManager.createGraphicsAllocationFromSharedHandle(handle, properties, false); ASSERT_NE(nullptr, allocation); EXPECT_CALL(gmockDrmMemoryManager, unreference(::testing::_, false)).Times(1); EXPECT_CALL(gmockDrmMemoryManager, unreference(::testing::_, true)).Times(EngineLimits::maxHandleCount - 1); gmockDrmMemoryManager.freeGraphicsMemory(allocation); } TEST(DrmMemoryMangerTest, givenMultipleRootDeviceWhenMemoryManagerGetsDrmThenDrmIsFromCorrectRootDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(4); 
VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; initPlatform(); TestedDrmMemoryManager drmMemoryManager(*platform()->peekExecutionEnvironment()); for (auto i = 0u; i < platform()->peekExecutionEnvironment()->rootDeviceEnvironments.size(); i++) { auto drmFromRootDevice = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[i]->osInterface->get()->getDrm(); EXPECT_EQ(drmFromRootDevice, &drmMemoryManager.getDrm(i)); EXPECT_EQ(i, drmMemoryManager.getRootDeviceIndex(drmFromRootDevice)); } EXPECT_EQ(invalidRootDeviceIndex, drmMemoryManager.getRootDeviceIndex(nullptr)); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.h000066400000000000000000000135401363734646600326740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include namespace NEO { using AllocationData = TestedDrmMemoryManager::AllocationData; class DrmMemoryManagerBasic : public ::testing::Test { public: DrmMemoryManagerBasic() : executionEnvironment(defaultHwInfo.get()){}; void SetUp() override { executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setDrm(Drm::create(nullptr, *executionEnvironment.rootDeviceEnvironments[0])); 
executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); } const uint32_t rootDeviceIndex = 0u; MockExecutionEnvironment executionEnvironment; }; class DrmMemoryManagerFixture : public MemoryManagementFixture { public: DrmMockCustom *mock; DrmMockCustom *nonDefaultDrm = nullptr; const uint32_t nonDefaultRootDeviceIndex = 1u; const uint32_t rootDeviceIndex = 0u; TestedDrmMemoryManager *memoryManager = nullptr; MockClDevice *device = nullptr; void SetUp() override { MemoryManagementFixture::SetUp(); SetUp(new DrmMockCustom, false); } void SetUp(DrmMockCustom *mock, bool localMemoryEnabled) { allocationData.rootDeviceIndex = rootDeviceIndex; this->mock = mock; executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), false, nonDefaultRootDeviceIndex + 1); executionEnvironment->incRefInternal(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->osInterface = std::make_unique(); rootDeviceEnvironment->osInterface->get()->setDrm(mock); nonDefaultDrm = new DrmMockCustom(); auto nonDefaultRootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[nonDefaultRootDeviceIndex].get(); nonDefaultRootDeviceEnvironment->osInterface = std::make_unique(); nonDefaultRootDeviceEnvironment->osInterface->get()->setDrm(nonDefaultDrm); memoryManager = new (std::nothrow) TestedDrmMemoryManager(localMemoryEnabled, false, false, *executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); //assert we have memory manager ASSERT_NE(nullptr, memoryManager); if (memoryManager->getgemCloseWorker()) { memoryManager->getgemCloseWorker()->close(true); } device = new MockClDevice{MockDevice::create(executionEnvironment, 0)}; mock->reset(); } void TearDown() override { mock->testIoctls(); mock->reset(); mock->ioctl_expected.contextDestroy = static_cast(device->engines.size()); mock->ioctl_expected.gemClose = static_cast(device->engines.size()); 
mock->ioctl_expected.gemWait = static_cast(device->engines.size()); if (device->getDefaultEngine().commandStreamReceiver->getPreemptionAllocation()) { mock->ioctl_expected.gemClose += static_cast(device->engines.size()); mock->ioctl_expected.gemWait += static_cast(device->engines.size()); } mock->ioctl_expected.gemWait += additionalDestroyDeviceIoctls.gemWait.load(); mock->ioctl_expected.gemClose += additionalDestroyDeviceIoctls.gemClose.load(); delete device; mock->testIoctls(); executionEnvironment->decRefInternal(); MemoryManagementFixture::TearDown(); } protected: ExecutionEnvironment *executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; DrmMockCustom::IoctlResExt ioctlResExt = {0, 0}; AllocationData allocationData{}; DrmMockCustom::Ioctls additionalDestroyDeviceIoctls{}; }; class DrmMemoryManagerWithLocalMemoryFixture : public DrmMemoryManagerFixture { public: void SetUp() override { MemoryManagementFixture::SetUp(); DrmMemoryManagerFixture::SetUp(new DrmMockCustom, true); } void TearDown() override { DrmMemoryManagerFixture::TearDown(); } }; class DrmMemoryManagerFixtureWithoutQuietIoctlExpectation { public: std::unique_ptr memoryManager; DrmMockCustom *mock; void SetUp() { executionEnvironment = new ExecutionEnvironment; executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); mock = new DrmMockCustom(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setDrm(mock); memoryManager.reset(new TestedDrmMemoryManager(*executionEnvironment)); ASSERT_NE(nullptr, memoryManager); if (memoryManager->getgemCloseWorker()) { memoryManager->getgemCloseWorker()->close(true); } device.reset(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0)); } void TearDown() { } protected: ExecutionEnvironment *executionEnvironment = nullptr; 
std::unique_ptr device; DrmMockCustom::IoctlResExt ioctlResExt = {0, 0}; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_mock.cpp000066400000000000000000000145771363734646600277470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" const int DrmMock::mockFd; int DrmMock::ioctl(unsigned long request, void *arg) { ioctlCallsCount++; if ((request == DRM_IOCTL_I915_GETPARAM) && (arg != nullptr)) { auto gp = static_cast(arg); if (gp->param == I915_PARAM_EU_TOTAL) { if (0 == this->StoredRetValForEUVal) { *gp->value = this->StoredEUVal; } return this->StoredRetValForEUVal; } if (gp->param == I915_PARAM_SUBSLICE_TOTAL) { if (0 == this->StoredRetValForSSVal) { *gp->value = this->StoredSSVal; } return this->StoredRetValForSSVal; } if (gp->param == I915_PARAM_CHIPSET_ID) { if (0 == this->StoredRetValForDeviceID) { *gp->value = this->StoredDeviceID; } return this->StoredRetValForDeviceID; } if (gp->param == I915_PARAM_REVISION) { if (0 == this->StoredRetValForDeviceRevID) { *gp->value = this->StoredDeviceRevID; } return this->StoredRetValForDeviceRevID; } if (gp->param == I915_PARAM_HAS_POOLED_EU) { if (0 == this->StoredRetValForPooledEU) { *gp->value = this->StoredHasPooledEU; } return this->StoredRetValForPooledEU; } if (gp->param == I915_PARAM_MIN_EU_IN_POOL) { if (0 == this->StoredRetValForMinEUinPool) { *gp->value = this->StoredMinEUinPool; } return this->StoredRetValForMinEUinPool; } if (gp->param == I915_PARAM_HAS_SCHEDULER) { *gp->value = this->StoredPreemptionSupport; return this->StoredRetVal; } if (gp->param == I915_PARAM_HAS_EXEC_SOFTPIN) { *gp->value = this->StoredExecSoftPin; return this->StoredRetVal; } } if ((request == DRM_IOCTL_I915_GEM_CONTEXT_CREATE) && (arg != nullptr)) { auto create = static_cast(arg); create->ctx_id = this->StoredCtxId; return this->StoredRetVal; } if ((request == 
DRM_IOCTL_I915_GEM_CONTEXT_DESTROY) && (arg != nullptr)) { auto destroy = static_cast(arg); this->receivedDestroyContextId = destroy->ctx_id; return this->StoredRetVal; } if ((request == DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM) && (arg != nullptr)) { receivedContextParamRequestCount++; receivedContextParamRequest = *static_cast(arg); if (receivedContextParamRequest.param == I915_CONTEXT_PARAM_PRIORITY) { return this->StoredRetVal; } if ((receivedContextParamRequest.param == I915_CONTEXT_PRIVATE_PARAM_BOOST) && (receivedContextParamRequest.value == 1)) { return this->StoredRetVal; } if (receivedContextParamRequest.param == I915_CONTEXT_PARAM_SSEU) { if (StoredRetValForSetSSEU == 0) { storedParamSseu = (*static_cast(reinterpret_cast(receivedContextParamRequest.value))).slice_mask; } return this->StoredRetValForSetSSEU; } if (receivedContextParamRequest.param == I915_CONTEXT_PARAM_PERSISTENCE) { return this->StoredRetValForPersistant; } } if ((request == DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM) && (arg != nullptr)) { receivedContextParamRequestCount++; receivedContextParamRequest = *static_cast(arg); if (receivedContextParamRequest.param == I915_CONTEXT_PARAM_GTT_SIZE) { static_cast(arg)->value = this->storedGTTSize; return this->StoredRetValForGetGttSize; } if (receivedContextParamRequest.param == I915_CONTEXT_PARAM_SSEU) { if (StoredRetValForGetSSEU == 0) { (*static_cast(reinterpret_cast(receivedContextParamRequest.value))).slice_mask = storedParamSseu; } return this->StoredRetValForGetSSEU; } if (receivedContextParamRequest.param == I915_CONTEXT_PARAM_PERSISTENCE) { static_cast(arg)->value = this->StoredPersistentContextsSupport; return this->StoredRetValForPersistant; } } if (request == DRM_IOCTL_I915_GEM_EXECBUFFER2) { auto execbuf = static_cast(arg); this->execBuffer = *execbuf; return 0; } if (request == DRM_IOCTL_I915_GEM_USERPTR) { auto userPtrParams = static_cast(arg); userPtrParams->handle = returnHandle; returnHandle++; return 0; } if (request == 
DRM_IOCTL_I915_GEM_CREATE) { auto createParams = static_cast(arg); this->createParamsSize = createParams->size; this->createParamsHandle = createParams->handle = 1u; if (0 == this->createParamsSize) { return EINVAL; } return 0; } if (request == DRM_IOCTL_I915_GEM_SET_TILING) { auto setTilingParams = static_cast(arg); setTilingMode = setTilingParams->tiling_mode; setTilingHandle = setTilingParams->handle; setTilingStride = setTilingParams->stride; return 0; } if (request == DRM_IOCTL_PRIME_FD_TO_HANDLE) { auto primeToHandleParams = static_cast(arg); //return BO primeToHandleParams->handle = outputHandle; inputFd = primeToHandleParams->fd; return 0; } if (request == DRM_IOCTL_I915_GEM_GET_APERTURE) { auto aperture = static_cast(arg); aperture->aper_available_size = gpuMemSize; aperture->aper_size = gpuMemSize; return 0; } if (request == DRM_IOCTL_I915_GEM_MMAP) { auto mmap_arg = static_cast(arg); mmap_arg->addr_ptr = reinterpret_cast<__u64>(lockedPtr); return 0; } if (request == DRM_IOCTL_I915_GEM_WAIT) { return 0; } return handleRemainingRequests(request, arg); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_mock.h000066400000000000000000000106551363734646600274050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "drm/i915_drm.h" #include #include #include using namespace NEO; // Mock DRM class that responds to DRM_IOCTL_I915_GETPARAMs class DrmMock : public Drm { public: using Drm::checkQueueSliceSupport; using Drm::engineInfo; using Drm::getQueueSliceCount; using Drm::memoryInfo; using Drm::nonPersistentContextsSupported; using Drm::preemptionSupported; using Drm::query; 
using Drm::sliceCountChangeSupported; DrmMock(RootDeviceEnvironment &rootDeviceEnvironment) : Drm(std::make_unique(mockFd), rootDeviceEnvironment) { sliceCountChangeSupported = true; } DrmMock() : DrmMock(*platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]) {} ~DrmMock() override { if (sysFsDefaultGpuPathToRestore != nullptr) { sysFsDefaultGpuPath = sysFsDefaultGpuPathToRestore; } } int ioctl(unsigned long request, void *arg) override; void setSysFsDefaultGpuPath(const char *path) { sysFsDefaultGpuPathToRestore = sysFsDefaultGpuPath; sysFsDefaultGpuPath = path; } void writeConfigFile(const char *name, int deviceID) { std::ofstream tempfile(name, std::ios::binary); if (tempfile.is_open()) { PCIConfig config; config.DeviceID = deviceID; tempfile.write(reinterpret_cast(&config), sizeof(config)); tempfile.close(); } } void deleteConfigFile(const char *name) { std::ofstream tempfile(name); if (tempfile.is_open()) { tempfile.close(); remove(name); } } void setFileDescriptor(int fd) { hwDeviceId = std::make_unique(fd); } void setDeviceID(int deviceId) { this->deviceId = deviceId; } void setDeviceRevID(int revisionId) { this->revisionId = revisionId; } inline uint32_t createMemoryRegionId(uint16_t type, uint16_t instance) const { return (1u << (type + 16)) | (1u << instance); } static inline uint16_t getMemoryTypeFromRegion(uint32_t region) { return Math::log2(region >> 16); }; static inline uint16_t getInstanceFromRegion(uint32_t region) { return Math::log2(region & 0xFFFF); }; static const int mockFd = 33; int StoredEUVal = -1; int StoredSSVal = -1; int StoredDeviceID = 1; int StoredDeviceRevID = 1; int StoredHasPooledEU = 1; int StoredMinEUinPool = 1; int StoredPersistentContextsSupport = 1; int StoredRetVal = 0; int StoredRetValForGetGttSize = 0; int StoredRetValForGetSSEU = 0; int StoredRetValForSetSSEU = 0; int StoredRetValForDeviceID = 0; int StoredRetValForEUVal = 0; int StoredRetValForSSVal = 0; int StoredRetValForDeviceRevID = 0; int 
StoredRetValForPooledEU = 0; int StoredRetValForMinEUinPool = 0; int StoredRetValForPersistant = 0; int StoredPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; int StoredExecSoftPin = 0; uint32_t StoredCtxId = 1; uint32_t receivedDestroyContextId = 0; uint32_t ioctlCallsCount = 0; uint32_t receivedContextParamRequestCount = 0; drm_i915_gem_context_param receivedContextParamRequest = {}; //DRM_IOCTL_I915_GEM_EXECBUFFER2 drm_i915_gem_execbuffer2 execBuffer = {0}; //DRM_IOCTL_I915_GEM_CREATE __u64 createParamsSize = 0; __u32 createParamsHandle = 0; //DRM_IOCTL_I915_GEM_SET_TILING __u32 setTilingMode = 0; __u32 setTilingHandle = 0; __u32 setTilingStride = 0; //DRM_IOCTL_PRIME_FD_TO_HANDLE __u32 outputHandle = 0; __s32 inputFd = 0; //DRM_IOCTL_I915_GEM_USERPTR __u32 returnHandle = 0; __u64 gpuMemSize = 3u * MemoryConstants::gigaByte; //DRM_IOCTL_I915_GEM_MMAP uint64_t lockedPtr[4]; uint64_t storedGTTSize = 1ull << 47; uint64_t storedParamSseu = ULONG_MAX; virtual int handleRemainingRequests(unsigned long request, void *arg) { return -1; } private: const char *sysFsDefaultGpuPathToRestore = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_neo_create.cpp000066400000000000000000000021661363734646600311110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "drm/i915_drm.h" namespace NEO { class DrmMockDefault : public DrmMock { public: DrmMockDefault(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) { StoredRetVal = 0; StoredRetValForDeviceID = 0; StoredRetValForEUVal = 0; StoredRetValForSSVal = 0; StoredRetValForDeviceRevID = 0; StoredRetValForPooledEU = 0; StoredRetValForMinEUinPool = 0; 
setGtType(GTTYPE_GT1); } }; Drm **pDrmToReturnFromCreateFunc = nullptr; Drm *Drm::create(std::unique_ptr hwDeviceId, RootDeviceEnvironment &rootDeviceEnvironment) { rootDeviceEnvironment.setHwInfo(defaultHwInfo.get()); if (pDrmToReturnFromCreateFunc) { return *pDrmToReturnFromCreateFunc; } return new DrmMockDefault(rootDeviceEnvironment); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_os_memory_tests.cpp000066400000000000000000000020531363734646600322330ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/os_memory_linux.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::_; namespace NEO { class MockOSMemoryLinux : public OSMemoryLinux { public: static std::unique_ptr create() { return std::make_unique(); } MOCK_METHOD6(mmapWrapper, void *(void *, size_t, int, int, int, off_t)); MOCK_METHOD2(munmapWrapper, int(void *, size_t)); }; TEST(OSMemoryLinux, givenOSMemoryLinuxWhenReserveCpuAddressRangeIsCalledThenMinusOneIsPassedToMmapAsFdParam) { auto mockOSMemoryLinux = MockOSMemoryLinux::create(); EXPECT_CALL(*mockOSMemoryLinux, mmapWrapper(_, _, _, _, -1, _)); size_t size = 0x1024; auto reservedCpuAddr = mockOSMemoryLinux->reserveCpuAddressRange(size); EXPECT_CALL(*mockOSMemoryLinux, munmapWrapper(reservedCpuAddr, size)); mockOSMemoryLinux->releaseCpuAddressRange(reservedCpuAddr, size); } }; // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_residency_handler_tests.cpp000066400000000000000000000027321363734646600337100ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" #include using namespace NEO; struct DrmMemoryOperationsHandlerTest : public 
::testing::Test { void SetUp() override { drmMemoryOperationsHandler = std::make_unique(); allocationPtr = &graphicsAllocation; } MockGraphicsAllocation graphicsAllocation; GraphicsAllocation *allocationPtr; std::unique_ptr drmMemoryOperationsHandler; }; TEST_F(DrmMemoryOperationsHandlerTest, whenMakingResidentAllocaionExpectMakeResidentFail) { EXPECT_EQ(drmMemoryOperationsHandler->makeResident(ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::UNSUPPORTED); EXPECT_EQ(drmMemoryOperationsHandler->isResident(graphicsAllocation), MemoryOperationsStatus::UNSUPPORTED); } TEST_F(DrmMemoryOperationsHandlerTest, whenEvictingResidentAllocationExpectEvictFalse) { EXPECT_EQ(drmMemoryOperationsHandler->makeResident(ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::UNSUPPORTED); EXPECT_EQ(drmMemoryOperationsHandler->evict(graphicsAllocation), MemoryOperationsStatus::UNSUPPORTED); EXPECT_EQ(drmMemoryOperationsHandler->isResident(graphicsAllocation), MemoryOperationsStatus::UNSUPPORTED); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/drm_tests.cpp000066400000000000000000000306231363734646600301460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "gtest/gtest.h" #include #include using namespace NEO; using namespace std; TEST(DrmTest, GetDeviceID) { DrmMock *pDrm = new DrmMock; EXPECT_NE(nullptr, pDrm); pDrm->StoredDeviceID = 0x1234; int deviceID = 0; int ret = pDrm->getDeviceID(deviceID); EXPECT_EQ(0, ret); EXPECT_EQ(pDrm->StoredDeviceID, deviceID); delete pDrm; } TEST(DrmTest, 
GivenConfigFileWithWrongDeviceIDWhenFrequencyIsQueriedThenReturnZero) {
    DrmMock *pDrm = new DrmMock;
    EXPECT_NE(nullptr, pDrm);
    pDrm->StoredDeviceID = 0x4321;

    int maxFrequency = 0;
    int ret = pDrm->getMaxGpuFrequency(maxFrequency);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(0, maxFrequency);
    delete pDrm;
}

// A failing device-ID query is expected to propagate its error code and leave
// the frequency at its initial value.
// The mock lives on the stack (as in the OsContext tests below) so it is
// released even if something throws; plain `new` never yields nullptr, so the
// former null check could not fail.
TEST(DrmTest, GivenConfigFileWithWrongDeviceIDFailIoctlWhenFrequencyIsQueriedThenReturnZero) {
    DrmMock drmMock;
    drmMock.StoredDeviceID = 0x4321;
    drmMock.StoredRetValForDeviceID = -1;

    int maxFrequency = 0;
    int ret = drmMock.getMaxGpuFrequency(maxFrequency);
    EXPECT_EQ(-1, ret);
    EXPECT_EQ(0, maxFrequency);
}

// With both fixture files present, the query is expected to succeed and report 1000.
TEST(DrmTest, GivenValidConfigFileWhenFrequencyIsQueriedThenValidValueIsReturned) {
    int expectedMaxFrequency = 1000;
    DrmMock drmMock;
    drmMock.StoredDeviceID = 0x1234;

    string gpuFile = "test_files/devices/config";
    string gtMaxFreqFile = "test_files/devices/drm/card0/gt_max_freq_mhz";
    EXPECT_TRUE(fileExists(gpuFile));
    EXPECT_TRUE(fileExists(gtMaxFreqFile));

    int maxFrequency = 0;
    int ret = drmMock.getMaxGpuFrequency(maxFrequency);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(expectedMaxFrequency, maxFrequency);
}

// Pointing the sysfs path at "./" is expected to yield success with a zero frequency.
TEST(DrmTest, GivenNoConfigFileWhenFrequencyIsQueriedThenReturnZero) {
    DrmMock drmMock;
    drmMock.StoredDeviceID = 0x1234;
    // change directory
    drmMock.setSysFsDefaultGpuPath("./");

    int maxFrequency = 0;
    int ret = drmMock.getMaxGpuFrequency(maxFrequency);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(0, maxFrequency);
}

// Device ID and revision ID are expected to be read back exactly as stored in the mock.
TEST(DrmTest, GetRevisionID) {
    DrmMock drmMock;
    drmMock.StoredDeviceID = 0x1234;
    drmMock.StoredDeviceRevID = 0xB;

    int deviceID = 0;
    int ret = drmMock.getDeviceID(deviceID);
    EXPECT_EQ(0, ret);

    int revID = 0;
    ret = drmMock.getDeviceRevID(revID);
    EXPECT_EQ(0, ret);

    EXPECT_EQ(drmMock.StoredDeviceID, deviceID);
    EXPECT_EQ(drmMock.StoredDeviceRevID, revID);
}

// queryGttSize is expected to report the stored size on success and to leave
// the output at zero when the underlying call fails.
TEST(DrmTest, GivenDrmWhenAskedForGttSizeThenReturnCorrectValue) {
    // The template argument was missing here; every member touched below is a
    // DrmMock member.
    auto drm = make_unique<DrmMock>();
    uint64_t queryGttSize = 0;

    drm->StoredRetValForGetGttSize = 0;
    drm->storedGTTSize = 1ull << 31;
    EXPECT_EQ(0, drm->queryGttSize(queryGttSize));
    EXPECT_EQ(drm->storedGTTSize, queryGttSize);

    queryGttSize = 0;
    drm->StoredRetValForGetGttSize = -1;
    EXPECT_NE(0, drm->queryGttSize(queryGttSize));
    EXPECT_EQ(0u, queryGttSize);
}

// Preemption is expected to be reported only when the call succeeds AND all
// three scheduler capability bits are present.
TEST(DrmTest, GivenDrmWhenAskedForPreemptionCorrectValueReturned) {
    DrmMock drmMock;

    drmMock.StoredRetVal = 0;
    drmMock.StoredPreemptionSupport =
        I915_SCHEDULER_CAP_ENABLED |
        I915_SCHEDULER_CAP_PRIORITY |
        I915_SCHEDULER_CAP_PREEMPTION;
    drmMock.checkPreemptionSupport();
    EXPECT_TRUE(drmMock.isPreemptionSupported());

    drmMock.StoredPreemptionSupport = 0;
    drmMock.checkPreemptionSupport();
    EXPECT_FALSE(drmMock.isPreemptionSupported());

    drmMock.StoredRetVal = -1;
    drmMock.StoredPreemptionSupport =
        I915_SCHEDULER_CAP_ENABLED |
        I915_SCHEDULER_CAP_PRIORITY |
        I915_SCHEDULER_CAP_PREEMPTION;
    drmMock.checkPreemptionSupport();
    EXPECT_FALSE(drmMock.isPreemptionSupported());

    drmMock.StoredPreemptionSupport = 0;
    drmMock.checkPreemptionSupport();
    EXPECT_FALSE(drmMock.isPreemptionSupported());
}

// createDrmContext is expected to throw when the mocked call fails.
TEST(DrmTest, GivenDrmWhenAskedForContextThatFailsThenFalseIsReturned) {
    DrmMock drmMock;
    drmMock.StoredRetVal = -1;
    EXPECT_THROW(drmMock.createDrmContext(), std::exception);
    // Restored before the mock goes out of scope, as the original did before
    // `delete` (presumably so teardown does not see the forced failure — confirm).
    drmMock.StoredRetVal = 0;
}

// Each OsContextLinux is expected to own one DRM context id and to report it
// to the mock as destroyed when the context leaves scope.
TEST(DrmTest, givenDrmWhenOsContextIsCreatedThenCreateAndDestroyNewDrmOsContext) {
    DrmMock drmMock;
    uint32_t drmContextId1 = 123;
    uint32_t drmContextId2 = 456;

    {
        drmMock.StoredCtxId = drmContextId1;
        OsContextLinux osContext1(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);

        EXPECT_EQ(1u, osContext1.getDrmContextIds().size());
        EXPECT_EQ(drmContextId1, osContext1.getDrmContextIds()[0]);
        EXPECT_EQ(0u, drmMock.receivedDestroyContextId);

        {
            drmMock.StoredCtxId = drmContextId2;
            OsContextLinux osContext2(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
            EXPECT_EQ(1u, osContext2.getDrmContextIds().size());
            EXPECT_EQ(drmContextId2, osContext2.getDrmContextIds()[0]);
            EXPECT_EQ(0u, drmMock.receivedDestroyContextId);
        }
        EXPECT_EQ(drmContextId2, drmMock.receivedDestroyContextId);
    }

    EXPECT_EQ(drmContextId1, drmMock.receivedDestroyContextId);
    EXPECT_EQ(0u, drmMock.receivedContextParamRequestCount);
}

// Counts context-param requests: one per support check, plus one more on
// context creation in the second scope, where the support check succeeded.
TEST(DrmTest, givenDrmAndNegativeCheckNonPersistentContextsSupportWhenOsContextIsCreatedThenReceivedContextParamRequestCountReturnsCorrectValue) {
    DrmMock drmMock;
    uint32_t drmContextId1 = 123;
    drmMock.StoredCtxId = drmContextId1;
    auto expectedCount = 0u;

    {
        drmMock.StoredRetValForPersistant = -1;
        drmMock.checkNonPersistentContextsSupport();
        ++expectedCount;
        OsContextLinux osContext(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
        EXPECT_EQ(expectedCount, drmMock.receivedContextParamRequestCount);
    }
    {
        drmMock.StoredRetValForPersistant = 0;
        drmMock.checkNonPersistentContextsSupport();
        ++expectedCount;
        OsContextLinux osContext(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
        ++expectedCount;
        EXPECT_EQ(expectedCount, drmMock.receivedContextParamRequestCount);
    }
}

// Only the combination "preemption supported" + "low-priority flag set" is
// expected to trigger a context-param request.
TEST(DrmTest, givenDrmPreemptionEnabledAndLowPriorityEngineWhenCreatingOsContextThenCallSetContextPriorityIoctl) {
    DrmMock drmMock;
    drmMock.StoredCtxId = 123;

    drmMock.preemptionSupported = false;
    OsContextLinux osContext1(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
    OsContextLinux osContext2(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, true, false, false);
    EXPECT_EQ(0u, drmMock.receivedContextParamRequestCount);

    drmMock.preemptionSupported = true;
    OsContextLinux osContext3(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false);
    EXPECT_EQ(0u, drmMock.receivedContextParamRequestCount);

    OsContextLinux osContext4(drmMock, 0u, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, true, false, false);
    EXPECT_EQ(1u,
drmMock.receivedContextParamRequestCount); EXPECT_EQ(drmMock.StoredCtxId, drmMock.receivedContextParamRequest.ctx_id); EXPECT_EQ(static_cast(I915_CONTEXT_PARAM_PRIORITY), drmMock.receivedContextParamRequest.param); EXPECT_EQ(static_cast(-1023), drmMock.receivedContextParamRequest.value); EXPECT_EQ(0u, drmMock.receivedContextParamRequest.size); } TEST(DrmTest, getExecSoftPin) { DrmMock *pDrm = new DrmMock; int execSoftPin = 0; int ret = pDrm->getExecSoftPin(execSoftPin); EXPECT_EQ(0, ret); EXPECT_EQ(0, execSoftPin); pDrm->StoredExecSoftPin = 1; ret = pDrm->getExecSoftPin(execSoftPin); EXPECT_EQ(0, ret); EXPECT_EQ(1, execSoftPin); delete pDrm; } TEST(DrmTest, enableTurboBoost) { DrmMock *pDrm = new DrmMock; int ret = pDrm->enableTurboBoost(); EXPECT_EQ(0, ret); delete pDrm; } TEST(DrmTest, getEnabledPooledEu) { DrmMock *pDrm = new DrmMock; int enabled = 0; int ret = 0; pDrm->StoredHasPooledEU = -1; #if defined(I915_PARAM_HAS_POOLED_EU) ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(-1, enabled); pDrm->StoredHasPooledEU = 0; ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(0, enabled); pDrm->StoredHasPooledEU = 1; ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(1, enabled); pDrm->StoredRetValForPooledEU = -1; ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(-1, ret); EXPECT_EQ(1, enabled); #else ret = pDrm->getEnabledPooledEu(enabled); EXPECT_EQ(0, ret); EXPECT_EQ(0, enabled); #endif delete pDrm; } TEST(DrmTest, getMinEuInPool) { DrmMock *pDrm = new DrmMock; pDrm->StoredMinEUinPool = -1; int minEUinPool = 0; int ret = 0; #if defined(I915_PARAM_MIN_EU_IN_POOL) ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(-1, minEUinPool); pDrm->StoredMinEUinPool = 0; ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(0, minEUinPool); pDrm->StoredMinEUinPool = 1; ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(1, minEUinPool); 
pDrm->StoredRetValForMinEUinPool = -1; ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(-1, ret); EXPECT_EQ(1, minEUinPool); #else ret = pDrm->getMinEuInPool(minEUinPool); EXPECT_EQ(0, ret); EXPECT_EQ(0, minEUinPool); #endif delete pDrm; } TEST(DrmTest, givenDrmWhenGetErrnoIsCalledThenErrnoValueIsReturned) { DrmMock *pDrm = new DrmMock; EXPECT_NE(nullptr, pDrm); auto errnoFromDrm = pDrm->getErrno(); EXPECT_EQ(errno, errnoFromDrm); delete pDrm; } TEST(DrmTest, givenPlatformWhereGetSseuRetFailureWhenCallSetQueueSliceCountThenSliceCountIsNotSet) { uint64_t newSliceCount = 1; std::unique_ptr drm = std::make_unique(); drm->StoredRetValForGetSSEU = -1; drm->checkQueueSliceSupport(); EXPECT_FALSE(drm->sliceCountChangeSupported); EXPECT_FALSE(drm->setQueueSliceCount(newSliceCount)); EXPECT_NE(drm->getSliceMask(newSliceCount), drm->storedParamSseu); } TEST(DrmTest, whenCheckNonPeristentSupportIsCalledThenAreNonPersistentContextsSupportedReturnsCorrectValues) { std::unique_ptr drm = std::make_unique(); drm->StoredRetValForPersistant = -1; drm->checkNonPersistentContextsSupport(); EXPECT_FALSE(drm->areNonPersistentContextsSupported()); drm->StoredRetValForPersistant = 0; drm->checkNonPersistentContextsSupport(); EXPECT_TRUE(drm->areNonPersistentContextsSupported()); } TEST(DrmTest, givenPlatformWhereSetSseuRetFailureWhenCallSetQueueSliceCountThenReturnFalse) { uint64_t newSliceCount = 1; std::unique_ptr drm = std::make_unique(); drm->StoredRetValForSetSSEU = -1; drm->StoredRetValForGetSSEU = 0; drm->checkQueueSliceSupport(); EXPECT_TRUE(drm->sliceCountChangeSupported); EXPECT_FALSE(drm->setQueueSliceCount(newSliceCount)); } TEST(DrmTest, givenPlatformWithSupportToChangeSliceCountWhenCallSetQueueSliceCountThenReturnTrue) { uint64_t newSliceCount = 1; std::unique_ptr drm = std::make_unique(); drm->StoredRetValForSetSSEU = 0; drm->StoredRetValForSetSSEU = 0; drm->checkQueueSliceSupport(); EXPECT_TRUE(drm->sliceCountChangeSupported); 
EXPECT_TRUE(drm->setQueueSliceCount(newSliceCount)); drm_i915_gem_context_param_sseu sseu = {}; EXPECT_EQ(0, drm->getQueueSliceCount(&sseu)); EXPECT_EQ(drm->getSliceMask(newSliceCount), sseu.slice_mask); } namespace NEO { namespace SysCalls { extern uint32_t closeFuncCalled; extern int closeFuncArgPassed; } // namespace SysCalls } // namespace NEO TEST(HwDeviceId, whenHwDeviceIdIsDestroyedThenFileDescriptorIsClosed) { SysCalls::closeFuncCalled = 0; int fileDescriptor = 0x1234; { HwDeviceId hwDeviceId(fileDescriptor); } EXPECT_EQ(1u, SysCalls::closeFuncCalled); EXPECT_EQ(fileDescriptor, SysCalls::closeFuncArgPassed); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/file_logger_linux_tests.cpp000066400000000000000000000076661363734646600330740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_allocation.h" #include "opencl/test/unit_test/utilities/file_logger_tests.h" #include "test.h" using namespace NEO; TEST(FileLogger, GivenLogAllocationMemoryPoolFlagThenLogsCorrectInfo) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockDrmAllocation allocation(GraphicsAllocation::AllocationType::BUFFER, MemoryPool::System64KBPages); MockBufferObject bo; bo.handle = 4; allocation.bufferObjects[0] = &bo; fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = 
fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("AllocationType: BUFFER") != std::string::npos); EXPECT_TRUE(str.find("Handle: 4") != std::string::npos); } } TEST(FileLogger, GivenDrmAllocationWithoutBOThenNoHandleLogged) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockDrmAllocation allocation(GraphicsAllocation::AllocationType::BUFFER, MemoryPool::System64KBPages); fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_TRUE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("AllocationType: BUFFER") != std::string::npos); EXPECT_FALSE(str.find("Handle: 4") != std::string::npos); } } TEST(FileLogger, GivenLogAllocationMemoryPoolFlagSetFalseThenAllocationIsNotLogged) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockDrmAllocation allocation(GraphicsAllocation::AllocationType::BUFFER, MemoryPool::System64KBPages); fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream 
threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_FALSE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_FALSE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_FALSE(str.find("AllocationType: BUFFER") != std::string::npos); } } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.cpp000066400000000000000000000450231363734646600335610ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include namespace NEO { constexpr uint32_t hwConfigTestMidThreadBit = 1 << 8; constexpr uint32_t hwConfigTestThreadGroupBit = 1 << 9; constexpr uint32_t hwConfigTestMidBatchBit = 1 << 10; template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { FeatureTable *featureTable = &hwInfo->featureTable; featureTable->ftrGpGpuMidThreadLevelPreempt = 0; featureTable->ftrGpGpuThreadGroupLevelPreempt = 0; featureTable->ftrGpGpuMidBatchPreempt = 0; if (hwInfo->platform.usDeviceID == 30) { GT_SYSTEM_INFO *gtSystemInfo = &hwInfo->gtSystemInfo; gtSystemInfo->EdramSizeInKb = 128 * 1000; } if (hwInfo->platform.usDeviceID & hwConfigTestMidThreadBit) { featureTable->ftrGpGpuMidThreadLevelPreempt = 1; } if (hwInfo->platform.usDeviceID & hwConfigTestThreadGroupBit) { featureTable->ftrGpGpuThreadGroupLevelPreempt = 1; } if (hwInfo->platform.usDeviceID & hwConfigTestMidBatchBit) { 
featureTable->ftrGpGpuMidBatchPreempt = 1; } return (hwInfo->platform.usDeviceID == 10) ? -1 : 0; } template <> void HwInfoConfigHw::adjustPlatformForProductFamily(HardwareInfo *hwInfo) { } template <> cl_unified_shared_memory_capabilities_intel HwInfoConfigHw::getHostMemCapabilities() { return 0; } template <> cl_unified_shared_memory_capabilities_intel HwInfoConfigHw::getDeviceMemCapabilities() { return 0; } template <> cl_unified_shared_memory_capabilities_intel HwInfoConfigHw::getSingleDeviceSharedMemCapabilities() { return 0; } template <> cl_unified_shared_memory_capabilities_intel HwInfoConfigHw::getCrossDeviceSharedMemCapabilities() { return 0; } template <> cl_unified_shared_memory_capabilities_intel HwInfoConfigHw::getSharedSystemMemCapabilities() { return 0; } } // namespace NEO struct DummyHwConfig : HwInfoConfigHw { }; using namespace NEO; using namespace std; void mockCpuidex(int *cpuInfo, int functionId, int subfunctionId); void HwInfoConfigTestLinux::SetUp() { HwInfoConfigTest::SetUp(); osInterface = new OSInterface(); drm = new DrmMock(); osInterface->get()->setDrm(static_cast(drm)); drm->StoredDeviceID = pInHwInfo.platform.usDeviceID; drm->StoredDeviceRevID = 0; drm->StoredEUVal = pInHwInfo.gtSystemInfo.EUCount; drm->StoredSSVal = pInHwInfo.gtSystemInfo.SubSliceCount; rt_cpuidex_func = CpuInfo::cpuidexFunc; CpuInfo::cpuidexFunc = mockCpuidex; } void HwInfoConfigTestLinux::TearDown() { CpuInfo::cpuidexFunc = rt_cpuidex_func; delete osInterface; HwInfoConfigTest::TearDown(); } void mockCpuidex(int *cpuInfo, int functionId, int subfunctionId) { if (subfunctionId == 0) { cpuInfo[0] = 0x7F; } if (subfunctionId == 1) { cpuInfo[0] = 0x1F; } if (subfunctionId == 2) { cpuInfo[0] = 0; } } struct HwInfoConfigTestLinuxDummy : HwInfoConfigTestLinux { void SetUp() override { HwInfoConfigTestLinux::SetUp(); drm->StoredDeviceID = 1; drm->setGtType(GTTYPE_GT0); testPlatform->eRenderCoreFamily = defaultHwInfo->platform.eRenderCoreFamily; } void TearDown() override 
{ HwInfoConfigTestLinux::TearDown(); } DummyHwConfig hwConfig; }; TEST_F(HwInfoConfigTestLinuxDummy, dummyConfig) { int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); } GTTYPE GtTypes[] = { GTTYPE_GT1, GTTYPE_GT2, GTTYPE_GT1_5, GTTYPE_GT2_5, GTTYPE_GT3, GTTYPE_GT4, GTTYPE_GTA, GTTYPE_GTC, GTTYPE_GTX}; TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigGtTypes) { int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(GTTYPE_GT0, outHwInfo.platform.eGTType); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT1_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT2_5); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT3); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGT4); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTA); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTC); EXPECT_EQ(0u, outHwInfo.featureTable.ftrGTX); size_t arrSize = sizeof(GtTypes) / sizeof(GTTYPE); uint32_t FtrSum = 0; for (uint32_t i = 0; i < arrSize; i++) { drm->setGtType(GtTypes[i]); ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(GtTypes[i], outHwInfo.platform.eGTType); bool FtrPresent = (outHwInfo.featureTable.ftrGT1 || outHwInfo.featureTable.ftrGT1_5 || outHwInfo.featureTable.ftrGT2 || outHwInfo.featureTable.ftrGT2_5 || outHwInfo.featureTable.ftrGT3 || outHwInfo.featureTable.ftrGT4 || outHwInfo.featureTable.ftrGTA || outHwInfo.featureTable.ftrGTC || outHwInfo.featureTable.ftrGTX); EXPECT_TRUE(FtrPresent); FtrSum += (outHwInfo.featureTable.ftrGT1 + outHwInfo.featureTable.ftrGT1_5 + outHwInfo.featureTable.ftrGT2 + outHwInfo.featureTable.ftrGT2_5 + outHwInfo.featureTable.ftrGT3 + outHwInfo.featureTable.ftrGT4 + outHwInfo.featureTable.ftrGTA + outHwInfo.featureTable.ftrGTC + outHwInfo.featureTable.ftrGTX); } EXPECT_EQ(arrSize, FtrSum); } TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigEdramDetection) { 
drm->StoredDeviceID = 30; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(1u, outHwInfo.featureTable.ftrEDram); } TEST_F(HwInfoConfigTestLinuxDummy, givenEnabledPlatformCoherencyWhenConfiguringHwInfoThenIgnoreAndSetAsDisabled) { drm->StoredDeviceID = 21; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.ftrSupportsCoherency); } TEST_F(HwInfoConfigTestLinuxDummy, givenDisabledPlatformCoherencyWhenConfiguringHwInfoThenSetValidCapability) { drm->StoredDeviceID = 20; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.ftrSupportsCoherency); } TEST_F(HwInfoConfigTestLinuxDummy, dummyNegativeUnknownGtType) { drm->setGtType(GTTYPE_UNDEFINED); int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST_F(HwInfoConfigTestLinuxDummy, dummyNegativeUnknownDevId) { drm->StoredDeviceID = 0; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST_F(HwInfoConfigTestLinuxDummy, dummyNegativeFailGetDevId) { drm->StoredRetValForDeviceID = -2; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-2, ret); } TEST_F(HwInfoConfigTestLinuxDummy, dummydummyNegativeFailGetDevRevId) { drm->StoredRetValForDeviceRevID = -3; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-3, ret); } TEST_F(HwInfoConfigTestLinuxDummy, dummydummyNegativeFailGetEuCount) { drm->StoredRetValForEUVal = -4; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-4, ret); } TEST_F(HwInfoConfigTestLinuxDummy, dummydummyNegativeFailGetSsCount) { drm->StoredRetValForSSVal = -5; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-5, ret); } TEST_F(HwInfoConfigTestLinuxDummy, 
dummyNegativeFailingConfigureCustom) { drm->StoredDeviceID = 10; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST_F(HwInfoConfigTestLinuxDummy, dummyNegativeUnknownDeviceId) { drm->StoredDeviceID = 0; drm->setGtType(GTTYPE_GT1); auto hwConfig = DummyHwConfig{}; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(-1, ret); } TEST_F(HwInfoConfigTestLinuxDummy, whenConfigureHwInfoIsCalledThenAreNonPersistentContextsSupportedReturnsTrue) { int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_TRUE(drm->areNonPersistentContextsSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, whenConfigureHwInfoIsCalledAndPersitentContextIsUnsupportedThenAreNonPersistentContextsSupportedReturnsFalse) { drm->StoredPersistentContextsSupport = 0; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(drm->areNonPersistentContextsSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmEnabledMidThreadOn) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; drm->StoredPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->StoredDeviceID = hwConfigTestMidThreadBit; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::MidThread, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmEnabledThreadGroupOn) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread; drm->StoredPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->StoredDeviceID = hwConfigTestThreadGroupBit; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); 
EXPECT_EQ(PreemptionMode::ThreadGroup, outHwInfo.capabilityTable.defaultPreemptionMode);
    EXPECT_TRUE(drm->isPreemptionSupported());
}

// With PrintDebugMessages enabled, configuring is expected to print one line
// per GETPARAM query and never the word "UNKNOWN".
TEST_F(HwInfoConfigTestLinuxDummy, givenDebugFlagSetWhenConfiguringHwInfoThenPrintGetParamIoctlsOutput) {
    DebugManagerStateRestore restore;
    DebugManager.flags.PrintDebugMessages.set(true);

    testing::internal::CaptureStdout(); // start capturing
    int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface);
    EXPECT_EQ(0, ret);

    std::string euCount = std::to_string(outHwInfo.gtSystemInfo.EUCount);
    std::string subSliceCount = std::to_string(outHwInfo.gtSystemInfo.SubSliceCount);

    // The element type and count were missing from this declaration; there are
    // six initialisers and each one is searched for as a std::string below.
    std::array<std::string, 6> expectedStrings = {{"DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_CHIPSET_ID, output value: 1, retCode: 0",
                                                   "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_REVISION, output value: 0, retCode: 0",
                                                   "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_EU_TOTAL, output value: " + euCount + ", retCode: 0",
                                                   "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_SUBSLICE_TOTAL, output value: " + subSliceCount + ", retCode: 0",
                                                   "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_CHIPSET_ID, output value: 1, retCode: 0",
                                                   "DRM_IOCTL_I915_GETPARAM: param: I915_PARAM_HAS_SCHEDULER, output value: 7, retCode: 0"

    }};

    std::string output = testing::internal::GetCapturedStdout(); // stop capturing
    for (const auto &expectedString : expectedStrings) {
        EXPECT_NE(std::string::npos, output.find(expectedString));
    }

    EXPECT_EQ(std::string::npos, output.find("UNKNOWN"));
}

// Only the mid-batch feature bit is set: MidThread is expected to fall back to MidBatch.
TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmEnabledMidBatchOn) {
    pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread;
    drm->StoredPreemptionSupport =
        I915_SCHEDULER_CAP_ENABLED |
        I915_SCHEDULER_CAP_PRIORITY |
        I915_SCHEDULER_CAP_PREEMPTION;
    drm->StoredDeviceID = hwConfigTestMidBatchBit;
    int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(PreemptionMode::MidBatch, outHwInfo.capabilityTable.defaultPreemptionMode);
    EXPECT_TRUE(drm->isPreemptionSupported());
}

// No preemption feature bit in the device ID: the mode is expected to be
// Disabled even though the scheduler reports preemption support.
TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmEnabledNoPreemption) {
    pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread;
    drm->StoredPreemptionSupport =
        I915_SCHEDULER_CAP_ENABLED |
        I915_SCHEDULER_CAP_PRIORITY |
        I915_SCHEDULER_CAP_PREEMPTION;
    drm->StoredDeviceID = 1;
    int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(PreemptionMode::Disabled, outHwInfo.capabilityTable.defaultPreemptionMode);
    EXPECT_TRUE(drm->isPreemptionSupported());
}

// All feature bits set but the scheduler reports no capabilities: the mode is
// expected to be Disabled.
TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmDisabledAllPreemption) {
    pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread;
    drm->StoredPreemptionSupport = 0;
    drm->StoredDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit;
    int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(PreemptionMode::Disabled, outHwInfo.capabilityTable.defaultPreemptionMode);
    EXPECT_FALSE(drm->isPreemptionSupported());
}

// Everything available: the requested ThreadGroup default is expected to be kept.
TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmEnabledAllPreemptionDriverThreadGroup) {
    pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::ThreadGroup;
    drm->StoredPreemptionSupport =
        I915_SCHEDULER_CAP_ENABLED |
        I915_SCHEDULER_CAP_PRIORITY |
        I915_SCHEDULER_CAP_PREEMPTION;
    drm->StoredDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit;
    int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface);
    EXPECT_EQ(0, ret);
    EXPECT_EQ(PreemptionMode::ThreadGroup, outHwInfo.capabilityTable.defaultPreemptionMode);
    EXPECT_TRUE(drm->isPreemptionSupported());
}

TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmEnabledAllPreemptionDriverMidBatch) {
    pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::MidBatch;
    drm->StoredPreemptionSupport =
        I915_SCHEDULER_CAP_ENABLED |
        I915_SCHEDULER_CAP_PRIORITY |
I915_SCHEDULER_CAP_PREEMPTION; drm->StoredDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::MidBatch, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, dummyConfigPreemptionDrmEnabledAllPreemptionDriverDisabled) { pInHwInfo.capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled; drm->StoredPreemptionSupport = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY | I915_SCHEDULER_CAP_PREEMPTION; drm->StoredDeviceID = hwConfigTestMidThreadBit | hwConfigTestThreadGroupBit | hwConfigTestMidBatchBit; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(PreemptionMode::Disabled, outHwInfo.capabilityTable.defaultPreemptionMode); EXPECT_TRUE(drm->isPreemptionSupported()); } TEST_F(HwInfoConfigTestLinuxDummy, givenPlatformEnabledFtrCompressionWhenInitializingThenForceDisable) { pInHwInfo.capabilityTable.ftrRenderCompressedBuffers = true; pInHwInfo.capabilityTable.ftrRenderCompressedImages = true; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.ftrRenderCompressedBuffers); EXPECT_FALSE(outHwInfo.capabilityTable.ftrRenderCompressedImages); } TEST_F(HwInfoConfigTestLinuxDummy, givenPointerToHwInfoWhenConfigureHwInfoCalledThenRequiedSurfaceSizeIsSettedProperly) { EXPECT_EQ(MemoryConstants::pageSize, pInHwInfo.capabilityTable.requiredPreemptionSurfaceSize); int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(outHwInfo.gtSystemInfo.CsrSizeInMb * MemoryConstants::megaByte, outHwInfo.capabilityTable.requiredPreemptionSurfaceSize); } TEST_F(HwInfoConfigTestLinuxDummy, 
givenInstrumentationForHardwareIsEnabledOrDisabledWhenConfiguringHwInfoThenOverrideItUsingHaveInstrumentation) { int ret; pInHwInfo.capabilityTable.instrumentationEnabled = false; ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); ASSERT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.instrumentationEnabled); pInHwInfo.capabilityTable.instrumentationEnabled = true; ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); ASSERT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.instrumentationEnabled == haveInstrumentation); } TEST_F(HwInfoConfigTestLinuxDummy, givenGttSizeReturnedWhenInitializingHwInfoThenSetSvmFtr) { drm->storedGTTSize = MemoryConstants::max64BitAppAddress; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.ftrSvm); drm->storedGTTSize = MemoryConstants::max64BitAppAddress + 1; ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.ftrSvm); } TEST_F(HwInfoConfigTestLinuxDummy, givenGttSizeReturnedWhenInitializingHwInfoThenSetGpuAddressSpace) { drm->storedGTTSize = maxNBitValue(40) + 1; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_EQ(drm->storedGTTSize - 1, outHwInfo.capabilityTable.gpuAddressSpace); } TEST_F(HwInfoConfigTestLinuxDummy, givenFailingGttSizeIoctlWhenInitializingHwInfoThenSetDefaultValues) { drm->StoredRetValForGetGttSize = -1; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface); EXPECT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.ftrSvm); EXPECT_NE(0u, outHwInfo.capabilityTable.gpuAddressSpace); EXPECT_EQ(pInHwInfo.capabilityTable.gpuAddressSpace, outHwInfo.capabilityTable.gpuAddressSpace); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.h000066400000000000000000000012121363734646600332160ustar00rootroot00000000000000/* * 
Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/cpu_info.h" #include "opencl/test/unit_test/os_interface/hw_info_config_tests.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" using namespace NEO; using namespace std; struct HwInfoConfigTestLinux : public HwInfoConfigTest { void SetUp() override; void TearDown() override; OSInterface *osInterface; DrmMock *drm; void (*rt_cpuidex_func)(int *, int, int); }; linux_create_command_queue_with_properties_tests.cpp000066400000000000000000000215721363734646600402030ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/linux/mock_drm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "test.h" using namespace NEO; struct clCreateCommandQueueWithPropertiesLinux : public UltCommandStreamReceiverTest { void SetUp() override { UltCommandStreamReceiverTest::SetUp(); ExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); auto osInterface = new OSInterface(); osInterface->get()->setDrm(drm); executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); executionEnvironment->memoryManager.reset(new TestedDrmMemoryManager(*executionEnvironment)); mdevice = std::make_unique(MockDevice::create(executionEnvironment, 
0u)); clDevice = mdevice.get(); retVal = CL_SUCCESS; context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); } void TearDown() override { UltCommandStreamReceiverTest::TearDown(); } DrmMock *drm = new DrmMock(); std::unique_ptr mdevice = nullptr; std::unique_ptr context; cl_device_id clDevice; cl_int retVal; }; namespace ULT { TEST_F(clCreateCommandQueueWithPropertiesLinux, givenUnPossiblePropertiesWithClQueueSliceCountWhenCreateCommandQueueThenQueueNotCreated) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); newSliceCount = maxSliceCount + 1; cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); EXPECT_EQ(nullptr, cmdQ); EXPECT_EQ(CL_INVALID_QUEUE_PROPERTIES, retVal); } TEST_F(clCreateCommandQueueWithPropertiesLinux, givenZeroWithClQueueSliceCountWhenCreateCommandQueueThenSliceCountEqualDefaultSliceCount) { uint64_t newSliceCount = 0; cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject(cmdQ); EXPECT_EQ(commandQueue->getSliceCount(), QueueSliceCount::defaultSliceCount); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(clCreateCommandQueueWithPropertiesLinux, givenPossiblePropertiesWithClQueueSliceCountWhenCreateCommandQueueThenSliceCountIsSet) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; 
cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject(cmdQ); EXPECT_EQ(commandQueue->getSliceCount(), newSliceCount); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateCommandQueueWithPropertiesLinux, givenPropertiesWithClQueueSliceCountWhenCreateCommandQueueThenCallFlushTaskAndSliceCountIsSet) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; auto mockCsr = new TestedDrmCommandStreamReceiver(*mdevice->executionEnvironment); mdevice->resetCommandStreamReceiver(mockCsr); cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject>(cmdQ); auto &commandStream = commandQueue->getCS(1024u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = commandQueue->getSliceCount(); mockCsr->flushTask(commandStream, 0u, dsh, ioh, ssh, taskLevel, dispatchFlags, mdevice->getDevice()); auto expectedSliceMask = drm->getSliceMask(newSliceCount); EXPECT_EQ(expectedSliceMask, drm->storedParamSseu); drm_i915_gem_context_param_sseu sseu = {}; EXPECT_EQ(0, drm->getQueueSliceCount(&sseu)); EXPECT_EQ(expectedSliceMask, sseu.slice_mask); EXPECT_EQ(newSliceCount, mockCsr->lastSentSliceCount); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateCommandQueueWithPropertiesLinux, givenSameSliceCountAsRecentlySetWhenCreateCommandQueueThenSetQueueSliceCountNotCalled) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, 
CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; auto mockCsr = new TestedDrmCommandStreamReceiver(*mdevice->executionEnvironment); mdevice->resetCommandStreamReceiver(mockCsr); cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject>(cmdQ); auto &commandStream = commandQueue->getCS(1024u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = commandQueue->getSliceCount(); mockCsr->lastSentSliceCount = newSliceCount; mockCsr->flushTask(commandStream, 0u, dsh, ioh, ssh, taskLevel, dispatchFlags, mdevice->getDevice()); auto expectedSliceMask = drm->getSliceMask(newSliceCount); EXPECT_NE(expectedSliceMask, drm->storedParamSseu); drm_i915_gem_context_param_sseu sseu = {}; EXPECT_EQ(0, drm->getQueueSliceCount(&sseu)); EXPECT_NE(expectedSliceMask, sseu.slice_mask); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } HWTEST_F(clCreateCommandQueueWithPropertiesLinux, givenPropertiesWithClQueueSliceCountWhenCreateCommandQueueThenSetReturnFalseAndLastSliceCountNotModify) { uint64_t newSliceCount = 1; size_t maxSliceCount; clGetDeviceInfo(clDevice, CL_DEVICE_SLICE_COUNT_INTEL, sizeof(size_t), &maxSliceCount, nullptr); if (maxSliceCount > 1) { newSliceCount = maxSliceCount - 1; } cl_queue_properties properties[] = {CL_QUEUE_SLICE_COUNT_INTEL, newSliceCount, 0}; auto mockCsr = new TestedDrmCommandStreamReceiver(*mdevice->executionEnvironment); mdevice->resetCommandStreamReceiver(mockCsr); cl_command_queue cmdQ = clCreateCommandQueueWithProperties(context.get(), clDevice, properties, &retVal); ASSERT_NE(nullptr, cmdQ); ASSERT_EQ(CL_SUCCESS, retVal); auto commandQueue = castToObject>(cmdQ); auto 
&commandStream = commandQueue->getCS(1024u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.sliceCount = commandQueue->getSliceCount(); drm->StoredRetValForSetSSEU = -1; auto lastSliceCountBeforeFlushTask = mockCsr->lastSentSliceCount; mockCsr->flushTask(commandStream, 0u, dsh, ioh, ssh, taskLevel, dispatchFlags, mdevice->getDevice()); EXPECT_NE(newSliceCount, mockCsr->lastSentSliceCount); EXPECT_EQ(lastSliceCountBeforeFlushTask, mockCsr->lastSentSliceCount); retVal = clReleaseCommandQueue(cmdQ); EXPECT_EQ(CL_SUCCESS, retVal); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/mock_os_time_linux.h000066400000000000000000000015471363734646600315010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/linux/os_interface.h" #include "shared/source/os_interface/linux/os_time_linux.h" namespace NEO { class MockOSTimeLinux : public OSTimeLinux { public: MockOSTimeLinux(OSInterface *osInterface) : OSTimeLinux(osInterface){}; void setResolutionFunc(resolutionFunc_t func) { this->resolutionFunc = func; } void setGetTimeFunc(getTimeFunc_t func) { this->getTimeFunc = func; } void updateDrm(Drm *drm) { osInterface->get()->setDrm(drm); pDrm = drm; timestampTypeDetect(); } static std::unique_ptr create(OSInterface *osInterface) { return std::unique_ptr(new MockOSTimeLinux(osInterface)); } }; } // namespace NEO mock_performance_counters_linux.cpp000066400000000000000000000045231363734646600345360ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_performance_counters_linux.h" #include "opencl/test/unit_test/os_interface/linux/mock_os_time_linux.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" namespace NEO { 
////////////////////////////////////////////////////// // MockPerformanceCountersLinux::MockPerformanceCountersLinux ////////////////////////////////////////////////////// MockPerformanceCountersLinux::MockPerformanceCountersLinux(Device *device) : PerformanceCountersLinux() { } ////////////////////////////////////////////////////// // MockPerformanceCounters::create ////////////////////////////////////////////////////// std::unique_ptr MockPerformanceCounters::create(Device *device) { auto performanceCounters = std::unique_ptr(new MockPerformanceCountersLinux(device)); auto metricsLibrary = std::make_unique(); auto metricsLibraryDll = std::make_unique(); metricsLibrary->api = std::make_unique(); metricsLibrary->osLibrary = std::move(metricsLibraryDll); performanceCounters->setMetricsLibraryInterface(std::move(metricsLibrary)); return performanceCounters; } ////////////////////////////////////////////////////// // PerformanceCountersFixture::createPerfCounters ////////////////////////////////////////////////////// void PerformanceCountersFixture::createPerfCounters() { performanceCountersBase = MockPerformanceCounters::create(&device->getDevice()); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersFixture::SetUp() { device = std::make_unique(new MockDevice()); context = std::make_unique(device.get()); queue = std::make_unique(context.get(), device.get(), &queueProperties); osInterface = std::unique_ptr(new OSInterface()); device->setOSTime(new MockOSTimeLinux(osInterface.get())); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersFixture::TearDown() { } } // namespace NEO 
mock_performance_counters_linux.h000066400000000000000000000005341363734646600342010ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/os_interface/linux/performance_counters_linux.h" namespace NEO { class MockPerformanceCountersLinux : public PerformanceCountersLinux { public: MockPerformanceCountersLinux(Device *device); }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/options.cpp000066400000000000000000000020341363734646600276300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "common/gtsysinfo.h" #include "igfxfmid.h" namespace Os { /////////////////////////////////////////////////////////////////////////////// // These options determine the Linux specific behavior for // the runtime unit tests /////////////////////////////////////////////////////////////////////////////// #if defined(__linux__) const char *frontEndDllName = "libmock_igdfcl.so"; const char *igcDllName = "libmock_igc.so"; const char *libvaDllName = nullptr; const char *testDllName = "libtest_dynamic_lib.so"; const char *gmmDllName = "libmock_gmm.so"; const char *gmmInitFuncName = "initMockGmm"; const char *gmmDestroyFuncName = "destroyMockGmm"; const char *metricsLibraryDllName = ""; #endif const char *sysFsPciPath = "./test_files"; } // namespace Os NEO::OsLibrary *setAdapterInfo(const PLATFORM *platform, const GT_SYSTEM_INFO *gtSystemInfo, uint64_t gpuAddressSpace) { return nullptr; } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/os_interface_linux_tests.cpp000066400000000000000000000015171363734646600332440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/linux/os_context_linux.h" #include "shared/source/os_interface/linux/os_interface.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/linux/drm_mock.h" #include "gtest/gtest.h" namespace NEO { TEST(OsInterfaceTest, GivenLinuxWhenare64kbPagesEnabledThenFalse) { EXPECT_FALSE(OSInterface::are64kbPagesEnabled()); } TEST(OsInterfaceTest, GivenLinuxOsInterfaceWhenDeviceHandleQueriedthenZeroIsReturned) { OSInterface osInterface; EXPECT_EQ(0u, osInterface.getDeviceHandle()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/os_time_test.cpp000066400000000000000000000170241363734646600306400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/os_interface.h" #include "shared/source/os_interface/linux/os_time_linux.h" #include "opencl/test/unit_test/os_interface/linux/device_command_stream_fixture.h" #include "opencl/test/unit_test/os_interface/linux/mock_os_time_linux.h" #include "test.h" #include "gtest/gtest.h" #include static int actualTime = 0; int getTimeFuncFalse(clockid_t clkId, struct timespec *tp) throw() { return -1; } int getTimeFuncTrue(clockid_t clkId, struct timespec *tp) throw() { tp->tv_sec = 0; tp->tv_nsec = ++actualTime; return 0; } int resolutionFuncFalse(clockid_t clkId, struct timespec *res) throw() { return -1; } int resolutionFuncTrue(clockid_t clkId, struct timespec *res) throw() { res->tv_sec = 0; res->tv_nsec = 5; return 0; } using namespace NEO; struct DrmTimeTest : public ::testing::Test { public: void SetUp() override { osInterface = std::unique_ptr(new OSInterface()); osTime = 
MockOSTimeLinux::create(osInterface.get()); osTime->setResolutionFunc(resolutionFuncTrue); osTime->setGetTimeFunc(getTimeFuncTrue); } void TearDown() override { } std::unique_ptr osTime; std::unique_ptr osInterface; }; TEST_F(DrmTimeTest, DetectWithNullDrmNoCrash) { } TEST_F(DrmTimeTest, GetCpuTime) { uint64_t time = 0; auto error = osTime->getCpuTime(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); } TEST_F(DrmTimeTest, GetCpuTimeFail) { uint64_t time = 0; osTime->setGetTimeFunc(getTimeFuncFalse); auto error = osTime->getCpuTime(&time); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, GetGpuTime) { uint64_t time = 0; auto pDrm = new DrmMockTime(); osTime->updateDrm(pDrm); auto error = osTime->getGpuTime32(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); error = osTime->getGpuTime36(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); error = osTime->getGpuTimeSplitted(&time); EXPECT_TRUE(error); EXPECT_NE(0ULL, time); } TEST_F(DrmTimeTest, GetGpuTimeFails) { uint64_t time = 0; auto pDrm = new DrmMockFail(); osTime->updateDrm(pDrm); auto error = osTime->getGpuTime32(&time); EXPECT_FALSE(error); error = osTime->getGpuTime36(&time); EXPECT_FALSE(error); error = osTime->getGpuTimeSplitted(&time); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, GetCpuGpuTime) { TimeStampData CPUGPUTime01 = {0, 0}; TimeStampData CPUGPUTime02 = {0, 0}; auto pDrm = new DrmMockTime(); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime01.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime01.GPUTimeStamp); error = osTime->getCpuGpuTime(&CPUGPUTime02); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime02.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime02.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.GPUTimeStamp, CPUGPUTime01.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.CPUTimeinNS, CPUGPUTime01.CPUTimeinNS); } TEST_F(DrmTimeTest, GIVENDrmWHENGetCpuGpuTimeTHENPassed) { TimeStampData CPUGPUTime01 = {0, 0}; TimeStampData CPUGPUTime02 = {0, 0}; auto pDrm = new 
DrmMockTime(); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime01.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime01.GPUTimeStamp); error = osTime->getCpuGpuTime(&CPUGPUTime02); EXPECT_TRUE(error); EXPECT_NE(0ULL, CPUGPUTime02.CPUTimeinNS); EXPECT_NE(0ULL, CPUGPUTime02.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.GPUTimeStamp, CPUGPUTime01.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.CPUTimeinNS, CPUGPUTime01.CPUTimeinNS); } TEST_F(DrmTimeTest, givenGetCpuGpuTimeWhenItIsUnavailableThenReturnFalse) { TimeStampData CPUGPUTime = {0, 0}; auto error = osTime->getCpuGpuTime(&CPUGPUTime); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, GetCpuGpuTimeFails) { TimeStampData CPUGPUTime01 = {0, 0}; auto pDrm = new DrmMockFail(); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, GetCpuGpuTimeCpuFails) { TimeStampData CPUGPUTime01 = {0, 0}; auto pDrm = new DrmMockTime(); osTime->setGetTimeFunc(getTimeFuncFalse); osTime->updateDrm(pDrm); auto error = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_FALSE(error); } TEST_F(DrmTimeTest, detect) { auto drm = new DrmMockCustom; osTime->updateDrm(drm); { auto p = osTime->getGpuTime; EXPECT_EQ(p, &OSTimeLinux::getGpuTime36); } { drm->ioctl_res = -1; osTime->timestampTypeDetect(); auto p = osTime->getGpuTime; EXPECT_EQ(p, &OSTimeLinux::getGpuTime32); } DrmMockCustom::IoctlResExt ioctlToPass = {1, 0}; { drm->reset(); drm->ioctl_res = -1; drm->ioctl_res_ext = &ioctlToPass; // 2nd ioctl is successful osTime->timestampTypeDetect(); auto p = osTime->getGpuTime; EXPECT_EQ(p, &OSTimeLinux::getGpuTimeSplitted); drm->ioctl_res_ext = &drm->NONE; } } TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultResolutionIsReturned) { auto defaultResolution = defaultHwInfo->capabilityTable.defaultProfilingTimerResolution; auto drm = new DrmMockCustom(); osTime->updateDrm(drm); drm->getParamRetValue = 0; drm->ioctl_res = -1; 
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, defaultResolution); } TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenNoDrmThenDefaultResolutionIsReturned) { osTime->updateDrm(nullptr); auto defaultResolution = defaultHwInfo->capabilityTable.defaultProfilingTimerResolution; auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, defaultResolution); } TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlSuccedsThenCorrectResolutionIsReturned) { auto drm = new DrmMockCustom(); osTime->updateDrm(drm); // 19200000 is frequency yelding 52.083ns resolution drm->getParamRetValue = 19200000; drm->ioctl_res = 0; auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, 52.08333333333333); } TEST_F(DrmTimeTest, givenAlwaysFailingResolutionFuncWhenGetHostTimerResolutionIsCalledThenReturnsZero) { osTime->setResolutionFunc(resolutionFuncFalse); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(0, retVal); } TEST_F(DrmTimeTest, givenAlwaysPassingResolutionFuncWhenGetHostTimerResolutionIsCalledThenReturnsNonzero) { osTime->setResolutionFunc(resolutionFuncTrue); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(5, retVal); } TEST_F(DrmTimeTest, givenAlwaysFailingResolutionFuncWhenGetCpuRawTimestampIsCalledThenReturnsZero) { osTime->setResolutionFunc(resolutionFuncFalse); auto retVal = osTime->getCpuRawTimestamp(); EXPECT_EQ(0ull, retVal); } TEST_F(DrmTimeTest, givenAlwaysFailingGetTimeFuncWhenGetCpuRawTimestampIsCalledThenReturnsZero) { osTime->setGetTimeFunc(getTimeFuncFalse); auto retVal = osTime->getCpuRawTimestamp(); EXPECT_EQ(0ull, retVal); } TEST_F(DrmTimeTest, givenAlwaysPassingResolutionFuncWhenGetCpuRawTimestampIsCalledThenReturnsNonzero) { actualTime = 4; auto retVal = osTime->getCpuRawTimestamp(); EXPECT_EQ(1ull, retVal); } 
performance_counters_linux_tests.cpp000066400000000000000000000011341363734646600347420ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/linux/mock_performance_counters_linux.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "gtest/gtest.h" using namespace NEO; struct PerformanceCountersLinuxTest : public PerformanceCountersFixture, public ::testing::Test { void SetUp() override { PerformanceCountersFixture::SetUp(); } void TearDown() override { PerformanceCountersFixture::TearDown(); } }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/self_lib_lin.cpp000066400000000000000000000002571363734646600305630ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ extern "C" __attribute__((visibility("default"))) void selfDynamicLibraryFunc() { } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/linux/sys_calls_linux_ult.cpp000066400000000000000000000021351363734646600322360ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/linux/sys_calls.h" #include "drm/i915_drm.h" #include #include #include #include namespace NEO { namespace SysCalls { uint32_t closeFuncCalled = 0u; int closeFuncArgPassed = 0; constexpr int fakeFileDescriptor = 123; int close(int fileDescriptor) { closeFuncCalled++; closeFuncArgPassed = fileDescriptor; return 0; } int open(const char *file, int flags) { if (strcmp(file, "/dev/dri/by-path/pci-0000:invalid-render") == 0) { return 0; } if (strcmp(file, "/dev/dri/renderD129") == 0) { return fakeFileDescriptor; } return 0; } int ioctl(int fileDescriptor, unsigned long int request, void *arg) { if (fileDescriptor == fakeFileDescriptor) { if (request == 
DRM_IOCTL_VERSION) { auto pVersion = reinterpret_cast(arg); snprintf(pVersion->name, pVersion->name_len, "i915"); } } return 0; } } // namespace SysCalls } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/mock_performance_counters.cpp000066400000000000000000000361421363734646600322410ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_performance_counters.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/os_interface.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" using namespace MetricsLibraryApi; namespace NEO { ////////////////////////////////////////////////////// // MockMetricsLibrary::open ////////////////////////////////////////////////////// bool MockMetricsLibrary::open() { if (validOpen) { ++openCount; return true; } else { return false; } } ////////////////////////////////////////////////////// // MockMetricsLibrary::contextCreate ////////////////////////////////////////////////////// bool MockMetricsLibrary::contextCreate(const ClientType_1_0 &client, ClientData_1_0 &clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle) { if (client.Api != MetricsLibraryApi::ClientApi::OpenCL) { return false; } handle.data = reinterpret_cast(this); ++contextCount; return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::contextDelete ////////////////////////////////////////////////////// bool MockMetricsLibrary::contextDelete(const ContextHandle_1_0 &handle) { if (!handle.IsValid()) { return false; } --contextCount; return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersCreate ////////////////////////////////////////////////////// bool MockMetricsLibrary::hwCountersCreate(const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 mmio, QueryHandle_1_0 
&handle) { ++queryCount; return true; }; ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersDelete ////////////////////////////////////////////////////// bool MockMetricsLibrary::hwCountersDelete(const QueryHandle_1_0 &handle) { --queryCount; return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersGetReport ////////////////////////////////////////////////////// bool MockMetricsLibrary::hwCountersGetReport(const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data) { return validGetData; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersGetApiReportSize ////////////////////////////////////////////////////// uint32_t MockMetricsLibrary::hwCountersGetApiReportSize() { return 1; } ////////////////////////////////////////////////////// // MockMetricsLibrary::hwCountersGetGpuReportSize ////////////////////////////////////////////////////// uint32_t MockMetricsLibrary::hwCountersGetGpuReportSize() { return sizeof(HwPerfCounter); } ////////////////////////////////////////////////////// // MockMetricsLibrary::commandBufferGet ////////////////////////////////////////////////////// bool MockMetricsLibrary::commandBufferGet(CommandBufferData_1_0 &data) { MI_REPORT_PERF_COUNT mirpc = {}; mirpc.init(); DEBUG_BREAK_IF(data.Data == nullptr); memcpy(data.Data, &mirpc, sizeof(mirpc)); return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::commandBufferGetSize ////////////////////////////////////////////////////// bool MockMetricsLibrary::commandBufferGetSize(const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize) { commandBufferSize.GpuMemorySize = sizeof(MI_REPORT_PERF_COUNT); return true; } ////////////////////////////////////////////////////// // MockMetricsLibrary::getProcAddress 
////////////////////////////////////////////////////// void *MockMetricsLibraryDll::getProcAddress(const std::string &procName) { if (procName == METRICS_LIBRARY_CONTEXT_CREATE_1_0) { return validContextCreate ? reinterpret_cast(&MockMetricsLibraryValidInterface::ContextCreate) : nullptr; } else if (procName == METRICS_LIBRARY_CONTEXT_DELETE_1_0) { return validContextDelete ? reinterpret_cast(&MockMetricsLibraryValidInterface::ContextDelete) : nullptr; } else { return nullptr; } } ////////////////////////////////////////////////////// // MockMetricsLibrary::isLoaded ////////////////////////////////////////////////////// bool MockMetricsLibraryDll::isLoaded() { return validIsLoaded; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ContextCreate ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::ContextCreate(ClientType_1_0 clientType, ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle) { // Validate input. EXPECT_EQ(clientType.Api, ClientApi::OpenCL); // Library handle. auto library = new MockMetricsLibraryValidInterface(); handle->data = library; EXPECT_TRUE(handle->IsValid()); // Context count. library->contextCount++; EXPECT_EQ(library->contextCount, 1u); return handle->IsValid() ? StatusCode::Success : StatusCode::Failed; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ContextDelete ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::ContextDelete(const ContextHandle_1_0 handle) { auto validHandle = handle.IsValid(); auto library = static_cast(handle.data); // Validate input. EXPECT_TRUE(validHandle); EXPECT_TRUE(validHandle); EXPECT_EQ(--library->contextCount, 0u); // Delete handle. delete library; return validHandle ? 
StatusCode::Success : StatusCode::IncorrectObject; } ////////////////////////////////////////////////////// // MockMetricsLibraryInterface::QueryCreate ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::QueryCreate(const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle) { EXPECT_NE(handle, nullptr); EXPECT_NE(createData, nullptr); EXPECT_GE(createData->Slots, 1u); EXPECT_TRUE(createData->HandleContext.IsValid()); EXPECT_EQ(createData->Type, ObjectType::QueryHwCounters); handle->data = new uint32_t(0); return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::QueryDelete ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::QueryDelete(const QueryHandle_1_0 handle) { EXPECT_TRUE(handle.IsValid()); delete (uint32_t *)handle.data; return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::CommandBufferGetSize ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::CommandBufferGetSize(const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size) { auto library = static_cast(data->HandleContext.data); EXPECT_NE(data, nullptr); EXPECT_TRUE(data->HandleContext.IsValid()); EXPECT_TRUE(data->QueryHwCounters.Handle.IsValid()); EXPECT_EQ(data->Type, GpuCommandBufferType::Render); EXPECT_EQ(data->CommandsType, ObjectType::QueryHwCounters); EXPECT_NE(size, nullptr); size->GpuMemorySize = library->validGpuReportSize ? 123 : 0; return library->validGpuReportSize ? 
StatusCode::Success : StatusCode::Failed; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::CommandBufferGet ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::CommandBufferGet(const CommandBufferData_1_0 *data) { EXPECT_NE(data, nullptr); EXPECT_TRUE(data->HandleContext.IsValid()); EXPECT_TRUE(data->QueryHwCounters.Handle.IsValid()); EXPECT_EQ(data->Type, GpuCommandBufferType::Render); EXPECT_EQ(data->CommandsType, ObjectType::QueryHwCounters); EXPECT_NE(data->Data, nullptr); EXPECT_GT(data->Size, 0u); return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::CommandBufferGet ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::GetParameter(const ParameterType parameter, ValueType *type, TypedValue_1_0 *value) { EXPECT_NE(type, nullptr); EXPECT_NE(value, nullptr); switch (parameter) { case ParameterType::QueryHwCountersReportApiSize: *type = ValueType::Uint32; value->ValueUInt32 = 123; break; case ParameterType::QueryHwCountersReportGpuSize: *type = ValueType::Uint32; value->ValueUInt32 = 123; break; default: EXPECT_TRUE(false); break; } return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ConfigurationCreate ////////////////////////////////////////////////////// StatusCode ML_STDCALL MockMetricsLibraryValidInterface::ConfigurationCreate(const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle) { EXPECT_NE(createData, nullptr); EXPECT_NE(handle, nullptr); EXPECT_TRUE(createData->HandleContext.IsValid()); const bool validType = (createData->Type == ObjectType::ConfigurationHwCountersOa) || (createData->Type == ObjectType::ConfigurationHwCountersUser); // Mock overrides auto api = static_cast(createData->HandleContext.data); if (!api->validCreateConfigurationOa && 
(createData->Type == ObjectType::ConfigurationHwCountersOa)) { return StatusCode::Failed; } if (!api->validCreateConfigurationUser && (createData->Type == ObjectType::ConfigurationHwCountersUser)) { return StatusCode::Failed; } EXPECT_TRUE(validType); handle->data = api; return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ConfigurationActivate ////////////////////////////////////////////////////// StatusCode ML_STDCALL MockMetricsLibraryValidInterface::ConfigurationActivate(const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData) { auto api = static_cast(handle.data); return api->validActivateConfigurationOa ? StatusCode::Success : StatusCode::Failed; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::ConfigurationDelete ////////////////////////////////////////////////////// StatusCode ML_STDCALL MockMetricsLibraryValidInterface::ConfigurationDelete(const ConfigurationHandle_1_0 handle) { EXPECT_TRUE(handle.IsValid()); return StatusCode::Success; } ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface::GetData ////////////////////////////////////////////////////// StatusCode MockMetricsLibraryValidInterface::GetData(GetReportData_1_0 *data) { EXPECT_NE(data, nullptr); EXPECT_EQ(data->Type, ObjectType::QueryHwCounters); EXPECT_TRUE(data->Query.Handle.IsValid()); EXPECT_GE(data->Query.Slot, 0u); EXPECT_GT(data->Query.SlotsCount, 0u); EXPECT_NE(data->Query.Data, nullptr); EXPECT_GT(data->Query.DataSize, 0u); return StatusCode::Success; } ////////////////////////////////////////////////////// // PerformanceCountersDeviceFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersDeviceFixture::SetUp() { createFunc = Device::createPerformanceCountersFunc; Device::createPerformanceCountersFunc = MockPerformanceCounters::create; } 
////////////////////////////////////////////////////// // PerformanceCountersDeviceFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersDeviceFixture::TearDown() { Device::createPerformanceCountersFunc = createFunc; } ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersMetricsLibraryFixture::SetUp() { PerformanceCountersFixture::SetUp(); } ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersMetricsLibraryFixture::TearDown() { device->setPerfCounters(nullptr); PerformanceCountersFixture::TearDown(); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::PerformanceCountersFixture ////////////////////////////////////////////////////// PerformanceCountersFixture::PerformanceCountersFixture() { executionEnvironment = std::make_unique(); rootDeviceEnvironment = std::make_unique(*executionEnvironment); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::~PerformanceCountersFixture ////////////////////////////////////////////////////// PerformanceCountersFixture::~PerformanceCountersFixture() { } ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture::createPerformanceCounters ////////////////////////////////////////////////////// void PerformanceCountersMetricsLibraryFixture::createPerformanceCounters(const bool validMetricsLibraryApi, const bool mockMetricsLibrary) { performanceCountersBase = MockPerformanceCounters::create(&device->getDevice()); auto metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); auto metricsLibraryDll = std::make_unique(); EXPECT_NE(performanceCountersBase, nullptr); 
EXPECT_NE(metricsLibraryInterface, nullptr); device->setPerfCounters(performanceCountersBase.get()); // Attached mock version of metrics library interface. if (mockMetricsLibrary) { performanceCountersBase->setMetricsLibraryInterface(std::make_unique()); metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); } else { performanceCountersBase->setMetricsLibraryInterface(std::make_unique()); metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); } if (validMetricsLibraryApi) { metricsLibraryInterface->api = std::make_unique(); metricsLibraryInterface->osLibrary = std::move(metricsLibraryDll); } else { metricsLibraryDll->validContextCreate = false; metricsLibraryDll->validContextDelete = false; metricsLibraryDll->validIsLoaded = false; metricsLibraryInterface->api = std::make_unique(); metricsLibraryInterface->osLibrary = std::move(metricsLibraryDll); } EXPECT_NE(metricsLibraryInterface->api, nullptr); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/mock_performance_counters.h000066400000000000000000000301121363734646600316750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "instrumentation.h" namespace NEO { ////////////////////////////////////////////////////// // Metrics Library types ////////////////////////////////////////////////////// using MetricsLibraryApi::ClientApi; using MetricsLibraryApi::ClientData_1_0; using MetricsLibraryApi::ClientGen; using MetricsLibraryApi::ClientType_1_0; using MetricsLibraryApi::CommandBufferData_1_0; using 
MetricsLibraryApi::CommandBufferSize_1_0; using MetricsLibraryApi::ConfigurationHandle_1_0; using MetricsLibraryApi::ContextCreateData_1_0; using MetricsLibraryApi::ContextHandle_1_0; using MetricsLibraryApi::GpuMemory_1_0; using MetricsLibraryApi::QueryHandle_1_0; ////////////////////////////////////////////////////// // MI_REPORT_PERF_COUNT definition for all GENs ////////////////////////////////////////////////////// struct MI_REPORT_PERF_COUNT { uint32_t DwordLength : BITFIELD_RANGE(0, 5); uint32_t Reserved_6 : BITFIELD_RANGE(6, 22); uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); uint32_t CommandType : BITFIELD_RANGE(29, 31); uint64_t UseGlobalGtt : BITFIELD_RANGE(0, 0); uint64_t Reserved_33 : BITFIELD_RANGE(1, 3); uint64_t CoreModeEnable : BITFIELD_RANGE(4, 4); uint64_t Reserved_37 : BITFIELD_RANGE(5, 5); uint64_t MemoryAddress : BITFIELD_RANGE(6, 63); uint32_t ReportId; typedef enum tagDWORD_LENGTH { DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x2, } DWORD_LENGTH; typedef enum tagMI_COMMAND_OPCODE { MI_COMMAND_OPCODE_MI_REPORT_PERF_COUNT = 0x28, } MI_COMMAND_OPCODE; typedef enum tagCOMMAND_TYPE { COMMAND_TYPE_MI_COMMAND = 0x0, } COMMAND_TYPE; inline void init(void) { memset(this, 0, sizeof(MI_REPORT_PERF_COUNT)); DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; MiCommandOpcode = MI_COMMAND_OPCODE_MI_REPORT_PERF_COUNT; CommandType = COMMAND_TYPE_MI_COMMAND; } }; // clang-format off ////////////////////////////////////////////////////// // MockMetricsLibrary ////////////////////////////////////////////////////// class MockMetricsLibrary : public MetricsLibrary { public: uint32_t openCount = 0; uint32_t contextCount = 0; uint32_t queryCount = 0; bool validOpen = true; bool validGetData = true; // Library open / close functions. bool open() override; // Context create / destroy functions. 
bool contextCreate (const ClientType_1_0 &client, ClientData_1_0& clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle) override; bool contextDelete (const ContextHandle_1_0 &handle) override; // HwCounters functions. bool hwCountersCreate (const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 mmio, QueryHandle_1_0 &handle) override; bool hwCountersDelete (const QueryHandle_1_0 &handle) override; bool hwCountersGetReport (const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data) override; uint32_t hwCountersGetApiReportSize() override; uint32_t hwCountersGetGpuReportSize() override; // Command buffer functions. bool commandBufferGet (CommandBufferData_1_0 &data) override; bool commandBufferGetSize (const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize) override; // Oa configuration functions. bool oaConfigurationCreate (const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle) override { return true; } bool oaConfigurationDelete (const ConfigurationHandle_1_0 &handle) override { return true; } bool oaConfigurationActivate (const ConfigurationHandle_1_0 &handle) override { return true; } bool oaConfigurationDeactivate (const ConfigurationHandle_1_0 &handle) override { return true; } // User mmio configuration functions. 
bool userConfigurationCreate (const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle) override { return true; } bool userConfigurationDelete (const ConfigurationHandle_1_0 &handle) override { return true; } }; ////////////////////////////////////////////////////// // MockMetricsLibraryValidInterface ////////////////////////////////////////////////////// class MockMetricsLibraryValidInterface: public MetricsLibraryInterface { public: uint32_t contextCount = 0; bool validCreateConfigurationOa = true; bool validCreateConfigurationUser = true; bool validActivateConfigurationOa = true; bool validGpuReportSize = true; static StatusCode ML_STDCALL ContextCreate ( ClientType_1_0 clientType, ContextCreateData_1_0* createData, ContextHandle_1_0* handle ); static StatusCode ML_STDCALL ContextDelete (const ContextHandle_1_0 handle); static StatusCode ML_STDCALL GetParameter (const ParameterType parameter, ValueType *type, TypedValue_1_0 *value); static StatusCode ML_STDCALL CommandBufferGet (const CommandBufferData_1_0 *data); static StatusCode ML_STDCALL CommandBufferGetSize (const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size); static StatusCode ML_STDCALL QueryCreate (const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle); static StatusCode ML_STDCALL QueryDelete (const QueryHandle_1_0 handle); static StatusCode ML_STDCALL ConfigurationCreate (const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle); static StatusCode ML_STDCALL ConfigurationActivate (const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData); static StatusCode ML_STDCALL ConfigurationDeactivate (const ConfigurationHandle_1_0 handle) { return StatusCode::Success; } static StatusCode ML_STDCALL ConfigurationDelete (const ConfigurationHandle_1_0 handle); static StatusCode ML_STDCALL GetData (GetReportData_1_0 *data); MockMetricsLibraryValidInterface() { contextCreate = &ContextCreate; contextDelete = &ContextDelete; 
functions.GetParameter = &GetParameter; functions.CommandBufferGet = &CommandBufferGet; functions.CommandBufferGetSize = &CommandBufferGetSize; functions.QueryCreate = &QueryCreate; functions.QueryDelete = &QueryDelete; functions.ConfigurationCreate = &ConfigurationCreate; functions.ConfigurationActivate = &ConfigurationActivate; functions.ConfigurationDeactivate = &ConfigurationDeactivate; functions.ConfigurationDelete = &ConfigurationDelete; functions.GetData = &GetData; } }; ////////////////////////////////////////////////////// // MockMetricsLibraryInvalidInterface ////////////////////////////////////////////////////// class MockMetricsLibraryInvalidInterface: public MetricsLibraryInterface { public: static StatusCode ML_STDCALL ContextCreate ( ClientType_1_0 clientType, ContextCreateData_1_0* createData, ContextHandle_1_0* handle ){ return StatusCode::Failed;} static StatusCode ML_STDCALL ContextDelete (const ContextHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL GetParameter (const ParameterType parameter, ValueType *type, TypedValue_1_0 *value){ return StatusCode::Failed;} static StatusCode ML_STDCALL CommandBufferGet (const CommandBufferData_1_0 *data){ return StatusCode::Failed;} static StatusCode ML_STDCALL CommandBufferGetSize (const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size){ return StatusCode::Failed;} static StatusCode ML_STDCALL QueryCreate (const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL QueryDelete (const QueryHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationCreate (const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationActivate (const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationDeactivate 
(const ConfigurationHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL ConfigurationDelete (const ConfigurationHandle_1_0 handle){ return StatusCode::Failed;} static StatusCode ML_STDCALL GetData (GetReportData_1_0 *data){ return StatusCode::Failed;} MockMetricsLibraryInvalidInterface() { contextCreate = &ContextCreate; contextDelete = &ContextDelete; functions.GetParameter = &GetParameter; functions.CommandBufferGet = &CommandBufferGet; functions.CommandBufferGetSize = &CommandBufferGetSize; functions.QueryCreate = &QueryCreate; functions.QueryDelete = &QueryDelete; functions.ConfigurationCreate = &ConfigurationCreate; functions.ConfigurationActivate = &ConfigurationActivate; functions.ConfigurationDeactivate = &ConfigurationDeactivate; functions.ConfigurationDelete = &ConfigurationDelete; functions.GetData = &GetData; } }; // clang-format on ////////////////////////////////////////////////////// // MockMetricsLibraryDll ////////////////////////////////////////////////////// class MockMetricsLibraryDll : public OsLibrary { public: bool validContextCreate = true; bool validContextDelete = true; bool validIsLoaded = true; void *getProcAddress(const std::string &procName) override; bool isLoaded() override; }; ////////////////////////////////////////////////////// // MockPerformanceCounters ////////////////////////////////////////////////////// class MockPerformanceCounters { public: static std::unique_ptr create(Device *device); }; ////////////////////////////////////////////////////// // PerformanceCountersDeviceFixture ////////////////////////////////////////////////////// struct PerformanceCountersDeviceFixture { virtual void SetUp(); virtual void TearDown(); decltype(&PerformanceCounters::create) createFunc; }; struct MockExecutionEnvironment; struct RootDeviceEnvironment; ///////////////////////////////////////////////////// // PerformanceCountersFixture ////////////////////////////////////////////////////// struct 
PerformanceCountersFixture { PerformanceCountersFixture(); ~PerformanceCountersFixture(); virtual void SetUp(); virtual void TearDown(); virtual void createPerfCounters(); cl_queue_properties queueProperties = {}; std::unique_ptr device; std::unique_ptr context; std::unique_ptr queue; std::unique_ptr performanceCountersBase; std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; std::unique_ptr osInterface; }; ////////////////////////////////////////////////////// // PerformanceCountersMetricsLibraryFixture ////////////////////////////////////////////////////// struct PerformanceCountersMetricsLibraryFixture : PerformanceCountersFixture { void SetUp() override; void TearDown() override; void createPerformanceCounters(const bool validMetricsLibraryApi, const bool mockMatricsLibrary); std::unique_ptr performanceCountersBase; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/os_context_tests.cpp000066400000000000000000000025601363734646600304110ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(OSContext, whenCreatingDefaultOsContextThenExpectInitializedAlways) { OsContext *osContext = OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); EXPECT_TRUE(osContext->isInitialized()); EXPECT_FALSE(osContext->isLowPriority()); EXPECT_FALSE(osContext->isInternalEngine()); EXPECT_FALSE(osContext->isRootDevice()); delete osContext; } TEST(OSContext, givenLowPriorityRootDeviceInternalAreTrueWhenCreatingDefaultOsContextThenExpectGettersTrue) { OsContext *osContext = OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, true, true, true); EXPECT_TRUE(osContext->isLowPriority()); EXPECT_TRUE(osContext->isInternalEngine()); EXPECT_TRUE(osContext->isRootDevice()); delete osContext; } 
TEST(OSContext, givenOsContextCreatedDefaultIsFalseWhenSettingTrueThenFlagTrueReturned) { OsContext *osContext = OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); EXPECT_FALSE(osContext->isDefaultContext()); osContext->setDefaultContext(true); EXPECT_TRUE(osContext->isDefaultContext()); delete osContext; } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/os_interface_tests.cpp000066400000000000000000000010421363734646600306570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "gtest/gtest.h" #include TEST(OSInterface, NonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(OSInterface, NonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/os_library_tests.cpp000066400000000000000000000070401363734646600303670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #if defined(_WIN32) #include "shared/source/os_interface/windows/os_library_win.h" #elif defined(__linux__) #include "shared/source/os_interface/linux/os_library_linux.h" #endif #include "shared/source/os_interface/os_library.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "test.h" #include "gtest/gtest.h" #include namespace Os { extern const char *frontEndDllName; extern const char *igcDllName; extern const char *testDllName; } // namespace Os const std::string fakeLibName = "_fake_library_name_"; const std::string fnName = "testDynamicLibraryFunc"; using namespace NEO; TEST(OSLibraryTest, whenLibraryNameIsEmptyThenCurrentProcesIsUsedAsLibrary) { std::unique_ptr library{OsLibrary::load("")}; EXPECT_NE(nullptr, library); void *ptr = 
library->getProcAddress("selfDynamicLibraryFunc"); EXPECT_NE(nullptr, ptr); } TEST(OSLibraryTest, CreateFake) { OsLibrary *library = OsLibrary::load(fakeLibName); EXPECT_EQ(nullptr, library); } TEST(OSLibraryTest, whenLibraryNameIsValidThenLibraryIsLoadedCorrectly) { std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); } TEST(OSLibraryTest, whenSymbolNameIsValidThenGetProcAddressReturnsNonNullPointer) { std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); void *ptr = library->getProcAddress(fnName); EXPECT_NE(nullptr, ptr); } TEST(OSLibraryTest, whenSymbolNameIsInvalidThenGetProcAddressReturnsNullPointer) { std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); void *ptr = library->getProcAddress(fnName + "invalid"); EXPECT_EQ(nullptr, ptr); } using OsLibraryTestWithFailureInjection = Test; TEST_F(OsLibraryTestWithFailureInjection, testFailNew) { InjectedFunction method = [](size_t failureIndex) { std::string libName(Os::testDllName); // System under test OsLibrary *library = OsLibrary::load(libName); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_NE(nullptr, library); } else { EXPECT_EQ(nullptr, library); } // Make sure that we only have 1 buffer allocated at a time delete library; }; injectFailures(method); } TEST(OsLibrary, whenCallingIndexOperatorThenObjectConvertibleToFunctionOrVoidPointerIsReturned) { struct MockOsLibrary : OsLibrary { void *getProcAddress(const std::string &procName) override { lastRequestedProcName = procName; return ptrToReturn; } bool isLoaded() override { return true; } void *ptrToReturn = nullptr; std::string lastRequestedProcName; }; MockOsLibrary lib; int varA; int varB; int varC; using FunctionTypeA = void (*)(int *, float); using FunctionTypeB = int (*)(); lib.ptrToReturn = &varA; FunctionTypeA functionA = lib["funcA"]; EXPECT_STREQ("funcA", lib.lastRequestedProcName.c_str()); EXPECT_EQ(&varA, 
reinterpret_cast(functionA)); lib.ptrToReturn = &varB; FunctionTypeB functionB = lib["funcB"]; EXPECT_STREQ("funcB", lib.lastRequestedProcName.c_str()); EXPECT_EQ(&varB, reinterpret_cast(functionB)); lib.ptrToReturn = &varC; void *rawPtr = lib["funcC"]; EXPECT_STREQ("funcC", lib.lastRequestedProcName.c_str()); EXPECT_EQ(&varC, rawPtr); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/os_memory_tests.cpp000066400000000000000000000010571363734646600302350ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_memory.h" #include "gtest/gtest.h" using namespace NEO; TEST(OSMemory, reserveCpuAddressRange) { auto osMemory = OSMemory::create(); size_t reservedCpuAddressRangeSize = 1024; auto reservedCpuAddressRange = osMemory->reserveCpuAddressRange(reservedCpuAddressRangeSize); EXPECT_NE(reservedCpuAddressRange, nullptr); osMemory->releaseCpuAddressRange(reservedCpuAddressRange, reservedCpuAddressRangeSize); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/performance_counters_gen_tests.cpp000066400000000000000000000006171363734646600333010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/performance_counters.h" #include "test.h" using namespace NEO; struct PerformanceCountersGenTest : public ::testing::Test { }; class MockPerformanceCountersGen : public PerformanceCounters { public: MockPerformanceCountersGen() : PerformanceCounters() { } };compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/performance_counters_tests.cpp000066400000000000000000000614541363734646600324560ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/os_interface.h" #include 
"shared/source/os_interface/os_time.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/device_instrumentation_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "gtest/gtest.h" using namespace NEO; struct PerformanceCountersDeviceTest : public PerformanceCountersDeviceFixture, public DeviceInstrumentationFixture, public ::testing::Test { void SetUp() override { PerformanceCountersDeviceFixture::SetUp(); } void TearDown() override { PerformanceCountersDeviceFixture::TearDown(); } }; TEST_F(PerformanceCountersDeviceTest, createDeviceWithPerformanceCounters) { DeviceInstrumentationFixture::SetUp(true); EXPECT_NE(nullptr, device->getPerformanceCounters()); } TEST_F(PerformanceCountersDeviceTest, createDeviceWithoutPerformanceCounters) { DeviceInstrumentationFixture::SetUp(false); EXPECT_EQ(nullptr, device->getPerformanceCounters()); } struct PerformanceCountersTest : public PerformanceCountersFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersFixture::SetUp(); } void TearDown() override { PerformanceCountersFixture::TearDown(); } }; TEST_F(PerformanceCountersTest, createPerformanceCounters) { auto performanceCounters = PerformanceCounters::create(&device->getDevice()); EXPECT_NE(nullptr, performanceCounters); EXPECT_NE(nullptr, performanceCounters.get()); } TEST_F(PerformanceCountersTest, givenPerformanceCountersWhenCreatedThenAllValuesProperlyInitialized) { createPerfCounters(); EXPECT_NE(nullptr, performanceCountersBase->getMetricsLibraryInterface()); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } struct PerformanceCountersProcessEventTest : public PerformanceCountersTest, public 
::testing::WithParamInterface { void SetUp() override { PerformanceCountersTest::SetUp(); createPerfCounters(); eventComplete = true; outputParamSize = 0; inputParamSize = performanceCountersBase->getApiReportSize(); inputParam.reset(new uint8_t); } void TearDown() override { performanceCountersBase->shutdown(); PerformanceCountersTest::TearDown(); } std::unique_ptr inputParam; size_t inputParamSize; size_t outputParamSize; bool eventComplete; }; TEST_P(PerformanceCountersProcessEventTest, givenNullptrInputParamWhenProcessEventPerfCountersIsCalledThenReturnsFalse) { eventComplete = GetParam(); auto retVal = performanceCountersBase->getApiReport(inputParamSize, nullptr, &outputParamSize, eventComplete); EXPECT_FALSE(retVal); } TEST_P(PerformanceCountersProcessEventTest, givenCorrectInputParamWhenProcessEventPerfCountersIsCalledAndEventIsCompletedThenReturnsTrue) { eventComplete = GetParam(); EXPECT_EQ(0ull, outputParamSize); auto retVal = performanceCountersBase->getApiReport(inputParamSize, inputParam.get(), &outputParamSize, eventComplete); if (eventComplete) { EXPECT_TRUE(retVal); EXPECT_EQ(outputParamSize, inputParamSize); } else { EXPECT_FALSE(retVal); EXPECT_EQ(inputParamSize, outputParamSize); } } TEST_F(PerformanceCountersProcessEventTest, givenInvalidInputParamSizeWhenProcessEventPerfCountersIsCalledThenReturnsFalse) { EXPECT_EQ(0ull, outputParamSize); auto retVal = performanceCountersBase->getApiReport(inputParamSize - 1, inputParam.get(), &outputParamSize, eventComplete); EXPECT_FALSE(retVal); EXPECT_EQ(outputParamSize, inputParamSize); } TEST_F(PerformanceCountersProcessEventTest, givenNullptrOutputParamSizeWhenProcessEventPerfCountersIsCalledThenDoesNotReturnsOutputSize) { EXPECT_EQ(0ull, outputParamSize); auto retVal = performanceCountersBase->getApiReport(inputParamSize, inputParam.get(), nullptr, eventComplete); EXPECT_TRUE(retVal); EXPECT_EQ(0ull, outputParamSize); } TEST_F(PerformanceCountersProcessEventTest, 
givenNullptrInputZeroSizeWhenProcessEventPerfCountersIsCalledThenQueryProperSize) { EXPECT_EQ(0ull, outputParamSize); auto retVal = performanceCountersBase->getApiReport(0, nullptr, &outputParamSize, eventComplete); EXPECT_TRUE(retVal); EXPECT_EQ(inputParamSize, outputParamSize); } TEST_F(PerformanceCountersProcessEventTest, givenNullptrInputZeroSizeAndNullptrOutputSizeWhenProcessEventPerfCountersIsCalledThenReturnFalse) { EXPECT_EQ(0ull, outputParamSize); auto retVal = performanceCountersBase->getApiReport(0, nullptr, nullptr, eventComplete); EXPECT_FALSE(retVal); EXPECT_EQ(0ull, outputParamSize); } INSTANTIATE_TEST_CASE_P( PerfCountersTests, PerformanceCountersProcessEventTest, testing::Bool()); struct PerformanceCountersMetricsLibraryTest : public PerformanceCountersMetricsLibraryFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersMetricsLibraryFixture::SetUp(); } void TearDown() override { PerformanceCountersMetricsLibraryFixture::TearDown(); } }; TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryIsCreated) { // Create performance counters. createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Check metric library context. auto context = static_cast(performanceCountersBase->getMetricsLibraryContext().data); EXPECT_NE(nullptr, context); EXPECT_EQ(1u, context->contextCount); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryReturnsValidGpuCommands) { // Create performance counters. 
createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain required command buffer size. uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_NE(0u, commandsSize); // Fill command buffer. uint8_t buffer[1000] = {}; HwPerfCounter perfCounter = {}; TagNode query = {}; query.tagForCpuAccess = &perfCounter; EXPECT_TRUE(performanceCountersBase->getGpuCommands(MetricsLibraryApi::GpuCommandBufferType::Render, query, true, sizeof(buffer), buffer)); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenInitialNonCcsEngineWhenEnablingThenDontAllowCcsOnNextCalls) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_FALSE(performanceCountersBase->enable(true)); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(true)); performanceCountersBase->shutdown(); } TEST_F(PerformanceCountersMetricsLibraryTest, givenInitialCcsEngineWhenEnablingThenDontAllowNonCcsOnNextCalls) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(true)); EXPECT_TRUE(performanceCountersBase->enable(true)); EXPECT_FALSE(performanceCountersBase->enable(false)); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); 
performanceCountersBase->shutdown(); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenQueryReturnsInvalidGpuCommands) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(true)); // Obtain required command buffer size. uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_EQ(0u, commandsSize); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenApiReportSizeIsValid) { // Create performance counters. createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain api report size. uint32_t apiReportSize = performanceCountersBase->getApiReportSize(); EXPECT_GT(apiReportSize, 0u); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenApiReportSizeIsInvalid) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain api report size. uint32_t apiReportSize = performanceCountersBase->getApiReportSize(); EXPECT_EQ(0u, apiReportSize); // Close library. 
performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenGpuReportSizeIsInvalid) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain gpu report size. uint32_t gpuReportSize = performanceCountersBase->getGpuReportSize(); EXPECT_EQ(0u, gpuReportSize); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryIsAvailable) { // Create performance counters. createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsInvalidThenQueryIsNotAvailable) { // Create performance counters. createPerformanceCounters(false, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. 
performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryHasInvalidExportFunctionsDestroyThenQueryIsNotAvailable) { createPerformanceCounters(true, false); auto metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); auto metricsLibraryDll = reinterpret_cast(metricsLibraryInterface->osLibrary.get()); metricsLibraryDll->validContextCreate = true; metricsLibraryDll->validContextDelete = false; EXPECT_NE(nullptr, performanceCountersBase); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryHasInvalidExportFunctionsCreateAndDestroyThenQueryIsNotAvailable) { createPerformanceCounters(true, false); auto metricsLibraryInterface = performanceCountersBase->getMetricsLibraryInterface(); auto metricsLibraryDll = reinterpret_cast(metricsLibraryInterface->osLibrary.get()); metricsLibraryDll->validContextCreate = false; metricsLibraryDll->validContextDelete = false; EXPECT_NE(nullptr, performanceCountersBase); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_FALSE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Close library. performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryReturnsCorrectApiReport) { // Create performance counters. 
createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); // Obtain required command buffer size. uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_NE(0u, commandsSize); // Fill command buffer. uint8_t buffer[1000] = {}; TagNode query = {}; HwPerfCounter perfCounter = {}; query.tagForCpuAccess = &perfCounter; EXPECT_TRUE(performanceCountersBase->getGpuCommands(MetricsLibraryApi::GpuCommandBufferType::Render, query, true, sizeof(buffer), buffer)); // Obtain api report size. uint32_t apiReportSize = performanceCountersBase->getApiReportSize(); EXPECT_GT(apiReportSize, 0u); // Obtain gpu report size. uint32_t gpuReportSize = performanceCountersBase->getGpuReportSize(); EXPECT_GT(gpuReportSize, 0u); // Allocate memory for api report. uint8_t *apiReport = new uint8_t[apiReportSize]; EXPECT_NE(apiReport, nullptr); // Obtain api report. EXPECT_TRUE(performanceCountersBase->getApiReport(apiReportSize, apiReport, nullptr, true)); delete[] apiReport; apiReport = nullptr; // Close library. 
performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenReferenceCounterIsValid) { createPerformanceCounters(true, true); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(2u, performanceCountersBase->getReferenceNumber()); performanceCountersBase->shutdown(); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricLibraryIsValidThenQueryHandleIsValid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_TRUE(performanceCountersBase->getQueryHandle().IsValid()); EXPECT_TRUE(performanceCountersBase->getQueryHandle().IsValid()); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenOaConfigurationIsInvalidThenGpuReportSizeIsInvalid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validCreateConfigurationOa = false; EXPECT_EQ(0u, 
performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetricsLibraryIsInvalidGpuReportSizeIsInvalid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validGpuReportSize = false; EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false)); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenAllConfigurationsAreValidThenGpuReportSizeIsValid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validCreateConfigurationOa = true; metricLibraryApi->validCreateConfigurationUser = true; EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true), 0u); EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } 
TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenOaConfigurationsActivationIsInvalidThenGpuReportSizeIsInvalid) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validActivateConfigurationOa = false; EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenCreatingUserConfigurationThenReturnSuccess) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ConfigurationHandle_1_0 configurationHandle = {}; auto metricsLibrary = performanceCountersBase->getMetricsLibraryInterface(); auto contextHandle = performanceCountersBase->getMetricsLibraryContext(); EXPECT_TRUE(metricsLibrary->userConfigurationCreate(contextHandle, configurationHandle)); EXPECT_TRUE(metricsLibrary->userConfigurationDelete(configurationHandle)); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, getHwPerfCounterReturnsValidPointer) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ASSERT_NE(nullptr, queue->getPerfCounters()); std::unique_ptr event(new Event(queue.get(), 
CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwPerfCounter *perfCounter = event->getHwPerfCounterNode()->tagForCpuAccess; ASSERT_NE(nullptr, perfCounter); ASSERT_EQ(0ULL, perfCounter->report[0]); EXPECT_TRUE(perfCounter->isCompleted()); HwPerfCounter *perfCounter2 = event->getHwPerfCounterNode()->tagForCpuAccess; ASSERT_EQ(perfCounter, perfCounter2); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, getHwPerfCounterAllocationReturnsValidPointer) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ASSERT_NE(nullptr, queue->getPerfCounters()); std::unique_ptr event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation(); ASSERT_NE(nullptr, allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t memoryStorageSize = allocation->getUnderlyingBufferSize(); EXPECT_NE(nullptr, memoryStorage); EXPECT_GT(memoryStorageSize, 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, hwPerfCounterMemoryIsPlacedInGraphicsAllocation) { createPerformanceCounters(true, false); EXPECT_NE(nullptr, performanceCountersBase); EXPECT_TRUE(performanceCountersBase->enable(false)); EXPECT_EQ(1u, performanceCountersBase->getReferenceNumber()); ASSERT_NE(nullptr, queue->getPerfCounters()); std::unique_ptr event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); HwPerfCounter *perfCounter = event->getHwPerfCounterNode()->tagForCpuAccess; ASSERT_NE(nullptr, perfCounter); GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation(); ASSERT_NE(nullptr, 
allocation); void *memoryStorage = allocation->getUnderlyingBuffer(); size_t graphicsAllocationSize = allocation->getUnderlyingBufferSize(); EXPECT_GE(perfCounter, memoryStorage); EXPECT_LE(perfCounter + 1, ptrOffset(memoryStorage, graphicsAllocationSize)); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); } TEST_F(PerformanceCountersMetricsLibraryTest, hwPerfCounterNodeWhenPerformanceCountersObjectIsNotPresentThenNodeisNull) { std::unique_ptr event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); auto node = event->getHwPerfCounterNode(); ASSERT_EQ(nullptr, node); } TEST_F(PerformanceCountersTest, givenRenderCoreFamilyThenMetricsLibraryGenIdentifierAreValid) { const auto &hwInfo = device->getHardwareInfo(); const auto gen = hwInfo.platform.eRenderCoreFamily; EXPECT_NE(ClientGen::Unknown, static_cast(HwHelper::get(gen).getMetricsLibraryGenId())); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/000077500000000000000000000000001363734646600257655ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/CMakeLists.txt000066400000000000000000000046201363734646600305270ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_os_interface_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/deferrable_deletion_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_os_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gdi_dll_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_win_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_environment_variables.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kmdaf_listener.h 
${CMAKE_CURRENT_SOURCE_DIR}/mock_os_time_win.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_performance_counters_win.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/os_context_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_win_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/os_library_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_time_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_win_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/registry_reader_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/registry_reader_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/self_lib_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_address_space_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm_kmdaf_listener_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_tests.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/wddm_mapper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/wddm_memory_manager_allocate_in_device_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_allocate_in_device_pool_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/wddm_residency_controller_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_residency_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_registry_reader.cpp ) if(WIN32) file(GLOB IGDRCL_SRC_tests_wddm_interface "${CMAKE_CURRENT_SOURCE_DIR}/wddm2[0-9]_tests\.cpp") target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_windows} ${IGDRCL_SRC_tests_wddm_interface} ) endif() add_subdirectories() create_wddm_memory_manager.cpp000066400000000000000000000014761363734646600337620ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" 
#include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" namespace NEO { std::unique_ptr MemoryManager::createMemoryManager(ExecutionEnvironment &executionEnvironment) { if (ultHwConfig.forceOsAgnosticMemoryManager) { return std::make_unique(executionEnvironment); } return std::make_unique(executionEnvironment); } } // namespace NEO deferrable_deletion_win_tests.cpp000066400000000000000000000050421363734646600344700ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/windows/deferrable_deletion_win.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "gtest/gtest.h" #include using namespace NEO; TEST(DeferrableDeletionImpl, NonCopyable) { EXPECT_FALSE(std::is_move_constructible::value); EXPECT_FALSE(std::is_copy_constructible::value); } TEST(DeferrableDeletionImpl, NonAssignable) { EXPECT_FALSE(std::is_move_assignable::value); EXPECT_FALSE(std::is_copy_assignable::value); } class MockDeferrableDeletion : public DeferrableDeletionImpl { public: using DeferrableDeletionImpl::allocationCount; using DeferrableDeletionImpl::DeferrableDeletionImpl; using DeferrableDeletionImpl::handles; using DeferrableDeletionImpl::resourceHandle; using DeferrableDeletionImpl::wddm; }; class DeferrableDeletionTest : public ::testing::Test { public: std::unique_ptr executionEnvironment; std::unique_ptr wddm; const D3DKMT_HANDLE handle = 0; uint32_t allocationCount = 1; D3DKMT_HANDLE resourceHandle = 0; void SetUp() override { 
executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); wddm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); } }; TEST_F(DeferrableDeletionTest, givenDeferrableDeletionWhenIsCreatedThenObjectMembersAreSetProperly) { MockDeferrableDeletion deletion(wddm.get(), &handle, allocationCount, resourceHandle); EXPECT_EQ(wddm.get(), deletion.wddm); EXPECT_NE(nullptr, deletion.handles); EXPECT_EQ(handle, *deletion.handles); EXPECT_NE(&handle, deletion.handles); EXPECT_EQ(allocationCount, deletion.allocationCount); EXPECT_EQ(resourceHandle, deletion.resourceHandle); } TEST_F(DeferrableDeletionTest, givenDeferrableDeletionWhenApplyIsCalledThenDeletionIsApplied) { wddm->callBaseDestroyAllocations = false; std::unique_ptr deletion(DeferrableDeletion::create((Wddm *)wddm.get(), &handle, allocationCount, resourceHandle)); EXPECT_EQ(0, wddm->destroyAllocationResult.called); deletion->apply(); EXPECT_EQ(1, wddm->destroyAllocationResult.called); } device_command_stream_tests.cpp000066400000000000000000001720031363734646600341470ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/windows/gmm_callbacks.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include 
"shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/source/os_interface/windows/wddm_residency_controller.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/os_interface/windows/mock_gdi_interface.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" #include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.h" #include "opencl/source/helpers/built_ins_helper.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/os_interface/windows/wddm_device_command_stream.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_submissions_aggregator.h" #include "opencl/test/unit_test/mocks/mock_wddm_interface23.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" #include "test.h" using namespace NEO; using namespace ::testing; class WddmCommandStreamFixture { public: std::unique_ptr device; std::unique_ptr osContext; DeviceCommandStreamReceiver *csr; MockWddmMemoryManager *memoryManager = nullptr; WddmMock *wddm = nullptr; DebugManagerStateRestore stateRestore; virtual void SetUp() { 
HardwareInfo *hwInfo = nullptr; DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); auto executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); memoryManager = new MockWddmMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm()); osContext.reset(OsContext::create(executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(), 0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup, false, false, false)); osContext->setDefaultContext(true); csr = new WddmCommandStreamReceiver(*executionEnvironment, 0); device.reset(MockDevice::create(executionEnvironment, 0u)); device->resetCommandStreamReceiver(csr); ASSERT_NE(nullptr, device); } virtual void TearDown() { } }; template struct MockWddmCsr : public WddmCommandStreamReceiver { using CommandStreamReceiver::commandStream; using CommandStreamReceiver::dispatchMode; using CommandStreamReceiver::getCS; using CommandStreamReceiverHw::directSubmission; using WddmCommandStreamReceiver::commandBufferHeader; using WddmCommandStreamReceiver::WddmCommandStreamReceiver; void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; } SubmissionAggregator *peekSubmissionAggregator() { return this->submissionAggregator.get(); } void overrideSubmissionAggregator(SubmissionAggregator *newSubmissionsAggregator) { this->submissionAggregator.reset(newSubmissionsAggregator); } void overrideRecorededCommandBuffer(Device &device) { recordedCommandBuffer = std::unique_ptr(new CommandBuffer(device)); } int flushCalledCount = 0; std::unique_ptr recordedCommandBuffer = nullptr; }; class WddmCommandStreamWithMockGdiFixture { public: MockWddmCsr *csr = nullptr; MemoryManager *memoryManager = nullptr; std::unique_ptr device = nullptr; WddmMock *wddm = nullptr; MockGdi *gdi = nullptr; DebugManagerStateRestore stateRestore; 
GraphicsAllocation *preemptionAllocation = nullptr; virtual void SetUp() { HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm()); gdi = new MockGdi(); wddm->resetGdi(gdi); ASSERT_NE(wddm, nullptr); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); this->csr = new MockWddmCsr(*executionEnvironment, 0); memoryManager = new WddmMemoryManager(*executionEnvironment); ASSERT_NE(nullptr, memoryManager); executionEnvironment->memoryManager.reset(memoryManager); device = std::unique_ptr(Device::create(executionEnvironment, 0u)); device->resetCommandStreamReceiver(this->csr); ASSERT_NE(nullptr, device); this->csr->overrideRecorededCommandBuffer(*device); } virtual void TearDown() { wddm = nullptr; } }; using WddmCommandStreamTest = ::Test; using WddmCommandStreamMockGdiTest = ::Test; using WddmDefaultTest = ::Test; using DeviceCommandStreamTest = ::Test; TEST_F(DeviceCommandStreamTest, CreateWddmCSR) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto wddm = Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get()); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(static_cast(wddm)); executionEnvironment->initializeMemoryManager(); std::unique_ptr> csr(static_cast *>(WddmCommandStreamReceiver::create(false, *executionEnvironment, 0))); EXPECT_NE(nullptr, csr); auto wddmFromCsr = csr->peekWddm(); EXPECT_NE(nullptr, wddmFromCsr); } TEST_F(DeviceCommandStreamTest, CreateWddmCSRWithAubDump) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto wddm = Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get()); 
executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(static_cast(wddm)); executionEnvironment->initializeMemoryManager(); std::unique_ptr> csr(static_cast *>(WddmCommandStreamReceiver::create(true, *executionEnvironment, 0))); EXPECT_NE(nullptr, csr); auto wddmFromCsr = csr->peekWddm(); EXPECT_NE(nullptr, wddmFromCsr); auto aubCSR = static_cast> *>(csr.get())->aubCSR.get(); EXPECT_NE(nullptr, aubCSR); } TEST_F(WddmCommandStreamTest, givenFlushStampWhenWaitCalledThenWaitForSpecifiedMonitoredFence) { uint64_t stampToWait = 123; wddm->waitFromCpuResult.called = 0u; csr->waitForFlushStamp(stampToWait); EXPECT_EQ(1u, wddm->waitFromCpuResult.called); EXPECT_TRUE(wddm->waitFromCpuResult.success); EXPECT_EQ(stampToWait, wddm->waitFromCpuResult.uint64ParamPassed); } TEST_F(WddmCommandStreamTest, Flush) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1u, wddm->submitResult.called); EXPECT_TRUE(wddm->submitResult.success); EXPECT_EQ(csr->obtainCurrentFlushStamp(), static_cast(csr->getOsContext()).getResidencyController().getMonitoredFence().lastSubmittedFence); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenGraphicsAllocationWithDifferentGpuAddressThenCpuAddressWhenSubmitIsCalledThenGpuAddressIsUsed) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto cpuAddress = 
commandBuffer->getUnderlyingBuffer(); uint64_t mockGpuAddres = 1337; commandBuffer->setCpuPtrAndGpuAddress(cpuAddress, mockGpuAddres); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(mockGpuAddres, wddm->submitResult.commandBufferSubmitted); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, FlushWithOffset) { auto offset = 128u; GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), offset, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(1u, wddm->submitResult.called); EXPECT_TRUE(wddm->submitResult.success); EXPECT_EQ(wddm->submitResult.commandBufferSubmitted, commandBuffer->getGpuAddress() + offset); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledThenCoherencyRequiredFlagIsSetToFalse) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); auto commandHeader = wddm->submitResult.commandHeaderSubmitted; COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader); 
EXPECT_FALSE(pHeader->RequiresCoherency);
    // (end of a test whose beginning lies before this chunk)
    memoryManager->freeGraphicsMemory(commandBuffer);
}

// NOTE(review): several casts and std::make_unique calls below carry no template
// argument list in this copy of the file (e.g. `static_cast(...)`); presumably it
// was lost during extraction -- confirm against the upstream source.

// Forces the default preemption mode to Disabled, flushes an empty batch buffer in
// immediate-dispatch mode and checks that the submitted command header does not
// have NeedsMidBatchPreEmptionSupport set.
TEST(WddmPreemptionHeaderTests, givenWddmCommandStreamReceiverWhenPreemptionIsOffWhenWorkloadIsSubmittedThenHeaderDoesntHavePreemptionFieldSet) {
    HardwareInfo *hwInfo = nullptr;
    // hwInfo is dereferenced right after this call, so the helper is expected to
    // point it at usable hardware info.
    ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1);
    hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled;
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo);
    auto wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm());
    auto csr = std::make_unique>(*executionEnvironment, 0);
    executionEnvironment->memoryManager.reset(new MemoryManagerCreate(false, false, *executionEnvironment));
    csr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
    // The OS context is created with the default preemption mode derived from the modified hwInfo.
    OsContextWin osContext(*wddm, 0u, 1, HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false);
    csr->setupContext(osContext);
    auto commandBuffer = executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    // Inspect the header recorded by the mock at submit time.
    auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
    COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader);
    EXPECT_FALSE(pHeader->NeedsMidBatchPreEmptionSupport);
    executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer);
}

// Same flow as the test above but with the default preemption mode forced to
// MidThread; the submitted header must have NeedsMidBatchPreEmptionSupport set.
TEST(WddmPreemptionHeaderTests, givenWddmCommandStreamReceiverWhenPreemptionIsOnWhenWorkloadIsSubmittedThenHeaderDoesHavePreemptionFieldSet) {
    HardwareInfo *hwInfo = nullptr;
    ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1);
    hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread;
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo);
    auto wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm());
    auto csr = std::make_unique>(*executionEnvironment, 0);
    executionEnvironment->memoryManager.reset(new MemoryManagerCreate(false, false, *executionEnvironment));
    csr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
    OsContextWin osContext(*wddm, 0u, 1, HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false);
    csr->setupContext(osContext);
    auto commandBuffer = executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
    COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader);
    EXPECT_TRUE(pHeader->NeedsMidBatchPreEmptionSupport);
    executionEnvironment->memoryManager->freeGraphicsMemory(commandBuffer);
}

// Without submitting anything: a CSR constructed while the default preemption mode
// is MidThread must already carry the preemption bit in its commandBufferHeader.
TEST(WddmPreemptionHeaderTests, givenDeviceSupportingPreemptionWhenCommandStreamReceiverIsCreatedThenHeaderContainsPreemptionFieldSet) {
    HardwareInfo *hwInfo = nullptr;
    ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1);
    hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::MidThread;
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo);
    auto commandStreamReceiver = std::make_unique>(*executionEnvironment, 0);
    auto commandHeader = commandStreamReceiver->commandBufferHeader;
    auto header = reinterpret_cast(commandHeader);
    EXPECT_TRUE(header->NeedsMidBatchPreEmptionSupport);
}

// Counterpart of the previous test: with preemption Disabled the freshly created
// CSR header must not have the preemption bit set.
TEST(WddmPreemptionHeaderTests, givenDevicenotSupportingPreemptionWhenCommandStreamReceiverIsCreatedThenHeaderPreemptionFieldIsNotSet) {
    HardwareInfo *hwInfo = nullptr;
    ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1);
    hwInfo->capabilityTable.defaultPreemptionMode = PreemptionMode::Disabled;
    executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo);
    auto commandStreamReceiver = std::make_unique>(*executionEnvironment, 0);
    auto commandHeader = commandStreamReceiver->commandBufferHeader;
    auto header = reinterpret_cast(commandHeader);
    EXPECT_FALSE(header->NeedsMidBatchPreEmptionSupport);
}

// Sets EUCount = 9 and SubSliceCount = 3 (3 EUs per subslice, an odd number) and
// checks that the header's UmdRequestedEUCount equals that quotient with the
// lowest bit cleared.
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledWhenEUCountWouldBeOddThenRequestEvenEuCount) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    wddm->getGtSysInfo()->EUCount = 9;
    wddm->getGtSysInfo()->SubSliceCount = 3;

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::LOW, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
    COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader);

    EXPECT_EQ(0, pHeader->UmdRequestedSliceState);
    EXPECT_EQ(0, pHeader->UmdRequestedSubsliceCount);
    // `& (~1u)` rounds the per-subslice EU count down to an even value.
    EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount);

    memoryManager->freeGraphicsMemory(commandBuffer);
}

// Flushes with QueueThrottle::LOW and checks the slice/subslice/EU request fields
// of the submitted header (the body continues on the following line).
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToLowThenSetHeaderFieldsProperly) {
    GraphicsAllocation *commandBuffer =
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    // (tail of the ThrottleIsToLow test that starts on the preceding line)
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::LOW, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
    // NOTE(review): the cast below has no template argument in this copy of the
    // file; presumably lost in extraction -- confirm against upstream.
    COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader);

    EXPECT_EQ(0, pHeader->UmdRequestedSliceState);
    EXPECT_EQ(0, pHeader->UmdRequestedSubsliceCount);
    // Expected EU request: EUs per subslice with the lowest bit cleared.
    EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount);

    memoryManager->freeGraphicsMemory(commandBuffer);
}

// Flushes with QueueThrottle::MEDIUM; the header expectations are identical to
// the LOW-throttle case: no slice state, no subslice count, even EU count.
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToMediumThenSetHeaderFieldsProperly) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
    COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader);

    EXPECT_EQ(0, pHeader->UmdRequestedSliceState);
    EXPECT_EQ(0, pHeader->UmdRequestedSubsliceCount);
    EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount);

    memoryManager->freeGraphicsMemory(commandBuffer);
}

// Flushes with QueueThrottle::HIGH; unlike LOW/MEDIUM the header is expected to
// request the full subslice count, but only when that count is at most 7
// (otherwise 0 is expected).
TEST_F(WddmCommandStreamTest, givenWdmmWhenSubmitIsCalledAndThrottleIsToHighThenSetHeaderFieldsProperly) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::HIGH, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};
    csr->flush(batchBuffer, csr->getResidencyAllocations());
    auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
    COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast(commandHeader);
    const uint32_t maxRequestedSubsliceCount = 7;

    EXPECT_EQ(0, pHeader->UmdRequestedSliceState);
    EXPECT_EQ((wddm->getGtSysInfo()->SubSliceCount <= maxRequestedSubsliceCount) ? wddm->getGtSysInfo()->SubSliceCount : 0, pHeader->UmdRequestedSubsliceCount);
    EXPECT_EQ((wddm->getGtSysInfo()->EUCount / wddm->getGtSysInfo()->SubSliceCount) & (~1u), pHeader->UmdRequestedEUCount);

    memoryManager->freeGraphicsMemory(commandBuffer);
}

// With KmDaf left disabled (asserted below), flushing with a LINEAR_STREAM
// allocation in the residency list must not trigger any KmDaf lock.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafDisabledWhenFlushIsCalledWithAllocationsForResidencyThenNoneAllocationShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    auto linearStreamAllocation = memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::LINEAR_STREAM});
    ASSERT_NE(nullptr, linearStreamAllocation);
    ResidencyContainer allocationsForResidency = {linearStreamAllocation};

    EXPECT_FALSE(wddm->isKmDafEnabled());
    csr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(0u, wddm->kmDafLockResult.called);
    EXPECT_EQ(0u, wddm->kmDafLockResult.lockedAllocations.size());

    memoryManager->freeGraphicsMemory(commandBuffer);
    memoryManager->freeGraphicsMemory(linearStreamAllocation);
}

// KmDaf enabled, but the CSR's own residency list is flushed without anything
// having been made resident in this test: no KmDaf lock is expected.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithoutAllocationsForResidencyThenNoneAllocationShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    wddm->setKmDafEnabled(true);
    csr->flush(batchBuffer, csr->getResidencyAllocations());

    EXPECT_EQ(0u, wddm->kmDafLockResult.called);
    EXPECT_EQ(0u, wddm->kmDafLockResult.lockedAllocations.size());

    memoryManager->freeGraphicsMemory(commandBuffer);
}

// KmDaf enabled and a LINEAR_STREAM allocation registered through
// csr->makeResident: flushing the CSR's residency list must KmDaf-lock exactly
// that allocation (the assertions follow on the next line of the file).
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithResidencyAllocationsInMemoryManagerThenLinearStreamAllocationsShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    auto linearStreamAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::LINEAR_STREAM}));
    ASSERT_NE(nullptr, linearStreamAllocation);

    csr->makeResident(*linearStreamAllocation);
    EXPECT_EQ(1u, csr->getResidencyAllocations().size());
    EXPECT_EQ(linearStreamAllocation, csr->getResidencyAllocations()[0]);

    wddm->setKmDafEnabled(true);
    csr->flush(batchBuffer,
csr->getResidencyAllocations());
    // (tail of the ...WithResidencyAllocationsInMemoryManager... test that starts
    // on the preceding line)

    // Exactly one lock call, for the linear-stream allocation's default handle.
    EXPECT_EQ(1u, wddm->kmDafLockResult.called);
    EXPECT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size());
    EXPECT_EQ(linearStreamAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]);

    memoryManager->freeGraphicsMemory(commandBuffer);
    memoryManager->freeGraphicsMemory(linearStreamAllocation);
}

// NOTE(review): the `static_cast(...)` calls in these tests have no template
// argument in this copy of the file; presumably lost in extraction -- confirm
// against upstream.

// KmDaf enabled, LINEAR_STREAM allocation passed through an explicit residency
// container: the allocation must be KmDaf-locked exactly once.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenLinearStreamAllocationsShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    auto linearStreamAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::LINEAR_STREAM}));
    ASSERT_NE(nullptr, linearStreamAllocation);
    ResidencyContainer allocationsForResidency = {linearStreamAllocation};

    wddm->setKmDafEnabled(true);
    csr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(1u, wddm->kmDafLockResult.called);
    EXPECT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size());
    EXPECT_EQ(linearStreamAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]);

    memoryManager->freeGraphicsMemory(commandBuffer);
    memoryManager->freeGraphicsMemory(linearStreamAllocation);
}

// Same scenario with a FILL_PATTERN allocation: it must be KmDaf-locked too.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenFillPatternAllocationsShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    auto fillPatternAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::FILL_PATTERN}));
    ASSERT_NE(nullptr, fillPatternAllocation);
    ResidencyContainer allocationsForResidency = {fillPatternAllocation};

    wddm->setKmDafEnabled(true);
    csr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(1u, wddm->kmDafLockResult.called);
    EXPECT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size());
    EXPECT_EQ(fillPatternAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]);

    memoryManager->freeGraphicsMemory(commandBuffer);
    memoryManager->freeGraphicsMemory(fillPatternAllocation);
}

// Same scenario with a COMMAND_BUFFER allocation: it must be KmDaf-locked too.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenCommandBufferAllocationsShouldBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    auto commandBufferAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
    ASSERT_NE(nullptr, commandBufferAllocation);
    ResidencyContainer allocationsForResidency = {commandBufferAllocation};

    wddm->setKmDafEnabled(true);
    csr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(1u, wddm->kmDafLockResult.called);
    EXPECT_EQ(1u, wddm->kmDafLockResult.lockedAllocations.size());
    EXPECT_EQ(commandBufferAllocation->getDefaultHandle(), wddm->kmDafLockResult.lockedAllocations[0]);

    memoryManager->freeGraphicsMemory(commandBuffer);
    memoryManager->freeGraphicsMemory(commandBufferAllocation);
}

// Negative case: an allocation created from plain MockAllocationProperties (no
// explicit allocation type given here) must not be KmDaf-locked even though
// KmDaf is enabled.
TEST_F(WddmCommandStreamTest, givenWddmWithKmDafEnabledWhenFlushIsCalledWithAllocationsForResidencyThenNonLinearStreamAllocationShouldNotBeKmDafLocked) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);
    BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr};

    auto nonLinearStreamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, nonLinearStreamAllocation);
    ResidencyContainer allocationsForResidency = {nonLinearStreamAllocation};

    wddm->setKmDafEnabled(true);
    csr->flush(batchBuffer, allocationsForResidency);

    EXPECT_EQ(0u, wddm->kmDafLockResult.called);
    EXPECT_EQ(0u, wddm->kmDafLockResult.lockedAllocations.size());

    memoryManager->freeGraphicsMemory(commandBuffer);
    memoryManager->freeGraphicsMemory(nonLinearStreamAllocation);
}

// csr->makeResident only records the allocation in the CSR residency list; the
// mock's makeResident counter must stay at zero.
TEST_F(WddmCommandStreamTest, makeResident) {
    GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    csr->makeResident(*commandBuffer);

    EXPECT_EQ(0u, wddm->makeResidentResult.called);
    EXPECT_EQ(1u, csr->getResidencyAllocations().size());
    EXPECT_EQ(commandBuffer, csr->getResidencyAllocations()[0]);

    memoryManager->freeGraphicsMemory(commandBuffer);
}

// After makeResident followed by makeNonResident the allocation must appear in
// the CSR eviction list (the body continues on the following line).
TEST_F(WddmCommandStreamTest, makeNonResidentPutsAllocationInEvictionAllocations) {
    GraphicsAllocation
*commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); csr->makeResident(*cs.getGraphicsAllocation()); csr->makeNonResident(*commandBuffer); EXPECT_EQ(1u, csr->getEvictionAllocations().size()); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamTest, processEvictionPlacesAllAllocationsOnTrimCandidateList) { GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); GraphicsAllocation *allocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); ASSERT_NE(nullptr, allocation2); csr->getEvictionAllocations().push_back(allocation); csr->getEvictionAllocations().push_back(allocation2); EXPECT_EQ(2u, csr->getEvictionAllocations().size()); csr->processEviction(); EXPECT_EQ(2u, static_cast(csr->getOsContext()).getResidencyController().peekTrimCandidateList().size()); memoryManager->freeGraphicsMemory(allocation); memoryManager->freeGraphicsMemory(allocation2); } TEST_F(WddmCommandStreamTest, processEvictionClearsEvictionAllocations) { GraphicsAllocation *allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, allocation); csr->getEvictionAllocations().push_back(allocation); EXPECT_EQ(1u, csr->getEvictionAllocations().size()); csr->processEviction(); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmCommandStreamTest, makeResidentNonResidentMemObj) { GraphicsAllocation *gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), 
MemoryConstants::pageSize}); Buffer *buffer = new AlignedBuffer(gfxAllocation); csr->makeResident(*buffer->getGraphicsAllocation()); EXPECT_EQ(0u, wddm->makeResidentResult.called); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(gfxAllocation, csr->getResidencyAllocations()[0]); csr->makeNonResident(*buffer->getGraphicsAllocation()); EXPECT_EQ(gfxAllocation, csr->getEvictionAllocations()[0]); delete buffer; memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenGraphicsAllocationWhenMakeResidentThenAllocationIsInResidencyContainer) { void *hostPtr = reinterpret_cast(wddm->virtualAllocAddress + 0x1234); auto size = 1234u; auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, hostPtr); ASSERT_NE(nullptr, gfxAllocation); csr->makeResidentHostPtrAllocation(gfxAllocation); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(hostPtr, gfxAllocation->getUnderlyingBuffer()); memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenHostPtrAllocationWhenMapFailsThenFragmentsAreClearedAndNullptrIsReturned) { this->wddm->callBaseMapGpuVa = false; this->wddm->mapGpuVaStatus = false; void *hostPtr = reinterpret_cast(wddm->virtualAllocAddress + 0x1234); auto size = 1234u; wddm->mapGpuVirtualAddressResult.called = 0u; wddm->destroyAllocationResult.called = 0u; auto gfxAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, hostPtr); EXPECT_EQ(1u, wddm->mapGpuVirtualAddressResult.called); EXPECT_EQ(1u, wddm->destroyAllocationResult.called); EXPECT_EQ(nullptr, gfxAllocation); } TEST_F(WddmCommandStreamTest, givenAddressWithHighestBitSetWhenItIsMappedThenProperAddressIsPassed) { uintptr_t address = 0xffff0000; void *faultyAddress = reinterpret_cast(address); wddm->mapGpuVirtualAddressResult.called = 0u; auto gfxAllocation = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize}, faultyAddress); EXPECT_EQ(1u, wddm->mapGpuVirtualAddressResult.called); ASSERT_NE(nullptr, gfxAllocation); auto expectedAddress = castToUint64(faultyAddress); EXPECT_EQ(gfxAllocation->getGpuAddress(), expectedAddress); ASSERT_EQ(gfxAllocation->fragmentsStorage.fragmentCount, 1u); EXPECT_EQ(expectedAddress, gfxAllocation->fragmentsStorage.fragmentStorageData[0].osHandleStorage->gpuPtr); memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenHostPtrWhenPtrBelowRestrictionThenCreateAllocationAndMakeResident) { void *hostPtr = reinterpret_cast(memoryManager->getAlignedMallocRestrictions()->minAddress - 0x1000); auto size = 0x2000u; auto gfxAllocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, hostPtr)); void *expectedReserve = reinterpret_cast(wddm->virtualAllocAddress); ASSERT_NE(nullptr, gfxAllocation); csr->makeResidentHostPtrAllocation(gfxAllocation); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(hostPtr, gfxAllocation->getUnderlyingBuffer()); EXPECT_EQ(expectedReserve, gfxAllocation->getReservedAddressPtr()); memoryManager->freeGraphicsMemory(gfxAllocation); } TEST_F(WddmCommandStreamTest, givenTwoTemporaryAllocationsWhenCleanTemporaryAllocationListThenDestoryOnlyCompletedAllocations) { void *host_ptr = (void *)0x1212341; void *host_ptr2 = (void *)0x2212341; auto size = 17262u; GraphicsAllocation *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, host_ptr); GraphicsAllocation *graphicsAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, host_ptr2); 
csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(graphicsAllocation), TEMPORARY_ALLOCATION); csr->getInternalAllocationStorage()->storeAllocation(std::unique_ptr(graphicsAllocation2), TEMPORARY_ALLOCATION); graphicsAllocation->updateTaskCount(1, csr->getOsContext().getContextId()); graphicsAllocation2->updateTaskCount(100, csr->getOsContext().getContextId()); csr->waitForTaskCountAndCleanAllocationList(1, TEMPORARY_ALLOCATION); // graphicsAllocation2 still lives EXPECT_EQ(host_ptr2, graphicsAllocation2->getUnderlyingBuffer()); auto hostPtrManager = memoryManager->getHostPtrManager(); auto alignedPtr = alignDown(host_ptr, MemoryConstants::pageSize); auto alignedPtr2 = alignDown(host_ptr2, MemoryConstants::pageSize); auto fragment = hostPtrManager->getFragment(alignedPtr2); ASSERT_NE(nullptr, fragment); EXPECT_EQ(alignedPtr2, fragment->fragmentCpuPointer); auto fragment2 = hostPtrManager->getFragment(alignedPtr); EXPECT_EQ(nullptr, fragment2); // destroy remaining allocation csr->waitForTaskCountAndCleanAllocationList(100, TEMPORARY_ALLOCATION); } TEST_F(WddmCommandStreamMockGdiTest, FlushCallsWddmMakeResidentForResidencyAllocations) { GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); csr->makeResident(*commandBuffer); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); gdi->getMakeResidentArg().NumAllocations = 0; BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr}; csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_NE(0u, gdi->getMakeResidentArg().NumAllocations); memoryManager->freeGraphicsMemory(commandBuffer); } TEST_F(WddmCommandStreamMockGdiTest, makeResidentClearsResidencyAllocations) { GraphicsAllocation *commandBuffer = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); ASSERT_NE(nullptr, commandBuffer); LinearStream cs(commandBuffer); csr->makeResident(*commandBuffer); EXPECT_EQ(1u, csr->getResidencyAllocations().size()); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); EXPECT_EQ(trimListUnusedPosition, static_cast(commandBuffer)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); csr->processResidency(csr->getResidencyAllocations(), 0u); csr->makeSurfacePackNonResident(csr->getResidencyAllocations()); EXPECT_EQ(0u, csr->getResidencyAllocations().size()); EXPECT_EQ(0u, csr->getEvictionAllocations().size()); EXPECT_EQ(0u, static_cast(commandBuffer)->getTrimCandidateListPosition(csr->getOsContext().getContextId())); memoryManager->freeGraphicsMemory(commandBuffer); } HWTEST_F(WddmCommandStreamMockGdiTest, givenRecordedCommandBufferWhenItIsSubmittedThenFlushTaskIsProperlyCalled) { //preemption allocation + sip allocation size_t csrSurfaceCount = 0; GraphicsAllocation *tmpAllocation = nullptr; if (device->getPreemptionMode() == PreemptionMode::MidThread) { csrSurfaceCount = 2; tmpAllocation = GlobalMockSipProgram::sipProgram->getAllocation(); GlobalMockSipProgram::sipProgram->resetAllocation(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); } csr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); auto mockedSubmissionsAggregator = new mockSubmissionsAggregator(); csr->overrideSubmissionAggregator(mockedSubmissionsAggregator); auto commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto dshAlloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); auto iohAlloc = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    // (continuation of givenRecordedCommandBufferWhenItIsSubmittedThenFlushTaskIsProperlyCalled,
    // which starts on the preceding line)
    auto sshAlloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});

    auto tagAllocation = csr->getTagAllocation();

    LinearStream cs(commandBuffer);
    IndirectHeap dsh(dshAlloc);
    IndirectHeap ioh(iohAlloc);
    IndirectHeap ssh(sshAlloc);

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(device->getHardwareInfo());
    dispatchFlags.guardCommandBufferWithPipeControl = true;
    // Coherency is requested here and verified on the submitted header below.
    dispatchFlags.requiresCoherency = true;
    csr->flushTask(cs, 0u, dsh, ioh, ssh, 0u, dispatchFlags, *device);

    auto &cmdBuffers = mockedSubmissionsAggregator->peekCommandBuffers();
    auto storedCommandBuffer = cmdBuffers.peekHead();

    // NOTE(review): copyOfResidency is filled but not read again in this test.
    ResidencyContainer copyOfResidency = storedCommandBuffer->surfaces;
    copyOfResidency.push_back(storedCommandBuffer->batchBuffer.commandBufferAllocation);

    csr->flushBatchedSubmissions();

    // The aggregator must be drained and exactly one submission recorded.
    EXPECT_TRUE(cmdBuffers.peekIsEmpty());

    EXPECT_EQ(1u, wddm->submitResult.called);
    auto csrCommandStream = csr->commandStream.getGraphicsAllocation();
    EXPECT_EQ(csrCommandStream->getGpuAddress(), wddm->submitResult.commandBufferSubmitted);
    EXPECT_TRUE(((COMMAND_BUFFER_HEADER *)wddm->submitResult.commandHeaderSubmitted)->RequiresCoherency);
    EXPECT_EQ(6u + csrSurfaceCount, wddm->makeResidentResult.handleCount);

    // NOTE(review): template arguments (on std::vector and the static_casts) are
    // missing in this copy of the file; presumably lost in extraction.
    std::vector expectedHandles;
    expectedHandles.push_back(static_cast(tagAllocation)->getDefaultHandle());
    expectedHandles.push_back(static_cast(commandBuffer)->getDefaultHandle());
    expectedHandles.push_back(static_cast(dshAlloc)->getDefaultHandle());
    expectedHandles.push_back(static_cast(iohAlloc)->getDefaultHandle());
    expectedHandles.push_back(static_cast(sshAlloc)->getDefaultHandle());
    expectedHandles.push_back(static_cast(csrCommandStream)->getDefaultHandle());

    // Every handle that was made resident must be one of the six expected ones.
    // NOTE(review): when csrSurfaceCount is 2 the handle count above is 8 while
    // only six handles are listed here -- confirm this holds for MidThread.
    for (auto i = 0u; i < wddm->makeResidentResult.handleCount; i++) {
        auto handle = wddm->makeResidentResult.handlePack[i];
        auto found = false;
        for (auto &expectedHandle : expectedHandles) {
            if (expectedHandle == handle) {
                found = true;
            }
        }
        EXPECT_TRUE(found);
    }

    // Tag, command buffer, SSH and the CSR command stream are expected on the trim
    // candidate list after submission; DSH and IOH are expected to stay off it.
    EXPECT_NE(trimListUnusedPosition, static_cast(tagAllocation)->getTrimCandidateListPosition(csr->getOsContext().getContextId()));
    EXPECT_NE(trimListUnusedPosition, static_cast(commandBuffer)->getTrimCandidateListPosition(csr->getOsContext().getContextId()));
    EXPECT_EQ(trimListUnusedPosition, static_cast(dshAlloc)->getTrimCandidateListPosition(csr->getOsContext().getContextId()));
    EXPECT_EQ(trimListUnusedPosition, static_cast(iohAlloc)->getTrimCandidateListPosition(csr->getOsContext().getContextId()));
    EXPECT_NE(trimListUnusedPosition, static_cast(sshAlloc)->getTrimCandidateListPosition(csr->getOsContext().getContextId()));
    EXPECT_NE(trimListUnusedPosition, static_cast(csrCommandStream)->getTrimCandidateListPosition(csr->getOsContext().getContextId()));

    memoryManager->freeGraphicsMemory(dshAlloc);
    memoryManager->freeGraphicsMemory(iohAlloc);
    memoryManager->freeGraphicsMemory(sshAlloc);
    memoryManager->freeGraphicsMemory(commandBuffer);
    if (device->getPreemptionMode() == PreemptionMode::MidThread) {
        // Free the SIP allocation created at the top of the test and put the saved one back.
        memoryManager->freeGraphicsMemory(GlobalMockSipProgram::sipProgram->getAllocation());
        GlobalMockSipProgram::sipProgram->resetAllocation(tmpAllocation);
    }
}

// Plain gtest fixture with no shared state.
using WddmSimpleTest = ::testing::Test;

// With CsrDispatchMode set to 0, a newly constructed mock WDDM CSR must report
// DispatchMode::BatchedDispatch (the body continues on the following line).
// NOTE(review): the debug flag is set with no restore visible in this test --
// confirm it is reset elsewhere.
HWTEST_F(WddmSimpleTest, givenDefaultWddmCsrWhenItIsCreatedThenBatchingIsTurnedOn) {
    DebugManager.flags.CsrDispatchMode.set(0);
    ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
    std::unique_ptr device(Device::create(executionEnvironment, 0u));
    auto wddm = Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get());
    executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique();
    executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm);
executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); std::unique_ptr> mockCsr(new MockWddmCsr(*executionEnvironment, 0)); EXPECT_EQ(DispatchMode::BatchedDispatch, mockCsr->dispatchMode); } HWTEST_F(WddmDefaultTest, givenFtrWddmHwQueuesFlagWhenCreatingCsrThenPickWddmVersionBasingOnFtrFlag) { auto wddm = Wddm::createWddm(nullptr, *pDevice->executionEnvironment->rootDeviceEnvironments[0].get()); pDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); pDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm); pDevice->executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); WddmCommandStreamReceiver wddmCsr(*pDevice->executionEnvironment, 0); auto wddmFromCsr = wddmCsr.peekWddm(); EXPECT_EQ(typeid(*wddmFromCsr), typeid(WddmMock)); } struct WddmCsrCompressionTests : ::testing::Test { void setCompressionEnabled(bool enableForBuffer, bool enableForImages) { RuntimeCapabilityTable capabilityTable = defaultHwInfo->capabilityTable; capabilityTable.ftrRenderCompressedBuffers = enableForBuffer; capabilityTable.ftrRenderCompressedImages = enableForImages; hwInfo->capabilityTable = capabilityTable; } HardwareInfo *hwInfo = nullptr; WddmMock *myMockWddm; }; struct WddmCsrCompressionParameterizedTest : WddmCsrCompressionTests, ::testing::WithParamInterface { void SetUp() override { compressionEnabled = GetParam(); } bool compressionEnabled; }; HWTEST_P(WddmCsrCompressionParameterizedTest, givenEnabledCompressionWhenInitializedThenCreatePagetableMngr) { uint32_t index = 1u; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2); std::unique_ptr device(Device::create(executionEnvironment, 1u)); setCompressionEnabled(compressionEnabled, !compressionEnabled); myMockWddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm()); EXPECT_EQ(nullptr, 
executionEnvironment->rootDeviceEnvironments[index]->pageTableManager.get());
    // (continuation of givenEnabledCompressionWhenInitializedThenCreatePagetableMngr,
    // which starts on the preceding line)

    MockWddmCsr mockWddmCsr(*executionEnvironment, index);
    mockWddmCsr.createPageTableManager();
    ASSERT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[index]->pageTableManager.get());

    // NOTE(review): the cast below (and MockWddmCsr / std::unique_ptr elsewhere in
    // these tests) has no template argument in this copy of the file; presumably
    // lost in extraction -- confirm against upstream.
    auto mockMngr = reinterpret_cast(executionEnvironment->rootDeviceEnvironments[index]->pageTableManager.get());
    // The manager must have been handed this CSR exactly once.
    EXPECT_EQ(1u, mockMngr->setCsrHanleCalled);
    EXPECT_EQ(&mockWddmCsr, mockMngr->passedCsrHandle);

    // Expected creation arguments: only the L3-address write callback set, and
    // the AUXTT translation-table flag.
    GMM_TRANSLATIONTABLE_CALLBACKS expectedTTCallbacks = {};
    unsigned int expectedFlags = TT_TYPE::AUXTT;

    expectedTTCallbacks.pfWriteL3Adr = TTCallbacks::writeL3Address;

    EXPECT_TRUE(memcmp(&expectedTTCallbacks, &mockMngr->translationTableCb, sizeof(GMM_TRANSLATIONTABLE_CALLBACKS)) == 0);
    EXPECT_TRUE(memcmp(&expectedFlags, &mockMngr->translationTableFlags, sizeof(unsigned int)) == 0);
}

// With compression disabled for both buffers and images, constructing the CSR
// must leave root device 1 without a page table manager.
HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenInitializedThenDontCreatePagetableMngr) {
    ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2);
    std::unique_ptr device(Device::create(executionEnvironment, 1u));
    setCompressionEnabled(false, false);
    myMockWddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm());
    MockWddmCsr mockWddmCsr(*executionEnvironment, 1);
    EXPECT_EQ(nullptr, executionEnvironment->rootDeviceEnvironments[1]->pageTableManager.get());
}

// Runs the parameterised compression test once with false and once with true.
INSTANTIATE_TEST_CASE_P(
    WddmCsrCompressionParameterizedTestCreate,
    WddmCsrCompressionParameterizedTest,
    ::testing::Bool());

// With compression disabled, neither flushTask nor the surrounding setup may
// create a page table manager for root device 1; this is re-checked after every
// step.
HWTEST_F(WddmCsrCompressionTests, givenDisabledCompressionWhenFlushingThenDontInitTranslationTable) {
    ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2);
    setCompressionEnabled(false, false);
    myMockWddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm());

    // Raw pointer: the CSR is handed to the device through resetCommandStreamReceiver below.
    auto mockWddmCsr = new MockWddmCsr(*executionEnvironment, 1);
    mockWddmCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

    executionEnvironment->memoryManager.reset(new WddmMemoryManager(*executionEnvironment));

    std::unique_ptr device(Device::create(executionEnvironment, 1u));
    device->resetCommandStreamReceiver(mockWddmCsr);

    auto memoryManager = executionEnvironment->memoryManager.get();

    EXPECT_EQ(nullptr, executionEnvironment->rootDeviceEnvironments[1]->pageTableManager.get());

    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockWddmCsr->getRootDeviceIndex(), MemoryConstants::pageSize});
    // One heap object doubles as command stream and as all three indirect heaps.
    IndirectHeap cs(graphicsAllocation);

    EXPECT_EQ(nullptr, executionEnvironment->rootDeviceEnvironments[1]->pageTableManager.get());

    DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    mockWddmCsr->flushTask(cs, 0u, cs, cs, cs, 0u, dispatchFlags, *device);

    EXPECT_EQ(nullptr, executionEnvironment->rootDeviceEnvironments[1]->pageTableManager.get());

    mockWddmCsr->flushBatchedSubmissions();
    memoryManager->freeGraphicsMemory(graphicsAllocation);
}

// Without touching any debug flag, initDirectSubmission succeeds but direct
// submission stays disabled.
TEST_F(WddmCommandStreamTest, whenDirectSubmissionDisabledThenExpectNoFeatureAvailable) {
    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// EnableDirectSubmission = 1 and the RCS engine marked as supported: direct
// submission must be enabled after initialisation.
// NOTE(review): the debug flag is set with no restore visible in these tests --
// confirm the fixture resets it.
TEST_F(WddmCommandStreamTest, whenDirectSubmissionEnabledOnRcsThenExpectFeatureAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);

    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_TRUE(csr->isDirectSubmissionEnabled());
}

// EnableDirectSubmission = 1 but RCS marked as unsupported (the body continues on
// the following line of the file).
TEST_F(WddmCommandStreamTest, givenDirectSubmissionEnabledWhenPlatformNotSupportsRcsThenExpectFeatureNotAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);

    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = false;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// Replaces the fixture's osContext with a BCS context marked as default, and
// marks BCS as supported: initDirectSubmission returns true and direct
// submission is reported enabled.
// NOTE(review): the three trailing bool arguments of OsContext::create are
// presumably lowPriority / internal / rootDevice (inferred from the sibling
// test names) - confirm against its declaration.
TEST_F(WddmCommandStreamTest, whenDirectSubmissionEnabledOnBcsThenExpectFeatureAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_BCS, PreemptionMode::ThreadGroup,
                                      false, false, false));
    osContext->setDefaultContext(true);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = true;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_TRUE(csr->isDirectSubmissionEnabled());
}

// Same BCS default context, but BCS marked as not supported:
// initDirectSubmission returns true, direct submission is reported disabled.
TEST_F(WddmCommandStreamTest, givenDirectSubmissionEnabledWhenPlatformNotSupportsBcsThenExpectFeatureNotAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_BCS, PreemptionMode::ThreadGroup,
                                      false, false, false));
    osContext->setDefaultContext(true);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = false;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// RCS context created with the first trailing bool set, RCS supported but
// useLowPriority = false: initDirectSubmission returns true, direct submission
// is reported disabled.
TEST_F(WddmCommandStreamTest, givenLowPriorityContextWhenDirectSubmissionDisabledOnLowPriorityThenExpectFeatureNotAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup,
true, false, false));
    osContext->setDefaultContext(true);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useLowPriority = false;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// RCS context created with the first trailing bool set, RCS supported and
// useLowPriority = true: initDirectSubmission returns true and direct
// submission is reported enabled.
// Unlike the "Disabled" sibling test, setDefaultContext is not called here.
// NOTE(review): the three trailing bool arguments of OsContext::create are
// presumably lowPriority / internal / rootDevice (inferred from the test
// names) - confirm against its declaration.
TEST_F(WddmCommandStreamTest, givenLowPriorityContextWhenDirectSubmissionEnabledOnLowPriorityThenExpectFeatureAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup,
                                      true, false, false));
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useLowPriority = true;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_TRUE(csr->isDirectSubmissionEnabled());
}

// RCS context created with the second trailing bool set and marked as default,
// RCS supported but useInternal = false: initDirectSubmission returns true,
// direct submission is reported disabled.
TEST_F(WddmCommandStreamTest, givenInternalContextWhenDirectSubmissionDisabledOnInternalThenExpectFeatureNotAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup,
                                      false, true, false));
    osContext->setDefaultContext(true);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useInternal = false;

    bool ret = csr->initDirectSubmission(*device.get(),
*osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// RCS context created with the second trailing bool set, RCS supported and
// useInternal = true: initDirectSubmission returns true and direct submission
// is reported enabled. setDefaultContext is not called in this variant.
// NOTE(review): the three trailing bool arguments of OsContext::create are
// presumably lowPriority / internal / rootDevice (inferred from the test
// names) - confirm against its declaration.
TEST_F(WddmCommandStreamTest, givenInternalContextWhenDirectSubmissionEnabledOnInternalThenExpectFeatureAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup,
                                      false, true, false));
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useInternal = true;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_TRUE(csr->isDirectSubmissionEnabled());
}

// RCS context created with the third trailing bool set and marked as default,
// RCS supported but useRootDevice = false: initDirectSubmission returns true,
// direct submission is reported disabled.
TEST_F(WddmCommandStreamTest, givenRootDeviceContextWhenDirectSubmissionDisabledOnRootDeviceThenExpectFeatureNotAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup,
                                      false, false, true));
    osContext->setDefaultContext(true);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useRootDevice = false;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// RCS context created with the third trailing bool set, RCS supported and
// useRootDevice = true: initDirectSubmission returns true and direct
// submission is reported enabled.
TEST_F(WddmCommandStreamTest, givenRootDeviceContextWhenDirectSubmissionEnabledOnRootDeviceThenExpectFeatureAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS,
PreemptionMode::ThreadGroup, false, false, true));
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useRootDevice = true;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_TRUE(csr->isDirectSubmissionEnabled());
}

// RCS context explicitly marked as non-default (setDefaultContext(false)),
// RCS supported but useNonDefault = false: initDirectSubmission returns true,
// direct submission is reported disabled.
// NOTE(review): no DebugManagerStateRestore is visible in this test; presumably
// the fixture restores EnableDirectSubmission - confirm.
TEST_F(WddmCommandStreamTest, givenNonDefaultContextWhenDirectSubmissionDisabledOnNonDefaultThenExpectFeatureNotAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup,
                                      false, false, false));
    osContext->setDefaultContext(false);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useNonDefault = false;

    bool ret = csr->initDirectSubmission(*device.get(), *osContext.get());
    EXPECT_TRUE(ret);
    EXPECT_FALSE(csr->isDirectSubmissionEnabled());
}

// Same non-default RCS context, but useNonDefault = true:
// initDirectSubmission returns true and direct submission is reported enabled.
TEST_F(WddmCommandStreamTest, givenNonDefaultContextContextWhenDirectSubmissionEnabledOnNonDefaultContextThenExpectFeatureAvailable) {
    DebugManager.flags.EnableDirectSubmission.set(1);
    osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(),
                                      0, 0, aub_stream::ENGINE_RCS, PreemptionMode::ThreadGroup,
                                      false, false, false));
    osContext->setDefaultContext(false);
    auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].useNonDefault = true;

    bool ret =
csr->initDirectSubmission(*device.get(), *osContext.get()); EXPECT_TRUE(ret); EXPECT_TRUE(csr->isDirectSubmissionEnabled()); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/device_os_tests.cpp000066400000000000000000000060101363734646600316500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/get_info.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using namespace ::testing; namespace NEO { TEST(DeviceOsTest, GivenDefaultClDeviceWhenCheckingForOsSpecificExtensionsThenCorrectExtensionsAreSet) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); auto pClDevice = new ClDevice{*pDevice, platform()}; std::string extensionString(pClDevice->getDeviceInfo().deviceExtensions); EXPECT_THAT(extensionString, Not(HasSubstr(std::string("cl_intel_va_api_media_sharing ")))); EXPECT_THAT(extensionString, HasSubstr(std::string("cl_intel_dx9_media_sharing "))); EXPECT_THAT(extensionString, HasSubstr(std::string("cl_khr_dx9_media_sharing "))); EXPECT_THAT(extensionString, HasSubstr(std::string("cl_khr_d3d10_sharing "))); EXPECT_THAT(extensionString, HasSubstr(std::string("cl_khr_d3d11_sharing "))); EXPECT_THAT(extensionString, HasSubstr(std::string("cl_intel_d3d11_nv12_media_sharing "))); EXPECT_THAT(extensionString, HasSubstr(std::string("cl_intel_simultaneous_sharing "))); delete pClDevice; } TEST(DeviceOsTest, supportedSimultaneousInterops) { auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector expected = {CL_GL_CONTEXT_KHR, CL_WGL_HDC_KHR, CL_CONTEXT_ADAPTER_D3D9_KHR, 
CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_ADAPTER_D3D9EX_KHR, CL_CONTEXT_D3D9EX_DEVICE_INTEL, CL_CONTEXT_ADAPTER_DXVA_KHR, CL_CONTEXT_DXVA_DEVICE_INTEL, CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR, 0}; EXPECT_TRUE(pDevice->simultaneousInterops == expected); } TEST(DeviceOsTest, DeviceCreationFail) { auto hwInfo = defaultHwInfo.get(); auto pDevice = MockDevice::createWithNewExecutionEnvironment(hwInfo); EXPECT_THAT(pDevice, nullptr); } TEST(DeviceOsTest, DeviceCreationFailMidThreadPreemption) { DebugManagerStateRestore dbgRestore; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); auto pDevice = MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()); EXPECT_THAT(pDevice, nullptr); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/driver_info_tests.cpp000066400000000000000000000233061363734646600322250ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/windows/debug_registry_reader.h" #include "shared/source/os_interface/windows/driver_info_windows.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/os_interface/windows/registry_reader_tests.h" #include "gtest/gtest.h" #include 
namespace NEO { namespace SysCalls { extern const wchar_t *currentLibraryPath; } extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[IGFX_MAX_CORE]; CommandStreamReceiver *createMockCommandStreamReceiver(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); class DriverInfoDeviceTest : public ::testing::Test { public: void SetUp() { hwInfo = defaultHwInfo.get(); commandStreamReceiverCreateFunc = commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = createMockCommandStreamReceiver; } void TearDown() { commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = commandStreamReceiverCreateFunc; } CommandStreamReceiverCreateFunc commandStreamReceiverCreateFunc; const HardwareInfo *hwInfo; }; CommandStreamReceiver *createMockCommandStreamReceiver(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { auto csr = new MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex); if (!executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface) { executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface = std::make_unique(); auto wddm = new WddmMock(*executionEnvironment.rootDeviceEnvironments[0]); wddm->init(); executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface->get()->setWddm(wddm); } EXPECT_NE(nullptr, executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface.get()); return csr; } class MockDriverInfoWindows : public DriverInfoWindows { public: using DriverInfoWindows::DriverInfoWindows; using DriverInfoWindows::path; using DriverInfoWindows::registryReader; const char *getRegistryReaderRegKey() { return reader->getRegKey(); } TestedRegistryReader *reader = nullptr; static MockDriverInfoWindows *create(std::string path) { auto result = new MockDriverInfoWindows(""); result->reader = new TestedRegistryReader(path); 
result->registryReader.reset(result->reader);
        return result;
    };
};

// With ultHwConfig.useHwCsr = true, a device created through
// createWithNewExecutionEnvironment is expected to carry a non-null driverInfo.
// NOTE(review): VariableBackup presumably restores ultHwConfig when it goes out
// of scope - confirm against its definition.
TEST_F(DriverInfoDeviceTest, GivenDeviceCreatedWhenCorrectOSInterfaceThenCreateDriverInfo) {
    VariableBackup backup(&ultHwConfig);
    ultHwConfig.useHwCsr = true;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(hwInfo));

    EXPECT_NE(nullptr, device->driverInfo.get());
}

// With ultHwConfig.useHwCsr = false, the created device is expected to have no driverInfo.
TEST_F(DriverInfoDeviceTest, GivenDeviceCreatedWithoutCorrectOSInterfaceThenDontCreateDriverInfo) {
    VariableBackup backup(&ultHwConfig);
    ultHwConfig.useHwCsr = false;
    auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(hwInfo));

    EXPECT_EQ(nullptr, device->driverInfo.get());
}

// SettingsReader stub that records which keys were requested and returns
// canned values, so tests can check which registry keys DriverInfoWindows reads.
class MockRegistryReader : public SettingsReader {
  public:
    std::string nameString;
    std::string versionString;

    // String lookup. Behaviour by key:
    //   "HardwareInformation.AdapterString" -> sets properNameKey, returns `value`
    //   "DriverVersion"                     -> sets properVersionKey, returns `value`
    //   "UserModeDriverName"                -> sets properMediaSharingExtensions and using64bit, returns returnString
    //   "UserModeDriverNameWOW"             -> sets properMediaSharingExtensions, returns returnString
    //   "DriverStorePathForComputeRuntime"  -> returns driverStorePath
    //   anything else                       -> returns `value` (the caller's default)
    std::string getSetting(const char *settingName, const std::string &value) override {
        std::string key(settingName);
        if (key == "HardwareInformation.AdapterString") {
            properNameKey = true;
        } else if (key == "DriverVersion") {
            properVersionKey = true;
        } else if (key == "UserModeDriverName") {
            properMediaSharingExtensions = true;
            using64bit = true;
            return returnString;
        } else if (key == "UserModeDriverNameWOW") {
            properMediaSharingExtensions = true;
            return returnString;
        }
        if (key == "DriverStorePathForComputeRuntime") {
            return driverStorePath;
        }
        return value;
    }

    // The bool and int32_t overloads ignore the key and echo the default back.
    bool getSetting(const char *settingName, bool defaultValue) override { return defaultValue; };
    int32_t getSetting(const char *settingName, int32_t defaultValue) override { return defaultValue; };
    // Returns a pointer into the caller's string; valid only while `name` is alive.
    const char *appSpecificLocation(const std::string &name) override { return name.c_str(); };

    // Flags set by the string getSetting overload when the matching key is queried.
    bool properNameKey = false;
    bool properVersionKey = false;
    // Value returned for "DriverStorePathForComputeRuntime".
    std::string driverStorePath = "driverStore\\0x8086";
    bool properMediaSharingExtensions = false;
    // Set only when the non-WOW key "UserModeDriverName" is queried.
    bool using64bit = false;
    // Value returned for both UserModeDriverName keys.
    std::string returnString = "";
};

// Fixture that makes DriverInfoWindows create its registry reader through the
// lambda installed in SetUp.
struct DriverInfoWindowsTest : public ::testing::Test {
    void SetUp() override {
        DriverInfoWindows::createRegistryReaderFunc = [](const std::string &) ->
std::unique_ptr { return std::make_unique(); };
        driverInfo = std::make_unique("");
    }

    // NOTE(review): presumably restores createRegistryReaderFunc when the
    // fixture is destroyed - confirm against VariableBackup.
    VariableBackup createFuncBackup{&DriverInfoWindows::createRegistryReaderFunc};
    std::unique_ptr driverInfo;
};

// getDeviceName and getVersion are each called with a default string. The test
// expects the default to come back unchanged and the reader mock to have seen
// the corresponding key (properNameKey / properVersionKey set).
TEST_F(DriverInfoWindowsTest, GivenDriverInfoWhenThenReturnNonNullptr) {
    auto registryReaderMock = static_cast(driverInfo->registryReader.get());

    std::string defaultName = "defaultName";

    auto name = driverInfo->getDeviceName(defaultName);

    EXPECT_STREQ(defaultName.c_str(), name.c_str());
    EXPECT_TRUE(registryReaderMock->properNameKey);

    std::string defaultVersion = "defaultVersion";

    auto driverVersion = driverInfo->getVersion(defaultVersion);

    EXPECT_STREQ(defaultVersion.c_str(), driverVersion.c_str());
    EXPECT_TRUE(registryReaderMock->properVersionKey);
};

// With a freshly constructed MockRegistryReader (returnString left at its
// initial value), getMediaSharingSupport is expected to return true. The test
// also checks that the 64-bit key was used exactly when is64bit is true.
TEST(DriverInfo, givenDriverInfoWhenGetStringReturnNotMeaningEmptyStringThenEnableSharingSupport) {
    MockDriverInfoWindows driverInfo("");
    // Ownership of the raw pointer passes to driverInfo.registryReader via reset() below.
    MockRegistryReader *registryReaderMock = new MockRegistryReader();

    driverInfo.registryReader.reset(registryReaderMock);
    auto enable = driverInfo.getMediaSharingSupport();

    EXPECT_TRUE(enable);
    EXPECT_EQ(is64bit, registryReaderMock->using64bit);
    EXPECT_TRUE(registryReaderMock->properMediaSharingExtensions);
};

// When the reader returns "<>" for the driver-name key, getMediaSharingSupport
// is expected to return false; the key-usage checks are the same as above.
TEST(DriverInfo, givenDriverInfoWhenGetStringReturnMeaningEmptyStringThenDisableSharingSupport) {
    MockDriverInfoWindows driverInfo("");
    MockRegistryReader *registryReaderMock = new MockRegistryReader();
    registryReaderMock->returnString = "<>";
    driverInfo.registryReader.reset(registryReaderMock);

    auto enable = driverInfo.getMediaSharingSupport();

    EXPECT_FALSE(enable);
    EXPECT_EQ(is64bit, registryReaderMock->using64bit);
    EXPECT_TRUE(registryReaderMock->properMediaSharingExtensions);
};

// A path prefixed with "\REGISTRY\MACHINE\" is passed to the constructor; the
// stored `path` is expected to equal the part after that prefix.
TEST(DriverInfo, givenFullPathToRegistryWhenCreatingDriverInfoWindowsThenTheRegistryPathIsTrimmed) {
    std::string registryPath = "Path\\In\\Registry";
    std::string fullRegistryPath = "\\REGISTRY\\MACHINE\\" + registryPath;
    std::string expectedTrimmedRegistryPath = registryPath;
    MockDriverInfoWindows
driverInfo(std::move(fullRegistryPath)); EXPECT_STREQ(expectedTrimmedRegistryPath.c_str(), driverInfo.path.c_str()); }; TEST(DriverInfo, givenInitializedOsInterfaceWhenCreateDriverInfoThenReturnDriverInfoWindowsNotNullptr) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr osInterface(new OSInterface()); osInterface->get()->setWddm(Wddm::createWddm(nullptr, rootDeviceEnvironment)); EXPECT_NE(nullptr, osInterface->get()->getWddm()); std::unique_ptr driverInfo(DriverInfo::create(osInterface.get())); EXPECT_NE(nullptr, driverInfo); }; TEST(DriverInfo, givenNotInitializedOsInterfaceWhenCreateDriverInfoThenReturnDriverInfoWindowsNullptr) { std::unique_ptr osInterface; std::unique_ptr driverInfo(DriverInfo::create(osInterface.get())); EXPECT_EQ(nullptr, driverInfo); }; TEST(DriverInfo, givenInitializedOsInterfaceWhenCreateDriverInfoWindowsThenSetRegistryReaderWithExpectRegKey) { std::string path = ""; std::unique_ptr driverInfo(MockDriverInfoWindows::create(path)); EXPECT_STREQ(driverInfo->getRegistryReaderRegKey(), driverInfo->reader->getRegKey()); }; TEST_F(DriverInfoWindowsTest, whenCurrentLibraryIsLoadedFromDriverStorePointedByDriverInfoThenItIsCompatible) { VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"driverStore\\0x8086\\myLib.dll"; EXPECT_TRUE(driverInfo->isCompatibleDriverStore()); } TEST_F(DriverInfoWindowsTest, whenCurrentLibraryIsLoadedFromDifferentDriverStoreThanPointedByDriverInfoThenItIsNotCompatible) { VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"driverStore\\different_driverStore\\myLib.dll"; EXPECT_FALSE(driverInfo->isCompatibleDriverStore()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/file_logger_win_tests.cpp000066400000000000000000000072101363734646600330460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "opencl/test/unit_test/utilities/file_logger_tests.h" #include "test.h" using namespace NEO; TEST(FileLogger, GivenLogAllocationMemoryPoolFlagThenLogsCorrectInfo) { std::string testFile = "testfile"; DebugVariables flags; flags.LogAllocationMemoryPool.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockWddmAllocation allocation; allocation.handle = 4; allocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER); allocation.memoryPool = MemoryPool::System64KBPages; auto gmm = std::make_unique(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 0, false); allocation.setDefaultGmm(gmm.get()); allocation.getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly = 0; fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("Handle: 4") != std::string::npos); EXPECT_TRUE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_TRUE(str.find("AllocationType: BUFFER") != std::string::npos); } } TEST(FileLogger, GivenLogAllocationMemoryPoolFlagSetFalseThenAllocationIsNotLogged) { std::string testFile = "testfile"; 
DebugVariables flags; flags.LogAllocationMemoryPool.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); MockWddmAllocation allocation; allocation.handle = 4; allocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER); allocation.memoryPool = MemoryPool::System64KBPages; auto gmm = std::make_unique(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 0, false); allocation.setDefaultGmm(gmm.get()); allocation.getDefaultGmm()->resourceParams.Flags.Info.NonLocalOnly = 0; fileLogger.logAllocation(&allocation); std::thread::id thisThread = std::this_thread::get_id(); std::stringstream threadIDCheck; threadIDCheck << " ThreadID: " << thisThread; std::stringstream memoryPoolCheck; memoryPoolCheck << " MemoryPool: " << allocation.getMemoryPool(); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_FALSE(str.find(threadIDCheck.str()) != std::string::npos); EXPECT_FALSE(str.find("Handle: 4") != std::string::npos); EXPECT_FALSE(str.find(memoryPoolCheck.str()) != std::string::npos); EXPECT_FALSE(str.find("AllocationType: BUFFER") != std::string::npos); EXPECT_FALSE(str.find("NonLocalOnly: 0") != std::string::npos); } } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/gdi_dll_fixture.h000066400000000000000000000134001363734646600313000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_library.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "opencl/test/unit_test/mock_gdi/mock_gdi.h" using namespace NEO; OsLibrary *setAdapterInfo(const PLATFORM *platform, const GT_SYSTEM_INFO *gtSystemInfo, uint64_t 
gpuAddressSpace);

// Fixture that obtains the mock GDI library from setAdapterInfo (configured
// with the default hardware info) and resolves its exported helper functions
// by name into the *Fcn members.
// NOTE(review): none of the getProcAddress results is checked for null before
// use (e.g. setMapGpuVaFailConfigFcn is called right after being resolved);
// a missing export would crash in SetUp.
struct GdiDllFixture {
    // Loads the mock library, resolves all helper entry points and resets the
    // mock state that the tests inspect.
    virtual void SetUp() {
        const HardwareInfo *hwInfo = defaultHwInfo.get();
        mockGdiDll.reset(setAdapterInfo(&hwInfo->platform, &hwInfo->gtSystemInfo, hwInfo->capabilityTable.gpuAddressSpace));

        setSizesFcn = reinterpret_cast(mockGdiDll->getProcAddress("MockSetSizes"));
        getSizesFcn = reinterpret_cast(mockGdiDll->getProcAddress("GetMockSizes"));
        getMockLastDestroyedResHandleFcn = reinterpret_cast(mockGdiDll->getProcAddress("GetMockLastDestroyedResHandle"));
        setMockLastDestroyedResHandleFcn = reinterpret_cast(mockGdiDll->getProcAddress("SetMockLastDestroyedResHandle"));
        getMockCreateDeviceParamsFcn = reinterpret_cast(mockGdiDll->getProcAddress("GetMockCreateDeviceParams"));
        setMockCreateDeviceParamsFcn = reinterpret_cast(mockGdiDll->getProcAddress("SetMockCreateDeviceParams"));
        getMockAllocationFcn = reinterpret_cast(mockGdiDll->getProcAddress("getMockAllocation"));
        getAdapterInfoAddressFcn = reinterpret_cast(mockGdiDll->getProcAddress("getAdapterInfoAddress"));
        getLastCallMapGpuVaArgFcn = reinterpret_cast(mockGdiDll->getProcAddress("getLastCallMapGpuVaArg"));
        getLastCallReserveGpuVaArgFcn = reinterpret_cast(mockGdiDll->getProcAddress("getLastCallReserveGpuVaArg"));
        setMapGpuVaFailConfigFcn = reinterpret_cast(mockGdiDll->getProcAddress("setMapGpuVaFailConfig"));
        // Start every test with the MapGpuVa failure configuration set to (0, 0).
        setMapGpuVaFailConfigFcn(0, 0);
        getCreateContextDataFcn = reinterpret_cast(mockGdiDll->getProcAddress("getCreateContextData"));
        getCreateHwQueueDataFcn = reinterpret_cast(mockGdiDll->getProcAddress("getCreateHwQueueData"));
        getDestroyHwQueueDataFcn = reinterpret_cast(mockGdiDll->getProcAddress("getDestroyHwQueueData"));
        getSubmitCommandToHwQueueDataFcn = reinterpret_cast(mockGdiDll->getProcAddress("getSubmitCommandToHwQueueData"));
        getDestroySynchronizationObjectDataFcn = reinterpret_cast(mockGdiDll->getProcAddress("getDestroySynchronizationObjectData"));
        getMonitorFenceCpuFenceAddressFcn = reinterpret_cast(mockGdiDll->getProcAddress("getMonitorFenceCpuFenceAddress"));
        getCreateSynchronizationObject2FailCallFcn = reinterpret_cast(mockGdiDll->getProcAddress("getCreateSynchronizationObject2FailCall"));
        getRegisterTrimNotificationFailCallFcn = reinterpret_cast(mockGdiDll->getProcAddress("getRegisterTrimNotificationFailCall"));

        // Reset the state exposed by the mock: last destroyed handle, recorded
        // destroy-sync-object data, and both "fail call" switches.
        setMockLastDestroyedResHandleFcn((D3DKMT_HANDLE)0);
        *getDestroySynchronizationObjectDataFcn() = {};
        *getCreateSynchronizationObject2FailCallFcn() = false;
        *getRegisterTrimNotificationFailCallFcn() = false;
    }

    // Clears the recorded call data and failure switches in the mock library,
    // presumably so state does not carry over into the next test.
    virtual void TearDown() {
        *getCreateHwQueueDataFcn() = {};
        *getDestroyHwQueueDataFcn() = {};
        *getSubmitCommandToHwQueueDataFcn() = {};
        *getDestroySynchronizationObjectDataFcn() = {};
        setMapGpuVaFailConfigFcn(0, 0);
        *getCreateSynchronizationObject2FailCallFcn() = false;
        *getRegisterTrimNotificationFailCallFcn() = false;
    }

    // Handle to the loaded mock GDI library.
    std::unique_ptr mockGdiDll;

    // Entry points resolved in SetUp; each is typed after the function of the
    // same name and stays nullptr until SetUp runs.
    decltype(&MockSetSizes) setSizesFcn = nullptr;
    decltype(&GetMockSizes) getSizesFcn = nullptr;
    decltype(&GetMockLastDestroyedResHandle) getMockLastDestroyedResHandleFcn = nullptr;
    decltype(&SetMockLastDestroyedResHandle) setMockLastDestroyedResHandleFcn = nullptr;
    decltype(&GetMockCreateDeviceParams) getMockCreateDeviceParamsFcn = nullptr;
    decltype(&SetMockCreateDeviceParams) setMockCreateDeviceParamsFcn = nullptr;
    decltype(&getMockAllocation) getMockAllocationFcn = nullptr;
    decltype(&getAdapterInfoAddress) getAdapterInfoAddressFcn = nullptr;
    decltype(&getLastCallMapGpuVaArg) getLastCallMapGpuVaArgFcn = nullptr;
    decltype(&getLastCallReserveGpuVaArg) getLastCallReserveGpuVaArgFcn = nullptr;
    decltype(&setMapGpuVaFailConfig) setMapGpuVaFailConfigFcn = nullptr;
    decltype(&getCreateContextData) getCreateContextDataFcn = nullptr;
    decltype(&getCreateHwQueueData) getCreateHwQueueDataFcn = nullptr;
    decltype(&getDestroyHwQueueData) getDestroyHwQueueDataFcn = nullptr;
    decltype(&getSubmitCommandToHwQueueData) getSubmitCommandToHwQueueDataFcn = nullptr;
    decltype(&getDestroySynchronizationObjectData) getDestroySynchronizationObjectDataFcn = nullptr;
decltype(&getMonitorFenceCpuFenceAddress) getMonitorFenceCpuFenceAddressFcn = nullptr; decltype(&getCreateSynchronizationObject2FailCall) getCreateSynchronizationObject2FailCallFcn = nullptr; decltype(&getRegisterTrimNotificationFailCall) getRegisterTrimNotificationFailCallFcn = nullptr; }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/gl/000077500000000000000000000000001363734646600263675ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/gl/CMakeLists.txt000066400000000000000000000006621363734646600311330ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_os_interface_windows_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gl_os_sharing_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_options.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_dll_helper.h ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_windows_gl}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/gl/gl_dll_helper.h000066400000000000000000000066541363734646600313470ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include "shared/source/os_interface/os_library.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "Gl/gl.h" #include using namespace NEO; namespace Os { extern const char *openglDllName; } namespace NEO { struct GLMockReturnedValues; } using GLString = void (*)(const char *, unsigned int); using GLSharedOCLContext = void (*)(GLboolean); using glBoolean = GLboolean (*)(); using Void = void (*)(const char *); using Int = int (*)(const char *); using BufferParam = void (*)(CL_GL_BUFFER_INFO); using TextureParam = void (*)(CL_GL_RESOURCE_INFO); using BuffInfo = CL_GL_BUFFER_INFO (*)(); using TextureInfo = CL_GL_RESOURCE_INFO (*)(); 
using GLMockValue = GLMockReturnedValues (*)(); using setGLMockValue = void (*)(GLMockReturnedValues); struct glDllHelper { public: glDllHelper() { glDllLoad.reset(OsLibrary::load(Os::openglDllName)); if (glDllLoad) { glSetString = (*glDllLoad)["glSetString"]; UNRECOVERABLE_IF(glSetString == nullptr); glSetStringi = (*glDllLoad)["glSetStringi"]; UNRECOVERABLE_IF(glSetStringi == nullptr); setGLSetSharedOCLContextStateReturnedValue = (*glDllLoad)["setGLSetSharedOCLContextStateReturnedValue"]; UNRECOVERABLE_IF(setGLSetSharedOCLContextStateReturnedValue == nullptr); getGLSetSharedOCLContextStateReturnedValue = (*glDllLoad)["getGLSetSharedOCLContextStateReturnedValue"]; UNRECOVERABLE_IF(getGLSetSharedOCLContextStateReturnedValue == nullptr); resetParam = (*glDllLoad)["resetParam"]; UNRECOVERABLE_IF(resetParam == nullptr); getParam = (*glDllLoad)["getParam"]; UNRECOVERABLE_IF(getParam == nullptr); loadBuffer = (*glDllLoad)["loadBuffer"]; UNRECOVERABLE_IF(loadBuffer == nullptr); getBufferInfo = (*glDllLoad)["getBufferInfo"]; UNRECOVERABLE_IF(getBufferInfo == nullptr); getTextureInfo = (*glDllLoad)["getTextureInfo"]; UNRECOVERABLE_IF(getTextureInfo == nullptr); Void memParam = (*glDllLoad)["memParam"]; UNRECOVERABLE_IF(memParam == nullptr); loadTexture = (*glDllLoad)["loadTexture"]; UNRECOVERABLE_IF(loadTexture == nullptr); getGlMockReturnedValues = (*glDllLoad)["getGlMockReturnedValues"]; UNRECOVERABLE_IF(getGlMockReturnedValues == nullptr); setGlMockReturnedValues = (*glDllLoad)["setGlMockReturnedValues"]; UNRECOVERABLE_IF(setGlMockReturnedValues == nullptr); } } ~glDllHelper() { if (glDllLoad) { glSetString("Intel", GL_VENDOR); glSetString("4.0", GL_VERSION); glSetStringi("GL_OES_framebuffer_object", 0); glSetStringi("GL_EXT_framebuffer_object", 1); } } GLString glSetString; GLString glSetStringi; GLSharedOCLContext setGLSetSharedOCLContextStateReturnedValue; glBoolean getGLSetSharedOCLContextStateReturnedValue; Void resetParam; Int getParam; BufferParam loadBuffer; 
TextureParam loadTexture; BuffInfo getBufferInfo; TextureInfo getTextureInfo; GLMockValue getGlMockReturnedValues; setGLMockValue setGlMockReturnedValues; private: std::unique_ptr glDllLoad; }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/gl/gl_options.cpp000066400000000000000000000003441363734646600312510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include // above is workaround for copyright checker issue namespace Os { const char *openglDllName = "mock_opengl32.dll"; } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/gl/gl_os_sharing_tests.cpp000066400000000000000000000434231363734646600331410ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "opencl/extensions/public/cl_gl_private_intel.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" #include "gtest/gtest.h" #include using namespace NEO; struct MockOSInterfaceImpl : OSInterface::OSInterfaceImpl { HANDLE createEvent(LPSECURITY_ATTRIBUTES lpEventAttributes, BOOL bManualReset, BOOL bInitialState, LPCSTR lpName) override { if (eventNum++ == failEventNum) { 
return INVALID_HANDLE; } return OSInterface::OSInterfaceImpl::createEvent(lpEventAttributes, bManualReset, bInitialState, lpName); } BOOL closeHandle(HANDLE hObject) override { ++closedEventsCount; return OSInterface::OSInterfaceImpl::closeHandle(hObject); } int eventNum = 1; int failEventNum = 0; int closedEventsCount = 0; }; struct MockOSInterface : OSInterface { MockOSInterface() { if (osInterfaceImpl != nullptr) { delete osInterfaceImpl; } osInterfaceImpl = new MockOSInterfaceImpl(); } }; TEST(glSharingBasicTest, GivenSharingFunctionsWhenItIsConstructedThenBackupContextIsCreated) { GLType GLHDCType = CL_WGL_HDC_KHR; GLContext GLHGLRCHandle = 0; GLDisplay GLHDCHandle = 0; int32_t expectedContextAttrs[3] = {0}; glDllHelper dllHelper; auto glSharingFunctions = new GlSharingFunctionsMock(GLHDCType, GLHGLRCHandle, GLHGLRCHandle, GLHDCHandle); EXPECT_EQ(1, dllHelper.getParam("WGLCreateContextCalled")); EXPECT_EQ(1, dllHelper.getParam("WGLShareListsCalled")); EXPECT_EQ(0, EGLChooseConfigCalled); EXPECT_EQ(0, EGLCreateContextCalled); EXPECT_EQ(0, GlxChooseFBConfigCalled); EXPECT_EQ(0, GlxQueryContextCalled); EXPECT_EQ(0, GlxCreateNewContextCalled); EXPECT_EQ(0, GlxIsDirectCalled); EXPECT_EQ(0, eglBkpContextParams.configAttrs); EXPECT_EQ(0, eglBkpContextParams.numConfigs); EXPECT_TRUE(glSharingFunctions->getBackupContextHandle() != 0); EXPECT_TRUE(memcmp(eglBkpContextParams.contextAttrs, expectedContextAttrs, 3 * sizeof(int32_t)) == 0); EXPECT_EQ(0, glxBkpContextParams.FBConfigAttrs); EXPECT_EQ(0, glxBkpContextParams.queryAttribute); EXPECT_EQ(0, glxBkpContextParams.renderType); delete glSharingFunctions; EXPECT_EQ(1, dllHelper.getParam("WGLDeleteContextCalled")); EXPECT_EQ(1, dllHelper.getParam("GLDeleteContextCalled")); } struct GlArbSyncEventOsTest : public ::testing::Test { void SetUp() override { rootDeviceEnvironment = std::make_unique(executionEnvironment); sharing.GLContextHandle = 0x2cU; sharing.GLDeviceHandle = 0x3cU; wddm = new 
WddmMock(*rootDeviceEnvironment); gdi = new MockGdi(); wddm->resetGdi(gdi); osInterface.get()->setWddm(wddm); } MockExecutionEnvironment executionEnvironment; std::unique_ptr rootDeviceEnvironment; GlSharingFunctionsMock sharing; MockGdi *gdi = nullptr; WddmMock *wddm = nullptr; OSInterface osInterface; CL_GL_SYNC_INFO syncInfo = {}; }; TEST_F(GlArbSyncEventOsTest, WhenCreateSynchronizationObjectSucceedsThenAllHAndlesAreValid) { struct CreateSyncObjectMock { static int &getHandle() { static int handle = 1; return handle; } static void reset() { getHandle() = 1; } static NTSTATUS __stdcall createSynchObject(D3DKMT_CREATESYNCHRONIZATIONOBJECT *pData) { if (pData == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_NE(NULL, pData->hDevice); EXPECT_EQ(D3DDDI_SEMAPHORE, pData->Info.Type); EXPECT_EQ(32, pData->Info.Semaphore.MaxCount); EXPECT_EQ(0, pData->Info.Semaphore.InitialCount); pData->hSyncObject = getHandle()++; return STATUS_SUCCESS; } static NTSTATUS __stdcall createSynchObject2(D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *pData) { if (pData == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_NE(NULL, pData->hDevice); EXPECT_EQ(D3DDDI_CPU_NOTIFICATION, pData->Info.Type); EXPECT_NE(nullptr, pData->Info.CPUNotification.Event); pData->hSyncObject = getHandle()++; return STATUS_SUCCESS; } }; CreateSyncObjectMock::reset(); wddm->init(); gdi->createSynchronizationObject.mFunc = CreateSyncObjectMock::createSynchObject; gdi->createSynchronizationObject2.mFunc = CreateSyncObjectMock::createSynchObject2; auto ret = setupArbSyncObject(sharing, osInterface, syncInfo); EXPECT_TRUE(ret); EXPECT_EQ(1U, syncInfo.serverSynchronizationObject); EXPECT_EQ(2U, syncInfo.clientSynchronizationObject); EXPECT_EQ(3U, syncInfo.submissionSynchronizationObject); EXPECT_EQ(sharing.GLContextHandle, syncInfo.hContextToBlock); EXPECT_NE(nullptr, syncInfo.event); EXPECT_NE(nullptr, syncInfo.eventName); EXPECT_NE(nullptr, syncInfo.submissionEvent); EXPECT_NE(nullptr, 
syncInfo.submissionEventName); EXPECT_FALSE(syncInfo.waitCalled); cleanupArbSyncObject(osInterface, &syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenNewGlSyncInfoWhenCreateSynchronizationObjectFailsThenSetupArbSyncObjectFails) { struct CreateSyncObjectMock { static int &getHandle() { static int handle = 1; return handle; } static int &getFailHandleId() { static int failHandleId = 0; return failHandleId; } static void reset() { getHandle() = 1; getFailHandleId() = 0; } static NTSTATUS __stdcall createSynchObject(D3DKMT_CREATESYNCHRONIZATIONOBJECT *pData) { auto newHandle = getHandle()++; if (newHandle == getFailHandleId()) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } static NTSTATUS __stdcall createSynchObject2(D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *pData) { auto newHandle = getHandle()++; if (newHandle == getFailHandleId()) { return STATUS_INVALID_PARAMETER; } return STATUS_SUCCESS; } }; CreateSyncObjectMock::reset(); wddm->init(); gdi->createSynchronizationObject.mFunc = CreateSyncObjectMock::createSynchObject; gdi->createSynchronizationObject2.mFunc = CreateSyncObjectMock::createSynchObject2; CreateSyncObjectMock::getFailHandleId() = CreateSyncObjectMock::getHandle(); int failuresCount = 0; auto ret = setupArbSyncObject(sharing, osInterface, syncInfo); while (false == ret) { ++failuresCount; CreateSyncObjectMock::getHandle() = 1; ++CreateSyncObjectMock::getFailHandleId(); ret = setupArbSyncObject(sharing, osInterface, syncInfo); } EXPECT_EQ(3, failuresCount); cleanupArbSyncObject(osInterface, &syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenNewGlSyncInfoWhenCreateEventFailsThenSetupArbSyncObjectFails) { auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); MockOSInterface mockOsInterface; MockOSInterfaceImpl *mockOsInterfaceImpl = static_cast(mockOsInterface.get()); auto wddm = new WddmMock(*rootDeviceEnvironment); auto gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); 
mockOsInterface.get()->setWddm(wddm); mockOsInterfaceImpl->failEventNum = mockOsInterfaceImpl->eventNum; int failuresCount = 0; auto ret = setupArbSyncObject(sharing, mockOsInterface, syncInfo); while (false == ret) { ++failuresCount; mockOsInterfaceImpl->eventNum = 1; ++mockOsInterfaceImpl->failEventNum; ret = setupArbSyncObject(sharing, mockOsInterface, syncInfo); } EXPECT_EQ(2, failuresCount); cleanupArbSyncObject(mockOsInterface, &syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenInvalidGlSyncInfoWhenCleanupArbSyncObjectIsCalledThenDestructorsOfSyncOrEventsAreNotInvoked) { struct DestroySyncObjectMock { static NTSTATUS __stdcall destroySynchObject(_In_ CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *sync) { EXPECT_FALSE(true); return STATUS_INVALID_PARAMETER; } }; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); auto wddm = new WddmMock(*rootDeviceEnvironment); auto gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); MockOSInterface mockOsInterface; MockOSInterfaceImpl *mockOsInterfaceImpl = static_cast(mockOsInterface.get()); mockOsInterface.get()->setWddm(wddm); gdi->destroySynchronizationObject = DestroySyncObjectMock::destroySynchObject; cleanupArbSyncObject(mockOsInterface, nullptr); EXPECT_EQ(0, mockOsInterfaceImpl->closedEventsCount); } TEST_F(GlArbSyncEventOsTest, GivenValidGlSyncInfoWhenCleanupArbSyncObjectIsCalledThenProperCountOfDestructorsOfSyncAndEventsIsNotInvoked) { struct CreateDestroySyncObjectMock { static int &getDestroyCounter() { static int counter = 0; return counter; } static NTSTATUS __stdcall destroySynchObject(_In_ CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *sync) { ++getDestroyCounter(); return STATUS_SUCCESS; } static void reset() { getDestroyCounter() = 0; } }; auto rootDeviceEnvironment = platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get(); auto wddm = new WddmMock(*rootDeviceEnvironment); auto gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); MockOSInterface 
mockOsInterface; MockOSInterfaceImpl *mockOsInterfaceImpl = static_cast(mockOsInterface.get()); mockOsInterface.get()->setWddm(wddm); CreateDestroySyncObjectMock::reset(); gdi->destroySynchronizationObject = CreateDestroySyncObjectMock::destroySynchObject; auto ret = setupArbSyncObject(sharing, mockOsInterface, syncInfo); EXPECT_TRUE(ret); syncInfo.serverSynchronizationObject = 0x5cU; syncInfo.clientSynchronizationObject = 0x7cU; syncInfo.submissionSynchronizationObject = 0x13cU; cleanupArbSyncObject(mockOsInterface, &syncInfo); EXPECT_EQ(2, mockOsInterfaceImpl->closedEventsCount); EXPECT_EQ(3, CreateDestroySyncObjectMock::getDestroyCounter()); } TEST_F(GlArbSyncEventOsTest, GivenCallToSignalArbSyncObjectWhenSignalSynchronizationObjectForServerClientSyncFailsThenSubmissionSyncDoesNotGetSignalled) { struct FailSignalSyncObjectMock { static NTSTATUS __stdcall signal(_In_ CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *obj) { EXPECT_NE(nullptr, obj); if (obj == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_EQ(2, obj->ObjectCount); EXPECT_EQ(getExpectedSynchHandle0(), obj->ObjectHandleArray[0]); EXPECT_EQ(getExpectedSynchHandle1(), obj->ObjectHandleArray[1]); EXPECT_EQ(0, obj->Flags.SignalAtSubmission); EXPECT_EQ(getExpectedContextHandle(), obj->hContext); return STATUS_INVALID_PARAMETER; } static D3DKMT_HANDLE &getExpectedSynchHandle0() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static D3DKMT_HANDLE &getExpectedSynchHandle1() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static D3DKMT_HANDLE &getExpectedContextHandle() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static void reset() { getExpectedSynchHandle0() = INVALID_HANDLE; getExpectedSynchHandle1() = INVALID_HANDLE; getExpectedContextHandle() = INVALID_HANDLE; } }; FailSignalSyncObjectMock::reset(); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); OsContextWin osContext(*osInterface.get()->getWddm(), 0u, 
1, HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode, false, false, false); CL_GL_SYNC_INFO syncInfo = {}; syncInfo.serverSynchronizationObject = 0x5cU; syncInfo.clientSynchronizationObject = 0x6cU; gdi->signalSynchronizationObject.mFunc = FailSignalSyncObjectMock::signal; FailSignalSyncObjectMock::getExpectedContextHandle() = osContext.getWddmContextHandle(); FailSignalSyncObjectMock::getExpectedSynchHandle0() = syncInfo.serverSynchronizationObject; FailSignalSyncObjectMock::getExpectedSynchHandle1() = syncInfo.clientSynchronizationObject; signalArbSyncObject(osContext, syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenCallToSignalArbSyncObjectWhenSignalSynchronizationObjectForServerClientSyncSucceedsThenSubmissionSyncGetsSignalledAsWell) { struct FailSignalSyncObjectMock { static NTSTATUS __stdcall signal(_In_ CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *obj) { EXPECT_NE(nullptr, obj); if (obj == nullptr) { return STATUS_INVALID_PARAMETER; } // validating only second call to signal if (getCounter()++ != 1) { return STATUS_SUCCESS; } EXPECT_EQ(1, obj->ObjectCount); EXPECT_EQ(getExpectedSynchHandle0(), obj->ObjectHandleArray[0]); EXPECT_EQ(1, obj->Flags.SignalAtSubmission); EXPECT_EQ(getExpectedContextHandle(), obj->hContext); return STATUS_SUCCESS; } static D3DKMT_HANDLE &getExpectedSynchHandle0() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static int &getCounter() { static int counter = 0; return counter; } static D3DKMT_HANDLE &getExpectedContextHandle() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static void reset() { getExpectedSynchHandle0() = INVALID_HANDLE; getCounter() = 0; getExpectedContextHandle() = INVALID_HANDLE; } }; FailSignalSyncObjectMock::reset(); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); OsContextWin osContext(*osInterface.get()->getWddm(), 0u, 1, 
HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode, false, false, false); CL_GL_SYNC_INFO syncInfo = {}; syncInfo.submissionSynchronizationObject = 0x7cU; gdi->signalSynchronizationObject.mFunc = FailSignalSyncObjectMock::signal; FailSignalSyncObjectMock::getExpectedContextHandle() = osContext.getWddmContextHandle(); FailSignalSyncObjectMock::getExpectedSynchHandle0() = syncInfo.submissionSynchronizationObject; signalArbSyncObject(osContext, syncInfo); } TEST_F(GlArbSyncEventOsTest, GivenCallToServerWaitForArbSyncObjectWhenWaitForSynchronizationObjectFailsThenWaitFlagDoesNotGetSet) { struct FailWaitSyncObjectMock { static NTSTATUS __stdcall waitForSynchObject(_In_ CONST_FROM_WDK_10_0_18328_0 D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *waitData) { EXPECT_NE(nullptr, waitData); if (waitData == nullptr) { return STATUS_INVALID_PARAMETER; } EXPECT_EQ(1, waitData->ObjectCount); EXPECT_EQ(getExpectedSynchHandle0(), waitData->ObjectHandleArray[0]); EXPECT_EQ(getExpectedContextHandle(), waitData->hContext); return STATUS_INVALID_PARAMETER; } static D3DKMT_HANDLE &getExpectedSynchHandle0() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static D3DKMT_HANDLE &getExpectedContextHandle() { static D3DKMT_HANDLE handle = INVALID_HANDLE; return handle; } static void reset() { getExpectedSynchHandle0() = INVALID_HANDLE; getExpectedContextHandle() = INVALID_HANDLE; } }; FailWaitSyncObjectMock::reset(); CL_GL_SYNC_INFO syncInfo = {}; syncInfo.hContextToBlock = 0x4cU; FailWaitSyncObjectMock::getExpectedSynchHandle0() = syncInfo.serverSynchronizationObject; FailWaitSyncObjectMock::getExpectedContextHandle() = syncInfo.hContextToBlock; gdi->waitForSynchronizationObject.mFunc = FailWaitSyncObjectMock::waitForSynchObject; EXPECT_FALSE(syncInfo.waitCalled); serverWaitForArbSyncObject(osInterface, syncInfo); EXPECT_FALSE(syncInfo.waitCalled); } TEST_F(GlArbSyncEventOsTest, 
GivenCallToServerWaitForArbSyncObjectWhenWaitForSynchronizationObjectSucceedsThenWaitFlagGetsSet) { CL_GL_SYNC_INFO syncInfo = {}; syncInfo.serverSynchronizationObject = 0x7cU; EXPECT_FALSE(syncInfo.waitCalled); serverWaitForArbSyncObject(osInterface, syncInfo); EXPECT_TRUE(syncInfo.waitCalled); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.cpp000066400000000000000000000077611363734646600335610ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "test.h" #include "instrumentation.h" namespace NEO { template <> uint64_t HwInfoConfigHw::getHostMemCapabilities() { return 0; } template <> uint64_t HwInfoConfigHw::getDeviceMemCapabilities() { return 0; } template <> uint64_t HwInfoConfigHw::getSingleDeviceSharedMemCapabilities() { return 0; } template <> uint64_t HwInfoConfigHw::getCrossDeviceSharedMemCapabilities() { return 0; } template <> uint64_t HwInfoConfigHw::getSharedSystemMemCapabilities() { return 0; } template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { return 0; } template <> void HwInfoConfigHw::adjustPlatformForProductFamily(HardwareInfo *hwInfo) { } HwInfoConfigTestWindows::HwInfoConfigTestWindows() { this->executionEnvironment = std::make_unique(); this->rootDeviceEnvironment = std::make_unique(*executionEnvironment); } HwInfoConfigTestWindows::~HwInfoConfigTestWindows() { } void HwInfoConfigTestWindows::SetUp() { 
HwInfoConfigTest::SetUp(); osInterface.reset(new OSInterface()); std::unique_ptr wddm(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); wddm->init(); outHwInfo = *rootDeviceEnvironment->getHardwareInfo(); } void HwInfoConfigTestWindows::TearDown() { HwInfoConfigTest::TearDown(); } TEST_F(HwInfoConfigTestWindows, givenCorrectParametersWhenConfiguringHwInfoThenReturnSuccess) { int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(0, ret); } TEST_F(HwInfoConfigTestWindows, givenCorrectParametersWhenConfiguringHwInfoThenSetFtrSvmCorrectly) { auto ftrSvm = outHwInfo.featureTable.ftrSVM; int ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface.get()); ASSERT_EQ(0, ret); EXPECT_EQ(outHwInfo.capabilityTable.ftrSvm, ftrSvm); } TEST_F(HwInfoConfigTestWindows, givenInstrumentationForHardwareIsEnabledOrDisabledWhenConfiguringHwInfoThenOverrideItUsingHaveInstrumentation) { int ret; outHwInfo.capabilityTable.instrumentationEnabled = false; ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface.get()); ASSERT_EQ(0, ret); EXPECT_FALSE(outHwInfo.capabilityTable.instrumentationEnabled); outHwInfo.capabilityTable.instrumentationEnabled = true; ret = hwConfig.configureHwInfo(&pInHwInfo, &outHwInfo, osInterface.get()); ASSERT_EQ(0, ret); EXPECT_TRUE(outHwInfo.capabilityTable.instrumentationEnabled == haveInstrumentation); } HWTEST_F(HwInfoConfigTestWindows, givenFtrIaCoherencyFlagWhenConfiguringHwInfoThenSetCoherencySupportCorrectly) { HardwareInfo initialHwInfo = *defaultHwInfo; auto &hwHelper = HwHelper::get(initialHwInfo.platform.eRenderCoreFamily); auto hwInfoConfig = HwInfoConfig::get(initialHwInfo.platform.eProductFamily); bool initialCoherencyStatus = false; hwHelper.setCapabilityCoherencyFlag(&outHwInfo, initialCoherencyStatus); initialHwInfo.featureTable.ftrL3IACoherency = false; hwInfoConfig->configureHwInfo(&initialHwInfo, &outHwInfo, osInterface.get()); 
EXPECT_FALSE(outHwInfo.capabilityTable.ftrSupportsCoherency); initialHwInfo.featureTable.ftrL3IACoherency = true; hwInfoConfig->configureHwInfo(&initialHwInfo, &outHwInfo, osInterface.get()); EXPECT_EQ(initialCoherencyStatus, outHwInfo.capabilityTable.ftrSupportsCoherency); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.h000066400000000000000000000014411363734646600332130ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/hw_info_config.h" #include "opencl/test/unit_test/os_interface/hw_info_config_tests.h" #include namespace NEO { struct MockExecutionEnvironment; struct RootDeviceEnvironment; struct DummyHwConfig : HwInfoConfigHw { }; struct HwInfoConfigTestWindows : public HwInfoConfigTest { HwInfoConfigTestWindows(); ~HwInfoConfigTestWindows(); void SetUp() override; void TearDown() override; std::unique_ptr osInterface; DummyHwConfig hwConfig; std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; }; } // namespace NEO mock_environment_variables.cpp000066400000000000000000000011321363734646600340140ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/mock_environment_variables.h" #include "shared/source/os_interface/windows/environment_variables.h" extern uint32_t (*getEnvironmentVariableMock)(const char *name, char *outBuffer, uint32_t outBufferSize) = nullptr; uint32_t getEnvironmentVariable(const char *name, char *outBuffer, uint32_t outBufferSize) { if (getEnvironmentVariableMock == nullptr) { return 0; } return getEnvironmentVariableMock(name, outBuffer, outBufferSize); } 
compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/mock_environment_variables.h000066400000000000000000000003311363734646600335400ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include extern uint32_t (*getEnvironmentVariableMock)(const char *name, char *outBuffer, uint32_t outBufferSize); compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/mock_kmdaf_listener.h000066400000000000000000000140741363734646600321440ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/kmdaf_listener.h" namespace NEO { struct KmDafListenerMock : public KmDafListener { inline void notifyLock(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE hAllocation, D3DDDICB_LOCKFLAGS *pLockFlags, PFND3DKMT_ESCAPE pfnEscape) override { notifyLockParametrization.ftrKmdDaf = ftrKmdDaf; notifyLockParametrization.hAdapter = hAdapter; notifyLockParametrization.hDevice = hDevice; notifyLockParametrization.hAllocation = hAllocation; notifyLockParametrization.pLockFlags = pLockFlags; notifyLockParametrization.pfnEscape = pfnEscape; } inline void notifyUnlock(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE *phAllocation, ULONG allocations, PFND3DKMT_ESCAPE pfnEscape) override { notifyUnlockParametrization.ftrKmdDaf = ftrKmdDaf; notifyUnlockParametrization.hAdapter = hAdapter; notifyUnlockParametrization.hDevice = hDevice; notifyUnlockParametrization.phAllocation = phAllocation; notifyUnlockParametrization.allocations = allocations; notifyUnlockParametrization.pfnEscape = pfnEscape; } inline void notifyMapGpuVA(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE hAllocation, D3DGPU_VIRTUAL_ADDRESS GpuVirtualAddress, PFND3DKMT_ESCAPE pfnEscape) override { 
notifyMapGpuVAParametrization.ftrKmdDaf = ftrKmdDaf; notifyMapGpuVAParametrization.hAdapter = hAdapter; notifyMapGpuVAParametrization.hDevice = hDevice; notifyMapGpuVAParametrization.hAllocation = hAllocation; notifyMapGpuVAParametrization.GpuVirtualAddress = GpuVirtualAddress; notifyMapGpuVAParametrization.pfnEscape = pfnEscape; } inline void notifyUnmapGpuVA(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, D3DGPU_VIRTUAL_ADDRESS GpuVirtualAddress, PFND3DKMT_ESCAPE pfnEscape) override { notifyUnmapGpuVAParametrization.ftrKmdDaf = ftrKmdDaf; notifyUnmapGpuVAParametrization.hAdapter = hAdapter; notifyUnmapGpuVAParametrization.hDevice = hDevice; notifyUnmapGpuVAParametrization.GpuVirtualAddress = GpuVirtualAddress; notifyUnmapGpuVAParametrization.pfnEscape = pfnEscape; } inline void notifyMakeResident(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE *phAllocation, ULONG allocations, PFND3DKMT_ESCAPE pfnEscape) override { notifyMakeResidentParametrization.ftrKmdDaf = ftrKmdDaf; notifyMakeResidentParametrization.hAdapter = hAdapter; notifyMakeResidentParametrization.hDevice = hDevice; notifyMakeResidentParametrization.phAllocation = phAllocation; notifyMakeResidentParametrization.allocations = allocations; notifyMakeResidentParametrization.pfnEscape = pfnEscape; } inline void notifyEvict(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE *phAllocation, ULONG allocations, PFND3DKMT_ESCAPE pfnEscape) override { notifyEvictParametrization.ftrKmdDaf = ftrKmdDaf; notifyEvictParametrization.hAdapter = hAdapter; notifyEvictParametrization.hDevice = hDevice; notifyEvictParametrization.phAllocation = phAllocation; notifyEvictParametrization.allocations = allocations; notifyEvictParametrization.pfnEscape = pfnEscape; } inline void notifyWriteTarget(bool ftrKmdDaf, D3DKMT_HANDLE hAdapter, D3DKMT_HANDLE hDevice, const D3DKMT_HANDLE hAllocation, PFND3DKMT_ESCAPE pfnEscape) override { 
notifyWriteTargetParametrization.ftrKmdDaf = ftrKmdDaf; notifyWriteTargetParametrization.hAdapter = hAdapter; notifyWriteTargetParametrization.hDevice = hDevice; notifyWriteTargetParametrization.hAllocation = hAllocation; notifyWriteTargetParametrization.pfnEscape = pfnEscape; } struct NotifyLockParametrization { bool ftrKmdDaf = false; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DKMT_HANDLE hAllocation = 0; D3DDDICB_LOCKFLAGS *pLockFlags = nullptr; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyLockParametrization; struct NotifyUnlockParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; const D3DKMT_HANDLE *phAllocation = nullptr; ULONG allocations = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyUnlockParametrization; struct NotifyMapGpuVAParametrization { bool ftrKmdDaf = false; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DKMT_HANDLE hAllocation = 0; D3DGPU_VIRTUAL_ADDRESS GpuVirtualAddress = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyMapGpuVAParametrization; struct NotifyUnmapGpuVAParametrization { bool ftrKmdDaf = false; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DGPU_VIRTUAL_ADDRESS GpuVirtualAddress = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyUnmapGpuVAParametrization; struct NotifyMakeResidentParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; const D3DKMT_HANDLE *phAllocation = nullptr; ULONG allocations = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyMakeResidentParametrization; struct NotifyEvictParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; const D3DKMT_HANDLE *phAllocation = nullptr; ULONG allocations = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } notifyEvictParametrization; struct NotifyWriteTargetParametrization { bool ftrKmdDaf = 0; D3DKMT_HANDLE hAdapter = 0; D3DKMT_HANDLE hDevice = 0; D3DKMT_HANDLE hAllocation = 0; PFND3DKMT_ESCAPE pfnEscape = nullptr; } 
notifyWriteTargetParametrization; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/mock_os_time_win.h000066400000000000000000000010751363734646600314660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/os_time_win.h" namespace NEO { class MockOSTimeWin : public OSTimeWin { public: MockOSTimeWin(OSInterface *osInterface) : OSTimeWin(osInterface){}; void overrideQueryPerformanceCounterFunction(decltype(&QueryPerformanceCounter) function) { this->QueryPerfomanceCounterFnc = function; } void setFrequency(LARGE_INTEGER frequency) { this->frequency = frequency; } }; } // namespace NEOmock_performance_counters_win.cpp000066400000000000000000000050541363734646600345270ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "mock_performance_counters_win.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/windows_wrapper.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/os_interface/windows/mock_os_time_win.h" namespace NEO { /////////////////////////////////////////////////////// // MockPerformanceCounters::create /////////////////////////////////////////////////////// std::unique_ptr MockPerformanceCounters::create(Device *device) { auto performanceCounters = std::unique_ptr(new MockPerformanceCountersWin(device)); auto metricsLibrary = std::make_unique(); auto metricsLibraryDll = std::make_unique(); metricsLibrary->api = std::make_unique(); metricsLibrary->osLibrary = std::move(metricsLibraryDll); performanceCounters->setMetricsLibraryInterface(std::move(metricsLibrary)); return performanceCounters; } 
/////////////////////////////////////////////////////// // MockPerformanceCountersWin::MockPerformanceCountersWin /////////////////////////////////////////////////////// MockPerformanceCountersWin::MockPerformanceCountersWin(Device *device) : PerformanceCountersWin() { } ////////////////////////////////////////////////////// // PerformanceCountersFixture::createPerfCounters ////////////////////////////////////////////////////// void PerformanceCountersFixture::createPerfCounters() { performanceCountersBase = MockPerformanceCounters::create(&device->getDevice()); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::SetUp ////////////////////////////////////////////////////// void PerformanceCountersFixture::SetUp() { device = std::make_unique(new MockDevice()); context = std::make_unique(device.get()); queue = std::make_unique(context.get(), device.get(), &queueProperties); osInterface = std::unique_ptr(new OSInterface()); osInterface->get()->setWddm(new WddmMock(*rootDeviceEnvironment)); device->setOSTime(new MockOSTimeWin(osInterface.get())); } ////////////////////////////////////////////////////// // PerformanceCountersFixture::TearDown ////////////////////////////////////////////////////// void PerformanceCountersFixture::TearDown() { } } // namespace NEO mock_performance_counters_win.h000066400000000000000000000006401363734646600341700ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/os_interface/windows/performance_counters_win.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" namespace NEO { class MockPerformanceCountersWin : public PerformanceCountersWin { public: MockPerformanceCountersWin(Device *device); }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/mock_registry_reader.cpp000066400000000000000000000042761363734646600327050ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/windows_wrapper.h" uint32_t regOpenKeySuccessCount = 0u; uint32_t regQueryValueSuccessCount = 0u; const HKEY validHkey = reinterpret_cast(0); LSTATUS APIENTRY RegOpenKeyExA( HKEY hKey, LPCSTR lpSubKey, DWORD ulOptions, REGSAM samDesired, PHKEY phkResult) { if (regOpenKeySuccessCount > 0) { regOpenKeySuccessCount--; if (phkResult) { *phkResult = validHkey; } return ERROR_SUCCESS; } return ERROR_FILE_NOT_FOUND; }; LSTATUS APIENTRY RegQueryValueExA( HKEY hKey, LPCSTR lpValueName, LPDWORD lpReserved, LPDWORD lpType, LPBYTE lpData, LPDWORD lpcbData) { if (hKey == validHkey && regQueryValueSuccessCount > 0) { regQueryValueSuccessCount--; if (lpcbData) { if (strcmp(lpValueName, "settingSourceString") == 0) { const auto settingSource = "registry"; if (lpData) { strcpy(reinterpret_cast(lpData), settingSource); } else { *lpcbData = static_cast(strlen(settingSource) + 1u); if (lpType) { *lpType = REG_SZ; } } } else if (strcmp(lpValueName, "settingSourceInt") == 0) { if (lpData) { *reinterpret_cast(lpData) = 1; } else { *lpcbData = sizeof(DWORD); } } else if (strcmp(lpValueName, "settingSourceBinary") == 0) { const auto settingSource = L"registry"; auto size = wcslen(settingSource) * sizeof(wchar_t); if (lpData) { memcpy(reinterpret_cast(lpData), settingSource, size); } else { *lpcbData = static_cast(size); if (lpType) { *lpType = REG_BINARY; } } } } return ERROR_SUCCESS; } return ERROR_FILE_NOT_FOUND; }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h000066400000000000000000000015021363734646600323050ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once 
#include "shared/source/os_interface/windows/wddm_allocation.h" #include "opencl/test/unit_test/mock_gdi/mock_gdi.h" namespace NEO { class MockWddmAllocation : public WddmAllocation { public: MockWddmAllocation() : WddmAllocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 0, nullptr, MemoryPool::MemoryNull), gpuPtr(gpuAddress), handle(handles[0]) { for (uint32_t i = 0; i < EngineLimits::maxHandleCount; i++) { handles[i] = ALLOCATION_HANDLE; } } using WddmAllocation::cpuPtr; using WddmAllocation::handles; using WddmAllocation::memoryPool; using WddmAllocation::size; D3DGPU_VIRTUAL_ADDRESS &gpuPtr; D3DKMT_HANDLE &handle; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h000066400000000000000000000046621363734646600331740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/memory_manager/deferred_deleter.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_host_ptr_manager.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" namespace NEO { class MockWddmMemoryManager : public MemoryManagerCreate { using BaseClass = WddmMemoryManager; public: using BaseClass::allocateGraphicsMemory64kb; using BaseClass::allocateGraphicsMemoryForNonSvmHostPtr; using BaseClass::allocateGraphicsMemoryInDevicePool; using BaseClass::allocateGraphicsMemoryWithProperties; using BaseClass::allocateShareableMemory; using BaseClass::createGraphicsAllocation; using BaseClass::createWddmAllocation; using BaseClass::getWddm; using BaseClass::gfxPartitions; using BaseClass::localMemorySupported; using BaseClass::supportsMultiStorageResources; using MemoryManagerCreate::MemoryManagerCreate; MockWddmMemoryManager(ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(false, 
false, executionEnvironment) { hostPtrManager.reset(new MockHostPtrManager); }; void setDeferredDeleter(DeferredDeleter *deleter) { this->deferredDeleter.reset(deleter); } void setForce32bitAllocations(bool newValue) { this->force32bitAllocations = newValue; } bool validateAllocationMock(WddmAllocation *graphicsAllocation) { return this->validateAllocation(graphicsAllocation); } GraphicsAllocation *allocate32BitGraphicsMemory(uint32_t rootDeviceIndex, size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType) { bool allocateMemory = ptr == nullptr; AllocationData allocationData; MockAllocationProperties properties(rootDeviceIndex, allocateMemory, size, allocationType); getAllocationData(allocationData, properties, ptr, createStorageInfoFromProperties(properties)); return allocate32BitGraphicsMemoryImpl(allocationData); } void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override { freeGraphicsMemoryImplCalled++; BaseClass::freeGraphicsMemoryImpl(gfxAllocation); } uint32_t freeGraphicsMemoryImplCalled = 0u; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/options.cpp000066400000000000000000000022361363734646600301670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_library.h" #include "common/gtsysinfo.h" #include "igfxfmid.h" namespace Os { /////////////////////////////////////////////////////////////////////////////// // These options determine the Windows specific behavior for // the runtime unit tests /////////////////////////////////////////////////////////////////////////////// const char *frontEndDllName = ""; const char *igcDllName = ""; const char *gdiDllName = "gdi32_mock.dll"; const char *testDllName = "test_dynamic_lib.dll"; const char *metricsLibraryDllName = ""; } // namespace Os NEO::OsLibrary *setAdapterInfo(const PLATFORM *platform, const GT_SYSTEM_INFO *gtSystemInfo, 
uint64_t gpuAddressSpace) { NEO::OsLibrary *mockGdiDll; mockGdiDll = NEO::OsLibrary::load("gdi32_mock.dll"); typedef void(__stdcall * pfSetAdapterInfo)(const void *, const void *, uint64_t); pfSetAdapterInfo setAdpaterInfo = reinterpret_cast(mockGdiDll->getProcAddress("MockSetAdapterInfo")); setAdpaterInfo(platform, gtSystemInfo, gpuAddressSpace); return mockGdiDll; } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/os_context_win_tests.cpp000066400000000000000000000046251363734646600327640ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" using namespace NEO; struct OsContextWinTest : public WddmTestWithMockGdiDll { void SetUp() override { WddmTestWithMockGdiDll::SetUp(); preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); engineType = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0]; init(); } PreemptionMode preemptionMode; aub_stream::EngineType engineType; }; TEST_F(OsContextWinTest, givenWddm20WhenCreatingOsContextThenOsContextIsInitialized) { osContext = std::make_unique(*osInterface->get()->getWddm(), 0u, 1, engineType, preemptionMode, false, false, false); EXPECT_TRUE(osContext->isInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenCreatingWddmContextFailThenOsContextIsNotInitialized) { wddm->device = INVALID_HANDLE; osContext = std::make_unique(*osInterface->get()->getWddm(), 0u, 1, engineType, preemptionMode, false, false, false); EXPECT_FALSE(osContext->isInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenCreatingWddmMonitorFenceFailThenOsContextIsNotInitialized) { *getCreateSynchronizationObject2FailCallFcn() = true; osContext = std::make_unique(*osInterface->get()->getWddm(), 0u, 1, engineType, 
preemptionMode, false, false, false); EXPECT_FALSE(osContext->isInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenRegisterTrimCallbackFailThenOsContextIsNotInitialized) { *getRegisterTrimNotificationFailCallFcn() = true; osContext = std::make_unique(*osInterface->get()->getWddm(), 0u, 1, engineType, preemptionMode, false, false, false); EXPECT_FALSE(osContext->isInitialized()); } TEST_F(OsContextWinTest, givenWddm20WhenRegisterTrimCallbackIsDisabledThenOsContextIsInitialized) { DebugManagerStateRestore stateRestore; DebugManager.flags.DoNotRegisterTrimCallback.set(true); *getRegisterTrimNotificationFailCallFcn() = true; osContext = std::make_unique(*osInterface->get()->getWddm(), 0u, 1, engineType, preemptionMode, false, false, false); EXPECT_TRUE(osContext->isInitialized()); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/os_interface_win_tests.cpp000066400000000000000000000035501363734646600332340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/os_interface_win_tests.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" TEST_F(OsInterfaceTest, GivenWindowsWhenOsSupportFor64KBpagesIsBeingQueriedThenTrueIsReturned) { EXPECT_TRUE(OSInterface::are64kbPagesEnabled()); } TEST_F(OsInterfaceTest, GivenWindowsWhenCreateEentIsCalledThenValidEventHandleIsReturned) { auto ev = osInterface->get()->createEvent(NULL, TRUE, FALSE, "DUMMY_EVENT_NAME"); EXPECT_NE(nullptr, ev); auto ret = osInterface->get()->closeHandle(ev); EXPECT_EQ(TRUE, ret); } TEST(OsContextTest, givenWddmWhenCreateOsContextAfterInitWddmThenOsContextIsInitializedTrimCallbackIsRegisteredMemoryOperationsHandlerCreated) { MockExecutionEnvironment 
executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMock(rootDeviceEnvironment); OSInterface osInterface; osInterface.get()->setWddm(wddm); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); EXPECT_EQ(0u, wddm->registerTrimCallbackResult.called); auto osContext = std::make_unique(*wddm, 0u, 1, HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode, false, false, false); EXPECT_TRUE(osContext->isInitialized()); EXPECT_EQ(osContext->getWddm(), wddm); EXPECT_EQ(1u, wddm->registerTrimCallbackResult.called); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/os_interface_win_tests.h000066400000000000000000000011571363734646600327020ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "gtest/gtest.h" using namespace NEO; class OsInterfaceTest : public ::testing::Test { public: void SetUp() override { osInterface = std::unique_ptr(new OSInterface()); } void TearDown() override { } std::unique_ptr osInterface; }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/os_library_win_tests.cpp000066400000000000000000000046771363734646600327530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/os_library_win.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "test.h" #include "gtest/gtest.h" #include namespace Os { extern const char *testDllName; } using namespace NEO; class OsLibraryBackup : public Windows::OsLibrary { using Type = 
decltype(Windows::OsLibrary::loadLibraryExA); using BackupType = typename VariableBackup; using ModuleNameType = decltype(Windows::OsLibrary::getModuleFileNameA); using ModuleNameBackupType = typename VariableBackup; struct Backup { std::unique_ptr bkp1 = nullptr; std::unique_ptr bkp2 = nullptr; }; public: static std::unique_ptr backup(Type newValue, ModuleNameType newModuleName) { std::unique_ptr bkp(new Backup()); bkp->bkp1.reset(new BackupType(&OsLibrary::loadLibraryExA, newValue)); bkp->bkp2.reset(new ModuleNameBackupType(&OsLibrary::getModuleFileNameA, newModuleName)); return bkp; }; }; bool mockWillFail = true; void trimFileName(char *buff, size_t length) { for (size_t l = length; l > 0; l--) { if (buff[l - 1] == '\\') { buff[l] = '\0'; break; } } } DWORD WINAPI GetModuleFileNameAMock(HMODULE hModule, LPSTR lpFilename, DWORD nSize) { return snprintf(lpFilename, nSize, "z:\\SomeFakeName.dll"); } HMODULE WINAPI LoadLibraryExAMock(LPCSTR lpFileName, HANDLE hFile, DWORD dwFlags) { if (mockWillFail) return NULL; char fName[MAX_PATH]; auto lenFn = strlen(lpFileName); strcpy_s(fName, sizeof(fName), lpFileName); trimFileName(fName, lenFn); EXPECT_STREQ("z:\\", fName); return (HMODULE)1; } TEST(OSLibraryWinTest, gitOsLibraryWinWhenLoadDependencyFailsThenFallbackToNonDriverStore) { auto bkp = OsLibraryBackup::backup(LoadLibraryExAMock, GetModuleFileNameAMock); std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); } TEST(OSLibraryWinTest, gitOsLibraryWinWhenLoadDependencyThenProperPathIsConstructed) { auto bkp = OsLibraryBackup::backup(LoadLibraryExAMock, GetModuleFileNameAMock); VariableBackup bkpM(&mockWillFail, false); std::unique_ptr library(OsLibrary::load(Os::testDllName)); EXPECT_NE(nullptr, library); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/os_time_win_tests.cpp000066400000000000000000000072251363734646600322350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" #include "gtest/gtest.h" #include "mock_os_time_win.h" #include using namespace NEO; LARGE_INTEGER valueToSet = {0}; BOOL WINAPI QueryPerformanceCounterMock( _Out_ LARGE_INTEGER *lpPerformanceCount) { *lpPerformanceCount = valueToSet; return true; }; struct OSTimeWinTest : public ::testing::Test { public: void SetUp() override { osTime = std::unique_ptr(new MockOSTimeWin(nullptr)); } void TearDown() override { } std::unique_ptr osTime; }; TEST_F(OSTimeWinTest, givenZeroFrequencyWhenGetHostTimerFuncIsCalledThenReturnsZero) { LARGE_INTEGER frequency; frequency.QuadPart = 0; osTime->setFrequency(frequency); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(0, retVal); } TEST_F(OSTimeWinTest, givenNonZeroFrequencyWhenGetHostTimerFuncIsCalledThenReturnsNonZero) { LARGE_INTEGER frequency; frequency.QuadPart = NSEC_PER_SEC; osTime->setFrequency(frequency); auto retVal = osTime->getHostTimerResolution(); EXPECT_EQ(1.0, retVal); } TEST_F(OSTimeWinTest, givenOsTimeWinWhenGetCpuRawTimestampIsCalledThenReturnsNonZero) { auto retVal = osTime->getCpuRawTimestamp(); EXPECT_NE(0ull, retVal); } TEST_F(OSTimeWinTest, givenHighValueOfCpuTimestampWhenItIsObtainedThenItHasProperValue) { osTime->overrideQueryPerformanceCounterFunction(QueryPerformanceCounterMock); LARGE_INTEGER frequency = {0}; frequency.QuadPart = 190457; osTime->setFrequency(frequency); valueToSet.QuadPart = 700894514854; uint64_t timeStamp = 0; uint64_t expectedTimestamp = static_cast((static_cast(valueToSet.QuadPart) * static_cast(NSEC_PER_SEC) / static_cast(frequency.QuadPart))); osTime->getCpuTime(&timeStamp); EXPECT_EQ(expectedTimestamp, timeStamp); } TEST(OSTimeWinTests, 
givenNoOSInterfaceWhenGetCpuTimeThenReturnsSuccess) { uint64_t time = 0; auto osTime(OSTime::create(nullptr)); auto error = osTime->getCpuTime(&time); EXPECT_TRUE(error); EXPECT_NE(0, time); } TEST(OSTimeWinTests, givenNoOSInterfaceWhenGetCpuGpuTimeThenReturnsError) { TimeStampData CPUGPUTime = {0}; auto osTime(OSTime::create(nullptr)); auto success = osTime->getCpuGpuTime(&CPUGPUTime); EXPECT_FALSE(success); EXPECT_EQ(0, CPUGPUTime.CPUTimeinNS); EXPECT_EQ(0, CPUGPUTime.GPUTimeStamp); } TEST(OSTimeWinTests, givenOSInterfaceWhenGetCpuGpuTimeThenReturnsSuccess) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMock(rootDeviceEnvironment); TimeStampData CPUGPUTime01 = {0}; TimeStampData CPUGPUTime02 = {0}; std::unique_ptr osInterface(new OSInterface()); osInterface->get()->setWddm(wddm); auto osTime = OSTime::create(osInterface.get()); auto success = osTime->getCpuGpuTime(&CPUGPUTime01); EXPECT_TRUE(success); EXPECT_NE(0, CPUGPUTime01.CPUTimeinNS); EXPECT_NE(0, CPUGPUTime01.GPUTimeStamp); success = osTime->getCpuGpuTime(&CPUGPUTime02); EXPECT_TRUE(success); EXPECT_NE(0, CPUGPUTime02.CPUTimeinNS); EXPECT_NE(0, CPUGPUTime02.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.GPUTimeStamp, CPUGPUTime01.GPUTimeStamp); EXPECT_GT(CPUGPUTime02.CPUTimeinNS, CPUGPUTime01.CPUTimeinNS); } performance_counters_win_tests.cpp000066400000000000000000000010301363734646600347260ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/mock_performance_counters_win.h" #include "gtest/gtest.h" using namespace NEO; struct PerformanceCountersWinTest : public PerformanceCountersFixture, public ::testing::Test { public: void SetUp() override { PerformanceCountersFixture::SetUp(); } void TearDown() override { 
PerformanceCountersFixture::TearDown(); } }; compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/registry_reader_tests.cpp000066400000000000000000000130321363734646600331040ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/registry_reader_tests.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "test.h" using namespace NEO; using RegistryReaderTest = ::testing::Test; extern uint32_t regOpenKeySuccessCount; extern uint32_t regQueryValueSuccessCount; TEST_F(RegistryReaderTest, givenRegistryReaderWhenItIsCreatedWithUserScopeSetToFalseThenItsHkeyTypeIsInitializedToHkeyLocalMachine) { bool userScope = false; TestedRegistryReader registryReader(userScope); EXPECT_EQ(HKEY_LOCAL_MACHINE, registryReader.getHkeyType()); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenItIsCreatedWithUserScopeSetToTrueThenItsHkeyTypeIsInitializedHkeyCurrentUser) { bool userScope = true; TestedRegistryReader registryReader(userScope); EXPECT_EQ(HKEY_CURRENT_USER, registryReader.getHkeyType()); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenCallAppSpecificLocationThenReturnCurrentProcessName) { char buff[MAX_PATH]; GetModuleFileNameA(nullptr, buff, MAX_PATH); TestedRegistryReader registryReader(false); const char *ret = registryReader.appSpecificLocation("cl_cache_dir"); EXPECT_STREQ(buff, ret); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenRegKeyNotExistThenReturnDefaultValue) { std::string regKey = "notExistPath"; std::string value = "defaultValue"; TestedRegistryReader registryReader(regKey); EXPECT_EQ(value, registryReader.getSetting("", value)); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenItIsCreatedWithRegKeySpecifiedThenRegKeyIsInitializedAccordingly) { std::string regKey = "Software\\Intel\\IGFX\\OCL\\regKey"; TestedRegistryReader registryReader(regKey); EXPECT_STREQ(regKey.c_str(), registryReader.getRegKey()); 
} TEST_F(RegistryReaderTest, givenRegistryReaderWhenEnvironmentVariableExistsThenReturnCorrectValue) { char *envVar = "TestedEnvironmentVariable"; std::string value = "defaultValue"; TestedRegistryReader registryReader(""); EXPECT_EQ("TestedEnvironmentVariableValue", registryReader.getSetting(envVar, value)); } TEST_F(RegistryReaderTest, givenRegistryReaderWhenEnvironmentIntVariableExistsThenReturnCorrectValue) { char *envVar = "TestedEnvironmentIntVariable"; int32_t value = -1; TestedRegistryReader registryReader(""); EXPECT_EQ(1234, registryReader.getSetting(envVar, value)); } struct DebugReaderWithRegistryAndEnvTest : ::testing::Test { VariableBackup openRegCountBackup{®OpenKeySuccessCount}; VariableBackup queryRegCountBackup{®QueryValueSuccessCount}; TestedRegistryReader registryReader{""}; }; TEST_F(DebugReaderWithRegistryAndEnvTest, givenIntDebugKeyWhenReadFromRegistrySucceedsThenReturnObtainedValue) { regOpenKeySuccessCount = 1u; regQueryValueSuccessCount = 1u; EXPECT_EQ(1, registryReader.getSetting("settingSourceInt", 0)); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenIntDebugKeyWhenQueryValueFailsThenObtainValueFromEnv) { regOpenKeySuccessCount = 1u; regQueryValueSuccessCount = 0u; EXPECT_EQ(2, registryReader.getSetting("settingSourceInt", 0)); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenIntDebugKeyWhenOpenKeyFailsThenObtainValueFromEnv) { regOpenKeySuccessCount = 0u; regQueryValueSuccessCount = 0u; EXPECT_EQ(2, registryReader.getSetting("settingSourceInt", 0)); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenStringDebugKeyWhenReadFromRegistrySucceedsThenReturnObtainedValue) { std::string defaultValue("default"); regOpenKeySuccessCount = 1u; regQueryValueSuccessCount = 2u; EXPECT_STREQ("registry", registryReader.getSetting("settingSourceString", defaultValue).c_str()); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenStringDebugKeyWhenQueryValueFailsThenObtainValueFromEnv) { std::string defaultValue("default"); regOpenKeySuccessCount = 1u; 
regQueryValueSuccessCount = 0u; EXPECT_STREQ("environment", registryReader.getSetting("settingSourceString", defaultValue).c_str()); regOpenKeySuccessCount = 1u; regQueryValueSuccessCount = 1u; EXPECT_STREQ("environment", registryReader.getSetting("settingSourceString", defaultValue).c_str()); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenStringDebugKeyWhenOpenKeyFailsThenObtainValueFromEnv) { std::string defaultValue("default"); regOpenKeySuccessCount = 0u; regQueryValueSuccessCount = 0u; EXPECT_STREQ("environment", registryReader.getSetting("settingSourceString", defaultValue).c_str()); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenBinaryDebugKeyWhenReadFromRegistrySucceedsThenReturnObtainedValue) { std::string defaultValue("default"); regOpenKeySuccessCount = 1u; regQueryValueSuccessCount = 2u; EXPECT_STREQ("registry", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); } TEST_F(DebugReaderWithRegistryAndEnvTest, givenBinaryDebugKeyOnlyInRegistryWhenReadFromRegistryFailsThenReturnDefaultValue) { std::string defaultValue("default"); regOpenKeySuccessCount = 1u; regQueryValueSuccessCount = 1u; EXPECT_STREQ("default", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); regOpenKeySuccessCount = 1u; regQueryValueSuccessCount = 0u; EXPECT_STREQ("default", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); regOpenKeySuccessCount = 0u; regQueryValueSuccessCount = 0u; EXPECT_STREQ("default", registryReader.getSetting("settingSourceBinary", defaultValue).c_str()); }compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/registry_reader_tests.h000066400000000000000000000022341363734646600325530ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/debug_registry_reader.h" #include "opencl/source/os_interface/ocl_reg_path.h" namespace NEO { class TestedRegistryReader : public 
RegistryReader { public: TestedRegistryReader(bool userScope) : RegistryReader(userScope, oclRegPath){}; TestedRegistryReader(std::string regKey) : RegistryReader(false, regKey){}; HKEY getHkeyType() const { return hkeyType; } using RegistryReader::getSetting; char *getenv(const char *envVar) override { if (strcmp(envVar, "TestedEnvironmentVariable") == 0) { return "TestedEnvironmentVariableValue"; } else if (strcmp(envVar, "TestedEnvironmentIntVariable") == 0) { return "1234"; } else if (strcmp(envVar, "settingSourceString") == 0) { return "environment"; } else if (strcmp(envVar, "settingSourceInt") == 0) { return "2"; } else { return nullptr; } } const char *getRegKey() const { return registryReadRootKey.c_str(); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/self_lib_win.cpp000066400000000000000000000002361363734646600311260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ extern "C" __declspec(dllexport) void selfDynamicLibraryFunc() { } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/sys_calls.cpp000066400000000000000000000026361363734646600304740ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "sys_calls.h" namespace NEO { namespace SysCalls { constexpr uintptr_t dummyHandle = static_cast(0x7); BOOL systemPowerStatusRetVal = 1; BYTE systemPowerStatusACLineStatusOverride = 1; HMODULE handleValue = reinterpret_cast(dummyHandle); const wchar_t *currentLibraryPath = L""; HANDLE createEvent(LPSECURITY_ATTRIBUTES lpEventAttributes, BOOL bManualReset, BOOL bInitialState, LPCSTR lpName) { return reinterpret_cast(dummyHandle); } BOOL closeHandle(HANDLE hObject) { return (reinterpret_cast(dummyHandle) == hObject) ? 
TRUE : FALSE; } BOOL getSystemPowerStatus(LPSYSTEM_POWER_STATUS systemPowerStatusPtr) { systemPowerStatusPtr->ACLineStatus = systemPowerStatusACLineStatusOverride; return systemPowerStatusRetVal; } BOOL getModuleHandle(DWORD dwFlags, LPCWSTR lpModuleName, HMODULE *phModule) { constexpr auto expectedFlags = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT; if (dwFlags != expectedFlags) { return FALSE; } *phModule = handleValue; return TRUE; } DWORD getModuleFileName(HMODULE hModule, LPWSTR lpFilename, DWORD nSize) { if (hModule != handleValue) { return FALSE; } lstrcpyW(lpFilename, currentLibraryPath); return TRUE; } } // namespace SysCalls } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.cpp000066400000000000000000000012771363734646600320460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.h" namespace NEO { HRESULT WINAPI ULTCreateDXGIFactory(REFIID riid, void **ppFactory) { UltIDXGIFactory1 *factory = new UltIDXGIFactory1; *(UltIDXGIFactory1 **)ppFactory = factory; return S_OK; } void WINAPI ULTGetSystemInfo(SYSTEM_INFO *pSystemInfo) { pSystemInfo->lpMaximumApplicationAddress = is32bit ? 
(LPVOID)MemoryConstants::max32BitAppAddress : (LPVOID)MemoryConstants::max64BitAppAddress; } const wchar_t *UltIDXGIAdapter1::description = L"Intel"; extern uint32_t numRootDevicesToEnum = 0; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.h000066400000000000000000000113271363734646600315100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/memory_manager/memory_constants.h" #include #include namespace NEO { class UltIDXGIAdapter1 : public IDXGIAdapter1 { public: const static wchar_t *description; // IDXGIAdapter1 HRESULT STDMETHODCALLTYPE GetDesc1( _Out_ DXGI_ADAPTER_DESC1 *pDesc) { if (pDesc == nullptr) { return S_FALSE; } swprintf(pDesc->Description, 128, description); pDesc->AdapterLuid.HighPart = 0x1234; pDesc->DeviceId = 0x1234; return S_OK; } // IDXGIAdapter HRESULT STDMETHODCALLTYPE EnumOutputs( UINT Output, IDXGIOutput **ppOutput) { return S_OK; } HRESULT STDMETHODCALLTYPE GetDesc( DXGI_ADAPTER_DESC *pDesc) { return S_OK; } HRESULT STDMETHODCALLTYPE CheckInterfaceSupport( _In_ REFGUID InterfaceName, _Out_ LARGE_INTEGER *pUMDVersion) { return S_OK; } // IDXGIObject HRESULT STDMETHODCALLTYPE SetPrivateData( _In_ REFGUID Name, UINT DataSize, const void *pData) { return S_OK; } HRESULT STDMETHODCALLTYPE SetPrivateDataInterface( _In_ REFGUID Name, _In_opt_ const IUnknown *pUnknown) { return S_OK; } HRESULT STDMETHODCALLTYPE GetPrivateData( _In_ REFGUID Name, _Inout_ UINT *pDataSize, _Out_writes_bytes_(*pDataSize) void *pData) { return S_OK; } HRESULT STDMETHODCALLTYPE GetParent( _In_ REFIID riid, _COM_Outptr_ void **ppParent) { return S_OK; } // IUnknown HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void __RPC_FAR *__RPC_FAR *ppvObject) { return S_OK; } ULONG STDMETHODCALLTYPE AddRef(void) { return 0; } ULONG STDMETHODCALLTYPE 
Release(void) { // this must be the last instruction delete this; return 0; } }; extern uint32_t numRootDevicesToEnum; class UltIDXGIFactory1 : public IDXGIFactory1 { public: HRESULT STDMETHODCALLTYPE EnumAdapters1( UINT Adapter, IDXGIAdapter1 **ppAdapter) { UINT numRootDevices = 1u; if (numRootDevicesToEnum > 0u) { numRootDevices = numRootDevicesToEnum; } else if (DebugManager.flags.CreateMultipleRootDevices.get()) { numRootDevices = static_cast(DebugManager.flags.CreateMultipleRootDevices.get()); } if (Adapter >= numRootDevices) { *(IDXGIAdapter1 **)ppAdapter = nullptr; return DXGI_ERROR_NOT_FOUND; } *(IDXGIAdapter1 **)ppAdapter = new UltIDXGIAdapter1; return S_OK; } BOOL STDMETHODCALLTYPE IsCurrent(void) { return 0; } HRESULT STDMETHODCALLTYPE EnumAdapters( UINT Adapter, IDXGIAdapter **ppAdapter) { return S_OK; } HRESULT STDMETHODCALLTYPE MakeWindowAssociation( HWND WindowHandle, UINT Flags) { return S_OK; } HRESULT STDMETHODCALLTYPE GetWindowAssociation( _Out_ HWND *pWindowHandle) { return S_OK; } HRESULT STDMETHODCALLTYPE CreateSwapChain( _In_ IUnknown *pDevice, _In_ DXGI_SWAP_CHAIN_DESC *pDesc, IDXGISwapChain **ppSwapChain) { return S_OK; } HRESULT STDMETHODCALLTYPE CreateSoftwareAdapter( HMODULE Module, IDXGIAdapter **ppAdapter) { return S_OK; } // IDXGIObject HRESULT STDMETHODCALLTYPE SetPrivateData( _In_ REFGUID Name, UINT DataSize, const void *pData) { return S_OK; } HRESULT STDMETHODCALLTYPE SetPrivateDataInterface( _In_ REFGUID Name, _In_opt_ const IUnknown *pUnknown) { return S_OK; } HRESULT STDMETHODCALLTYPE GetPrivateData( _In_ REFGUID Name, _Inout_ UINT *pDataSize, _Out_writes_bytes_(*pDataSize) void *pData) { return S_OK; } HRESULT STDMETHODCALLTYPE GetParent( _In_ REFIID riid, _COM_Outptr_ void **ppParent) { return S_OK; } // IUnknown HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void __RPC_FAR *__RPC_FAR *ppvObject) { return S_OK; } ULONG STDMETHODCALLTYPE AddRef(void) { return 0; } ULONG STDMETHODCALLTYPE Release(void) { // this must be 
the last instruction delete this; return 0; } }; HRESULT WINAPI ULTCreateDXGIFactory(REFIID riid, void **ppFactory); void WINAPI ULTGetSystemInfo(SYSTEM_INFO *pSystemInfo); } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm20_tests.cpp000066400000000000000000002062271363734646600310210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_library.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/os_interface/windows/driver_info_windows.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/sys_calls.h" #include "shared/source/os_interface/windows/wddm/wddm_interface.h" #include "shared/source/os_interface/windows/wddm_allocation.h" #include "shared/source/os_interface/windows/wddm_engine_mapper.h" #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_gfx_partition.h" #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_wddm_residency_logger.h" #include 
"opencl/test/unit_test/mocks/mock_wddm_residency_logger_functions.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" #include "gtest/gtest.h" #include #include namespace NEO { namespace SysCalls { extern const wchar_t *currentLibraryPath; } extern uint32_t numRootDevicesToEnum; } // namespace NEO using namespace NEO; namespace GmmHelperFunctions { Gmm *getGmm(void *ptr, size_t size) { size_t alignedSize = alignSizeWholePage(ptr, size); void *alignedPtr = alignUp(ptr, 4096); Gmm *gmm = new Gmm(platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0]->getGmmClientContext(), alignedPtr, alignedSize, false); EXPECT_NE(gmm->gmmResourceInfo.get(), nullptr); return gmm; } } // namespace GmmHelperFunctions using Wddm20Tests = WddmTest; using Wddm20WithMockGdiDllTestsWithoutWddmInit = WddmTestWithMockGdiDll; using Wddm20InstrumentationTest = WddmInstrumentationTest; struct Wddm20WithMockGdiDllTests : public Wddm20WithMockGdiDllTestsWithoutWddmInit { using Wddm20WithMockGdiDllTestsWithoutWddmInit::TearDown; void SetUp() override { Wddm20WithMockGdiDllTestsWithoutWddmInit::SetUp(); init(); } }; TEST_F(Wddm20Tests, givenMinWindowsAddressWhenWddmIsInitializedThenWddmUseThisAddress) { uintptr_t expectedAddress = 0x200000; EXPECT_EQ(expectedAddress, NEO::windowsMinAddress); EXPECT_EQ(expectedAddress, wddm->getWddmMinAddress()); } TEST_F(Wddm20Tests, doubleCreation) { EXPECT_EQ(1u, wddm->createContextResult.called); wddm->init(); EXPECT_EQ(1u, wddm->createContextResult.called); } TEST_F(Wddm20Tests, givenNullPageTableManagerAndRenderCompressedResourceWhenMappingGpuVaThenDontUpdateAuxTable) { auto gmm = std::unique_ptr(new Gmm(rootDeviceEnvironemnt->getGmmClientContext(), nullptr, 1, false)); auto mockGmmRes = reinterpret_cast(gmm->gmmResourceInfo.get()); mockGmmRes->setUnifiedAuxTranslationCapable(); 
void *fakePtr = reinterpret_cast(0x100); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, fakePtr, 0x2100, nullptr, MemoryPool::MemoryNull); allocation.setDefaultGmm(gmm.get()); allocation.getHandleToModify(0u) = ALLOCATION_HANDLE; EXPECT_TRUE(wddm->mapGpuVirtualAddress(&allocation)); } TEST(WddmDiscoverDevices, WhenNoHwDeviceIdIsProvidedToWddmThenWddmIsNotCreated) { struct MockWddm : public Wddm { MockWddm(std::unique_ptr hwDeviceIdIn, RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(std::move(hwDeviceIdIn), rootDeviceEnvironment) {} }; MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); EXPECT_THROW(auto wddm = std::make_unique(nullptr, rootDeviceEnvironment), std::exception); } TEST(WddmDiscoverDevices, WhenMultipleRootDevicesAreAvailableThenAllAreDiscovered) { VariableBackup backup{&numRootDevicesToEnum}; numRootDevicesToEnum = 3u; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(numRootDevicesToEnum, hwDeviceIds.size()); } TEST(WddmDiscoverDevices, WhenAdapterDescriptionContainsVirtualRenderThenAdapterIsDiscovered) { VariableBackup descriptionBackup(&UltIDXGIAdapter1::description); descriptionBackup = L"Virtual Render"; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); } TEST(Wddm20EnumAdaptersTest, expectTrue) { const HardwareInfo *hwInfo = defaultHwInfo.get(); std::unique_ptr mockGdiDll(setAdapterInfo(&hwInfo->platform, &hwInfo->gtSystemInfo, hwInfo->capabilityTable.gpuAddressSpace)); MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr wddm(Wddm::createWddm(nullptr, rootDeviceEnvironment)); bool success = wddm->init(); EXPECT_TRUE(success); 
EXPECT_EQ(rootDeviceEnvironment.getHardwareInfo()->platform.eDisplayCoreFamily, hwInfo->platform.eDisplayCoreFamily); } TEST(Wddm20EnumAdaptersTest, givenEmptyHardwareInfoWhenEnumAdapterIsCalledThenCapabilityTableIsSet) { const HardwareInfo *hwInfo = defaultHwInfo.get(); std::unique_ptr mockGdiDll(setAdapterInfo(&hwInfo->platform, &hwInfo->gtSystemInfo, hwInfo->capabilityTable.gpuAddressSpace)); ExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[0].get(); std::unique_ptr wddm(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); bool success = wddm->init(); HardwareInfo outHwInfo = *rootDeviceEnvironment->getHardwareInfo(); EXPECT_TRUE(success); EXPECT_EQ(outHwInfo.platform.eDisplayCoreFamily, hwInfo->platform.eDisplayCoreFamily); EXPECT_EQ(outHwInfo.capabilityTable.defaultProfilingTimerResolution, hwInfo->capabilityTable.defaultProfilingTimerResolution); EXPECT_EQ(outHwInfo.capabilityTable.clVersionSupport, hwInfo->capabilityTable.clVersionSupport); EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.enableKmdNotify, hwInfo->capabilityTable.kmdNotifyProperties.enableKmdNotify); EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, hwInfo->capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds); EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.enableQuickKmdSleep, hwInfo->capabilityTable.kmdNotifyProperties.enableQuickKmdSleep); EXPECT_EQ(outHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, hwInfo->capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds); } TEST(Wddm20EnumAdaptersTest, givenUnknownPlatformWhenEnumAdapterIsCalledThenFalseIsReturnedAndOutputIsEmpty) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.platform.eProductFamily = IGFX_UNKNOWN; std::unique_ptr mockGdiDll(setAdapterInfo(&hwInfo.platform, &hwInfo.gtSystemInfo, 
hwInfo.capabilityTable.gpuAddressSpace)); MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr wddm(Wddm::createWddm(nullptr, rootDeviceEnvironment)); auto ret = wddm->init(); EXPECT_FALSE(ret); // reset mock gdi hwInfo = *defaultHwInfo; mockGdiDll.reset(setAdapterInfo(&hwInfo.platform, &hwInfo.gtSystemInfo, hwInfo.capabilityTable.gpuAddressSpace)); } TEST_F(Wddm20Tests, whenInitializeWddmThenContextIsCreated) { auto context = osContext->getWddmContextHandle(); EXPECT_TRUE(context != static_cast(0)); } TEST_F(Wddm20Tests, allocation) { OsAgnosticMemoryManager mm(*executionEnvironment); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, mm.allocateSystemMemory(100, 0), 100, nullptr, MemoryPool::MemoryNull); Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize()); allocation.setDefaultGmm(gmm); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); EXPECT_TRUE(allocation.getDefaultHandle() != 0); auto error = wddm->destroyAllocation(&allocation, osContext.get()); EXPECT_TRUE(error); delete gmm; mm.freeSystemMemory(allocation.getUnderlyingBuffer()); } TEST_F(Wddm20WithMockGdiDllTests, givenAllocationSmallerUnderlyingThanAlignedSizeWhenCreatedThenWddmUseAligned) { void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1000); size_t underlyingSize = 0x2100; size_t alignedSize = 0x3000; size_t underlyingPages = underlyingSize / MemoryConstants::pageSize; size_t alignedPages = alignedSize / MemoryConstants::pageSize; WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, ptr, 0x2100, nullptr, MemoryPool::MemoryNull); Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getAlignedCpuPtr(), allocation.getAlignedSize()); allocation.setDefaultGmm(gmm); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); EXPECT_NE(0, 
allocation.getDefaultHandle()); bool ret = wddm->mapGpuVirtualAddress(&allocation); EXPECT_TRUE(ret); EXPECT_EQ(alignedPages, getLastCallMapGpuVaArgFcn()->SizeInPages); EXPECT_NE(underlyingPages, getLastCallMapGpuVaArgFcn()->SizeInPages); ret = wddm->destroyAllocation(&allocation, osContext.get()); EXPECT_TRUE(ret); delete gmm; } TEST_F(Wddm20WithMockGdiDllTests, givenReserveCallWhenItIsCalledWithProperParamtersThenAddressInRangeIsReturend) { auto sizeAlignedTo64Kb = 64 * KB; auto reservationAddress = wddm->reserveGpuVirtualAddress(wddm->getGfxPartition().Heap32[0].Base, wddm->getGfxPartition().Heap32[0].Limit, sizeAlignedTo64Kb); EXPECT_GE(reservationAddress, wddm->getGfxPartition().Heap32[0].Base); auto programmedReserved = getLastCallReserveGpuVaArgFcn(); EXPECT_EQ(0llu, programmedReserved->BaseAddress); EXPECT_EQ(wddm->getGfxPartition().Heap32[0].Base, programmedReserved->MinimumAddress); EXPECT_EQ(wddm->getGfxPartition().Heap32[0].Limit, programmedReserved->MaximumAddress); EXPECT_EQ(sizeAlignedTo64Kb, programmedReserved->Size); auto pagingQueue = wddm->getPagingQueue(); EXPECT_NE(0llu, pagingQueue); EXPECT_EQ(pagingQueue, programmedReserved->hPagingQueue); } TEST_F(Wddm20WithMockGdiDllTests, givenWddmAllocationWhenMappingGpuVaThenUseGmmSize) { void *fakePtr = reinterpret_cast(0x123); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, fakePtr, 100, nullptr, MemoryPool::MemoryNull); std::unique_ptr gmm(GmmHelperFunctions::getGmm(allocation.getAlignedCpuPtr(), allocation.getAlignedSize())); allocation.setDefaultGmm(gmm.get()); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); auto mockResourceInfo = static_cast(gmm->gmmResourceInfo.get()); mockResourceInfo->overrideReturnedSize(allocation.getAlignedSize() + (2 * MemoryConstants::pageSize)); wddm->mapGpuVirtualAddress(&allocation); uint64_t expectedSizeInPages = static_cast(mockResourceInfo->getSizeAllocation() / MemoryConstants::pageSize); 
EXPECT_EQ(expectedSizeInPages, getLastCallMapGpuVaArgFcn()->SizeInPages); } TEST_F(Wddm20Tests, givenGraphicsAllocationWhenItIsMappedInHeap0ThenItHasGpuAddressWithinHeapInternalLimits) { void *alignedPtr = (void *)0x12000; size_t alignedSize = 0x2000; std::unique_ptr gmm(GmmHelperFunctions::getGmm(alignedPtr, alignedSize)); uint64_t gpuAddress = 0u; auto heapBase = wddm->getGfxPartition().Heap32[static_cast(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)].Base; auto heapLimit = wddm->getGfxPartition().Heap32[static_cast(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)].Limit; bool ret = wddm->mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, heapBase, heapLimit, 0u, gpuAddress); EXPECT_TRUE(ret); auto cannonizedHeapBase = GmmHelper::canonize(heapBase); auto cannonizedHeapEnd = GmmHelper::canonize(heapLimit); EXPECT_GE(gpuAddress, cannonizedHeapBase); EXPECT_LE(gpuAddress, cannonizedHeapEnd); } TEST_F(Wddm20WithMockGdiDllTests, GivenThreeOsHandlesWhenAskedForDestroyAllocationsThenAllMarkedAllocationsAreDestroyed) { OsHandleStorage storage; OsHandle osHandle1 = {0}; OsHandle osHandle2 = {0}; OsHandle osHandle3 = {0}; osHandle1.handle = ALLOCATION_HANDLE; osHandle2.handle = ALLOCATION_HANDLE; osHandle3.handle = ALLOCATION_HANDLE; storage.fragmentStorageData[0].osHandleStorage = &osHandle1; storage.fragmentStorageData[0].freeTheFragment = true; storage.fragmentStorageData[1].osHandleStorage = &osHandle2; storage.fragmentStorageData[1].freeTheFragment = false; storage.fragmentStorageData[2].osHandleStorage = &osHandle3; storage.fragmentStorageData[2].freeTheFragment = true; D3DKMT_HANDLE handles[3] = {ALLOCATION_HANDLE, ALLOCATION_HANDLE, ALLOCATION_HANDLE}; bool retVal = wddm->destroyAllocations(handles, 3, 0); EXPECT_TRUE(retVal); auto destroyWithResourceHandleCalled = 0u; D3DKMT_DESTROYALLOCATION2 *ptrToDestroyAlloc2 = nullptr; getSizesFcn(destroyWithResourceHandleCalled, ptrToDestroyAlloc2); EXPECT_EQ(0u, ptrToDestroyAlloc2->Flags.SynchronousDestroy); EXPECT_EQ(1u, 
ptrToDestroyAlloc2->Flags.AssumeNotInUse); } TEST_F(Wddm20Tests, mapAndFreeGpuVa) { OsAgnosticMemoryManager mm(*executionEnvironment); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, mm.allocateSystemMemory(100, 0), 100, nullptr, MemoryPool::MemoryNull); Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize()); allocation.setDefaultGmm(gmm); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); EXPECT_TRUE(allocation.getDefaultHandle() != 0); auto error = wddm->mapGpuVirtualAddress(&allocation); EXPECT_TRUE(error); EXPECT_TRUE(allocation.getGpuAddress() != 0); error = wddm->freeGpuVirtualAddress(allocation.getGpuAddressToModify(), allocation.getUnderlyingBufferSize()); EXPECT_TRUE(error); EXPECT_TRUE(allocation.getGpuAddress() == 0); error = wddm->destroyAllocation(&allocation, osContext.get()); EXPECT_TRUE(error); delete gmm; mm.freeSystemMemory(allocation.getUnderlyingBuffer()); } TEST_F(Wddm20Tests, givenNullAllocationWhenCreateThenAllocateAndMap) { OsAgnosticMemoryManager mm(*executionEnvironment); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, 100, nullptr, MemoryPool::MemoryNull); Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize()); allocation.setDefaultGmm(gmm); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); bool ret = wddm->mapGpuVirtualAddress(&allocation); EXPECT_TRUE(ret); EXPECT_NE(0u, allocation.getGpuAddress()); EXPECT_EQ(allocation.getGpuAddress(), GmmHelper::canonize(allocation.getGpuAddress())); delete gmm; mm.freeSystemMemory(allocation.getUnderlyingBuffer()); } TEST_F(WddmTestWithMockGdiDll, givenShareableAllocationWhenCreateThenCreateResourceFlagIsEnabled) { init(); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, 
true); auto gmm = std::unique_ptr(GmmHelperFunctions::getGmm(nullptr, MemoryConstants::pageSize)); allocation.setDefaultGmm(gmm.get()); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); auto passedCreateAllocation = getMockAllocationFcn(); EXPECT_EQ(TRUE, passedCreateAllocation->Flags.CreateShared); EXPECT_EQ(TRUE, passedCreateAllocation->Flags.CreateResource); } TEST_F(WddmTestWithMockGdiDll, givenShareableAllocationWhenCreateThenSharedHandleAndResourceHandleAreSet) { init(); struct MockWddmMemoryManager : public WddmMemoryManager { using WddmMemoryManager::createGpuAllocationsWithRetry; using WddmMemoryManager::WddmMemoryManager; }; MemoryManagerCreate memoryManager(false, false, *executionEnvironment); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, true); auto gmm = std::unique_ptr(GmmHelperFunctions::getGmm(nullptr, MemoryConstants::pageSize)); allocation.setDefaultGmm(gmm.get()); auto status = memoryManager.createGpuAllocationsWithRetry(&allocation); EXPECT_TRUE(status); EXPECT_NE(0u, allocation.resourceHandle); EXPECT_NE(0u, allocation.peekSharedHandle()); } TEST(WddmAllocationTest, whenAllocationIsShareableThenSharedHandleToModifyIsSharedHandleOfAllocation) { WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, true); auto sharedHandleToModify = allocation.getSharedHandleToModify(); EXPECT_NE(nullptr, sharedHandleToModify); *sharedHandleToModify = 1234u; EXPECT_EQ(*sharedHandleToModify, allocation.peekSharedHandle()); } TEST(WddmAllocationTest, whenAllocationIsNotShareableThenItDoesntReturnSharedHandleToModify) { WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, MemoryConstants::pageSize, nullptr, MemoryPool::MemoryNull, false); auto sharedHandleToModify = allocation.getSharedHandleToModify(); 
EXPECT_EQ(nullptr, sharedHandleToModify); } TEST_F(Wddm20Tests, makeResidentNonResident) { OsAgnosticMemoryManager mm(*executionEnvironment); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, mm.allocateSystemMemory(100, 0), 100, nullptr, MemoryPool::MemoryNull); Gmm *gmm = GmmHelperFunctions::getGmm(allocation.getUnderlyingBuffer(), allocation.getUnderlyingBufferSize()); allocation.setDefaultGmm(gmm); auto status = wddm->createAllocation(&allocation); EXPECT_EQ(STATUS_SUCCESS, status); EXPECT_TRUE(allocation.getDefaultHandle() != 0); auto error = wddm->mapGpuVirtualAddress(&allocation); EXPECT_TRUE(error); EXPECT_TRUE(allocation.getGpuAddress() != 0); error = wddm->makeResident(allocation.getHandles().data(), allocation.getNumHandles(), false, nullptr, allocation.getAlignedSize()); EXPECT_TRUE(error); uint64_t sizeToTrim; error = wddm->evict(allocation.getHandles().data(), allocation.getNumHandles(), sizeToTrim); EXPECT_TRUE(error); auto monitoredFence = osContext->getResidencyController().getMonitoredFence(); UINT64 fenceValue = 100; monitoredFence.cpuAddress = &fenceValue; monitoredFence.currentFenceValue = 101; error = wddm->destroyAllocation(&allocation, osContext.get()); EXPECT_TRUE(error); delete gmm; mm.freeSystemMemory(allocation.getUnderlyingBuffer()); } TEST_F(Wddm20WithMockGdiDllTests, givenSharedHandleWhenCreateGraphicsAllocationFromSharedHandleIsCalledThenGraphicsAllocationWithSharedPropertiesIsCreated) { void *pSysMem = (void *)0x1000; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false)); auto status = setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); EXPECT_EQ(0u, status); MemoryManagerCreate mm(false, false, *executionEnvironment); AllocationProperties properties(0, false, 4096u, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto graphicsAllocation = mm.createGraphicsAllocationFromSharedHandle(ALLOCATION_HANDLE, properties, false); auto wddmAllocation = 
(WddmAllocation *)graphicsAllocation; ASSERT_NE(nullptr, wddmAllocation); EXPECT_EQ(ALLOCATION_HANDLE, wddmAllocation->peekSharedHandle()); EXPECT_EQ(RESOURCE_HANDLE, wddmAllocation->resourceHandle); EXPECT_NE(0u, wddmAllocation->getDefaultHandle()); EXPECT_EQ(ALLOCATION_HANDLE, wddmAllocation->getDefaultHandle()); EXPECT_NE(0u, wddmAllocation->getGpuAddress()); EXPECT_EQ(4096u, wddmAllocation->getUnderlyingBufferSize()); EXPECT_EQ(nullptr, wddmAllocation->getAlignedCpuPtr()); EXPECT_NE(nullptr, wddmAllocation->getDefaultGmm()); EXPECT_EQ(4096u, wddmAllocation->getDefaultGmm()->gmmResourceInfo->getSizeAllocation()); mm.freeGraphicsMemory(graphicsAllocation); auto destroyWithResourceHandleCalled = 0u; D3DKMT_DESTROYALLOCATION2 *ptrToDestroyAlloc2 = nullptr; status = getSizesFcn(destroyWithResourceHandleCalled, ptrToDestroyAlloc2); EXPECT_EQ(0u, ptrToDestroyAlloc2->Flags.SynchronousDestroy); EXPECT_EQ(1u, ptrToDestroyAlloc2->Flags.AssumeNotInUse); EXPECT_EQ(0u, status); EXPECT_EQ(1u, destroyWithResourceHandleCalled); } TEST_F(Wddm20WithMockGdiDllTests, givenSharedHandleWhenCreateGraphicsAllocationFromSharedHandleIsCalledThenMapGpuVaWithCpuPtrDepensOnBitness) { void *pSysMem = (void *)0x1000; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false)); auto status = setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); EXPECT_EQ(0u, status); MemoryManagerCreate mm(false, false, *executionEnvironment); AllocationProperties properties(0, false, 4096, GraphicsAllocation::AllocationType::SHARED_BUFFER, false); auto graphicsAllocation = mm.createGraphicsAllocationFromSharedHandle(ALLOCATION_HANDLE, properties, false); auto wddmAllocation = (WddmAllocation *)graphicsAllocation; ASSERT_NE(nullptr, wddmAllocation); if (is32bit && executionEnvironment->rootDeviceEnvironments[graphicsAllocation->getRootDeviceIndex()]->isFullRangeSvm()) { EXPECT_NE(wddm->mapGpuVirtualAddressResult.cpuPtrPassed, nullptr); } else { 
EXPECT_EQ(wddm->mapGpuVirtualAddressResult.cpuPtrPassed, nullptr); } mm.freeGraphicsMemory(graphicsAllocation); } TEST_F(Wddm20Tests, givenWddmCreatedWhenInitedThenMinAddressValid) { uintptr_t expected = windowsMinAddress; uintptr_t actual = wddm->getWddmMinAddress(); EXPECT_EQ(expected, actual); } HWTEST_F(Wddm20InstrumentationTest, configureDeviceAddressSpaceOnInit) { SYSTEM_INFO sysInfo = {}; WddmMock::getSystemInfo(&sysInfo); D3DKMT_HANDLE adapterHandle = ADAPTER_HANDLE; D3DKMT_HANDLE deviceHandle = DEVICE_HANDLE; const HardwareInfo hwInfo = *defaultHwInfo; BOOLEAN FtrL3IACoherency = hwInfo.featureTable.ftrL3IACoherency ? 1 : 0; uintptr_t maxAddr = hwInfo.capabilityTable.gpuAddressSpace >= MemoryConstants::max64BitAppAddress ? reinterpret_cast(sysInfo.lpMaximumApplicationAddress) + 1 : 0; EXPECT_CALL(*gmmMem, configureDeviceAddressSpace(adapterHandle, deviceHandle, wddm->getGdi()->escape.mFunc, maxAddr, FtrL3IACoherency)) .Times(1) .WillRepeatedly(::testing::Return(true)); wddm->init(); } TEST_F(Wddm20InstrumentationTest, configureDeviceAddressSpaceNoAdapter) { auto gdi = std::make_unique(); wddm->resetGdi(gdi.release()); EXPECT_CALL(*gmmMem, configureDeviceAddressSpace(static_cast(0), ::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(0); auto ret = wddm->configureDeviceAddressSpace(); EXPECT_FALSE(ret); } TEST_F(Wddm20InstrumentationTest, configureDeviceAddressSpaceNoDevice) { wddm->device = static_cast(0); EXPECT_CALL(*gmmMem, configureDeviceAddressSpace(::testing::_, static_cast(0), ::testing::_, ::testing::_, ::testing::_)) .Times(0); auto ret = wddm->configureDeviceAddressSpace(); EXPECT_FALSE(ret); } TEST_F(Wddm20InstrumentationTest, configureDeviceAddressSpaceNoEscFunc) { wddm->getGdi()->escape = static_cast(nullptr); EXPECT_CALL(*gmmMem, configureDeviceAddressSpace(::testing::_, ::testing::_, static_cast(nullptr), ::testing::_, ::testing::_)) .Times(0); auto ret = wddm->configureDeviceAddressSpace(); EXPECT_FALSE(ret); } 
// The max application address reported by Wddm must equal the MemoryConstants
// limit selected by is32bit.
TEST_F(Wddm20Tests, getMaxApplicationAddress) {
    const uint64_t expectedMaxAddress = is32bit ? MemoryConstants::max32BitAppAddress
                                                : MemoryConstants::max64BitAppAddress;
    const uint64_t reportedMaxAddress = wddm->getMaxApplicationAddress();

    EXPECT_EQ(reportedMaxAddress, expectedMaxAddress);
}

// With the UseNoRingFlushesKmdMode debug flag set to false before init(), the private
// driver data captured from the create-context call must have NoRingFlushes cleared.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenUseNoRingFlushesKmdModeDebugFlagToFalseWhenCreateContextIsCalledThenNoRingFlushesKmdModeIsSetToFalse) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.UseNoRingFlushesKmdMode.set(false);

    init();

    auto createContextData = getCreateContextDataFcn();
    auto pvtData = reinterpret_cast<CREATECONTEXT_PVTDATA *>(createContextData->pPrivateDriverData);
    EXPECT_FALSE(static_cast<bool>(pvtData->NoRingFlushes));
}

// The ClientHint captured from the create-context call must be D3DKMT_CLIENTHINT_OPENCL.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenCreateContextCallWhenDriverHintsItPointsToOpenCL) {
    init();

    auto createContextData = getCreateContextDataFcn();
    EXPECT_EQ(D3DKMT_CLIENTHINT_OPENCL, createContextData->ClientHint);
}

// With the UseNoRingFlushesKmdMode debug flag set to true before init(), the private
// driver data captured from the create-context call must have NoRingFlushes set.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenUseNoRingFlushesKmdModeDebugFlagToTrueWhenCreateContextIsCalledThenNoRingFlushesKmdModeIsSetToTrue) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.UseNoRingFlushesKmdMode.set(true);

    init();

    auto createContextData = getCreateContextDataFcn();
    auto pvtData = reinterpret_cast<CREATECONTEXT_PVTDATA *>(createContextData->pPrivateDriverData);
    EXPECT_TRUE(static_cast<bool>(pvtData->NoRingFlushes));
}

// The NodeOrdinal captured from the create-context call must equal the engine-mapper
// value for the first GPGPU engine instance of the default hardware info.
TEST_F(Wddm20WithMockGdiDllTestsWithoutWddmInit, givenEngineTypeWhenCreatingContextThenPassCorrectNodeOrdinal) {
    init();

    auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
    const UINT expectedNodeOrdinal = WddmEngineMapper::engineNodeMap(hwHelper.getGpgpuEngineInstances(*defaultHwInfo)[0]);

    auto createContextData = getCreateContextDataFcn();
    EXPECT_EQ(expectedNodeOrdinal, createContextData->NodeOrdinal);
}

// The wddm interface must report no HW queue support, and the captured create-context
// flags must have HwQueueSupported equal to zero.
TEST_F(Wddm20WithMockGdiDllTests, whenCreateContextIsCalledThenDisableHwQueues) {
    const bool hwQueuesSupported = wddm->wddmInterface->hwQueuesSupported();
    EXPECT_FALSE(hwQueuesSupported);

    auto createContextData = getCreateContextDataFcn();
    EXPECT_EQ(0u, createContextData->Flags.HwQueueSupported);
}
TEST_F(Wddm20WithMockGdiDllTests, givenDestructionOsContextWinWhenCallingDestroyMonitorFenceThenDoCallGdiDestroy) { auto fenceHandle = osContext->getResidencyController().getMonitoredFence().fenceHandle; osContext.reset(nullptr); EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled); EXPECT_EQ(fenceHandle, getDestroySynchronizationObjectDataFcn()->hSyncObject); } TEST_F(Wddm20Tests, whenCreateHwQueueIsCalledThenAlwaysReturnFalse) { EXPECT_FALSE(wddm->wddmInterface->createHwQueue(*osContext.get())); } TEST_F(Wddm20Tests, whenWddmIsInitializedThenGdiDoesntHaveHwQueueDDIs) { EXPECT_EQ(nullptr, wddm->getGdi()->createHwQueue.mFunc); EXPECT_EQ(nullptr, wddm->getGdi()->destroyHwQueue.mFunc); EXPECT_EQ(nullptr, wddm->getGdi()->submitCommandToHwQueue.mFunc); } TEST(DebugFlagTest, givenDebugManagerWhenGetForUseNoRingFlushesKmdModeIsCalledThenTrueIsReturned) { EXPECT_TRUE(DebugManager.flags.UseNoRingFlushesKmdMode.get()); } TEST_F(Wddm20Tests, makeResidentMultipleHandles) { D3DKMT_HANDLE handles[2] = {ALLOCATION_HANDLE, ALLOCATION_HANDLE}; gdi->getMakeResidentArg().NumAllocations = 0; gdi->getMakeResidentArg().AllocationList = nullptr; bool error = wddm->makeResident(handles, 2, false, nullptr, 0x1000); EXPECT_TRUE(error); EXPECT_EQ(2u, gdi->getMakeResidentArg().NumAllocations); EXPECT_EQ(handles, gdi->getMakeResidentArg().AllocationList); } TEST_F(Wddm20Tests, makeResidentMultipleHandlesWithReturnBytesToTrim) { D3DKMT_HANDLE handles[2] = {ALLOCATION_HANDLE, ALLOCATION_HANDLE}; gdi->getMakeResidentArg().NumAllocations = 0; gdi->getMakeResidentArg().AllocationList = nullptr; gdi->getMakeResidentArg().NumBytesToTrim = 30; uint64_t bytesToTrim = 0; bool success = wddm->makeResident(handles, 2, false, &bytesToTrim, 0x1000); EXPECT_TRUE(success); EXPECT_EQ(gdi->getMakeResidentArg().NumBytesToTrim, bytesToTrim); } TEST_F(Wddm20Tests, makeNonResidentCallsEvict) { D3DKMT_HANDLE handle = (D3DKMT_HANDLE)0x1234; gdi->getEvictArg().AllocationList = nullptr; 
gdi->getEvictArg().Flags.Value = 0; gdi->getEvictArg().hDevice = 0; gdi->getEvictArg().NumAllocations = 0; gdi->getEvictArg().NumBytesToTrim = 20; uint64_t sizeToTrim = 10; wddm->evict(&handle, 1, sizeToTrim); EXPECT_EQ(1u, gdi->getEvictArg().NumAllocations); EXPECT_EQ(&handle, gdi->getEvictArg().AllocationList); EXPECT_EQ(wddm->getDevice(), gdi->getEvictArg().hDevice); EXPECT_EQ(0u, gdi->getEvictArg().NumBytesToTrim); } TEST_F(Wddm20Tests, givenDestroyAllocationWhenItIsCalledThenAllocationIsPassedToDestroyAllocation) { MockWddmAllocation allocation; allocation.getResidencyData().updateCompletionData(10, osContext->getContextId()); allocation.handle = ALLOCATION_HANDLE; *osContext->getResidencyController().getMonitoredFence().cpuAddress = 10; gdi->getWaitFromCpuArg().FenceValueArray = nullptr; gdi->getWaitFromCpuArg().Flags.Value = 0; gdi->getWaitFromCpuArg().hDevice = (D3DKMT_HANDLE)0; gdi->getWaitFromCpuArg().ObjectCount = 0; gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr; gdi->getDestroyArg().AllocationCount = 0; gdi->getDestroyArg().Flags.Value = 0; gdi->getDestroyArg().hDevice = (D3DKMT_HANDLE)0; gdi->getDestroyArg().hResource = (D3DKMT_HANDLE)0; gdi->getDestroyArg().phAllocationList = nullptr; wddm->destroyAllocation(&allocation, osContext.get()); EXPECT_EQ(wddm->getDevice(), gdi->getDestroyArg().hDevice); EXPECT_EQ(1u, gdi->getDestroyArg().AllocationCount); EXPECT_NE(nullptr, gdi->getDestroyArg().phAllocationList); } TEST_F(Wddm20Tests, WhenLastFenceLessEqualThanMonitoredThenWaitFromCpuIsNotCalled) { MockWddmAllocation allocation; allocation.getResidencyData().updateCompletionData(10, osContext->getContextId()); allocation.handle = ALLOCATION_HANDLE; *osContext->getResidencyController().getMonitoredFence().cpuAddress = 10; gdi->getWaitFromCpuArg().FenceValueArray = nullptr; gdi->getWaitFromCpuArg().Flags.Value = 0; gdi->getWaitFromCpuArg().hDevice = (D3DKMT_HANDLE)0; gdi->getWaitFromCpuArg().ObjectCount = 0; gdi->getWaitFromCpuArg().ObjectHandleArray = 
nullptr; auto status = wddm->waitFromCpu(10, osContext->getResidencyController().getMonitoredFence()); EXPECT_TRUE(status); EXPECT_EQ(nullptr, gdi->getWaitFromCpuArg().FenceValueArray); EXPECT_EQ((D3DKMT_HANDLE)0, gdi->getWaitFromCpuArg().hDevice); EXPECT_EQ(0u, gdi->getWaitFromCpuArg().ObjectCount); EXPECT_EQ(nullptr, gdi->getWaitFromCpuArg().ObjectHandleArray); } /* The fence CPU value is 10 and waitFromCpu(20) is requested: the recorded GDI wait arguments must be populated (device handle, one object, non-null arrays). */ TEST_F(Wddm20Tests, WhenLastFenceGreaterThanMonitoredThenWaitFromCpuIsCalled) { MockWddmAllocation allocation; allocation.getResidencyData().updateCompletionData(10, osContext->getContextId()); allocation.handle = ALLOCATION_HANDLE; *osContext->getResidencyController().getMonitoredFence().cpuAddress = 10; gdi->getWaitFromCpuArg().FenceValueArray = nullptr; gdi->getWaitFromCpuArg().Flags.Value = 0; gdi->getWaitFromCpuArg().hDevice = (D3DKMT_HANDLE)0; gdi->getWaitFromCpuArg().ObjectCount = 0; gdi->getWaitFromCpuArg().ObjectHandleArray = nullptr; auto status = wddm->waitFromCpu(20, osContext->getResidencyController().getMonitoredFence()); EXPECT_TRUE(status); EXPECT_NE(nullptr, gdi->getWaitFromCpuArg().FenceValueArray); EXPECT_EQ((D3DKMT_HANDLE)wddm->getDevice(), gdi->getWaitFromCpuArg().hDevice); EXPECT_EQ(1u, gdi->getWaitFromCpuArg().ObjectCount); EXPECT_NE(nullptr, gdi->getWaitFromCpuArg().ObjectHandleArray); } /* createMonitoredFence must overwrite the pre-seeded InitialFenceValue (300) with 0 and leave currentFenceValue at 1. */ TEST_F(Wddm20Tests, createMonitoredFenceIsInitializedWithFenceValueZeroAndCurrentFenceValueIsSetToOne) { gdi->createSynchronizationObject2 = gdi->createSynchronizationObject2Mock; gdi->getCreateSynchronizationObject2Arg().Info.MonitoredFence.InitialFenceValue = 300; wddm->wddmInterface->createMonitoredFence(*osContext); EXPECT_EQ(0u, gdi->getCreateSynchronizationObject2Arg().Info.MonitoredFence.InitialFenceValue); EXPECT_EQ(1u, osContext->getResidencyController().getMonitoredFence().currentFenceValue); } /* GDI stub: reports zero allocations and returns 0. */ NTSTATUS APIENTRY queryResourceInfoMock(D3DKMT_QUERYRESOURCEINFO *pData) { pData->NumAllocations = 0; return 0; } /* With queryResourceInfoMock installed as gdi->queryResourceInfo, openSharedHandle is expected to return false. */ TEST_F(Wddm20Tests, givenOpenSharedHandleWhenZeroAllocationsThenReturnNull) { 
D3DKMT_HANDLE handle = 0; WddmAllocation *alloc = nullptr; gdi->queryResourceInfo = reinterpret_cast(queryResourceInfoMock); auto ret = wddm->openSharedHandle(handle, alloc); EXPECT_EQ(false, ret); } /* gdi->createAllocation is replaced by a stub returning STATUS_GRAPHICS_NO_VIDEO_MEMORY; createAllocation64k must then return false. */ TEST_F(Wddm20Tests, whenCreateAllocation64kFailsThenReturnFalse) { struct FailingCreateAllocation { static NTSTATUS APIENTRY mockCreateAllocation(D3DKMT_CREATEALLOCATION *param) { return STATUS_GRAPHICS_NO_VIDEO_MEMORY; }; }; gdi->createAllocation = FailingCreateAllocation::mockCreateAllocation; void *fakePtr = reinterpret_cast(0x123); auto gmm = std::make_unique(rootDeviceEnvironemnt->getGmmClientContext(), fakePtr, 100, false); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, fakePtr, 100, nullptr, MemoryPool::MemoryNull); allocation.setDefaultGmm(gmm.get()); EXPECT_FALSE(wddm->createAllocation64k(&allocation)); } /* Same kind of failing stub with a single-fragment handle storage: createAllocationsAndMapGpuVa must return STATUS_GRAPHICS_NO_VIDEO_MEMORY. The Gmm obtained from GmmHelperFunctions::getGmm is deleted manually at the end of the test. */ TEST_F(Wddm20Tests, givenReadOnlyMemoryWhenCreateAllocationFailsWithNoVideoMemoryThenCorrectStatusIsReturned) { class MockCreateAllocation { public: static NTSTATUS APIENTRY mockCreateAllocation(D3DKMT_CREATEALLOCATION *param) { return STATUS_GRAPHICS_NO_VIDEO_MEMORY; }; }; gdi->createAllocation = MockCreateAllocation::mockCreateAllocation; OsHandleStorage handleStorage; OsHandle handle = {0}; ResidencyData residency; handleStorage.fragmentCount = 1; handleStorage.fragmentStorageData[0].cpuPtr = (void *)0x1000; handleStorage.fragmentStorageData[0].fragmentSize = 0x1000; handleStorage.fragmentStorageData[0].freeTheFragment = false; handleStorage.fragmentStorageData[0].osHandleStorage = &handle; handleStorage.fragmentStorageData[0].residency = &residency; handleStorage.fragmentStorageData[0].osHandleStorage->gmm = GmmHelperFunctions::getGmm(nullptr, 0); NTSTATUS result = wddm->createAllocationsAndMapGpuVa(handleStorage); EXPECT_EQ(STATUS_GRAPHICS_NO_VIDEO_MEMORY, result); delete handleStorage.fragmentStorageData[0].osHandleStorage->gmm; } /* wddm->init() must return true and applyAdditionalContextFlagsResult must record exactly one call. */ TEST_F(Wddm20Tests, whenContextIsInitializedThenApplyAdditionalContextFlagsIsCalled) { auto 
result = wddm->init(); EXPECT_TRUE(result); EXPECT_EQ(1u, wddm->applyAdditionalContextFlagsResult.called); } /* With the DoNotRegisterTrimCallback debug flag set, registerTrimCallback must return nullptr. */ TEST_F(Wddm20Tests, givenTrimCallbackRegistrationIsDisabledInDebugVariableWhenRegisteringCallbackThenReturnNullptr) { DebugManagerStateRestore stateRestore; DebugManager.flags.DoNotRegisterTrimCallback.set(true); WddmResidencyController residencyController{*wddm, 0u}; EXPECT_EQ(nullptr, wddm->registerTrimCallback([](D3DKMT_TRIMNOTIFICATION *) {}, residencyController)); } /* Without the debug flag, registerTrimCallback must return a non-null handle. */ TEST_F(Wddm20Tests, givenSuccessWhenRegisteringTrimCallbackThenReturnTrimCallbackHandle) { WddmResidencyController residencyController{*wddm, 0u}; auto trimCallbackHandle = wddm->registerTrimCallback([](D3DKMT_TRIMNOTIFICATION *) {}, residencyController); EXPECT_NE(nullptr, trimCallbackHandle); } /* unregisterTrimCallback must pass the callback and the handle through to the recorded GDI unregister arguments. */ TEST_F(Wddm20Tests, givenCorrectArgumentsWhenUnregisteringTrimCallbackThenPassArgumentsToGdiCall) { PFND3DKMT_TRIMNOTIFICATIONCALLBACK callback = [](D3DKMT_TRIMNOTIFICATION *) {}; auto trimCallbackHandle = reinterpret_cast(0x9876); wddm->unregisterTrimCallback(callback, trimCallbackHandle); EXPECT_EQ(callback, gdi->getUnregisterTrimNotificationArg().Callback); EXPECT_EQ(trimCallbackHandle, gdi->getUnregisterTrimNotificationArg().Handle); } /* A nullptr handle must leave the previously seeded GDI unregister arguments untouched. */ TEST_F(Wddm20Tests, givenNullTrimCallbackHandleWhenUnregisteringTrimCallbackThenDoNotDoGdiCall) { PFND3DKMT_TRIMNOTIFICATIONCALLBACK callbackBefore = [](D3DKMT_TRIMNOTIFICATION *) {}; auto trimCallbackHandleBefore = reinterpret_cast(0x9876); gdi->getUnregisterTrimNotificationArg().Callback = callbackBefore; gdi->getUnregisterTrimNotificationArg().Handle = trimCallbackHandleBefore; wddm->unregisterTrimCallback([](D3DKMT_TRIMNOTIFICATION *) {}, nullptr); EXPECT_EQ(callbackBefore, gdi->getUnregisterTrimNotificationArg().Callback); EXPECT_EQ(trimCallbackHandleBefore, gdi->getUnregisterTrimNotificationArg().Handle); } /* The lock / temporary-resource tests below reuse the Wddm20Tests fixture under a separate suite name. */ using WddmLockWithMakeResidentTests = Wddm20Tests; /* lockResource with the second argument false: no handle is stored in resourceHandles and makeResident is not called. */ TEST_F(WddmLockWithMakeResidentTests, 
givenAllocationThatDoesntNeedMakeResidentBeforeLockWhenLockThenDontStoreItOrCallMakeResident) { EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(0u, wddm->makeResidentResult.called); wddm->lockResource(ALLOCATION_HANDLE, false, 0x1000); EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(0u, wddm->makeResidentResult.called); wddm->unlockResource(ALLOCATION_HANDLE); } /* lockResource with the second argument true: mockTemporaryResources must record one makeResident call. */ TEST_F(WddmLockWithMakeResidentTests, givenAllocationThatNeedsMakeResidentBeforeLockWhenLockThenCallBlockingMakeResident) { wddm->lockResource(ALLOCATION_HANDLE, true, 0x1000); EXPECT_EQ(1u, mockTemporaryResources->makeResidentResult.called); } /* makeResidentResource must acquire the lock once, and the lock passed must be resourcesLock. */ TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentThenAcquireUniqueLock) { wddm->temporaryResources->makeResidentResource(ALLOCATION_HANDLE, 0x1000); EXPECT_EQ(1u, mockTemporaryResources->acquireLockResult.called); EXPECT_EQ(reinterpret_cast(&mockTemporaryResources->resourcesLock), mockTemporaryResources->acquireLockResult.uint64ParamPassed); } /* makeResidentResource must call wddm makeResident once and append the handle to resourceHandles. */ TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentThenCallMakeResidentAndStoreAllocation) { wddm->temporaryResources->makeResidentResource(ALLOCATION_HANDLE, 0x1000); EXPECT_EQ(1u, wddm->makeResidentResult.called); EXPECT_EQ(ALLOCATION_HANDLE, mockTemporaryResources->resourceHandles.back()); } /* After makeResidentResource, mockPagingFence must equal pagingFenceReturnValue + 1; the expected getPagingFenceAddress call count depends on residencyLoggingAvailable. */ TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentThenWaitForCurrentPagingFenceValue) { wddm->mockPagingFence = 0u; wddm->temporaryResources->makeResidentResource(ALLOCATION_HANDLE, 0x1000); UINT64 expectedCallNumber = NEO::residencyLoggingAvailable ? 
MockGdi::pagingFenceReturnValue + 1 : 0ull; EXPECT_EQ(1u, wddm->makeResidentResult.called); EXPECT_EQ(MockGdi::pagingFenceReturnValue + 1, wddm->mockPagingFence); EXPECT_EQ(expectedCallNumber, wddm->getPagingFenceAddressResult.called); } /* makeResident is mocked to be called twice and always fail; evictAllResources must have been called once. */ TEST_F(WddmLockWithMakeResidentTests, givenAllocationWhenApplyBlockingMakeResidentAndMakeResidentCallFailsThenEvictTemporaryResourcesAndRetry) { MockWddmAllocation allocation; allocation.handle = 0x3; GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); EXPECT_CALL(gmockWddm, makeResident(&allocation.handle, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(2).WillRepeatedly(::testing::Return(false)); gmockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(1u, mockTemporaryResources->evictAllResourcesResult.called); } /* One stored handle, evict succeeds once, makeResident is expected three times and always fails; evictAllResources must have been called twice. */ TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndTemporaryResourcesAreEvictedSuccessfullyThenCallMakeResidentOneMoreTime) { MockWddmAllocation allocation; allocation.handle = 0x3; GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(allocation.handle); EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillRepeatedly(::testing::Return(true)); EXPECT_CALL(gmockWddm, makeResident(&allocation.handle, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(3).WillRepeatedly(::testing::Return(false)); gmockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(2u, mockTemporaryResources->evictAllResourcesResult.called); } /* makeResident keeps failing: resourceHandles goes from one entry (0x1) to empty, so the new handle is not stored. */ TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndMakeResidentStillFailsThenDontStoreTemporaryResource) { MockWddmAllocation allocation; allocation.handle = 0x2; GmockWddm 
gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(0x1); EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillRepeatedly(::testing::Return(true)); EXPECT_CALL(gmockWddm, makeResident(&allocation.handle, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(3).WillRepeatedly(::testing::Return(false)); EXPECT_EQ(1u, mockTemporaryResources->resourceHandles.size()); gmockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(0u, mockTemporaryResources->resourceHandles.size()); } /* makeResident fails once and then succeeds: resourceHandles ends with a single entry, the new handle 0x2. */ TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndMakeResidentPassesAfterEvictThenStoreTemporaryResource) { MockWddmAllocation allocation; allocation.handle = 0x2; GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(0x1); EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillRepeatedly(::testing::Return(true)); EXPECT_CALL(gmockWddm, makeResident(&allocation.handle, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(2).WillOnce(::testing::Return(false)).WillOnce(::testing::Return(true)); EXPECT_EQ(1u, mockTemporaryResources->resourceHandles.size()); gmockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(1u, mockTemporaryResources->resourceHandles.size()); EXPECT_EQ(0x2, mockTemporaryResources->resourceHandles.back()); } /* makeResident succeeds on the first call: the new handle is appended after the existing one (size 2, back is 0x2). */ TEST_F(WddmLockWithMakeResidentTests, whenApplyBlockingMakeResidentAndMakeResidentPassesThenStoreTemporaryResource) { MockWddmAllocation allocation; allocation.handle = 0x2; GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = 
reinterpret_cast(gmockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(0x1); EXPECT_CALL(gmockWddm, makeResident(&allocation.handle, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); gmockWddm.temporaryResources->makeResidentResource(allocation.handle, 0x1000); EXPECT_EQ(2u, mockTemporaryResources->resourceHandles.size()); EXPECT_EQ(0x2, mockTemporaryResources->resourceHandles.back()); } /* evictAllResources on an empty container must report MEMORY_NOT_FOUND. */ TEST_F(WddmLockWithMakeResidentTests, givenNoTemporaryResourcesWhenEvictingAllTemporaryResourcesThenEvictionIsNotApplied) { wddm->getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(MemoryOperationsStatus::MEMORY_NOT_FOUND, mockTemporaryResources->evictAllResourcesResult.operationSuccess); } /* evictAllResources must acquire resourcesLock exactly once. */ TEST_F(WddmLockWithMakeResidentTests, whenEvictingAllTemporaryResourcesThenAcquireTemporaryResourcesLock) { wddm->getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(1u, mockTemporaryResources->acquireLockResult.called); EXPECT_EQ(reinterpret_cast(&mockTemporaryResources->resourcesLock), mockTemporaryResources->acquireLockResult.uint64ParamPassed); } /* One stored handle and a succeeding evict: evictAllResources is called once and reports SUCCESS. */ TEST_F(WddmLockWithMakeResidentTests, whenEvictingAllTemporaryResourcesAndAllEvictionsSucceedThenReturnSuccess) { MockWddmAllocation allocation; GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(allocation.handle); EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); gmockWddm.getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(1u, mockTemporaryResources->evictAllResourcesResult.called); EXPECT_EQ(MemoryOperationsStatus::SUCCESS, mockTemporaryResources->evictAllResourcesResult.operationSuccess); } /* Three stored handles: exactly one evict call is expected and resourceHandles must be empty afterwards. */ TEST_F(WddmLockWithMakeResidentTests, 
givenThreeAllocationsWhenEvictingAllTemporaryResourcesThenCallEvictForEachAllocationAndCleanList) { GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); constexpr uint32_t numAllocations = 3u; for (auto i = 0u; i < numAllocations; i++) { mockTemporaryResources->resourceHandles.push_back(i); } EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillRepeatedly(::testing::Return(true)); gmockWddm.getTemporaryResourcesContainer()->evictAllResources(); EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); } /* Three stored handles and a failing evict call: evictAllResources must report FAILED. */ TEST_F(WddmLockWithMakeResidentTests, givenThreeAllocationsWhenEvictingAllTemporaryResourcesAndOneOfThemFailsThenReturnFail) { GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); constexpr uint32_t numAllocations = 3u; for (auto i = 0u; i < numAllocations; i++) { mockTemporaryResources->resourceHandles.push_back(i); } EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillOnce(::testing::Return(false)); gmockWddm.getTemporaryResourcesContainer()->evictAllResources(); EXPECT_EQ(MemoryOperationsStatus::FAILED, mockTemporaryResources->evictAllResourcesResult.operationSuccess); } /* evictResource on an empty container must report MEMORY_NOT_FOUND. */ TEST_F(WddmLockWithMakeResidentTests, givenNoTemporaryResourcesWhenEvictingTemporaryResourceThenEvictionIsNotApplied) { wddm->getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_EQ(MemoryOperationsStatus::MEMORY_NOT_FOUND, mockTemporaryResources->evictResourceResult.operationSuccess); } /* evictResource must acquire resourcesLock exactly once. */ TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceThenAcquireTemporaryResourcesLock) { wddm->getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_EQ(1u, mockTemporaryResources->acquireLockResult.called); EXPECT_EQ(reinterpret_cast(&mockTemporaryResources->resourcesLock), 
mockTemporaryResources->acquireLockResult.uint64ParamPassed); } /* Evicting a handle that is not stored keeps the stored handle in the list and reports MEMORY_NOT_FOUND. */ TEST_F(WddmLockWithMakeResidentTests, whenEvictingNonExistingTemporaryResourceThenEvictIsNotAppliedAndTemporaryResourcesAreRestored) { mockTemporaryResources->resourceHandles.push_back(ALLOCATION_HANDLE); EXPECT_FALSE(mockTemporaryResources->resourceHandles.empty()); wddm->getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE + 1); EXPECT_FALSE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(MemoryOperationsStatus::MEMORY_NOT_FOUND, mockTemporaryResources->evictResourceResult.operationSuccess); } /* Stored handle and a failing evict: the handle is still removed from the list and FAILED is reported. */ TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceAndEvictFailsThenReturnFail) { GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(ALLOCATION_HANDLE); EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillOnce(::testing::Return(false)); gmockWddm.getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(MemoryOperationsStatus::FAILED, mockTemporaryResources->evictResourceResult.operationSuccess); } /* Stored handle and a succeeding evict: the list is empty afterwards and SUCCESS is reported. */ TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceAndEvictSucceedThenReturnSuccess) { GmockWddm gmockWddm(*executionEnvironment->rootDeviceEnvironments[0].get()); auto mockTemporaryResources = reinterpret_cast(gmockWddm.temporaryResources.get()); mockTemporaryResources->resourceHandles.push_back(ALLOCATION_HANDLE); EXPECT_CALL(gmockWddm, evict(::testing::_, ::testing::_, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); gmockWddm.getTemporaryResourcesContainer()->evictResource(ALLOCATION_HANDLE); EXPECT_TRUE(mockTemporaryResources->resourceHandles.empty()); EXPECT_EQ(MemoryOperationsStatus::SUCCESS, mockTemporaryResources->evictResourceResult.operationSuccess); } 
/* Evicting 0x2 from the list {0x1, 0x2, 0x3} must leave 0x1 at the front and 0x3 at the back. */ TEST_F(WddmLockWithMakeResidentTests, whenEvictingTemporaryResourceThenOtherResourcesRemainOnTheList) { mockTemporaryResources->resourceHandles.push_back(0x1); mockTemporaryResources->resourceHandles.push_back(0x2); mockTemporaryResources->resourceHandles.push_back(0x3); wddm->getTemporaryResourcesContainer()->evictResource(0x2); EXPECT_EQ(2u, mockTemporaryResources->resourceHandles.size()); EXPECT_EQ(0x1, mockTemporaryResources->resourceHandles.front()); EXPECT_EQ(0x3, mockTemporaryResources->resourceHandles.back()); } /* WddmMemoryManager::lockResource: lockResult.uint64ParamPassed is expected to be 0 when needsMakeResidentBeforeLock is false and 1 when it is true. */ TEST_F(WddmLockWithMakeResidentTests, whenAlllocationNeedsBlockingMakeResidentBeforeLockThenLockWithBlockingMakeResident) { WddmMemoryManager memoryManager(*executionEnvironment); MockWddmAllocation allocation; allocation.needsMakeResidentBeforeLock = false; memoryManager.lockResource(&allocation); EXPECT_EQ(1u, wddm->lockResult.called); EXPECT_EQ(0u, wddm->lockResult.uint64ParamPassed); memoryManager.unlockResource(&allocation); allocation.needsMakeResidentBeforeLock = true; memoryManager.lockResource(&allocation); EXPECT_EQ(2u, wddm->lockResult.called); EXPECT_EQ(1u, wddm->lockResult.uint64ParamPassed); memoryManager.unlockResource(&allocation); } /* The gfx-partition test below reuses the Wddm20Tests fixture under a separate suite name. */ using WddmGfxPartitionTest = Wddm20Tests; /* All heaps start uninitialized; after initGfxPartition only HEAP_SVM and HEAP_EXTENDED are allowed to remain uninitialized. */ TEST_F(WddmGfxPartitionTest, initGfxPartition) { MockGfxPartition gfxPartition; for (auto heap : MockGfxPartition::allHeapNames) { ASSERT_FALSE(gfxPartition.heapInitialized(heap)); } wddm->initGfxPartition(gfxPartition, 0, 1); for (auto heap : MockGfxPartition::allHeapNames) { if (!gfxPartition.heapInitialized(heap)) { EXPECT_TRUE(heap == HeapIndex::HEAP_SVM || heap == HeapIndex::HEAP_EXTENDED); } else { EXPECT_TRUE(gfxPartition.heapInitialized(heap)); } } } /* With 5 root devices and root device index 3, the HEAP_STANDARD64KB size must be the Standard64KB range divided by the device count (aligned down to heapGranularity) and its base must be offset by index * size. */ TEST(WddmGfxPartitionTests, initGfxPartitionHeapStandard64KBSplit) { struct MockWddm : public Wddm { using Wddm::gfxPartition; MockWddm(RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(std::move(OSInterface::discoverDevices(rootDeviceEnvironment.executionEnvironment)[0]), rootDeviceEnvironment) {} }; 
MockWddm wddm(*platform()->peekExecutionEnvironment()->rootDeviceEnvironments[0].get()); uint32_t rootDeviceIndex = 3; size_t numRootDevices = 5; MockGfxPartition gfxPartition; wddm.init(); wddm.initGfxPartition(gfxPartition, rootDeviceIndex, numRootDevices); auto heapStandard64KBSize = alignDown((wddm.gfxPartition.Standard64KB.Limit - wddm.gfxPartition.Standard64KB.Base + 1) / numRootDevices, GfxPartition::heapGranularity); EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); EXPECT_EQ(wddm.gfxPartition.Standard64KB.Base + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); } /* With ForceDeviceId set to "1234", discoverDevices is expected to return exactly one non-null HwDeviceId. */ TEST_F(Wddm20Tests, givenWddmWhenDiscoverDevicesAndForceDeviceIdIsTheSameAsTheExistingDeviceThenReturnTheAdapter) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("1234"); // Existing device Id ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_EQ(1u, hwDeviceIds.size()); EXPECT_NE(nullptr, hwDeviceIds[0].get()); } /* ftrWddmHwQueues = 0: getWddmVersion() must be WDDM_2_0. */ TEST_F(WddmTest, WhenFeatureFlagHwQueueIsDisabledThenReturnWddm20Version) { wddm->featureTable->ftrWddmHwQueues = 0; EXPECT_EQ(WddmVersion::WDDM_2_0, wddm->getWddmVersion()); } /* ftrWddmHwQueues = 1: getWddmVersion() must be WDDM_2_3. */ TEST_F(WddmTest, WhenFeatureFlagHwQueueIsEnabledThenReturnWddm23Version) { wddm->featureTable->ftrWddmHwQueues = 1; EXPECT_EQ(WddmVersion::WDDM_2_3, wddm->getWddmVersion()); } /* createMonitoredFence on a standalone MonitoredFence must return true, set fenceHandle to 4 and set cpuAddress to the value of getMonitorFenceCpuFenceAddressFcn(). */ TEST_F(Wddm20WithMockGdiDllTests, GivenCreationSucceedWhenCreatingSeparateMonitorFenceThenReturnFilledStructure) { MonitoredFence monitorFence = {0}; bool ret = wddmMockInterface->createMonitoredFence(monitorFence); EXPECT_TRUE(ret); EXPECT_EQ(4u, monitorFence.fenceHandle); EXPECT_EQ(getMonitorFenceCpuFenceAddressFcn(), monitorFence.cpuAddress); } /* With the flag behind getCreateSynchronizationObject2FailCallFcn() set to true, createMonitoredFence must return false and leave fenceHandle and cpuAddress zero. */ TEST_F(Wddm20WithMockGdiDllTests, GivenCreationFailWhenCreatingSeparateMonitorFenceThenReturnNotFilledStructure) { MonitoredFence monitorFence = {0}; *getCreateSynchronizationObject2FailCallFcn() = true; bool ret = 
wddmMockInterface->createMonitoredFence(monitorFence); EXPECT_FALSE(ret); EXPECT_EQ(0u, monitorFence.fenceHandle); void *retAddress = reinterpret_cast(0); EXPECT_EQ(retAddress, monitorFence.cpuAddress); } /* destroyMonitorFence must pass fenceHandle to the GDI destroy call as hSyncObject. */ TEST_F(Wddm20WithMockGdiDllTests, WhenDestroyingSeparateMonitorFenceThenExpectGdiCalled) { MonitoredFence monitorFence = {0}; monitorFence.fenceHandle = 10u; wddmMockInterface->destroyMonitorFence(monitorFence); EXPECT_EQ(monitorFence.fenceHandle, getDestroySynchronizationObjectDataFcn()->hSyncObject); } /* Declaration of the callback whose address the following test expects in pfnNotifyAubCapture. */ namespace NEO { long __stdcall notifyAubCapture(void *csrHandle, uint64_t gfxAddress, size_t gfxSize, bool allocate); } /* After init(), every field of gmmMemory->deviceCallbacks must match the wddm handles and the GDI function pointers collected in expectedDeviceCb. */ TEST_F(Wddm20WithMockGdiDllTests, whenSetDeviceInfoSucceedsThenDeviceCallbacksArePassedToGmmMemory) { GMM_DEVICE_CALLBACKS_INT expectedDeviceCb{}; wddm->init(); auto gdi = wddm->getGdi(); auto gmmMemory = static_cast(wddm->getGmmMemory()); expectedDeviceCb.Adapter.KmtHandle = wddm->getAdapter(); expectedDeviceCb.hDevice.KmtHandle = wddm->getDevice(); expectedDeviceCb.hCsr = nullptr; expectedDeviceCb.PagingQueue = wddm->getPagingQueue(); expectedDeviceCb.PagingFence = wddm->getPagingQueueSyncObject(); expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnAllocate = gdi->createAllocation; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnDeallocate = gdi->destroyAllocation; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMapGPUVA = gdi->mapGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMakeResident = gdi->makeResident; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEvict = gdi->evict; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnReserveGPUVA = gdi->reserveGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUpdateGPUVA = gdi->updateGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnWaitFromCpu = gdi->waitForSynchronizationObjectFromCpu; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnLock = gdi->lock2; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUnLock = gdi->unlock2; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEscape = gdi->escape; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnFreeGPUVA 
= gdi->freeGpuVirtualAddress; expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnNotifyAubCapture = notifyAubCapture; EXPECT_EQ(expectedDeviceCb.Adapter.KmtHandle, gmmMemory->deviceCallbacks.Adapter.KmtHandle); EXPECT_EQ(expectedDeviceCb.hDevice.KmtHandle, gmmMemory->deviceCallbacks.hDevice.KmtHandle); EXPECT_EQ(expectedDeviceCb.hCsr, gmmMemory->deviceCallbacks.hCsr); EXPECT_EQ(expectedDeviceCb.PagingQueue, gmmMemory->deviceCallbacks.PagingQueue); EXPECT_EQ(expectedDeviceCb.PagingFence, gmmMemory->deviceCallbacks.PagingFence); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnAllocate, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnAllocate); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnDeallocate, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnDeallocate); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMapGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnMapGPUVA); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnMakeResident, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnMakeResident); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEvict, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnEvict); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnReserveGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnReserveGPUVA); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUpdateGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnUpdateGPUVA); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnWaitFromCpu, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnWaitFromCpu); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnLock, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnLock); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnUnLock, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnUnLock); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnEscape, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnEscape); EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnFreeGPUVA, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnFreeGPUVA); 
EXPECT_EQ(expectedDeviceCb.DevCbPtrs.KmtCbPtrs.pfnNotifyAubCapture, gmmMemory->deviceCallbacks.DevCbPtrs.KmtCbPtrs.pfnNotifyAubCapture); } /* setDeviceInfo is mocked to return false: configureDeviceAddressSpace must never be called during init(). */ TEST_F(Wddm20WithMockGdiDllTests, whenSetDeviceInfoFailsThenDeviceIsNotConfigured) { auto gmockGmmMemory = new ::testing::NiceMock(rootDeviceEnvironment->getGmmClientContext()); ON_CALL(*gmockGmmMemory, setDeviceInfo(::testing::_)) .WillByDefault(::testing::Return(false)); EXPECT_CALL(*gmockGmmMemory, configureDeviceAddressSpace(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)) .Times(0); wddm->gmmMemory.reset(gmockGmmMemory); wddm->init(); } /* Skipped when the render core family is IGFX_GEN12LP_CORE. Otherwise init() must not call getInternalGpuVaRangeLimit and getWddmMinAddress() must equal windowsMinAddress. */ HWTEST_F(Wddm20WithMockGdiDllTests, givenNonGen12LPPlatformWhenConfigureDeviceAddressSpaceThenDontObtainMinAddress) { if (defaultHwInfo->platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) { GTEST_SKIP(); } auto gmmMemory = new ::testing::NiceMock(rootDeviceEnvironment->getGmmClientContext()); wddm->gmmMemory.reset(gmmMemory); ON_CALL(*gmmMemory, configureDeviceAddressSpace(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)) .WillByDefault(::testing::Return(true)); EXPECT_CALL(*gmmMemory, getInternalGpuVaRangeLimit()) .Times(0); wddm->init(); EXPECT_EQ(NEO::windowsMinAddress, wddm->getWddmMinAddress()); } /* Gdi whose closeAdapter entry is replaced by a static stub that counts calls and records the adapter handle it received; the constructor resets both static members. */ struct GdiWithMockedCloseFunc : public Gdi { GdiWithMockedCloseFunc() : Gdi() { closeAdapter = mockCloseAdapter; GdiWithMockedCloseFunc::closeAdapterCalled = 0u; GdiWithMockedCloseFunc::closeAdapterCalledArgPassed = 0u; } static NTSTATUS __stdcall mockCloseAdapter(IN CONST D3DKMT_CLOSEADAPTER *adapter) { closeAdapterCalled++; closeAdapterCalledArgPassed = adapter->hAdapter; return STATUS_SUCCESS; } static uint32_t closeAdapterCalled; static D3DKMT_HANDLE closeAdapterCalledArgPassed; }; uint32_t GdiWithMockedCloseFunc::closeAdapterCalled; D3DKMT_HANDLE GdiWithMockedCloseFunc::closeAdapterCalledArgPassed; /* A HwDeviceId going out of scope must call closeAdapter once with the adapter handle it was constructed with. */ TEST(HwDeviceId, whenHwDeviceIdIsDestroyedThenAdapterIsClosed) { auto gdi = std::make_unique(); auto osEnv = std::make_unique(); 
osEnv->gdi.reset(gdi.release()); D3DKMT_HANDLE adapter = 0x1234; { HwDeviceId hwDeviceId{adapter, {}, osEnv.get()}; } EXPECT_EQ(1u, GdiWithMockedCloseFunc::closeAdapterCalled); EXPECT_EQ(adapter, GdiWithMockedCloseFunc::closeAdapterCalledArgPassed); } /* With the WddmResidencyLogger flag set, createPagingFenceLogger must create a logger; when residencyLoggingAvailable, the fopen, vfprintf and fclose mock counters must each be 1 after the logger is reset. */ TEST_F(WddmTest, WhenResidencyLoggingEnabledThenExpectLoggerCreated) { NEO::ResLog::mockFopenCalled = 0; NEO::ResLog::mockVfptrinfCalled = 0; NEO::ResLog::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); wddm->residencyLogger.reset(); if (NEO::residencyLoggingAvailable) { EXPECT_EQ(1u, NEO::ResLog::mockFopenCalled); EXPECT_EQ(1u, NEO::ResLog::mockVfptrinfCalled); EXPECT_EQ(1u, NEO::ResLog::mockFcloseCalled); } } /* Skipped when residency logging is unavailable. makeResident with ALLOCATION_HANDLE must bring the log write counter to 2, set logger->makeResidentCall and record the paging fence value in the logger. */ TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenMakeResidentSuccessThenExpectSizeRapport) { if (!NEO::residencyLoggingAvailable) { GTEST_SKIP(); } NEO::ResLog::mockFopenCalled = 0; NEO::ResLog::mockVfptrinfCalled = 0; NEO::ResLog::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); D3DKMT_HANDLE handle = ALLOCATION_HANDLE; uint64_t bytesToTrim = 0; wddm->makeResident(&handle, 1, false, &bytesToTrim, 0x1000); //2 - one for open log, second for allocation size EXPECT_EQ(2u, NEO::ResLog::mockVfptrinfCalled); EXPECT_TRUE(logger->makeResidentCall); EXPECT_EQ(MockGdi::pagingFenceReturnValue, logger->makeResidentPagingFence); } /* Skipped when residency logging is unavailable. makeResident with INVALID_HANDLE: three log writes are expected and logger->makeResidentCall must be false. */ TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenMakeResidentFailThenExpectTrimReport) { if (!NEO::residencyLoggingAvailable) { GTEST_SKIP(); } NEO::ResLog::mockFopenCalled = 0; NEO::ResLog::mockVfptrinfCalled = 0; NEO::ResLog::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; 
DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); D3DKMT_HANDLE handle = INVALID_HANDLE; uint64_t bytesToTrim = 0; wddm->makeResident(&handle, 1, false, &bytesToTrim, 0x1000); //3 - one for open log, second for report allocations, 3rd for trim size EXPECT_EQ(3u, NEO::ResLog::mockVfptrinfCalled); EXPECT_FALSE(logger->makeResidentCall); } /* Skipped when residency logging is unavailable. Calling enteredWait() on the logger must set logger->enterWait. */ TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenEnterWaitCalledThenExpectInternalFlagOn) { if (!NEO::residencyLoggingAvailable) { GTEST_SKIP(); } NEO::ResLog::mockFopenCalled = 0; NEO::ResLog::mockVfptrinfCalled = 0; NEO::ResLog::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); logger->enteredWait(); EXPECT_TRUE(logger->enterWait); } /* Skipped when residency logging is unavailable. After makeResident followed by waitOnPagingFenceFromCpu, both logger flags must be false, the log write counter must be 5 and startWaitPagingFenceSave must hold the paging fence value. */ TEST_F(WddmTest, GivenResidencyLoggingEnabledWhenMakeResidentAndWaitPagingThenExpectFlagsOff) { if (!NEO::residencyLoggingAvailable) { GTEST_SKIP(); } NEO::ResLog::mockFopenCalled = 0; NEO::ResLog::mockVfptrinfCalled = 0; NEO::ResLog::mockFcloseCalled = 0; DebugManagerStateRestore dbgRestore; DebugManager.flags.WddmResidencyLogger.set(true); wddm->callBaseCreatePagingLogger = false; wddm->createPagingFenceLogger(); EXPECT_NE(nullptr, wddm->residencyLogger.get()); auto logger = static_cast(wddm->residencyLogger.get()); D3DKMT_HANDLE handle = ALLOCATION_HANDLE; uint64_t bytesToTrim = 0; wddm->makeResident(&handle, 1, false, &bytesToTrim, 0x1000); //2 - one for open log, second for allocation size EXPECT_EQ(2u, NEO::ResLog::mockVfptrinfCalled); EXPECT_TRUE(logger->makeResidentCall); EXPECT_EQ(MockGdi::pagingFenceReturnValue, logger->makeResidentPagingFence); logger->enterWait = true; 
wddm->waitOnPagingFenceFromCpu(); EXPECT_EQ(5u, NEO::ResLog::mockVfptrinfCalled); EXPECT_FALSE(logger->makeResidentCall); EXPECT_FALSE(logger->enterWait); EXPECT_EQ(MockGdi::pagingFenceReturnValue, logger->startWaitPagingFenceSave); } /* The mocked registry reader reports one driver store path while currentLibraryPath points into a different one: discoverDevices must return no devices. */ TEST(DiscoverDevices, whenDriverInfoHasIncompatibleDriverStoreThenHwDeviceIdIsNotCreated) { class MockRegistryReader : public SettingsReader { public: std::string getSetting(const char *settingName, const std::string &value) override { std::string key(settingName); if (key == "DriverStorePathForComputeRuntime") { return driverStorePath; } return value; } bool getSetting(const char *settingName, bool defaultValue) override { return defaultValue; }; int32_t getSetting(const char *settingName, int32_t defaultValue) override { return defaultValue; }; const char *appSpecificLocation(const std::string &name) override { return name.c_str(); }; std::string driverStorePath = "driverStore\\0x8086"; }; VariableBackup createFuncBackup{&DriverInfoWindows::createRegistryReaderFunc}; DriverInfoWindows::createRegistryReaderFunc = [](const std::string &) -> std::unique_ptr { return std::make_unique(); }; VariableBackup currentLibraryPathBackup(&SysCalls::currentLibraryPath); currentLibraryPathBackup = L"driverStore\\different_driverStore\\myLib.dll"; ExecutionEnvironment executionEnvironment; auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); EXPECT_TRUE(hwDeviceIds.empty()); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm23_tests.cpp000066400000000000000000000223111363734646600310120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/windows/gdi_interface.h" #include 
"shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/mocks/mock_wddm_interface23.h" #include "opencl/test/unit_test/os_interface/windows/gdi_dll_fixture.h" #include "test.h" using namespace NEO; /* Fixture: SetUp creates the wddm object via Wddm::createWddm, enables ftrWddmHwQueues, installs a WddmMockInterface23 and a RegistryReaderMock, but does not call wddm->init(). init() runs wddm->init() while the mock interface is temporarily released, re-installs it and then creates osContext. */ struct Wddm23TestsWithoutWddmInit : public ::testing::Test, GdiDllFixture { void SetUp() override { GdiDllFixture::SetUp(); executionEnvironment = platform()->peekExecutionEnvironment(); wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get())); osInterface = std::make_unique(); osInterface->get()->setWddm(wddm); wddm->featureTable->ftrWddmHwQueues = true; wddmMockInterface = new WddmMockInterface23(*wddm); wddm->wddmInterface.reset(wddmMockInterface); wddm->registryReader.reset(new RegistryReaderMock()); } void init() { auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddmMockInterface = static_cast(wddm->wddmInterface.release()); wddm->init(); wddm->wddmInterface.reset(wddmMockInterface); osContext = std::make_unique(*wddm, 0u, 1, HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0], preemptionMode, false, false, false); } void TearDown() override { GdiDllFixture::TearDown(); } std::unique_ptr osInterface; std::unique_ptr osContext; WddmMock *wddm = nullptr; WddmMockInterface23 *wddmMockInterface = nullptr; ExecutionEnvironment *executionEnvironment; }; /* Fixture that additionally calls init() at the end of SetUp. */ struct Wddm23Tests : public Wddm23TestsWithoutWddmInit { using Wddm23TestsWithoutWddmInit::TearDown; void SetUp() override { Wddm23TestsWithoutWddmInit::SetUp(); init(); } }; /* getDedicatedVideoMemory() must return the dedicatedVideoMemory member. */ TEST_F(Wddm23Tests, whenGetDedicatedVideoMemoryIsCalledThenCorrectValueIsReturned) { 
EXPECT_EQ(wddm->dedicatedVideoMemory, wddm->getDedicatedVideoMemory()); } /* The interface must report hwQueuesSupported() and the recorded create-context data must carry Flags.HwQueueSupported = 1. */ TEST_F(Wddm23Tests, whenCreateContextIsCalledThenEnableHwQueues) { EXPECT_TRUE(wddm->wddmInterface->hwQueuesSupported()); EXPECT_EQ(1u, getCreateContextDataFcn()->Flags.HwQueueSupported); } /* createHwQueue must record Flags.DisableGpuTimeout = 0 for a context with PreemptionMode::Disabled and 1 for one with PreemptionMode::MidBatch. */ TEST_F(Wddm23Tests, givenPreemptionModeWhenCreateHwQueueCalledThenSetGpuTimeoutIfEnabled) { auto defaultEngine = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*defaultHwInfo)[0]; OsContextWin osContextWithoutPreemption(*osInterface->get()->getWddm(), 0u, 1, defaultEngine, PreemptionMode::Disabled, false, false, false); OsContextWin osContextWithPreemption(*osInterface->get()->getWddm(), 0u, 1, defaultEngine, PreemptionMode::MidBatch, false, false, false); wddm->wddmInterface->createHwQueue(osContextWithoutPreemption); EXPECT_EQ(0u, getCreateHwQueueDataFcn()->Flags.DisableGpuTimeout); wddm->wddmInterface->createHwQueue(osContextWithPreemption); EXPECT_EQ(1u, getCreateHwQueueDataFcn()->Flags.DisableGpuTimeout); } /* destroyHwQueue must pass a non-zero handle to GDI; with handle 0 the recorded GDI data must not change to 0. */ TEST_F(Wddm23Tests, whenDestroyHwQueueCalledThenPassExistingHandle) { D3DKMT_HANDLE hwQueue = 123; osContext->setHwQueue({hwQueue, 0, nullptr, 0}); wddmMockInterface->destroyHwQueue(osContext->getHwQueue().handle); EXPECT_EQ(hwQueue, getDestroyHwQueueDataFcn()->hHwQueue); hwQueue = 0; osContext->setHwQueue({hwQueue, 0, nullptr, 0}); wddmMockInterface->destroyHwQueue(osContext->getHwQueue().handle); EXPECT_NE(hwQueue, getDestroyHwQueueDataFcn()->hHwQueue); // gdi not called when 0 } /* Resetting osContext must issue a GDI destroy for its hardware queue handle. */ TEST_F(Wddm23Tests, whenObjectIsDestructedThenDestroyHwQueue) { D3DKMT_HANDLE hwQueue = 123; osContext->setHwQueue({hwQueue, 0, nullptr, 0}); osContext.reset(); EXPECT_EQ(hwQueue, getDestroyHwQueueDataFcn()->hHwQueue); } /* submit() must fill the recorded submit-to-HW-queue data and advance the monitored fence (currentFenceValue 1 to 2, lastSubmittedFence 0 to 1); the fence fields of the command buffer header must stay 0. */ TEST_F(Wddm23Tests, givenCmdBufferWhenSubmitCalledThenSetAllRequiredFiledsAndUpdateMonitoredFence) { uint64_t cmdBufferAddress = 123; size_t cmdSize = 456; auto hwQueue = osContext->getHwQueue(); COMMAND_BUFFER_HEADER cmdBufferHeader = {}; EXPECT_EQ(1u, 
osContext->getResidencyController().getMonitoredFence().currentFenceValue); EXPECT_EQ(0u, osContext->getResidencyController().getMonitoredFence().lastSubmittedFence); WddmSubmitArguments submitArgs = {}; submitArgs.contextHandle = osContext->getWddmContextHandle(); submitArgs.hwQueueHandle = hwQueue.handle; submitArgs.monitorFence = &osContext->getResidencyController().getMonitoredFence(); wddm->submit(cmdBufferAddress, cmdSize, &cmdBufferHeader, submitArgs); EXPECT_EQ(cmdBufferAddress, getSubmitCommandToHwQueueDataFcn()->CommandBuffer); EXPECT_EQ(static_cast(cmdSize), getSubmitCommandToHwQueueDataFcn()->CommandLength); EXPECT_EQ(hwQueue.handle, getSubmitCommandToHwQueueDataFcn()->hHwQueue); EXPECT_EQ(osContext->getResidencyController().getMonitoredFence().lastSubmittedFence, getSubmitCommandToHwQueueDataFcn()->HwQueueProgressFenceId); EXPECT_EQ(&cmdBufferHeader, getSubmitCommandToHwQueueDataFcn()->pPrivateDriverData); EXPECT_EQ(static_cast(MemoryConstants::pageSize), getSubmitCommandToHwQueueDataFcn()->PrivateDriverDataSize); EXPECT_EQ(0u, cmdBufferHeader.MonitorFenceVA); EXPECT_EQ(0u, cmdBufferHeader.MonitorFenceValue); EXPECT_EQ(2u, osContext->getResidencyController().getMonitoredFence().currentFenceValue); EXPECT_EQ(1u, osContext->getResidencyController().getMonitoredFence().lastSubmittedFence); } TEST_F(Wddm23Tests, whenMonitoredFenceIsCreatedThenSetupAllRequiredFields) { wddm->wddmInterface->createMonitoredFence(*osContext); auto hwQueue = osContext->getHwQueue(); EXPECT_EQ(hwQueue.progressFenceCpuVA, osContext->getResidencyController().getMonitoredFence().cpuAddress); EXPECT_EQ(1u, osContext->getResidencyController().getMonitoredFence().currentFenceValue); EXPECT_EQ(hwQueue.progressFenceHandle, osContext->getResidencyController().getMonitoredFence().fenceHandle); EXPECT_EQ(hwQueue.progressFenceGpuVA, osContext->getResidencyController().getMonitoredFence().gpuAddress); EXPECT_EQ(0u, osContext->getResidencyController().getMonitoredFence().lastSubmittedFence); 
} TEST_F(Wddm23Tests, givenCurrentPendingFenceValueGreaterThanPendingFenceValueWhenSubmitCalledThenCallWaitOnGpu) { uint64_t cmdBufferAddress = 123; size_t cmdSize = 456; COMMAND_BUFFER_HEADER cmdBufferHeader = {}; WddmSubmitArguments submitArgs = {}; submitArgs.contextHandle = osContext->getWddmContextHandle(); submitArgs.hwQueueHandle = osContext->getHwQueue().handle; submitArgs.monitorFence = &osContext->getResidencyController().getMonitoredFence(); *wddm->pagingFenceAddress = 1; wddm->currentPagingFenceValue = 1; wddm->submit(cmdBufferAddress, cmdSize, &cmdBufferHeader, submitArgs); EXPECT_EQ(0u, wddm->waitOnGPUResult.called); wddm->currentPagingFenceValue = 2; wddm->submit(cmdBufferAddress, cmdSize, &cmdBufferHeader, submitArgs); EXPECT_EQ(1u, wddm->waitOnGPUResult.called); } TEST_F(Wddm23Tests, givenDestructionOsContextWinWhenCallingDestroyMonitorFenceThenDoNotCallGdiDestroy) { osContext.reset(nullptr); EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled); EXPECT_EQ(0u, getDestroySynchronizationObjectDataFcn()->hSyncObject); } TEST_F(Wddm23TestsWithoutWddmInit, whenInitCalledThenInitializeNewGdiDDIsAndCallToCreateHwQueue) { EXPECT_EQ(nullptr, wddm->getGdi()->createHwQueue.mFunc); EXPECT_EQ(nullptr, wddm->getGdi()->destroyHwQueue.mFunc); EXPECT_EQ(nullptr, wddm->getGdi()->submitCommandToHwQueue.mFunc); init(); EXPECT_EQ(1u, wddmMockInterface->createHwQueueCalled); EXPECT_NE(nullptr, wddm->getGdi()->createHwQueue.mFunc); EXPECT_NE(nullptr, wddm->getGdi()->destroyHwQueue.mFunc); EXPECT_NE(nullptr, wddm->getGdi()->submitCommandToHwQueue.mFunc); } TEST_F(Wddm23TestsWithoutWddmInit, whenCreateHwQueueFailedThenReturnFalseFromInit) { wddmMockInterface->forceCreateHwQueueFail = true; init(); EXPECT_FALSE(osContext->isInitialized()); } TEST_F(Wddm23TestsWithoutWddmInit, givenFailureOnGdiInitializationWhenCreatingHwQueueThenReturnFailure) { struct MyMockGdi : public Gdi { bool setupHwQueueProcAddresses() override { return false; } }; auto myMockGdi = new 
MyMockGdi(); wddm->resetGdi(myMockGdi); init(); EXPECT_FALSE(osContext->isInitialized()); EXPECT_EQ(1u, wddmMockInterface->createHwQueueCalled); EXPECT_FALSE(wddmMockInterface->createHwQueueResult); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_address_space_tests.cpp000066400000000000000000000116301363734646600335270ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/root_device_environment.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "test.h" using namespace NEO; class WddmMockReserveAddress : public WddmMock { public: WddmMockReserveAddress(RootDeviceEnvironment &rootDeviceEnvironment) : WddmMock(rootDeviceEnvironment) {} void *virtualAlloc(void *inPtr, size_t size, unsigned long flags, unsigned long type) override { if (returnGood != 0) { return WddmMock::virtualAlloc(inPtr, size, flags, type); } if (returnInvalidCount != 0) { returnInvalidIter++; if (returnInvalidIter > returnInvalidCount) { return WddmMock::virtualAlloc(inPtr, size, flags, type); } if (returnNullCount != 0) { returnNullIter++; if (returnNullIter > returnNullCount) { return nullptr; } return reinterpret_cast(0x1000); } return reinterpret_cast(0x1000); } return nullptr; } int virtualFree(void *ptr, size_t size, unsigned long flags) override { if ((ptr == reinterpret_cast(0x1000)) || (ptr == reinterpret_cast(0x0))) { return 1; } return WddmMock::virtualFree(ptr, size, flags); } uint32_t returnGood = 0; uint32_t returnInvalidCount = 0; uint32_t returnInvalidIter = 0; uint32_t returnNullCount = 0; uint32_t returnNullIter = 0; }; TEST(WddmReserveAddressTest, givenWddmWhenFirstIsSuccessfulThenReturnReserveAddress) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment 
rootDeviceEnvironment(executionEnvironment); std::unique_ptr wddm(new WddmMockReserveAddress(rootDeviceEnvironment)); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnGood = 1; auto ret = wddm->reserveValidAddressRange(size, reserve); uintptr_t expectedReserve = wddm->virtualAllocAddress; EXPECT_TRUE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); wddm->releaseReservedAddress(reserve); } TEST(WddmReserveAddressTest, givenWddmWhenFirstIsNullThenReturnNull) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr wddm(new WddmMockReserveAddress(rootDeviceEnvironment)); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); uintptr_t expectedReserve = 0; auto ret = wddm->reserveValidAddressRange(size, reserve); EXPECT_FALSE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); } TEST(WddmReserveAddressTest, givenWddmWhenFirstIsInvalidSecondSuccessfulThenReturnSecond) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr wddm(new WddmMockReserveAddress(rootDeviceEnvironment)); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnInvalidCount = 1; auto ret = wddm->reserveValidAddressRange(size, reserve); uintptr_t expectedReserve = wddm->virtualAllocAddress; EXPECT_TRUE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); wddm->releaseReservedAddress(reserve); } TEST(WddmReserveAddressTest, givenWddmWhenSecondIsInvalidThirdSuccessfulThenReturnThird) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr wddm(new WddmMockReserveAddress(rootDeviceEnvironment)); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnInvalidCount = 2; auto ret = wddm->reserveValidAddressRange(size, reserve); uintptr_t expectedReserve = wddm->virtualAllocAddress; 
EXPECT_TRUE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); wddm->releaseReservedAddress(reserve); } TEST(WddmReserveAddressTest, givenWddmWhenFirstIsInvalidSecondNullThenReturnSecondNull) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); std::unique_ptr wddm(new WddmMockReserveAddress(rootDeviceEnvironment)); size_t size = 0x1000; void *reserve = nullptr; wddm->init(); wddm->returnInvalidCount = 2; wddm->returnNullCount = 1; uintptr_t expectedReserve = 0; auto ret = wddm->reserveValidAddressRange(size, reserve); EXPECT_FALSE(ret); EXPECT_EQ(expectedReserve, reinterpret_cast(reserve)); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_calls.cpp000066400000000000000000000015171363734646600306060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/host_ptr_defines.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/os_interface/windows/ult_dxgi_factory.h" namespace NEO { BOOL WINAPI ULTVirtualFree(LPVOID ptr, SIZE_T size, DWORD flags) { return 1; } LPVOID WINAPI ULTVirtualAlloc(LPVOID inPtr, SIZE_T size, DWORD flags, DWORD type) { return reinterpret_cast(virtualAllocAddress); } Wddm::CreateDXGIFactoryFcn getCreateDxgiFactory() { return ULTCreateDXGIFactory; } Wddm::GetSystemInfoFcn getGetSystemInfo() { return ULTGetSystemInfo; } Wddm::VirtualFreeFcn getVirtualFree() { return ULTVirtualFree; } Wddm::VirtualAllocFcn getVirtualAlloc() { return ULTVirtualAlloc; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_create.cpp000066400000000000000000000012421363734646600307460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_wddm.h" #include 
"opencl/test/unit_test/mocks/mock_wddm_residency_logger_functions.h" namespace NEO { Wddm *Wddm::createWddm(std::unique_ptr hwDeviceId, RootDeviceEnvironment &rootDeviceEnvironment) { return new WddmMock(rootDeviceEnvironment); } namespace ResLog { fopenFuncPtr fopenPtr = &mockFopen; vfprintfFuncPtr vfprintfPtr = &mockVfptrinf; fcloseFuncPtr fclosePtr = &mockFclose; uint32_t mockFopenCalled = 0; uint32_t mockVfptrinfCalled = 0; uint32_t mockFcloseCalled = 0; } // namespace ResLog } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_fixture.h000066400000000000000000000130051363734646600306360ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/os_interface/windows/gdi_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "shared/test/unit_test/os_interface/windows/mock_gdi_interface.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/mocks/mock_wddm_interface20.h" #include "opencl/test/unit_test/mocks/mock_wddm_residency_allocations_container.h" #include "opencl/test/unit_test/os_interface/windows/gdi_dll_fixture.h" #include "test.h" #include "mock_gmm_memory.h" namespace NEO { struct WddmFixture : ::testing::Test { void SetUp() override { executionEnvironment = 
platform()->peekExecutionEnvironment(); rootDeviceEnvironemnt = executionEnvironment->rootDeviceEnvironments[0].get(); auto osEnvironment = new OsEnvironmentWin(); gdi = new MockGdi(); osEnvironment->gdi.reset(gdi); executionEnvironment->osEnvironment.reset(osEnvironment); wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironemnt)); rootDeviceEnvironemnt->osInterface = std::make_unique(); rootDeviceEnvironemnt->osInterface->get()->setWddm(wddm); rootDeviceEnvironemnt->memoryOperationsInterface = std::make_unique(wddm); osInterface = rootDeviceEnvironemnt->osInterface.get(); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); auto hwInfo = rootDeviceEnvironemnt->getHardwareInfo(); auto engine = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0]; osContext = std::make_unique(*osInterface->get()->getWddm(), 0u, 1u, engine, preemptionMode, false, false, false); mockTemporaryResources = static_cast(wddm->temporaryResources.get()); } WddmMock *wddm = nullptr; OSInterface *osInterface; ExecutionEnvironment *executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironemnt = nullptr; std::unique_ptr osContext; MockGdi *gdi = nullptr; MockWddmResidentAllocationsContainer *mockTemporaryResources; }; struct WddmFixtureWithMockGdiDll : public GdiDllFixture { void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); GdiDllFixture::SetUp(); wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); wddmMockInterface = new WddmMockInterface20(*wddm); wddm->wddmInterface.reset(wddmMockInterface); rootDeviceEnvironment->osInterface = std::make_unique(); rootDeviceEnvironment->osInterface->get()->setWddm(wddm); rootDeviceEnvironment->memoryOperationsInterface = std::make_unique(wddm); osInterface = rootDeviceEnvironment->osInterface.get(); } void init() { auto 
preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddmMockInterface = static_cast(wddm->wddmInterface.release()); wddm->init(); wddm->wddmInterface.reset(wddmMockInterface); auto hwInfo = rootDeviceEnvironment->getHardwareInfo(); auto engine = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0]; osContext = std::make_unique(*osInterface->get()->getWddm(), 0u, 1, engine, preemptionMode, false, false, false); } void TearDown() override { GdiDllFixture::TearDown(); } WddmMock *wddm = nullptr; OSInterface *osInterface; std::unique_ptr osContext; ExecutionEnvironment *executionEnvironment; WddmMockInterface20 *wddmMockInterface = nullptr; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; }; struct WddmInstrumentationGmmFixture { void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); wddm.reset(static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment))); gmmMem = new ::testing::NiceMock(rootDeviceEnvironment->getGmmClientContext()); wddm->gmmMemory.reset(gmmMem); } void TearDown() { } std::unique_ptr wddm; GmockGmmMemory *gmmMem = nullptr; ExecutionEnvironment *executionEnvironment; }; using WddmTest = WddmFixture; using WddmTestWithMockGdiDll = Test; using WddmInstrumentationTest = Test; using WddmTestSingle = ::testing::Test; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_kmdaf_listener_tests.cpp000066400000000000000000000320551363734646600337220ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/memory_manager/memory_manager.h" #include 
"shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/source/os_interface/windows/wddm_allocation.h" #include "shared/test/unit_test/os_interface/windows/mock_gdi_interface.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/mock_gdi/mock_gdi.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/mock_kmdaf_listener.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "test.h" using namespace NEO; class WddmWithKmDafMock : public Wddm { public: using Wddm::featureTable; using Wddm::mapGpuVirtualAddress; WddmWithKmDafMock(RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(std::make_unique(ADAPTER_HANDLE, LUID{}, rootDeviceEnvironment.executionEnvironment.osEnvironment.get()), rootDeviceEnvironment) { kmDafListener.reset(new KmDafListenerMock); } KmDafListenerMock &getKmDafListenerMock() { return static_cast(*this->kmDafListener); } }; class WddmKmDafListenerTest : public ::testing::Test { public: void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); auto osEnvironment = new OsEnvironmentWin(); osEnvironment->gdi.reset(new MockGdi()); executionEnvironment->osEnvironment.reset(osEnvironment); wddmWithKmDafMock.reset(new WddmWithKmDafMock(*rootDeviceEnvironment)); wddmWithKmDafMock->init(); wddmWithKmDafMock->featureTable->ftrKmdDaf = true; } void TearDown() { } std::unique_ptr wddmWithKmDafMock; ExecutionEnvironment *executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; }; TEST_F(WddmKmDafListenerTest, givenWddmWhenLockResourceIsCalledThenKmDafListenerNotifyLockIsFedWithCorrectParams) { wddmWithKmDafMock->lockResource(ALLOCATION_HANDLE, false, 0x1000); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, 
wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAllocation); EXPECT_EQ(0, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pLockFlags); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenUnlockResourceIsCalledThenKmDafListenerNotifyUnlockIsFedWithCorrectParams) { wddmWithKmDafMock->unlockResource(ALLOCATION_HANDLE); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, *wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.phAllocation); EXPECT_EQ(1u, wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.allocations); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyUnlockParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenMapGpuVirtualAddressIsCalledThenKmDafListenerNotifyMapGpuVAIsFedWithCorrectParams) { uint64_t gpuPtr = 0u; auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, false); wddmWithKmDafMock->mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, wddmWithKmDafMock->getGfxPartition().Standard.Base, wddmWithKmDafMock->getGfxPartition().Standard.Limit, 0u, gpuPtr); 
EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAllocation); EXPECT_EQ(GmmHelper::decanonize(gpuPtr), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.GpuVirtualAddress); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenFreeGpuVirtualAddressIsCalledThenKmDafListenerNotifyUnmapGpuVAIsFedWithCorrectParams) { uint64_t gpuPtr = GPUVA; wddmWithKmDafMock->freeGpuVirtualAddress(gpuPtr, MemoryConstants::pageSize); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.hDevice); EXPECT_EQ(GPUVA, wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.GpuVirtualAddress); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyUnmapGpuVAParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenMakeResidentIsCalledThenKmDafListenerNotifyMakeResidentIsFedWithCorrectParams) { MockWddmAllocation allocation; wddmWithKmDafMock->makeResident(&allocation.handle, 1, false, nullptr, 0x1000); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, 
wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.hDevice); EXPECT_EQ(allocation.handle, *wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.phAllocation); EXPECT_EQ(1u, wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.allocations); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMakeResidentParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenEvictIsCalledThenKmDafListenerNotifyEvictIsFedWithCorrectParams) { MockWddmAllocation allocation; uint64_t sizeToTrim; wddmWithKmDafMock->evict(&allocation.handle, 1, sizeToTrim); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.hDevice); EXPECT_EQ(allocation.handle, *wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.phAllocation); EXPECT_EQ(1u, wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.allocations); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyEvictParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenCreateAllocationIsCalledThenKmDafListenerNotifyWriteTargetIsFedWithCorrectParams) { auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, false); auto handle = 0u; auto resourceHandle = 0u; auto ptr = reinterpret_cast(0x10000); wddmWithKmDafMock->createAllocation(ptr, 
gmm.get(), handle, resourceHandle, nullptr); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hDevice); EXPECT_EQ(handle, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAllocation); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenCreateAllocation64IsCalledThenKmDafListenerNotifyWriteTargetIsFedWithCorrectParams) { auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, false); auto handle = 0u; wddmWithKmDafMock->createAllocation64k(gmm.get(), handle); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hDevice); EXPECT_EQ(handle, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAllocation); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenCreateAllocationsAndMapGpuVaIsCalledThenKmDafListenerNotifyWriteTargetAndMapGpuVAIsFedWithCorrectParams) { OsHandleStorage storage; OsHandle osHandle = {0}; auto gmm = std::unique_ptr(new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 1, false)); storage.fragmentStorageData[0].osHandleStorage = &osHandle; 
storage.fragmentStorageData[0].fragmentSize = 100; storage.fragmentStorageData[0].osHandleStorage->gmm = gmm.get(); wddmWithKmDafMock->createAllocationsAndMapGpuVa(storage); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hDevice); EXPECT_EQ(osHandle.handle, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.hAllocation); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyWriteTargetParametrization.pfnEscape); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hDevice); EXPECT_EQ(osHandle.handle, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.hAllocation); EXPECT_EQ(GmmHelper::decanonize(osHandle.gpuPtr), wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.GpuVirtualAddress); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyMapGpuVAParametrization.pfnEscape); } TEST_F(WddmKmDafListenerTest, givenWddmWhenKmDafLockIsCalledThenKmDafListenerNotifyLockIsFedWithCorrectParams) { wddmWithKmDafMock->kmDafLock(ALLOCATION_HANDLE); EXPECT_EQ(wddmWithKmDafMock->featureTable->ftrKmdDaf, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.ftrKmdDaf); EXPECT_EQ(wddmWithKmDafMock->getAdapter(), 
wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAdapter); EXPECT_EQ(wddmWithKmDafMock->getDevice(), wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hDevice); EXPECT_EQ(ALLOCATION_HANDLE, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.hAllocation); EXPECT_EQ(0, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pLockFlags); EXPECT_EQ(wddmWithKmDafMock->getGdi()->escape, wddmWithKmDafMock->getKmDafListenerMock().notifyLockParametrization.pfnEscape); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_mapper_tests.cpp000066400000000000000000000017321363734646600322150ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/wddm_engine_mapper.h" #include "test.h" using namespace NEO; TEST(WddmMapperTests, givenRcsEngineTypeWhenAskedForNodeOrdinalThenReturn3d) { GPUNODE_ORDINAL rcsNode = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_RCS); GPUNODE_ORDINAL bcsNode = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_BCS); GPUNODE_ORDINAL ccsNode = WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_CCS); GPUNODE_ORDINAL expectedRcsNode = GPUNODE_3D; GPUNODE_ORDINAL expectedBcsNode = GPUNODE_BLT; GPUNODE_ORDINAL expectedCcsNode = GPUNODE_CCS0; EXPECT_EQ(expectedRcsNode, rcsNode); EXPECT_EQ(expectedBcsNode, bcsNode); EXPECT_EQ(expectedCcsNode, ccsNode); } TEST(WddmMapperTests, givenNotSupportedEngineWhenAskedForNodeThenAbort) { EXPECT_THROW(WddmEngineMapper::engineNodeMap(aub_stream::ENGINE_VCS), std::exception); } wddm_memory_manager_allocate_in_device_pool_tests.cpp000066400000000000000000000005771363734646600405640ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/test/unit_test/os_interface/windows/wddm_memory_manager_allocate_in_device_pool_tests.inl" TEST_F(WddmMemoryManagerSimpleTest, givenWddmMemoryManagerWhenGetLocalMemoryIsCalledThenSizeOfLocalMemoryIsReturned) { EXPECT_EQ(0 * GB, memoryManager->getLocalMemorySize(0u)); } wddm_memory_manager_allocate_in_device_pool_tests.inl000066400000000000000000000020321363734646600405500ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/wddm_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.h" #include "gtest/gtest.h" using namespace NEO; using namespace ::testing; TEST_F(WddmMemoryManagerSimpleTest, givenUseSystemMemorySetToTrueWhenAllocateInDevicePoolIsCalledThenNullptrIsReturned) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; AllocationData allocData; allocData.size = MemoryConstants::pageSize; allocData.flags.useSystemMemory = true; allocData.flags.allocateMemory = true; auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(MemoryManager::AllocationStatus::RetryInNonDevicePool, status); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp000066400000000000000000003064351363734646600337430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include 
"shared/source/helpers/array_count.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/os_library.h"
#include "shared/source/os_interface/windows/os_context_win.h"
#include "shared/source/os_interface/windows/wddm_residency_controller.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/helpers/ult_hw_config.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/helpers/memory_properties_flags_helpers.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"
#include "opencl/source/platform/platform.h"
#include "opencl/test/unit_test/helpers/execution_environment_helper.h"
#include "opencl/test/unit_test/helpers/unit_test_helper.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_deferred_deleter.h"
#include "opencl/test/unit_test/mocks/mock_device.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "opencl/test/unit_test/mocks/mock_os_context.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h"
using namespace NEO;
using namespace ::testing;

// Fixture setup: creates and initialises a Wddm for the fixture's root device environment,
// installs it in a fresh OS interface and memory-operations interface, and builds the
// memory manager on top of the platform's execution environment.
// NOTE(review): the template arguments of static_cast / std::make_unique are missing in this
// copy of the file - confirm against the upstream source before building.
void WddmMemoryManagerFixture::SetUp() {
    GdiDllFixture::SetUp();
    executionEnvironment = platform()->peekExecutionEnvironment();
    rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex].get();
    wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment));
    // A page table manager is only created when the default HW info enables render compression
    // for buffers or images.
    if (defaultHwInfo->capabilityTable.ftrRenderCompressedBuffers || defaultHwInfo->capabilityTable.ftrRenderCompressedImages) {
        GMM_TRANSLATIONTABLE_CALLBACKS dummyTTCallbacks = {};
        rootDeviceEnvironment->pageTableManager.reset(GmmPageTableMngr::create(nullptr, 0, &dummyTTCallbacks));
    }
    wddm->init();
    // The heap32 base differs between 32-bit and 64-bit builds.
    constexpr uint64_t heap32Base = (is32bit) ? 0x1000 : 0x800000000000;
    wddm->setHeap32(heap32Base, 1000 * MemoryConstants::pageSize - 1);
    rootDeviceEnvironment->osInterface = std::make_unique();
    rootDeviceEnvironment->osInterface->get()->setWddm(wddm);
    rootDeviceEnvironment->memoryOperationsInterface = std::make_unique(wddm);
    memoryManager = std::make_unique(*executionEnvironment);
}

// A default-constructed ResidencyData reports "not resident" for every OS context slot.
TEST(ResidencyData, givenNewlyConstructedResidencyDataThenItIsNotResidentOnAnyOsContext) {
    ResidencyData residencyData;
    for (auto contextId = 0u; contextId < MemoryManager::maxOsContextCount; contextId++) {
        EXPECT_EQ(false, residencyData.resident[contextId]);
    }
}

// Type-trait check: the inspected type must be neither move- nor copy-constructible.
TEST(WddmMemoryManager, NonCopyable) {
    EXPECT_FALSE(std::is_move_constructible::value);
    EXPECT_FALSE(std::is_copy_constructible::value);
}

// Type-trait check: the inspected type must be neither move- nor copy-assignable.
TEST(WddmMemoryManager, NonAssignable) {
    EXPECT_FALSE(std::is_move_assignable::value);
    EXPECT_FALSE(std::is_copy_assignable::value);
}

// Setting a trim-candidate position for the OS context's id must leave context 0 at
// trimListUnusedPosition and make position 700 readable back for context 1.
TEST(WddmAllocationTest, givenAllocationIsTrimCandidateInOneOsContextWhenGettingTrimCandidatePositionThenReturnItsPositionAndUnusedPositionInOtherContexts) {
    MockWddmAllocation allocation;
    MockOsContext osContext(1u, 1, aub_stream::ENGINE_RCS, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false);
    allocation.setTrimCandidateListPosition(osContext.getContextId(), 700u);
    EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(0u));
    EXPECT_EQ(700u, allocation.getTrimCandidateListPosition(1u));
}

// A position stored for context 1 can be read back, while context 0 stays unused.
TEST(WddmAllocationTest, givenAllocationCreatedWithOsContextCountOneWhenItIsCreatedThenMaxOsContextCountIsUsedInstead) {
    MockWddmAllocation allocation;
    allocation.setTrimCandidateListPosition(1u, 700u);
    EXPECT_EQ(700u, allocation.getTrimCandidateListPosition(1u));
    EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(0u));
}

// Querying positions that were never set (including a very large context id) yields
// trimListUnusedPosition.
TEST(WddmAllocationTest, givenRequestedContextIdTooLargeWhenGettingTrimCandidateListPositionThenReturnUnusedPosition) {
    MockWddmAllocation allocation;
    EXPECT_EQ(trimListUnusedPosition,
allocation.getTrimCandidateListPosition(1u)); EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(1000u)); }

// The allocation type handed to the WddmAllocation constructor is returned by getAllocationType().
TEST(WddmAllocationTest, givenAllocationTypeWhenPassedToWddmAllocationConstructorThenAllocationTypeIsStored) {
    WddmAllocation allocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, 0, nullptr, MemoryPool::MemoryNull};
    EXPECT_EQ(GraphicsAllocation::AllocationType::COMMAND_BUFFER, allocation.getAllocationType());
}

// The memory pool handed to either WddmAllocation constructor form is returned by getMemoryPool().
TEST(WddmAllocationTest, givenMemoryPoolWhenPassedToWddmAllocationConstructorThenMemoryPoolIsStored) {
    WddmAllocation allocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, 0, nullptr, MemoryPool::System64KBPages};
    EXPECT_EQ(MemoryPool::System64KBPages, allocation.getMemoryPool());

    WddmAllocation allocation2{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, nullptr, 0, 0u, MemoryPool::SystemCpuInaccessible};
    EXPECT_EQ(MemoryPool::SystemCpuInaccessible, allocation2.getMemoryPool());
}

// The heap32 base configured on the Wddm is reported as the external heap base address.
TEST(WddmMemoryManagerExternalHeapTest, externalHeapIsCreatedWithCorrectBase) {
    HardwareInfo *hwInfo;
    auto executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1);
    std::unique_ptr wddm(static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get())));
    wddm->init();
    uint64_t base = 0x56000;
    uint64_t size = 0x9000;
    wddm->setHeap32(base, size);

    // The pointer is released here and handed to the OS interface via setWddm().
    executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm.release());

    std::unique_ptr memoryManager = std::unique_ptr(new WddmMemoryManager(*executionEnvironment));

    EXPECT_EQ(base, memoryManager->getExternalHeapBaseAddress(0));
}

// With EnableDeferredDeleter forced on, the manager starts with a deferred deleter and
// waitForDeletions() leaves it as nullptr. The flag is restored manually at the end.
TEST(WddmMemoryManagerWithDeferredDeleterTest, givenWMMWhenAsyncDeleterIsEnabledAndWaitForDeletionsIsCalledThenDeleterInWddmIsSetToNullptr) {
    HardwareInfo *hwInfo;
    auto executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1);
    auto wddm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0].get());
    wddm->init();
    executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm.release());
    bool actualDeleterFlag = DebugManager.flags.EnableDeferredDeleter.get();
    DebugManager.flags.EnableDeferredDeleter.set(true);
    MockWddmMemoryManager memoryManager(*executionEnvironment);
    EXPECT_NE(nullptr, memoryManager.getDeferredDeleter());
    memoryManager.waitForDeletions();
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
    DebugManager.flags.EnableDeferredDeleter.set(actualDeleterFlag);
}

// A plain page-sized allocation is expected to land in the System4KBPages pool with a GMM
// flagged as using the system memory pool.
TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenAllocateGraphicsMemoryIsCalledThenMemoryPoolIsSystem4KBPages) {
    memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment));
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
    // Fatal check: the allocation is dereferenced right below, so a null result must stop the test.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool());
    EXPECT_TRUE(allocation->getDefaultGmm()->useSystemMemoryPool);
    memoryManager->freeGraphicsMemory(allocation);
}

// allocateGraphicsMemory64kb is expected to produce a System64KBPages allocation whose GMM
// is flagged as using the system memory pool.
TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemory64kbIsCalledThenMemoryPoolIsSystem64KBPages) {
    memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment));
    AllocationData allocationData;
    allocationData.size = 4096u;
    auto allocation = memoryManager->allocateGraphicsMemory64kb(allocationData);
    // Fatal check: the allocation is dereferenced right below, so a null result must stop the test.
    ASSERT_NE(nullptr, allocation);
    EXPECT_EQ(MemoryPool::System64KBPages, allocation->getMemoryPool());
    EXPECT_TRUE(allocation->getDefaultGmm()->useSystemMemoryPool);
    memoryManager->freeGraphicsMemory(allocation);
}

// Allocating on top of a caller-supplied (unaligned) pointer is expected to use System4KBPages,
// with every host-pointer fragment's GMM flagged as using the system memory pool.
TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenAllocateGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPages) {
    memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment));
    void *ptr = reinterpret_cast(0x1001);
    auto size = 4096u;
    auto allocation =
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, size}, ptr); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPages, allocation->getMemoryPool()); for (size_t i = 0; i < allocation->fragmentsStorage.fragmentCount; i++) { EXPECT_TRUE(allocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->gmm->useSystemMemoryPool); } memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenAllocate32BitGraphicsMemoryWithPtrIsCalledThenMemoryPoolIsSystem4KBPagesWith32BitGpuAddressing) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); void *ptr = reinterpret_cast(0x1001); auto size = MemoryConstants::pageSize; auto allocation = memoryManager->allocate32BitGraphicsMemory(csr->getRootDeviceIndex(), size, ptr, GraphicsAllocation::AllocationType::BUFFER); ASSERT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::System4KBPagesWith32BitGpuAddressing, allocation->getMemoryPool()); EXPECT_TRUE(allocation->getDefaultGmm()->useSystemMemoryPool); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWith64KBPagesDisabledWhenAllocateGraphicsMemoryForSVMThen4KBGraphicsAllocationIsReturned) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); auto size = MemoryConstants::pageSize; auto svmAllocation = memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), size, GraphicsAllocation::AllocationType::SVM_ZERO_COPY}); EXPECT_NE(nullptr, svmAllocation); EXPECT_EQ(MemoryPool::System4KBPages, svmAllocation->getMemoryPool()); EXPECT_TRUE(svmAllocation->getDefaultGmm()->useSystemMemoryPool); memoryManager->freeGraphicsMemory(svmAllocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWith64KBPagesEnabledWhenAllocateGraphicsMemoryForSVMThenMemoryPoolIsSystem64KBPages) { memoryManager.reset(new MockWddmMemoryManager(true, 
false, *executionEnvironment)); auto size = MemoryConstants::pageSize; auto svmAllocation = memoryManager->allocateGraphicsMemoryWithProperties({csr->getRootDeviceIndex(), size, GraphicsAllocation::AllocationType::SVM_ZERO_COPY}); EXPECT_NE(nullptr, svmAllocation); EXPECT_EQ(MemoryPool::System64KBPages, svmAllocation->getMemoryPool()); memoryManager->freeGraphicsMemory(svmAllocation); } TEST_F(WddmMemoryManagerSimpleTest, givenMemoryManagerWhenCreateAllocationFromHandleIsCalledThenMemoryPoolIsSystemCpuInaccessible) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); auto osHandle = 1u; gdi->getQueryResourceInfoArgOut().NumAllocations = 1; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, false)); D3DDDI_OPENALLOCATIONINFO allocationInfo; allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle(); allocationInfo.hAllocation = ALLOCATION_HANDLE; allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO); gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo; AllocationProperties properties(0, false, 0, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0); auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false); EXPECT_NE(nullptr, allocation); EXPECT_EQ(MemoryPool::SystemCpuInaccessible, allocation->getMemoryPool()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenAllocationPropertiesWhenCreateAllocationFromHandleIsCalledThenCorrectAllocationTypeIsSet) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); auto osHandle = 1u; gdi->getQueryResourceInfoArgOut().NumAllocations = 1; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, false)); D3DDDI_OPENALLOCATIONINFO allocationInfo; allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle(); allocationInfo.hAllocation = ALLOCATION_HANDLE; 
allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO); gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo; AllocationProperties propertiesBuffer(0, false, 0, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0); AllocationProperties propertiesImage(0, false, 0, GraphicsAllocation::AllocationType::SHARED_IMAGE, false, false, 0); AllocationProperties *propertiesArray[2] = {&propertiesBuffer, &propertiesImage}; for (auto properties : propertiesArray) { auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, *properties, false); EXPECT_NE(nullptr, allocation); EXPECT_EQ(properties->allocationType, allocation->getAllocationType()); memoryManager->freeGraphicsMemory(allocation); } } TEST_F(WddmMemoryManagerSimpleTest, whenCreateAllocationFromHandleAndMapCallFailsThenFreeGraphicsMemoryIsCalled) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); auto osHandle = 1u; gdi->getQueryResourceInfoArgOut().NumAllocations = 1; auto gmm = std::make_unique(rootDeviceEnvironment->getGmmClientContext(), nullptr, 0, false); D3DDDI_OPENALLOCATIONINFO allocationInfo; allocationInfo.pPrivateDriverData = gmm->gmmResourceInfo->peekHandle(); allocationInfo.hAllocation = ALLOCATION_HANDLE; allocationInfo.PrivateDriverDataSize = sizeof(GMM_RESOURCE_INFO); wddm->mapGpuVaStatus = false; wddm->callBaseMapGpuVa = false; gdi->getOpenResourceArgOut().pOpenAllocationInfo = &allocationInfo; EXPECT_EQ(0u, memoryManager->freeGraphicsMemoryImplCalled); AllocationProperties properties(0, false, 0, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0); auto allocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false); EXPECT_EQ(nullptr, allocation); EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryImplCalled); } TEST_F(WddmMemoryManagerSimpleTest, 
givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedThenAlignedGraphicsAllocationIsCreated) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); auto size = 13u; auto hostPtr = reinterpret_cast(0x10001); AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = hostPtr; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_NE(nullptr, allocation); EXPECT_EQ(hostPtr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhencreateWddmAllocationFailsThenGraphicsAllocationIsNotCreated) { char hostPtr[64]; memoryManager->setDeferredDeleter(nullptr); setMapGpuVaFailConfigFcn(0, 1); AllocationData allocationData; allocationData.size = sizeof(hostPtr); allocationData.hostPtr = hostPtr; auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); EXPECT_EQ(nullptr, allocation); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, GivenShareableEnabledWhenAskedToCreateGrahicsAllocationThenValidAllocationIsReturned) { memoryManager.reset(new MockWddmMemoryManager(false, false, *executionEnvironment)); AllocationData allocationData; allocationData.size = 4096u; allocationData.flags.shareable = true; auto allocation = memoryManager->allocateShareableMemory(allocationData); EXPECT_NE(nullptr, allocation); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenZeroFenceValueOnSingleEngineRegisteredWhenHandleFenceCompletionIsCalledThenDoNotWaitOnCpu) { ASSERT_EQ(1u, memoryManager->getRegisteredEnginesCount()); auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, 32, GraphicsAllocation::AllocationType::BUFFER})); 
allocation->getResidencyData().updateCompletionData(0u, 0u); memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(0u, wddm->waitFromCpuResult.called); memoryManager->freeGraphicsMemory(allocation); }

// With fence value 129 recorded for the single registered engine, handleFenceCompletion is
// expected to wait on the CPU exactly once, with that value and that engine's monitored fence.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValueOnSingleEngineRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuOnce) {
    ASSERT_EQ(1u, memoryManager->getRegisteredEnginesCount());

    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, 32, GraphicsAllocation::AllocationType::BUFFER}));
    auto fence = &static_cast(memoryManager->getRegisteredEngines()[0].osContext)->getResidencyController().getMonitoredFence();
    allocation->getResidencyData().updateCompletionData(129u, 0u);

    memoryManager->handleFenceCompletion(allocation);

    EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
    EXPECT_EQ(129u, wddm->waitFromCpuResult.uint64ParamPassed);
    EXPECT_EQ(fence, wddm->waitFromCpuResult.monitoredFence);

    memoryManager->freeGraphicsMemory(allocation);
}

// Registers a second engine backed by a second Wddm instance, records fence values 129 and 152
// for contexts 0 and 1, and expects one CPU wait per Wddm with the matching value.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValuesOnMultipleEnginesRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuForEachEngine) {
    executionEnvironment->prepareRootDeviceEnvironments(2u);
    for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
        executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get());
    }

    std::unique_ptr csr(createCommandStream(*executionEnvironment, 1u));
    // NOTE(review): wddm2 is created from rootDeviceEnvironments[0] but installed on
    // rootDeviceEnvironments[1] below - confirm this is intended.
    auto wddm2 = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get()));
    wddm2->init();
    executionEnvironment->rootDeviceEnvironments[1]->osInterface.reset(new OSInterface());
    executionEnvironment->rootDeviceEnvironments[1]->osInterface->get()->setWddm(wddm2);
    executionEnvironment->rootDeviceEnvironments[1]->memoryOperationsInterface = std::make_unique(wddm2);
    auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo();
    memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[1], 2, PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false);
    ASSERT_EQ(2u, memoryManager->getRegisteredEnginesCount());

    auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, 32, GraphicsAllocation::AllocationType::BUFFER}));
    auto lastEngineFence = &static_cast(memoryManager->getRegisteredEngines()[1].osContext)->getResidencyController().getMonitoredFence();
    allocation->getResidencyData().updateCompletionData(129u, 0u);
    allocation->getResidencyData().updateCompletionData(152u, 1u);

    memoryManager->handleFenceCompletion(allocation);

    EXPECT_EQ(1u, wddm->waitFromCpuResult.called);
    EXPECT_EQ(1u, wddm2->waitFromCpuResult.called);
    EXPECT_EQ(129u, wddm->waitFromCpuResult.uint64ParamPassed);
    EXPECT_EQ(152u, wddm2->waitFromCpuResult.uint64ParamPassed);
    EXPECT_EQ(lastEngineFence, wddm2->waitFromCpuResult.monitoredFence);

    memoryManager->freeGraphicsMemory(allocation);
}

// Same two-engine setup as above; only context 0 gets a non-zero fence value.
TEST_F(WddmMemoryManagerSimpleTest, givenNonZeroFenceValueOnSomeOfMultipleEnginesRegisteredWhenHandleFenceCompletionIsCalledThenWaitOnCpuForTheseEngines) {
    executionEnvironment->prepareRootDeviceEnvironments(2u);
    for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) {
        executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get());
    }

    std::unique_ptr csr(createCommandStream(*executionEnvironment, 1u));
    auto wddm2 = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get()));
    wddm2->init();
    executionEnvironment->rootDeviceEnvironments[1]->osInterface.reset(new OSInterface());
    executionEnvironment->rootDeviceEnvironments[1]->osInterface->get()->setWddm(wddm2);
    executionEnvironment->rootDeviceEnvironments[1]->memoryOperationsInterface = std::make_unique(wddm2);
    auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo();
    memoryManager->createAndRegisterOsContext(csr.get(),
HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[1], 2, PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false); ASSERT_EQ(2u, memoryManager->getRegisteredEnginesCount()); auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, 32, GraphicsAllocation::AllocationType::BUFFER})); auto lastEngineFence = &static_cast(memoryManager->getRegisteredEngines()[0].osContext)->getResidencyController().getMonitoredFence(); allocation->getResidencyData().updateCompletionData(129u, 0u); allocation->getResidencyData().updateCompletionData(0, 1u); memoryManager->handleFenceCompletion(allocation); EXPECT_EQ(1u, wddm->waitFromCpuResult.called); EXPECT_EQ(129, wddm->waitFromCpuResult.uint64ParamPassed); EXPECT_EQ(lastEngineFence, wddm->waitFromCpuResult.monitoredFence); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerTest, givenDefaultWddmMemoryManagerWhenAskedForVirtualPaddingSupportThenFalseIsReturned) { EXPECT_FALSE(memoryManager->peekVirtualPaddingSupport()); } TEST_F(WddmMemoryManagerTest, GivenGraphicsAllocationWhenAddAndRemoveAllocationToHostPtrManagerThenfragmentHasCorrectValues) { void *cpuPtr = (void *)0x30000; size_t size = 0x1000; uint64_t gpuPtr = 0x123; MockWddmAllocation gfxAllocation; gfxAllocation.cpuPtr = cpuPtr; gfxAllocation.size = size; gfxAllocation.gpuPtr = gpuPtr; memoryManager->addAllocationToHostPtrManager(&gfxAllocation); auto fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_NE(fragment, nullptr); EXPECT_TRUE(fragment->driverAllocation); EXPECT_EQ(fragment->refCount, 1); EXPECT_EQ(fragment->fragmentCpuPointer, cpuPtr); EXPECT_EQ(fragment->fragmentSize, size); EXPECT_NE(fragment->osInternalStorage, nullptr); EXPECT_EQ(fragment->osInternalStorage->gmm, gfxAllocation.getDefaultGmm()); EXPECT_EQ(fragment->osInternalStorage->gpuPtr, gpuPtr); EXPECT_EQ(fragment->osInternalStorage->handle, 
gfxAllocation.handle); EXPECT_NE(fragment->residency, nullptr); FragmentStorage fragmentStorage = {}; fragmentStorage.fragmentCpuPointer = cpuPtr; memoryManager->getHostPtrManager()->storeFragment(fragmentStorage); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = false; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment->refCount, 2); fragment->driverAllocation = true; memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment->refCount, 1); memoryManager->removeAllocationFromHostPtrManager(&gfxAllocation); fragment = memoryManager->getHostPtrManager()->getFragment(gfxAllocation.getUnderlyingBuffer()); EXPECT_EQ(fragment, nullptr); } TEST_F(WddmMemoryManagerTest, AllocateGpuMemHostPtr) { // three pages void *ptr = alignedMalloc(3 * 4096, 4096); ASSERT_NE(nullptr, ptr); auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, MemoryConstants::pageSize}, ptr); // Should be same cpu ptr and gpu ptr EXPECT_EQ(ptr, gpuAllocation->getUnderlyingBuffer()); memoryManager->freeGraphicsMemory(gpuAllocation); alignedFree(ptr); } TEST_F(WddmMemoryManagerTest, givenDefaultMemoryManagerWhenAllocateWithSizeIsCalledThenSharedHandleIsZero) { auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize}); auto wddmAllocation = static_cast(gpuAllocation); EXPECT_EQ(0u, wddmAllocation->peekSharedHandle()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCreateFromSharedHandleIsCalledThenNonNullGraphicsAllocationIsReturned) 
{ auto osHandle = 1u; void *pSysMem = reinterpret_cast(0x1000); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); AllocationProperties properties(0, false, 4096u, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0); auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false); auto wddmAlloc = static_cast(gpuAllocation); ASSERT_NE(nullptr, gpuAllocation); EXPECT_EQ(RESOURCE_HANDLE, wddmAlloc->resourceHandle); EXPECT_EQ(ALLOCATION_HANDLE, wddmAlloc->getDefaultHandle()); memoryManager->freeGraphicsMemory(gpuAllocation); }

// Opening an NT handle is expected to yield a SHARED_IMAGE allocation carrying the mocked
// NT resource and allocation handles.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCreateFromNTHandleIsCalledThenNonNullGraphicsAllocationIsReturned) {
    void *pSysMem = reinterpret_cast(0x1000);
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false));
    setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u);

    auto *gpuAllocation = memoryManager->createGraphicsAllocationFromNTHandle(reinterpret_cast(1), 0);
    auto wddmAlloc = static_cast(gpuAllocation);
    ASSERT_NE(nullptr, gpuAllocation);
    EXPECT_EQ(NT_RESOURCE_HANDLE, wddmAlloc->resourceHandle);
    EXPECT_EQ(NT_ALLOCATION_HANDLE, wddmAlloc->getDefaultHandle());
    EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, wddmAlloc->getAllocationType());

    memoryManager->freeGraphicsMemory(gpuAllocation);
}

// lockResource is expected to return a non-null pointer; lock and unlock are each expected
// to reach the Wddm mock exactly once and succeed.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenLockUnlockIsCalledThenReturnPtr) {
    auto alloc = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize});

    auto ptr = memoryManager->lockResource(alloc);
    EXPECT_NE(nullptr, ptr);
    EXPECT_EQ(1u, wddm->lockResult.called);
    EXPECT_TRUE(wddm->lockResult.success);

    memoryManager->unlockResource(alloc);
    EXPECT_EQ(1u, wddm->unlockResult.called);
    EXPECT_TRUE(wddm->unlockResult.success);

    memoryManager->freeGraphicsMemory(alloc);
}

// With force-32-bit enabled and the last createGraphicsAllocationFromSharedHandle argument
// true, a 64-bit build is expected to get a 32-bit allocation whose GPU base address is the
// canonized external heap base.
TEST_F(WddmMemoryManagerTest, createAllocationFromSharedHandleReturns32BitAllocWhenForce32bitAddressingIsSetAndRequireSpecificBitnessIsTrue) {
    auto osHandle = 1u;
    void *pSysMem = reinterpret_cast(0x1000);
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false));
    setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u);

    memoryManager->setForce32BitAllocations(true);

    AllocationProperties properties(0, false, 4096u, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0);

    auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, true);
    ASSERT_NE(nullptr, gpuAllocation);
    if (is64bit) {
        EXPECT_TRUE(gpuAllocation->is32BitAllocation());

        uint64_t base = memoryManager->getExternalHeapBaseAddress(gpuAllocation->getRootDeviceIndex());
        EXPECT_EQ(GmmHelper::canonize(base), gpuAllocation->getGpuBaseAddress());
    }

    memoryManager->freeGraphicsMemory(gpuAllocation);
}

// With force-32-bit enabled but the last argument false, the allocation is expected not to be
// 32-bit, and on a 64-bit build its GPU base address is expected to be 0.
TEST_F(WddmMemoryManagerTest, createAllocationFromSharedHandleDoesNotReturn32BitAllocWhenForce32bitAddressingIsSetAndRequireSpecificBitnessIsFalse) {
    auto osHandle = 1u;
    void *pSysMem = reinterpret_cast(0x1000);
    std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false));
    setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u);

    memoryManager->setForce32BitAllocations(true);

    AllocationProperties properties(0, false, 4096u, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0);

    auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false);
    ASSERT_NE(nullptr, gpuAllocation);

    EXPECT_FALSE(gpuAllocation->is32BitAllocation());
    if (is64bit) {
        uint64_t base = 0;
        EXPECT_EQ(base, gpuAllocation->getGpuBaseAddress());
    }

    memoryManager->freeGraphicsMemory(gpuAllocation);
}

// Freeing an allocation opened from a shared handle is expected to destroy its resource handle.
TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenFreeAllocFromSharedHandleIsCalledThenDestroyResourceHandle) {
    auto osHandle =
1u; void *pSysMem = reinterpret_cast(0x1000); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); AllocationProperties properties(0, false, 4096u, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0); auto gpuAllocation = (WddmAllocation *)memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false); EXPECT_NE(nullptr, gpuAllocation); auto expectedDestroyHandle = gpuAllocation->resourceHandle; EXPECT_NE(0u, expectedDestroyHandle); auto lastDestroyed = getMockLastDestroyedResHandleFcn(); EXPECT_EQ(0u, lastDestroyed); memoryManager->freeGraphicsMemory(gpuAllocation); lastDestroyed = getMockLastDestroyedResHandleFcn(); EXPECT_EQ(lastDestroyed, expectedDestroyHandle); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerSizeZeroWhenCreateFromSharedHandleIsCalledThenUpdateSize) { auto osHandle = 1u; auto size = 4096u; void *pSysMem = reinterpret_cast(0x1000); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, size, false)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); AllocationProperties properties(0, false, size, GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0); auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false); ASSERT_NE(nullptr, gpuAllocation); EXPECT_EQ(size, gpuAllocation->getUnderlyingBufferSize()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCreateFromSharedHandleFailsThenReturnNull) { auto osHandle = 1u; auto size = 4096u; void *pSysMem = reinterpret_cast(0x1000); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, size, false)); setSizesFcn(gmm->gmmResourceInfo.get(), 1u, 1024u, 1u); wddm->failOpenSharedHandle = true; AllocationProperties properties(0, false, size, 
GraphicsAllocation::AllocationType::SHARED_BUFFER, false, false, 0); auto *gpuAllocation = memoryManager->createGraphicsAllocationFromSharedHandle(osHandle, properties, false); EXPECT_EQ(nullptr, gpuAllocation); } HWTEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenTiledImageWithMipCountZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } MockContext context; context.memoryManager = memoryManager.get(); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 64u; imageDesc.image_height = 64u; auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE, imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenTiledImageWithMipCountNonZeroIsBeingCreatedThenallocateGraphicsMemoryForImageIsUsed) { MockContext context; context.memoryManager = memoryManager.get(); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 64u; imageDesc.image_height = 64u; imageDesc.num_mip_levels = 1u; auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY; 
auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); EXPECT_EQ(static_cast(imageDesc.num_mip_levels), dstImage->peekMipCount()); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE, imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage); } HWTEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenTiledImageIsBeingCreatedFromHostPtrThenallocateGraphicsMemoryForImageIsUsed) { if (!UnitTestHelper::tiledImagesSupported) { GTEST_SKIP(); } auto device = std::make_unique(MockDevice::createWithExecutionEnvironment(defaultHwInfo.get(), executionEnvironment, 0u)); MockContext context(device.get()); context.memoryManager = memoryManager.get(); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 64u; imageDesc.image_height = 64u; char data[64u * 64u * 4 * 8]; auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, data, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(); ASSERT_NE(nullptr, imageGraphicsAllocation); 
EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE, imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenNonTiledImgWithMipCountZeroisBeingCreatedThenAllocateGraphicsMemoryIsUsed) { MockContext context; context.memoryManager = memoryManager.get(); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 64u; char data[64u * 4 * 8]; auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, data, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_BUFFER, imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenNonTiledImgWithMipCountNonZeroisBeingCreatedThenAllocateGraphicsMemoryForImageIsUsed) { MockContext context; context.memoryManager = memoryManager.get(); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_R; cl_image_desc imageDesc = {}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imageDesc.image_width = 64u; imageDesc.num_mip_levels = 1u; auto retVal = CL_SUCCESS; cl_mem_flags flags = CL_MEM_WRITE_ONLY; auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat, context.getDevice(0)->getHardwareInfo().capabilityTable.clVersionSupport); std::unique_ptr 
dstImage(Image::create(&context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, surfaceFormat, &imageDesc, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, dstImage); EXPECT_EQ(static_cast(imageDesc.num_mip_levels), dstImage->peekMipCount()); auto imageGraphicsAllocation = dstImage->getGraphicsAllocation(); ASSERT_NE(nullptr, imageGraphicsAllocation); EXPECT_EQ(GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE, imageGraphicsAllocation->getDefaultGmm()->resourceParams.Usage); } TEST_F(WddmMemoryManagerTest, AllocateGpuMemHostPtrOffseted) { MockWddmAllocation alloc, allocOffseted; // three pages void *ptr = alignedMalloc(4 * 4096, 4096); ASSERT_NE(nullptr, ptr); size_t baseOffset = 1024; // misalligned buffer spanning accross 3 pages auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 2 * MemoryConstants::pageSize}, (char *)ptr + baseOffset); // Should be same cpu ptr and gpu ptr EXPECT_EQ((char *)ptr + baseOffset, gpuAllocation->getUnderlyingBuffer()); auto hostPtrManager = memoryManager->getHostPtrManager(); auto fragment = hostPtrManager->getFragment(ptr); ASSERT_NE(nullptr, fragment); EXPECT_TRUE(fragment->refCount == 1); EXPECT_NE(fragment->osInternalStorage, nullptr); // offseted by 3 pages, not in boundary auto fragment2 = hostPtrManager->getFragment((char *)ptr + 3 * 4096); EXPECT_EQ(nullptr, fragment2); // offseted by one page, still in boundary void *offsetedPtr = ptrOffset(ptr, 4096); auto *gpuAllocation2 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, MemoryConstants::pageSize}, offsetedPtr); // Should be same cpu ptr and gpu ptr EXPECT_EQ(offsetedPtr, gpuAllocation2->getUnderlyingBuffer()); auto fragment3 = hostPtrManager->getFragment(offsetedPtr); ASSERT_NE(nullptr, fragment3); EXPECT_TRUE(fragment3->refCount == 2); EXPECT_EQ(alloc.handle, allocOffseted.handle); 
EXPECT_EQ(alloc.getUnderlyingBufferSize(), allocOffseted.getUnderlyingBufferSize()); EXPECT_EQ(alloc.getAlignedCpuPtr(), allocOffseted.getAlignedCpuPtr()); memoryManager->freeGraphicsMemory(gpuAllocation2); auto fragment4 = hostPtrManager->getFragment(ptr); ASSERT_NE(nullptr, fragment4); EXPECT_TRUE(fragment4->refCount == 1); memoryManager->freeGraphicsMemory(gpuAllocation); fragment4 = hostPtrManager->getFragment(ptr); EXPECT_EQ(nullptr, fragment4); alignedFree(ptr); } TEST_F(WddmMemoryManagerTest, AllocateGpuMemCheckGmm) { MockWddmAllocation allocation; // three pages void *ptr = alignedMalloc(3 * 4096, 4096); auto *gpuAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, 3 * MemoryConstants::pageSize}, ptr); // Should be same cpu ptr and gpu ptr ASSERT_NE(nullptr, gpuAllocation); EXPECT_EQ(ptr, gpuAllocation->getUnderlyingBuffer()); auto fragment = memoryManager->getHostPtrManager()->getFragment(ptr); ASSERT_NE(nullptr, fragment); EXPECT_TRUE(fragment->refCount == 1); EXPECT_NE(fragment->osInternalStorage->handle, 0); EXPECT_NE(fragment->osInternalStorage->gmm, nullptr); memoryManager->freeGraphicsMemory(gpuAllocation); alignedFree(ptr); } TEST_F(WddmMemoryManagerTest, GivenAlignedPointerWhenAllocate32BitMemoryThenGmmCalledWithCorrectPointerAndSize) { MockWddmAllocation allocation; uint32_t size = 4096; void *ptr = reinterpret_cast(4096); auto *gpuAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, ptr, GraphicsAllocation::AllocationType::BUFFER); EXPECT_EQ(ptr, reinterpret_cast(gpuAllocation->getDefaultGmm()->resourceParams.pExistingSysMem)); EXPECT_EQ(size, gpuAllocation->getDefaultGmm()->resourceParams.ExistingSysMemSize); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, GivenUnAlignedPointerAndSizeWhenAllocate32BitMemoryThenGmmCalledWithCorrectPointerAndSize) { MockWddmAllocation allocation; uint32_t size = 0x1001; void *ptr = 
reinterpret_cast(0x1001); auto *gpuAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, size, ptr, GraphicsAllocation::AllocationType::BUFFER); EXPECT_EQ(reinterpret_cast(0x1000), reinterpret_cast(gpuAllocation->getDefaultGmm()->resourceParams.pExistingSysMem)); EXPECT_EQ(0x2000, gpuAllocation->getDefaultGmm()->resourceParams.ExistingSysMemSize); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, getSystemSharedMemory) { executionEnvironment->prepareRootDeviceEnvironments(4u); for (auto i = 0u; i < 4u; i++) { auto mockWddm = Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[i].get()); mockWddm->init(); executionEnvironment->rootDeviceEnvironments[i]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->setWddm(mockWddm); int64_t mem = memoryManager->getSystemSharedMemory(i); EXPECT_EQ(mem, 4249540608); executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(); } } TEST_F(WddmMemoryManagerTest, getMaxApplicationAddress) { uint64_t maxAddr = memoryManager->getMaxApplicationAddress(); if (is32bit) { EXPECT_EQ(maxAddr, MemoryConstants::max32BitAppAddress); } else { EXPECT_EQ(maxAddr, MemoryConstants::max64BitAppAddress); } } TEST_F(WddmMemoryManagerTest, Allocate32BitMemoryWithNullptr) { auto *gpuAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, 3 * MemoryConstants::pageSize, nullptr, GraphicsAllocation::AllocationType::BUFFER); ASSERT_NE(nullptr, gpuAllocation); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_EXTERNAL)), gpuAllocation->getGpuAddress()); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTERNAL)), gpuAllocation->getGpuAddress() + gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(0u, gpuAllocation->fragmentsStorage.fragmentCount); memoryManager->freeGraphicsMemory(gpuAllocation); } 
TEST_F(WddmMemoryManagerTest, given32BitAllocationWhenItIsCreatedThenItHasNonZeroGpuAddressToPatch) { auto *gpuAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, 3 * MemoryConstants::pageSize, nullptr, GraphicsAllocation::AllocationType::BUFFER); ASSERT_NE(nullptr, gpuAllocation); EXPECT_NE(0llu, gpuAllocation->getGpuAddressToPatch()); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_EXTERNAL)), gpuAllocation->getGpuAddress()); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_EXTERNAL)), gpuAllocation->getGpuAddress() + gpuAllocation->getUnderlyingBufferSize()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, Allocate32BitMemoryWithMisalignedHostPtrDoesNotDoTripleAlloc) { size_t misalignedSize = 0x2500; void *misalignedPtr = reinterpret_cast(0x12500); auto *gpuAllocation = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, misalignedSize, misalignedPtr, GraphicsAllocation::AllocationType::BUFFER); ASSERT_NE(nullptr, gpuAllocation); EXPECT_EQ(alignSizeWholePage(misalignedPtr, misalignedSize), gpuAllocation->getUnderlyingBufferSize()); EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTERNAL)), gpuAllocation->getGpuAddress()); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTERNAL)), gpuAllocation->getGpuAddress() + gpuAllocation->getUnderlyingBufferSize()); EXPECT_EQ(0u, gpuAllocation->fragmentsStorage.fragmentCount); void *alignedPtr = alignDown(misalignedPtr, MemoryConstants::allocationAlignment); uint64_t offset = ptrDiff(misalignedPtr, alignedPtr); EXPECT_EQ(offset, gpuAllocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, Allocate32BitMemorySetsCannonizedGpuBaseAddress) { auto *gpuAllocation = 
memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, 3 * MemoryConstants::pageSize, nullptr, GraphicsAllocation::AllocationType::BUFFER); ASSERT_NE(nullptr, gpuAllocation); uint64_t cannonizedAddress = GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::HEAP_EXTERNAL)); EXPECT_EQ(cannonizedAddress, gpuAllocation->getGpuBaseAddress()); memoryManager->freeGraphicsMemory(gpuAllocation); } TEST_F(WddmMemoryManagerTest, GivenThreeOsHandlesWhenAskedForDestroyAllocationsThenAllMarkedAllocationsAreDestroyed) { OsHandleStorage storage; void *pSysMem = reinterpret_cast(0x1000); storage.fragmentStorageData[0].osHandleStorage = new OsHandle; storage.fragmentStorageData[0].residency = new ResidencyData; storage.fragmentStorageData[0].osHandleStorage->handle = ALLOCATION_HANDLE; storage.fragmentStorageData[0].freeTheFragment = true; storage.fragmentStorageData[0].osHandleStorage->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false); storage.fragmentStorageData[1].osHandleStorage = new OsHandle; storage.fragmentStorageData[1].osHandleStorage->handle = ALLOCATION_HANDLE; storage.fragmentStorageData[1].residency = new ResidencyData; storage.fragmentStorageData[1].freeTheFragment = false; storage.fragmentStorageData[2].osHandleStorage = new OsHandle; storage.fragmentStorageData[2].osHandleStorage->handle = ALLOCATION_HANDLE; storage.fragmentStorageData[2].freeTheFragment = true; storage.fragmentStorageData[2].osHandleStorage->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), pSysMem, 4096u, false); storage.fragmentStorageData[2].residency = new ResidencyData; memoryManager->cleanOsHandles(storage, 0); auto destroyWithResourceHandleCalled = 0u; D3DKMT_DESTROYALLOCATION2 *ptrToDestroyAlloc2 = nullptr; getSizesFcn(destroyWithResourceHandleCalled, ptrToDestroyAlloc2); EXPECT_EQ(0u, ptrToDestroyAlloc2->Flags.SynchronousDestroy); EXPECT_EQ(1u, ptrToDestroyAlloc2->Flags.AssumeNotInUse); EXPECT_EQ(ALLOCATION_HANDLE, 
storage.fragmentStorageData[1].osHandleStorage->handle); delete storage.fragmentStorageData[1].osHandleStorage; delete storage.fragmentStorageData[1].residency; } TEST_F(WddmMemoryManagerTest, freeNullAllocationNoCrash) { EXPECT_NO_THROW(memoryManager->freeGraphicsMemory(nullptr)); } TEST_F(WddmMemoryManagerTest, givenDefaultWddmMemoryManagerWhenAskedForAlignedMallocRestrictionsThenValueIsReturned) { AlignedMallocRestrictions *mallocRestrictions = memoryManager->getAlignedMallocRestrictions(); ASSERT_NE(nullptr, mallocRestrictions); EXPECT_EQ(NEO::windowsMinAddress, mallocRestrictions->minAddress); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCpuMemNotMeetRestrictionsThenReserveMemRangeForMap) { void *cpuPtr = reinterpret_cast(memoryManager->getAlignedMallocRestrictions()->minAddress - 0x1000); size_t size = 0x1000; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, false, size}, cpuPtr)); void *expectReserve = reinterpret_cast(wddm->virtualAllocAddress); ASSERT_NE(nullptr, allocation); EXPECT_EQ(expectReserve, allocation->getReservedAddressPtr()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerTest, givenManagerWithDisabledDeferredDeleterWhenMapGpuVaFailThenFailToCreateAllocation) { void *ptr = reinterpret_cast(0x1000); size_t size = 0x1000; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, false)); memoryManager->setDeferredDeleter(nullptr); setMapGpuVaFailConfigFcn(0, 1); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::BUFFER, ptr, size, nullptr, MemoryPool::MemoryNull); allocation.setDefaultGmm(gmm.get()); bool ret = memoryManager->createWddmAllocation(&allocation, allocation.getAlignedCpuPtr()); EXPECT_FALSE(ret); } TEST_F(WddmMemoryManagerTest, givenManagerWithEnabledDeferredDeleterWhenFirstMapGpuVaFailSecondAfterDrainSuccessThenCreateAllocation) { void *ptr = reinterpret_cast(0x10000); size_t size = 0x1000; 
std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, false)); MockDeferredDeleter *deleter = new MockDeferredDeleter; memoryManager->setDeferredDeleter(deleter); setMapGpuVaFailConfigFcn(0, 1); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::BUFFER, ptr, size, nullptr, MemoryPool::MemoryNull); allocation.setDefaultGmm(gmm.get()); bool ret = memoryManager->createWddmAllocation(&allocation, allocation.getAlignedCpuPtr()); EXPECT_TRUE(ret); } TEST_F(WddmMemoryManagerTest, givenManagerWithEnabledDeferredDeleterWhenFirstAndMapGpuVaFailSecondAfterDrainFailThenFailToCreateAllocation) { void *ptr = reinterpret_cast(0x1000); size_t size = 0x1000; std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, false)); MockDeferredDeleter *deleter = new MockDeferredDeleter; memoryManager->setDeferredDeleter(deleter); setMapGpuVaFailConfigFcn(0, 2); WddmAllocation allocation(0, GraphicsAllocation::AllocationType::BUFFER, ptr, size, nullptr, MemoryPool::MemoryNull); allocation.setDefaultGmm(gmm.get()); bool ret = memoryManager->createWddmAllocation(&allocation, allocation.getAlignedCpuPtr()); EXPECT_FALSE(ret); } TEST_F(WddmMemoryManagerTest, givenNullPtrAndSizePassedToCreateInternalAllocationWhenCallIsMadeThenAllocationIsCreatedIn32BitHeapInternal) { auto wddmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, MemoryConstants::pageSize, nullptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, wddmAllocation); EXPECT_EQ(wddmAllocation->getGpuBaseAddress(), GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(wddmAllocation->getRootDeviceIndex()))); EXPECT_NE(nullptr, wddmAllocation->getUnderlyingBuffer()); EXPECT_EQ(4096u, wddmAllocation->getUnderlyingBufferSize()); EXPECT_NE((uint64_t)wddmAllocation->getUnderlyingBuffer(), wddmAllocation->getGpuAddress()); auto cannonizedHeapBase = 
GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(rootDeviceIndex)); auto cannonizedHeapEnd = GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)); EXPECT_GT(wddmAllocation->getGpuAddress(), cannonizedHeapBase); EXPECT_LT(wddmAllocation->getGpuAddress() + wddmAllocation->getUnderlyingBufferSize(), cannonizedHeapEnd); EXPECT_NE(nullptr, wddmAllocation->getDriverAllocatedCpuPtr()); EXPECT_TRUE(wddmAllocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(wddmAllocation); } TEST_F(WddmMemoryManagerTest, givenPtrAndSizePassedToCreateInternalAllocationWhenCallIsMadeThenAllocationIsCreatedIn32BitHeapInternal) { auto ptr = reinterpret_cast(0x1000000); auto wddmAllocation = static_cast(memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, MemoryConstants::pageSize, ptr, GraphicsAllocation::AllocationType::INTERNAL_HEAP)); ASSERT_NE(nullptr, wddmAllocation); EXPECT_EQ(wddmAllocation->getGpuBaseAddress(), GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(rootDeviceIndex))); EXPECT_EQ(ptr, wddmAllocation->getUnderlyingBuffer()); EXPECT_EQ(4096u, wddmAllocation->getUnderlyingBufferSize()); EXPECT_NE((uint64_t)wddmAllocation->getUnderlyingBuffer(), wddmAllocation->getGpuAddress()); auto cannonizedHeapBase = GmmHelper::canonize(memoryManager->getInternalHeapBaseAddress(rootDeviceIndex)); auto cannonizedHeapEnd = GmmHelper::canonize(memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)); EXPECT_GT(wddmAllocation->getGpuAddress(), cannonizedHeapBase); EXPECT_LT(wddmAllocation->getGpuAddress() + wddmAllocation->getUnderlyingBufferSize(), cannonizedHeapEnd); EXPECT_EQ(nullptr, wddmAllocation->getDriverAllocatedCpuPtr()); EXPECT_TRUE(wddmAllocation->is32BitAllocation()); memoryManager->freeGraphicsMemory(wddmAllocation); } TEST_F(BufferWithWddmMemory, ValidHostPtr) { flags = CL_MEM_USE_HOST_PTR | 
CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL; auto ptr = alignedMalloc(MemoryConstants::preferredAlignment, MemoryConstants::preferredAlignment); auto buffer = Buffer::create( &context, flags, MemoryConstants::preferredAlignment, ptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, buffer); auto address = buffer->getCpuAddress(); if (buffer->isMemObjZeroCopy()) { EXPECT_EQ(ptr, address); } else { EXPECT_NE(address, ptr); } EXPECT_NE(nullptr, buffer->getGraphicsAllocation()); EXPECT_NE(nullptr, buffer->getGraphicsAllocation()->getUnderlyingBuffer()); delete buffer; alignedFree(ptr); } TEST_F(BufferWithWddmMemory, NullOsHandleStorageAskedForPopulationReturnsFilledPointer) { OsHandleStorage storage; storage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); storage.fragmentStorageData[0].fragmentSize = MemoryConstants::pageSize; memoryManager->populateOsHandles(storage, 0); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage); EXPECT_NE(nullptr, storage.fragmentStorageData[0].osHandleStorage->gmm); EXPECT_EQ(nullptr, storage.fragmentStorageData[1].osHandleStorage); EXPECT_EQ(nullptr, storage.fragmentStorageData[2].osHandleStorage); storage.fragmentStorageData[0].freeTheFragment = true; memoryManager->cleanOsHandles(storage, 0); } TEST_F(BufferWithWddmMemory, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAskedForGraphicsAllcoationThenItContainsAllFragmentsWithProperGpuAdrresses) { auto ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1001); auto size = MemoryConstants::pageSize * 10; auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), false, size}, ptr); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); ASSERT_EQ(3u, hostPtrManager->getFragmentCount()); auto reqs = MockHostPtrManager::getAllocationRequirements(ptr, size); for (int i = 0; i < maxFragmentsCount; i++) { EXPECT_NE((D3DKMT_HANDLE) nullptr, 
graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->handle); EXPECT_NE(nullptr, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->gmm); EXPECT_EQ(reqs.allocationFragments[i].allocationPtr, reinterpret_cast(graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->gmm->resourceParams.pExistingSysMem)); EXPECT_EQ(reqs.allocationFragments[i].allocationSize, graphicsAllocation->fragmentsStorage.fragmentStorageData[i].osHandleStorage->gmm->resourceParams.BaseWidth); } memoryManager->freeGraphicsMemory(graphicsAllocation); EXPECT_EQ(0u, hostPtrManager->getFragmentCount()); } TEST_F(BufferWithWddmMemory, GivenPointerAndSizeWhenAskedToCreateGrahicsAllocationThenGraphicsAllocationIsCreated) { OsHandleStorage handleStorage; auto ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1000); auto ptr2 = reinterpret_cast(wddm->virtualAllocAddress + 0x1001); auto size = MemoryConstants::pageSize; handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[1].cpuPtr = ptr2; handleStorage.fragmentStorageData[2].cpuPtr = nullptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[1].fragmentSize = size * 2; handleStorage.fragmentStorageData[2].fragmentSize = size * 3; AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = ptr; auto allocation = memoryManager->createGraphicsAllocation(handleStorage, allocationData); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(ptr, allocation->fragmentsStorage.fragmentStorageData[0].cpuPtr); EXPECT_EQ(ptr2, allocation->fragmentsStorage.fragmentStorageData[1].cpuPtr); EXPECT_EQ(nullptr, allocation->fragmentsStorage.fragmentStorageData[2].cpuPtr); EXPECT_EQ(size, allocation->fragmentsStorage.fragmentStorageData[0].fragmentSize); EXPECT_EQ(size * 2, allocation->fragmentsStorage.fragmentStorageData[1].fragmentSize); 
EXPECT_EQ(size * 3, allocation->fragmentsStorage.fragmentStorageData[2].fragmentSize); EXPECT_NE(&allocation->fragmentsStorage, &handleStorage); memoryManager->freeGraphicsMemory(allocation); } TEST_F(BufferWithWddmMemory, givenFragmentsThatAreNotInOrderWhenGraphicsAllocationIsBeingCreatedThenGraphicsAddressIsPopulatedFromProperFragment) { memoryManager->setForce32bitAllocations(true); OsHandleStorage handleStorage = {}; D3DGPU_VIRTUAL_ADDRESS gpuAdress = MemoryConstants::pageSize * 1; auto ptr = reinterpret_cast(wddm->virtualAllocAddress + MemoryConstants::pageSize); auto size = MemoryConstants::pageSize * 2; handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[0].osHandleStorage = new OsHandle(); handleStorage.fragmentStorageData[0].residency = new ResidencyData(); handleStorage.fragmentStorageData[0].freeTheFragment = true; auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); handleStorage.fragmentStorageData[0].osHandleStorage->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, false); handleStorage.fragmentCount = 1; FragmentStorage fragment = {}; fragment.driverAllocation = true; fragment.fragmentCpuPointer = ptr; fragment.fragmentSize = size; fragment.osInternalStorage = handleStorage.fragmentStorageData[0].osHandleStorage; fragment.osInternalStorage->gpuPtr = gpuAdress; memoryManager->getHostPtrManager()->storeFragment(fragment); AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = ptr; auto allocation = memoryManager->createGraphicsAllocation(handleStorage, allocationData); EXPECT_EQ(ptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAdress, allocation->getGpuAddress()); EXPECT_EQ(0ULL, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(BufferWithWddmMemory, 
givenFragmentsThatAreNotInOrderWhenGraphicsAllocationIsBeingCreatedNotAllignedToPageThenGraphicsAddressIsPopulatedFromProperFragmentAndOffsetisAssigned) { memoryManager->setForce32bitAllocations(true); OsHandleStorage handleStorage = {}; D3DGPU_VIRTUAL_ADDRESS gpuAdress = MemoryConstants::pageSize * 1; auto ptr = reinterpret_cast(wddm->virtualAllocAddress + MemoryConstants::pageSize); auto size = MemoryConstants::pageSize * 2; handleStorage.fragmentStorageData[0].cpuPtr = ptr; handleStorage.fragmentStorageData[0].fragmentSize = size; handleStorage.fragmentStorageData[0].osHandleStorage = new OsHandle(); handleStorage.fragmentStorageData[0].residency = new ResidencyData(); handleStorage.fragmentStorageData[0].freeTheFragment = true; auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); handleStorage.fragmentStorageData[0].osHandleStorage->gmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), ptr, size, false); handleStorage.fragmentCount = 1; FragmentStorage fragment = {}; fragment.driverAllocation = true; fragment.fragmentCpuPointer = ptr; fragment.fragmentSize = size; fragment.osInternalStorage = handleStorage.fragmentStorageData[0].osHandleStorage; fragment.osInternalStorage->gpuPtr = gpuAdress; memoryManager->getHostPtrManager()->storeFragment(fragment); auto offset = 80; auto allocationPtr = ptrOffset(ptr, offset); AllocationData allocationData; allocationData.size = size; allocationData.hostPtr = allocationPtr; auto allocation = memoryManager->createGraphicsAllocation(handleStorage, allocationData); EXPECT_EQ(allocationPtr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(gpuAdress + offset, allocation->getGpuAddress()); // getGpuAddress returns gpuAddress + allocationOffset EXPECT_EQ(offset, allocation->getAllocationOffset()); memoryManager->freeGraphicsMemory(allocation); } struct WddmMemoryManagerWithAsyncDeleterTest : public ::testing::Test { void SetUp() { 
executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); wddm = static_cast(executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->getWddm()); wddm->resetGdi(new MockGdi()); wddm->callBaseDestroyAllocations = false; wddm->init(); deleter = new MockDeferredDeleter; memoryManager = std::make_unique(*executionEnvironment); memoryManager->setDeferredDeleter(deleter); } MockDeferredDeleter *deleter = nullptr; std::unique_ptr memoryManager; ExecutionEnvironment *executionEnvironment; HardwareInfo *hwInfo; WddmMock *wddm; }; TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenWddmWhenAsyncDeleterIsEnabledThenCanDeferDeletions) { EXPECT_EQ(0, deleter->deferDeletionCalled); memoryManager->tryDeferDeletions(nullptr, 0, 0, 0); EXPECT_EQ(1, deleter->deferDeletionCalled); EXPECT_EQ(1u, wddm->destroyAllocationResult.called); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenWddmWhenAsyncDeleterIsDisabledThenCannotDeferDeletions) { memoryManager->setDeferredDeleter(nullptr); memoryManager->tryDeferDeletions(nullptr, 0, 0, 0); EXPECT_EQ(1u, wddm->destroyAllocationResult.called); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenMemoryManagerWithAsyncDeleterWhenCannotAllocateMemoryForTiledImageThenDrainIsCalledAndCreateAllocationIsCalledTwice) { cl_image_desc imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE3D; ImageInfo imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); wddm->createAllocationStatus = STATUS_GRAPHICS_NO_VIDEO_MEMORY; EXPECT_EQ(0, deleter->drainCalled); EXPECT_EQ(0u, wddm->createAllocationResult.called); deleter->expectDrainBlockingValue(true); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(0, imgInfo, true, {}, *hwInfo); memoryManager->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); EXPECT_EQ(1, deleter->drainCalled); EXPECT_EQ(2u, wddm->createAllocationResult.called); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, 
givenMemoryManagerWithAsyncDeleterWhenCanAllocateMemoryForTiledImageThenDrainIsNotCalledAndCreateAllocationIsCalledOnce) { cl_image_desc imgDesc; imgDesc.image_type = CL_MEM_OBJECT_IMAGE3D; ImageInfo imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); wddm->createAllocationStatus = STATUS_SUCCESS; wddm->mapGpuVaStatus = true; wddm->callBaseMapGpuVa = false; EXPECT_EQ(0, deleter->drainCalled); EXPECT_EQ(0u, wddm->createAllocationResult.called); EXPECT_EQ(0u, wddm->mapGpuVirtualAddressResult.called); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(0, imgInfo, true, {}, *hwInfo); auto allocation = memoryManager->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); EXPECT_EQ(0, deleter->drainCalled); EXPECT_EQ(1u, wddm->createAllocationResult.called); EXPECT_EQ(1u, wddm->mapGpuVirtualAddressResult.called); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerWithAsyncDeleterTest, givenMemoryManagerWithoutAsyncDeleterWhenCannotAllocateMemoryForTiledImageThenCreateAllocationIsCalledOnce) { memoryManager->setDeferredDeleter(nullptr); cl_image_desc imgDesc; imgDesc.image_type = CL_MEM_OBJECT_IMAGE3D; ImageInfo imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); wddm->createAllocationStatus = STATUS_GRAPHICS_NO_VIDEO_MEMORY; EXPECT_EQ(0u, wddm->createAllocationResult.called); AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(0, imgInfo, true, {}, *hwInfo); memoryManager->allocateGraphicsMemoryInPreferredPool(allocProperties, nullptr); EXPECT_EQ(1u, wddm->createAllocationResult.called); } TEST(WddmMemoryManagerDefaults, givenDefaultWddmMemoryManagerWhenItIsQueriedForInternalHeapBaseThenHeapInternalBaseIsReturned) { HardwareInfo *hwInfo; auto executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); auto wddm = new WddmMock(*executionEnvironment->rootDeviceEnvironments[0].get()); 
executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); wddm->init(); MockWddmMemoryManager memoryManager(*executionEnvironment); auto heapBase = wddm->getGfxPartition().Heap32[static_cast(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY)].Base; EXPECT_EQ(heapBase, memoryManager.getInternalHeapBaseAddress(0)); } TEST_F(MockWddmMemoryManagerTest, givenValidateAllocationFunctionWhenItIsCalledWithTripleAllocationThenSuccessIsReturned) { wddm->init(); MockWddmMemoryManager memoryManager(*executionEnvironment); auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, false, MemoryConstants::pageSize}, reinterpret_cast(0x1000))); EXPECT_TRUE(memoryManager.validateAllocationMock(wddmAlloc)); memoryManager.freeGraphicsMemory(wddmAlloc); } TEST_F(MockWddmMemoryManagerTest, givenEnabled64kbpagesWhenCreatingGraphicsMemoryForBufferWithoutHostPtrThen64kbAdressIsAllocated) { DebugManagerStateRestore dbgRestore; wddm->init(); DebugManager.flags.Enable64kbpages.set(true); MemoryManagerCreate memoryManager64k(true, false, *executionEnvironment); EXPECT_EQ(0, wddm->createAllocationResult.called); GraphicsAllocation *galloc = memoryManager64k.allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize64k, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY}); EXPECT_EQ(1, wddm->createAllocationResult.called); EXPECT_NE(nullptr, galloc); EXPECT_EQ(true, galloc->isLocked()); EXPECT_NE(nullptr, galloc->getUnderlyingBuffer()); EXPECT_EQ(0u, (uintptr_t)galloc->getUnderlyingBuffer() % MemoryConstants::pageSize64k); EXPECT_EQ(0u, (uintptr_t)galloc->getGpuAddress() % MemoryConstants::pageSize64k); memoryManager64k.freeGraphicsMemory(galloc); } TEST_F(OsAgnosticMemoryManagerUsingWddmTest, 
givenEnabled64kbPagesWhenAllocationIsCreatedWithSizeSmallerThan64kbThenGraphicsAllocationsHas64kbAlignedUnderlyingSize) { DebugManagerStateRestore dbgRestore; wddm->init(); DebugManager.flags.Enable64kbpages.set(true); MockWddmMemoryManager memoryManager(true, false, *executionEnvironment); AllocationData allocationData; allocationData.size = 1u; auto graphicsAllocation = memoryManager.allocateGraphicsMemory64kb(allocationData); EXPECT_NE(nullptr, graphicsAllocation); EXPECT_EQ(MemoryConstants::pageSize64k, graphicsAllocation->getUnderlyingBufferSize()); EXPECT_NE(0llu, graphicsAllocation->getGpuAddress()); EXPECT_NE(nullptr, graphicsAllocation->getUnderlyingBuffer()); EXPECT_EQ(1u, graphicsAllocation->getDefaultGmm()->resourceParams.Flags.Info.Cacheable); memoryManager.freeGraphicsMemory(graphicsAllocation); } TEST_F(MockWddmMemoryManagerTest, givenWddmWhenallocateGraphicsMemory64kbThenLockResultAndmapGpuVirtualAddressIsCalled) { DebugManagerStateRestore dbgRestore; DebugManager.flags.Enable64kbpages.set(true); wddm->init(); MockWddmMemoryManager memoryManager64k(*executionEnvironment); uint32_t lockCount = wddm->lockResult.called; uint32_t mapGpuVirtualAddressResult = wddm->mapGpuVirtualAddressResult.called; AllocationData allocationData; allocationData.size = MemoryConstants::pageSize64k; GraphicsAllocation *galloc = memoryManager64k.allocateGraphicsMemory64kb(allocationData); EXPECT_EQ(lockCount + 1, wddm->lockResult.called); EXPECT_EQ(mapGpuVirtualAddressResult + 1, wddm->mapGpuVirtualAddressResult.called); if (executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm()) { EXPECT_NE(nullptr, wddm->mapGpuVirtualAddressResult.cpuPtrPassed); } else { EXPECT_EQ(nullptr, wddm->mapGpuVirtualAddressResult.cpuPtrPassed); } memoryManager64k.freeGraphicsMemory(galloc); } TEST_F(MockWddmMemoryManagerTest, givenDefaultMemoryManagerWhenItIsCreatedThenAsyncDeleterEnabledIsTrue) { wddm->init(); WddmMemoryManager memoryManager(*executionEnvironment); 
EXPECT_TRUE(memoryManager.isAsyncDeleterEnabled()); EXPECT_NE(nullptr, memoryManager.getDeferredDeleter()); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWithNoRegisteredOsContextsWhenCallingIsMemoryBudgetExhaustedThenReturnFalse) { EXPECT_FALSE(memoryManager->isMemoryBudgetExhausted()); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerAnd32bitBuildThenSvmPartitionIsAlwaysInitialized) { if (is32bit) { EXPECT_EQ(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::HEAP_SVM), MemoryConstants::max32BitAddress); } } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWithRegisteredOsContextWhenCallingIsMemoryBudgetExhaustedThenReturnFalse) { executionEnvironment->prepareRootDeviceEnvironments(3u); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } executionEnvironment->initializeMemoryManager(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { auto wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[i].get())); wddm->init(); executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(new OSInterface()); executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->setWddm(wddm); executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique(wddm); } std::unique_ptr csr(createCommandStream(*executionEnvironment, 0u)); std::unique_ptr csr1(createCommandStream(*executionEnvironment, 1u)); std::unique_ptr csr2(createCommandStream(*executionEnvironment, 2u)); memoryManager->createAndRegisterOsContext(csr.get(), aub_stream::ENGINE_RCS, 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); memoryManager->createAndRegisterOsContext(csr1.get(), aub_stream::ENGINE_RCS, 2, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); 
memoryManager->createAndRegisterOsContext(csr2.get(), aub_stream::ENGINE_RCS, 3, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); EXPECT_FALSE(memoryManager->isMemoryBudgetExhausted()); } TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWithRegisteredOsContextWithExhaustedMemoryBudgetWhenCallingIsMemoryBudgetExhaustedThenReturnTrue) { executionEnvironment->prepareRootDeviceEnvironments(3u); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } executionEnvironment->initializeMemoryManager(); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { auto wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[i].get())); wddm->init(); executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(new OSInterface()); executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->setWddm(wddm); executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique(wddm); } std::unique_ptr csr(createCommandStream(*executionEnvironment, 0u)); std::unique_ptr csr1(createCommandStream(*executionEnvironment, 1u)); std::unique_ptr csr2(createCommandStream(*executionEnvironment, 2u)); memoryManager->createAndRegisterOsContext(csr.get(), aub_stream::ENGINE_RCS, 1, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); memoryManager->createAndRegisterOsContext(csr1.get(), aub_stream::ENGINE_RCS, 2, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); memoryManager->createAndRegisterOsContext(csr2.get(), aub_stream::ENGINE_RCS, 3, PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo), false, false, false); auto osContext = static_cast(memoryManager->getRegisteredEngines()[1].osContext); osContext->getResidencyController().setMemoryBudgetExhausted(); 
EXPECT_TRUE(memoryManager->isMemoryBudgetExhausted());
}

// A WddmMemoryManager constructed while EnableDeferredDeleter is true must
// report the async deleter as enabled and expose a deleter instance.
TEST_F(MockWddmMemoryManagerTest, givenEnabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsTrueAndDeleterIsNotNullptr) {
    // Scoped restore of the debug flags, matching the other tests in this file;
    // declared first so it is destroyed after the memory manager.
    DebugManagerStateRestore restorer;
    wddm->init();
    DebugManager.flags.EnableDeferredDeleter.set(true);
    WddmMemoryManager memoryManager(*executionEnvironment);
    EXPECT_TRUE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_NE(nullptr, memoryManager.getDeferredDeleter());
}

// A WddmMemoryManager constructed while EnableDeferredDeleter is false must
// report the async deleter as disabled and expose no deleter instance.
TEST_F(MockWddmMemoryManagerTest, givenDisabledAsyncDeleterFlagWhenMemoryManagerIsCreatedThenAsyncDeleterEnabledIsFalseAndDeleterIsNullptr) {
    // Scoped restore of the debug flags, matching the other tests in this file;
    // declared first so it is destroyed after the memory manager.
    DebugManagerStateRestore restorer;
    wddm->init();
    DebugManager.flags.EnableDeferredDeleter.set(false);
    WddmMemoryManager memoryManager(*executionEnvironment);
    EXPECT_FALSE(memoryManager.isAsyncDeleterEnabled());
    EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
}

TEST_F(MockWddmMemoryManagerTest, givenPageTableManagerWhenMapAuxGpuVaCalledThenUseWddmToMap) {
    wddm->init();
    WddmMemoryManager memoryManager(*executionEnvironment);
    auto mockMngr = new NiceMock();
    executionEnvironment->rootDeviceEnvironments[1]->pageTableManager.reset(mockMngr);
    auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(AllocationProperties(1, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY));
    GMM_DDI_UPDATEAUXTABLE givenDdiUpdateAuxTable = {};
    GMM_DDI_UPDATEAUXTABLE expectedDdiUpdateAuxTable = {};
    expectedDdiUpdateAuxTable.BaseGpuVA = allocation->getGpuAddress();
    expectedDdiUpdateAuxTable.BaseResInfo = allocation->getDefaultGmm()->gmmResourceInfo->peekHandle();
    expectedDdiUpdateAuxTable.DoNotWait = true;
    expectedDdiUpdateAuxTable.Map = true;
    EXPECT_CALL(*mockMngr,
updateAuxTable(_)).Times(1).WillOnce(Invoke([&](const GMM_DDI_UPDATEAUXTABLE *arg) {givenDdiUpdateAuxTable = *arg; return GMM_SUCCESS; })); auto result = memoryManager.mapAuxGpuVA(allocation); EXPECT_TRUE(result); EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &givenDdiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0); memoryManager.freeGraphicsMemory(allocation); } TEST_F(MockWddmMemoryManagerTest, givenRenderCompressedAllocationWhenMappedGpuVaThenMapAuxVa) { auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get(); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, false)); gmm->isRenderCompressed = true; D3DGPU_VIRTUAL_ADDRESS gpuVa = 0; WddmMock wddm(*executionEnvironment->rootDeviceEnvironments[1].get()); wddm.init(); auto mockMngr = new NiceMock(); rootDeviceEnvironment->pageTableManager.reset(mockMngr); GMM_DDI_UPDATEAUXTABLE givenDdiUpdateAuxTable = {}; GMM_DDI_UPDATEAUXTABLE expectedDdiUpdateAuxTable = {}; expectedDdiUpdateAuxTable.BaseGpuVA = GmmHelper::canonize(wddm.getGfxPartition().Standard.Base); expectedDdiUpdateAuxTable.BaseResInfo = gmm->gmmResourceInfo->peekHandle(); expectedDdiUpdateAuxTable.DoNotWait = true; expectedDdiUpdateAuxTable.Map = true; EXPECT_CALL(*mockMngr, updateAuxTable(_)).Times(1).WillOnce(Invoke([&](const GMM_DDI_UPDATEAUXTABLE *arg) {givenDdiUpdateAuxTable = *arg; return GMM_SUCCESS; })); auto hwInfoMock = hardwareInfoTable[wddm.getGfxPlatform()->eProductFamily]; ASSERT_NE(nullptr, hwInfoMock); auto result = wddm.mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, wddm.getGfxPartition().Standard.Base, wddm.getGfxPartition().Standard.Limit, 0u, gpuVa); ASSERT_TRUE(result); EXPECT_EQ(GmmHelper::canonize(wddm.getGfxPartition().Standard.Base), gpuVa); EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &givenDdiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0); } TEST_F(MockWddmMemoryManagerTest, 
givenRenderCompressedAllocationWhenReleaseingThenUnmapAuxVa) { wddm->init(); WddmMemoryManager memoryManager(*executionEnvironment); D3DGPU_VIRTUAL_ADDRESS gpuVa = 123; auto mockMngr = new NiceMock(); executionEnvironment->rootDeviceEnvironments[1]->pageTableManager.reset(mockMngr); auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(AllocationProperties(1, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY))); wddmAlloc->setGpuAddress(gpuVa); wddmAlloc->getDefaultGmm()->isRenderCompressed = true; GMM_DDI_UPDATEAUXTABLE givenDdiUpdateAuxTable = {}; GMM_DDI_UPDATEAUXTABLE expectedDdiUpdateAuxTable = {}; expectedDdiUpdateAuxTable.BaseGpuVA = gpuVa; expectedDdiUpdateAuxTable.BaseResInfo = wddmAlloc->getDefaultGmm()->gmmResourceInfo->peekHandle(); expectedDdiUpdateAuxTable.DoNotWait = true; expectedDdiUpdateAuxTable.Map = false; EXPECT_CALL(*mockMngr, updateAuxTable(_)).Times(1).WillOnce(Invoke([&](const GMM_DDI_UPDATEAUXTABLE *arg) {givenDdiUpdateAuxTable = *arg; return GMM_SUCCESS; })); memoryManager.freeGraphicsMemory(wddmAlloc); EXPECT_TRUE(memcmp(&expectedDdiUpdateAuxTable, &givenDdiUpdateAuxTable, sizeof(GMM_DDI_UPDATEAUXTABLE)) == 0); } TEST_F(MockWddmMemoryManagerTest, givenNonRenderCompressedAllocationWhenReleaseingThenDontUnmapAuxVa) { wddm->init(); WddmMemoryManager memoryManager(*executionEnvironment); auto mockMngr = new NiceMock(); executionEnvironment->rootDeviceEnvironments[1]->pageTableManager.reset(mockMngr); auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize})); wddmAlloc->getDefaultGmm()->isRenderCompressed = false; EXPECT_CALL(*mockMngr, updateAuxTable(_)).Times(0); memoryManager.freeGraphicsMemory(wddmAlloc); } TEST_F(MockWddmMemoryManagerTest, givenNonRenderCompressedAllocationWhenMappedGpuVaThenDontMapAuxVa) { auto rootDeviceEnvironment = 
executionEnvironment->rootDeviceEnvironments[1].get(); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, false)); gmm->isRenderCompressed = false; D3DGPU_VIRTUAL_ADDRESS gpuVa = 0; WddmMock wddm(*rootDeviceEnvironment); wddm.init(); auto mockMngr = new NiceMock(); rootDeviceEnvironment->pageTableManager.reset(mockMngr); EXPECT_CALL(*mockMngr, updateAuxTable(_)).Times(0); auto result = wddm.mapGpuVirtualAddress(gmm.get(), ALLOCATION_HANDLE, wddm.getGfxPartition().Standard.Base, wddm.getGfxPartition().Standard.Limit, 0u, gpuVa); ASSERT_TRUE(result); } TEST_F(MockWddmMemoryManagerTest, givenFailingAllocationWhenMappedGpuVaThenReturnFalse) { auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get(); std::unique_ptr gmm(new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, false)); gmm->isRenderCompressed = false; D3DGPU_VIRTUAL_ADDRESS gpuVa = 0; WddmMock wddm(*rootDeviceEnvironment); wddm.init(); auto result = wddm.mapGpuVirtualAddress(gmm.get(), 0, 0, 0, 0, gpuVa); ASSERT_FALSE(result); } TEST_F(MockWddmMemoryManagerTest, givenRenderCompressedFlagSetWhenInternalIsUnsetThenDontUpdateAuxTable) { D3DGPU_VIRTUAL_ADDRESS gpuVa = 0; wddm->init(); WddmMemoryManager memoryManager(*executionEnvironment); auto mockMngr = new NiceMock(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get(); rootDeviceEnvironment->pageTableManager.reset(mockMngr); auto myGmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, false); myGmm->isRenderCompressed = false; myGmm->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = 1; auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize})); delete wddmAlloc->getDefaultGmm(); wddmAlloc->setDefaultGmm(myGmm); EXPECT_CALL(*mockMngr, updateAuxTable(_)).Times(0); auto result = 
wddm->mapGpuVirtualAddress(myGmm, ALLOCATION_HANDLE, wddm->getGfxPartition().Standard.Base, wddm->getGfxPartition().Standard.Limit, 0u, gpuVa); EXPECT_TRUE(result); memoryManager.freeGraphicsMemory(wddmAlloc); } TEST_F(MockWddmMemoryManagerTest, givenRenderCompressedFlagSetWhenInternalIsSetThenUpdateAuxTable) { D3DGPU_VIRTUAL_ADDRESS gpuVa = 0; wddm->init(); WddmMemoryManager memoryManager(*executionEnvironment); auto mockMngr = new NiceMock(); auto rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[1].get(); rootDeviceEnvironment->pageTableManager.reset(mockMngr); auto myGmm = new Gmm(rootDeviceEnvironment->getGmmClientContext(), reinterpret_cast(123), 4096u, false); myGmm->isRenderCompressed = true; myGmm->gmmResourceInfo->getResourceFlags()->Info.RenderCompressed = 1; auto wddmAlloc = static_cast(memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, MemoryConstants::pageSize})); delete wddmAlloc->getDefaultGmm(); wddmAlloc->setDefaultGmm(myGmm); EXPECT_CALL(*mockMngr, updateAuxTable(_)).Times(1); auto result = wddm->mapGpuVirtualAddress(myGmm, ALLOCATION_HANDLE, wddm->getGfxPartition().Standard.Base, wddm->getGfxPartition().Standard.Limit, 0u, gpuVa); EXPECT_TRUE(result); memoryManager.freeGraphicsMemory(wddmAlloc); } TEST_F(WddmMemoryManagerTest2, givenReadOnlyMemoryWhenCreateAllocationFailsThenPopulateOsHandlesReturnsInvalidPointer) { OsHandleStorage handleStorage; handleStorage.fragmentCount = 1; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 0x1000; handleStorage.fragmentStorageData[0].freeTheFragment = false; EXPECT_CALL(*wddm, createAllocationsAndMapGpuVa(::testing::_)).WillOnce(::testing::Return(STATUS_GRAPHICS_NO_VIDEO_MEMORY)); auto result = memoryManager->populateOsHandles(handleStorage, 0); EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result); handleStorage.fragmentStorageData[0].freeTheFragment = 
true; memoryManager->cleanOsHandles(handleStorage, 0); } TEST_F(WddmMemoryManagerTest2, givenReadOnlyMemoryPassedToPopulateOsHandlesWhenCreateAllocationFailsThenAllocatedFragmentsAreNotStored) { OsHandleStorage handleStorage; OsHandle handle; handleStorage.fragmentCount = 2; handleStorage.fragmentStorageData[0].osHandleStorage = &handle; handleStorage.fragmentStorageData[0].cpuPtr = reinterpret_cast(0x1000); handleStorage.fragmentStorageData[0].fragmentSize = 0x1000; handleStorage.fragmentStorageData[1].cpuPtr = reinterpret_cast(0x2000); handleStorage.fragmentStorageData[1].fragmentSize = 0x6000; EXPECT_CALL(*wddm, createAllocationsAndMapGpuVa(::testing::_)).WillOnce(::testing::Return(STATUS_GRAPHICS_NO_VIDEO_MEMORY)); auto result = memoryManager->populateOsHandles(handleStorage, 0); auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); EXPECT_EQ(MemoryManager::AllocationStatus::InvalidHostPointer, result); auto numberOfStoredFragments = hostPtrManager->getFragmentCount(); EXPECT_EQ(0u, numberOfStoredFragments); EXPECT_EQ(nullptr, hostPtrManager->getFragment(handleStorage.fragmentStorageData[1].cpuPtr)); handleStorage.fragmentStorageData[1].freeTheFragment = true; memoryManager->cleanOsHandles(handleStorage, 0); } TEST(WddmMemoryManagerCleanupTest, givenUsedTagAllocationInWddmMemoryManagerWhenCleanupMemoryManagerThenDontAccessCsr) { ExecutionEnvironment &executionEnvironment = *platform()->peekExecutionEnvironment(); auto csr = std::unique_ptr(createCommandStream(executionEnvironment, 0)); auto wddm = new WddmMock(*executionEnvironment.rootDeviceEnvironments[0].get()); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); executionEnvironment.rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm); executionEnvironment.rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); 
executionEnvironment.memoryManager = std::make_unique(executionEnvironment); auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csr.get(), aub_stream::ENGINE_RCS, 1, preemptionMode, false, false, false); csr->setupContext(*osContext); auto tagAllocator = csr->getEventPerfCountAllocator(100); auto allocation = tagAllocator->getTag()->getBaseGraphicsAllocation(); allocation->updateTaskCount(1, csr->getOsContext().getContextId()); csr.reset(); EXPECT_NO_THROW(executionEnvironment.memoryManager.reset()); } TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingLockedAllocationThatDoesntNeedMakeResidentBeforeLockThenDontEvictAllocationFromWddmTemporaryResources) { auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); memoryManager->lockResource(allocation); EXPECT_FALSE(allocation->needsMakeResidentBeforeLock); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(0u, mockTemporaryResources->evictResourceResult.called); } TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingNotLockedAllocationThatDoesntNeedMakeResidentBeforeLockThenDontEvictAllocationFromWddmTemporaryResources) { auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); EXPECT_FALSE(allocation->isLocked()); EXPECT_FALSE(allocation->needsMakeResidentBeforeLock); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(0u, mockTemporaryResources->evictResourceResult.called); } TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingLockedAllocationThatNeedsMakeResidentBeforeLockThenRemoveTemporaryResource) { auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); allocation->needsMakeResidentBeforeLock = true; memoryManager->lockResource(allocation); 
memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(1u, mockTemporaryResources->removeResourceResult.called); } TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingNotLockedAllocationThatNeedsMakeResidentBeforeLockThenDontEvictAllocationFromWddmTemporaryResources) { auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); allocation->needsMakeResidentBeforeLock = true; EXPECT_FALSE(allocation->isLocked()); memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(0u, mockTemporaryResources->evictResourceResult.called); } TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingAllocationWithReservedGpuVirtualAddressThenReleaseTheAddress) { auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize})); uint64_t gpuAddress = 0x123; uint64_t sizeForFree = 0x1234; allocation->reservedGpuVirtualAddress = gpuAddress; allocation->reservedSizeForGpuVirtualAddress = sizeForFree; memoryManager->freeGraphicsMemory(allocation); EXPECT_EQ(1u, wddm->freeGpuVirtualAddressResult.called); EXPECT_EQ(gpuAddress, wddm->freeGpuVirtualAddressResult.uint64ParamPassed); EXPECT_EQ(sizeForFree, wddm->freeGpuVirtualAddressResult.sizePassed); } TEST_F(WddmMemoryManagerSimpleTest, givenAllocationWithReservedGpuVirtualAddressWhenMapCallFailsDuringCreateWddmAllocationThenReleasePreferredAddress) { MockWddmAllocation allocation; allocation.setAllocationType(GraphicsAllocation::AllocationType::KERNEL_ISA); uint64_t gpuAddress = 0x123; uint64_t sizeForFree = 0x1234; allocation.reservedGpuVirtualAddress = gpuAddress; allocation.reservedSizeForGpuVirtualAddress = sizeForFree; wddm->callBaseMapGpuVa = false; wddm->mapGpuVaStatus = false; memoryManager->createWddmAllocation(&allocation, nullptr); EXPECT_EQ(1u, wddm->freeGpuVirtualAddressResult.called); EXPECT_EQ(gpuAddress, 
wddm->freeGpuVirtualAddressResult.uint64ParamPassed); EXPECT_EQ(sizeForFree, wddm->freeGpuVirtualAddressResult.sizePassed); } TEST_F(WddmMemoryManagerSimpleTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedThenAllocateMemoryReserveGpuVa) { size_t size = 2 * MemoryConstants::megaByte; MockAllocationProperties properties{csr->getRootDeviceIndex(), true, size, GraphicsAllocation::AllocationType::SVM_CPU}; properties.alignment = size; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties)); ASSERT_NE(nullptr, allocation); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_EQ(allocation->getUnderlyingBuffer(), allocation->getDriverAllocatedCpuPtr()); //limited platforms will not use heap HeapIndex::HEAP_SVM if (executionEnvironment->rootDeviceEnvironments[allocation->getRootDeviceIndex()]->isFullRangeSvm()) { EXPECT_EQ(alignUp(allocation->getReservedAddressPtr(), size), reinterpret_cast(allocation->getGpuAddress())); } EXPECT_EQ((2 * size), allocation->getReservedAddressSize()); memoryManager->freeGraphicsMemory(allocation); } TEST_F(WddmMemoryManagerSimpleTest, givenWriteCombinedAllocationThenCpuAddressIsEqualToGpuAddress) { if (is32bit) { GTEST_SKIP(); } memoryManager.reset(new MockWddmMemoryManager(true, true, *executionEnvironment)); size_t size = 2 * MemoryConstants::megaByte; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, size, GraphicsAllocation::AllocationType::WRITE_COMBINED})); ASSERT_NE(nullptr, allocation); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_NE(nullptr, reinterpret_cast(allocation->getGpuAddress())); if (executionEnvironment->rootDeviceEnvironments[allocation->getRootDeviceIndex()]->isFullRangeSvm()) { EXPECT_EQ(allocation->getUnderlyingBuffer(), reinterpret_cast(allocation->getGpuAddress())); } 
memoryManager->freeGraphicsMemory(allocation); } /* Asserts that the fixture's memory manager has supportsMultiStorageResources set to true. NOTE(review): the test name ends in "SetToFalse" while the assertion is EXPECT_TRUE - confirm which one is intended and align the name or the check. */ TEST_F(WddmMemoryManagerSimpleTest, whenCreatingWddmMemoryManagerThenSupportsMultiStorageResourcesFlagIsSetToFalse) { EXPECT_TRUE(memoryManager->supportsMultiStorageResources); } TEST_F(WddmMemoryManagerSimpleTest, givenBufferHostMemoryAllocationAndLimitedRangeAnd32BitThenAllocationGoesToSvmHeap) { if (executionEnvironment->rootDeviceEnvironments[0]->isFullRangeSvm()) { GTEST_SKIP(); } memoryManager.reset(new MockWddmMemoryManager(true, true, *executionEnvironment)); size_t size = 2 * MemoryConstants::megaByte; auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties({0, size, GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY})); ASSERT_NE(nullptr, allocation); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); EXPECT_NE(nullptr, reinterpret_cast(allocation->getGpuAddress())); auto heap = is32bit ? HeapIndex::HEAP_SVM : HeapIndex::HEAP_STANDARD; EXPECT_LT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapBase(heap)), allocation->getGpuAddress()); EXPECT_GT(GmmHelper::canonize(memoryManager->getGfxPartition(0)->getHeapLimit(heap)), allocation->getGpuAddress()); memoryManager->freeGraphicsMemory(allocation); } TEST(WddmMemoryManager, givenMultipleRootDeviceWhenMemoryManagerGetsWddmThenWddmIsFromCorrectRootDevice) { DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(4); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto executionEnvironment = platform()->peekExecutionEnvironment(); prepareDeviceEnvironments(*executionEnvironment); MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { auto wddmFromRootDevice = executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->getWddm(); EXPECT_EQ(wddmFromRootDevice,
&wddmMemoryManager.getWddm(i)); } } TEST(WddmMemoryManager, givenMultipleRootDeviceWhenCreateMemoryManagerThenTakeMaxMallocRestrictionAvailable) { uint32_t numRootDevices = 4u; DebugManagerStateRestore restorer; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto executionEnvironment = platform()->peekExecutionEnvironment(); prepareDeviceEnvironments(*executionEnvironment); for (auto i = 0u; i < numRootDevices; i++) { auto wddm = static_cast(executionEnvironment->rootDeviceEnvironments[i]->osInterface->get()->getWddm()); wddm->minAddress = i * (numRootDevices - i); } MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); EXPECT_EQ(4u, wddmMemoryManager.getAlignedMallocRestrictions()->minAddress); } TEST(WddmMemoryManager, givenNoLocalMemoryOnAnyDeviceWhenIsCpuCopyRequiredIsCalledThenFalseIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableLocalMemory.set(false); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; auto executionEnvironment = platform()->peekExecutionEnvironment(); prepareDeviceEnvironments(*executionEnvironment); MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&restorer)); } TEST(WddmMemoryManager, givenLocalPointerPassedToIsCpuCopyRequiredThenFalseIsReturned) { auto executionEnvironment = platform()->peekExecutionEnvironment(); VariableBackup backup{&ultHwConfig}; ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; prepareDeviceEnvironments(*executionEnvironment); MockWddmMemoryManager wddmMemoryManager(*executionEnvironment); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); //call multiple times to make sure that result is constant EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); 
EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); EXPECT_FALSE(wddmMemoryManager.isCpuCopyRequired(&backup)); } compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.h000066400000000000000000000170241363734646600334010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/windows/os_environment_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/test/unit_test/os_interface/windows/mock_gdi_interface.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_gmm_page_table_mngr.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_wddm_residency_allocations_container.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" #include "test.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include using namespace NEO; using namespace ::testing; class WddmMemoryManagerFixture : public GdiDllFixture { public: void SetUp() override; void TearDown() override { GdiDllFixture::TearDown(); } ExecutionEnvironment *executionEnvironment; RootDeviceEnvironment *rootDeviceEnvironment = nullptr; std::unique_ptr memoryManager; WddmMock *wddm = nullptr; const uint32_t rootDeviceIndex = 0u; }; typedef ::Test WddmMemoryManagerTest; class MockWddmMemoryManagerFixture { public: void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); auto osEnvironment = new OsEnvironmentWin(); gdi = new MockGdi(); 
osEnvironment->gdi.reset(gdi); executionEnvironment->osEnvironment.reset(osEnvironment); wddm = static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment)); constexpr uint64_t heap32Base = (is32bit) ? 0x1000 : 0x800000000000; wddm->setHeap32(heap32Base, 1000 * MemoryConstants::pageSize - 1); wddm->init(); rootDeviceEnvironment->osInterface.reset(new OSInterface()); rootDeviceEnvironment->osInterface->get()->setWddm(wddm); rootDeviceEnvironment->memoryOperationsInterface = std::make_unique(wddm); executionEnvironment->initializeMemoryManager(); memoryManager = std::make_unique(*executionEnvironment); csr.reset(createCommandStream(*executionEnvironment, 0u)); auto hwInfo = rootDeviceEnvironment->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], 1, PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false); osContext->incRefInternal(); mockTemporaryResources = reinterpret_cast(wddm->getTemporaryResourcesContainer()); } void TearDown() { osContext->decRefInternal(); } RootDeviceEnvironment *rootDeviceEnvironment = nullptr; ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr csr; WddmMock *wddm = nullptr; MockWddmResidentAllocationsContainer *mockTemporaryResources; OsContext *osContext = nullptr; MockGdi *gdi = nullptr; }; typedef ::Test WddmMemoryManagerResidencyTest; class ExecutionEnvironmentFixture : public ::testing::Test { public: ExecutionEnvironmentFixture() { executionEnvironment = platform()->peekExecutionEnvironment(); } ExecutionEnvironment *executionEnvironment; }; class WddmMemoryManagerFixtureWithGmockWddm : public ExecutionEnvironmentFixture { public: MockWddmMemoryManager *memoryManager = nullptr; void SetUp() override { // wddm is deleted by memory manager wddm = new NiceMock(*executionEnvironment->rootDeviceEnvironments[0].get()); 
executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); ASSERT_NE(nullptr, wddm); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); osInterface = executionEnvironment->rootDeviceEnvironments[0]->osInterface.get(); memoryManager = new (std::nothrow) MockWddmMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); //assert we have memory manager ASSERT_NE(nullptr, memoryManager); csr.reset(createCommandStream(*executionEnvironment, 0u)); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], 1, preemptionMode, false, false, false); osContext->incRefInternal(); ON_CALL(*wddm, createAllocationsAndMapGpuVa(::testing::_)).WillByDefault(::testing::Invoke(wddm, &GmockWddm::baseCreateAllocationAndMapGpuVa)); } void TearDown() override { osContext->decRefInternal(); } NiceMock *wddm = nullptr; std::unique_ptr csr; OSInterface *osInterface; OsContext *osContext; }; using WddmMemoryManagerTest2 = WddmMemoryManagerFixtureWithGmockWddm; class BufferWithWddmMemory : public ::testing::Test, public WddmMemoryManagerFixture { public: protected: void SetUp() { WddmMemoryManagerFixture::SetUp(); tmp = context.getMemoryManager(); context.memoryManager = memoryManager.get(); flags = 0; } void TearDown() { context.memoryManager = tmp; WddmMemoryManagerFixture::TearDown(); } MemoryManager *tmp; MockContext context; cl_mem_flags flags; cl_int retVal; }; class WddmMemoryManagerSimpleTest : public MockWddmMemoryManagerFixture, public ::testing::Test { public: void SetUp() override { MockWddmMemoryManagerFixture::SetUp(); } 
void TearDown() override { MockWddmMemoryManagerFixture::TearDown(); } }; class MockWddmMemoryManagerTest : public ::testing::Test { public: void SetUp() override { executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 2); wddm = new WddmMock(*executionEnvironment->rootDeviceEnvironments[1].get()); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->get()->setWddm(wddm); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = std::make_unique(wddm); } HardwareInfo *hwInfo = nullptr; WddmMock *wddm = nullptr; ExecutionEnvironment *executionEnvironment = nullptr; const uint32_t rootDeviceIndex = 0u; }; using OsAgnosticMemoryManagerUsingWddmTest = MockWddmMemoryManagerTest; wddm_residency_controller_tests.cpp000066400000000000000000001460571363734646600351140ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/os_context_win.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm_interface.h" #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/source/os_interface/windows/wddm_residency_controller.h" #include "shared/test/unit_test/os_interface/windows/mock_gdi_interface.h" #include "opencl/source/platform/platform.h" #include 
"opencl/test/unit_test/libult/create_command_stream.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h" #include "test.h" #include "gmock/gmock.h" #include using namespace NEO; class MockWddmResidencyController : public WddmResidencyController { public: using WddmResidencyController::lastTrimFenceValue; using WddmResidencyController::trimCallbackHandle; using WddmResidencyController::trimCandidateList; using WddmResidencyController::trimCandidatesCount; using WddmResidencyController::trimResidency; using WddmResidencyController::trimResidencyToBudget; using WddmResidencyController::WddmResidencyController; uint32_t acquireLockCallCount = 0u; std::unique_lock acquireLock() override { acquireLockCallCount++; return WddmResidencyController::acquireLock(); } }; class MockOsContextWin : public OsContextWin { public: MockOsContextWin(Wddm &wddm, uint32_t contextId, DeviceBitfield deviceBitfield, aub_stream::EngineType engineType, PreemptionMode preemptionMode, bool lowPriority, bool internalEngine, bool rootDevice) : OsContextWin(wddm, contextId, deviceBitfield, engineType, preemptionMode, lowPriority, internalEngine, rootDevice), mockResidencyController(wddm, contextId) {} WddmResidencyController &getResidencyController() override { return mockResidencyController; }; MockWddmResidencyController mockResidencyController; }; struct WddmResidencyControllerTest : ::testing::Test { const uint32_t osContextId = 0u; void SetUp() { executionEnvironment = std::make_unique(); rootDeviceEnvironment = std::make_unique(*executionEnvironment); wddm = std::unique_ptr(static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment))); 
wddm->init(); mockOsContextWin = std::make_unique(*wddm, osContextId, 0, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); wddm->getWddmInterface()->createMonitoredFence(*mockOsContextWin); residencyController = &mockOsContextWin->mockResidencyController; } std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; std::unique_ptr wddm; std::unique_ptr mockOsContextWin; MockWddmResidencyController *residencyController = nullptr; }; struct WddmResidencyControllerWithGdiTest : ::testing::Test { const uint32_t osContextId = 0u; void SetUp() { executionEnvironment = std::make_unique(); rootDeviceEnvironment = std::make_unique(*executionEnvironment); wddm = std::unique_ptr(static_cast(Wddm::createWddm(nullptr, *rootDeviceEnvironment))); gdi = new MockGdi(); wddm->resetGdi(gdi); wddm->init(); mockOsContextWin = std::make_unique(*wddm, osContextId, 0, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); wddm->getWddmInterface()->createMonitoredFence(*mockOsContextWin); residencyController = &mockOsContextWin->mockResidencyController; residencyController->registerCallback(); } std::unique_ptr executionEnvironment; std::unique_ptr rootDeviceEnvironment; std::unique_ptr wddm; std::unique_ptr mockOsContextWin; MockWddmResidencyController *residencyController = nullptr; MockGdi *gdi; }; struct WddmResidencyControllerWithMockWddmTest : public WddmResidencyControllerTest { void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); wddm = new ::testing::NiceMock(*executionEnvironment->rootDeviceEnvironments[0].get()); wddm->resetGdi(new MockGdi()); auto preemptionMode = PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo); wddm->init(); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = 
std::make_unique(wddm); executionEnvironment->initializeMemoryManager(); memoryManager = std::make_unique(*executionEnvironment); csr.reset(createCommandStream(*executionEnvironment, 0u)); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], 1, preemptionMode, false, false, false); osContext->incRefInternal(); residencyController = &static_cast(osContext)->getResidencyController(); } void TearDown() { osContext->decRefInternal(); } ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr csr; ::testing::NiceMock *wddm = nullptr; OsContext *osContext; WddmResidencyController *residencyController; }; struct WddmResidencyControllerWithGdiAndMemoryManagerTest : ::testing::Test { const uint32_t osContextId = 0u; void SetUp() { executionEnvironment = platform()->peekExecutionEnvironment(); wddm = static_cast(Wddm::createWddm(nullptr, *executionEnvironment->rootDeviceEnvironments[0].get())); wddm->init(); gdi = new MockGdi(); wddm->resetGdi(gdi); executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->osInterface->get()->setWddm(wddm); executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(wddm); executionEnvironment->initializeMemoryManager(); memoryManager = std::make_unique(*executionEnvironment); csr.reset(createCommandStream(*executionEnvironment, 0u)); auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getHardwareInfo(); osContext = memoryManager->createAndRegisterOsContext(csr.get(), HwHelper::get(hwInfo->platform.eRenderCoreFamily).getGpgpuEngineInstances(*hwInfo)[0], 1, PreemptionHelper::getDefaultPreemptionMode(*hwInfo), false, false, false); osContext->incRefInternal(); residencyController = 
&static_cast(osContext)->getResidencyController(); } void TearDown() { osContext->decRefInternal(); } ExecutionEnvironment *executionEnvironment; std::unique_ptr memoryManager; std::unique_ptr csr; WddmMock *wddm = nullptr; OsContext *osContext = nullptr; MockGdi *gdi = nullptr; WddmResidencyController *residencyController = nullptr; }; TEST(WddmResidencyController, givenWddmResidencyControllerWhenItIsConstructedThenDoNotRegisterTrimCallback) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); auto gdi = new MockGdi(); auto wddm = std::unique_ptr{static_cast(Wddm::createWddm(nullptr, *executionEnvironment.rootDeviceEnvironments[0].get()))}; wddm->resetGdi(gdi); wddm->init(); std::memset(&gdi->getRegisterTrimNotificationArg(), 0, sizeof(D3DKMT_REGISTERTRIMNOTIFICATION)); MockWddmResidencyController residencyController{*wddm, 0u}; EXPECT_EQ(0u, wddm->registerTrimCallbackResult.called); EXPECT_EQ(nullptr, residencyController.trimCallbackHandle); EXPECT_EQ(nullptr, gdi->getRegisterTrimNotificationArg().Callback); EXPECT_EQ(nullptr, gdi->getRegisterTrimNotificationArg().Context); EXPECT_EQ(0u, gdi->getRegisterTrimNotificationArg().hDevice); } TEST(WddmResidencyController, givenWddmResidencyControllerWhenRegisterCallbackThenCallbackIsSetUpProperly) { MockExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1); auto gdi = new MockGdi(); auto wddm = std::unique_ptr{static_cast(Wddm::createWddm(nullptr, *executionEnvironment.rootDeviceEnvironments[0].get()))}; wddm->resetGdi(gdi); wddm->init(); std::memset(&gdi->getRegisterTrimNotificationArg(), 0, sizeof(D3DKMT_REGISTERTRIMNOTIFICATION)); WddmResidencyController residencyController{*wddm, 0u}; residencyController.registerCallback(); EXPECT_EQ(1u, wddm->registerTrimCallbackResult.called); EXPECT_EQ(reinterpret_cast(WddmResidencyController::trimCallback), gdi->getRegisterTrimNotificationArg().Callback); 
EXPECT_EQ(reinterpret_cast(&residencyController), gdi->getRegisterTrimNotificationArg().Context);
    EXPECT_EQ(wddm->getDevice(), gdi->getRegisterTrimNotificationArg().hDevice);
}

// Checks fence values below, at and above lastTrimFenceValue. The original asserted 99
// twice and never exercised the boundary value itself.
TEST_F(WddmResidencyControllerTest, givenWddmResidencyControllerWhenCallingWasAllocationUsedSinceLastTrimThenReturnCorrectValues) {
    residencyController->lastTrimFenceValue = 100;
    EXPECT_FALSE(residencyController->wasAllocationUsedSinceLastTrim(99));
    // NOTE(review): assumes "used since last trim" means strictly greater than the last
    // trim fence - confirm against WddmResidencyController::wasAllocationUsedSinceLastTrim.
    EXPECT_FALSE(residencyController->wasAllocationUsedSinceLastTrim(100));
    EXPECT_TRUE(residencyController->wasAllocationUsedSinceLastTrim(101));
}

// updateLastTrimFenceValue() must pick up whatever value the monitored fence's CPU
// address currently holds.
TEST_F(WddmResidencyControllerTest, givenWddmResidencyControllerThenUpdateLastTrimFenceValueUsesMonitoredFence) {
    *residencyController->getMonitoredFence().cpuAddress = 1234;
    residencyController->updateLastTrimFenceValue();
    EXPECT_EQ(1234u, residencyController->lastTrimFenceValue);

    *residencyController->getMonitoredFence().cpuAddress = 12345;
    residencyController->updateLastTrimFenceValue();
    EXPECT_EQ(12345u, residencyController->lastTrimFenceValue);
}

// Destroying the OS context (which owns the controller) must unregister the trim
// callback with the same callback address and handle that were registered.
TEST_F(WddmResidencyControllerWithGdiTest, givenWddmResidencyControllerWhenItIsDestructedThenUnregisterTrimCallback) {
    auto trimCallbackHandle = residencyController->trimCallbackHandle;
    auto trimCallbackAddress = reinterpret_cast(WddmResidencyController::trimCallback);

    std::memset(&gdi->getUnregisterTrimNotificationArg(), 0, sizeof(D3DKMT_UNREGISTERTRIMNOTIFICATION));
    mockOsContextWin.reset();

    EXPECT_EQ(trimCallbackAddress, gdi->getUnregisterTrimNotificationArg().Callback);
    EXPECT_EQ(trimCallbackHandle, gdi->getUnregisterTrimNotificationArg().Handle);
}

TEST_F(WddmResidencyControllerTest, givenUsedAllocationWhenCallingRemoveFromTrimCandidateListIfUsedThenRemoveIt) {
    MockWddmAllocation allocation;
    residencyController->addToTrimCandidateList(&allocation);
    residencyController->removeFromTrimCandidateListIfUsed(&allocation, false);
    EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId));
}

TEST_F(WddmResidencyControllerTest,
givenWddmResidencyControllerWhenIsMemoryExhaustedIsCalledThenReturnCorrectResult) {
    EXPECT_FALSE(residencyController->isMemoryBudgetExhausted());
    residencyController->setMemoryBudgetExhausted();
    EXPECT_TRUE(residencyController->isMemoryBudgetExhausted());
}

// Removing an allocation that was never added must leave it at the "unused" position.
TEST_F(WddmResidencyControllerTest, givenUnusedAllocationWhenCallingRemoveFromTrimCandidateListIfUsedThenIgnore) {
    MockWddmAllocation allocation;
    residencyController->removeFromTrimCandidateListIfUsed(&allocation, false);
    EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId));
}

// After adding, the allocation reports a valid position and the list holds the
// allocation at exactly that index.
TEST_F(WddmResidencyControllerTest, addToTrimCandidateListPlacesAllocationInContainerAndAssignsPosition) {
    MockWddmAllocation allocation;
    residencyController->addToTrimCandidateList(&allocation);

    EXPECT_NE(0u, residencyController->trimCandidateList.size());
    EXPECT_NE(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId));

    const size_t position = allocation.getTrimCandidateListPosition(osContextId);
    ASSERT_LT(position, residencyController->trimCandidateList.size());

    EXPECT_EQ(&allocation, residencyController->trimCandidateList[position]);
}

// Adding the same allocation a second time must change neither the list size nor the
// allocation's recorded position.
TEST_F(WddmResidencyControllerTest, addToTrimCandidateListDoesNotInsertAllocationAlreadyOnTheList) {
    MockWddmAllocation allocation;

    residencyController->trimCandidateList.resize(0);

    residencyController->addToTrimCandidateList(&allocation);

    EXPECT_NE(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId));

    const size_t position = allocation.getTrimCandidateListPosition(osContextId);
    ASSERT_LT(position, residencyController->trimCandidateList.size());

    EXPECT_EQ(&allocation, residencyController->trimCandidateList[position]);

    const size_t previousSize = residencyController->trimCandidateList.size();
    residencyController->addToTrimCandidateList(&allocation);

    EXPECT_EQ(previousSize, residencyController->trimCandidateList.size());
    EXPECT_EQ(position, allocation.getTrimCandidateListPosition(osContextId));
}

TEST_F(WddmResidencyControllerTest,
removeFromTrimCandidateListAssignsUnusedPosition) {
    MockWddmAllocation allocation;

    residencyController->addToTrimCandidateList(&allocation);
    residencyController->removeFromTrimCandidateList(&allocation, false);

    EXPECT_EQ(trimListUnusedPosition, allocation.getTrimCandidateListPosition(osContextId));
}

// If the slot the allocation occupied still exists after removal, it must no longer
// point at the allocation.
TEST_F(WddmResidencyControllerTest, removeFromTrimCandidateListRemovesAllocationInAssignedPosition) {
    MockWddmAllocation allocation;

    residencyController->addToTrimCandidateList(&allocation);
    const size_t position = allocation.getTrimCandidateListPosition(osContextId);

    residencyController->removeFromTrimCandidateList(&allocation, false);

    if (position < residencyController->trimCandidateList.size()) {
        EXPECT_NE(&allocation, residencyController->trimCandidateList[position]);
    }
}

// Removing the only entry must leave an empty list.
TEST_F(WddmResidencyControllerTest, removeFromTrimCandidateListRemovesLastAllocation) {
    MockWddmAllocation allocation;

    residencyController->trimCandidateList.resize(0);

    residencyController->addToTrimCandidateList(&allocation);
    residencyController->removeFromTrimCandidateList(&allocation, false);

    EXPECT_EQ(0u, residencyController->trimCandidateList.size());
}

// Layout before removal: [allocation1, null, null, null, allocation2]. Removing the
// tail entry must also drop the null entries directly in front of it.
TEST_F(WddmResidencyControllerTest, removeFromTrimCandidateListRemovesLastAllocationAndAllPreviousEmptyEntries) {
    MockWddmAllocation allocation1, allocation2;

    residencyController->trimCandidateList.resize(0);

    residencyController->addToTrimCandidateList(&allocation1);

    for (int emptySlot = 0; emptySlot < 3; ++emptySlot) {
        residencyController->trimCandidateList.push_back(nullptr);
    }

    residencyController->addToTrimCandidateList(&allocation2);

    EXPECT_EQ(5u, residencyController->trimCandidateList.size());

    residencyController->removeFromTrimCandidateList(&allocation2, false);

    EXPECT_EQ(1u, residencyController->trimCandidateList.size());
}

TEST_F(WddmResidencyControllerTest, successiveAddingToTrimCandidateListAssignsNewPositions) {
    MockWddmAllocation allocation1, allocation2, allocation3;
residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); EXPECT_EQ(3u, residencyController->trimCandidateList.size()); EXPECT_NE(allocation1.getTrimCandidateListPosition(osContextId), allocation2.getTrimCandidateListPosition(osContextId)); EXPECT_NE(allocation2.getTrimCandidateListPosition(osContextId), allocation3.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, DISABLED_removingNotLastAllocationFromTrimCandidateListSubstituesLastPositionAllocation) { MockWddmAllocation allocation1, allocation2, allocation3; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); residencyController->removeFromTrimCandidateList(&allocation2, false); EXPECT_EQ(2u, residencyController->trimCandidateList.size()); EXPECT_EQ(2u, allocation3.getTrimCandidateListPosition(osContextId)); EXPECT_NE(allocation2.getTrimCandidateListPosition(osContextId), allocation3.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, removingNotLastAllocationFromTrimCandidateListPutsNullEntry) { MockWddmAllocation allocation1, allocation2, allocation3; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); size_t position2 = allocation2.getTrimCandidateListPosition(osContextId); size_t position3 = allocation3.getTrimCandidateListPosition(osContextId); residencyController->removeFromTrimCandidateList(&allocation2, false); EXPECT_EQ(3u, residencyController->trimCandidateList.size()); EXPECT_EQ(2u, position3); EXPECT_EQ(nullptr, residencyController->trimCandidateList[position2]); } TEST_F(WddmResidencyControllerTest, compactTrimCandidateListRemovesInitialNullEntriesAndUpdatesPositions) { 
MockWddmAllocation allocation1, allocation2, allocation3, allocation4; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); residencyController->addToTrimCandidateList(&allocation4); allocation3.getTrimCandidateListPosition(osContextId); allocation4.getTrimCandidateListPosition(osContextId); residencyController->removeFromTrimCandidateList(&allocation2, false); residencyController->removeFromTrimCandidateList(&allocation1, false); EXPECT_EQ(4u, residencyController->trimCandidateList.size()); residencyController->compactTrimCandidateList(); EXPECT_EQ(2u, residencyController->trimCandidateList.size()); EXPECT_EQ(residencyController->trimCandidateList[0], &allocation3); EXPECT_EQ(0u, allocation3.getTrimCandidateListPosition(osContextId)); EXPECT_EQ(residencyController->trimCandidateList[1], &allocation4); EXPECT_EQ(1u, allocation4.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerTest, compactTrimCandidateListWithNonNullEntries) { MockWddmAllocation allocation1, allocation2, allocation3, allocation4; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); residencyController->addToTrimCandidateList(&allocation4); EXPECT_EQ(4u, residencyController->trimCandidateList.size()); residencyController->compactTrimCandidateList(); EXPECT_EQ(4u, residencyController->trimCandidateList.size()); } TEST_F(WddmResidencyControllerTest, checkTrimCandidateListCompaction) { bool comapactionRequired; residencyController->trimCandidatesCount = 10; residencyController->trimCandidateList.resize(20); comapactionRequired = residencyController->checkTrimCandidateListCompaction(); EXPECT_TRUE(comapactionRequired); residencyController->trimCandidatesCount = 5; residencyController->trimCandidateList.resize(20); comapactionRequired 
= residencyController->checkTrimCandidateListCompaction(); EXPECT_TRUE(comapactionRequired); residencyController->trimCandidatesCount = 18; residencyController->trimCandidateList.resize(20); comapactionRequired = residencyController->checkTrimCandidateListCompaction(); EXPECT_FALSE(comapactionRequired); } TEST_F(WddmResidencyControllerWithGdiTest, givenNotUsedAllocationsFromPreviousPeriodicTrimWhenTrimResidencyPeriodicTrimIsCalledThenAllocationsAreEvictedMarkedAndRemovedFromTrimCandidateList) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // allocations have fence value == 0 by default MockWddmAllocation allocation1, allocation2; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(0, osContextId); allocation1.getResidencyData().resident[osContextId] = true; allocation2.getResidencyData().resident[osContextId] = true; // Set last periodic fence value residencyController->lastTrimFenceValue = 10; // Set current fence value to greater value residencyController->getMonitoredFence().currentFenceValue = 20; wddm->makeNonResidentResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); // 2 allocations evicted EXPECT_EQ(2u, wddm->makeNonResidentResult.called); // removed from trim candidate list EXPECT_EQ(0u, residencyController->peekTrimCandidateList().size()); // marked nonresident EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiTest, givenOneUsedAllocationFromPreviousPeriodicTrimWhenTrimResidencyPeriodicTrimIsCalledThenOneAllocationIsTrimmed) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 
1; trimNotification.NumBytesToTrim = 0; // allocations have fence value == 0 by default MockWddmAllocation allocation1, allocation2; allocation1.getResidencyData().resident[osContextId] = true; // mark allocation used from last periodic trim allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(11, osContextId); allocation2.getResidencyData().resident[osContextId] = true; // Set last periodic fence value residencyController->lastTrimFenceValue = 10; // Set current fence value to greater value residencyController->getMonitoredFence().currentFenceValue = 20; wddm->makeNonResidentResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); // 1 allocation evicted EXPECT_EQ(1u, wddm->makeNonResidentResult.called); // removed from trim candidate list EXPECT_EQ(trimListUnusedPosition, allocation1.getTrimCandidateListPosition(osContextId)); //marked nonresident EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); // second stays resident EXPECT_TRUE(allocation2.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, givenTripleAllocationWithUsedAndUnusedFragmentsSincePreviousTrimWhenTrimResidencyPeriodicTrimIsCalledThenProperFragmentsAreEvictedAndMarked) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // 3-fragment Allocation void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1500); auto allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptr)); // whole allocation unused since previous trim allocationTriple->getResidencyData().updateCompletionData(0, osContextId); 
EXPECT_EQ(3u, allocationTriple->fragmentsStorage.fragmentCount); allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->updateCompletionData(0, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId] = true; // this fragment was used allocationTriple->fragmentsStorage.fragmentStorageData[1].residency->updateCompletionData(11, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId] = true; allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->updateCompletionData(0, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->resident[osContextId] = true; // Set last periodic fence value *residencyController->getMonitoredFence().cpuAddress = 10; residencyController->updateLastTrimFenceValue(); // Set current fence value to greater value residencyController->getMonitoredFence().currentFenceValue = 20; wddm->makeNonResidentResult.called = 0; residencyController->addToTrimCandidateList(allocationTriple); residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); // 2 fragments evicted with one call EXPECT_EQ(1u, wddm->makeNonResidentResult.called); // marked nonresident EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId]); EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->resident[osContextId]); memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithGdiTest, givenPeriodicTrimWhenTrimCallbackCalledThenLastPeriodicTrimFenceIsSetToCurrentFenceValue) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; // Set last periodic fence value residencyController->lastTrimFenceValue = 10; // Set current fence value to greater value *residencyController->getMonitoredFence().cpuAddress = 20; 
residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim);

    EXPECT_EQ(20u, residencyController->lastTrimFenceValue);
}

// A RestartPeriodicTrim notification must refresh lastTrimFenceValue from the value
// stored at the monitored fence's CPU address (10 -> 20 here).
TEST_F(WddmResidencyControllerWithGdiTest, givenRestartPeriodicTrimWhenTrimCallbackCalledThenLastPeriodicTrimFenceIsSetToCurrentFenceValue) {
    D3DKMT_TRIMNOTIFICATION trimNotification = {0};
    trimNotification.Flags.RestartPeriodicTrim = 1;
    trimNotification.NumBytesToTrim = 0;

    // Set last periodic fence value
    residencyController->lastTrimFenceValue = 10;
    // Set current fence value to greater value
    *residencyController->getMonitoredFence().cpuAddress = 20;

    residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim);

    EXPECT_EQ(20u, residencyController->lastTrimFenceValue);
}

// A zero-byte budget is reported as success.
TEST_F(WddmResidencyControllerWithGdiTest, trimToBudgetWithZeroSizeReturnsTrue) {
    const bool status = residencyController->trimResidencyToBudget(0);

    EXPECT_TRUE(status);
}

TEST_F(WddmResidencyControllerWithGdiTest, trimToBudgetAllDoneAllocations) {
    gdi->setNonZeroNumBytesToTrimInEvict();

    MockWddmAllocation allocation1, allocation2, allocation3;
    allocation1.getResidencyData().resident[osContextId] = true;
    allocation1.getResidencyData().updateCompletionData(0, osContextId);

    allocation2.getResidencyData().updateCompletionData(1, osContextId);
    allocation2.getResidencyData().resident[osContextId] = true;

    allocation3.getResidencyData().updateCompletionData(2, osContextId);
    allocation3.getResidencyData().resident[osContextId] = true;

    *residencyController->getMonitoredFence().cpuAddress = 1;
    residencyController->getMonitoredFence().lastSubmittedFence = 1;
    residencyController->getMonitoredFence().currentFenceValue = 1;

    wddm->makeNonResidentResult.called = 0;

    residencyController->addToTrimCandidateList(&allocation1);
    residencyController->addToTrimCandidateList(&allocation2);
    residencyController->addToTrimCandidateList(&allocation3);

    residencyController->trimResidencyToBudget(3 * 4096);

    EXPECT_EQ(2u,
wddm->makeNonResidentResult.called); EXPECT_EQ(1u, residencyController->peekTrimCandidatesCount()); residencyController->compactTrimCandidateList(); EXPECT_EQ(1u, residencyController->peekTrimCandidateList().size()); EXPECT_EQ(trimListUnusedPosition, allocation1.getTrimCandidateListPosition(osContextId)); EXPECT_EQ(trimListUnusedPosition, allocation2.getTrimCandidateListPosition(osContextId)); EXPECT_NE(trimListUnusedPosition, allocation3.getTrimCandidateListPosition(osContextId)); } TEST_F(WddmResidencyControllerWithGdiTest, trimToBudgetReturnsFalseWhenNumBytesToTrimIsNotZero) { gdi->setNonZeroNumBytesToTrimInEvict(); MockWddmAllocation allocation1; allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; wddm->makeNonResidentResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); bool status = residencyController->trimResidencyToBudget(3 * 4096); EXPECT_EQ(1u, wddm->makeNonResidentResult.called); EXPECT_EQ(0u, residencyController->peekTrimCandidateList().size()); EXPECT_FALSE(status); } TEST_F(WddmResidencyControllerWithGdiTest, trimToBudgetStopsEvictingWhenNumBytesToTrimIsZero) { WddmAllocation allocation1(0, GraphicsAllocation::AllocationType::UNKNOWN, reinterpret_cast(0x1000), 0x1000, nullptr, MemoryPool::MemoryNull); WddmAllocation allocation2(0, GraphicsAllocation::AllocationType::UNKNOWN, reinterpret_cast(0x1000), 0x3000, nullptr, MemoryPool::MemoryNull); WddmAllocation allocation3(0, GraphicsAllocation::AllocationType::UNKNOWN, reinterpret_cast(0x1000), 0x1000, nullptr, MemoryPool::MemoryNull); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] 
= true;

    allocation3.getResidencyData().updateCompletionData(2, osContextId);
    allocation3.getResidencyData().resident[osContextId] = true;

    // monitored fence state: CPU-visible value, last submitted and current value are all 1
    *residencyController->getMonitoredFence().cpuAddress = 1;
    residencyController->getMonitoredFence().lastSubmittedFence = 1;
    residencyController->getMonitoredFence().currentFenceValue = 1;

    wddm->makeNonResidentResult.called = 0;

    residencyController->addToTrimCandidateList(&allocation1);
    residencyController->addToTrimCandidateList(&allocation2);
    residencyController->addToTrimCandidateList(&allocation3);

    bool status = residencyController->trimResidencyToBudget(3 * 4096);

    // budget satisfied after two evictions; allocation3 stays on the candidate list
    EXPECT_TRUE(status);
    EXPECT_EQ(2u, wddm->makeNonResidentResult.called);
    EXPECT_EQ(1u, residencyController->peekTrimCandidateList().size());

    EXPECT_EQ(trimListUnusedPosition, allocation1.getTrimCandidateListPosition(osContextId));
    EXPECT_EQ(trimListUnusedPosition, allocation2.getTrimCandidateListPosition(osContextId));
    EXPECT_NE(trimListUnusedPosition, allocation3.getTrimCandidateListPosition(osContextId));
}

/* Three resident allocations with completion fences 0, 1 and 2 while the monitored
   fence is at 1; the expectations follow the third addToTrimCandidateList call. */
TEST_F(WddmResidencyControllerWithGdiTest, trimToBudgetMarksEvictedAllocationNonResident) {
    gdi->setNonZeroNumBytesToTrimInEvict();

    MockWddmAllocation allocation1, allocation2, allocation3;
    allocation1.getResidencyData().resident[osContextId] = true;
    allocation1.getResidencyData().updateCompletionData(0, osContextId);

    allocation2.getResidencyData().updateCompletionData(1, osContextId);
    allocation2.getResidencyData().resident[osContextId] = true;

    allocation3.getResidencyData().updateCompletionData(2, osContextId);
    allocation3.getResidencyData().resident[osContextId] = true;

    *residencyController->getMonitoredFence().cpuAddress = 1;
    residencyController->getMonitoredFence().lastSubmittedFence = 1;
    residencyController->getMonitoredFence().currentFenceValue = 1;

    wddm->makeNonResidentResult.called = 0;

    residencyController->addToTrimCandidateList(&allocation1);
    residencyController->addToTrimCandidateList(&allocation2);
residencyController->addToTrimCandidateList(&allocation3); residencyController->trimResidencyToBudget(3 * 4096); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation3.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiTest, trimToBudgetWaitsFromCpuWhenLastFenceIsGreaterThanMonitored) { gdi->setNonZeroNumBytesToTrimInEvict(); MockWddmAllocation allocation1; allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(2, osContextId); *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 2; residencyController->getMonitoredFence().currentFenceValue = 3; wddm->makeNonResidentResult.called = 0; wddm->waitFromCpuResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); gdi->getWaitFromCpuArg().hDevice = 0; residencyController->trimResidencyToBudget(3 * 4096); EXPECT_EQ(1u, wddm->makeNonResidentResult.called); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_EQ(wddm->getDevice(), gdi->getWaitFromCpuArg().hDevice); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, trimToBudgetEvictsDoneFragmentsOnly) { gdi->setNonZeroNumBytesToTrimInEvict(); void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1000); WddmAllocation allocation1(0, GraphicsAllocation::AllocationType::UNKNOWN, ptr, 0x1000, nullptr, MemoryPool::MemoryNull); WddmAllocation allocation2(0, GraphicsAllocation::AllocationType::UNKNOWN, ptr, 0x1000, nullptr, MemoryPool::MemoryNull); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] = true; void *ptrTriple = reinterpret_cast(reinterpret_cast(ptr) + 0x500); 
WddmAllocation *allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptrTriple)); allocationTriple->getResidencyData().updateCompletionData(1, osContextId); allocationTriple->getResidencyData().resident[osContextId] = true; EXPECT_EQ(3u, allocationTriple->fragmentsStorage.fragmentCount); for (uint32_t i = 0; i < 3; i++) { allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->updateCompletionData(1, osContextId); allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->resident[osContextId] = true; } // This should not be evicted allocationTriple->fragmentsStorage.fragmentStorageData[1].residency->updateCompletionData(2, osContextId); residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(allocationTriple); residencyController->addToTrimCandidateList(&allocation2); *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; residencyController->getMonitoredFence().currentFenceValue = 2; wddm->makeNonResidentResult.called = 0; residencyController->trimResidencyToBudget(3 * 4096); EXPECT_EQ(2u, wddm->makeNonResidentResult.called); EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[0].residency->resident[osContextId]); EXPECT_TRUE(allocationTriple->fragmentsStorage.fragmentStorageData[1].residency->resident[osContextId]); EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[2].residency->resident[osContextId]); memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithGdiTest, givenThreeAllocationsAlignedSizeBiggerThanAllocSizeWhenBudgetEqualTwoAlignedAllocationThenEvictOnlyTwo) { gdi->setNonZeroNumBytesToTrimInEvict(); size_t underlyingSize = 0xF00; size_t alignedSize = 0x1000; size_t budget = 2 * alignedSize; //trim budget should consider aligned 
size, not underlying, so if function considers underlying, it should evict three, not two EXPECT_GT((3 * underlyingSize), budget); EXPECT_LT((2 * underlyingSize), budget); void *ptr1 = reinterpret_cast(wddm->virtualAllocAddress + 0x1000); void *ptr2 = reinterpret_cast(wddm->virtualAllocAddress + 0x3000); void *ptr3 = reinterpret_cast(wddm->virtualAllocAddress + 0x5000); WddmAllocation allocation1(0, GraphicsAllocation::AllocationType::UNKNOWN, ptr1, underlyingSize, nullptr, MemoryPool::MemoryNull); WddmAllocation allocation2(0, GraphicsAllocation::AllocationType::UNKNOWN, ptr2, underlyingSize, nullptr, MemoryPool::MemoryNull); WddmAllocation allocation3(0, GraphicsAllocation::AllocationType::UNKNOWN, ptr3, underlyingSize, nullptr, MemoryPool::MemoryNull); allocation1.getResidencyData().resident[osContextId] = true; allocation1.getResidencyData().updateCompletionData(0, osContextId); allocation2.getResidencyData().updateCompletionData(1, osContextId); allocation2.getResidencyData().resident[osContextId] = true; allocation3.getResidencyData().updateCompletionData(1, osContextId); allocation3.getResidencyData().resident[osContextId] = true; *residencyController->getMonitoredFence().cpuAddress = 1; residencyController->getMonitoredFence().lastSubmittedFence = 1; residencyController->getMonitoredFence().currentFenceValue = 1; wddm->makeNonResidentResult.called = 0; residencyController->addToTrimCandidateList(&allocation1); residencyController->addToTrimCandidateList(&allocation2); residencyController->addToTrimCandidateList(&allocation3); bool status = residencyController->trimResidencyToBudget(budget); EXPECT_TRUE(status); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation3.getResidencyData().resident[osContextId]); } using WddmResidencyControllerLockTest = WddmResidencyControllerWithGdiTest; TEST_F(WddmResidencyControllerLockTest, 
givenPeriodicTrimWhenTrimmingResidencyThenLockOnce) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.NumBytesToTrim = 0; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(1, residencyController->acquireLockCallCount); } TEST_F(WddmResidencyControllerLockTest, givenTrimToBudgetWhenTrimmingResidencyThenLockOnce) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.TrimToBudget = 1; trimNotification.NumBytesToTrim = 0; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(1, residencyController->acquireLockCallCount); } TEST_F(WddmResidencyControllerLockTest, givenPeriodicTrimAndTrimToBudgetWhenTrimmingResidencyThenLockTwice) { D3DKMT_TRIMNOTIFICATION trimNotification = {0}; trimNotification.Flags.PeriodicTrim = 1; trimNotification.Flags.TrimToBudget = 1; trimNotification.NumBytesToTrim = 0; residencyController->trimResidency(trimNotification.Flags, trimNotification.NumBytesToTrim); EXPECT_EQ(2, residencyController->acquireLockCallCount); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, makeResidentResidencyAllocationsMarksAllocationsResident) { MockWddmAllocation allocation1, allocation2, allocation3, allocation4; ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4}; residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(allocation1.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation2.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation3.getResidencyData().resident[osContextId]); EXPECT_TRUE(allocation4.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, makeResidentResidencyAllocationsUpdatesLastFence) { MockWddmAllocation allocation1, allocation2, allocation3, allocation4; ResidencyContainer residencyPack{&allocation1, &allocation2, 
&allocation3, &allocation4}; residencyController->getMonitoredFence().currentFenceValue = 20; residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_EQ(20u, allocation1.getResidencyData().getFenceValueForContextId(osContext->getContextId())); EXPECT_EQ(20u, allocation2.getResidencyData().getFenceValueForContextId(osContext->getContextId())); EXPECT_EQ(20u, allocation3.getResidencyData().getFenceValueForContextId(osContext->getContextId())); EXPECT_EQ(20u, allocation4.getResidencyData().getFenceValueForContextId(osContext->getContextId())); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, makeResidentResidencyAllocationsMarksTripleAllocationsResident) { MockWddmAllocation allocation1, allocation2; void *ptr = reinterpret_cast(wddm->virtualAllocAddress + 0x1500); WddmAllocation *allocationTriple = (WddmAllocation *)memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptr); ResidencyContainer residencyPack{&allocation1, allocationTriple, &allocation2}; residencyController->makeResidentResidencyAllocations(residencyPack); for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) { EXPECT_TRUE(allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->resident[osContextId]); } EXPECT_EQ(5u, gdi->getMakeResidentArg().NumAllocations); memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithGdiAndMemoryManagerTest, makeResidentResidencyAllocationsSetsLastFencePLusOneForTripleAllocations) { MockWddmAllocation allocation1, allocation2; WddmAllocation *allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, reinterpret_cast(0x1500))); residencyController->getMonitoredFence().currentFenceValue = 20; ResidencyContainer residencyPack{&allocation1, allocationTriple, 
&allocation2}; residencyController->makeResidentResidencyAllocations(residencyPack); for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) { EXPECT_EQ(20u, allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->getFenceValueForContextId(0)); } memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenDontMarkAllocationsAsResident) { MockWddmAllocation allocation1, allocation2, allocation3, allocation4; auto makeResidentWithOutBytesToTrim = [](const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t size) -> bool { *numberOfBytesToTrim = 4 * 4096; return false; }; ON_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).WillByDefault(::testing::Invoke(makeResidentWithOutBytesToTrim)); EXPECT_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(2); ResidencyContainer residencyPack{&allocation1, &allocation2, &allocation3, &allocation4}; bool result = residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_FALSE(result); EXPECT_FALSE(allocation1.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation2.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation3.getResidencyData().resident[osContextId]); EXPECT_FALSE(allocation4.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenDontMarkTripleAllocationsAsResident) { MockWddmAllocation allocation1, allocation2; void *ptr = reinterpret_cast(wddm->getWddmMinAddress() + 0x1500); WddmAllocation *allocationTriple = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, 2 * MemoryConstants::pageSize}, ptr)); 
ASSERT_NE(nullptr, allocationTriple); auto makeResidentWithOutBytesToTrim = [](const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t size) -> bool { *numberOfBytesToTrim = 4 * 4096; return false; }; ON_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).WillByDefault(::testing::Invoke(makeResidentWithOutBytesToTrim)); EXPECT_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(2); ResidencyContainer residencyPack{&allocation1, allocationTriple, &allocation2}; bool result = residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_FALSE(result); for (uint32_t i = 0; i < allocationTriple->fragmentsStorage.fragmentCount; i++) { EXPECT_FALSE(allocationTriple->fragmentsStorage.fragmentStorageData[i].residency->resident[osContextId]); } memoryManager->freeGraphicsMemory(allocationTriple); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenCallItAgainWithCantTrimFurtherSetToTrue) { MockWddmAllocation allocation1; auto makeResidentWithOutBytesToTrim = [](const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t size) -> bool { *numberOfBytesToTrim = 4 * 4096; return false; }; ON_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).WillByDefault(::testing::Invoke(makeResidentWithOutBytesToTrim)); EXPECT_CALL(*wddm, makeResident(::testing::_, ::testing::_, false, ::testing::_, ::testing::_)).Times(1); EXPECT_CALL(*wddm, makeResident(::testing::_, ::testing::_, true, ::testing::_, ::testing::_)).Times(1); ResidencyContainer residencyPack{&allocation1}; bool result = residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_FALSE(result); } TEST_F(WddmResidencyControllerWithMockWddmTest, 
givenAllocationPackPassedWhenCallingMakeResidentResidencyAllocationsThenItIsUsed) { MockWddmAllocation allocation1; MockWddmAllocation allocation2; allocation1.handle = 1; allocation2.handle = 2; ResidencyContainer residencyPack{&allocation1, &allocation2}; auto makeResidentWithOutBytesToTrim = [](const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t size) -> bool { EXPECT_EQ(1, handles[0]); EXPECT_EQ(2, handles[1]); return true; }; ON_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).WillByDefault(::testing::Invoke(makeResidentWithOutBytesToTrim)); EXPECT_CALL(*wddm, makeResident(::testing::_, 2, false, ::testing::_, ::testing::_)).Times(1); bool result = residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(result); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsAndTrimToBudgetSuceedsWhenCallingMakeResidentResidencyAllocationsThenSucceed) { MockWddmAllocation allocation1; void *cpuPtr = reinterpret_cast(wddm->getWddmMinAddress() + 0x1000); size_t allocationSize = 0x1000; WddmAllocation allocationToTrim(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuPtr, allocationSize, nullptr, MemoryPool::MemoryNull); allocationToTrim.getResidencyData().updateCompletionData(residencyController->getMonitoredFence().lastSubmittedFence, osContext->getContextId()); auto makeResidentWithOutBytesToTrim = [allocationSize](const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t size) -> bool { *numberOfBytesToTrim = allocationSize; return false; }; EXPECT_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(2).WillOnce(::testing::Invoke(makeResidentWithOutBytesToTrim)).WillOnce(::testing::Return(true)); residencyController->addToTrimCandidateList(&allocationToTrim); ResidencyContainer residencyPack{&allocation1}; bool result = 
residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(result); EXPECT_TRUE(allocation1.getResidencyData().resident[osContextId]); } TEST_F(WddmResidencyControllerWithMockWddmTest, givenMakeResidentFailsWhenCallingMakeResidentResidencyAllocationsThenMemoryBudgetExhaustedIsSetToTrue) { MockWddmAllocation allocation1; ResidencyContainer residencyPack{&allocation1}; auto makeResidentThatFails = [](const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t size) -> bool { return false; }; auto makeResidentThatSucceds = [](const D3DKMT_HANDLE *handles, uint32_t count, bool cantTrimFurther, uint64_t *numberOfBytesToTrim, size_t size) -> bool { return true; }; EXPECT_CALL(*wddm, makeResident(::testing::_, ::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(2).WillOnce(::testing::Invoke(makeResidentThatFails)).WillOnce(::testing::Invoke(makeResidentThatSucceds)); residencyController->makeResidentResidencyAllocations(residencyPack); EXPECT_TRUE(residencyController->isMemoryBudgetExhausted()); } wddm_residency_handler_tests.cpp000066400000000000000000000141571363734646600343410ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/os_interface/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/windows/wddm_memory_operations_handler.h" #include "shared/source/utilities/stackvec.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include 
"opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" #include "opencl/test/unit_test/os_interface/windows/wddm_fixture.h" #include "test.h" using namespace NEO; struct WddmMemoryOperationsHandlerTest : public WddmTest { void SetUp() override { WddmTest::SetUp(); wddmMemoryOperationsHandler = std::make_unique(wddm); wddmAllocation.handle = 0x2u; osHandleStorageFirst = std::make_unique(); osHandleStorageSecond = std::make_unique(); wddmFragmentedAllocation.fragmentsStorage.fragmentCount = 2; wddmFragmentedAllocation.fragmentsStorage.fragmentStorageData[0].osHandleStorage = osHandleStorageFirst.get(); wddmFragmentedAllocation.fragmentsStorage.fragmentStorageData[0].osHandleStorage->handle = 0x3u; wddmFragmentedAllocation.fragmentsStorage.fragmentStorageData[1].osHandleStorage = osHandleStorageSecond.get(); wddmFragmentedAllocation.fragmentsStorage.fragmentStorageData[1].osHandleStorage->handle = 0x4u; allocationPtr = &wddmAllocation; allocationData.push_back(&wddmAllocation); allocationData.push_back(&wddmFragmentedAllocation); } std::unique_ptr wddmMemoryOperationsHandler; MockWddmAllocation wddmAllocation; MockWddmAllocation wddmFragmentedAllocation; std::unique_ptr osHandleStorageFirst; std::unique_ptr osHandleStorageSecond; GraphicsAllocation *allocationPtr; StackVec allocationData; }; TEST_F(WddmMemoryOperationsHandlerTest, givenRegularAllocatioWhenMakingResidentAllocaionExpectMakeResidentCalled) { EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmAllocation), MemoryOperationsStatus::SUCCESS); } TEST_F(WddmMemoryOperationsHandlerTest, givenFragmentedAllocationWhenMakingResidentAllocaionExpectMakeResidentCalled) { allocationPtr = &wddmFragmentedAllocation; EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); 
EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); } TEST_F(WddmMemoryOperationsHandlerTest, givenVariousAllocationsWhenMakingResidentAllocaionExpectMakeResidentCalled) { EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(ArrayRef(allocationData)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); } TEST_F(WddmMemoryOperationsHandlerTest, givenRegularAllocatioWhenEvictingResidentAllocationExpectEvictCalled) { EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->evict(wddmAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); } TEST_F(WddmMemoryOperationsHandlerTest, givenFragmentedAllocationWhenEvictingResidentAllocationExpectEvictCalled) { allocationPtr = &wddmFragmentedAllocation; EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(ArrayRef(&allocationPtr, 1)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->evict(wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmFragmentedAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); } TEST_F(WddmMemoryOperationsHandlerTest, givenVariousAllocationsWhenEvictingResidentAllocationExpectEvictCalled) { EXPECT_EQ(wddmMemoryOperationsHandler->makeResident(ArrayRef(allocationData)), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->evict(wddmAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); 
EXPECT_EQ(wddmMemoryOperationsHandler->evict(wddmFragmentedAllocation), MemoryOperationsStatus::SUCCESS); EXPECT_EQ(wddmMemoryOperationsHandler->isResident(wddmFragmentedAllocation), MemoryOperationsStatus::MEMORY_NOT_FOUND); } TEST(WddmResidentBufferTests, whenBuffersIsCreatedWithMakeResidentFlagSetThenItIsMadeResidentUponCreation) { VariableBackup backup(&ultHwConfig); ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; DebugManagerStateRestore restorer; DebugManager.flags.MakeAllBuffersResident.set(true); initPlatform(); auto device = platform()->getClDevice(0u); MockContext context(device, false); auto retValue = CL_SUCCESS; auto clBuffer = clCreateBuffer(&context, 0u, 4096u, nullptr, &retValue); ASSERT_EQ(retValue, CL_SUCCESS); auto memoryOperationsHandler = context.getDevice(0)->getRootDeviceEnvironment().memoryOperationsInterface.get(); auto neoBuffer = castToObject(clBuffer); auto bufferAllocation = neoBuffer->getGraphicsAllocation(); auto status = memoryOperationsHandler->isResident(*bufferAllocation); EXPECT_EQ(status, MemoryOperationsStatus::SUCCESS); clReleaseMemObject(clBuffer); } compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/000077500000000000000000000000001363734646600240105ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/CMakeLists.txt000066400000000000000000000010041363734646600265430ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # cmake_minimum_required(VERSION 3.2.0 FATAL_ERROR) add_subdirectory(api) add_subdirectory(fixtures) # Setting up our local list of test files set(IGDRCL_SRCS_performance_tests ${IGDRCL_SRCS_perf_tests_api} ${IGDRCL_SRCS_perf_tests_fixtures} "${CMAKE_CURRENT_SOURCE_DIR}/options_perf_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/perf_test_utils.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/perf_test_utils.h" PARENT_SCOPE ) 
compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/api/000077500000000000000000000000001363734646600245615ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/api/CMakeLists.txt000066400000000000000000000005061363734646600273220ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_perf_tests_api "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/api_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/api_tests.h" "${CMAKE_CURRENT_SOURCE_DIR}/context_tests.cpp" PARENT_SCOPE) compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/api/api_tests.cpp000066400000000000000000000020731363734646600272620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/compiler_interface/compiler_interface.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/platform/platform.h" #include "cl_api_tests.h" namespace NEO { api_fixture::api_fixture() : retVal(CL_SUCCESS), retSize(0), pContext(nullptr), pKernel(nullptr), pProgram(nullptr) { } void api_fixture::SetUp() { setReferenceTime(); PlatformFixture::SetUp(numPlatformDevices, platformDevices); DeviceFixture::SetUp(); ASSERT_NE(nullptr, pDevice); auto pDevice = pPlatform->getDevice(0); ASSERT_NE(nullptr, pDevice); cl_device_id clDevice = pDevice; pContext = Context::create(nullptr, DeviceVector(&clDevice, 1), nullptr, nullptr, retVal); CommandQueueHwFixture::SetUp(pDevice, pContext); } void api_fixture::TearDown() { delete pKernel; delete pContext; delete pProgram; CommandQueueHwFixture::TearDown(); DeviceFixture::TearDown(); PlatformFixture::TearDown(); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/api/api_tests.h000066400000000000000000000022541363734646600267300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT 
* */ #pragma once #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.h" #include "opencl/test/unit_test/perf_tests/fixtures/device_fixture.h" #include "opencl/test/unit_test/perf_tests/fixtures/platform_fixture.h" #include "opencl/test/unit_test/perf_tests/perf_test_utils.h" #include "gtest/gtest.h" namespace NEO { struct api_fixture : public PlatformFixture, public CommandQueueHwFixture, public DeviceFixture { public: api_fixture(void); protected: virtual void SetUp(); virtual void TearDown(); cl_int retVal; size_t retSize; CommandQueue *pCommandQueue; Context *pContext; Kernel *pKernel; Program *pProgram; }; struct api_tests : public api_fixture, public ::testing::Test { void SetUp() override { api_fixture::SetUp(); } void TearDown() override { api_fixture::TearDown(); } }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/api/context_tests.cpp000066400000000000000000000067271363734646600302070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/compiler_interface/compiler_interface.h" #include "opencl/source/helpers/file_io.h" #include "opencl/test/unit_test/helpers/memory_management.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "cl_api_tests.h" using namespace NEO; typedef api_tests ContextTest; namespace ULT { // multiplier of reference ratio that is compared ( checked if less than ) with current result const double multiplier = 1.5000; // ratio results that are not checked be EXPECT ( very short time tests are not chceked due to high fluctuations ) const double ratioThreshold = 0.005; //------------------------------------------------------------------------------ // clCreateContext //------------------------------------------------------------------------------ 
TEST_F(ContextTest, clCreateContext) { double previousRatio = -1.0; uint64_t hash = getHash(__FUNCTION__, strlen(__FUNCTION__)); bool success = getTestRatio(hash, previousRatio); long long times[3] = {0, 0, 0}; for (int i = 0; i < 3; i++) { Timer t; t.start(); auto context = clCreateContext(nullptr, num_devices, devices, nullptr, nullptr, &retVal); t.end(); times[i] = t.get(); ((Context *)context)->release(); } long long time = majorityVote(times[0], times[1], times[2]); double ratio = static_cast(time) / static_cast(refTime); if (success && previousRatio > ratioThreshold) { EXPECT_TRUE(isLowerThanReference(ratio, previousRatio, multiplier)) << "Current: " << ratio << " previous: " << previousRatio << "\n"; } updateTestRatio(hash, ratio); } TEST_F(ContextTest, clReleaseContext) { double previousRatio = -1.0; uint64_t hash = getHash(__FUNCTION__, strlen(__FUNCTION__)); bool success = getTestRatio(hash, previousRatio); long long times[3] = {0, 0, 0}; cl_context contexts[3]; cl_device_id clDevice = pDevice; for (int i = 0; i < 3; i++) { contexts[i] = Context::create(nullptr, DeviceVector(&clDevice, 1), nullptr, nullptr, retVal); Timer t; t.start(); auto retVal = clReleaseContext(contexts[i]); t.end(); times[i] = t.get(); } long long time = majorityVote(times[0], times[1], times[2]); double ratio = static_cast(time) / static_cast(refTime); if (success && previousRatio > ratioThreshold) { EXPECT_TRUE(isLowerThanReference(ratio, previousRatio, multiplier)) << "Current: " << ratio << " previous: " << previousRatio << "\n"; } updateTestRatio(hash, ratio); } TEST_F(ContextTest, clRetainContext) { double previousRatio = -1.0; uint64_t hash = getHash(__FUNCTION__, strlen(__FUNCTION__)); bool success = getTestRatio(hash, previousRatio); long long times[3] = {0, 0, 0}; for (int i = 0; i < 3; i++) { Timer t; t.start(); auto retVal = clRetainContext(pContext); t.end(); times[i] = t.get(); pContext->release(); } long long time = majorityVote(times[0], times[1], times[2]); double 
ratio = static_cast(time) / static_cast(refTime); if (success && previousRatio > ratioThreshold) { EXPECT_TRUE(isLowerThanReference(ratio, previousRatio, multiplier)) << "Current: " << ratio << " previous: " << previousRatio << "\n"; } updateTestRatio(hash, ratio); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/000077500000000000000000000000001363734646600256615ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/CMakeLists.txt000066400000000000000000000010041363734646600304140ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_perf_tests_fixtures "${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt" "${CMAKE_CURRENT_SOURCE_DIR}/command_queue_fixture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/command_queue_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.h" "${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/platform_fixture.h" PARENT_SCOPE ) compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.cpp000066400000000000000000000034211363734646600327550ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.h" #include "shared/source/device/device.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/context/context.h" #include "gtest/gtest.h" namespace NEO { // Global table of create functions extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; CommandQueueHwFixture::CommandQueueHwFixture() : pCmdQ(nullptr) { } CommandQueue *CommandQueueHwFixture::createCommandQueue( Context *context, Device *pDevice, cl_command_queue_properties properties) { auto funcCreate = 
commandQueueFactory[pDevice->getHardwareInfo().platform->eRenderCoreFamily]; assert(nullptr != funcCreate); return funcCreate(context, pDevice, properties); } void CommandQueueHwFixture::SetUp() { ASSERT_NE(nullptr, pCmdQ); } void CommandQueueHwFixture::SetUp( Device *pDevice, Context *context) { ASSERT_NE(nullptr, pDevice); pCmdQ = createCommandQueue(context, pDevice); CommandQueueHwFixture::SetUp(); } void CommandQueueHwFixture::TearDown() { delete pCmdQ; } CommandQueueFixture::CommandQueueFixture() : pCmdQ(nullptr) { } CommandQueue *CommandQueueFixture::createCommandQueue( Context *context, Device *device, cl_command_queue_properties properties) { return new CommandQueue( context, device, properties); } void CommandQueueFixture::SetUp( Context *context, Device *device, cl_command_queue_properties properties) { pCmdQ = createCommandQueue( context, device, properties); } void CommandQueueFixture::TearDown() { delete pCmdQ; pCmdQ = nullptr; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/command_queue_fixture.h000066400000000000000000000021401363734646600324170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/command_queue/command_queue.h" #include "CL/cl.h" #include "gtest/gtest.h" namespace NEO { class Context; class Device; struct CommandQueueHwFixture { CommandQueueHwFixture(); CommandQueue *createCommandQueue( Context *context, Device *device, cl_command_queue_properties _properties = 0); virtual void SetUp(); virtual void SetUp(Device *_pDevice, Context *context); virtual void TearDown(); CommandQueue *pCmdQ; }; struct CommandQueueFixture { CommandQueueFixture(); virtual void SetUp( Context *context, Device *device, cl_command_queue_properties properties = 0); virtual void TearDown(); CommandQueue *createCommandQueue( Context *context, Device *device, cl_command_queue_properties properties); CommandQueue *pCmdQ; }; 
static const cl_command_queue_properties DefaultCommandQueueProperties[] = { 0, CL_QUEUE_PROFILING_ENABLE, }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/device_fixture.cpp000066400000000000000000000012021363734646600313650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "device_fixture.h" #include "opencl/source/command_stream/command_stream_receiver.h" #include "gtest/gtest.h" using NEO::Device; using NEO::HardwareInfo; using NEO::platformDevices; void DeviceFixture::SetUp() { pDevice = ClDeviceHelper<>::create(); ASSERT_NE(nullptr, pDevice); auto &commandStreamReceiver = pDevice->getGpgpuCommandStreamReceiver(); pTagMemory = commandStreamReceiver.getTagAddress(); ASSERT_NE(nullptr, const_cast(pTagMemory)); } void DeviceFixture::TearDown() { delete pDevice; } compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/device_fixture.h000066400000000000000000000016151363734646600310420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/device/device.h" #include namespace NEO { struct HardwareInfo; extern const HardwareInfo **platformDevices; } // namespace NEO // Even though there aren't any defaults, this pattern is used // throughout testing. Included here for consistency. 
struct DeviceDefaults { }; template struct ClDeviceHelper { static NEO::Device *create(const NEO::HardwareInfo *hardwareInfo = nullptr) { auto device = NEO::Device::create(hardwareInfo); assert(device != nullptr); return device; } }; struct DeviceFixture { DeviceFixture() : pDevice(nullptr), pTagMemory(nullptr) { } void SetUp(); void TearDown(); NEO::Device *pDevice; volatile uint32_t *pTagMemory; }; compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/platform_fixture.cpp000066400000000000000000000022271363734646600317620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "shared/source/device/device.h" #include "gtest/gtest.h" namespace NEO { PlatformFixture::PlatformFixture() : pPlatform(nullptr), num_devices(0), devices(nullptr) { } void PlatformFixture::SetUp(size_t numDevices, const HardwareInfo **pDevices) { pPlatform = platform(); ASSERT_EQ(0u, pPlatform->getNumDevices()); // setup platform / context bool isInitialized = pPlatform->initialize(numDevices, pDevices); ASSERT_EQ(true, isInitialized); num_devices = static_cast(pPlatform->getNumDevices()); ASSERT_GT(num_devices, 0u); auto allDev = pPlatform->getDevices(); ASSERT_NE(nullptr, allDev); devices = new cl_device_id[num_devices]; for (cl_uint deviceOrdinal = 0; deviceOrdinal < num_devices; ++deviceOrdinal) { auto device = allDev[deviceOrdinal]; ASSERT_NE(nullptr, device); devices[deviceOrdinal] = device; } } void PlatformFixture::TearDown() { pPlatform->shutdown(); delete[] devices; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/fixtures/platform_fixture.h000066400000000000000000000013501363734646600314230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/api/cl_types.h" #include "opencl/source/platform/platform.h" namespace 
NEO { struct HardwareInfo; //////////////////////////////////////////////////////////////////////////////// // CPlatformFixture // Used to test the Platform class (and many others) //////////////////////////////////////////////////////////////////////////////// class PlatformFixture { public: PlatformFixture(); protected: virtual void SetUp(size_t numDevices, const HardwareInfo **pDevices); virtual void TearDown(); Platform *pPlatform; cl_uint num_devices; cl_device_id *devices; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/options_perf_tests.cpp000066400000000000000000000015021363734646600304430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/hw_cmds.h" #include "opencl/source/helpers/array_count.h" namespace NEO { // IP address for TBX server const char *tbxServerIp = "127.0.0.1"; // AUB file folder location const char *folderAUB = "aub_out"; // Initial value for HW tag // Set to 0 if using HW or simulator, otherwise 0xFFFFFF00, needs to be lower then Event::EventNotReady. 
uint32_t initialHardwareTag = static_cast(0); // Number of devices in the platform static const HardwareInfo *DefaultPlatformDevices[] = { &DEFAULT_PLATFORM::hwInfo, }; size_t numPlatformDevices = ARRAY_COUNT(DefaultPlatformDevices); const HardwareInfo **platformDevices = DefaultPlatformDevices; } // namespace NEO bool printMemoryOpCallStack = true; compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/perf_test_utils.cpp000066400000000000000000000060211363734646600277260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "perf_test_utils.h" #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/helpers/hash.h" #include #include using namespace NEO; using namespace std; const char *perfLogPath = "perf_logs/"; // Global reference time long long refTime = 0; void setReferenceTime() { if (refTime == 0) { Timer t1, t2, t3; long long time1 = 0; long long time2 = 0; long long time3 = 0; Timer::setFreq(); void *bufferDst = alignedMalloc(128 * 4096, 4096); void *bufferSrc1 = alignedMalloc(128 * 4096, 4096); void *bufferSrc2 = alignedMalloc(128 * 4096, 4096); void *bufferSrc3 = alignedMalloc(128 * 4096, 4096); t1.start(); memset(bufferSrc1, 0, 128 * 4096); memcpy(bufferDst, bufferSrc1, 128 * 4096); t1.end(); t2.start(); memset(bufferSrc2, 1, 128 * 4096); memcpy(bufferDst, bufferSrc2, 128 * 4096); t2.end(); t3.start(); memset(bufferSrc3, 2, 128 * 4096); memcpy(bufferDst, bufferSrc3, 128 * 4096); t3.end(); time1 = t1.get(); time2 = t2.get(); time3 = t3.get(); refTime = majorityVote(time1, time2, time3); alignedFree(bufferDst); alignedFree(bufferSrc1); alignedFree(bufferSrc2); alignedFree(bufferSrc3); } } bool getTestRatio(uint64_t hash, double &ratio) { ifstream file; string filename(perfLogPath); double data = 0.0; filename.append(std::to_string(hash)); file.open(filename); if (file.is_open()) { file >> data; ratio = data; file.close(); return true; } ratio = 0.0; return false; } 
bool saveTestRatio(uint64_t hash, double ratio) { ofstream file; string filename(perfLogPath); double data = 0.0; filename.append(std::to_string(hash)); file.open(filename); if (file.is_open()) { file << ratio; file.close(); return true; } return false; } bool isInRange(double data, double reference, double multiplier) { double lower = reference / multiplier; double higher = reference * multiplier; return data >= lower && data <= higher; } bool isLowerThanReference(double data, double reference, double multiplier) { double higher = multiplier * reference; if (data <= higher) { return true; } return false; } bool updateTestRatio(uint64_t hash, double ratio) { double oldRatio = 0.0; if (getTestRatio(hash, oldRatio)) { if (oldRatio != 0.0) { if (isInRange(ratio, oldRatio, 2.000)) { double newRatio = (0.8000 * oldRatio + 0.2000 * ratio); if (newRatio < 0.8 * oldRatio) return false; saveTestRatio(hash, newRatio); return true; } } } else { saveTestRatio(hash, ratio); } return false; } compute-runtime-20.13.16352/opencl/test/unit_test/perf_tests/perf_test_utils.h000066400000000000000000000016531363734646600274010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/timer_util.h" #include "gtest/gtest.h" #include extern const char *perfLogPath; extern long long refTime; void setReferenceTime(); bool getTestRatio(uint64_t hash, double &ratio); bool saveTestRatio(uint64_t hash, double ratio); bool isInRange(double data, double reference, double rangePercentage); bool isLowerThanReference(double data, double reference, double rangePercentage); bool updateTestRatio(uint64_t hash, double ratio); template T majorityVote(T time1, T time2, T time3) { T minTime1 = 0; T minTime2 = 0; if (time1 < time2) { minTime1 = time1; minTime2 = time2; } else { minTime1 = time2; minTime2 = time1; } if (minTime2 > time3) minTime2 = time3; return (minTime1 + minTime2) / 2; } 
compute-runtime-20.13.16352/opencl/test/unit_test/platform/000077500000000000000000000000001363734646600234565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/platform/CMakeLists.txt000066400000000000000000000005111363734646600262130ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_platform ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/platform_icd_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_platform}) compute-runtime-20.13.16352/opencl/test/unit_test/platform/platform_icd_tests.cpp000066400000000000000000000045561363734646600300610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "opencl/source/api/dispatch.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; class IcdRestore : public SharingFactory { public: IcdRestore() { icdSnapshot = icdGlobalDispatchTable; memcpy_s(savedState, sizeof(savedState), sharingContextBuilder, sizeof(sharingContextBuilder)); for (auto &builder : sharingContextBuilder) { builder = nullptr; } } ~IcdRestore() { memcpy_s(sharingContextBuilder, sizeof(sharingContextBuilder), savedState, sizeof(savedState)); icdGlobalDispatchTable = icdSnapshot; } template void registerSharing(SharingType type) { auto object = std::make_unique(); sharingContextBuilder[type] = object.get(); sharings.push_back(std::move(object)); } protected: decltype(icdGlobalDispatchTable) icdSnapshot; decltype(SharingFactory::sharingContextBuilder) savedState; std::vector> sharings; }; void fakeGlF() { } class PlatformTestedSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return nullptr; } 
std::string getExtensions() override { return "--extension--"; }; void fillGlobalDispatchTable() override { icdGlobalDispatchTable.clCreateFromGLBuffer = (decltype(icdGlobalDispatchTable.clCreateFromGLBuffer)) & fakeGlF; }; void *getExtensionFunctionAddress(const std::string &functionName) override { return nullptr; } }; TEST(PlatformIcdTest, WhenPlatformSetupThenDispatchTableInitialization) { IcdRestore icdRestore; icdGlobalDispatchTable.clCreateFromGLBuffer = nullptr; EXPECT_EQ(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); MockPlatform myPlatform; myPlatform.fillGlobalDispatchTable(); EXPECT_EQ(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); icdRestore.registerSharing(SharingType::CLGL_SHARING); myPlatform.fillGlobalDispatchTable(); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); } compute-runtime-20.13.16352/opencl/test/unit_test/platform/platform_tests.cpp000066400000000000000000000473651363734646600272470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/platform/extensions.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/fixtures/mock_aub_center_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include 
"opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_source_level_debugger.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using namespace NEO; namespace NEO { namespace MockSipData { extern SipKernelType calledType; extern bool called; } // namespace MockSipData } // namespace NEO struct PlatformTest : public ::testing::Test { void SetUp() override { MockSipData::calledType = SipKernelType::COUNT; MockSipData::called = false; pPlatform.reset(new MockPlatform()); } void TearDown() override { MockSipData::calledType = SipKernelType::COUNT; MockSipData::called = false; } cl_int retVal = CL_SUCCESS; std::unique_ptr pPlatform; }; struct MockPlatformWithMockExecutionEnvironment : public MockPlatform { MockPlatformWithMockExecutionEnvironment() : MockPlatform(*(new MockExecutionEnvironment(nullptr, false, 1))) { MockAubCenterFixture::setMockAubCenter(*executionEnvironment.rootDeviceEnvironments[0]); } }; TEST_F(PlatformTest, GivenUninitializedPlatformWhenInitializeIsCalledThenPlatformIsInitialized) { EXPECT_FALSE(pPlatform->isInitialized()); pPlatform->initializeWithNewDevices(); EXPECT_TRUE(pPlatform->isInitialized()); } TEST_F(PlatformTest, WhenGetNumDevicesIsCalledThenExpectedValuesAreReturned) { EXPECT_EQ(0u, pPlatform->getNumDevices()); pPlatform->initializeWithNewDevices(); EXPECT_GT(pPlatform->getNumDevices(), 0u); } TEST_F(PlatformTest, WhenGetDeviceIsCalledThenExpectedValuesAreReturned) { EXPECT_EQ(nullptr, pPlatform->getClDevice(0)); pPlatform->initializeWithNewDevices(); EXPECT_NE(nullptr, pPlatform->getClDevice(0)); auto numDevices = pPlatform->getNumDevices(); EXPECT_EQ(nullptr, pPlatform->getClDevice(numDevices)); } TEST_F(PlatformTest, WhenGetClDevicesIsCalledThenExpectedValuesAreReturned) { EXPECT_EQ(nullptr, pPlatform->getClDevices()); pPlatform->initializeWithNewDevices(); EXPECT_NE(nullptr, pPlatform->getClDevices()); } TEST_F(PlatformTest, PlatformgetAsCompilerEnabledExtensionsString) { 
pPlatform->initializeWithNewDevices(); auto compilerExtensions = pPlatform->getClDevice(0)->peekCompilerExtensions(); EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string(" -cl-ext=-all,+cl"))); if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_subgroups"))); } } TEST_F(PlatformTest, hasAsyncEventsHandler) { EXPECT_NE(nullptr, pPlatform->getAsyncEventsHandler()); } TEST_F(PlatformTest, givenMidThreadPreemptionWhenInitializingPlatformThenCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::MidThread)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_EQ(SipKernelType::Csr, MockSipData::calledType); EXPECT_TRUE(MockSipData::called); } TEST_F(PlatformTest, givenDisabledPreemptionAndNoSourceLevelDebuggerWhenInitializingPlatformThenDoNotCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); } TEST_F(PlatformTest, 
givenDisabledPreemptionInactiveSourceLevelDebuggerWhenInitializingPlatformThenDoNotCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); auto sourceLevelDebugger = new MockSourceLevelDebugger(); sourceLevelDebugger->setActive(false); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(sourceLevelDebugger); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); } TEST_F(PlatformTest, givenDisabledPreemptionActiveSourceLevelDebuggerWhenInitializingPlatformThenCallGetSipKernel) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForcePreemptionMode.set(static_cast(PreemptionMode::Disabled)); auto builtIns = new MockBuiltins(); auto executionEnvironment = pPlatform->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(builtIns); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger()); EXPECT_EQ(SipKernelType::COUNT, MockSipData::calledType); EXPECT_FALSE(MockSipData::called); pPlatform->initializeWithNewDevices(); EXPECT_TRUE(MockSipData::called); EXPECT_LE(SipKernelType::DbgCsr, MockSipData::calledType); EXPECT_GE(SipKernelType::DbgCsrLocal, MockSipData::calledType); } TEST(PlatformTestSimple, givenCsrHwTypeWhenPlatformIsInitializedThenInitAubCenterIsNotCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.SetCommandStreamReceiver.set(0); MockPlatformWithMockExecutionEnvironment platform; bool ret = 
platform.initializeWithNewDevices(); EXPECT_TRUE(ret); auto rootDeviceEnvironment = static_cast(platform.peekExecutionEnvironment()->rootDeviceEnvironments[0].get()); EXPECT_FALSE(rootDeviceEnvironment->initAubCenterCalled); } TEST(PlatformTestSimple, givenNotCsrHwTypeWhenPlatformIsInitializedThenInitAubCenterIsCalled) { DebugManagerStateRestore stateRestore; DebugManager.flags.SetCommandStreamReceiver.set(1); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; MockPlatformWithMockExecutionEnvironment platform; bool ret = platform.initializeWithNewDevices(); EXPECT_TRUE(ret); auto rootDeviceEnvironment = static_cast(platform.peekExecutionEnvironment()->rootDeviceEnvironments[0].get()); EXPECT_TRUE(rootDeviceEnvironment->initAubCenterCalled); } TEST(PlatformTestSimple, shutdownClosesAsyncEventHandlerThread) { Platform *platform = new MockPlatform(); MockHandler *mockAsyncHandler = new MockHandler(); auto oldHandler = platform->setAsyncEventsHandler(std::unique_ptr(mockAsyncHandler)); EXPECT_EQ(mockAsyncHandler, platform->getAsyncEventsHandler()); mockAsyncHandler->openThread(); delete platform; EXPECT_TRUE(MockAsyncEventHandlerGlobals::destructorCalled); } namespace NEO { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[IGFX_MAX_CORE]; } CommandStreamReceiver *createMockCommandStreamReceiver(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { return nullptr; }; class PlatformFailingTest : public PlatformTest { public: PlatformFailingTest() { ultHwConfig.useHwCsr = true; } void SetUp() override { PlatformTest::SetUp(); hwInfo = defaultHwInfo.get(); commandStreamReceiverCreateFunc = commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily]; commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = createMockCommandStreamReceiver; } void TearDown() override { commandStreamReceiverFactory[hwInfo->platform.eRenderCoreFamily] = commandStreamReceiverCreateFunc; PlatformTest::TearDown(); 
} VariableBackup backup{&ultHwConfig}; CommandStreamReceiverCreateFunc commandStreamReceiverCreateFunc; const HardwareInfo *hwInfo; }; TEST_F(PlatformFailingTest, givenPlatformInitializationWhenIncorrectHwInfoThenInitializationFails) { auto platform = new MockPlatform(); bool ret = platform->initializeWithNewDevices(); EXPECT_FALSE(ret); EXPECT_FALSE(platform->isInitialized()); delete platform; } TEST_F(PlatformTest, givenSupportingCl21WhenPlatformSupportsFp64ThenFillMatchingSubstringsAndMandatoryTrailingSpace) { const HardwareInfo *hwInfo; hwInfo = defaultHwInfo.get(); std::string extensionsList = getExtensionsList(*hwInfo); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str()); EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string(" -cl-ext=-all,+cl"))); if (hwInfo->capabilityTable.clVersionSupport > 20) { EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_subgroups"))); EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_il_program"))); if (hwInfo->capabilityTable.supportsVme) { EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_intel_spirv_device_side_avc_motion_estimation"))); } else { EXPECT_THAT(compilerExtensions, testing::Not(::testing::HasSubstr(std::string("cl_intel_spirv_device_side_avc_motion_estimation")))); } if (hwInfo->capabilityTable.supportsImages) { EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_intel_spirv_media_block_io"))); } else { EXPECT_THAT(compilerExtensions, testing::Not(::testing::HasSubstr(std::string("cl_intel_spirv_media_block_io")))); } EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_intel_spirv_subgroups"))); EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_spirv_no_integer_wrap_decoration"))); } if (hwInfo->capabilityTable.ftrSupportsFP64) { EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_fp64"))); } if 
(hwInfo->capabilityTable.supportsImages) { EXPECT_THAT(extensionsList, ::testing::HasSubstr(std::string("cl_khr_3d_image_writes"))); } EXPECT_THAT(compilerExtensions, ::testing::EndsWith(std::string(" "))); } TEST_F(PlatformTest, givenNotSupportingCl21WhenPlatformNotSupportFp64ThenNotFillMatchingSubstringAndFillMandatoryTrailingSpace) { HardwareInfo TesthwInfo = *defaultHwInfo; TesthwInfo.capabilityTable.ftrSupportsFP64 = false; TesthwInfo.capabilityTable.clVersionSupport = 10; std::string extensionsList = getExtensionsList(TesthwInfo); if (TesthwInfo.capabilityTable.supportsImages) { EXPECT_THAT(extensionsList, ::testing::HasSubstr(std::string("cl_khr_3d_image_writes"))); } std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str()); EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("-cl-ext=-all,+cl"))); EXPECT_THAT(compilerExtensions, ::testing::Not(::testing::HasSubstr(std::string("cl_khr_fp64")))); EXPECT_THAT(compilerExtensions, ::testing::Not(::testing::HasSubstr(std::string("cl_khr_subgroups")))); EXPECT_THAT(compilerExtensions, ::testing::EndsWith(std::string(" "))); } TEST_F(PlatformTest, givenFtrSupportAtomicsWhenCreateExtentionsListThenGetMatchingSubstrings) { const HardwareInfo *hwInfo; hwInfo = defaultHwInfo.get(); std::string extensionsList = getExtensionsList(*hwInfo); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str()); if (hwInfo->capabilityTable.ftrSupportsInteger64BitAtomics) { EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_int64_base_atomics"))); EXPECT_THAT(compilerExtensions, ::testing::HasSubstr(std::string("cl_khr_int64_extended_atomics"))); } else { EXPECT_THAT(compilerExtensions, ::testing::Not(::testing::HasSubstr(std::string("cl_khr_int64_base_atomics")))); EXPECT_THAT(compilerExtensions, ::testing::Not(::testing::HasSubstr(std::string("cl_khr_int64_extended_atomics")))); } } TEST_F(PlatformTest, 
givenSupporteImagesAndClVersion21WhenCreateExtentionsListThenDeviceReportsSpritvMediaBlockIoExtension) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = true; hwInfo.capabilityTable.clVersionSupport = 21; std::string extensionsList = getExtensionsList(hwInfo); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str()); EXPECT_THAT(compilerExtensions, testing::HasSubstr(std::string("cl_intel_spirv_media_block_io"))); } TEST_F(PlatformTest, givenNotSupporteImagesAndClVersion21WhenCreateExtentionsListThenDeviceNotReportsSpritvMediaBlockIoExtension) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.supportsImages = false; hwInfo.capabilityTable.clVersionSupport = 21; std::string extensionsList = getExtensionsList(hwInfo); std::string compilerExtensions = convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str()); EXPECT_THAT(compilerExtensions, testing::Not(testing::HasSubstr(std::string("cl_intel_spirv_media_block_io")))); } TEST_F(PlatformTest, testRemoveLastSpace) { std::string emptyString = ""; removeLastSpace(emptyString); EXPECT_EQ(std::string(""), emptyString); std::string xString = "x"; removeLastSpace(xString); EXPECT_EQ(std::string("x"), xString); std::string xSpaceString = "x "; removeLastSpace(xSpaceString); EXPECT_EQ(std::string("x"), xSpaceString); } TEST(PlatformConstructionTest, givenPlatformConstructorWhenItIsCalledTwiceThenTheSamePlatformIsReturned) { platformsImpl.clear(); auto platform1 = constructPlatform(); EXPECT_EQ(platform1, platform()); auto platform2 = constructPlatform(); EXPECT_EQ(platform2, platform1); EXPECT_NE(platform1, nullptr); } TEST(PlatformConstructionTest, givenPlatformConstructorWhenItIsCalledAfterResetThenNewPlatformIsConstructed) { platformsImpl.clear(); auto platform = constructPlatform(); std::unique_ptr temporaryOwnership(std::move(platformsImpl[0])); platformsImpl.clear(); auto platform2 = constructPlatform(); 
EXPECT_NE(platform2, platform); EXPECT_NE(platform, nullptr); EXPECT_NE(platform2, nullptr); platformsImpl.clear(); } TEST(PlatformInitLoopTests, givenPlatformWhenInitLoopHelperIsCalledThenItDoesNothing) { MockPlatform platform; platform.initializationLoopHelper(); } TEST(PlatformInitTest, givenNullptrDeviceInPassedDeviceVectorWhenInitializePlatformThenExceptionIsThrown) { std::vector> devices; devices.push_back(nullptr); EXPECT_THROW(platform()->initialize(std::move(devices)), std::exception); } TEST(PlatformInitTest, givenInitializedPlatformWhenInitializeIsCalledOneMoreTimeWithNullptrDeviceThenSuccessIsEarlyReturned) { initPlatform(); EXPECT_TRUE(platform()->isInitialized()); std::vector> devices; devices.push_back(nullptr); EXPECT_TRUE(platform()->initialize(std::move(devices))); } TEST(PlatformInitTest, givenSingleDeviceWithNonZeroRootDeviceIndexInPassedDeviceVectorWhenInitializePlatformThenCreateOnlyOneClDevice) { std::vector> devices; auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), false, 3); devices.push_back(std::make_unique(executionEnvironment, 2)); auto status = platform()->initialize(std::move(devices)); EXPECT_TRUE(status); size_t expectedNumDevices = 1u; EXPECT_EQ(expectedNumDevices, platform()->getNumDevices()); EXPECT_EQ(2u, platform()->getClDevice(0)->getRootDeviceIndex()); } TEST(PlatformInitLoopTests, givenPlatformWithDebugSettingWhenInitIsCalledThenItEntersEndlessLoop) { DebugManagerStateRestore stateRestore; DebugManager.flags.LoopAtPlatformInitialize.set(true); bool called = false; struct mockPlatform : public MockPlatform { mockPlatform(bool &called) : called(called){}; void initializationLoopHelper() override { DebugManager.flags.LoopAtPlatformInitialize.set(false); called = true; } bool &called; }; mockPlatform platform(called); platform.initializeWithNewDevices(); EXPECT_TRUE(called); } TEST(PlatformGroupDevicesTest, whenMultipleDevicesAreCreatedThenGroupDevicesCreatesVectorPerEachProductFamily) { 
DebugManagerStateRestore restorer; const size_t numRootDevices = 5u; DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); auto executionEnvironment = new ExecutionEnvironment(); for (auto i = 0u; i < numRootDevices; i++) { executionEnvironment->rootDeviceEnvironments.push_back(std::make_unique(*executionEnvironment)); } auto inputDevices = DeviceFactory::createDevices(*executionEnvironment); EXPECT_EQ(numRootDevices, inputDevices.size()); auto skl0Device = inputDevices[0].get(); auto kbl0Device = inputDevices[1].get(); auto skl1Device = inputDevices[2].get(); auto skl2Device = inputDevices[3].get(); auto cfl0Device = inputDevices[4].get(); executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_SKYLAKE; executionEnvironment->rootDeviceEnvironments[1]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_KABYLAKE; executionEnvironment->rootDeviceEnvironments[2]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_SKYLAKE; executionEnvironment->rootDeviceEnvironments[3]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_SKYLAKE; executionEnvironment->rootDeviceEnvironments[4]->getMutableHardwareInfo()->platform.eProductFamily = IGFX_COFFEELAKE; auto groupedDevices = Platform::groupDevices(std::move(inputDevices)); EXPECT_EQ(3u, groupedDevices.size()); EXPECT_EQ(1u, groupedDevices[0].size()); EXPECT_EQ(1u, groupedDevices[1].size()); EXPECT_EQ(3u, groupedDevices[2].size()); EXPECT_EQ(skl0Device, groupedDevices[2][0].get()); EXPECT_EQ(skl1Device, groupedDevices[2][1].get()); EXPECT_EQ(skl2Device, groupedDevices[2][2].get()); EXPECT_EQ(kbl0Device, groupedDevices[1][0].get()); EXPECT_EQ(cfl0Device, groupedDevices[0][0].get()); } 
compute-runtime-20.13.16352/opencl/test/unit_test/profiling/000077500000000000000000000000001363734646600236235ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/profiling/CMakeLists.txt000066400000000000000000000004261363734646600263650ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_profiling ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/profiling_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_profiling})compute-runtime-20.13.16352/opencl/test/unit_test/profiling/profiling_tests.cpp000066400000000000000000001164771363734646600275620ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/source/command_queue/enqueue_common.h" #include "opencl/source/command_queue/enqueue_kernel.h" #include "opencl/source/command_queue/enqueue_marker.h" #include "opencl/source/command_queue/enqueue_migrate_mem_objects.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "opencl/test/unit_test/event/event_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/os_interface/mock_performance_counters.h" #include "test.h" namespace NEO { struct ProfilingTests : public 
CommandEnqueueFixture, public ::testing::Test { void SetUp() override { CommandEnqueueFixture::SetUp(CL_QUEUE_PROFILING_ENABLE); program = ReleaseableObjectPtr(new MockProgram(*pDevice->getExecutionEnvironment())); program->setContext(&ctx); memset(&kernelHeader, 0, sizeof(kernelHeader)); kernelHeader.KernelHeapSize = sizeof(kernelIsa); memset(&dataParameterStream, 0, sizeof(dataParameterStream)); dataParameterStream.DataParameterStreamSize = sizeof(crossThreadData); executionEnvironment = {}; memset(&executionEnvironment, 0, sizeof(executionEnvironment)); executionEnvironment.CompiledSIMD32 = 1; executionEnvironment.LargestCompiledSIMDSize = 32; memset(&threadPayload, 0, sizeof(threadPayload)); threadPayload.LocalIDXPresent = 1; threadPayload.LocalIDYPresent = 1; threadPayload.LocalIDZPresent = 1; kernelInfo.heapInfo.pKernelHeap = kernelIsa; kernelInfo.heapInfo.pKernelHeader = &kernelHeader; kernelInfo.patchInfo.dataParameterStream = &dataParameterStream; kernelInfo.patchInfo.executionEnvironment = &executionEnvironment; kernelInfo.patchInfo.threadPayload = &threadPayload; } void TearDown() override { CommandEnqueueFixture::TearDown(); } ReleaseableObjectPtr program; SKernelBinaryHeaderCommon kernelHeader = {}; SPatchDataParameterStream dataParameterStream = {}; SPatchExecutionEnvironment executionEnvironment = {}; SPatchThreadPayload threadPayload = {}; KernelInfo kernelInfo; MockContext ctx; uint32_t kernelIsa[32]; uint32_t crossThreadData[32]; }; HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfilingAndForWorkloadWithKernelWHENGetCSFromCmdQueueTHENEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + 
HardwareCommandsHelper::getSizeRequiredCS(&kernel); MultiDispatchInfo multiDispatchInfo(&kernel); auto &commandStreamNDRangeKernel = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); } HWTEST_F(ProfilingTests, GIVENCommandQueueWithProfilingAndForWorkloadWithNoKernelWHENGetCSFromCmdQueueTHENEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM); MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, 
GIVENCommandQueueWithProfilingAndForWorkloadWithTwoKernelsInMdiWHENGetCSFromCmdQueueTHENEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + HardwareCommandsHelper::getSizeRequiredCS(&kernel); requiredSize += 2 * sizeof(GPGPU_WALKER); DispatchInfo dispatchInfo; dispatchInfo.setKernel(&kernel); MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); multiDispatchInfo.push(dispatchInfo); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, multiDispatchInfo, nullptr, 0); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_TASK, CsrDependencies(), true, false, false, *pCmdQ, multiDispatchInfo); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); } /* # Two additional PIPE_CONTROLs are expected before first MI_STORE_REGISTER_MEM (which is before GPGPU_WALKER) # and after second MI_STORE_REGISTER_MEM (which is after GPGPU_WALKER). 
*/ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfolingWHENWalkerIsDispatchedTHENPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_kernel clKernel = &kernel; static_cast *>(pCmdQ)->enqueueKernel( clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); parseCommands(*pCmdQ); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } /* # One additional MI_STORE_REGISTER_MEM is expected before and after GPGPU_WALKER. 
*/ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProflingWHENWalkerIsDispatchedTHENMiStoreRegisterMemIsPresentInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); parseCommands(*pCmdQ); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check MI_STORE_REGISTER_MEMs auto itorBeforeMI = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforeMI); auto pBeforeMI = genCmdCast(*itorBeforeMI); pBeforeMI = genCmdCast(*itorBeforeMI); ASSERT_NE(nullptr, pBeforeMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pBeforeMI->getRegisterAddress()); auto itorAfterMI = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterMI); auto pAfterMI = genCmdCast(*itorAfterMI); ASSERT_NE(nullptr, pAfterMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pAfterMI->getRegisterAddress()); ++itorAfterMI; pAfterMI = genCmdCast(*itorAfterMI); EXPECT_EQ(nullptr, pAfterMI); clReleaseEvent(event); } /* # Two additional PIPE_CONTROLs are expected before first MI_STORE_REGISTER_MEM (which is before GPGPU_WALKER) # and after second MI_STORE_REGISTER_MEM (which is after GPGPU_WALKER). 
# If queue is blocked commands should be added to event */ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueBlockedWithProfilingWHENWalkerIsDispatchedTHENPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_event ue = new UserEvent(); static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 1, // one user event to block queue &ue, // user event not signaled &event); //rseCommands(*pCmdQ); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); ASSERT_NE(nullptr, eventCommandStream); parseCommands(*eventCommandStream); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); clReleaseEvent(event); ((UserEvent *)ue)->release(); pCmdQ->isQueueBlocked(); } /* # One additional MI_STORE_REGISTER_MEM is expected before and after GPGPU_WALKER. 
# If queue is blocked commands should be added to event */ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueBlockedWithProfilingWHENWalkerIsDispatchedTHENMiStoreRegisterMemIsPresentInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; MockKernel kernel(program.get(), kernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_event ue = new UserEvent(); static_cast *>(pCmdQ)->enqueueKernel( &kernel, dimensions, globalOffsets, workItems, nullptr, 1, // one user event to block queue &ue, // user event not signaled &event); // parseCommands(*pCmdQ); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); ASSERT_NE(nullptr, eventCommandStream); parseCommands(*eventCommandStream); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check MI_STORE_REGISTER_MEMs auto itorBeforeMI = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforeMI); auto pBeforeMI = genCmdCast(*itorBeforeMI); pBeforeMI = genCmdCast(*itorBeforeMI); ASSERT_NE(nullptr, pBeforeMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pBeforeMI->getRegisterAddress()); auto itorAfterMI = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterMI); auto pAfterMI = genCmdCast(*itorAfterMI); ASSERT_NE(nullptr, pAfterMI); EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pAfterMI->getRegisterAddress()); ++itorAfterMI; EXPECT_EQ(itorAfterMI, cmdList.end()); clReleaseEvent(event); ((UserEvent *)ue)->release(); pCmdQ->isQueueBlocked(); } HWTEST_F(ProfilingTests, 
givenNonKernelEnqueueWhenNonBlockedEnqueueThenSetCpuPath) { cl_event event; pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); auto eventObj = static_cast(event); EXPECT_TRUE(eventObj->isCPUProfilingPath() == CL_TRUE); pCmdQ->finish(); uint64_t queued, submit, start, end; cl_int retVal; retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(uint64_t), &start, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_END, sizeof(uint64_t), &end, 0); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_LT(0u, queued); EXPECT_LT(queued, submit); EXPECT_LT(submit, start); EXPECT_LT(start, end); eventObj->release(); } template struct MockTagNode : public TagNode { public: using TagNode::tagForCpuAccess; using TagNode::gfxAllocation; MockTagNode() { gfxAllocation = nullptr; tagForCpuAccess = nullptr; } }; class MyOSTime : public OSTime { public: static int instanceNum; MyOSTime() { instanceNum++; } double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { EXPECT_FALSE(true); return 1.0; } bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override { EXPECT_FALSE(true); return false; } bool getCpuTime(uint64_t *timeStamp) override { EXPECT_FALSE(true); return false; }; double getHostTimerResolution() const override { EXPECT_FALSE(true); return 0; } uint64_t getCpuRawTimestamp() override { EXPECT_FALSE(true); return 0; } }; int MyOSTime::instanceNum = 0; TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MyOSTime::instanceNum = 0; device->setOSTime(new MyOSTime()); 
EXPECT_EQ(1, MyOSTime::instanceNum); MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; MockCommandQueue cmdQ(&context, device.get(), props); cmdQ.setProfilingEnabled(); cmdQ.device = device.get(); HwTimeStamps timestamp; timestamp.GlobalStartTS = 10; timestamp.ContextStartTS = 20; timestamp.GlobalEndTS = 80; timestamp.ContextEndTS = 56; timestamp.GlobalCompleteTS = 0; timestamp.ContextCompleteTS = 0; MockTagNode timestampNode; timestampNode.tagForCpuAccess = ×tamp; MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); event.setCPUProfilingPath(false); event.timeStampNode = ×tampNode; event.calcProfilingData(); EXPECT_EQ(timestamp.ContextEndTS, timestamp.ContextCompleteTS); cmdQ.device = nullptr; event.timeStampNode = nullptr; } TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturned) { DebugManagerStateRestore stateRestore; DebugManager.flags.ReturnRawGpuTimestamps.set(1); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MyOSTime::instanceNum = 0; device->setOSTime(new MyOSTime()); EXPECT_EQ(1, MyOSTime::instanceNum); MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; MockCommandQueue cmdQ(&context, device.get(), props); cmdQ.setProfilingEnabled(); cmdQ.device = device.get(); HwTimeStamps timestamp; timestamp.GlobalStartTS = 10; timestamp.ContextStartTS = 20; timestamp.GlobalEndTS = 80; timestamp.ContextEndTS = 56; timestamp.GlobalCompleteTS = 0; timestamp.ContextCompleteTS = 70; MockTagNode timestampNode; timestampNode.tagForCpuAccess = ×tamp; MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); cl_event clEvent = &event; event.queueTimeStamp.CPUTimeinNS = 1; event.queueTimeStamp.GPUTimeStamp = 2; event.submitTimeStamp.CPUTimeinNS = 3; event.submitTimeStamp.GPUTimeStamp = 4; event.setCPUProfilingPath(false); event.timeStampNode = ×tampNode; event.calcProfilingData(); cl_ulong queued, submited, start, end, complete; clGetEventProfilingInfo(clEvent, 
CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submited, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr); clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &complete, nullptr); EXPECT_EQ(timestamp.ContextCompleteTS, complete); EXPECT_EQ(timestamp.ContextEndTS, end); EXPECT_EQ(timestamp.ContextStartTS, start); EXPECT_EQ(event.submitTimeStamp.GPUTimeStamp, submited); EXPECT_EQ(event.queueTimeStamp.GPUTimeStamp, queued); event.timeStampNode = nullptr; } struct ProfilingWithPerfCountersTests : public PerformanceCountersFixture, ::testing::Test { void SetUp() override { PerformanceCountersFixture::SetUp(); createPerfCounters(); HardwareInfo hwInfo = *defaultHwInfo; if (hwInfo.capabilityTable.defaultEngineType == aub_stream::EngineType::ENGINE_CCS) { hwInfo.featureTable.ftrCCSNode = true; } pDevice = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); pClDevice = std::make_unique(*pDevice, nullptr); pDevice->setPerfCounters(performanceCountersBase.release()); context = std::make_unique(pClDevice.get()); cl_int retVal = CL_SUCCESS; cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; pCmdQ.reset(CommandQueue::create(context.get(), pClDevice.get(), properties, false, retVal)); kernel = std::make_unique(*pClDevice); } void TearDown() override { PerformanceCountersFixture::TearDown(); } template GenCmdList::iterator expectStoreRegister(const GenCmdList &cmdList, GenCmdList::iterator itor, uint64_t memoryAddress, uint32_t registerAddress) { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); auto pStore = genCmdCast(*itor); EXPECT_EQ(memoryAddress, 
pStore->getMemoryAddress()); EXPECT_EQ(registerAddress, pStore->getRegisterAddress()); itor++; return itor; } MockDevice *pDevice = nullptr; std::unique_ptr pClDevice; std::unique_ptr context; std::unique_ptr pCmdQ; std::unique_ptr kernel; }; HWTEST_F(ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCounterAndForWorkloadWithNoKernelWHENGetCSFromCmdQueueTHENEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; pCmdQ->setPerfCountersEnabled(); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM); MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, true, false, multiDispatchInfo, nullptr, 0); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), true, true, false, multiDispatchInfo, nullptr, 0); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCountersWHENWalkerIsDispatchedTHENPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_kernel clKernel = kernel->mockKernel; 
static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); HardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCountersNoUserRegistersWHENWalkerIsDispatchedTHENPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_kernel clKernel = kernel->mockKernel; static_cast 
*>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); HardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); EXPECT_TRUE(static_cast *>(event)->calcProfilingData()); clReleaseEvent(event); } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueBlockedWithProflingPerfCounterWHENWalkerIsDispatchedTHENPipeControlWithTimeStampIsPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_event ue = new UserEvent(); cl_kernel clKernel = kernel->mockKernel; static_cast 
*>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 1, // one user event to block queue &ue, // user event not signaled &event); //rseCommands(*pCmdQ); ASSERT_NE(nullptr, pCmdQ->virtualEvent); ASSERT_NE(nullptr, pCmdQ->virtualEvent->peekCommand()); NEO::LinearStream *eventCommandStream = pCmdQ->virtualEvent->peekCommand()->getCommandStream(); ASSERT_NE(nullptr, eventCommandStream); HardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*eventCommandStream); // expect MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(itorBeforeReportPerf, cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterReportPerf); clReleaseEvent(event); ((UserEvent *)ue)->release(); pCmdQ->isQueueBlocked(); } HWTEST_F(ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCountersNoEventWHENWalkerIsDispatchedTHENPipeControlWithTimeStampIsNotPresentInCS) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::WALKER_TYPE GPGPU_WALKER; typedef typename FamilyType::MI_REPORT_PERF_COUNT MI_REPORT_PERF_COUNT; pCmdQ->setPerfCountersEnabled(); size_t 
globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_kernel clKernel = kernel->mockKernel; static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, nullptr); HardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); // expect no MI_REPORT_PERF_COUNT before WALKER auto itorBeforeReportPerf = find(cmdList.begin(), cmdList.end()); ASSERT_EQ(cmdList.end(), itorBeforeReportPerf); // Find GPGPU_WALKER auto itorGPGPUWalkerCmd = find(cmdList.begin(), cmdList.end()); GenCmdList::reverse_iterator rItorGPGPUWalkerCmd(itorGPGPUWalkerCmd); ASSERT_NE(cmdList.end(), itorGPGPUWalkerCmd); // Check PIPE_CONTROLs auto itorBeforePC = reverse_find(rItorGPGPUWalkerCmd, cmdList.rbegin()); ASSERT_NE(cmdList.rbegin(), itorBeforePC); auto pBeforePC = genCmdCast(*itorBeforePC); ASSERT_NE(nullptr, pBeforePC); EXPECT_EQ(1u, pBeforePC->getCommandStreamerStallEnable()); auto itorAfterPC = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_NE(cmdList.end(), itorAfterPC); auto pAfterPC = genCmdCast(*itorAfterPC); ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_NO_WRITE, pBeforePC->getPostSyncOperation()); // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); ASSERT_EQ(cmdList.end(), itorAfterReportPerf); } template struct FixedGpuAddressTagAllocator : TagAllocator { struct MockTagNode : TagNode { void setGpuAddress(uint64_t value) { this->gpuAddress = value; } }; FixedGpuAddressTagAllocator(CommandStreamReceiver &csr, uint64_t gpuAddress) : TagAllocator(0, csr.getMemoryManager(), csr.getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(TagType), false) { auto tag = reinterpret_cast(this->freeTags.peekHead()); tag->setGpuAddress(gpuAddress); } }; HWTEST_F(ProfilingWithPerfCountersTests, 
GIVENCommandQueueWithProfilingPerfCountersWHENWalkerIsDispatchedTHENRegisterStoresArePresentInCS) { uint64_t timeStampGpuAddress = 0x123456000; uint64_t perfCountersGpuAddress = 0xabcdef000; auto &csr = pDevice->getUltCommandStreamReceiver(); csr.profilingTimeStampAllocator.reset(new FixedGpuAddressTagAllocator(csr, timeStampGpuAddress)); csr.perfCounterAllocator.reset(new FixedGpuAddressTagAllocator(csr, perfCountersGpuAddress)); pCmdQ->setPerfCountersEnabled(); size_t globalOffsets[3] = {0, 0, 0}; size_t workItems[3] = {1, 1, 1}; uint32_t dimensions = 1; cl_event event; cl_kernel clKernel = kernel->mockKernel; static_cast *>(pCmdQ.get())->enqueueKernel(clKernel, dimensions, globalOffsets, workItems, nullptr, 0, nullptr, &event); auto pEvent = static_cast *>(event); EXPECT_EQ(pEvent->getHwTimeStampNode()->getGpuAddress(), timeStampGpuAddress); EXPECT_EQ(pEvent->getHwPerfCounterNode()->getGpuAddress(), perfCountersGpuAddress); HardwareParse parse; auto &cmdList = parse.cmdList; parse.parseCommands(*pCmdQ); auto itor = expectStoreRegister(cmdList, cmdList.begin(), timeStampGpuAddress + offsetof(HwTimeStamps, ContextStartTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); // after WALKER: itor = expectStoreRegister(cmdList, itor, timeStampGpuAddress + offsetof(HwTimeStamps, ContextEndTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); EXPECT_TRUE(pEvent->calcProfilingData()); clReleaseEvent(event); } struct MockTimestampContainer : public TimestampPacketContainer { ~MockTimestampContainer() override { for (const auto &node : timestampPacketNodes) { delete node->tagForCpuAccess; delete node; } timestampPacketNodes.clear(); } }; struct ProfilingTimestampPacketsTest : public ::testing::Test { void SetUp() override { DebugManager.flags.ReturnRawGpuTimestamps.set(true); cmdQ->setProfilingEnabled(); ev->timestampPacketContainer = std::make_unique(); } void addTimestampNode(int contextStart, int contextEnd, int globalStart) { auto node = new MockTagNode(); auto timestampPacketStorage = 
new TimestampPacketStorage(); node->tagForCpuAccess = timestampPacketStorage; timestampPacketStorage->packets[0].contextStart = contextStart; timestampPacketStorage->packets[0].contextEnd = contextEnd; timestampPacketStorage->packets[0].globalStart = globalStart; ev->timestampPacketContainer->add(node); } DebugManagerStateRestore restorer; MockContext context; cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; ReleaseableObjectPtr cmdQ = clUniquePtr(new MockCommandQueue(&context, context.getDevice(0), props)); ReleaseableObjectPtr> ev = clUniquePtr(new MockEvent(cmdQ.get(), CL_COMMAND_USER, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady)); }; TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneElementAndTimestampNodeWhenCalculatingProfilingThenTimesAreTakenFromPacket) { addTimestampNode(10, 11, 12); HwTimeStamps hwTimestamps; hwTimestamps.ContextStartTS = 100; hwTimestamps.ContextEndTS = 110; hwTimestamps.GlobalStartTS = 120; MockTagNode hwTimestampsNode; hwTimestampsNode.tagForCpuAccess = &hwTimestamps; ev->timeStampNode = &hwTimestampsNode; ev->calcProfilingData(); EXPECT_EQ(10u, ev->getStartTimeStamp()); EXPECT_EQ(11u, ev->getEndTimeStamp()); EXPECT_EQ(12u, ev->getGlobalStartTimestamp()); ev->timeStampNode = nullptr; } TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithThreeElementsWhenCalculatingProfilingThenTimesAreTakenFromProperPacket) { addTimestampNode(10, 11, 12); addTimestampNode(1, 21, 22); addTimestampNode(5, 31, 2); ev->calcProfilingData(); EXPECT_EQ(1u, ev->getStartTimeStamp()); EXPECT_EQ(31u, ev->getEndTimeStamp()); EXPECT_EQ(2u, ev->getGlobalStartTimestamp()); } TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithZeroElementsWhenCalculatingProfilingThenDataIsNotCalculated) { EXPECT_EQ(0u, ev->timestampPacketContainer->peekNodes().size()); ev->calcProfilingData(); EXPECT_FALSE(ev->getDataCalcStatus()); } } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/program/000077500000000000000000000000001363734646600233015ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/program/CMakeLists.txt000066400000000000000000000027041363734646600260440ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_program ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/block_kernel_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_data_OCL2_0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_from_patchtokens_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_handler_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/printf_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_debug_data_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_elf_binary_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/process_spir_binary_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_data_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_from_binary.h ${CMAKE_CURRENT_SOURCE_DIR}/program_nonuniform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_spec_constants_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/program_with_block_kernels_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_with_kernel_debug_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program_with_source.h ) get_property(NEO_CORE_SRCS_tests_program GLOBAL PROPERTY NEO_CORE_SRCS_tests_program) list(APPEND IGDRCL_SRCS_tests_program ${NEO_CORE_SRCS_tests_program} ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_program}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/program/block_kernel_manager_tests.cpp000066400000000000000000000055501363734646600313600ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/mocks/mock_block_kernel_manager.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "gtest/gtest.h" using namespace NEO; TEST(BlockKernelManagerTest, pushPrivateSurfaceResizesArray) { MockGraphicsAllocation allocation(0, 0); KernelInfo *blockInfo = new KernelInfo; MockBlockKernelManager blockManager; blockManager.addBlockKernelInfo(blockInfo); EXPECT_EQ(0u, blockManager.blockPrivateSurfaceArray.size()); blockManager.pushPrivateSurface(&allocation, 0); EXPECT_EQ(1u, blockManager.blockPrivateSurfaceArray.size()); } TEST(BlockKernelManagerTest, pushPrivateSurfacePlacesAllocationInCorrectPosition) { MockGraphicsAllocation allocation1(0, 0); MockGraphicsAllocation allocation2(0, 0); KernelInfo *blockInfo = new KernelInfo; KernelInfo *blockInfo2 = new KernelInfo; MockBlockKernelManager blockManager; blockManager.addBlockKernelInfo(blockInfo); blockManager.addBlockKernelInfo(blockInfo2); blockManager.pushPrivateSurface(&allocation1, 0); blockManager.pushPrivateSurface(&allocation2, 1); EXPECT_EQ(2u, blockManager.blockPrivateSurfaceArray.size()); EXPECT_EQ(&allocation1, blockManager.blockPrivateSurfaceArray[0]); EXPECT_EQ(&allocation2, blockManager.blockPrivateSurfaceArray[1]); } TEST(BlockKernelManagerTest, pushPrivateSurfaceSetsPrivateSurfaceArrayToNullptrOnFirstCall) { MockGraphicsAllocation allocation(0, 0); KernelInfo *blockInfo = new KernelInfo; KernelInfo *blockInfo2 = new KernelInfo; KernelInfo *blockInfo3 = new KernelInfo; MockBlockKernelManager blockManager; blockManager.addBlockKernelInfo(blockInfo); blockManager.addBlockKernelInfo(blockInfo2); blockManager.addBlockKernelInfo(blockInfo3); blockManager.pushPrivateSurface(&allocation, 1); EXPECT_EQ(3u, blockManager.blockPrivateSurfaceArray.size()); EXPECT_EQ(nullptr, blockManager.blockPrivateSurfaceArray[0]); EXPECT_EQ(nullptr, 
blockManager.blockPrivateSurfaceArray[2]); } TEST(BlockKernelManagerTest, getPrivateSurface) { MockGraphicsAllocation allocation(0, 0); KernelInfo *blockInfo = new KernelInfo; MockBlockKernelManager blockManager; blockManager.addBlockKernelInfo(blockInfo); blockManager.pushPrivateSurface(&allocation, 0); EXPECT_EQ(1u, blockManager.blockPrivateSurfaceArray.size()); EXPECT_EQ(&allocation, blockManager.getPrivateSurface(0)); } TEST(BlockKernelManagerTest, getPrivateSurfaceWithOutOfBoundOrdinalRetunrsNullptr) { MockBlockKernelManager blockManager; EXPECT_EQ(nullptr, blockManager.getPrivateSurface(0)); EXPECT_EQ(nullptr, blockManager.getPrivateSurface(10)); } compute-runtime-20.13.16352/opencl/test/unit_test/program/evaluate_unhandled_token_tests.cpp000066400000000000000000000146411363734646600322650ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "opencl/source/program/create.inl" #include "opencl/source/program/program.h" #include "gtest/gtest.h" using namespace NEO; extern GFXCORE_FAMILY renderCoreFamily; template inline void PushBackToken(ContainerT &container, const TokenT &token) { container.insert(container.end(), reinterpret_cast(&token), reinterpret_cast(&token) + sizeof(token)); } struct MockProgramRecordUnhandledTokens : public Program { bool allowUnhandledTokens; mutable int lastUnhandledTokenFound; MockProgramRecordUnhandledTokens(ExecutionEnvironment &executionEnvironment) : Program(executionEnvironment) {} MockProgramRecordUnhandledTokens(ExecutionEnvironment &executionEnvironment, Context *context, bool isBuiltinKernel) : Program(executionEnvironment, context, isBuiltinKernel) {} bool isSafeToSkipUnhandledToken(unsigned int token) const override { lastUnhandledTokenFound = 
static_cast(token); return allowUnhandledTokens; } bool getDefaultIsSafeToSkipUnhandledToken() const { return Program::isSafeToSkipUnhandledToken(iOpenCL::NUM_PATCH_TOKENS); } }; inline cl_int GetDecodeErrorCode(const std::vector &binary, bool allowUnhandledTokens, int defaultUnhandledTokenId, int &foundUnhandledTokenId) { NEO::ExecutionEnvironment executionEnvironment; using PT = MockProgramRecordUnhandledTokens; std::unique_ptr prog; cl_int errorCode = CL_INVALID_BINARY; prog.reset(NEO::Program::createFromGenBinary(executionEnvironment, nullptr, binary.data(), binary.size(), false, &errorCode)); prog->allowUnhandledTokens = allowUnhandledTokens; prog->lastUnhandledTokenFound = defaultUnhandledTokenId; auto ret = prog->processGenBinary(); foundUnhandledTokenId = prog->lastUnhandledTokenFound; return ret; }; inline std::vector CreateBinary(bool addUnhandledProgramScopePatchToken, bool addUnhandledKernelScopePatchToken, int32_t unhandledTokenId = static_cast(iOpenCL::NUM_PATCH_TOKENS)) { std::vector ret; if (addUnhandledProgramScopePatchToken && addUnhandledKernelScopePatchToken) { return {}; } if (addUnhandledProgramScopePatchToken) { PatchTokensTestData::ValidProgramWithConstantSurface programWithUnhandledToken; iOpenCL::SPatchItemHeader &unhandledToken = *programWithUnhandledToken.constSurfMutable; unhandledToken.Size += programWithUnhandledToken.constSurfMutable->InlineDataSize; unhandledToken.Token = static_cast(unhandledTokenId); ret.assign(reinterpret_cast(programWithUnhandledToken.storage.data()), reinterpret_cast(programWithUnhandledToken.storage.data() + programWithUnhandledToken.storage.size())); } else if (addUnhandledKernelScopePatchToken) { PatchTokensTestData::ValidProgramWithKernelAndArg programWithKernelWithUnhandledToken; iOpenCL::SPatchItemHeader &unhandledToken = *programWithKernelWithUnhandledToken.arg0InfoMutable; unhandledToken.Token = static_cast(unhandledTokenId); programWithKernelWithUnhandledToken.recalcTokPtr(); 
ret.assign(reinterpret_cast(programWithKernelWithUnhandledToken.storage.data()), reinterpret_cast(programWithKernelWithUnhandledToken.storage.data() + programWithKernelWithUnhandledToken.storage.size())); } else { PatchTokensTestData::ValidProgramWithKernel regularProgramTokens; ret.assign(reinterpret_cast(regularProgramTokens.storage.data()), reinterpret_cast(regularProgramTokens.storage.data() + regularProgramTokens.storage.size())); } return ret; } constexpr int32_t unhandledTokenId = iOpenCL::NUM_PATCH_TOKENS; TEST(EvaluateUnhandledToken, ByDefaultSkippingOfUnhandledTokensInUnitTestsIsSafe) { ExecutionEnvironment executionEnvironment; MockProgramRecordUnhandledTokens program(executionEnvironment); EXPECT_TRUE(program.getDefaultIsSafeToSkipUnhandledToken()); } TEST(EvaluateUnhandledToken, WhenDecodingProgramBinaryIfAllTokensAreSupportedThenDecodingSucceeds) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(false, false), false, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(-7, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, WhenDecodingProgramBinaryIfUnhandledTokenIsFoundAndIsSafeToSkipThenDecodingSucceeds) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(true, false, unhandledTokenId), true, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, WhenDecodingProgramBinaryIfUnhandledTokenIsFoundAndIsUnsafeToSkipThenDecodingFails) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(true, false, unhandledTokenId), false, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_INVALID_BINARY, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, WhenDecodingKernelBinaryIfUnhandledTokenIsFoundAndIsSafeToSkipThenDecodingSucceeds) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(false, true, 
unhandledTokenId), true, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } TEST(EvaluateUnhandledToken, WhenDecodingKernelBinaryIfUnhandledTokenIsFoundAndIsUnsafeToSkipThenDecodingFails) { int lastUnhandledTokenFound = -1; auto retVal = GetDecodeErrorCode(CreateBinary(false, true, unhandledTokenId), false, -7, lastUnhandledTokenFound); EXPECT_EQ(CL_INVALID_BINARY, retVal); EXPECT_EQ(unhandledTokenId, lastUnhandledTokenFound); } compute-runtime-20.13.16352/opencl/test/unit_test/program/kernel_data.cpp000066400000000000000000001654721363734646600262750ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/fixtures/kernel_data_fixture.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" TEST_F(KernelDataTest, KernelInfo_Name) { kernelName = "myTestKernel"; kernelNameSize = (uint32_t)alignUp(strlen(kernelName.c_str()) + 1, sizeof(uint32_t)); buildAndDecode(); } TEST_F(KernelDataTest, KernelInfo_Heaps) { char gshData[8] = "a"; char dshData[8] = "bb"; char sshData[8] = "ccc"; char kernelHeapData[8] = "dddd"; pGsh = gshData; pDsh = dshData; pSsh = sshData; pKernelHeap = kernelHeapData; gshSize = 4; dshSize = 4; sshSize = 4; kernelHeapSize = 4; buildAndDecode(); } TEST_F(KernelDataTest, MediaIDLoad) { iOpenCL::SPatchMediaInterfaceDescriptorLoad mediaIdLoad; mediaIdLoad.Token = PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD; mediaIdLoad.Size = sizeof(SPatchMediaInterfaceDescriptorLoad); mediaIdLoad.InterfaceDescriptorDataOffset = 0xabcd; pPatchList = &mediaIdLoad; patchListSize = mediaIdLoad.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD, pKernelInfo->patchInfo.interfaceDescriptorDataLoad->Token); } TEST_F(KernelDataTest, AllocateLocalSurface) { 
iOpenCL::SPatchAllocateLocalSurface allocateLocalSurface; allocateLocalSurface.Token = PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE; allocateLocalSurface.Size = sizeof(SPatchAllocateLocalSurface); allocateLocalSurface.Offset = 0; // think this is SSH offset for local memory when we used to have surface state for local memory allocateLocalSurface.TotalInlineLocalMemorySize = 4; // 4 bytes of local memory just for test pPatchList = &allocateLocalSurface; patchListSize = allocateLocalSurface.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE, pKernelInfo->patchInfo.localsurface->Token); EXPECT_EQ_VAL(allocateLocalSurface.TotalInlineLocalMemorySize, pKernelInfo->patchInfo.localsurface->TotalInlineLocalMemorySize); } TEST_F(KernelDataTest, AllocateStatelessConstantMemoryWithInit) { iOpenCL::SPatchAllocateStatelessConstantMemorySurfaceWithInitialization allocateStatelessConstantMemoryWithInit; allocateStatelessConstantMemoryWithInit.Token = PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION; allocateStatelessConstantMemoryWithInit.Size = sizeof(SPatchAllocateStatelessConstantMemorySurfaceWithInitialization); allocateStatelessConstantMemoryWithInit.ConstantBufferIndex = 0; allocateStatelessConstantMemoryWithInit.SurfaceStateHeapOffset = 0xddu; pPatchList = &allocateStatelessConstantMemoryWithInit; patchListSize = allocateStatelessConstantMemoryWithInit.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->Token); EXPECT_EQ_VAL(0xddu, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset); } TEST_F(KernelDataTest, AllocateStatelessGlobalMemoryWithInit) { iOpenCL::SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization allocateStatelessGlobalMemoryWithInit; allocateStatelessGlobalMemoryWithInit.Token = 
PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION; allocateStatelessGlobalMemoryWithInit.Size = sizeof(SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization); allocateStatelessGlobalMemoryWithInit.GlobalBufferIndex = 0; allocateStatelessGlobalMemoryWithInit.SurfaceStateHeapOffset = 0xddu; pPatchList = &allocateStatelessGlobalMemoryWithInit; patchListSize = allocateStatelessGlobalMemoryWithInit.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->Token); EXPECT_EQ_VAL(0xddu, pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset); } TEST_F(KernelDataTest, PrintfString) { char stringValue[] = "%d\n"; size_t strSize = strlen(stringValue) + 1; iOpenCL::SPatchString printfString; printfString.Token = PATCH_TOKEN_STRING; printfString.Size = static_cast(sizeof(SPatchString) + strSize); printfString.Index = 0; printfString.StringSize = static_cast(strSize); iOpenCL::SPatchString emptyString; emptyString.Token = PATCH_TOKEN_STRING; emptyString.Size = static_cast(sizeof(SPatchString)); emptyString.Index = 1; emptyString.StringSize = 0; cl_char *pPrintfString = new cl_char[printfString.Size + emptyString.Size]; memcpy_s(pPrintfString, sizeof(SPatchString), &printfString, sizeof(SPatchString)); memcpy_s((cl_char *)pPrintfString + sizeof(printfString), strSize, stringValue, strSize); memcpy_s((cl_char *)pPrintfString + printfString.Size, emptyString.Size, &emptyString, emptyString.Size); pPatchList = (void *)pPrintfString; patchListSize = printfString.Size + emptyString.Size; buildAndDecode(); EXPECT_EQ_VAL(0, strcmp(stringValue, pKernelInfo->patchInfo.stringDataMap.find(0)->second.c_str())); delete[] pPrintfString; } TEST_F(KernelDataTest, MediaVFEState) { iOpenCL::SPatchMediaVFEState MediaVFEState; MediaVFEState.Token = PATCH_TOKEN_MEDIA_VFE_STATE; 
MediaVFEState.Size = sizeof(SPatchMediaVFEState); MediaVFEState.PerThreadScratchSpace = 1; // lets say 1KB of perThreadScratchSpace MediaVFEState.ScratchSpaceOffset = 0; pPatchList = &MediaVFEState; patchListSize = MediaVFEState.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_VFE_STATE, pKernelInfo->patchInfo.mediavfestate->Token); EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->patchInfo.mediavfestate->PerThreadScratchSpace); EXPECT_EQ_VAL(MediaVFEState.ScratchSpaceOffset, pKernelInfo->patchInfo.mediavfestate->ScratchSpaceOffset); } TEST_F(KernelDataTest, WhenMediaVFEStateSlot1TokenIsParsedThenCorrectValuesAreSet) { iOpenCL::SPatchMediaVFEState MediaVFEState; MediaVFEState.Token = PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1; MediaVFEState.Size = sizeof(SPatchMediaVFEState); MediaVFEState.PerThreadScratchSpace = 1; MediaVFEState.ScratchSpaceOffset = 0; pPatchList = &MediaVFEState; patchListSize = MediaVFEState.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_MEDIA_VFE_STATE_SLOT1, pKernelInfo->patchInfo.mediaVfeStateSlot1->Token); EXPECT_EQ_VAL(MediaVFEState.PerThreadScratchSpace, pKernelInfo->patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace); EXPECT_EQ_VAL(MediaVFEState.ScratchSpaceOffset, pKernelInfo->patchInfo.mediaVfeStateSlot1->ScratchSpaceOffset); } TEST_F(KernelDataTest, GivenSyncBufferTokenWhenParsingProgramThenTokenIsFound) { SPatchAllocateSyncBuffer token; token.Token = PATCH_TOKEN_ALLOCATE_SYNC_BUFFER; token.Size = static_cast(sizeof(SPatchAllocateSyncBuffer)); token.SurfaceStateHeapOffset = 32; token.DataParamOffset = 1024; token.DataParamSize = 2; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_EQ(token.Token, pKernelInfo->patchInfo.pAllocateSyncBuffer->Token); EXPECT_EQ(token.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset); EXPECT_EQ(token.DataParamOffset, pKernelInfo->patchInfo.pAllocateSyncBuffer->DataParamOffset); EXPECT_EQ(token.DataParamSize, 
pKernelInfo->patchInfo.pAllocateSyncBuffer->DataParamSize); } TEST_F(KernelDataTest, MediaIDData) { iOpenCL::SPatchInterfaceDescriptorData idData; idData.Token = PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA; idData.Size = sizeof(SPatchInterfaceDescriptorData); idData.BindingTableOffset = 0xaa; idData.KernelOffset = 0xbb; idData.Offset = 0xcc; idData.SamplerStateOffset = 0xdd; pPatchList = &idData; patchListSize = idData.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA, pKernelInfo->patchInfo.interfaceDescriptorData->Token); } TEST_F(KernelDataTest, SamplerArgument) { iOpenCL::SPatchSamplerKernelArgument samplerData; samplerData.Token = PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT; samplerData.ArgumentNumber = 3; samplerData.Offset = 0x40; samplerData.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE; samplerData.Size = sizeof(samplerData); pPatchList = &samplerData; patchListSize = samplerData.Size; buildAndDecode(); EXPECT_TRUE(pKernelInfo->kernelArgInfo[3].isSampler); EXPECT_EQ_VAL(samplerData.Offset, pKernelInfo->kernelArgInfo[3].offsetHeap); } TEST_F(KernelDataTest, AcceleratorArgument) { iOpenCL::SPatchSamplerKernelArgument samplerData; samplerData.Token = PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT; samplerData.ArgumentNumber = 3; samplerData.Offset = 0x40; samplerData.Type = iOpenCL::SAMPLER_OBJECT_VME; samplerData.Size = sizeof(samplerData); pPatchList = &samplerData; patchListSize = samplerData.Size; buildAndDecode(); EXPECT_TRUE(pKernelInfo->kernelArgInfo[3].isAccelerator); EXPECT_EQ_VAL(samplerData.Offset, pKernelInfo->kernelArgInfo[3].offsetHeap); } TEST_F(KernelDataTest, BindingTableState) { iOpenCL::SPatchBindingTableState bindingTableState; bindingTableState.Token = PATCH_TOKEN_BINDING_TABLE_STATE; bindingTableState.Size = sizeof(SPatchBindingTableState); bindingTableState.Count = 0xaa; bindingTableState.Offset = 0xbb; bindingTableState.SurfaceStateOffset = 0xcc; pPatchList = &bindingTableState; patchListSize = bindingTableState.Size; buildAndDecode(); 
EXPECT_EQ_CONST(PATCH_TOKEN_BINDING_TABLE_STATE, pKernelInfo->patchInfo.bindingTableState->Token); } TEST_F(KernelDataTest, DataParameterStream) { iOpenCL::SPatchDataParameterStream dataParameterStream; dataParameterStream.Token = PATCH_TOKEN_DATA_PARAMETER_STREAM; dataParameterStream.Size = sizeof(SPatchDataParameterStream); dataParameterStream.DataParameterStreamSize = 0x10; pPatchList = &dataParameterStream; patchListSize = dataParameterStream.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_STREAM, pKernelInfo->patchInfo.dataParameterStream->Token); } TEST_F(KernelDataTest, ThreadPayload) { iOpenCL::SPatchThreadPayload threadPayload; threadPayload.Token = PATCH_TOKEN_THREAD_PAYLOAD; threadPayload.Size = sizeof(SPatchThreadPayload); threadPayload.GetGlobalOffsetPresent = true; threadPayload.GetGroupIDPresent = true; threadPayload.GetLocalIDPresent = true; threadPayload.HeaderPresent = true; threadPayload.IndirectPayloadStorage = true; threadPayload.LocalIDFlattenedPresent = true; threadPayload.LocalIDXPresent = true; threadPayload.LocalIDYPresent = true; threadPayload.LocalIDZPresent = true; threadPayload.OffsetToSkipPerThreadDataLoad = true; threadPayload.PassInlineData = true; pPatchList = &threadPayload; patchListSize = threadPayload.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_THREAD_PAYLOAD, pKernelInfo->patchInfo.threadPayload->Token); } TEST_F(KernelDataTest, ExecutionEnvironmentNoReqdWorkGroupSize) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.RequiredWorkGroupSizeX = 0; executionEnvironment.RequiredWorkGroupSizeY = 0; executionEnvironment.RequiredWorkGroupSizeZ = 0; executionEnvironment.LargestCompiledSIMDSize = 32; executionEnvironment.CompiledSubGroupsNumber = 0xaa; executionEnvironment.HasBarriers = false; executionEnvironment.DisableMidThreadPreemption = true; 
executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD32 = true; executionEnvironment.CompiledSIMD8 = false; executionEnvironment.HasDeviceEnqueue = false; executionEnvironment.MayAccessUndeclaredResource = false; executionEnvironment.UsesFencesForReadWriteImages = false; executionEnvironment.UsesStatelessSpillFill = false; executionEnvironment.IsCoherent = true; executionEnvironment.IsInitializer = false; executionEnvironment.IsFinalizer = false; executionEnvironment.SubgroupIndependentForwardProgressRequired = false; executionEnvironment.CompiledForGreaterThan4GBBuffers = false; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token); EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[0]); EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[1]); EXPECT_EQ_VAL(WorkloadInfo::undefinedOffset, pKernelInfo->reqdWorkGroupSize[2]); } TEST_F(KernelDataTest, ExecutionEnvironment) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.RequiredWorkGroupSizeX = 32; executionEnvironment.RequiredWorkGroupSizeY = 16; executionEnvironment.RequiredWorkGroupSizeZ = 8; executionEnvironment.LargestCompiledSIMDSize = 32; executionEnvironment.CompiledSubGroupsNumber = 0xaa; executionEnvironment.HasBarriers = false; executionEnvironment.DisableMidThreadPreemption = true; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD32 = true; executionEnvironment.CompiledSIMD8 = false; executionEnvironment.HasDeviceEnqueue = false; executionEnvironment.MayAccessUndeclaredResource = false; executionEnvironment.UsesFencesForReadWriteImages = false; executionEnvironment.UsesStatelessSpillFill = false; 
executionEnvironment.IsCoherent = true; executionEnvironment.IsInitializer = false; executionEnvironment.IsFinalizer = false; executionEnvironment.SubgroupIndependentForwardProgressRequired = false; executionEnvironment.CompiledForGreaterThan4GBBuffers = false; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token); EXPECT_EQ(32u, pKernelInfo->reqdWorkGroupSize[0]); EXPECT_EQ(16u, pKernelInfo->reqdWorkGroupSize[1]); EXPECT_EQ(8u, pKernelInfo->reqdWorkGroupSize[2]); EXPECT_TRUE(pKernelInfo->requiresSshForBuffers); } TEST_F(KernelDataTest, ExecutionEnvironmentCompiledForGreaterThan4GBBuffers) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.RequiredWorkGroupSizeX = 32; executionEnvironment.RequiredWorkGroupSizeY = 16; executionEnvironment.RequiredWorkGroupSizeZ = 8; executionEnvironment.LargestCompiledSIMDSize = 32; executionEnvironment.CompiledSubGroupsNumber = 0xaa; executionEnvironment.HasBarriers = false; executionEnvironment.DisableMidThreadPreemption = true; executionEnvironment.CompiledSIMD16 = false; executionEnvironment.CompiledSIMD32 = true; executionEnvironment.CompiledSIMD8 = false; executionEnvironment.HasDeviceEnqueue = false; executionEnvironment.MayAccessUndeclaredResource = false; executionEnvironment.UsesFencesForReadWriteImages = false; executionEnvironment.UsesStatelessSpillFill = false; executionEnvironment.IsCoherent = true; executionEnvironment.IsInitializer = false; executionEnvironment.IsFinalizer = false; executionEnvironment.SubgroupIndependentForwardProgressRequired = false; executionEnvironment.CompiledForGreaterThan4GBBuffers = true; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); 
EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token); EXPECT_FALSE(pKernelInfo->requiresSshForBuffers); } TEST_F(KernelDataTest, ExecutionEnvironmentDoesntHaveDeviceEnqueue) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.HasDeviceEnqueue = false; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token); EXPECT_EQ_VAL(0u, program->getParentKernelInfoArray().size()); } TEST_F(KernelDataTest, ExecutionEnvironmentHasDeviceEnqueue) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.HasDeviceEnqueue = true; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token); EXPECT_EQ_VAL(1u, program->getParentKernelInfoArray().size()); } TEST_F(KernelDataTest, ExecutionEnvironmentDoesntRequireSubgroupIndependentForwardProgress) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.SubgroupIndependentForwardProgressRequired = false; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token); EXPECT_EQ_VAL(0u, program->getSubgroupKernelInfoArray().size()); } TEST_F(KernelDataTest, 
ExecutionEnvironmentRequiresSubgroupIndependentForwardProgress) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); executionEnvironment.SubgroupIndependentForwardProgressRequired = true; pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_EXECUTION_ENVIRONMENT, pKernelInfo->patchInfo.executionEnvironment->Token); EXPECT_EQ_VAL(1u, program->getSubgroupKernelInfoArray().size()); } TEST_F(KernelDataTest, KernelAttributesInfo) { iOpenCL::SPatchKernelAttributesInfo kernelAttributesInfo; kernelAttributesInfo.Token = PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO; kernelAttributesInfo.Size = sizeof(SPatchKernelAttributesInfo); kernelAttributesInfo.AttributesSize = 0x10; pPatchList = &kernelAttributesInfo; patchListSize = kernelAttributesInfo.Size; buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, pKernelInfo->patchInfo.pKernelAttributesInfo->Token); } TEST_F(KernelDataTest, WhenDecodingExecutionEnvironmentTokenThenWalkOrderIsForcedToXMajor) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); std::array expectedWalkOrder = {{0, 1, 2}}; std::array expectedDimsIds = {{0, 1, 2}}; EXPECT_EQ(expectedWalkOrder, pKernelInfo->workgroupWalkOrder); EXPECT_EQ(expectedDimsIds, pKernelInfo->workgroupDimensionsOrder); EXPECT_FALSE(pKernelInfo->requiresWorkGroupOrder); } TEST_F(KernelDataTest, whenWorkgroupOrderIsSpecifiedViaPatchTokenThenProperWorkGroupOrderIsParsed) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = 
sizeof(SPatchExecutionEnvironment); //dim0 : [0 : 1]; dim1 : [2 : 3]; dim2 : [4 : 5] executionEnvironment.WorkgroupWalkOrderDims = 1 | (2 << 2); pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); std::array expectedWalkOrder = {{1, 2, 0}}; std::array expectedDimsIds = {{2, 0, 1}}; EXPECT_EQ(expectedWalkOrder, pKernelInfo->workgroupWalkOrder); EXPECT_EQ(expectedDimsIds, pKernelInfo->workgroupDimensionsOrder); EXPECT_TRUE(pKernelInfo->requiresWorkGroupOrder); } TEST_F(KernelDataTest, whenWorkgroupOrderIsSpecifiedViaPatchToken2ThenProperWorkGroupOrderIsParsed) { iOpenCL::SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.Token = PATCH_TOKEN_EXECUTION_ENVIRONMENT; executionEnvironment.Size = sizeof(SPatchExecutionEnvironment); //dim0 : [0 : 1]; dim1 : [2 : 3]; dim2 : [4 : 5] executionEnvironment.WorkgroupWalkOrderDims = 2 | (1 << 4); pPatchList = &executionEnvironment; patchListSize = executionEnvironment.Size; buildAndDecode(); std::array expectedWalkOrder = {{2, 0, 1}}; std::array expectedDimsIds = {{1, 2, 0}}; EXPECT_EQ(expectedWalkOrder, pKernelInfo->workgroupWalkOrder); EXPECT_EQ(expectedDimsIds, pKernelInfo->workgroupDimensionsOrder); EXPECT_TRUE(pKernelInfo->requiresWorkGroupOrder); } // Test all the different data parameters with the same "made up" data class DataParameterTest : public KernelDataTest, public testing::WithParamInterface {}; TEST_P(DataParameterTest, DataParameterTests) { SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = GetParam(); dataParameterToken.ArgumentNumber = 1; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; dataParameterToken.Offset = 0; dataParameterToken.SourceOffset = 8; pPatchList = &dataParameterToken; patchListSize = 
dataParameterToken.Size; buildAndDecode(); if (pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size() > 0) { EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token); EXPECT_EQ_VAL(GetParam(), pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type); if (pKernelInfo->kernelArgInfo.size() == dataParameterToken.ArgumentNumber + 1) { if (GetParam() == DATA_PARAMETER_BUFFER_STATEFUL) { EXPECT_TRUE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); } else { EXPECT_FALSE(pKernelInfo->kernelArgInfo[dataParameterToken.ArgumentNumber].pureStatefulBufferAccess); } } // no else - some params are skipped } } // note that we start at '2' because we test kernel arg tokens elsewhere INSTANTIATE_TEST_CASE_P(DataParameterTests, DataParameterTest, testing::Range(2u, static_cast(NUM_DATA_PARAMETER_TOKENS))); class KernelDataParameterTest : public KernelDataTest {}; TEST_F(KernelDataParameterTest, DataParameterTestsDataPatameterBufferOffset) { SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_BUFFER_OFFSET; dataParameterToken.ArgumentNumber = 1; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; dataParameterToken.Offset = 128; dataParameterToken.SourceOffset = 8; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ_VAL(pKernelInfo->kernelArgInfo[1].offsetBufferOffset, dataParameterToken.Offset); } TEST_F(KernelDataParameterTest, givenDataParameterBufferStatefulWhenDecodingThenSetArgAsPureStateful) { SPatchDataParameterBuffer dataParameterToken = {}; 
dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_BUFFER_STATEFUL; dataParameterToken.ArgumentNumber = 1; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_TRUE(pKernelInfo->kernelArgInfo[1].pureStatefulBufferAccess); } TEST_F(KernelDataParameterTest, givenUnknownDataParameterWhenDecodedThenParameterIsIgnored) { SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = NUM_DATA_PARAMETER_TOKENS + 1; dataParameterToken.ArgumentNumber = 1; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; dataParameterToken.Offset = 0; dataParameterToken.SourceOffset = 8; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ_VAL(0u, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); } TEST_F(KernelDataTest, DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetCrossThread = 4; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetCrossThread; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = 
dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(alignment, pKernelInfo->kernelArgInfo[argumentNumber].slmAlignment); ASSERT_EQ(1U, pKernelInfo->kernelArgInfo[argumentNumber].kernelArgPatchInfoVector.size()); EXPECT_EQ(offsetCrossThread, pKernelInfo->kernelArgInfo[argumentNumber].kernelArgPatchInfoVector[0].crossthreadOffset); } TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_WIDTH) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetImgWidth = 4; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_WIDTH; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetImgWidth; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgWidth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgWidth); } TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_HEIGHT) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetImgHeight = 8; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_HEIGHT; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetImgHeight; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; 
dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgHeight, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgHeight); } TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_DEPTH) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetImgDepth = 12; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_DEPTH; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetImgDepth; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetImgDepth, pKernelInfo->kernelArgInfo[argumentNumber].offsetImgDepth); } TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_NUM_SAMPLES) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetNumSamples = 60; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_NUM_SAMPLES; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetNumSamples; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = 
dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumSamples, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumSamples); } TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetNumMipLevels = 60; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetNumMipLevels; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumMipLevels, pKernelInfo->kernelArgInfo[argumentNumber].offsetNumMipLevels); } TEST_F(KernelDataTest, givenFlatImageDataParamTokenWhenDecodingThenSetAllOffsets) { uint32_t argumentNumber = 1; uint32_t alignment = 16; auto testToken = [&](iOpenCL::DATA_PARAMETER_TOKEN token, uint32_t offsetToken) { { // reset program if (pKernelData) { alignedFree(pKernelData); } program = std::make_unique(*pContext->getDevice(0)->getExecutionEnvironment(), pContext, false, nullptr); } SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = token; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetToken; dataParameterToken.DataSize = sizeof(uint32_t); 
dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); }; testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_BASEOFFSET, 10u); EXPECT_EQ(10u, pKernelInfo->kernelArgInfo[argumentNumber].offsetFlatBaseOffset); testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_WIDTH, 14u); EXPECT_EQ(14u, pKernelInfo->kernelArgInfo[argumentNumber].offsetFlatWidth); testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_HEIGHT, 16u); EXPECT_EQ(16u, pKernelInfo->kernelArgInfo[argumentNumber].offsetFlatHeight); testToken(iOpenCL::DATA_PARAMETER_TOKEN::DATA_PARAMETER_FLAT_IMAGE_PITCH, 18u); EXPECT_EQ(18u, pKernelInfo->kernelArgInfo[argumentNumber].offsetFlatPitch); } TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_DATA_TYPE) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetChannelDataType = 52; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetChannelDataType; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetChannelDataType, pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelDataType); } TEST_F(KernelDataTest, 
DATA_PARAMETER_IMAGE_CHANNEL_ORDER) {
    uint32_t argumentNumber = 1;
    uint32_t alignment = 16;
    uint32_t offsetChannelOrder = 56;

    // Value-initialised so members not assigned below are zero when the struct is
    // handed to the decoder as raw bytes through pPatchList/patchListSize.
    SPatchDataParameterBuffer dataParameterToken = {};
    dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER;
    dataParameterToken.Size = sizeof(SPatchDataParameterBuffer);
    dataParameterToken.Type = DATA_PARAMETER_IMAGE_CHANNEL_ORDER;
    dataParameterToken.ArgumentNumber = argumentNumber;
    dataParameterToken.Offset = offsetChannelOrder;
    dataParameterToken.DataSize = sizeof(uint32_t);
    dataParameterToken.SourceOffset = alignment;
    dataParameterToken.LocationIndex = 0x0;
    dataParameterToken.LocationIndex2 = 0x0;

    pPatchList = &dataParameterToken;
    patchListSize = dataParameterToken.Size;

    buildAndDecode();

    EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
    ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());
    EXPECT_EQ(offsetChannelOrder, pKernelInfo->kernelArgInfo[argumentNumber].offsetChannelOrder);
}

// DATA_PARAMETER_IMAGE_ARRAY_SIZE on argument 1: expects the token Offset stored in offsetArraySize.
TEST_F(KernelDataTest, DATA_PARAMETER_IMAGE_ARRAY_SIZE) {
    uint32_t argumentNumber = 1;
    uint32_t alignment = 16;
    uint32_t offsetImageArraySize = 60;

    SPatchDataParameterBuffer dataParameterToken = {};
    dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER;
    dataParameterToken.Size = sizeof(SPatchDataParameterBuffer);
    dataParameterToken.Type = DATA_PARAMETER_IMAGE_ARRAY_SIZE;
    dataParameterToken.ArgumentNumber = argumentNumber;
    dataParameterToken.Offset = offsetImageArraySize;
    dataParameterToken.DataSize = sizeof(uint32_t);
    dataParameterToken.SourceOffset = alignment;
    dataParameterToken.LocationIndex = 0x0;
    dataParameterToken.LocationIndex2 = 0x0;

    pPatchList = &dataParameterToken;
    patchListSize = dataParameterToken.Size;

    buildAndDecode();

    EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size());
    ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size());
    EXPECT_EQ(offsetImageArraySize, pKernelInfo->kernelArgInfo[argumentNumber].offsetArraySize);
}

// DATA_PARAMETER_WORK_DIMENSIONS: expects no kernel-arg slots and the token Offset in workloadInfo.workDimOffset.
TEST_F(KernelDataTest, DATA_PARAMETER_WORK_DIMENSIONS) {
    uint32_t argumentNumber = 1;
uint32_t alignment = 16; uint32_t offsetWorkDim = 12; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_WORK_DIMENSIONS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetWorkDim; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetWorkDim, pKernelInfo->workloadInfo.workDimOffset); } TEST_F(KernelDataTest, DATA_PARAMETER_SIMD_SIZE) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t offsetSimdSize = 16; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SIMD_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetSimdSize; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetSimdSize, pKernelInfo->workloadInfo.simdSizeOffset); } TEST_F(KernelDataTest, DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t offset = 16; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = 
PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } TEST_F(KernelDataTest, DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t offset = 16; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } TEST_F(KernelDataTest, DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS) { uint32_t argumentNumber = 17; uint32_t alignment = 16; uint32_t offset = 16; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS; dataParameterToken.ArgumentNumber = 
argumentNumber; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0u, pKernelInfo->kernelArgInfo.size()); } TEST_F(KernelDataTest, DATA_PARAMETER_NUM_WORK_GROUPS) { uint32_t argumentNumber = 1; uint32_t alignment = 4; uint32_t offsetNumWorkGroups[3] = {0, 4, 8}; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_NUM_WORK_GROUPS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetNumWorkGroups[argumentNumber]; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = argumentNumber * alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetNumWorkGroups[argumentNumber], pKernelInfo->workloadInfo.numWorkGroupsOffset[argumentNumber]); } TEST_F(KernelDataTest, DATA_PARAMETER_MAX_WORKGROUP_SIZE) { uint32_t argumentNumber = 1; uint32_t alignment = 4; uint32_t offsetMaxWorkGroupSize = 4; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_MAX_WORKGROUP_SIZE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetMaxWorkGroupSize; dataParameterToken.DataSize = 
sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetMaxWorkGroupSize, pKernelInfo->workloadInfo.maxWorkGroupSizeOffset); } TEST_F(KernelDataTest, DATA_PARAMETER_SAMPLER_ADDRESS_MODE) { uint32_t argumentNumber = 0; uint32_t dataOffset = 20; uint32_t dataSize = sizeof(uint32_t); SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SAMPLER_ADDRESS_MODE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = dataOffset; dataParameterToken.DataSize = dataSize; dataParameterToken.SourceOffset = 0; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(1U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[0].offsetSamplerAddressingMode); } TEST_F(KernelDataTest, DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED) { uint32_t argumentNumber = 1; uint32_t dataOffset = 20; uint32_t dataSize = sizeof(uint32_t); SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = dataOffset; dataParameterToken.DataSize = dataSize; dataParameterToken.SourceOffset = 0; 
dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[1].offsetSamplerSnapWa); } TEST_F(KernelDataTest, DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS) { uint32_t argumentNumber = 1; uint32_t dataOffset = 20; uint32_t dataSize = sizeof(uint32_t); SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = dataOffset; dataParameterToken.DataSize = dataSize; dataParameterToken.SourceOffset = 0; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(dataOffset, pKernelInfo->kernelArgInfo[1].offsetSamplerNormalizedCoords); } TEST_F(KernelDataTest, DATA_PARAMETER_KERNEL_ARGUMENT) { uint32_t argumentNumber = 0; uint32_t dataOffset = 20; uint32_t dataSize = sizeof(uint32_t); SPatchDataParameterBuffer dataParameterTokens[2]; dataParameterTokens[0].Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterTokens[0].Size = sizeof(SPatchDataParameterBuffer); dataParameterTokens[0].Type = DATA_PARAMETER_KERNEL_ARGUMENT; dataParameterTokens[0].ArgumentNumber = argumentNumber; dataParameterTokens[0].Offset = dataOffset + dataSize * 0; dataParameterTokens[0].DataSize = dataSize; dataParameterTokens[0].SourceOffset = 0; dataParameterTokens[0].LocationIndex = 0x0; 
dataParameterTokens[0].LocationIndex2 = 0x0; dataParameterTokens[1].Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterTokens[1].Size = sizeof(SPatchDataParameterBuffer); dataParameterTokens[1].Type = DATA_PARAMETER_KERNEL_ARGUMENT; dataParameterTokens[1].ArgumentNumber = argumentNumber; dataParameterTokens[1].Offset = dataOffset + dataSize * 1; dataParameterTokens[1].DataSize = dataSize; dataParameterTokens[1].SourceOffset = dataSize * 1; dataParameterTokens[1].LocationIndex = 0x0; dataParameterTokens[1].LocationIndex2 = 0x0; pPatchList = &dataParameterTokens[0]; patchListSize = dataParameterTokens[0].Size * (sizeof(dataParameterTokens) / sizeof(SPatchDataParameterBuffer)); buildAndDecode(); EXPECT_EQ_CONST(PATCH_TOKEN_DATA_PARAMETER_BUFFER, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Token); EXPECT_EQ_VAL(DATA_PARAMETER_KERNEL_ARGUMENT, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs[0]->Type); ASSERT_EQ(1u, pKernelInfo->kernelArgInfo.size()); ASSERT_EQ(2u, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.size()); ASSERT_EQ(dataSize, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size); EXPECT_EQ(dataOffset + dataSize * 0, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); ASSERT_EQ(dataSize, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[1].size); EXPECT_EQ(dataOffset + dataSize * 1, pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[1].crossthreadOffset); } TEST_F(KernelDataTest, PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE) { SPatchAllocateLocalSurface slmToken; slmToken.TotalInlineLocalMemorySize = 1024; slmToken.Size = sizeof(SPatchAllocateLocalSurface); slmToken.Token = PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE; pPatchList = &slmToken; patchListSize = slmToken.Size; buildAndDecode(); EXPECT_EQ(1024u, pKernelInfo->workloadInfo.slmStaticSize); } TEST_F(KernelDataTest, PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE) { SPatchAllocateStatelessPrintfSurface printfSurface; printfSurface.Token = 
PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE; printfSurface.Size = static_cast(sizeof(SPatchAllocateStatelessPrintfSurface)); printfSurface.PrintfSurfaceIndex = 33; printfSurface.SurfaceStateHeapOffset = 0x1FF0; printfSurface.DataParamOffset = 0x3FF0; printfSurface.DataParamSize = 0x1000; pPatchList = &printfSurface; patchListSize = printfSurface.Size; buildAndDecode(); ASSERT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface); EXPECT_EQ(printfSurface.PrintfSurfaceIndex, pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface->PrintfSurfaceIndex); EXPECT_EQ(printfSurface.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset); EXPECT_EQ(printfSurface.DataParamOffset, pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset); EXPECT_EQ(printfSurface.DataParamSize, pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface->DataParamSize); } TEST_F(KernelDataTest, PATCH_TOKEN_SAMPLER_STATE_ARRAY) { SPatchSamplerStateArray token; token.Token = PATCH_TOKEN_SAMPLER_STATE_ARRAY; token.Size = static_cast(sizeof(SPatchSamplerStateArray)); token.Offset = 33; token.Count = 0x1FF0; token.BorderColorOffset = 0x3FF0; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); ASSERT_NE(nullptr, pKernelInfo->patchInfo.samplerStateArray); EXPECT_EQ_VAL(token.Offset, pKernelInfo->patchInfo.samplerStateArray->Offset); EXPECT_EQ_VAL(token.Count, pKernelInfo->patchInfo.samplerStateArray->Count); EXPECT_EQ_VAL(token.BorderColorOffset, pKernelInfo->patchInfo.samplerStateArray->BorderColorOffset); } TEST_F(KernelDataTest, PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY) { SPatchAllocateStatelessPrivateSurface token; token.Token = PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY; token.Size = static_cast(sizeof(SPatchAllocateStatelessPrivateSurface)); token.SurfaceStateHeapOffset = 64; token.DataParamOffset = 40; token.DataParamSize = 8; token.PerThreadPrivateMemorySize = 112; pPatchList = &token; patchListSize 
= token.Size; buildAndDecode(); ASSERT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface); EXPECT_EQ_VAL(token.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset); EXPECT_EQ_VAL(token.DataParamOffset, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamOffset); EXPECT_EQ_VAL(token.DataParamSize, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->DataParamSize); EXPECT_EQ_VAL(token.PerThreadPrivateMemorySize, pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize); } TEST_F(KernelDataTest, DATA_PARAMETER_VME_MB_BLOCK_TYPE) { uint32_t argumentNumber = 1; uint32_t alignment = 16; uint32_t offsetVmeMbBlockType = 0xaa; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_MB_BLOCK_TYPE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetVmeMbBlockType; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeMbBlockType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeMbBlockType); } TEST_F(KernelDataTest, DATA_PARAMETER_VME_SUBPIXEL_MODE) { uint32_t argumentNumber = 1; uint32_t alignment = 17; uint32_t offsetVmeSubpixelMode = 0xab; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_SUBPIXEL_MODE; dataParameterToken.ArgumentNumber = argumentNumber; 
dataParameterToken.Offset = offsetVmeSubpixelMode; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSubpixelMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSubpixelMode); } TEST_F(KernelDataTest, DATA_PARAMETER_VME_SAD_ADJUST_MODE) { uint32_t argumentNumber = 1; uint32_t alignment = 18; uint32_t offsetVmeSadAdjustMode = 0xac; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_SAD_ADJUST_MODE; dataParameterToken.ArgumentNumber = argumentNumber; dataParameterToken.Offset = offsetVmeSadAdjustMode; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSadAdjustMode, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSadAdjustMode); } TEST_F(KernelDataTest, DATA_PARAMETER_VME_SEARCH_PATH_TYPE) { uint32_t argumentNumber = 1; uint32_t alignment = 19; uint32_t offsetVmeSearchPathType = 0xad; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_VME_SEARCH_PATH_TYPE; dataParameterToken.ArgumentNumber = argumentNumber; 
dataParameterToken.Offset = offsetVmeSearchPathType; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = alignment; dataParameterToken.LocationIndex = 0x0; dataParameterToken.LocationIndex2 = 0x0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); ASSERT_EQ(2U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ(offsetVmeSearchPathType, pKernelInfo->kernelArgInfo[argumentNumber].offsetVmeSearchPathType); } TEST_F(KernelDataTest, PATCH_TOKEN_STATE_SIP) { SPatchStateSIP token; token.Token = PATCH_TOKEN_STATE_SIP; token.Size = static_cast(sizeof(SPatchStateSIP)); token.SystemKernelOffset = 33; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_EQ(0U, pKernelInfo->patchInfo.dataParameterBuffersKernelArgs.size()); EXPECT_EQ(0U, pKernelInfo->kernelArgInfo.size()); EXPECT_EQ_VAL(token.SystemKernelOffset, pKernelInfo->systemKernelOffset); } TEST_F(KernelDataTest, PATCH_TOKEN_ALLOCATE_SIP_SURFACE) { SPatchAllocateSystemThreadSurface token; token.Token = PATCH_TOKEN_ALLOCATE_SIP_SURFACE; token.Size = static_cast(sizeof(SPatchAllocateSystemThreadSurface)); token.Offset = 32; token.BTI = 0; token.PerThreadSystemThreadSurfaceSize = 0x10000; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_EQ(0u, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->BTI); EXPECT_EQ(token.Offset, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->Offset); EXPECT_EQ(token.Token, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->Token); EXPECT_EQ(token.PerThreadSystemThreadSurfaceSize, pKernelInfo->patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize); } TEST_F(KernelDataTest, givenSymbolTablePatchTokenThenLinkerInputIsCreated) { SPatchFunctionTableInfo token; token.Token = PATCH_TOKEN_PROGRAM_SYMBOL_TABLE; token.Size = static_cast(sizeof(SPatchFunctionTableInfo)); token.NumEntries = 0; 
pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_NE(nullptr, program->linkerInput); } TEST_F(KernelDataTest, givenRelocationTablePatchTokenThenLinkerInputIsCreated) { SPatchFunctionTableInfo token; token.Token = PATCH_TOKEN_PROGRAM_RELOCATION_TABLE; token.Size = static_cast(sizeof(SPatchFunctionTableInfo)); token.NumEntries = 0; pPatchList = &token; patchListSize = token.Size; buildAndDecode(); EXPECT_NE(nullptr, program->linkerInput); } compute-runtime-20.13.16352/opencl/test/unit_test/program/kernel_data_OCL2_0.cpp000066400000000000000000000172761363734646600272710ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/kernel_data_fixture.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" TEST_F(KernelDataTest, GIVENpatchTokenAllocateStatelessEventPoolSurfaceWHENdecodeTokensTHENtokenLocatedInPatchInfo) { iOpenCL::SPatchAllocateStatelessEventPoolSurface allocateStatelessEventPoolSurface; allocateStatelessEventPoolSurface.Token = PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE; allocateStatelessEventPoolSurface.Size = sizeof(SPatchAllocateStatelessEventPoolSurface); allocateStatelessEventPoolSurface.DataParamSize = 7; allocateStatelessEventPoolSurface.DataParamOffset = 0xABC; allocateStatelessEventPoolSurface.SurfaceStateHeapOffset = 0xDEF; pPatchList = &allocateStatelessEventPoolSurface; patchListSize = allocateStatelessEventPoolSurface.Size; buildAndDecode(); EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.Token, pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->Token); EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamOffset, pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamOffset); EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.DataParamSize, pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->DataParamSize); EXPECT_EQ_VAL(allocateStatelessEventPoolSurface.SurfaceStateHeapOffset, 
pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
}

// PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE: expects
// patchInfo.pAllocateStatelessDefaultDeviceQueueSurface to expose the source token's fields.
TEST_F(KernelDataTest, GIVENpatchTokenAllocateStatelessDefaultDeviceQueueSurfaceWHENdecodeTokensTHENtokenLocatedInPatchInfo) {
    // Value-initialised so members not assigned below are zero when the struct is
    // handed to the decoder as raw bytes through pPatchList/patchListSize.
    iOpenCL::SPatchAllocateStatelessDefaultDeviceQueueSurface allocateStatelessDefaultDeviceQueueSurface = {};
    allocateStatelessDefaultDeviceQueueSurface.Token = PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE;
    allocateStatelessDefaultDeviceQueueSurface.Size = sizeof(SPatchAllocateStatelessDefaultDeviceQueueSurface);

    allocateStatelessDefaultDeviceQueueSurface.DataParamSize = 7;
    allocateStatelessDefaultDeviceQueueSurface.DataParamOffset = 0xABC;
    allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset = 0xDEF;

    pPatchList = &allocateStatelessDefaultDeviceQueueSurface;
    patchListSize = allocateStatelessDefaultDeviceQueueSurface.Size;

    buildAndDecode();

    // Fail cleanly rather than dereference a null pointer when the token was not decoded.
    ASSERT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface);
    EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.Token, pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->Token);
    EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamOffset, pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamOffset);
    EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.DataParamSize, pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->DataParamSize);
    EXPECT_EQ(allocateStatelessDefaultDeviceQueueSurface.SurfaceStateHeapOffset, pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
}

// PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT for argument 3: expects that argument to be
// flagged isDeviceQueue and to carry the token's data-param offset/size and heap offset.
TEST_F(KernelDataTest, GIVENpatchTokenStatelessDeviceQueueKernelArgumentWHENdecodeTokensTHENapropriateKernelArgInfoFilled) {
    iOpenCL::SPatchStatelessDeviceQueueKernelArgument deviceQueueKernelArgument = {};
    deviceQueueKernelArgument.Token = PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT;
    deviceQueueKernelArgument.Size = sizeof(SPatchStatelessDeviceQueueKernelArgument);

    deviceQueueKernelArgument.ArgumentNumber = 3;
    deviceQueueKernelArgument.DataParamSize = 7;
deviceQueueKernelArgument.DataParamOffset = 0xABC; deviceQueueKernelArgument.SurfaceStateHeapOffset = 0xDEF; pPatchList = &deviceQueueKernelArgument; patchListSize = deviceQueueKernelArgument.Size; buildAndDecode(); ASSERT_GE(pKernelInfo->kernelArgInfo.size(), size_t(4u)); EXPECT_EQ(pKernelInfo->kernelArgInfo[3].isDeviceQueue, true); EXPECT_EQ(pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset, deviceQueueKernelArgument.DataParamOffset); EXPECT_EQ(pKernelInfo->kernelArgInfo[3].kernelArgPatchInfoVector[0].size, deviceQueueKernelArgument.DataParamSize); EXPECT_EQ(pKernelInfo->kernelArgInfo[3].offsetHeap, deviceQueueKernelArgument.SurfaceStateHeapOffset); } TEST_F(KernelDataTest, GIVENdataParameterParentEventWHENdecodeTokensTHENoffsetLocatedInWorkloadInfo) { const uint32_t offsetSimdSize = 0xABC; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_PARENT_EVENT; dataParameterToken.ArgumentNumber = 0; dataParameterToken.Offset = offsetSimdSize; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = 0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(pKernelInfo->workloadInfo.parentEventOffset, offsetSimdSize); } TEST_F(KernelDataTest, GIVENdataParameterPreferredWorkgroupMultipleTokenWHENbinaryIsdecodedTHENcorrectOffsetIsAssigned) { const uint32_t offset = 0x100; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE; dataParameterToken.ArgumentNumber = 0; dataParameterToken.Offset = offset; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = 0; pPatchList = &dataParameterToken; patchListSize = 
dataParameterToken.Size; buildAndDecode(); EXPECT_EQ(pKernelInfo->workloadInfo.preferredWkgMultipleOffset, offset); } TEST_F(KernelDataTest, GIVENdataParameterObjectIdWHENdecodeTokensTHENoffsetLocatedInKernelArgInfo) { const uint32_t offsetObjectId = 0xABC; const uint32_t argNum = 7; SPatchDataParameterBuffer dataParameterToken; dataParameterToken.Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; dataParameterToken.Size = sizeof(SPatchDataParameterBuffer); dataParameterToken.Type = DATA_PARAMETER_OBJECT_ID; dataParameterToken.ArgumentNumber = argNum; dataParameterToken.Offset = offsetObjectId; dataParameterToken.DataSize = sizeof(uint32_t); dataParameterToken.SourceOffset = 0; pPatchList = &dataParameterToken; patchListSize = dataParameterToken.Size; buildAndDecode(); ASSERT_GE(pKernelInfo->kernelArgInfo.size(), size_t(argNum + 1)); EXPECT_EQ(pKernelInfo->kernelArgInfo[argNum].offsetObjectId, offsetObjectId); } TEST_F(KernelDataTest, GIVENdataParameterChildSimdSizeWHENdecodeTokensTHENchildsIdsStoredInKernelInfoWithOffset) { SPatchDataParameterBuffer patchList[3]; uint32_t childrenKernelIds[3] = {7, 14, 21}; uint32_t childrenSimdSizeOffsets[3] = {0x77, 0xAB, 0xCD}; for (int i = 0; i < 3; i++) { patchList[i].Token = PATCH_TOKEN_DATA_PARAMETER_BUFFER; patchList[i].Size = sizeof(SPatchDataParameterBuffer); patchList[i].Type = DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE; patchList[i].ArgumentNumber = childrenKernelIds[i]; patchList[i].Offset = childrenSimdSizeOffsets[i]; patchList[i].DataSize = sizeof(uint32_t); patchList[i].SourceOffset = 0; } pPatchList = patchList; patchListSize = sizeof(patchList); buildAndDecode(); ASSERT_GE(pKernelInfo->childrenKernelsIdOffset.size(), size_t(3u)); for (int i = 0; i < 3; i++) { EXPECT_EQ(pKernelInfo->childrenKernelsIdOffset[i].first, childrenKernelIds[i]); EXPECT_EQ(pKernelInfo->childrenKernelsIdOffset[i].second, childrenSimdSizeOffsets[i]); } } 
compute-runtime-20.13.16352/opencl/test/unit_test/program/kernel_info_from_patchtokens_tests.cpp000066400000000000000000000313711363734646600331550ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/kernel_info_from_patchtokens.h" #include "gtest/gtest.h" TEST(KernelInfoFromPatchTokens, GivenValidEmptyKernelFromPatchtokensThenReturnEmptyKernelInfo) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); NEO::KernelInfo dst = {}; NEO::populateKernelInfo(dst, src, 4); NEO::KernelInfo expectedKernelInfo = {}; expectedKernelInfo.name = std::string(src.name.begin()).c_str(); expectedKernelInfo.heapInfo.pKernelHeader = src.header; EXPECT_STREQ(expectedKernelInfo.name.c_str(), dst.name.c_str()); EXPECT_EQ(expectedKernelInfo.heapInfo.pKernelHeader, dst.heapInfo.pKernelHeader); } TEST(KernelInfoFromPatchTokens, GivenValidKernelWithArgThenMetadataIsProperlyPopulated) { PatchTokensTestData::ValidProgramWithKernelAndArg src; NEO::KernelInfo dst = {}; NEO::populateKernelInfo(dst, src.kernels[0], 4); ASSERT_EQ(1U, dst.kernelArgInfo.size()); EXPECT_EQ(NEO::KernelArgMetadata::AccessReadWrite, dst.kernelArgInfo[0].metadata.accessQualifier); EXPECT_EQ(NEO::KernelArgMetadata::AddrGlobal, dst.kernelArgInfo[0].metadata.addressQualifier); NEO::KernelArgMetadata::TypeQualifiers typeQualifiers = {}; typeQualifiers.constQual = true; EXPECT_EQ(typeQualifiers.packed, dst.kernelArgInfo[0].metadata.typeQualifiers.packed); EXPECT_EQ(0U, dst.kernelArgInfo[0].metadata.argByValSize); ASSERT_NE(nullptr, dst.kernelArgInfo[0].metadataExtended); EXPECT_STREQ("__global", dst.kernelArgInfo[0].metadataExtended->addressQualifier.c_str()); EXPECT_STREQ("read_write", 
dst.kernelArgInfo[0].metadataExtended->accessQualifier.c_str()); EXPECT_STREQ("custom_arg", dst.kernelArgInfo[0].metadataExtended->argName.c_str()); EXPECT_STREQ("int*", dst.kernelArgInfo[0].metadataExtended->type.c_str()); EXPECT_STREQ("const", dst.kernelArgInfo[0].metadataExtended->typeQualifiers.c_str()); } TEST(KernelInfoFromPatchTokens, GivenValidKernelWithImageArgThenArgAccessQualifierIsPopulatedBasedOnArgInfo) { PatchTokensTestData::ValidProgramWithKernelAndArg src; iOpenCL::SPatchImageMemoryObjectKernelArgument imageArg = {}; imageArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; imageArg.Writeable = false; src.kernels[0].tokens.kernelArgs[0].objectArg = &imageArg; NEO::KernelInfo dst = {}; NEO::populateKernelInfo(dst, src.kernels[0], 4); ASSERT_EQ(1U, dst.kernelArgInfo.size()); EXPECT_EQ(NEO::KernelArgMetadata::AccessReadWrite, dst.kernelArgInfo[0].metadata.accessQualifier); } TEST(KernelInfoFromPatchTokens, GivenValidKernelWithImageArgWhenArgInfoIsMissingThenArgAccessQualifierIsPopulatedBasedOnImageArgWriteableFlag) { PatchTokensTestData::ValidProgramWithKernelAndArg src; iOpenCL::SPatchImageMemoryObjectKernelArgument imageArg = {}; imageArg.Token = iOpenCL::PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT; src.kernels[0].tokens.kernelArgs[0].objectArg = &imageArg; src.kernels[0].tokens.kernelArgs[0].argInfo = nullptr; { imageArg.Writeable = false; NEO::KernelInfo dst = {}; NEO::populateKernelInfo(dst, src.kernels[0], 4); ASSERT_EQ(1U, dst.kernelArgInfo.size()); EXPECT_EQ(NEO::KernelArgMetadata::AccessReadOnly, dst.kernelArgInfo[0].metadata.accessQualifier); } { imageArg.Writeable = true; NEO::KernelInfo dst = {}; NEO::populateKernelInfo(dst, src.kernels[0], 4); ASSERT_EQ(1U, dst.kernelArgInfo.size()); EXPECT_EQ(NEO::KernelArgMetadata::AccessReadWrite, dst.kernelArgInfo[0].metadata.accessQualifier); } } TEST(KernelInfoFromPatchTokens, GivenValidKernelWithNonDelimitedArgTypeThenUsesArgTypeAsIs) { 
PatchTokensTestData::ValidProgramWithKernelAndArg src; src.arg0TypeMutable[4] = '*'; NEO::KernelInfo dst = {}; NEO::populateKernelInfo(dst, src.kernels[0], 4); ASSERT_EQ(1U, dst.kernelArgInfo.size()); EXPECT_STREQ("int**", dst.kernelArgInfo[0].metadataExtended->type.c_str()); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWithEmptySizeThenTokenIsIgnored) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 0U; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); EXPECT_EQ(nullptr, dst.crossThreadData); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWithNonEmptySizeThenCrossthreadDataIsAllocated) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 256U; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); EXPECT_NE(nullptr, dst.crossThreadData); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWhenTokensRequiringDeviceInfoPayloadConstantsArePresentThenCrossthreadDataIsProperlyPatched) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 256U; NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; deviceInfoConstants.computeUnitsUsedForScratch = 128U; deviceInfoConstants.maxWorkGroupSize = 64U; std::unique_ptr slm = std::make_unique(); deviceInfoConstants.slmWindow = slm.get(); deviceInfoConstants.slmWindowSize = 512U; iOpenCL::SPatchAllocateStatelessPrivateSurface privateSurface = {}; privateSurface.PerThreadPrivateMemorySize = 8U; 
src.tokens.allocateStatelessPrivateSurface = &privateSurface; iOpenCL::SPatchDataParameterBuffer privateMemorySize = {}; privateMemorySize.Offset = 8U; src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize = &privateMemorySize; iOpenCL::SPatchDataParameterBuffer localMemoryWindowStartVA = {}; localMemoryWindowStartVA.Offset = 16U; src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress = &localMemoryWindowStartVA; iOpenCL::SPatchDataParameterBuffer localMemoryWindowsSize = {}; localMemoryWindowsSize.Offset = 24U; src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize = &localMemoryWindowsSize; iOpenCL::SPatchDataParameterBuffer maxWorkgroupSize = {}; maxWorkgroupSize.Offset = 32U; src.tokens.crossThreadPayloadArgs.maxWorkGroupSize = &maxWorkgroupSize; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); ASSERT_NE(nullptr, dst.crossThreadData); dst.apply(deviceInfoConstants); uint32_t expectedPrivateMemorySize = privateSurface.PerThreadPrivateMemorySize * deviceInfoConstants.computeUnitsUsedForScratch * src.tokens.executionEnvironment->LargestCompiledSIMDSize; EXPECT_EQ(expectedPrivateMemorySize, *reinterpret_cast(dst.crossThreadData + privateMemorySize.Offset)); EXPECT_EQ(deviceInfoConstants.slmWindowSize, *reinterpret_cast(dst.crossThreadData + localMemoryWindowsSize.Offset)); EXPECT_EQ(deviceInfoConstants.maxWorkGroupSize, *reinterpret_cast(dst.crossThreadData + maxWorkgroupSize.Offset)); EXPECT_EQ(reinterpret_cast(deviceInfoConstants.slmWindow), *reinterpret_cast(dst.crossThreadData + localMemoryWindowStartVA.Offset)); } TEST(KernelInfoFromPatchTokens, GivenDataParameterStreamWhenPrivateSurfaceIsNotAllocatedButPrivateSurfaceMemorySizePatchIsNeededThenPatchWithZero) { std::vector storage; auto src = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchDataParameterStream dataParameterStream = {}; src.tokens.dataParameterStream = &dataParameterStream; dataParameterStream.DataParameterStreamSize = 256U; 
NEO::DeviceInfoKernelPayloadConstants deviceInfoConstants; deviceInfoConstants.computeUnitsUsedForScratch = 128U; deviceInfoConstants.maxWorkGroupSize = 64U; std::unique_ptr slm = std::make_unique(); deviceInfoConstants.slmWindow = slm.get(); deviceInfoConstants.slmWindowSize = 512U; iOpenCL::SPatchDataParameterBuffer privateMemorySize = {}; privateMemorySize.Offset = 8U; src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize = &privateMemorySize; NEO::KernelInfo dst; NEO::populateKernelInfo(dst, src, 4); ASSERT_NE(nullptr, dst.crossThreadData); dst.apply(deviceInfoConstants); uint32_t expectedPrivateMemorySize = 0U; EXPECT_EQ(expectedPrivateMemorySize, *reinterpret_cast(dst.crossThreadData + privateMemorySize.Offset)); } TEST(KernelInfoFromPatchTokens, GivenKernelWithGtpinInfoTokenThenKernelInfoIsProperlyPopulated) { std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchItemHeader gtpinInfo = {}; gtpinInfo.Token = iOpenCL::PATCH_TOKEN_GTPIN_INFO; gtpinInfo.Size = sizeof(iOpenCL::SPatchItemHeader); kernelTokens.tokens.gtpinInfo = >pinInfo; NEO::KernelInfo kernelInfo = {}; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); EXPECT_NE(nullptr, kernelInfo.igcInfoForGtpin); } TEST(KernelInfoFromPatchTokens, GivenKernelWithGlobalObjectArgThenKernelInfoIsProperlyPopulated) { std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 1; globalMemArg.Offset = 0x40; kernelTokens.tokens.kernelArgs.resize(2); kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; 
NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); EXPECT_TRUE(kernelInfo.usesSsh); EXPECT_EQ(1U, kernelInfo.argumentsToPatchNum); ASSERT_EQ(2U, kernelInfo.kernelArgInfo.size()); EXPECT_TRUE(kernelInfo.kernelArgInfo[1].isBuffer); ASSERT_EQ(1U, kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector.size()); EXPECT_EQ(0U, kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(0U, kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].sourceOffset); EXPECT_EQ(0U, kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size); EXPECT_EQ(globalMemArg.Offset, kernelInfo.kernelArgInfo[1].offsetHeap); } TEST(KernelInfoFromPatchTokens, GivenDefaultModeThenKernelDescriptorIsNotBeingPopulated) { std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 1; globalMemArg.Offset = 0x40; kernelTokens.tokens.kernelArgs.resize(2); kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); EXPECT_TRUE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty()); } TEST(KernelInfoFromPatchTokens, WhenUseKernelDescriptorIsEnabledThenKernelDescriptorIsBeingPopulated) { std::vector storage; NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens = PatchTokensTestData::ValidEmptyKernel::create(storage); iOpenCL::SPatchGlobalMemoryObjectKernelArgument globalMemArg = {}; globalMemArg.Token = iOpenCL::PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT; globalMemArg.Size = sizeof(iOpenCL::SPatchGlobalMemoryObjectKernelArgument); globalMemArg.ArgumentNumber = 1; globalMemArg.Offset = 0x40; 
kernelTokens.tokens.kernelArgs.resize(2); kernelTokens.tokens.kernelArgs[1].objectArg = &globalMemArg; NEO::KernelInfo kernelInfo = {}; NEO::useKernelDescriptor = true; NEO::populateKernelInfo(kernelInfo, kernelTokens, sizeof(void *)); NEO::useKernelDescriptor = false; EXPECT_FALSE(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.empty()); } compute-runtime-20.13.16352/opencl/test/unit_test/program/kernel_info_tests.cpp000066400000000000000000000234251363734646600275300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "opencl/source/memory_manager/os_agnostic_memory_manager.h" #include "opencl/source/program/kernel_arg_info.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "gtest/gtest.h" #include #include using namespace NEO; TEST(KernelInfo, KernelInfoHasCopyMoveAssingmentDisabled) { static_assert(false == std::is_move_constructible::value, ""); static_assert(false == std::is_copy_constructible::value, ""); static_assert(false == std::is_move_assignable::value, ""); static_assert(false == std::is_copy_assignable::value, ""); } TEST(KernelInfo, whenDefaultConstructedThenUsesSshFlagIsNotSet) { KernelInfo kernelInfo; EXPECT_FALSE(kernelInfo.usesSsh); } TEST(KernelInfo, decodeConstantMemoryKernelArgument) { uint32_t argumentNumber = 0; auto pKernelInfo = std::make_unique(); SPatchStatelessConstantMemoryObjectKernelArgument arg; arg.Token = 0xa; arg.Size = 0x20; arg.ArgumentNumber = argumentNumber; arg.SurfaceStateHeapOffset = 0x30; arg.DataParamOffset = 0x40; arg.DataParamSize = 0x4; arg.LocationIndex = static_cast(-1); arg.LocationIndex2 = static_cast(-1); pKernelInfo->storeKernelArgument(&arg); EXPECT_TRUE(pKernelInfo->usesSsh); const auto &argInfo = 
pKernelInfo->kernelArgInfo[argumentNumber]; EXPECT_EQ(arg.SurfaceStateHeapOffset, argInfo.offsetHeap); EXPECT_FALSE(argInfo.isImage); const auto &patchInfo = pKernelInfo->patchInfo; EXPECT_EQ(1u, patchInfo.statelessGlobalMemObjKernelArgs.size()); } TEST(KernelInfo, decodeGlobalMemoryKernelArgument) { uint32_t argumentNumber = 1; auto pKernelInfo = std::make_unique(); SPatchStatelessGlobalMemoryObjectKernelArgument arg; arg.Token = 0xb; arg.Size = 0x30; arg.ArgumentNumber = argumentNumber; arg.SurfaceStateHeapOffset = 0x40; arg.DataParamOffset = 050; arg.DataParamSize = 0x8; arg.LocationIndex = static_cast(-1); arg.LocationIndex2 = static_cast(-1); pKernelInfo->storeKernelArgument(&arg); EXPECT_TRUE(pKernelInfo->usesSsh); const auto &argInfo = pKernelInfo->kernelArgInfo[argumentNumber]; EXPECT_EQ(arg.SurfaceStateHeapOffset, argInfo.offsetHeap); EXPECT_FALSE(argInfo.isImage); const auto &patchInfo = pKernelInfo->patchInfo; EXPECT_EQ(1u, patchInfo.statelessGlobalMemObjKernelArgs.size()); } TEST(KernelInfo, decodeImageKernelArgument) { uint32_t argumentNumber = 1; auto pKernelInfo = std::make_unique(); SPatchImageMemoryObjectKernelArgument arg; arg.Token = 0xc; arg.Size = 0x20; arg.ArgumentNumber = argumentNumber; arg.Type = 0x4; arg.Offset = 0x40; arg.LocationIndex = static_cast(-1); arg.LocationIndex2 = static_cast(-1); arg.Writeable = true; pKernelInfo->storeKernelArgument(&arg); EXPECT_TRUE(pKernelInfo->usesSsh); const auto &argInfo = pKernelInfo->kernelArgInfo[argumentNumber]; EXPECT_EQ(sizeof(cl_mem), static_cast(argInfo.metadata.argByValSize)); EXPECT_EQ(arg.Offset, argInfo.offsetHeap); EXPECT_TRUE(argInfo.isImage); EXPECT_EQ(KernelArgMetadata::AccessReadWrite, argInfo.metadata.accessQualifier); EXPECT_TRUE(argInfo.metadata.typeQualifiers.empty()); } TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationThenCopyWholeKernelHeapToKernelAllocation) { KernelInfo kernelInfo; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); 
OsAgnosticMemoryManager memoryManager(executionEnvironment); SKernelBinaryHeaderCommon kernelHeader; const size_t heapSize = 0x40; char heap[heapSize]; kernelHeader.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeader = &kernelHeader; kernelInfo.heapInfo.pKernelHeap = &heap; for (size_t i = 0; i < heapSize; i++) { heap[i] = static_cast(i); } auto retVal = kernelInfo.createKernelAllocation(0, &memoryManager); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), heap, heapSize)); EXPECT_EQ(heapSize, allocation->getUnderlyingBufferSize()); memoryManager.checkGpuUsageAndDestroyGraphicsAllocations(allocation); } class MyMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) override { return nullptr; } }; TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationAndCannotAllocateMemoryThenReturnsFalse) { KernelInfo kernelInfo; MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MyMemoryManager memoryManager(executionEnvironment); SKernelBinaryHeaderCommon kernelHeader; kernelInfo.heapInfo.pKernelHeader = &kernelHeader; auto retVal = kernelInfo.createKernelAllocation(0, &memoryManager); EXPECT_FALSE(retVal); } TEST(KernelInfo, decodeGlobalMemObjectKernelArgument) { uint32_t argumentNumber = 1; auto pKernelInfo = std::make_unique(); SPatchGlobalMemoryObjectKernelArgument arg; arg.Token = 0xb; arg.Size = 0x10; arg.ArgumentNumber = argumentNumber; arg.Offset = 0x40; arg.LocationIndex = static_cast(-1); arg.LocationIndex2 = static_cast(-1); pKernelInfo->storeKernelArgument(&arg); EXPECT_TRUE(pKernelInfo->usesSsh); const auto &argInfo = pKernelInfo->kernelArgInfo[argumentNumber]; EXPECT_EQ(arg.Offset, argInfo.offsetHeap); EXPECT_TRUE(argInfo.isBuffer); } TEST(KernelInfo, decodeSamplerKernelArgument) { uint32_t argumentNumber = 1; auto 
pKernelInfo = std::make_unique(); SPatchSamplerKernelArgument arg; arg.ArgumentNumber = argumentNumber; arg.Token = 0x10; arg.Size = 0x18; arg.LocationIndex = static_cast(-1); arg.LocationIndex2 = static_cast(-1); arg.Offset = 0x40; arg.Type = iOpenCL::SAMPLER_OBJECT_TEXTURE; pKernelInfo->usesSsh = true; pKernelInfo->storeKernelArgument(&arg); const auto &argInfo = pKernelInfo->kernelArgInfo[argumentNumber]; EXPECT_EQ(arg.Offset, argInfo.offsetHeap); EXPECT_FALSE(argInfo.isImage); EXPECT_TRUE(argInfo.isSampler); EXPECT_TRUE(pKernelInfo->usesSsh); } TEST(KernelInfo, whenStoringArgInfoThenMetadataIsProperlyPopulated) { KernelInfo kernelInfo; NEO::ArgTypeTraits metadata; metadata.accessQualifier = NEO::KernelArgMetadata::AccessWriteOnly; metadata.addressQualifier = NEO::KernelArgMetadata::AddrGlobal; metadata.argByValSize = sizeof(void *); metadata.typeQualifiers.pipeQual = true; auto metadataExtended = std::make_unique(); auto metadataExtendedPtr = metadataExtended.get(); kernelInfo.storeArgInfo(2, metadata, std::move(metadataExtended)); ASSERT_EQ(3U, kernelInfo.kernelArgInfo.size()); EXPECT_EQ(metadata.accessQualifier, kernelInfo.kernelArgInfo[2].metadata.accessQualifier); EXPECT_EQ(metadata.addressQualifier, kernelInfo.kernelArgInfo[2].metadata.addressQualifier); EXPECT_EQ(metadata.argByValSize, kernelInfo.kernelArgInfo[2].metadata.argByValSize); EXPECT_EQ(metadata.typeQualifiers.packed, kernelInfo.kernelArgInfo[2].metadata.typeQualifiers.packed); EXPECT_EQ(metadataExtendedPtr, kernelInfo.kernelArgInfo[2].metadataExtended.get()); } TEST(KernelInfo, givenKernelInfoWhenStoreTransformableArgThenArgInfoIsTransformable) { uint32_t argumentNumber = 1; auto kernelInfo = std::make_unique(); SPatchImageMemoryObjectKernelArgument arg; arg.ArgumentNumber = argumentNumber; arg.Transformable = true; kernelInfo->storeKernelArgument(&arg); const auto &argInfo = kernelInfo->kernelArgInfo[argumentNumber]; EXPECT_TRUE(argInfo.isTransformable); } TEST(KernelInfo, 
givenKernelInfoWhenStoreNonTransformableArgThenArgInfoIsNotTransformable) { uint32_t argumentNumber = 1; auto kernelInfo = std::make_unique(); SPatchImageMemoryObjectKernelArgument arg; arg.ArgumentNumber = argumentNumber; arg.Transformable = false; kernelInfo->storeKernelArgument(&arg); const auto &argInfo = kernelInfo->kernelArgInfo[argumentNumber]; EXPECT_FALSE(argInfo.isTransformable); } using KernelInfoMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(KernelInfoMultiRootDeviceTests, kernelAllocationHasCorrectRootDeviceIndex) { KernelInfo kernelInfo; SKernelBinaryHeaderCommon kernelHeader; const size_t heapSize = 0x40; char heap[heapSize]; kernelHeader.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeader = &kernelHeader; kernelInfo.heapInfo.pKernelHeap = &heap; auto retVal = kernelInfo.createKernelAllocation(expectedRootDeviceIndex, mockMemoryManager); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; ASSERT_NE(nullptr, allocation); EXPECT_EQ(expectedRootDeviceIndex, allocation->getRootDeviceIndex()); mockMemoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } TEST(KernelInfo, whenGetKernelNamesStringIsCalledThenNamesAreProperlyConcatenated) { ExecutionEnvironment execEnv; KernelInfo kernel1 = {}; kernel1.name = "kern1"; KernelInfo kernel2 = {}; kernel2.name = "kern2"; std::vector kernelInfoArray; kernelInfoArray.push_back(&kernel1); kernelInfoArray.push_back(&kernel2); EXPECT_STREQ("kern1;kern2", concatenateKernelNames(kernelInfoArray).c_str()); } compute-runtime-20.13.16352/opencl/test/unit_test/program/printf_handler_tests.cpp000066400000000000000000000202731363734646600302320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/program/printf_handler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include 
"opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; TEST(PrintfHandlerTest, givenNotPreparedPrintfHandlerWhenGetSurfaceIsCalledThenResultIsNullptr) { MockClDevice *device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; MockContext context; SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface(); pPrintfSurface->DataParamOffset = 0; pPrintfSurface->DataParamSize = 8; auto pKernelInfo = std::make_unique(); pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; MockProgram *pProgram = new MockProgram(*device->getExecutionEnvironment(), &context, false, &device->getDevice()); MockKernel *pKernel = new MockKernel(pProgram, *pKernelInfo, *device); MockMultiDispatchInfo multiDispatchInfo(pKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); EXPECT_EQ(nullptr, printfHandler->getSurface()); delete printfHandler; delete pPrintfSurface; delete pKernel; delete pProgram; delete device; } TEST(PrintfHandlerTest, givenPreparedPrintfHandlerWhenGetSurfaceIsCalledThenResultIsNullptr) { MockClDevice *device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; MockContext context; SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface(); pPrintfSurface->DataParamOffset = 0; pPrintfSurface->DataParamSize = 8; auto pKernelInfo = std::make_unique(); pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; MockProgram *pProgram = new MockProgram(*device->getExecutionEnvironment(), &context, false, &device->getDevice()); uint64_t crossThread[10]; MockKernel *pKernel = new 
MockKernel(pProgram, *pKernelInfo, *device); pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(pKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); printfHandler->prepareDispatch(multiDispatchInfo); EXPECT_NE(nullptr, printfHandler->getSurface()); delete printfHandler; delete pPrintfSurface; delete pKernel; delete pProgram; delete device; } TEST(PrintfHandlerTest, givenParentKernelWihoutPrintfAndBlockKernelWithPrintfWhenPrintfHandlerCreateCalledThenResaultIsAnObject) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context(device.get()); std::unique_ptr parentKernelWithoutPrintf(MockParentKernel::create(context, false, false, false, false)); MockMultiDispatchInfo multiDispatchInfo(parentKernelWithoutPrintf.get()); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); ASSERT_NE(nullptr, printfHandler.get()); } TEST(PrintfHandlerTest, givenParentKernelAndBlockKernelWithoutPrintfWhenPrintfHandlerCreateCalledThenResaultIsNullptr) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context(device.get()); std::unique_ptr blockKernelWithoutPrintf(MockParentKernel::create(context, false, false, false, false, false)); MockMultiDispatchInfo multiDispatchInfo(blockKernelWithoutPrintf.get()); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); ASSERT_EQ(nullptr, printfHandler.get()); } TEST(PrintfHandlerTest, givenParentKernelWithPrintfAndBlockKernelWithoutPrintfWhenPrintfHandlerCreateCalledThenResaultIsAnObject) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockContext context(device.get()); std::unique_ptr parentKernelWithPrintfBlockKernelWithoutPrintf(MockParentKernel::create(context, false, false, false, true, false)); MockMultiDispatchInfo 
multiDispatchInfo(parentKernelWithPrintfBlockKernelWithoutPrintf.get()); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); ASSERT_NE(nullptr, printfHandler); } TEST(PrintfHandlerTest, givenMultiDispatchInfoWithMultipleKernelsWhenCreatingAndDispatchingPrintfHandlerThenPickMainKernel) { MockContext context; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto program = std::make_unique(*device->getExecutionEnvironment(), &context, false, &device->getDevice()); auto mainKernelInfo = std::make_unique(); auto kernelInfo = std::make_unique(); auto printfSurface = std::make_unique(); printfSurface->DataParamOffset = 0; printfSurface->DataParamSize = 8; mainKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = printfSurface.get(); uint64_t crossThread[8]; auto mainKernel = std::make_unique(program.get(), *mainKernelInfo, *device); auto kernel1 = std::make_unique(program.get(), *kernelInfo, *device); auto kernel2 = std::make_unique(program.get(), *kernelInfo, *device); mainKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); DispatchInfo mainDispatchInfo(mainKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo dispatchInfo1(kernel1.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo dispatchInfo2(kernel2.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); MultiDispatchInfo multiDispatchInfo(mainKernel.get()); multiDispatchInfo.push(dispatchInfo1); multiDispatchInfo.push(mainDispatchInfo); multiDispatchInfo.push(dispatchInfo2); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); ASSERT_NE(nullptr, printfHandler.get()); printfHandler->prepareDispatch(multiDispatchInfo); EXPECT_NE(nullptr, printfHandler->getSurface()); } TEST(PrintfHandlerTest, GivenEmptyMultiDispatchInfoWhenCreatingPrintfHandlerThenPrintfHandlerIsNotCreated) { MockClDevice device{new MockDevice}; MockKernelWithInternals mockKernelWithInternals{device}; MockMultiDispatchInfo 
multiDispatchInfo{mockKernelWithInternals.mockKernel}; multiDispatchInfo.dispatchInfos.resize(0); EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel()); auto printfHandler = PrintfHandler::create(multiDispatchInfo, device); EXPECT_EQ(nullptr, printfHandler); } using PrintfHandlerMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(PrintfHandlerMultiRootDeviceTests, printfSurfaceHasCorrectRootDeviceIndex) { auto printfSurface = std::make_unique(); printfSurface->DataParamOffset = 0; printfSurface->DataParamSize = 8; auto kernelInfo = std::make_unique(); kernelInfo->patchInfo.pAllocateStatelessPrintfSurface = printfSurface.get(); auto program = std::make_unique(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice()); uint64_t crossThread[10]; auto kernel = std::make_unique(program.get(), *kernelInfo, *device); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(kernel.get()); std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); printfHandler->prepareDispatch(multiDispatchInfo); auto surface = printfHandler->getSurface(); ASSERT_NE(nullptr, surface); EXPECT_EQ(expectedRootDeviceIndex, surface->getRootDeviceIndex()); } compute-runtime-20.13.16352/opencl/test/unit_test/program/printf_helper_tests.cpp000066400000000000000000000676731363734646600301130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/string.h" #include "shared/source/program/print_formatter.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" #include using namespace NEO; using namespace iOpenCL; // 
-------------------- Base Fixture ------------------------

// Fixture shared by all PrintFormatter tests.
//
// It owns a byte buffer (`underlyingBuffer`) that the tests fill the same way
// the helpers below lay it out: the first four bytes hold the number of bytes
// written so far, followed by format-string indices and (type tag, value)
// pairs. `printFormatter` reads that buffer together with the string map of
// `kernelInfo`.
class PrintFormatterTest : public testing::Test {
  public:
    std::unique_ptr<PrintFormatter> printFormatter;

    std::string format;
    uint8_t buffer;

    MockGraphicsAllocation *data;
    MockKernel *kernel;
    std::unique_ptr<MockProgram> program;
    std::unique_ptr<KernelInfo> kernelInfo;
    ClDevice *device;

    uint8_t underlyingBuffer[PrintFormatter::maxPrintfOutputLength];
    // Write cursor into underlyingBuffer; starts past the 4-byte size prefix.
    uint32_t offset;

    // Index handed to the next injected format string.
    int maxStringIndex;

  protected:
    void SetUp() override {
        offset = 4;
        maxStringIndex = 0;
        data = new MockGraphicsAllocation(underlyingBuffer, PrintFormatter::maxPrintfOutputLength);

        kernelInfo = std::make_unique<KernelInfo>();
        device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)};
        program = std::make_unique<MockProgram>(*device->getExecutionEnvironment());
        kernel = new MockKernel(program.get(), *kernelInfo, *device);

        printFormatter = std::unique_ptr<PrintFormatter>(new PrintFormatter(static_cast<uint8_t *>(data->getUnderlyingBuffer()), PrintFormatter::maxPrintfOutputLength, is32bit, kernelInfo->patchInfo.stringDataMap));

        // Size prefix starts at zero: an untouched buffer reads as empty.
        underlyingBuffer[0] = 0;
        underlyingBuffer[1] = 0;
        underlyingBuffer[2] = 0;
        underlyingBuffer[3] = 0;
    }

    void TearDown() override {
        delete data;
        delete kernel;
        delete device;
    }

    // Type tags written in front of every value stored in the buffer.
    enum class PRINTF_DATA_TYPE : int {
        INVALID,
        BYTE,
        SHORT,
        INT,
        FLOAT,
        STRING,
        LONG,
        POINTER,
        DOUBLE,
        VECTOR_BYTE,
        VECTOR_SHORT,
        VECTOR_INT,
        VECTOR_LONG,
        VECTOR_FLOAT,
        VECTOR_DOUBLE
    };

    // Overload set mapping a C++ value type to its buffer type tag.
    PRINTF_DATA_TYPE getPrintfDataType(int8_t value) { return PRINTF_DATA_TYPE::BYTE; }
    PRINTF_DATA_TYPE getPrintfDataType(uint8_t value) { return PRINTF_DATA_TYPE::BYTE; }
    PRINTF_DATA_TYPE getPrintfDataType(int16_t value) { return PRINTF_DATA_TYPE::SHORT; }
    PRINTF_DATA_TYPE getPrintfDataType(uint16_t value) { return PRINTF_DATA_TYPE::SHORT; }
    PRINTF_DATA_TYPE getPrintfDataType(int32_t value) { return PRINTF_DATA_TYPE::INT; }
    PRINTF_DATA_TYPE getPrintfDataType(uint32_t value) { return PRINTF_DATA_TYPE::INT; }
    PRINTF_DATA_TYPE getPrintfDataType(int64_t value) { return PRINTF_DATA_TYPE::LONG; }
    PRINTF_DATA_TYPE getPrintfDataType(uint64_t value) { return PRINTF_DATA_TYPE::LONG; }
    PRINTF_DATA_TYPE getPrintfDataType(float value) { return PRINTF_DATA_TYPE::FLOAT; }
    PRINTF_DATA_TYPE getPrintfDataType(double value) { return PRINTF_DATA_TYPE::DOUBLE; }
    PRINTF_DATA_TYPE getPrintfDataType(char *value) { return PRINTF_DATA_TYPE::STRING; }

    // Appends the type tag matching T, then the value itself.
    template <class T>
    void injectValue(T value) {
        storeData(getPrintfDataType(value));
        storeData(value);
    }

    // Appends a STRING tag followed by the index of a previously injected string.
    void injectStringValue(int value) {
        storeData(PRINTF_DATA_TYPE::STRING);
        storeData(value);
    }

    // Appends the raw bytes of `value` at `offset` and refreshes the size prefix.
    template <class T>
    void storeData(T value) {
        // Fail the test instead of writing past the end of the buffer.
        ASSERT_LE(offset + sizeof(T), sizeof(underlyingBuffer));

        // Byte-wise copy: valid for any alignment of the write position.
        memcpy_s(underlyingBuffer + offset, sizeof(underlyingBuffer) - offset, &value, sizeof(T));
        offset += sizeof(T);

        // first four bytes always store the size
        memcpy_s(underlyingBuffer, sizeof(underlyingBuffer), &offset, sizeof(offset));
    }

    // Registers `str` in kernelInfo as a string patch token and returns the
    // index assigned to it.
    int injectFormatString(std::string str) {
        const size_t strSize = str.length() + 1;

        SPatchString printfString;
        printfString.Token = PATCH_TOKEN_STRING;
        printfString.Size = static_cast<uint32_t>(sizeof(SPatchString) + strSize);

        printfString.Index = maxStringIndex++;
        printfString.StringSize = static_cast<uint32_t>(strSize);

        // Token layout: SPatchString header immediately followed by the characters.
        std::unique_ptr<cl_char[]> pPrintfString(new cl_char[printfString.Size]);

        memcpy_s(pPrintfString.get(), sizeof(SPatchString), &printfString, sizeof(SPatchString));
        memcpy_s(pPrintfString.get() + sizeof(printfString), strSize, str.c_str(), strSize);

        kernelInfo->storePatchToken(reinterpret_cast<SPatchString *>(pPrintfString.get()));

        return printfString.Index;
    }
};

// for tests printing a single value
template <class T>
struct SingleValueTestParam {
    std::string format;
    T value;
};

using Int8Params = SingleValueTestParam<int8_t>;
using Uint8Params = SingleValueTestParam<uint8_t>;
using Int16Params = SingleValueTestParam<int16_t>;
using Uint16Params = SingleValueTestParam<uint16_t>;
using Int32Params = SingleValueTestParam<int32_t>;
using Uint32Params = SingleValueTestParam<uint32_t>;
using Int64Params = SingleValueTestParam<int64_t>;
using Uint64Params = SingleValueTestParam<uint64_t>;
using FloatParams = SingleValueTestParam<float>;
typedef SingleValueTestParam DoubleParams; typedef SingleValueTestParam StringParams; Int8Params byteValues[] = { {"%c", 'a'}, }; class PrintfInt8Test : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfInt8Test, GivenPrintfFormatWhenConatinsIntThenInstertValueIntoString) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[PrintFormatter::maxPrintfOutputLength]; char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfInt8Test, PrintfInt8Test, ::testing::ValuesIn(byteValues)); Int32Params intValues[] = { {"%d", 0}, {"%d", 1}, {"%d", -1}, {"%d", INT32_MAX}, {"%d", INT32_MIN}, {"%5d", 10}, {"%-5d", 10}, {"%05d", 10}, {"%+5d", 10}, {"%-+5d", 10}, {"%.5i", 100}, {"%6.5i", 100}, {"%-06i", 100}, {"%06.5i", 100}}; class PrintfInt32Test : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfInt32Test, GivenPrintfFormatWhenConatinsIntThenInstertValueIntoString) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[PrintFormatter::maxPrintfOutputLength]; char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfInt32Test, PrintfInt32Test, 
::testing::ValuesIn(intValues)); Uint32Params uintValues[] = { {"%u", 0}, {"%u", 1}, {"%u", UINT32_MAX}, {"%.0u", 0}, // octal {"%o", 10}, {"%.5o", 10}, {"%#o", 100000000}, {"%04.5o", 10}, // hexadecimal {"%#x", 0xABCDEF}, {"%#X", 0xABCDEF}, {"%#X", 0}, {"%8x", 399}, {"%04x", 399}}; class PrintfUint32Test : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfUint32Test, GivenPrintfFormatWhenConatinsUintThenInstertValueIntoString) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[PrintFormatter::maxPrintfOutputLength]; char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfUint32Test, PrintfUint32Test, ::testing::ValuesIn(uintValues)); FloatParams floatValues[] = { {"%f", 10.3456f}, {"%.1f", 10.3456f}, {"%.2f", 10.3456f}, {"%8.3f", 10.3456f}, {"%08.2f", 10.3456f}, {"%-8.2f", 10.3456f}, {"%+8.2f", -10.3456f}, {"%.0f", 0.1f}, {"%.0f", 0.6f}, {"%0f", 0.6f}, }; class PrintfFloatTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfFloatTest, GivenPrintfFormatWhenConatinsFloatThenInstertValueIntoString) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[PrintFormatter::maxPrintfOutputLength]; char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), 
input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfFloatTest, PrintfFloatTest, ::testing::ValuesIn(floatValues)); class PrintfDoubleToFloatTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; DoubleParams doubleToFloatValues[] = { {"%f", 10.3456}, {"%.1f", 10.3456}, {"%.2f", 10.3456}, {"%8.3f", 10.3456}, {"%08.2f", 10.3456}, {"%-8.2f", 10.3456}, {"%+8.2f", -10.3456}, {"%.0f", 0.1}, {"%0f", 0.6}, {"%4g", 12345.6789}, {"%4.2g", 12345.6789}, {"%4G", 0.0000023}, {"%4G", 0.023}, {"%-#20.15e", 19456120.0}, {"%+#21.15E", 19456120.0}, {"%.6a", 0.1}, {"%10.2a", 9990.235}}; TEST_P(PrintfDoubleToFloatTest, GivenPrintfFormatWhenConatinsFloatFormatAndDoubleValueThenInstertValueIntoString) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[PrintFormatter::maxPrintfOutputLength]; char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfDoubleToFloatTest, PrintfDoubleToFloatTest, ::testing::ValuesIn(doubleToFloatValues)); DoubleParams doubleValues[] = { {"%f", 302230.12156260}, {"%+f", 2937289102.1562}, {"% #F", (double)-1254}, {"%6.2f", 0.1562}, {"%06.2f", -0.1562}, {"%e", 0.1562}, {"%E", -1254.0001001}, {"%+.10e", 0.1562000102241}, {"% E", (double)1254}, {"%10.2e", 100230.1562}, {"%g", 74010.00001562}, {"%G", -1254.0001001}, {"%+g", 325001.00001562}, {"%+#G", -1254.0001001}, {"%8.2g", 19.844}, {"%1.5G", -1.1}, {"%.13a", 1890.00001562}, {"%.13A", -1254.0001001}, }; class PrintfDoubleTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; 
TEST_P(PrintfDoubleTest, GivenPrintfFormatWhenConatinsDoubleThenInstertValueIntoString) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); injectValue(input.value); char referenceOutput[PrintFormatter::maxPrintfOutputLength]; char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); if (input.format[input.format.length() - 1] == 'F') input.format[input.format.length() - 1] = 'f'; snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value); EXPECT_STREQ(referenceOutput, actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfDoubleTest, PrintfDoubleTest, ::testing::ValuesIn(doubleValues)); std::pair specialValues[] = { {"%%", "%"}, {"nothing%", "nothing"}, }; class PrintfSpecialTest : public PrintFormatterTest, public ::testing::WithParamInterface> {}; TEST_P(PrintfSpecialTest, DoublePercentageIntoOne) { auto input = GetParam(); auto stringIndex = injectFormatString(input.first); storeData(stringIndex); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ(input.second.c_str(), actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfSpecialTest, PrintfSpecialTest, ::testing::ValuesIn(specialValues)); // ------------------------- Testing Strings only with no Formatting ------------------------ class PrintfNoArgumentsTest : public PrintFormatterTest, public ::testing::WithParamInterface> {}; // escape/non-escaped strings are specified manually to avoid converting them in code // automatic code would have to do the same thing it is testing and therefore would be prone to mistakes // this is needed because compiler doesn't escape the format strings 
and provides them exactly as they were typed in kernel source std::pair stringValues[] = { {R"(test)", "test"}, {R"(test\n)", "test\n"}, }; TEST_P(PrintfNoArgumentsTest, GivenPrintfFormatWhenNoArgumentsThenEscapeChars) { auto input = GetParam(); auto stringIndex = injectFormatString(input.first); storeData(stringIndex); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ(input.second.c_str(), actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfNoArgumentsTest, PrintfNoArgumentsTest, ::testing::ValuesIn(stringValues)); StringParams stringValues2[] = { {"%s", "foo"}, }; class PrintfStringTest : public PrintFormatterTest, public ::testing::WithParamInterface {}; TEST_P(PrintfStringTest, GivenPrintfFormatWhenStringArgumentThenInsertValue) { auto input = GetParam(); auto stringIndex = injectFormatString(input.format); storeData(stringIndex); auto inputIndex = injectFormatString(input.value); injectStringValue(inputIndex); char referenceOutput[PrintFormatter::maxPrintfOutputLength]; char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); snprintf(referenceOutput, sizeof(referenceOutput), input.format.c_str(), input.value.c_str()); EXPECT_STREQ(input.value.c_str(), actualOutput); } INSTANTIATE_TEST_CASE_P(PrintfStringTest, PrintfStringTest, ::testing::ValuesIn(stringValues2)); TEST_F(PrintFormatterTest, GivenPrintfFormatWhenStringArgumentButNullTokenThenPrintNull) { auto stringIndex = injectFormatString("%s"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); storeData(0); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { 
strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("(null)", actualOutput); } // ----------------------- Vector channel count --------------------------------- TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVector2ThenInsertAllValues) { int channelCount = 2; auto stringIndex = injectFormatString("%v2d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVector4ThenInsertAllValues) { int channelCount = 4; auto stringIndex = injectFormatString("%v4d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2,3,4", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVector8ThenInsertAllValues) { int channelCount = 8; auto stringIndex = injectFormatString("%v8d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); 
EXPECT_STREQ("1,2,3,4,5,6,7,8", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVector16ThenInsertAllValues) { int channelCount = 16; auto stringIndex = injectFormatString("%v16d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(i + 1); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16", actualOutput); } // ------------------- vector types ---------------------------- TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVectorOfBytesThenInsertAllValues) { int channelCount = 2; auto stringIndex = injectFormatString("%v2hhd"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_BYTE); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVectorOfShortsThenInsertAllValues) { int channelCount = 2; auto stringIndex = injectFormatString("%v2hd"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_SHORT); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVectorOfIntsThenInsertAllValues) { int channelCount = 2; auto stringIndex = 
injectFormatString("%v2d"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfSpecialVectorFormatWhenVectorOfIntsThenInsertAllValues) { int channelCount = 2; auto stringIndex = injectFormatString("%v2hld"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_INT); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVectorOfLongsThenInsertAllValues) { int channelCount = 2; auto stringIndex = injectFormatString("%v2lld"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_LONG); // channel count storeData(channelCount); storeData(1); storeData(2); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVectorOfFloatsThenInsertAllValues) { int channelCount = 2; auto stringIndex = injectFormatString("%v2f"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_FLOAT); // channel count storeData(channelCount); storeData(1.0); storeData(2.0); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { 
strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1.000000,2.000000", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenVectorOfDoublesThenInsertAllValues) { int channelCount = 2; auto stringIndex = injectFormatString("%v2f"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_DOUBLE); // channel count storeData(channelCount); storeData(1.0); storeData(2.0); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1.000000,2.000000", actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenPointerThenInsertAddress) { auto stringIndex = injectFormatString("%p"); storeData(stringIndex); int temp; storeData(PRINTF_DATA_TYPE::POINTER); // channel count storeData(reinterpret_cast(&temp)); // on 32bit configurations add extra 4 bytes when storing pointers, IGC always stores pointers on 8 bytes if (is32bit) { uint32_t padding = 0; storeData(padding); } char actualOutput[PrintFormatter::maxPrintfOutputLength]; char referenceOutput[PrintFormatter::maxPrintfOutputLength]; snprintf(referenceOutput, sizeof(referenceOutput), "%p", reinterpret_cast(&temp)); printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ(referenceOutput, actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhenPointerWith32BitKernelThenPrint32BitPointer) { printFormatter.reset(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), PrintFormatter::maxPrintfOutputLength, true, kernelInfo->patchInfo.stringDataMap)); auto stringIndex = injectFormatString("%p"); storeData(stringIndex); kernelInfo->gpuPointerSize = 4; storeData(PRINTF_DATA_TYPE::POINTER); // store pointer 
uint32_t addressValue = 0; storeData(addressValue); void *pointer = nullptr; // store non zero padding uint32_t padding = 0xdeadbeef; storeData(padding); char actualOutput[PrintFormatter::maxPrintfOutputLength]; char referenceOutput[PrintFormatter::maxPrintfOutputLength]; snprintf(referenceOutput, sizeof(referenceOutput), "%p", pointer); printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ(referenceOutput, actualOutput); } TEST_F(PrintFormatterTest, GivenPrintfFormatWhen2ByteVectorsThenParseDataBufferProperly) { int channelCount = 4; auto stringIndex = injectFormatString("%v4hhd %v4hhd"); storeData(stringIndex); storeData(PRINTF_DATA_TYPE::VECTOR_BYTE); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(static_cast(i + 1)); // filler, should not be printed for (int i = 0; i < 12; i++) storeData(static_cast(0)); storeData(PRINTF_DATA_TYPE::VECTOR_BYTE); // channel count storeData(channelCount); // channel values for (int i = 0; i < channelCount; i++) storeData(static_cast(i + 1)); // filler, should not be printed for (int i = 0; i < 12; i++) storeData(static_cast(0)); char actualOutput[PrintFormatter::maxPrintfOutputLength]; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("1,2,3,4 1,2,3,4", actualOutput); } TEST_F(PrintFormatterTest, GivenEmptyBufferWhenPrintingThenFailSafely) { char actualOutput[PrintFormatter::maxPrintfOutputLength]; actualOutput[0] = 0; printFormatter->printKernelOutput([&actualOutput](char *str) { strncpy_s(actualOutput, PrintFormatter::maxPrintfOutputLength, str, PrintFormatter::maxPrintfOutputLength); }); EXPECT_STREQ("", actualOutput); } TEST(printToSTDOUTTest, GivenStringWhenPrintingToSTDOUTThenExpectOutput) { 
testing::internal::CaptureStdout(); printToSTDOUT("test"); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); } TEST(simpleSprintf, GivenEmptyFormatStringWhenSimpleSprintfIsCalledThenBailOutWith0) { char out[1024] = {7, 0}; auto ret = simple_sprintf(out, sizeof(out), "", 3.0f); EXPECT_EQ(0U, ret); EXPECT_EQ(0, out[0]); EXPECT_EQ(0, out[1]); } compute-runtime-20.13.16352/opencl/test/unit_test/program/process_debug_data_tests.cpp000066400000000000000000000077421363734646600310560ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_tests.h" #include "test.h" #include "program_debug_data.h" #include using namespace iOpenCL; using namespace NEO; TEST_F(ProgramTests, GivenProgramWithDebugDataForTwoKernelsWhenPorcessedThenDebugDataIsSetInKernelInfos) { const char kernelName1[] = "kernel1"; const char kernelName2[] = "kernel2"; uint32_t kernelNameSize = static_cast(sizeof(kernelName1)); uint32_t genIsaSize = 8; uint32_t visaSize = 8; size_t debugDataSize = sizeof(SProgramDebugDataHeaderIGC) + 2 * (sizeof(SKernelDebugDataHeaderIGC) + kernelNameSize + genIsaSize + visaSize); std::unique_ptr debugData(new char[debugDataSize]); auto kernelInfo1 = new KernelInfo(); kernelInfo1->name = kernelName1; auto kernelInfo2 = new KernelInfo(); kernelInfo2->name = kernelName2; auto program = std::make_unique(*pDevice->getExecutionEnvironment()); SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast(debugData.get()); programDebugHeader->NumberOfKernels = 2; SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast(ptrOffset(programDebugHeader, sizeof(SProgramDebugDataHeaderIGC))); kernelDebugHeader->KernelNameSize = kernelNameSize; kernelDebugHeader->SizeGenIsaDbgInBytes = genIsaSize; kernelDebugHeader->SizeVisaDbgInBytes = visaSize; char *kernelName = 
reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC))); memcpy_s(kernelName, kernelNameSize, kernelName1, kernelNameSize); char *vIsa1 = (ptrOffset(kernelName, kernelNameSize)); memset(vIsa1, 10, visaSize); char *genIsa1 = (ptrOffset(vIsa1, visaSize)); memset(genIsa1, 20, genIsaSize); kernelDebugHeader = reinterpret_cast(ptrOffset(vIsa1, genIsaSize + visaSize)); kernelDebugHeader->KernelNameSize = kernelNameSize; kernelDebugHeader->SizeGenIsaDbgInBytes = genIsaSize; kernelDebugHeader->SizeVisaDbgInBytes = visaSize; kernelName = reinterpret_cast(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC))); memcpy_s(kernelName, kernelNameSize, kernelName2, kernelNameSize); char *vIsa2 = (ptrOffset(kernelName, kernelNameSize)); memset(vIsa2, 10, visaSize); char *genIsa2 = (ptrOffset(vIsa2, visaSize)); memset(genIsa2, 20, genIsaSize); program->debugData = makeCopy(debugData.get(), debugDataSize); program->debugDataSize = debugDataSize; program->addKernelInfo(kernelInfo1); program->addKernelInfo(kernelInfo2); program->processDebugData(); EXPECT_EQ(genIsaSize, kernelInfo1->debugData.genIsaSize); EXPECT_EQ(visaSize, kernelInfo1->debugData.vIsaSize); EXPECT_EQ(ptrDiff(vIsa1, debugData.get()), ptrDiff(kernelInfo1->debugData.vIsa, program->getDebugData())); EXPECT_EQ(ptrDiff(genIsa1, debugData.get()), ptrDiff(kernelInfo1->debugData.genIsa, program->getDebugData())); EXPECT_EQ(genIsaSize, kernelInfo2->debugData.genIsaSize); EXPECT_EQ(visaSize, kernelInfo2->debugData.vIsaSize); EXPECT_EQ(ptrDiff(vIsa2, debugData.get()), ptrDiff(kernelInfo2->debugData.vIsa, program->getDebugData())); EXPECT_EQ(ptrDiff(genIsa2, debugData.get()), ptrDiff(kernelInfo2->debugData.genIsa, program->getDebugData())); } TEST_F(ProgramTests, GivenProgramWithoutDebugDataWhenPorcessedThenDebugDataIsNotSetInKernelInfo) { const char kernelName1[] = "kernel1"; auto kernelInfo1 = new KernelInfo(); kernelInfo1->name = kernelName1; auto program = 
std::make_unique(*pDevice->getExecutionEnvironment()); program->addKernelInfo(kernelInfo1); program->processDebugData(); EXPECT_EQ(0u, kernelInfo1->debugData.genIsaSize); EXPECT_EQ(0u, kernelInfo1->debugData.vIsaSize); EXPECT_EQ(nullptr, program->getDebugData()); } compute-runtime-20.13.16352/opencl/test/unit_test/program/process_elf_binary_tests.cpp000066400000000000000000000242751363734646600311110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/device_binary_format/elf/elf.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/string.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "compiler_options.h" #include "gtest/gtest.h" #include using namespace NEO; class ProcessElfBinaryTests : public ::testing::Test { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); program = std::make_unique(*device->getExecutionEnvironment()); program->pDevice = &device->getDevice(); } std::unique_ptr program; std::unique_ptr device; }; TEST_F(ProcessElfBinaryTests, NullBinary) { cl_int retVal = program->createProgramFromBinary(nullptr, 0); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProcessElfBinaryTests, InvalidBinary) { char pBinary[] = "thisistotallyinvalid\0"; size_t binarySize = strnlen_s(pBinary, 21); cl_int retVal = program->createProgramFromBinary(pBinary, binarySize); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProcessElfBinaryTests, ValidBinary) { std::string filePath; 
retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(pBinary.get(), program->packedDeviceBinary.get(), binarySize)); } TEST_F(ProcessElfBinaryTests, ValidSpirvBinary) { //clCreateProgramWithIL => SPIR-V stored as source code const uint32_t spirvBinary[2] = {0x03022307, 0x07230203}; size_t spirvBinarySize = sizeof(spirvBinary); //clCompileProgram => SPIR-V stored as IR binary program->isSpirV = true; program->irBinary = makeCopy(spirvBinary, spirvBinarySize); program->irBinarySize = spirvBinarySize; program->programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; EXPECT_NE(nullptr, program->irBinary); EXPECT_NE(0u, program->irBinarySize); EXPECT_TRUE(program->getIsSpirV()); //clGetProgramInfo => SPIR-V stored as ELF binary cl_int retVal = program->packDeviceBinary(); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, program->packedDeviceBinary); EXPECT_NE(0u, program->packedDeviceBinarySize); //use ELF reader to parse and validate ELF binary std::string decodeErrors; std::string decodeWarnings; auto elf = NEO::Elf::decodeElf(ArrayRef(reinterpret_cast(program->packedDeviceBinary.get()), program->packedDeviceBinarySize), decodeErrors, decodeWarnings); auto header = elf.elfFileHeader; ASSERT_NE(nullptr, header); //check if ELF binary contains section SECTION_HEADER_TYPE_SPIRV bool hasSpirvSection = false; for (const auto &elfSectionHeader : elf.sectionHeaders) { if (elfSectionHeader.header->type == NEO::Elf::SHT_OPENCL_SPIRV) { hasSpirvSection = true; break; } } EXPECT_TRUE(hasSpirvSection); //clCreateProgramWithBinary => new program should recognize SPIR-V binary program->isSpirV = false; auto elfBinary = makeCopy(program->packedDeviceBinary.get(), program->packedDeviceBinarySize); retVal = program->createProgramFromBinary(elfBinary.get(), 
program->packedDeviceBinarySize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(program->getIsSpirV()); } unsigned int BinaryTypeValues[] = { CL_PROGRAM_BINARY_TYPE_EXECUTABLE, CL_PROGRAM_BINARY_TYPE_LIBRARY, CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT}; class ProcessElfBinaryTestsWithBinaryType : public ::testing::TestWithParam { public: void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); program = std::make_unique(*device->getExecutionEnvironment()); program->pDevice = &device->getDevice(); } std::unique_ptr program; std::unique_ptr device; }; TEST_P(ProcessElfBinaryTestsWithBinaryType, GivenBinaryTypeWhenResolveProgramThenProgramIsProperlyResolved) { std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize); auto options = program->options; auto genBinary = makeCopy(program->unpackedDeviceBinary.get(), program->unpackedDeviceBinarySize); auto genBinarySize = program->unpackedDeviceBinarySize; auto irBinary = makeCopy(program->irBinary.get(), program->irBinarySize); auto irBinarySize = program->irBinarySize; EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(binarySize, program->packedDeviceBinarySize); EXPECT_EQ(0, memcmp(pBinary.get(), program->packedDeviceBinary.get(), binarySize)); // delete program's elf reference to force a resolve program->packedDeviceBinary.reset(); program->packedDeviceBinarySize = 0U; program->programBinaryType = GetParam(); retVal = program->packDeviceBinary(); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, program->packedDeviceBinary); std::string decodeErrors; std::string decodeWarnings; auto elf = NEO::Elf::decodeElf(ArrayRef(reinterpret_cast(program->packedDeviceBinary.get()), program->packedDeviceBinarySize), decodeErrors, decodeWarnings); ASSERT_NE(nullptr, elf.elfFileHeader); ArrayRef decodedIr; 
ArrayRef decodedDeviceBinary; ArrayRef decodedOptions; for (auto §ion : elf.sectionHeaders) { switch (section.header->type) { default: break; case NEO::Elf::SHT_OPENCL_LLVM_BINARY: decodedIr = section.data; break; case NEO::Elf::SHT_OPENCL_SPIRV: decodedIr = section.data; break; case NEO::Elf::SHT_OPENCL_DEV_BINARY: decodedDeviceBinary = section.data; break; case NEO::Elf::SHT_OPENCL_OPTIONS: decodedDeviceBinary = section.data; break; } } ASSERT_EQ(options.size(), decodedOptions.size()); ASSERT_EQ(genBinarySize, decodedDeviceBinary.size()); ASSERT_EQ(irBinarySize, decodedIr.size()); EXPECT_EQ(0, memcmp(genBinary.get(), decodedDeviceBinary.begin(), genBinarySize)); EXPECT_EQ(0, memcmp(irBinary.get(), decodedIr.begin(), irBinarySize)); } INSTANTIATE_TEST_CASE_P(ResolveBinaryTests, ProcessElfBinaryTestsWithBinaryType, ::testing::ValuesIn(BinaryTypeValues)); TEST_F(ProcessElfBinaryTests, BackToBack) { std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(pBinary.get(), program->packedDeviceBinary.get(), binarySize)); std::string filePath2; retrieveBinaryKernelFilename(filePath2, "simple_arg_int_", ".bin"); pBinary = loadDataFromFile(filePath2.c_str(), binarySize); retVal = program->createProgramFromBinary(pBinary.get(), binarySize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(pBinary.get(), program->packedDeviceBinary.get(), binarySize)); } TEST_F(ProcessElfBinaryTests, BuildOptionsEmpty) { std::string filePath; retrieveBinaryKernelFilename(filePath, "simple_kernels_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize); EXPECT_EQ(CL_SUCCESS, retVal); const auto &options = program->getOptions(); 
size_t optionsSize = strlen(options.c_str()) + 1; EXPECT_EQ(0, memcmp("", options.c_str(), optionsSize)); } TEST_F(ProcessElfBinaryTests, BuildOptionsNotEmpty) { std::string filePath; retrieveBinaryKernelFilename(filePath, "simple_kernels_opts_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize); EXPECT_EQ(CL_SUCCESS, retVal); const auto &options = program->getOptions(); std::string buildOptionsNotEmpty = CompilerOptions::concatenate(CompilerOptions::optDisable, "-DDEF_WAS_SPECIFIED=1"); EXPECT_STREQ(buildOptionsNotEmpty.c_str(), options.c_str()); } TEST_F(ProcessElfBinaryTests, GivenBinaryWhenIncompatiblePatchtokenVerionThenProramCreationFails) { PatchTokensTestData::ValidEmptyProgram programTokens; { NEO::Elf::ElfEncoder<> elfEncoder; elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_EXECUTABLE; elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_DEV_BINARY, NEO::Elf::SectionNamesOpenCl::deviceBinary, programTokens.storage); auto elfBinary = elfEncoder.encode(); cl_int retVal = program->createProgramFromBinary(elfBinary.data(), elfBinary.size()); EXPECT_EQ(CL_SUCCESS, retVal); } { programTokens.headerMutable->Version -= 1; NEO::Elf::ElfEncoder<> elfEncoder; elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_EXECUTABLE; elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_DEV_BINARY, NEO::Elf::SectionNamesOpenCl::deviceBinary, programTokens.storage); auto elfBinary = elfEncoder.encode(); cl_int retVal = program->createProgramFromBinary(elfBinary.data(), elfBinary.size()); EXPECT_EQ(CL_INVALID_BINARY, retVal); } } compute-runtime-20.13.16352/opencl/test/unit_test/program/process_spir_binary_tests.cpp000066400000000000000000000036751363734646600313210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/helpers/string.h" 
#include "opencl/test/unit_test/mocks/mock_program.h" #include "gtest/gtest.h" using namespace NEO; class ProcessSpirBinaryTests : public ::testing::Test { public: void SetUp() override { executionEnvironment = std::make_unique(); program = std::make_unique(*executionEnvironment); } std::unique_ptr executionEnvironment; std::unique_ptr program; }; TEST_F(ProcessSpirBinaryTests, NullBinary) { auto retVal = program->processSpirBinary(nullptr, 0, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(true, program->sourceCode.empty()); } TEST_F(ProcessSpirBinaryTests, InvalidSizeBinary) { char pBinary[] = "somebinary\0"; size_t binarySize = 1; auto retVal = program->processSpirBinary(pBinary, binarySize, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(binarySize, program->irBinarySize); } TEST_F(ProcessSpirBinaryTests, SomeBinary) { char pBinary[] = "somebinary\0"; size_t binarySize = strnlen_s(pBinary, 11); auto retVal = program->processSpirBinary(pBinary, binarySize, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, memcmp(pBinary, program->irBinary.get(), program->irBinarySize)); EXPECT_EQ(binarySize, program->irBinarySize); // Verify no built log is available auto pBuildLog = program->getBuildLog(program->getDevicePtr()); EXPECT_EQ(nullptr, pBuildLog); } TEST_F(ProcessSpirBinaryTests, SpirvBinary) { const uint32_t pBinary[2] = {0x03022307, 0x07230203}; size_t binarySize = sizeof(pBinary); program->processSpirBinary(pBinary, binarySize, false); EXPECT_FALSE(program->getIsSpirV()); program->processSpirBinary(pBinary, binarySize, true); EXPECT_TRUE(program->getIsSpirV()); } compute-runtime-20.13.16352/opencl/test/unit_test/program/program_data_tests.cpp000066400000000000000000000675361363734646600277100ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/allocations_list.h" #include 
"shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/program/program_info_from_patchtokens.h" #include "shared/test/unit_test/compiler_interface/linker_mock.h" #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/program.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_with_source.h" #include "test.h" using namespace NEO; using namespace iOpenCL; static const char constValue[] = "11223344"; static const char globalValue[] = "55667788"; class ProgramDataTestBase : public testing::Test, public ContextFixture, public PlatformFixture, public ProgramFixture { using ContextFixture::SetUp; using PlatformFixture::SetUp; public: ProgramDataTestBase() { memset(&programBinaryHeader, 0x00, sizeof(SProgramBinaryHeader)); pCurPtr = nullptr; pProgramPatchList = nullptr; programPatchListSize = 0; } void buildAndDecodeProgramPatchList(); void SetUp() override { PlatformFixture::SetUp(); cl_device_id device = pPlatform->getClDevice(0); ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); CreateProgramWithSource( pContext, &device, "CopyBuffer_simd16.cl"); } void TearDown() override { knownSource.reset(); ProgramFixture::TearDown(); ContextFixture::TearDown(); PlatformFixture::TearDown(); } size_t setupConstantAllocation() { size_t constSize = strlen(constValue) + 1; EXPECT_EQ(nullptr, pProgram->getConstantSurface()); SPatchAllocateConstantMemorySurfaceProgramBinaryInfo allocateConstMemorySurface; allocateConstMemorySurface.Token = 
PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO; allocateConstMemorySurface.Size = static_cast(sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo)); allocateConstMemorySurface.ConstantBufferIndex = 0; allocateConstMemorySurface.InlineDataSize = static_cast(constSize); pAllocateConstMemorySurface.reset(new cl_char[allocateConstMemorySurface.Size + constSize]); memcpy_s(pAllocateConstMemorySurface.get(), sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo), &allocateConstMemorySurface, sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo)); memcpy_s((cl_char *)pAllocateConstMemorySurface.get() + sizeof(allocateConstMemorySurface), constSize, constValue, constSize); pProgramPatchList = (void *)pAllocateConstMemorySurface.get(); programPatchListSize = static_cast(allocateConstMemorySurface.Size + constSize); return constSize; } size_t setupGlobalAllocation() { size_t globalSize = strlen(globalValue) + 1; EXPECT_EQ(nullptr, pProgram->getGlobalSurface()); SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo allocateGlobalMemorySurface; allocateGlobalMemorySurface.Token = PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO; allocateGlobalMemorySurface.Size = static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); allocateGlobalMemorySurface.GlobalBufferIndex = 0; allocateGlobalMemorySurface.InlineDataSize = static_cast(globalSize); pAllocateGlobalMemorySurface.reset(new cl_char[allocateGlobalMemorySurface.Size + globalSize]); memcpy_s(pAllocateGlobalMemorySurface.get(), sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo), &allocateGlobalMemorySurface, sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); memcpy_s((cl_char *)pAllocateGlobalMemorySurface.get() + sizeof(allocateGlobalMemorySurface), globalSize, globalValue, globalSize); pProgramPatchList = pAllocateGlobalMemorySurface.get(); programPatchListSize = static_cast(allocateGlobalMemorySurface.Size + globalSize); return globalSize; } 
std::unique_ptr pAllocateConstMemorySurface; std::unique_ptr pAllocateGlobalMemorySurface; char *pCurPtr; SProgramBinaryHeader programBinaryHeader; void *pProgramPatchList; uint32_t programPatchListSize; cl_int patchlistDecodeErrorCode = 0; bool allowDecodeFailure = false; }; void ProgramDataTestBase::buildAndDecodeProgramPatchList() { size_t headerSize = sizeof(SProgramBinaryHeader); cl_int error = CL_SUCCESS; programBinaryHeader.Magic = 0x494E5443; programBinaryHeader.Version = CURRENT_ICBE_VERSION; programBinaryHeader.Device = defaultHwInfo->platform.eRenderCoreFamily; programBinaryHeader.GPUPointerSizeInBytes = 8; programBinaryHeader.NumberOfKernels = 0; programBinaryHeader.PatchListSize = programPatchListSize; char *pProgramData = new char[headerSize + programBinaryHeader.PatchListSize]; ASSERT_NE(nullptr, pProgramData); pCurPtr = pProgramData; // program header memset(pCurPtr, 0, sizeof(SProgramBinaryHeader)); *(SProgramBinaryHeader *)pCurPtr = programBinaryHeader; pCurPtr += sizeof(SProgramBinaryHeader); // patch list memcpy_s(pCurPtr, programPatchListSize, pProgramPatchList, programPatchListSize); pCurPtr += programPatchListSize; //as we use mock compiler in unit test, replace the genBinary here. 
pProgram->unpackedDeviceBinary = makeCopy(pProgramData, headerSize + programBinaryHeader.PatchListSize); pProgram->unpackedDeviceBinarySize = headerSize + programBinaryHeader.PatchListSize; error = pProgram->processGenBinary(); patchlistDecodeErrorCode = error; if (allowDecodeFailure == false) { EXPECT_EQ(CL_SUCCESS, error); } delete[] pProgramData; } using ProgramDataTest = ProgramDataTestBase; TEST_F(ProgramDataTest, EmptyProgramBinaryHeader) { buildAndDecodeProgramPatchList(); } TEST_F(ProgramDataTest, AllocateConstantMemorySurfaceProgramBinaryInfo) { auto constSize = setupConstantAllocation(); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getConstantSurface()); EXPECT_EQ(0, memcmp(constValue, pProgram->getConstantSurface()->getUnderlyingBuffer(), constSize)); } TEST_F(ProgramDataTest, whenGlobalConstantsAreExportedThenAllocateSurfacesAsSvm) { if (this->pContext->getSVMAllocsManager() == nullptr) { return; } char constantData[128] = {}; ProgramInfo programInfo; programInfo.globalConstants.initData = constantData; programInfo.globalConstants.size = sizeof(constantData); std::unique_ptr> mockLinkerInput = std::make_unique>(); mockLinkerInput->traits.exportsGlobalConstants = true; programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo); ASSERT_NE(nullptr, pProgram->getConstantSurface()); EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getConstantSurface()->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalConstantsAreNotExportedThenAllocateSurfacesAsNonSvm) { if (this->pContext->getSVMAllocsManager() == nullptr) { return; } char constantData[128] = {}; ProgramInfo programInfo; programInfo.globalConstants.initData = constantData; programInfo.globalConstants.size = sizeof(constantData); std::unique_ptr> mockLinkerInput = std::make_unique>(); mockLinkerInput->traits.exportsGlobalConstants = false; programInfo.linkerInput = std::move(mockLinkerInput); 
this->pProgram->processProgramInfo(programInfo); ASSERT_NE(nullptr, pProgram->getConstantSurface()); EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getConstantSurface()->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalConstantsAreExportedButContextUnavailableThenAllocateSurfacesAsNonSvm) { if (this->pContext->getSVMAllocsManager() == nullptr) { return; } char constantData[128] = {}; ProgramInfo programInfo; programInfo.globalConstants.initData = constantData; programInfo.globalConstants.size = sizeof(constantData); std::unique_ptr> mockLinkerInput = std::make_unique>(); mockLinkerInput->traits.exportsGlobalConstants = true; programInfo.linkerInput = std::move(mockLinkerInput); pProgram->context = nullptr; this->pProgram->processProgramInfo(programInfo); pProgram->context = pContext; ASSERT_NE(nullptr, pProgram->getConstantSurface()); EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getConstantSurface()->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedThenAllocateSurfacesAsSvm) { if (this->pContext->getSVMAllocsManager() == nullptr) { return; } char globalData[128] = {}; ProgramInfo programInfo; programInfo.globalVariables.initData = globalData; programInfo.globalVariables.size = sizeof(globalData); std::unique_ptr> mockLinkerInput = std::make_unique>(); mockLinkerInput->traits.exportsGlobalVariables = true; programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo); ASSERT_NE(nullptr, pProgram->getGlobalSurface()); EXPECT_NE(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface()->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalVariablesAreExportedButContextUnavailableThenAllocateSurfacesAsNonSvm) { if (this->pContext->getSVMAllocsManager() == nullptr) { return; } char globalData[128] = {}; ProgramInfo programInfo; 
programInfo.globalVariables.initData = globalData; programInfo.globalVariables.size = sizeof(globalData); std::unique_ptr> mockLinkerInput = std::make_unique>(); mockLinkerInput->traits.exportsGlobalVariables = true; programInfo.linkerInput = std::move(mockLinkerInput); pProgram->context = nullptr; this->pProgram->processProgramInfo(programInfo); pProgram->context = pContext; ASSERT_NE(nullptr, pProgram->getGlobalSurface()); EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface()->getGpuAddress()))); } TEST_F(ProgramDataTest, whenGlobalVariablesAreNotExportedThenAllocateSurfacesAsNonSvm) { if (this->pContext->getSVMAllocsManager() == nullptr) { return; } char globalData[128] = {}; ProgramInfo programInfo; programInfo.globalVariables.initData = globalData; programInfo.globalVariables.size = sizeof(globalData); std::unique_ptr> mockLinkerInput = std::make_unique>(); mockLinkerInput->traits.exportsGlobalVariables = false; programInfo.linkerInput = std::move(mockLinkerInput); this->pProgram->processProgramInfo(programInfo); ASSERT_NE(nullptr, pProgram->getGlobalSurface()); EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast(pProgram->getGlobalSurface()->getGpuAddress()))); } TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) { setupConstantAllocation(); buildAndDecodeProgramPatchList(); auto &csr = *pPlatform->getClDevice(0)->getDefaultEngine().commandStreamReceiver; auto tagAddress = csr.getTagAddress(); auto constantSurface = pProgram->getConstantSurface(); constantSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); delete pProgram; pProgram = nullptr; EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(constantSurface, csr.getTemporaryAllocations().peekHead()); } TEST_F(ProgramDataTest, 
givenGlobalAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) { setupGlobalAllocation(); buildAndDecodeProgramPatchList(); auto &csr = *pPlatform->getClDevice(0)->getDefaultEngine().commandStreamReceiver; auto tagAddress = csr.getTagAddress(); auto globalSurface = pProgram->getGlobalSurface(); globalSurface->updateTaskCount(*tagAddress + 1, csr.getOsContext().getContextId()); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); delete pProgram; pProgram = nullptr; EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(globalSurface, csr.getTemporaryAllocations().peekHead()); } TEST_F(ProgramDataTest, GivenDeviceForcing32BitMessagesWhenConstAllocationIsPresentInProgramBinariesThen32BitStorageIsAllocated) { auto constSize = setupConstantAllocation(); this->pContext->getDevice(0)->getMemoryManager()->setForce32BitAllocations(true); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getConstantSurface()); EXPECT_EQ(0, memcmp(constValue, pProgram->getConstantSurface()->getUnderlyingBuffer(), constSize)); if (is64bit) { EXPECT_TRUE(pProgram->getConstantSurface()->is32BitAllocation()); } } TEST_F(ProgramDataTest, AllocateGlobalMemorySurfaceProgramBinaryInfo) { auto globalSize = setupGlobalAllocation(); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getGlobalSurface()); EXPECT_EQ(0, memcmp(globalValue, pProgram->getGlobalSurface()->getUnderlyingBuffer(), globalSize)); } TEST_F(ProgramDataTest, Given32BitDeviceWhenGlobalMemorySurfaceIsPresentThenItHas32BitStorage) { char globalValue[] = "55667788"; size_t globalSize = strlen(globalValue) + 1; this->pContext->getDevice(0)->getMemoryManager()->setForce32BitAllocations(true); EXPECT_EQ(nullptr, pProgram->getGlobalSurface()); SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo allocateGlobalMemorySurface; allocateGlobalMemorySurface.Token = PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO; allocateGlobalMemorySurface.Size = 
static_cast(sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); allocateGlobalMemorySurface.GlobalBufferIndex = 0; allocateGlobalMemorySurface.InlineDataSize = static_cast(globalSize); cl_char *pAllocateGlobalMemorySurface = new cl_char[allocateGlobalMemorySurface.Size + globalSize]; memcpy_s(pAllocateGlobalMemorySurface, sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo), &allocateGlobalMemorySurface, sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo)); memcpy_s((cl_char *)pAllocateGlobalMemorySurface + sizeof(allocateGlobalMemorySurface), globalSize, globalValue, globalSize); pProgramPatchList = (void *)pAllocateGlobalMemorySurface; programPatchListSize = static_cast(allocateGlobalMemorySurface.Size + globalSize); buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, pProgram->getGlobalSurface()); EXPECT_EQ(0, memcmp(globalValue, pProgram->getGlobalSurface()->getUnderlyingBuffer(), globalSize)); if (is64bit) { EXPECT_TRUE(pProgram->getGlobalSurface()->is32BitAllocation()); } delete[] pAllocateGlobalMemorySurface; } TEST(ProgramScopeMetadataTest, WhenPatchingGlobalSurfaceThenPickProperSourceBuffer) { MockExecutionEnvironment execEnv; MockClDevice device{new MockDevice}; execEnv.memoryManager = std::make_unique(); PatchTokensTestData::ValidProgramWithMixedGlobalVarAndConstSurfacesAndPointers decodedProgram; decodedProgram.globalPointerMutable->GlobalPointerOffset = 0U; decodedProgram.constantPointerMutable->ConstantPointerOffset = 0U; memset(decodedProgram.globalSurfMutable + 1, 0U, sizeof(uintptr_t)); memset(decodedProgram.constSurfMutable + 1, 0U, sizeof(uintptr_t)); ProgramInfo programInfo; MockProgram program(execEnv); program.pDevice = &device.getDevice(); NEO::populateProgramInfo(programInfo, decodedProgram); program.processProgramInfo(programInfo); ASSERT_NE(nullptr, program.globalSurface); ASSERT_NE(nullptr, program.constantSurface); ASSERT_NE(nullptr, program.globalSurface->getUnderlyingBuffer()); ASSERT_NE(nullptr, 
program.constantSurface->getUnderlyingBuffer()); EXPECT_EQ(static_cast(program.globalSurface->getGpuAddressToPatch()), *reinterpret_cast(program.constantSurface->getUnderlyingBuffer())); EXPECT_EQ(static_cast(program.constantSurface->getGpuAddressToPatch()), *reinterpret_cast(program.globalSurface->getUnderlyingBuffer())); } TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeConstantBufferPatchTokensAreReadThenConstantPointerOffsetIsPatchedWith32bitPointer) { cl_device_id device = pPlatform->getClDevice(0); CreateProgramWithSource(pContext, &device, "CopyBuffer_simd16.cl"); ASSERT_NE(nullptr, pProgram); MockProgram *prog = pProgram; // simulate case when constant surface was not allocated EXPECT_EQ(nullptr, prog->getConstantSurface()); ProgramInfo programInfo; programInfo.prepareLinkerInputStorage(); NEO::LinkerInput::RelocationInfo relocInfo; relocInfo.relocationSegment = NEO::SegmentType::GlobalConstants; relocInfo.symbolSegment = NEO::SegmentType::GlobalConstants; relocInfo.offset = 0U; relocInfo.type = NEO::LinkerInput::RelocationInfo::Type::Address; programInfo.linkerInput->addDataRelocationInfo(relocInfo); programInfo.linkerInput->setPointerSize(LinkerInput::Traits::PointerSize::Ptr32bit); MockBuffer constantSurface; ASSERT_LT(8U, constantSurface.getSize()); prog->setConstantSurface(&constantSurface.mockGfxAllocation); constantSurface.mockGfxAllocation.set32BitAllocation(true); uint32_t *constantSurfaceStorage = reinterpret_cast(constantSurface.getCpuAddress()); uint32_t sentinel = 0x17192329U; constantSurfaceStorage[0] = 0U; constantSurfaceStorage[1] = sentinel; pProgram->linkerInput = std::move(programInfo.linkerInput); pProgram->linkBinary(); uint32_t expectedAddr = static_cast(constantSurface.getGraphicsAllocation()->getGpuAddressToPatch()); EXPECT_EQ(expectedAddr, constantSurfaceStorage[0]); EXPECT_EQ(sentinel, constantSurfaceStorage[1]); constantSurface.mockGfxAllocation.set32BitAllocation(false); prog->setConstantSurface(nullptr); } 
TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeGlobalPointerPatchTokensAreReadThenGlobalPointerOffsetIsPatchedWith32bitPointer) { cl_device_id device = pPlatform->getClDevice(0); CreateProgramWithSource(pContext, &device, "CopyBuffer_simd16.cl"); ASSERT_NE(nullptr, pProgram); MockProgram *prog = pProgram; // simulate case when constant surface was not allocated EXPECT_EQ(nullptr, prog->getConstantSurface()); ProgramInfo programInfo; programInfo.prepareLinkerInputStorage(); NEO::LinkerInput::RelocationInfo relocInfo; relocInfo.relocationSegment = NEO::SegmentType::GlobalVariables; relocInfo.symbolSegment = NEO::SegmentType::GlobalVariables; relocInfo.offset = 0U; relocInfo.type = NEO::LinkerInput::RelocationInfo::Type::Address; programInfo.linkerInput->addDataRelocationInfo(relocInfo); programInfo.linkerInput->setPointerSize(LinkerInput::Traits::PointerSize::Ptr32bit); MockBuffer globalSurface; ASSERT_LT(8U, globalSurface.getSize()); prog->setGlobalSurface(&globalSurface.mockGfxAllocation); globalSurface.mockGfxAllocation.set32BitAllocation(true); uint32_t *globalSurfaceStorage = reinterpret_cast(globalSurface.getCpuAddress()); uint32_t sentinel = 0x17192329U; globalSurfaceStorage[0] = 0U; globalSurfaceStorage[1] = sentinel; pProgram->linkerInput = std::move(programInfo.linkerInput); pProgram->linkBinary(); uint32_t expectedAddr = static_cast(globalSurface.getGraphicsAllocation()->getGpuAddressToPatch()); EXPECT_EQ(expectedAddr, globalSurfaceStorage[0]); EXPECT_EQ(sentinel, globalSurfaceStorage[1]); globalSurface.mockGfxAllocation.set32BitAllocation(false); prog->setGlobalSurface(nullptr); } TEST_F(ProgramDataTest, givenSymbolTablePatchTokenThenLinkerInputIsCreated) { SPatchFunctionTableInfo token; token.Token = PATCH_TOKEN_PROGRAM_SYMBOL_TABLE; token.Size = static_cast(sizeof(SPatchFunctionTableInfo)); token.NumEntries = 0; pProgramPatchList = &token; programPatchListSize = token.Size; buildAndDecodeProgramPatchList(); EXPECT_NE(nullptr, 
pProgram->getLinkerInput()); } TEST(ProgramLinkBinaryTest, whenLinkerInputEmptyThenLinkSuccessful) { auto linkerInput = std::make_unique>(); NEO::ExecutionEnvironment env; MockProgram program{env}; program.linkerInput = std::move(linkerInput); auto ret = program.linkBinary(); EXPECT_EQ(CL_SUCCESS, ret); } TEST(ProgramLinkBinaryTest, whenLinkerUnresolvedExternalThenLinkFailedAndBuildLogAvailable) { auto linkerInput = std::make_unique>(); NEO::LinkerInput::RelocationInfo relocation = {}; relocation.symbolName = "A"; relocation.offset = 0; linkerInput->relocations.push_back(NEO::LinkerInput::Relocations{relocation}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; NEO::ExecutionEnvironment env; MockProgram program{env}; KernelInfo kernelInfo = {}; kernelInfo.name = "onlyKernel"; std::vector kernelHeap; kernelHeap.resize(32, 7); kernelInfo.heapInfo.pKernelHeap = kernelHeap.data(); iOpenCL::SKernelBinaryHeaderCommon kernelHeader = {}; kernelHeader.KernelHeapSize = static_cast(kernelHeap.size()); kernelInfo.heapInfo.pKernelHeader = &kernelHeader; program.getKernelInfoArray().push_back(&kernelInfo); program.linkerInput = std::move(linkerInput); EXPECT_EQ(nullptr, program.getBuildLog(nullptr)); auto ret = program.linkBinary(); EXPECT_NE(CL_SUCCESS, ret); program.getKernelInfoArray().clear(); auto buildLog = program.getBuildLog(nullptr); ASSERT_NE(nullptr, buildLog); Linker::UnresolvedExternals expectedUnresolvedExternals; expectedUnresolvedExternals.push_back(Linker::UnresolvedExternal{relocation, 0, false}); auto expectedError = constructLinkerErrorMessage(expectedUnresolvedExternals, std::vector{"kernel : " + kernelInfo.name}); EXPECT_THAT(buildLog, ::testing::HasSubstr(expectedError)); } TEST_F(ProgramDataTest, whenLinkerInputValidThenIsaIsProperlyPatched) { auto linkerInput = std::make_unique>(); linkerInput->symbols["A"] = NEO::SymbolInfo{4U, 4U, NEO::SegmentType::GlobalVariables}; linkerInput->symbols["B"] = NEO::SymbolInfo{8U, 4U, 
NEO::SegmentType::GlobalConstants}; linkerInput->symbols["C"] = NEO::SymbolInfo{16U, 4U, NEO::SegmentType::Instructions}; auto relocationType = NEO::LinkerInput::RelocationInfo::Type::Address; linkerInput->relocations.push_back({NEO::LinkerInput::RelocationInfo{"A", 8U, relocationType}, NEO::LinkerInput::RelocationInfo{"B", 16U, relocationType}, NEO::LinkerInput::RelocationInfo{"C", 24U, relocationType}}); linkerInput->traits.requiresPatchingOfInstructionSegments = true; linkerInput->exportedFunctionsSegmentId = 0; NEO::ExecutionEnvironment env; MockProgram program{env}; KernelInfo kernelInfo = {}; kernelInfo.name = "onlyKernel"; std::vector kernelHeap; kernelHeap.resize(32, 7); kernelInfo.heapInfo.pKernelHeap = kernelHeap.data(); iOpenCL::SKernelBinaryHeaderCommon kernelHeader = {}; kernelHeader.KernelHeapSize = static_cast(kernelHeap.size()); kernelInfo.heapInfo.pKernelHeader = &kernelHeader; MockGraphicsAllocation kernelIsa(kernelHeap.data(), kernelHeap.size()); kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray().push_back(&kernelInfo); program.linkerInput = std::move(linkerInput); program.exportedFunctionsSurface = kernelInfo.kernelAllocation; std::vector globalVariablesBuffer; globalVariablesBuffer.resize(32, 7); std::vector globalConstantsBuffer; globalConstantsBuffer.resize(32, 7); program.globalSurface = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size()); program.constantSurface = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size()); program.pDevice = &this->pContext->getDevice(0)->getDevice(); auto ret = program.linkBinary(); EXPECT_EQ(CL_SUCCESS, ret); linkerInput.reset(static_cast *>(program.linkerInput.release())); for (size_t i = 0; i < linkerInput->relocations.size(); ++i) { auto expectedPatch = program.globalSurface->getGpuAddress() + linkerInput->symbols[linkerInput->relocations[0][0].symbolName].offset; auto relocationAddress = kernelHeap.data() + 
linkerInput->relocations[0][0].offset; EXPECT_EQ(static_cast(expectedPatch), *reinterpret_cast(relocationAddress)) << i; } program.getKernelInfoArray().clear(); delete program.globalSurface; program.globalSurface = nullptr; delete program.constantSurface; program.constantSurface = nullptr; } TEST_F(ProgramDataTest, whenRelocationsAreNotNeededThenIsaIsPreserved) { auto linkerInput = std::make_unique>(); linkerInput->symbols["A"] = NEO::SymbolInfo{4U, 4U, NEO::SegmentType::GlobalVariables}; linkerInput->symbols["B"] = NEO::SymbolInfo{8U, 4U, NEO::SegmentType::GlobalConstants}; NEO::ExecutionEnvironment env; MockProgram program{env}; KernelInfo kernelInfo = {}; kernelInfo.name = "onlyKernel"; std::vector kernelHeapData; kernelHeapData.resize(32, 7); std::vector kernelHeap(kernelHeapData.begin(), kernelHeapData.end()); kernelInfo.heapInfo.pKernelHeap = kernelHeap.data(); iOpenCL::SKernelBinaryHeaderCommon kernelHeader = {}; kernelHeader.KernelHeapSize = static_cast(kernelHeap.size()); kernelInfo.heapInfo.pKernelHeader = &kernelHeader; MockGraphicsAllocation kernelIsa(kernelHeap.data(), kernelHeap.size()); kernelInfo.kernelAllocation = &kernelIsa; program.getKernelInfoArray().push_back(&kernelInfo); program.linkerInput = std::move(linkerInput); std::vector globalVariablesBuffer; globalVariablesBuffer.resize(32, 7); std::vector globalConstantsBuffer; globalConstantsBuffer.resize(32, 7); program.globalSurface = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size()); program.constantSurface = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size()); program.pDevice = &this->pContext->getDevice(0)->getDevice(); auto ret = program.linkBinary(); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(kernelHeapData, kernelHeap); program.getKernelInfoArray().clear(); delete program.globalSurface; program.globalSurface = nullptr; delete program.constantSurface; program.constantSurface = nullptr; } 
compute-runtime-20.13.16352/opencl/test/unit_test/program/program_from_binary.h000066400000000000000000000052101363734646600275060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include namespace NEO { //////////////////////////////////////////////////////////////////////////////// // ProgramFromBinaryTest Test Fixture // Used to test the Program class //////////////////////////////////////////////////////////////////////////////// class ProgramFromBinaryTest : public DeviceFixture, public ContextFixture, public ProgramFixture, public testing::TestWithParam> { using ContextFixture::SetUp; protected: void SetUp() override { std::tie(BinaryFileName, KernelName) = GetParam(); DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); if (options.size()) CreateProgramFromBinary(pContext, &device, BinaryFileName, options); else CreateProgramFromBinary(pContext, &device, BinaryFileName); } void TearDown() override { knownSource.reset(); ProgramFixture::TearDown(); ContextFixture::TearDown(); DeviceFixture::TearDown(); } void setOptions(std::string &optionsIn) { options = optionsIn; } const char *BinaryFileName = nullptr; const char *KernelName = nullptr; cl_int retVal = CL_SUCCESS; std::string options; }; //////////////////////////////////////////////////////////////////////////////// // ProgramSimpleFixture Test Fixture // Used to test the Program class, but not using parameters //////////////////////////////////////////////////////////////////////////////// class ProgramSimpleFixture : public DeviceFixture, public ContextFixture, public ProgramFixture { using 
ContextFixture::SetUp; public: void SetUp() override { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); } void TearDown() override { knownSource.reset(); ProgramFixture::TearDown(); ContextFixture::TearDown(); DeviceFixture::TearDown(); } protected: cl_int retVal = CL_SUCCESS; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/program/program_nonuniform.cpp000066400000000000000000000233541363734646600277350ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_with_source.h" #include "test.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "program_tests.h" #include #include #include using namespace NEO; class MyMockProgram : public MockProgram { public: MyMockProgram() : MockProgram(*new ExecutionEnvironment()), executionEnvironment(&this->peekExecutionEnvironment()) {} private: std::unique_ptr executionEnvironment; }; TEST(ProgramNonUniform, UpdateAllowNonUniform) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions(nullptr); pm.updateNonUniformFlag(); 
EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, UpdateAllowNonUniform12) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL1.2"); pm.updateNonUniformFlag(); EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, UpdateAllowNonUniform20) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.0"); pm.updateNonUniformFlag(); EXPECT_TRUE(pm.getAllowNonUniform()); EXPECT_EQ(20u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, UpdateAllowNonUniform21) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.1"); pm.updateNonUniformFlag(); EXPECT_TRUE(pm.getAllowNonUniform()); EXPECT_EQ(21u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, UpdateAllowNonUniform20UniformFlag) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.0 -cl-uniform-work-group-size"); pm.updateNonUniformFlag(); EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(20u, pm.getProgramOptionVersion()); } TEST(ProgramNonUniform, UpdateAllowNonUniform21UniformFlag) { MyMockProgram pm; EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(12u, pm.getProgramOptionVersion()); pm.setBuildOptions("-cl-std=CL2.1 -cl-uniform-work-group-size"); pm.updateNonUniformFlag(); EXPECT_FALSE(pm.getAllowNonUniform()); EXPECT_EQ(21u, pm.getProgramOptionVersion()); } TEST(KernelNonUniform, GetAllowNonUniformFlag) { KernelInfo ki; MockClDevice d{new MockDevice}; MockProgram pm(*d.getExecutionEnvironment()); struct KernelMock : Kernel { KernelMock(Program *p, KernelInfo &ki, ClDevice &d) : Kernel(p, ki, d) { } }; KernelMock k{&pm, ki, d}; pm.setAllowNonUniform(false); 
EXPECT_FALSE(k.getAllowNonUniform()); pm.setAllowNonUniform(true); EXPECT_TRUE(k.getAllowNonUniform()); pm.setAllowNonUniform(false); EXPECT_FALSE(k.getAllowNonUniform()); } TEST(ProgramNonUniform, UpdateAllowNonUniformOutcomeUniformFlag) { ExecutionEnvironment executionEnvironment; MockProgram pm(executionEnvironment); MockProgram pm1(executionEnvironment); MockProgram pm2(executionEnvironment); const MockProgram *inputPrograms[] = {&pm1, &pm2}; cl_uint numInputPrograms = 2; pm1.setAllowNonUniform(false); pm2.setAllowNonUniform(false); pm.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_FALSE(pm.getAllowNonUniform()); pm1.setAllowNonUniform(false); pm2.setAllowNonUniform(true); pm.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_FALSE(pm.getAllowNonUniform()); pm1.setAllowNonUniform(true); pm2.setAllowNonUniform(false); pm.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_FALSE(pm.getAllowNonUniform()); pm1.setAllowNonUniform(true); pm2.setAllowNonUniform(true); pm.updateNonUniformFlag((const Program **)inputPrograms, numInputPrograms); EXPECT_TRUE(pm.getAllowNonUniform()); } #include "opencl/source/kernel/kernel.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include namespace NEO { class ProgramNonUniformTest : public ContextFixture, public PlatformFixture, public ProgramFixture, public CommandQueueHwFixture, public testing::Test { using ContextFixture::SetUp; using PlatformFixture::SetUp; protected: ProgramNonUniformTest() { } void SetUp() override { PlatformFixture::SetUp(); device = pPlatform->getClDevice(0); ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); 
CommandQueueHwFixture::SetUp(pPlatform->getClDevice(0), 0); } void TearDown() override { CommandQueueHwFixture::TearDown(); ProgramFixture::TearDown(); ContextFixture::TearDown(); PlatformFixture::TearDown(); } cl_device_id device; cl_int retVal = CL_SUCCESS; }; TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform21) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.1") != std::string::npos) { CreateProgramFromBinary(pContext, &device, "kernel_data_param"); auto mockProgram = (MockProgram *)pProgram; ASSERT_NE(nullptr, mockProgram); mockProgram->setBuildOptions("-cl-std=CL2.1"); retVal = mockProgram->build( 1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size"); EXPECT_NE(nullptr, pKernelInfo); // create a kernel auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 12, 1}; retVal = pCmdQ->enqueueKernel( pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pKernel; } } TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform20) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.0") != std::string::npos) { CreateProgramFromBinary(pContext, &device, "kernel_data_param"); auto mockProgram = pProgram; ASSERT_NE(nullptr, mockProgram); mockProgram->setBuildOptions("-cl-std=CL2.0"); retVal = mockProgram->build( 1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size"); EXPECT_NE(nullptr, pKernelInfo); // create a kernel auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); size_t globalWorkSize[3] = {12, 12, 12}; size_t 
localWorkSize[3] = {11, 12, 12}; retVal = pCmdQ->enqueueKernel( pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete pKernel; } } TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform12) { CreateProgramFromBinary(pContext, &device, "kernel_data_param"); auto mockProgram = pProgram; ASSERT_NE(nullptr, mockProgram); mockProgram->setBuildOptions("-cl-std=CL1.2"); retVal = mockProgram->build( 1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = mockProgram->Program::getKernelInfo("test_get_local_size"); EXPECT_NE(nullptr, pKernelInfo); // create a kernel auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); size_t globalWorkSize[3] = {12, 12, 12}; size_t localWorkSize[3] = {11, 12, 12}; retVal = pCmdQ->enqueueKernel( pKernel, 3, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, retVal); delete pKernel; } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/program/program_spec_constants_tests.cpp000066400000000000000000000076401363734646600320130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/compiler_interface.inl" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/test_files.h" #include "opencl/test/unit_test/mocks/mock_cif.h" #include "opencl/test/unit_test/mocks/mock_compilers.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "gmock/gmock.h" #include using namespace NEO; struct 
UpdateSpecConstantsTest : public ::testing::Test { void SetUp() override { mockProgram.reset(new MockProgram(executionEnvironment)); mockProgram->specConstantsIds.reset(new MockCIFBuffer()); mockProgram->specConstantsSizes.reset(new MockCIFBuffer()); mockProgram->specConstantsIds->PushBackRawCopy(id1); mockProgram->specConstantsIds->PushBackRawCopy(id2); mockProgram->specConstantsIds->PushBackRawCopy(id3); uint32_t size1 = sizeof(char), size2 = sizeof(uint16_t), size3 = sizeof(int); mockProgram->specConstantsSizes->PushBackRawCopy(size1); mockProgram->specConstantsSizes->PushBackRawCopy(size2); mockProgram->specConstantsSizes->PushBackRawCopy(size3); mockProgram->specConstantsValues.insert({id1, static_cast(val1)}); mockProgram->specConstantsValues.insert({id2, static_cast(val2)}); mockProgram->specConstantsValues.insert({id3, static_cast(val3)}); values = &mockProgram->specConstantsValues; EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(val3, static_cast(values->at(id3))); } ExecutionEnvironment executionEnvironment; std::unique_ptr mockProgram; uint32_t id1 = 1u; uint32_t id2 = 2u; uint32_t id3 = 3u; char val1 = 5; uint16_t val2 = 50; int val3 = 500; specConstValuesMap *values; }; TEST_F(UpdateSpecConstantsTest, givenNewSpecConstValueWhenUpdateSpecializationConstantThenProperValueIsCopiedAndUpdated) { int newSpecConstVal3 = 5000; auto ret = mockProgram->updateSpecializationConstant(3, sizeof(int), &newSpecConstVal3); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(newSpecConstVal3, static_cast(values->at(id3))); newSpecConstVal3 = 50000; EXPECT_NE(newSpecConstVal3, static_cast(values->at(id3))); ret = mockProgram->updateSpecializationConstant(3, sizeof(int), &newSpecConstVal3); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(newSpecConstVal3, static_cast(values->at(id3))); } TEST_F(UpdateSpecConstantsTest, 
givenNewSpecConstValueWithUnproperSizeWhenUpdateSpecializationConstantThenErrorIsReturned) { int newSpecConstVal3 = 5000; auto ret = mockProgram->updateSpecializationConstant(3, 10 * sizeof(int), &newSpecConstVal3); EXPECT_EQ(CL_INVALID_VALUE, ret); EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(val3, static_cast(values->at(id3))); } TEST_F(UpdateSpecConstantsTest, givenNewSpecConstValueWithUnproperIdAndSizeWhenUpdateSpecializationConstantThenErrorIsReturned) { int newSpecConstVal3 = 5000; auto ret = mockProgram->updateSpecializationConstant(4, sizeof(int), &newSpecConstVal3); EXPECT_EQ(CL_INVALID_SPEC_ID, ret); EXPECT_EQ(val1, static_cast(values->at(id1))); EXPECT_EQ(val2, static_cast(values->at(id2))); EXPECT_EQ(val3, static_cast(values->at(id3))); } compute-runtime-20.13.16352/opencl/test/unit_test/program/program_tests.cpp000066400000000000000000003671521363734646600267140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/program/program_tests.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/compiler_interface/intermediate_representations.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include 
"shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/gtpin/gtpin_notify.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/create.inl" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_allocation_properties.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_with_source.h" #include "test.h" #include "compiler_options.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include #include #include #include using namespace NEO; void ProgramTests::SetUp() { DeviceFixture::SetUp(); cl_device_id device = pClDevice; ContextFixture::SetUp(1, &device); } void ProgramTests::TearDown() { ContextFixture::TearDown(); DeviceFixture::TearDown(); } void CL_CALLBACK notifyFunc( cl_program program, void *userData) { *((char *)userData) = 'a'; } std::vector BinaryFileNames{ "CopyBuffer_simd32", }; std::vector SourceFileNames{ "CopyBuffer_simd16.cl", }; std::vector BinaryForSourceFileNames{ "CopyBuffer_simd16", }; std::vector KernelNames{ "CopyBuffer", }; class NoCompilerInterfaceRootDeviceEnvironment : public RootDeviceEnvironment { public: NoCompilerInterfaceRootDeviceEnvironment(ExecutionEnvironment &executionEnvironment) : 
RootDeviceEnvironment(executionEnvironment) { } CompilerInterface *getCompilerInterface() override { return nullptr; } }; class FailingGenBinaryProgram : public MockProgram { public: FailingGenBinaryProgram(ExecutionEnvironment &executionEnvironment) : MockProgram(executionEnvironment) {} cl_int processGenBinary() override { return CL_INVALID_BINARY; } }; class SucceedingGenBinaryProgram : public MockProgram { public: SucceedingGenBinaryProgram(ExecutionEnvironment &executionEnvironment) : MockProgram(executionEnvironment) {} cl_int processGenBinary() override { return CL_SUCCESS; } }; TEST_P(ProgramFromBinaryTest, WhenBuildingProgramThenSuccessIsReturned) { cl_device_id device = pClDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(ProgramFromBinaryTest, WhenGettingProgramContextInfoThenCorrectContextIsReturned) { cl_context contextRet = reinterpret_cast(static_cast(0xdeaddead)); size_t paramValueSizeRet = 0; retVal = pProgram->getInfo( CL_PROGRAM_CONTEXT, sizeof(cl_context), &contextRet, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(pContext, contextRet); EXPECT_EQ(sizeof(cl_context), paramValueSizeRet); } TEST_P(ProgramFromBinaryTest, GivenNonNullParamValueWhenGettingProgramBinaryInfoThenCorrectBinaryIsReturned) { size_t paramValueSize = sizeof(unsigned char **); size_t paramValueSizeRet = 0; auto testBinary = std::make_unique(knownSourceSize); retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, paramValueSize, &testBinary, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_STREQ((const char *)knownSource.get(), (const char *)testBinary.get()); } TEST_P(ProgramFromBinaryTest, GivenNullParamValueWhenGettingProgramBinaryInfoThenSuccessIsReturned) { size_t paramValueSize = sizeof(unsigned char **); size_t paramValueSizeRet = 0; retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_P(ProgramFromBinaryTest, GivenNonNullParamValueAndParamValueSizeZeroWhenGettingProgramBinaryInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0; auto testBinary = std::make_unique(knownSourceSize); retVal = pProgram->getInfo( CL_PROGRAM_BINARIES, 0, &testBinary, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ProgramFromBinaryTest, GivenInvalidParamWhenGettingProgramBinaryInfoThenInvalidValueErrorIsReturned) { size_t paramValueSizeRet = 0; auto testBinary = std::make_unique(knownSourceSize); retVal = pProgram->getInfo( CL_PROGRAM_BUILD_STATUS, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ProgramFromBinaryTest, WhenGettingBinarySizesThenCorrectSizesAreReturned) { size_t paramValueSize = sizeof(size_t *); size_t paramValue[1]; size_t paramValueSizeRet = 0; retVal = pProgram->getInfo( CL_PROGRAM_BINARY_SIZES, paramValueSize, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(knownSourceSize, paramValue[0]); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_P(ProgramFromBinaryTest, GivenProgramWithOneKernelWhenGettingNumKernelsThenOneIsReturned) { size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; cl_device_id device = pClDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); retVal = pProgram->getInfo( CL_PROGRAM_NUM_KERNELS, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(1u, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } TEST_P(ProgramFromBinaryTest, GivenProgramWithNoExecutableCodeWhenGettingNumKernelsThenInvalidProgramExecutableErrorIsReturned) { size_t paramValue = 0; size_t paramValueSize = sizeof(paramValue); size_t paramValueSizeRet = 0; cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, BinaryFileName); MockProgram *p = pProgram; 
p->SetBuildStatus(CL_BUILD_NONE); retVal = pProgram->getInfo( CL_PROGRAM_NUM_KERNELS, paramValueSize, ¶mValue, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_PROGRAM_EXECUTABLE, retVal); } TEST_P(ProgramFromBinaryTest, WhenGettingKernelNamesThenCorrectNameIsReturned) { size_t paramValueSize = sizeof(size_t *); size_t paramValueSizeRet = 0; cl_device_id device = pClDevice; retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); // get info successfully about required sizes for kernel names retVal = pProgram->getInfo( CL_PROGRAM_KERNEL_NAMES, 0, nullptr, ¶mValueSizeRet); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0u, paramValueSizeRet); // get info successfully about kernel names auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; ASSERT_NE(paramValue, nullptr); size_t expectedKernelsStringSize = strlen(KernelName) + 1; retVal = pProgram->getInfo( CL_PROGRAM_KERNEL_NAMES, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ(KernelName, (char *)paramValue.get()); EXPECT_EQ(expectedKernelsStringSize, paramValueSizeRet); } TEST_P(ProgramFromBinaryTest, GivenProgramWithNoExecutableCodeWhenGettingKernelNamesThenInvalidProgramExecutableErrorIsReturned) { size_t paramValueSize = sizeof(size_t *); size_t paramValueSizeRet = 0; cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, BinaryFileName); MockProgram *p = pProgram; p->SetBuildStatus(CL_BUILD_NONE); retVal = pProgram->getInfo( CL_PROGRAM_KERNEL_NAMES, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_PROGRAM_EXECUTABLE, retVal); } TEST_P(ProgramFromBinaryTest, GivenInvalidDeviceWhenGettingBuildStatusThenInvalidDeviceErrorIsReturned) { cl_build_status buildStatus = 0; size_t paramValueSize = sizeof(buildStatus); size_t paramValueSizeRet = 0; size_t invalidDevice = 0xdeadbee0; retVal = pProgram->getBuildInfo( reinterpret_cast(invalidDevice), CL_PROGRAM_BUILD_STATUS, 
paramValueSize, &buildStatus, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_P(ProgramFromBinaryTest, GivenCorruptedDeviceWhenGettingBuildStatusThenInvalidDiveErrorIsReturned) { cl_build_status buildStatus = 0; size_t paramValueSize = sizeof(buildStatus); size_t paramValueSizeRet = 0; cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, BinaryFileName); MockProgram *p = pProgram; p->SetDevice(&pClDevice->getDevice()); retVal = pProgram->getBuildInfo( reinterpret_cast(pContext), CL_PROGRAM_BUILD_STATUS, paramValueSize, &buildStatus, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_DEVICE, retVal); } TEST_P(ProgramFromBinaryTest, GivenNullDeviceWhenGettingBuildStatusThenBuildNoneIsReturned) { cl_device_id device = pClDevice; cl_build_status buildStatus = 0; size_t paramValueSize = sizeof(buildStatus); size_t paramValueSizeRet = 0; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_STATUS, paramValueSize, &buildStatus, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); EXPECT_EQ(CL_BUILD_NONE, buildStatus); } TEST_P(ProgramFromBinaryTest, GivenDefaultDeviceWhenGettingBuildOptionsThenBuildOptionsAreEmpty) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_OPTIONS, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_OPTIONS, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("", (char *)paramValue.get()); } TEST_P(ProgramFromBinaryTest, GivenDefaultDeviceWhenGettingLogThenLogEmpty) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSizeRet); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("", (char *)paramValue.get()); } TEST_P(ProgramFromBinaryTest, GivenLogEntriesWhenGetBuildLogThenLogIsApended) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); auto paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_STREQ("", (char *)paramValue.get()); // Add more text to the log pProgram->updateBuildLog(&pClDevice->getDevice(), "testing", 8); pProgram->updateBuildLog(&pClDevice->getDevice(), "several", 8); retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_GE(paramValueSizeRet, 16u); paramValue = std::make_unique(paramValueSizeRet); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, paramValueSize, paramValue.get(), ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, strstr(paramValue.get(), "testing")); const char *paramValueContinued = strstr(paramValue.get(), "testing") + 7; ASSERT_NE(nullptr, strstr(paramValueContinued, "several")); } TEST_P(ProgramFromBinaryTest, GivenNullParamValueWhenGettingProgramBinaryTypeThenParamValueSizeIsReturned) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BINARY_TYPE, paramValueSize, nullptr, 
¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); } TEST_P(ProgramFromBinaryTest, WhenGettingProgramBinaryTypeThenCorrectProgramTypeIsReturned) { cl_device_id device = pClDevice; cl_program_binary_type programType = 0; char *paramValue = (char *)&programType; size_t paramValueSizeRet = 0u; size_t paramValueSize = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BINARY_TYPE, paramValueSize, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(paramValueSizeRet, 0u); paramValueSize = paramValueSizeRet; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BINARY_TYPE, paramValueSize, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ((cl_program_binary_type)CL_PROGRAM_BINARY_TYPE_EXECUTABLE, programType); } TEST_P(ProgramFromBinaryTest, GivenInvalidParamWhenGettingBuildInfoThenInvalidValueErrorIsReturned) { cl_device_id device = pClDevice; size_t paramValueSizeRet = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_KERNEL_NAMES, 0, nullptr, ¶mValueSizeRet); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_P(ProgramFromBinaryTest, GivenGlobalVariableTotalSizeSetWhenGettingBuildGlobalVariableTotalSizeThenCorrectSizeIsReturned) { cl_device_id device = pClDevice; size_t globalVarSize = 22; size_t paramValueSize = sizeof(globalVarSize); size_t paramValueSizeRet = 0; char *paramValue = (char *)&globalVarSize; // get build info as is retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, paramValueSize, paramValue, ¶mValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(globalVarSize)); EXPECT_EQ(globalVarSize, 0u); // Set GlobalVariableTotalSize as 1024 CreateProgramFromBinary(pContext, &device, BinaryFileName); MockProgram *p = pProgram; p->SetGlobalVariableTotalSize(1024u); // get build info once again retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, paramValueSize, paramValue, ¶mValueSizeRet); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(globalVarSize)); EXPECT_EQ(globalVarSize, 1024u); } TEST_P(ProgramFromBinaryTest, givenProgramWhenItIsBeingBuildThenItContainsGraphicsAllocationInKernelInfo) { cl_device_id device = pClDevice; pProgram->build(1, &device, nullptr, nullptr, nullptr, true); auto kernelInfo = pProgram->getKernelInfo(size_t(0)); auto graphicsAllocation = kernelInfo->getGraphicsAllocation(); ASSERT_NE(nullptr, graphicsAllocation); EXPECT_TRUE(graphicsAllocation->is32BitAllocation()); EXPECT_EQ(graphicsAllocation->getUnderlyingBufferSize(), kernelInfo->heapInfo.pKernelHeader->KernelHeapSize); auto kernelIsa = graphicsAllocation->getUnderlyingBuffer(); EXPECT_NE(kernelInfo->heapInfo.pKernelHeap, kernelIsa); EXPECT_EQ(0, memcmp(kernelIsa, kernelInfo->heapInfo.pKernelHeap, kernelInfo->heapInfo.pKernelHeader->KernelHeapSize)); auto rootDeviceIndex = graphicsAllocation->getRootDeviceIndex(); EXPECT_EQ(GmmHelper::decanonize(graphicsAllocation->getGpuBaseAddress()), pProgram->getDevice().getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex)); } TEST_P(ProgramFromBinaryTest, whenProgramIsBeingRebuildThenOutdatedGlobalBuffersAreFreed) { cl_device_id device = pClDevice; pProgram->build(1, &device, nullptr, nullptr, nullptr, true); EXPECT_EQ(nullptr, pProgram->constantSurface); EXPECT_EQ(nullptr, pProgram->globalSurface); pProgram->constantSurface = new MockGraphicsAllocation(); pProgram->processGenBinary(); EXPECT_EQ(nullptr, pProgram->constantSurface); EXPECT_EQ(nullptr, pProgram->globalSurface); pProgram->globalSurface = new MockGraphicsAllocation(); pProgram->processGenBinary(); EXPECT_EQ(nullptr, pProgram->constantSurface); EXPECT_EQ(nullptr, pProgram->globalSurface); } TEST_P(ProgramFromBinaryTest, givenProgramWhenCleanKernelInfoIsCalledThenKernelAllocationIsFreed) { cl_device_id device = pClDevice; pProgram->build(1, &device, nullptr, nullptr, nullptr, true); EXPECT_EQ(1u, pProgram->getNumKernels()); 
pProgram->cleanCurrentKernelInfo(); EXPECT_EQ(0u, pProgram->getNumKernels()); } HWTEST_P(ProgramFromBinaryTest, givenProgramWhenCleanCurrentKernelInfoIsCalledButGpuIsNotYetDoneThenKernelAllocationIsPutOnDefferedFreeListAndCsrRegistersCacheFlush) { cl_device_id device = pClDevice; auto &csr = pDevice->getGpgpuCommandStreamReceiver(); EXPECT_TRUE(csr.getTemporaryAllocations().peekIsEmpty()); pProgram->build(1, &device, nullptr, nullptr, nullptr, true); auto kernelAllocation = pProgram->getKernelInfo(size_t(0))->getGraphicsAllocation(); kernelAllocation->updateTaskCount(100, csr.getOsContext().getContextId()); *csr.getTagAddress() = 0; pProgram->cleanCurrentKernelInfo(); EXPECT_FALSE(csr.getTemporaryAllocations().peekIsEmpty()); EXPECT_EQ(csr.getTemporaryAllocations().peekHead(), kernelAllocation); EXPECT_TRUE(this->pDevice->getUltCommandStreamReceiver().requiresInstructionCacheFlush); } HWTEST_P(ProgramFromBinaryTest, givenIsaAllocationUsedByMultipleCsrsWhenItIsDeletedItRegistersCacheFlushInEveryCsrThatUsedIt) { auto &csr0 = this->pDevice->getUltCommandStreamReceiverFromIndex(0u); auto &csr1 = this->pDevice->getUltCommandStreamReceiverFromIndex(1u); cl_device_id device = pClDevice; pProgram->build(1, &device, nullptr, nullptr, nullptr, true); auto kernelAllocation = pProgram->getKernelInfo(size_t(0))->getGraphicsAllocation(); csr0.makeResident(*kernelAllocation); csr1.makeResident(*kernelAllocation); csr0.processResidency(csr0.getResidencyAllocations(), 0u); csr1.processResidency(csr1.getResidencyAllocations(), 0u); csr0.makeNonResident(*kernelAllocation); csr1.makeNonResident(*kernelAllocation); EXPECT_FALSE(csr0.requiresInstructionCacheFlush); EXPECT_FALSE(csr1.requiresInstructionCacheFlush); pProgram->cleanCurrentKernelInfo(); EXPECT_TRUE(csr0.requiresInstructionCacheFlush); EXPECT_TRUE(csr1.requiresInstructionCacheFlush); } TEST_P(ProgramFromSourceTest, GivenSpecificParamatersWhenBuildingProgramThenSuccessOrCorrectErrorCodeIsReturned) { KernelBinaryHelper 
kbHelper(BinaryFileName, true); auto device = pPlatform->getClDevice(0); cl_device_id deviceList = {0}; char data[4] = {0}; cl_device_id usedDevice = pPlatform->getClDevice(0); CreateProgramWithSource( pContext, &usedDevice, SourceFileName); // Order of following microtests is important - do not change. // Add new microtests at end. auto pMockProgram = pProgram; // invalid build parameters: combinations of numDevices & deviceList retVal = pProgram->build(1, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->build(0, &deviceList, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid build parameters: combinations of funcNotify & userData retVal = pProgram->build(0, nullptr, nullptr, nullptr, &data[0], false); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid build parameters: invalid content of deviceList retVal = pProgram->build(1, &deviceList, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_INVALID_DEVICE, retVal); // fail build - another build is already in progress pMockProgram->SetBuildStatus(CL_BUILD_IN_PROGRESS); retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_INVALID_OPERATION, retVal); pMockProgram->SetBuildStatus(CL_BUILD_NONE); // fail build - CompilerInterface cannot be obtained auto executionEnvironment = device->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); auto p2 = std::make_unique(*executionEnvironment); p2->setDevice(&device->getDevice()); retVal = p2->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); p2.reset(nullptr); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); // fail build - any build error (here caused by specifying unrecognized option) retVal = 
pProgram->build(0, nullptr, "-invalid-option", nullptr, nullptr, false); EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, retVal); // fail build - linked code is corrupted and cannot be postprocessed auto p3 = std::make_unique(*executionEnvironment); p3->setDevice(&device->getDevice()); std::string testFile; size_t sourceSize; testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); // source file auto pSourceBuffer = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSourceBuffer); p3->sourceCode = pSourceBuffer.get(); p3->createdFrom = Program::CreatedFrom::SOURCE; retVal = p3->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_INVALID_BINARY, retVal); p3.reset(nullptr); // build successfully without notifyFunc - build kernel and write it to Kernel Cache pMockProgram->ClearOptions(); retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(pProgram->getInternalOptions(), pPlatform->getClDevice(0)->peekCompilerExtensions())) << pProgram->getInternalOptions(); // get build log size_t param_value_size_ret = 0u; retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(param_value_size_ret, 0u); // get build log when the log does not exist pMockProgram->ClearLog(); retVal = pProgram->getBuildInfo( device, CL_PROGRAM_BUILD_LOG, 0, nullptr, ¶m_value_size_ret); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(param_value_size_ret, 0u); // build successfully without notifyFunc - build kernel but do not write it to Kernel Cache (kernel is already in the Cache) pMockProgram->SetBuildStatus(CL_BUILD_NONE); retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); // build successfully with notifyFunc - duplicate build (kernel already built), do not build and just take it retVal = pProgram->build(0, nullptr, nullptr, 
notifyFunc, &data[0], false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ('a', data[0]); // build successfully without notifyFunc - kernel is already in Kernel Cache, do not build and take it from Cache retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); // fail build - code to be build does not exist pMockProgram->sourceCode = ""; // set source code as non-existent (invalid) pMockProgram->createdFrom = Program::CreatedFrom::SOURCE; pMockProgram->SetBuildStatus(CL_BUILD_NONE); pMockProgram->SetCreatedFromBinary(false); retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); } TEST_P(ProgramFromSourceTest, CreateWithSource_Build_Options_Duplicate) { KernelBinaryHelper kbHelper(BinaryFileName, false); retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(0, nullptr, CompilerOptions::fastRelaxedMath, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(0, nullptr, CompilerOptions::fastRelaxedMath, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(0, nullptr, CompilerOptions::finiteMathOnly, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); } class Callback { public: Callback() { this->oldCallback = MemoryManagement::deleteCallback; MemoryManagement::deleteCallback = thisCallback; } ~Callback() { MemoryManagement::deleteCallback = this->oldCallback; } static void watch(const void *p) { watchList[p] = 0u; } static void unwatch(const void *p) { EXPECT_GT(watchList[p], 0u); watchList.erase(p); } private: void (*oldCallback)(void *); static void thisCallback(void *p) { if (watchList.find(p) != watchList.end()) watchList[p]++; } static std::map watchList; }; std::map Callback::watchList; TEST_P(ProgramFromSourceTest, 
GivenDifferentCommpilerOptionsWhenBuildingProgramThenKernelHashesAreDifferent) { KernelBinaryHelper kbHelper(BinaryFileName, true); cl_device_id usedDevice = pPlatform->getClDevice(0); CreateProgramWithSource( pContext, &usedDevice, SourceFileName); Callback callback; retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash1 = pProgram->getCachedFileName(); auto kernel1 = pProgram->getKernelInfo("CopyBuffer"); Callback::watch(kernel1); EXPECT_NE(nullptr, kernel1); retVal = pProgram->build(0, nullptr, CompilerOptions::fastRelaxedMath, nullptr, nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash2 = pProgram->getCachedFileName(); auto kernel2 = pProgram->getKernelInfo("CopyBuffer"); EXPECT_NE(nullptr, kernel2); EXPECT_NE(hash1, hash2); Callback::unwatch(kernel1); Callback::watch(kernel2); retVal = pProgram->build(0, nullptr, CompilerOptions::finiteMathOnly, nullptr, nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash3 = pProgram->getCachedFileName(); auto kernel3 = pProgram->getKernelInfo("CopyBuffer"); EXPECT_NE(nullptr, kernel3); EXPECT_NE(hash1, hash3); EXPECT_NE(hash2, hash3); Callback::unwatch(kernel2); Callback::watch(kernel3); retVal = pProgram->build(0, nullptr, nullptr, nullptr, nullptr, true); EXPECT_EQ(CL_SUCCESS, retVal); auto hash4 = pProgram->getCachedFileName(); auto kernel4 = pProgram->getKernelInfo("CopyBuffer"); EXPECT_NE(nullptr, kernel4); EXPECT_EQ(hash1, hash4); Callback::unwatch(kernel3); } TEST_P(ProgramFromSourceTest, GivenEmptyProgramWhenCreatingProgramThenInvalidValueErrorIsReturned) { auto p = Program::create(pContext, 0, nullptr, nullptr, retVal); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_EQ(nullptr, p); delete p; } TEST_P(ProgramFromSourceTest, GivenSpecificParamatersWhenCompilingProgramThenSuccessOrCorrectErrorCodeIsReturned) { cl_device_id usedDevice = pPlatform->getClDevice(0); CreateProgramWithSource( pContext, &usedDevice, SourceFileName); auto *p = (MockProgram 
*)pProgram; cl_device_id deviceList = {0}; cl_program inputHeaders; const char *headerIncludeNames = ""; cl_program nullprogram = nullptr; cl_program invprogram = (cl_program)pContext; char data[4]; // Order of following microtests is important - do not change. // Add new microtests at end. // invalid compile parameters: combinations of numDevices & deviceList retVal = pProgram->compile(1, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->compile(0, &deviceList, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid compile parameters: combinations of numInputHeaders==0 & inputHeaders & headerIncludeNames retVal = pProgram->compile(0, nullptr, nullptr, 0, &inputHeaders, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->compile(0, nullptr, nullptr, 0, nullptr, &headerIncludeNames, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid compile parameters: combinations of numInputHeaders!=0 & inputHeaders & headerIncludeNames retVal = pProgram->compile(0, nullptr, nullptr, 1, &inputHeaders, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->compile(0, nullptr, nullptr, 1, nullptr, &headerIncludeNames, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid compile parameters: combinations of funcNotify & userData with valid numInputHeaders!=0 & inputHeaders & headerIncludeNames retVal = pProgram->compile(0, nullptr, nullptr, 1, &inputHeaders, &headerIncludeNames, nullptr, &data[0]); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid compile parameters: invalid content of deviceList retVal = pProgram->compile(1, &deviceList, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); // fail compilation - another compilation is already in progress p->SetBuildStatus(CL_BUILD_IN_PROGRESS); retVal = pProgram->compile(0, nullptr, nullptr, 0, nullptr, 
nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); p->SetBuildStatus(CL_BUILD_NONE); // invalid compile parameters: invalid header Program object==nullptr retVal = pProgram->compile(0, nullptr, nullptr, 1, &nullprogram, &headerIncludeNames, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // invalid compile parameters: invalid header Program object==non Program object retVal = pProgram->compile(0, nullptr, nullptr, 1, &invprogram, &headerIncludeNames, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // compile successfully kernel with header std::string testFile; size_t sourceSize; Program *p3; // header Program object testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); // header source file auto pSourceBuffer = loadDataFromFile(testFile.c_str(), sourceSize); EXPECT_NE(0u, sourceSize); EXPECT_NE(nullptr, pSourceBuffer); const char *sources[1] = {pSourceBuffer.get()}; p3 = Program::create(pContext, 1, sources, &sourceSize, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p3); inputHeaders = p3; retVal = pProgram->compile(0, nullptr, nullptr, 1, &inputHeaders, &headerIncludeNames, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // fail compilation of kernel with header - header is invalid p = (MockProgram *)p3; p->sourceCode = ""; // set header source code as non-existent (invalid) retVal = pProgram->compile(0, nullptr, nullptr, 1, &inputHeaders, &headerIncludeNames, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); delete p3; // fail compilation - CompilerInterface cannot be obtained auto device = pContext->getDevice(0); auto executionEnvironment = device->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); auto p2 = std::make_unique(*executionEnvironment); p2->setDevice(&device->getDevice()); retVal = p2->compile(0, nullptr, 
nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); p2.reset(nullptr); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); // fail compilation - any compilation error (here caused by specifying unrecognized option) retVal = pProgram->compile(0, nullptr, "-invalid-option", 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_COMPILE_PROGRAM_FAILURE, retVal); // compile successfully without notifyFunc retVal = pProgram->compile(0, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // compile successfully with notifyFunc data[0] = 0; retVal = pProgram->compile(0, nullptr, nullptr, 0, nullptr, nullptr, notifyFunc, &data[0]); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ('a', data[0]); } struct MockCompilerInterfaceCaptureBuildOptions : CompilerInterface { TranslationOutput::ErrorCode compile(const NEO::Device &device, const TranslationInput &input, TranslationOutput &) override { buildOptions.clear(); if ((input.apiOptions.size() > 0) && (input.apiOptions.begin() != nullptr)) { buildOptions.assign(input.apiOptions.begin(), input.apiOptions.end()); } buildInternalOptions.clear(); if ((input.internalOptions.size() > 0) && (input.internalOptions.begin() != nullptr)) { buildInternalOptions.assign(input.internalOptions.begin(), input.internalOptions.end()); } return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode build(const NEO::Device &device, const TranslationInput &input, TranslationOutput &out) override { return this->MockCompilerInterfaceCaptureBuildOptions::compile(device, input, out); } std::string buildOptions; std::string buildInternalOptions; }; TEST_P(ProgramFromSourceTest, GivenFlagsWhenCompilingProgramThenBuildOptionsHaveBeenApplied) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); 
pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = std::make_unique(*pDevice->getExecutionEnvironment()); program->setDevice(&pDevice->getDevice()); program->sourceCode = "__kernel mock() {}"; // Ask to build created program without NEO::CompilerOptions::gtpinRera and NEO::CompilerOptions::greaterThan4gbBuffersRequired flags. cl_int retVal = program->compile(0, nullptr, CompilerOptions::fastRelaxedMath, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << cip->buildInternalOptions; // Ask to build created program with NEO::CompilerOptions::gtpinRera and NEO::CompilerOptions::greaterThan4gbBuffersRequired flags. 
cip->buildOptions.clear(); cip->buildInternalOptions.clear(); auto options = CompilerOptions::concatenate(CompilerOptions::greaterThan4gbBuffersRequired, CompilerOptions::gtpinRera, CompilerOptions::finiteMathOnly); retVal = program->compile(0, nullptr, options.c_str(), 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::finiteMathOnly)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::greaterThan4gbBuffersRequired)) << cip->buildInternalOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, pPlatform->getClDevice(0)->peekCompilerExtensions())) << cip->buildInternalOptions; } TEST_P(ProgramFromSourceTest, GivenAdvancedOptionsWhenCreatingProgramThenSuccessIsReturned) { std::string testFile; size_t sourceSize = 0; Program *p; testFile.append(clFiles); testFile.append("CopyBuffer_simd16.cl"); auto pSourceBuffer = loadDataFromFile(testFile.c_str(), sourceSize); const char *sources[1] = {pSourceBuffer.get()}; EXPECT_NE(nullptr, pSourceBuffer); //According to spec: If lengths is NULL, all strings in the strings argument are considered null-terminated. p = Program::create(pContext, 1, sources, nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; //According to spec: If an element in lengths is zero, its accompanying string is null-terminated. 
p = Program::create(pContext, 1, sources, &sourceSize, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; std::stringstream dataStream(pSourceBuffer.get()); std::string line; std::vector lines; while (std::getline(dataStream, line, '\n')) { char *ptr = new char[line.length() + 1](); strcpy_s(ptr, line.length() + 1, line.c_str()); lines.push_back(ptr); } // Work on array of strings p = Program::create(pContext, 1, &lines[0], nullptr, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; std::vector sizes; for (auto ptr : lines) sizes.push_back(strlen(ptr)); sizes[sizes.size() / 2] = 0; p = Program::create(pContext, (cl_uint)sizes.size(), &lines[0], &sizes[0], retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, p); delete p; for (auto ptr : lines) delete[] ptr; } TEST_P(ProgramFromSourceTest, GivenSpecificParamatersWhenLinkingProgramThenSuccessOrCorrectErrorCodeIsReturned) { cl_device_id usedDevice = pPlatform->getClDevice(0); CreateProgramWithSource( pContext, &usedDevice, SourceFileName); cl_device_id deviceList = {0}; char data[4]; cl_program program = pProgram; cl_program nullprogram = nullptr; cl_program invprogram = (cl_program)pContext; // Order of following microtests is important - do not change. // Add new microtests at end. 
// invalid link parameters: combinations of numDevices & deviceList retVal = pProgram->link(1, nullptr, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->link(0, &deviceList, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid link parameters: combinations of numInputPrograms & inputPrograms retVal = pProgram->link(0, nullptr, nullptr, 0, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->link(0, nullptr, nullptr, 1, nullptr, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid link parameters: combinations of funcNotify & userData with valid numInputPrograms & inputPrograms retVal = pProgram->link(0, nullptr, nullptr, 1, &program, nullptr, &data[0]); EXPECT_EQ(CL_INVALID_VALUE, retVal); // invalid link parameters: invalid content of deviceList retVal = pProgram->link(1, &deviceList, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_DEVICE, retVal); // fail linking - another linking is already in progress pProgram->SetBuildStatus(CL_BUILD_IN_PROGRESS); retVal = pProgram->link(0, nullptr, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, retVal); pProgram->SetBuildStatus(CL_BUILD_NONE); // invalid link parameters: invalid Program object==nullptr retVal = pProgram->link(0, nullptr, nullptr, 1, &nullprogram, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // invalid link parameters: invalid Program object==non Program object retVal = pProgram->link(0, nullptr, nullptr, 1, &invprogram, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); // compile successfully a kernel to be linked later retVal = pProgram->compile(0, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // fail linking - code to be linked does not exist bool isSpirvTmp = pProgram->getIsSpirV(); char *pIrBin = pProgram->irBinary.get(); pProgram->irBinary.release(); size_t irBinSize = 
pProgram->irBinarySize; pProgram->SetIrBinary(nullptr, false); retVal = pProgram->link(0, nullptr, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); pProgram->SetIrBinary(pIrBin, isSpirvTmp); // fail linking - size of code to be linked is == 0 pProgram->SetIrBinarySize(0, isSpirvTmp); retVal = pProgram->link(0, nullptr, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_PROGRAM, retVal); pProgram->SetIrBinarySize(irBinSize, isSpirvTmp); // fail linking - any link error (here caused by specifying unrecognized option) retVal = pProgram->link(0, nullptr, "-invalid-option", 1, &program, nullptr, nullptr); EXPECT_EQ(CL_LINK_PROGRAM_FAILURE, retVal); // fail linking - linked code is corrupted and cannot be postprocessed auto device = static_cast(usedDevice); auto p2 = std::make_unique(*device->getExecutionEnvironment()); p2->setDevice(&device->getDevice()); retVal = p2->link(0, nullptr, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_INVALID_BINARY, retVal); p2.reset(nullptr); // link successfully without notifyFunc retVal = pProgram->link(0, nullptr, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // link successfully with notifyFunc data[0] = 0; retVal = pProgram->link(0, nullptr, "", 1, &program, notifyFunc, &data[0]); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ('a', data[0]); } TEST_P(ProgramFromSourceTest, GivenInvalidOptionsWhenCreatingLibraryThenCorrectErrorIsReturned) { cl_program program = pProgram; // Order of following microtests is important - do not change. // Add new microtests at end. 
// compile successfully a kernel to be later used to create library retVal = pProgram->compile(0, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // create library successfully retVal = pProgram->link(0, nullptr, CompilerOptions::createLibrary, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); // fail library creation - any link error (here caused by specifying unrecognized option) retVal = pProgram->link(0, nullptr, CompilerOptions::concatenate(CompilerOptions::createLibrary, "-invalid-option").c_str(), 1, &program, nullptr, nullptr); EXPECT_EQ(CL_LINK_PROGRAM_FAILURE, retVal); auto device = pContext->getDevice(0); auto executionEnvironment = device->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); auto failingProgram = std::make_unique(*executionEnvironment); failingProgram->setDevice(&device->getDevice()); // fail library creation - CompilerInterface cannot be obtained retVal = failingProgram->link(0, nullptr, CompilerOptions::createLibrary, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()]); } class PatchTokenFromBinaryTest : public ProgramSimpleFixture { public: void SetUp() override { ProgramSimpleFixture::SetUp(); } void TearDown() override { ProgramSimpleFixture::TearDown(); } }; using PatchTokenTests = Test; template class CommandStreamReceiverMock : public UltCommandStreamReceiver { using BaseClass = UltCommandStreamReceiver; using BaseClass::BaseClass; public: void makeResident(GraphicsAllocation &graphicsAllocation) override { residency[graphicsAllocation.getUnderlyingBuffer()] = graphicsAllocation.getUnderlyingBufferSize(); CommandStreamReceiver::makeResident(graphicsAllocation); } void 
makeNonResident(GraphicsAllocation &graphicsAllocation) override { residency.erase(graphicsAllocation.getUnderlyingBuffer()); CommandStreamReceiver::makeNonResident(graphicsAllocation); } std::map residency; }; HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResidentIsCalledThenConstantAllocationIsMadeResident) { cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "test_constant_memory"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test"); EXPECT_NE(nullptr, pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization); ASSERT_NE(nullptr, pProgram->getConstantSurface()); uint32_t expected_values[] = {0xabcd5432u, 0xaabb5533u}; uint32_t *constBuff = reinterpret_cast(pProgram->getConstantSurface()->getUnderlyingBuffer()); EXPECT_EQ(expected_values[0], constBuff[0]); EXPECT_EQ(expected_values[1], constBuff[1]); std::unique_ptr pKernel(Kernel::create(pProgram, *pKernelInfo, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); auto pCommandStreamReceiver = new CommandStreamReceiverMock(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()); ASSERT_NE(nullptr, pCommandStreamReceiver); pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); pCommandStreamReceiver->residency.clear(); pKernel->makeResident(*pCommandStreamReceiver); EXPECT_EQ(2u, pCommandStreamReceiver->residency.size()); auto &residencyVector = pCommandStreamReceiver->getResidencyAllocations(); //we expect kernel ISA here and constant allocation auto kernelIsa = pKernel->getKernelInfo().getGraphicsAllocation(); auto constantAllocation = pProgram->getConstantSurface(); auto element = std::find(residencyVector.begin(), residencyVector.end(), kernelIsa); EXPECT_NE(residencyVector.end(), element); element = std::find(residencyVector.begin(), residencyVector.end(), 
constantAllocation); EXPECT_NE(residencyVector.end(), element); auto crossThreadData = pKernel->getCrossThreadData(); uint32_t *constBuffGpuAddr = reinterpret_cast(pProgram->getConstantSurface()->getGpuAddressToPatch()); uintptr_t *pDst = reinterpret_cast(crossThreadData + pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->DataParamOffset); EXPECT_EQ(*pDst, reinterpret_cast(constBuffGpuAddr)); pCommandStreamReceiver->makeSurfacePackNonResident(pCommandStreamReceiver->getResidencyAllocations()); EXPECT_EQ(0u, pCommandStreamReceiver->residency.size()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(2u, surfaces.size()); for (Surface *surface : surfaces) { delete surface; } } TEST_F(PatchTokenTests, WhenBuildingProgramThenGwsIsSet) { cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "kernel_data_param"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test"); ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[0]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[1]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.globalWorkSizeOffsets[2]); } TEST_F(PatchTokenTests, WhenBuildingProgramThenLwsIsSet) { cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "kernel_data_param"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("test"); ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]); ASSERT_NE(static_cast(-1), 
pKernelInfo->workloadInfo.localWorkSizeOffsets[2]); pKernelInfo = pProgram->getKernelInfo("test_get_local_size"); ASSERT_NE(nullptr, pKernelInfo->patchInfo.dataParameterStream); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[0]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[1]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets[2]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets2[0]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets2[1]); ASSERT_NE(static_cast(-1), pKernelInfo->workloadInfo.localWorkSizeOffsets2[2]); } TEST_F(PatchTokenTests, WhenBuildingProgramThenConstantKernelArgsAreAvailable) { // PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "test_basic_constant"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("constant_kernel"); ASSERT_NE(nullptr, pKernelInfo); auto pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); uint32_t numArgs; retVal = pKernel->getInfo(CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3u, numArgs); uint32_t sizeOfPtr = sizeof(void *); EXPECT_EQ(pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size, sizeOfPtr); EXPECT_EQ(pKernelInfo->kernelArgInfo[1].kernelArgPatchInfoVector[0].size, sizeOfPtr); delete pKernel; } TEST_F(PatchTokenTests, GivenVmeKernelWhenBuildingKernelThenArgAvailable) { if (!pDevice->getHardwareInfo().capabilityTable.supportsVme) { GTEST_SKIP(); } // PATCH_TOKEN_INLINE_VME_SAMPLER_INFO token indicates a VME kernel. 
cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "vme_kernels"); ASSERT_NE(nullptr, pProgram); retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto pKernelInfo = pProgram->getKernelInfo("device_side_block_motion_estimate_intel"); ASSERT_NE(nullptr, pKernelInfo); EXPECT_EQ(true, pKernelInfo->isVmeWorkload); auto pKernel = Kernel::create( pProgram, *pKernelInfo, &retVal); ASSERT_NE(nullptr, pKernel); delete pKernel; } class ProgramPatchTokenFromBinaryTest : public ProgramSimpleFixture { public: void SetUp() override { ProgramSimpleFixture::SetUp(); } void TearDown() override { ProgramSimpleFixture::TearDown(); } }; typedef Test ProgramPatchTokenTests; TEST(ProgramFromBinaryTests, givenBinaryWithInvalidICBEThenErrorIsReturned) { cl_int retVal = CL_INVALID_BINARY; SProgramBinaryHeader binHeader; memset(&binHeader, 0, sizeof(binHeader)); binHeader.Magic = iOpenCL::MAGIC_CL; binHeader.Version = iOpenCL::CURRENT_ICBE_VERSION - 3; binHeader.Device = defaultHwInfo->platform.eRenderCoreFamily; binHeader.GPUPointerSizeInBytes = 8; binHeader.NumberOfKernels = 0; binHeader.SteppingId = 0; binHeader.PatchListSize = 0; size_t binSize = sizeof(SProgramBinaryHeader); { const unsigned char *binaries[1] = {reinterpret_cast(&binHeader)}; const cl_device_id deviceId = 0; MockContext context; std::unique_ptr pProgram(Program::create(&context, 0, &deviceId, &binSize, binaries, nullptr, retVal)); EXPECT_EQ(nullptr, pProgram.get()); EXPECT_EQ(CL_INVALID_BINARY, retVal); } { // whatever method we choose CL_INVALID_BINARY is always returned ExecutionEnvironment executionEnvironment; std::unique_ptr pProgram(Program::createFromGenBinary(executionEnvironment, nullptr, &binHeader, binSize, false, &retVal, nullptr)); ASSERT_NE(nullptr, pProgram.get()); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->processGenBinary(); EXPECT_EQ(CL_INVALID_BINARY, retVal); } } TEST(ProgramFromBinaryTests, 
givenEmptyProgramThenErrorIsReturned) {
    // A program whose unpacked device binary has been reset must fail processGenBinary()
    // with CL_INVALID_BINARY.
    cl_int retVal = CL_INVALID_BINARY;

    SProgramBinaryHeader binHeader;
    memset(&binHeader, 0, sizeof(binHeader));
    binHeader.Magic = iOpenCL::MAGIC_CL;
    binHeader.Version = iOpenCL::CURRENT_ICBE_VERSION;
    binHeader.Device = defaultHwInfo->platform.eRenderCoreFamily;
    binHeader.GPUPointerSizeInBytes = 8;
    binHeader.NumberOfKernels = 0;
    binHeader.SteppingId = 0;
    binHeader.PatchListSize = 0;
    size_t binSize = sizeof(SProgramBinaryHeader);

    // NOTE(review): template arguments were lost in extraction; MockProgram is restored because the
    // test touches unpackedDeviceBinary through the MockProgram factory - confirm.
    ExecutionEnvironment executionEnvironment;
    std::unique_ptr<MockProgram> pProgram(MockProgram::createFromGenBinary<MockProgram>(executionEnvironment, nullptr, &binHeader, binSize, false, &retVal, nullptr));
    ASSERT_NE(nullptr, pProgram.get());
    EXPECT_EQ(CL_SUCCESS, retVal);

    pProgram->unpackedDeviceBinary.reset(nullptr);
    retVal = pProgram->processGenBinary();
    EXPECT_EQ(CL_INVALID_BINARY, retVal);
}

// Binary-based tests run for every (binary file, kernel name) combination.
INSTANTIATE_TEST_CASE_P(ProgramFromBinaryTests,
                        ProgramFromBinaryTest,
                        ::testing::Combine(
                            ::testing::ValuesIn(BinaryFileNames),
                            ::testing::ValuesIn(KernelNames)));

// Source-based tests run for every (source file, binary-for-source file, kernel name) combination.
INSTANTIATE_TEST_CASE_P(ProgramFromSourceTests,
                        ProgramFromSourceTest,
                        ::testing::Combine(
                            ::testing::ValuesIn(SourceFileNames),
                            ::testing::ValuesIn(BinaryForSourceFileNames),
                            ::testing::ValuesIn(KernelNames)));

// The -ocl-version internal option must match the version substring reported by CL_DEVICE_VERSION.
TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) {
    // The flag is forced to false for the test and restored by hand at the end.
    auto defaultSetting = DebugManager.flags.DisableStatelessToStatefulOptimization.get();

    DebugManager.flags.DisableStatelessToStatefulOptimization.set(false);

    MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice);
    char paramValue[32];
    pClDevice->getDeviceInfo(CL_DEVICE_VERSION, 32, paramValue, 0);
    if (strstr(paramValue, "2.1")) {
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=210")) << program.getInternalOptions();
    } else if (strstr(paramValue, "2.0")) {
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=200")) << program.getInternalOptions();
    } else if (strstr(paramValue, "1.2")) {
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=120")) << program.getInternalOptions();
    } else {
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=000")) << program.getInternalOptions();
    }
    DebugManager.flags.DisableStatelessToStatefulOptimization.set(defaultSetting);
}

// With the device version string overridden to "OpenCL 2.0", the program must carry -ocl-version=200.
TEST_F(ProgramTests, GivenForced20WhenProgramIsCreatedThenOcl20IsInOptions) {
    auto defaultVersion = pClDevice->deviceInfo.clVersion;

    pClDevice->deviceInfo.clVersion = "OpenCL 2.0";
    MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice);
    char paramValue[32];
    pClDevice->getDeviceInfo(CL_DEVICE_VERSION, 32, paramValue, 0);
    EXPECT_STREQ("OpenCL 2.0", paramValue);
    EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=200"));
    pClDevice->deviceInfo.clVersion = defaultVersion;
}

// With DisableStatelessToStatefulOptimization set, the greaterThan4gbBuffersRequired internal option
// must be present regardless of the reported OpenCL version.
TEST_F(ProgramTests, GivenStatelessToStatefulIsDisabledWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredOptionIsSet) {
    DebugManagerStateRestore restorer;

    DebugManager.flags.DisableStatelessToStatefulOptimization.set(true);

    MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice);
    char paramValue[32];
    pClDevice->getDeviceInfo(CL_DEVICE_VERSION, 32, paramValue, 0);
    if (strstr(paramValue, "2.1")) {
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=210")) << program.getInternalOptions();
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired));
    } else if (strstr(paramValue, "2.0")) {
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=200")) << program.getInternalOptions();
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired));
    } else if (strstr(paramValue, "1.2")) {
        EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=120")) << program.getInternalOptions();
EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)); } else { EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=000")) << program.getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)); } } TEST_F(ProgramTests, WhenCreatingProgramThenBindlessIsEnabledOnlyIfDebugFlagIsEnabled) { using namespace testing; DebugManagerStateRestore restorer; { EXPECT_FALSE(DebugManager.flags.UseBindlessBuffers.get()); EXPECT_FALSE(DebugManager.flags.UseBindlessImages.get()); MockProgram programNoBindless(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); EXPECT_FALSE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessBuffers)) << programNoBindless.getInternalOptions(); EXPECT_FALSE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessImages)) << programNoBindless.getInternalOptions(); } { DebugManager.flags.UseBindlessBuffers.set(true); MockProgram programNoBindless(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); EXPECT_TRUE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessBuffers)) << programNoBindless.getInternalOptions(); EXPECT_FALSE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessImages)) << programNoBindless.getInternalOptions(); } { DebugManager.flags.UseBindlessBuffers.set(false); DebugManager.flags.UseBindlessImages.set(true); MockProgram programNoBindless(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); EXPECT_FALSE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessBuffers)) << programNoBindless.getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessImages)) << 
programNoBindless.getInternalOptions(); } { DebugManager.flags.UseBindlessBuffers.set(true); DebugManager.flags.UseBindlessImages.set(true); MockProgram programNoBindless(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); EXPECT_TRUE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessBuffers)) << programNoBindless.getInternalOptions(); EXPECT_TRUE(CompilerOptions::contains(programNoBindless.getInternalOptions(), CompilerOptions::bindlessImages)) << programNoBindless.getInternalOptions(); } } TEST_F(ProgramTests, givenDeviceThatSupportsSharedSystemMemoryAllocationWhenProgramIsCompiledThenItForcesStatelessCompilation) { pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL; pClDevice->sharedDeviceInfo.sharedSystemAllocationsSupport = true; MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions().c_str(), CompilerOptions::greaterThan4gbBuffersRequired)) << program.getInternalOptions(); } TEST_F(ProgramTests, GivenForce32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { cl_int retVal = CL_DEVICE_NOT_FOUND; auto defaultSetting = DebugManager.flags.DisableStatelessToStatefulOptimization.get(); DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); if (pDevice) { const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); char paramValue[32]; pClDevice->getDeviceInfo(CL_DEVICE_VERSION, 32, paramValue, 0); if (strstr(paramValue, "2.1")) { EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=210")) << program.getInternalOptions(); } else if (strstr(paramValue, "2.0")) { 
EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=200")) << program.getInternalOptions(); } else if (strstr(paramValue, "1.2")) { EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=120")) << program.getInternalOptions(); } else { EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), "-ocl-version=000")) << program.getInternalOptions(); } if (pDevice->areSharedSystemAllocationsAllowed()) { EXPECT_TRUE(CompilerOptions::contains(program.getInternalOptions(), CompilerOptions::greaterThan4gbBuffersRequired)) << program.getInternalOptions(); } else { EXPECT_FALSE(CompilerOptions::contains(program.getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << program.getInternalOptions(); } } else { EXPECT_NE(CL_DEVICE_NOT_FOUND, retVal); } DebugManager.flags.DisableStatelessToStatefulOptimization.set(defaultSetting); } TEST_F(ProgramTests, Given32bitSupportWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { auto defaultSetting = DebugManager.flags.DisableStatelessToStatefulOptimization.get(); DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); std::unique_ptr program{Program::create("", pContext, *pClDevice, true, nullptr)}; if ((false == pDevice->areSharedSystemAllocationsAllowed()) && (false == is32bit)) { EXPECT_FALSE(CompilerOptions::contains(program->getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << program->getInternalOptions(); } else { EXPECT_TRUE(CompilerOptions::contains(program->getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << program->getInternalOptions(); } DebugManager.flags.DisableStatelessToStatefulOptimization.set(defaultSetting); } TEST_F(ProgramTests, GivenStatelessToStatefulIsDisabledWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { auto defaultSetting = DebugManager.flags.DisableStatelessToStatefulOptimization.get(); 
DebugManager.flags.DisableStatelessToStatefulOptimization.set(true); std::unique_ptr program{Program::create("", pContext, *pClDevice, true, nullptr)}; EXPECT_TRUE(CompilerOptions::contains(program->getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << program->getInternalOptions(); DebugManager.flags.DisableStatelessToStatefulOptimization.set(defaultSetting); } TEST_F(ProgramTests, givenProgramWhenItIsCompiledThenItAlwaysHavePreserveVec3TypeInternalOptionSet) { std::unique_ptr program(Program::create("", pContext, *pClDevice, true, nullptr)); EXPECT_TRUE(CompilerOptions::contains(program->getInternalOptions(), CompilerOptions::preserveVec3Type)) << program->getInternalOptions(); } TEST_F(ProgramTests, Force32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbBuffersRequiredIsCorrectlySet) { auto defaultSetting = DebugManager.flags.DisableStatelessToStatefulOptimization.get(); DebugManager.flags.DisableStatelessToStatefulOptimization.set(false); const_cast(&pDevice->getDeviceInfo())->force32BitAddressess = true; std::unique_ptr program{Program::create("", pContext, *pClDevice, true, nullptr)}; if (is32bit) { EXPECT_TRUE(CompilerOptions::contains(program->getInternalOptions(), CompilerOptions::greaterThan4gbBuffersRequired)) << program->getInternalOptions(); } else { if (false == pDevice->areSharedSystemAllocationsAllowed()) { EXPECT_FALSE(CompilerOptions::contains(program->getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << program->getInternalOptions(); } else { EXPECT_TRUE(CompilerOptions::contains(program->getInternalOptions(), NEO::CompilerOptions::greaterThan4gbBuffersRequired)) << program->getInternalOptions(); } } DebugManager.flags.DisableStatelessToStatefulOptimization.set(defaultSetting); } TEST_F(ProgramTests, GivenStatelessToStatefulBufferOffsetOptimizationWhenProgramIsCreatedThenBufferOffsetArgIsSet) { DebugManagerStateRestore dbgRestorer; 
DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(1); cl_int errorCode = CL_SUCCESS; const char programSource[] = "program"; const char *programPointer = programSource; const char **programSources = reinterpret_cast(&programPointer); size_t length = sizeof(programSource); std::unique_ptr program(Program::create(pContext, 1u, programSources, &length, errorCode)); EXPECT_TRUE(CompilerOptions::contains(program->getInternalOptions(), CompilerOptions::hasBufferOffsetArg)) << program->getInternalOptions(); } TEST_F(ProgramTests, givenStatelessToStatefullOptimizationOffWHenProgramIsCreatedThenOptimizationStringIsNotPresent) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.set(0); cl_int errorCode = CL_SUCCESS; const char programSource[] = "program"; const char *programPointer = programSource; const char **programSources = reinterpret_cast(&programPointer); size_t length = sizeof(programSource); std::unique_ptr program(Program::create(pContext, 1u, programSources, &length, errorCode)); EXPECT_FALSE(CompilerOptions::contains(program->getInternalOptions(), CompilerOptions::hasBufferOffsetArg)) << program->getInternalOptions(); } TEST_F(ProgramTests, GivenContextWhenCreateProgramThenIncrementContextRefCount) { auto initialApiRefCount = pContext->getReference(); auto initialInternalRefCount = pContext->getRefInternalCount(); MockProgram *program = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount + 1); program->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); } TEST_F(ProgramTests, GivenContextWhenCreateProgramFromSourceThenIncrementContextRefCount) { auto initialApiRefCount = pContext->getReference(); auto initialInternalRefCount = pContext->getRefInternalCount(); auto 
tempProgram = Program::create("", nullptr, *pClDevice, false, nullptr); EXPECT_FALSE(tempProgram->getIsBuiltIn()); auto program = Program::create("", pContext, *pClDevice, false, nullptr); EXPECT_FALSE(program->getIsBuiltIn()); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount + 1); program->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); tempProgram->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); } TEST_F(ProgramTests, GivenContextWhenCreateBuiltInProgramFromSourceThenDontIncrementContextRefCount) { auto initialApiRefCount = pContext->getReference(); auto initialInternalRefCount = pContext->getRefInternalCount(); auto tempProgram = Program::create("", nullptr, *pClDevice, true, nullptr); EXPECT_TRUE(tempProgram->getIsBuiltIn()); auto program = Program::create("", pContext, *pClDevice, true, nullptr); EXPECT_TRUE(program->getIsBuiltIn()); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); program->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); tempProgram->release(); EXPECT_EQ(pContext->getReference(), initialApiRefCount); EXPECT_EQ(pContext->getRefInternalCount(), initialInternalRefCount); } TEST_F(ProgramTests, WhenBuildingProgramThenPointerToProgramIsReturned) { cl_int retVal = CL_DEVICE_NOT_FOUND; Program *pProgram = Program::create("", pContext, *pClDevice, false, &retVal); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); delete pProgram; pProgram = Program::create("", pContext, *pClDevice, false, nullptr); EXPECT_NE(nullptr, pProgram); delete pProgram; } TEST_F(ProgramTests, 
GivenNullBinaryWhenCreatingProgramFromGenBinaryThenInvalidValueErrorIsReturned) { cl_int retVal = CL_SUCCESS; Program *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, nullptr, 0, false, &retVal, pDevice); EXPECT_EQ(nullptr, pProgram); EXPECT_NE(CL_SUCCESS, retVal); } TEST_F(ProgramTests, WhenCreatingProgramFromGenBinaryThenSuccessIsReturned) { cl_int retVal = CL_INVALID_BINARY; char binary[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t size = 10; Program *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, binary, size, false, &retVal, pDevice); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ((uint32_t)CL_PROGRAM_BINARY_TYPE_EXECUTABLE, (uint32_t)pProgram->getProgramBinaryType()); EXPECT_FALSE(pProgram->getIsBuiltIn()); cl_device_id deviceId = pContext->getDevice(0); cl_build_status status = 0; pProgram->getBuildInfo(deviceId, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, nullptr); EXPECT_EQ(CL_BUILD_SUCCESS, status); delete pProgram; } TEST_F(ProgramTests, GivenBuiltInFlagSetWhenCreatingProgramFromGenBinaryThenBuiltInIsCreated) { cl_int retVal = CL_INVALID_BINARY; char binary[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t size = 10; Program *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, binary, size, true, &retVal, pDevice); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(pProgram->getIsBuiltIn()); delete pProgram; } TEST_F(ProgramTests, GivenRetValNullPointerWhenCreatingProgramFromGenBinaryThenSuccessIsReturned) { char binary[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t size = 10; Program *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), pContext, binary, size, false, nullptr, pDevice); EXPECT_NE(nullptr, pProgram); EXPECT_EQ((uint32_t)CL_PROGRAM_BINARY_TYPE_EXECUTABLE, (uint32_t)pProgram->getProgramBinaryType()); cl_device_id deviceId = 
pContext->getDevice(0); cl_build_status status = 0; pProgram->getBuildInfo(deviceId, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, nullptr); EXPECT_EQ(CL_BUILD_SUCCESS, status); delete pProgram; } TEST_F(ProgramTests, GivenNullContextWhenCreatingProgramFromGenBinaryThenSuccessIsReturned) { cl_int retVal = CL_INVALID_BINARY; char binary[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; size_t size = 10; Program *pProgram = Program::createFromGenBinary(*pDevice->getExecutionEnvironment(), nullptr, binary, size, false, &retVal, pDevice); EXPECT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ((uint32_t)CL_PROGRAM_BINARY_TYPE_EXECUTABLE, (uint32_t)pProgram->getProgramBinaryType()); cl_device_id deviceId = nullptr; cl_build_status status = 0; pProgram->getBuildInfo(deviceId, CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, nullptr); EXPECT_EQ(CL_BUILD_SUCCESS, status); delete pProgram; } TEST_F(ProgramTests, givenProgramFromGenBinaryWhenSLMSizeIsBiggerThenDeviceLimitThenReturnError) { PatchTokensTestData::ValidProgramWithKernelUsingSlm patchtokensProgram; patchtokensProgram.slmMutable->TotalInlineLocalMemorySize = static_cast(pDevice->getDeviceInfo().localMemSize * 2); patchtokensProgram.recalcTokPtr(); auto program = std::make_unique(*pDevice->getExecutionEnvironment(), nullptr, false, pDevice); program->unpackedDeviceBinary = makeCopy(patchtokensProgram.storage.data(), patchtokensProgram.storage.size()); program->unpackedDeviceBinarySize = patchtokensProgram.storage.size(); auto retVal = program->processGenBinary(); EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); } TEST_F(ProgramTests, GivenNoCompilerInterfaceRootDeviceEnvironmentWhenRebuildingBinaryThenOutOfHostMemoryErrorIsReturned) { auto pDevice = pContext->getDevice(0); auto executionEnvironment = pDevice->getExecutionEnvironment(); std::unique_ptr rootDeviceEnvironment = std::make_unique(*executionEnvironment); rootDeviceEnvironment->setHwInfo(&pDevice->getHardwareInfo()); 
std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]); auto program = std::make_unique(*executionEnvironment); EXPECT_NE(nullptr, program); program->setDevice(&pDevice->getDevice()); // Load a binary program file std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); EXPECT_NE(0u, binarySize); // Create program from loaded binary cl_int retVal = program->createProgramFromBinary(pBinary.get(), binarySize); EXPECT_EQ(CL_SUCCESS, retVal); // Ask to rebuild program from its IR binary - it should fail (no Compiler Interface) retVal = program->rebuildProgramFromIr(); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); std::swap(rootDeviceEnvironment, executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]); } TEST_F(ProgramTests, GivenGtpinReraFlagWhenBuildingProgramThenCorrectOptionsAreSet) { auto cip = new MockCompilerInterfaceCaptureBuildOptions(); auto pDevice = pContext->getDevice(0); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(cip); auto program = std::make_unique(*pDevice->getExecutionEnvironment()); program->setDevice(&pDevice->getDevice()); program->sourceCode = "__kernel mock() {}"; program->createdFrom = Program::CreatedFrom::SOURCE; // Ask to build created program without NEO::CompilerOptions::gtpinRera flag. cl_int retVal = program->build(0, nullptr, CompilerOptions::fastRelaxedMath, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; // Ask to build created program with NEO::CompilerOptions::gtpinRera flag. 
cip->buildOptions.clear(); cip->buildInternalOptions.clear(); retVal = program->build(0, nullptr, CompilerOptions::concatenate(CompilerOptions::gtpinRera, CompilerOptions::finiteMathOnly).c_str(), nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); // Check build options that were applied EXPECT_FALSE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::fastRelaxedMath)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildOptions, CompilerOptions::finiteMathOnly)) << cip->buildOptions; EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, CompilerOptions::gtpinRera)) << cip->buildInternalOptions; } TEST_F(ProgramTests, GivenFailingGenBinaryProgramWhenRebuildingBinaryThenInvalidBinaryErrorIsReturned) { cl_int retVal; auto program = std::make_unique(*pDevice->getExecutionEnvironment()); EXPECT_NE(nullptr, program); cl_device_id deviceId = pContext->getDevice(0); ClDevice *pDevice = castToObject(deviceId); program->setDevice(&pDevice->getDevice()); // Load a binary program file std::string filePath; retrieveBinaryKernelFilename(filePath, "CopyBuffer_simd16_", ".bin"); size_t binarySize = 0; auto pBinary = loadDataFromFile(filePath.c_str(), binarySize); EXPECT_NE(0u, binarySize); // Create program from loaded binary retVal = program->createProgramFromBinary(pBinary.get(), binarySize); EXPECT_EQ(CL_SUCCESS, retVal); // Ask to rebuild program from its IR binary - it should fail (simulated invalid binary) retVal = program->rebuildProgramFromIr(); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProgramTests, GivenZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfacesCalledThenNoSurfaceIsCreated) { MockProgram *program = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); uint32_t crossThreadOffsetBlock = 0; KernelInfo *infoBlock = new KernelInfo; SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; privateSurfaceBlock->DataParamOffset = 
crossThreadOffsetBlock; privateSurfaceBlock->DataParamSize = 8; privateSurfaceBlock->Size = 8; privateSurfaceBlock->SurfaceStateHeapOffset = 0; privateSurfaceBlock->Token = 0; privateSurfaceBlock->PerThreadPrivateMemorySize = 0; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; program->blockKernelManager->addBlockKernelInfo(infoBlock); program->allocateBlockPrivateSurfaces(pDevice->getRootDeviceIndex()); EXPECT_EQ(nullptr, program->getBlockKernelManager()->getPrivateSurface(0)); delete privateSurfaceBlock; delete program; } TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfacesCalledThenSurfaceIsCreated) { MockProgram *program = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); uint32_t crossThreadOffsetBlock = 0; KernelInfo *infoBlock = new KernelInfo; SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; privateSurfaceBlock->DataParamSize = 8; privateSurfaceBlock->Size = 8; privateSurfaceBlock->SurfaceStateHeapOffset = 0; privateSurfaceBlock->Token = 0; privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; program->blockKernelManager->addBlockKernelInfo(infoBlock); program->allocateBlockPrivateSurfaces(pDevice->getRootDeviceIndex()); EXPECT_NE(nullptr, program->getBlockKernelManager()->getPrivateSurface(0)); delete privateSurfaceBlock; delete program; } TEST_F(ProgramTests, GivenNonZeroPrivateSizeInBlockWhenAllocateBlockProvateSurfacesCalledThenSecondSurfaceIsNotCreated) { MockProgram *program = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); uint32_t crossThreadOffsetBlock = 0; KernelInfo *infoBlock = new KernelInfo; SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; privateSurfaceBlock->DataParamOffset = 
crossThreadOffsetBlock; privateSurfaceBlock->DataParamSize = 8; privateSurfaceBlock->Size = 8; privateSurfaceBlock->SurfaceStateHeapOffset = 0; privateSurfaceBlock->Token = 0; privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; program->blockKernelManager->addBlockKernelInfo(infoBlock); program->allocateBlockPrivateSurfaces(pDevice->getRootDeviceIndex()); GraphicsAllocation *privateSurface = program->getBlockKernelManager()->getPrivateSurface(0); EXPECT_NE(nullptr, privateSurface); program->allocateBlockPrivateSurfaces(pDevice->getRootDeviceIndex()); GraphicsAllocation *privateSurface2 = program->getBlockKernelManager()->getPrivateSurface(0); EXPECT_EQ(privateSurface, privateSurface2); delete privateSurfaceBlock; delete program; } TEST_F(ProgramTests, givenProgramWithBlockKernelsWhenfreeBlockResourcesisCalledThenFreeGraphhicsAllocationsFromBlockKernelManagerIsCalled) { MockProgram *program = new MockProgram(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); uint32_t crossThreadOffsetBlock = 0; KernelInfo *infoBlock = new KernelInfo; SPatchAllocateStatelessPrivateSurface *privateSurfaceBlock = new SPatchAllocateStatelessPrivateSurface; privateSurfaceBlock->DataParamOffset = crossThreadOffsetBlock; privateSurfaceBlock->DataParamSize = 8; privateSurfaceBlock->Size = 8; privateSurfaceBlock->SurfaceStateHeapOffset = 0; privateSurfaceBlock->Token = 0; privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock; program->blockKernelManager->addBlockKernelInfo(infoBlock); GraphicsAllocation *privateSurface = program->getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); EXPECT_NE(nullptr, privateSurface); program->getBlockKernelManager()->pushPrivateSurface(privateSurface, 0); program->freeBlockResources(); 
delete privateSurfaceBlock; delete program; } class Program32BitTests : public ProgramTests { public: void SetUp() override { DebugManager.flags.Force32bitAddressing.set(true); ProgramTests::SetUp(); } void TearDown() override { ProgramTests::TearDown(); DebugManager.flags.Force32bitAddressing.set(false); } }; TEST_F(Program32BitTests, givenDeviceWithForce32BitAddressingOnWhenBuiltinIsCreatedThenNoFlagsArePassedAsInternalOptions) { MockProgram program(*pDevice->getExecutionEnvironment()); auto &internalOptions = program.getInternalOptions(); EXPECT_THAT(internalOptions, testing::HasSubstr(std::string(""))); } TEST_F(Program32BitTests, givenDeviceWithForce32BitAddressingOnWhenProgramIsCreatedThen32bitFlagIsPassedAsInternalOption) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); auto &internalOptions = program.getInternalOptions(); std::string s1 = internalOptions; size_t pos = s1.find(NEO::CompilerOptions::arch32bit); if (is64bit) { EXPECT_NE(pos, std::string::npos); } else { EXPECT_EQ(pos, std::string::npos); } } TEST_F(ProgramTests, givenNewProgramTheStatelessToStatefulBufferOffsetOtimizationIsMatchingThePlatformEnablingStatus) { MockProgram prog(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); auto &internalOpts = prog.getInternalOptions(); HardwareCapabilities hwCaps = {0}; HwHelper::get(prog.getDevice().getHardwareInfo().platform.eRenderCoreFamily).setupHardwareCapabilities(&hwCaps, prog.getDevice().getHardwareInfo()); if (hwCaps.isStatelesToStatefullWithOffsetSupported) { EXPECT_TRUE(CompilerOptions::contains(internalOpts, CompilerOptions::hasBufferOffsetArg)); } else { EXPECT_FALSE(CompilerOptions::contains(internalOpts, CompilerOptions::hasBufferOffsetArg)); } } template struct CreateProgramFromBinaryMock : public MockProgram { CreateProgramFromBinaryMock(ExecutionEnvironment &executionEnvironment, Context *context, bool isBuiltIn, Device *device) : MockProgram(executionEnvironment, context, isBuiltIn, 
nullptr) { } cl_int createProgramFromBinary(const void *pBinary, size_t binarySize) override { this->irBinary.reset(new char[binarySize]); this->irBinarySize = binarySize; this->isSpirV = spirv; memcpy_s(this->irBinary.get(), binarySize, pBinary, binarySize); return ErrCodeToReturn; } }; TEST_F(ProgramTests, GivenFailedBinaryWhenCreatingFromIlThenInvalidBinaryErrorIsReturned) { const uint32_t notSpirv[16] = {0xDEADBEEF}; cl_int retVal = CL_SUCCESS; auto prog = Program::createFromIL>(pContext, reinterpret_cast(notSpirv), sizeof(notSpirv), retVal); EXPECT_EQ(nullptr, prog); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProgramTests, GivenSuccessfullyBuiltBinaryWhenCreatingFromIlThenValidProgramIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int retVal = CL_SUCCESS; auto prog = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), retVal); ASSERT_NE(nullptr, prog); EXPECT_EQ(CL_SUCCESS, retVal); prog->release(); } TEST_F(ProgramTests, givenProgramCreatedFromILWhenCompileIsCalledThenReuseTheILInsteadOfCallingCompilerInterface) { const uint32_t spirv[16] = {0x03022307}; cl_int errCode = 0; auto prog = Program::createFromIL(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode); ASSERT_NE(nullptr, prog); cl_device_id deviceId = pClDevice; auto debugVars = NEO::getIgcDebugVars(); debugVars.forceBuildFailure = true; gEnvironment->fclPushDebugVars(debugVars); auto compilerErr = prog->compile(1, &deviceId, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, compilerErr); gEnvironment->fclPopDebugVars(); prog->release(); } TEST_F(ProgramTests, givenProgramCreatedFromIntermediateBinaryRepresentationWhenCompileIsCalledThenReuseTheILInsteadOfCallingCompilerInterface) { const uint32_t spirv[16] = {0x03022307}; cl_int errCode = 0; cl_device_id deviceId = pClDevice; cl_context ctx = pContext; size_t lengths = sizeof(spirv); const unsigned char *binaries[1] = {reinterpret_cast(spirv)}; auto prog = Program::create(ctx, 1U, 
&deviceId, &lengths, binaries, nullptr, errCode); ASSERT_NE(nullptr, prog); auto debugVars = NEO::getIgcDebugVars(); debugVars.forceBuildFailure = true; gEnvironment->fclPushDebugVars(debugVars); auto compilerErr = prog->compile(1, &deviceId, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, compilerErr); gEnvironment->fclPopDebugVars(); prog->release(); } TEST_F(ProgramTests, GivenIlIsNullptrWhenCreatingFromIlThenInvalidBinaryErrorIsReturned) { cl_int retVal = CL_SUCCESS; auto prog = Program::createFromIL>(pContext, nullptr, 16, retVal); EXPECT_EQ(nullptr, prog); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProgramTests, GivenIlSizeZeroWhenCreatingFromIlThenInvalidBinaryErrorIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int retVal = CL_SUCCESS; auto prog = Program::createFromIL>(pContext, reinterpret_cast(spirv), 0, retVal); EXPECT_EQ(nullptr, prog); EXPECT_EQ(CL_INVALID_BINARY, retVal); } TEST_F(ProgramTests, WhenCreatingFromIlThenIsSpirvIsSetCorrectly) { const uint32_t spirv[16] = {0x03022307}; cl_int retVal = CL_SUCCESS; auto prog = Program::createFromIL(pContext, reinterpret_cast(spirv), sizeof(spirv), retVal); EXPECT_NE(nullptr, prog); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(prog->getIsSpirV()); prog->release(); const char llvmBc[16] = {'B', 'C', '\xc0', '\xde'}; prog = Program::createFromIL(pContext, reinterpret_cast(llvmBc), sizeof(llvmBc), retVal); EXPECT_NE(nullptr, prog); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(prog->getIsSpirV()); prog->release(); } static const uint8_t llvmBinary[] = "BC\xc0\xde "; TEST(isValidLlvmBinary, whenLlvmMagicWasFoundThenBinaryIsValidLLvm) { EXPECT_TRUE(NEO::isLlvmBitcode(llvmBinary)); } TEST(isValidLlvmBinary, whenBinaryIsNullptrThenBinaryIsNotValidLLvm) { EXPECT_FALSE(NEO::isLlvmBitcode(ArrayRef())); } TEST(isValidLlvmBinary, whenBinaryIsShorterThanLllvMagicThenBinaryIsNotValidLLvm) { EXPECT_FALSE(NEO::isLlvmBitcode(ArrayRef(llvmBinary, 2))); } TEST(isValidLlvmBinary, 
whenBinaryDoesNotContainLllvMagicThenBinaryIsNotValidLLvm) { const uint8_t notLlvmBinary[] = "ABCDEFGHIJKLMNO"; EXPECT_FALSE(NEO::isLlvmBitcode(notLlvmBinary)); } const uint32_t spirv[16] = {0x03022307}; const uint32_t spirvInvEndianes[16] = {0x07230203}; TEST(isValidSpirvBinary, whenSpirvMagicWasFoundThenBinaryIsValidSpirv) { EXPECT_TRUE(NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(&spirv), sizeof(spirv)))); EXPECT_TRUE(NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(&spirvInvEndianes), sizeof(spirvInvEndianes)))); } TEST(isValidSpirvBinary, whenBinaryIsNullptrThenBinaryIsNotValidLLvm) { EXPECT_FALSE(NEO::isSpirVBitcode(ArrayRef())); } TEST(isValidSpirvBinary, whenBinaryIsShorterThanLllvMagicThenBinaryIsNotValidLLvm) { EXPECT_FALSE(NEO::isSpirVBitcode(ArrayRef(reinterpret_cast(&spirvInvEndianes), 2))); } TEST(isValidSpirvBinary, whenBinaryDoesNotContainLllvMagicThenBinaryIsNotValidLLvm) { const uint8_t notSpirvBinary[] = "ABCDEFGHIJKLMNO"; EXPECT_FALSE(NEO::isSpirVBitcode(notSpirvBinary)); } TEST_F(ProgramTests, WhenLinkingTwoValidSpirvProgramsThenValidProgramIsReturned) { const uint32_t spirv[16] = {0x03022307}; cl_int errCode = CL_SUCCESS; auto node1 = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode); ASSERT_NE(nullptr, node1); EXPECT_EQ(CL_SUCCESS, errCode); auto node2 = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode); ASSERT_NE(nullptr, node2); EXPECT_EQ(CL_SUCCESS, errCode); auto prog = Program::createFromIL>(pContext, reinterpret_cast(spirv), sizeof(spirv), errCode); ASSERT_NE(nullptr, prog); EXPECT_EQ(CL_SUCCESS, errCode); cl_program linkNodes[] = {node1, node2}; errCode = prog->link(0, nullptr, nullptr, 2, linkNodes, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); prog->release(); node2->release(); node1->release(); } TEST_F(ProgramTests, givenSeparateBlockKernelsWhenNoParentAndSubgroupKernelsThenSeparateNoneKernel) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, 
false, pDevice); EXPECT_EQ(0u, program.getKernelInfoArray().size()); EXPECT_EQ(0u, program.getParentKernelInfoArray().size()); EXPECT_EQ(0u, program.getSubgroupKernelInfoArray().size()); program.separateBlockKernels(); EXPECT_EQ(0u, program.getKernelInfoArray().size()); EXPECT_EQ(0u, program.getBlockKernelManager()->getCount()); } TEST_F(ProgramTests, givenSeparateBlockKernelsWhenRegularKernelsThenSeparateNoneKernel) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); auto pRegularKernel1Info = new KernelInfo(); pRegularKernel1Info->name = "regular_kernel_1"; program.getKernelInfoArray().push_back(pRegularKernel1Info); auto pRegularKernel2Info = new KernelInfo(); pRegularKernel2Info->name = "regular_kernel_2"; program.getKernelInfoArray().push_back(pRegularKernel2Info); EXPECT_EQ(2u, program.getKernelInfoArray().size()); program.separateBlockKernels(); EXPECT_EQ(2u, program.getKernelInfoArray().size()); EXPECT_EQ(0, strcmp("regular_kernel_1", program.getKernelInfoArray().at(0)->name.c_str())); EXPECT_EQ(0, strcmp("regular_kernel_2", program.getKernelInfoArray().at(1)->name.c_str())); EXPECT_EQ(0u, program.getBlockKernelManager()->getCount()); } TEST_F(ProgramTests, givenSeparateBlockKernelsWhenChildLikeKernelWithoutParentKernelThenSeparateNoneKernel) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); auto pParentKernelInfo = new KernelInfo(); pParentKernelInfo->name = "another_parent_kernel"; program.getKernelInfoArray().push_back(pParentKernelInfo); program.getParentKernelInfoArray().push_back(pParentKernelInfo); auto pChildKernelInfo = new KernelInfo(); pChildKernelInfo->name = "childlike_kernel_dispatch_0"; program.getKernelInfoArray().push_back(pChildKernelInfo); EXPECT_EQ(2u, program.getKernelInfoArray().size()); EXPECT_EQ(1u, program.getParentKernelInfoArray().size()); program.separateBlockKernels(); EXPECT_EQ(2u, program.getKernelInfoArray().size()); EXPECT_EQ(0, 
strcmp("another_parent_kernel", program.getKernelInfoArray().at(0)->name.c_str())); EXPECT_EQ(0, strcmp("childlike_kernel_dispatch_0", program.getKernelInfoArray().at(1)->name.c_str())); EXPECT_EQ(0u, program.getBlockKernelManager()->getCount()); } TEST_F(ProgramTests, givenSeparateBlockKernelsWhenChildLikeKernelWithoutSubgroupKernelThenSeparateNoneKernel) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); auto pSubgroupKernelInfo = new KernelInfo(); pSubgroupKernelInfo->name = "another_subgroup_kernel"; program.getKernelInfoArray().push_back(pSubgroupKernelInfo); program.getSubgroupKernelInfoArray().push_back(pSubgroupKernelInfo); auto pChildKernelInfo = new KernelInfo(); pChildKernelInfo->name = "childlike_kernel_dispatch_0"; program.getKernelInfoArray().push_back(pChildKernelInfo); EXPECT_EQ(2u, program.getKernelInfoArray().size()); EXPECT_EQ(1u, program.getSubgroupKernelInfoArray().size()); program.separateBlockKernels(); EXPECT_EQ(2u, program.getKernelInfoArray().size()); EXPECT_EQ(0, strcmp("another_subgroup_kernel", program.getKernelInfoArray().at(0)->name.c_str())); EXPECT_EQ(0, strcmp("childlike_kernel_dispatch_0", program.getKernelInfoArray().at(1)->name.c_str())); EXPECT_EQ(0u, program.getBlockKernelManager()->getCount()); } TEST_F(ProgramTests, givenSeparateBlockKernelsWhenParentKernelWithChildKernelThenSeparateChildKernel) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); auto pParentKernelInfo = new KernelInfo(); pParentKernelInfo->name = "parent_kernel"; program.getKernelInfoArray().push_back(pParentKernelInfo); program.getParentKernelInfoArray().push_back(pParentKernelInfo); auto pChildKernelInfo = new KernelInfo(); pChildKernelInfo->name = "parent_kernel_dispatch_0"; program.getKernelInfoArray().push_back(pChildKernelInfo); EXPECT_EQ(2u, program.getKernelInfoArray().size()); EXPECT_EQ(1u, program.getParentKernelInfoArray().size()); program.separateBlockKernels(); EXPECT_EQ(1u, 
program.getKernelInfoArray().size()); EXPECT_EQ(0, strcmp("parent_kernel", program.getKernelInfoArray().at(0)->name.c_str())); EXPECT_EQ(1u, program.getBlockKernelManager()->getCount()); EXPECT_EQ(0, strcmp("parent_kernel_dispatch_0", program.getBlockKernelManager()->getBlockKernelInfo(0)->name.c_str())); } TEST_F(ProgramTests, givenSeparateBlockKernelsWhenSubgroupKernelWithChildKernelThenSeparateChildKernel) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); auto pSubgroupKernelInfo = new KernelInfo(); pSubgroupKernelInfo->name = "subgroup_kernel"; program.getKernelInfoArray().push_back(pSubgroupKernelInfo); program.getSubgroupKernelInfoArray().push_back(pSubgroupKernelInfo); auto pChildKernelInfo = new KernelInfo(); pChildKernelInfo->name = "subgroup_kernel_dispatch_0"; program.getKernelInfoArray().push_back(pChildKernelInfo); EXPECT_EQ(2u, program.getKernelInfoArray().size()); EXPECT_EQ(1u, program.getSubgroupKernelInfoArray().size()); program.separateBlockKernels(); EXPECT_EQ(1u, program.getKernelInfoArray().size()); EXPECT_EQ(0, strcmp("subgroup_kernel", program.getKernelInfoArray().at(0)->name.c_str())); EXPECT_EQ(1u, program.getBlockKernelManager()->getCount()); EXPECT_EQ(0, strcmp("subgroup_kernel_dispatch_0", program.getBlockKernelManager()->getBlockKernelInfo(0)->name.c_str())); } TEST(SimpleProgramTests, givenDefaultProgramWhenSetDeviceIsCalledThenDeviceIsSet) { ExecutionEnvironment executionEnvironment; MockProgram program(executionEnvironment); EXPECT_EQ(nullptr, program.getDevicePtr()); auto dummyDevice = (Device *)0x1337; program.SetDevice(dummyDevice); EXPECT_EQ(dummyDevice, program.getDevicePtr()); program.SetDevice(nullptr); EXPECT_EQ(nullptr, program.getDevicePtr()); } TEST(ProgramDestructionTests, givenProgramUsingDeviceWhenItIsDestroyedAfterPlatfromCleanupThenItIsCleanedUpProperly) { initPlatform(); auto device = platform()->getClDevice(0); MockContext *context = new MockContext(device, false); MockProgram 
*pProgram = new MockProgram(*device->getExecutionEnvironment(), context, false, &device->getDevice()); auto globalAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); pProgram->setGlobalSurface(globalAllocation); platformsImpl.clear(); EXPECT_EQ(1, device->getRefInternalCount()); EXPECT_EQ(1, pProgram->getRefInternalCount()); context->decRefInternal(); pProgram->decRefInternal(); } TEST_F(ProgramTests, givenProgramWithSpirvWhenRebuildProgramIsCalledThenSpirvPathIsTaken) { auto device = castToObject(pContext->getDevice(0)); auto compilerInterface = new MockCompilerInterface(); auto compilerMain = new MockCIFMain(); compilerInterface->SetFclMain(compilerMain); compilerMain->Retain(); compilerInterface->SetIgcMain(compilerMain); compilerMain->setDefaultCreatorFunc(NEO::MockIgcOclDeviceCtx::Create); compilerMain->setDefaultCreatorFunc(NEO::MockFclOclDeviceCtx::Create); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(compilerInterface); std::string receivedInput; MockCompilerDebugVars debugVars = {}; debugVars.receivedInput = &receivedInput; debugVars.forceBuildFailure = true; gEnvironment->igcPushDebugVars(debugVars); std::unique_ptr igcDebugVarsAutoPop{&gEnvironment, [](void *) { gEnvironment->igcPopDebugVars(); }}; auto program = clUniquePtr(new MockProgram(*pDevice->getExecutionEnvironment())); program->setDevice(&device->getDevice()); uint32_t spirv[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(spirv, sizeof(spirv)); program->irBinarySize = sizeof(spirv); program->isSpirV = true; auto buildRet = program->rebuildProgramFromIr(); EXPECT_NE(CL_SUCCESS, buildRet); ASSERT_EQ(sizeof(spirv), receivedInput.size()); EXPECT_EQ(0, memcmp(spirv, receivedInput.c_str(), receivedInput.size())); ASSERT_EQ(1U, compilerInterface->requestedTranslationCtxs.size()); 
EXPECT_EQ(IGC::CodeType::spirV, compilerInterface->requestedTranslationCtxs[0].first); EXPECT_EQ(IGC::CodeType::oclGenBin, compilerInterface->requestedTranslationCtxs[0].second); } TEST_F(ProgramTests, whenRebuildingProgramThenStoreDeviceBinaryProperly) { auto device = castToObject(pContext->getDevice(0)); auto compilerInterface = new MockCompilerInterface(); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(compilerInterface); auto compilerMain = new MockCIFMain(); compilerInterface->SetIgcMain(compilerMain); compilerMain->setDefaultCreatorFunc(NEO::MockIgcOclDeviceCtx::Create); MockCompilerDebugVars debugVars = {}; char binaryToReturn[] = "abcdfghijklmnop"; debugVars.binaryToReturn = binaryToReturn; debugVars.binaryToReturnSize = sizeof(binaryToReturn); gEnvironment->igcPushDebugVars(debugVars); std::unique_ptr igcDebugVarsAutoPop{&gEnvironment, [](void *) { gEnvironment->igcPopDebugVars(); }}; auto program = clUniquePtr(new MockProgram(*pDevice->getExecutionEnvironment())); program->setDevice(&device->getDevice()); uint32_t ir[16] = {0x03022307, 0x23471113, 0x17192329}; program->irBinary = makeCopy(ir, sizeof(ir)); program->irBinarySize = sizeof(ir); EXPECT_EQ(nullptr, program->unpackedDeviceBinary); EXPECT_EQ(0U, program->unpackedDeviceBinarySize); program->rebuildProgramFromIr(); ASSERT_NE(nullptr, program->unpackedDeviceBinary); ASSERT_EQ(sizeof(binaryToReturn), program->unpackedDeviceBinarySize); EXPECT_EQ(0, memcmp(binaryToReturn, program->unpackedDeviceBinary.get(), program->unpackedDeviceBinarySize)); } TEST_F(ProgramTests, givenProgramWhenInternalOptionsArePassedThenTheyAreAddedToProgramInternalOptions) { ExecutionEnvironment executionEnvironment; MockProgram program(executionEnvironment); program.getInternalOptions().erase(); EXPECT_EQ(nullptr, program.getDevicePtr()); std::string buildOptions = NEO::CompilerOptions::gtpinRera.str(); program.extractInternalOptions(buildOptions); 
EXPECT_STREQ(program.getInternalOptions().c_str(), NEO::CompilerOptions::gtpinRera.data()); } TEST_F(ProgramTests, givenProgramWhenUnknownInternalOptionsArePassedThenTheyAreNotAddedToProgramInternalOptions) { ExecutionEnvironment executionEnvironment; MockProgram program(executionEnvironment); program.getInternalOptions().erase(); EXPECT_EQ(nullptr, program.getDevicePtr()); const char *internalOption = "-unknown-internal-options-123"; std::string buildOptions(internalOption); program.extractInternalOptions(buildOptions); EXPECT_EQ(0u, program.getInternalOptions().length()); } TEST_F(ProgramTests, givenProgramWhenAllInternalOptionsArePassedMixedWithUnknownInputThenTheyAreParsedCorrectly) { ExecutionEnvironment executionEnvironment; MockProgram program(executionEnvironment); program.getInternalOptions().erase(); EXPECT_EQ(nullptr, program.getDevicePtr()); std::string buildOptions = CompilerOptions::concatenate("###", CompilerOptions::gtpinRera, "###", CompilerOptions::greaterThan4gbBuffersRequired, "###"); std::string expectedOutput = CompilerOptions::concatenate(CompilerOptions::gtpinRera, CompilerOptions::greaterThan4gbBuffersRequired); program.extractInternalOptions(buildOptions); EXPECT_EQ(expectedOutput, program.getInternalOptions()); } TEST_F(ProgramTests, givenProgramWhenInternalOptionsArePassedWithValidValuesThenTheyAreAddedToProgramInternalOptions) { ExecutionEnvironment executionEnvironment; MockProgram program(executionEnvironment); program.getInternalOptions().erase(); EXPECT_EQ(nullptr, program.getDevicePtr()); program.isFlagOptionOverride = false; program.isOptionValueValidOverride = true; std::string buildOptions = CompilerOptions::concatenate(CompilerOptions::gtpinRera, "someValue"); program.extractInternalOptions(buildOptions); EXPECT_EQ(buildOptions, program.getInternalOptions()) << program.getInternalOptions(); } TEST_F(ProgramTests, givenProgramWhenInternalOptionsArePassedWithInvalidValuesThenTheyAreNotAddedToProgramInternalOptions) { 
ExecutionEnvironment executionEnvironment; MockProgram program(executionEnvironment); EXPECT_EQ(nullptr, program.getDevicePtr()); program.isFlagOptionOverride = false; std::string buildOptions = CompilerOptions::concatenate(CompilerOptions::gtpinRera, "someValue"); std::string expectedOutput = ""; program.getInternalOptions().erase(); program.extractInternalOptions(buildOptions); EXPECT_EQ(expectedOutput, program.getInternalOptions()); program.isOptionValueValidOverride = true; buildOptions = CompilerOptions::gtpinRera; program.getInternalOptions().erase(); program.extractInternalOptions(buildOptions); EXPECT_EQ(expectedOutput, program.getInternalOptions()); } TEST_F(ProgramTests, givenProgramWhenGetSymbolsIsCalledThenMapWithExportedSymbolsIsReturned) { ExecutionEnvironment executionEnvironment; MockProgram program(executionEnvironment); EXPECT_EQ(&program.symbols, &program.getSymbols()); } class AdditionalOptionsMockProgram : public MockProgram { public: AdditionalOptionsMockProgram() : MockProgram(executionEnvironment) {} void applyAdditionalOptions() override { applyAdditionalOptionsCalled++; MockProgram::applyAdditionalOptions(); } uint32_t applyAdditionalOptionsCalled = 0; ExecutionEnvironment executionEnvironment; }; TEST_F(ProgramTests, givenProgramWhenBuiltThenAdditionalOptionsAreApplied) { AdditionalOptionsMockProgram program; program.setDevice(pDevice); cl_device_id device = pClDevice; program.build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(1u, program.applyAdditionalOptionsCalled); } TEST_F(ProgramTests, WhenProgramIsCreatedThenItsDeviceIsProperlySet) { auto wasValidClDeviceUsed = [](MockProgram &program) -> bool { return (program.getInternalOptions().find(CompilerOptions::arch32bit) != std::string::npos); }; MockExecutionEnvironment executionEnvironment; MockDevice mockDevice; mockDevice.deviceInfo.force32BitAddressess = true; auto pContextMockDevice = new MockDevice; MockClDevice contextMockClDevice{pContextMockDevice}; MockContext 
mockContext{&contextMockClDevice}; MockProgram programWithDeviceGiven{executionEnvironment, &mockContext, false, &mockDevice}; EXPECT_EQ(&mockDevice, programWithDeviceGiven.pDevice); MockProgram programWithDeviceFromContext{executionEnvironment, &mockContext, false, nullptr}; EXPECT_EQ(pContextMockDevice, programWithDeviceFromContext.pDevice); MockProgram programWithDeviceWithoutSpecializedDevice{executionEnvironment, nullptr, false, &mockDevice}; EXPECT_FALSE(wasValidClDeviceUsed(programWithDeviceWithoutSpecializedDevice)); MockDevice invalidClDevice; mockDevice.setSpecializedDevice(&invalidClDevice); MockProgram programWithDeviceWithInvalidSpecializedDevice{executionEnvironment, nullptr, false, &mockDevice}; EXPECT_FALSE(wasValidClDeviceUsed(programWithDeviceWithInvalidSpecializedDevice)); MockClDevice validClDevice{new MockDevice}; validClDevice.sharedDeviceInfo.force32BitAddressess = true; MockProgram programWithDeviceWithValidSpecializedDevice{executionEnvironment, nullptr, false, &validClDevice.getDevice()}; EXPECT_TRUE(wasValidClDeviceUsed(programWithDeviceWithValidSpecializedDevice)); } TEST(CreateProgramFromBinaryTests, givenBinaryProgramWhenKernelRebulildIsForcedThenDeviceBinaryIsNotUsed) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.RebuildPrecompiledKernels.set(true); cl_int retVal = CL_INVALID_BINARY; PatchTokensTestData::ValidEmptyProgram programTokens; auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr pProgram(Program::createFromGenBinary(*clDevice->getExecutionEnvironment(), nullptr, programTokens.storage.data(), programTokens.storage.size(), false, &retVal, &clDevice->getDevice())); pProgram->pDevice = &clDevice->getDevice(); ASSERT_NE(nullptr, pProgram.get()); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->createProgramFromBinary(programTokens.storage.data(), programTokens.storage.size()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, pProgram->unpackedDeviceBinary.get()); 
EXPECT_EQ(0U, pProgram->unpackedDeviceBinarySize); EXPECT_EQ(nullptr, pProgram->packedDeviceBinary); EXPECT_EQ(0U, pProgram->packedDeviceBinarySize); } TEST(CreateProgramFromBinaryTests, givenBinaryProgramWhenKernelRebulildIsNotForcedThenDeviceBinaryIsUsed) { cl_int retVal = CL_INVALID_BINARY; PatchTokensTestData::ValidEmptyProgram programTokens; auto clDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr pProgram(Program::createFromGenBinary(*clDevice->getExecutionEnvironment(), nullptr, programTokens.storage.data(), programTokens.storage.size(), false, &retVal, &clDevice->getDevice())); pProgram->pDevice = &clDevice->getDevice(); ASSERT_NE(nullptr, pProgram.get()); EXPECT_EQ(CL_SUCCESS, retVal); retVal = pProgram->createProgramFromBinary(programTokens.storage.data(), programTokens.storage.size()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, reinterpret_cast(pProgram->unpackedDeviceBinary.get())); EXPECT_EQ(programTokens.storage.size(), pProgram->unpackedDeviceBinarySize); EXPECT_NE(nullptr, reinterpret_cast(pProgram->packedDeviceBinary.get())); EXPECT_EQ(programTokens.storage.size(), pProgram->packedDeviceBinarySize); } struct SpecializationConstantProgramMock : public MockProgram { using MockProgram::MockProgram; cl_int updateSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) override { return CL_SUCCESS; } }; struct SpecializationConstantCompilerInterfaceMock : public CompilerInterface { TranslationOutput::ErrorCode retVal = TranslationOutput::ErrorCode::Success; int counter = 0; const char *spirV = nullptr; TranslationOutput::ErrorCode getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, SpecConstantInfo &output) override { counter++; spirV = srcSpirV.begin(); return retVal; } void returnError() { retVal = TranslationOutput::ErrorCode::CompilationFailure; } }; struct SpecializationConstantRootDeviceEnvironemnt : public RootDeviceEnvironment { 
SpecializationConstantRootDeviceEnvironemnt(ExecutionEnvironment &executionEnvironment) : RootDeviceEnvironment(executionEnvironment) { compilerInterface.reset(new SpecializationConstantCompilerInterfaceMock()); } CompilerInterface *getCompilerInterface() override { return compilerInterface.get(); } }; struct setProgramSpecializationConstantTests : public ::testing::Test { void SetUp() override { mockCompiler = new SpecializationConstantCompilerInterfaceMock(); auto rootDeviceEnvironment = device.getExecutionEnvironment()->rootDeviceEnvironments[0].get(); rootDeviceEnvironment->compilerInterface.reset(mockCompiler); mockProgram.reset(new SpecializationConstantProgramMock(*device.getExecutionEnvironment())); mockProgram->isSpirV = true; mockProgram->setDevice(&device); EXPECT_FALSE(mockProgram->areSpecializationConstantsInitialized); EXPECT_EQ(0, mockCompiler->counter); } SpecializationConstantCompilerInterfaceMock *mockCompiler = nullptr; std::unique_ptr mockProgram; MockDevice device; int specValue = 1; }; TEST_F(setProgramSpecializationConstantTests, whenSetProgramSpecializationConstantThenBinarySourceIsUsed) { auto retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue); EXPECT_EQ(1, mockCompiler->counter); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockProgram->areSpecializationConstantsInitialized); EXPECT_EQ(mockProgram->irBinary.get(), mockCompiler->spirV); } TEST_F(setProgramSpecializationConstantTests, whenSetProgramSpecializationConstantMultipleTimesThenSpecializationConstantsAreInitializedOnce) { auto retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue); EXPECT_EQ(1, mockCompiler->counter); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockProgram->areSpecializationConstantsInitialized); retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue); EXPECT_EQ(1, mockCompiler->counter); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockProgram->areSpecializationConstantsInitialized); 
} TEST_F(setProgramSpecializationConstantTests, givenInvalidGetSpecConstantsInfoReturnValueWhenSetProgramSpecializationConstantThenErrorIsReturned) { mockCompiler->returnError(); auto retVal = mockProgram->setProgramSpecializationConstant(1, sizeof(int), &specValue); EXPECT_EQ(1, mockCompiler->counter); EXPECT_EQ(CL_INVALID_VALUE, retVal); EXPECT_FALSE(mockProgram->areSpecializationConstantsInitialized); } TEST(setProgramSpecializationConstantTest, givenUninitializedCompilerinterfaceWhenSetProgramSpecializationConstantThenErrorIsReturned) { auto executionEnvironment = new MockExecutionEnvironment(); executionEnvironment->rootDeviceEnvironments[0] = std::make_unique(*executionEnvironment); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); MockDevice mockDevice(executionEnvironment, 0); SpecializationConstantProgramMock mockProgram(*executionEnvironment); mockProgram.setDevice(&mockDevice); mockProgram.isSpirV = true; int specValue = 1; auto retVal = mockProgram.setProgramSpecializationConstant(1, sizeof(int), &specValue); EXPECT_EQ(CL_OUT_OF_HOST_MEMORY, retVal); } using ProgramBinTest = Test; TEST_F(ProgramBinTest, givenPrintProgramBinaryProcessingTimeSetWhenBuildProgramThenProcessingTimeIsPrinted) { DebugManagerStateRestore restorer; DebugManager.flags.PrintProgramBinaryProcessingTime.set(true); testing::internal::CaptureStdout(); cl_device_id device = pClDevice; CreateProgramFromBinary(pContext, &device, "kernel_data_param"); auto retVal = pProgram->build( 1, &device, nullptr, nullptr, nullptr, false); auto output = testing::internal::GetCapturedStdout(); EXPECT_FALSE(output.compare(0, 14, "Elapsed time: ")); EXPECT_EQ(CL_SUCCESS, retVal); } struct DebugDataGuard { DebugDataGuard(const DebugDataGuard &) = delete; DebugDataGuard(DebugDataGuard &&) = delete; DebugDataGuard() { for (size_t n = 0; n < sizeof(mockDebugData); n++) { mockDebugData[n] = (char)n; } auto vars = NEO::getIgcDebugVars(); vars.debugDataToReturn = mockDebugData; 
vars.debugDataToReturnSize = sizeof(mockDebugData); NEO::setIgcDebugVars(vars); } ~DebugDataGuard() { auto vars = NEO::getIgcDebugVars(); vars.debugDataToReturn = nullptr; vars.debugDataToReturnSize = 0; NEO::setIgcDebugVars(vars); } char mockDebugData[32]; }; TEST_F(ProgramBinTest, GivenBuildWithDebugDataThenBuildDataAvailableViaGetInfo) { DebugDataGuard debugDataGuard; cl_device_id device = pClDevice; const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n"; pProgram = Program::create( pContext, 1, &sourceCode, &knownSourceSize, retVal); retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); // Verify size_t debugDataSize = 0; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_SIZES_INTEL, sizeof(debugDataSize), &debugDataSize, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); std::unique_ptr debugData{new char[debugDataSize]}; for (size_t n = 0; n < sizeof(debugData); n++) { debugData[n] = 0; } char *pDebugData = &debugData[0]; size_t retData = 0; bool isOK = true; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, 1, &pDebugData, &retData); EXPECT_EQ(CL_INVALID_VALUE, retVal); retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, debugDataSize, &pDebugData, &retData); EXPECT_EQ(CL_SUCCESS, retVal); cl_uint numDevices; retVal = clGetProgramInfo(pProgram, CL_PROGRAM_NUM_DEVICES, sizeof(numDevices), &numDevices, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numDevices * sizeof(debugData), retData); // Check integrity of returned debug data for (size_t n = 0; n < debugDataSize; n++) { if (debugData[n] != (char)n) { isOK = false; break; } } EXPECT_TRUE(isOK); for (size_t n = debugDataSize; n < sizeof(debugData); n++) { if (debugData[n] != (char)0) { isOK = false; break; } } EXPECT_TRUE(isOK); retData = 0; retVal = pProgram->getInfo(CL_PROGRAM_DEBUG_INFO_INTEL, debugDataSize, nullptr, &retData); 
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(numDevices * sizeof(debugData), retData); } TEST_F(ProgramBinTest, GivenDebugDataAvailableWhenLinkingProgramThenDebugDataIsStoredInProgram) { DebugDataGuard debugDataGuard; cl_device_id device = pClDevice; const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n"; pProgram = Program::create( pContext, 1, &sourceCode, &knownSourceSize, retVal); retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_program programToLink = pProgram; retVal = pProgram->link(1, &device, nullptr, 1, &programToLink, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, pProgram->getDebugData()); } using ProgramMultiRootDeviceTests = MultiRootDeviceFixture; TEST_F(ProgramMultiRootDeviceTests, WhenPrivateSurfaceIsCreatedThenItHasCorrectRootDeviceIndex) { auto program = std::make_unique(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice()); auto privateSurfaceBlock = std::make_unique(); privateSurfaceBlock->DataParamOffset = 0; privateSurfaceBlock->DataParamSize = 8; privateSurfaceBlock->Size = 8; privateSurfaceBlock->SurfaceStateHeapOffset = 0; privateSurfaceBlock->Token = 0; privateSurfaceBlock->PerThreadPrivateMemorySize = 1000; auto infoBlock = std::make_unique(); infoBlock->patchInfo.pAllocateStatelessPrivateSurface = privateSurfaceBlock.get(); program->blockKernelManager->addBlockKernelInfo(infoBlock.release()); program->allocateBlockPrivateSurfaces(device->getRootDeviceIndex()); auto privateSurface = program->getBlockKernelManager()->getPrivateSurface(0); EXPECT_NE(nullptr, privateSurface); EXPECT_EQ(expectedRootDeviceIndex, privateSurface->getRootDeviceIndex()); } class MockCompilerInterfaceWithGtpinParam : public CompilerInterface { public: TranslationOutput::ErrorCode link( const NEO::Device &device, const TranslationInput &input, 
TranslationOutput &output) override { gtpinInfoPassed = input.GTPinInput; return CompilerInterface::link(device, input, output); } void *gtpinInfoPassed; }; TEST_F(ProgramBinTest, GivenSourceKernelWhenLinkingProgramThenGtpinInitInfoIsPassed) { cl_device_id device = pClDevice; void *pIgcInitPtr = reinterpret_cast(0x1234); gtpinSetIgcInit(pIgcInitPtr); const char *sourceCode = "__kernel void\nCB(\n__global unsigned int* src, __global unsigned int* dst)\n{\nint id = (int)get_global_id(0);\ndst[id] = src[id];\n}\n"; pProgram = Program::create( pContext, 1, &sourceCode, &knownSourceSize, retVal); std::unique_ptr mockCompilerInterface(new MockCompilerInterfaceWithGtpinParam); retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->compilerInterface.reset(mockCompilerInterface.get()); cl_program programToLink = pProgram; retVal = pProgram->link(1, &device, nullptr, 1, &programToLink, nullptr, nullptr); EXPECT_EQ(pIgcInitPtr, mockCompilerInterface->gtpinInfoPassed); mockCompilerInterface.release(); }compute-runtime-20.13.16352/opencl/test/unit_test/program/program_tests.h000066400000000000000000000013301363734646600263400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "gtest/gtest.h" #include extern std::vector BinaryFileNames; extern std::vector SourceFileNames; extern std::vector BinaryForSourceFileNames; extern std::vector KernelNames; class ProgramTests : public NEO::DeviceFixture, public ::testing::Test, public NEO::ContextFixture { using NEO::ContextFixture::SetUp; public: void SetUp() override; void TearDown() override; }; 
compute-runtime-20.13.16352/opencl/test/unit_test/program/program_with_block_kernels_tests.cpp000066400000000000000000000111731363734646600326310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/device/device.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/fixtures/run_kernel_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include namespace NEO { class ProgramWithBlockKernelsTest : public ContextFixture, public PlatformFixture, public ProgramFixture, public testing::Test { using ContextFixture::SetUp; using PlatformFixture::SetUp; protected: ProgramWithBlockKernelsTest() { } void SetUp() override { PlatformFixture::SetUp(); device = pPlatform->getClDevice(0); ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); } void TearDown() override { ProgramFixture::TearDown(); ContextFixture::TearDown(); PlatformFixture::TearDown(); } cl_device_id device; cl_int retVal = CL_SUCCESS; }; TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsBuildingThenKernelInfosHaveCorrectNames) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) { CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0"); auto mockProgram = (MockProgram *)pProgram; ASSERT_NE(nullptr, mockProgram); retVal = mockProgram->build( 1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto kernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel"); EXPECT_NE(nullptr, 
kernelInfo); auto blockKernelInfo = mockProgram->Program::getKernelInfo("simple_block_kernel_dispatch_0"); EXPECT_EQ(nullptr, blockKernelInfo); std::vector blockKernelInfos(mockProgram->blockKernelManager->getCount()); for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { const KernelInfo *blockKernelInfo = mockProgram->blockKernelManager->getBlockKernelInfo(i); EXPECT_NE(nullptr, blockKernelInfo); blockKernelInfos[i] = blockKernelInfo; } bool blockKernelFound = false; for (size_t i = 0; i < mockProgram->blockKernelManager->getCount(); i++) { if (blockKernelInfos[i]->name.find("simple_block_kernel_dispatch") != std::string::npos) { blockKernelFound = true; break; } } EXPECT_TRUE(blockKernelFound); } else { EXPECT_EQ(nullptr, pProgram); } } TEST_F(ProgramWithBlockKernelsTest, GivenKernelWithBlockKernelsWhenProgramIsLinkedThenBlockKernelsAreSeparated) { if (std::string(pPlatform->getClDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.0") != std::string::npos) { CreateProgramFromBinary(pContext, &device, "simple_block_kernel", "-cl-std=CL2.0"); const char *buildOptions = "-cl-std=CL2.0"; overwriteBuiltInBinaryName( &pPlatform->getClDevice(0)->getDevice(), "simple_block_kernel", true); ASSERT_NE(nullptr, pProgram); EXPECT_EQ(CL_SUCCESS, retVal); Program *programLinked = new Program(*pPlatform->peekExecutionEnvironment(), pContext, false, nullptr); cl_program program = pProgram; retVal = pProgram->compile(1, &device, buildOptions, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = programLinked->link(1, &device, buildOptions, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); BlockKernelManager *blockManager = programLinked->getBlockKernelManager(); EXPECT_NE(0u, blockManager->getCount()); for (uint32_t i = 0; i < blockManager->getCount(); i++) { const KernelInfo *info = blockManager->getBlockKernelInfo(i); if (info->name.find("simple_block_kernel_dispatch") != std::string::npos) { break; } } 
restoreBuiltInBinaryName(nullptr); delete programLinked; } else { EXPECT_EQ(nullptr, pProgram); } } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/program/program_with_kernel_debug_tests.cpp000066400000000000000000000320571363734646600324460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/global_environment.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/helpers/kernel_filename_helper.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/mocks/mock_source_level_debugger.h" #include "opencl/test/unit_test/program/program_from_binary.h" #include "opencl/test/unit_test/program/program_tests.h" #include "test.h" #include "compiler_options.h" #include "gmock/gmock.h" #include #include #include using namespace NEO; TEST_F(ProgramTests, givenDeafultProgramObjectWhenKernelDebugEnabledIsQueriedThenFalseIsReturned) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); EXPECT_FALSE(program.isKernelDebugEnabled()); } TEST_F(ProgramTests, givenProgramObjectWhenEnableKernelDebugIsCalledThenProgramHasKernelDebugEnabled) { MockProgram program(*pDevice->getExecutionEnvironment(), pContext, false, pDevice); program.enableKernelDebug(); EXPECT_TRUE(program.isKernelDebugEnabled()); } class ProgramWithKernelDebuggingTest : public ProgramSimpleFixture, public ::testing::Test { public: void SetUp() override { ProgramSimpleFixture::SetUp(); device = pClDevice; if (!pDevice->getHardwareInfo().capabilityTable.debuggerSupported) { GTEST_SKIP(); } std::string filename; std::string kernelOption(CompilerOptions::debugKernelEnable); KernelFilenameHelper::getKernelFilenameFromInternalOption(kernelOption, filename); kbHelper = std::make_unique(filename, false); CreateProgramWithSource( pContext, &device, 
"copybuffer.cl"); mockProgram = reinterpret_cast(pProgram); pProgram->enableKernelDebug(); } void TearDown() override { ProgramSimpleFixture::TearDown(); } cl_device_id device; std::unique_ptr kbHelper; MockProgram *mockProgram = nullptr; }; TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsCompiledThenInternalOptionsIncludeDebugFlag) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { std::string receivedInternalOptions; auto debugVars = NEO::getFclDebugVars(); debugVars.receivedInternalOptionsOutput = &receivedInternalOptions; gEnvironment->fclPushDebugVars(debugVars); cl_int retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(receivedInternalOptions, CompilerOptions::debugKernelEnable)) << receivedInternalOptions; gEnvironment->fclPopDebugVars(); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsCompiledThenInternalOptionsIncludeDashGFlag) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { cl_int retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_THAT(pProgram->getOptions(), ::testing::HasSubstr("-g")); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugAndOptDisabledWhenProgramIsCompiledThenOptionsIncludeClOptDisableFlag) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->isOptDisabled = true; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_THAT(pProgram->getOptions(), 
::testing::HasSubstr(CompilerOptions::optDisable.data())); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsCompiledThenOptionsStartsWithDashSFilename) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->sourceCodeFilename = "debugFileName"; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_THAT(pProgram->getOptions(), ::testing::StartsWith("-s debugFileName")); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenInternalOptionsIncludeDebugFlag) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { std::string receivedInternalOptions; auto debugVars = NEO::getFclDebugVars(); debugVars.receivedInternalOptionsOutput = &receivedInternalOptions; gEnvironment->fclPushDebugVars(debugVars); cl_int retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(CompilerOptions::contains(receivedInternalOptions, CompilerOptions::debugKernelEnable)) << receivedInternalOptions; gEnvironment->fclPopDebugVars(); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenOptionsIncludeDashGFlag) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { cl_int retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_THAT(pProgram->getOptions(), ::testing::HasSubstr("-g")); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugAndOptDisabledWhenProgramIsBuiltThenOptionsIncludeClOptDisableFlag) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { 
MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->isOptDisabled = true; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_THAT(pProgram->getOptions(), ::testing::HasSubstr(CompilerOptions::optDisable.data())); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenOptionsStartsWithDashSFilename) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; sourceLevelDebugger->sourceCodeFilename = "debugFileName"; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_THAT(pProgram->getOptions(), ::testing::StartsWith("-s debugFileName")); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsLinkedThenKernelDebugOptionsAreAppended) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { MockActiveSourceLevelDebugger *sourceLevelDebugger = new MockActiveSourceLevelDebugger; pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto program = std::unique_ptr(new GMockProgram(*pContext->getDevice(0)->getExecutionEnvironment(), pContext, false, &pContext->getDevice(0)->getDevice())); program->enableKernelDebug(); EXPECT_CALL(*program, appendKernelDebugOptions()).Times(1); cl_program clProgramToLink = pProgram; retVal = program->link(1, &device, nullptr, 1, 
&clProgramToLink, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsBuiltThenDebuggerIsNotifiedWithKernelDebugData) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { GMockSourceLevelDebugger *sourceLevelDebugger = new GMockSourceLevelDebugger(nullptr); ON_CALL(*sourceLevelDebugger, notifySourceCode(::testing::_, ::testing::_, ::testing::_)).WillByDefault(::testing::Return(false)); ON_CALL(*sourceLevelDebugger, isOptimizationDisabled()).WillByDefault(::testing::Return(false)); EXPECT_CALL(*sourceLevelDebugger, isOptimizationDisabled()).Times(1); EXPECT_CALL(*sourceLevelDebugger, notifySourceCode(::testing::_, ::testing::_, ::testing::_)).Times(1); EXPECT_CALL(*sourceLevelDebugger, notifyKernelDebugData(::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(1); sourceLevelDebugger->setActive(true); pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST_F(ProgramWithKernelDebuggingTest, givenEnabledKernelDebugWhenProgramIsLinkedThenDebuggerIsNotifiedWithKernelDebugData) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { GMockSourceLevelDebugger *sourceLevelDebugger = new GMockSourceLevelDebugger(nullptr); ON_CALL(*sourceLevelDebugger, notifySourceCode(::testing::_, ::testing::_, ::testing::_)).WillByDefault(::testing::Return(false)); ON_CALL(*sourceLevelDebugger, isOptimizationDisabled()).WillByDefault(::testing::Return(false)); EXPECT_CALL(*sourceLevelDebugger, isOptimizationDisabled()).Times(2); EXPECT_CALL(*sourceLevelDebugger, notifySourceCode(::testing::_, ::testing::_, ::testing::_)).Times(1); EXPECT_CALL(*sourceLevelDebugger, notifyKernelDebugData(::testing::_, ::testing::_, ::testing::_, ::testing::_)).Times(1); 
sourceLevelDebugger->setActive(true); pDevice->executionEnvironment->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->debugger.reset(sourceLevelDebugger); cl_int retVal = pProgram->compile(1, &device, nullptr, 0, nullptr, nullptr, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); cl_program program = pProgram; retVal = pProgram->link(1, &device, nullptr, 1, &program, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); } } TEST_F(ProgramWithKernelDebuggingTest, givenProgramWithKernelDebugEnabledWhenBuiltThenPatchTokenAllocateSipSurfaceHasSizeGreaterThanZero) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { retVal = pProgram->build(1, &device, CompilerOptions::debugKernelEnable, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); auto kernelInfo = pProgram->getKernelInfo("CopyBuffer"); EXPECT_NE(0u, kernelInfo->patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize); } } TEST_F(ProgramWithKernelDebuggingTest, givenKernelDebugEnabledWhenProgramIsBuiltThenDebugDataIsStored) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); auto debugData = mockProgram->getDebugData(); EXPECT_NE(nullptr, debugData); EXPECT_NE(0u, mockProgram->getDebugDataSize()); } } TEST_F(ProgramWithKernelDebuggingTest, givenProgramWithKernelDebugEnabledWhenProcessDebugDataIsCalledThenKernelInfosAreFilledWithDebugData) { if (pDevice->getHardwareInfo().platform.eRenderCoreFamily >= IGFX_GEN9_CORE) { retVal = pProgram->build(1, &device, nullptr, nullptr, nullptr, false); EXPECT_EQ(CL_SUCCESS, retVal); pProgram->processDebugData(); auto kernelInfo = pProgram->getKernelInfo("CopyBuffer"); EXPECT_NE(0u, kernelInfo->debugData.vIsaSize); EXPECT_NE(nullptr, kernelInfo->debugData.vIsa); } } compute-runtime-20.13.16352/opencl/test/unit_test/program/program_with_source.h000066400000000000000000000036161363734646600275420ustar00rootroot00000000000000/* * 
Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/compiler_interface/compiler_interface.h" #include "opencl/test/unit_test/fixtures/context_fixture.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/fixtures/program_fixture.h" #include "opencl/test/unit_test/helpers/kernel_binary_helper.h" #include "opencl/test/unit_test/mocks/mock_context.h" namespace NEO { // ProgramFromSource Test Fixture // Used to test the Program class //////////////////////////////////////////////////////////////////////////////// class ProgramFromSourceTest : public ContextFixture, public PlatformFixture, public ProgramFixture, public testing::TestWithParam> { using ContextFixture::SetUp; using PlatformFixture::SetUp; protected: void SetUp() override { std::tie(SourceFileName, BinaryFileName, KernelName) = GetParam(); kbHelper = new KernelBinaryHelper(BinaryFileName); PlatformFixture::SetUp(); cl_device_id device = pPlatform->getClDevice(0); ContextFixture::SetUp(1, &device); ProgramFixture::SetUp(); CreateProgramWithSource( pContext, &device, SourceFileName); } void TearDown() override { knownSource.reset(); ProgramFixture::TearDown(); ContextFixture::TearDown(); PlatformFixture::TearDown(); delete kbHelper; } KernelBinaryHelper *kbHelper = nullptr; const char *SourceFileName = nullptr; const char *BinaryFileName = nullptr; const char *KernelName = nullptr; cl_int retVal = CL_SUCCESS; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/sampler/000077500000000000000000000000001363734646600232755ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sampler/CMakeLists.txt000066400000000000000000000006271363734646600260420ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sampler 
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/get_sampler_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_set_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sampler}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/sampler/get_sampler_info_tests.cpp000066400000000000000000000042241363734646600305420ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(GetSamplerInfo, InvalidFlags_returnsError) { MockContext context; auto retVal = CL_INVALID_VALUE; auto normalizedCoords = CL_TRUE; auto addressingMode = CL_ADDRESS_MIRRORED_REPEAT; auto filterMode = CL_FILTER_NEAREST; auto sampler = Sampler::create(&context, normalizedCoords, addressingMode, filterMode, retVal); retVal = sampler->getInfo(0, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); delete sampler; } struct GetSamplerInfo : public ::testing::TestWithParam { void SetUp() override { param = GetParam(); } cl_sampler_info param; }; TEST_P(GetSamplerInfo, valid_returnsSuccess) { MockContext context; auto retVal = CL_INVALID_VALUE; auto normalizedCoords = CL_TRUE; auto addressingMode = CL_ADDRESS_MIRRORED_REPEAT; auto filterMode = CL_FILTER_NEAREST; auto sampler = Sampler::create(&context, normalizedCoords, addressingMode, filterMode, CL_FILTER_NEAREST, 2.0f, 3.0f, retVal); size_t sizeReturned = 0; retVal = sampler->getInfo(param, 0, nullptr, &sizeReturned); ASSERT_EQ(CL_SUCCESS, retVal) << " param = " << param; ASSERT_NE(0u, sizeReturned); auto *object = new char[sizeReturned]; retVal = sampler->getInfo(param, sizeReturned, object, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); delete[] object; delete sampler; } // Define new 
command types to run the parameterized tests cl_sampler_info samplerInfoParams[] = { CL_SAMPLER_REFERENCE_COUNT, CL_SAMPLER_CONTEXT, CL_SAMPLER_NORMALIZED_COORDS, CL_SAMPLER_ADDRESSING_MODE, CL_SAMPLER_FILTER_MODE, CL_SAMPLER_MIP_FILTER_MODE, CL_SAMPLER_LOD_MIN, CL_SAMPLER_LOD_MAX}; INSTANTIATE_TEST_CASE_P( Sampler_, GetSamplerInfo, testing::ValuesIn(samplerInfoParams)); compute-runtime-20.13.16352/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp000066400000000000000000000516111363734646600303760ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/ptr_math.h" #include "shared/source/utilities/numeric.h" #include "opencl/source/helpers/sampler_helpers.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" using namespace NEO; namespace NEO { class Surface; }; class SamplerSetArgFixture : public DeviceFixture { public: SamplerSetArgFixture() { memset(&kernelHeader, 0, sizeof(kernelHeader)); } protected: void SetUp() { DeviceFixture::SetUp(); pKernelInfo = std::make_unique(); // define kernel info kernelHeader.DynamicStateHeapSize = sizeof(samplerStateHeap); pKernelInfo->heapInfo.pDsh = samplerStateHeap; pKernelInfo->heapInfo.pKernelHeader = &kernelHeader; // setup kernel arg offsets pKernelInfo->kernelArgInfo.resize(2); pKernelInfo->kernelArgInfo[0].offsetHeap = 0x40; pKernelInfo->kernelArgInfo[0].isSampler = true; pKernelInfo->kernelArgInfo[0].offsetObjectId = 0x0; pKernelInfo->kernelArgInfo[0].offsetSamplerSnapWa = 0x4; pKernelInfo->kernelArgInfo[0].offsetSamplerAddressingMode = 0x8; pKernelInfo->kernelArgInfo[0].offsetSamplerNormalizedCoords = 
0x10; pKernelInfo->kernelArgInfo[1].offsetHeap = 0x40; pKernelInfo->kernelArgInfo[1].isSampler = true; program = std::make_unique(*pDevice->getExecutionEnvironment()); pKernel = new MockKernel(program.get(), *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgSampler); pKernel->setKernelArgHandler(1, &Kernel::setArgSampler); uint32_t crossThreadData[crossThreadDataSize] = {}; pKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); context = new MockContext(pClDevice); retVal = CL_INVALID_VALUE; } void TearDown() { delete pKernel; delete sampler; delete context; DeviceFixture::TearDown(); } bool crossThreadDataUnchanged() { for (uint32_t i = 0; i < crossThreadDataSize; i++) { if (pKernel->mockCrossThreadData[i] != 0u) { return false; } } return true; } void createSampler() { sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); } static const uint32_t crossThreadDataSize = 0x40; cl_int retVal = CL_SUCCESS; std::unique_ptr program; MockKernel *pKernel = nullptr; SKernelBinaryHeaderCommon kernelHeader; std::unique_ptr pKernelInfo; char samplerStateHeap[0x80]; MockContext *context; Sampler *sampler = nullptr; }; typedef Test SamplerSetArgTest; HWTEST_F(SamplerSetArgTest, clSetKernelArgSampler) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; createSampler(); cl_sampler samplerObj = sampler; retVal = clSetKernelArg( pKernel, 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(static_cast(CL_TRUE), static_cast(!samplerState->getNonNormalizedCoordinateEnable())); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTcxAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, 
samplerState->getTcyAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR, samplerState->getTczAddressControlMode()); EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_NEAREST, samplerState->getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter()); EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); std::vector surfaces; pKernel->getResidency(surfaces); EXPECT_EQ(0u, surfaces.size()); } HWTEST_F(SamplerSetArgTest, getKernelArgShouldReturnSampler) { createSampler(); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(samplerObj, pKernel->getKernelArg(0)); } HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledThenIncreaseSamplerRefcount) { cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); auto refCountBefore = pSampler->getRefInternalCount(); retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore + 1, refCountAfter); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); } HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledThenSamplerObjectSurvivesClReleaseSampler) { cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); auto refCountBefore = pSampler->getRefInternalCount(); retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore, refCountAfter); } HWTEST_F(SamplerSetArgTest, 
GivenSamplerObjectWhenSetKernelArgIsCalledAndKernelIsDeletedThenRefCountIsUnchanged) { auto myKernel = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_NE(nullptr, myKernel.get()); ASSERT_EQ(CL_SUCCESS, myKernel->initialize()); myKernel->setKernelArgHandler(0, &Kernel::setArgSampler); myKernel->setKernelArgHandler(1, &Kernel::setArgSampler); uint32_t crossThreadData[crossThreadDataSize] = {}; myKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); auto refCountBefore = pSampler->getRefInternalCount(); retVal = myKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); myKernel.reset(); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore, refCountAfter); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); } HWTEST_F(SamplerSetArgTest, GivenNewSamplerObjectWhensSetKernelArgIsCalledThenDecreaseOldSamplerRefcount) { cl_sampler samplerObj = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); auto pSampler = castToObject(samplerObj); retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountBefore = pSampler->getRefInternalCount(); cl_sampler samplerObj2 = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); retVal = pKernel->setArg( 0, sizeof(samplerObj2), &samplerObj2); ASSERT_EQ(CL_SUCCESS, retVal); auto refCountAfter = pSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore - 1, refCountAfter); retVal = clReleaseSampler(samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); retVal = clReleaseSampler(samplerObj2); ASSERT_EQ(CL_SUCCESS, retVal); } HWTEST_F(SamplerSetArgTest, GivenIncorrentSamplerObjectWhenSetKernelArgSamplerIsCalledThenLeaveRefcountAsIs) { auto notSamplerObj = 
std::unique_ptr(ImageHelper::create(context)); auto pNotSampler = castToObject(notSamplerObj.get()); auto refCountBefore = pNotSampler->getRefInternalCount(); retVal = pKernel->setArgSampler( 0, sizeof(notSamplerObj.get()), notSamplerObj.get()); auto refCountAfter = pNotSampler->getRefInternalCount(); EXPECT_EQ(refCountBefore, refCountAfter); } HWTEST_F(SamplerSetArgTest, WithFilteringNearestAndAddressingClClampSetAsKernelArgumentSetsConstantBuffer) { sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(samplerObj, pKernel->getKernelArg(0)); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto snapWaCrossThreadData = ptrOffset(crossThreadData, 0x4); unsigned int snapWaValue = 0xffffffff; unsigned int objectId = SAMPLER_OBJECT_ID_SHIFT + pKernelInfo->kernelArgInfo[0].offsetHeap; EXPECT_EQ(snapWaValue, *snapWaCrossThreadData); EXPECT_EQ(objectId, *crossThreadData); } HWTEST_F(SamplerSetArgTest, GIVENkernelWithoutObjIdOffsetWHENsetArgTHENdoesntPatchObjId) { sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 1, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(samplerObj, pKernel->getKernelArg(1)); EXPECT_TRUE(crossThreadDataUnchanged()); } HWTEST_F(SamplerSetArgTest, setKernelArgWithNullptrSampler) { createSampler(); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), nullptr); ASSERT_EQ(CL_INVALID_SAMPLER, retVal); } HWTEST_F(SamplerSetArgTest, setKernelArgWithInvalidSampler) { createSampler(); cl_sampler samplerObj = sampler; const void *notASampler = reinterpret_cast(pKernel); retVal = pKernel->setArg( 0, sizeof(samplerObj), notASampler); ASSERT_EQ(CL_INVALID_SAMPLER, retVal); } TEST_F(SamplerSetArgTest, 
givenSamplerTypeStrAndIsSamplerTrueWhenInitializeKernelThenKernelArgumentsTypeIsSamplerObj) { pKernelInfo->kernelArgInfo.resize(2); pKernelInfo->kernelArgInfo[0].metadataExtended = std::make_unique(); pKernelInfo->kernelArgInfo[0].metadataExtended->type = "sampler*"; pKernelInfo->kernelArgInfo[0].isSampler = true; pKernelInfo->kernelArgInfo[1].metadataExtended = std::make_unique(); pKernelInfo->kernelArgInfo[1].metadataExtended->type = "sampler"; pKernelInfo->kernelArgInfo[1].isSampler = true; auto pMockKernell = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_EQ(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_EQ(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); } TEST_F(SamplerSetArgTest, givenSamplerTypeStrAndAndIsSamplerFalseWhenInitializeKernelThenKernelArgumentsTypeIsNotSamplerObj) { pKernelInfo->kernelArgInfo.resize(2); pKernelInfo->kernelArgInfo[0].metadataExtended = std::make_unique(); pKernelInfo->kernelArgInfo[0].metadataExtended->type = "sampler*"; pKernelInfo->kernelArgInfo[0].isSampler = false; pKernelInfo->kernelArgInfo[1].metadataExtended = std::make_unique(); pKernelInfo->kernelArgInfo[1].metadataExtended->type = "sampler"; pKernelInfo->kernelArgInfo[1].isSampler = false; auto pMockKernell = std::make_unique(program.get(), *pKernelInfo, *pClDevice); ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_NE(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_NE(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); } //////////////////////////////////////////////////////////////////////////////// struct NormalizedTest : public SamplerSetArgFixture, public ::testing::TestWithParam { void SetUp() override { SamplerSetArgFixture::SetUp(); } void TearDown() override { SamplerSetArgFixture::TearDown(); } }; HWTEST_P(NormalizedTest, setKernelArgSampler) { typedef typename FamilyType::SAMPLER_STATE 
SAMPLER_STATE; auto normalizedCoordinates = GetParam(); sampler = Sampler::create( context, normalizedCoordinates, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(normalizedCoordinates, static_cast(!samplerState->getNonNormalizedCoordinateEnable())); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto normalizedCoordsAddress = ptrOffset(crossThreadData, 0x10); unsigned int normalizedCoordsValue = GetNormCoordsEnum(normalizedCoordinates); EXPECT_EQ(normalizedCoordsValue, *normalizedCoordsAddress); } cl_bool normalizedCoordinatesCases[] = { CL_FALSE, CL_TRUE}; INSTANTIATE_TEST_CASE_P(SamplerSetArg, NormalizedTest, ::testing::ValuesIn(normalizedCoordinatesCases)); //////////////////////////////////////////////////////////////////////////////// struct AddressingModeTest : public SamplerSetArgFixture, public ::testing::TestWithParam { void SetUp() override { SamplerSetArgFixture::SetUp(); } void TearDown() override { SamplerSetArgFixture::TearDown(); } }; HWTEST_P(AddressingModeTest, setKernelArgSampler) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto addressingMode = GetParam(); sampler = Sampler::create( context, CL_TRUE, addressingMode, CL_FILTER_NEAREST, retVal); cl_sampler samplerObj = sampler; retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; auto expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; auto expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; // clang-format off switch (addressingMode) { case 
CL_ADDRESS_NONE: case CL_ADDRESS_CLAMP: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP_BORDER; break; case CL_ADDRESS_CLAMP_TO_EDGE: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_CLAMP; break; case CL_ADDRESS_MIRRORED_REPEAT: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_MIRROR; break; case CL_ADDRESS_REPEAT: expectedModeX = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; expectedModeY = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; expectedModeZ = SAMPLER_STATE::TEXTURE_COORDINATE_MODE_WRAP; break; } // clang-format on EXPECT_EQ(expectedModeX, samplerState->getTcxAddressControlMode()); EXPECT_EQ(expectedModeY, samplerState->getTcyAddressControlMode()); EXPECT_EQ(expectedModeZ, samplerState->getTczAddressControlMode()); auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData()); auto addressingModeAddress = ptrOffset(crossThreadData, 0x8); unsigned int addresingValue = GetAddrModeEnum(addressingMode); EXPECT_EQ(addresingValue, *addressingModeAddress); } cl_addressing_mode addressingModeCases[] = { CL_ADDRESS_NONE, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT}; INSTANTIATE_TEST_CASE_P(SamplerSetArg, AddressingModeTest, ::testing::ValuesIn(addressingModeCases)); HWTEST_F(SamplerSetArgTest, setKernelArgSamplerWithMipMaps) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; FixedU4D8 minLod = 2.0f; FixedU4D8 maxLod = 3.0f; sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, CL_FILTER_LINEAR, minLod.asFloat(), maxLod.asFloat(), retVal); cl_sampler samplerObj = sampler; 
retVal = pKernel->setArg( 0, sizeof(samplerObj), &samplerObj); ASSERT_EQ(CL_SUCCESS, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(FamilyType::SAMPLER_STATE::MIP_MODE_FILTER_LINEAR, samplerState->getMipModeFilter()); EXPECT_EQ(minLod.getRawAccess(), samplerState->getMinLod()); EXPECT_EQ(maxLod.getRawAccess(), samplerState->getMaxLod()); } //////////////////////////////////////////////////////////////////////////////// struct FilterModeTest : public SamplerSetArgFixture, public ::testing::TestWithParam { void SetUp() override { SamplerSetArgFixture::SetUp(); } void TearDown() override { SamplerSetArgFixture::TearDown(); } }; HWTEST_P(FilterModeTest, setKernelArgSampler) { typedef typename FamilyType::SAMPLER_STATE SAMPLER_STATE; auto filterMode = GetParam(); sampler = Sampler::create( context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, filterMode, retVal); auto samplerState = reinterpret_cast( ptrOffset(pKernel->getDynamicStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); sampler->setArg(const_cast(samplerState)); if (CL_FILTER_NEAREST == filterMode) { EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_NEAREST, samplerState->getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter()); EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); EXPECT_FALSE(samplerState->getUAddressMinFilterRoundingEnable()); EXPECT_FALSE(samplerState->getUAddressMagFilterRoundingEnable()); EXPECT_FALSE(samplerState->getVAddressMinFilterRoundingEnable()); EXPECT_FALSE(samplerState->getVAddressMagFilterRoundingEnable()); EXPECT_FALSE(samplerState->getRAddressMagFilterRoundingEnable()); EXPECT_FALSE(samplerState->getRAddressMinFilterRoundingEnable()); } else { EXPECT_EQ(SAMPLER_STATE::MIN_MODE_FILTER_LINEAR, samplerState->getMinModeFilter()); EXPECT_EQ(SAMPLER_STATE::MAG_MODE_FILTER_LINEAR, samplerState->getMagModeFilter()); 
EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); EXPECT_TRUE(samplerState->getUAddressMinFilterRoundingEnable()); EXPECT_TRUE(samplerState->getUAddressMagFilterRoundingEnable()); EXPECT_TRUE(samplerState->getVAddressMinFilterRoundingEnable()); EXPECT_TRUE(samplerState->getVAddressMagFilterRoundingEnable()); EXPECT_TRUE(samplerState->getRAddressMagFilterRoundingEnable()); EXPECT_TRUE(samplerState->getRAddressMinFilterRoundingEnable()); } } cl_filter_mode filterModeCase[] = { CL_FILTER_NEAREST, CL_FILTER_LINEAR}; INSTANTIATE_TEST_CASE_P(SamplerSetArg, FilterModeTest, ::testing::ValuesIn(filterModeCase)); compute-runtime-20.13.16352/opencl/test/unit_test/sampler/sampler_tests.cpp000066400000000000000000000112251363734646600266670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sampler/sampler.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_sampler.h" #include "gtest/gtest.h" #include "patch_list.h" #include using namespace NEO; struct CreateSampler : public ::testing::TestWithParam< std::tuple> { CreateSampler() { } void SetUp() override { std::tie(normalizedCoords, addressingMode, filterMode) = GetParam(); context = new MockContext(); } void TearDown() override { delete context; } MockContext *context; cl_int retVal = CL_INVALID_VALUE; cl_bool normalizedCoords; cl_addressing_mode addressingMode; cl_filter_mode filterMode; }; TEST_P(CreateSampler, shouldReturnSuccess) { auto sampler = Sampler::create( context, normalizedCoords, addressingMode, filterMode, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, sampler); delete sampler; } TEST_P(CreateSampler, shouldPropagateSamplerState) { auto sampler = new MockSampler( context, normalizedCoords, addressingMode, filterMode); ASSERT_NE(nullptr, sampler); EXPECT_EQ(context, 
sampler->getContext()); EXPECT_EQ(normalizedCoords, sampler->getNormalizedCoordinates()); EXPECT_EQ(addressingMode, sampler->getAddressingMode()); EXPECT_EQ(filterMode, sampler->getFilterMode()); //check for SnapWA bool snapWaNeeded = addressingMode == CL_ADDRESS_CLAMP && filterMode == CL_FILTER_NEAREST; auto snapWaValue = snapWaNeeded ? iOpenCL::CONSTANT_REGISTER_BOOLEAN_TRUE : iOpenCL::CONSTANT_REGISTER_BOOLEAN_FALSE; EXPECT_EQ(snapWaValue, sampler->getSnapWaValue()); delete sampler; } static cl_bool normalizedCoordModes[] = { CL_FALSE, CL_TRUE}; static cl_addressing_mode addressingModes[] = { CL_ADDRESS_MIRRORED_REPEAT, CL_ADDRESS_REPEAT, CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_NONE}; static cl_filter_mode filterModes[] = { CL_FILTER_NEAREST, CL_FILTER_LINEAR}; INSTANTIATE_TEST_CASE_P(Sampler, CreateSampler, ::testing::Combine( ::testing::ValuesIn(normalizedCoordModes), ::testing::ValuesIn(addressingModes), ::testing::ValuesIn(filterModes))); typedef ::testing::TestWithParam> TransformableSamplerTest; TEST_P(TransformableSamplerTest, givenSamplerWhenHasProperParametersThenIsTransformable) { bool expectedRetVal; bool retVal; cl_bool normalizedCoords; cl_addressing_mode addressingMode; cl_filter_mode filterMode; std::tie(normalizedCoords, addressingMode, filterMode) = GetParam(); expectedRetVal = addressingMode == CL_ADDRESS_CLAMP_TO_EDGE && filterMode == CL_FILTER_NEAREST && normalizedCoords == CL_FALSE; MockSampler sampler(nullptr, normalizedCoords, addressingMode, filterMode); retVal = sampler.isTransformable(); EXPECT_EQ(expectedRetVal, retVal); } INSTANTIATE_TEST_CASE_P(Sampler, TransformableSamplerTest, ::testing::Combine( ::testing::ValuesIn(normalizedCoordModes), ::testing::ValuesIn(addressingModes), ::testing::ValuesIn(filterModes))); TEST(castToSamplerTest, GivenGenericPointerWhichHoldsSamplerObjectWhenCastToSamplerIsCalledThenCastWithSuccess) { cl_int retVal; auto context = std::make_unique(); cl_sampler clSampler = Sampler::create( 
context.get(), CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_NEAREST, retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto ptr = reinterpret_cast(clSampler); auto sampler = castToObject(ptr); EXPECT_NE(nullptr, sampler); clReleaseSampler(clSampler); } TEST(castToSamplerTest, GivenGenericPointerWhichDoestNotHoldSamplerObjectWhenCastToSamplerIsCalledThenCastWithAFailure) { auto context = std::make_unique(); auto notSamplerObj = std::unique_ptr(ImageHelper::create(context.get())); void *ptr = notSamplerObj.get(); auto notSampler = castToObject(ptr); EXPECT_EQ(nullptr, notSampler); } compute-runtime-20.13.16352/opencl/test/unit_test/scenarios/000077500000000000000000000000001363734646600236205ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/scenarios/CMakeLists.txt000066400000000000000000000006211363734646600263570ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_scenarios ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_barrier_scenario_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/blocked_enqueue_with_callback_scenario_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios}) add_subdirectories()blocked_enqueue_barrier_scenario_tests.cpp000066400000000000000000000034471363734646600342220ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/scenarios/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/scenario_test_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; typedef ScenarioTest BarrierScenarioTest; HWTEST_F(BarrierScenarioTest, 
givenBlockedEnqueueBarrierOnOOQWhenUserEventIsUnblockedThenNextEnqueuesAreNotBlocked) { cl_command_queue clCommandQ = nullptr; cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; auto mockCmdQ = clUniquePtr(new MockCommandQueueHw(context, pPlatform->getClDevice(0), properties)); clCommandQ = mockCmdQ.get(); cl_kernel clKernel = kernel; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; UserEvent *userEvent = new UserEvent(context); cl_event eventBlocking = userEvent; retVal = clEnqueueBarrierWithWaitList(clCommandQ, 1, &eventBlocking, nullptr); EXPECT_EQ(success, retVal); EXPECT_EQ(CompletionStamp::levelNotReady, mockCmdQ->taskLevel); EXPECT_NE(nullptr, mockCmdQ->virtualEvent); clSetUserEventStatus(eventBlocking, CL_COMPLETE); userEvent->release(); mockCmdQ->isQueueBlocked(); EXPECT_NE(CompletionStamp::levelNotReady, mockCmdQ->taskLevel); EXPECT_EQ(nullptr, mockCmdQ->virtualEvent); retVal = clEnqueueNDRangeKernel(clCommandQ, clKernel, 1, offset, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(success, retVal); retVal = clFinish(clCommandQ); EXPECT_EQ(success, retVal); } blocked_enqueue_with_callback_scenario_tests.cpp000066400000000000000000000101671363734646600353600ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/scenarios/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/scenario_test_fixture.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; struct CallbackData { cl_kernel kernel; cl_command_queue queue; bool callbackCalled = 
false; UserEvent *signalCallbackDoneEvent = nullptr; }; void CL_CALLBACK callback(cl_event event, cl_int status, void *data) { CallbackData *callbackData = (CallbackData *)data; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; clEnqueueNDRangeKernel(callbackData->queue, callbackData->kernel, 1, offset, gws, nullptr, 0, nullptr, nullptr); clFinish(callbackData->queue); callbackData->callbackCalled = true; if (callbackData->signalCallbackDoneEvent) { cl_event callbackEvent = callbackData->signalCallbackDoneEvent; clSetUserEventStatus(callbackEvent, CL_COMPLETE); // No need to reatin and release this synchronization event //clReleaseEvent(callbackEvent); } } TEST_F(ScenarioTest, givenAsyncHandlerDisabledAndUserEventBlockingEnqueueAndOutputEventWithCallbackWhenUserEventIsSetCompleteThanCallbackIsExecuted) { DebugManager.flags.EnableAsyncEventsHandler.set(false); cl_command_queue clCommandQ = nullptr; cl_queue_properties properties = 0; cl_kernel clKernel = kernel; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; UserEvent *userEvent = new UserEvent(context); cl_event eventBlocking = userEvent; cl_event eventOut = nullptr; clCommandQ = clCreateCommandQueue(context, devices[0], properties, &retVal); retVal = clEnqueueNDRangeKernel(clCommandQ, clKernel, 1, offset, gws, nullptr, 1, &eventBlocking, &eventOut); EXPECT_EQ(success, retVal); ASSERT_NE(nullptr, eventOut); CallbackData data; data.kernel = clKernel; data.queue = clCommandQ; data.callbackCalled = false; clSetEventCallback(eventOut, CL_COMPLETE, callback, &data); EXPECT_FALSE(data.callbackCalled); clSetUserEventStatus(eventBlocking, CL_COMPLETE); userEvent->release(); clWaitForEvents(1, &eventOut); EXPECT_TRUE(data.callbackCalled); clReleaseEvent(eventOut); clReleaseCommandQueue(clCommandQ); } TEST_F(ScenarioTest, givenAsyncHandlerEnabledAndUserEventBlockingEnqueueAndOutputEventWithCallbackWhenUserEventIsSetCompleteThanCallbackIsExecuted) { 
DebugManager.flags.EnableAsyncEventsHandler.set(true); cl_command_queue clCommandQ = nullptr; cl_queue_properties properties = 0; cl_kernel clKernel = kernel; size_t offset[] = {0, 0, 0}; size_t gws[] = {1, 1, 1}; cl_int retVal = CL_SUCCESS; cl_int success = CL_SUCCESS; UserEvent *userEvent = new UserEvent(context); cl_event eventBlocking = userEvent; cl_event eventOut = nullptr; clCommandQ = clCreateCommandQueue(context, devices[0], properties, &retVal); retVal = clEnqueueNDRangeKernel(clCommandQ, clKernel, 1, offset, gws, nullptr, 1, &eventBlocking, &eventOut); EXPECT_EQ(success, retVal); ASSERT_NE(nullptr, eventOut); CallbackData data; data.kernel = clKernel; data.queue = clCommandQ; data.callbackCalled = false; data.signalCallbackDoneEvent = new UserEvent(context); cl_event callbackEvent = data.signalCallbackDoneEvent; clSetEventCallback(eventOut, CL_COMPLETE, callback, &data); EXPECT_FALSE(data.callbackCalled); clSetUserEventStatus(eventBlocking, CL_COMPLETE); userEvent->release(); clWaitForEvents(1, &eventOut); clWaitForEvents(1, &callbackEvent); EXPECT_TRUE(data.callbackCalled); data.signalCallbackDoneEvent->release(); clReleaseEvent(eventOut); clReleaseCommandQueue(clCommandQ); platform()->getAsyncEventsHandler()->closeThread(); } compute-runtime-20.13.16352/opencl/test/unit_test/scenarios/windows/000077500000000000000000000000001363734646600253125ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/scenarios/windows/CMakeLists.txt000066400000000000000000000005341363734646600300540ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_scenarios_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_write_buffer_scenarios_windows_tests.cpp ) if(WIN32) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scenarios_windows}) 
endif()enqueue_read_write_buffer_scenarios_windows_tests.cpp000066400000000000000000000133431363734646600402120ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/scenarios/windows/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/windows/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/os_interface/windows/wddm_device_command_stream.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h" #include "test.h" using namespace NEO; struct EnqueueBufferWindowsTest : public HardwareParse, public ::testing::Test { EnqueueBufferWindowsTest(void) : buffer(nullptr) { } void SetUp() override { DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0); executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); } void TearDown() override { buffer.reset(nullptr); } template void initializeFixture() { EnvironmentWithCsrWrapper environment; environment.setCsrType>(); memoryManager = new MockWddmMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(memoryManager); device = std::make_unique(Device::create(executionEnvironment, 0)); context = std::make_unique(device.get()); const size_t bufferMisalignment = 1; const size_t bufferSize = 16; bufferMemory = std::make_unique(alignUp(bufferSize + bufferMisalignment, sizeof(uint32_t))); cl_int retVal = 0; buffer.reset(Buffer::create(context.get(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bufferSize, 
reinterpret_cast(bufferMemory.get()) + bufferMisalignment, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } protected: DebugManagerStateRestore restore; HardwareInfo hardwareInfo; HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment; cl_queue_properties properties = {}; std::unique_ptr bufferMemory; std::unique_ptr device; std::unique_ptr context; std::unique_ptr buffer; MockWddmMemoryManager *memoryManager = nullptr; }; HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCalledThenStateBaseAddressAddressIsAlignedAndMatchesKernelDispatchInfoParams) { initializeFixture(); if (device->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); char *misalignedPtr = reinterpret_cast(device->getMemoryManager()->getAlignedMallocRestrictions()->minAddress + 1); buffer->forceDisallowCPUCopy = true; auto retVal = cmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 4, misalignedPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(0, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; auto hostPtrAllcoation = cmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()->getTemporaryAllocations().peekHead(); while (hostPtrAllcoation != nullptr) { if (hostPtrAllcoation->getUnderlyingBuffer() == misalignedPtr) { break; } hostPtrAllcoation = hostPtrAllcoation->next; } ASSERT_NE(nullptr, hostPtrAllcoation); uint64_t gpuVa = hostPtrAllcoation->getGpuAddress(); cmdQ->finish(); parseCommands(*cmdQ); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData() + 
kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); } else if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); } } if (kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto dstOffset = (uint32_t *)(kernel->getCrossThreadData() + kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset); } else { // dstOffset arg should be 4 bytes in size, if that changes, above if path should be modified EXPECT_TRUE(false); } } compute-runtime-20.13.16352/opencl/test/unit_test/scheduler/000077500000000000000000000000001363734646600236105ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/scheduler/CMakeLists.txt000066400000000000000000000007071363734646600263540ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_scheduler ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_source_tests.inl ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_scheduler}) compute-runtime-20.13.16352/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp000066400000000000000000000324321363734646600310600ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 
Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/scheduler/scheduler_kernel.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_ostime.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gtest/gtest.h" #include #include using namespace NEO; using namespace std; class MockSchedulerKernel : public SchedulerKernel { public: MockSchedulerKernel(Program *program, const KernelInfo &info, const ClDevice &device) : SchedulerKernel(program, info, device) { } static MockSchedulerKernel *create(Program &program, Device &device, KernelInfo *&info) { info = new KernelInfo; SPatchDataParameterStream dataParametrStream; dataParametrStream.DataParameterStreamSize = 8; dataParametrStream.Size = 8; SPatchExecutionEnvironment executionEnvironment = {}; executionEnvironment.CompiledSIMD32 = 1; executionEnvironment.HasDeviceEnqueue = 0; info->patchInfo.dataParameterStream = &dataParametrStream; info->patchInfo.executionEnvironment = &executionEnvironment; KernelArgInfo bufferArg; bufferArg.isBuffer = true; for (uint32_t i = 0; i < 9; i++) { bufferArg.kernelArgPatchInfoVector.resize(1); bufferArg.kernelArgPatchInfoVector[0].crossthreadOffset = 0; bufferArg.kernelArgPatchInfoVector[0].size = 0; bufferArg.kernelArgPatchInfoVector[0].sourceOffset = 0; info->kernelArgInfo.push_back(std::move(bufferArg)); } MockSchedulerKernel *mock = Kernel::create(&program, *info, nullptr); return mock; } }; TEST(SchedulerKernelTest, getLws) { auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); 
KernelInfo info; MockSchedulerKernel kernel(&program, info, *device); size_t lws = kernel.getLws(); EXPECT_EQ((size_t)24u, lws); } TEST(SchedulerKernelTest, getGws) { auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); KernelInfo info; MockSchedulerKernel kernel(&program, info, *device); const size_t hwThreads = 3; const size_t simdSize = 8; size_t maxGws = defaultHwInfo->gtSystemInfo.EUCount * hwThreads * simdSize; size_t gws = kernel.getGws(); EXPECT_GE(maxGws, gws); EXPECT_LT(hwThreads * simdSize, gws); } TEST(SchedulerKernelTest, setGws) { auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); KernelInfo info; MockSchedulerKernel kernel(&program, info, *device); kernel.setGws(24); size_t gws = kernel.getGws(); EXPECT_EQ(24u, gws); } TEST(SchedulerKernelTest, getCurbeSize) { auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); KernelInfo info; uint32_t crossTrheadDataSize = 32; uint32_t dshSize = 48; SPatchDataParameterStream dataParameterStream; dataParameterStream.DataParameterStreamSize = crossTrheadDataSize; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.DynamicStateHeapSize = dshSize; info.patchInfo.dataParameterStream = &dataParameterStream; info.heapInfo.pKernelHeader = &kernelHeader; MockSchedulerKernel kernel(&program, info, *device); uint32_t expectedCurbeSize = alignUp(crossTrheadDataSize, 64) + alignUp(dshSize, 64) + alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64); EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize()); } TEST(SchedulerKernelTest, setArgsForSchedulerKernel) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new 
MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); program->setDevice(&device->getDevice()); unique_ptr info(nullptr); KernelInfo *infoPtr = nullptr; unique_ptr scheduler = unique_ptr(MockSchedulerKernel::create(*program, device->getDevice(), infoPtr)); info.reset(infoPtr); unique_ptr allocs[9]; for (uint32_t i = 0; i < 9; i++) { allocs[i] = unique_ptr(new MockGraphicsAllocation((void *)0x1234, 10)); } scheduler->setArgs(allocs[0].get(), allocs[1].get(), allocs[2].get(), allocs[3].get(), allocs[4].get(), allocs[5].get(), allocs[6].get(), allocs[7].get(), allocs[8].get()); for (uint32_t i = 0; i < 9; i++) { EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i)); } } TEST(SchedulerKernelTest, setArgsForSchedulerKernelWithNullDebugQueue) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); program->setDevice(&device->getDevice()); unique_ptr info(nullptr); KernelInfo *infoPtr = nullptr; unique_ptr scheduler = unique_ptr(MockSchedulerKernel::create(*program, device->getDevice(), infoPtr)); info.reset(infoPtr); unique_ptr allocs[9]; for (uint32_t i = 0; i < 9; i++) { allocs[i] = unique_ptr(new MockGraphicsAllocation((void *)0x1234, 10)); } scheduler->setArgs(allocs[0].get(), allocs[1].get(), allocs[2].get(), allocs[3].get(), allocs[4].get(), allocs[5].get(), allocs[6].get(), allocs[7].get()); for (uint32_t i = 0; i < 8; i++) { EXPECT_EQ(allocs[i].get(), scheduler->getKernelArg(i)); } EXPECT_EQ(nullptr, scheduler->getKernelArg(8)); } TEST(SchedulerKernelTest, givenGraphicsAllocationWithDifferentCpuAndGpuAddressesWhenCallSetArgsThenGpuAddressesAreTaken) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program 
= clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); program->setDevice(&device->getDevice()); unique_ptr info(nullptr); KernelInfo *infoPtr = nullptr; auto scheduler = clUniquePtr(MockSchedulerKernel::create(*program, device->getDevice(), infoPtr)); info.reset(infoPtr); unique_ptr allocs[9]; for (uint32_t i = 0; i < 9; i++) { allocs[i] = std::make_unique(reinterpret_cast(0x1234), 0x4321, 10); } scheduler->setArgs(allocs[0].get(), allocs[1].get(), allocs[2].get(), allocs[3].get(), allocs[4].get(), allocs[5].get(), allocs[6].get(), allocs[7].get(), allocs[8].get()); for (uint32_t i = 0; i < 9; i++) { auto argAddr = reinterpret_cast(scheduler->getKernelArgInfo(i).value); EXPECT_EQ(allocs[i]->getGpuAddress(), argAddr); } } TEST(SchedulerKernelTest, createKernelReflectionForForcedSchedulerDispatch) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceDispatchScheduler.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); program->setDevice(&device->getDevice()); unique_ptr info(nullptr); KernelInfo *infoPtr = nullptr; unique_ptr scheduler = unique_ptr(MockSchedulerKernel::create(*program, device->getDevice(), infoPtr)); info.reset(infoPtr); scheduler->createReflectionSurface(); EXPECT_NE(nullptr, scheduler->getKernelReflectionSurface()); } TEST(SchedulerKernelTest, createKernelReflectionSecondTimeForForcedSchedulerDispatchReturnsExistingAllocation) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceDispatchScheduler.set(true); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), 
false, &device->getDevice())); program->setDevice(&device->getDevice()); unique_ptr info(nullptr); KernelInfo *infoPtr = nullptr; unique_ptr scheduler = unique_ptr(MockSchedulerKernel::create(*program, device->getDevice(), infoPtr)); info.reset(infoPtr); scheduler->createReflectionSurface(); auto *allocation = scheduler->getKernelReflectionSurface(); scheduler->createReflectionSurface(); auto *allocation2 = scheduler->getKernelReflectionSurface(); EXPECT_EQ(allocation, allocation2); } TEST(SchedulerKernelTest, createKernelReflectionForSchedulerDoesNothing) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.ForceDispatchScheduler.set(false); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); program->setDevice(&device->getDevice()); unique_ptr info(nullptr); KernelInfo *infoPtr = nullptr; unique_ptr scheduler = unique_ptr(MockSchedulerKernel::create(*program, device->getDevice(), infoPtr)); info.reset(infoPtr); scheduler->createReflectionSurface(); EXPECT_EQ(nullptr, scheduler->getKernelReflectionSurface()); } TEST(SchedulerKernelTest, getCurbeSizeWithNullKernelInfo) { auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); KernelInfo info; info.patchInfo.dataParameterStream = nullptr; info.heapInfo.pKernelHeader = nullptr; MockSchedulerKernel kernel(&program, info, *device); uint32_t expectedCurbeSize = alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64); EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize()); } TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableWhenSchedulerKernelIsCreatedThenGwsIsSetToForcedValue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SchedulerGWS.set(48); auto device = 
make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); KernelInfo info; MockSchedulerKernel kernel(&program, info, *device); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(48u), gws); } TEST(SchedulerKernelTest, givenSimulationModeWhenSchedulerKernelIsCreatedThenGwsIsSetToOneWorkgroup) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrSimulationMode = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockProgram program(*device->getExecutionEnvironment()); KernelInfo info; MockSchedulerKernel kernel(&program, info, *device); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(24u), gws); } TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableAndSimulationModeWhenSchedulerKernelIsCreatedThenGwsIsSetToForcedValue) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.SchedulerGWS.set(48); HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrSimulationMode = true; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); MockProgram program(*device->getExecutionEnvironment()); KernelInfo info; MockSchedulerKernel kernel(&program, info, *device); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(48u), gws); } compute-runtime-20.13.16352/opencl/test/unit_test/scheduler/scheduler_source_tests.cpp000066400000000000000000000245671363734646600311120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/scheduler/scheduler_source_tests.h" #include "shared/source/helpers/hw_cmds.h" #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include "test.h" 
#include "gtest/gtest.h" // Keep this include after execution_model_fixture.h otherwise there is high chance of conflict with macros #include "opencl/source/builtin_kernels_simulation/opencl_c.h" #include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h" using namespace NEO; using namespace BuiltinKernelsSimulation; HWCMDTEST_F(IGFX_GEN8_CORE, SchedulerSourceTest, PatchGpgpuWalker) { using MEDIA_STATE_FLUSH = typename FamilyType::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; size_t msfOffset = 0; size_t miArbCheckOffset = 0; size_t miAtomicOffset = 0; size_t mediaIDLoadOffset = 0; size_t miLoadRegOffset = 0; size_t pipeControlOffset = 0; size_t gpgpuOffset = 0; size_t msfOffset2 = 0; size_t miArbCheckOffset2 = 0; size_t msfOffsetAfter = 0; size_t miArbCheckOffsetAfter = 0; size_t miAtomicOffsetAfter = 0; size_t mediaIDLoadOffsetAfter = 0; size_t miLoadRegOffsetAfter = 0; size_t pipeControlOffsetAfter = 0; size_t gpgpuOffsetAfter = 0; size_t msfOffsetAfter2 = 0; size_t miArbCheckOffsetAfter2 = 0; auto pDevQueueHw = new MockDeviceQueueHw(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0]); // Prepopulate SLB with commands pDevQueueHw->buildSlbDummyCommands(); LinearStream *slb = pDevQueueHw->getSlbCS(); HardwareParse hwParser; hwParser.parseCommands(*slb, 0); // Parse commands and save offsets of first enqueue space auto itorMediaStateFlush = 
find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto *msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush; EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf); auto itorArbCheck = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto *arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr; auto itorMiAtomic = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto *miAtomic = itorMiAtomic != hwParser.cmdList.end() ? (MI_ATOMIC *)*itorMiAtomic : nullptr; auto itorIDLoad = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto *idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr; auto itorMiLoadReg = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto *miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr; auto itorPipeControl = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto *pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr; auto itorWalker = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto *walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr; auto itorMediaStateFlush2 = find(itorWalker, hwParser.cmdList.end()); auto *msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr; auto itorArbCheck2 = find(itorWalker, hwParser.cmdList.end()); auto *arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? 
(MI_ARB_CHECK *)*itorArbCheck2 : nullptr; if (msf) msfOffset = ptrDiff(msf, slb->getCpuBase()); if (arbCheck) miArbCheckOffset = ptrDiff(arbCheck, slb->getCpuBase()); if (miAtomic) miAtomicOffset = ptrDiff(miAtomic, slb->getCpuBase()); if (idLoad) mediaIDLoadOffset = ptrDiff(idLoad, slb->getCpuBase()); if (miLoadReg) miLoadRegOffset = ptrDiff(miLoadReg, slb->getCpuBase()); if (pipeControl) pipeControlOffset = ptrDiff(pipeControl, slb->getCpuBase()); if (walker) gpgpuOffset = ptrDiff(walker, slb->getCpuBase()); if (msf2) msfOffset2 = ptrDiff(msf2, slb->getCpuBase()); if (arbCheck2) miArbCheckOffset2 = ptrDiff(arbCheck2, slb->getCpuBase()); uint32_t *slbBuffer = (uint32_t *)slb->getCpuBase(); uint32_t secondLevelBatchOffset = 0; uint32_t InterfaceDescriptorOffset = 3; uint32_t SIMDSize = 16; uint32_t TotalLocalWorkSize = 24; uint3 DimSize = {6, 4, 1}; uint3 StartPoint = {4, 4, 0}; uint32_t NumberOfHWThreadsPerWG = 3; uint32_t IndirectPayloadSize = 10; uint32_t IOHoffset = 256; SchedulerSimulation::patchGpGpuWalker(secondLevelBatchOffset, slbBuffer, InterfaceDescriptorOffset, SIMDSize, TotalLocalWorkSize, DimSize, StartPoint, NumberOfHWThreadsPerWG, IndirectPayloadSize, IOHoffset); size_t commandsSize = pDevQueueHw->getMinimumSlbSize() + pDevQueueHw->getWaCommandsSize(); // Parse again LinearStream slbTested(slbBuffer, commandsSize); hwParser.cmdList.clear(); slbTested.getSpace(commandsSize); hwParser.parseCommands(slbTested, 0); itorMediaStateFlush = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); msf = (MEDIA_STATE_FLUSH *)*itorMediaStateFlush; EXPECT_EQ((void *)slb->getCpuBase(), (void *)msf); itorArbCheck = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); arbCheck = itorArbCheck != hwParser.cmdList.end() ? (MI_ARB_CHECK *)*itorArbCheck : nullptr; itorMiAtomic = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); miAtomic = itorMiAtomic != hwParser.cmdList.end() ? 
(MI_ATOMIC *)*itorMiAtomic : nullptr; itorIDLoad = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); idLoad = itorIDLoad != hwParser.cmdList.end() ? (MEDIA_INTERFACE_DESCRIPTOR_LOAD *)*itorIDLoad : nullptr; itorMiLoadReg = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); miLoadReg = itorMiLoadReg != hwParser.cmdList.end() ? (MI_LOAD_REGISTER_IMM *)*itorMiLoadReg : nullptr; itorPipeControl = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); pipeControl = itorPipeControl != hwParser.cmdList.end() ? (PIPE_CONTROL *)*itorPipeControl : nullptr; itorWalker = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); walker = itorWalker != hwParser.cmdList.end() ? (GPGPU_WALKER *)*itorWalker : nullptr; itorMediaStateFlush2 = find(itorWalker, hwParser.cmdList.end()); msf2 = itorMediaStateFlush2 != hwParser.cmdList.end() ? (MEDIA_STATE_FLUSH *)*itorMediaStateFlush2 : nullptr; itorArbCheck2 = find(itorWalker, hwParser.cmdList.end()); arbCheck2 = itorArbCheck2 != hwParser.cmdList.end() ? 
(MI_ARB_CHECK *)*itorArbCheck2 : nullptr; if (msf) msfOffsetAfter = ptrDiff(msf, slbTested.getCpuBase()); if (arbCheck) miArbCheckOffsetAfter = ptrDiff(arbCheck, slbTested.getCpuBase()); if (miAtomic) miAtomicOffsetAfter = ptrDiff(miAtomic, slbTested.getCpuBase()); if (idLoad) mediaIDLoadOffsetAfter = ptrDiff(idLoad, slbTested.getCpuBase()); if (miLoadReg) miLoadRegOffsetAfter = ptrDiff(miLoadReg, slbTested.getCpuBase()); if (pipeControl) pipeControlOffsetAfter = ptrDiff(pipeControl, slbTested.getCpuBase()); if (walker) gpgpuOffsetAfter = ptrDiff(walker, slbTested.getCpuBase()); if (msf2) msfOffsetAfter2 = ptrDiff(msf2, slbTested.getCpuBase()); if (arbCheck2) miArbCheckOffsetAfter2 = ptrDiff(arbCheck2, slbTested.getCpuBase()); EXPECT_EQ(msfOffset, msfOffsetAfter); EXPECT_EQ(miArbCheckOffset, miArbCheckOffsetAfter); EXPECT_EQ(miAtomicOffset, miAtomicOffsetAfter); EXPECT_EQ(mediaIDLoadOffset, mediaIDLoadOffsetAfter); EXPECT_EQ(miLoadRegOffset, miLoadRegOffsetAfter); EXPECT_EQ(pipeControlOffset, pipeControlOffsetAfter); EXPECT_EQ(gpgpuOffset, gpgpuOffsetAfter); EXPECT_EQ(msfOffset2, msfOffsetAfter2); EXPECT_EQ(miArbCheckOffset2, miArbCheckOffsetAfter2); if (walker) { EXPECT_EQ(InterfaceDescriptorOffset, walker->getInterfaceDescriptorOffset()); EXPECT_EQ(NumberOfHWThreadsPerWG, walker->getThreadWidthCounterMaximum()); EXPECT_EQ(16u, SIMDSize); typename GPGPU_WALKER::SIMD_SIZE simd = GPGPU_WALKER::SIMD_SIZE::SIMD_SIZE_SIMD16; EXPECT_EQ(simd, walker->getSimdSize()); EXPECT_EQ(StartPoint.x, walker->getThreadGroupIdStartingX()); EXPECT_EQ(StartPoint.y, walker->getThreadGroupIdStartingY()); //EXPECT_EQ(StartPoint.z, walker->GetThreadGroupIdStartingZ()); EXPECT_EQ(DimSize.x, walker->getThreadGroupIdXDimension()); EXPECT_EQ(DimSize.y, walker->getThreadGroupIdYDimension()); //EXPECT_EQ(DimSize.z, walker->getThreadGroupIdZDimension()); uint32_t mask = static_cast(maxNBitValue(TotalLocalWorkSize % SIMDSize)); if (mask == 0) mask = ~0; uint32_t yMask = 0xffffffff; EXPECT_EQ(mask, 
walker->getRightExecutionMask()); EXPECT_EQ(yMask, walker->getBottomExecutionMask()); EXPECT_EQ(IndirectPayloadSize, walker->getIndirectDataLength()); EXPECT_EQ(IOHoffset, walker->getIndirectDataStartAddress()); } else { EXPECT_TRUE(false) << "GPGPU_WALKER commandnot found, patchGpGpuWalker could have corrupted prepopulated commands\n"; } delete pDevQueueHw; } compute-runtime-20.13.16352/opencl/test/unit_test/scheduler/scheduler_source_tests.h000066400000000000000000000017441363734646600305470ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" #include "gtest/gtest.h" namespace NEO { class SchedulerSourceTest : public testing::Test { public: void SetUp() override { pDevice = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; } void TearDown() override { delete pDevice; } MockClDevice *pDevice; MockContext context; template void givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest(); template void givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest(); template void givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest(); }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/scheduler/scheduler_source_tests.inl000066400000000000000000000033441363734646600311000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/device_queue/device_queue.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/mocks/mock_device_queue.h" #include template void SchedulerSourceTest::givenDeviceQueueWhenCommandsSizeIsCalculatedThenItEqualsSpaceForEachEnqueueInSchedulerKernelCodeTest() { auto devQueueHw = std::unique_ptr>(new 
MockDeviceQueueHw(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0])); auto singleEnqueueSpace = devQueueHw->getMinimumSlbSize() + devQueueHw->getWaCommandsSize(); EXPECT_EQ(singleEnqueueSpace, SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE); } template void SchedulerSourceTest::givenDeviceQueueWhenSlbDummyCommandsAreBuildThenSizeUsedIsCorrectTest() { auto devQueueHw = std::unique_ptr>(new MockDeviceQueueHw(&context, pDevice, DeviceHostQueue::deviceQueueProperties::minimumProperties[0])); devQueueHw->buildSlbDummyCommands(); auto slbCS = devQueueHw->getSlbCS(); auto usedSpace = slbCS->getUsed(); auto spaceRequiredForEnqueuesAndBBStart = SECOND_LEVEL_BUFFER_SPACE_FOR_EACH_ENQUEUE * SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); EXPECT_EQ(usedSpace, spaceRequiredForEnqueuesAndBBStart); } template void SchedulerSourceTest::givenDeviceQueueThenNumberOfEnqueuesEqualsNumberOfEnqueuesInSchedulerKernelCodeTest() { EXPECT_EQ(DeviceQueue::numberOfDeviceEnqueues, static_cast(SECOND_LEVEL_BUFFER_NUMBER_OF_ENQUEUES)); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/000077500000000000000000000000001363734646600234505ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/CMakeLists.txt000066400000000000000000000005401363734646600262070ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/sharing_factory_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sharing_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings}) add_subdirectories() 
compute-runtime-20.13.16352/opencl/test/unit_test/sharings/d3d/000077500000000000000000000000001363734646600241225ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/d3d/CMakeLists.txt000066400000000000000000000005001363734646600266550ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_sharings_d3d ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/context_d3d_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_d3d}) endif(WIN32) compute-runtime-20.13.16352/opencl/test/unit_test/sharings/d3d/context_d3d_tests.cpp000066400000000000000000000205521363734646600302720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/os_interface/windows/d3d_sharing_functions.h" #include "opencl/source/sharings/d3d/cl_d3d_api.h" #include "opencl/test/unit_test/api/cl_api_tests.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; TEST(D3DContextTest, sharingAreNotPresentByDefault) { MockContext context; EXPECT_EQ(nullptr, context.getSharing>()); EXPECT_EQ(nullptr, context.getSharing>()); EXPECT_EQ(nullptr, context.getSharing>()); } TEST(D3DContextTest, giveDispatchtableContainsValidEntries) { sharingFactory.fillGlobalDispatchTable(); MockContext context; EXPECT_EQ(&clGetDeviceIDsFromDX9INTEL, context.dispatch.crtDispatch->clGetDeviceIDsFromDX9INTEL); EXPECT_EQ(&clCreateFromDX9MediaSurfaceINTEL, context.dispatch.crtDispatch->clCreateFromDX9MediaSurfaceINTEL); EXPECT_EQ(&clEnqueueAcquireDX9ObjectsINTEL, context.dispatch.crtDispatch->clEnqueueAcquireDX9ObjectsINTEL); EXPECT_EQ(&clEnqueueReleaseDX9ObjectsINTEL, context.dispatch.crtDispatch->clEnqueueReleaseDX9ObjectsINTEL); } struct clIntelSharingFormatQueryDX9 : public api_tests { 
std::vector supportedNonPlanarFormats; std::vector supportedPlanarFormats; std::vector supportedPlane1Formats; std::vector supportedPlane2Formats; std::vector retrievedFormats; cl_uint numImageFormats; void SetUp() override { api_tests::SetUp(); supportedNonPlanarFormats = {D3DFMT_R32F, D3DFMT_R16F, D3DFMT_L16, D3DFMT_A8, D3DFMT_L8, D3DFMT_G32R32F, D3DFMT_G16R16F, D3DFMT_G16R16, D3DFMT_A8L8, D3DFMT_A32B32G32R32F, D3DFMT_A16B16G16R16F, D3DFMT_A16B16G16R16, D3DFMT_A8B8G8R8, D3DFMT_X8B8G8R8, D3DFMT_A8R8G8B8, D3DFMT_X8R8G8B8}; supportedPlanarFormats = {D3DFMT_YUY2, D3DFMT_UYVY, static_cast(MAKEFOURCC('N', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', 'Y', 'U')), static_cast(MAKEFOURCC('V', 'Y', 'U', 'Y'))}; supportedPlane1Formats = {static_cast(MAKEFOURCC('N', 'V', '1', '2')), static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; supportedPlane2Formats = {static_cast(MAKEFOURCC('Y', 'V', '1', '2'))}; retrievedFormats.assign(supportedNonPlanarFormats.size() + supportedPlanarFormats.size(), D3DFMT_UNKNOWN); } void TearDown() override { api_tests::TearDown(); } }; namespace ULT { TEST_F(clIntelSharingFormatQueryDX9, givenInvalidContextWhenMediaSurfaceFormatsRequestedThenInvalidContextError) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(nullptr, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clIntelSharingFormatQueryDX9, givenInvalidFlagsWhenMediaSurfaceFormatsRequestedThenInvalidValueError) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL( pContext, 0, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clIntelSharingFormatQueryDX9, givenInvalidImageTypeWhenMediaSurfaceFormatsRequestedThenInvalidValueError) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, 0, 0, 
static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(CL_INVALID_VALUE, retVal); } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestedMediaSurfaceFormatsBelowMaximumThenExceedingFormatAreaRemainsUntouched) { for (cl_uint i = 0; i <= static_cast(retrievedFormats.size()); ++i) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, i, &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); for (cl_uint j = i; j < retrievedFormats.size(); ++j) { EXPECT_EQ(retrievedFormats[j], D3DFMT_UNKNOWN); } } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlane0ThenAllKnownFormatsAreIncludedInTheResult) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(supportedNonPlanarFormats.size() + supportedPlanarFormats.size(), numImageFormats); for (auto format : supportedNonPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } for (auto format : supportedPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlane1ThenOnlyPlanarFormatsAreIncludedInTheResult) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 1, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(supportedPlane1Formats.size(), numImageFormats); for (auto format : supportedPlane1Formats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, 
retrievedFormats.end()); } for (auto format : supportedNonPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_EQ(found, retrievedFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlane2ThenOnlyYV12FormatIsIncludedInTheResult) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 2, static_cast(retrievedFormats.size()), &retrievedFormats[0], &numImageFormats); ASSERT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(supportedPlane2Formats.size(), numImageFormats); for (auto format : supportedPlane2Formats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_NE(found, retrievedFormats.end()); } for (auto format : supportedNonPlanarFormats) { auto found = std::find(retrievedFormats.begin(), retrievedFormats.end(), format); EXPECT_EQ(found, retrievedFormats.end()); } } TEST_F(clIntelSharingFormatQueryDX9, givenValidParametersWhenRequestingMediaSurfaceFormatsForPlaneGraterThan2ThenZeroNumFormatsIsReturned) { retVal = clGetSupportedDX9MediaSurfaceFormatsINTEL(pContext, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 3, 0, nullptr, &numImageFormats); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(0u, numImageFormats); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/000077500000000000000000000000001363734646600240525ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/CMakeLists.txt000066400000000000000000000003761363734646600266200ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings_gl ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_gl}) add_subdirectories() 
compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/000077500000000000000000000000001363734646600255445ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/CMakeLists.txt000066400000000000000000000012451363734646600303060ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(IGDRCL_SRCS_tests_sharings_gl_windows ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/gl_arb_sync_event_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_create_from_texture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_reused_buffers_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_enable_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_sharing_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_texture_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gl_types_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_gl_windows}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/gl_arb_sync_event_tests.cpp000066400000000000000000000352101363734646600331560ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/os_interface/os_interface.h" #include "opencl/source/context/context.h" #include "opencl/source/event/user_event.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_arb_sync_event_windows.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include 
"opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" using namespace NEO; TEST(GlArbSyncEvent, whenCreateArbSyncEventNameIsCalledMultipleTimesThenEachCallReturnsUniqueName) { char *name1 = NEO::createArbSyncEventName(); EXPECT_NE(nullptr, name1); EXPECT_STRNE("", name1); char *name2 = NEO::createArbSyncEventName(); EXPECT_NE(nullptr, name2); EXPECT_STRNE("", name2); char *name3 = NEO::createArbSyncEventName(); EXPECT_NE(nullptr, name3); EXPECT_STRNE("", name3); EXPECT_STRNE(name1, name2); EXPECT_STRNE(name1, name3); EXPECT_STRNE(name2, name3); NEO::destroyArbSyncEventName(name1); NEO::destroyArbSyncEventName(name2); NEO::destroyArbSyncEventName(name3); } template inline void glArbSyncObjectWaitServerMock(NEO::OSInterface &osInterface, CL_GL_SYNC_INFO &glSyncInfo) { glSyncInfo.waitCalled = SignalWaited; } struct MockBaseEvent : Event { using Event::Event; bool wasUpdated = false; void updateExecutionStatus() override { Event::updateExecutionStatus(); wasUpdated = true; } }; struct GlArbSyncEventTest : public ::testing::Test { GlArbSyncEventTest(void) { } void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); auto mockCsr = new MockCommandStreamReceiver(*executionEnvironment, 0); executionEnvironment->memoryManager = std::make_unique(*executionEnvironment); device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); device->resetCommandStreamReceiver(mockCsr); ctx.reset(new MockContext); cmdQ.reset(new MockCommandQueue(ctx.get(), device.get(), nullptr)); sharing = new GlSharingFunctionsMock(); ctx->setSharingFunctions(sharing); sharing->pfnGlArbSyncObjectCleanup = glArbSyncObjectCleanupMockDoNothing; sharing->pfnGlArbSyncObjectSetup = mockGlArbSyncObjectSetup; sharing->pfnGlArbSyncObjectSignal = glArbSyncObjectSignalMockDoNothing; sharing->pfnGlArbSyncObjectWaitServer = glArbSyncObjectWaitServerMock; 
osInterface = new OSInterface; executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); } void TearDown() override { if (baseEvent) { triggerEvent->setStatus(-1); baseEvent->release(); triggerEvent->release(); } } template T *createArbEventMock() { T *ret = new T(*ctx); ret->osInterface = osInterface; ret->baseEvent = getBaseEvent(); baseEvent->incRefInternal(); baseEvent->addChild(*ret); return ret; } MockBaseEvent *getBaseEvent() { if (baseEvent == nullptr) { triggerEvent = new UserEvent(ctx.get()); baseEvent = new MockBaseEvent(cmdQ.get(), CL_COMMAND_RELEASE_GL_OBJECTS, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); triggerEvent->addChild(*baseEvent); } return baseEvent; } void failSyncObjectCreation() { sharing->pfnGlArbSyncObjectSetup = mockGlArbSyncObjectSetup; } void setWaitCalledFlagOnServerWait() { sharing->pfnGlArbSyncObjectWaitServer = glArbSyncObjectWaitServerMock; } std::unique_ptr device; std::unique_ptr ctx; std::unique_ptr cmdQ; OSInterface *osInterface = nullptr; Event *triggerEvent = nullptr; MockBaseEvent *baseEvent = nullptr; GlSharingFunctionsMock *sharing = nullptr; ExecutionEnvironment *executionEnvironment = nullptr; }; TEST_F(GlArbSyncEventTest, whenGlArbEventIsCreatedThenBaseEventObjectIsConstructedWithProperContextAndCommandType) { auto *syncEv = createArbEventMock>(); EXPECT_EQ(static_cast(CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR), syncEv->getCommandType()); EXPECT_EQ(ctx.get(), syncEv->getContext()); EXPECT_NE(nullptr, syncEv->glSyncInfo); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGetSyncInfoisCalledThenEventsSyncInfoIsReturned) { auto *syncEv = createArbEventMock>(); EXPECT_NE(nullptr, syncEv->glSyncInfo); EXPECT_EQ(syncEv->glSyncInfo.get(), syncEv->getSyncInfo()); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenSetBaseEventIsCalledThenProperMembersOfParentEventAreCopiedToSyncEventAndReferenceCountersAreUpdated) { ASSERT_NE(nullptr, getBaseEvent()->getCommandQueue()); EXPECT_EQ(2, 
getBaseEvent()->getRefInternalCount()); EXPECT_EQ(2, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_FALSE(getBaseEvent()->peekHasChildEvents()); auto *syncEv = new DummyArbEvent(*ctx); EXPECT_EQ(nullptr, syncEv->baseEvent); EXPECT_EQ(nullptr, syncEv->osInterface); EXPECT_EQ(nullptr, syncEv->getCommandQueue()); syncEv->useBaseSetEvent = true; bool ret = syncEv->setBaseEvent(*getBaseEvent()); EXPECT_TRUE(ret); EXPECT_TRUE(getBaseEvent()->peekHasChildEvents()); EXPECT_EQ(getBaseEvent(), syncEv->baseEvent); EXPECT_EQ(getBaseEvent()->getCommandQueue(), syncEv->getCommandQueue()); EXPECT_EQ(syncEv->getCommandQueue()->getGpgpuCommandStreamReceiver().getOSInterface(), syncEv->osInterface); EXPECT_EQ(3, getBaseEvent()->getRefInternalCount()); EXPECT_EQ(3, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_TRUE(getBaseEvent()->peekHasChildEvents()); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenSetBaseEventIsCalledButGlArbSyncObjectCreationFailsThenOperationIsAborted) { ASSERT_NE(nullptr, getBaseEvent()->getCommandQueue()); EXPECT_EQ(2, getBaseEvent()->getRefInternalCount()); EXPECT_EQ(2, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_FALSE(getBaseEvent()->peekHasChildEvents()); auto *syncEv = new DummyArbEvent(*ctx); EXPECT_EQ(nullptr, syncEv->baseEvent); EXPECT_EQ(nullptr, syncEv->osInterface); EXPECT_EQ(nullptr, syncEv->getCommandQueue()); syncEv->useBaseSetEvent = true; failSyncObjectCreation(); bool ret = syncEv->setBaseEvent(*getBaseEvent()); EXPECT_FALSE(ret); EXPECT_EQ(2, getBaseEvent()->getRefInternalCount()); EXPECT_EQ(2, getBaseEvent()->getCommandQueue()->getRefInternalCount()); EXPECT_FALSE(getBaseEvent()->peekHasChildEvents()); EXPECT_EQ(nullptr, syncEv->baseEvent); EXPECT_EQ(nullptr, syncEv->osInterface); EXPECT_EQ(nullptr, syncEv->getCommandQueue()); syncEv->osInterface = this->osInterface; syncEv->baseEvent = getBaseEvent(); getBaseEvent()->incRefInternal(); syncEv->release(); } 
TEST_F(GlArbSyncEventTest, whenGlArbSyncEventGetsUnblockedByTerminatedBaseEventThenSyncObjectDoesntGetSignalled) { auto *syncEv = createArbEventMock>(); triggerEvent->setStatus(-1); EXPECT_FALSE(syncEv->getSyncInfo()->waitCalled); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventGetsUnblockedByQueuedBaseEventThenSyncObjectDoesntGetSignalled) { auto *syncEv = createArbEventMock>(); syncEv->unblockEventBy(*this->baseEvent, 0, CL_QUEUED); EXPECT_FALSE(syncEv->getSyncInfo()->waitCalled); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventGetsUnblockedBySubmittedOrCompletedEventThenSyncObjectGetsSignalled) { setWaitCalledFlagOnServerWait(); auto *syncEv = createArbEventMock>(); triggerEvent->setStatus(CL_COMPLETE); EXPECT_TRUE(syncEv->getSyncInfo()->waitCalled); syncEv->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventIsCreatedFromBaseEventWithoutValidContextThenCreationFails) { Event *baseEvent = new Event(nullptr, CL_COMMAND_RELEASE_GL_OBJECTS, CompletionStamp::levelNotReady, CompletionStamp::levelNotReady); auto *arbEvent = GlArbSyncEvent::create(*baseEvent); EXPECT_EQ(nullptr, arbEvent); baseEvent->release(); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventIsCreatedAndSetEventFailsThenCreationFails) { failSyncObjectCreation(); auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); EXPECT_EQ(nullptr, arbEvent); } TEST_F(GlArbSyncEventTest, whenGlArbSyncEventIsCreatedTheBaseEventIsProperlySet) { auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); EXPECT_NE(nullptr, arbEvent); EXPECT_TRUE(this->baseEvent->peekHasChildEvents()); EXPECT_EQ(arbEvent, this->baseEvent->peekChildEvents()->ref); arbEvent->release(); } TEST_F(GlArbSyncEventTest, whenClEnqueueMarkerWithSyncObjectINTELIsCalledThenInvalidOperationErrorCodeIsReturned) { cl_command_queue queue = static_cast(this->cmdQ.get()); auto ret = clEnqueueMarkerWithSyncObjectINTEL(queue, nullptr, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, ret); } 
TEST_F(GlArbSyncEventTest, whenClGetCLObjectInfoINTELIsCalledThenInvalidOperationErrorCodeIsReturned) { cl_mem mem = {}; auto ret = clGetCLObjectInfoINTEL(mem, nullptr); EXPECT_EQ(CL_INVALID_OPERATION, ret); } TEST_F(GlArbSyncEventTest, givenNullSynInfoParameterWhenClGetCLEventInfoINTELIsCalledThenInvalidArgValueErrorCodeIsReturned) { cl_event ev = getBaseEvent(); cl_context ctxRet = {}; auto ret = clGetCLEventInfoINTEL(ev, nullptr, &ctxRet); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(GlArbSyncEventTest, givenNullContextParameterWhenClGetCLEventInfoINTELIsCalledThenInvalidArgValueErrorCodeIsReturned) { cl_event ev = getBaseEvent(); CL_GL_SYNC_INFO *synInfoRet = nullptr; auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, nullptr); EXPECT_EQ(CL_INVALID_ARG_VALUE, ret); } TEST_F(GlArbSyncEventTest, givenUnknownEventWhenclGetCLEventInfoINTELIsCalledThenInvalidEventErrorCodeIsReturned) { auto deadEvent = new MockEvent(nullptr, 0, 0, 0); deadEvent->magic = Event::deadMagic; cl_event unknownEvent = deadEvent; CL_GL_SYNC_INFO *synInfoRet = nullptr; cl_context ctxRet = {}; auto ret = clGetCLEventInfoINTEL(unknownEvent, &synInfoRet, &ctxRet); EXPECT_EQ(CL_INVALID_EVENT, ret); deadEvent->release(); } TEST_F(GlArbSyncEventTest, givenEventWithCommandDifferentThanReleaseGlObjectsWhenClGetCLEventInfoINTELIsCalledThenValidContextIsReturned) { getBaseEvent(); cl_event ev = triggerEvent; CL_GL_SYNC_INFO *synInfoRet = reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(nullptr, synInfoRet); EXPECT_EQ(ctxRet, ctx.get()); } TEST_F(GlArbSyncEventTest, givenDisabledSharingWhenClGetCLEventInfoINTELIsCalledThenInvalidOperationErrorCodeIsReturned) { getBaseEvent(); cl_event ev = baseEvent; CL_GL_SYNC_INFO *synInfoRet = reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; auto sharing = ctx->getSharing(); ctx->sharingFunctions[sharing->getId()] = nullptr; auto ret = 
clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); ctx->setSharingFunctions(new GlSharingFunctionsMock()); EXPECT_EQ(CL_INVALID_OPERATION, ret); } TEST_F(GlArbSyncEventTest, givenCallToClGetCLEventInfoINTELWhenGetOrCreateGlArbSyncFailsThenOutOfMemoryErrorCodeIsReturned) { getBaseEvent(); cl_event ev = this->baseEvent; CL_GL_SYNC_INFO *synInfoRet = reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; sharing->pfnGlArbSyncObjectSetup = mockGlArbSyncObjectSetup; auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); EXPECT_EQ(CL_OUT_OF_RESOURCES, ret); } TEST_F(GlArbSyncEventTest, givenCallToClGetCLEventInfoINTELWhenFunctionSucceedsThenEventsGetUpdatedAndValidContextAndSyncInfoAreReturned) { auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); this->sharing->glArbEventMapping[this->baseEvent] = arbEvent; cl_event ev = this->baseEvent; CL_GL_SYNC_INFO *synInfoRet = reinterpret_cast(static_cast(0xFF)); cl_context ctxRet = {}; EXPECT_FALSE(this->baseEvent->wasUpdated); auto ret = clGetCLEventInfoINTEL(ev, &synInfoRet, &ctxRet); EXPECT_TRUE(this->baseEvent->wasUpdated); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(ctx.get(), ctxRet); EXPECT_EQ(arbEvent->getSyncInfo(), synInfoRet); arbEvent->release(); } TEST_F(GlArbSyncEventTest, givenUnknownEventWhenClReleaseGlSharedEventINTELIsCalledThenInvalidEventErrorCodeIsReturned) { auto deadEvent = new MockEvent(nullptr, 0, 0, 0); deadEvent->magic = Event::deadMagic; cl_event unknownEvent = deadEvent; auto ret = clReleaseGlSharedEventINTEL(unknownEvent); EXPECT_EQ(CL_INVALID_EVENT, ret); deadEvent->release(); } TEST_F(GlArbSyncEventTest, givenEventWithoutArbSyncWhenClReleaseGlSharedEventINTELIsCalledThenThisEventsRefcountIsDecreased) { this->getBaseEvent(); triggerEvent->retain(); EXPECT_EQ(2, triggerEvent->getRefInternalCount()); cl_event ev = triggerEvent; auto ret = clReleaseGlSharedEventINTEL(ev); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(1, triggerEvent->getRefInternalCount()); } TEST_F(GlArbSyncEventTest, 
givenEventWithArbSyncWhenClReleaseGlSharedEventINTELIsCalledThenThisEventsAndArbSyncsRefcountsAreDecreased) { auto *arbEvent = GlArbSyncEvent::create(*this->getBaseEvent()); baseEvent->retain(); arbEvent->retain(); this->sharing->glArbEventMapping[baseEvent] = arbEvent; EXPECT_EQ(4, baseEvent->getRefInternalCount()); EXPECT_EQ(3, arbEvent->getRefInternalCount()); cl_event ev = baseEvent; auto ret = clReleaseGlSharedEventINTEL(ev); EXPECT_EQ(CL_SUCCESS, ret); EXPECT_EQ(3, baseEvent->getRefInternalCount()); EXPECT_EQ(2, arbEvent->getRefInternalCount()); arbEvent->release(); } gl_create_from_texture_tests.cpp000066400000000000000000000342351363734646600341520ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/get_info.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "test.h" #include "gtest/gtest.h" namespace NEO { class CreateFromGlTexture : public ::testing::Test { public: // temp solution - we need to query size from GMM: class TempMM : public OsAgnosticMemoryManager { public: TempMM() : OsAgnosticMemoryManager(*(new MockExecutionEnvironment(defaultHwInfo.get()))) { mockExecutionEnvironment.reset(&executionEnvironment); } GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { auto alloc = 
OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness); if (handle == CreateFromGlTexture::mcsHandle) { alloc->setDefaultGmm(forceMcsGmm); } else { alloc->setDefaultGmm(forceGmm); } return alloc; } size_t forceAllocationSize; Gmm *forceGmm = nullptr; Gmm *forceMcsGmm = nullptr; std::unique_ptr mockExecutionEnvironment; }; void SetUp() override { imgDesc = {}; imgInfo = {}; clContext.setSharingFunctions(glSharing->sharingFunctions.release()); clContext.memoryManager = &tempMM; } void TearDown() override { gmm.release(); mcsGmm.release(); } void updateImgInfoAndForceGmm() { imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); gmm = MockGmm::queryImgParams(clContext.getDevice(0)->getGmmClientContext(), imgInfo); tempMM.forceAllocationSize = imgInfo.size; tempMM.forceGmm = gmm.get(); if (glSharing->m_textureInfoOutput.globalShareHandleMCS != 0) { cl_image_desc mcsImgDesc = {}; mcsImgDesc.image_height = 128; mcsImgDesc.image_row_pitch = 256; mcsImgDesc.image_width = 128; mcsImgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; auto mcsImgInfo = MockGmm::initImgInfo(mcsImgDesc, 0, nullptr); mcsGmm = MockGmm::queryImgParams(clContext.getDevice(0)->getGmmClientContext(), mcsImgInfo); tempMM.forceMcsGmm = mcsGmm.get(); } } cl_image_desc imgDesc; ImageInfo imgInfo = {}; std::unique_ptr gmm; std::unique_ptr mcsGmm; TempMM tempMM; MockContext clContext; std::unique_ptr glSharing = std::make_unique(); cl_int retVal; static const unsigned int mcsHandle = 0xFF; }; class CreateFromGlTextureTestsWithParams : public CreateFromGlTexture, public ::testing::WithParamInterface { }; class CreateFromGlTextureTests : public CreateFromGlTexture { }; INSTANTIATE_TEST_CASE_P( CreateFromGlTextureTestsWithParams, CreateFromGlTextureTestsWithParams, testing::ValuesIn(glTextureTargets::supportedTargets)); TEST_P(CreateFromGlTextureTestsWithParams, givenAllTextureSpecificParamsWhenCreateIsCalledThenFillImageDescription) { unsigned int target = 
GetParam(); unsigned int baseTarget = GlTexture::getBaseTargetType(target); imgDesc.image_type = GlTexture::getClMemObjectType(target); imgDesc.image_width = 5; if (target == GL_TEXTURE_1D_ARRAY || target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { imgDesc.image_array_size = 5; } if (target == GL_TEXTURE_2D || target == GL_TEXTURE_RECTANGLE || target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_3D || target == GL_RENDERBUFFER_EXT || baseTarget == GL_TEXTURE_CUBE_MAP_ARB || target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { imgDesc.image_height = 5; } if (target == GL_TEXTURE_3D) { imgDesc.image_depth = 5; } if (target == GL_TEXTURE_BUFFER) { // size and width for texture buffer are queried from textureInfo - not from gmm glSharing->m_textureInfoOutput.textureBufferWidth = 64; glSharing->m_textureInfoOutput.textureBufferSize = 1024; glSharing->uploadDataToTextureInfo(); } if (target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { imgDesc.num_samples = 16; glSharing->m_textureInfoOutput.numberOfSamples = 16; glSharing->m_textureInfoOutput.globalShareHandleMCS = CreateFromGlTexture::mcsHandle; glSharing->uploadDataToTextureInfo(); } updateImgInfoAndForceGmm(); auto glImage = GlTexture::createSharedGlTexture(&clContext, (cl_mem_flags)0, target, 0, 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); if (target == GL_RENDERBUFFER_EXT) { EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); } else { EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); } EXPECT_EQ(GmmTypesConverter::getCubeFaceIndex(target), glImage->getCubeFaceIndex()); auto glTexture = reinterpret_cast(glImage->peekSharingHandler()); EXPECT_EQ(glTexture->getTarget(), target); EXPECT_EQ(glImage->getImageDesc().image_type, imgDesc.image_type); if (target == GL_TEXTURE_BUFFER) { EXPECT_EQ(glImage->getImageDesc().image_width, 
static_cast(glTexture->getTextureInfo()->textureBufferWidth)); EXPECT_EQ(glImage->getImageDesc().image_row_pitch, static_cast(glTexture->getTextureInfo()->textureBufferSize)); } else { EXPECT_EQ(glImage->getImageDesc().image_width, gmm->gmmResourceInfo->getBaseWidth()); size_t slicePitch = glImage->getHostPtrSlicePitch(); size_t rowPitch = glImage->getHostPtrRowPitch(); EXPECT_EQ(glImage->getImageDesc().image_row_pitch, rowPitch); EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, slicePitch); size_t gmmRowPitch = gmm->gmmResourceInfo->getRenderPitch(); if (gmmRowPitch == 0) { size_t alignedWidth = alignUp(glImage->getImageDesc().image_width, gmm->gmmResourceInfo->getHAlign()); size_t bpp = gmm->gmmResourceInfo->getBitsPerPixel() >> 3; EXPECT_EQ(glImage->getImageDesc().image_row_pitch, alignedWidth * bpp); } else { EXPECT_EQ(glImage->getImageDesc().image_row_pitch, gmmRowPitch); } size_t ImageInfoRowPitch = 0; retVal = clGetImageInfo(glImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &ImageInfoRowPitch, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(rowPitch, ImageInfoRowPitch); size_t ImageInfoSlicePitch = 0; slicePitch *= !(glImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D || glImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D || glImage->getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER); retVal = clGetImageInfo(glImage, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &ImageInfoSlicePitch, NULL); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_EQ(slicePitch, ImageInfoSlicePitch); } EXPECT_EQ(glImage->getImageDesc().image_height, gmm->gmmResourceInfo->getBaseHeight()); EXPECT_EQ(glImage->getImageDesc().image_array_size, gmm->gmmResourceInfo->getArraySize()); if (target == GL_TEXTURE_3D) { EXPECT_EQ(glImage->getImageDesc().image_depth, gmm->gmmResourceInfo->getBaseDepth()); } else { EXPECT_EQ(glImage->getImageDesc().image_depth, 0u); } if (imgDesc.image_array_size > 1 || imgDesc.image_depth > 1) { GMM_REQ_OFFSET_INFO GMMReqInfo = {}; GMMReqInfo.ArrayIndex = 
imgDesc.image_array_size > 1 ? 1 : 0; GMMReqInfo.Slice = imgDesc.image_depth > 1 ? 1 : 0; GMMReqInfo.ReqLock = 1; gmm->gmmResourceInfo->getOffset(GMMReqInfo); size_t expectedSlicePitch = GMMReqInfo.Lock.Offset; EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, expectedSlicePitch); } else { EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, imgInfo.size); } EXPECT_EQ(glImage->getQPitch(), gmm->queryQPitch(gmm->gmmResourceInfo->getResourceType())); // gmm returns 1 by default - OCL requires 0 uint32_t numSamples = static_cast(gmm->gmmResourceInfo->getNumSamples()); auto expectedNumSamples = getValidParam(numSamples, 0u, 1u); EXPECT_EQ(expectedNumSamples, glImage->getImageDesc().num_samples); if (target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { EXPECT_NE(nullptr, glImage->getMcsAllocation()); EXPECT_EQ(getValidParam(static_cast(mcsGmm->gmmResourceInfo->getRenderPitch() / 128)), glImage->getMcsSurfaceInfo().pitch); EXPECT_EQ(static_cast(mcsGmm->gmmResourceInfo->getQPitch()), glImage->getMcsSurfaceInfo().qPitch); EXPECT_EQ(GmmTypesConverter::getRenderMultisamplesCount(static_cast(gmm->gmmResourceInfo->getNumSamples())), glImage->getMcsSurfaceInfo().multisampleCount); } delete glImage; } TEST_P(CreateFromGlTextureTestsWithParams, givenArrayTextureTargetAndArraySizeEqualOneWhenCreateIsCalledThenSlicePitchAndSizeAreEqual) { unsigned int target = GetParam(); // only array targets if (target == GL_TEXTURE_1D_ARRAY || target == GL_TEXTURE_2D_ARRAY) { imgDesc.image_type = GlTexture::getClMemObjectType(target); imgDesc.image_width = 5; if (target == GL_TEXTURE_2D_ARRAY) { imgDesc.image_height = 5; } imgDesc.image_array_size = 1; updateImgInfoAndForceGmm(); auto glImage = GlTexture::createSharedGlTexture(&clContext, (cl_mem_flags)0, target, 0, 0, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(glImage->getImageDesc().image_slice_pitch, imgInfo.size); delete glImage; } } TEST_P(CreateFromGlTextureTestsWithParams, 
givenZeroRowPitchFromGmmWhenCreatingTextureThenComputeIt) { unsigned int target = GL_TEXTURE_2D; imgDesc.image_type = GlTexture::getClMemObjectType(target); imgDesc.image_width = 5; imgDesc.image_height = 5; imgDesc.image_array_size = 1; updateImgInfoAndForceGmm(); auto mockResInfo = reinterpret_cast<::testing::NiceMock *>(gmm->gmmResourceInfo.get()); mockResInfo->overrideReturnedRenderPitch(0u); auto alignedWidth = alignUp(imgDesc.image_width, gmm->gmmResourceInfo->getHAlign()); auto expectedRowPitch = alignedWidth * (gmm->gmmResourceInfo->getBitsPerPixel() >> 3); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, (cl_mem_flags)0, target, 0, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(imgInfo.size, glImage->getImageDesc().image_slice_pitch); EXPECT_EQ(expectedRowPitch, glImage->getImageDesc().image_row_pitch); } TEST_F(CreateFromGlTextureTests, GivenGlTextureTargetAndMipLevelNegativeWhenCreateIsCalledThenMipMappedImageIsCreated) { unsigned int target = GL_TEXTURE_3D; cl_GLint miplevel = -1; imgDesc.image_type = GlTexture::getClMemObjectType(target); imgDesc.image_height = 13; imgDesc.image_width = 15; imgDesc.image_depth = 7; updateImgInfoAndForceGmm(); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, 0u, target, miplevel, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); size_t actualHeight = 0; size_t actualWidth = 0; size_t actualDepth = 0; glImage->getImageInfo(CL_IMAGE_HEIGHT, sizeof(size_t), &actualHeight, nullptr); glImage->getImageInfo(CL_IMAGE_WIDTH, sizeof(size_t), &actualWidth, nullptr); glImage->getImageInfo(CL_IMAGE_DEPTH, sizeof(size_t), &actualDepth, nullptr); EXPECT_EQ(13u, actualHeight); EXPECT_EQ(15u, actualWidth); EXPECT_EQ(7u, actualDepth); EXPECT_EQ(gmm->gmmResourceInfo->getMaxLod() + 1, glImage->getImageDesc().num_mip_levels); EXPECT_EQ(glImage->peekBaseMipLevel(), 0); } TEST_F(CreateFromGlTextureTests, 
GivenGlTextureTargetAndMipLevelNonNegativeWhenCreateIsCalledThenImageFromChosenMipLevelIsCreated) { unsigned int target = GL_TEXTURE_3D; cl_GLint miplevel = 2; imgDesc.image_type = GlTexture::getClMemObjectType(target); imgDesc.image_height = 13; imgDesc.image_width = 15; imgDesc.image_depth = 7; updateImgInfoAndForceGmm(); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, 0u, target, miplevel, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); size_t actualHeight = 0; size_t actualWidth = 0; size_t actualDepth = 0; glImage->getImageInfo(CL_IMAGE_HEIGHT, sizeof(size_t), &actualHeight, nullptr); glImage->getImageInfo(CL_IMAGE_WIDTH, sizeof(size_t), &actualWidth, nullptr); glImage->getImageInfo(CL_IMAGE_DEPTH, sizeof(size_t), &actualDepth, nullptr); EXPECT_EQ(3u, actualHeight); EXPECT_EQ(3u, actualWidth); EXPECT_EQ(1u, actualDepth); EXPECT_GE(1u, glImage->getImageDesc().num_mip_levels); EXPECT_EQ(glImage->peekBaseMipLevel(), 2); } TEST_F(CreateFromGlTextureTests, GivenGlTextureWhenCreateIsCalledThenAllocationTypeIsSharedImage) { unsigned int target = GL_TEXTURE_3D; cl_GLint miplevel = 2; imgDesc.image_type = GlTexture::getClMemObjectType(target); imgDesc.image_height = 13; imgDesc.image_width = 15; imgDesc.image_depth = 7; updateImgInfoAndForceGmm(); auto glImage = std::unique_ptr(GlTexture::createSharedGlTexture(&clContext, 0u, target, miplevel, 0, &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage->getGraphicsAllocation()); EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_IMAGE, glImage->getGraphicsAllocation()->getAllocationType()); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/gl_reused_buffers_tests.cpp000066400000000000000000000241521363734646600331630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/resource_info.h" #include 
"opencl/source/cl_device/cl_device.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" using namespace NEO; struct GlReusedBufferTests : public ::testing::Test { void SetUp() override { glSharingFunctions = new GlSharingFunctionsMock(); context.setSharingFunctions(glSharingFunctions); graphicsAllocationsForGlBufferReuse = &glSharingFunctions->graphicsAllocationsForGlBufferReuse; } GlSharingFunctionsMock *glSharingFunctions = nullptr; MockContext context; std::vector> *graphicsAllocationsForGlBufferReuse = nullptr; unsigned int bufferId1 = 5; unsigned int bufferId2 = 7; cl_int retVal = CL_SUCCESS; }; class FailingMemoryManager : public MockMemoryManager { public: GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { return nullptr; } }; TEST_F(GlReusedBufferTests, givenMultipleBuffersWithTheSameIdWhenCreatedThenReuseGraphicsAllocation) { std::unique_ptr glBuffers[10]; // first 5 with bufferId1, next 5 with bufferId2 for (size_t i = 0; i < 10; i++) { glBuffers[i].reset(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, (i < 5 ? 
bufferId1 : bufferId2), &retVal)); EXPECT_NE(nullptr, glBuffers[i].get()); EXPECT_NE(nullptr, glBuffers[i]->getGraphicsAllocation()); } EXPECT_EQ(2u, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(bufferId1, graphicsAllocationsForGlBufferReuse->at(0).first); EXPECT_EQ(bufferId2, graphicsAllocationsForGlBufferReuse->at(1).first); auto storedGraphicsAllocation1 = graphicsAllocationsForGlBufferReuse->at(0).second; auto storedGraphicsAllocation2 = graphicsAllocationsForGlBufferReuse->at(1).second; EXPECT_EQ(5u, storedGraphicsAllocation1->peekReuseCount()); EXPECT_EQ(5u, storedGraphicsAllocation2->peekReuseCount()); for (size_t i = 0; i < 10; i++) { EXPECT_EQ(i < 5 ? storedGraphicsAllocation1 : storedGraphicsAllocation2, glBuffers[i]->getGraphicsAllocation()); } } TEST_F(GlReusedBufferTests, givenMultipleBuffersWithReusedAllocationWhenReleasingThenClearVectorByLastObject) { std::unique_ptr glBuffer1(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); std::unique_ptr glBuffer2(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); EXPECT_EQ(1u, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(2u, graphicsAllocationsForGlBufferReuse->at(0).second->peekReuseCount()); glBuffer1.reset(nullptr); EXPECT_EQ(1u, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(1u, graphicsAllocationsForGlBufferReuse->at(0).second->peekReuseCount()); glBuffer2.reset(nullptr); EXPECT_EQ(0u, graphicsAllocationsForGlBufferReuse->size()); } TEST_F(GlReusedBufferTests, givenMultipleBuffersWithReusedAllocationWhenCreatingThenReuseGmmResourceToo) { std::unique_ptr glBuffer1(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); glBuffer1->getGraphicsAllocation()->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmClientContext(), (void *)0x100, 1, false)); std::unique_ptr glBuffer2(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); 
EXPECT_EQ(glBuffer1->getGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo->peekHandle(), glBuffer2->getGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo->peekHandle()); } TEST_F(GlReusedBufferTests, givenGlobalShareHandleChangedWhenAcquiringSharedBufferThenChangeGraphicsAllocation) { std::unique_ptr dllParam = std::make_unique(); CL_GL_BUFFER_INFO bufferInfoOutput = dllParam->getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam->loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = clBuffer->peekSharingHandler(); auto oldGraphicsAllocation = clBuffer->getGraphicsAllocation(); ASSERT_EQ(40, oldGraphicsAllocation->peekSharedHandle()); bufferInfoOutput.globalShareHandle = 41; dllParam->loadBuffer(bufferInfoOutput); glBuffer->acquire(clBuffer.get()); auto newGraphicsAllocation = clBuffer->getGraphicsAllocation(); EXPECT_NE(oldGraphicsAllocation, newGraphicsAllocation); EXPECT_EQ(41, newGraphicsAllocation->peekSharedHandle()); glBuffer->release(clBuffer.get()); } TEST_F(GlReusedBufferTests, givenGlobalShareHandleDidNotChangeWhenAcquiringSharedBufferThenDontDynamicallyAllocateBufferInfo) { class MyGlBuffer : public GlBuffer { public: MyGlBuffer(GLSharingFunctions *sharingFunctions, unsigned int glObjectId) : GlBuffer(sharingFunctions, glObjectId) {} protected: void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) override { EXPECT_EQ(nullptr, updateData->updateData); GlBuffer::resolveGraphicsAllocationChange(currentSharedHandle, updateData); } }; std::unique_ptr dllParam = std::make_unique(); CL_GL_BUFFER_INFO bufferInfoOutput = dllParam->getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam->loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = new MyGlBuffer(context.getSharing(), 
bufferId1); clBuffer->setSharingHandler(glBuffer); glBuffer->acquire(clBuffer.get()); glBuffer->release(clBuffer.get()); } TEST_F(GlReusedBufferTests, givenGlobalShareHandleChangedWhenAcquiringSharedBufferThenDynamicallyAllocateBufferInfo) { class MyGlBuffer : public GlBuffer { public: MyGlBuffer(GLSharingFunctions *sharingFunctions, unsigned int glObjectId) : GlBuffer(sharingFunctions, glObjectId) {} protected: void resolveGraphicsAllocationChange(osHandle currentSharedHandle, UpdateData *updateData) override { EXPECT_NE(nullptr, updateData->updateData); GlBuffer::resolveGraphicsAllocationChange(currentSharedHandle, updateData); } }; std::unique_ptr dllParam = std::make_unique(); CL_GL_BUFFER_INFO bufferInfoOutput = dllParam->getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam->loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = new MyGlBuffer(context.getSharing(), bufferId1); clBuffer->setSharingHandler(glBuffer); bufferInfoOutput.globalShareHandle = 41; dllParam->loadBuffer(bufferInfoOutput); glBuffer->acquire(clBuffer.get()); glBuffer->release(clBuffer.get()); } TEST_F(GlReusedBufferTests, givenMultipleBuffersAndGlobalShareHandleChangedWhenAcquiringSharedBufferDeleteOldGfxAllocationFromReuseVector) { std::unique_ptr dllParam = std::make_unique(); CL_GL_BUFFER_INFO bufferInfoOutput = dllParam->getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam->loadBuffer(bufferInfoOutput); auto clBuffer1 = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto clBuffer2 = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto graphicsAllocation1 = clBuffer1->getGraphicsAllocation(); auto graphicsAllocation2 = clBuffer2->getGraphicsAllocation(); ASSERT_EQ(graphicsAllocation1, graphicsAllocation2); ASSERT_EQ(2, 
graphicsAllocation1->peekReuseCount()); ASSERT_EQ(1, graphicsAllocationsForGlBufferReuse->size()); bufferInfoOutput.globalShareHandle = 41; dllParam->loadBuffer(bufferInfoOutput); clBuffer1->peekSharingHandler()->acquire(clBuffer1.get()); auto newGraphicsAllocation = clBuffer1->getGraphicsAllocation(); EXPECT_EQ(1, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(newGraphicsAllocation, graphicsAllocationsForGlBufferReuse->at(0).second); clBuffer2->peekSharingHandler()->acquire(clBuffer2.get()); EXPECT_EQ(clBuffer2->getGraphicsAllocation(), newGraphicsAllocation); EXPECT_EQ(1, graphicsAllocationsForGlBufferReuse->size()); EXPECT_EQ(newGraphicsAllocation, graphicsAllocationsForGlBufferReuse->at(0).second); clBuffer1->peekSharingHandler()->release(clBuffer1.get()); clBuffer2->peekSharingHandler()->release(clBuffer2.get()); } TEST_F(GlReusedBufferTests, givenGraphicsAllocationCreationReturnsNullptrWhenAcquiringGlBufferThenReturnOutOfResourcesAndNullifyAllocation) { auto suceedingMemoryManager = context.getMemoryManager(); auto failingMemoryManager = std::unique_ptr(new FailingMemoryManager()); std::unique_ptr dllParam = std::make_unique(); CL_GL_BUFFER_INFO bufferInfoOutput = dllParam->getBufferInfo(); bufferInfoOutput.globalShareHandle = 40; dllParam->loadBuffer(bufferInfoOutput); auto clBuffer = std::unique_ptr(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId1, &retVal)); auto glBuffer = clBuffer->peekSharingHandler(); bufferInfoOutput.globalShareHandle = 41; dllParam->loadBuffer(bufferInfoOutput); context.memoryManager = failingMemoryManager.get(); auto result = glBuffer->acquire(clBuffer.get()); EXPECT_EQ(CL_OUT_OF_RESOURCES, result); EXPECT_EQ(nullptr, clBuffer->getGraphicsAllocation()); context.memoryManager = suceedingMemoryManager; } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/gl_sharing_enable_tests.cpp000066400000000000000000000161651363734646600331260ustar00rootroot00000000000000/* * Copyright (C) 
2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/windows/win_enable_gl.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; class GlSharingEnablerTests : public ::testing::Test { public: void SetUp() override { factory.reset(new GlSharingBuilderFactory()); ASSERT_NE(nullptr, factory.get()); } std::unique_ptr factory; }; TEST_F(GlSharingEnablerTests, givenGlFactoryWhenAskedThenExtensionsAreReturned) { auto ext = factory->getExtensions(); EXPECT_GT(ext.length(), 0u); EXPECT_STRNE("", ext.c_str()); } TEST_F(GlSharingEnablerTests, givenGlFactoryWhenAskedThenGlobalIcdIsConfigured) { class IcdRestore { public: IcdRestore() { icdSnapshot = icdGlobalDispatchTable; } ~IcdRestore() { icdGlobalDispatchTable = icdSnapshot; } decltype(icdGlobalDispatchTable) icdSnapshot; }; // we play with global table, so first save state then restore it with use of RAII IcdRestore icdRestore; // clear ICD table icdGlobalDispatchTable.clCreateFromGLBuffer = nullptr; icdGlobalDispatchTable.clCreateFromGLTexture = nullptr; icdGlobalDispatchTable.clCreateFromGLTexture2D = nullptr; icdGlobalDispatchTable.clCreateFromGLTexture3D = nullptr; icdGlobalDispatchTable.clCreateFromGLRenderbuffer = nullptr; icdGlobalDispatchTable.clGetGLObjectInfo = nullptr; icdGlobalDispatchTable.clGetGLTextureInfo = nullptr; icdGlobalDispatchTable.clEnqueueAcquireGLObjects = nullptr; icdGlobalDispatchTable.clEnqueueReleaseGLObjects = nullptr; icdGlobalDispatchTable.clCreateEventFromGLsyncKHR = nullptr; icdGlobalDispatchTable.clGetGLContextInfoKHR = nullptr; factory->fillGlobalDispatchTable(); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLBuffer); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLTexture); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLTexture2D); EXPECT_NE(nullptr, 
icdGlobalDispatchTable.clCreateFromGLTexture3D); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateFromGLRenderbuffer); EXPECT_NE(nullptr, icdGlobalDispatchTable.clGetGLObjectInfo); EXPECT_NE(nullptr, icdGlobalDispatchTable.clGetGLTextureInfo); EXPECT_NE(nullptr, icdGlobalDispatchTable.clEnqueueAcquireGLObjects); EXPECT_NE(nullptr, icdGlobalDispatchTable.clEnqueueReleaseGLObjects); EXPECT_NE(nullptr, icdGlobalDispatchTable.clCreateEventFromGLsyncKHR); EXPECT_NE(nullptr, icdGlobalDispatchTable.clGetGLContextInfoKHR); } TEST_F(GlSharingEnablerTests, givenGlFactoryWhenAskedThenBuilderIsCreated) { auto builder = factory->createContextBuilder(); EXPECT_NE(nullptr, builder); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenUnknownPropertyThenFalseIsReturnedAndErrcodeUnchanged) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_FALSE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenInvalidPropertyThenTrueIsReturnedAndErrcodeSet) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CGL_SHAREGROUP_KHR; cl_context_properties value; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_INVALID_PROPERTY, errcodeRet); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenValidPropertyThenTrueIsReturnedAndErrcodeUnchanged) { cl_context_properties props[] = {CL_GL_CONTEXT_KHR, CL_WGL_HDC_KHR, CL_GLX_DISPLAY_KHR, CL_EGL_DISPLAY_KHR}; for (auto currProperty : props) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = currProperty; cl_context_properties value = 0x10000; int32_t errcodeRet = CL_SUCCESS; auto res = 
builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); // repeat to check if we don't allocate twice auto prevAllocations = MemoryManagement::numAllocations.load(); res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); auto currAllocations = MemoryManagement::numAllocations.load(); EXPECT_EQ(prevAllocations, currAllocations); } } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenNoPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); MockContext context; int32_t errcodeRet = CL_SUCCESS; auto res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenInvalidPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_FALSE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); MockContext context; errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(GlSharingEnablerTests, givenGlBuilderWhenNullHandleThenFinalizerReturnsTrueAndNoSharingRegistered) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_GL_CONTEXT_KHR; cl_context_properties value = 0x0; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); MockContext context; errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); auto sharing = context.getSharing(); EXPECT_EQ(nullptr, sharing); } 
TEST_F(GlSharingEnablerTests, givenGlBuilderWhenHandleThenFinalizerReturnsTrueAndSharingIsRegistered) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_GL_CONTEXT_KHR; cl_context_properties value = 0x1000; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); MockContext context; errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); auto sharing = context.getSharing(); EXPECT_NE(nullptr, sharing); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/gl_sharing_tests.cpp000066400000000000000000001611131363734646600316120ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/array_count.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/event/user_event.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/cl_gl_api_intel.h" #include "opencl/source/sharings/gl/gl_arb_sync_event.h" #include "opencl/source/sharings/gl/gl_buffer.h" #include "opencl/source/sharings/gl/gl_context_guard.h" #include "opencl/source/sharings/gl/gl_sync_event.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/source/sharings/gl/windows/gl_sharing_windows.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_arb_sync_event_windows.h" #include 
"opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" #include "gl/gl_sharing_os.h" using namespace NEO; bool MockGLSharingFunctions::SharingEnabled = false; class glSharingTests : public ::testing::Test { public: void SetUp() override { mockGlSharingFunctions = mockGlSharing->sharingFunctions.release(); context.setSharingFunctions(mockGlSharingFunctions); mockGlSharing->m_bufferInfoOutput.globalShareHandle = bufferId; mockGlSharing->m_bufferInfoOutput.bufferSize = 4096u; mockGlSharing->uploadDataToBufferInfo(); } MockContext context; std::unique_ptr mockGlSharing = std::make_unique(); GlSharingFunctionsMock *mockGlSharingFunctions; unsigned int bufferId = 1u; }; TEST_F(glSharingTests, givenGlMockWhenItIsCreatedThenNonZeroObjectIsReturned) { EXPECT_NE(nullptr, &mockGlSharing); EXPECT_NE(nullptr, &mockGlSharing->m_clGlResourceInfo); EXPECT_NE(nullptr, &mockGlSharing->m_glClResourceInfo); } TEST_F(glSharingTests, givenGLSharingFunctionsWhenAskedForIdThenClGlSharingIdIsReturned) { auto v = SharingType::CLGL_SHARING; EXPECT_EQ(v, mockGlSharingFunctions->getId()); } TEST_F(glSharingTests, givenMockGlWhenGlBufferIsCreatedThenMemObjectHasGlHandler) { auto retVal = CL_SUCCESS; auto glBuffer = GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId, &retVal); EXPECT_NE(nullptr, glBuffer); EXPECT_NE(nullptr, glBuffer->getGraphicsAllocation()); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(4096u, 
glBuffer->getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled")); EXPECT_EQ(bufferId, mockGlSharing->dllParam->getBufferInfo().bufferName); EXPECT_EQ(4096u, glBuffer->getSize()); size_t flagsExpected = CL_MEM_READ_WRITE; EXPECT_EQ(flagsExpected, glBuffer->getMemoryPropertiesFlags()); auto handler = glBuffer->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto glHandler = static_cast(handler); EXPECT_EQ(glHandler->peekFunctionsHandler(), mockGlSharingFunctions); delete glBuffer; } class FailingMemoryManager : public MockMemoryManager { public: GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { return nullptr; } }; TEST_F(glSharingTests, givenMockGlWhenGlBufferIsCreatedFromWrongHandleThenErrorAndNoBufferIsReturned) { auto tempMemoryManager = context.getMemoryManager(); auto memoryManager = std::unique_ptr(new FailingMemoryManager()); context.memoryManager = memoryManager.get(); auto retVal = CL_SUCCESS; auto glBuffer = GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, 0, &retVal); EXPECT_EQ(nullptr, glBuffer); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); context.memoryManager = tempMemoryManager; } TEST_F(glSharingTests, givenContextWhenClCreateFromGlBufferIsCalledThenBufferIsReturned) { auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glBuffer); retVal = clReleaseMemObject(glBuffer); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(glSharingTests, givenContextWithoutSharingWhenClCreateFromGlBufferIsCalledThenErrorIsReturned) { context.resetSharingFunctions(CLGL_SHARING); auto retVal = CL_SUCCESS; auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glBuffer); } GLboolean OSAPI mockGLAcquireSharedBuffer(GLDisplay, 
GLContext, GLContext, GLvoid *pResourceInfo) { return GL_FALSE; };

// NOTE(review): in this view several templated expressions (castToObject, static_cast,
// reinterpret_cast, std::unique_ptr, getSharing, MockCommandQueueHw, ...) carry no template
// arguments. They are reproduced exactly as seen - confirm against the canonical file.

// Creation must fail with CL_INVALID_GL_OBJECT when the acquire callback is replaced by
// mockGLAcquireSharedBuffer.
TEST_F(glSharingTests, givenContextWithSharingWhenClCreateFromGlBufferIsCalledWithIncorrectThenErrorIsReturned) {
    mockGlSharingFunctions->setGLAcquireSharedBufferMock(mockGLAcquireSharedBuffer);
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    ASSERT_EQ(CL_INVALID_GL_OBJECT, retVal);
    ASSERT_EQ(nullptr, glBuffer);
}

// With Force32bitAddressing set, the shared buffer's graphics allocation must report 32-bit.
TEST_F(glSharingTests, givenContextAnd32BitAddressingWhenClCreateFromGlBufferIsCalledThenBufferIsReturned) {
    // RAII restore: the flag is put back even when one of the fatal ASSERT_* checks below
    // returns early from the test body.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.Force32bitAddressing.set(true);

    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, glBuffer);

    EXPECT_TRUE(castToObject(glBuffer)->getGraphicsAllocation()->is32BitAllocation());

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// clGetGLObjectInfo must succeed for every combination of null / non-null output pointers.
TEST_F(glSharingTests, givenGlClBufferWhenAskedForCLGLGetInfoThenIdAndTypeIsReturned) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));

    cl_gl_object_type objectType = 0u;
    cl_GLuint objectId = 0u;

    retVal = clGetGLObjectInfo(glBuffer, &objectType, &objectId);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_BUFFER);
    EXPECT_EQ(objectId, bufferId);

    retVal = clGetGLObjectInfo(glBuffer, &objectType, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_BUFFER);

    retVal = clGetGLObjectInfo(glBuffer, nullptr, &objectId);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(objectId, bufferId);

    retVal = clGetGLObjectInfo(glBuffer, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A buffer created with clCreateBuffer is rejected by clGetGLObjectInfo.
TEST_F(glSharingTests, givenClBufferWhenAskedForCLGLGetInfoThenErrorIsReturned) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateBuffer(&context, 0, 1, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);

    cl_gl_object_type objectType = 0u;
    cl_GLuint objectId = 0u;

    retVal = clGetGLObjectInfo(glBuffer, &objectType, &objectId);
    EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal);

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// One explicit acquire adds one GLAcquireSharedBuffer call and keeps the same allocation.
TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredThenAcuqireCountIsIncremented) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));

    auto memObject = castToObject(glBuffer);
    EXPECT_FALSE(memObject->isMemObjZeroCopy());
    EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed());

    auto currentGraphicsAllocation = memObject->getGraphicsAllocation();

    memObject->peekSharingHandler()->acquire(memObject);
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled"));

    auto currentGraphicsAllocation2 = memObject->getGraphicsAllocation();
    EXPECT_EQ(currentGraphicsAllocation2, currentGraphicsAllocation);

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A second acquire without a release in between must not reach the GL mock again.
TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredTwiceThenAcuqireIsNotCalled) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto memObject = castToObject(glBuffer);

    memObject->peekSharingHandler()->acquire(memObject);
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled"));

    memObject->peekSharingHandler()->acquire(memObject);
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled"));

    memObject->peekSharingHandler()->release(memObject);
    memObject->peekSharingHandler()->release(memObject);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled"));

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// When the mocked buffer info carries a GMM resource handle, the allocation exposes a default Gmm.
TEST_F(glSharingTests, givenClGLBufferWhenItIsCreatedAndGmmIsAvailableThenItIsUsedInGraphicsAllocation) {
    void *ptr = (void *)0x1000;
    auto gmm = new Gmm(context.getDevice(0)->getGmmClientContext(), ptr, 4096u, false);

    mockGlSharing->m_bufferInfoOutput.pGmmResInfo = gmm->gmmResourceInfo->peekHandle();
    mockGlSharing->uploadDataToBufferInfo();

    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto memObject = castToObject(glBuffer);
    auto graphicsAllocation = memObject->getGraphicsAllocation();

    ASSERT_NE(nullptr, graphicsAllocation->getDefaultGmm());
    EXPECT_NE(nullptr, graphicsAllocation->getDefaultGmm()->gmmResourceInfo->peekHandle());

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete gmm;
}

// acquire -> release -> acquire reaches the GL mock again on the second acquire.
TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredTwiceAfterReleaseThenAcuqireIsIncremented) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto memObject = castToObject(glBuffer);

    memObject->peekSharingHandler()->acquire(memObject);
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled"));

    memObject->peekSharingHandler()->release(memObject);

    memObject->peekSharingHandler()->acquire(memObject);
    EXPECT_EQ(3, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled"));

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// GLReleaseSharedBuffer is only called by the release that balances the last acquire.
TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquireCountIsDecrementedToZeroThenCallReleaseFunction) {
    std::unique_ptr buffer(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId, nullptr));
    auto sharingHandler = buffer->peekSharingHandler();

    sharingHandler->acquire(buffer.get());
    sharingHandler->acquire(buffer.get());

    sharingHandler->release(buffer.get());
    EXPECT_EQ(0, mockGlSharing->dllParam->getParam("GLReleaseSharedBufferCalled"));

    sharingHandler->release(buffer.get());
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedBufferCalled"));
    EXPECT_EQ(bufferId, mockGlSharing->dllParam->getBufferInfo().bufferName);
}

// After re-acquiring with bufferOffset = 50 the GPU address moves by exactly that offset.
TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredWithDifferentOffsetThenGraphicsAllocationContainsLatestOffsetValue) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto memObject = castToObject(glBuffer);
    auto graphicsAddress = memObject->getGraphicsAllocation()->getGpuAddress();

    mockGlSharing->m_bufferInfoOutput.bufferOffset = 50u;
    mockGlSharing->uploadDataToBufferInfo();

    memObject->peekSharingHandler()->acquire(memObject);

    auto offsetedGraphicsAddress = memObject->getGraphicsAllocation()->getGpuAddress();
    EXPECT_EQ(offsetedGraphicsAddress, graphicsAddress + mockGlSharing->m_bufferInfoOutput.bufferOffset);

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// The enqueue acquire/release API calls drive Buffer::acquireCount 0 -> 1 -> 0 -> 1.
TEST_F(glSharingTests, givenHwCommandQueueWhenAcquireIsCalledThenAcquireCountIsIncremented) {
    auto retVal = CL_SUCCESS;
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto buffer = castToObject(glBuffer);
    EXPECT_EQ(0u, buffer->acquireCount);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));

    retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1u, buffer->acquireCount);

    retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(0u, buffer->acquireCount);

    retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(3, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1u, buffer->acquireCount);

    retVal = clReleaseCommandQueue(commandQueue);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// A wait-list count of 1 with a null wait list is rejected by acquire.
TEST_F(glSharingTests, givenHwCommandQueueWhenAcquireIsCalledWithIncorrectWaitlistThenReturnError) {
    auto retVal = CL_SUCCESS;
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto buffer = castToObject(glBuffer);
    EXPECT_EQ(0u, buffer->acquireCount);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));

    retVal = clEnqueueAcquireGLObjects(commandQueue, 0, &glBuffer, 1, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal);

    retVal = clReleaseCommandQueue(commandQueue);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// With the handler enabled, an incomplete externally synchronized wait event ends up in the
// handler's register list and is polled by process() until it completes.
TEST_F(glSharingTests, givenEnabledAsyncEventsHandlerWhenAcquireGlObjectsIsCalledWithIncompleteExternallySynchronizedEventThenItIsAddedToAsyncEventsHandler) {
    // A real restore object (as used by the sibling tests) so the flag override below does not
    // leak into later tests; an empty smart pointer would restore nothing.
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(true);

    auto handler = new MockHandler(false);
    auto oldHandler = NEO::platform()->setAsyncEventsHandler(std::unique_ptr(handler));

    struct ExternallySynchronizedEvent : Event {
        ExternallySynchronizedEvent()
            : Event(nullptr, 0, 0, 0) {
        }

        bool isExternallySynchronized() const override {
            return true;
        }

        // Counts every poll; only transitions to CL_COMPLETE once the test flips `complete`.
        void updateExecutionStatus() override {
            ++updateCount;
            if (complete) {
                transitionExecutionStatus(CL_COMPLETE);
            }
        }

        bool complete = false;
        uint32_t updateCount = 0;
    };

    auto *event = new ExternallySynchronizedEvent;
    cl_event clEvent = static_cast(event);

    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr);
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr);

    EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &clEvent, nullptr));
    EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(commandQueue));
    EXPECT_EQ(CL_SUCCESS, clReleaseMemObject(glBuffer));

    EXPECT_LT(CL_SUCCESS, event->peekExecutionStatus());
    EXPECT_FALSE(handler->peekIsRegisterListEmpty());

    // Two polls while incomplete: each must bump the counter.
    uint32_t updateCount = event->updateCount;
    handler->process();
    EXPECT_LT(updateCount, event->updateCount);

    updateCount = event->updateCount;
    handler->process();
    EXPECT_LT(updateCount, event->updateCount);

    // Poll that observes completion.
    updateCount = event->updateCount;
    event->complete = true;
    handler->process();
    EXPECT_LE(updateCount, event->updateCount);

    // Once complete, further processing must not poll the event again.
    updateCount = event->updateCount;
    handler->process();
    EXPECT_EQ(updateCount, event->updateCount);

    event->release();
    NEO::platform()->setAsyncEventsHandler(std::move(oldHandler));
}

// With the handler disabled, the externally synchronized event is not registered.
TEST_F(glSharingTests, givenDisabledAsyncEventsHandlerWhenAcquireGlObjectsIsCalledWithIncompleteExternallySynchronizedEventThenItIsNotAddedToAsyncEventsHandler) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.EnableAsyncEventsHandler.set(false);

    auto handler = new MockHandler(false);
    auto oldHandler = NEO::platform()->setAsyncEventsHandler(std::unique_ptr(handler));

    struct ExternallySynchronizedEvent : Event {
        ExternallySynchronizedEvent()
            : Event(nullptr, 0, 0, 0) {
        }

        bool isExternallySynchronized() const override {
            return true;
        }
    };

    auto *event = new ExternallySynchronizedEvent;
    cl_event clEvent = static_cast(event);

    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr);
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr);

    EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &clEvent, nullptr));
    EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(commandQueue));
    EXPECT_EQ(CL_SUCCESS, clReleaseMemObject(glBuffer));

    EXPECT_LT(CL_SUCCESS, event->peekExecutionStatus());
    EXPECT_TRUE(handler->peekIsRegisterListEmpty());

    event->release();
    NEO::platform()->setAsyncEventsHandler(std::move(oldHandler));
}

// With the handler enabled, a UserEvent in the wait list must still not be registered.
TEST_F(glSharingTests, givenEnabledAsyncEventsHandlerWhenAcquireGlObjectsIsCalledWithIncompleteButNotExternallySynchronizedEventThenItIsNotAddedToAsyncEventsHandler) {
    DebugManagerStateRestore dbgRestore;
    // The handler has to be enabled for this case to differ from the "Disabled" test above;
    // with the flag off the register list would be empty regardless of the event kind.
    DebugManager.flags.EnableAsyncEventsHandler.set(true);

    auto handler = new MockHandler(false);
    auto oldHandler = NEO::platform()->setAsyncEventsHandler(std::unique_ptr(handler));

    auto *event = new UserEvent;
    cl_event clEvent = static_cast(event);

    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr);
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr);

    EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &clEvent, nullptr));
    EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(commandQueue));
    EXPECT_EQ(CL_SUCCESS, clReleaseMemObject(glBuffer));

    EXPECT_LT(CL_SUCCESS, event->peekExecutionStatus());
    EXPECT_TRUE(handler->peekIsRegisterListEmpty());

    event->release();
    NEO::platform()->setAsyncEventsHandler(std::move(oldHandler));
}

// A wait-list count of 1 with a null wait list is rejected by release; a valid release follows.
TEST_F(glSharingTests, givenHwCommandQueueWhenReleaseIsCalledWithIncorrectWaitlistThenReturnError) {
    auto retVal = CL_SUCCESS;
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto buffer = castToObject(glBuffer);
    EXPECT_EQ(0u, buffer->acquireCount);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));

    retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 1, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_EVENT_WAIT_LIST, retVal);

    retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseCommandQueue(commandQueue);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Acquire returns CL_INVALID_CONTEXT once the context's CLGL sharing functions are released.
TEST_F(glSharingTests, givenContextWithoutSharingWhenAcquireIsCalledThenErrorIsReturned) {
    auto retVal = CL_SUCCESS;
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto buffer = castToObject(glBuffer);
    EXPECT_EQ(0u, buffer->acquireCount);

    context.releaseSharingFunctions(CLGL_SHARING);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));

    retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);

    // Hand the sharing functions back to the context before cleanup.
    context.setSharingFunctions(mockGlSharingFunctions);

    retVal = clReleaseCommandQueue(commandQueue);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Release returns CL_INVALID_CONTEXT once the context's CLGL sharing functions are released.
TEST_F(glSharingTests, givenContextWithoutSharingWhenReleaseIsCalledThenErrorIsReturned) {
    auto retVal = CL_SUCCESS;
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto buffer = castToObject(glBuffer);
    EXPECT_EQ(0u, buffer->acquireCount);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));

    retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1u, buffer->acquireCount);

    context.releaseSharingFunctions(CLGL_SHARING);

    retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, nullptr);
    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);

    // Hand the sharing functions back to the context before cleanup.
    context.setSharingFunctions(mockGlSharingFunctions);

    retVal = clReleaseCommandQueue(commandQueue);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Events returned by acquire / release report the matching CL_COMMAND_*_GL_OBJECTS type.
TEST_F(glSharingTests, givenHwCommandQueueWhenAcquireAndReleaseCallsAreMadeWithEventsThenProperCmdTypeIsReturned) {
    cl_event retEvent;
    auto retVal = CL_SUCCESS;
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal);
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);

    retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, &retEvent);
    EXPECT_EQ(CL_SUCCESS, retVal);

    cl_command_type cmdType = 0;
    size_t sizeReturned = 0;

    retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(static_cast(CL_COMMAND_ACQUIRE_GL_OBJECTS), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    retVal = clReleaseEvent(retEvent);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glBuffer, 0, nullptr, &retEvent);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned);
    ASSERT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(static_cast(CL_COMMAND_RELEASE_GL_OBJECTS), cmdType);
    EXPECT_EQ(sizeof(cl_command_type), sizeReturned);

    retVal = clReleaseEvent(retEvent);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseCommandQueue(commandQueue);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Releasing GL objects waits on the queue's current task count (5 here).
HWTEST_F(glSharingTests, givenCommandQueueWhenReleaseGlObjectIsCalledThenFinishIsCalled) {
    MockCommandQueueHw mockCmdQueue(&context, context.getDevice(0), nullptr);
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr);

    EXPECT_EQ(CL_SUCCESS, clEnqueueAcquireGLObjects(&mockCmdQueue, 1, &glBuffer, 0, nullptr, nullptr));

    mockCmdQueue.taskCount = 5u;
    EXPECT_EQ(CL_SUCCESS, clEnqueueReleaseGLObjects(&mockCmdQueue, 1, &glBuffer, 0, nullptr, nullptr));
    EXPECT_EQ(5u, mockCmdQueue.latestTaskCountWaited);

    clReleaseMemObject(glBuffer);
}

// Every wrapper on the mock sharing functions forwards to the mocked GL entry point exactly once.
TEST_F(glSharingTests, givenMockGLWhenFunctionsAreCalledThenCallsAreReceived) {
    auto ptrToStruct = &mockGlSharing->m_clGlResourceInfo;
    auto glDisplay = (GLDisplay)1;
    auto glContext = (GLContext)1;
    mockGlSharing->overrideGetCurrentValues(glContext, glDisplay);

    EXPECT_EQ(1u, mockGlSharingFunctions->setSharedOCLContextState());
    EXPECT_EQ(1u, mockGlSharingFunctions->acquireSharedBufferINTEL(ptrToStruct));
    EXPECT_EQ(1u, mockGlSharingFunctions->acquireSharedRenderBuffer(ptrToStruct));
    EXPECT_EQ(1u, mockGlSharingFunctions->acquireSharedTexture(ptrToStruct));
    EXPECT_EQ(1u, mockGlSharingFunctions->releaseSharedBufferINTEL(ptrToStruct));
    EXPECT_EQ(1u, mockGlSharingFunctions->releaseSharedRenderBuffer(ptrToStruct));
    EXPECT_EQ(1u, mockGlSharingFunctions->releaseSharedTexture(ptrToStruct));
    EXPECT_EQ(glContext, mockGlSharingFunctions->getCurrentContext());
    EXPECT_EQ(glDisplay, mockGlSharingFunctions->getCurrentDisplay());
    EXPECT_EQ(1u, mockGlSharingFunctions->makeCurrent(glContext, glDisplay));

    EXPECT_EQ(1, mockGlSharing->dllParam->getGLSetSharedOCLContextStateReturnedValue());
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLAcquireSharedTextureCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSharedTextureCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentContextCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetCurrentDisplayCalled"));
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLMakeCurrentCalled"));
}

// After constructing the sharing functions, the DLL helper reports 1 for the
// shared-OCL-context-state value.
TEST(glSharingBasicTest, GivenSharingFunctionsWhenItIsConstructedThenOglContextFunctionIsCalled) {
    GLType GLHDCType = 0;
    GLContext GLHGLRCHandle = 0;
    GLDisplay GLHDCHandle = 0;
    glDllHelper getDllParam;

    GlSharingFunctionsMock glSharingFunctions(GLHDCType, GLHGLRCHandle, GLHGLRCHandle, GLHDCHandle);
    EXPECT_EQ(1, getDllParam.getGLSetSharedOCLContextStateReturnedValue());
}

// An unknown extension name is reported as unsupported.
TEST(glSharingBasicTest, givenInvalidExtensionNameWhenCheckGLExtensionSupportedThenReturnFalse) {
    MockGLSharingFunctions glSharingFunctions;
    const unsigned char invalidExtension[] = "InvalidExtensionName";

    bool RetVal = glSharingFunctions.isOpenGlExtensionSupported(invalidExtension);
    EXPECT_FALSE(RetVal);
}

// A null glGetIntegerv pointer must be tolerated; the query returns false.
TEST(glSharingBasicTest, givenglGetIntegervIsNullWhenCheckGLExtensionSupportedThenReturnFalse) {
    MockGLSharingFunctions glSharingFunctions;
    glSharingFunctions.glGetIntegerv = nullptr;
    const unsigned char invalidExtension[] = "InvalidExtensionName";

    bool RetVal = glSharingFunctions.isOpenGlExtensionSupported(invalidExtension);
    EXPECT_FALSE(RetVal);
}

// "GL_OES_framebuffer_object" is reported as supported by the mock.
TEST(glSharingBasicTest, givenValidExtensionNameWhenCheckGLExtensionSupportedThenReturnTrue) {
    MockGLSharingFunctions glSharingFunctions;
    const unsigned char supportGLOES[] = "GL_OES_framebuffer_object";

    bool RetVal = glSharingFunctions.isOpenGlExtensionSupported(supportGLOES);
    EXPECT_TRUE(RetVal);
}

// The default mock configuration reports GL sharing as supported.
TEST(glSharingBasicTest, givenWhenCheckGLSharingSupportedThenReturnTrue) {
    MockGLSharingFunctions glSharingFunctions;

    bool RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_TRUE(RetVal);
}

// A glGetString that always returns an empty string makes sharing unsupported.
TEST(glSharingBasicTest, givenVendorisNullWhenCheckGLSharingSupportedThenReturnFalse) {
    auto invalidGetStringFcn = [](GLenum name) {
        return (const GLubyte *)"";
    };

    MockGLSharingFunctions glSharingFunctions;
    glSharingFunctions.glGetString = invalidGetStringFcn;

    bool RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_FALSE(RetVal);
}

// An empty GL_VERSION string, and then additionally a truncated GL_VENDOR string, both yield false.
TEST(glSharingBasicTest, givenVersionisNullWhenCheckGLSharingSupportedThenReturnFalse) {
    MockGLSharingFunctions glSharingFunctions;

    glSharingFunctions.dllParam->glSetString("", GL_VERSION); // version returns null
    bool RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_FALSE(RetVal);

    glSharingFunctions.dllParam->glSetString("Int..", GL_VENDOR);
    RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_FALSE(RetVal);
}

// Walks through several GL_VERSION / indexed-extension string combinations and checks the
// support verdict after each change.
TEST(glSharingBasicTest, givenVersionisGlesWhenCheckGLSharingSupportedThenReturnFalse) {
    MockGLSharingFunctions glSharingFunctions;

    glSharingFunctions.dllParam->glSetString("OpenGL ES", GL_VERSION);
    bool RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_TRUE(RetVal);

    glSharingFunctions.dllParam->glSetString("OpenGL ES 1.", GL_VERSION);
    RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_TRUE(RetVal);

    glSharingFunctions.dllParam->glSetString("2.0", GL_VERSION);
    RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_TRUE(RetVal);

    glSharingFunctions.dllParam->glSetStringi("GL_EXT_framebuffer_o...", 1);
    RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_FALSE(RetVal);

    glSharingFunctions.dllParam->glSetStringi("GL_EXT_framebuffer_object", 1);
    RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_TRUE(RetVal);

    glSharingFunctions.dllParam->glSetString("OpenGL ES 1.", GL_VERSION);
    glSharingFunctions.dllParam->glSetStringi("GL_OES_framebuffer_o...", 0);
    RetVal = glSharingFunctions.isOpenGlSharingSupported();
    EXPECT_FALSE(RetVal);
}

// setSharedOCLContextState returns whatever value the DLL helper was configured with.
TEST(glSharingBasicTest, givensetSharedOCLContextStateWhenCallThenCorrectValue) {
    MockGLSharingFunctions glSharingFunctions;

    glSharingFunctions.dllParam->setGLSetSharedOCLContextStateReturnedValue(0u);
    EXPECT_EQ(0u, glSharingFunctions.setSharedOCLContextState());

    glSharingFunctions.dllParam->setGLSetSharedOCLContextStateReturnedValue(1u);
    EXPECT_EQ(1u, glSharingFunctions.setSharedOCLContextState());
}

// Construction populates every GL / WGL function pointer checked below.
TEST(glSharingBasicTest, givenGlSharingFunctionsWhenItIsConstructedThenFunctionsAreLoaded) {
    GLType GLHDCType = 0;
    GLContext GLHGLRCHandle = 0;
    GLDisplay GLHDCHandle = 0;

    GlSharingFunctionsMock glSharingFunctions(GLHDCType, GLHGLRCHandle, GLHGLRCHandle, GLHDCHandle);

    EXPECT_NE(nullptr, glSharingFunctions.GLGetCurrentContext);
    EXPECT_NE(nullptr, glSharingFunctions.GLGetCurrentDisplay);
    EXPECT_NE(nullptr, glSharingFunctions.glGetString);
    EXPECT_NE(nullptr, glSharingFunctions.glGetIntegerv);
    EXPECT_NE(nullptr, glSharingFunctions.pfnWglCreateContext);
    EXPECT_NE(nullptr, glSharingFunctions.pfnWglDeleteContext);
    EXPECT_NE(nullptr, glSharingFunctions.pfnWglShareLists);
    EXPECT_NE(nullptr, glSharingFunctions.wglMakeCurrent);
    EXPECT_NE(nullptr, glSharingFunctions.GLSetSharedOCLContextState);
    EXPECT_NE(nullptr, glSharingFunctions.GLAcquireSharedBuffer);
    EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSharedBuffer);
    EXPECT_NE(nullptr, glSharingFunctions.GLAcquireSharedRenderBuffer);
    EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSharedRenderBuffer);
    EXPECT_NE(nullptr, glSharingFunctions.GLAcquireSharedTexture);
    EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSharedTexture);
    EXPECT_NE(nullptr, glSharingFunctions.GLRetainSync);
    EXPECT_NE(nullptr, glSharingFunctions.GLReleaseSync);
    EXPECT_NE(nullptr, glSharingFunctions.GLGetSynciv);
    EXPECT_NE(nullptr, glSharingFunctions.glGetStringi);
}

// With numEntries = 1 only the first output slot is written, but the full count is reported.
TEST(glSharingBasicTest, givenNumEntriesLowerThanSupportedFormatsWhenGettingSupportedFormatsThenOnlyNumEntiresAreReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 0;
    cl_GLenum glFormats[3] = {};

    auto retVal = glSharingFunctions.getSupportedFormats(flags, image_type, 1, glFormats, &numImageFormats);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(static_cast(GlSharing::gLToCLFormats.size()), numImageFormats);
    EXPECT_NE(0u, glFormats[0]);
    EXPECT_EQ(0u, glFormats[1]);
    EXPECT_EQ(0u, glFormats[2]);
}

// For each accepted access flag, every returned format is a key of GlSharing::gLToCLFormats.
TEST(glSharingBasicTest, givenCorrectFlagsWhenGettingSupportedFormatsThenCorrectListIsReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags[] = {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE, CL_MEM_KERNEL_READ_AND_WRITE};
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_GLenum glFormats[3] = {};
    cl_uint numImageFormats = 0;

    for (auto flag : flags) {
        auto result = glSharingFunctions.getSupportedFormats(flag, image_type, arrayCount(glFormats), glFormats, &numImageFormats);

        EXPECT_EQ(CL_SUCCESS, result);
        EXPECT_EQ(static_cast(GlSharing::gLToCLFormats.size()), numImageFormats);

        for (uint32_t formatIndex = 0; formatIndex < arrayCount(glFormats); formatIndex++) {
            EXPECT_NE(GlSharing::gLToCLFormats.end(), GlSharing::gLToCLFormats.find(glFormats[formatIndex]));
        }
    }
}

// For each accepted image type, every returned format is a key of GlSharing::gLToCLFormats.
TEST(glSharingBasicTest, givenSupportedImageTypesWhenGettingSupportedFormatsThenCorrectListIsReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_types[] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER};
    cl_GLenum glFormats[3] = {};
    cl_uint numImageFormats = 0;

    for (auto image_type : image_types) {
        auto result = glSharingFunctions.getSupportedFormats(flags, image_type, arrayCount(glFormats), glFormats, &numImageFormats);

        EXPECT_EQ(CL_SUCCESS, result);
        EXPECT_EQ(static_cast(GlSharing::gLToCLFormats.size()), numImageFormats);

        for (auto glFormat : glFormats) {
            EXPECT_NE(GlSharing::gLToCLFormats.end(), GlSharing::gLToCLFormats.find(glFormat));
        }
    }
}

// numEntries = 0 with a null output array still reports the number of formats.
TEST(glSharingBasicTest, givenZeroNumEntriesWhenGettingSupportedFormatsThenNumFormatsIsReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_uint numImageFormats = 0;

    auto result = glSharingFunctions.getSupportedFormats(flags, image_type, 0, nullptr, &numImageFormats);

    EXPECT_EQ(CL_SUCCESS, result);
    EXPECT_EQ(static_cast(GlSharing::gLToCLFormats.size()), numImageFormats);
}

// A null numImageFormats pointer is accepted.
TEST(glSharingBasicTest, givenNullNumImageFormatsWhenGettingSupportedFormatsThenNumFormatsIsNotDereferenced) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;

    auto result = glSharingFunctions.getSupportedFormats(flags, image_type, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, result);
}

// CL_MEM_OBJECT_IMAGE2D_ARRAY is rejected with CL_INVALID_VALUE and a zero format count.
TEST(glSharingBasicTest, givenInvalidImageTypeWhenGettingSupportedFormatsThenIvalidValueErrorIsReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_READ_WRITE;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    cl_GLenum glFormats[3] = {};
    cl_uint numImageFormats = 0;

    auto result = glSharingFunctions.getSupportedFormats(flags, image_type, arrayCount(glFormats), glFormats, &numImageFormats);

    EXPECT_EQ(CL_INVALID_VALUE, result);
    EXPECT_EQ(0u, numImageFormats);
}

// CL_MEM_NO_ACCESS_INTEL is rejected with CL_INVALID_VALUE and a zero format count.
TEST(glSharingBasicTest, givenInvalidFlagsWhenGettingSupportedFormatsThenIvalidValueErrorIsReturned) {
    MockGLSharingFunctions glSharingFunctions;
    cl_mem_flags flags = CL_MEM_NO_ACCESS_INTEL;
    cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D;
    cl_GLenum glFormats[3] = {};
    cl_uint numImageFormats = 0;

    auto result = glSharingFunctions.getSupportedFormats(flags, image_type, arrayCount(glFormats), glFormats, &numImageFormats);

    EXPECT_EQ(CL_INVALID_VALUE, result);
    EXPECT_EQ(0u, numImageFormats);
}

// A 1D-buffer image created on top of a shared GL buffer shares the parent's sharing handler.
TEST_F(glSharingTests, givenContextWhenCreateFromSharedBufferThenSharedImageIsReturned) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, glBuffer);
    auto parentBuffer = castToObject(glBuffer);

    cl_image_format format = {CL_RGBA, CL_FLOAT};
    cl_image_desc image_desc = {CL_MEM_OBJECT_IMAGE1D_BUFFER, 1, 1, 1, 1, 0, 0, 0, 0, {glBuffer}};
    cl_mem image = clCreateImage(&context, CL_MEM_READ_WRITE, &format, &image_desc, 0, &retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);
    // Guard the handle that was just created (glBuffer was already checked above) before it
    // is dereferenced below.
    ASSERT_NE(nullptr, image);
    auto childImage = castToObject(image);

    EXPECT_EQ(parentBuffer->peekSharingHandler(), childImage->peekSharingHandler());

    retVal = clReleaseMemObject(image);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Map and unmap of a shared buffer each bump the queue's task count and track the mapped range.
TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledThenCopyOnGpu) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto buffer = castToObject(glBuffer);
    EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr

    auto commandQueue = CommandQueue::create(&context, context.getDevice(0), 0, false, retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    size_t offset = 1;
    auto taskCount = commandQueue->taskCount;
    auto mappedPtr = clEnqueueMapBuffer(commandQueue, glBuffer, CL_TRUE, CL_MAP_WRITE, offset, (buffer->getSize() - offset), 0, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(taskCount + 1, commandQueue->taskCount);

    MapInfo mapInfo;
    EXPECT_TRUE(buffer->findMappedPtr(mappedPtr, mapInfo));
    EXPECT_EQ(mappedPtr, ptrOffset(buffer->getAllocatedMapPtr(), offset));
    EXPECT_EQ(mapInfo.size[0], buffer->getSize() - offset);
    EXPECT_EQ(mapInfo.offset[0], offset);

    retVal = commandQueue->enqueueUnmapMemObject(buffer, mappedPtr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(taskCount + 2, commandQueue->taskCount);

    EXPECT_FALSE(buffer->findMappedPtr(mappedPtr, mapInfo)); // delete in destructor

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete commandQueue;
}

// Passing a null errcode_ret to clCreateFromGLBuffer still yields a buffer.
TEST_F(glSharingTests, givenClGLBufferWhenretValIsNotPassedToCreateFunctionThenBufferIsCreated) {
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, nullptr);
    ASSERT_NE(nullptr, glBuffer);

    auto retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);
}

// Mapping the same shared buffer twice returns the same pointer.
TEST_F(glSharingTests, givenClGLBufferWhenMapAndUnmapBufferIsCalledTwiceThenReuseStorage) {
    auto retVal = CL_SUCCESS;
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto buffer = castToObject(glBuffer);
    EXPECT_EQ(buffer->getCpuAddressForMemoryTransfer(), nullptr); // no cpu ptr

    auto commandQueue = CommandQueue::create(&context, context.getDevice(0), 0, false, retVal);
    ASSERT_EQ(CL_SUCCESS, retVal);

    auto mappedPtr = clEnqueueMapBuffer(commandQueue, glBuffer, CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = clEnqueueUnmapMemObject(commandQueue, glBuffer, mappedPtr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto mappedPtr2 = clEnqueueMapBuffer(commandQueue, glBuffer, CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(mappedPtr, mappedPtr2);
    retVal = clEnqueueUnmapMemObject(commandQueue, glBuffer, mappedPtr, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);

    retVal = clReleaseMemObject(glBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    delete commandQueue;
}

// A null context is rejected with CL_INVALID_CONTEXT.
TEST(APIclCreateEventFromGLsyncKHR, givenInvalidContexWhenCreateThenReturnError) {
    cl_int retVal = CL_SUCCESS;
    cl_GLsync sync = {0};

    auto event = clCreateEventFromGLsyncKHR(nullptr, sync, &retVal);

    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, event);
}

// A context whose CLGL sharing functions were reset is rejected with CL_INVALID_CONTEXT.
TEST_F(glSharingTests, givenContextWithoutSharingWhenCreateEventFromGLThenErrorIsReturned) {
    context.resetSharingFunctions(CLGL_SHARING);
    cl_int retVal = CL_SUCCESS;
    cl_GLsync sync = {0};

    auto event = clCreateEventFromGLsyncKHR(&context, sync, &retVal);

    EXPECT_EQ(CL_INVALID_CONTEXT, retVal);
    EXPECT_EQ(nullptr, event);
}

// When the current GL context is already the main or the backup handle, the guard's
// constructor performs no makeCurrent call.
TEST(glSharingContextSwitch, givenContextOrBkpContextHandleAsCurrentWhenSwitchAttemptedThenDontMakeSwitch) {
    GLType type = 0;
    auto context = (GLContext)1;
    auto display = (GLDisplay)2;
    auto bkpContext = (GLContext)3;
    MockGlSharing mockGlSharing(type, context, bkpContext, display);

    mockGlSharing.overrideGetCurrentValues(context, display);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == context);
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentDisplay == display);
    }
    EXPECT_EQ(0, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
    EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLGetCurrentContextCalled"));
    EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLGetCurrentDisplayCalled"));

    mockGlSharing.overrideGetCurrentValues(bkpContext, display);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);
        EXPECT_EQ(0, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == bkpContext);
    }
    EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor
    EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext);
}

// Unknown current context, makeCurrent never fails: switch to the main handle, restore on exit.
TEST(glSharingContextSwitch, givenUnknownCurrentContextAndNoFailsOnCallWhenSwitchAttemptedThenMakeSwitchToCtxHandle) {
    GLType type = 0;
    auto context = (GLContext)1;
    auto display = (GLDisplay)2;
    auto bkpContext = (GLContext)3;
    auto unknownContext = (GLContext)4;
    MockGlSharing mockGlSharing(type, context, bkpContext, display);

    mockGlSharing.overrideGetCurrentValues(unknownContext, display, false);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == unknownContext);
        EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == context);
    }
    EXPECT_EQ(2, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor
    EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == unknownContext);
}

// Unknown current context, one makeCurrent failure: the guard ends up on the backup handle.
TEST(glSharingContextSwitch, givenUnknownCurrentContextAndOneFailOnCallWhenSwitchAttemptedThenMakeSwitchToBkpCtxHandle) {
    GLType type = 0;
    auto context = (GLContext)1;
    auto display = (GLDisplay)2;
    auto bkpContext = (GLContext)3;
    auto unknownContext = (GLContext)4;
    MockGlSharing mockGlSharing(type, context, bkpContext, display);

    mockGlSharing.overrideGetCurrentValues(unknownContext, display, true, 1);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == unknownContext);
        EXPECT_EQ(2, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext);
    }
    EXPECT_EQ(3, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor
    EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == unknownContext);
}

// Unknown current context, five makeCurrent failures: six calls are made in total inside the guard.
TEST(glSharingContextSwitch, givenUnknownCurrentContextAndMultipleFailOnCallWhenSwitchAttemptedThenMakeSwitchToBkpCtxHandleUntilSuccess) {
    GLType type = 0;
    auto context = (GLContext)1;
    auto display = (GLDisplay)2;
    auto bkpContext = (GLContext)3;
    auto unknownContext = (GLContext)4;
    MockGlSharing mockGlSharing(type, context, bkpContext, display);

    mockGlSharing.overrideGetCurrentValues(unknownContext, display, true, 5);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == unknownContext);
        EXPECT_EQ(6, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext);
    }
    EXPECT_EQ(7, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor
    EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == unknownContext);
}

// With no current context (0) the guard switches to the backup handle and restores 0 on exit.
TEST(glSharingContextSwitch, givenZeroCurrentContextWhenSwitchAttemptedThenMakeSwitchToBkpCtxHandle) {
    GLType type = 0;
    auto context = (GLContext)1;
    auto display = (GLDisplay)2;
    auto bkpContext = (GLContext)3;
    auto zeroContext = (GLContext)0;
    MockGlSharing mockGlSharing(type, context, bkpContext, display);

    mockGlSharing.overrideGetCurrentValues(zeroContext, display, false);
    {
        GLContextGuard guard(*mockGlSharing.sharingFunctions);
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().currentContext == zeroContext);
        EXPECT_EQ(1, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled"));
        EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == bkpContext);
    }
    EXPECT_EQ(2, mockGlSharing.dllParam->getParam("GLMakeCurrentCalled")); // destructor
    EXPECT_TRUE(mockGlSharing.dllParam->getGlMockReturnedValues().madeCurrentContext == zeroContext);
}

// Destroying a default-constructed GLSharingFunctionsWindows must not bump GLDeleteContextCalled.
TEST(glSharingContextSwitch, givenSharingFunctionsWhenGlDeleteContextIsNotPresentThenItIsNotCalled) {
    auto glSharingFunctions = new GLSharingFunctionsWindows();
    glDllHelper dllParam;
    auto currentGlDeleteContextCalledCount = dllParam.getParam("GLDeleteContextCalled");

    delete glSharingFunctions;

    EXPECT_EQ(currentGlDeleteContextCalledCount, dllParam.getParam("GLDeleteContextCalled"));
}

// An event created from a GL sync object starts as CL_SUBMITTED / levelNotReady, retains the
// sync on creation and releases it when the event is released.
HWTEST_F(glSharingTests, givenSyncObjectWhenCreateEventIsCalledThenCreateGLSyncObj) {
    cl_int retVal = CL_SUCCESS;
    GLsync glSync = {0};
    auto event = clCreateEventFromGLsyncKHR(&context, glSync, &retVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(nullptr, event);

    auto &csr = reinterpret_cast(context.getDevice(0))->getUltCommandStreamReceiver();
    csr.taskLevel = 123;

    auto eventObj = castToObject(event);
    EXPECT_TRUE(eventObj->getCommandType() == CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR);
    EXPECT_TRUE(eventObj->peekExecutionStatus() == CL_SUBMITTED);
    EXPECT_EQ(CompletionStamp::levelNotReady, eventObj->taskLevel);
    EXPECT_EQ(CompletionStamp::levelNotReady, eventObj->getTaskLevel());
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLRetainSyncCalled"));

    eventObj->setStatus(CL_COMPLETE);
    EXPECT_EQ(0u, eventObj->getTaskLevel());

    clReleaseEvent(event);
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLReleaseSyncCalled"));
}

// Passing a GL-sync event in the acquire wait list refreshes its status from the (signaled) sync.
HWTEST_F(glSharingTests, givenEventCreatedFromFenceObjectWhenItIsPassedToAcquireThenItsStatusIsUpdated) {
    GLsync glSync = {0};
    auto retVal = CL_SUCCESS;
    auto event = clCreateEventFromGLsyncKHR(&context, glSync, &retVal);
    auto glBuffer = clCreateFromGLBuffer(&context, 0, bufferId, &retVal);
    auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &retVal);

    mockGlSharing->setGetSyncivReturnValue(GL_SIGNALED);

    auto neoEvent = castToObject(event);
    EXPECT_FALSE(neoEvent->isReadyForSubmission());

    retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glBuffer, 1, &event, nullptr);
    EXPECT_TRUE(neoEvent->isReadyForSubmission());
    EXPECT_EQ(CL_SUCCESS, retVal);

    clReleaseCommandQueue(commandQueue);
    clReleaseMemObject(glBuffer);
    clReleaseEvent(event);
}

// GlSyncEvent identifies itself as externally synchronized.
TEST_F(glSharingTests, glSyncEventReportsAsExternallySynchronized) {
    GLsync glSync = {0};
    auto syncEvent = GlSyncEvent::create(context, glSync, nullptr);
    ASSERT_NE(nullptr, syncEvent);

    EXPECT_TRUE(syncEvent->isExternallySynchronized());

    syncEvent->release();
}

// Each status update queries GLGetSynciv; the event completes once the sync reads GL_SIGNALED.
TEST_F(glSharingTests, givenSyncEventWhenUpdateExecutionStatusIsCalledThenGLGetSyncivCalled) {
    GLsync glSync = {0};
    auto syncEvent = GlSyncEvent::create(context, glSync, nullptr);
    ASSERT_NE(nullptr, syncEvent);

    mockGlSharing->setGetSyncivReturnValue(GL_UNSIGNALED);
    syncEvent->updateExecutionStatus();
    EXPECT_EQ(1, mockGlSharing->dllParam->getParam("GLGetSyncivCalled"));
    EXPECT_TRUE(syncEvent->updateEventAndReturnCurrentStatus() == CL_SUBMITTED);
    EXPECT_EQ(2, mockGlSharing->dllParam->getParam("GLGetSyncivCalled")); // updateExecutionStatus called in peekExecutionStatus

    mockGlSharing->setGetSyncivReturnValue(GL_SIGNALED);
    syncEvent->updateExecutionStatus();
    EXPECT_EQ(3, mockGlSharing->dllParam->getParam("GLGetSyncivCalled"));
    EXPECT_TRUE(syncEvent->peekExecutionStatus() == CL_COMPLETE);

    delete syncEvent;
}

// A context whose sharing table was cleared returns nullptr from getSharing.
TEST_F(glSharingTests, givenContextWhenEmptySharingTableEmptyThenReturnsNullptr) {
    MockContext context;
    context.clearSharingFunctions();

    GLSharingFunctions *sharingF = context.getSharing();

    EXPECT_EQ(sharingF, nullptr);
}

// getGlArbSyncEvent returns nullptr for an event it was never given before.
TEST_F(glSharingTests, givenUnknownBaseEventWhenGetGlArbSyncEventIsCalledThenNullptrIsReturned) {
    auto *sharing = context.getSharing();
    ASSERT_NE(nullptr, sharing);
    auto event = new MockEvent();
    // NOTE(review): this local MockContext shadows the fixture's `context` and is not used
    // afterwards - confirm whether it is still needed.
    MockContext context;

    EXPECT_EQ(nullptr, sharing->getGlArbSyncEvent(*event));

    event->release();
}
TEST_F(glSharingTests, givenKnownBaseEventWhenGetGlArbSyncEventIsCalledThenProperArbEventIsReturned) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto baseEvent = new MockEvent; auto arbSyncEvent = reinterpret_cast(0x1c); sharing->glArbEventMapping[baseEvent] = arbSyncEvent; EXPECT_EQ(arbSyncEvent, sharing->getGlArbSyncEvent(*baseEvent)); baseEvent->release(); } TEST_F(glSharingTests, givenKnownBaseEventWhenRemoveGlArbSyncEventMappingIsCalledThenProperArbEventIsRemovedFromMap) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto baseEvent = new MockEvent; auto arbSyncEvent = reinterpret_cast(0x1c); sharing->glArbEventMapping[baseEvent] = arbSyncEvent; EXPECT_NE(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); sharing->removeGlArbSyncEventMapping(*baseEvent); EXPECT_EQ(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); baseEvent->release(); } TEST_F(glSharingTests, givenUnknownBaseEventWhenRemoveGlArbSyncEventMappingIsCalledThenProperArbEventIsRemovedFromMap) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto baseEvent = new MockEvent; auto unknownBaseEvent = new MockEvent; auto arbSyncEvent = reinterpret_cast(0x1c); sharing->glArbEventMapping[baseEvent] = arbSyncEvent; EXPECT_NE(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); EXPECT_EQ(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(unknownBaseEvent)); sharing->removeGlArbSyncEventMapping(*unknownBaseEvent); EXPECT_NE(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(baseEvent)); EXPECT_EQ(sharing->glArbEventMapping.end(), sharing->glArbEventMapping.find(unknownBaseEvent)); unknownBaseEvent->release(); baseEvent->release(); } TEST_F(glSharingTests, givenUnknownBaseEventWhenGetOrCreateGlArbSyncEventIsCalledThenNewArbEventIsReturned) { auto *sharing = static_cast(context.getSharing()); 
sharing->pfnGlArbSyncObjectCleanup = glArbSyncObjectCleanupMockDoNothing; ASSERT_NE(nullptr, sharing); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); ASSERT_NE(nullptr, commandQueue); auto baseEvent = new Event(castToObjectOrAbort(commandQueue), CL_COMMAND_RELEASE_GL_OBJECTS, -1, -1); auto syncEv = sharing->getOrCreateGlArbSyncEvent>(*baseEvent); ASSERT_NE(nullptr, syncEv); EXPECT_NE(nullptr, syncEv->getSyncInfo()); std::unique_ptr osInterface{new OSInterface}; static_cast *>(syncEv)->osInterface = osInterface.get(); syncEv->release(); clReleaseCommandQueue(commandQueue); } TEST_F(glSharingTests, givenKnownBaseEventWhenGetOrCreateGlArbSyncEventIsCalledThenOldArbEventIsReused) { auto *sharing = static_cast(context.getSharing()); sharing->pfnGlArbSyncObjectCleanup = glArbSyncObjectCleanupMockDoNothing; ASSERT_NE(nullptr, sharing); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); ASSERT_NE(nullptr, commandQueue); auto baseEvent = new Event(castToObjectOrAbort(commandQueue), CL_COMMAND_RELEASE_GL_OBJECTS, -1, -1); auto syncEv = sharing->getOrCreateGlArbSyncEvent>(*baseEvent); ASSERT_NE(nullptr, syncEv); EXPECT_EQ(syncEv, sharing->getOrCreateGlArbSyncEvent>(*baseEvent)); std::unique_ptr osInterface{new OSInterface}; static_cast *>(syncEv)->osInterface = osInterface.get(); syncEv->release(); clReleaseCommandQueue(commandQueue); } TEST_F(glSharingTests, WhenArbSyncEventCreationFailsThenGetOrCreateGlArbSyncEventReturnsNull) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, nullptr); ASSERT_NE(nullptr, commandQueue); auto baseEvent = new Event(castToObjectOrAbort(commandQueue), CL_COMMAND_RELEASE_GL_OBJECTS, -1, -1); auto syncEv = sharing->getOrCreateGlArbSyncEvent>(*baseEvent); EXPECT_EQ(nullptr, syncEv); baseEvent->release(); clReleaseCommandQueue(commandQueue); } TEST_F(glSharingTests, 
whenGetGlDeviceHandleIsCalledThenProperHandleIsReturned) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); sharing->GLDeviceHandle = 0x2c; EXPECT_EQ(0x2cU, sharing->getGLDeviceHandle()); } TEST_F(glSharingTests, whenGetGlContextHandleIsCalledThenProperHandleIsReturned) { auto *sharing = static_cast(context.getSharing()); ASSERT_NE(nullptr, sharing); sharing->GLContextHandle = 0x2c; EXPECT_EQ(0x2cU, sharing->getGLContextHandle()); } TEST_F(glSharingTests, givenClGLBufferWhenCreatedThenSharedBufferAllocatoinTypeIsSet) { std::unique_ptr buffer(GlBuffer::createSharedGlBuffer(&context, CL_MEM_READ_WRITE, bufferId, nullptr)); ASSERT_NE(nullptr, buffer->getGraphicsAllocation()); EXPECT_EQ(GraphicsAllocation::AllocationType::SHARED_BUFFER, buffer->getGraphicsAllocation()->getAllocationType()); } using clGetSupportedGLTextureFormatsINTELTests = glSharingTests; TEST_F(clGetSupportedGLTextureFormatsINTELTests, givenContextWithoutGlSharingWhenGettingFormatsThenInvalidContextErrorIsReturned) { MockContext context; auto retVal = clGetSupportedGLTextureFormatsINTEL(&context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } TEST_F(clGetSupportedGLTextureFormatsINTELTests, givenValidInputsWhenGettingFormatsThenSuccesAndValidFormatsAreReturned) { cl_uint numFormats = 0; cl_GLenum glFormats[2] = {}; auto glFormatsCount = static_cast(arrayCount(glFormats)); auto retVal = clGetSupportedGLTextureFormatsINTEL(&context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, glFormatsCount, glFormats, &numFormats); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(0u, numFormats); for (uint32_t i = 0; i < glFormatsCount; i++) { EXPECT_NE(GlSharing::gLToCLFormats.end(), GlSharing::gLToCLFormats.find(glFormats[i])); } } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/gl_texture_tests.cpp000066400000000000000000000650331363734646600316630ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/gmm_types_converter.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/gl/gl_texture.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/gl/windows/mock_gl_sharing_windows.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; class GlSharingTextureTests : public ::testing::Test { public: // temp solution - we need to query size from GMM: class TempMM : public MockMemoryManager { public: using MockMemoryManager::MockMemoryManager; GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { auto alloc = OsAgnosticMemoryManager::createGraphicsAllocationFromSharedHandle(handle, properties, requireSpecificBitness); if (useForcedGmm) { alloc->setDefaultGmm(forceGmm.get()); } return alloc; } void freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) override { if (useForcedGmm) { forceGmm.release(); } OsAgnosticMemoryManager::freeGraphicsMemoryImpl(gfxAllocation); } bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) override { mapAuxGpuVACalled++; return false; } uint32_t mapAuxGpuVACalled = 0u; size_t forceAllocationSize; std::unique_ptr forceGmm; bool useForcedGmm = true; }; void SetUp() override { executionEnvironment = platform()->peekExecutionEnvironment(); imgDesc = {}; imgDesc.image_type = CL_MEM_OBJECT_IMAGE1D; imgDesc.image_width = 10; auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); tempMM = new TempMM(*executionEnvironment); 
executionEnvironment->memoryManager.reset(tempMM); device = std::make_unique(MockDevice::create(executionEnvironment, 0)); clContext = std::make_unique(device.get()); mockGlSharingFunctions = glSharing->sharingFunctions.release(); clContext->setSharingFunctions(mockGlSharingFunctions); tempMM->forceGmm = MockGmm::queryImgParams(device->getGmmClientContext(), imgInfo); tempMM->forceAllocationSize = textureSize; textureSize = imgInfo.size; textureId = 1; } void setUnifiedAuxSurf() { tempMM->useForcedGmm = true; auto mockGmmResInfo = reinterpret_cast<::testing::NiceMock *>(tempMM->forceGmm->gmmResourceInfo.get()); mockGmmResInfo->setUnifiedAuxTranslationCapable(); } ExecutionEnvironment *executionEnvironment; cl_image_desc imgDesc; TempMM *tempMM; std::unique_ptr device; std::unique_ptr clContext; std::unique_ptr glSharing = std::make_unique(); GlSharingFunctionsMock *mockGlSharingFunctions; size_t textureSize; unsigned int textureId; }; TEST_F(GlSharingTextureTests, givenMockGlWhen1dGlTextureIsCreatedThenMemObjectHasGlHandler) { cl_int retVal = CL_INVALID_VALUE; glSharing->uploadDataToTextureInfo(textureId); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); EXPECT_NE(nullptr, glTexture->getGraphicsAllocation()); EXPECT_EQ(textureSize, glTexture->getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); //input auto handler = glTexture->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto glHandler = static_cast(handler); EXPECT_EQ(glHandler->peekFunctionsHandler(), mockGlSharingFunctions); delete glTexture; } class FailingMemoryManager : public MockMemoryManager { public: GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool 
requireSpecificBitness) override { return nullptr; } }; TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedFromWrongHandleThenErrorAndNoTextureIsReturned) { auto tempMemoryManager = clContext->getMemoryManager(); tempMM->useForcedGmm = false; auto memoryManager = std::unique_ptr(new FailingMemoryManager()); clContext->memoryManager = memoryManager.get(); auto retVal = CL_SUCCESS; auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, &retVal); EXPECT_EQ(nullptr, glTexture); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); clContext->memoryManager = tempMemoryManager; } GLboolean OSAPI mockGLAcquireSharedTexture(GLDisplay, GLContext, GLContext, GLvoid *pResourceInfo) { auto pTextureInfo = (CL_GL_RESOURCE_INFO *)pResourceInfo; glDllHelper dllParam; pTextureInfo->globalShareHandle = dllParam.getTextureInfo().globalShareHandle; pTextureInfo->globalShareHandleMCS = dllParam.getTextureInfo().globalShareHandleMCS; if (pTextureInfo->target == GL_TEXTURE_BUFFER) { // size and width for texture buffer are queried from textureInfo - not from gmm pTextureInfo->textureBufferSize = dllParam.getTextureInfo().textureBufferSize; pTextureInfo->textureBufferWidth = dllParam.getTextureInfo().textureBufferWidth; } pTextureInfo->pGmmResInfo = dllParam.getTextureInfo().pGmmResInfo; pTextureInfo->glInternalFormat = 99999; pTextureInfo->glHWFormat = dllParam.getTextureInfo().glHWFormat; pTextureInfo->textureBufferOffset = dllParam.getTextureInfo().textureBufferOffset; dllParam.loadTexture(*pTextureInfo); return (GLboolean)1; }; TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedFromIncorrectFormatThenErrorAndNoTextureIsReturned) { mockGlSharingFunctions->setGLAcquireSharedTextureMock(mockGLAcquireSharedTexture); auto retVal = CL_SUCCESS; auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, &retVal); EXPECT_EQ(nullptr, glTexture); 
EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); } TEST_F(GlSharingTextureTests, givenMockGlWhenRenderBufferTextureIsCreatedThenMemObjectHasGlHandler) { cl_int retVal = CL_INVALID_VALUE; glSharing->uploadDataToTextureInfo(textureId); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_RENDERBUFFER_EXT, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); EXPECT_NE(nullptr, glTexture->getGraphicsAllocation()); EXPECT_EQ(textureSize, glTexture->getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); auto handler = glTexture->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto glHandler = static_cast(handler); EXPECT_EQ(glHandler->peekFunctionsHandler(), mockGlSharingFunctions); delete glTexture; } TEST_F(GlSharingTextureTests, givenGmmResourceAsInputeWhenTextureIsCreatedItHasGmmSet) { cl_int retVal = CL_INVALID_VALUE; glSharing->m_textureInfoOutput.globalShareHandle = textureId; glSharing->m_textureInfoOutput.pGmmResInfo = this->tempMM->forceGmm->gmmResourceInfo->peekHandle(); this->tempMM->useForcedGmm = false; glSharing->m_textureInfoOutput.pGmmResInfo = this->tempMM->forceGmm->gmmResourceInfo->peekHandle(); glSharing->uploadDataToTextureInfo(); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), (cl_mem_flags)0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); EXPECT_NE(nullptr, glTexture->getGraphicsAllocation()); ASSERT_NE(nullptr, glTexture->getGraphicsAllocation()->getDefaultGmm()); ASSERT_NE(nullptr, glTexture->getGraphicsAllocation()->getDefaultGmm()->gmmResourceInfo->peekHandle()); delete glTexture; } TEST_F(GlSharingTextureTests, givenDifferentHwFormatWhenSurfaceFormatInfoIsSetThenOverwrite) { cl_int retVal = CL_INVALID_VALUE; cl_image_format imageFormat = {}; GlTexture::setClImageFormat(GL_DEPTH32F_STENCIL8, 
imageFormat); auto format = Image::getSurfaceFormatFromTable(CL_MEM_READ_ONLY, &imageFormat, defaultHwInfo->capabilityTable.clVersionSupport); ASSERT_NE(format, nullptr); auto newHwFormat = 217u; EXPECT_TRUE(format->surfaceFormat.GenxSurfaceFormat != newHwFormat); glSharing->m_textureInfoOutput.glHWFormat = newHwFormat; glSharing->m_textureInfoOutput.glInternalFormat = GL_DEPTH32F_STENCIL8; glSharing->uploadDataToTextureInfo(); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, textureId, &retVal); ASSERT_NE(nullptr, glTexture); EXPECT_TRUE(newHwFormat == glTexture->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat); delete glTexture; } TEST_F(GlSharingTextureTests, givenGLRGB10FormatWhenSharedGlTextureIsCreatedThenItHasCorrectGenxSurfaceFormatAssigned) { cl_int retVal = CL_INVALID_VALUE; glSharing->m_textureInfoOutput.glInternalFormat = GL_RGB10; glSharing->m_textureInfoOutput.glHWFormat = GFX3DSTATE_SURFACEFORMAT_R16G16B16X16_UNORM; glSharing->uploadDataToTextureInfo(); std::unique_ptr glTexture(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, textureId, &retVal)); ASSERT_NE(nullptr, glTexture); EXPECT_EQ(glTexture->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat, GFX3DSTATE_SURFACEFORMAT_R16G16B16A16_UNORM); } TEST_F(GlSharingTextureTests, givenContextAnd1dTextureWhenClCreateFromGlTextureIsCalledThenImageIsReturned) { cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenContextWithoutSharingAnd1dTextureWhenClCreateFromGlTextureIsCalledThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = 
clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenContextAndRenderBufferTextureWhenClCreateFromGlTextureIsCalledThenImageIsReturned) { cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = clCreateFromGLRenderbuffer(clContext.get(), 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenContextWithoutSharingAndRenderBufferTextureWhenClCreateFromGlTextureIsCalledThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); cl_int retVal = CL_INVALID_GL_OBJECT; auto glImage = clCreateFromGLRenderbuffer(clContext.get(), 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenGlCl1dTextureWhenAskedForCLGLGetInfoThenIdAndTypeIsReturned) { auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glImage, &objectType, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_TEXTURE1D); EXPECT_EQ(objectId, textureId); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture2dThenImageObjectIsReturned) { auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture2D(clContext.get(), 0, GL_TEXTURE_2D, 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glImage, &objectType, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); 
EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_TEXTURE2D); EXPECT_EQ(objectId, textureId); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenContextWithoutSharingAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture2dThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture2D(clContext.get(), 0, GL_TEXTURE_2D, 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture3dThenImageObjectIsReturned) { auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture3D(clContext.get(), 0, GL_TEXTURE_3D, 0, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); cl_gl_object_type objectType = 0u; cl_GLuint objectId = 0u; retVal = clGetGLObjectInfo(glImage, &objectType, &objectId); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(objectType, (cl_gl_object_type)CL_GL_OBJECT_TEXTURE3D); EXPECT_EQ(objectId, textureId); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenContextWithoutSharingAndGlTextureWhenItIsCreatedWithClCreateFromGlTexture3dThenErrorIsReturned) { tempMM->useForcedGmm = false; clContext->resetSharingFunctions(CLGL_SHARING); auto retVal = CL_SUCCESS; auto glImage = clCreateFromGLTexture3D(clContext.get(), 0, GL_TEXTURE_3D, 0, textureId, &retVal); ASSERT_EQ(CL_INVALID_CONTEXT, retVal); ASSERT_EQ(nullptr, glImage); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlTextureWhenAcquireIsCalledThenAcquireCountIsIncremented) { glSharing->uploadDataToTextureInfo(textureId); auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(clContext.get(), clContext->getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glImage = clCreateFromGLTexture(clContext.get(), 0, 
GL_TEXTURE_1D, 0, textureId, &retVal); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3, glSharing->dllParam->getParam("GLAcquireSharedTextureCalled")); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, verifyGlTextureBufferOffset) { glSharing->uploadDataToTextureInfo(textureId); auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(clContext.get(), clContext->getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glImage = clCreateFromGLTexture(clContext.get(), 0, GL_TEXTURE_1D, 0, textureId, &retVal); EXPECT_NE(glImage, nullptr); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); auto memObj = castToObject(glImage); EXPECT_NE(memObj, nullptr); EXPECT_EQ(memObj->getGraphicsAllocation()->getAllocationOffset(), 0u); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); glSharing->uploadTextureBufferOffsetToTextureInfo(0x660); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); memObj = castToObject(glImage); EXPECT_NE(memObj, nullptr); EXPECT_EQ(memObj->getGraphicsAllocation()->getAllocationOffset(), 0x660u); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseCommandQueue(commandQueue); 
EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenHwCommandQueueAndGlRenderBufferWhenAcquireIsCalledThenAcquireCountIsIncremented) { glSharing->uploadDataToTextureInfo(textureId); auto retVal = CL_SUCCESS; auto commandQueue = clCreateCommandQueue(clContext.get(), clContext->getDevice(0), 0, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); auto glImage = clCreateFromGLRenderbuffer(clContext.get(), 0, textureId, &retVal); EXPECT_EQ(1, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(2, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); retVal = clEnqueueReleaseGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueAcquireGLObjects(commandQueue, 1, &glImage, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(3, glSharing->dllParam->getParam("GLAcquireSharedRenderBufferCalled")); retVal = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, retVal); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_F(GlSharingTextureTests, givenSharedGlTextureWhenItIsAcquireCountIsDecrementedToZeroThenCallReleaseFunction) { std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, textureId, nullptr)); auto sharingHandler = image->peekSharingHandler(); sharingHandler->acquire(image.get()); sharingHandler->acquire(image.get()); sharingHandler->release(image.get()); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedTextureCalled")); sharingHandler->release(image.get()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLReleaseSharedTextureCalled")); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled")); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); } 
TEST_F(GlSharingTextureTests, givenSharedRenderBufferWhenItIsAcquireCountIsDecrementedToZeroThenCallReleaseFunction) { std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_RENDERBUFFER_EXT, 0, textureId, nullptr)); auto sharingHandler = image->peekSharingHandler(); sharingHandler->acquire(image.get()); sharingHandler->acquire(image.get()); sharingHandler->release(image.get()); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled")); sharingHandler->release(image.get()); EXPECT_EQ(1, glSharing->dllParam->getParam("GLReleaseSharedRenderBufferCalled")); EXPECT_EQ(0, glSharing->dllParam->getParam("GLReleaseSharedTextureCalled")); EXPECT_EQ(textureId, glSharing->dllParam->getTextureInfo().name); } TEST_F(GlSharingTextureTests, givenMultisampleTextureWithMoreThanOneSampleWhenAskedForNumSamplesThenReturnCorrectValue) { GLsizei expectedNumSamples = 2; glSharing->m_textureInfoOutput.numberOfSamples = expectedNumSamples; glSharing->uploadDataToTextureInfo(); std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_TEXTURE_2D_MULTISAMPLE, 0, textureId, nullptr)); GLsizei numSamples = 0; size_t retSize = 0; auto retVal = clGetGLTextureInfo(image.get(), CL_GL_NUM_SAMPLES, sizeof(GLsizei), &numSamples, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedNumSamples, numSamples); EXPECT_EQ(sizeof(GLsizei), retSize); } TEST_F(GlSharingTextureTests, givenTextureWithOneSampleWhenAskedForNumSamplesThenReturnZero) { glSharing->m_textureInfoOutput.numberOfSamples = 1; glSharing->uploadDataToTextureInfo(); std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_TEXTURE_2D_MULTISAMPLE, 0, textureId, nullptr)); GLenum numSamples = 0; size_t retSize = 0; auto retVal = clGetGLTextureInfo(image.get(), CL_GL_NUM_SAMPLES, sizeof(GLsizei), &numSamples, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, numSamples); EXPECT_EQ(sizeof(GLsizei), 
retSize); } TEST_F(GlSharingTextureTests, givenTextureWithZeroSamplesWhenAskedForNumSamplesThenReturnZero) { glSharing->m_textureInfoOutput.numberOfSamples = 0; glSharing->uploadDataToTextureInfo(); std::unique_ptr image(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_READ_WRITE, GL_TEXTURE_2D_MULTISAMPLE, 0, textureId, nullptr)); GLenum numSamples = 0; size_t retSize = 0; auto retVal = clGetGLTextureInfo(image.get(), CL_GL_NUM_SAMPLES, sizeof(GLsizei), &numSamples, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0, numSamples); EXPECT_EQ(sizeof(GLsizei), retSize); } TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedFromFormatNotIncludedInSurfaceFormatsThenErrorAndNoTextureIsReturned) { cl_int retVal = CL_SUCCESS; auto textureInfoOutput = std::make_unique(); textureInfoOutput->glInternalFormat = GL_SRGB8_ALPHA8; glSharing->dllParam->loadTexture(*textureInfoOutput); auto glTexture = GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_WRITE_ONLY, GL_SRGB8_ALPHA8, 0, textureId, &retVal); EXPECT_EQ(nullptr, glTexture); EXPECT_EQ(CL_INVALID_GL_OBJECT, retVal); } TEST_F(GlSharingTextureTests, givenMockGlWhenGlTextureIsCreatedWithUnifiedAuxSurfThenMapAuxGpuVaIsCalled) { cl_int retVal = CL_SUCCESS; setUnifiedAuxSurf(); EXPECT_EQ(0u, tempMM->mapAuxGpuVACalled); auto glTexture = std::unique_ptr(GlTexture::createSharedGlTexture(clContext.get(), CL_MEM_WRITE_ONLY, GL_SRGB8_ALPHA8, 0, textureId, &retVal)); auto hwInfo = clContext->getDevice(0)->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); uint32_t expectedMapAuxGpuVaCalls = hwHelper.isPageTableManagerSupported(hwInfo) ? 
1 : 0; EXPECT_EQ(expectedMapAuxGpuVaCalls, tempMM->mapAuxGpuVACalled); } class GetGlTextureInfoTests : public GlSharingTextureTests, public ::testing::WithParamInterface { }; INSTANTIATE_TEST_CASE_P( GetGlTextureInfoTests, GetGlTextureInfoTests, testing::ValuesIn(glTextureTargets::supportedTargets)); TEST_P(GetGlTextureInfoTests, givenGlTextureWhenAskedForCLGetGLTextureInfoThenReturnValidInfo) { auto retVal = CL_SUCCESS; GLenum expectedTarget = GetParam(); GLint mipLevel = 1u; auto glImage = clCreateFromGLTexture(clContext.get(), 0, expectedTarget, mipLevel, textureId, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, glImage); auto pMemObj = castToObject(glImage); auto glTextureObj = (GlTexture *)pMemObj->peekSharingHandler(); GLenum textureTarget = 0u; size_t retSize = 0; retVal = clGetGLTextureInfo(glImage, CL_GL_TEXTURE_TARGET, sizeof(GLenum), &textureTarget, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedTarget, textureTarget); EXPECT_EQ(sizeof(GLenum), retSize); retVal = clGetGLTextureInfo(glImage, CL_GL_MIPMAP_LEVEL, sizeof(GLenum), &mipLevel, &retSize); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(glTextureObj->getMiplevel(), mipLevel); EXPECT_EQ(sizeof(GLint), retSize); retVal = clGetGLTextureInfo(glImage, CL_INVALID_VALUE, 0, nullptr, nullptr); EXPECT_EQ(CL_INVALID_VALUE, retVal); auto image = castToObject(glImage); EXPECT_EQ(mipLevel, image->peekBaseMipLevel()); retVal = clReleaseMemObject(glImage); EXPECT_EQ(CL_SUCCESS, retVal); } TEST_P(GetGlTextureInfoTests, givenApiTargetTypeWhenAskedForBaseTypeThenConvertOnlyCubeMaps) { tempMM->useForcedGmm = false; auto apiTarget = GetParam(); unsigned int expectedBaseType; switch (apiTarget) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: expectedBaseType = GL_TEXTURE_CUBE_MAP_ARB; break; default: expectedBaseType = 
apiTarget; break; } EXPECT_EQ(GlTexture::getBaseTargetType(apiTarget), expectedBaseType); } TEST_P(GetGlTextureInfoTests, givenApiTargetTypeWhenAskedForGmmCubeFaceIndexThenReturnValidOnlyForCubeType) { tempMM->useForcedGmm = false; auto apiTarget = GetParam(); auto gmmCubeFaceIndex = static_cast(GmmTypesConverter::getCubeFaceIndex(apiTarget)); switch (apiTarget) { case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_NEG_X)); break; case GL_TEXTURE_CUBE_MAP_POSITIVE_X: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_POS_X)); break; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_NEG_Y)); break; case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_POS_Y)); break; case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_NEG_Z)); break; case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_CUBE_FACE_POS_Z)); break; default: EXPECT_EQ(gmmCubeFaceIndex, static_cast(__GMM_NO_CUBE_MAP)); break; } } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/gl/windows/gl_types_tests.cpp000066400000000000000000000142451363734646600313260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/gl/gl_texture.h" #include "config.h" #include "gtest/gtest.h" namespace NEO { namespace glTypes { static const std::tuple allImageFormats[] = { // input, output, output std::make_tuple(GL_RGBA8, CL_UNORM_INT8, CL_RGBA), std::make_tuple(GL_RGBA8I, CL_SIGNED_INT8, CL_RGBA), std::make_tuple(GL_RGBA16, CL_UNORM_INT16, CL_RGBA), std::make_tuple(GL_RGBA16I, CL_SIGNED_INT16, CL_RGBA), std::make_tuple(GL_RGBA32I, CL_SIGNED_INT32, CL_RGBA), std::make_tuple(GL_RGBA8UI, CL_UNSIGNED_INT8, CL_RGBA), std::make_tuple(GL_RGBA16UI, CL_UNSIGNED_INT16, CL_RGBA), std::make_tuple(GL_RGBA32UI, CL_UNSIGNED_INT32, CL_RGBA), 
std::make_tuple(GL_RGBA16F, CL_HALF_FLOAT, CL_RGBA), std::make_tuple(GL_RGBA32F, CL_FLOAT, CL_RGBA), std::make_tuple(GL_RGBA, CL_UNORM_INT8, CL_RGBA), std::make_tuple(GL_RGBA8_SNORM, CL_SNORM_INT8, CL_RGBA), std::make_tuple(GL_RGBA16_SNORM, CL_SNORM_INT16, CL_RGBA), std::make_tuple(GL_BGRA, CL_UNORM_INT8, CL_BGRA), std::make_tuple(GL_R8, CL_UNORM_INT8, CL_R), std::make_tuple(GL_R8_SNORM, CL_SNORM_INT8, CL_R), std::make_tuple(GL_R16, CL_UNORM_INT16, CL_R), std::make_tuple(GL_R16_SNORM, CL_SNORM_INT16, CL_R), std::make_tuple(GL_R16F, CL_HALF_FLOAT, CL_R), std::make_tuple(GL_R32F, CL_FLOAT, CL_R), std::make_tuple(GL_R8I, CL_SIGNED_INT8, CL_R), std::make_tuple(GL_R16I, CL_SIGNED_INT16, CL_R), std::make_tuple(GL_R32I, CL_SIGNED_INT32, CL_R), std::make_tuple(GL_R8UI, CL_UNSIGNED_INT8, CL_R), std::make_tuple(GL_R16UI, CL_UNSIGNED_INT16, CL_R), std::make_tuple(GL_R32UI, CL_UNSIGNED_INT32, CL_R), std::make_tuple(GL_DEPTH_COMPONENT32F, CL_FLOAT, CL_DEPTH), std::make_tuple(GL_DEPTH_COMPONENT16, CL_UNORM_INT16, CL_DEPTH), std::make_tuple(GL_DEPTH24_STENCIL8, CL_UNORM_INT24, CL_DEPTH_STENCIL), std::make_tuple(GL_DEPTH32F_STENCIL8, CL_FLOAT, CL_DEPTH_STENCIL), std::make_tuple(GL_SRGB8_ALPHA8, CL_UNORM_INT8, CL_sRGBA), std::make_tuple(GL_RG8, CL_UNORM_INT8, CL_RG), std::make_tuple(GL_RG8_SNORM, CL_SNORM_INT8, CL_RG), std::make_tuple(GL_RG16, CL_UNORM_INT16, CL_RG), std::make_tuple(GL_RG16_SNORM, CL_SNORM_INT16, CL_RG), std::make_tuple(GL_RG16F, CL_HALF_FLOAT, CL_RG), std::make_tuple(GL_RG32F, CL_FLOAT, CL_RG), std::make_tuple(GL_RG8I, CL_SIGNED_INT8, CL_RG), std::make_tuple(GL_RG16I, CL_SIGNED_INT16, CL_RG), std::make_tuple(GL_RG32I, CL_SIGNED_INT32, CL_RG), std::make_tuple(GL_RG8UI, CL_UNSIGNED_INT8, CL_RG), std::make_tuple(GL_RG16UI, CL_UNSIGNED_INT16, CL_RG), std::make_tuple(GL_RG32UI, CL_UNSIGNED_INT32, CL_RG), std::make_tuple(GL_RGB10, CL_UNORM_INT16, CL_RGBA), std::make_tuple(CL_INVALID_VALUE, 0, 0)}; static const std::tuple allObjTypes[] = { // input, output, output 
std::make_tuple(GL_TEXTURE_1D, CL_GL_OBJECT_TEXTURE1D, CL_MEM_OBJECT_IMAGE1D), std::make_tuple(GL_TEXTURE_1D_ARRAY, CL_GL_OBJECT_TEXTURE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_ARRAY), std::make_tuple(GL_TEXTURE_2D, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_RECTANGLE, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_POSITIVE_X, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_2D_MULTISAMPLE, CL_GL_OBJECT_TEXTURE2D, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(GL_TEXTURE_2D_ARRAY, CL_GL_OBJECT_TEXTURE2D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY), std::make_tuple(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, CL_GL_OBJECT_TEXTURE2D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY), std::make_tuple(GL_TEXTURE_3D, CL_GL_OBJECT_TEXTURE3D, CL_MEM_OBJECT_IMAGE3D), std::make_tuple(GL_TEXTURE_BUFFER, CL_GL_OBJECT_TEXTURE_BUFFER, CL_MEM_OBJECT_IMAGE1D_BUFFER), std::make_tuple(GL_RENDERBUFFER_EXT, CL_GL_OBJECT_RENDERBUFFER, CL_MEM_OBJECT_IMAGE2D), std::make_tuple(CL_INVALID_VALUE, 0, 0)}; } // namespace glTypes struct GlClImageFormatTests : public ::testing::WithParamInterface>, public ::testing::Test {}; INSTANTIATE_TEST_CASE_P(GlClImageFormatTests, GlClImageFormatTests, testing::ValuesIn(glTypes::allImageFormats)); TEST_P(GlClImageFormatTests, validFormat) { cl_image_format imgFormat = {}; auto glFormat = std::get<0>(GetParam()); auto expectedClChannelType = static_cast(std::get<1>(GetParam())); auto expectedClChannelOrder = 
static_cast(std::get<2>(GetParam())); GlTexture::setClImageFormat(glFormat, imgFormat); EXPECT_EQ(imgFormat.image_channel_data_type, expectedClChannelType); EXPECT_EQ(imgFormat.image_channel_order, expectedClChannelOrder); } struct GlClObjTypesTests : public ::testing::WithParamInterface>, public ::testing::Test {}; INSTANTIATE_TEST_CASE_P(GlClObjTypesTests, GlClObjTypesTests, testing::ValuesIn(glTypes::allObjTypes)); TEST_P(GlClObjTypesTests, typeConversion) { auto glType = static_cast(std::get<0>(GetParam())); auto expectedClGlObjType = static_cast(std::get<1>(GetParam())); auto expectedClMemObjType = static_cast(std::get<2>(GetParam())); auto clGlObjType = GlTexture::getClGlObjectType(glType); auto clMemObjType = GlTexture::getClMemObjectType(glType); EXPECT_EQ(expectedClGlObjType, clGlObjType); EXPECT_EQ(clMemObjType, expectedClMemObjType); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/sharings/sharing_factory_tests.cpp000066400000000000000000000244561363734646600305730ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/device/device.h" #include "shared/source/helpers/string.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/context/context.h" #include "opencl/source/platform/extensions.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/sharing.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_sharing_factory.h" #include "gtest/gtest.h" using namespace NEO; class 
SharingFactoryStateRestore : public SharingFactory { public: SharingFactoryStateRestore() { memcpy_s(savedState, sizeof(savedState), sharingContextBuilder, sizeof(sharingContextBuilder)); } ~SharingFactoryStateRestore() { memcpy_s(sharingContextBuilder, sizeof(sharingContextBuilder), savedState, sizeof(savedState)); } void clearCurrentState() { for (auto &builder : sharingContextBuilder) { builder = nullptr; } } template void registerSharing(SharingType type) { auto object = std::make_unique(); sharingContextBuilder[type] = object.get(); sharings.push_back(std::move(object)); } template Sharing *getSharing(); protected: decltype(SharingFactory::sharingContextBuilder) savedState; std::vector> sharings; }; class TestedSharingBuilderFactory : public SharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return nullptr; } std::string getExtensions() override { return extension; }; void fillGlobalDispatchTable() override { invocationCount++; }; void *getExtensionFunctionAddress(const std::string &functionName) override { if (functionName == "someFunction") invocationCount++; return nullptr; } static const std::string extension; uint32_t invocationCount = 0u; }; const std::string TestedSharingBuilderFactory::extension("--extensions--"); template <> TestedSharingBuilderFactory *SharingFactoryStateRestore::getSharing() { return reinterpret_cast(sharingContextBuilder[SharingType::CLGL_SHARING]); } void dummyHandler() { } const cl_context_properties mockContextPassFinalize = 1; const cl_context_properties mockContextFailFinalize = 2; const cl_context_properties clContextPropertyMock = 0x2000; class MockSharingContextBuilder : public SharingContextBuilder { cl_context_properties value; public: bool processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) override; bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; bool 
MockSharingContextBuilder::processProperties(cl_context_properties &propertyType, cl_context_properties &propertyValue, cl_int &errcodeRet) { if (propertyType == clContextPropertyMock) { if (propertyValue) { value = propertyValue; return true; } } return false; } class VASharingFunctionsMock : public SharingFunctions { public: static const uint32_t sharingId = VA_SHARING; uint32_t getId() const override { return sharingId; } }; struct VAMockSharingContextBuilder : public MockSharingContextBuilder { bool finalizeProperties(Context &context, int32_t &errcodeRet) override; }; bool VAMockSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { auto &mockContext = static_cast(context); mockContext.registerSharingWithId(new VASharingFunctionsMock(), VA_SHARING); return true; } bool MockSharingContextBuilder::finalizeProperties(Context &context, int32_t &errcodeRet) { if (value == mockContextPassFinalize) { return true; } else { return false; } } class MockSharingBuilderFactory : public TestedSharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return std::unique_ptr(new MockSharingContextBuilder()); } void *getExtensionFunctionAddress(const std::string &functionName) override { if (functionName == "dummyHandler") { return reinterpret_cast(dummyHandler); } else { return nullptr; } } }; class VAMockSharingBuilderFactory : public TestedSharingBuilderFactory { public: std::unique_ptr createContextBuilder() override { return std::unique_ptr(new VAMockSharingContextBuilder()); } }; TEST(SharingFactoryTests, givenFactoryWithEmptyTableWhenAskedForExtensionThenEmptyStringIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); auto ext = stateRestore.getExtensions(); EXPECT_EQ(0u, ext.length()); EXPECT_STREQ("", ext.c_str()); } TEST(SharingFactoryTests, givenFactoryWithSharingWhenAskedForExtensionThenStringIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); 
stateRestore.registerSharing(SharingType::CLGL_SHARING); auto ext = stateRestore.getExtensions(); EXPECT_EQ(TestedSharingBuilderFactory::extension.length(), ext.length()); EXPECT_STREQ(TestedSharingBuilderFactory::extension.c_str(), ext.c_str()); } TEST(SharingFactoryTests, givenFactoryWithSharingWhenDispatchFillRequestedThenMethodsAreInvoked) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto sharing = stateRestore.getSharing(); ASSERT_EQ(0u, sharing->invocationCount); stateRestore.fillGlobalDispatchTable(); EXPECT_EQ(1u, sharing->invocationCount); } TEST(SharingFactoryTests, givenFactoryWithSharingWhenAskedThenAddressIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto sharing = stateRestore.getSharing(); ASSERT_EQ(0u, sharing->invocationCount); auto ptr = stateRestore.getExtensionFunctionAddress("someFunction"); EXPECT_EQ(nullptr, ptr); EXPECT_EQ(1u, sharing->invocationCount); } TEST(SharingFactoryTests, givenMockFactoryWithSharingWhenAskedThenAddressIsReturned) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto ptr = stateRestore.getExtensionFunctionAddress("dummyHandler"); EXPECT_EQ(reinterpret_cast(dummyHandler), ptr); ptr = clGetExtensionFunctionAddress("dummyHandler"); EXPECT_EQ(reinterpret_cast(dummyHandler), ptr); } TEST(Context, givenMockSharingBuilderWhenContextWithInvalidPropertiesThenContextCreateShouldFail) { SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); cl_device_id clDevice = device.get(); auto deviceVector = ClDeviceVector(&clDevice, 1); cl_int retVal; cl_platform_id platformId[] = {platform()}; 
cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId[0], clContextPropertyMock, mockContextPassFinalize, 0}; cl_context_properties invalidProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId[0], clContextPropertyMock, 0, 0}; cl_context_properties invalidPropertiesFailFinalize[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId[0], clContextPropertyMock, mockContextFailFinalize, 0}; std::unique_ptr context; context.reset(Context::create(invalidProperties, deviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(nullptr, context.get()); context.reset(Context::create(invalidPropertiesFailFinalize, deviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(nullptr, context.get()); context.reset(Context::create(validProperties, deviceVector, nullptr, nullptr, retVal)); EXPECT_NE(nullptr, context.get()); }; TEST(SharingFactoryTests, givenDisabledFormatQueryAndFactoryWithSharingWhenAskedForExtensionThenFormatQueryExtensionIsNotReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(false); SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto extensionsList = sharingFactory.getExtensions(); EXPECT_THAT(extensionsList, ::testing::Not(::testing::HasSubstr(Extensions::sharingFormatQuery))); } TEST(SharingFactoryTests, givenEnabledFormatQueryAndFactoryWithSharingWhenAskedForExtensionThenFormatQueryExtensionIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); SharingFactoryStateRestore stateRestore; stateRestore.clearCurrentState(); stateRestore.registerSharing(SharingType::CLGL_SHARING); auto extensionsList = sharingFactory.getExtensions(); EXPECT_THAT(extensionsList, ::testing::HasSubstr(Extensions::sharingFormatQuery)); } TEST(SharingFactoryTests, givenEnabledFormatQueryAndFactoryWithNoSharingsWhenAskedForExtensionThenNoExtensionIsReturned) { 
DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); SharingFactoryStateRestore sharingFactory; sharingFactory.clearCurrentState(); auto extensionsList = sharingFactory.getExtensions(); EXPECT_THAT(extensionsList, ::testing::Not(::testing::HasSubstr(Extensions::sharingFormatQuery))); }compute-runtime-20.13.16352/opencl/test/unit_test/sharings/sharing_tests.cpp000066400000000000000000000110421363734646600270270ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/helpers/memory_properties_flags_helpers.h" #include "opencl/source/mem_obj/mem_obj.h" #include "opencl/source/sharings/sharing.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "gtest/gtest.h" using namespace NEO; TEST(sharingHandler, givenBasicSharingHandlerWhenSynchronizeObjectThenErrorIsReturned) { struct SH : SharingHandler { int synchronizeHandlerMock(UpdateData &updateData) { return synchronizeHandler(updateData); } } sharingHandler; UpdateData updateData; sharingHandler.synchronizeHandlerMock(updateData); EXPECT_EQ(SynchronizeStatus::SYNCHRONIZE_ERROR, updateData.synchronizationStatus); size_t paramSize = 0; void *paramValue = nullptr; // there is no default implementation. parameters should be unchanged. 
sharingHandler.getMemObjectInfo(paramSize, paramValue); EXPECT_EQ(paramSize, 0u); EXPECT_EQ(paramValue, nullptr); } TEST(sharingHandler, givenMemObjWhenAcquireIncrementCounterThenReleaseShouldDecrementIt) { char buffer[64]; MockContext context; MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(buffer, sizeof(buffer)); std::unique_ptr memObj(new MemObj(&context, CL_MEM_OBJECT_BUFFER, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(buffer), buffer, buffer, mockAllocation, true, false, false)); struct MockSharingHandler : SharingHandler { unsigned int acquire(MemObj *memObj) { SharingHandler::acquire(memObj); return acquireCount; } unsigned int release(MemObj *memObj) { SharingHandler::release(memObj); return acquireCount; } void synchronizeObject(UpdateData &updateData) override { updateData.synchronizationStatus = ACQUIRE_SUCCESFUL; } } sharingHandler; EXPECT_EQ(sharingHandler.acquire(memObj.get()), 1u); EXPECT_EQ(sharingHandler.release(memObj.get()), 0u); } TEST(sharingHandler, givenMemObjWhenAcquireTwoTimesThenReleaseShouldBeCalledTwoTimesToReleaseObject) { char buffer[64]; MockContext context; MockGraphicsAllocation *mockAllocation = new MockGraphicsAllocation(buffer, sizeof(buffer)); std::unique_ptr memObj(new MemObj(&context, CL_MEM_OBJECT_BUFFER, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, sizeof(buffer), buffer, buffer, mockAllocation, true, false, false)); struct MockSharingHandler : SharingHandler { MockSharingHandler() { releaseCount = 0; } unsigned int acquire(MemObj *memObj) { SharingHandler::acquire(memObj); return acquireCount; } unsigned int release(MemObj *memObj) { SharingHandler::release(memObj); return acquireCount; } void synchronizeObject(UpdateData &updateData) override { updateData.synchronizationStatus = ACQUIRE_SUCCESFUL; } void releaseResource(MemObj *memObject) override { releaseCount++; }; 
int releaseCount; } sharingHandler; EXPECT_EQ(sharingHandler.acquire(memObj.get()), 1u); EXPECT_EQ(sharingHandler.acquire(memObj.get()), 2u); EXPECT_EQ(sharingHandler.release(memObj.get()), 1u); EXPECT_EQ(sharingHandler.release(memObj.get()), 0u); EXPECT_EQ(sharingHandler.releaseCount, 1); } TEST(sharingHandler, givenSharingHandlerWhenValidateUpdateDataIsCalledWithNonNullInputThenAbortIsNotCalled) { class MockSharingHandler : SharingHandler { public: using SharingHandler::validateUpdateData; }; MockSharingHandler sharingHandler; UpdateData updateData; sharingHandler.validateUpdateData(updateData); } TEST(sharingHandler, givenSharingHandlerWhenAcquiringThenReturnErrorCode) { SharingHandler sharingHandler; MockContext context; MockGraphicsAllocation *graphicsAllocation = new MockGraphicsAllocation(nullptr, 0); MemObj memObj(&context, CL_MEM_OBJECT_BUFFER, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_USE_HOST_PTR, 0, 0), CL_MEM_USE_HOST_PTR, 0, 1, nullptr, nullptr, graphicsAllocation, true, false, false); auto result = sharingHandler.acquire(&memObj); EXPECT_NE(CL_SUCCESS, result); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/unified/000077500000000000000000000000001363734646600250735ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/unified/CMakeLists.txt000066400000000000000000000012231363734646600276310ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings_unified ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_fixtures.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_image_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_mocks.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_sharing_tests.cpp ) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_unified}) set_property(GLOBAL PROPERTY 
IGDRCL_SRCS_tests_sharings_unified ${IGDRCL_SRCS_tests_sharings_unified}) add_subdirectories() compute-runtime-20.13.16352/opencl/test/unit_test/sharings/unified/unified_sharing_buffer_tests.cpp000066400000000000000000000060171363734646600335140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/unified_buffer.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_mocks.h" using namespace NEO; using UnifiedSharingBufferTestsWithMemoryManager = UnifiedSharingFixture; using UnifiedSharingBufferTestsWithInvalidMemoryManager = UnifiedSharingFixture; TEST_F(UnifiedSharingBufferTestsWithMemoryManager, givenUnifiedBufferThenItCanBeAcquiredAndReleased) { cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); UnifiedSharingFunctions sharingFunctions; MockUnifiedBuffer *sharingHandler = new MockUnifiedBuffer(&sharingFunctions, desc.type); buffer->setSharingHandler(sharingHandler); ASSERT_EQ(0u, sharingHandler->acquireCount); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get())); EXPECT_EQ(1u, sharingHandler->acquireCount); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get())); EXPECT_EQ(2u, sharingHandler->acquireCount); sharingHandler->release(buffer.get()); EXPECT_EQ(1u, sharingHandler->acquireCount); sharingHandler->release(buffer.get()); EXPECT_EQ(0u, sharingHandler->acquireCount); } TEST_F(UnifiedSharingBufferTestsWithInvalidMemoryManager, givenValidContextAndAllocationFailsWhenCreatingBufferFromSharedHandleThenReturnInvalidMemObject) { cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = 
reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingBufferTestsWithMemoryManager, givenUnsupportedHandleTypeWhenCreatingBufferFromSharedHandleThenReturnInvalidMemObject) { cl_mem_flags flags{}; cl_int retVal{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingBufferTestsWithMemoryManager, givenValidContextAndMemoryManagerWhenCreatingBufferFromSharedHandleThenReturnSuccess) { cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h000066400000000000000000000072001363734646600323320ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/unified_sharing_types.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_gmm.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" namespace NEO { template struct UnifiedSharingContextFixture : ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); cl_device_id 
deviceId = device.get(); deviceVector = std::make_unique(&deviceId, 1); if (validContext) { context = createValidContext(); } else { context = createInvalidContext(); } } std::unique_ptr createContext(const cl_context_properties *contextProperties) { cl_int retVal{}; auto context = std::unique_ptr(Context::create(contextProperties, *deviceVector, nullptr, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); return context; } std::unique_ptr createValidContext() { const cl_context_properties contextProperties[] = { static_cast(UnifiedSharingContextType::DeviceHandle), 0, CL_CONTEXT_INTEROP_USER_SYNC, 1, 0}; return createContext(contextProperties); } std::unique_ptr createInvalidContext() { return createContext(nullptr); } std::unique_ptr device; std::unique_ptr deviceVector; std::unique_ptr context; }; template struct UnifiedSharingMockMemoryManager : MockMemoryManager { GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) override { if (!validMemoryManager) { return nullptr; } auto graphicsAllocation = createMemoryAllocation(GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, nullptr, reinterpret_cast(1), 1, 4096u, reinterpret_cast(handle), MemoryPool::SystemCpuInaccessible, rootDeviceIndex, false, false, false); graphicsAllocation->setSharedHandle(static_cast(reinterpret_cast(handle))); graphicsAllocation->set32BitAllocation(false); graphicsAllocation->setDefaultGmm(new MockGmm()); return graphicsAllocation; } }; template struct UnifiedSharingFixture : UnifiedSharingContextFixture { void SetUp() override { UnifiedSharingContextFixture::SetUp(); this->memoryManager = std::make_unique>(); this->memoryManagerBackup = std::make_unique>(&this->context->memoryManager, this->memoryManager.get()); } void TearDown() override { UnifiedSharingContextFixture::TearDown(); } std::unique_ptr> memoryManager; std::unique_ptr> memoryManagerBackup; }; } // namespace NEO 
compute-runtime-20.13.16352/opencl/test/unit_test/sharings/unified/unified_sharing_image_tests.cpp000066400000000000000000000200301363734646600333140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "opencl/source/mem_obj/image.h" #include "opencl/source/sharings/unified/unified_image.h" #include "opencl/test/unit_test/helpers/raii_hw_helper.h" #include "opencl/test/unit_test/mocks/mock_gmm_resource_info.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_mocks.h" using namespace NEO; using UnifiedSharingImageTestsWithMemoryManager = UnifiedSharingFixture; using UnifiedSharingImageTestsWithInvalidMemoryManager = UnifiedSharingFixture; static UnifiedSharingMemoryDescription getValidUnifiedSharingDesc() { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; return desc; } static cl_image_format getValidImageFormat() { cl_image_format format{}; format.image_channel_data_type = CL_UNORM_INT8; format.image_channel_order = CL_RGBA; return format; } static cl_image_desc getValidImageDesc() { cl_image_desc imageDesc{}; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = 128; imageDesc.image_height = 128; imageDesc.image_depth = 1; imageDesc.image_array_size = 1; imageDesc.image_row_pitch = 256; imageDesc.image_slice_pitch = 0u; imageDesc.num_mip_levels = 1; imageDesc.num_samples = 0; imageDesc.buffer = nullptr; return imageDesc; } TEST_F(UnifiedSharingImageTestsWithInvalidMemoryManager, givenValidContextAndAllocationFailsWhenCreatingImageFromSharedHandleThenReturnInvalidMemObject) { cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = 
std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingImageTestsWithMemoryManager, givenUnsupportedHandleTypeWhenCreatingImageFromSharedHandleThenReturnInvalidMemObject) { cl_mem_flags flags{}; cl_int retVal{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, desc, &format, &imageDesc, &retVal)); EXPECT_EQ(CL_INVALID_MEM_OBJECT, retVal); } TEST_F(UnifiedSharingImageTestsWithMemoryManager, givenValidContextAndMemoryManagerWhenCreatingImageFromSharedHandleThenReturnSuccess) { cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); } TEST_F(UnifiedSharingImageTestsWithMemoryManager, givenPassedFormatWhenCreatingUnifiedImageThenFormatIsCorrectlySetInImageObject) { cl_image_format format{}; format.image_channel_data_type = CL_HALF_FLOAT; format.image_channel_order = CL_RG; cl_mem_flags flags{}; cl_int retVal{}; const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(GMM_FORMAT_R16G16_FLOAT_TYPE, image->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat); EXPECT_EQ(GFX3DSTATE_SURFACEFORMAT_R16G16_FLOAT, image->getSurfaceFormatInfo().surfaceFormat.GenxSurfaceFormat); } template class MockHwHelper : public HwHelperHw { public: bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const 
override { return pageTableManagerSupported; } }; struct MemoryManagerReturningCompressedAllocations : UnifiedSharingMockMemoryManager { GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) override { auto allocation = UnifiedSharingMockMemoryManager::createGraphicsAllocationFromNTHandle(handle, rootDeviceIndex); auto gmm = allocation->getDefaultGmm(); auto mockGmmResourceInfo = std::make_unique(gmm->gmmResourceInfo->peekHandle()); mockGmmResourceInfo->setUnifiedAuxTranslationCapable(); gmm->gmmResourceInfo = std::move(mockGmmResourceInfo); return allocation; } bool mapAuxGpuVA(GraphicsAllocation *graphicsAllocation) override { calledMapAuxGpuVA++; return resultOfMapAuxGpuVA; } unsigned int calledMapAuxGpuVA{}; bool resultOfMapAuxGpuVA{}; }; HWTEST_F(UnifiedSharingImageTestsWithMemoryManager, givenCompressedImageAndNoPageTableManagerWhenCreatingUnifiedImageThenSetCorrespondingFieldInGmmAndDoNotUsePageTableManager) { MemoryManagerReturningCompressedAllocations memoryManager{}; VariableBackup memoryManagerBackup{&this->context->memoryManager, &memoryManager}; using HwHelperNotSupportingPageTableManager = MockHwHelper; RAIIHwHelperFactory hwHelperBackup{this->context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily}; cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(image->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed); EXPECT_EQ(0u, memoryManager.calledMapAuxGpuVA); } HWTEST_F(UnifiedSharingImageTestsWithMemoryManager, givenCompressedImageAndPageTableManagerWhenCreatingUnifiedImageThenSetCorrespondingFieldInGmmBasedOnAuxGpuVaMappingResult) { MemoryManagerReturningCompressedAllocations memoryManager{}; VariableBackup 
memoryManagerBackup{&this->context->memoryManager, &memoryManager}; using HwHelperSupportingPageTableManager = MockHwHelper; RAIIHwHelperFactory hwHelperBackup{this->context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily}; cl_mem_flags flags{}; cl_int retVal{}; const auto format = getValidImageFormat(); const auto imageDesc = getValidImageDesc(); memoryManager.resultOfMapAuxGpuVA = true; auto image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memoryManager.resultOfMapAuxGpuVA, image->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed); EXPECT_EQ(1u, memoryManager.calledMapAuxGpuVA); memoryManager.resultOfMapAuxGpuVA = false; image = std::unique_ptr(UnifiedImage::createSharedUnifiedImage(context.get(), flags, getValidUnifiedSharingDesc(), &format, &imageDesc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(memoryManager.resultOfMapAuxGpuVA, image->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed); EXPECT_EQ(2u, memoryManager.calledMapAuxGpuVA); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/unified/unified_sharing_mocks.h000066400000000000000000000005071363734646600316000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "opencl/source/sharings/unified/unified_buffer.h" namespace NEO { struct MockUnifiedBuffer : UnifiedBuffer { using UnifiedBuffer::acquireCount; using UnifiedBuffer::UnifiedBuffer; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/sharings/unified/unified_sharing_tests.cpp000066400000000000000000000257421363734646600321710ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/unified/enable_unified.h" #include "opencl/source/sharings/unified/unified_buffer.h" #include 
"opencl/source/sharings/unified/unified_sharing.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "opencl/test/unit_test/sharings/unified/unified_sharing_fixtures.h" #include "test.h" using namespace NEO; TEST(UnifiedSharingTests, givenContextCreatedWithExternalDeviceHandlePropertyWhenGettingUnifiedSharingThenReturnIt) { MockClDevice device{MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())}; cl_device_id deviceId = &device; ClDeviceVector allDevs(&deviceId, 1); cl_int retVal{}; const cl_context_properties context_props[] = { static_cast(UnifiedSharingContextType::DeviceHandle), 0, CL_CONTEXT_INTEROP_USER_SYNC, 1, 0}; auto context = std::unique_ptr(Context::create(context_props, allDevs, nullptr, nullptr, retVal)); auto sharingFunctions = context->getSharing(); EXPECT_NE(nullptr, sharingFunctions); } struct MockUnifiedSharingContextBuilder : UnifiedSharingContextBuilder { using UnifiedSharingContextBuilder::contextData; }; TEST(UnifiedSharingTests, givenExternalDeviceHandleWhenProcessingBySharingContextBuilderThenResultIsTrue) { MockUnifiedSharingContextBuilder builder{}; cl_context_properties propertyType = static_cast(UnifiedSharingContextType::DeviceHandle); cl_context_properties propertyValue = 0x1234; cl_int retVal{}; bool result = builder.processProperties(propertyType, propertyValue, retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenExternalDeviceGroupHandleWhenProcessingBySharingContextBuilderThenResultIsTrue) { MockUnifiedSharingContextBuilder builder{}; cl_context_properties propertyType = static_cast(UnifiedSharingContextType::DeviceGroup); cl_context_properties propertyValue = 0x1234; cl_int retVal{}; bool result 
= builder.processProperties(propertyType, propertyValue, retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenExternalDeviceGroupHandleWhenProcessingBySharingContextBuilderThenReturnSuccess) { MockUnifiedSharingContextBuilder builder{}; cl_context_properties propertyType = CL_CONTEXT_PLATFORM; cl_context_properties propertyValue = 0x1234; cl_int retVal{}; bool result = builder.processProperties(propertyType, propertyValue, retVal); EXPECT_FALSE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenContextWithUserSyncWhenFinalizingPropertiesBySharingContextBuilderThenRegisterSharingInContextAndClearContextData) { MockUnifiedSharingContextBuilder builder{}; builder.contextData = std::make_unique(); MockContext context{}; context.setInteropUserSyncEnabled(true); cl_int retVal{}; bool result = builder.finalizeProperties(context, retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, context.sharingFunctions[UnifiedSharingFunctions::sharingId]); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenContextWithoutUserSyncWhenFinalizingPropertiesBySharingContextBuilderThenDoNotRegisterSharingInContextAndClearContextData) { MockUnifiedSharingContextBuilder builder{}; builder.contextData = std::make_unique(); MockContext context{}; context.setInteropUserSyncEnabled(false); cl_int retVal{}; bool result = builder.finalizeProperties(context, retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, context.sharingFunctions[UnifiedSharingFunctions::sharingId]); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenBuilderWithoutContextDataWhenFinalizingPropertiesBySharingContextBuilderThenDoNotRegisterSharingInContext) { MockUnifiedSharingContextBuilder builder{}; MockContext context{}; cl_int retVal{}; bool result = builder.finalizeProperties(context, 
retVal); EXPECT_TRUE(result); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, context.sharingFunctions[UnifiedSharingFunctions::sharingId]); EXPECT_EQ(nullptr, builder.contextData); } TEST(UnifiedSharingTests, givenSharingHandlerThenItReturnsCorrectValues) { UnifiedSharingFunctions sharingFunctions; EXPECT_EQ(UnifiedSharingFunctions::sharingId, sharingFunctions.getId()); UnifiedSharing sharingHandler{&sharingFunctions, UnifiedSharingHandleType::LinuxFd}; EXPECT_EQ(&sharingFunctions, sharingHandler.peekFunctionsHandler()); EXPECT_EQ(UnifiedSharingHandleType::LinuxFd, sharingHandler.getExternalMemoryType()); } using UnifiedSharingTestsWithMemoryManager = UnifiedSharingFixture; TEST_F(UnifiedSharingTestsWithMemoryManager, givenUnifiedSharingHandlerWhenAcquiringAndReleasingThenMethodsAreCalledAppropriately) { struct MockSharingHandler : UnifiedSharing { using UnifiedSharing::UnifiedSharing; unsigned int synchronizeObjectCalled = 0u; unsigned int releaseResourceCalled = 0u; void synchronizeObject(UpdateData &updateData) override { UnifiedSharing::synchronizeObject(updateData); synchronizeObjectCalled++; } void releaseResource(MemObj *memObject) override { UnifiedSharing::releaseResource(memObject); releaseResourceCalled++; }; }; cl_mem_flags flags{}; UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; cl_int retVal{}; auto buffer = std::unique_ptr(UnifiedBuffer::createSharedUnifiedBuffer(context.get(), flags, desc, &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); UnifiedSharingFunctions sharingFunctions; MockSharingHandler *sharingHandler = new MockSharingHandler(&sharingFunctions, desc.type); buffer->setSharingHandler(sharingHandler); ASSERT_EQ(0u, sharingHandler->synchronizeObjectCalled); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get())); EXPECT_EQ(1u, sharingHandler->synchronizeObjectCalled); ASSERT_EQ(CL_SUCCESS, sharingHandler->acquire(buffer.get())); EXPECT_EQ(1u, 
sharingHandler->synchronizeObjectCalled); ASSERT_EQ(0u, sharingHandler->releaseResourceCalled); sharingHandler->release(buffer.get()); EXPECT_EQ(0u, sharingHandler->releaseResourceCalled); sharingHandler->release(buffer.get()); EXPECT_EQ(1u, sharingHandler->releaseResourceCalled); } struct UnifiedSharingCreateAllocationTests : UnifiedSharingTestsWithMemoryManager { struct MemoryManagerCheckingAllocationMethod : MockMemoryManager { using MockMemoryManager::MockMemoryManager; GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle, uint32_t rootDeviceIndex) override { this->createFromNTHandleCalled = true; this->handle = toOsHandle(handle); return nullptr; } GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness) override { this->createFromSharedHandleCalled = true; this->handle = handle; this->properties = std::make_unique(properties); return nullptr; } bool createFromNTHandleCalled = false; bool createFromSharedHandleCalled = false; osHandle handle; std::unique_ptr properties; }; struct MockSharingHandler : UnifiedSharing { using UnifiedSharing::createGraphicsAllocation; }; void SetUp() override { UnifiedSharingTestsWithMemoryManager::SetUp(); this->memoryManager = std::make_unique(); this->memoryManagerBackup = std::make_unique>(&this->context->memoryManager, this->memoryManager.get()); } std::unique_ptr memoryManager; std::unique_ptr> memoryManagerBackup; }; TEST_F(UnifiedSharingCreateAllocationTests, givenWindowsNtHandleWhenCreateGraphicsAllocationIsCalledThenUseNtHandleMethod) { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Nt; GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::SHARED_IMAGE; MockSharingHandler::createGraphicsAllocation(this->context.get(), desc, allocationType); EXPECT_TRUE(memoryManager->createFromNTHandleCalled); 
EXPECT_FALSE(memoryManager->createFromSharedHandleCalled); EXPECT_EQ(toOsHandle(desc.handle), memoryManager->handle); } TEST_F(UnifiedSharingCreateAllocationTests, givenWindowsSharedHandleWhenCreateGraphicsAllocationIsCalledThenUseSharedHandleMethod) { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::Win32Shared; GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::SHARED_IMAGE; MockSharingHandler::createGraphicsAllocation(this->context.get(), desc, allocationType); EXPECT_FALSE(memoryManager->createFromNTHandleCalled); EXPECT_TRUE(memoryManager->createFromSharedHandleCalled); EXPECT_EQ(toOsHandle(desc.handle), memoryManager->handle); const AllocationProperties expectedProperties{0u, false, 0u, allocationType, false}; EXPECT_EQ(expectedProperties.allFlags, memoryManager->properties->allFlags); } TEST_F(UnifiedSharingCreateAllocationTests, givenLinuxSharedHandleWhenCreateGraphicsAllocationIsCalledThenUseSharedHandleMethod) { UnifiedSharingMemoryDescription desc{}; desc.handle = reinterpret_cast(0x1234); desc.type = UnifiedSharingHandleType::LinuxFd; GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::SHARED_IMAGE; MockSharingHandler::createGraphicsAllocation(this->context.get(), desc, allocationType); EXPECT_FALSE(memoryManager->createFromNTHandleCalled); EXPECT_TRUE(memoryManager->createFromSharedHandleCalled); EXPECT_EQ(toOsHandle(desc.handle), memoryManager->handle); const AllocationProperties expectedProperties{0u, false, 0u, allocationType, false}; EXPECT_EQ(expectedProperties.allFlags, memoryManager->properties->allFlags); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/000077500000000000000000000000001363734646600240565ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/CMakeLists.txt000066400000000000000000000021111363734646600266110ustar00rootroot00000000000000# # 
Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sharings_va ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cl_create_from_va_media_surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_acquire_va_media_surfaces_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_enqueue_release_va_media_surfaces_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_get_extension_function_address_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_va_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_va_image_arg_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_va_sharing.h ${CMAKE_CURRENT_SOURCE_DIR}/va_base_object_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_linux_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_tests.cpp ) set_property(GLOBAL PROPERTY IGDRCL_SRCS_tests_sharings_va ${IGDRCL_SRCS_tests_sharings_va}) if(NEO__LIBVA_FOUND) list(APPEND IGDRCL_SRCS_tests_sharings_va ${CMAKE_CURRENT_SOURCE_DIR}/va_sharing_enable_tests.cpp) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sharings_va}) endif(NEO__LIBVA_FOUND) cl_create_from_va_media_surface_tests.cpp000066400000000000000000000011031363734646600342210ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clCreateFromVaMediaSurfaceTests; namespace ULT { TEST_F(clCreateFromVaMediaSurfaceTests, givenNullContextWhenCreateIsCalledThenErrorIsReturned) { auto memObj = clCreateFromVA_APIMediaSurfaceINTEL(nullptr, CL_MEM_READ_WRITE, nullptr, 0, &retVal); EXPECT_EQ(nullptr, memObj); EXPECT_EQ(CL_INVALID_CONTEXT, retVal); } } // namespace ULT cl_enqueue_acquire_va_media_surfaces_tests.cpp000066400000000000000000000011001363734646600352730ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/* * Copyright (C) 2017-2020 Intel Corporation 
* * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clEnqueueAcquireVaMediaSurfacesTests; namespace ULT { TEST_F(clEnqueueAcquireVaMediaSurfacesTests, givenNullCommandQueueWhenAcquireIsCalledThenInvalidCommandQueueIsReturned) { retVal = clEnqueueAcquireVA_APIMediaSurfacesINTEL(nullptr, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_COMMAND_QUEUE); } } // namespace ULT cl_enqueue_release_va_media_surfaces_tests.cpp000066400000000000000000000011071363734646600352710ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/api/cl_api_tests.h" #include using namespace NEO; typedef api_tests clEnqueueReleaseVaMediaSurfacesTests; namespace ULT { TEST_F(clEnqueueReleaseVaMediaSurfacesTests, givenNullCommandQueueWhenReleaseObjectsIsCalledThenInvalidCommandQueueIsReturned) { retVal = clEnqueueReleaseVA_APIMediaSurfacesINTEL(nullptr, 0, nullptr, 0, nullptr, nullptr); EXPECT_EQ(retVal, CL_INVALID_COMMAND_QUEUE); } } // namespace ULT cl_get_extension_function_address_tests.cpp000066400000000000000000000035131363734646600346720ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/sharings/va/cl_va_api.h" #include "opencl/test/unit_test/api/cl_api_tests.h" using namespace NEO; typedef api_tests clGetExtensionFunctionAddressTests; namespace ULT { TEST_F(clGetExtensionFunctionAddressTests, clCreateFromVaMediaSurfaceINTEL) { auto retVal = clGetExtensionFunctionAddress("clCreateFromVA_APIMediaSurfaceINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clCreateFromVA_APIMediaSurfaceINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, 
clEnqueueAcquireVA_APIMediaSurfacesINTEL) { auto retVal = clGetExtensionFunctionAddress("clEnqueueAcquireVA_APIMediaSurfacesINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueAcquireVA_APIMediaSurfacesINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, clEnqueueReleaseVA_APIMediaSurfacesINTEL) { auto retVal = clGetExtensionFunctionAddress("clEnqueueReleaseVA_APIMediaSurfacesINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clEnqueueReleaseVA_APIMediaSurfacesINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, clGetDeviceIDsFromVA_APIMediaAdapterINTEL) { auto retVal = clGetExtensionFunctionAddress("clGetDeviceIDsFromVA_APIMediaAdapterINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetDeviceIDsFromVA_APIMediaAdapterINTEL)); } TEST_F(clGetExtensionFunctionAddressTests, givenEnabledFormatQueryWhenGettingFuncionAddressThenCorrectAddressIsReturned) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFormatQuery.set(true); auto retVal = clGetExtensionFunctionAddress("clGetSupportedVA_APIMediaSurfaceFormatsINTEL"); EXPECT_EQ(retVal, reinterpret_cast(clGetSupportedVA_APIMediaSurfaceFormatsINTEL)); } } // namespace ULT compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/context_va_tests.cpp000066400000000000000000000043471363734646600301660ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/sharings/va/va_sharing.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" using namespace NEO; struct VAContextTest : public PlatformFixture, public ::testing::Test { using PlatformFixture::SetUp; VAContextTest() { } void SetUp() override { PlatformFixture::SetUp(); cl_platform_id platform = pPlatform; properties = new cl_context_properties[3]; properties[0] = 
CL_CONTEXT_PLATFORM; properties[1] = (cl_context_properties)platform; properties[2] = 0; context = Context::create(properties, ClDeviceVector(devices, num_devices), nullptr, nullptr, retVal); ASSERT_NE(nullptr, context); } void TearDown() override { delete[] properties; delete context; PlatformFixture::TearDown(); } cl_int retVal = CL_SUCCESS; Context *context = nullptr; cl_context_properties *properties = nullptr; }; TEST_F(VAContextTest, sharingAreNotPresentByDefault) { ASSERT_EQ(context->getSharing(), nullptr); } TEST_F(VAContextTest, GivenVaContextParamWhenCreateContextThenReturnError) { cl_device_id deviceID = devices[0]; auto pPlatform = NEO::platform(); cl_platform_id pid[1]; pid[0] = pPlatform; DebugManagerStateRestore dbgRestorer; DebugManager.flags.EnableVaLibCalls.set(false); // avoid libva calls on initialization cl_context_properties validProperties[5] = {CL_CONTEXT_PLATFORM, (cl_context_properties)pid[0], CL_CONTEXT_VA_API_DISPLAY_INTEL, 0x10000, 0}; cl_int retVal = CL_SUCCESS; auto ctx = Context::create(validProperties, ClDeviceVector(&deviceID, 1), nullptr, nullptr, retVal); // not supported by default // use MockVaSharing to test va-sharing functionality EXPECT_EQ(CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL, retVal); EXPECT_EQ(nullptr, ctx); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/kernel_va_image_arg_tests.cpp000066400000000000000000000023631363734646600317510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/sharings/va/va_surface.h" #include "opencl/test/unit_test/fixtures/kernel_arg_fixture.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/sharings/va/mock_va_sharing.h" #include "test.h" #include "gtest/gtest.h" using namespace NEO; TEST_F(KernelImageArgTest, givenSharedImageWhenSetArgIsCalledThenReportSharedObjUsage) { MockVaSharing vaSharing; 
VASurfaceID vaSurfaceId = 0u; vaSharing.updateAcquiredHandle(1u); std::unique_ptr sharedImage(VASurface::createSharedVaSurface(context.get(), &vaSharing.sharingFunctions, CL_MEM_READ_WRITE, &vaSurfaceId, 0, nullptr)); auto sharedMem = static_cast(sharedImage.get()); auto nonSharedMem = static_cast(image.get()); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &nonSharedMem); EXPECT_FALSE(pKernel->isUsingSharedObjArgs()); this->pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/mock_va_sharing.h000066400000000000000000000066251363734646600273720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/aligned_memory.h" #include "opencl/source/sharings/va/va_sharing.h" namespace NEO { class VASharingFunctionsMock : public VASharingFunctions { public: using VASharingFunctions::supportedFormats; VAImage mockVaImage = {}; int32_t derivedImageFormatFourCC = VA_FOURCC_NV12; int32_t derivedImageFormatBpp = 8; uint16_t derivedImageHeight = 256; uint16_t derivedImageWidth = 256; VAStatus queryImageFormatsReturnStatus = VA_STATUS_SUCCESS; bool isValidDisplayCalled = false; bool deriveImageCalled = false; bool destroyImageCalled = false; bool syncSurfaceCalled = false; bool extGetSurfaceHandleCalled = false; osHandle acquiredVaHandle = 0; VASharingFunctionsMock(VADisplay vaDisplay) : VASharingFunctions(vaDisplay) {} VASharingFunctionsMock() : VASharingFunctionsMock(nullptr){}; VAStatus deriveImage(VASurfaceID vaSurface, VAImage *vaImage) override { deriveImageCalled = true; uint32_t pitch; vaImage->height = derivedImageHeight; vaImage->width = derivedImageWidth; pitch = alignUp(derivedImageWidth, 128); vaImage->offsets[1] = alignUp(vaImage->height, 32) * pitch; vaImage->offsets[2] = vaImage->offsets[1] + 1; vaImage->pitches[0] = 
pitch; vaImage->pitches[1] = pitch; vaImage->pitches[2] = pitch; vaImage->format.fourcc = derivedImageFormatFourCC; vaImage->format.bits_per_pixel = derivedImageFormatBpp; mockVaImage.width = vaImage->width; mockVaImage.height = vaImage->height; return VA_STATUS_SUCCESS; } bool isValidVaDisplay() override { isValidDisplayCalled = true; return 1; } VAStatus destroyImage(VAImageID vaImageId) override { destroyImageCalled = true; return VA_STATUS_SUCCESS; } VAStatus extGetSurfaceHandle(VASurfaceID *vaSurface, unsigned int *handleId) override { extGetSurfaceHandleCalled = true; *handleId = acquiredVaHandle; return VA_STATUS_SUCCESS; } VAStatus syncSurface(VASurfaceID vaSurface) override { syncSurfaceCalled = true; return VA_STATUS_SUCCESS; } VAStatus queryImageFormats(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats) override { if (queryImageFormatsReturnStatus != VA_STATUS_SUCCESS) { return queryImageFormatsReturnStatus; } if (numFormats) { *numFormats = 2; } if (formatList) { formatList[0].fourcc = VA_FOURCC_NV12; formatList[0].bits_per_pixel = 12; formatList[0].byte_order = VA_LSB_FIRST; formatList[1].fourcc = VA_FOURCC_P010; formatList[1].bits_per_pixel = 24; formatList[1].byte_order = VA_LSB_FIRST; } return VA_STATUS_SUCCESS; } int maxNumImageFormats(VADisplay vaDisplay) override { return 2; } }; class MockVaSharing { public: void updateAcquiredHandle() { sharingFunctions.acquiredVaHandle = sharingHandle; } void updateAcquiredHandle(unsigned int handle) { sharingHandle = handle; sharingFunctions.acquiredVaHandle = sharingHandle; } VASharingFunctionsMock sharingFunctions; osHandle sharingHandle = 0; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/va_base_object_tests.cpp000066400000000000000000000040531363734646600307340ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/source/accelerators/intel_accelerator.h" #include 
"opencl/source/command_queue/command_queue.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/sharings/sharing_factory.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "gtest/gtest.h" namespace NEO { template struct VABaseObjectTests : public ::testing::Test { void SetUp() override { } void TearDown() override { } }; typedef ::testing::Types< MockPlatform, IntelAccelerator, //Context, //Program, //Kernel, //Sampler //others... MockCommandQueue, DeviceQueue> BaseObjectTypes; TYPED_TEST_CASE(VABaseObjectTests, BaseObjectTypes); TYPED_TEST(VABaseObjectTests, commonRuntimeExpectsDispatchTableAtFirstPointerInObject) { TypeParam objectDrv; // Automatic downcasting to _cl_type *. typename TypeParam::BaseType *objectCL = &objectDrv; sharingFactory.fillGlobalDispatchTable(); // Common runtime casts to generic type assuming // the dispatch table is the first ptr in the structure auto genericObject = reinterpret_cast(objectCL); EXPECT_EQ(reinterpret_cast(clCreateFromVA_APIMediaSurfaceINTEL), genericObject->dispatch.crtDispatch->clCreateFromVA_APIMediaSurfaceINTEL); EXPECT_EQ(reinterpret_cast(clEnqueueAcquireVA_APIMediaSurfacesINTEL), genericObject->dispatch.crtDispatch->clEnqueueAcquireVA_APIMediaSurfacesINTEL); EXPECT_EQ(reinterpret_cast(clEnqueueReleaseVA_APIMediaSurfacesINTEL), genericObject->dispatch.crtDispatch->clEnqueueReleaseVA_APIMediaSurfacesINTEL); EXPECT_EQ(reinterpret_cast(clGetDeviceIDsFromVA_APIMediaAdapterINTEL), genericObject->dispatch.crtDispatch->clGetDeviceIDsFromVA_APIMediaAdapterINTEL); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/va_sharing_enable_tests.cpp000066400000000000000000000160671363734646600314450ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"opencl/source/sharings/va/enable_va.h" #include "opencl/source/sharings/va/va_sharing_functions.h" #include "opencl/test/unit_test/fixtures/memory_management_fixture.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "gtest/gtest.h" using namespace NEO; static int vaDisplayIsValidRet = 1; extern "C" int vaDisplayIsValid(VADisplay vaDisplay) { return vaDisplayIsValidRet; } class VaSharingEnablerTests : public MemoryManagementFixture, public ::testing::Test { public: void SetUp() override { MemoryManagementFixture::SetUp(); factory.reset(new VaSharingBuilderFactory()); ASSERT_NE(nullptr, factory.get()); } void TearDown() override { factory.reset(nullptr); MemoryManagementFixture::TearDown(); } std::unique_ptr factory; }; TEST_F(VaSharingEnablerTests, givenVaFactoryWhenNoLibVaThenNoExtensionIsReturned) { // hijack dlopen function VariableBackup> bkp(&VASharingFunctions::fdlopen); bkp = [&](const char *filename, int flag) -> void * { // no libva in system return nullptr; }; auto ext = factory->getExtensions(); EXPECT_EQ(0u, ext.length()); EXPECT_STREQ("", ext.c_str()); } TEST_F(VaSharingEnablerTests, givenVaFactoryWhenLibVaAvailableThenExtensionIsReturned) { VariableBackup> bkpOpen(&VASharingFunctions::fdlopen); bkpOpen = [&](const char *filename, int flag) -> void * { return this; }; VariableBackup> bkpClose(&VASharingFunctions::fdlclose); bkpClose = [&](void *handle) -> int { return 0; }; VariableBackup> bkpSym(&VASharingFunctions::fdlsym); bkpSym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; auto ext = factory->getExtensions(); EXPECT_STREQ("cl_intel_va_api_media_sharing ", ext.c_str()); } TEST_F(VaSharingEnablerTests, givenVaFactoryWhenAskedThenGlobalIcdIsConfigured) { class CrtRestore { public: CrtRestore() { crtSnapshot = crtGlobalDispatchTable; } ~CrtRestore() { crtGlobalDispatchTable = crtSnapshot; } decltype(crtGlobalDispatchTable) crtSnapshot; }; // we play 
with global table, so first save state then restore it with use of RAII CrtRestore crtRestore; crtGlobalDispatchTable.clCreateFromVA_APIMediaSurfaceINTEL = nullptr; crtGlobalDispatchTable.clEnqueueReleaseVA_APIMediaSurfacesINTEL = nullptr; crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL = nullptr; crtGlobalDispatchTable.clGetDeviceIDsFromVA_APIMediaAdapterINTEL = nullptr; factory->fillGlobalDispatchTable(); EXPECT_NE(nullptr, crtGlobalDispatchTable.clCreateFromVA_APIMediaSurfaceINTEL); EXPECT_NE(nullptr, crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL); EXPECT_NE(nullptr, crtGlobalDispatchTable.clEnqueueAcquireVA_APIMediaSurfacesINTEL); EXPECT_NE(nullptr, crtGlobalDispatchTable.clGetDeviceIDsFromVA_APIMediaAdapterINTEL); } TEST_F(VaSharingEnablerTests, givenVaFactoryWhenAskedThenBuilderIsCreated) { auto builder = factory->createContextBuilder(); EXPECT_NE(nullptr, builder); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenUnknownPropertyThenFalseIsReturnedAndErrcodeUnchanged) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_FALSE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenValidPropertyThenTrueIsReturnedAndErrcodeUnchanged) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_VA_API_DISPLAY_INTEL; cl_context_properties value = 0x1243; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); //repeat to check if we don't allocate twice auto prevAllocations = MemoryManagement::numAllocations.load(); res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, 
errcodeRet); auto currAllocations = MemoryManagement::numAllocations.load(); EXPECT_EQ(prevAllocations, currAllocations); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenNoPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); MockContext context; int32_t errcodeRet = CL_SUCCESS; auto res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenInvalidPropertiesThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); cl_context_properties property = CL_CONTEXT_PLATFORM; cl_context_properties value; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_FALSE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); MockContext context; errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenValidPropertyButInvalidDisplayThenFinalizerReturnsFalseAndErrcode) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); vaDisplayIsValidRet = 0; cl_context_properties property = CL_CONTEXT_VA_API_DISPLAY_INTEL; cl_context_properties value = 0x10000; int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); MockContext context; errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_FALSE(res); EXPECT_EQ(CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL, errcodeRet); } TEST_F(VaSharingEnablerTests, givenVaBuilderWhenValidPropertyButValidDisplayThenFinalizerReturnsTrue) { auto builder = factory->createContextBuilder(); ASSERT_NE(nullptr, builder); vaDisplayIsValidRet = 1; cl_context_properties property = CL_CONTEXT_VA_API_DISPLAY_INTEL; cl_context_properties value = 0x10000; 
int32_t errcodeRet = CL_SUCCESS; auto res = builder->processProperties(property, value, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); MockContext context; errcodeRet = CL_SUCCESS; res = builder->finalizeProperties(context, errcodeRet); EXPECT_TRUE(res); EXPECT_EQ(CL_SUCCESS, errcodeRet); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/va_sharing_linux_tests.cpp000066400000000000000000000130421363734646600313440ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/sharings/va/va_sharing_functions.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/sharings/va/mock_va_sharing.h" #include "gtest/gtest.h" #include #include using namespace NEO; class VASharingFunctionsTested : public VASharingFunctions { public: VASharingFunctionsTested() : VASharingFunctions(nullptr) {} bool wereFunctionsAssigned() const { return vaDisplayIsValidPFN != nullptr && vaDeriveImagePFN != nullptr && vaDestroyImagePFN != nullptr && vaSyncSurfacePFN != nullptr && vaGetLibFuncPFN != nullptr && vaExtGetSurfaceHandlePFN != nullptr; } bool wereFunctionsAssignedNull() const { return vaDisplayIsValidPFN == nullptr && vaDeriveImagePFN == nullptr && vaDestroyImagePFN == nullptr && vaSyncSurfacePFN == nullptr && vaGetLibFuncPFN == nullptr && vaExtGetSurfaceHandlePFN == nullptr; } }; TEST(VASharingFunctions, GivenInitFunctionsWhenDLOpenFailsThenFunctionsAreNull) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return nullptr; }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; VASharingFunctions::fdlclose = [&](void 
*handle) -> int { return 0; }; VASharingFunctionsTested functions; EXPECT_TRUE(functions.wereFunctionsAssignedNull()); } void *GetLibFunc(VADisplay vaDisplay, const char *func) { return (void *)0xdeadbeef; } TEST(VASharingFunctions, GivenInitFunctionsWhenDLOpenSuccedsThenFunctionsAreNotNull) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); std::unique_ptr valib(new uint32_t); ASSERT_NE(nullptr, valib.get()); VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return valib.get(); }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return (void *)GetLibFunc; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { return 0; }; VASharingFunctionsTested functions; EXPECT_TRUE(functions.wereFunctionsAssigned()); } TEST(VASharingFunctions, GivenFunctionsWhenNoLibvaThenDlcloseNotCalled) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); uint32_t closeCalls = 0; VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> void * { return nullptr; }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { closeCalls++; return 0; }; { // we need this to properly track closeCalls VASharingFunctionsTested functions; } EXPECT_EQ(0u, closeCalls); } TEST(VASharingFunctions, GivenFunctionsWhenLibvaLoadedThenDlcloseIsCalled) { VariableBackup dlopenBackup(&VASharingFunctions::fdlopen); VariableBackup dlsymBackup(&VASharingFunctions::fdlsym); VariableBackup dlcloseBackup(&VASharingFunctions::fdlclose); std::unique_ptr valib(new uint32_t); ASSERT_NE(nullptr, valib.get()); uint32_t closeCalls = 0; VASharingFunctions::fdlopen = [&](const char *filename, int flag) -> 
void * { return valib.get(); }; VASharingFunctions::fdlsym = [&](void *handle, const char *symbol) -> void * { return nullptr; }; VASharingFunctions::fdlclose = [&](void *handle) -> int { if (handle == valib.get()) { closeCalls++; } return 0; }; { // we need this to properly track closeCalls VASharingFunctionsTested functions; } EXPECT_EQ(1u, closeCalls); } TEST(VASharingFunctions, givenEnabledExtendedVaFormatsWhenQueryingSupportedFormatsThenAllSupportedFormatsAreStored) { DebugManagerStateRestore restore; DebugManager.flags.EnableExtendedVaFormats.set(true); VASharingFunctionsMock sharingFunctions; sharingFunctions.querySupportedVaImageFormats(VADisplay(1)); EXPECT_EQ(2u, sharingFunctions.supportedFormats.size()); size_t allFormatsFound = 0; for (const auto &supportedFormat : sharingFunctions.supportedFormats) { if (supportedFormat.fourcc == VA_FOURCC_NV12 || supportedFormat.fourcc == VA_FOURCC_P010) { allFormatsFound++; } } EXPECT_EQ(2u, allFormatsFound); } compute-runtime-20.13.16352/opencl/test/unit_test/sharings/va/va_sharing_tests.cpp000066400000000000000000000731221363734646600301320ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/array_count.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/source/api/api.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/platform/platform.h" #include "opencl/source/sharings/va/cl_va_api.h" #include "opencl/source/sharings/va/va_sharing.h" #include "opencl/source/sharings/va/va_surface.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/libult/create_command_stream.h" #include "opencl/test/unit_test/libult/ult_command_stream_receiver.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include 
"opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/sharings/va/mock_va_sharing.h" #include "gtest/gtest.h" using namespace NEO; class VaSharingTests : public ::testing::Test, public PlatformFixture { public: void SetUp() override { PlatformFixture::SetUp(); vaSharing = new MockVaSharing; context.setSharingFunctions(&vaSharing->sharingFunctions); vaSharing->sharingFunctions.querySupportedVaImageFormats(VADisplay(1)); vaSharing->updateAcquiredHandle(sharingHandle); sharedImg = nullptr; sharedClMem = nullptr; } void TearDown() override { if (sharedImg) { delete sharedImg; } context.releaseSharingFunctions(SharingType::VA_SHARING); delete vaSharing; PlatformFixture::TearDown(); } void updateAcquiredHandle(unsigned int handle) { sharingHandle = handle; vaSharing->updateAcquiredHandle(sharingHandle); } void createMediaSurface(cl_uint plane = 0, cl_mem_flags flags = CL_MEM_READ_WRITE) { sharedClMem = clCreateFromVA_APIMediaSurfaceINTEL(&context, flags, &vaSurfaceId, plane, &errCode); ASSERT_NE(nullptr, sharedClMem); EXPECT_EQ(CL_SUCCESS, errCode); sharedImg = castToObject(sharedClMem); ASSERT_NE(sharedImg, nullptr); } Image *sharedImg; cl_mem sharedClMem; MockContext context; MockVaSharing *vaSharing; VASurfaceID vaSurfaceId = 0u; VAImage vaImage = {}; cl_int errCode; unsigned int sharingHandle = 1u; }; TEST(VaSharingTest, givenVASharingFunctionsObjectWhenFunctionsAreCalledThenCallsAreRedirectedToVaFunctionPointers) { unsigned int handle = 0u; VASurfaceID vaSurfaceId = 0u; VAImage vaImage = {}; class VASharingFunctionsGlobalFunctionPointersMock : public VASharingFunctions { public: VASharingFunctionsGlobalFunctionPointersMock() : VASharingFunctions(nullptr) { initMembers(); } bool vaDisplayIsValidCalled = false; bool vaDeriveImageCalled = false; bool vaDestroyImageCalled = false; bool vaSyncSurfaceCalled = false; bool vaGetLibFuncCalled = false; bool vaExtGetSurfaceHandleCalled = false; bool vaQueryImageFormatsCalled = false; bool 
vaMaxNumImageFormatsCalled = false; void initMembers() { vaDisplayIsValidPFN = mockVaDisplayIsValid; vaDeriveImagePFN = mockVaDeriveImage; vaDestroyImagePFN = mockVaDestroyImage; vaSyncSurfacePFN = mockVaSyncSurface; vaGetLibFuncPFN = mockVaGetLibFunc; vaExtGetSurfaceHandlePFN = mockExtGetSurfaceHandle; vaQueryImageFormatsPFN = mockVaQueryImageFormats; vaMaxNumImageFormatsPFN = mockVaMaxNumImageFormats; } static VASharingFunctionsGlobalFunctionPointersMock *getInstance(bool release) { static VASharingFunctionsGlobalFunctionPointersMock *vaSharingFunctions = nullptr; if (!vaSharingFunctions) { vaSharingFunctions = new VASharingFunctionsGlobalFunctionPointersMock; } else if (release) { delete vaSharingFunctions; vaSharingFunctions = nullptr; } return vaSharingFunctions; } static int mockVaDisplayIsValid(VADisplay vaDisplay) { getInstance(false)->vaDisplayIsValidCalled = true; return 1; }; static VAStatus mockVaDeriveImage(VADisplay vaDisplay, VASurfaceID vaSurface, VAImage *vaImage) { getInstance(false)->vaDeriveImageCalled = true; return VA_STATUS_SUCCESS; }; static VAStatus mockVaDestroyImage(VADisplay vaDisplay, VAImageID vaImageId) { getInstance(false)->vaDestroyImageCalled = true; return VA_STATUS_SUCCESS; }; static VAStatus mockVaSyncSurface(VADisplay vaDisplay, VASurfaceID vaSurface) { getInstance(false)->vaSyncSurfaceCalled = true; return VA_STATUS_SUCCESS; }; static void *mockVaGetLibFunc(VADisplay vaDisplay, const char *func) { getInstance(false)->vaGetLibFuncCalled = true; return nullptr; }; static VAStatus mockExtGetSurfaceHandle(VADisplay vaDisplay, VASurfaceID *vaSurface, unsigned int *handleId) { getInstance(false)->vaExtGetSurfaceHandleCalled = true; return VA_STATUS_SUCCESS; }; static VAStatus mockVaQueryImageFormats(VADisplay vaDisplay, VAImageFormat *formatList, int *numFormats) { getInstance(false)->vaQueryImageFormatsCalled = true; return VA_STATUS_SUCCESS; }; static int mockVaMaxNumImageFormats(VADisplay vaDisplay) { 
getInstance(false)->vaMaxNumImageFormatsCalled = true; return 0; }; }; auto vaSharingFunctions = VASharingFunctionsGlobalFunctionPointersMock::getInstance(false); EXPECT_TRUE(vaSharingFunctions->isValidVaDisplay()); EXPECT_EQ(0, vaSharingFunctions->deriveImage(vaSurfaceId, &vaImage)); EXPECT_EQ(0, vaSharingFunctions->destroyImage(vaImage.image_id)); EXPECT_EQ(0, vaSharingFunctions->syncSurface(vaSurfaceId)); EXPECT_TRUE(nullptr == vaSharingFunctions->getLibFunc("funcName")); EXPECT_EQ(0, vaSharingFunctions->extGetSurfaceHandle(&vaSurfaceId, &handle)); int numFormats = 0; EXPECT_EQ(0, vaSharingFunctions->queryImageFormats(VADisplay(1), nullptr, &numFormats)); EXPECT_EQ(0, vaSharingFunctions->maxNumImageFormats(VADisplay(1))); EXPECT_EQ(0u, handle); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaDisplayIsValidCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaDeriveImageCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaDestroyImageCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaSyncSurfaceCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaGetLibFuncCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaExtGetSurfaceHandleCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaQueryImageFormatsCalled); EXPECT_TRUE(VASharingFunctionsGlobalFunctionPointersMock::getInstance(false)->vaMaxNumImageFormatsCalled); VASharingFunctionsGlobalFunctionPointersMock::getInstance(true); } TEST_F(VaSharingTests, givenMockVaWhenVaSurfaceIsCreatedThenMemObjectHasVaHandler) { auto vaSurface = VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, &vaSurfaceId, 0, &errCode); EXPECT_NE(nullptr, vaSurface); EXPECT_NE(nullptr, vaSurface->getGraphicsAllocation()); EXPECT_EQ(4096u, 
vaSurface->getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(1u, vaSurface->getGraphicsAllocation()->peekSharedHandle()); EXPECT_EQ(4096u, vaSurface->getSize()); auto handler = vaSurface->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto vaHandler = static_cast(handler); EXPECT_EQ(vaHandler->peekFunctionsHandler(), &vaSharing->sharingFunctions); EXPECT_EQ(1u, vaSharing->sharingFunctions.acquiredVaHandle); EXPECT_TRUE(vaSharing->sharingFunctions.deriveImageCalled); EXPECT_TRUE(vaSharing->sharingFunctions.destroyImageCalled); EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); size_t paramSize = 0; void *paramValue = nullptr; handler->getMemObjectInfo(paramSize, paramValue); EXPECT_EQ(sizeof(VASurfaceID *), paramSize); VASurfaceID **paramSurfaceId = reinterpret_cast(paramValue); EXPECT_EQ(vaSurfaceId, **paramSurfaceId); delete vaSurface; } TEST_F(VaSharingTests, givenInvalidPlaneWhenVaSurfaceIsCreatedThenUnrecoverableIsCalled) { EXPECT_THROW(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, &vaSurfaceId, 2, &errCode), std::exception); } TEST_F(VaSharingTests, givenInvalidPlaneInputWhenVaSurfaceIsCreatedThenInvalidValueErrorIsReturned) { sharedClMem = clCreateFromVA_APIMediaSurfaceINTEL(&context, CL_MEM_READ_WRITE, &vaSurfaceId, 2, &errCode); EXPECT_EQ(nullptr, sharedClMem); EXPECT_EQ(CL_INVALID_VALUE, errCode); } TEST_F(VaSharingTests, givenMockVaWhenVaSurfaceIsCreatedWithNotAlignedWidthAndHeightThenSurfaceOffsetsUseAlignedValues) { vaSharing->sharingFunctions.derivedImageWidth = 256 + 16; vaSharing->sharingFunctions.derivedImageHeight = 512 + 16; auto vaSurface = VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, &vaSurfaceId, 1, &errCode); EXPECT_NE(nullptr, vaSurface); EXPECT_NE(nullptr, vaSurface->getGraphicsAllocation()); EXPECT_EQ(4096u, vaSurface->getGraphicsAllocation()->getUnderlyingBufferSize()); EXPECT_EQ(1u, 
vaSurface->getGraphicsAllocation()->peekSharedHandle()); EXPECT_EQ(4096u, vaSurface->getSize()); auto handler = vaSurface->peekSharingHandler(); ASSERT_NE(nullptr, handler); auto vaHandler = static_cast(handler); EXPECT_EQ(vaHandler->peekFunctionsHandler(), &vaSharing->sharingFunctions); EXPECT_EQ(1u, vaSharing->sharingFunctions.acquiredVaHandle); EXPECT_TRUE(vaSharing->sharingFunctions.deriveImageCalled); EXPECT_TRUE(vaSharing->sharingFunctions.destroyImageCalled); EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); SurfaceOffsets surfaceOffsets; uint16_t alignedWidth = alignUp(vaSharing->sharingFunctions.derivedImageWidth, 128); uint16_t alignedHeight = alignUp(vaSharing->sharingFunctions.derivedImageHeight, 32); uint64_t alignedOffset = alignedWidth * alignedHeight; vaSurface->getSurfaceOffsets(surfaceOffsets); EXPECT_EQ(alignedHeight, surfaceOffsets.yOffsetForUVplane); EXPECT_EQ(alignedOffset, surfaceOffsets.offset); EXPECT_EQ(0u, surfaceOffsets.yOffset); EXPECT_EQ(0u, surfaceOffsets.xOffset); delete vaSurface; } TEST_F(VaSharingTests, givenContextWhenClCreateFromVaApiMediaSurfaceIsCalledThenSurfaceIsReturned) { sharedClMem = clCreateFromVA_APIMediaSurfaceINTEL(&context, CL_MEM_READ_WRITE, &vaSurfaceId, 0, &errCode); ASSERT_EQ(CL_SUCCESS, errCode); ASSERT_NE(nullptr, sharedClMem); errCode = clReleaseMemObject(sharedClMem); EXPECT_EQ(CL_SUCCESS, errCode); } TEST_F(VaSharingTests, givenVASurfaceWhenItIsAcquiredTwiceThenAcquireIsNotCalled) { createMediaSurface(); sharedImg->peekSharingHandler()->acquire(sharedImg); EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); vaSharing->sharingFunctions.extGetSurfaceHandleCalled = false; sharedImg->peekSharingHandler()->acquire(sharedImg); EXPECT_FALSE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); } TEST_F(VaSharingTests, givenHwCommandQueueWhenEnqueueAcquireIsCalledMultipleTimesThenSharingFunctionAcquireIsNotCalledMultipleTimes) { auto commandQueue = clCreateCommandQueue(&context, 
context.getDevice(0), 0, &errCode); ASSERT_EQ(CL_SUCCESS, errCode); createMediaSurface(); EXPECT_TRUE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); vaSharing->sharingFunctions.extGetSurfaceHandleCalled = false; errCode = clEnqueueAcquireVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_FALSE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); errCode = clEnqueueReleaseVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clEnqueueAcquireVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_FALSE(vaSharing->sharingFunctions.extGetSurfaceHandleCalled); errCode = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, errCode); } TEST_F(VaSharingTests, givenHwCommandQueueWhenAcquireAndReleaseCallsAreMadeWithEventsThenProperCmdTypeIsReturned) { cl_event retEvent = nullptr; cl_command_type cmdType = 0; size_t sizeReturned = 0; createMediaSurface(); auto commandQueue = clCreateCommandQueue(&context, context.getDevice(0), 0, &errCode); errCode = clEnqueueAcquireVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, &retEvent); EXPECT_EQ(CL_SUCCESS, errCode); ASSERT_NE(retEvent, nullptr); errCode = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(static_cast(CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL), cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); errCode = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clEnqueueReleaseVA_APIMediaSurfacesINTEL(commandQueue, 1, &sharedClMem, 0, nullptr, &retEvent); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clGetEventInfo(retEvent, CL_EVENT_COMMAND_TYPE, sizeof(cmdType), &cmdType, &sizeReturned); ASSERT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(static_cast(CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL), 
cmdType); EXPECT_EQ(sizeof(cl_command_type), sizeReturned); errCode = clReleaseEvent(retEvent); EXPECT_EQ(CL_SUCCESS, errCode); errCode = clReleaseCommandQueue(commandQueue); EXPECT_EQ(CL_SUCCESS, errCode); } TEST_F(VaSharingTests, givenVaMediaSurfaceWhenGetMemObjectInfoIsCalledThenSurfaceIdIsReturned) { createMediaSurface(); VASurfaceID *retVaSurfaceId = nullptr; size_t retSize = 0; errCode = clGetMemObjectInfo(sharedClMem, CL_MEM_VA_API_MEDIA_SURFACE_INTEL, sizeof(VASurfaceID *), &retVaSurfaceId, &retSize); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(sizeof(VASurfaceID *), retSize); EXPECT_EQ(vaSurfaceId, *retVaSurfaceId); } TEST_F(VaSharingTests, givenVaMediaSurfaceWhenGetImageInfoIsCalledThenPlaneIsReturned) { cl_uint plane = 1u; createMediaSurface(plane); cl_uint retPlane = 0u; size_t retSize = 0; errCode = clGetImageInfo(sharedClMem, CL_IMAGE_VA_API_PLANE_INTEL, sizeof(cl_uint), &retPlane, &retSize); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(sizeof(cl_uint), retSize); EXPECT_EQ(plane, retPlane); } TEST_F(VaSharingTests, givenPlaneWhenCreateSurfaceIsCalledThenSetPlaneFields) { cl_uint planes[2] = {0, 1}; updateAcquiredHandle(2); for (int i = 0; i < 2; i++) { createMediaSurface(planes[i]); EXPECT_TRUE(sharedImg->getSurfaceFormatInfo().OCLImageFormat.image_channel_data_type == CL_UNORM_INT8); EXPECT_EQ(planes[i], sharedImg->getMediaPlaneType()); if (planes[i] == 0u) { EXPECT_TRUE(sharedImg->getSurfaceFormatInfo().OCLImageFormat.image_channel_order == CL_R); } else if (planes[i] == 1) { EXPECT_TRUE(sharedImg->getSurfaceFormatInfo().OCLImageFormat.image_channel_order == CL_RG); } delete sharedImg; sharedImg = nullptr; } } TEST_F(VaSharingTests, givenSimpleParamsWhenCreateSurfaceIsCalledThenSetImgObject) { updateAcquiredHandle(2); createMediaSurface(0u); EXPECT_TRUE(sharedImg->getImageDesc().buffer == nullptr); EXPECT_EQ(0u, sharedImg->getImageDesc().image_array_size); EXPECT_EQ(0u, sharedImg->getImageDesc().image_depth); 
EXPECT_EQ(vaSharing->sharingFunctions.mockVaImage.height, static_cast(sharedImg->getImageDesc().image_height)); EXPECT_EQ(vaSharing->sharingFunctions.mockVaImage.width, static_cast(sharedImg->getImageDesc().image_width)); EXPECT_TRUE(CL_MEM_OBJECT_IMAGE2D == sharedImg->getImageDesc().image_type); EXPECT_EQ(0u, sharedImg->getImageDesc().image_slice_pitch); EXPECT_NE(0u, sharedImg->getImageDesc().image_row_pitch); EXPECT_EQ(0u, sharedImg->getHostPtrSlicePitch()); EXPECT_NE(0u, sharedImg->getHostPtrRowPitch()); EXPECT_TRUE(sharedImg->getMemoryPropertiesFlags() == CL_MEM_READ_WRITE); EXPECT_TRUE(sharedImg->getCubeFaceIndex() == __GMM_NO_CUBE_MAP); EXPECT_EQ(vaSharing->sharingHandle, sharedImg->getGraphicsAllocation()->peekSharedHandle()); } TEST_F(VaSharingTests, givenNonInteropUserSyncContextWhenAcquireIsCalledThenSyncSurface) { context.setInteropUserSyncEnabled(false); createMediaSurface(); auto memObj = castToObject(sharedClMem); EXPECT_FALSE(vaSharing->sharingFunctions.syncSurfaceCalled); memObj->peekSharingHandler()->acquire(sharedImg); EXPECT_TRUE(vaSharing->sharingFunctions.syncSurfaceCalled); } TEST_F(VaSharingTests, givenInteropUserSyncContextWhenAcquireIsCalledThenDontSyncSurface) { context.setInteropUserSyncEnabled(true); createMediaSurface(); EXPECT_FALSE(vaSharing->sharingFunctions.syncSurfaceCalled); sharedImg->peekSharingHandler()->acquire(sharedImg); EXPECT_FALSE(vaSharing->sharingFunctions.syncSurfaceCalled); } TEST_F(VaSharingTests, givenYuvPlaneWhenCreateIsCalledThenChangeWidthAndHeight) { cl_uint planeTypes[] = { 0, //Y 1 //U }; context.setInteropUserSyncEnabled(true); for (int i = 0; i < 2; i++) { createMediaSurface(planeTypes[i]); size_t retParam; errCode = clGetImageInfo(sharedClMem, CL_IMAGE_WIDTH, sizeof(size_t), &retParam, nullptr); EXPECT_EQ(CL_SUCCESS, errCode); if (planeTypes[i] == 1) { EXPECT_EQ(128u, retParam); } else { EXPECT_EQ(256u, retParam); } errCode = clGetImageInfo(sharedClMem, CL_IMAGE_HEIGHT, sizeof(size_t), &retParam, nullptr); 
EXPECT_EQ(CL_SUCCESS, errCode); if (planeTypes[i] == 1) { EXPECT_EQ(128u, retParam); } else { EXPECT_EQ(256u, retParam); } delete sharedImg; sharedImg = nullptr; } } TEST_F(VaSharingTests, givenContextWhenSharingTableEmptyThenReturnsNullptr) { MockContext context; context.clearSharingFunctions(); VASharingFunctions *sharingF = context.getSharing(); EXPECT_EQ(sharingF, nullptr); } TEST_F(VaSharingTests, givenValidPlatformWhenGetDeviceIdsFromVaApiMediaAdapterCalledThenReturnFirstDevice) { cl_device_id devices = 0; cl_uint numDevices = 0; cl_platform_id platformId = this->pPlatform; auto errCode = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platformId, 0u, nullptr, 0u, 1, &devices, &numDevices); EXPECT_EQ(CL_SUCCESS, errCode); EXPECT_EQ(1u, numDevices); EXPECT_NE(nullptr, platform()->getClDevice(0)); EXPECT_EQ(platform()->getClDevice(0), devices); } TEST_F(VaSharingTests, givenInValidPlatformWhenGetDeviceIdsFromVaApiMediaAdapterCalledThenReturnFirstDevice) { cl_device_id devices = 0; cl_uint numDevices = 0; auto errCode = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(nullptr, 0u, nullptr, 0u, 1, &devices, &numDevices); EXPECT_EQ(CL_INVALID_PLATFORM, errCode); EXPECT_EQ(0u, numDevices); EXPECT_EQ(0u, devices); } TEST_F(VaSharingTests, givenEnabledExtendedVaFormatsAndP010FormatWhenCreatingSharedVaSurfaceForPlane0ThenCorrectFormatIsUsedByImageAndGMM) { DebugManagerStateRestore restore; DebugManager.flags.EnableExtendedVaFormats.set(true); vaSharing->sharingFunctions.derivedImageFormatBpp = 16; vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_P010; auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, &vaSurfaceId, 0, &errCode)); EXPECT_EQ(static_cast(CL_UNORM_INT16), vaSurface->getImageFormat().image_channel_data_type); EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order); EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R16_UNORM, 
vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat); EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_P010, vaSurface->getGraphicsAllocation()->getDefaultGmm()->resourceParams.Format); EXPECT_EQ(CL_SUCCESS, errCode); } TEST_F(VaSharingTests, givenEnabledExtendedVaFormatsAndP010FormatWhenCreatingSharedVaSurfaceForPlane1ThenCorrectFormatIsUsedByImageAndGMM) { DebugManagerStateRestore restore; DebugManager.flags.EnableExtendedVaFormats.set(true); vaSharing->sharingFunctions.derivedImageFormatBpp = 16; vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_P010; auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, &vaSurfaceId, 1, &errCode)); EXPECT_EQ(static_cast(CL_UNORM_INT16), vaSurface->getImageFormat().image_channel_data_type); EXPECT_EQ(static_cast(CL_RG), vaSurface->getImageFormat().image_channel_order); EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R16G16_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat); EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_P010, vaSurface->getGraphicsAllocation()->getDefaultGmm()->resourceParams.Format); EXPECT_EQ(CL_SUCCESS, errCode); } TEST_F(VaSharingTests, givenEnabledExtendedVaFormatsAndNV12FormatWhenCreatingSharedVaSurfaceForPlane0ThenCorrectFormatIsUsedByImageAndGMM) { DebugManagerStateRestore restore; DebugManager.flags.EnableExtendedVaFormats.set(true); vaSharing->sharingFunctions.derivedImageFormatBpp = 12; vaSharing->sharingFunctions.derivedImageFormatFourCC = VA_FOURCC_NV12; auto vaSurface = std::unique_ptr(VASurface::createSharedVaSurface(&context, &vaSharing->sharingFunctions, CL_MEM_READ_WRITE, &vaSurfaceId, 0, &errCode)); EXPECT_EQ(static_cast(CL_UNORM_INT8), vaSurface->getImageFormat().image_channel_data_type); EXPECT_EQ(static_cast(CL_R), vaSurface->getImageFormat().image_channel_order); EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_R8_UNORM, vaSurface->getSurfaceFormatInfo().surfaceFormat.GMMSurfaceFormat); 
EXPECT_EQ(GMM_RESOURCE_FORMAT::GMM_FORMAT_NV12, vaSurface->getGraphicsAllocation()->getDefaultGmm()->resourceParams.Format); EXPECT_EQ(CL_SUCCESS, errCode); } using ApiVaSharingTests = VaSharingTests; TEST_F(ApiVaSharingTests, givenSupportedImageTypeWhenGettingSupportedVAApiFormatsThenCorrectListIsReturned) { cl_mem_flags flags[] = {CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE}; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; VAImageFormat vaApiFormats[10] = {}; cl_uint numImageFormats = 0; VAImageFormat supportedFormat = {VA_FOURCC_NV12, VA_LSB_FIRST, 8, 0, 0, 0, 0, 0}; for (auto flag : flags) { cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL( &context, flag, image_type, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1u, numImageFormats); EXPECT_EQ(supportedFormat.fourcc, vaApiFormats[0].fourcc); } } TEST_F(ApiVaSharingTests, givenZeroNumEntriesWhenGettingSupportedVAApiFormatsThenNumFormatsIsReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 0; cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL( &context, flags, image_type, 0, nullptr, &numImageFormats); EXPECT_EQ(CL_SUCCESS, result); EXPECT_EQ(1u, numImageFormats); } TEST_F(ApiVaSharingTests, givenNullNumImageFormatsWhenGettingSupportedVAApiFormatsThenNumFormatsIsNotDereferenced) { cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL( &context, flags, image_type, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, result); } TEST_F(ApiVaSharingTests, givenInvalidImageTypeWhenGettingSupportedVAApiFormatsThenIvalidValueErrorIsReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE3D; VAImageFormat vaApiFormats[10] = {}; cl_uint numImageFormats = 0; cl_int result = 
clGetSupportedVA_APIMediaSurfaceFormatsINTEL( &context, flags, image_type, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(0u, numImageFormats); } TEST_F(ApiVaSharingTests, givenInvalidFlagsWhenGettingSupportedVAApiFormatsThenIvalidValueErrorIsReturned) { cl_mem_flags flags = CL_MEM_NO_ACCESS_INTEL; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; VAImageFormat vaApiFormats[10] = {}; cl_uint numImageFormats = 0; cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL( &context, flags, image_type, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats); EXPECT_EQ(CL_INVALID_VALUE, result); EXPECT_EQ(0u, numImageFormats); } TEST_F(ApiVaSharingTests, givenInvalidContextWhenGettingSupportedVAApiFormatsThenIvalidContextErrorIsReturned) { cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; VAImageFormat vaApiFormats[10] = {}; cl_uint numImageFormats = 0; MockContext contextWihtoutVASharing; cl_int result = clGetSupportedVA_APIMediaSurfaceFormatsINTEL( &contextWihtoutVASharing, flags, image_type, arrayCount(vaApiFormats), vaApiFormats, &numImageFormats); EXPECT_EQ(CL_INVALID_CONTEXT, result); EXPECT_EQ(0u, numImageFormats); } TEST(VaSurface, givenValidPlaneAndFlagsWhenValidatingInputsThenTrueIsReturned) { for (cl_uint plane = 0; plane <= 1; plane++) { EXPECT_TRUE(VASurface::validate(CL_MEM_READ_ONLY, plane)); EXPECT_TRUE(VASurface::validate(CL_MEM_WRITE_ONLY, plane)); EXPECT_TRUE(VASurface::validate(CL_MEM_READ_WRITE, plane)); } } TEST(VaSurface, givenInValidPlaneOrFlagsWhenValidatingInputsThenTrueIsReturned) { cl_uint plane = 2; EXPECT_FALSE(VASurface::validate(CL_MEM_READ_ONLY, plane)); EXPECT_FALSE(VASurface::validate(CL_MEM_USE_HOST_PTR, 0)); } TEST(VaSurface, givenEnabledExtendedVaFormatsWhenGettingUnsupportedSurfaceFormatInfoThenNullptrIsReturned) { auto formatInfo = VASurface::getExtendedSurfaceFormatInfo(VA_FOURCC_P016); EXPECT_EQ(nullptr, formatInfo); 
} TEST(VaSurface, givenNotSupportedVaFormatsWhenCheckingIfSupportedThenFalseIsReturned) { EXPECT_FALSE(VASurface::isSupportedFourCC(VA_FOURCC_NV11)); DebugManagerStateRestore restore; DebugManager.flags.EnableExtendedVaFormats.set(true); EXPECT_FALSE(VASurface::isSupportedFourCC(VA_FOURCC_P016)); } TEST(VaSharingFunctions, givenErrorReturnedFromVaLibWhenQuerySupportedVaImageFormatsThenSupportedFormatsAreNotSet) { VASharingFunctionsMock sharingFunctions; sharingFunctions.queryImageFormatsReturnStatus = VA_STATUS_ERROR_INVALID_VALUE; sharingFunctions.querySupportedVaImageFormats(VADisplay(1)); EXPECT_EQ(0u, sharingFunctions.supportedFormats.size()); } TEST(VaSharingFunctions, givenNoSupportedFormatsWhenQuerySupportedVaImageFormatsThenSupportedFormatsAreNotSet) { VASharingFunctionsMock sharingFunctions; EXPECT_EQ(0u, sharingFunctions.supportedFormats.size()); cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 0; VAImageFormat vaApiFormats[10] = {}; sharingFunctions.getSupportedFormats( flags, image_type, 10, vaApiFormats, &numImageFormats); EXPECT_EQ(0u, numImageFormats); } TEST(VaSharingFunctions, givenNumEntriesLowerThanSupportedFormatsWhenGettingSupportedFormatsThenOnlyNumEntiresAreReturned) { VASharingFunctionsMock sharingFunctions; VAImageFormat imageFormat = {VA_FOURCC_NV12, 1, 12}; sharingFunctions.supportedFormats.emplace_back(imageFormat); imageFormat.fourcc = VA_FOURCC_NV21; sharingFunctions.supportedFormats.emplace_back(imageFormat); EXPECT_EQ(2u, sharingFunctions.supportedFormats.size()); cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_object_type image_type = CL_MEM_OBJECT_IMAGE2D; cl_uint numImageFormats = 0; VAImageFormat vaApiFormats[3] = {}; sharingFunctions.getSupportedFormats( flags, image_type, 1, vaApiFormats, &numImageFormats); EXPECT_EQ(2u, numImageFormats); EXPECT_EQ(static_cast(VA_FOURCC_NV12), vaApiFormats[0].fourcc); EXPECT_EQ(0u, vaApiFormats[1].fourcc); EXPECT_EQ(0u, 
vaApiFormats[2].fourcc); } compute-runtime-20.13.16352/opencl/test/unit_test/sku_info/000077500000000000000000000000001363734646600234475ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/sku_info/CMakeLists.txt000066400000000000000000000007701363734646600262130ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_sku_info ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/sku_info_base_reference.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/sku_info_transfer_tests.cpp ) if(WIN32) list(APPEND IGDRCL_SRCS_tests_sku_info ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/sku_info_receiver_tests.cpp ) endif() target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_sku_info})compute-runtime-20.13.16352/opencl/test/unit_test/sku_info/sku_info_base_reference.h000066400000000000000000000142621363734646600304520ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" #include "sku_info.h" namespace NEO { struct SkuInfoBaseReference { static void fillReferenceFtrForTransfer(_SKU_FEATURE_TABLE &refFtrTable) { memset(&refFtrTable, 0, sizeof(refFtrTable)); refFtrTable.FtrStandardMipTailFormat = 1; refFtrTable.FtrULT = 1; refFtrTable.FtrEDram = 1; refFtrTable.FtrFrameBufferLLC = 1; refFtrTable.FtrCrystalwell = 1; refFtrTable.FtrDisplayEngineS3d = 1; refFtrTable.FtrTileY = 1; refFtrTable.FtrDisplayYTiling = 1; refFtrTable.FtrFbc = 1; refFtrTable.FtrVERing = 1; refFtrTable.FtrVcs2 = 1; refFtrTable.FtrLCIA = 1; refFtrTable.FtrIA32eGfxPTEs = 1; refFtrTable.FtrWddm2GpuMmu = 1; refFtrTable.FtrWddm2_1_64kbPages = 1; refFtrTable.FtrTranslationTable = 1; refFtrTable.FtrUserModeTranslationTable = 1; refFtrTable.FtrLLCBypass = 1; refFtrTable.FtrWddm2Svm = 1; refFtrTable.FtrE2ECompression = 1; refFtrTable.FtrLinearCCS = 1; refFtrTable.FtrCCSRing = 1; 
refFtrTable.FtrCCSNode = 1; refFtrTable.FtrMemTypeMocsDeferPAT = 1; } static void fillReferenceWaForTransfer(_WA_TABLE &refWaTable) { memset(&refWaTable, 0, sizeof(refWaTable)); refWaTable.WaFbcLinearSurfaceStride = 1; refWaTable.WaDisableEdramForDisplayRT = 1; refWaTable.WaEncryptedEdramOnlyPartials = 1; refWaTable.WaLosslessCompressionSurfaceStride = 1; refWaTable.WaRestrictPitch128KB = 1; refWaTable.WaLimit128BMediaCompr = 1; refWaTable.WaUntypedBufferCompression = 1; refWaTable.WaAuxTable16KGranular = 1; } static void fillReferenceFtrToReceive(FeatureTable &refFtrTable) { refFtrTable = {}; refFtrTable.ftrDesktop = true; refFtrTable.ftrChannelSwizzlingXOREnabled = true; refFtrTable.ftrGtBigDie = true; refFtrTable.ftrGtMediumDie = true; refFtrTable.ftrGtSmallDie = true; refFtrTable.ftrGT1 = true; refFtrTable.ftrGT1_5 = true; refFtrTable.ftrGT2 = true; refFtrTable.ftrGT2_5 = true; refFtrTable.ftrGT3 = true; refFtrTable.ftrGT4 = true; refFtrTable.ftrIVBM0M1Platform = true; refFtrTable.ftrSGTPVSKUStrapPresent = true; refFtrTable.ftrGTA = true; refFtrTable.ftrGTC = true; refFtrTable.ftrGTX = true; refFtrTable.ftr5Slice = true; refFtrTable.ftrGpGpuMidBatchPreempt = true; refFtrTable.ftrGpGpuThreadGroupLevelPreempt = true; refFtrTable.ftrGpGpuMidThreadLevelPreempt = true; refFtrTable.ftrIoMmuPageFaulting = true; refFtrTable.ftrWddm2Svm = true; refFtrTable.ftrPooledEuEnabled = true; refFtrTable.ftrResourceStreamer = true; refFtrTable.ftrPPGTT = true; refFtrTable.ftrSVM = true; refFtrTable.ftrEDram = true; refFtrTable.ftrL3IACoherency = true; refFtrTable.ftrIA32eGfxPTEs = true; refFtrTable.ftr3dMidBatchPreempt = true; refFtrTable.ftr3dObjectLevelPreempt = true; refFtrTable.ftrPerCtxtPreemptionGranularityControl = true; refFtrTable.ftrTileY = true; refFtrTable.ftrDisplayYTiling = true; refFtrTable.ftrTranslationTable = true; refFtrTable.ftrUserModeTranslationTable = true; refFtrTable.ftrEnableGuC = true; refFtrTable.ftrFbc = true; refFtrTable.ftrFbc2AddressTranslation = 
true; refFtrTable.ftrFbcBlitterTracking = true; refFtrTable.ftrFbcCpuTracking = true; refFtrTable.ftrVcs2 = true; refFtrTable.ftrVEBOX = true; refFtrTable.ftrSingleVeboxSlice = true; refFtrTable.ftrULT = true; refFtrTable.ftrLCIA = true; refFtrTable.ftrGttCacheInvalidation = true; refFtrTable.ftrTileMappedResource = true; refFtrTable.ftrAstcHdr2D = true; refFtrTable.ftrAstcLdr2D = true; refFtrTable.ftrStandardMipTailFormat = true; refFtrTable.ftrFrameBufferLLC = true; refFtrTable.ftrCrystalwell = true; refFtrTable.ftrLLCBypass = true; refFtrTable.ftrDisplayEngineS3d = true; refFtrTable.ftrVERing = true; refFtrTable.ftrWddm2GpuMmu = true; refFtrTable.ftrWddm2_1_64kbPages = true; refFtrTable.ftrKmdDaf = true; refFtrTable.ftrSimulationMode = true; refFtrTable.ftrE2ECompression = 1; refFtrTable.ftrLinearCCS = 1; refFtrTable.ftrCCSRing = 1; refFtrTable.ftrCCSNode = 1; refFtrTable.ftrMemTypeMocsDeferPAT = 1; } static void fillReferenceWaToReceive(WorkaroundTable &refWaTable) { refWaTable = {}; refWaTable.waDoNotUseMIReportPerfCount = true; refWaTable.waEnablePreemptionGranularityControlByUMD = true; refWaTable.waSendMIFLUSHBeforeVFE = true; refWaTable.waReportPerfCountUseGlobalContextID = true; refWaTable.waDisableLSQCROPERFforOCL = true; refWaTable.waMsaa8xTileYDepthPitchAlignment = true; refWaTable.waLosslessCompressionSurfaceStride = true; refWaTable.waFbcLinearSurfaceStride = true; refWaTable.wa4kAlignUVOffsetNV12LinearSurface = true; refWaTable.waEncryptedEdramOnlyPartials = true; refWaTable.waDisableEdramForDisplayRT = true; refWaTable.waForcePcBbFullCfgRestore = true; refWaTable.waCompressedResourceRequiresConstVA21 = true; refWaTable.waDisablePerCtxtPreemptionGranularityControl = true; refWaTable.waLLCCachingUnsupported = true; refWaTable.waUseVAlign16OnTileXYBpp816 = true; refWaTable.waModifyVFEStateAfterGPGPUPreemption = true; refWaTable.waCSRUncachable = true; refWaTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads = true; refWaTable.waRestrictPitch128KB 
= true; refWaTable.waLimit128BMediaCompr = 1; refWaTable.waUntypedBufferCompression = 1; refWaTable.waAuxTable16KGranular = 1; refWaTable.waDisableFusedThreadScheduling = true; } }; // namespace SkuInfoBaseReference } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/sku_info/sku_info_receiver_tests.cpp000066400000000000000000000024311363734646600310760ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/sku_info/operations/windows/sku_info_receiver.h" #include "opencl/test/unit_test/sku_info/sku_info_base_reference.h" #include "gtest/gtest.h" using namespace NEO; TEST(SkuInfoReceiverTest, givenAdapterInfoWhenReceivingThenUpdateFtrTable) { FeatureTable refFeatureTable = {}; FeatureTable requestedFeatureTable = {}; ADAPTER_INFO adapterInfo = {}; memset(&adapterInfo.SkuTable, ~0, sizeof(adapterInfo.SkuTable)); SkuInfoReceiver::receiveFtrTableFromAdapterInfo(&requestedFeatureTable, &adapterInfo); SkuInfoBaseReference::fillReferenceFtrToReceive(refFeatureTable); EXPECT_TRUE(memcmp(&requestedFeatureTable, &refFeatureTable, sizeof(FeatureTable)) == 0); } TEST(SkuInfoReceiverTest, givenAdapterInfoWhenReceivingThenUpdateWaTable) { WorkaroundTable refWaTable = {}; WorkaroundTable requestedWaTable = {}; ADAPTER_INFO adapterInfo = {}; memset(&adapterInfo.WaTable, ~0, sizeof(adapterInfo.WaTable)); SkuInfoReceiver::receiveWaTableFromAdapterInfo(&requestedWaTable, &adapterInfo); SkuInfoBaseReference::fillReferenceWaToReceive(refWaTable); EXPECT_TRUE(memcmp(&requestedWaTable, &refWaTable, sizeof(WorkaroundTable)) == 0); } compute-runtime-20.13.16352/opencl/test/unit_test/sku_info/sku_info_transfer_tests.cpp000066400000000000000000000024571363734646600311260ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/sku_info/operations/sku_info_transfer.h" #include 
"opencl/test/unit_test/sku_info/sku_info_base_reference.h" #include "gtest/gtest.h" using namespace NEO; TEST(SkuInfoTransferTest, givenFeatureTableWhenFillingStructureForGmmThenCopyOnlySelectedValues) { _SKU_FEATURE_TABLE requestedFtrTable = {}; _SKU_FEATURE_TABLE refFtrTable = {}; FeatureTable featureTable; memset(reinterpret_cast(&featureTable), 1, sizeof(FeatureTable)); SkuInfoTransfer::transferFtrTableForGmm(&requestedFtrTable, &featureTable); SkuInfoBaseReference::fillReferenceFtrForTransfer(refFtrTable); EXPECT_TRUE(memcmp(&requestedFtrTable, &refFtrTable, sizeof(_SKU_FEATURE_TABLE)) == 0); } TEST(SkuInfoTransferTest, givenWaTableWhenFillingStructureForGmmThenCopyOnlySelectedValues) { _WA_TABLE requestedWaTable = {}; _WA_TABLE refWaTable = {}; WorkaroundTable waTable; refWaTable = {}; memset(reinterpret_cast(&waTable), 1, sizeof(WorkaroundTable)); SkuInfoTransfer::transferWaTableForGmm(&requestedWaTable, &waTable); SkuInfoBaseReference::fillReferenceWaForTransfer(refWaTable); EXPECT_TRUE(memcmp(&requestedWaTable, &refWaTable, sizeof(_WA_TABLE)) == 0); } compute-runtime-20.13.16352/opencl/test/unit_test/source_level_debugger/000077500000000000000000000000001363734646600261655ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/source_level_debugger/CMakeLists.txt000066400000000000000000000012101363734646600307170ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_source_level_debugger ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_csr_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/source_level_debugger_tests.cpp ) get_property(NEO_CORE_TESTS_SOURCE_LEVEL_DEBUGGER GLOBAL PROPERTY NEO_CORE_TESTS_SOURCE_LEVEL_DEBUGGER) list(APPEND IGDRCL_SRCS_tests_source_level_debugger ${NEO_CORE_TESTS_SOURCE_LEVEL_DEBUGGER}) target_sources(igdrcl_tests PRIVATE 
${IGDRCL_SRCS_tests_source_level_debugger})source_level_debugger_csr_tests.cpp000066400000000000000000000213651363734646600352450ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/source_level_debugger/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/source_level_debugger/source_level_debugger.h" #include "opencl/source/command_queue/command_queue_hw.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/dispatch_flags_helper.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include "opencl/test/unit_test/mocks/mock_csr.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" #include class CommandStreamReceiverWithActiveDebuggerTest : public ::testing::Test { protected: template auto createCSR() { hwInfo = nullptr; EnvironmentWithCsrWrapper environment; environment.setCsrType>(); executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); hwInfo->capabilityTable = defaultHwInfo->capabilityTable; hwInfo->capabilityTable.debuggerSupported = true; auto mockMemoryManager = new MockMemoryManager(*executionEnvironment); executionEnvironment->memoryManager.reset(mockMemoryManager); device = std::make_unique(Device::create(executionEnvironment, 0)); device->setSourceLevelDebuggerActive(true); return static_cast *>(device->getDefaultEngine().commandStreamReceiver); } void TearDown() override { device->setSourceLevelDebuggerActive(false); } std::unique_ptr device; ExecutionEnvironment *executionEnvironment = nullptr; HardwareInfo *hwInfo = nullptr; }; HWTEST_F(CommandStreamReceiverWithActiveDebuggerTest, 
givenCsrWithActiveDebuggerAndDisabledPreemptionWhenFlushTaskIsCalledThenSipKernelIsMadeResident) { auto mockCsr = createCSR(); CommandQueueHw commandQueue(nullptr, device.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); std::unique_ptr heap(new IndirectHeap(allocation.get())); auto &baseDevice = device->getDevice(); mockCsr->flushTask(commandStream, 0, *heap.get(), *heap.get(), *heap.get(), 0, dispatchFlags, baseDevice); auto sipType = SipKernel::getSipKernelType(baseDevice.getHardwareInfo().platform.eRenderCoreFamily, true); auto sipAllocation = baseDevice.getBuiltIns()->getSipKernel(sipType, baseDevice).getSipAllocation(); bool found = false; for (auto allocation : mockCsr->copyOfAllocations) { if (allocation == sipAllocation) { found = true; break; } } EXPECT_TRUE(found); alignedFree(buffer); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverWithActiveDebuggerTest, givenCsrWithActiveDebuggerAndDisabledPreemptionWhenFlushTaskIsCalledThenStateSipCmdIsProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_SIP = typename FamilyType::STATE_SIP; auto mockCsr = createCSR(); if (device->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { CommandQueueHw commandQueue(nullptr, device.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); auto &preambleStream = mockCsr->getCS(0); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); std::unique_ptr heap(new IndirectHeap(allocation.get())); 
mockCsr->flushTask(commandStream, 0, *heap.get(), *heap.get(), *heap.get(), 0, dispatchFlags, device->getDevice()); auto sipType = SipKernel::getSipKernelType(device->getHardwareInfo().platform.eRenderCoreFamily, true); auto sipAllocation = device->getBuiltIns()->getSipKernel(sipType, device->getDevice()).getSipAllocation(); HardwareParse hwParser; hwParser.parseCommands(preambleStream); auto itorStateBaseAddr = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto itorStateSip = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr); ASSERT_NE(hwParser.cmdList.end(), itorStateSip); STATE_BASE_ADDRESS *sba = (STATE_BASE_ADDRESS *)*itorStateBaseAddr; STATE_SIP *stateSipCmd = (STATE_SIP *)*itorStateSip; EXPECT_LT(reinterpret_cast(sba), reinterpret_cast(stateSipCmd)); auto sipAddress = stateSipCmd->getSystemInstructionPointer(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress); alignedFree(buffer); } } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverWithActiveDebuggerTest, givenCsrWithActiveDebuggerAndWhenFlushTaskIsCalledThenAlwaysProgramStateBaseAddressAndSip) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_SIP = typename FamilyType::STATE_SIP; auto mockCsr = createCSR(); if (device->getHardwareInfo().capabilityTable.defaultPreemptionMode == PreemptionMode::MidThread) { mockCsr->overrideDispatchPolicy(DispatchMode::ImmediateDispatch); CommandQueueHw commandQueue(nullptr, device.get(), 0, false); auto &commandStream = commandQueue.getCS(4096u); auto &preambleStream = mockCsr->getCS(0); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); void *buffer = alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize64k); std::unique_ptr allocation(new MockGraphicsAllocation(buffer, MemoryConstants::pageSize)); std::unique_ptr heap(new IndirectHeap(allocation.get())); mockCsr->flushTask(commandStream, 0, *heap.get(), *heap.get(), 
*heap.get(), 0, dispatchFlags, device->getDevice()); mockCsr->flushBatchedSubmissions(); mockCsr->flushTask(commandStream, 0, *heap.get(), *heap.get(), *heap.get(), 0, dispatchFlags, device->getDevice()); auto sipType = SipKernel::getSipKernelType(device->getHardwareInfo().platform.eRenderCoreFamily, true); auto sipAllocation = device->getBuiltIns()->getSipKernel(sipType, device->getDevice()).getSipAllocation(); HardwareParse hwParser; hwParser.parseCommands(preambleStream); auto itorStateBaseAddr = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); auto itorStateSip = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr); ASSERT_NE(hwParser.cmdList.end(), itorStateSip); auto itorStateBaseAddr2 = find(std::next(itorStateBaseAddr), hwParser.cmdList.end()); auto itorStateSip2 = find(std::next(itorStateSip), hwParser.cmdList.end()); ASSERT_NE(hwParser.cmdList.end(), itorStateBaseAddr2); ASSERT_NE(hwParser.cmdList.end(), itorStateSip2); STATE_BASE_ADDRESS *sba = (STATE_BASE_ADDRESS *)*itorStateBaseAddr2; STATE_SIP *stateSipCmd = (STATE_SIP *)*itorStateSip2; EXPECT_LT(reinterpret_cast(sba), reinterpret_cast(stateSipCmd)); auto sipAddress = stateSipCmd->getSystemInstructionPointer(); EXPECT_EQ(sipAllocation->getGpuAddressToPatch(), sipAddress); alignedFree(buffer); } } source_level_debugger_device_tests.cpp000066400000000000000000000041541363734646600357120ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/source_level_debugger/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/source_level_debugger/source_level_debugger.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/mocks/mock_os_library.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/mocks/mock_builtins.h" #include 
"opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_source_level_debugger.h" #include "test.h" using PreambleTest = ::testing::Test; using namespace NEO; class MockDeviceWithDebuggerActive : public MockDevice { public: MockDeviceWithDebuggerActive(ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex) : MockDevice(executionEnvironment, deviceIndex) {} void initializeCaps() override { MockDevice::initializeCaps(); this->setDebuggerActive(true); } }; TEST(DeviceWithSourceLevelDebugger, givenDeviceWithSourceLevelDebuggerActiveWhenDeviceIsDestructedThenSourceLevelDebuggerIsNotified) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); auto gmock = new ::testing::NiceMock(new MockOsLibrary); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(gmock); auto device = std::make_unique(MockDevice::create(executionEnvironment, 0u)); EXPECT_CALL(*gmock, notifyDeviceDestruction()).Times(1); } TEST(DeviceWithSourceLevelDebugger, givenDeviceWithSourceLevelDebuggerActiveWhenDeviceIsCreatedThenPreemptionIsDisabled) { ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->rootDeviceEnvironments[0]->debugger.reset(new MockActiveSourceLevelDebugger(new MockOsLibrary)); auto device = std::unique_ptr(MockDevice::create(executionEnvironment, 0u)); EXPECT_EQ(PreemptionMode::Disabled, device->getPreemptionMode()); } source_level_debugger_tests.cpp000066400000000000000000000545671363734646600344100ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/source_level_debugger/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/device/device.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/source_level_debugger/source_level_debugger.h" #include 
"shared/test/unit_test/helpers/ult_hw_config.h" #include "opencl/source/platform/platform.h" #include "opencl/source/program/kernel_info.h" #include "opencl/test/unit_test/fixtures/device_fixture.h" #include "opencl/test/unit_test/helpers/execution_environment_helper.h" #include "opencl/test/unit_test/helpers/variable_backup.h" #include "opencl/test/unit_test/libult/source_level_debugger_library.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_source_level_debugger.h" #include #include #include using namespace NEO; using std::string; using std::unique_ptr; class DebuggerLibraryRestorer { public: DebuggerLibraryRestorer() { restoreActiveState = DebuggerLibrary::getDebuggerActive(); restoreAvailableState = DebuggerLibrary::getLibraryAvailable(); } ~DebuggerLibraryRestorer() { DebuggerLibrary::clearDebuggerLibraryInterceptor(); DebuggerLibrary::setDebuggerActive(restoreActiveState); DebuggerLibrary::setLibraryAvailable(restoreAvailableState); } bool restoreActiveState = false; bool restoreAvailableState = false; }; TEST(SourceLevelDebugger, givenPlatformWhenItIsCreatedThenSourceLevelDebuggerIsCreatedInExecutionEnvironment) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); auto executionEnvironment = new ExecutionEnvironment(); MockPlatform platform(*executionEnvironment); platform.initializeWithNewDevices(); EXPECT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->debugger); } } TEST(SourceLevelDebugger, givenNoKernelDebuggerLibraryWhenSourceLevelDebuggerIsCreatedThenLibraryIsNotLoaded) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(false); MockSourceLevelDebugger debugger; EXPECT_EQ(nullptr, debugger.debuggerLibrary.get()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryAvailableWhenSourceLevelDebuggerIsConstructedThenLibraryIsLoaded) { 
DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(true); MockSourceLevelDebugger debugger; EXPECT_NE(nullptr, debugger.debuggerLibrary.get()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryAvailableWhenIsDebuggerActiveIsCalledThenFalseIsReturned) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(true); MockSourceLevelDebugger debugger; bool active = debugger.isDebuggerActive(); EXPECT_FALSE(active); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenIsDebuggerActiveIsCalledThenTrueIsReturned) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); MockSourceLevelDebugger debugger; bool active = debugger.isDebuggerActive(); EXPECT_TRUE(active); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotAvailableWhenIsDebuggerActiveIsCalledThenFalseIsReturned) { DebuggerLibraryRestorer restorer; DebuggerLibrary::setLibraryAvailable(false); MockSourceLevelDebugger debugger; bool active = debugger.isDebuggerActive(); EXPECT_FALSE(active); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenNotifySourceCodeIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; GfxDbgSourceCode argOut; char fileName[] = "filename"; argOut.sourceName = fileName; argOut.sourceNameMaxLen = sizeof(fileName); interceptor.sourceCodeArgOut = &argOut; const char source[] = "sourceCode"; string file; debugger.notifySourceCode(source, sizeof(source), file); EXPECT_TRUE(interceptor.sourceCodeCalled); EXPECT_EQ(reinterpret_cast(static_cast(MockSourceLevelDebugger::mockDeviceHandle)), interceptor.sourceCodeArgIn.hDevice); EXPECT_EQ(source, interceptor.sourceCodeArgIn.sourceCode); EXPECT_EQ(sizeof(source), 
interceptor.sourceCodeArgIn.sourceCodeSize); EXPECT_NE(nullptr, interceptor.sourceCodeArgIn.sourceName); EXPECT_NE(0u, interceptor.sourceCodeArgIn.sourceNameMaxLen); EXPECT_STREQ(fileName, file.c_str()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenNotifySourceCodeIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.setActive(false); const char source[] = "sourceCode"; string file; debugger.notifySourceCode(source, sizeof(source), file); EXPECT_FALSE(interceptor.sourceCodeCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenNotifyNewDeviceIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.notifyNewDevice(4); EXPECT_TRUE(interceptor.newDeviceCalled); EXPECT_EQ(reinterpret_cast(static_cast(4u)), interceptor.newDeviceArgIn.dh); EXPECT_EQ(4u, debugger.deviceHandle); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenNotifyNewDeviceIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.setActive(false); debugger.notifyNewDevice(4); EXPECT_FALSE(interceptor.newDeviceCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenIsOptimizationDisabledIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer 
restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); EXPECT_TRUE(interceptor.optionCalled); EXPECT_EQ(GfxDbgOptionNames::DBG_OPTION_IS_OPTIMIZATION_DISABLED, interceptor.optionArgIn.optionName); EXPECT_NE(nullptr, interceptor.optionArgIn.value); EXPECT_LT(0u, interceptor.optionArgIn.valueLen); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenIsOptimizationDisabledIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.setActive(false); bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); EXPECT_FALSE(interceptor.optionCalled); } TEST(SourceLevelDebugger, givenActiveDebuggerWhenGetDebuggerOptionReturnsZeroThenIsOptimizationDisabledReturnsFalse) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); char value = '1'; GfxDbgOption optionArgOut; interceptor.optionArgOut = &optionArgOut; interceptor.optionArgOut->value = &value; interceptor.optionArgOut->valueLen = sizeof(value); interceptor.optionRetVal = 0; MockSourceLevelDebugger debugger; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); } TEST(SourceLevelDebugger, givenActiveDebuggerAndOptDisabledWhenGetDebuggerOptionReturnsNonZeroAndOneInValueThenIsOptimizationDisabledReturnsTrue) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; 
DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); char value[2] = {'1', 0}; GfxDbgOption optionArgOut; interceptor.optionArgOut = &optionArgOut; interceptor.optionArgOut->value = value; interceptor.optionArgOut->valueLen = sizeof(value); interceptor.optionRetVal = 1; MockSourceLevelDebugger debugger; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_TRUE(isOptDisabled); } TEST(SourceLevelDebugger, givenActiveDebuggerAndOptDisabledWhenGetDebuggerOptionReturnsNonZeroAndZeroInValueThenIsOptimizationDisabledReturnsFalse) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); char value = '0'; GfxDbgOption optionArgOut; interceptor.optionArgOut = &optionArgOut; interceptor.optionArgOut->value = &value; interceptor.optionArgOut->valueLen = sizeof(value); interceptor.optionRetVal = 1; MockSourceLevelDebugger debugger; bool isOptDisabled = debugger.isOptimizationDisabled(); EXPECT_FALSE(isOptDisabled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char dbgIsa[10]; char visa[12]; KernelInfo info; info.debugData.genIsa = dbgIsa; info.debugData.vIsa = visa; info.debugData.genIsaSize = sizeof(dbgIsa); info.debugData.vIsaSize = sizeof(visa); info.name = "debugKernel"; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeader = &kernelHeader; info.heapInfo.pKernelHeap = isa; 
debugger.notifyKernelDebugData(&info.debugData, info.name, info.heapInfo.pKernelHeap, info.heapInfo.pKernelHeader->KernelHeapSize); EXPECT_TRUE(interceptor.kernelDebugDataCalled); EXPECT_EQ(static_cast(IGFXDBG_CURRENT_VERSION), interceptor.kernelDebugDataArgIn.version); EXPECT_EQ(reinterpret_cast(static_cast(MockSourceLevelDebugger::mockDeviceHandle)), interceptor.kernelDebugDataArgIn.hDevice); EXPECT_EQ(reinterpret_cast(0), interceptor.kernelDebugDataArgIn.hProgram); EXPECT_EQ(dbgIsa, interceptor.kernelDebugDataArgIn.dbgGenIsaBuffer); EXPECT_EQ(sizeof(dbgIsa), interceptor.kernelDebugDataArgIn.dbgGenIsaSize); EXPECT_EQ(visa, interceptor.kernelDebugDataArgIn.dbgVisaBuffer); EXPECT_EQ(sizeof(visa), interceptor.kernelDebugDataArgIn.dbgVisaSize); EXPECT_EQ(kernelHeader.KernelHeapSize, interceptor.kernelDebugDataArgIn.KernelBinSize); EXPECT_EQ(isa, interceptor.kernelDebugDataArgIn.kernelBinBuffer); EXPECT_STREQ(info.name.c_str(), interceptor.kernelDebugDataArgIn.kernelName); } TEST(SourceLevelDebugger, givenNoVisaWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char dbgIsa[10]; KernelInfo info; info.debugData.genIsa = dbgIsa; info.debugData.vIsa = nullptr; info.debugData.genIsaSize = sizeof(dbgIsa); info.debugData.vIsaSize = 0; info.name = "debugKernel"; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeader = &kernelHeader; info.heapInfo.pKernelHeap = isa; debugger.notifyKernelDebugData(&info.debugData, info.name, info.heapInfo.pKernelHeap, info.heapInfo.pKernelHeader->KernelHeapSize); EXPECT_FALSE(interceptor.kernelDebugDataCalled); } TEST(SourceLevelDebugger, 
givenNoGenIsaWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; char isa[8]; char visa[12]; KernelInfo info; info.debugData.genIsa = nullptr; info.debugData.vIsa = visa; info.debugData.genIsaSize = 0; info.debugData.vIsaSize = sizeof(visa); info.name = "debugKernel"; SKernelBinaryHeaderCommon kernelHeader; kernelHeader.KernelHeapSize = sizeof(isa); info.heapInfo.pKernelHeader = &kernelHeader; info.heapInfo.pKernelHeap = isa; debugger.notifyKernelDebugData(&info.debugData, info.name, isa, sizeof(isa)); EXPECT_FALSE(interceptor.kernelDebugDataCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenNotifyKernelDebugDataIsCalledThenDebuggerLibraryFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.setActive(false); KernelInfo info; debugger.notifyKernelDebugData(&info.debugData, info.name, nullptr, 0); EXPECT_FALSE(interceptor.kernelDebugDataCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenInitializeIsCalledWithLocalMemoryUsageFalseThenDebuggerFunctionIsCalledWithCorrectArg) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.initialize(false); EXPECT_TRUE(interceptor.initCalled); EXPECT_FALSE(interceptor.targetCapsArgIn.supportsLocalMemory); } TEST(SourceLevelDebugger, 
givenKernelDebuggerLibraryActiveWhenInitializeReturnsErrorThenIsActiveIsSetToFalse) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; interceptor.initRetVal = IgfxdbgRetVal::IGFXDBG_FAILURE; debugger.initialize(false); EXPECT_TRUE(interceptor.initCalled); EXPECT_FALSE(debugger.isDebuggerActive()); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenInitializeIsCalledWithLocalMemoryUsageTrueThenDebuggerFunctionIsCalledWithCorrectArg) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.initialize(true); EXPECT_TRUE(interceptor.initCalled); EXPECT_TRUE(interceptor.targetCapsArgIn.supportsLocalMemory); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenInitializeIsCalledThenDebuggerFunctionIsNotCalled) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); MockSourceLevelDebugger debugger; debugger.initialize(false); EXPECT_FALSE(interceptor.initCalled); } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenDeviceIsConstructedThenDebuggerIsInitialized) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); 
EXPECT_TRUE(interceptor.initCalled); } } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenDeviceImplIsCreatedThenDebuggerIsNotified) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); unique_ptr device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); unique_ptr pClDevice(new MockClDevice{device.get()}); EXPECT_TRUE(interceptor.newDeviceCalled); uint32_t deviceHandleExpected = device->getGpgpuCommandStreamReceiver().getOSInterface() != nullptr ? device->getGpgpuCommandStreamReceiver().getOSInterface()->getDeviceHandle() : 0; EXPECT_EQ(reinterpret_cast(static_cast(deviceHandleExpected)), interceptor.newDeviceArgIn.dh); pClDevice.reset(); device.release(); } } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryActiveWhenDeviceImplIsCreatedWithOsCsrThenDebuggerIsNotifiedWithCorrectDeviceHandle) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); VariableBackup backup(&ultHwConfig); ultHwConfig.useHwCsr = true; HardwareInfo *hwInfo = nullptr; ExecutionEnvironment *executionEnvironment = getExecutionEnvironmentImpl(hwInfo, 1); hwInfo->capabilityTable.instrumentationEnabled = true; unique_ptr device(Device::create(executionEnvironment, 0)); unique_ptr pClDevice(new MockClDevice{device.get()}); ASSERT_NE(nullptr, device->getGpgpuCommandStreamReceiver().getOSInterface()); EXPECT_TRUE(interceptor.newDeviceCalled); uint32_t deviceHandleExpected = device->getGpgpuCommandStreamReceiver().getOSInterface()->getDeviceHandle(); EXPECT_EQ(reinterpret_cast(static_cast(deviceHandleExpected)), 
interceptor.newDeviceArgIn.dh); device.release(); } } TEST(SourceLevelDebugger, givenKernelDebuggerLibraryNotActiveWhenDeviceIsCreatedThenDebuggerIsNotCreatedInitializedAndNotNotified) { DebuggerLibraryRestorer restorer; DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(false); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); EXPECT_EQ(nullptr, device->getDebugger()); EXPECT_FALSE(interceptor.initCalled); EXPECT_FALSE(interceptor.newDeviceCalled); } TEST(SourceLevelDebugger, givenTwoRootDevicesWhenSecondIsCreatedThenCreatingNewSourceLevelDebugger) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibraryInterceptor interceptor; DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); DebuggerLibrary::injectDebuggerLibraryInterceptor(&interceptor); ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(2); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfo(defaultHwInfo.get()); } auto device1 = std::make_unique(Device::create(executionEnvironment, 0u)); EXPECT_NE(nullptr, executionEnvironment->memoryManager); EXPECT_TRUE(interceptor.initCalled); interceptor.initCalled = false; auto device2 = std::make_unique(Device::create(executionEnvironment, 1u)); EXPECT_NE(nullptr, executionEnvironment->memoryManager); EXPECT_TRUE(interceptor.initCalled); } } TEST(SourceLevelDebugger, givenMultipleRootDevicesWhenTheyAreCreatedTheyUseDedicatedSourceLevelDebugger) { DebuggerLibraryRestorer restorer; if (defaultHwInfo->capabilityTable.debuggerSupported) { DebuggerLibrary::setLibraryAvailable(true); DebuggerLibrary::setDebuggerActive(true); ExecutionEnvironment 
#
# Copyright (C) 2017-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Builds the small shared library that unit tests load at run time.
project(test_dynamic_lib)

# The source file is chosen by platform: "lin" unless building for Windows.
set(OS_SUFFIX lin)
if(WIN32)
  set(OS_SUFFIX win)
endif()

add_library(test_dynamic_lib SHARED test_dynamic_lib_${OS_SUFFIX}.cpp)

# Project-provided helper, invoked right after the target is declared.
create_project_source_tree(test_dynamic_lib)

# IDE folder for the target.
set_property(TARGET test_dynamic_lib PROPERTY FOLDER "test mocks")

# Append the sanitizer flags to whatever compile flags are already set.
set_property(TARGET test_dynamic_lib APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS})
// Copies one uint4 (16 bytes) per work-item from pSrc to pDst.
//
// srcOffsetInBytes / dstOffsetInBytes are byte offsets; they are converted to
// uint element offsets with ">> 2", so their two low bits are discarded.
__kernel void CopyBufferToBufferMiddle(
    const __global uint *pSrc,
    __global uint *pDst,
    uint srcOffsetInBytes,
    uint dstOffsetInBytes) {
    const unsigned int item = get_global_id(0);

    // Rebase both pointers once, then move a whole vector.
    __global uint *to = pDst + (dstOffsetInBytes >> 2);
    const __global uint *from = pSrc + (srcOffsetInBytes >> 2);

    vstore4(vload4(item, from), item, to);
}
// Writes `color` to one texel of a 2D image per work-item.
//
// The texel coordinate is the 2D global id shifted by dstOffset.x/.y; the
// z and w components of dstOffset are not used by this kernel.
__kernel void FillImage2d(
    __write_only image2d_t output,
    uint4 color,
    int4 dstOffset) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    const int2 texel = (int2)(col + dstOffset.x, row + dstOffset.y);
    write_imageui(output, texel, color);
}
//////////////////////////////////////////////////////////////////////////////
// Copies one byte per work-item between two 3D rectangular regions of linear
// buffers.
//
// SrcOrigin / DstOrigin - .x/.y/.z origin of the region in each buffer
// SrcPitch  / DstPitch  - .x multiplies the row index, .y the slice index
//
// Offsets are computed in 32-bit unsigned arithmetic, exactly as the global
// id plus origin scaled by the pitches.
__kernel void CopyBufferRectBytes3d(
    __global const char *src,
    __global char *dst,
    uint4 SrcOrigin,
    uint4 DstOrigin,
    uint2 SrcPitch,
    uint2 DstPitch) {
    int col = get_global_id(0);
    int row = get_global_id(1);
    int slice = get_global_id(2);

    // Source byte index.
    uint srcRowPart = (row + SrcOrigin.y) * SrcPitch.x;
    uint srcSlicePart = (slice + SrcOrigin.z) * SrcPitch.y;
    uint srcIndex = col + SrcOrigin.x + srcRowPart + srcSlicePart;

    // Destination byte index.
    uint dstRowPart = (row + DstOrigin.y) * DstPitch.x;
    uint dstSlicePart = (slice + DstOrigin.z) * DstPitch.y;
    uint dstIndex = col + DstOrigin.x + dstRowPart + dstSlicePart;

    dst[dstIndex] = src[srcIndex];
}
// Copies one 4-byte element per work-item from a linear buffer into the .x
// channel of a 3D image (y and z channels are written as 0, w as 1).
//
// src       - source buffer, addressed in bytes
// output    - destination image
// srcOffset - byte offset of the first element inside src
// dstOffset - added to the 3D global id to form the image coordinate
// Pitch     - .x is multiplied by the y id, .y by the z id (byte pitches)
//
// The address that is read is src + srcOffset + y * Pitch.x + z * Pitch.y
// + x * 4. Alignment is therefore tested on that final address rather than on
// src + srcOffset alone: when a pitch is not a multiple of 4, the base can be
// aligned while individual rows are not, and the direct uint load would then
// be misaligned. For every input where the direct load was valid the result
// is unchanged.
__kernel void CopyBufferToImage3d4Bytes(__global uchar *src,
                                        __write_only image3d_t output,
                                        int srcOffset,
                                        int4 dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Address of the element this work-item reads.
    __global uchar *element = src + LOffset + x * 4;

    uint4 c = (uint4)(0, 0, 0, 1);

    if ((ulong)element & 0x00000003) {
        // Misaligned: assemble the value from single bytes, lowest address
        // in the least significant byte.
        uint upper2 = element[3];
        uint upper = element[2];
        uint lower2 = element[1];
        uint lower = element[0];
        c.x = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower;
    } else {
        c.x = *((__global uint *)element);
    }

    write_imageui(output, dstCoord, c);
}
// Copies one 16-byte element per work-item from a linear buffer into all four
// channels of a 3D image.
//
// src       - source buffer, addressed in bytes
// output    - destination image
// srcOffset - byte offset of the first element inside src
// dstOffset - added to the 3D global id to form the image coordinate
// Pitch     - .x is multiplied by the y id, .y by the z id (byte pitches)
//
// The address that is read is src + srcOffset + y * Pitch.x + z * Pitch.y
// + x * 16. Alignment is tested on that final address rather than on
// src + srcOffset alone: when a pitch is not a multiple of 16, the base can
// be aligned while individual rows are not, and the direct uint4 load would
// then be misaligned. For every input where the direct load was valid the
// result is unchanged.
__kernel void CopyBufferToImage3d16Bytes(__global uchar *src,
                                         __write_only image3d_t output,
                                         int srcOffset,
                                         int4 dstOffset,
                                         uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    int4 dstCoord = (int4)(x, y, z, 0) + dstOffset;
    uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y);

    // Address of the element this work-item reads.
    __global uchar *element = src + LOffset + x * 16;

    uint4 c = (uint4)(0, 0, 0, 0);

    if ((ulong)element & 0x0000000f) {
        // Misaligned: build each 32-bit channel from single bytes, lowest
        // address in the least significant byte.
        c.x = ((uint)element[3] << 24) | ((uint)element[2] << 16) | ((uint)element[1] << 8) | (uint)element[0];
        c.y = ((uint)element[7] << 24) | ((uint)element[6] << 16) | ((uint)element[5] << 8) | (uint)element[4];
        c.z = ((uint)element[11] << 24) | ((uint)element[10] << 16) | ((uint)element[9] << 8) | (uint)element[8];
        c.w = ((uint)element[15] << 24) | ((uint)element[14] << 16) | ((uint)element[13] << 8) | (uint)element[12];
    } else {
        c = *((__global uint4 *)element);
    }

    write_imageui(output, dstCoord, c);
}
// Copies the .x channel of one texel per work-item from a 3D image into a
// linear buffer as a 4-byte element.
//
// input     - source image
// dst       - destination buffer, addressed in bytes
// srcOffset - added to the 3D global id to form the image coordinate
// dstOffset - byte offset of the first element inside dst
// Pitch     - .x is multiplied by the y id, .y by the z id (byte pitches)
//
// The address that is written is dst + dstOffset + y * Pitch.x + z * Pitch.y
// + x * 4. Alignment is tested on that final address rather than on
// dst + dstOffset alone: when a pitch is not a multiple of 4, the base can be
// aligned while individual rows are not, and the direct uint store would then
// be misaligned. For every input where the direct store was valid the bytes
// written are unchanged.
__kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input,
                                        __global uchar *dst,
                                        int4 srcOffset,
                                        int dstOffset,
                                        uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    uint4 c = read_imageui(input, srcCoord);

    // Address of the element this work-item writes.
    __global uchar *element = dst + DstOffset + x * 4;

    if ((ulong)element & 0x00000003) {
        // Misaligned: store byte by byte, least significant byte at the
        // lowest address.
        element[3] = convert_uchar_sat((c.x >> 24) & 0xff);
        element[2] = convert_uchar_sat((c.x >> 16) & 0xff);
        element[1] = convert_uchar_sat((c.x >> 8) & 0xff);
        element[0] = convert_uchar_sat(c.x & 0xff);
    } else {
        *((__global uint *)element) = c.x;
    }
}
// Copies all four channels of one texel per work-item from a 3D image into a
// linear buffer as a 16-byte element.
//
// input     - source image
// dst       - destination buffer, addressed in bytes
// srcOffset - added to the 3D global id to form the image coordinate
// dstOffset - byte offset of the first element inside dst
// Pitch     - .x is multiplied by the y id, .y by the z id (byte pitches)
//
// The address that is written is dst + dstOffset + y * Pitch.x + z * Pitch.y
// + x * 16. Alignment is tested on that final address rather than on
// dst + dstOffset alone: when a pitch is not a multiple of 16, the base can
// be aligned while individual rows are not, and the direct uint4 store would
// then be misaligned. For every input where the direct store was valid the
// bytes written are unchanged.
__kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input,
                                         __global uchar *dst,
                                         int4 srcOffset,
                                         int dstOffset,
                                         uint2 Pitch) {
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);
    const uint z = get_global_id(2);

    const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset;
    uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y);

    const uint4 c = read_imageui(input, srcCoord);

    // Address of the element this work-item writes.
    __global uchar *element = dst + DstOffset + x * 16;

    if ((ulong)element & 0x0000000f) {
        // Misaligned: store byte by byte; within each 32-bit channel the
        // least significant byte goes to the lowest address.
        element[3] = convert_uchar_sat((c.x >> 24) & 0xff);
        element[2] = convert_uchar_sat((c.x >> 16) & 0xff);
        element[1] = convert_uchar_sat((c.x >> 8) & 0xff);
        element[0] = convert_uchar_sat(c.x & 0xff);

        element[7] = convert_uchar_sat((c.y >> 24) & 0xff);
        element[6] = convert_uchar_sat((c.y >> 16) & 0xff);
        element[5] = convert_uchar_sat((c.y >> 8) & 0xff);
        element[4] = convert_uchar_sat(c.y & 0xff);

        element[11] = convert_uchar_sat((c.z >> 24) & 0xff);
        element[10] = convert_uchar_sat((c.z >> 16) & 0xff);
        element[9] = convert_uchar_sat((c.z >> 8) & 0xff);
        element[8] = convert_uchar_sat(c.z & 0xff);

        element[15] = convert_uchar_sat((c.w >> 24) & 0xff);
        element[14] = convert_uchar_sat((c.w >> 16) & 0xff);
        element[13] = convert_uchar_sat((c.w >> 8) & 0xff);
        element[12] = convert_uchar_sat(c.w & 0xff);
    } else {
        *(__global uint4 *)element = c;
    }
}
// Copies one unsigned int per work-item from src to dst.
//
// The element is stored twice: first the lgamma() of the source value
// (converted to float and back to unsigned int by the assignment), then the
// source value itself. With distinct buffers the second store overwrites the
// first, so dst ends up as a plain copy of src.
// NOTE(review): the lgamma() store looks intentional (this is a test input,
// presumably meant to make the kernel reference a math built-in) - confirm
// before removing it. Both statements re-read src[id], so merging the reads
// would change the result if src and dst alias.
__kernel void CopyBuffer(__global unsigned int *src, __global unsigned int *dst) {
    int id = (int)get_global_id(0);
    dst[id] = lgamma((float)src[id]);
    dst[id] = src[id];
}
= 0 DoCpuCopyOnReadBuffer = -1 DoCpuCopyOnWriteBuffer = -1 DisableResourceRecycling = 0 PrintDebugSettings = 0 PrintDebugMessages = 0 DumpKernels = 0 DumpKernelArgs = 0 LogApiCalls = 0 LogPatchTokens = 0 LogTaskCounts = 0 LogAlignedAllocations = 0 LogAllocationMemoryPool = 0 LogMemoryObject = 0 ForceLinearImages = 0 ForceSLML3Config = 0 SetCommandStreamReceiver = -1 ForceOCLVersion = 0 Force32bitAddressing = 0 EnableVaLibCalls = 1 EnableExtendedVaFormats = 0 EnableNV12 = 1 EnablePackedYuv = 1 EnableIntelVme = -1 EnableAdvancedIntelVme = -1 DisableStatelessToStatefulOptimization = 0 ForceDispatchScheduler = 0 PrintEMDebugInformation = 0 ForceDeviceId = unk SchedulerSimulationReturnInstance = 0 DisableConcurrentBlockExecution = 0 ResidencyDebugEnable = 0 ForcePreemptionMode = -1 EnableStatelessToStatefulBufferOffsetOpt = -1 TbxPort = 4321 TbxServer = 127.0.0.1 EnableDeferredDeleter = 1 EnableAsyncDestroyAllocations = 1 EnableAsyncEventsHandler = 1 EnableForcePin = 1 CsrDispatchMode = 0 OverrideDefaultFP64Settings = -1 OverrideEnableKmdNotify = -1 OverrideKmdNotifyDelayMs = -1 OverrideEnableQuickKmdSleep = -1 OverrideQuickKmdSleepDelayMicroseconds = -1 OverrideEnableQuickKmdSleepForSporadicWaits = -1 OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds = -1 Enable64kbpages = -1 NodeOrdinal = -1 ProductFamilyOverride = unk HardwareInfoOverride = default EnableDebugBreak = 1 EnableComputeWorkSizeND = 1 EventsDebugEnable = 0 EventsTrackerEnable = 0 UseMaxSimdSizeToDeduceMaxWorkgroupSize = 0 EnableComputeWorkSizeSquared = 0 TrackParentEvents = 0 PrintLWSSizes = 0 UseNoRingFlushesKmdMode = 1 OverrideThreadArbitrationPolicy = -1 PrintDriverDiagnostics = -1 FlattenBatchBufferForAUBDump = 0 PrintDispatchParameters = 0 AddPatchInfoCommentsForAUBDump = 0 DisableZeroCopyForUseHostPtr = 0 SchedulerGWS = 0 DisableZeroCopyForBuffers = 0 OverrideAubDeviceId = -1 ForceCompilerUsePlatform = unk ForceCsrFlushing = 0 ForceCsrReprogramming = 0 OmitTimestampPacketDependencies = 0 
AUBDumpBufferFormat = unk AUBDumpImageFormat = unk AUBDumpCaptureFileName = unk AUBDumpSubCaptureMode = 0 AUBDumpToggleFileName = unk AUBDumpToggleCaptureOnOff = 0 AUBDumpFilterKernelName = unk AUBDumpFilterNamedKernelStartIdx = 0 AUBDumpFilterNamedKernelEndIdx = -1 AUBDumpFilterKernelStartIdx = 0 AUBDumpFilterKernelEndIdx = -1 RebuildPrecompiledKernels = 0 CreateMultipleRootDevices = 0 CreateMultipleSubDevices = 0 EnableExperimentalCommandBuffer = 0 LoopAtPlatformInitialize = 0 EnableTimestampPacket = -1 ReturnRawGpuTimestamps = 0 DoNotRegisterTrimCallback = 0 OverrideGdiPath = unk AddClGlSharing = 0 EnablePassInlineData = -1 LimitAmountOfReturnedDevices = 0 EnableLocalMemory = -1 UseAubStream = 1 AubDumpOverrideMmioRegister = 0 AubDumpOverrideMmioRegisterValue = 0 PowerSavingMode = 0 AubDumpAddMmioRegistersList = unk RenderCompressedImagesEnabled = -1 RenderCompressedBuffersEnabled = -1 AUBDumpAllocsOnEnqueueReadOnly = 0 AUBDumpForceAllToLocalMemory = 0 EnableCacheFlushAfterWalker = -1 EnableHostPtrTracking = -1 DisableDcFlushInEpilogue = 0 OverrideInvalidEngineWithDefault = 0 EnableFormatQuery = 0 EnableBlitterOperationsSupport = -1 EnableBlitterOperationsForReadWriteBuffers = -1 DisableAuxTranslation = 0 ForceAuxTranslationMode = -1 EnableFreeMemory = 0 OverrideStatelessMocsIndex = -1 CFEFusedEUDispatch = -1 AllocateSharedAllocationsWithCpuAndGpuStorage = -1 EnableSharedSystemUsmSupport = -1 ForcePerDssBackedBufferProgramming = 0 ForceSamplerLowFilteringPrecision = 0 UseBindlessBuffers = 0 UseBindlessImages = 0 PrintProgramBinaryProcessingTime = 0 OverrideGpuAddressSpace = -1 OverrideMaxWorkgroupSize = -1 DisableTimestampPacketOptimizations = 0 MakeAllBuffersResident = 0 EnableDirectSubmission = -1 DirectSubmissionBufferPlacement = -1 DirectSubmissionSemaphorePlacement = -1 DirectSubmissionDisableCpuCacheFlush = -1 WddmResidencyLogger = 0 DirectSubmissionEnableDebugBuffer = 0 DirectSubmissionDisableCacheFlush = 0 DirectSubmissionDisableMonitorFence = 0 
// VME KERNELS

// Runs a motion-estimation query for a sequence of 16x16 macroblocks and
// writes the resulting motion vectors (and optionally residuals) to global
// memory. The work-group size is fixed at 16x1x1.
//
// accelerator                     - passed to the VME built-ins; also queried
//                                   for the macroblock partition type
// srcImg / refImg                 - source and reference images
// prediction_motion_vector_buffer - one predictor per macroblock, indexed
//                                   gid_0 + gid_1 * width
// motion_vector_buffer            - output motion vectors
// residuals                       - output residuals
// height / width                  - bounds for gid_1 / gid_0 (macroblock
//                                   units, since coordinates are scaled by 16)
// stride                          - number of consecutive macroblock ids
//                                   handled by one work-group
//
// When HW_NULL_CHECK is NOT defined, the kernel itself checks the optional
// buffers (prediction_motion_vector_buffer, residuals) against NULL; when it
// is defined those checks are compiled out and the buffers are always
// accessed.
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_motion_estimate_intel(sampler_t accelerator, __read_only image2d_t srcImg,
                            __read_only image2d_t refImg,
                            __global short2 *prediction_motion_vector_buffer,
                            __global short2 *motion_vector_buffer,
                            __global ushort *residuals, int height, int width,
                            int stride) {
    // Local scratch filled by the VME query. `dist` views the same memory as
    // ushort, starting at dst[40].
    // NOTE(review): presumably the distortion values - confirm against the
    // built-in's documentation.
    __local uint dst[64];
    __local ushort *dist = (__local ushort *)&dst[8 * 5];

    // Each work-group handles `stride` consecutive macroblock ids starting at
    // sid_0. An id maps to (gid_0, gid_1) = (id / height, id % height).
    int sid_0 = stride * get_group_id(0);
    int gid_0 = sid_0 / height;
    int gid_1 = sid_0 % height;
    for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) {
        int2 srcCoord = 0;
        int2 refCoord = 0;

        // Pixel origin of the macroblock (16 pixels per macroblock side).
        srcCoord.x = gid_0 * 16 + get_global_offset(0);
        srcCoord.y = gid_1 * 16 + get_global_offset(1);

        short2 predMV = 0;

#ifndef HW_NULL_CHECK
        if (prediction_motion_vector_buffer != NULL)
#endif
        {
            // The predictor is divided by 4 to form the reference offset;
            // the y offset is then masked with 0xFFFE (clears bit 0 and all
            // bits above bit 15).
            predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * width];
            refCoord.x = predMV.x / 4;
            refCoord.y = predMV.y / 4;
            refCoord.y = refCoord.y & 0xFFFE;
        }

        {
            intel_work_group_vme_mb_query(dst, srcCoord, refCoord, srcImg, refImg, accelerator);
        }

        // Results in `dst` must be visible to all work-items before reading.
        barrier(CLK_LOCAL_MEM_FENCE);

        // Write Out Result

        // 4x4
        // Block type 0x2: each of the 16 work-items writes one entry of a
        // 4x4 grid per macroblock.
        if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
            int x = get_local_id(0) % 4;
            int y = get_local_id(0) / 4;
            int index = (gid_0 * 4 + x) + (gid_1 * 4 + y) * width * 4;

            short2 val = as_short2(dst[8 + (y * 4 + x) * 2]);
            motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
            if (residuals != NULL)
#endif
            {
                residuals[index] = dist[y * 4 + x];
            }
        }

        // 8x8
        // Block type 0x1: only the first four work-items write, one entry of
        // a 2x2 grid each.
        if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
            if (get_local_id(0) < 4) {
                int x = get_local_id(0) % 2;
                int y = get_local_id(0) / 2;
                int index = (gid_0 * 2 + x) + (gid_1 * 2 + y) * width * 2;
                short2 val = as_short2(dst[8 + (y * 2 + x) * 8]);
                motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
                if (residuals != NULL)
#endif
                {
                    residuals[index] = dist[(y * 2 + x) * 4];
                }
            }
        }

        // 16x16
        // Block type 0x0: a single entry per macroblock, written by
        // work-item 0.
        if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
            if (get_local_id(0) == 0) {
                int index = gid_0 + gid_1 * width;

                short2 val = as_short2(dst[8]);
                motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
                if (residuals != NULL)
#endif
                {
                    residuals[index] = dist[0];
                }
            }
        }
        // NOTE(review): there is no barrier between these reads of `dst` and
        // the next iteration's query that overwrites it; presumably safe
        // because the 16 work-items execute together - confirm.
    }
}
short2 *skip_motion_vector_buffer, __global short2 *motion_vector_buffer, __global char *intra_search_predictor_modes, __global ushort *residuals, __global ushort *skip_residuals, __global ushort *intra_residuals, __read_only image2d_t intraSrcImg, int height, int width, int stride) { __local uint dstSearch[64]; // 8 GRFs __local uint dstSkipIntra[64 + 24]; // 11 GRFs (8 for inter, 3 for intra) __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5]; // distortion in the 6th GRF // Initialize the MV cost table: // MV Cost in U4U4 format: // No cost : 0, 0, 0, 0, 0, 0, 0, 0 // Low Cost : 1, 4, 5, 9, 10, 12, 14, 15 // Normal Cost: 5, 26, 29, 43, 45, 47, 57, 57 // High Cost : 29, 61, 72, 78, 88, 89, 91, 92 uint2 MVCostTable; if (search_cost_penalty == 1) { MVCostTable.s0 = 0x09050401; MVCostTable.s1 = 0x0F0E0C0A; } else if (search_cost_penalty == 2) { MVCostTable.s0 = 0x2B1D1A05; MVCostTable.s1 = 0x39392F2D; } else if (search_cost_penalty == 3) { MVCostTable.s0 = 0x4E483D1D; MVCostTable.s1 = 0x5C5B5958; } else { MVCostTable.s0 = 0; MVCostTable.s1 = 0; } uint MVCostPrecision = ((uint)search_cost_precision) << 16; // Frame is divided into rows * columns of MBs. // One h/w thread per WG. // One WG processes 'row' MBs - one row per iteration and one MB per row. // Number of WGs (or h/w threads) is number of columns MBs // Each iteration processes the MB in a row - gid_0 is the MB id in a row and // gid_1 is the row offset. 
int sid_0 = stride * get_group_id(0); int gid_0 = sid_0 / height; int gid_1 = sid_0 % height; for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height; sid++, gid_0 = sid / height, gid_1 = sid % height) { int2 srcCoord; srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar) srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar) uint curMB = gid_0 + gid_1 * width; // current MB id short2 count = count_motion_vector_buffer[curMB]; int countPredMVs = count.x; if (countPredMVs != 0) { uint offset = curMB * 8; // 8 predictors per MB offset += get_local_id(0) % 8; // 16 work-items access 8 MVs for MB // one predictor for MB per SIMD channel // Reduce predictors from Q-pixel to integer precision. int2 predMV = 0; if (get_local_id(0) < countPredMVs) { predMV = convert_int2(predictors_buffer[offset]); // one MV per work-item predMV.x /= 4; predMV.y /= 4; predMV.y &= 0xFFFE; } // Do up to 8 IMEs, get the best MVs and their distortions, and optionally // a FBR of the best MVs. // Finally the results are written out to SLM. intel_work_group_vme_mb_multi_query_8( dstSearch, // best search MV and its distortions into SLM countPredMVs, // count of predictor MVs (globally scalar - value range // 1 to 8) MVCostPrecision, // MV cost precision MVCostTable, // MV cost table srcCoord, // MB 2-D offset (globally scalar) predMV, // predictor MVs (up to 8 distinct MVs for SIMD16 thread) srcImg, // source refImg, // reference accelerator); // vme object } int doIntra = (flags & 0x2) != 0; int intraEdges = 0; if (doIntra) { // Enable all edges by default. intraEdges = 0x3C; // If this is a left-edge MB, then disable left edges. if ((gid_0 == 0) & (get_global_offset(0) == 0)) { intraEdges &= 0x14; } // If this is a right edge MB then disable right edges. if (gid_0 == width - 1) { intraEdges &= 0x38; } // If this is a top-edge MB, then disable top edges. 
if ((gid_1 == 0) & (get_global_offset(1) == 0)) { intraEdges &= 0x20; } // Set bit6=bit5. intraEdges |= ((intraEdges & 0x20) << 1); intraEdges <<= 8; } int countSkipMVs = count.y; if (countSkipMVs != 0 || doIntra == true) { uint offset = curMB * 8; // 8 sets of skip check MVs per MB offset += (get_local_id(0) % 8); // 16 work-items access 8 sets of MVs for MB // one set of skip MV per SIMD channel // Do up to 8 skip checks and get the distortions for each of them. // Finally the results are written out to SLM. if ((skip_block_type == 0x0) | ((doIntra) & (countSkipMVs == 0))) { int skipMVs = 0; if (get_local_id(0) < countSkipMVs) { __global int *skip1_motion_vector_buffer = (__global int *)skip_motion_vector_buffer; skipMVs = skip1_motion_vector_buffer[offset]; // one packed MV for one // work-item } intel_work_group_vme_mb_multi_check_16x16( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs (value range 0 to 8) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) skipMVs, // skip check MVs (up to 8 sets of skip check MVs for // SIMD16 thread) srcImg, // source refImg, // reference intraSrcImg, // intra source accelerator); } if ((skip_block_type == 0x1) & (countSkipMVs > 0)) { int4 skipMVs = 0; if (get_local_id(0) < countSkipMVs) { __global int4 *skip4_motion_vector_buffer = (__global int4 *)(skip_motion_vector_buffer); skipMVs = skip4_motion_vector_buffer[offset]; // four component MVs // per work-item } intel_work_group_vme_mb_multi_check_8x8( dstSkipIntra, // distortions into SLM countSkipMVs, // count of skip check MVs per MB (value range 0 to 8) doIntra, // compute intra modes intraEdges, // intra edges to use srcCoord, // MB 2-D offset (globally scalar) skipMVs, // skip check MVs (up to 8 ets of skip check MVs for SIMD16 // thread) srcImg, // source refImg, // reference intraSrcImg, // intra source accelerator); } } barrier(CLK_LOCAL_MEM_FENCE); // Write Out motion 
estimation result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks MVs in MBs, and row-major of 8 sets of // MVs/distortion in blocks if (countPredMVs != 0) { // 4x4 if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) { int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width); // 1. 16 work-items enabled. // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30 // (interleaved // fwd/bdw MVs) with constant offset 8 (control data size) from SLM // into contiguous // short2 locations 0, 1, .., 15 of global buffer // search_motion_vector_buffer with // offset index. // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals with // offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = distSearch[get_local_id(0)]; } } // 8x8 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) { // Only 1st 4 work-item are needed. if (get_local_id(0) < 4) { int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width); // 1. 4 work-items enabled. // 2. Work-items gather fw MVs in strided dword locations 0, 8, 16, 24 // (interleaved // fwd/bdw MVs) with constant offset 8 from SLM into contiguous // short2 locations // 0, 1, .., 15 of global buffer search_motion_vector_buffer with // offset index. // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from // distSearch into // contiguous ushort locations 0, 1, .., 15 of search_residuals // with offset index. short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = distSearch[get_local_id(0) * 4]; } } } // 16x16 else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) { // One 1st work is needed. 
if (get_local_id(0) == 0) { int index = gid_0 + gid_1 * width; // 1. 1 work-item enabled. // 2. Work-item gathers fwd MV in dword location 0 with constant // offset 8 from // SLM into short2 locations 0 of global buffer // search_motion_vector_buffer. // 3. Work-item gathers ushort location 0 from distSearch into ushort // location 0 of search_residuals with offset index. short2 val = as_short2(dstSearch[8]); motion_vector_buffer[index] = val; #ifndef HW_NULL_CHECK if (residuals != NULL) #endif { residuals[index] = distSearch[0]; } } } } // Write out motion skip check result: // Result format // Hierarchical row-major layout // i.e. row-major of blocks in MBs, and row-major of 8 sets of // distortions in blocks if (countSkipMVs != 0) { if (skip_block_type == 0x0) { // Copy out 8 (1 component) sets of distortion values. int index = (gid_0 * 8) + (get_local_id(0)) + (gid_1 * 8 * width); if (get_local_id(0) < countSkipMVs) { __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; // 1. Up to 8 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*7 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 7. skip_residuals[index] = distSkip[get_local_id(0) * 16]; } } else { // Copy out 8 (4 component) sets of distortion values. int index = (gid_0 * 8 * 4) + (get_local_id(0)) + (gid_1 * 8 * 4 * width); __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0]; if (get_local_id(0) < countSkipMVs * 4) { // 1. Up to 16 work-items are enabled. // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*31 and // copies them to contiguous skip_residual locations 0, 1, 2, .., // 31. 
skip_residuals[index] = distSkip[get_local_id(0) * 4]; skip_residuals[index + 16] = distSkip[(get_local_id(0) + 16) * 4]; } } } // Write out intra search result: if (doIntra) { int index_low = (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width); int index_high = (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width); // Write out the 4x4 intra modes if (get_local_id(0) < 8) { __local char *dstIntra_4x4 = (__local char *)(&dstSkipIntra[64 + 16 + 4]); char value = dstIntra_4x4[get_local_id(0)]; char value_low = (value)&0xf; char value_high = (value >> 4) & 0xf; intra_search_predictor_modes[index_low + 5] = value_low; intra_search_predictor_modes[index_high + 5] = value_high; } // Write out the 8x8 intra modes if (get_local_id(0) < 4) { __local char *dstIntra_8x8 = (__local char *)(&dstSkipIntra[64 + 8 + 4]); char value = dstIntra_8x8[get_local_id(0) * 2]; char value_low = (value)&0xf; int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width); intra_search_predictor_modes[index + 1] = value_low; } // Write out the 16x16 intra modes if (get_local_id(0) < 1) { __local char *dstIntra_16x16 = (__local char *)(&dstSkipIntra[64 + 0 + 4]); char value = dstIntra_16x16[get_local_id(0)]; char value_low = (value)&0xf; intra_search_predictor_modes[index_low] = value_low; } // Get the intra residuals. 
#ifndef HW_NULL_CHECK if (intra_residuals != NULL) #endif { int index = (gid_0 * 4) + (gid_1 * 4 * width); if (get_local_id(0) < 1) { __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[64 + 16 + 3]); __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[64 + 8 + 3]); __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[64 + 0 + 3]); intra_residuals[index + 2] = distIntra_4x4[0]; intra_residuals[index + 1] = distIntra_8x8[0]; intra_residuals[index + 0] = distIntra_16x16[0]; } } } } } /************************************************************************************************* Built-in kernel: block_advanced_motion_estimate_bidirectional_check_intel Description: 1. Do motion estimation with 0 to 4 predictor MVs using 0 to 4 (integer motion estimation) IMEs per macro-block, calculating the best search MVs per specified (16x16, 8x8, 4x4) luma block with lowest distortion from amongst the 0 to 4 IME results, and optionally do (fractional bi-directional refinement) FBR on the best IME search results to refine the best search results. The best search (FBR if done, or IME) MVs and their distortions are returned. 2. Do undirectional or bidirectional skip (zero search) checks with 0 to 4 sets of skip check MVs for (16x16, 8x8) luma blocks using 0 to 4 (skip and intra check) SICs and return the distortions associated with the input sets of skip check MVs per specified luma block. 4x4 blocks are not supported by h/w for skip checks. 3. Do intra-prediction for (16x16, 8x8, 4x4) luma blocks and (8x8) chroma blocks using 3 SICs and returning the predictor modes and their associated distortions. Intra-prediction is done for all block sizes. Support for 8x8 chroma blocks cannot be enabled until NV image formats are supported in OCL. 
**************************************************************************************************/
// Per-MB inputs are read only when needed: count_motion_vector_buffer is consulted only if a
// component of count_global is -1; a non-negative component of count_global overrides the
// per-MB value. Bit 1 of flags (0x2) enables intra prediction, bit 2 (0x4) selects 8x8 skip
// checks instead of 16x16. Optional output pointers are NULL-checked unless HW_NULL_CHECK
// is defined.
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_advanced_motion_estimate_bidirectional_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, __read_only image2d_t src_check_image,
    __read_only image2d_t ref0_check_image,
    __read_only image2d_t ref1_check_image, uint flags,
    uint search_cost_penalty, uint search_cost_precision, short2 count_global,
    uchar bidir_weight, __global short2 *count_motion_vector_buffer,
    __global short2 *prediction_motion_vector_buffer,
    __global char *skip_input_mode_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *search_motion_vector_buffer,
    __global char *intra_search_predictor_modes,
    __global ushort *search_residuals, __global ushort *skip_residuals,
    __global ushort *intra_residuals, __read_only image2d_t intraSrcImg,
    int height, int width, int stride) {
    __local uint dstSearch[64];         // 8 GRFs
    __local uint dstSkipIntra[32 + 24]; // 7 GRFs (4 for inter, 3 for intra)

    // distortion in the 6th GRF
    __local ushort *distSearch = (__local ushort *)&dstSearch[8 * 5];

    // Initialize the MV cost table:
    // MV Cost in U4U4 format:
    // No cost    : 0,  0,  0,  0,  0,  0,  0,  0
    // Low Cost   : 1,  4,  5,  9,  10, 12, 14, 15
    // Normal Cost: 5,  26, 29, 43, 45, 47, 57, 57
    // High Cost  : 29, 61, 72, 78, 88, 89, 91, 92

    uint2 MVCostTable;
    if (search_cost_penalty == 1) {
        MVCostTable.s0 = 0x09050401;
        MVCostTable.s1 = 0x0F0E0C0A;
    } else if (search_cost_penalty == 2) {
        MVCostTable.s0 = 0x2B1D1A05;
        MVCostTable.s1 = 0x39392F2D;
    } else if (search_cost_penalty == 3) {
        MVCostTable.s0 = 0x4E483D1D;
        MVCostTable.s1 = 0x5C5B5958;
    } else {
        MVCostTable.s0 = 0;
        MVCostTable.s1 = 0;
    }

    uint MVCostPrecision = ((uint)search_cost_precision) << 16;

    // Frame is divided into rows * columns of MBs.
    // One h/w thread per WG.
    // One WG processes "row" MBs - one row per iteration and one MB per row.
    // Number of WGs (or h/w threads) is number of columns MBs. Each iteration
    // processes the MB in a row - gid_0 is the MB id in a row and gid_1 is the
    // row offset.

    int sid_0 = stride * get_group_id(0);
    int gid_0 = sid_0 / height;
    int gid_1 = sid_0 % height;
    for (int sid = sid_0; sid < sid_0 + stride && gid_0 < width && gid_1 < height;
         sid++, gid_0 = sid / height, gid_1 = sid % height) {
        int2 srcCoord;

        srcCoord.x = gid_0 * 16 + get_global_offset(0); // 16 pixels wide MBs (globally scalar)
        srcCoord.y = gid_1 * 16 + get_global_offset(1); // 16 pixels tall MBs (globally scalar)
        uint curMB = gid_0 + gid_1 * width;             // current MB id

        // Start from zero so that a count_global component that is neither -1
        // (per-MB) nor >= 0 (per-frame) yields "no work" instead of reading an
        // uninitialized value.
        short2 count = 0;

        // If either the search or skip vector counts are per-MB, then we need to
        // read in the count motion vector buffer.

        if ((count_global.s0 == -1) | (count_global.s1 == -1)) {
            count = count_motion_vector_buffer[curMB];
        }

        // If either the search or skip vector counts are per-frame, we need to use
        // those.

        if (count_global.s0 >= 0) {
            count.s0 = count_global.s0;
        }

        if (count_global.s1 >= 0) {
            count.s1 = count_global.s1;
        }

        int countPredMVs = count.x;
        if (countPredMVs != 0) {
            uint offset = curMB * 4;       // 4 predictors per MB
            offset += get_local_id(0) % 4; // 16 work-items access 4 MVs for MB
                                           // one predictor for MB per SIMD channel

            // Reduce predictors from Q-pixel to integer precision.
            int2 predMV = 0;

            if (get_local_id(0) < countPredMVs) {
                // one MV per work-item
                predMV = convert_int2(prediction_motion_vector_buffer[offset]);
                // Predictors are input in QP resolution. Convert that to integer
                // resolution.
                predMV.x /= 4;
                predMV.y /= 4;
                // Clear bit 0; the full-width mask preserves the sign of negative values.
                predMV.y &= 0xFFFFFFFE;
            }

            // Do up to 4 IMEs, get the best MVs and their distortions, and optionally
            // a FBR of the best MVs. Finally the results are written out to SLM.

            intel_work_group_vme_mb_multi_query_4(
                dstSearch,       // best search MV and its distortions into SLM
                countPredMVs,    // count of predictor MVs (globally scalar - value range
                                 // 1 to 4)
                MVCostPrecision, // MV cost precision
                MVCostTable,     // MV cost table
                srcCoord,        // MB 2-D offset (globally scalar)
                predMV,          // predictor MVs (up to 4 distinct MVs for SIMD16 thread)
                srcImg,          // source
                refImg,          // reference
                accelerator);    // vme object
        }

        int doIntra = ((flags & 0x2) != 0);
        int intraEdges = 0;
        if (doIntra) {
            // Enable all edges by default.
            intraEdges = 0x3C;
            // If this is a left-edge MB, then disable left edges.
            if ((gid_0 == 0) & (get_global_offset(0) == 0)) {
                intraEdges &= 0x14;
            }

            // If this is a right edge MB then disable right edges.
            if (gid_0 == width - 1) {
                intraEdges &= 0x38;
            }

            // If this is a top-edge MB, then disable top edges.
            if ((gid_1 == 0) & (get_global_offset(1) == 0)) {
                intraEdges &= 0x20;
            }

            // Set bit6=bit5.
            intraEdges |= ((intraEdges & 0x20) << 1);

            intraEdges <<= 8;
        }

        int skip_block_type_8x8 = flags & 0x4;

        int countSkipMVs = count.y;
        if (countSkipMVs != 0 || doIntra == true) {
            // one set of skip MV per SIMD channel

            // Do up to 4 skip checks and get the distortions for each of them.
            // Finally the results are written out to SLM.

            if ((skip_block_type_8x8 == 0) | ((doIntra) & (countSkipMVs == 0))) {
                // 16x16:

                uint offset = curMB * 4 * 2; // 4 sets of skip check MVs per MB
                int skipMV = 0;
                if (get_local_id(0) < countSkipMVs * 2) // need 2 values per MV
                {
                    offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
                    __global int *skip1_motion_vector_buffer =
                        (__global int *)skip_motion_vector_buffer;
                    skipMV = skip1_motion_vector_buffer[offset]; // one MV per work-item
                }

                uchar skipMode = 0;
                if (get_local_id(0) < countSkipMVs) {
                    skipMode = skip_input_mode_buffer[curMB];

                    // Clamp the mode read from the buffer to the range 1..3.
                    if (skipMode == 0) {
                        skipMode = 1;
                    }
                    if (skipMode > 3) {
                        skipMode = 3;
                    }
                }

                intel_work_group_vme_mb_multi_bidir_check_16x16(
                    dstSkipIntra,     // distortions into SLM
                    countSkipMVs,     // count of skip check MVs (globally scalar - value
                                      // range 1 to 4)
                    doIntra,          // compute intra modes
                    intraEdges,       // intra edges to use
                    srcCoord,         // MB 2-D offset (globally scalar)
                    bidir_weight,     // bidirectional weight
                    skipMode,         // skip modes
                    skipMV,           // skip check MVs (up to 4 distinct sets of skip check MVs
                                      // for SIMD16 thread)
                    src_check_image,  // source
                    ref0_check_image, // reference fwd
                    ref1_check_image, // reference bwd
                    intraSrcImg,      // intra source
                    accelerator);     // vme object
            } else {
                // 8x8:

                uint offset = curMB * 4 * 8; // 4 sets of skip check MVs, 16 shorts (8 ints) each per MB
                int2 skipMVs = 0;
                if (get_local_id(0) < countSkipMVs * 8) // need 8 values per MV
                {
                    offset += (get_local_id(0)); // 16 work-items access 4 sets of MVs for MB
                    __global int *skip1_motion_vector_buffer =
                        (__global int *)(skip_motion_vector_buffer);
                    skipMVs.x = skip1_motion_vector_buffer[offset]; // four component MVs
                                                                    // per work-item
                    skipMVs.y = skip1_motion_vector_buffer[offset + 16];
                }

                uchar skipModes = 0;
                if (get_local_id(0) < countSkipMVs) {
                    skipModes = skip_input_mode_buffer[curMB];
                }

                intel_work_group_vme_mb_multi_bidir_check_8x8(
                    dstSkipIntra,     // distortions into SLM
                    countSkipMVs,     // count of skip check MVs per MB (globally scalar -
                                      // value range 1 to 4)
                    doIntra,          // compute intra modes
                    intraEdges,       // intra edges to use
                    srcCoord,         // MB 2-D offset (globally scalar)
                    bidir_weight,     // bidirectional weight
                    skipModes,        // skip modes
                    skipMVs,          // skip check MVs (up to 4 distinct sets of skip check MVs
                                      // for SIMD16 thread)
                    src_check_image,  // source
                    ref0_check_image, // reference fwd
                    ref1_check_image, // reference bwd
                    intraSrcImg,      // intra source
                    accelerator);     // vme object
            }
        }

        barrier(CLK_LOCAL_MEM_FENCE);

        // Write Out motion estimation result:
        // Result format
        //     Hierarchical row-major layout
        //     i.e. row-major of blocks MVs in MBs, and row-major of 4 sets of
        //     MVs/distortion in blocks

        if (countPredMVs != 0) {
            // 4x4
            if (intel_get_accelerator_mb_block_type(accelerator) == 0x2) {
                int index = (gid_0 * 16 + get_local_id(0)) + (gid_1 * 16 * width);

                // 1. 16 work-items enabled.
                // 2. Work-items gather fwd MVs in strided dword locations 0, 2, .., 30
                //    (interleaved fwd/bwd MVs) with constant offset 8 (control data size)
                //    from SLM into contiguous short2 locations 0, 1, .., 15 of global
                //    buffer search_motion_vector_buffer with offset index.
                // 3. Work-items gather contiguous ushort locations 0, 1, .., 15 from
                //    distSearch into contiguous ushort locations 0, 1, .., 15 of
                //    search_residuals with offset index.

                short2 val = as_short2(dstSearch[8 + get_local_id(0) * 2]);
                search_motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
                if (search_residuals != NULL)
#endif
                {
                    search_residuals[index] = distSearch[get_local_id(0)];
                }
            }

            // 8x8
            else if (intel_get_accelerator_mb_block_type(accelerator) == 0x1) {
                // Only the first 4 work-items are needed.
                if (get_local_id(0) < 4) {
                    int index = (gid_0 * 4 + get_local_id(0)) + (gid_1 * 4 * width);

                    // 1. 4 work-items enabled.
                    // 2. Work-items gather fwd MVs in strided dword locations 0, 8, 16, 24
                    //    (interleaved fwd/bwd MVs) with constant offset 8 from SLM into
                    //    contiguous short2 locations 0, 1, 2, 3 of global buffer
                    //    search_motion_vector_buffer with offset index.
                    // 3. Work-items gather strided ushort locations 0, 4, 8, 12 from
                    //    distSearch into contiguous ushort locations 0, 1, 2, 3 of
                    //    search_residuals with offset index.

                    short2 val = as_short2(dstSearch[8 + get_local_id(0) * 4 * 2]);
                    search_motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
                    if (search_residuals != NULL)
#endif
                    {
                        search_residuals[index] = distSearch[get_local_id(0) * 4];
                    }
                }
            }

            // 16x16
            else if (intel_get_accelerator_mb_block_type(accelerator) == 0x0) {
                // Only the first work-item is needed.
                if (get_local_id(0) == 0) {
                    int index = gid_0 + gid_1 * width;

                    // 1. 1 work-item enabled.
                    // 2. Work-item gathers fwd MV in dword location 0 with constant
                    //    offset 8 from SLM into short2 location 0 of global buffer
                    //    search_motion_vector_buffer with offset index.
                    // 3. Work-item gathers ushort location 0 from distSearch into ushort
                    //    location 0 of search_residuals with offset index.

                    short2 val = as_short2(dstSearch[8]);
                    search_motion_vector_buffer[index] = val;

#ifndef HW_NULL_CHECK
                    if (search_residuals != NULL)
#endif
                    {
                        search_residuals[index] = distSearch[0];
                    }
                }
            }
        }

        // Write out motion skip check result:
        // Result format
        //     Hierarchical row-major layout
        //     i.e. row-major of blocks in MBs, and row-major of 4 sets of
        //     distortions in blocks

        if (countSkipMVs != 0) {
            if (skip_block_type_8x8 == false) {
                // Copy out 4 (1 component) sets of distortion values.

                int index = (gid_0 * 4) + (get_local_id(0)) + (gid_1 * 4 * width);

                if (get_local_id(0) < countSkipMVs) {
                    // 1. Up to 4 work-items are enabled.
                    // 2. The work-item gathers distSkip locations 0, 16*1, .., 16*3 and
                    //    copies them to contiguous skip_residual locations 0, 1, 2, 3.
                    __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
                    skip_residuals[index] = distSkip[get_local_id(0) * 16];
                }
            } else {
                // Copy out 4 (4 component) sets of distortion values.

                int index = (gid_0 * 4 * 4) + (get_local_id(0)) + (gid_1 * 4 * 4 * width);

                if (get_local_id(0) < countSkipMVs * 4) {
                    // 1. Up to 16 work-items are enabled.
                    // 2. The work-item gathers distSkip locations 0, 4*1, .., 4*15 and
                    //    copies them to contiguous skip_residual locations 0, 1, 2, ..,
                    //    15.
                    __local ushort *distSkip = (__local ushort *)&dstSkipIntra[0];
                    skip_residuals[index] = distSkip[get_local_id(0) * 4];
                }
            }
        }

        // Write out intra search result:

        if (doIntra) {
            // 22 mode bytes are laid out per MB: [0] 16x16, [1..4] 8x8, [5..20] 4x4.

            // Write out the 4x4 intra modes
            if (get_local_id(0) < 8) {
                __local char *dstIntra_4x4 = (__local char *)(&dstSkipIntra[32 + 16 + 4]);
                char value = dstIntra_4x4[get_local_id(0)];
                // Each byte packs two 4-bit modes.
                char value_low = (value) & 0xf;
                char value_high = (value >> 4) & 0xf;

                int index_low = (gid_0 * 22) + (get_local_id(0) * 2) + (gid_1 * 22 * width);

                int index_high = (gid_0 * 22) + (get_local_id(0) * 2) + 1 + (gid_1 * 22 * width);

                intra_search_predictor_modes[index_low + 5] = value_low;
                intra_search_predictor_modes[index_high + 5] = value_high;
            }

            // Write out the 8x8 intra modes
            if (get_local_id(0) < 4) {
                __local char *dstIntra_8x8 = (__local char *)(&dstSkipIntra[32 + 8 + 4]);
                char value = dstIntra_8x8[get_local_id(0) * 2];
                char value_low = (value) & 0xf;
                int index = (gid_0 * 22) + (get_local_id(0)) + (gid_1 * 22 * width);
                intra_search_predictor_modes[index + 1] = value_low;
            }

            // Write out the 16x16 intra modes
            if (get_local_id(0) < 1) {
                __local char *dstIntra_16x16 = (__local char *)(&dstSkipIntra[32 + 0 + 4]);
                char value = dstIntra_16x16[0];
                char value_low = (value) & 0xf;
                int index = (gid_0 * 22) + (gid_1 * 22 * width);
                intra_search_predictor_modes[index] = value_low;
            }

            // Get the intra residuals.
#ifndef HW_NULL_CHECK
            if (intra_residuals != NULL)
#endif
            {
                // 4 ushort slots are reserved per MB; slots 0..2 are written below.
                int index = (gid_0 * 4) + (gid_1 * 4 * width);

                if (get_local_id(0) < 1) {
                    __local ushort *distIntra_4x4 = (__local ushort *)(&dstSkipIntra[32 + 16 + 3]);
                    __local ushort *distIntra_8x8 = (__local ushort *)(&dstSkipIntra[32 + 8 + 3]);
                    __local ushort *distIntra_16x16 = (__local ushort *)(&dstSkipIntra[32 + 0 + 3]);
                    intra_residuals[index + 2] = distIntra_4x4[0];
                    intra_residuals[index + 1] = distIntra_8x8[0];
                    intra_residuals[index + 0] = distIntra_16x16[0];
                }
            }
        }
    }
}

// VEBOX KERNELS:
// The three kernels below have empty bodies in this file.
__kernel void ve_enhance_intel(sampler_t accelerator,
                               int flags,
                               __read_only image2d_t current_input,
                               __write_only image2d_t current_output) {
}

__kernel void ve_dn_enhance_intel(sampler_t accelerator,
                                  int flags,
                                  __read_only image2d_t ref_input,
                                  __read_only image2d_t current_input,
                                  __write_only image2d_t current_output) {
}

__kernel void ve_dn_di_enhance_intel(sampler_t accelerator,
                                     int flags,
                                     __read_only image2d_t current_input,
                                     __read_only image2d_t ref_input,
                                     __write_only image2d_t current_output,
                                     __write_only image2d_t ref_output,
                                     __write_only image2d_t dndi_output) {
}
compute-runtime-20.13.16352/opencl/test/unit_test/test_files/media_kernels_backend_options.txt000066400000000000000000000024701363734646600325630ustar00rootroot00000000000000
/*
 * Copyright (c) 2017, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

-D cl_intel_device_side_advanced_vme_enable -D cl_intel_device_side_avc_vme_enable -D cl_intel_device_side_vme_enable -D cl_intel_media_block_io -cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math
compute-runtime-20.13.16352/opencl/test/unit_test/test_files/media_kernels_frontend.cl000066400000000000000000000057441363734646600310260ustar00rootroot00000000000000
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// Every kernel in this file has an empty body; only the names, attributes and
// parameter lists are declared here.

// VME KERNELS
__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_motion_estimate_intel(sampler_t accelerator,
                            __read_only image2d_t srcImg,
                            __read_only image2d_t refImg,
                            __global short2 *prediction_motion_vector_buffer,
                            __global short2 *motion_vector_buffer,
                            __global ushort *residuals) {
}

__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_advanced_motion_estimate_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, uint flags, uint skip_block_type,
    uint search_cost_penalty, uint search_cost_precision,
    __global short2 *count_motion_vector_buffer,
    __global short2 *predictors_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *motion_vector_buffer,
    __global char *intra_search_predictor_modes, __global ushort *residuals,
    __global ushort *skip_residuals, __global ushort *intra_residuals) {
}

__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void
block_advanced_motion_estimate_bidirectional_check_intel(
    sampler_t accelerator, __read_only image2d_t srcImg,
    __read_only image2d_t refImg, __read_only image2d_t src_check_image,
    __read_only image2d_t ref0_check_image,
    __read_only image2d_t ref1_check_image, uint flags,
    uint search_cost_penalty, uint search_cost_precision, short2 count_global,
    uchar bidir_weight, __global short2 *count_motion_vector_buffer,
    __global short2 *prediction_motion_vector_buffer,
    __global char *skip_input_mode_buffer,
    __global short2 *skip_motion_vector_buffer,
    __global short2 *search_motion_vector_buffer,
    __global char *intra_search_predictor_modes,
    __global ushort *search_residuals, __global ushort *skip_residuals,
    __global ushort *intra_residuals) {
}

// VEBOX KERNELS:
__kernel void ve_enhance_intel(sampler_t accelerator,
                               int flags,
                               __read_only image2d_t current_input,
                               __write_only image2d_t current_output) {
}

__kernel void ve_dn_enhance_intel(sampler_t accelerator,
                                  int flags,
                                  __read_only image2d_t ref_input,
                                  __read_only image2d_t current_input,
                                  __write_only image2d_t current_output) {
}

__kernel void ve_dn_di_enhance_intel(sampler_t accelerator,
                                     int flags,
                                     __read_only image2d_t current_input,
                                     __read_only image2d_t ref_input,
                                     __write_only image2d_t current_output,
                                     __write_only image2d_t ref_output,
                                     __write_only image2d_t dndi_output) {
}
compute-runtime-20.13.16352/opencl/test/unit_test/test_files/media_kernels_frontend_options.txt000066400000000000000000000024701363734646600330130ustar00rootroot00000000000000
/*
 * Copyright (c) 2017, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all
copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -D cl_intel_device_side_advanced_vme_enable -D cl_intel_device_side_avc_vme_enable -D cl_intel_device_side_vme_enable -D cl_intel_media_block_io -cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math compute-runtime-20.13.16352/opencl/test/unit_test/test_files/patch_list.h000066400000000000000000000173061363734646600263050ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // clang-format off #pragma once #pragma pack( push, 1 ) #include const uint32_t MAGIC_CL = 0x494E5443; struct SProgramBinaryHeader { uint32_t Magic; uint32_t Version; uint32_t Device; uint32_t GPUPointerSizeInBytes; uint32_t NumberOfKernels; uint32_t SteppingId; uint32_t PatchListSize; }; static_assert( sizeof( SProgramBinaryHeader ) == 28 , "The size of SProgramBinaryHeader is not what is expected" ); struct SKernelBinaryHeader { uint32_t CheckSum; uint64_t ShaderHashCode; uint32_t KernelNameSize; uint32_t PatchListSize; }; static_assert( sizeof( SKernelBinaryHeader ) == 20 , "The size of SKernelBinaryHeader is not what is expected" ); struct SKernelBinaryHeaderCommon : SKernelBinaryHeader { uint32_t KernelHeapSize; uint32_t GeneralStateHeapSize; uint32_t DynamicStateHeapSize; uint32_t SurfaceStateHeapSize; uint32_t KernelUnpaddedSize; }; static_assert( sizeof( SKernelBinaryHeaderCommon ) == ( 20 + sizeof( SKernelBinaryHeader ) ) , "The size of SKernelBinaryHeaderCommon is not what is expected" ); enum 
PATCH_TOKEN { PATCH_TOKEN_UNKNOWN, // 0 - (Unused) PATCH_TOKEN_MEDIA_STATE_POINTERS, // 1 - (Unused) PATCH_TOKEN_STATE_SIP, // 2 @SPatchStateSIP@ PATCH_TOKEN_CS_URB_STATE, // 3 - (Unused) PATCH_TOKEN_CONSTANT_BUFFER, // 4 - (Unused) PATCH_TOKEN_SAMPLER_STATE_ARRAY, // 5 @SPatchSamplerStateArray@ PATCH_TOKEN_INTERFACE_DESCRIPTOR, // 6 - (Unused) PATCH_TOKEN_VFE_STATE, // 7 - (Unused) PATCH_TOKEN_BINDING_TABLE_STATE, // 8 @SPatchBindingTableState@ PATCH_TOKEN_ALLOCATE_SCRATCH_SURFACE, // 9 - (Unused) PATCH_TOKEN_ALLOCATE_SIP_SURFACE, // 10 @SPatchAllocateSystemThreadSurface@ PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT, // 11 @SPatchGlobalMemoryObjectKernelArgument@ - OpenCL PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT, // 12 @SPatchImageMemoryObjectKernelArgument@ - OpenCL PATCH_TOKEN_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT, // 13 - (Unused) - OpenCL PATCH_TOKEN_ALLOCATE_SURFACE_WITH_INITIALIZATION, // 14 - (Unused) PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE, // 15 @SPatchAllocateLocalSurface@ PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT, // 16 @SPatchSamplerKernelArgument@ - OpenCL PATCH_TOKEN_DATA_PARAMETER_BUFFER, // 17 @SPatchDataParameterBuffer@ - OpenCL PATCH_TOKEN_MEDIA_VFE_STATE, // 18 @SPatchMediaVFEState@ PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD, // 19 @SPatchMediaInterfaceDescriptorLoad@ PATCH_TOKEN_MEDIA_CURBE_LOAD, // 20 - (Unused) PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA, // 21 @SPatchInterfaceDescriptorData@ PATCH_TOKEN_THREAD_PAYLOAD, // 22 @SPatchThreadPayload@ PATCH_TOKEN_EXECUTION_ENVIRONMENT, // 23 @SPatchExecutionEnvironment@ PATCH_TOKEN_ALLOCATE_PRIVATE_MEMORY, // 24 - (Unused) PATCH_TOKEN_DATA_PARAMETER_STREAM, // 25 @SPatchDataParameterStream PATCH_TOKEN_KERNEL_ARGUMENT_INFO, // 26 @SPatchKernelArgumentInfo@ - OpenCL PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO, // 27 @SPatchKernelAttributesInfo@ - OpenCL PATCH_TOKEN_STRING, // 28 @SPatchString@ - OpenCL PATCH_TOKEN_ALLOCATE_PRINTF_SURFACE, // 29 - (Unused) - OpenCL 
PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT, // 30 @SPatchStatelessGlobalMemoryObjectKernelArgument@ - OpenCL PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT,//31 @SPatchStatelessConstantMemoryObjectKernelArgument@ - OpenCL PATCH_TOKEN_ALLOCATE_STATELESS_SURFACE_WITH_INITIALIZATION, // 32 - (Unused) PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE, // 33 @SPatchAllocateStatelessPrintfSurface@ PATCH_TOKEN_CB_MAPPING, // 34 - (Unused) PATCH_TOKEN_CB2CR_GATHER_TABLE, // 35 - (Unused) PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE, // 36 @SPatchAllocateStatelessEventPoolSurface@ PATCH_TOKEN_NULL_SURFACE_LOCATION, // 37 - (Unused) PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY, // 38 @SPatchAllocateStatelessPrivateSurface@ PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, // 39 - (Unused) PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, // 40 - (Unused) PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 41 @SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo@ PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 42 @SPatchAllocateConstantMemorySurfaceProgramBinaryInfo@ PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION, // 43 @SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization@ PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION, // 44 @SPatchAllocateStatelessConstantMemorySurfaceWithInitialization@ PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE, // 45 @SPatchAllocateStatelessDefaultDeviceQueueSurface@ PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT, // 46 @SPatchStatelessDeviceQueueKernelArgument@ PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO, // 47 @SPatchGlobalPointerProgramBinaryInfo@ PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO, // 48 @SPatchConstantPointerProgramBinaryInfo@ PATCH_TOKEN_CONSTRUCTOR_DESTRUCTOR_KERNEL_PROGRAM_BINARY_INFO, // 49 - (Unused) PATCH_TOKEN_INLINE_VME_SAMPLER_INFO, // 50 - 
(Unused) PATCH_TOKEN_GTPIN_FREE_GRF_INFO, // 51 @SPatchGtpinFreeGRFInfo@ PATCH_TOKEN_GTPIN_INFO, NUM_PATCH_TOKENS }; struct SPatchItemHeader { uint32_t Token; uint32_t Size; }; struct SPatchDataParameterBuffer : SPatchItemHeader { uint32_t Type; uint32_t ArgumentNumber; uint32_t Offset; uint32_t DataSize; uint32_t SourceOffset; uint32_t LocationIndex; uint32_t LocationIndex2; uint32_t IsEmulationArgument; }; struct SPatchMediaInterfaceDescriptorLoad : SPatchItemHeader { uint32_t InterfaceDescriptorDataOffset; }; static_assert( sizeof( SPatchMediaInterfaceDescriptorLoad ) == ( 4 + sizeof( SPatchItemHeader ) ) , "The size of SPatchMediaInterfaceDescriptorLoad is not what is expected" ); struct SPatchStateSIP : SPatchItemHeader { uint32_t SystemKernelOffset; }; struct SPatchSamplerStateArray : SPatchItemHeader { uint32_t Offset; uint32_t Count; uint32_t BorderColorOffset; }; struct SPatchAllocateConstantMemorySurfaceProgramBinaryInfo : SPatchItemHeader { uint32_t ConstantBufferIndex; uint32_t InlineDataSize; }; static_assert( sizeof( SPatchAllocateConstantMemorySurfaceProgramBinaryInfo ) == ( 8 + sizeof( SPatchItemHeader ) ) , "The size of SPatchAllocateConstantMemorySurfaceProgramBinaryInfo is not what is expected" ); #pragma pack( pop ) // clang-format oncompute-runtime-20.13.16352/opencl/test/unit_test/test_files/printf.cl000066400000000000000000000002141363734646600256120ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void test() { printf("OpenCL\n"); } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/required_work_group.cl000066400000000000000000000007551363734646600304200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel __attribute__((reqd_work_group_size(8, 4, 4))) void CopyBuffer( __global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } __kernel 
__attribute__((reqd_work_group_size(1, 1, 1))) void CopyBuffer2( __global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = src[id]; } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/shouldfail.cl000066400000000000000000000003501363734646600264430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void shouldfail(global ushort *dst) { // idx and dummy are not defined, compiler should fail the build. dst[idx] = dummy; } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/shouldfail_internal_options.txt000066400000000000000000000021751363734646600323420ustar00rootroot00000000000000/* * Copyright (c) 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ -shouldfailInternalOptionscompute-runtime-20.13.16352/opencl/test/unit_test/test_files/shouldfail_options.txt000066400000000000000000000021651363734646600304450ustar00rootroot00000000000000/* * Copyright (c) 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ -shouldfailOptionscompute-runtime-20.13.16352/opencl/test/unit_test/test_files/simple_arg_int.cl000066400000000000000000000003121363734646600273030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void SimpleArg(int src, __global int *dst) { int id = (int)get_global_id(0); dst[id] = src; } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/simple_block_kernel.cl000066400000000000000000000041401363734646600303150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ void block_fn(size_t tid, int mul, __global int* res) { res[tid] = mul * 7 - 21; } kernel void simple_block_kernel(__global int* res) { int multiplier = 3; size_t tid = get_global_id(0); void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; res[tid] = -1; queue_t def_q = get_default_queue(); ndrange_t ndrange = ndrange_1D(1); int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; } } void block_reflection(ulong scalar, __global uint* buffer, read_only image3d_t img, sampler_t sampler) { float4 color; int4 coord; buffer[0] = scalar; coord.x = scalar; coord.y = 0; coord.z = 0; int width = get_image_width( img ); int heigth = get_image_height( img ); int depth = get_image_depth( img ); int order = get_image_channel_order( img ); int type = get_image_channel_data_type( img ); color = read_imagef( img, sampler, coord ); buffer[1] = (uint)color.x; buffer[2] = (uint)width; buffer[3] = (uint)heigth; buffer[4] = (uint)depth; buffer[5] = (uint)order; buffer[6] = (uint)type; sampler_t samplerA = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; color = read_imagef( img, samplerA, coord ); buffer[7] = (uint)color.y; queue_t def_q = get_default_queue(); ndrange_t ndrange = ndrange_1D(1); if( scalar > 2 ){ scalar--; int enq_res = enqueue_kernel(def_q, 
CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, ^{block_reflection(scalar, buffer, img, sampler);}); } } kernel void kernel_reflection(sampler_t sampler, read_only image3d_t img, __global uint* buffer, ulong scalar) { size_t tid = get_global_id(0); void (^kernelBlock)(void) = ^{ block_reflection(scalar, buffer, img, sampler); }; queue_t def_q = get_default_queue(); ndrange_t ndrange = ndrange_1D(1); if( scalar > 0 ){ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); if(enq_res != CLK_SUCCESS) { buffer[2] = 1; } } } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/simple_header.h000066400000000000000000000002741363734646600267500ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern __kernel void AddBuffer( __global float *src, __global float *dst); compute-runtime-20.13.16352/opencl/test/unit_test/test_files/simple_kernels.cl000066400000000000000000000065351363734646600273400ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable __kernel void simple_kernel_0( const uint arg0, const float arg1, __global uint *dst) { uint idx = get_global_id(0); uint data = arg0 + (uint)arg1; dst[idx] = data; } __kernel void simple_kernel_1( __global const uint *src, const uint arg1, __global uint *dst) { uint idx = get_global_id(0); dst[idx] = src[idx] + arg1; } __kernel void simple_kernel_2( const uint arg0, __global uint *dst) { uint idx = get_global_id(0); dst[idx] = arg0; } __kernel void simple_kernel_3( __global uint *dst) { dst[get_global_id(0)] = 0; } __kernel void simple_kernel_4() { } __kernel void simple_kernel_5(__global uint *dst) { //first uint holds the total work item count atomic_inc(dst); uint groupIdX = get_group_id(0); uint groupIdY = get_group_id(1); uint groupIdZ = get_group_id(2); uint groupCountX = 
get_num_groups(0); uint groupCountY = get_num_groups(1); uint groupCountZ = get_num_groups(2); __global uint *groupCounters = dst + 1; //store current group position in 3D array uint destination = groupIdZ * groupCountY * groupCountX + groupIdY * groupCountX + groupIdX; atomic_inc(&groupCounters[destination]); } #define SIMPLE_KERNEL_6_ARRAY_SIZE 32 __kernel void simple_kernel_6(__global uint *dst, __constant uint2 *src, uint scalar, uint maxIterations, uint maxIterations2) { __private uint2 array[SIMPLE_KERNEL_6_ARRAY_SIZE]; __private uint2 sum; __private size_t gid = get_global_id(0); __private size_t lid = get_local_id(0); __private uint multi = 1; if (lid == 1024) { multi = 4; } sum = (uint2)(0, 0); for (int i = 0; i < maxIterations; ++i) { array[i] = src[i] + (uint2)(i * multi, i * multi + scalar); } for (int i = 0; i < maxIterations2; ++i) { sum.x = array[i].x + sum.x; sum.y = array[i].y + sum.y; } vstore2(sum, gid, dst); } typedef long16 TYPE; __attribute__((reqd_work_group_size(32, 1, 1))) // force LWS to 32 __attribute__((intel_reqd_sub_group_size(16))) // force SIMD to 16 __kernel void simple_kernel_7(__global int *resIdx, global TYPE *src, global TYPE *dst) { size_t lid = get_local_id(0); size_t gid = get_global_id(0); TYPE res1 = src[gid * 3]; TYPE res2 = src[gid * 3 + 1]; TYPE res3 = src[gid * 3 + 2]; __local TYPE locMem[32]; locMem[lid] = res1; barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE); TYPE res = (locMem[resIdx[gid]] * res3) * res2 + res1; dst[gid] = res; } __kernel void simple_kernel_8(__global uint *dst, uint incrementationsCount) { uint groupIdX = get_group_id(0); uint groupIdY = get_group_id(1); uint groupIdZ = get_group_id(2); uint groupCountX = get_num_groups(0); uint groupCountY = get_num_groups(1); uint groupCountZ = get_num_groups(2); uint destination = groupIdZ * groupCountY * groupCountX + groupIdY * groupCountX + groupIdX; for (uint i = 0; i < incrementationsCount; i++) { dst[destination]++; } } __kernel void 
simple_kernel_9(__global uint *dst) { uint offset = get_max_sub_group_size() * get_sub_group_id(); dst[get_sub_group_local_id() + offset] = get_local_id(0); } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/simple_kernels_opts.cl000066400000000000000000000004401363734646600303720ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void SimpleArg(int src, __global int *dst) { int id = (int)get_global_id(0); #ifdef DEF_WAS_SPECIFIED int val = 1; #else // fail to compile #endif dst[id] = src + val; } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/simple_kernels_opts_options.txt000066400000000000000000000022661363734646600323760ustar00rootroot00000000000000/* * Copyright (c) 2017, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ -cl-opt-disable -DDEF_WAS_SPECIFIED=1 compute-runtime-20.13.16352/opencl/test/unit_test/test_files/simple_nonuniform.cl000066400000000000000000000010251363734646600300540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void simpleNonUniform(int atomicOffset, __global volatile int *dst) { int id = (int)(get_global_id(2) * (get_global_size(1) * get_global_size(0)) + get_global_id(1) * get_global_size(0) + get_global_id(0)); dst[id] = id; __global volatile atomic_int *atomic_dst = ( __global volatile atomic_int * )dst; atomic_fetch_add_explicit( &atomic_dst[atomicOffset], 1 , memory_order_relaxed, memory_scope_all_svm_devices ); }compute-runtime-20.13.16352/opencl/test/unit_test/test_files/stateful_copy_buffer.cl000066400000000000000000000003521363734646600305250ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void StatefulCopyBuffer( const __global uchar* src, __global uchar* dst) { uint id = get_global_id(0); dst[id] = src[id]; }compute-runtime-20.13.16352/opencl/test/unit_test/test_files/stateful_copy_buffer_ocloc_options.txt000066400000000000000000000001461363734646600337010ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ -force_stos_opt compute-runtime-20.13.16352/opencl/test/unit_test/test_files/stateless_kernel.cl000066400000000000000000000003071363734646600276620ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void statelessKernel(__global uchar* src) { uint tid = get_global_id(0); src[tid] = 0xCD; }compute-runtime-20.13.16352/opencl/test/unit_test/test_files/test_basic_constant.cl000066400000000000000000000004651363734646600303510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void constant_kernel(__global 
float *out, __constant float *tmpF, __constant int *tmpI) { int tid = get_global_id(0); float ftmp = tmpF[tid]; float Itmp = tmpI[tid]; out[tid] = ftmp * Itmp; } test_basic_kernel_memory_alignment_private.cl000066400000000000000000000010101363734646600350640ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/test_files/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __kernel void test(__global ulong *results) { __private char mem0[3]; __private char2 mem2[3]; __private char3 mem3[3]; __private char4 mem4[3]; __private char8 mem8[3]; __private char16 mem16[3]; results[0] = (ulong)&mem0[0]; results[1] = (ulong)&mem2[0]; results[2] = (ulong)&mem3[0]; results[3] = (ulong)&mem4[0]; results[4] = (ulong)&mem8[0]; results[5] = (ulong)&mem16[0]; } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/test_constant_memory.cl000066400000000000000000000005361363734646600305770ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ __constant uint constant_a[2] = {0xabcd5432u, 0xaabb5533u}; __kernel void test(__global uint *in, __global uint *out) { int i = get_global_id(0); int j = get_global_id(0) % (sizeof(constant_a) / sizeof(constant_a[0])); out[i] = constant_a[j]; } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/vme_kernels.cl000066400000000000000000000115401363734646600266260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ /*************************************************************************************************\ Device-side basic vme kernel: device_side_block_motion_estimate_intel Description: Computes motion vectors by comparing a 2d image source with a 2d reference image, producing a vector field motion vectors. 
The algorithm searches the best match of each macroblock pixel block in the source image by searching an image region in the reference image, centered on the coordinates of that pixel macroblock in the source image (optionally offset by the prediction motion vectors). This kernel optionally takes a vector field of motion vector predictors via the prediction_motion_vector_image kernel argument. The kernel also optionally returns a vector field of per-macroblock pixel-block information records. Each record contains the best-match distortion (SAD) value and additional search result information. This kernel needs to be compiled with following compiler option: " -D cl_intel_device_side_avc_vme_enable " \*************************************************************************************************/ __kernel __attribute__((reqd_work_group_size(16, 1, 1))) void device_side_block_motion_estimate_intel(__read_only image2d_t srcImg, __read_only image2d_t refImg, __global short2 *prediction_motion_vector_buffer, __global short2 *motion_vector_buffer, __global ushort *residuals_buffer, __global uchar2 *shapes_buffer, int iterations, int partitionMask) { int gid_0 = get_group_id(0); int gid_1 = 0; sampler_t vme_samp = 0; for (int i = 0; i < iterations; i++, gid_1++) { ushort2 srcCoord = 0; short2 refCoord = 0; short2 predMV = 0; srcCoord.x = gid_0 * 16 + get_global_offset(0); srcCoord.y = gid_1 * 16 + get_global_offset(1); if (prediction_motion_vector_buffer != NULL) { predMV = prediction_motion_vector_buffer[gid_0 + gid_1 * get_num_groups(0)]; refCoord.x = predMV.x / 4; refCoord.y = predMV.y / 4; refCoord.y = refCoord.y & 0xFFFE; } uchar partition_mask = (uchar)partitionMask; uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL; uchar pixel_mode = CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL; intel_sub_group_avc_ime_payload_t payload = intel_sub_group_avc_ime_initialize(srcCoord, partition_mask, sad_adjustment); payload = intel_sub_group_avc_ime_set_single_reference(refCoord, 
CLK_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL, payload); intel_sub_group_avc_ime_result_t result = intel_sub_group_avc_ime_evaluate_with_single_reference(srcImg, refImg, vme_samp, payload); // Process Results long mvs = intel_sub_group_avc_ime_get_motion_vectors(result); ushort sads = intel_sub_group_avc_ime_get_inter_distortions(result); uchar major_shape = intel_sub_group_avc_ime_get_inter_major_shape(result); uchar minor_shapes = intel_sub_group_avc_ime_get_inter_minor_shapes(result); uchar2 shapes = {major_shape, minor_shapes}; uchar directions = intel_sub_group_avc_ime_get_inter_directions(result); // Perform FME for non-Integer Pixel mode if (pixel_mode != CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL) { intel_sub_group_avc_ref_payload_t payload = intel_sub_group_avc_fme_initialize(srcCoord, mvs, major_shape, minor_shapes, directions, pixel_mode, sad_adjustment); intel_sub_group_avc_ref_result_t result = intel_sub_group_avc_ref_evaluate_with_single_reference(srcImg, refImg, vme_samp, payload); mvs = intel_sub_group_avc_ref_get_motion_vectors(result); sads = intel_sub_group_avc_ref_get_inter_distortions(result); } // Write Out Result if ((get_local_id(0) % 4) == 0) { int x = get_local_id(0) % 4; int y = get_local_id(0) / 4; int width = get_image_width(srcImg); int index = (gid_0 * 4 + x) + (gid_1 * width / 4 + y); int2 bi_mvs = as_int2(mvs); motion_vector_buffer[index] = as_short2(bi_mvs.s0); if (residuals_buffer != NULL) { residuals_buffer[index] = sads; } shapes_buffer[gid_0 + gid_1 * get_num_groups(0)] = shapes; } } } __kernel void non_vme_kernel(__global unsigned int *src, __global unsigned int *dst) { int id = (int)get_global_id(0); dst[id] = lgamma((float)src[id]); dst[id] = src[id]; } compute-runtime-20.13.16352/opencl/test/unit_test/test_files/vme_kernels_options.txt000066400000000000000000000022301363734646600306160ustar00rootroot00000000000000/* * Copyright (c) 2017, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a 
* copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -D cl_intel_device_side_vme_enable -D HW_NULL_CHECK compute-runtime-20.13.16352/opencl/test/unit_test/test_mode.h000066400000000000000000000003761363734646600237740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/test/unit_test/tests_configuration.h" namespace NEO { constexpr TestMode defaultTestMode = TestMode::UnitTests; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/ult_config_listener.cpp000066400000000000000000000027741363734646600264060ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/ult_config_listener.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/unit_test/helpers/default_hw_info.h" #include "shared/test/unit_test/helpers/ult_hw_config.h" #include 
"opencl/source/platform/platform.h" #include "opencl/test/unit_test/mocks/mock_platform.h" void NEO::UltConfigListener::OnTestStart(const ::testing::TestInfo &testInfo) { referencedHwInfo = *defaultHwInfo; auto executionEnvironment = constructPlatform()->peekExecutionEnvironment(); executionEnvironment->prepareRootDeviceEnvironments(1); executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(defaultHwInfo.get()); executionEnvironment->calculateMaxOsContextCount(); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); } void NEO::UltConfigListener::OnTestEnd(const ::testing::TestInfo &testInfo) { // Clear global platform that it shouldn't be reused between tests platformsImpl.clear(); MemoryManager::maxOsContextCount = 0u; // Ensure that global state is restored UltHwConfig expectedState{}; static_assert(sizeof(UltHwConfig) == 7 * sizeof(bool), ""); // Ensure that there is no internal padding EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig))); EXPECT_EQ(0, memcmp(&referencedHwInfo, defaultHwInfo.get(), sizeof(HardwareInfo))); } compute-runtime-20.13.16352/opencl/test/unit_test/ult_config_listener.h000066400000000000000000000006751363734646600260510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/hw_info.h" #include "gtest/gtest.h" namespace NEO { class UltConfigListener : public ::testing::EmptyTestEventListener { private: void OnTestStart(const ::testing::TestInfo &) override; void OnTestEnd(const ::testing::TestInfo &) override; HardwareInfo referencedHwInfo; }; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/ult_configuration.cpp000066400000000000000000000004451363734646600260740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "test_mode.h" namespace NEO { unsigned int ultIterationMaxTime = 45; bool useMockGmm = true; const char 
*executionDirectorySuffix = ""; TestMode testMode = defaultTestMode; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/user_settings_32.user000066400000000000000000000045241363734646600257410ustar00rootroot00000000000000 --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener --disable_pagefaulting_tests $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger compute-runtime-20.13.16352/opencl/test/unit_test/user_settings_64.user000066400000000000000000000045161363734646600257470ustar00rootroot00000000000000 --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener --disable_pagefaulting_tests $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger --gtest_filter=* --gtest_catch_exceptions=0 --enable_default_listener $(TargetDir) WindowsLocalDebugger compute-runtime-20.13.16352/opencl/test/unit_test/utilities/000077500000000000000000000000001363734646600236455ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/utilities/.clang-tidy000066400000000000000000000027311363734646600257040ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-default-arguments,modernize-use-override,modernize-use-default-member-init,-clang-analyzer-alpha*,readability-identifier-naming,-clang-analyzer-core.UndefinedBinaryOperatorResult' # WarningsAsErrors: '.*' HeaderFilterRegex: '/runtime/|/core/|/offline_compiler/' AnalyzeTemporaryDtors: false CheckOptions: - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: 
google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: readability-identifier-naming.ParameterCase value: camelBack - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' - key: modernize-use-default-member-init.UseAssignment value: '1' ... compute-runtime-20.13.16352/opencl/test/unit_test/utilities/CMakeLists.txt000066400000000000000000000013361363734646600264100ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(IGDRCL_SRCS_tests_utilities ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/debug_file_reader_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_file_reader_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/debug_settings_reader_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_logger_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator_tests.cpp ) get_property(NEO_CORE_UTILITIES_TESTS GLOBAL PROPERTY NEO_CORE_UTILITIES_TESTS) list(APPEND IGDRCL_SRCS_tests_utilities ${NEO_CORE_UTILITIES_TESTS}) target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_utilities}) compute-runtime-20.13.16352/opencl/test/unit_test/utilities/debug_file_reader_tests.cpp000066400000000000000000000040321363734646600312010ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/utilities/debug_file_reader_tests.inl" using namespace NEO; using namespace std; TEST(SettingsFileReader, givenTestFileWithDefaultValuesWhenTheyAreQueriedThenDefaultValuesMatch) { // Use test settings file std::unique_ptr reader = unique_ptr(new 
TestSettingsFileReader(TestSettingsFileReader::testPath)); ASSERT_NE(nullptr, reader); size_t debugVariableCount = 0; bool compareSuccessful = false; #define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \ compareSuccessful = (defaultValue == reader->getSetting(#variableName, defaultValue)); \ EXPECT_TRUE(compareSuccessful) << #variableName; \ debugVariableCount++; #include "debug_variables.inl" #undef DECLARE_DEBUG_VARIABLE size_t mapCount = reader->getStringSettingsCount(); EXPECT_EQ(mapCount, debugVariableCount); } TEST(SettingsFileReader, GetSetting) { // Use test settings file std::unique_ptr reader = unique_ptr(new TestSettingsFileReader(TestSettingsFileReader::testPath)); ASSERT_NE(nullptr, reader); #define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \ { \ dataType defaultData = defaultValue; \ dataType tempData = reader->getSetting(#variableName, defaultData); \ \ if (tempData == defaultData) { \ EXPECT_TRUE(true); \ } \ } #include "debug_variables.inl" #undef DECLARE_DEBUG_VARIABLE } compute-runtime-20.13.16352/opencl/test/unit_test/utilities/debug_file_reader_tests.inl000066400000000000000000000120351363734646600312030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/file_io.h" #include "shared/source/utilities/debug_file_reader.h" #include "test.h" #include "gtest/gtest.h" #include #include using namespace NEO; using namespace std; class TestSettingsFileReader : public SettingsFileReader { public: using SettingsFileReader::parseStream; TestSettingsFileReader(const char *filePath = nullptr) : SettingsFileReader(filePath) { } ~TestSettingsFileReader() override { } size_t getStringSettingsCount() { return settingStringMap.size(); } static const char *testPath; static const char *stringTestPath; }; const char *TestSettingsFileReader::testPath = "./test_files/igdrcl.config"; const char 
*TestSettingsFileReader::stringTestPath = "./test_files/igdrcl_string.config"; TEST(SettingsFileReader, CreateFileReaderWithoutFile) { bool settingsFileExists = fileExists(SettingsReader::settingsFileName); // if settings file exists, remove it if (settingsFileExists) { remove(SettingsReader::settingsFileName); } // Use current location for file read std::unique_ptr reader = unique_ptr(new TestSettingsFileReader()); ASSERT_NE(nullptr, reader); EXPECT_EQ(0u, reader->getStringSettingsCount()); } TEST(SettingsFileReader, GetStringSettingFromFile) { // Use test settings file std::unique_ptr reader = unique_ptr(new TestSettingsFileReader(TestSettingsFileReader::stringTestPath)); ASSERT_NE(nullptr, reader); string retValue; // StringTestKey is defined in file: unit_tests\helpers\test_debug_variables.inl string returnedStringValue = reader->getSetting("StringTestKey", retValue); // "Test Value" is a value that should be read from file defined in stringTestPath member EXPECT_STREQ(returnedStringValue.c_str(), "TestValue"); #define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \ { \ dataType defaultData = defaultValue; \ dataType tempData = reader->getSetting(#variableName, defaultData); \ if (tempData == defaultData) { \ EXPECT_TRUE(true); \ } \ } #include "opencl/test/unit_test/helpers/test_debug_variables.inl" #undef DECLARE_DEBUG_VARIABLE } TEST(SettingsFileReader, givenDebugFileSettingInWhichStringIsFollowedByIntegerWhenItIsParsedThenProperValuesAreObtained) { std::unique_ptr reader(new TestSettingsFileReader(TestSettingsFileReader::stringTestPath)); ASSERT_NE(nullptr, reader.get()); int32_t retValue = 0; int32_t returnedIntValue = reader->getSetting("IntTestKey", retValue); EXPECT_EQ(1, returnedIntValue); string retValueString; string returnedStringValue = reader->getSetting("StringTestKey", retValueString); EXPECT_STREQ(returnedStringValue.c_str(), "TestValue"); } TEST(SettingsFileReader, GetSettingWhenNotInFile) { // Use test settings 
file std::unique_ptr reader = unique_ptr(new TestSettingsFileReader(TestSettingsFileReader::testPath)); ASSERT_NE(nullptr, reader); bool defaultBoolValue = false; bool returnedBoolValue = reader->getSetting("BoolSettingNotExistingInFile", defaultBoolValue); EXPECT_EQ(defaultBoolValue, returnedBoolValue); int32_t defaultIntValue = 123; int32_t returnedIntValue = reader->getSetting("IntSettingNotExistingInFile", defaultIntValue); EXPECT_EQ(defaultIntValue, returnedIntValue); string defaultStringValue = "ABCD"; string returnedStringValue = reader->getSetting("StringSettingNotExistingInFile", defaultStringValue); EXPECT_EQ(defaultStringValue, returnedStringValue); } TEST(SettingsFileReader, appSpecificLocation) { std::unique_ptr reader(new TestSettingsFileReader(TestSettingsFileReader::testPath)); std::string appSpecific = "cl_cache_dir"; EXPECT_EQ(appSpecific, reader->appSpecificLocation(appSpecific)); } TEST(SettingsFileReader, givenHexNumbersSemiColonSeparatedListInInputStreamWhenParsingThenCorrectStringValueIsStored) { std::unique_ptr reader = unique_ptr(new TestSettingsFileReader()); ASSERT_NE(nullptr, reader); //No settings should be parsed initially EXPECT_EQ(0u, reader->getStringSettingsCount()); stringstream inputLineWithSemiColonList("KeyName = 0x1234;0x5555"); reader->parseStream(inputLineWithSemiColonList); string defaultStringValue = "FailedToParse"; string returnedStringValue = reader->getSetting("KeyName", defaultStringValue); EXPECT_STREQ("0x1234;0x5555", returnedStringValue.c_str()); } compute-runtime-20.13.16352/opencl/test/unit_test/utilities/debug_settings_reader_creator.cpp000066400000000000000000000005721363734646600324240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/utilities/debug_settings_reader_creator.h" namespace NEO { std::unique_ptr SettingsReaderCreator::create(const std::string ®Key) { return std::unique_ptr(SettingsReader::createOsReader(false, 
regKey)); } } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/utilities/debug_settings_reader_tests.cpp000066400000000000000000000060051363734646600321240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/file_io.h" #include "shared/source/utilities/debug_settings_reader.h" #include "opencl/source/os_interface/ocl_reg_path.h" #include "test.h" #include "gtest/gtest.h" #include #include using namespace NEO; using namespace std; TEST(SettingsReader, Create) { SettingsReader *reader = SettingsReader::create(oclRegPath); EXPECT_NE(nullptr, reader); delete reader; } TEST(SettingsReader, GivenNoSettingsFileWhenCreatingSettingsReaderThenOsReaderIsCreated) { remove(SettingsReader::settingsFileName); auto fileReader = std::unique_ptr(SettingsReader::createFileReader()); EXPECT_EQ(nullptr, fileReader.get()); auto osReader = std::unique_ptr(SettingsReader::create(oclRegPath)); EXPECT_NE(nullptr, osReader.get()); } TEST(SettingsReader, GivenSettingsFileExistsWhenCreatingSettingsReaderThenFileReaderIsCreated) { bool settingsFileExists = fileExists(SettingsReader::settingsFileName); if (!settingsFileExists) { const char data[] = "ProductFamilyOverride = test"; writeDataToFile(SettingsReader::settingsFileName, &data, sizeof(data)); } auto reader = std::unique_ptr(SettingsReader::create(oclRegPath)); EXPECT_NE(nullptr, reader.get()); string defaultValue("unk"); EXPECT_STREQ("test", reader->getSetting("ProductFamilyOverride", defaultValue).c_str()); remove(SettingsReader::settingsFileName); } TEST(SettingsReader, CreateFileReader) { bool settingsFileExists = fileExists(SettingsReader::settingsFileName); if (!settingsFileExists) { char data = 0; writeDataToFile(SettingsReader::settingsFileName, &data, 0); } SettingsReader *reader = SettingsReader::createFileReader(); EXPECT_NE(nullptr, reader); if 
(!settingsFileExists) { remove(SettingsReader::settingsFileName); } delete reader; } TEST(SettingsReader, CreateOsReader) { SettingsReader *reader = SettingsReader::createOsReader(false, oclRegPath); EXPECT_NE(nullptr, reader); delete reader; } TEST(SettingsReader, CreateOsReaderWithRegKey) { std::string regKey = oclRegPath; unique_ptr reader(SettingsReader::createOsReader(false, regKey)); EXPECT_NE(nullptr, reader); } TEST(SettingsReader, givenPrintDebugStringWhenCalledWithTrueItPrintsToOutput) { int i = 4; testing::internal::CaptureStdout(); printDebugString(true, stdout, "testing error %d", i); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STRNE(output.c_str(), ""); } TEST(SettingsReader, givenPrintDebugStringWhenCalledWithFalseThenNothingIsPrinted) { int i = 4; testing::internal::CaptureStdout(); printDebugString(false, stderr, "Error String %d", i); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ(output.c_str(), ""); } compute-runtime-20.13.16352/opencl/test/unit_test/utilities/file_logger_tests.cpp000066400000000000000000001131571363734646600300610ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "opencl/test/unit_test/utilities/file_logger_tests.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/utilities/logger.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include #include #include #include using namespace std; using namespace NEO; TEST(FileLogger, WithDebugFunctionality) { DebugVariables flags; 
FullyEnabledFileLogger fileLogger(std::string(""), flags); EXPECT_TRUE(fileLogger.enabled()); } TEST(FileLogger, GivenFileLoggerWhenSettingFileNameThenCorrectFilenameIsSet) { DebugVariables flags; FullyEnabledFileLogger fileLogger(std::string(""), flags); fileLogger.setLogFileName("new_filename"); EXPECT_STREQ("new_filename", fileLogger.getLogFileName()); } TEST(FileLogger, WithDebugFunctionalityCreatesAndDumpsToLogFile) { DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(std::string("test.log"), flags); fileLogger.logApiCall("searchString", true, 0); fileLogger.logApiCall("searchString2", false, 0); fileLogger.logInputs("searchString3", "any"); fileLogger.logInputs("searchString3", "any", "and more"); fileLogger.log(false, "searchString4"); // Log file not created EXPECT_TRUE(fileLogger.wasFileCreated(fileLogger.getLogFileName())); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find("searchString") != std::string::npos); EXPECT_TRUE(str.find("searchString2") != std::string::npos); EXPECT_TRUE(str.find("searchString3") != std::string::npos); EXPECT_FALSE(str.find("searchString4") != std::string::npos); } fileLogger.log(true, "searchString4"); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find("searchString4") != std::string::npos); } } TEST(FileLogger, WithoutDebugFunctionalityDoesNotCreateLogFile) { DebugVariables flags; flags.LogApiCalls.set(true); FullyDisabledFileLogger fileLogger(std::string(" "), flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); fileLogger.logApiCall("searchString", true, 0); fileLogger.logApiCall("searchString2", false, 0); fileLogger.logInputs("searchString3", "any"); fileLogger.logInputs("searchString3", "any", "and more"); 
fileLogger.log(false, "searchString4"); EXPECT_FALSE(fileLogger.wasFileCreated(fileLogger.getLogFileName())); } TEST(FileLogger, WithIncorrectFilenameFileNotCreated) { DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(std::string("test.log"), flags); fileLogger.useRealFiles(true); fileLogger.writeToFile("", "", 0, std::ios_base::in | std::ios_base::out); EXPECT_FALSE(fileLogger.wasFileCreated(fileLogger.getLogFileName())); } TEST(FileLogger, WithCorrectFilenameFileCreated) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.useRealFiles(true); fileLogger.writeToFile(testFile, "test", 4, std::fstream::out); EXPECT_TRUE(fileExists(testFile)); if (fileExists(testFile)) { std::remove(testFile.c_str()); } } TEST(FileLogger, CreatingNewInstanceRemovesOldFile) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.useRealFiles(true); fileLogger.writeToFile(fileLogger.getLogFileName(), "test", 4, std::fstream::out); EXPECT_TRUE(fileExists(fileLogger.getLogFileName())); FullyEnabledFileLogger fileLogger2(testFile, flags); EXPECT_FALSE(fileExists(fileLogger.getLogFileName())); } TEST(FileLogger, WithDebugFunctionalityDoesNotDumpApiCallLogWhenFlagIsFalseButDumpsCustomLogs) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); // Log file not created bool logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); fileLogger.logApiCall("searchString", true, 0); fileLogger.logApiCall("searchString2", false, 0); fileLogger.logInputs("searchString3", "any"); fileLogger.logInputs("searchString3", "any", "and more"); fileLogger.log(false, "searchString4"); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = 
fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_FALSE(str.find("searchString\n") != std::string::npos); EXPECT_FALSE(str.find("searchString2\n") != std::string::npos); EXPECT_FALSE(str.find("searchString3") != std::string::npos); EXPECT_FALSE(str.find("searchString4") != std::string::npos); } // Log still works fileLogger.log(true, "searchString4"); if (fileLogger.wasFileCreated(fileLogger.getLogFileName())) { auto str = fileLogger.getFileString(fileLogger.getLogFileName()); EXPECT_TRUE(str.find("searchString4") != std::string::npos); } } TEST(FileLogger, WithDebugFunctionalityGetInputReturnsCorectValue) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); // getInput returns 0 size_t input = 5; size_t output = fileLogger.getInput(&input, 0); EXPECT_EQ(input, output); } TEST(FileLogger, WithDebugFunctionalityGetInputNegative) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); // getInput returns 0 size_t output = fileLogger.getInput(nullptr, 2); EXPECT_EQ(0u, output); } TEST(FileLogger, WithDebugFunctionalityGetSizesReturnsCorectString) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // getSizes returns string uintptr_t input[3] = {1, 2, 3}; string lwsSizes = fileLogger.getSizes(input, 3, true); string gwsSizes = fileLogger.getSizes(input, 3, false); string lwsExpected = "localWorkSize[0]: \t1\nlocalWorkSize[1]: \t2\nlocalWorkSize[2]: \t3\n"; string gwsExpected = "globalWorkSize[0]: \t1\nglobalWorkSize[1]: \t2\nglobalWorkSize[2]: \t3\n"; EXPECT_EQ(lwsExpected, lwsSizes); EXPECT_EQ(gwsExpected, gwsSizes); } TEST(FileLogger, WithDebugFunctionalityGetSizesNegative) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // getSizes returns string string lwsSizes = 
fileLogger.getSizes(nullptr, 3, true); string gwsSizes = fileLogger.getSizes(nullptr, 3, false); EXPECT_EQ(0u, lwsSizes.size()); EXPECT_EQ(0u, gwsSizes.size()); } TEST(FileLogger, WithoutDebugFunctionalityGetSizesDoesNotReturnString) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyDisabledFileLogger fileLogger(testFile, flags); uintptr_t input[3] = {1, 2, 3}; string lwsSizes = fileLogger.getSizes(input, 3, true); string gwsSizes = fileLogger.getSizes(input, 3, false); EXPECT_EQ(0u, lwsSizes.size()); EXPECT_EQ(0u, gwsSizes.size()); } TEST(FileLogger, WithDebugFunctionalityGetEventsReturnsCorectString) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); // getEvents returns string uintptr_t event = 8; uintptr_t *input[3] = {&event, &event, &event}; string eventsString = fileLogger.getEvents((uintptr_t *)input, 2); EXPECT_NE(0u, eventsString.size()); } TEST(FileLogger, WithDebugFunctionalityGetEventsNegative) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); // getEvents returns 0 sized string string event = fileLogger.getEvents(nullptr, 2); EXPECT_EQ(0u, event.size()); } TEST(FileLogger, GivenLoggerWithDebugFunctionalityWhenGetMemObjectsIsCalledThenCorrectStringIsReturned) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); MockBuffer buffer; MemObj *memoryObject = &buffer; cl_mem clMem = memoryObject; cl_mem clMemObjects[] = {clMem, clMem}; cl_uint numObjects = 2; string memObjectString = fileLogger.getMemObjects(reinterpret_cast(clMemObjects), numObjects); EXPECT_NE(0u, memObjectString.size()); stringstream output; output << "cl_mem " << clMem << ", MemObj " << memoryObject; EXPECT_THAT(memObjectString, ::testing::HasSubstr(output.str())); } TEST(FileLogger, 
GivenDebugFunctionalityWhenGetMemObjectsIsCalledWithNullptrThenStringIsEmpty) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); string memObjectString = fileLogger.getMemObjects(nullptr, 2); EXPECT_EQ(0u, memObjectString.size()); } TEST(FileLogger, GiveDisabledDebugFunctionalityWhenGetMemObjectsIsCalledThenCallReturnsImmediately) { std::string testFile = "testfile"; DebugVariables flags; FullyDisabledFileLogger fileLogger(testFile, flags); string memObjectString = fileLogger.getMemObjects(nullptr, 2); EXPECT_EQ(0u, memObjectString.size()); } TEST(FileLogger, WithDebugFunctionalityDumpKernel) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); string kernelDumpFile = "testDumpKernel"; // test kernel dumping fileLogger.dumpKernel(kernelDumpFile, "kernel source here"); EXPECT_TRUE(fileLogger.wasFileCreated(kernelDumpFile.append(".txt"))); } TEST(FileLogger, WithoutDebugFunctionalityDumpKernel) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(false); string kernelDumpFile = "testDumpKernel"; FullyEnabledFileLogger fileLogger(testFile, flags); // test kernel dumping fileLogger.dumpKernel(kernelDumpFile, "kernel source here"); EXPECT_FALSE(fileLogger.wasFileCreated(kernelDumpFile.append(".txt"))); } TEST(FileLogger, WithDebugFunctionalityDumpBinary) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); string programDumpFile = "programBinary.bin"; size_t length = 4; unsigned char binary[4]; const unsigned char *ptrBinary = binary; fileLogger.dumpBinaryProgram(1, &length, &ptrBinary); EXPECT_TRUE(fileLogger.wasFileCreated(programDumpFile)); } TEST(FileLogger, WithDebugFunctionalityDumpNullBinary) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger 
fileLogger(testFile, flags); string programDumpFile = "programBinary.bin"; size_t length = 4; fileLogger.dumpBinaryProgram(1, &length, nullptr); EXPECT_FALSE(fileLogger.wasFileCreated(programDumpFile)); } TEST(FileLogger, WithDebugFunctionalityDontDumpKernelsForNullMdi) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs((const MultiDispatchInfo *)nullptr); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDontDumpKernelArgsForNullMdi) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs((const MultiDispatchInfo *)nullptr); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKernelArgsForMdi) { auto kernelInfo = std::make_unique(); auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); auto kernel = unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = unique_ptr(new MockMultiDispatchInfo(kernel.get())); KernelArgPatchInfo kernelArgPatchInfo; kernelArgPatchInfo.size = 32; kernelArgPatchInfo.sourceOffset = 0; kernelArgPatchInfo.crossthreadOffset = 32; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); size_t crossThreadDataSize = 64; auto crossThreadData = unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(multiDispatchInfo.get()); // check if file was created std::string expectedFile = 
"_arg_0_immediate_size_32_flags_0.bin"; EXPECT_FALSE(fileLogger.wasFileCreated(expectedFile)); // no files should be created EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsForMdi) { auto kernelInfo = std::make_unique(); auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); auto kernel = unique_ptr(new MockKernel(&program, *kernelInfo, *device)); auto multiDispatchInfo = unique_ptr(new MockMultiDispatchInfo(kernel.get())); KernelArgPatchInfo kernelArgPatchInfo; kernelArgPatchInfo.size = 32; kernelArgPatchInfo.sourceOffset = 0; kernelArgPatchInfo.crossthreadOffset = 32; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); size_t crossThreadDataSize = 64; auto crossThreadData = unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(multiDispatchInfo.get()); // check if file was created std::string expectedFile = "_arg_0_immediate_size_32_flags_0.bin"; EXPECT_TRUE(fileLogger.wasFileCreated(expectedFile)); // file should be created EXPECT_EQ(fileLogger.createdFilesCount(), 1); } TEST(FileLogger, WithDebugFunctionalityDumpKernelNullKernel) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs((const Kernel *)nullptr); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityAndEmptyKernelDontDumpKernelArgs) { auto kernelInfo = std::make_unique(); auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); 
auto kernel = unique_ptr(new MockKernel(&program, *kernelInfo, *device)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsImmediate) { auto kernelInfo = std::make_unique(); auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); auto kernel = unique_ptr(new MockKernel(&program, *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; kernelArgPatchInfo.size = 32; kernelArgPatchInfo.sourceOffset = 0; kernelArgPatchInfo.crossthreadOffset = 32; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); size_t crossThreadDataSize = 64; auto crossThreadData = unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); // check if file was created EXPECT_TRUE(fileLogger.wasFileCreated("_arg_0_immediate_size_32_flags_0.bin")); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 1); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsImmediateZeroSize) { auto kernelInfo = std::make_unique(); auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); auto kernel = unique_ptr(new MockKernel(&program, *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; kernelArgPatchInfo.size = 0; kernelArgPatchInfo.sourceOffset = 0; kernelArgPatchInfo.crossthreadOffset = 32; kernelInfo->kernelArgInfo.resize(1); 
kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); size_t crossThreadDataSize = sizeof(64); auto crossThreadData = unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); // no files should be created for zero size EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsLocalBuffer) { auto kernelInfo = std::make_unique(); auto device = make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(*device->getExecutionEnvironment()); auto kernel = unique_ptr(new MockKernel(&program, *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); kernelInfo->kernelArgInfo[0].metadata.addressQualifier = KernelArgMetadata::AddrLocal; std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsBufferNotSet) { auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].metadataExtended = 
std::make_unique(); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); kernelInfo->kernelArgInfo[0].metadataExtended->type = "uint8 *buffer"; kernel->initialize(); size_t crossThreadDataSize = sizeof(void *); auto crossThreadData = unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsBuffer) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto buffer = BufferHelper<>::create(context.get()); cl_mem clObj = buffer; auto kernelInfo = std::make_unique(); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].metadataExtended = std::make_unique(); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); kernelInfo->kernelArgInfo[0].metadataExtended->type = "uint8 *buffer"; kernelInfo->kernelArgInfo.at(0).isBuffer = true; kernel->initialize(); size_t crossThreadDataSize = sizeof(void *); auto crossThreadData = unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); kernel->setArg(0, clObj); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); buffer->release(); 
// check if file was created EXPECT_TRUE(fileLogger.wasFileCreated("_arg_0_buffer_size_16_flags_1.bin")); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 1); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsSampler) { auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo, *device)); KernelArgPatchInfo kernelArgPatchInfo; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].metadataExtended = std::make_unique(); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); kernelInfo->kernelArgInfo[0].metadataExtended->type = "sampler test"; kernel->initialize(); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); // no files should be created for sampler arg EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDumpKernelArgsImageNotSet) { auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(*device->getExecutionEnvironment(), context.get(), false, &device->getDevice())); auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo, *device)); SKernelBinaryHeaderCommon kernelHeader; char surfaceStateHeap[0x80]; kernelHeader.SurfaceStateHeapSize = sizeof(surfaceStateHeap); kernelInfo->heapInfo.pSsh = surfaceStateHeap; kernelInfo->heapInfo.pKernelHeader = &kernelHeader; kernelInfo->usesSsh = true; KernelArgPatchInfo 
kernelArgPatchInfo; kernelInfo->kernelArgInfo.resize(1); kernelInfo->kernelArgInfo[0].metadataExtended = std::make_unique(); kernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); kernelInfo->kernelArgInfo[0].metadataExtended->type = "image2d buffer"; kernelInfo->kernelArgInfo[0].isImage = true; kernelInfo->kernelArgInfo[0].offsetImgWidth = 0x4; kernel->initialize(); size_t crossThreadDataSize = sizeof(void *); auto crossThreadData = unique_ptr(new uint8_t[crossThreadDataSize]); kernel->setCrossThreadData(crossThreadData.get(), static_cast(crossThreadDataSize)); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernelArgs.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); fileLogger.dumpKernelArgs(kernel.get()); // no files should be created for local buffer EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithDebugFunctionalityDumpBinaryNegativeCases) { std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); size_t length = 1; unsigned char binary[4]; const unsigned char *ptrBinary = binary; fileLogger.dumpBinaryProgram(1, nullptr, nullptr); fileLogger.dumpBinaryProgram(1, nullptr, &ptrBinary); fileLogger.dumpBinaryProgram(1, &length, nullptr); length = 0; fileLogger.dumpBinaryProgram(1, &length, &ptrBinary); fileLogger.dumpBinaryProgram(1, &length, nullptr); EXPECT_EQ(fileLogger.createdFilesCount(), 0); } TEST(FileLogger, WithoutDebugFunctionality) { string path = "."; vector files = Directory::getFiles(path); size_t initialNumberOfFiles = files.size(); std::string testFile = "testfile"; DebugVariables flags; flags.DumpKernels.set(true); flags.LogApiCalls.set(true); flags.DumpKernelArgs.set(true); FullyDisabledFileLogger fileLogger(testFile, flags); // Should not be enabled without debug functionality EXPECT_FALSE(fileLogger.enabled()); // Log file not created bool logFileCreated = 
fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); // test kernel dumping bool kernelDumpCreated = false; string kernelDumpFile = "testDumpKernel"; fileLogger.dumpKernel(kernelDumpFile, "kernel source here"); kernelDumpCreated = fileExists(kernelDumpFile.append(".txt")); EXPECT_FALSE(kernelDumpCreated); // test api logging fileLogger.logApiCall(__FUNCTION__, true, 0); logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); // getInput returns 0 size_t input = 5; size_t output = fileLogger.getInput(&input, 0); EXPECT_EQ(0u, output); // getEvents returns 0-size string string event = fileLogger.getEvents(&input, 0); EXPECT_EQ(0u, event.size()); // getSizes returns 0-size string string lwsSizes = fileLogger.getSizes(&input, 0, true); string gwsSizes = fileLogger.getSizes(&input, 0, false); EXPECT_EQ(0u, lwsSizes.size()); EXPECT_EQ(0u, gwsSizes.size()); // no programDump file string programDumpFile = "programBinary.bin"; size_t length = 4; unsigned char binary[4]; const unsigned char *ptrBinary = binary; fileLogger.dumpBinaryProgram(1, &length, &ptrBinary); EXPECT_FALSE(fileLogger.wasFileCreated(programDumpFile)); fileLogger.dumpKernelArgs((const Kernel *)nullptr); // test api input logging fileLogger.logInputs("Arg name", "value"); fileLogger.logInputs("int", 5); logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); // check Log fileLogger.log(true, "string to be logged"); logFileCreated = fileExists(fileLogger.getLogFileName()); EXPECT_FALSE(logFileCreated); files = Directory::getFiles(path); size_t finalNumberOfFiles = files.size(); EXPECT_EQ(initialNumberOfFiles, finalNumberOfFiles); } TEST(LoggerApiEnterWrapper, WithDebugFunctionality) { const char *name = "function"; int error = 0; { auto debugApiWrapper = std::make_unique>(name, nullptr); EXPECT_TRUE(debugApiWrapper->loggedEnter); } { auto debugApiWrapper2 = std::make_unique>(name, &error); 
EXPECT_TRUE(debugApiWrapper2->loggedEnter); } } TEST(LoggerApiEnterWrapper, WithoutDebugFunctionality) { const char *name = "function"; int error = 0; { auto debugApiWrapper = std::make_unique>(name, nullptr); EXPECT_FALSE(debugApiWrapper->loggedEnter); } { auto debugApiWrapper2 = std::make_unique>(name, &error); EXPECT_FALSE(debugApiWrapper2->loggedEnter); } } TEST(FileLogger, infoPointerToStringReturnsCorrectString) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); uint64_t value64bit = 64; string string64bit = fileLogger.infoPointerToString(&value64bit, sizeof(uint64_t)); uint32_t value32bit = 32; string string32bit = fileLogger.infoPointerToString(&value32bit, sizeof(uint32_t)); uint8_t value8bit = 0; string string8bit = fileLogger.infoPointerToString(&value8bit, sizeof(uint8_t)); EXPECT_STREQ("64", string64bit.c_str()); EXPECT_STREQ("32", string32bit.c_str()); EXPECT_STREQ("0", string8bit.c_str()); string stringNonValue = fileLogger.infoPointerToString(nullptr, 56); EXPECT_STREQ("", stringNonValue.c_str()); char valueChar = 0; stringNonValue = fileLogger.infoPointerToString(&valueChar, 56); EXPECT_STREQ("", stringNonValue.c_str()); } TEST(FileLogger, givenDisabledDebugFunctionalityWhenLogLazyEvaluateArgsIsCalledThenCallToLambdaIsDropped) { std::string testFile = "testfile"; DebugVariables flags; FullyDisabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; fileLogger.logLazyEvaluateArgs(true, [&] { wasCalled = true; }); EXPECT_FALSE(wasCalled); } TEST(FileLogger, givenDisabledPredicateWhenLogLazyEvaluateArgsIsCalledThenCallToLambdaIsDropped) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; fileLogger.logLazyEvaluateArgs(false, [&] { wasCalled = true; }); EXPECT_FALSE(wasCalled); } TEST(FileLogger, givenEnabledPredicateWhenLogLazyEvaluateArgsIsCalledThenCallToLambdaIsExecuted) { std::string testFile = 
"testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; fileLogger.logLazyEvaluateArgs(true, [&] { wasCalled = true; }); EXPECT_TRUE(wasCalled); } struct DummyEvaluator { DummyEvaluator(bool &wasCalled) { wasCalled = true; } operator const char *() { return ""; } }; TEST(FileLogger, givenDisabledPredicateWhenDbgLogLazyEvaluateArgsIsCalledThenInputParametersAreNotEvaluated) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(false); FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; DBG_LOG_LAZY_EVALUATE_ARGS(fileLogger, false, log, true, DummyEvaluator(wasCalled)); EXPECT_FALSE(wasCalled); } TEST(FileLogger, givenEnabledPredicateWhenDbgLogLazyEvaluateArgsIsCalledThenInputParametersAreEvaluated) { std::string testFile = "testfile"; DebugVariables flags; flags.LogApiCalls.set(true); FullyEnabledFileLogger fileLogger(testFile, flags); bool wasCalled = false; DBG_LOG_LAZY_EVALUATE_ARGS(fileLogger, true, log, true, DummyEvaluator(wasCalled)); EXPECT_TRUE(wasCalled); } TEST(FileLogger, whenDisabledThenDebugFunctionalityIsNotAvailableAtCompileTime) { std::string testFile = "testfile"; DebugVariables flags; FileLogger fileLogger(testFile, flags); static_assert(false == fileLogger.enabled(), ""); } TEST(FileLogger, whenFullyEnabledThenAllDebugFunctionalityIsAvailableAtCompileTime) { std::string testFile = "testfile"; DebugVariables flags; FileLogger fileLogger(testFile, flags); static_assert(true == fileLogger.enabled(), ""); } struct AllocationTypeTestCase { GraphicsAllocation::AllocationType type; const char *str; }; AllocationTypeTestCase allocationTypeValues[] = { {GraphicsAllocation::AllocationType::BUFFER, "BUFFER"}, {GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, "BUFFER_COMPRESSED"}, {GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, "BUFFER_HOST_MEMORY"}, {GraphicsAllocation::AllocationType::COMMAND_BUFFER, "COMMAND_BUFFER"}, 
{GraphicsAllocation::AllocationType::CONSTANT_SURFACE, "CONSTANT_SURFACE"}, {GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, "DEVICE_QUEUE_BUFFER"}, {GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, "EXTERNAL_HOST_PTR"}, {GraphicsAllocation::AllocationType::FILL_PATTERN, "FILL_PATTERN"}, {GraphicsAllocation::AllocationType::GLOBAL_SURFACE, "GLOBAL_SURFACE"}, {GraphicsAllocation::AllocationType::IMAGE, "IMAGE"}, {GraphicsAllocation::AllocationType::INDIRECT_OBJECT_HEAP, "INDIRECT_OBJECT_HEAP"}, {GraphicsAllocation::AllocationType::INSTRUCTION_HEAP, "INSTRUCTION_HEAP"}, {GraphicsAllocation::AllocationType::INTERNAL_HEAP, "INTERNAL_HEAP"}, {GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, "INTERNAL_HOST_MEMORY"}, {GraphicsAllocation::AllocationType::KERNEL_ISA, "KERNEL_ISA"}, {GraphicsAllocation::AllocationType::LINEAR_STREAM, "LINEAR_STREAM"}, {GraphicsAllocation::AllocationType::MAP_ALLOCATION, "MAP_ALLOCATION"}, {GraphicsAllocation::AllocationType::MCS, "MCS"}, {GraphicsAllocation::AllocationType::PIPE, "PIPE"}, {GraphicsAllocation::AllocationType::PREEMPTION, "PREEMPTION"}, {GraphicsAllocation::AllocationType::PRINTF_SURFACE, "PRINTF_SURFACE"}, {GraphicsAllocation::AllocationType::PRIVATE_SURFACE, "PRIVATE_SURFACE"}, {GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER, "PROFILING_TAG_BUFFER"}, {GraphicsAllocation::AllocationType::SCRATCH_SURFACE, "SCRATCH_SURFACE"}, {GraphicsAllocation::AllocationType::SHARED_BUFFER, "SHARED_BUFFER"}, {GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE, "SHARED_CONTEXT_IMAGE"}, {GraphicsAllocation::AllocationType::SHARED_IMAGE, "SHARED_IMAGE"}, {GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY, "SHARED_RESOURCE_COPY"}, {GraphicsAllocation::AllocationType::SURFACE_STATE_HEAP, "SURFACE_STATE_HEAP"}, {GraphicsAllocation::AllocationType::SVM_CPU, "SVM_CPU"}, {GraphicsAllocation::AllocationType::SVM_GPU, "SVM_GPU"}, {GraphicsAllocation::AllocationType::SVM_ZERO_COPY, "SVM_ZERO_COPY"}, 
{GraphicsAllocation::AllocationType::TAG_BUFFER, "TAG_BUFFER"}, {GraphicsAllocation::AllocationType::GLOBAL_FENCE, "GLOBAL_FENCE"}, {GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, "TIMESTAMP_PACKET_TAG_BUFFER"}, {GraphicsAllocation::AllocationType::UNKNOWN, "UNKNOWN"}, {GraphicsAllocation::AllocationType::WRITE_COMBINED, "WRITE_COMBINED"}}; class AllocationTypeLogging : public ::testing::TestWithParam {}; TEST_P(AllocationTypeLogging, givenGraphicsAllocationTypeWhenConvertingToStringThenCorrectStringIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); auto input = GetParam(); GraphicsAllocation graphicsAllocation(0, input.type, nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); auto result = fileLogger.getAllocationTypeString(&graphicsAllocation); EXPECT_STREQ(result, input.str); } INSTANTIATE_TEST_CASE_P(AllAllocationTypes, AllocationTypeLogging, ::testing::ValuesIn(allocationTypeValues)); TEST(AllocationTypeLoggingSingle, givenGraphicsAllocationTypeWhenConvertingToStringIllegalValueThenILLEGAL_VALUEIsReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyEnabledFileLogger fileLogger(testFile, flags); GraphicsAllocation graphicsAllocation(0, static_cast(999), nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); auto result = fileLogger.getAllocationTypeString(&graphicsAllocation); EXPECT_STREQ(result, "ILLEGAL_VALUE"); } TEST(AllocationTypeLoggingSingle, givenDisabledDebugFunctionalityWhenGettingGraphicsAllocationTypeThenNullptrReturned) { std::string testFile = "testfile"; DebugVariables flags; FullyDisabledFileLogger fileLogger(testFile, flags); GraphicsAllocation graphicsAllocation(0, GraphicsAllocation::AllocationType::BUFFER, nullptr, 0ull, 0ull, 0, MemoryPool::MemoryNull); auto result = fileLogger.getAllocationTypeString(&graphicsAllocation); EXPECT_STREQ(result, nullptr); } 
compute-runtime-20.13.16352/opencl/test/unit_test/utilities/file_logger_tests.h000066400000000000000000000037521363734646600275250ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/file_io.h" #include "shared/source/utilities/directory.h" #include "opencl/source/helpers/string_helpers.h" #include "opencl/source/utilities/logger.h" #include template class TestFileLogger : public NEO::FileLogger { public: using NEO::FileLogger::FileLogger; ~TestFileLogger() { std::remove(NEO::FileLogger::logFileName.c_str()); } void useRealFiles(bool value) { mockFileSystem = !value; } void writeToFile(std::string filename, const char *str, size_t length, std::ios_base::openmode mode) override { savedFiles[filename] << std::string(str, str + length); if (mockFileSystem == false) { NEO::FileLogger::writeToFile(filename, str, length, mode); } }; int32_t createdFilesCount() { return static_cast(savedFiles.size()); } bool wasFileCreated(std::string filename) { return savedFiles.find(filename) != savedFiles.end(); } std::string getFileString(std::string filename) { return savedFiles[filename].str(); } protected: bool mockFileSystem = true; std::map savedFiles; }; using FullyEnabledFileLogger = TestFileLogger; using FullyDisabledFileLogger = TestFileLogger; template class TestLoggerApiEnterWrapper : public NEO::LoggerApiEnterWrapper { public: TestLoggerApiEnterWrapper(const char *functionName, int *errCode) : NEO::LoggerApiEnterWrapper(functionName, errCode), loggedEnter(false) { if (DebugFunctionality) { loggedEnter = true; } } bool loggedEnter; }; compute-runtime-20.13.16352/opencl/test/unit_test/utilities/tag_allocator_tests.cpp000066400000000000000000000341051363734646600304110ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/helpers/timestamp_packet.h" #include 
"shared/source/utilities/tag_allocator.h" #include "opencl/test/unit_test/fixtures/memory_allocator_fixture.h" #include "test.h" #include "gtest/gtest.h" #include using namespace NEO; typedef Test TagAllocatorTest; struct TimeStamps { void initialize() { start = 1; end = 2; release = true; } static GraphicsAllocation::AllocationType getAllocationType() { return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER; } bool isCompleted() const { return release; } bool release; uint64_t start; uint64_t end; }; template class MockTagAllocator : public TagAllocator { using BaseClass = TagAllocator; using TagNodeT = TagNode; public: using BaseClass::deferredTags; using BaseClass::doNotReleaseNodes; using BaseClass::freeTags; using BaseClass::populateFreeTags; using BaseClass::releaseDeferredTags; using BaseClass::usedTags; MockTagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, bool disableCompletionCheck) : BaseClass(0, memMngr, tagCount, tagAlignment, sizeof(TagType), disableCompletionCheck) { } MockTagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment) : MockTagAllocator(memMngr, tagCount, tagAlignment, false) { } GraphicsAllocation *getGraphicsAllocation(size_t id = 0) { return this->gfxAllocations[id]; } TagNodeT *getFreeTagsHead() { return this->freeTags.peekHead(); } TagNodeT *getUsedTagsHead() { return this->usedTags.peekHead(); } size_t getGraphicsAllocationsCount() { return this->gfxAllocations.size(); } size_t getTagPoolCount() { return this->tagPoolMemory.size(); } }; TEST_F(TagAllocatorTest, Initialize) { MockTagAllocator tagAllocator(memoryManager, 100, 64); ASSERT_NE(nullptr, tagAllocator.getGraphicsAllocation()); ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead()); EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); void *gfxMemory = tagAllocator.getGraphicsAllocation()->getUnderlyingBuffer(); void *head = reinterpret_cast(tagAllocator.getFreeTagsHead()->tagForCpuAccess); EXPECT_EQ(gfxMemory, head); } 
// NOTE(review): the template argument lists in this section were missing from the
// text under review (e.g. `IDList> &freeList`). They are restored below on the
// assumption that these tests use the file-local `TimeStamps` tag type, except where
// the test itself points at `TimestampPacketStorage` - confirm against the upstream file.

// Verifies that a tag obtained from the allocator is tracked on the used list and
// that returning it moves it back to the free list.
TEST_F(TagAllocatorTest, GetReturnTagCheckFreeAndUsedLists) {
    MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 10, 16);

    ASSERT_NE(nullptr, tagAllocator.getGraphicsAllocation());
    ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead());
    EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead());

    auto *tagNode = tagAllocator.getTag();
    // The node is dereferenced below, so a null result must abort the test
    // instead of being reported and then crashing.
    ASSERT_NE(nullptr, tagNode);

    auto &freeList = tagAllocator.freeTags;
    auto &usedList = tagAllocator.usedTags;

    bool isFoundOnUsedList = usedList.peekContains(*tagNode);
    bool isFoundOnFreeList = freeList.peekContains(*tagNode);
    EXPECT_FALSE(isFoundOnFreeList);
    EXPECT_TRUE(isFoundOnUsedList);

    tagAllocator.returnTag(tagNode);

    isFoundOnUsedList = usedList.peekContains(*tagNode);
    isFoundOnFreeList = freeList.peekContains(*tagNode);
    EXPECT_TRUE(isFoundOnFreeList);
    EXPECT_FALSE(isFoundOnUsedList);
}

// Verifies that the CPU-visible address of a tag honours the alignment requested
// when the allocator was constructed.
TEST_F(TagAllocatorTest, TagAlignment) {
    size_t alignment = 64;

    MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 10, alignment);

    ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead());

    auto *tagNode = tagAllocator.getTag();
    ASSERT_NE(nullptr, tagNode);

    EXPECT_EQ(0u, reinterpret_cast<uintptr_t>(tagNode->tagForCpuAccess) % alignment);

    tagAllocator.returnTag(tagNode);
}

// Verifies that requesting one more tag than the initial pool holds makes the
// allocator create a second graphics allocation and a second tag pool.
TEST_F(TagAllocatorTest, givenTagAllocatorWhenAllNodesWereUsedThenCreateNewGraphicsAllocation) {
    // Big alignment to force only 4 tags
    size_t alignment = 1024;
    MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 4, alignment);

    ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead());

    TagNode<TimeStamps> *tagNodes[4];

    for (size_t i = 0; i < 4; i++) {
        tagNodes[i] = tagAllocator.getTag();
        EXPECT_NE(nullptr, tagNodes[i]);
    }
    EXPECT_EQ(1u, tagAllocator.getGraphicsAllocationsCount());
    EXPECT_EQ(1u, tagAllocator.getTagPoolCount());

    // The fifth request exceeds the first pool.
    auto *tagNode = tagAllocator.getTag();
    EXPECT_NE(nullptr, tagNode);

    EXPECT_EQ(2u, tagAllocator.getGraphicsAllocationsCount());
    EXPECT_EQ(2u, tagAllocator.getTagPoolCount());
}

// Verifies that the number of nodes on the free list equals the tag count passed
// to the allocator, using a memory manager that always hands back a page-sized
// allocation.
TEST_F(TagAllocatorTest, givenInputTagCountWhenCreatingAllocatorThen) {
    class MyMockMemoryManager : public MockMemoryManager {
      public:
        using MockMemoryManager::MockMemoryManager;
        GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override {
            return new MemoryAllocation(0, TimestampPacketStorage::getAllocationType(), nullptr, nullptr, 0, MemoryConstants::pageSize,
                                        1, MemoryPool::System4KBPages, false, false);
        }
    };

    auto mockMemoryManager = std::make_unique<MyMockMemoryManager>(true, true, *executionEnvironment);

    const size_t tagsCount = 3;
    // NOTE(review): tag type assumed to be TimestampPacketStorage because the mock
    // allocation above is created with that type's allocation type - confirm.
    MockTagAllocator<TimestampPacketStorage> tagAllocator(mockMemoryManager.get(), tagsCount, 1);

    size_t nodesFound = 0;
    auto head = tagAllocator.freeTags.peekHead();

    while (head) {
        nodesFound++;
        head = head->next;
    }
    EXPECT_EQ(tagsCount, nodesFound);
}

// Verifies free-list membership while tags are returned in an order different
// from the order in which they were obtained.
TEST_F(TagAllocatorTest, GetTagsAndReturnInDifferentOrder) {
    // Big alignment to force only 4 tags
    size_t alignment = 1024;
    MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 4, alignment);

    ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead());

    TagNode<TimeStamps> *tagNodes[4];

    for (size_t i = 0; i < 4; i++) {
        tagNodes[i] = tagAllocator.getTag();
        // Every node is dereferenced further down, so stop here on failure.
        ASSERT_NE(nullptr, tagNodes[i]);
    }
    EXPECT_EQ(1u, tagAllocator.getGraphicsAllocationsCount());
    EXPECT_EQ(1u, tagAllocator.getTagPoolCount());

    auto *tagNode2 = tagAllocator.getTag();
    EXPECT_NE(nullptr, tagNode2);
    EXPECT_EQ(2u, tagAllocator.getGraphicsAllocationsCount());
    EXPECT_EQ(2u, tagAllocator.getTagPoolCount());

    auto &freeList = tagAllocator.freeTags;
    bool isFoundOnFreeList = freeList.peekContains(*tagNodes[0]);
    EXPECT_FALSE(isFoundOnFreeList);

    tagAllocator.returnTag(tagNodes[2]);
    isFoundOnFreeList = freeList.peekContains(*tagNodes[2]);
    EXPECT_TRUE(isFoundOnFreeList);
    EXPECT_NE(nullptr, tagAllocator.getFreeTagsHead());

    tagAllocator.returnTag(tagNodes[3]);
    isFoundOnFreeList = freeList.peekContains(*tagNodes[3]);
    EXPECT_TRUE(isFoundOnFreeList);

    tagAllocator.returnTag(tagNodes[1]);
    isFoundOnFreeList = freeList.peekContains(*tagNodes[1]);
    EXPECT_TRUE(isFoundOnFreeList);

    // The first node has not been returned yet, so it must still be absent.
    isFoundOnFreeList = freeList.peekContains(*tagNodes[0]);
    EXPECT_FALSE(isFoundOnFreeList);

    tagAllocator.returnTag(tagNodes[0]);
}
TEST_F(TagAllocatorTest, GetTagsFromTwoPools) { // Big alignment to force only 1 tag size_t alignment = 4096; MockTagAllocator tagAllocator(memoryManager, 1, alignment); ASSERT_NE(nullptr, tagAllocator.getFreeTagsHead()); TagNode *tagNode1, *tagNode2; tagNode1 = tagAllocator.getTag(); ASSERT_NE(nullptr, tagNode1); tagNode2 = tagAllocator.getTag(); ASSERT_NE(nullptr, tagNode2); EXPECT_EQ(2u, tagAllocator.getGraphicsAllocationsCount()); EXPECT_EQ(2u, tagAllocator.getTagPoolCount()); EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); tagAllocator.returnTag(tagNode1); tagAllocator.returnTag(tagNode2); } TEST_F(TagAllocatorTest, CleanupResources) { // Big alignment to force only 1 tag size_t alignment = 4096; MockTagAllocator tagAllocator(memoryManager, 1, alignment); TagNode *tagNode1, *tagNode2; // Allocate first Pool tagNode1 = tagAllocator.getTag(); EXPECT_NE(nullptr, tagNode1); // Allocate second Pool tagNode2 = tagAllocator.getTag(); ASSERT_NE(nullptr, tagNode2); // Two pools should have different gfxAllocations EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation()); // Return tags tagAllocator.returnTag(tagNode1); tagAllocator.returnTag(tagNode2); // Should cleanup all resources tagAllocator.cleanUpResources(); EXPECT_EQ(0u, tagAllocator.getGraphicsAllocationsCount()); EXPECT_EQ(0u, tagAllocator.getTagPoolCount()); } TEST_F(TagAllocatorTest, whenNewTagIsTakenThenInitialize) { MockTagAllocator tagAllocator(memoryManager, 1, 2); tagAllocator.getFreeTagsHead()->tagForCpuAccess->start = 3; tagAllocator.getFreeTagsHead()->tagForCpuAccess->end = 4; auto node = tagAllocator.getTag(); EXPECT_EQ(1u, node->tagForCpuAccess->start); EXPECT_EQ(2u, node->tagForCpuAccess->end); } TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhenAllRefCountsAreReleased) { MockTagAllocator tagAllocator(memoryManager, 2, 1); auto tag = tagAllocator.getTag(); EXPECT_NE(nullptr, 
tagAllocator.getUsedTagsHead()); tagAllocator.returnTag(tag); EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); // only 1 reference tag = tagAllocator.getTag(); tag->incRefCount(); EXPECT_NE(nullptr, tagAllocator.getUsedTagsHead()); tagAllocator.returnTag(tag); EXPECT_NE(nullptr, tagAllocator.getUsedTagsHead()); // 1 reference left tagAllocator.returnTag(tag); EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); } TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToDeferredList) { MockTagAllocator tagAllocator(memoryManager, 1, 1); auto node = tagAllocator.getTag(); node->tagForCpuAccess->release = false; EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); tagAllocator.returnTag(node); EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenTagNodeWhenCompletionCheckIsDisabledThenStatusIsMarkedAsNotReady) { MockTagAllocator tagAllocator(memoryManager, 1, 1); EXPECT_FALSE(tagAllocator.doNotReleaseNodes); auto node = tagAllocator.getTag(); EXPECT_TRUE(node->canBeReleased()); node->setDoNotReleaseNodes(true); EXPECT_FALSE(node->canBeReleased()); tagAllocator.returnTag(node); EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenTagAllocatorWhenDisabledCompletionCheckThenNodeInheritsItsState) { MockTagAllocator tagAllocator(memoryManager, 1, 1, true); EXPECT_TRUE(tagAllocator.doNotReleaseNodes); auto node = tagAllocator.getTag(); EXPECT_FALSE(node->canBeReleased()); node->setDoNotReleaseNodes(false); EXPECT_TRUE(node->canBeReleased()); tagAllocator.returnTag(node); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenReadyTagWhenReturnedThenMoveToFreeList) { MockTagAllocator tagAllocator(memoryManager, 1, 1); auto node = tagAllocator.getTag(); node->tagForCpuAccess->release = true; 
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); tagAllocator.returnTag(node); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenEmptyFreeListWhenAskingForNewTagThenTryToReleaseDeferredListFirst) { MockTagAllocator tagAllocator(memoryManager, 1, 1); auto node = tagAllocator.getTag(); node->tagForCpuAccess->release = false; tagAllocator.returnTag(node); node->tagForCpuAccess->release = false; EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); node = tagAllocator.getTag(); EXPECT_NE(nullptr, node); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); // empty again - new pool wasnt allocated } TEST_F(TagAllocatorTest, givenTagsOnDeferredListWhenReleasingItThenMoveReadyTagsToFreePool) { MockTagAllocator tagAllocator(memoryManager, 2, 1); // pool with 2 tags auto node1 = tagAllocator.getTag(); auto node2 = tagAllocator.getTag(); node1->tagForCpuAccess->release = false; node2->tagForCpuAccess->release = false; tagAllocator.returnTag(node1); tagAllocator.returnTag(node2); tagAllocator.releaseDeferredTags(); EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); node1->tagForCpuAccess->release = true; tagAllocator.releaseDeferredTags(); EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); node2->tagForCpuAccess->release = true; tagAllocator.releaseDeferredTags(); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenTagAllocatorWhenGraphicsAllocationIsCreatedThenSetValidllocationType) { TagAllocator timestampPacketAllocator(0, memoryManager, 1, 1, sizeof(TimestampPacketStorage), false); TagAllocator hwTimeStampsAllocator(0, memoryManager, 1, 1, sizeof(HwTimeStamps), false); TagAllocator hwPerfCounterAllocator(0, memoryManager, 1, 1, sizeof(HwPerfCounter), false); auto timestampPacketTag = 
timestampPacketAllocator.getTag(); auto hwTimeStampsTag = hwTimeStampsAllocator.getTag(); auto hwPerfCounterTag = hwPerfCounterAllocator.getTag(); EXPECT_EQ(GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, timestampPacketTag->getBaseGraphicsAllocation()->getAllocationType()); EXPECT_EQ(GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER, hwTimeStampsTag->getBaseGraphicsAllocation()->getAllocationType()); EXPECT_EQ(GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER, hwPerfCounterTag->getBaseGraphicsAllocation()->getAllocationType()); } compute-runtime-20.13.16352/opencl/test/unit_test/windows/000077500000000000000000000000001363734646600233245ustar00rootroot00000000000000compute-runtime-20.13.16352/opencl/test/unit_test/windows/CMakeLists.txt000066400000000000000000000040161363734646600260650ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) project(igdrcl_windows_dll_tests) set(NEO_IGDRCL_WINDOWS_DLL_TESTS_TARGET_OBJECTS $ $ $ ) add_executable(igdrcl_windows_dll_tests ${NEO_IGDRCL_WINDOWS_DLL_TESTS_TARGET_OBJECTS} ${NEO_SOURCE_DIR}/opencl/source/aub/aub_stream_interface.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/create_command_stream.cpp ${NEO_SOURCE_DIR}/opencl/source/dll${BRANCH_DIR_SUFFIX}/get_devices.cpp ${NEO_SOURCE_DIR}/opencl/source/dll/windows/os_interface.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/wddm/wddm_create.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/ult_configuration.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/get_devices_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/os_interface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_create_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_mode.h ) target_link_libraries(igdrcl_windows_dll_tests ${NEO_MOCKABLE_LIB_NAME} ${NEO_SHARED_MOCKABLE_LIB_NAME} igdrcl_mocks gmock-gtest ${IGDRCL_EXTRA_LIBS}) target_include_directories(igdrcl_windows_dll_tests PRIVATE 
${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros${BRANCH_DIR_SUFFIX} ${NEO_SOURCE_DIR}/opencl/test/unit_test/gen_common${BRANCH_DIR_SUFFIX} ${CMAKE_CURRENT_SOURCE_DIR} ) create_project_source_tree(igdrcl_windows_dll_tests ${NEO_SOURCE_DIR}/runtime ${NEO_SOURCE_DIR}/unit_tests) add_custom_target(run_windows_dll_tests ALL DEPENDS unit_tests igdrcl_windows_dll_tests) add_custom_command( TARGET run_windows_dll_tests POST_BUILD COMMAND WORKING_DIRECTORY ${TargetDir} COMMAND echo Target Directory is: ${TargetDir} COMMAND echo Running Windows dll tests COMMAND igdrcl_windows_dll_tests ) add_dependencies(run_unit_tests run_windows_dll_tests) set_target_properties(igdrcl_windows_dll_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) set_target_properties(run_windows_dll_tests PROPERTIES FOLDER ${OPENCL_TEST_PROJECTS_FOLDER}) endif() compute-runtime-20.13.16352/opencl/test/unit_test/windows/get_devices_tests.cpp000066400000000000000000000026751363734646600275450ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/device_factory.h" #include "test.h" using namespace NEO; using PrepareDeviceEnvironmentsTests = ::testing::Test; HWTEST_F(PrepareDeviceEnvironmentsTests, WhenPrepareDeviceEnvironmentsIsCalledThenSuccessIsReturned) { ExecutionEnvironment executionEnvironment; auto returnValue = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); EXPECT_EQ(true, returnValue); } HWTEST_F(PrepareDeviceEnvironmentsTests, whenPrepareDeviceEnvironmentsIsCalledThenGmmIsBeingInitializedAfterFillingHwInfo) { ExecutionEnvironment executionEnvironment; executionEnvironment.prepareRootDeviceEnvironments(1u); auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo(); 
hwInfo->platform.eProductFamily = PRODUCT_FAMILY::IGFX_UNKNOWN; hwInfo->platform.ePCHProductFamily = PCH_PRODUCT_FAMILY::PCH_UNKNOWN; EXPECT_EQ(nullptr, executionEnvironment.rootDeviceEnvironments[0]->getGmmHelper()); auto returnValue = DeviceFactory::prepareDeviceEnvironments(executionEnvironment); EXPECT_TRUE(returnValue); EXPECT_NE(nullptr, executionEnvironment.rootDeviceEnvironments[0]->getGmmHelper()); } compute-runtime-20.13.16352/opencl/test/unit_test/windows/os_interface_tests.cpp000066400000000000000000000023211363734646600277110ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/os_interface/windows/os_interface.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "opencl/test/unit_test/mocks/mock_wddm.h" #include "test.h" using namespace NEO; TEST(osInterfaceTests, osInterfaceLocalMemoryEnabledByDefault) { EXPECT_TRUE(OSInterface::osEnableLocalMemory); } TEST(osInterfaceTests, whenOsInterfaceSetupGmmInputArgsThenProperAdapterBDFIsSet) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto wddm = new WddmMock(rootDeviceEnvironment); OSInterface osInterface; osInterface.get()->setWddm(wddm); wddm->init(); auto &adapterBDF = wddm->adapterBDF; adapterBDF.Bus = 0x12; adapterBDF.Device = 0x34; adapterBDF.Function = 0x56; GMM_INIT_IN_ARGS gmmInputArgs = {}; EXPECT_NE(0, memcmp(&adapterBDF, &gmmInputArgs.stAdapterBDF, sizeof(ADAPTER_BDF))); osInterface.setGmmInputArgs(&gmmInputArgs); EXPECT_EQ(0, memcmp(&adapterBDF, &gmmInputArgs.stAdapterBDF, sizeof(ADAPTER_BDF))); } compute-runtime-20.13.16352/opencl/test/unit_test/windows/test_mode.h000066400000000000000000000003761363734646600254660ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/test/unit_test/tests_configuration.h" namespace NEO { constexpr TestMode defaultTestMode = TestMode::UnitTests; } // namespace NEO compute-runtime-20.13.16352/opencl/test/unit_test/windows/wddm_create_tests.cpp000066400000000000000000000016651363734646600275400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/windows/wddm/wddm.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "opencl/test/unit_test/mocks/mock_execution_environment.h" #include "test.h" #include using namespace NEO; TEST(wddmCreateTests, givenInputVersionWhenCreatingThenCreateRequestedObject) { MockExecutionEnvironment executionEnvironment; RootDeviceEnvironment rootDeviceEnvironment(executionEnvironment); auto hwDeviceIds = OSInterface::discoverDevices(executionEnvironment); std::unique_ptr wddm(Wddm::createWddm(std::move(hwDeviceIds[0]), rootDeviceEnvironment)); EXPECT_EQ(typeid(*wddm.get()), typeid(Wddm)); } compute-runtime-20.13.16352/os_release_info.cmake000066400000000000000000000135071363734646600215210ustar00rootroot00000000000000# # Copyright (C) 2018-2019 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DEFINED _os_release_info) set(_os_release_info TRUE) # os_release_info.cmake - Function to dump OS name and version # This file has no dependencies on other files (e.g., functions or definitions) # of the local cmake environment. # Set cmake policies for at least this level: cmake_minimum_required(VERSION 2.8.12) # Function get_os_release_info - Determine and return OS name and version # # Args: # 1. the name of a variable to receive os_name # 2. the name of a variable to receive os_version # # Return values: (Quotation marks are always stripped). 
# Upon failure, return values are null strings.
#
# Examples:
#   os_name         os_version
#   --------------  -------
#   clear-linux-os  21180 (Changes twice daily)
#   ubuntu          12.04  16.04  17.10  18.04
#   fedora          27
#   centos          6.9  7.4.1708
#
# Potential sources are tried (in order of preference) until a
# suitable one is found.

# Implementation documentation:
#
# The potential sources, in order, are as follows.
# - /etc/centos-release
#       Centos 7 also has /etc/os-release.  File /etc/os-release is less
#       precise about the Centos version (e.g., "7" instead of "7.4.1708").
#       For that reason, this file is checked first.
#       Examples:
#       CentOS release 6.9 (Final)
#       CentOS Linux release 7.4.1708 (Core)
# - /usr/lib/os-release
#       Present for Clear Linux, modern Fedora, and Ubuntu since some time
#       between 14.04 and 16.04.  The ID and VERSION_ID values are used.
#       Examples:
#       ID=clear-linux-os      VERSION_ID=21180
#       ID=fedora              VERSION_ID=27
#       ID=ubuntu              VERSION_ID="14.04"
#       ID=ubuntu              VERSION_ID="16.04"
#       ID="ubuntu"            VERSION_ID="17.10"
# - /etc/os-release - Same form as (sometimes a link to) /usr/lib/os-release
#       ID="Ubuntu"            VERSION_ID="12.04"
#       ID="Ubuntu"            VERSION_ID="14.04"
#           with a symbolic link: /etc/os-release -> ../usr/lib/os-release
#       ID="CentOS Linux"      VERSION_ID="7"    Also: ID_LIKE="rhel fedora"
# - /etc/lsb-release
#       For Centos, not too meaningful.
#       Other "OS"s are more reasonable:
#       DISTRIB_ID=Ubuntu      DISTRIB_RELEASE=12.04
#       DISTRIB_ID=Ubuntu      DISTRIB_RELEASE=14.04
#       DISTRIB_ID=Ubuntu      DISTRIB_RELEASE=17.10
#
# The third argument receives the codename (VERSION_CODENAME from os-release or
# DISTRIB_CODENAME from lsb-release); /etc/centos-release provides no codename,
# so it stays empty on that path.
#
# Any of the three output-variable names may be passed as an empty string, in
# which case that value is simply not returned.

function(get_os_release_info _vn_id _vn_version_id _vn_codename)
  set(_var_id "")
  set(_var_version_id "")
  set(_var_codename "")

  # Source 1: /etc/centos-release (single free-form line).
  if("${_var_id}" STREQUAL "")
    set(file_path "/etc/centos-release")
    if(EXISTS "${file_path}")
      # Example: CentOS release 6.9 (Final)
      file(STRINGS "${file_path}" file_list LIMIT_COUNT 1)
      # An empty file yields an empty list, and list(GET) on an empty list is a
      # hard error; skip this source instead so the next one is tried.
      if(NOT "${file_list}" STREQUAL "")
        list(GET file_list 0 file_line)

        # Remove all parenthesized items.
        string(REGEX REPLACE "\\([^)]+\\)" "" file_line "${file_line}")

        # Extract start and end, discard the optional "Linux" word (CentOS 7 writes
        # "CentOS Linux release 7.4.1708") and the optional "version" or "release".
        # Without the "Linux" group the version would come out as
        # "Linux release 7.4.1708" instead of the documented "7.4.1708".
        string(REGEX MATCH "^([A-Za-z0-9_]+)( +Linux)?( +(version|release))? +(.*)$" _dummy "${file_line}")
        #                    1              2         3  4                      5

        set(_var_id "${CMAKE_MATCH_1}")
        set(_var_version_id "${CMAKE_MATCH_5}")
      endif()
    endif()
  endif()

  # Source 2: os-release, preferring /usr/lib/os-release over /etc/os-release.
  if("${_var_id}" STREQUAL "")
    if(EXISTS "/usr/lib/os-release")
      set(file_path "/usr/lib/os-release")
    elseif(EXISTS "/etc/os-release")
      set(file_path "/etc/os-release")
    else()
      set(file_path "")
    endif()

    if(NOT "${file_path}" STREQUAL "")
      file(STRINGS "${file_path}" data_list REGEX "^(ID|VERSION_ID|VERSION_CODENAME)=")

      # Look for lines like "ID="..." and VERSION_ID="..."
      foreach(_var ${data_list})
        if("${_var}" MATCHES "^(ID)=(.*)$")
          set(_var_id "${CMAKE_MATCH_2}")
        elseif("${_var}" MATCHES "^(VERSION_ID)=(.*)$")
          set(_var_version_id "${CMAKE_MATCH_2}")
        elseif("${_var}" MATCHES "^(VERSION_CODENAME)=(.*)$")
          set(_var_codename "${CMAKE_MATCH_2}")
        endif()
      endforeach()
    endif()
  endif()

  # Source 3: /etc/lsb-release.
  if("${_var_id}" STREQUAL "")
    set(file_path "/etc/lsb-release")
    if(EXISTS "${file_path}")
      file(STRINGS "${file_path}" data_list REGEX "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=")

      # Look for lines like "DISTRIB_ID="..." and DISTRIB_RELEASE="..."
      foreach(_var ${data_list})
        if("${_var}" MATCHES "^(DISTRIB_ID)=(.*)$")
          set(_var_id "${CMAKE_MATCH_2}")
        elseif("${_var}" MATCHES "^(DISTRIB_RELEASE)=(.*)$")
          set(_var_version_id "${CMAKE_MATCH_2}")
        elseif("${_var}" MATCHES "^(DISTRIB_CODENAME)=(.*)$")
          set(_var_codename "${CMAKE_MATCH_2}")
        endif()
      endforeach()
    endif()
  endif()

  # Normalise: the id is lower-cased; all three values are whitespace-stripped.
  string(TOLOWER "${_var_id}" "_var_id")

  string(STRIP "${_var_id}" _var_id)
  string(STRIP "${_var_version_id}" _var_version_id)
  string(STRIP "${_var_codename}" _var_codename)

  # Remove any enclosing quotation marks
  string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_id "${_var_id}")
  string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_version_id "${_var_version_id}")
  string(REGEX REPLACE "^\"(.*)\"$" "\\1" _var_codename "${_var_codename}")

  # Hand results back to the caller's scope, skipping outputs the caller did not name.
  if(NOT "${_vn_id}" STREQUAL "")
    set(${_vn_id} "${_var_id}" PARENT_SCOPE)
  endif()

  if(NOT "${_vn_version_id}" STREQUAL "")
    set(${_vn_version_id} "${_var_version_id}" PARENT_SCOPE)
  endif()

  if(NOT "${_vn_codename}" STREQUAL "")
    set(${_vn_codename} "${_var_codename}" PARENT_SCOPE)
  endif()

endfunction()

endif(NOT DEFINED _os_release_info)
compute-runtime-20.13.16352/package.cmake000066400000000000000000000211711363734646600177540ustar00rootroot00000000000000
#
# Copyright (C) 2017-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

if(UNIX)
  set(package_input_dir ${NEO_BINARY_DIR}/packageinput)
  set(package_output_dir ${NEO_BINARY_DIR}/packages)

  # The OpenCL and Level Zero packages are mutually exclusive in one configure run.
  if(NEO_BUILD_OCL_PACKAGE AND NEO_BUILD_L0_PACKAGE)
    message(FATAL_ERROR "OpenCL and LevelZero packages cannot be created simultaneously")
  endif()

  # Fallback version components (1.0.0) when the including project did not set them.
  if(NOT DEFINED NEO_OCL_VERSION_MAJOR)
    set(NEO_OCL_VERSION_MAJOR 1)
  endif()
  if(NOT DEFINED NEO_OCL_VERSION_MINOR)
    set(NEO_OCL_VERSION_MINOR 0)
  endif()
  if(NOT DEFINED NEO_VERSION_BUILD)
    set(NEO_VERSION_BUILD 0)
  endif()

  include("os_release_info.cmake")

  get_os_release_info(os_name os_version os_codename)

  if(NOT DEFINED OCL_ICD_VENDORDIR)
    if("${os_name}" STREQUAL "clear-linux-os")
      # clear-linux-os distribution avoids /etc for distribution defaults.
set(OCL_ICD_VENDORDIR "/usr/share/defaults/etc/OpenCL/vendors")
    else()
      set(OCL_ICD_VENDORDIR "/etc/OpenCL/vendors")
    endif()
  endif()

  set(OCL_ICD_RUNTIME_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${NEO_DLL_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX})
  # Install-time script for the "opencl" component: writes intel.icd (one line holding
  # the full path of the runtime library) plus postinst/postrm scripts that run
  # /sbin/ldconfig, then copies those two scripts into NEO_BINARY_DIR with
  # executable permissions.
  install(
    CODE "file( WRITE ${NEO_BINARY_DIR}/intel.icd \"${CMAKE_INSTALL_FULL_LIBDIR}/intel-opencl/${OCL_ICD_RUNTIME_NAME}\n\" )"
    CODE "file( WRITE ${NEO_BINARY_DIR}/tmp/postinst \"/sbin/ldconfig\n\" )"
    CODE "file( WRITE ${NEO_BINARY_DIR}/tmp/postrm \"/sbin/ldconfig\n\" )"
    CODE "file( COPY ${NEO_BINARY_DIR}/tmp/postinst DESTINATION ${NEO_BINARY_DIR} FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )"
    CODE "file( COPY ${NEO_BINARY_DIR}/tmp/postrm DESTINATION ${NEO_BINARY_DIR} FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )"
    COMPONENT opencl
  )
  install(FILES ${NEO_BINARY_DIR}/intel.icd DESTINATION ${OCL_ICD_VENDORDIR} COMPONENT opencl)

  if(NEO_CPACK_GENERATOR)
    set(CPACK_GENERATOR "${NEO_CPACK_GENERATOR}")
  else()
    # If the generators list was not defined, build a native package for the current
    # distro: DEB when /etc/debian_version exists, RPM when /etc/redhat-release
    # exists, otherwise a TXZ archive.
    if(EXISTS "/etc/debian_version")
      set(CPACK_GENERATOR "DEB")
    elseif(EXISTS "/etc/redhat-release")
      set(CPACK_GENERATOR "RPM")
    else()
      set(CPACK_GENERATOR "TXZ")
    endif()
  endif()

  # Generator-independent package metadata.
  set(CPACK_SET_DESTDIR TRUE)
  set(CPACK_PACKAGE_ARCHITECTURE "x86_64")
  set(CPACK_PACKAGE_RELOCATABLE FALSE)
  set(CPACK_PACKAGE_NAME "intel")
  set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Intel(R) Graphics Compute Runtime")
  set(CPACK_PACKAGE_VENDOR "Intel")
  set(CPACK_PACKAGE_VERSION_MAJOR ${NEO_OCL_VERSION_MAJOR})
  set(CPACK_PACKAGE_VERSION_MINOR ${NEO_OCL_VERSION_MINOR})
  set(CPACK_PACKAGE_VERSION_PATCH ${NEO_VERSION_BUILD})
  # DEB-specific settings.
  set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64")
  set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "postinst;postrm")
  set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "http://01.org/compute-runtime")
  set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
  # RPM-specific settings.
  set(CPACK_RPM_COMPRESSION_TYPE "xz")
  set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
  set(CPACK_RPM_PACKAGE_AUTOREQ OFF)
  set(CPACK_RPM_PACKAGE_DESCRIPTION "Intel OpenCL GPU driver")
  set(CPACK_RPM_PACKAGE_GROUP "System Environment/Libraries")
  set(CPACK_RPM_PACKAGE_LICENSE "MIT")
  set(CPACK_RPM_PACKAGE_RELEASE 1)
  set(CPACK_RPM_PACKAGE_RELEASE_DIST ON)
  set(CPACK_RPM_PACKAGE_URL "http://01.org/compute-runtime")
  set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${NEO_BINARY_DIR}/postinst")
  set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${NEO_BINARY_DIR}/postrm")
  set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CMAKE_INSTALL_PREFIX})
  set(CPACK_PACKAGE_CONTACT "Intel Corporation")
  # One package per install component for every generator used here.
  set(CPACK_DEB_COMPONENT_INSTALL ON)
  set(CPACK_RPM_COMPONENT_INSTALL ON)
  set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
  # The component list comes from a global property filled in elsewhere in the build.
  if(NEO_BUILD_OCL_PACKAGE)
    get_property(CPACK_COMPONENTS_ALL GLOBAL PROPERTY NEO_OCL_COMPONENTS_LIST)
  endif()
  # A Level Zero package overrides the major/minor version set above.
  if(NEO_BUILD_L0_PACKAGE)
    set(CPACK_PACKAGE_VERSION_MAJOR ${NEO_L0_VERSION_MAJOR})
    set(CPACK_PACKAGE_VERSION_MINOR ${NEO_L0_VERSION_MINOR})
    get_property(CPACK_COMPONENTS_ALL GLOBAL PROPERTY NEO_L0_COMPONENTS_LIST)
  endif()
  set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
    /etc/ld.so.conf.d
    /usr/local
    /usr/local/lib64
    /usr/local/bin
  )

  # CMake >= 3.6: set per-component file names (opencl, ocloc, level-zero-gpu).
  # Older CMake: fall back to a single CPACK_PACKAGE_FILE_NAME chosen by generator.
  if(CMAKE_VERSION VERSION_GREATER 3.6 OR CMAKE_VERSION VERSION_EQUAL 3.6)
    set(CPACK_DEBIAN_OPENCL_FILE_NAME "intel-opencl_${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-1~${os_codename}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb")
    set(CPACK_DEBIAN_OCLOC_FILE_NAME "intel-opencl-ocloc_${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-1~${os_codename}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb")
    set(CPACK_DEBIAN_LEVEL-ZERO-GPU_FILE_NAME "intel-level-zero-gpu_${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}-1~${os_codename}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb")

    set(CPACK_RPM_OPENCL_FILE_NAME "intel-opencl-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm")
    set(CPACK_RPM_OCLOC_FILE_NAME "intel-opencl-ocloc-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm")
    set(CPACK_RPM_LEVEL-ZERO-GPU_FILE_NAME "intel-level-zero-gpu-${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm")

    set(CPACK_ARCHIVE_OPENCL_FILE_NAME "intel-opencl-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${os_codename}-${CPACK_PACKAGE_ARCHITECTURE}")
    set(CPACK_ARCHIVE_OCLOC_FILE_NAME "intel-opencl-ocloc-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${os_codename}-${CPACK_PACKAGE_ARCHITECTURE}")
    # NOTE(review): this name joins codename and architecture with "_" while the two
    # archive names above use "-" - confirm whether the difference is intended.
    set(CPACK_ARCHIVE_LEVEL-ZERO-GPU_FILE_NAME "intel-level-zero-gpu-${NEO_L0_VERSION_MAJOR}.${NEO_L0_VERSION_MINOR}.${NEO_VERSION_BUILD}-${os_codename}_${CPACK_PACKAGE_ARCHITECTURE}")
  else()
    if(CPACK_GENERATOR STREQUAL "DEB")
      set(CPACK_PACKAGE_FILE_NAME "intel-compute-runtime_${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}")
    elseif(CPACK_GENERATOR STREQUAL "RPM")
      set(CPACK_PACKAGE_FILE_NAME "intel-compute-runtime-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_RPM_PACKAGE_RELEASE}%{?dist}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm")
    else()
      set(CPACK_PACKAGE_FILE_NAME "intel-compute-runtime-${NEO_OCL_VERSION_MAJOR}.${NEO_OCL_VERSION_MINOR}.${NEO_VERSION_BUILD}-${CPACK_PACKAGE_ARCHITECTURE}")
    endif()
  endif()

  # External dependencies: pin to the exact GMM / IGC version when it was detected,
  # otherwise depend on the package name without a version.
  if(NEO__GMM_FOUND)
    list(APPEND _external_package_dependencies_debian "intel-gmmlib(=${NEO__GMM_VERSION})")
    list(APPEND _external_package_dependencies_rpm "intel-gmmlib = ${NEO__GMM_VERSION}")
  else()
    list(APPEND _external_package_dependencies_debian "intel-gmmlib")
    list(APPEND _external_package_dependencies_rpm "intel-gmmlib")
  endif()

  # IGC goes into both the general dependency list and the ocloc-only list.
  if(NEO__IGC_FOUND)
    list(APPEND _external_package_dependencies_debian "intel-igc-opencl(=${NEO__IGC_VERSION})")
    list(APPEND _external_package_dependencies_rpm "intel-igc-opencl = ${NEO__IGC_VERSION}")
    list(APPEND _igc_package_dependencies_debian "intel-igc-opencl(=${NEO__IGC_VERSION})")
    list(APPEND _igc_package_dependencies_rpm "intel-igc-opencl = ${NEO__IGC_VERSION}")
  else()
    list(APPEND _external_package_dependencies_debian "intel-igc-opencl")
    list(APPEND _external_package_dependencies_rpm "intel-igc-opencl")
    list(APPEND _igc_package_dependencies_debian "intel-igc-opencl")
    list(APPEND _igc_package_dependencies_rpm "intel-igc-opencl")
  endif()

  # Turn the CMake lists (";"-separated) into the ", "-separated form used in the
  # package dependency fields.
  string(REPLACE ";" ", " CPACK_DEBIAN_OPENCL_PACKAGE_DEPENDS "${_external_package_dependencies_debian}")
  string(REPLACE ";" ", " CPACK_DEBIAN_OCLOC_PACKAGE_DEPENDS "${_igc_package_dependencies_debian}")
  string(REPLACE ";" ", " CPACK_DEBIAN_LEVEL-ZERO-GPU_PACKAGE_DEPENDS "${_external_package_dependencies_debian}")
  string(REPLACE ";" ", " CPACK_RPM_OPENCL_PACKAGE_REQUIRES "${_external_package_dependencies_rpm}")
  string(REPLACE ";" ", " CPACK_RPM_OCLOC_PACKAGE_REQUIRES "${_igc_package_dependencies_rpm}")
  string(REPLACE ";" ", " CPACK_RPM_LEVEL-ZERO-GPU_PACKAGE_REQUIRES "${_external_package_dependencies_rpm}")

  set(CPACK_DEBIAN_LEVEL-ZERO-GPU_PACKAGE_RECOMMENDS "level-zero")
  set(CPACK_DEBIAN_LEVEL-ZERO-GPU_PACKAGE_SUGGESTS "level-zero")
  set(CPACK_RPM_LEVEL-ZERO-GPU_PACKAGE_SUGGESTS "level-zero")

  # package_config.cmake is handed to CPack as its properties file; it exports
  # CPACK_LD_LIBRARY_PATH (set here to the GMM library path) as LD_LIBRARY_PATH.
  set(CPACK_PROPERTIES_FILE "${CMAKE_CURRENT_SOURCE_DIR}/package_config.cmake")
  set(CPACK_LD_LIBRARY_PATH "${NEO__GMM_LIBRARY_PATH}")
  include(CPack)

  # When included from a parent directory scope, tell it which component name was used.
  get_directory_property(__HAS_PARENT PARENT_DIRECTORY)
  if(__HAS_PARENT)
    set(NEO__COMPONENT_NAME "opencl" PARENT_SCOPE)
  endif()
endif(UNIX)
compute-runtime-20.13.16352/package_config.cmake000066400000000000000000000002001363734646600212670ustar00rootroot00000000000000
#
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Export the library search path chosen at configure time to the environment.
set(ENV{LD_LIBRARY_PATH} ${CPACK_LD_LIBRARY_PATH})
compute-runtime-20.13.16352/platforms.cmake000066400000000000000000000320141363734646600203660ustar00rootroot00000000000000
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Number of per-gen slots pre-allocated in every list created by INIT_LIST
# (indices 0..MAX_GEN inclusive).
set(MAX_GEN 64)

# Known hardware generations; a generation's position in this list is its index
# into the per-gen lists below.
set(ALL_GEN_TYPES "")

list(APPEND ALL_GEN_TYPES "GEN8")
list(APPEND ALL_GEN_TYPES "GEN9")
list(APPEND ALL_GEN_TYPES "GEN11")
list(APPEND ALL_GEN_TYPES "GEN12LP")

# Newest-first copy, used when picking defaults.
set(ALL_GEN_TYPES_REVERSED ${ALL_GEN_TYPES})
list(REVERSE ALL_GEN_TYPES_REVERSED)

# Looks up GEN_TYPE in ALL_GEN_TYPES and stores its index; aborts the configure
# step if the generation is unknown.
# The result is written to the variable literally named GEN_IDX; every caller in
# this file passes GEN_IDX as the second argument, which is what makes the
# `${GEN_IDX}` check below read the value that was just stored.
macro(FIND_IDX_FOR_GEN_TYPE GEN_TYPE GEN_IDX)
  list(FIND ALL_GEN_TYPES "${GEN_TYPE}" GEN_IDX)
  if(${GEN_IDX} EQUAL -1)
    message(FATAL_ERROR "No ${GEN_TYPE} allowed, exiting")
  endif()
endmacro()

# Creates ALL_<ELEMENT_TYPE>_<LIST_TYPE> filled with single-space placeholders,
# one per possible generation index. A single space means "no entry yet".
macro(INIT_LIST LIST_TYPE ELEMENT_TYPE)
  foreach(IT RANGE 0 ${MAX_GEN} 1)
    list(APPEND ALL_${ELEMENT_TYPE}_${LIST_TYPE} " ")
  endforeach()
endmacro()

# Reads the entry for GEN_IDX from ALL_<ELEMENT_TYPE>_<LIST_TYPE> and splits it on
# "_" into OUT_LIST (entries are stored as "A_B_C", see ADD_ITEM_FOR_GEN).
macro(GET_LIST_FOR_GEN LIST_TYPE ELEMENT_TYPE GEN_IDX OUT_LIST)
  list(GET ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_IDX} GEN_X_${LIST_TYPE})
  string(REPLACE "_" ";" ${OUT_LIST} ${GEN_X_${LIST_TYPE}})
endmacro()

# Appends ITEM to the "_"-joined entry stored for GEN_TYPE in
# ALL_<ELEMENT_TYPE>_<LIST_TYPE>, replacing the placeholder on first use.
macro(ADD_ITEM_FOR_GEN LIST_TYPE ELEMENT_TYPE GEN_TYPE ITEM)
  FIND_IDX_FOR_GEN_TYPE(${GEN_TYPE} GEN_IDX)
  list(GET ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_IDX} GEN_X_LIST)
  # Drop the placeholder space so an untouched slot compares equal to "".
  string(REPLACE " " "" GEN_X_LIST ${GEN_X_LIST})
  if("${GEN_X_LIST}" STREQUAL "")
    set(GEN_X_LIST "${ITEM}")
  else()
    set(GEN_X_LIST "${GEN_X_LIST}_${ITEM}")
  endif()
  # CMake lists have no in-place element assignment: remove the slot, then re-insert.
  list(REMOVE_AT ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_IDX})
  list(INSERT ALL_${ELEMENT_TYPE}_${LIST_TYPE} ${GEN_IDX} ${GEN_X_LIST})
endmacro()

# Sets OUT_FLAG to TRUE when ALL_<TYPE>_PLATFORMS has at least one platform
# registered for GEN_TYPE, FALSE otherwise.
macro(GEN_CONTAINS_PLATFORMS TYPE GEN_TYPE OUT_FLAG)
  FIND_IDX_FOR_GEN_TYPE(${GEN_TYPE} GEN_IDX)
  GET_LIST_FOR_GEN("PLATFORMS" ${TYPE} ${GEN_IDX} GEN_X_PLATFORMS)
  string(REPLACE " " "" GEN_X_PLATFORMS ${GEN_X_PLATFORMS})
  if("${GEN_X_PLATFORMS}" STREQUAL "")
    set(${OUT_FLAG} FALSE)
  else()
    set(${OUT_FLAG} TRUE)
  endif()
endmacro()

# Seeds the two parallel product lists for TYPE with a placeholder element, which
# GET_AVAILABLE_PRODUCTS removes again.
macro(INIT_PRODUCTS_LIST TYPE)
  list(APPEND ALL_${TYPE}_PRODUCT_FAMILY " ")
  list(APPEND ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY " ")
endmacro()

# Registers a product: ITEM (product family enum name) and PRODUCT (short platform
# name) are appended at the same position of the two parallel lists.
macro(ADD_PRODUCT TYPE PRODUCT ITEM)
  list(APPEND ALL_${TYPE}_PRODUCT_FAMILY ${ITEM})
  list(APPEND ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY ${PRODUCT})
endmacro()

# Outputs the registered product families for TYPE in PRODUCT_FAMILY_LIST and, when
# DEFAULT_<TYPE>_PLATFORM is set, the product family of that platform in
# DEFAULT_PRODUCT_FAMILY (otherwise that output is left unset). Aborts if the
# default platform has no registered product.
macro(GET_AVAILABLE_PRODUCTS TYPE PRODUCT_FAMILY_LIST DEFAULT_PRODUCT_FAMILY)
  list(REMOVE_ITEM ALL_${TYPE}_PRODUCT_FAMILY " ")
  list(REMOVE_ITEM ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY " ")

  set(${PRODUCT_FAMILY_LIST} ${ALL_${TYPE}_PRODUCT_FAMILY})
  set(${DEFAULT_PRODUCT_FAMILY})

  if(NOT "${DEFAULT_${TYPE}_PLATFORM}" STREQUAL "")
    list(FIND ALL_${TYPE}_PRODUCT_TO_PRODUCT_FAMILY ${DEFAULT_${TYPE}_PLATFORM} INDEX)
    if(${INDEX} EQUAL -1)
      message(FATAL_ERROR "${DEFAULT_${TYPE}_PLATFORM} not found in product families.")
    endif()

    list(GET ALL_${TYPE}_PRODUCT_FAMILY ${INDEX} DEFAULT)
    set(${DEFAULT_PRODUCT_FAMILY} ${DEFAULT})
  endif()
endmacro()

# Builds, for TYPE ("SUPPORTED" or "TESTED"):
#   <TYPE>_PLATFORM_LIST          - all platforms, newest generation first
#   <TYPE>_GEN_FLAGS_DEFINITONS   - <FLAG_NAME>_<gen> and <FLAG_NAME>_<platform> names
#   DEFAULT_<TYPE>_PLATFORM       - PREFERRED_PLATFORM if its flag is on, else the
#                                   first platform found
#   DEFAULT_<TYPE>_<gen>_PLATFORM - first platform of each generation
#   OUT_STR                       - space-separated platform names for logging
macro(GET_AVAILABLE_PLATFORMS TYPE FLAG_NAME OUT_STR)
  set(${TYPE}_PLATFORM_LIST)
  set(${TYPE}_GEN_FLAGS_DEFINITONS)
  if(NOT DEFAULT_${TYPE}_PLATFORM AND DEFINED PREFERRED_PLATFORM AND ${FLAG_NAME}_${PREFERRED_PLATFORM})
    set(DEFAULT_${TYPE}_PLATFORM ${PREFERRED_PLATFORM})
  endif()
  foreach(GEN_TYPE ${ALL_GEN_TYPES_REVERSED})
    GEN_CONTAINS_PLATFORMS(${TYPE} ${GEN_TYPE} GENX_HAS_PLATFORMS)
    if(${GENX_HAS_PLATFORMS})
      FIND_IDX_FOR_GEN_TYPE(${GEN_TYPE} GEN_IDX)
      list(APPEND ${TYPE}_GEN_FLAGS_DEFINITONS ${FLAG_NAME}_${GEN_TYPE})
      GET_LIST_FOR_GEN("PLATFORMS" ${TYPE} ${GEN_IDX} ${TYPE}_GENX_PLATFORMS)
      list(APPEND ${TYPE}_PLATFORM_LIST ${${TYPE}_GENX_PLATFORMS})
      if(NOT DEFAULT_${TYPE}_PLATFORM)
        # list(GET <list> <index> <out-var>): a trailing ${PLATFORM_IT} used to be
        # passed here. It only worked while that variable expanded to nothing; any
        # value would have shifted the output-variable position.
        list(GET ${TYPE}_PLATFORM_LIST 0 DEFAULT_${TYPE}_PLATFORM)
      endif()
      if(NOT DEFAULT_${TYPE}_${GEN_TYPE}_PLATFORM)
        list(GET ${TYPE}_GENX_PLATFORMS 0 DEFAULT_${TYPE}_${GEN_TYPE}_PLATFORM)
      endif()
    endif()
  endforeach()
  foreach(PLATFORM_IT ${${TYPE}_PLATFORM_LIST})
    set(${OUT_STR} "${${OUT_STR}} ${PLATFORM_IT}")
    list(APPEND ${TYPE}_GEN_FLAGS_DEFINITONS ${FLAG_NAME}_${PLATFORM_IT})
  endforeach()
endmacro()

# Returns the platforms registered for GEN_TYPE in ALL_<TYPE>_PLATFORMS.
macro(GET_PLATFORMS_FOR_GEN TYPE GEN_TYPE OUT_LIST)
  FIND_IDX_FOR_GEN_TYPE(${GEN_TYPE} GEN_IDX)
  GET_LIST_FOR_GEN("PLATFORMS" ${TYPE} ${GEN_IDX} ${OUT_LIST})
endmacro()

# Sets OUT_FLAG to TRUE when PLATFORM_NAME is registered in the SUPPORTED_2_0
# platform list of GEN_TYPE.
macro(PLATFORM_HAS_2_0 GEN_TYPE PLATFORM_NAME OUT_FLAG)
  FIND_IDX_FOR_GEN_TYPE(${GEN_TYPE} GEN_IDX)
  GET_LIST_FOR_GEN("PLATFORMS" "SUPPORTED_2_0" ${GEN_IDX} GEN_X_PLATFORMS)
  list(FIND GEN_X_PLATFORMS ${PLATFORM_NAME} PLATFORM_EXISTS)
  if("${PLATFORM_EXISTS}" LESS 0)
    set(${OUT_FLAG} FALSE)
  else()
    set(${OUT_FLAG} TRUE)
  endif()
endmacro()

# Sets OUT_FLAG to TRUE when PLATFORM_NAME is registered in the SUPPORTED_VME
# platform list of GEN_TYPE.
macro(PLATFORM_HAS_VME GEN_TYPE PLATFORM_NAME OUT_FLAG)
  FIND_IDX_FOR_GEN_TYPE(${GEN_TYPE} GEN_IDX)
  GET_LIST_FOR_GEN("PLATFORMS" "SUPPORTED_VME" ${GEN_IDX} GEN_X_PLATFORMS)
  list(FIND GEN_X_PLATFORMS ${PLATFORM_NAME} PLATFORM_EXISTS)
  if("${PLATFORM_EXISTS}" LESS 0)
    set(${OUT_FLAG} FALSE)
  else()
    set(${OUT_FLAG} TRUE)
  endif()
endmacro()

# default flag for GenX devices support
set(SUPPORT_GEN_DEFAULT TRUE CACHE BOOL "default value for SUPPORT_GENx")
# default flag for platform support
set(SUPPORT_PLATFORM_DEFAULT TRUE CACHE BOOL "default value for support platform")

# Define the hardware configurations we support and test.
# Creates the SUPPORT_<gen> / TESTS_<gen> cache options and, for every platform
# name passed after GEN_TYPE, SUPPORT_<platform> / TESTS_<platform>. Tests are
# forced off when the gen or platform is unsupported or SKIP_UNIT_TESTS is set.
macro(SET_FLAGS_FOR GEN_TYPE)
  set(SUPPORT_${GEN_TYPE} ${SUPPORT_GEN_DEFAULT} CACHE BOOL "Support ${GEN_TYPE} devices")
  set(TESTS_${GEN_TYPE} ${SUPPORT_${GEN_TYPE}} CACHE BOOL "Build ULTs for ${GEN_TYPE} devices")
  set(SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE} TRUE CACHE BOOL "Support ${GEN_TYPE} for device side enqueue")

  if(NOT SUPPORT_${GEN_TYPE} OR SKIP_UNIT_TESTS)
    set(TESTS_${GEN_TYPE} FALSE)
  endif()

  if(SUPPORT_${GEN_TYPE})
    foreach(${GEN_TYPE}_PLATFORM ${ARGN})
      set(SUPPORT_${${GEN_TYPE}_PLATFORM} ${SUPPORT_PLATFORM_DEFAULT} CACHE BOOL "Support ${${GEN_TYPE}_PLATFORM}")
      if(TESTS_${GEN_TYPE})
        set(TESTS_${${GEN_TYPE}_PLATFORM} ${SUPPORT_${${GEN_TYPE}_PLATFORM}} CACHE BOOL "Build ULTs for ${${GEN_TYPE}_PLATFORM}")
      endif()
      if(NOT SUPPORT_${${GEN_TYPE}_PLATFORM} OR NOT TESTS_${GEN_TYPE} OR SKIP_UNIT_TESTS)
        set(TESTS_${${GEN_TYPE}_PLATFORM} FALSE)
      endif()
    endforeach()
  endif()
endmacro()

# Registers PLATFORM_NAME for GEN_TYPE in the LIST_TYPE platform list and records
# its PLATFORM_TYPE ("CORE" or "LP" in this file): sets <gen>_HAS_<type>,
# <platform>_IS_<type>, and the lower-cased default platform per gen and type.
macro(ADD_PLATFORM_FOR_GEN LIST_TYPE GEN_TYPE PLATFORM_NAME PLATFORM_TYPE)
  list(APPEND PLATFORM_TYPES ${PLATFORM_TYPE})
  list(REMOVE_DUPLICATES PLATFORM_TYPES)
  ADD_ITEM_FOR_GEN("PLATFORMS" ${LIST_TYPE} ${GEN_TYPE} ${PLATFORM_NAME})
  set(${GEN_TYPE}_HAS_${PLATFORM_TYPE} TRUE)
  set(${PLATFORM_NAME}_IS_${PLATFORM_TYPE} TRUE)
  if(NOT DEFAULT_${LIST_TYPE}_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM)
    string(TOLOWER ${PLATFORM_NAME} DEFAULT_${LIST_TYPE}_${GEN_TYPE}_${PLATFORM_TYPE}_PLATFORM)
  endif()
endmacro()

SET_FLAGS_FOR("GEN8" "BDW")
SET_FLAGS_FOR("GEN9" "SKL" "KBL" "BXT" "GLK" "CFL")
SET_FLAGS_FOR("GEN11" "ICLLP" "LKF" "EHL")
SET_FLAGS_FOR("GEN12LP" "TGLLP")

# Init lists
INIT_LIST("FAMILY_NAME" "TESTED")
INIT_LIST("PLATFORMS" "SUPPORTED")
INIT_LIST("PLATFORMS" "SUPPORTED_2_0")
INIT_LIST("PLATFORMS" "SUPPORTED_VME")
INIT_LIST("PLATFORMS" "TESTED")
INIT_PRODUCTS_LIST("TESTED")
INIT_PRODUCTS_LIST("SUPPORTED")

# Add supported and tested platforms
if(SUPPORT_GEN8)
  if(TESTS_GEN8)
    ADD_ITEM_FOR_GEN("FAMILY_NAME" "TESTED" "GEN8" "BDWFamily")
  endif()
  if(SUPPORT_BDW)
    ADD_PRODUCT("SUPPORTED" "BDW" "IGFX_BROADWELL")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN8" "BDW" "CORE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_2_0" "GEN8" "BDW" "CORE")
    if(TESTS_BDW)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN8" "BDW")
      ADD_PRODUCT("TESTED" "BDW" "IGFX_BROADWELL")
    endif()
  endif()
endif()

if(SUPPORT_GEN9)
  if(TESTS_GEN9)
    ADD_ITEM_FOR_GEN("FAMILY_NAME" "TESTED" "GEN9" "SKLFamily")
  endif()
  if(SUPPORT_SKL)
    ADD_PRODUCT("SUPPORTED" "SKL" "IGFX_SKYLAKE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN9" "SKL" "CORE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_2_0" "GEN9" "SKL" "CORE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_VME" "GEN9" "SKL" "CORE")
    # SKL is the preferred default platform (and family) whenever it is enabled.
    set(PREFERRED_PLATFORM "SKL")
    if(TESTS_SKL)
      set(PREFERRED_FAMILY_NAME "SKLFamily")
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN9" "SKL")
      ADD_PRODUCT("TESTED" "SKL" "IGFX_SKYLAKE")
    endif()
  endif()

  if(SUPPORT_KBL)
    ADD_PRODUCT("SUPPORTED" "KBL" "IGFX_KABYLAKE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN9" "KBL" "CORE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_2_0" "GEN9" "KBL" "CORE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_VME" "GEN9" "KBL" "CORE")
    if(TESTS_KBL)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN9" "KBL")
      ADD_PRODUCT("TESTED" "KBL" "IGFX_KABYLAKE")
    endif()
  endif()

  if(SUPPORT_GLK)
    ADD_PRODUCT("SUPPORTED" "GLK" "IGFX_GEMINILAKE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN9" "GLK" "LP")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_VME" "GEN9" "GLK" "LP")
    if(TESTS_GLK)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN9" "GLK")
      ADD_PRODUCT("TESTED" "GLK" "IGFX_GEMINILAKE")
    endif()
  endif()

  if(SUPPORT_CFL)
    ADD_PRODUCT("SUPPORTED" "CFL" "IGFX_COFFEELAKE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN9" "CFL" "CORE")
    ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED_2_0" "GEN9" "CFL")
    ADD_ITEM_FOR_GEN("PLATFORMS" "SUPPORTED_VME" "GEN9" "CFL")
    if(TESTS_CFL)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN9" "CFL")
      ADD_PRODUCT("TESTED" "CFL" "IGFX_COFFEELAKE")
    endif()
  endif()

  if(SUPPORT_BXT)
    ADD_PRODUCT("SUPPORTED" "BXT" "IGFX_BROXTON")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN9" "BXT" "LP")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_VME" "GEN9" "BXT" "LP")
    if(TESTS_BXT)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN9" "BXT")
      ADD_PRODUCT("TESTED" "BXT" "IGFX_BROXTON")
    endif()
  endif()
endif()

if(SUPPORT_GEN11)
  if(TESTS_GEN11)
    ADD_ITEM_FOR_GEN("FAMILY_NAME" "TESTED" "GEN11" "ICLFamily")
  endif()

  if(SUPPORT_ICLLP)
    ADD_PRODUCT("SUPPORTED" "ICLLP" "IGFX_ICELAKE_LP")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN11" "ICLLP" "LP")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_2_0" "GEN11" "ICLLP" "LP")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_VME" "GEN11" "ICLLP" "LP")
    if(TESTS_ICLLP)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN11" "ICLLP")
      ADD_PRODUCT("TESTED" "ICLLP" "IGFX_ICELAKE_LP")
    endif()
  endif()

  if(SUPPORT_LKF)
    ADD_PRODUCT("SUPPORTED" "LKF" "IGFX_LAKEFIELD")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN11" "LKF" "LP")
    if(TESTS_LKF)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN11" "LKF")
      ADD_PRODUCT("TESTED" "LKF" "IGFX_LAKEFIELD")
    endif()
  endif()

  if(SUPPORT_EHL)
    ADD_PRODUCT("SUPPORTED" "EHL" "IGFX_ELKHARTLAKE")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN11" "EHL" "LP")
    if(TESTS_EHL)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN11" "EHL")
      ADD_PRODUCT("TESTED" "EHL" "IGFX_ELKHARTLAKE")
    endif()
  endif()
endif()

if(SUPPORT_GEN12LP)
  if(TESTS_GEN12LP)
    ADD_ITEM_FOR_GEN("FAMILY_NAME" "TESTED" "GEN12LP" "TGLLPFamily")
  endif()
  if(SUPPORT_TGLLP)
    ADD_PRODUCT("SUPPORTED" "TGLLP" "IGFX_TIGERLAKE_LP")
    ADD_PLATFORM_FOR_GEN("SUPPORTED" "GEN12LP" "TGLLP" "LP")
    ADD_PLATFORM_FOR_GEN("SUPPORTED_2_0" "GEN12LP" "TGLLP" "LP")
    if(TESTS_TGLLP)
      ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "GEN12LP" "TGLLP")
      ADD_PRODUCT("TESTED" "TGLLP" "IGFX_TIGERLAKE_LP")
    endif()
  endif()
endif()

# Get platform lists, flag definition and set default platforms
GET_AVAILABLE_PLATFORMS("SUPPORTED" "SUPPORT" ALL_AVAILABLE_SUPPORTED_PLATFORMS)
GET_AVAILABLE_PLATFORMS("TESTED" "TESTS" ALL_AVAILABLE_TESTED_PLATFORMS)
# NOTE(review): both calls write their list into ALL_PRODUCT_FAMILY_LIST, so the
# SUPPORTED call overwrites the TESTED result - confirm this is intended.
GET_AVAILABLE_PRODUCTS("TESTED" ALL_PRODUCT_FAMILY_LIST DEFAULT_TESTED_PRODUCT_FAMILY)
GET_AVAILABLE_PRODUCTS("SUPPORTED" ALL_PRODUCT_FAMILY_LIST DEFAULT_SUPPORTED_PRODUCT_FAMILY)

message(STATUS "All supported platforms: ${ALL_AVAILABLE_SUPPORTED_PLATFORMS}")
message(STATUS "All tested platforms: ${ALL_AVAILABLE_TESTED_PLATFORMS}")

message(STATUS "Default supported platform: ${DEFAULT_SUPPORTED_PLATFORM}")
message(STATUS "All tested product families: ${ALL_TESTED_PRODUCT_FAMILY}")
message(STATUS "All supported product families: ${ALL_SUPPORTED_PRODUCT_FAMILY}")
message(STATUS "Default tested product family: ${DEFAULT_TESTED_PRODUCT_FAMILY}")

# The value is quoted so that an empty default (no supported platform at all)
# reaches the FATAL_ERROR below instead of failing inside list(FIND) with an
# argument-count error.
list(FIND SUPPORTED_PLATFORM_LIST "${DEFAULT_SUPPORTED_PLATFORM}" VALID_DEFAULT_SUPPORTED_PLATFORM)
if(VALID_DEFAULT_SUPPORTED_PLATFORM LESS 0)
  message(FATAL_ERROR "Not a valid supported platform: ${DEFAULT_SUPPORTED_PLATFORM}")
endif()

message(STATUS "Default tested platform: ${DEFAULT_TESTED_PLATFORM}")

# A default tested platform is optional, but when present it must be a tested one.
if(DEFAULT_TESTED_PLATFORM)
  list(FIND TESTED_PLATFORM_LIST "${DEFAULT_TESTED_PLATFORM}" VALID_DEFAULT_TESTED_PLATFORM)
  if(VALID_DEFAULT_TESTED_PLATFORM LESS 0)
    message(FATAL_ERROR "Not a valid tested platform: ${DEFAULT_TESTED_PLATFORM}")
  endif()
endif()

# Default tested family: the preferred family when it is among the tested ones,
# otherwise the family of the newest generation that has one registered.
if(NOT DEFAULT_TESTED_FAMILY_NAME)
  if(DEFINED PREFERRED_FAMILY_NAME)
    list(FIND ALL_TESTED_FAMILY_NAME ${PREFERRED_FAMILY_NAME} GEN_IDX)
    if(${GEN_IDX} GREATER -1)
      set(DEFAULT_TESTED_FAMILY_NAME ${PREFERRED_FAMILY_NAME})
    endif()
  endif()
  if(NOT DEFINED DEFAULT_TESTED_FAMILY_NAME)
    foreach(GEN_TYPE ${ALL_GEN_TYPES_REVERSED})
      FIND_IDX_FOR_GEN_TYPE(${GEN_TYPE} GEN_IDX)
      list(GET ALL_TESTED_FAMILY_NAME ${GEN_IDX} GEN_FAMILY_NAME)
      # A single space is the INIT_LIST placeholder, i.e. no family for this gen.
      if(NOT GEN_FAMILY_NAME STREQUAL " ")
        set(DEFAULT_TESTED_FAMILY_NAME ${GEN_FAMILY_NAME})
        break()
      endif()
    endforeach()
  endif()
endif()
message(STATUS "Default tested family name: ${DEFAULT_TESTED_FAMILY_NAME}")
compute-runtime-20.13.16352/scripts/000077500000000000000000000000001363734646600170445ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/build-arch-clang.sh000077500000000000000000000003311363734646600224740ustar00rootroot00000000000000
#!/bin/bash
#
# Copyright (C) 2018-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Fetch tags, clone the checkout into ./neo (the Dockerfile copies that directory
# into the image) and build the Arch Linux + clang image.
git fetch -t
git clone ../compute-runtime neo
docker build -f scripts/docker/Dockerfile-arch-clang -t neo-arch-clang:ci .
compute-runtime-20.13.16352/scripts/build-arch-dep.sh000077500000000000000000000006621363734646600221670ustar00rootroot00000000000000
#!/bin/bash
#
# Copyright (C) 2018-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Download, build and install the opencl-clang-git AUR package.
wget https://aur.archlinux.org/cgit/aur.git/snapshot/opencl-clang-git.tar.gz
tar -xzf opencl-clang-git.tar.gz
cd opencl-clang-git
makepkg -i --noconfirm
cd ..

# Download, build and install the intel-graphics-compiler AUR package.
wget https://aur.archlinux.org/cgit/aur.git/snapshot/intel-graphics-compiler.tar.gz
tar -xzf intel-graphics-compiler.tar.gz
cd intel-graphics-compiler
makepkg -i --noconfirm
cd ..
compute-runtime-20.13.16352/scripts/build-arch-docker.sh000077500000000000000000000003151363734646600226610ustar00rootroot00000000000000
#!/bin/bash
#
# Copyright (C) 2018-2019 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Fetch tags, clone the checkout into ./neo (the Dockerfile copies that directory
# into the image) and build the Arch Linux + gcc image.
git fetch -t
git clone ../compute-runtime neo
docker build -f scripts/docker/Dockerfile-arch -t neo-arch:ci .
compute-runtime-20.13.16352/scripts/build-fedora30-copr-docker.sh000077500000000000000000000003541363734646600243130ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018-2019 Intel Corporation # # SPDX-License-Identifier: MIT # git fetch -t git clone ../compute-runtime neo docker info docker build -f scripts/docker/Dockerfile-fedora-30-copr -t neo-fedora-30-copr:ci . compute-runtime-20.13.16352/scripts/build-fedora31-copr-docker.sh000077500000000000000000000003541363734646600243140ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018-2019 Intel Corporation # # SPDX-License-Identifier: MIT # git fetch -t git clone ../compute-runtime neo docker info docker build -f scripts/docker/Dockerfile-fedora-31-copr -t neo-fedora-31-copr:ci . compute-runtime-20.13.16352/scripts/build-fedora32-copr-docker.sh000077500000000000000000000003541363734646600243150ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # git fetch -t git clone ../compute-runtime neo docker info docker build -f scripts/docker/Dockerfile-fedora-32-copr -t neo-fedora-32-copr:ci . 
compute-runtime-20.13.16352/scripts/docker/000077500000000000000000000000001363734646600203135ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/docker/Dockerfile-arch000066400000000000000000000005331363734646600232210ustar00rootroot00000000000000
# Arch Linux image that builds the runtime with gcc/g++ (Release, Ninja, one job).
FROM docker.io/archlinux/base
LABEL maintainer="jacek.danecki@intel.com"

COPY neo /root/neo
COPY scripts/prepare-arch.sh /root

# NOTE(review): /root/build is entered below without being created in this file;
# presumably prepare-arch.sh creates it - verify.
RUN /root/prepare-arch.sh
RUN cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \
    -DDO_NOT_RUN_AUB_TESTS=1 ../neo ; \
    ninja -j 1
CMD ["/bin/bash"]
compute-runtime-20.13.16352/scripts/docker/Dockerfile-arch-clang000066400000000000000000000005631363734646600243060ustar00rootroot00000000000000
# Arch Linux image that builds the runtime with clang/clang++ (Release, Ninja,
# one job per CPU reported by nproc).
FROM docker.io/archlinux/base
LABEL maintainer="jacek.danecki@intel.com"

COPY neo /root/neo
COPY scripts/prepare-arch-clang.sh /root

# NOTE(review): /root/build is entered below without being created in this file;
# presumably prepare-arch-clang.sh creates it - verify.
RUN /root/prepare-arch-clang.sh
RUN cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
    -DDO_NOT_RUN_AUB_TESTS=1 ../neo ; \
    ninja -j `nproc`
CMD ["/bin/bash"]
compute-runtime-20.13.16352/scripts/docker/Dockerfile-fedora-30-copr000066400000000000000000000011001363734646600247140ustar00rootroot00000000000000
# Fedora 30 image: installs the toolchain, enables the jdanecki/intel-opencl and
# jdanecki/intel-opencl-ci copr repositories, installs the IGC and gmmlib devel
# packages from them, then configures and builds /root/neo in /root/build
# (Ninja, one job). All steps run in a single RUN layer.
FROM fedora:30
LABEL maintainer="jacek.danecki@intel.com"

COPY neo /root/neo

RUN dnf install -y gcc-c++ cmake ninja-build git pkg-config; \
    dnf install -y 'dnf-command(copr)'; \
    dnf copr enable -y jdanecki/intel-opencl; \
    dnf copr enable -y jdanecki/intel-opencl-ci; \
    dnf --showduplicate list intel-igc-opencl-devel intel-gmmlib-devel; \
    dnf install -y intel-igc-opencl-devel intel-gmmlib-devel; \
    mkdir /root/build; cd /root/build ; cmake -G Ninja \
    -DDO_NOT_RUN_AUB_TESTS=1 -DDONT_CARE_OF_VIRTUALS=1 ../neo; \
    ninja -j 1
CMD ["/bin/bash"]
compute-runtime-20.13.16352/scripts/docker/Dockerfile-fedora-31-copr000066400000000000000000000011001363734646600247150ustar00rootroot00000000000000
# Fedora 31 image: installs the toolchain, enables the jdanecki/intel-opencl and
# jdanecki/intel-opencl-ci copr repositories, installs the IGC and gmmlib devel
# packages from them, then configures and builds /root/neo in /root/build
# (Ninja, one job). All steps run in a single RUN layer.
FROM fedora:31
LABEL maintainer="jacek.danecki@intel.com"

COPY neo /root/neo

RUN dnf install -y gcc-c++ cmake ninja-build git pkg-config; \
    dnf install -y 'dnf-command(copr)'; \
    dnf copr enable -y jdanecki/intel-opencl; \
    dnf copr enable -y jdanecki/intel-opencl-ci; \
    dnf --showduplicate list intel-igc-opencl-devel intel-gmmlib-devel; \
    dnf install -y intel-igc-opencl-devel intel-gmmlib-devel; \
    mkdir /root/build; cd /root/build ; cmake -G Ninja \
    -DDO_NOT_RUN_AUB_TESTS=1 -DDONT_CARE_OF_VIRTUALS=1 ../neo; \
    ninja -j 1
CMD ["/bin/bash"]
compute-runtime-20.13.16352/scripts/docker/Dockerfile-fedora-32-copr000066400000000000000000000011001363734646600247160ustar00rootroot00000000000000
# Fedora 32 image: installs the toolchain, enables the jdanecki/intel-opencl and
# jdanecki/intel-opencl-ci copr repositories, installs the IGC and gmmlib devel
# packages from them, then configures and builds /root/neo in /root/build
# (Ninja, one job). All steps run in a single RUN layer.
FROM fedora:32
LABEL maintainer="jacek.danecki@intel.com"

COPY neo /root/neo

RUN dnf install -y gcc-c++ cmake ninja-build git pkg-config; \
    dnf install -y 'dnf-command(copr)'; \
    dnf copr enable -y jdanecki/intel-opencl; \
    dnf copr enable -y jdanecki/intel-opencl-ci; \
    dnf --showduplicate list intel-igc-opencl-devel intel-gmmlib-devel; \
    dnf install -y intel-igc-opencl-devel intel-gmmlib-devel; \
    mkdir /root/build; cd /root/build ; cmake -G Ninja \
    -DDO_NOT_RUN_AUB_TESTS=1 -DDONT_CARE_OF_VIRTUALS=1 ../neo; \
    ninja -j 1
CMD ["/bin/bash"]
compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-16.04-gcc-gen-11000066400000000000000000000012701363734646600260130ustar00rootroot00000000000000
# Ubuntu 16.04 image that builds the runtime with gcc for GEN11 only
# (SUPPORT_GEN11=1, every other SUPPORT_GEN* switch set to 0).
FROM docker.io/ubuntu:16.04
LABEL maintainer="jacek.danecki@intel.com"

COPY neo /root/neo

# Add the ocl-dev/intel-opencl PPA and its key, then install the build tools plus
# the libigc-dev and intel-gmmlib-dev packages.
RUN echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu xenial main" >> /etc/apt/sources.list; \
    apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \
    apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev

# Configure and build in /root/build (Release, Ninja, one job per CPU).
RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \
    -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=1 -DSUPPORT_GEN12LP=0 \
    -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ ../neo; \
    ninja -j `nproc`
CMD ["/bin/bash"]
compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-16.04-gcc-gen-12000066400000000000000000000012701363734646600260140ustar00rootroot00000000000000FROM docker.io/ubuntu:16.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu xenial main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=1 \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-16.04-gcc-gen-8000066400000000000000000000012701363734646600257410ustar00rootroot00000000000000FROM docker.io/ubuntu:16.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu xenial main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DSUPPORT_GEN8=1 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-16.04-gcc-gen-9000066400000000000000000000012701363734646600257420ustar00rootroot00000000000000FROM docker.io/ubuntu:16.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu xenial main" >> 
/etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=1 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-18.04-clang-8000066400000000000000000000014401363734646600255230ustar00rootroot00000000000000FROM docker.io/ubuntu:18.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu bionic main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake pkg-config ninja-build libigc-dev intel-gmmlib-dev clang-8 RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=clang-8 -DCMAKE_CXX_COMPILER=clang++-8 \ -DDO_NOT_RUN_AUB_TESTS=1 -DDONT_CARE_OF_VIRTUALS=1 ../neo ; ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-18.04-gcc-gen-11000066400000000000000000000014611363734646600260170ustar00rootroot00000000000000FROM docker.io/ubuntu:18.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu bionic main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; 
apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=1 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-18.04-gcc-gen-12000066400000000000000000000014611363734646600260200ustar00rootroot00000000000000FROM docker.io/ubuntu:18.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu bionic main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=1 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-18.04-gcc-gen-8000066400000000000000000000014611363734646600257450ustar00rootroot00000000000000FROM docker.io/ubuntu:18.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu bionic main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev 
intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=1 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-18.04-gcc-gen-9000066400000000000000000000014611363734646600257460ustar00rootroot00000000000000FROM docker.io/ubuntu:18.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu bionic main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=1 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-19.10-clang000066400000000000000000000014301363734646600253530ustar00rootroot00000000000000FROM docker.io/ubuntu:19.10 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu eoan main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake pkg-config ninja-build libigc-dev intel-gmmlib-dev clang RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ 
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ -DDO_NOT_RUN_AUB_TESTS=1 -DDONT_CARE_OF_VIRTUALS=1 ../neo ; ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-19.10-gcc-gen-11000066400000000000000000000014571363734646600260220ustar00rootroot00000000000000FROM docker.io/ubuntu:19.10 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu eoan main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=1 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-19.10-gcc-gen-12000066400000000000000000000014571363734646600260230ustar00rootroot00000000000000FROM docker.io/ubuntu:19.10 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu eoan main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=1 \ ../neo; 
\ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-19.10-gcc-gen-8000066400000000000000000000014571363734646600257500ustar00rootroot00000000000000FROM docker.io/ubuntu:19.10 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu eoan main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=1 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-19.10-gcc-gen-9000066400000000000000000000014571363734646600257510ustar00rootroot00000000000000FROM docker.io/ubuntu:19.10 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu eoan main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=1 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] 
compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-20.04-clang000066400000000000000000000015211363734646600253470ustar00rootroot00000000000000FROM docker.io/ubuntu:20.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu focal main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; DEBIAN_FRONTEND="noninteractive" apt-get --no-install-recommends install -y --allow-unauthenticated cmake pkg-config \ ninja-build libigc-dev intel-gmmlib-dev clang libstdc++-10-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ -DDO_NOT_RUN_AUB_TESTS=1 -DDONT_CARE_OF_VIRTUALS=1 ../neo ; ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-20.04-gcc-gen-11000066400000000000000000000015271363734646600260130ustar00rootroot00000000000000FROM docker.io/ubuntu:20.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu focal main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; DEBIAN_FRONTEND="noninteractive" apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ \ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=1 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] 
compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-20.04-gcc-gen-12000066400000000000000000000015271363734646600260140ustar00rootroot00000000000000FROM docker.io/ubuntu:20.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu focal main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; DEBIAN_FRONTEND="noninteractive" apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ \ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=1 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-20.04-gcc-gen-8000066400000000000000000000015271363734646600257410ustar00rootroot00000000000000FROM docker.io/ubuntu:20.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu focal main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; DEBIAN_FRONTEND="noninteractive" apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ \ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=1 -DSUPPORT_GEN9=0 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] 
compute-runtime-20.13.16352/scripts/docker/Dockerfile-ubuntu-20.04-gcc-gen-9000066400000000000000000000015271363734646600257420ustar00rootroot00000000000000FROM docker.io/ubuntu:20.04 LABEL maintainer="jacek.danecki@intel.com" COPY neo /root/neo RUN apt-get -y update ; apt-get --no-install-recommends install -y --allow-unauthenticated gpg dirmngr gpg-agent; \ echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu focal main" >> /etc/apt/sources.list; \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1; \ apt-get -y update ; DEBIAN_FRONTEND="noninteractive" apt-get --no-install-recommends install -y --allow-unauthenticated cmake g++ \ pkg-config ninja-build libigc-dev intel-gmmlib-dev RUN mkdir /root/build; cd /root/build ; cmake -G Ninja -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ \ -DSUPPORT_GEN8=0 -DSUPPORT_GEN9=1 -DSUPPORT_GEN11=0 -DSUPPORT_GEN12LP=0 \ ../neo; \ ninja -j `nproc` CMD ["/bin/bash"] compute-runtime-20.13.16352/scripts/driver-version.bat000066400000000000000000000050511363734646600225130ustar00rootroot00000000000000:: Copyright (c) 2019, Intel Corporation :: :: Permission is hereby granted, free of charge, to any person obtaining a :: copy of this software and associated documentation files (the "Software"), :: to deal in the Software without restriction, including without limitation :: the rights to use, copy, modify, merge, publish, distribute, sublicense, :: and/or sell copies of the Software, and to permit persons to whom the :: Software is furnished to do so, subject to the following conditions: :: :: The above copyright notice and this permission notice shall be included :: in all copies or substantial portions of the Software. :: :: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS :: OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, :: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL :: THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR :: OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, :: ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR :: OTHER DEALINGS IN THE SOFTWARE. @ECHO OFF :: One parameter is expected IF NOT B"%~2"==B"" ( ECHO %0 called with no parameters, prints the version of the installed OpenCL driver ECHO %0 called with a single parameter containing expected version number, ECHO returns success ^(ERRORLEVEL=0^) if installed the specified driver version or newer ECHO returns failure ^(ERRORLEVEL=1^) if no driver or older than specified ECHO example: ECHO driver-version.bat 26.20.100.7158 EXIT /B 1 ) SET DriverVersion= FOR /F "tokens=3" %%D in ('WMIC path Win32_VideoController where AdapterCompatibility^="Intel Corporation" get AdapterCompatibility^, DriverVersion ^| findstr "Intel"') do ( set DriverVersion=%%D ) IF B"%DriverVersion%"==B"" ( ECHO No driver detected in the system EXIT /B 1 ) IF B"%~1"==B"" ( ECHO %DriverVersion% EXIT /B 1 ) FOR /F "delims=. tokens=1-4" %%A IN ("%DriverVersion%") DO ( SET d1=%%A SET d2=%%B SET d3=%%C SET d4=%%D ) FOR /F "delims=. 
tokens=1-4" %%A IN ("%~1") DO ( SET p1=%%A SET p2=%%B SET p3=%%C SET p4=%%D ) IF %d1% LSS %p1% GOTO FAIL IF %d1% GTR %p1% GOTO PASS IF %d2% LSS %p2% GOTO FAIL IF %d2% GTR %p2% GOTO PASS IF %d3% LSS %p3% GOTO FAIL IF %d3% GTR %p3% GOTO PASS IF %d4% LSS %p4% GOTO FAIL :PASS ECHO Driver %DriverVersion% is newer than or equal to referenced version passed from command line %1 EXIT /B 0 :FAIL ECHO Driver %DriverVersion% is older than referenced from command line %1 EXIT /B 1 compute-runtime-20.13.16352/scripts/driver-version.sh000077500000000000000000000037061363734646600223670ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # check_deb() { DriverVer=$(dpkg -l 'intel-opencl' | awk '/ii intel-opencl / { print $3 }') if [ -z $DriverVer ] then DriverVer=$(dpkg -l 'intel-opencl-icd' | awk '/ii intel-opencl-icd / { print $3 }') fi } check_rpm() { DriverVer=$(rpm -q --queryformat '%{VERSION}' intel-opencl) if [ $? != 0 ] then DriverVer="" fi } check_pacman() { DriverVer=$(pacman -Q intel-compute-runtime | awk '{print $2}' | sed "s/-.*$//") } if [ -f /etc/os-release ] then source /etc/os-release if [ -z "${ID}" ] then echo "Unknown OS" exit 1 fi fi case "${ID}" in debian | ubuntu ) check_deb ;; fedora | centos | rhel) check_rpm ;; arch ) check_pacman ;; * ) echo "Unsupported OS: ${ID}" exit 1 ;; esac if [ -z $DriverVer ] then echo No driver detected in the system exit 1 fi if [ $# -eq 0 ] then echo $DriverVer exit 1 fi if [ $# -ne 1 ] || [ $1 == "-h" ] || [ $1 == "--help" ] then echo $0 called with no parameters, prints the version of the installed OpenCL driver echo $0 called with a single parameter containing expected version number, echo returns success \(0\) if installed the specified driver version or newer echo returns failure \(1\) if no driver or older than specified exit 1 fi if ! [[ $1 =~ ^[0-9]+\.[0-9]+\.[0-9]+.* ]]; then echo Invalid version format exit 1 fi TestedString=$(echo "$1" | awk -F. 
'{ printf("%d.%02d.%d\n", $1,$2,$3); }';) DriverStatus=$( echo -e "${DriverVer}\n${TestedString}" | sort -V -C -r ; echo $? ) if [ $DriverStatus -eq 1 ] then echo Driver $DriverVer is older than referenced version passed from command line ${TestedString} else echo Driver $DriverVer is newer than or equal to referenced version passed from command line ${TestedString} fi exit $DriverStatus compute-runtime-20.13.16352/scripts/format/000077500000000000000000000000001363734646600203345ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/format/CMakeLists.txt000066400000000000000000000006601363734646600230760ustar00rootroot00000000000000# # Copyright (C) 2019 Intel Corporation # # SPDX-License-Identifier: MIT # if(WIN32) set(extension bat) else() set(extension sh) endif() add_custom_target(format_files ${NEO_SOURCE_DIR}/scripts/format/format.${extension} ${NEO_SOURCE_DIR} WORKING_DIRECTORY ${NEO_SOURCE_DIR} COMMENT "Formatting changed files" ) set_target_properties(format_files PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE EXCLUDE_FROM_ALL TRUE ) compute-runtime-20.13.16352/scripts/format/format.bat000066400000000000000000000016051363734646600223160ustar00rootroot00000000000000REM REM Copyright (C) 2019 Intel Corporation REM REM SPDX-License-Identifier: MIT REM @echo off setlocal EnableDelayedExpansion IF NOT EXIST "%1" ( echo Directory "%1" does not exist. exit /b 1 ) call clang-format --version set err=%ERRORLEVEL% if not "%err%"=="0" ( echo clang-format not found exit /b 1 ) git --version set err=%ERRORLEVEL% if not "%err%"=="0" ( echo git not found exit /b 1 ) pushd . cd %1 git rev-parse --git-dir > NUL set err=%ERRORLEVEL% if not "%err%"=="0" ( echo Not a git repository. exit /b 1 ) for /f %%i in ('git diff HEAD --name-only') do ( set file="%%i" call :get_extension %%i call :test_extension !ext! if "!test!" == "1" ( clang-format -i -style=file !file! 
) ) popd exit /b :get_extension set ext=%~x1 exit /b :test_extension set test=0 if "%1"==".h" set test=1 if "%1"==".cpp" set test=1 if "%1"==".inl" set test=1 exit /b compute-runtime-20.13.16352/scripts/format/format.sh000066400000000000000000000011531363734646600221600ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2019 Intel Corporation # # SPDX-License-Identifier: MIT # if [ ! -d "$1" ]; then echo "Directory "$1" does not exist." exit 1 fi clang-format --version err=$? if [$err -ne 0] then echo "clang-format not found" exit 1 fi git --version err=$? if [$err -ne 0] then echo "git not found" exit 1 fi pushd $1 if git rev-parse --git-dir > /dev/null 2>&1; then files=$(git diff HEAD --name-only) for i in $files; do if [[ $i =~ .*\.(h|cpp|inl) ]] then clang-format -i -style=file $i fi done else echo Not a git repository. exit 1 fi popd exit 0 compute-runtime-20.13.16352/scripts/install-deps.sh000077500000000000000000000013011363734646600217750ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # wait_apt() { while fuser -u -v /var/lib/dpkg/lock do echo wait sleep 5 done } echo "deb http://ppa.launchpad.net/ocl-dev/intel-opencl/ubuntu xenial main" >> /etc/apt/sources.list apt-key adv --keyserver keyserver.ubuntu.com --recv-keys C3086B78CC05B8B1 apt-get -y update if [ $? -ne 0 ] then wait_apt apt-get -y update fi apt-get --no-install-recommends install -y --allow-unauthenticated cmake ninja-build libigc-dev intel-gmmlib-dev if [ $? 
-ne 0 ] then wait_apt apt-get --no-install-recommends install -y --allow-unauthenticated cmake ninja-build libigc-dev intel-gmmlib-dev fi dpkg -r ccache compute-runtime-20.13.16352/scripts/lint/000077500000000000000000000000001363734646600200125ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/lint/CMakeLists.txt000066400000000000000000000012001363734646600225430ustar00rootroot00000000000000# # Copyright (C) 2018-2019 Intel Corporation # # SPDX-License-Identifier: MIT # if(IS_DIRECTORY ${NEO_SOURCE_DIR}/.git) add_custom_target(lint ${NEO_SOURCE_DIR}/scripts/lint${BRANCH_DIR_SUFFIX}/set_copyright.sh WORKING_DIRECTORY ${NEO_SOURCE_DIR} ) set_target_properties(lint PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE EXCLUDE_FROM_ALL TRUE ) add_custom_target(lint_head ${NEO_SOURCE_DIR}/scripts/lint${BRANCH_DIR_SUFFIX}/set_copyright.sh HEAD WORKING_DIRECTORY ${NEO_SOURCE_DIR} ) set_target_properties(lint_head PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE EXCLUDE_FROM_ALL TRUE ) endif() compute-runtime-20.13.16352/scripts/lint/set_copyright.py000077500000000000000000000146701363734646600232620ustar00rootroot00000000000000#!/usr/bin/env python3 # # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # """Usage: ./scripts/lint/set_copyright.py """ import re import sys import os import datetime import stat import argparse def is_banned(path): """Check if path is banned.""" banned_paths = [ 'scripts/tests/copyright/in', 'scripts/tests/copyright/out', 'third_party' ] banned_files = [ 'scripts/lint/set_copyright.sh' ] path_banned = False for banned_file in banned_files: if os.path.normpath(path) == os.path.normpath(banned_file): path_banned = True break if not path_banned: dirname = os.path.dirname(path) for banned_path in banned_paths: if dirname.startswith(banned_path): path_banned = True break return path_banned def can_be_scanned(path): """Check whether we should scan this file""" allowed_extensions = [ 'cpp', 'h', 'inl', 'hpp', 'm', 'cmake', 
'py', 'sh', 'cl', 'exports' ] allowed_extensions_2 = [ 'h.in', 'rc.in', 'options.txt' ] allowed_files = [ 'CMakeLists.txt' ] path_ext = path.split('.') path_ok = False filename = os.path.basename(path) if not os.path.isfile(path): print(f'Cannot find file {path}, skipping') path_ok = False elif is_banned(path): path_ok = False elif filename in allowed_files: path_ok = True elif path_ext[-1].lower() in allowed_extensions: path_ok = True elif '.'.join(path_ext[-2:]) in allowed_extensions_2: path_ok = True if not path_ok: print(f'[MIT] Ignoring file: {path}') return path_ok def _parse_args(): parser = argparse.ArgumentParser(description='Usage: ./scripts/lint/set_copyright.py ') parser.add_argument('-c', '--check', action='store_true', help='Checks only, not changing files, fails if wrong copyright') parser.add_argument('files', nargs='*') args = parser.parse_args() return vars(args) def main(args): header_cpp = """/* * Copyright (C) {} Intel Corporation * * SPDX-License-Identifier: MIT * */ """ header_bash_style = """# # Copyright (C) {} Intel Corporation # # SPDX-License-Identifier: MIT # """ cpp_sharp_lines = [ '#pragma', '#include' ] status = 0 for path in args['files']: # avoid self scan if os.path.abspath(path) == os.path.abspath(sys.argv[0]): continue if not can_be_scanned(path): continue print(f'[MIT] Processing file: {path}') gathered_lines = [] gathered_header = [] start_year = None header = header_cpp header_start = '/*' header_end = '*/' comment_char = r'\*' # now read line by line with open(path) as fin: # take care of hashbang first_line = fin.readline() if not first_line.startswith('#!'): line = first_line first_line = '' else: line = fin.readline() is_cpp = False # check whether comment type is '#' if first_line or line.startswith('#'): for i in cpp_sharp_lines: print(f'a: {i} ~ {line}') if line.startswith(i): is_cpp = True break if not is_cpp: header_start = '#' header_end = '\n' header = header_bash_style comment_char = '#' curr_comment = [] 
is_header = None is_header_end = None # copyright have to be first comment in file if line.startswith(header_start): is_header = True is_header_end = False else: is_header = False is_header_end = True is_copyright = False while line: if is_header: if header_end == '\n' and len(line.strip()) == 0: is_header = False is_header_end = True elif line.strip().endswith(header_end): is_header = False is_header_end = True elif 'Copyright' in line: expr = (rf'^{comment_char} Copyright \([Cc]\) (\d+)( *- *\d+)?') match = re.match(expr, line.strip()) if match: start_year = match.groups()[0] curr_comment = [] is_copyright = True if not is_copyright: curr_comment.append(line) gathered_header.append(line) elif is_copyright and is_header_end: if len(line.strip()) > 0: gathered_lines.append(line) is_header_end = False else: gathered_header.append(line) else: gathered_lines.append(line) line = fin.readline() year = datetime.datetime.now().year if start_year is None: start_year = str(year) elif int(start_year) < year: start_year += '-' start_year += str(year) written_header = [header.format(start_year)] if len(curr_comment) > 0 or len(gathered_lines) > 0: written_header.append('\n') if len(curr_comment) > 0: written_header.append(''.join(curr_comment)) if not args['check']: # store file mode because we want to preserve this old_mode = os.stat(path)[stat.ST_MODE] os.remove(path) with open(path, 'w') as fout: if first_line: fout.write(first_line) fout.write(''.join(written_header)) contents = ''.join(gathered_lines) fout.write(contents) # chmod to original value os.chmod(path, old_mode) if args['check'] and ''.join(gathered_header) != ''.join(written_header): status = 1 return status if __name__ == '__main__': sys.exit(main(_parse_args())) compute-runtime-20.13.16352/scripts/lint/set_copyright.sh000077500000000000000000000010231363734646600232300ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
python_interpreter="python3" if [[ "$OSTYPE" == "msys" ]]; then python_interpreter="python" fi converter=$(dirname $(readlink -f $0))/set_copyright.py if [ "${1:-STAGED}" = "HEAD" ]; then git diff-tree --no-commit-id --name-only -r HEAD | xargs -n 1 $python_interpreter $converter else git diff --cached --name-only | xargs -n 1 $python_interpreter $converter git diff --name-only | xargs -n 1 echo "Not scanned: " fi compute-runtime-20.13.16352/scripts/prepare-arch-clang.sh000077500000000000000000000004341363734646600230370ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018-2019 Intel Corporation # # SPDX-License-Identifier: MIT # set -e set -x mkdir /root/build pacman -Suy --noconfirm clang cmake git make pkg-config ninja libva \ intel-gmmlib intel-opencl-clang spirv-llvm-translator intel-graphics-compiler compute-runtime-20.13.16352/scripts/prepare-arch.sh000077500000000000000000000004321363734646600217530ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018-2019 Intel Corporation # # SPDX-License-Identifier: MIT # set -e set -x mkdir /root/build pacman -Suy --noconfirm gcc cmake git make pkg-config ninja libva \ intel-gmmlib intel-opencl-clang spirv-llvm-translator intel-graphics-compiler compute-runtime-20.13.16352/scripts/run-build.sh000077500000000000000000000006171363734646600213100ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2018 Intel Corporation # # SPDX-License-Identifier: MIT # DOCKERFILE=Dockerfile-${BUILD_OS}-${COMPILER} IMAGE=neo-${BUILD_OS}-${COMPILER}:ci if [ -n "$GEN" ] then DOCKERFILE=${DOCKERFILE}-${GEN} IMAGE=neo-${BUILD_OS}-${COMPILER}-${GEN}:ci fi git clone ../compute-runtime neo && \ docker build -f scripts/docker/${DOCKERFILE} -t ${IMAGE} . 
&& \ docker images compute-runtime-20.13.16352/scripts/tests/000077500000000000000000000000001363734646600202065ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/tests/copyright/000077500000000000000000000000001363734646600222165ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/tests/copyright/in/000077500000000000000000000000001363734646600226245ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/tests/copyright/in/file1.cpp000066400000000000000000000000361363734646600243270ustar00rootroot00000000000000/* * No copyright at all */ compute-runtime-20.13.16352/scripts/tests/copyright/in/file1.sh000066400000000000000000000000271363734646600241570ustar00rootroot00000000000000#!/bin/bash echo 123 compute-runtime-20.13.16352/scripts/tests/copyright/in/file2.cpp000066400000000000000000000000411363734646600243240ustar00rootroot00000000000000/* * Copyright (C) 2017 XYZ */ compute-runtime-20.13.16352/scripts/tests/copyright/in/file2.sh000066400000000000000000000000331363734646600241550ustar00rootroot00000000000000# # Copyright (C) 2017 XYZ compute-runtime-20.13.16352/scripts/tests/copyright/in/file3.cpp000066400000000000000000000000531363734646600243300ustar00rootroot00000000000000// // This comment shouldn't be removed // compute-runtime-20.13.16352/scripts/tests/copyright/in/file3.sh000066400000000000000000000000501363734646600241550ustar00rootroot00000000000000# # This comment shouldn't be removed # compute-runtime-20.13.16352/scripts/tests/copyright/in/file4.cpp000066400000000000000000000000731363734646600243330ustar00rootroot00000000000000/* * No copyright at all */ #include "file.h" class C; compute-runtime-20.13.16352/scripts/tests/copyright/in/file4.sh000066400000000000000000000000611363734646600241600ustar00rootroot00000000000000# # No copyright at all # echo "file.h" exit 1 compute-runtime-20.13.16352/scripts/tests/copyright/in/file5.cpp000066400000000000000000000001151363734646600243310ustar00rootroot00000000000000/* * 
Copyright (C) 2012 - 2016 Intel Corporation * * No spdx header */ compute-runtime-20.13.16352/scripts/tests/copyright/in/file6.cpp000066400000000000000000000000621363734646600243330ustar00rootroot00000000000000#pragma once // header file with # in first line compute-runtime-20.13.16352/scripts/tests/copyright/in/file7.cpp000066400000000000000000000000701363734646600243330ustar00rootroot00000000000000#include // header file with # in first line compute-runtime-20.13.16352/scripts/tests/copyright/out/000077500000000000000000000000001363734646600230255ustar00rootroot00000000000000compute-runtime-20.13.16352/scripts/tests/copyright/out/file1.cpp000066400000000000000000000001641363734646600245320ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ /* * No copyright at all */ compute-runtime-20.13.16352/scripts/tests/copyright/out/file1.sh000066400000000000000000000001441363734646600243600ustar00rootroot00000000000000#!/bin/bash # # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # echo 123 compute-runtime-20.13.16352/scripts/tests/copyright/out/file2.cpp000066400000000000000000000001321363734646600245260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-20.13.16352/scripts/tests/copyright/out/file2.sh000066400000000000000000000001211363734646600243540ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # compute-runtime-20.13.16352/scripts/tests/copyright/out/file3.cpp000066400000000000000000000002011363734646600245240ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ // // This comment shouldn't be removed // compute-runtime-20.13.16352/scripts/tests/copyright/out/file3.sh000066400000000000000000000001651363734646600243650ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # # # This comment shouldn't be removed # compute-runtime-20.13.16352/scripts/tests/copyright/out/file4.cpp000066400000000000000000000002211363734646600245270ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ /* * No copyright at all */ #include "file.h" class C; compute-runtime-20.13.16352/scripts/tests/copyright/out/file4.sh000066400000000000000000000001761363734646600243700ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # # # No copyright at all # echo "file.h" exit 1 compute-runtime-20.13.16352/scripts/tests/copyright/out/file5.cpp000066400000000000000000000001321363734646600245310ustar00rootroot00000000000000/* * Copyright (C) 2012-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ compute-runtime-20.13.16352/scripts/tests/copyright/out/file6.cpp000066400000000000000000000002101363734646600245270ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once // header file with # in first line compute-runtime-20.13.16352/scripts/tests/copyright/out/file7.cpp000066400000000000000000000002161363734646600245360ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include // header file with # in first line compute-runtime-20.13.16352/scripts/tests/copyright/test.sh000077500000000000000000000011361363734646600235350ustar00rootroot00000000000000#!/bin/sh # # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # # # Simple, file based tests for copyright script # script return non-zero error code if something went wrong. 
# diff output is printed # script_directory=$(dirname "$0") python_interpreter="python3" if [[ "$OSTYPE" == "msys" ]]; then python_interpreter="python" fi $python_interpreter "${script_directory}/../../lint/set_copyright.py" "${script_directory}"/in/* for i in "${script_directory}"/in/* do fn=$(basename $i) diff -du "${script_directory}/in/${fn}" "${script_directory}/out/${fn}" done compute-runtime-20.13.16352/scripts/verify.bat000066400000000000000000000006751363734646600210500ustar00rootroot00000000000000REM Copyright (C) 2017-2020 Intel Corporation REM REM SPDX-License-Identifier: MIT REM @where appverif @if not "%ERRORLEVEL%"=="0" ( @echo No appverif command. cmd /c exit /b 0 set testError=0 goto end ) appverif.exe -enable Exceptions Handles Heaps Leak Locks Memory Threadpool TLS DirtyStacks -for %1 %* set testError=%errorlevel% echo App Verifier returned: %testError% appverif.exe -disable * -for * > nul :end exit /b %testError% compute-runtime-20.13.16352/shared/000077500000000000000000000000001363734646600166235ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/000077500000000000000000000000001363734646600221375ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/CMakeLists.txt000066400000000000000000000002441363734646600246770ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_subdirectory(source) set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} PARENT_SCOPE) compute-runtime-20.13.16352/shared/offline_compiler/source/000077500000000000000000000000001363734646600234375ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/source/CMakeLists.txt000066400000000000000000000200461363734646600262010ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # project(ocloc_lib) set(CLOC_LIB_SRCS_LIB ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar.h 
${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_decoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_decoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_encoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/ar/ar_encoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf.h ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_decoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_decoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_encoder.h ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/elf_encoder.cpp ${NEO_SHARED_DIRECTORY}/device_binary_format/elf/ocl_elf.h ${NEO_SHARED_DIRECTORY}/helpers/abort.cpp ${NEO_SHARED_DIRECTORY}/helpers/debug_helpers.cpp ${NEO_SHARED_DIRECTORY}/helpers/file_io.cpp ${NEO_SHARED_DIRECTORY}/os_interface/os_library.h ${OCLOC_DIRECTORY}/source/decoder/binary_decoder.cpp ${OCLOC_DIRECTORY}/source/decoder/binary_decoder.h ${OCLOC_DIRECTORY}/source/decoder/binary_encoder.cpp ${OCLOC_DIRECTORY}/source/decoder/binary_encoder.h ${OCLOC_DIRECTORY}/source/decoder/helper.cpp ${OCLOC_DIRECTORY}/source/decoder/helper.h ${OCLOC_DIRECTORY}/source/decoder/iga_wrapper.h ${OCLOC_DIRECTORY}/source/decoder/translate_platform_base.h ${OCLOC_DIRECTORY}/source/ocloc_api.cpp ${OCLOC_DIRECTORY}/source/ocloc_api.h ${OCLOC_DIRECTORY}/source/ocloc_arg_helper.h ${OCLOC_DIRECTORY}/source/ocloc_arg_helper.cpp ${OCLOC_DIRECTORY}/source/ocloc_fatbinary.cpp ${OCLOC_DIRECTORY}/source/ocloc_fatbinary.h ${OCLOC_DIRECTORY}/source/offline_compiler_helper.cpp ${OCLOC_DIRECTORY}/source/offline_compiler.cpp ${OCLOC_DIRECTORY}/source/offline_compiler.h ${OCLOC_DIRECTORY}/source/multi_command.cpp ${OCLOC_DIRECTORY}/source/multi_command.h ${OCLOC_DIRECTORY}/source/offline_compiler_options.cpp ${OCLOC_DIRECTORY}/source/${BRANCH_DIR_SUFFIX}/extra_settings.cpp ${NEO_SHARED_DIRECTORY}/compiler_interface/compiler_options/compiler_options_base.cpp ${NEO_SHARED_DIRECTORY}/compiler_interface/create_main.cpp 
${NEO_SHARED_DIRECTORY}/helpers/hw_info.cpp ${NEO_SOURCE_DIR}/opencl/source/platform/extensions.cpp ${NEO_SOURCE_DIR}/opencl/source/platform/extensions.h ) if(${IGA_HEADERS_AVAILABLE}) set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} ${OCLOC_DIRECTORY}/source/decoder/iga_wrapper.cpp ${OCLOC_DIRECTORY}/source/decoder${BRANCH_DIR_SUFFIX}/translate_platform.cpp ) else() set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} ${OCLOC_DIRECTORY}/source/decoder/iga_stubs.cpp ) endif() if(WIN32) list(APPEND CLOC_LIB_SRCS_LIB ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.h ${NEO_SOURCE_DIR}/opencl/source/dll/windows/options_windows.cpp ) else() list(APPEND CLOC_LIB_SRCS_LIB ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.h ${NEO_SOURCE_DIR}/opencl/source/dll/linux/options_linux.cpp ) endif() string(REPLACE ";" "," ALL_SUPPORTED_PRODUCT_FAMILIES "${ALL_SUPPORTED_PRODUCT_FAMILY}") set(CLOC_LIB_LIB_FLAGS_DEFINITIONS -DCIF_HEADERS_ONLY_BUILD -DALL_SUPPORTED_PRODUCT_FAMILIES=${ALL_SUPPORTED_PRODUCT_FAMILIES} ) set(RUNTIME_GENX_CPP_FILES hw_info ) macro(macro_for_each_platform) list(APPEND CLOC_LIB_SRCS_LIB ${NEO_SOURCE_DIR}/opencl/source/${GEN_TYPE_LOWER}/hw_info_${PLATFORM_IT_LOWER}.inl) endmacro() macro(macro_for_each_gen) foreach(SRC_IT ${RUNTIME_GENX_CPP_FILES}) set(SRC_FILE ${NEO_SOURCE_DIR}/opencl/source/${GEN_TYPE_LOWER}/${SRC_IT}) if(EXISTS ${SRC_FILE}_${GEN_TYPE_LOWER}.cpp) list(APPEND CLOC_LIB_SRCS_LIB ${SRC_FILE}_${GEN_TYPE_LOWER}.cpp) endif() endforeach() apply_macro_for_each_platform() list(APPEND CLOC_LIB_SRCS_LIB ${NEO_SHARED_DIRECTORY}/${GEN_TYPE_LOWER}/enable_${GEN_TYPE_LOWER}.cpp) endmacro() apply_macro_for_each_gen("SUPPORTED") set(CLOC_LIB_SRCS ${CLOC_LIB_SRCS_LIB} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ) add_library(ocloc_lib SHARED ${CLOC_LIB_SRCS}) set(CLOC_SRCS ${NEO_SHARED_DIRECTORY}/os_interface/os_library.h 
${OCLOC_DIRECTORY}/source/ocloc_wrapper.h ${OCLOC_DIRECTORY}/source/ocloc_wrapper.cpp ${OCLOC_DIRECTORY}/source/utilities/get_path.h ) if(WIN32) list(APPEND CLOC_SRCS ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.cpp ${NEO_SHARED_DIRECTORY}/os_interface/windows/os_library_win.h ${OCLOC_DIRECTORY}/source/utilities/windows/get_path.cpp ) else() list(APPEND CLOC_SRCS ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.cpp ${NEO_SHARED_DIRECTORY}/os_interface/linux/os_library_linux.h ${OCLOC_DIRECTORY}/source/utilities/linux/get_path.cpp ) endif() add_executable(ocloc ${CLOC_SRCS} ${OCLOC_DIRECTORY}/source/main.cpp ${CLOC_LIB_SRCS}) add_subdirectories() create_project_source_tree(ocloc_lib) set(CLOC_LIB_INCLUDES ${ENGINE_NODE_DIR} ${IGC_OCL_ADAPTOR_DIR} ${CIF_BASE_DIR} ${NEO__GMM_INCLUDE_DIR} ${KHRONOS_HEADERS_DIR} ${NEO__IGC_INCLUDE_DIR} ) target_include_directories(ocloc_lib BEFORE PRIVATE ${CLOC_LIB_INCLUDES}) target_include_directories(ocloc_lib BEFORE PRIVATE ${IGA_INCLUDE_DIR}) target_compile_definitions(ocloc_lib PUBLIC ${CLOC_LIB_LIB_FLAGS_DEFINITIONS} ${SUPPORTED_GEN_FLAGS_DEFINITONS} DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM} IGA_LIBRARY_NAME=${CMAKE_SHARED_LIBRARY_PREFIX}${IGA_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) target_compile_definitions(ocloc_lib PUBLIC ${NEO__IGC_COMPILE_DEFINITIONS}) if(MSVC) target_link_libraries(ocloc_lib dbghelp) target_link_libraries(ocloc dbghelp) endif() if(UNIX) target_link_libraries(ocloc_lib dl pthread) target_link_libraries(ocloc dl pthread) endif() set(CLOC_LIB_SRCS_LIB ${CLOC_LIB_SRCS_LIB} PARENT_SCOPE) add_dependencies(ocloc ocloc_lib) target_include_directories(ocloc BEFORE PRIVATE ${CLOC_LIB_INCLUDES}) target_include_directories(ocloc BEFORE PRIVATE ${IGA_INCLUDE_DIR}) target_compile_definitions(ocloc PUBLIC ${CLOC_LIB_LIB_FLAGS_DEFINITIONS} ${SUPPORTED_GEN_FLAGS_DEFINITONS} DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM} 
IGA_LIBRARY_NAME=${CMAKE_SHARED_LIBRARY_PREFIX}${IGA_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} MOCKABLE_VIRTUAL=) target_compile_definitions(ocloc PRIVATE ${NEO__IGC_COMPILE_DEFINITIONS}) set(OCLOC_LIB_NAME "ocloc") if(CMAKE_SIZEOF_VOID_P EQUAL 4) set(OCLOC_BITNESS_SUFFIX 32) elseif(CMAKE_SIZEOF_VOID_P EQUAL 8) set(OCLOC_BITNESS_SUFFIX 64) endif() if(UNIX) install(FILES $ DESTINATION ${CMAKE_INSTALL_BINDIR} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE COMPONENT ocloc) set_property(GLOBAL APPEND PROPERTY NEO_OCL_COMPONENTS_LIST "ocloc") set_target_properties(ocloc_lib PROPERTIES OUTPUT_NAME ${OCLOC_LIB_NAME}) add_definitions(-DOCLOC_LIB_NAME="lib${OCLOC_LIB_NAME}.so") else() set_target_properties(ocloc_lib PROPERTIES OUTPUT_NAME "${OCLOC_LIB_NAME}${OCLOC_BITNESS_SUFFIX}") add_definitions(-DOCLOC_LIB_NAME="${OCLOC_LIB_NAME}${OCLOC_BITNESS_SUFFIX}.dll") endif() create_project_source_tree(ocloc) set_target_properties(ocloc PROPERTIES FOLDER "offline_compiler") set_target_properties(ocloc_lib PROPERTIES FOLDER "offline_compiler") add_custom_target(copy_compiler_files DEPENDS ${NEO__IGC_TARGETS}) set_target_properties(copy_compiler_files PROPERTIES FOLDER "opencl runtime") if(WIN32) foreach(TARGET_tmp ${NEO__IGC_TARGETS}) add_custom_command( TARGET copy_compiler_files PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory $ COMMAND ${CMAKE_COMMAND} -E copy_if_different $ $ COMMAND ${CMAKE_COMMAND} -E make_directory $ COMMAND ${CMAKE_COMMAND} -E copy_if_different $ $ ) endforeach() endif() compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/000077500000000000000000000000001363734646600250445ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/binary_decoder.cpp000066400000000000000000000531031363734646600305230ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/offline_compiler/source/decoder/binary_decoder.h" #include "shared/offline_compiler/source/decoder/helper.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/source/device_binary_format/elf/elf_decoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/ptr_math.h" #include #include #include #ifdef _WIN32 #include #define MakeDirectory _mkdir #else #include #define MakeDirectory(dir) mkdir(dir, 0777) #endif template T readUnaligned(const void *ptr) { T retVal = 0; const uint8_t *tmp1 = reinterpret_cast(ptr); uint8_t *tmp2 = reinterpret_cast(&retVal); for (uint8_t i = 0; i < sizeof(T); ++i) { *(tmp2++) = *(tmp1++); } return retVal; } int BinaryDecoder::decode() { parseTokens(); std::stringstream ptmFile; auto devBinPtr = getDevBinary(); if (devBinPtr == nullptr) { argHelper->printf("Error! Device Binary section was not found.\n"); exit(1); } return processBinary(devBinPtr, ptmFile); } void BinaryDecoder::dumpField(const void *&binaryPtr, const PTField &field, std::ostream &ptmFile) { ptmFile << '\t' << static_cast(field.size) << ' '; switch (field.size) { case 1: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << +val << '\n'; break; } case 2: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << val << '\n'; break; } case 4: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << val << '\n'; break; } case 8: { auto val = readUnaligned(binaryPtr); ptmFile << field.name << " " << val << '\n'; break; } default: argHelper->printf("Error! 
Unknown size.\n"); exit(1); } binaryPtr = ptrOffset(binaryPtr, field.size); } const void *BinaryDecoder::getDevBinary() { binary = argHelper->readBinaryFile(binaryFile); const void *data = nullptr; std::string decoderErrors; std::string decoderWarnings; auto input = ArrayRef(reinterpret_cast(binary.data()), binary.size()); auto elf = NEO::Elf::decodeElf(input, decoderErrors, decoderWarnings); for (const auto §ionHeader : elf.sectionHeaders) { //Finding right section auto sectionData = ArrayRef(reinterpret_cast(sectionHeader.data.begin()), sectionHeader.data.size()); switch (sectionHeader.header->type) { case NEO::Elf::SHT_OPENCL_LLVM_BINARY: { argHelper->saveOutput(pathToDump + "llvm.bin", sectionData.begin(), sectionData.size()); break; } case NEO::Elf::SHT_OPENCL_SPIRV: { argHelper->saveOutput(pathToDump + "spirv.bin", sectionData.begin(), sectionData.size()); break; } case NEO::Elf::SHT_OPENCL_OPTIONS: { argHelper->saveOutput(pathToDump + "build.bin", sectionData.begin(), sectionData.size()); break; } case NEO::Elf::SHT_OPENCL_DEV_BINARY: { data = sectionData.begin(); break; } default: break; } } return data; } uint8_t BinaryDecoder::getSize(const std::string &typeStr) { if (typeStr == "uint8_t") { return 1; } else if (typeStr == "uint16_t") { return 2; } else if (typeStr == "uint32_t") { return 4; } else if (typeStr == "uint64_t") { return 8; } else { argHelper->printf("Unhandled type : %s\n", typeStr.c_str()); exit(1); } } std::vector BinaryDecoder::loadPatchList() { if (argHelper->hasHeaders()) { return argHelper->headersToVectorOfStrings(); } else { std::vector patchList; if (pathToPatch.empty()) { argHelper->printf("Path to patch list not provided - using defaults, skipping patchokens as undefined.\n"); patchList = { "struct SProgramBinaryHeader", "{", " uint32_t Magic;", " uint32_t Version;", " uint32_t Device;", " uint32_t GPUPointerSizeInBytes;", " uint32_t NumberOfKernels;", " uint32_t SteppingId;", " uint32_t PatchListSize;", "};", "", "struct 
SKernelBinaryHeader", "{", " uint32_t CheckSum;", " uint64_t ShaderHashCode;", " uint32_t KernelNameSize;", " uint32_t PatchListSize;", "};", "", "struct SKernelBinaryHeaderCommon :", " SKernelBinaryHeader", "{", " uint32_t KernelHeapSize;", " uint32_t GeneralStateHeapSize;", " uint32_t DynamicStateHeapSize;", " uint32_t SurfaceStateHeapSize;", " uint32_t KernelUnpaddedSize;", "};", "", "enum PATCH_TOKEN", "{", " PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 41 @SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo@", " PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO, // 42 @SPatchAllocateConstantMemorySurfaceProgramBinaryInfo@", "};", "struct SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo :", " SPatchItemHeader", "{", " uint32_t Type;", " uint32_t GlobalBufferIndex;", " uint32_t InlineDataSize;", "};", "struct SPatchAllocateConstantMemorySurfaceProgramBinaryInfo :", " SPatchItemHeader", "{", " uint32_t ConstantBufferIndex;", " uint32_t InlineDataSize;", "};", }; } else { readFileToVectorOfStrings(patchList, pathToPatch + "patch_list.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_shared.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g7.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g8.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g9.h", true); readFileToVectorOfStrings(patchList, pathToPatch + "patch_g10.h", true); } return patchList; } } void BinaryDecoder::parseTokens() { //Creating patchlist definitions auto patchList = loadPatchList(); size_t pos = findPos(patchList, "struct SProgramBinaryHeader"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find SProgramBinaryHeader."); exit(1); } pos = findPos(patchList, "enum PATCH_TOKEN"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find enum PATCH_TOKEN."); exit(1); } pos = findPos(patchList, 
"struct SKernelBinaryHeader"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find SKernelBinaryHeader."); exit(1); } pos = findPos(patchList, "struct SKernelBinaryHeaderCommon :"); if (pos == patchList.size()) { argHelper->printf("While parsing patchtoken definitions: couldn't find SKernelBinaryHeaderCommon."); exit(1); } // Reading all Patch Tokens and according structs size_t patchTokenEnumPos = findPos(patchList, "enum PATCH_TOKEN"); if (patchTokenEnumPos == patchList.size()) { exit(1); } for (auto i = patchTokenEnumPos + 1; i < patchList.size(); ++i) { if (patchList[i].find("};") != std::string::npos) { break; } else if (patchList[i].find("PATCH_TOKEN") == std::string::npos) { continue; } else if (patchList[i].find("@") == std::string::npos) { continue; } size_t patchTokenNoStartPos, patchTokenNoEndPos; patchTokenNoStartPos = patchList[i].find('/') + 3; patchTokenNoEndPos = patchList[i].find(' ', patchTokenNoStartPos); std::stringstream patchTokenNoStream(patchList[i].substr(patchTokenNoStartPos, patchTokenNoEndPos - patchTokenNoStartPos)); int patchNo; patchTokenNoStream >> patchNo; auto patchTokenPtr = std::make_unique(); size_t nameStartPos, nameEndPos; nameStartPos = patchList[i].find("PATCH_TOKEN"); nameEndPos = patchList[i].find(',', nameStartPos); patchTokenPtr->name = patchList[i].substr(nameStartPos, nameEndPos - nameStartPos); nameStartPos = patchList[i].find("@"); nameEndPos = patchList[i].find('@', nameStartPos + 1); if (nameEndPos == std::string::npos) { continue; } std::string structName = "struct " + patchList[i].substr(nameStartPos + 1, nameEndPos - nameStartPos - 1) + " :"; size_t structPos = findPos(patchList, structName); if (structPos == patchList.size()) { continue; } patchTokenPtr->size = readStructFields(patchList, structPos + 1, patchTokenPtr->fields); patchTokens[static_cast(patchNo)] = std::move(patchTokenPtr); } //Finding and reading Program Binary Header size_t structPos = 
findPos(patchList, "struct SProgramBinaryHeader") + 1; programHeader.size = readStructFields(patchList, structPos, programHeader.fields); //Finding and reading Kernel Binary Header structPos = findPos(patchList, "struct SKernelBinaryHeader") + 1; kernelHeader.size = readStructFields(patchList, structPos, kernelHeader.fields); structPos = findPos(patchList, "struct SKernelBinaryHeaderCommon :") + 1; kernelHeader.size += readStructFields(patchList, structPos, kernelHeader.fields); } void BinaryDecoder::printHelp() { argHelper->printf(R"===(Disassembles Intel Compute GPU device binary files. Output of such operation is a set of files that can be later used to reassemble back a valid Intel Compute GPU device binary (using ocloc 'asm' command). This set of files contains: Program-scope data : - spirv.bin (optional) - spirV representation of the program from which the input binary was generated - build.bin - build options that were used when generating the input binary - PTM.txt - 'patch tokens' describing program-scope and kernel-scope metadata about the input binary Kernel-scope data ( is replaced by corresponding kernel's name): - _DynamicStateHeap.bin - initial DynamicStateHeap (binary file) - _SurfaceStateHeap.bin - initial SurfaceStateHeap (binary file) - _KernelHeap.asm - list of instructions describing the kernel function (text file) Usage: ocloc disasm -file [-patch ] [-dump ] [-device ] [-ignore_isa_padding] -file Input file to be disassembled. This file should be an Intel Compute GPU device binary. -patch Optional path to the directory containing patchtoken definitions (patchlist.h, etc.) as defined in intel-graphics-compiler (IGC) repo, IGC subdirectory : IGC/AdaptorOCL/ocl_igc_shared/executable_format By default (when patchtokens_dir is not provided) patchtokens won't be decoded. -dump Optional path for files representing decoded binary. Default is './dump'. 
-device Optional target device of input binary can be: %s By default ocloc will pick base device within a generation - i.e. both skl and kbl will fallback to skl. If specific product (e.g. kbl) is needed, provide it as device_type. -ignore_isa_padding Ignores Kernel Heap padding - Kernel Heap binary will be saved without padding. --help Print this usage message. Examples: Disassemble Intel Compute GPU device binary ocloc disasm -file source_file_Gen9core.bin )===", NEO::getDevicesTypes().c_str()); } int BinaryDecoder::processBinary(const void *&ptr, std::ostream &ptmFile) { ptmFile << "ProgramBinaryHeader:\n"; uint32_t numberOfKernels = 0, patchListSize = 0, device = 0; for (const auto &v : programHeader.fields) { if (v.name == "NumberOfKernels") { numberOfKernels = readUnaligned(ptr); } else if (v.name == "PatchListSize") { patchListSize = readUnaligned(ptr); } else if (v.name == "Device") { device = readUnaligned(ptr); } dumpField(ptr, v, ptmFile); } if (numberOfKernels == 0) { argHelper->printf("Warning! 
Number of Kernels is 0.\n"); } readPatchTokens(ptr, patchListSize, ptmFile); iga->setGfxCore(static_cast(device)); //Reading Kernels for (uint32_t i = 0; i < numberOfKernels; ++i) { ptmFile << "Kernel #" << i << '\n'; processKernel(ptr, ptmFile); } argHelper->saveOutput(pathToDump + "PTM.txt", ptmFile); return 0; } void BinaryDecoder::processKernel(const void *&ptr, std::ostream &ptmFile) { uint32_t KernelNameSize = 0, KernelPatchListSize = 0, KernelHeapSize = 0, KernelHeapUnpaddedSize = 0, GeneralStateHeapSize = 0, DynamicStateHeapSize = 0, SurfaceStateHeapSize = 0; ptmFile << "KernelBinaryHeader:\n"; for (const auto &v : kernelHeader.fields) { if (v.name == "PatchListSize") KernelPatchListSize = readUnaligned(ptr); else if (v.name == "KernelNameSize") KernelNameSize = readUnaligned(ptr); else if (v.name == "KernelHeapSize") KernelHeapSize = readUnaligned(ptr); else if (v.name == "KernelUnpaddedSize") KernelHeapUnpaddedSize = readUnaligned(ptr); else if (v.name == "GeneralStateHeapSize") GeneralStateHeapSize = readUnaligned(ptr); else if (v.name == "DynamicStateHeapSize") DynamicStateHeapSize = readUnaligned(ptr); else if (v.name == "SurfaceStateHeapSize") SurfaceStateHeapSize = readUnaligned(ptr); dumpField(ptr, v, ptmFile); } if (KernelNameSize == 0) { argHelper->printf("Error! 
KernelNameSize was 0.\n"); exit(1); } ptmFile << "\tKernelName "; std::string kernelName(static_cast(ptr), 0, KernelNameSize); ptmFile << kernelName << '\n'; ptr = ptrOffset(ptr, KernelNameSize); std::string fileName = pathToDump + kernelName + "_KernelHeap"; argHelper->printf("Trying to disassemble %s.krn\n", kernelName.c_str()); std::string disassembledKernel; if (iga->tryDisassembleGenISA(ptr, KernelHeapUnpaddedSize, disassembledKernel)) { argHelper->saveOutput(fileName + ".asm", disassembledKernel.data(), disassembledKernel.size()); } else { if (ignoreIsaPadding) { argHelper->saveOutput(fileName + ".dat", ptr, KernelHeapUnpaddedSize); } else { argHelper->saveOutput(fileName + ".dat", ptr, KernelHeapSize); } } ptr = ptrOffset(ptr, KernelHeapSize); if (GeneralStateHeapSize != 0) { argHelper->printf("Warning! GeneralStateHeapSize wasn't 0.\n"); fileName = pathToDump + kernelName + "_GeneralStateHeap.bin"; argHelper->saveOutput(fileName, ptr, DynamicStateHeapSize); ptr = ptrOffset(ptr, GeneralStateHeapSize); } fileName = pathToDump + kernelName + "_DynamicStateHeap.bin"; argHelper->saveOutput(fileName, ptr, DynamicStateHeapSize); ptr = ptrOffset(ptr, DynamicStateHeapSize); fileName = pathToDump + kernelName + "_SurfaceStateHeap.bin"; argHelper->saveOutput(fileName, ptr, SurfaceStateHeapSize); ptr = ptrOffset(ptr, SurfaceStateHeapSize); if (KernelPatchListSize == 0) { argHelper->printf("Warning! 
Kernel's patch list size was 0.\n"); } readPatchTokens(ptr, KernelPatchListSize, ptmFile); } void BinaryDecoder::readPatchTokens(const void *&patchListPtr, uint32_t patchListSize, std::ostream &ptmFile) { auto endPatchListPtr = ptrOffset(patchListPtr, patchListSize); while (patchListPtr != endPatchListPtr) { auto patchTokenPtr = patchListPtr; auto token = readUnaligned(patchTokenPtr); patchTokenPtr = ptrOffset(patchTokenPtr, sizeof(uint32_t)); auto Size = readUnaligned(patchTokenPtr); patchTokenPtr = ptrOffset(patchTokenPtr, sizeof(uint32_t)); if (patchTokens.count(token) > 0) { ptmFile << patchTokens[(token)]->name << ":\n"; } else { ptmFile << "Unidentified PatchToken:\n"; } ptmFile << '\t' << "4 Token " << token << '\n'; ptmFile << '\t' << "4 Size " << Size << '\n'; if (patchTokens.count(token) > 0) { uint32_t fieldsSize = 0; for (const auto &v : patchTokens[(token)]->fields) { if ((fieldsSize += static_cast(v.size)) > (Size - sizeof(uint32_t) * 2)) { break; } if (v.name == "InlineDataSize") { // Because InlineData field value is not added to PT size auto inlineDataSize = readUnaligned(patchTokenPtr); patchListPtr = ptrOffset(patchListPtr, inlineDataSize); } dumpField(patchTokenPtr, v, ptmFile); } } patchListPtr = ptrOffset(patchListPtr, Size); if (patchListPtr > patchTokenPtr) { ptmFile << "\tHex"; const uint8_t *byte = reinterpret_cast(patchTokenPtr); while (ptrDiff(patchListPtr, patchTokenPtr) != 0) { ptmFile << ' ' << std::hex << +*(byte++); patchTokenPtr = ptrOffset(patchTokenPtr, sizeof(uint8_t)); } ptmFile << std::dec << '\n'; } } } uint32_t BinaryDecoder::readStructFields(const std::vector &patchList, const size_t &structPos, std::vector &fields) { std::string typeStr, fieldName; uint8_t size; uint32_t fullSize = 0; size_t f1, f2; for (auto i = structPos; i < patchList.size(); ++i) { if (patchList[i].find("};") != std::string::npos) { break; } else if (patchList[i].find("int") == std::string::npos) { continue; } f1 = patchList[i].find_first_not_of(' '); 
f2 = patchList[i].find(' ', f1 + 1); typeStr = patchList[i].substr(f1, f2 - f1); size = getSize(typeStr); f1 = patchList[i].find_first_not_of(' ', f2); f2 = patchList[i].find(';'); fieldName = patchList[i].substr(f1, f2 - f1); fields.push_back(PTField{size, fieldName}); fullSize += size; } return fullSize; } int BinaryDecoder::validateInput(const std::vector &args) { if (args[args.size() - 1] == "-help") { printHelp(); return -1; } for (size_t argIndex = 2; argIndex < args.size(); ++argIndex) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if ("-file" == currArg && hasMoreArgs) { binaryFile = args[++argIndex]; } else if ("-device" == currArg && hasMoreArgs) { iga->setProductFamily(getProductFamilyFromDeviceName(args[++argIndex])); } else if ("-patch" == currArg && hasMoreArgs) { pathToPatch = args[++argIndex]; addSlash(pathToPatch); } else if ("-dump" == currArg && hasMoreArgs) { pathToDump = args[++argIndex]; addSlash(pathToDump); } else if ("-ignore_isa_padding" == currArg) { ignoreIsaPadding = true; } else if ("-q" == currArg) { argHelper->getPrinterRef() = MessagePrinter(true); iga->setMessagePrinter(argHelper->getPrinterRef()); } else { argHelper->printf("Unknown argument %s\n", currArg.c_str()); printHelp(); return -1; } } if (binaryFile.find(".bin") == std::string::npos) { argHelper->printf(".bin extension is expected for binary file.\n"); printHelp(); return -1; } if (false == iga->isKnownPlatform()) { argHelper->printf("Warning : missing or invalid -device parameter - results may be inacurate\n"); } if (!argHelper->outputEnabled()) { if (pathToDump.empty()) { argHelper->printf("Warning : Path to dump folder not specificed - using ./dump as default.\n"); pathToDump = std::string("dump/"); } MakeDirectory(pathToDump.c_str()); } return 0; } compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/binary_decoder.h000066400000000000000000000037311363734646600301720ustar00rootroot00000000000000/* * 
Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/decoder/helper.h" #include "shared/offline_compiler/source/decoder/iga_wrapper.h" #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include #include #include #include struct PTField { uint8_t size = 0U; std::string name; }; struct BinaryHeader { std::vector fields; uint32_t size = 0U; }; struct PatchToken : BinaryHeader { std::string name; }; using PTMap = std::unordered_map>; class BinaryDecoder { public: BinaryDecoder(const std::string &file, const std::string &patch, const std::string &dump) : binaryFile(file), pathToPatch(patch), pathToDump(dump){}; BinaryDecoder(OclocArgHelper *helper) : argHelper(helper), iga(new IgaWrapper) { iga->setMessagePrinter(argHelper->getPrinterRef()); }; int decode(); int validateInput(const std::vector &args); protected: OclocArgHelper *argHelper = nullptr; bool ignoreIsaPadding = false; BinaryHeader programHeader, kernelHeader; std::vector binary; std::unique_ptr iga; PTMap patchTokens; std::string binaryFile, pathToPatch, pathToDump; void dumpField(const void *&binaryPtr, const PTField &field, std::ostream &ptmFile); uint8_t getSize(const std::string &typeStr); const void *getDevBinary(); std::vector loadPatchList(); void parseTokens(); void printHelp(); int processBinary(const void *&ptr, std::ostream &ptmFile); void processKernel(const void *&ptr, std::ostream &ptmFile); void readPatchTokens(const void *&patchListPtr, uint32_t patchListSize, std::ostream &ptmFile); uint32_t readStructFields(const std::vector &patchList, const size_t &structPos, std::vector &fields); }; compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/binary_encoder.cpp000066400000000000000000000410211363734646600305310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "binary_encoder.h" #include 
"shared/offline_compiler/source/offline_compiler.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include "CL/cl.h" #include "helper.h" #include #include #include #include void BinaryEncoder::calculatePatchListSizes(std::vector &ptmFile) { size_t patchListPos = 0; for (size_t i = 0; i < ptmFile.size(); ++i) { if (ptmFile[i].find("PatchListSize") != std::string::npos) { patchListPos = i; } else if (ptmFile[i].find("PATCH_TOKEN") != std::string::npos) { uint32_t calcSize = 0; i++; while (i < ptmFile.size() && ptmFile[i].find("Kernel #") == std::string::npos) { if (ptmFile[i].find(':') == std::string::npos) { if (ptmFile[i].find("Hex") != std::string::npos) { calcSize += static_cast(std::count(ptmFile[i].begin(), ptmFile[i].end(), ' ')); } else { calcSize += std::atoi(&ptmFile[i][1]); } } i++; } uint32_t size = static_cast(std::stoul(ptmFile[patchListPos].substr(ptmFile[patchListPos].find_last_of(' ') + 1))); if (size != calcSize) { argHelper->printf("Warning! Calculated PatchListSize ( %u ) differs from file ( %u ) - changing it. 
Line %d\n", calcSize, size, static_cast(patchListPos + 1)); ptmFile[patchListPos] = ptmFile[patchListPos].substr(0, ptmFile[patchListPos].find_last_of(' ') + 1); ptmFile[patchListPos] += std::to_string(calcSize); } } } } bool BinaryEncoder::copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary, uint32_t *binaryLength) { if (argHelper->fileExists(srcFileName)) { return false; } auto binary = argHelper->readBinaryFile(srcFileName); auto length = binary.size(); outBinary.write(binary.data(), length); if (binaryLength) { *binaryLength = static_cast(length); } return true; } int BinaryEncoder::createElf(std::stringstream &deviceBinary) { NEO::Elf::ElfEncoder ElfEncoder; ElfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_EXECUTABLE; //Build Options if (argHelper->fileExists(pathToDump + "build.bin")) { auto binary = argHelper->readBinaryFile(pathToDump + "build.bin"); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_OPTIONS, "BuildOptions", ArrayRef(reinterpret_cast(binary.data()), binary.size())); } else { argHelper->printf("Warning! Missing build section.\n"); } //LLVM or SPIRV if (argHelper->fileExists(pathToDump + "llvm.bin")) { auto binary = argHelper->readBinaryFile(pathToDump + "llvm.bin"); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_LLVM_BINARY, "Intel(R) OpenCL LLVM Object", ArrayRef(reinterpret_cast(binary.data()), binary.size())); } else if (argHelper->fileExists(pathToDump + "spirv.bin")) { auto binary = argHelper->readBinaryFile(pathToDump + "spirv.bin"); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV, "SPIRV Object", ArrayRef(reinterpret_cast(binary.data()), binary.size())); } else { argHelper->printf("Warning! 
Missing llvm/spirv section.\n"); } //Device Binary auto deviceBinaryStr = deviceBinary.str(); std::vector binary(deviceBinaryStr.begin(), deviceBinaryStr.end()); ElfEncoder.appendSection(NEO::Elf::SHT_OPENCL_DEV_BINARY, "Intel(R) OpenCL Device Binary", ArrayRef(reinterpret_cast(binary.data()), binary.size())); //Resolve Elf Binary auto elfBinary = ElfEncoder.encode(); argHelper->saveOutput(elfName, elfBinary.data(), elfBinary.size()); return 0; } void BinaryEncoder::printHelp() { argHelper->printf(R"===(Assembles Intel Compute GPU device binary from input files. It's expected that input files were previously generated by 'ocloc disasm' command or are compatible with 'ocloc disasm' output (especially in terms of file naming scheme). See 'ocloc disasm --help' for additional info. Usage: ocloc asm -out [-dump ] [-device ] [-ignore_isa_padding] -out Filename for newly assembled binary. -dump Path to the input directory containing disassembled binary (as disassembled by ocloc's disasm command). Default is './dump'. -device Optional target device of output binary can be: %s By default ocloc will pick base device within a generation - i.e. both skl and kbl will fallback to skl. If specific product (e.g. kbl) is needed, provide it as device_type. -ignore_isa_padding Ignores Kernel Heap padding - padding will not be added to Kernel Heap binary. --help Print this usage message. Examples: Assemble to Intel Compute GPU device binary ocloc asm -out reassembled.bin )===", NEO::getDevicesTypes().c_str()); } int BinaryEncoder::encode() { std::vector ptmFile; if (!argHelper->fileExists(pathToDump + "PTM.txt")) { argHelper->printf("Error! 
Couldn't find PTM.txt"); return -1; } argHelper->readFileToVectorOfStrings(pathToDump + "PTM.txt", ptmFile); calculatePatchListSizes(ptmFile); std::stringstream deviceBinary; //(pathToDump + "device_binary.bin", std::ios::binary); int retVal = processBinary(ptmFile, deviceBinary); argHelper->saveOutput(pathToDump + "device_binary.bin", deviceBinary.str().c_str(), deviceBinary.str().length()); if (retVal != 0) { return retVal; } retVal = createElf(deviceBinary); return retVal; } int BinaryEncoder::processBinary(const std::vector &ptmFileLines, std::ostream &deviceBinary) { if (false == iga->isKnownPlatform()) { auto deviceMarker = findPos(ptmFileLines, "Device"); if (deviceMarker != ptmFileLines.size()) { std::stringstream ss(ptmFileLines[deviceMarker]); ss.ignore(32, ' '); ss.ignore(32, ' '); uint32_t gfxCore = 0; ss >> gfxCore; iga->setGfxCore(static_cast(gfxCore)); } } size_t i = 0; while (i < ptmFileLines.size()) { if (ptmFileLines[i].find("Kernel #") != std::string::npos) { if (processKernel(++i, ptmFileLines, deviceBinary)) { argHelper->printf("Warning while processing kernel!\n"); return -1; } } else if (writeDeviceBinary(ptmFileLines[i++], deviceBinary)) { argHelper->printf("Error while writing to binary!\n"); return -1; } } return 0; } void BinaryEncoder::addPadding(std::ostream &out, size_t numBytes) { for (size_t i = 0; i < numBytes; ++i) { const char nullByte = 0; out.write(&nullByte, 1U); } } int BinaryEncoder::processKernel(size_t &line, const std::vector &ptmFileLines, std::ostream &deviceBinary) { auto kernelInfoBeginMarker = line; auto kernelInfoEndMarker = ptmFileLines.size(); auto kernelNameMarker = ptmFileLines.size(); auto kernelPatchtokensMarker = ptmFileLines.size(); std::stringstream kernelBlob; // Normally these are added by the compiler, need to take or of them when reassembling constexpr size_t isaPaddingSizeInBytes = 128; constexpr uint32_t kernelHeapAlignmentInBytes = 64; uint32_t kernelNameSizeInBinary = 0; std::string kernelName; // 
Scan PTM lines for kernel info while (line < ptmFileLines.size()) { if (ptmFileLines[line].find("KernelName ") != std::string::npos) { kernelName = std::string(ptmFileLines[line], ptmFileLines[line].find(' ') + 1); kernelNameMarker = line; kernelPatchtokensMarker = kernelNameMarker + 1; // patchtokens come after name } else if (ptmFileLines[line].find("KernelNameSize") != std::string::npos) { std::stringstream ss(ptmFileLines[line]); ss.ignore(32, ' '); ss.ignore(32, ' '); ss >> kernelNameSizeInBinary; } else if (ptmFileLines[line].find("Kernel #") != std::string::npos) { kernelInfoEndMarker = line; break; } ++line; } // Write KernelName and padding kernelBlob.write(kernelName.c_str(), kernelName.size()); addPadding(kernelBlob, kernelNameSizeInBinary - kernelName.size()); // Write KernelHeap and padding uint32_t kernelHeapSizeUnpadded = 0U; bool heapsCopiedSuccesfully = true; // Use .asm if available, fallback to .dat if (argHelper->fileExists(pathToDump + kernelName + "_KernelHeap.asm")) { auto kernelAsAsm = argHelper->readBinaryFile(pathToDump + kernelName + "_KernelHeap.asm"); std::string kernelAsBinary; argHelper->printf("Trying to assemble %s.asm\n", kernelName.c_str()); if (false == iga->tryAssembleGenISA(std::string(kernelAsAsm.begin(), kernelAsAsm.end()), kernelAsBinary)) { argHelper->printf("Error : Could not assemble : %s\n", kernelName.c_str()); return -1; } kernelHeapSizeUnpadded = static_cast(kernelAsBinary.size()); kernelBlob.write(kernelAsBinary.data(), kernelAsBinary.size()); } else { heapsCopiedSuccesfully = copyBinaryToBinary(pathToDump + kernelName + "_KernelHeap.dat", kernelBlob, &kernelHeapSizeUnpadded); } uint32_t kernelHeapSize = 0U; // Adding padding and alignment if (ignoreIsaPadding) { kernelHeapSize = kernelHeapSizeUnpadded; } else { addPadding(kernelBlob, isaPaddingSizeInBytes); const uint32_t kernelHeapPaddedSize = kernelHeapSizeUnpadded + isaPaddingSizeInBytes; kernelHeapSize = alignUp(kernelHeapPaddedSize, kernelHeapAlignmentInBytes); 
addPadding(kernelBlob, kernelHeapSize - kernelHeapPaddedSize); } // Write GeneralStateHeap, DynamicStateHeap, SurfaceStateHeap if (argHelper->fileExists(pathToDump + kernelName + "_GeneralStateHeap.bin")) { heapsCopiedSuccesfully = heapsCopiedSuccesfully && copyBinaryToBinary(pathToDump + kernelName + "_GeneralStateHeap.bin", kernelBlob); } heapsCopiedSuccesfully = heapsCopiedSuccesfully && copyBinaryToBinary(pathToDump + kernelName + "_DynamicStateHeap.bin", kernelBlob); heapsCopiedSuccesfully = heapsCopiedSuccesfully && copyBinaryToBinary(pathToDump + kernelName + "_SurfaceStateHeap.bin", kernelBlob); if (false == heapsCopiedSuccesfully) { return -1; } // Write kernel patchtokens for (size_t i = kernelPatchtokensMarker; i < kernelInfoEndMarker; ++i) { if (writeDeviceBinary(ptmFileLines[i], kernelBlob)) { argHelper->printf("Error while writing to binary.\n"); return -1; } } auto kernelBlobData = kernelBlob.str(); uint64_t hashValue = NEO::Hash::hash(reinterpret_cast(kernelBlobData.data()), kernelBlobData.size()); uint32_t calcCheckSum = hashValue & 0xFFFFFFFF; // Add kernel header for (size_t i = kernelInfoBeginMarker; i < kernelNameMarker; ++i) { if (ptmFileLines[i].find("CheckSum") != std::string::npos) { static_assert(std::is_same::value, ""); deviceBinary.write(reinterpret_cast(&calcCheckSum), sizeof(uint32_t)); } else if (ptmFileLines[i].find("KernelHeapSize") != std::string::npos) { static_assert(sizeof(kernelHeapSize) == sizeof(uint32_t), ""); deviceBinary.write(reinterpret_cast(&kernelHeapSize), sizeof(uint32_t)); } else if (ptmFileLines[i].find("KernelUnpaddedSize") != std::string::npos) { static_assert(sizeof(kernelHeapSizeUnpadded) == sizeof(uint32_t), ""); deviceBinary.write(reinterpret_cast(&kernelHeapSizeUnpadded), sizeof(uint32_t)); } else { if (writeDeviceBinary(ptmFileLines[i], deviceBinary)) { argHelper->printf("Error while writing to binary.\n"); return -1; } } } // Add kernel blob after the header deviceBinary.write(kernelBlobData.c_str(), 
kernelBlobData.size()); return 0; } int BinaryEncoder::validateInput(const std::vector &args) { if ("-help" == args[args.size() - 1]) { printHelp(); return -1; } for (size_t argIndex = 2; argIndex < args.size(); ++argIndex) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if ("-dump" == currArg && hasMoreArgs) { pathToDump = args[++argIndex]; addSlash(pathToDump); } else if ("-device" == currArg && hasMoreArgs) { iga->setProductFamily(getProductFamilyFromDeviceName(args[++argIndex])); } else if ("-out" == currArg && hasMoreArgs) { elfName = args[++argIndex]; } else if ("-ignore_isa_padding" == currArg) { ignoreIsaPadding = true; } else if ("-q" == currArg) { argHelper->getPrinterRef() = MessagePrinter(true); iga->setMessagePrinter(argHelper->getPrinterRef()); } else { argHelper->printf("Unknown argument %s\n", currArg.c_str()); printHelp(); return -1; } } if (pathToDump.empty()) { if (!argHelper->outputEnabled()) { argHelper->printf("Warning : Path to dump folder not specificed - using ./dump as default.\n"); pathToDump = "dump"; addSlash(pathToDump); } } if (elfName.find(".bin") == std::string::npos) { argHelper->printf(".bin extension is expected for binary file.\n"); printHelp(); return -1; } if (false == iga->isKnownPlatform()) { argHelper->printf("Warning : missing or invalid -device parameter - results may be inacurate\n"); } return 0; } template void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary) { T val; in >> val; deviceBinary.write(reinterpret_cast(&val), sizeof(T)); } template <> void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary) { uint8_t val; uint16_t help; in >> help; val = static_cast(help); deviceBinary.write(reinterpret_cast(&val), sizeof(uint8_t)); } template void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary); template void BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary); template void 
BinaryEncoder::write(std::stringstream &in, std::ostream &deviceBinary); int BinaryEncoder::writeDeviceBinary(const std::string &line, std::ostream &deviceBinary) { if (line.find(':') != std::string::npos) { return 0; } else if (line.find("Hex") != std::string::npos) { std::stringstream ss(line); ss.ignore(32, ' '); uint16_t tmp; uint8_t byte; while (!ss.eof()) { ss >> std::hex >> tmp; byte = static_cast(tmp); deviceBinary.write(reinterpret_cast(&byte), sizeof(uint8_t)); } } else { std::stringstream ss(line); uint16_t size; std::string name; ss >> size; ss >> name; switch (size) { case 1: write(ss, deviceBinary); break; case 2: write(ss, deviceBinary); break; case 4: write(ss, deviceBinary); break; case 8: write(ss, deviceBinary); break; default: argHelper->printf("Unknown size in line: %s\n", line.c_str()); return -1; } } return 0; } compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/binary_encoder.h000066400000000000000000000032051363734646600302000ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "helper.h" #include "iga_wrapper.h" #include #include #include class BinaryEncoder { public: BinaryEncoder(const std::string &dump, const std::string &elf) : pathToDump(dump), elfName(elf){}; BinaryEncoder(OclocArgHelper *helper) : argHelper(helper), iga(new IgaWrapper) { iga->setMessagePrinter(argHelper->getPrinterRef()); } int encode(); int validateInput(const std::vector &args); protected: OclocArgHelper *argHelper = nullptr; bool ignoreIsaPadding = false; std::string pathToDump, elfName; std::unique_ptr iga; void calculatePatchListSizes(std::vector &ptmFile); MOCKABLE_VIRTUAL bool copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary, uint32_t *binaryLength); bool copyBinaryToBinary(const std::string &srcFileName, std::ostream &outBinary) { return copyBinaryToBinary(srcFileName, 
// Makes sure a non-empty path ends with a directory separator.
// Empty paths and paths already ending in '/' or '\\' are left untouched.
void addSlash(std::string &path) {
    if (path.empty()) {
        return;
    }
    const char tail = path.back();
    const bool endsWithSeparator = (tail == '/') || (tail == '\\');
    if (!endsWithSeparator) {
        path.push_back('/');
    }
}
// Returns the index of the first line in which whatToFind occurs as a whole word,
// i.e. the first occurrence in that line is followed by end-of-line, a space,
// a tab, '\n' or '\r'. Returns lines.size() when there is no such line.
size_t findPos(const std::vector<std::string> &lines, const std::string &whatToFind) {
    for (size_t i = 0; i < lines.size(); ++i) {
        const auto pos = lines[i].find(whatToFind);
        if (pos == std::string::npos) {
            continue;
        }
        const auto matchEnd = pos + whatToFind.size();
        if (matchEnd == lines[i].size()) {
            return i;
        }
        // These used to be assignments ('='), which made every delimiter acceptable.
        const char delimiter = lines[i][matchEnd];
        if ((delimiter == ' ') || (delimiter == '\t') || (delimiter == '\n') || (delimiter == '\r')) {
            return i;
        }
    }
    return lines.size();
}
// Formats args according to the printf-style format and returns the result.
// Returns an empty string when formatting fails or produces no characters.
template <typename... Args>
std::string stringFormat(const std::string &format, Args... args) {
    std::string outputString;
    // First pass only measures; the result excludes the terminating NUL.
    const int length = snprintf(nullptr, 0, format.c_str(), args...);
    if (length <= 0) {
        return outputString;
    }
    // snprintf needs room for the terminator while writing ...
    outputString.resize(static_cast<size_t>(length) + 1);
    snprintf(&*outputString.begin(), outputString.size(), format.c_str(), args...);
    // ... but it must not stay in the string, otherwise every message adds a '\0' to the log.
    outputString.resize(static_cast<size_t>(length));
    return outputString;
}
compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/iga_wrapper.cpp000066400000000000000000000142311363734646600300510ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "iga_wrapper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_library.h" #include "opencl/source/os_interface/os_inc_base.h" #include "helper.h" #include "igfxfmid.h" #include "translate_platform_base.h" #include struct IgaLibrary { pIGAAssemble assemble = nullptr; pIGAContextCreate contextCreate = nullptr; pIGAContextGetErrors contextGetErrors = nullptr; pIGAContextGetWarnings contextGetWarnings = nullptr; pIGAContextRelease contextRelease = nullptr; pIGADisassemble disassemble = nullptr; pIGAStatusToString statusToString = nullptr; iga_context_options_t OptsContext = {}; std::unique_ptr library; bool isLoaded() { return library != nullptr; } }; struct IgaWrapper::Impl { iga_gen_t igaGen = IGA_GEN_INVALID; IgaLibrary igaLib; void loadIga() { IgaLibrary iga; iga.OptsContext.cb = sizeof(igaLib.OptsContext); iga.OptsContext.gen = igaGen; #define STR2(X) #X #define STR(X) STR2(X) iga.library.reset(NEO::OsLibrary::load(STR(IGA_LIBRARY_NAME))); if (iga.library == nullptr) { return; } #define LOAD_OR_ERROR(MEMBER, FUNC_NAME) \ if (nullptr == (iga.MEMBER = reinterpret_cast(iga.library->getProcAddress(FUNC_NAME)))) { \ printf("Warning : Couldn't find %s in %s\n", FUNC_NAME, STR(IGA_LIBRARY_NAME)); \ return; \ } LOAD_OR_ERROR(assemble, IGA_ASSEMBLE_STR); LOAD_OR_ERROR(contextCreate, IGA_CONTEXT_CREATE_STR); LOAD_OR_ERROR(contextGetErrors, IGA_CONTEXT_GET_ERRORS_STR); LOAD_OR_ERROR(contextGetWarnings, IGA_CONTEXT_GET_WARNINGS_STR); LOAD_OR_ERROR(contextRelease, IGA_CONTEXT_RELEASE_STR); LOAD_OR_ERROR(disassemble, IGA_DISASSEMBLE_STR); LOAD_OR_ERROR(statusToString, IGA_STATUS_TO_STRING_STR); #undef LOAD_OR_ERROR #undef STR #undef STR2 this->igaLib = std::move(iga); } }; 
IgaWrapper::IgaWrapper() : pimpl(std::make_unique()) { } IgaWrapper::~IgaWrapper() = default; bool IgaWrapper::tryDisassembleGenISA(const void *kernelPtr, uint32_t kernelSize, std::string &out) { if (false == tryLoadIga()) { messagePrinter->printf("Warning: couldn't load iga - kernel binaries won't be disassembled.\n"); return false; } iga_context_t context; iga_disassemble_options_t disassembleOptions = IGA_DISASSEMBLE_OPTIONS_INIT(); iga_status_t stat; stat = pimpl->igaLib.contextCreate(&pimpl->igaLib.OptsContext, &context); if (stat != 0) { messagePrinter->printf("Error while creating IGA Context! Error msg: %s", pimpl->igaLib.statusToString(stat)); return false; } char kernelText = '\0'; char *pKernelText = &kernelText; stat = pimpl->igaLib.disassemble(context, &disassembleOptions, kernelPtr, kernelSize, nullptr, nullptr, &pKernelText); if (stat != 0) { messagePrinter->printf("Error while disassembling with IGA!\nStatus msg: %s\n", pimpl->igaLib.statusToString(stat)); const iga_diagnostic_t *errors; uint32_t size = 100; pimpl->igaLib.contextGetErrors(context, &errors, &size); if (errors != nullptr) { messagePrinter->printf("Errors: %s\n", errors->message); } pimpl->igaLib.contextRelease(context); return false; } const iga_diagnostic_t *warnings; uint32_t warningsSize = 100; pimpl->igaLib.contextGetWarnings(context, &warnings, &warningsSize); if (warningsSize > 0 && warnings != nullptr) { messagePrinter->printf("Warnings: %s\n", warnings->message); } out = pKernelText; pimpl->igaLib.contextRelease(context); return true; } bool IgaWrapper::tryAssembleGenISA(const std::string &inAsm, std::string &outBinary) { if (false == tryLoadIga()) { messagePrinter->printf("Warning: couldn't load iga - kernel binaries won't be assembled.\n"); return false; } iga_context_t context; iga_status_t stat; iga_assemble_options_t assembleOptions = IGA_ASSEMBLE_OPTIONS_INIT(); stat = pimpl->igaLib.contextCreate(&pimpl->igaLib.OptsContext, &context); if (stat != 0) { 
messagePrinter->printf("Error while creating IGA Context! Error msg: %s", pimpl->igaLib.statusToString(stat)); return false; } uint32_t size = 0; void *pOutput = nullptr; stat = pimpl->igaLib.assemble(context, &assembleOptions, inAsm.c_str(), &pOutput, &size); if (stat != 0) { messagePrinter->printf("Error while assembling with IGA!\nStatus msg: %s\n", pimpl->igaLib.statusToString(stat)); const iga_diagnostic_t *errors; uint32_t size = 100; pimpl->igaLib.contextGetErrors(context, &errors, &size); if (errors != nullptr) { messagePrinter->printf("Errors: %s\n", errors->message); } pimpl->igaLib.contextRelease(context); return false; } const iga_diagnostic_t *warnings; uint32_t context_size; pimpl->igaLib.contextGetWarnings(context, &warnings, &context_size); if (context_size > 0 && warnings != nullptr) { messagePrinter->printf("Warnings: %s\n", warnings->message); } outBinary.assign(reinterpret_cast(pOutput), reinterpret_cast(pOutput) + size); pimpl->igaLib.contextRelease(context); return true; } bool IgaWrapper::tryLoadIga() { if (false == pimpl->igaLib.isLoaded()) { pimpl->loadIga(); } return pimpl->igaLib.isLoaded(); } void IgaWrapper::setGfxCore(GFXCORE_FAMILY core) { if (pimpl->igaGen == IGA_GEN_INVALID) { pimpl->igaGen = translateToIgaGen(core); } } void IgaWrapper::setProductFamily(PRODUCT_FAMILY product) { if (pimpl->igaGen == IGA_GEN_INVALID) { pimpl->igaGen = translateToIgaGen(product); } } bool IgaWrapper::isKnownPlatform() const { return pimpl->igaGen != IGA_GEN_INVALID; } compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/iga_wrapper.h000066400000000000000000000021261363734646600275160ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "igfxfmid.h" #include #include class MessagePrinter; struct IgaWrapper { IgaWrapper(); MOCKABLE_VIRTUAL ~IgaWrapper(); IgaWrapper(IgaWrapper &) = delete; IgaWrapper(const IgaWrapper &&) = delete; IgaWrapper &operator=(const 
IgaWrapper &) = delete; IgaWrapper &operator=(IgaWrapper &&) = delete; MOCKABLE_VIRTUAL bool tryDisassembleGenISA(const void *kernelPtr, uint32_t kernelSize, std::string &out); MOCKABLE_VIRTUAL bool tryAssembleGenISA(const std::string &inAsm, std::string &outBinary); MOCKABLE_VIRTUAL void setGfxCore(GFXCORE_FAMILY core); MOCKABLE_VIRTUAL void setProductFamily(PRODUCT_FAMILY product); MOCKABLE_VIRTUAL bool isKnownPlatform() const; void setMessagePrinter(MessagePrinter &messagePrinter) { this->messagePrinter = &messagePrinter; } protected: MOCKABLE_VIRTUAL bool tryLoadIga(); struct Impl; std::unique_ptr pimpl; MessagePrinter *messagePrinter = nullptr; }; compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/translate_platform.cpp000066400000000000000000000006011363734646600314460ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/translate_platform_base.h" iga_gen_t translateToIgaGen(PRODUCT_FAMILY productFamily) { return translateToIgaGenBase(productFamily); } iga_gen_t translateToIgaGen(GFXCORE_FAMILY coreFamily) { return translateToIgaGenBase(coreFamily); } compute-runtime-20.13.16352/shared/offline_compiler/source/decoder/translate_platform_base.h000066400000000000000000000022271363734646600321130ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "igad.h" #include "igfxfmid.h" inline iga_gen_t translateToIgaGenBase(PRODUCT_FAMILY productFamily) { switch (productFamily) { default: return IGA_GEN_INVALID; case IGFX_BROADWELL: return IGA_GEN8; case IGFX_CHERRYVIEW: return IGA_GEN8lp; case IGFX_SKYLAKE: return IGA_GEN9; case IGFX_BROXTON: return IGA_GEN9lp; case IGFX_KABYLAKE: return IGA_GEN9p5; case IGFX_COFFEELAKE: return IGA_GEN9p5; case IGFX_ICELAKE: return IGA_GEN11; case IGFX_ICELAKE_LP: return IGA_GEN11; } } inline iga_gen_t 
translateToIgaGenBase(GFXCORE_FAMILY coreFamily) { switch (coreFamily) { default: return IGA_GEN_INVALID; case IGFX_GEN8_CORE: return IGA_GEN8; case IGFX_GEN9_CORE: return IGA_GEN9; case IGFX_GEN11_CORE: return IGA_GEN11; case IGFX_GEN11LP_CORE: return IGA_GEN11; } } iga_gen_t translateToIgaGen(PRODUCT_FAMILY productFamily); iga_gen_t translateToIgaGen(GFXCORE_FAMILY coreFamily); compute-runtime-20.13.16352/shared/offline_compiler/source/extra_settings.cpp000066400000000000000000000007131363734646600272070ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/source/os_interface/os_library.h" #include "compiler_options.h" namespace NEO { void OfflineCompiler::resolveExtraSettings() { if (deviceName == "tgllp") { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP); } } } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/main.cpp000066400000000000000000000003551363734646600250720ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "ocloc_wrapper.h" int main(int argc, const char *argv[]) { OclocWrapper oclocWrapper; return oclocWrapper.invokeOcloc(argc, argv); } compute-runtime-20.13.16352/shared/offline_compiler/source/multi_command.cpp000066400000000000000000000153361363734646600270030ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/multi_command.h" #include "shared/source/utilities/const_stringref.h" #include namespace NEO { int MultiCommand::singleBuild(const std::vector &allArgs) { int retVal = SUCCESS; std::unique_ptr pCompiler{OfflineCompiler::create(allArgs.size(), allArgs, true, retVal, argHelper)}; if (retVal == SUCCESS) { retVal = buildWithSafetyGuard(pCompiler.get()); std::string &buildLog 
= pCompiler->getBuildLog(); if (buildLog.empty() == false) { argHelper->printf("%s\n", buildLog.c_str()); } if (retVal == ErrorCode::SUCCESS) { if (!pCompiler->isQuiet()) argHelper->printf("Build succeeded.\n"); } else { argHelper->printf("Build failed with error code: %d\n", retVal); } } if (retVal == SUCCESS) { outputFile << getCurrentDirectoryOwn(outDirForBuilds) + outFileName + ".bin"; } else { outputFile << "Unsuccesful build"; } outputFile << '\n'; return retVal; } MultiCommand *MultiCommand::create(const std::vector &args, int &retVal, OclocArgHelper *helper) { retVal = ErrorCode::SUCCESS; auto pMultiCommand = new MultiCommand(); if (pMultiCommand) { pMultiCommand->argHelper = helper; retVal = pMultiCommand->initialize(args); } if (retVal != ErrorCode::SUCCESS) { delete pMultiCommand; pMultiCommand = nullptr; } return pMultiCommand; } void MultiCommand::addAdditionalOptionsToSingleCommandLine(std::vector &singleLineWithArguments, size_t buildId) { bool hasOutDir = false; bool hasOutName = false; for (const auto &arg : singleLineWithArguments) { if (ConstStringRef("-out_dir") == arg) { hasOutDir = true; } else if (ConstStringRef("-output") == arg) { hasOutName = true; } } if (!hasOutDir) { singleLineWithArguments.push_back("-out_dir"); outDirForBuilds = OfflineCompiler::getFileNameTrunk(pathToCommandFile); singleLineWithArguments.push_back(outDirForBuilds); } if (!hasOutName) { singleLineWithArguments.push_back("-output"); outFileName = "build_no_" + std::to_string(buildId + 1); singleLineWithArguments.push_back(outFileName); } if (quiet) singleLineWithArguments.push_back("-q"); } int MultiCommand::initialize(const std::vector &args) { if (args[args.size() - 1] == "--help") { printHelp(); return -1; } for (size_t argIndex = 1; argIndex < args.size(); argIndex++) { const auto &currArg = args[argIndex]; const bool hasMoreArgs = (argIndex + 1 < args.size()); if (hasMoreArgs && ConstStringRef("-multi") == currArg) { pathToCommandFile = args[++argIndex]; } else if 
(hasMoreArgs && ConstStringRef("-output_file_list") == currArg) { outputFileList = args[++argIndex]; } else if (ConstStringRef("-q") == currArg) { quiet = true; } else { argHelper->printf("Invalid option (arg %zu): %s\n", argIndex, currArg.c_str()); printHelp(); return INVALID_COMMAND_LINE; } } //save file with builds arguments to vector of strings, line by line if (argHelper->fileExists(pathToCommandFile)) { argHelper->readFileToVectorOfStrings(pathToCommandFile, lines); if (lines.empty()) { argHelper->printf("Command file was empty.\n"); return INVALID_FILE; } } else { argHelper->printf("Could not find/open file with builds argument.s\n"); return INVALID_FILE; } runBuilds(args[0]); if (outputFileList != "") { argHelper->saveOutput(outputFileList, outputFile); } return showResults(); } void MultiCommand::runBuilds(const std::string &argZero) { for (size_t i = 0; i < lines.size(); ++i) { std::vector args = {argZero}; int retVal = splitLineInSeparateArgs(args, lines[i], i); if (retVal != SUCCESS) { retValues.push_back(retVal); continue; } if (!quiet) { argHelper->printf("Command numer %zu: \n", i + 1); } addAdditionalOptionsToSingleCommandLine(args, i); retVal = singleBuild(args); retValues.push_back(retVal); } } void MultiCommand::printHelp() { argHelper->printf(R"===(Compiles multiple files using a config file. Usage: ocloc multi Input file containing a list of arguments for subsequent ocloc invocations. Expected format of each line inside such file is: '-file -device [compile_options]. See 'ocloc compile --help' for available compile_options. Results of subsequent compilations will be dumped into a directory with name indentical file_name's base name. 
-output_file_list Name of optional file containing paths to outputs .bin files )==="); } int MultiCommand::splitLineInSeparateArgs(std::vector &qargs, const std::string &commandsLine, size_t numberOfBuild) { size_t start, end, argLen; for (size_t i = 0; i < commandsLine.length(); ++i) { const char &currChar = commandsLine[i]; if ('\"' == currChar) { start = i + 1; end = commandsLine.find('\"', start); } else if ('\'' == currChar) { start = i + 1; end = commandsLine.find('\'', start); } else if (' ' == currChar) { continue; } else { start = i; end = commandsLine.find(" ", start); end = (end == std::string::npos) ? commandsLine.length() : end; } if (end == std::string::npos) { argHelper->printf("One of the quotes is open in build number %zu\n", numberOfBuild + 1); return INVALID_FILE; } argLen = end - start; i = end; qargs.push_back(commandsLine.substr(start, argLen)); } return SUCCESS; } int MultiCommand::showResults() { int retValue = SUCCESS; int indexRetVal = 0; for (int retVal : retValues) { retValue |= retVal; if (!quiet) { if (retVal != SUCCESS) { argHelper->printf("Build %d: failed. 
Error code: %d\n", indexRetVal, retVal); } else { argHelper->printf("Build %d: successful\n", indexRetVal); } } indexRetVal++; } return retValue; } } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/multi_command.h000066400000000000000000000031221363734646600264360ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/binary_decoder.h" #include "shared/offline_compiler/source/decoder/binary_encoder.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/offline_compiler/source/utilities/get_current_dir.h" #include "shared/offline_compiler/source/utilities/safety_caller.h" #include "shared/source/os_interface/os_library.h" #include #include #include namespace NEO { class MultiCommand { public: MultiCommand &operator=(const MultiCommand &) = delete; MultiCommand(const MultiCommand &) = delete; ~MultiCommand() = default; static MultiCommand *create(const std::vector &args, int &retVal, OclocArgHelper *helper); std::string outDirForBuilds; std::string outputFileList; protected: MultiCommand() = default; int initialize(const std::vector &args); int splitLineInSeparateArgs(std::vector &qargs, const std::string &command, size_t numberOfBuild); int showResults(); int singleBuild(const std::vector &args); void addAdditionalOptionsToSingleCommandLine(std::vector &, size_t buildId); void printHelp(); void runBuilds(const std::string &argZero); OclocArgHelper *argHelper = nullptr; std::vector retValues; std::vector lines; std::string outFileName; std::string pathToCommandFile; std::stringstream outputFile; bool quiet = false; }; } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_api.cpp000066400000000000000000000125531363734646600261010ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "ocloc_api.h" #include 
"shared/offline_compiler/source/decoder/binary_decoder.h" #include "shared/offline_compiler/source/decoder/binary_encoder.h" #include "shared/offline_compiler/source/multi_command.h" #include "shared/offline_compiler/source/ocloc_fatbinary.h" #include "shared/offline_compiler/source/offline_compiler.h" #include using namespace NEO; const char *help = R"===(ocloc is a tool for managing Intel Compute GPU device binary format. It can be used for generation (as part of 'compile' command) as well as manipulation (decoding/modifying - as part of 'disasm'/'asm' commands) of such binary files. Intel Compute GPU device binary is a format used by Intel Compute GPU runtime (aka NEO). Intel Compute GPU runtime will return this binary format when queried using clGetProgramInfo(..., CL_PROGRAM_BINARIES, ...). It will also honor this format as input to clCreateProgramWithBinary function call. ocloc does not require Intel GPU device to be present in the system nor does it depend on Intel Compute GPU runtime driver to be installed. It does however rely on the same set of compilers (IGC, common_clang) as the runtime driver. Usage: ocloc [--help] [] Available commands are listed below. Use 'ocloc --help' to get help about specific command. Commands: compile Compiles input to Intel Compute GPU device binary. disasm Disassembles Intel Compute GPU device binary. asm Assembles Intel Compute GPU device binary. multi Compiles multiple files using a config file. Default command (when none provided) is 'compile'. 
Examples: Compile file to Intel Compute GPU device binary (out = source_file_Gen9core.bin) ocloc -file source_file.cl -device skl Disassemble Intel Compute GPU device binary ocloc disasm -file source_file_Gen9core.bin Assemble to Intel Compute GPU device binary (after above disasm) ocloc asm -out reassembled.bin )==="; extern "C" { int oclocInvoke(unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) { auto helper = std::make_unique( numSources, dataSources, lenSources, nameSources, numInputHeaders, dataInputHeaders, lenInputHeaders, nameInputHeaders, numOutputs, dataOutputs, lenOutputs, nameOutputs); std::vector allArgs; if (numArgs > 1) { allArgs.assign(argv, argv + numArgs); } try { if (numArgs == 1 || (numArgs > 1 && (ConstStringRef("-h") == allArgs[1] || ConstStringRef("--help") == allArgs[1]))) { helper->printf("%s", help); return ErrorCode::SUCCESS; } else if (numArgs > 1 && ConstStringRef("disasm") == allArgs[1]) { BinaryDecoder disasm(helper.get()); int retVal = disasm.validateInput(allArgs); if (retVal == 0) { return disasm.decode(); } else { return retVal; } } else if (numArgs > 1 && ConstStringRef("asm") == allArgs[1]) { BinaryEncoder assembler(helper.get()); int retVal = assembler.validateInput(allArgs); if (retVal == 0) { return assembler.encode(); } else { return retVal; } } else if (numArgs > 1 && (ConstStringRef("multi") == allArgs[1] || ConstStringRef("-multi") == allArgs[1])) { int retValue = ErrorCode::SUCCESS; std::unique_ptr pMulti{(MultiCommand::create(allArgs, retValue, helper.get()))}; return retValue; } else if (requestedFatBinary(numArgs, argv)) { return buildFatbinary(numArgs, argv, helper.get()); } else { int retVal = 
ErrorCode::SUCCESS; std::unique_ptr pCompiler{OfflineCompiler::create(numArgs, allArgs, true, retVal, helper.get())}; if (retVal == ErrorCode::SUCCESS) { retVal = buildWithSafetyGuard(pCompiler.get()); std::string buildLog = pCompiler->getBuildLog(); if (buildLog.empty() == false) { helper->printf("%s\n", buildLog.c_str()); } if (retVal == ErrorCode::SUCCESS) { if (!pCompiler->isQuiet()) helper->printf("Build succeeded.\n"); } else { helper->printf("Build failed with error code: %d\n", retVal); } } return retVal; } } catch (const std::exception &e) { helper->printf("%s\n", e.what()); return -1; } return -1; } int oclocFreeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) { for (uint32_t i = 0; i < *numOutputs; i++) { delete[](*dataOutputs)[i]; delete[](*nameOutputs)[i]; } delete[](*dataOutputs); delete[](*lenOutputs); delete[](*nameOutputs); return 0; } } compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_api.h000066400000000000000000000014411363734646600255400ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #ifdef _WIN32 #define SIGNATURE __declspec(dllexport) int __cdecl #else #define SIGNATURE int #endif extern "C" { SIGNATURE oclocInvoke(unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); SIGNATURE oclocFreeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); } compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_arg_helper.cpp000066400000000000000000000115421363734646600274350ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #include "ocloc_arg_helper.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/string.h" #include #include void Source::toVectorOfStrings(std::vector &lines, bool replaceTabs) { std::string line; const char *file = reinterpret_cast(data); while (*file != '\0') { if (replaceTabs && *file == '\t') { line += ' '; } else if (*file == '\n') { lines.push_back(line); line = ""; } else { line += *file; } file++; } } Output::Output(const std::string &name, const void *data, const size_t &size) : name(name), size(size) { this->data = new uint8_t[size]; memcpy_s(reinterpret_cast(this->data), this->size, data, size); }; OclocArgHelper::OclocArgHelper(const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) : numOutputs(numOutputs), nameOutputs(nameOutputs), dataOutputs(dataOutputs), lenOutputs(lenOutputs), hasOutput(numOutputs != nullptr) { for (uint32_t i = 0; i < numSources; ++i) { inputs.push_back(Source(dataSources[i], static_cast(lenSources[i]), nameSources[i])); } for (uint32_t i = 0; i < numInputHeaders; ++i) { headers.push_back(Source(dataInputHeaders[i], static_cast(lenInputHeaders[i]), nameInputHeaders[i])); } } OclocArgHelper::~OclocArgHelper() { if (outputEnabled()) { saveOutput(oclocStdoutLogName, messagePrinter.getLog()); moveOutputs(); } } bool OclocArgHelper::fileExists(const std::string &filename) const { return sourceFileExists(filename) || ::fileExists(filename); } void OclocArgHelper::moveOutputs() { *numOutputs = static_cast(outputs.size()); *nameOutputs = new char *[outputs.size()]; *dataOutputs = new uint8_t *[outputs.size()]; *lenOutputs = new uint64_t[outputs.size()]; for (size_t i = 0; i < outputs.size(); ++i) { size_t size 
= outputs[i]->name.length() + 1; (*nameOutputs)[i] = new char[size]; strncpy_s((*nameOutputs)[i], size, outputs[i]->name.c_str(), outputs[i]->name.length() + 1); (*dataOutputs)[i] = outputs[i]->data; (*lenOutputs)[i] = outputs[i]->size; } } Source *OclocArgHelper::findSourceFile(const std::string &filename) { for (auto &source : inputs) { if (filename == source.name) { return &source; } } return nullptr; } bool OclocArgHelper::sourceFileExists(const std::string &filename) const { for (auto &input : inputs) { if (filename == input.name) { return true; } } return false; } std::vector OclocArgHelper::headersToVectorOfStrings() { std::vector lines; for (auto &header : headers) { header.toVectorOfStrings(lines, true); } return lines; } void OclocArgHelper::readFileToVectorOfStrings(const std::string &filename, std::vector &lines) { if (Source *s = findSourceFile(filename)) { s->toVectorOfStrings(lines); } else { ::readFileToVectorOfStrings(lines, filename); } } std::vector OclocArgHelper::readBinaryFile(const std::string &filename) { if (Source *s = findSourceFile(filename)) { return s->toBinaryVector(); } else { return ::readBinaryFile(filename); } } std::unique_ptr OclocArgHelper::loadDataFromFile(const std::string &filename, size_t &retSize) { if (Source *s = findSourceFile(filename)) { auto size = s->length; std::unique_ptr ret(new char[size]()); memcpy_s(ret.get(), size, s->data, s->length); retSize = s->length; return ret; } else { return ::loadDataFromFile(filename.c_str(), retSize); } } void OclocArgHelper::saveOutput(const std::string &filename, const void *pData, const size_t &dataSize) { if (outputEnabled()) { addOutput(filename, pData, dataSize); } else { writeDataToFile(filename.c_str(), pData, dataSize); } } void OclocArgHelper::saveOutput(const std::string &filename, const std::ostream &stream) { std::stringstream ss; ss << stream.rdbuf(); if (outputEnabled()) { addOutput(filename, ss.str().c_str(), ss.str().length()); } else { std::ofstream 
file(filename); file << ss.str(); } } compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_arg_helper.h000066400000000000000000000057511363734646600271070ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/decoder/helper.h" #include #include #include #include #include #pragma once static constexpr auto *oclocStdoutLogName = "stdout.log"; struct Source { const uint8_t *data; const size_t length; const char *name; Source(const uint8_t *data, const size_t length, const char *name) : data(data), length(length), name(name){}; void toVectorOfStrings(std::vector &lines, bool replaceTabs = false); inline std::vector toBinaryVector() { return std::vector(data, data + length); }; }; struct Output { std::string name; uint8_t *data; const size_t size; Output(const std::string &name, const void *data, const size_t &size); }; class OclocArgHelper { protected: std::vector inputs, headers; std::vector outputs; uint32_t *numOutputs = nullptr; char ***nameOutputs = nullptr; uint8_t ***dataOutputs = nullptr; uint64_t **lenOutputs = nullptr; bool hasOutput = false; void moveOutputs(); MessagePrinter messagePrinter; Source *findSourceFile(const std::string &filename); bool sourceFileExists(const std::string &filename) const; inline void addOutput(const std::string &filename, const void *data, const size_t &size) { outputs.push_back(new Output(filename, data, size)); } public: OclocArgHelper() = default; OclocArgHelper(const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); virtual ~OclocArgHelper(); MOCKABLE_VIRTUAL bool fileExists(const std::string &filename) const; std::vector headersToVectorOfStrings(); 
void readFileToVectorOfStrings(const std::string &filename, std::vector &lines); MOCKABLE_VIRTUAL std::vector readBinaryFile(const std::string &filename); std::unique_ptr loadDataFromFile(const std::string &filename, size_t &retSize); inline bool outputEnabled() { return hasOutput; } inline bool hasHeaders() { return headers.size() > 0; } void saveOutput(const std::string &filename, const void *pData, const size_t &dataSize); void saveOutput(const std::string &filename, const std::ostream &stream); MessagePrinter &getPrinterRef() { return messagePrinter; } void printf(const char *message) { messagePrinter.printf(message); } template void printf(const char *format, Args... args) { messagePrinter.printf(format, std::forward(args)...); } }; compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_fatbinary.cpp000066400000000000000000000310251363734646600273020ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/ocloc_fatbinary.h" #include "shared/offline_compiler/source/ocloc_arg_helper.h" #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/offline_compiler/source/utilities/safety_caller.h" #include "shared/source/device_binary_format/ar/ar_encoder.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "compiler_options.h" #include "igfxfmid.h" #include #include #include #include namespace NEO { bool requestedFatBinary(int argc, const char *argv[]) { for (int argIndex = 1; argIndex < argc; argIndex++) { const auto &currArg = argv[argIndex]; const bool hasMoreArgs = (argIndex + 1 < argc); if ((ConstStringRef("-device") == currArg) && hasMoreArgs) { ConstStringRef deviceArg(argv[argIndex + 1], strlen(argv[argIndex + 1])); return deviceArg.contains("*") || deviceArg.contains("-") || deviceArg.contains(",") || deviceArg.contains("gen"); } } return false; } std::vector getAllSupportedTargetPlatforms() 
{ return std::vector{ALL_SUPPORTED_PRODUCT_FAMILIES}; } std::vector toProductNames(const std::vector &productIds) { std::vector ret; for (auto prodId : productIds) { ret.push_back(ConstStringRef(hardwarePrefix[prodId], strlen(hardwarePrefix[prodId]))); } return ret; } PRODUCT_FAMILY asProductId(ConstStringRef product, const std::vector &allSupportedPlatforms) { for (auto family : allSupportedPlatforms) { if (product == hardwarePrefix[family]) { return family; } } return IGFX_UNKNOWN; } GFXCORE_FAMILY asGfxCoreId(ConstStringRef core) { ConstStringRef coreIgnoreG(core.begin() + 1, core.size() - 1); for (unsigned int coreId = 0; coreId < IGFX_MAX_CORE; ++coreId) { if (nullptr == familyName[coreId]) { continue; } if (ConstStringRef(familyName[coreId] + 1, strlen(familyName[coreId]) - 1) == coreIgnoreG) { return static_cast(coreId); } } return IGFX_UNKNOWN_CORE; } void appendPlatformsForGfxCore(GFXCORE_FAMILY core, const std::vector &allSupportedPlatforms, std::vector &out) { for (auto family : allSupportedPlatforms) { if (core == hardwareInfoTable[family]->platform.eRenderCoreFamily) { out.push_back(family); } } } std::vector getTargetPlatformsForFatbinary(ConstStringRef deviceArg, OclocArgHelper *argHelper) { std::vector allSupportedPlatforms = getAllSupportedTargetPlatforms(); if (deviceArg == "*") { return toProductNames(allSupportedPlatforms); } auto genArg = ConstStringRef("gen"); std::vector requestedPlatforms; auto sets = CompilerOptions::tokenize(deviceArg, ','); for (auto set : sets) { if (set.contains("-")) { auto range = CompilerOptions::tokenize(deviceArg, '-'); if (range.size() > 2) { argHelper->printf("Invalid range : %s - should be from-to or -to or from-\n", set.str().c_str()); return {}; } if (range.size() == 1) { // open range , from-max or min-to if (range[0].contains("gen")) { auto coreId = asGfxCoreId(range[0]); if (IGFX_UNKNOWN_CORE == coreId) { argHelper->printf("Unknown device : %s\n", set.str().c_str()); return {}; } if ('-' == set[0]) { // to 
unsigned int coreIt = IGFX_UNKNOWN_CORE; ++coreIt; while (coreIt <= static_cast(coreId)) { appendPlatformsForGfxCore(static_cast(coreIt), allSupportedPlatforms, requestedPlatforms); ++coreIt; } } else { // from unsigned int coreIt = coreId; while (coreIt < static_cast(IGFX_MAX_CORE)) { appendPlatformsForGfxCore(static_cast(coreIt), allSupportedPlatforms, requestedPlatforms); ++coreIt; } } } else { auto prodId = asProductId(range[0], allSupportedPlatforms); if (IGFX_UNKNOWN == prodId) { argHelper->printf("Unknown device : %s\n", range[0].str().c_str()); return {}; } auto prodIt = std::find(allSupportedPlatforms.begin(), allSupportedPlatforms.end(), prodId); assert(prodIt != allSupportedPlatforms.end()); if ('-' == set[0]) { // to requestedPlatforms.insert(requestedPlatforms.end(), allSupportedPlatforms.begin(), prodIt + 1); } else { // from requestedPlatforms.insert(requestedPlatforms.end(), prodIt, allSupportedPlatforms.end()); } } } else { if (range[0].contains("gen")) { if (false == range[1].contains("gen")) { argHelper->printf("Ranges mixing platforms and gfxCores is not supported : %s - should be genFrom-genTo or platformFrom-platformTo\n", set.str().c_str()); return {}; } auto coreFrom = asGfxCoreId(range[0]); auto coreTo = asGfxCoreId(range[1]); if (IGFX_UNKNOWN_CORE == coreFrom) { argHelper->printf("Unknown device : %s\n", set.str().c_str()); return {}; } if (IGFX_UNKNOWN_CORE == coreTo) { argHelper->printf("Unknown device : %s\n", set.str().c_str()); return {}; } if (coreFrom > coreTo) { std::swap(coreFrom, coreTo); } while (coreFrom <= coreTo) { appendPlatformsForGfxCore(static_cast(coreFrom), allSupportedPlatforms, requestedPlatforms); coreFrom = static_cast(static_cast(coreFrom) + 1); } } else { auto platformFrom = asProductId(range[0], allSupportedPlatforms); auto platformTo = asProductId(range[1], allSupportedPlatforms); if (IGFX_UNKNOWN == platformFrom) { argHelper->printf("Unknown device : %s\n", set.str().c_str()); return {}; } if (IGFX_UNKNOWN == 
platformTo) { argHelper->printf("Unknown device : %s\n", set.str().c_str()); return {}; } if (platformFrom > platformTo) { std::swap(platformFrom, platformTo); } auto from = std::find(allSupportedPlatforms.begin(), allSupportedPlatforms.end(), platformFrom); auto to = std::find(allSupportedPlatforms.begin(), allSupportedPlatforms.end(), platformTo) + 1; requestedPlatforms.insert(requestedPlatforms.end(), from, to); } } } else if (set.contains("gen")) { if (set.size() == genArg.size()) { argHelper->printf("Invalid gen-based device : %s - gen should be followed by a number\n", set.str().c_str()); } else { auto coreId = asGfxCoreId(set); if (IGFX_UNKNOWN_CORE == coreId) { argHelper->printf("Unknown device : %s\n", set.str().c_str()); return {}; } appendPlatformsForGfxCore(coreId, allSupportedPlatforms, requestedPlatforms); } } else { auto prodId = asProductId(set, allSupportedPlatforms); if (IGFX_UNKNOWN == prodId) { argHelper->printf("Unknown device : %s\n", set.str().c_str()); return {}; } requestedPlatforms.push_back(prodId); } } return toProductNames(requestedPlatforms); } int buildFatbinary(int argc, const char *argv[], OclocArgHelper *argHelper) { std::string pointerSizeInBits = (sizeof(void *) == 4) ? 
"32" : "64"; int deviceArgIndex = -1; std::string inputFileName = ""; std::string outputFileName = ""; std::string outputDirectory = ""; std::vector argsCopy; if (argc > 1) { argsCopy.assign(argv, argv + argc); } for (int argIndex = 1; argIndex < argc; argIndex++) { const auto &currArg = argv[argIndex]; const bool hasMoreArgs = (argIndex + 1 < argc); if ((ConstStringRef("-device") == currArg) && hasMoreArgs) { deviceArgIndex = argIndex + 1; ++argIndex; } else if ((CompilerOptions::arch32bit == currArg) || (ConstStringRef("-32") == currArg)) { pointerSizeInBits = "32"; } else if ((CompilerOptions::arch64bit == currArg) || (ConstStringRef("-64") == currArg)) { pointerSizeInBits = "64"; } else if ((ConstStringRef("-file") == currArg) && hasMoreArgs) { inputFileName = argv[argIndex + 1]; ++argIndex; } else if ((ConstStringRef("-output") == currArg) && hasMoreArgs) { outputFileName = argv[argIndex + 1]; ++argIndex; } else if ((ConstStringRef("-out_dir") == currArg) && hasMoreArgs) { outputDirectory = argv[argIndex + 1]; ++argIndex; } } std::vector targetPlatforms; targetPlatforms = getTargetPlatformsForFatbinary(ConstStringRef(argv[deviceArgIndex], strlen(argv[deviceArgIndex])), argHelper); if (targetPlatforms.empty()) { argHelper->printf("Failed to parse target devices from : %s\n", argv[deviceArgIndex]); return 1; } NEO::Ar::ArEncoder fatbinary(true); for (auto targetPlatform : targetPlatforms) { int retVal = 0; argsCopy[deviceArgIndex] = targetPlatform.str(); std::unique_ptr pCompiler{OfflineCompiler::create(argc, argsCopy, false, retVal, argHelper)}; if (ErrorCode::SUCCESS != retVal) { argHelper->printf("Error! Couldn't create OfflineCompiler. 
Exiting.\n"); return retVal; } auto stepping = pCompiler->getHardwareInfo().platform.usRevId; if (retVal == 0) { retVal = buildWithSafetyGuard(pCompiler.get()); std::string buildLog = pCompiler->getBuildLog(); if (buildLog.empty() == false) { argHelper->printf("%s\n", buildLog.c_str()); } if (retVal == 0) { if (!pCompiler->isQuiet()) argHelper->printf("Build succeeded for : %s.\n", (targetPlatform.str() + "." + std::to_string(stepping)).c_str()); } else { argHelper->printf("Build failed for : %s with error code: %d\n", (targetPlatform.str() + "." + std::to_string(stepping)).c_str(), retVal); argHelper->printf("Command was:"); for (auto i = 0; i < argc; ++i) argHelper->printf(" %s", argv[i]); argHelper->printf("\n"); } } if (0 != retVal) { return retVal; } fatbinary.appendFileEntry(pointerSizeInBits + "." + targetPlatform.str() + "." + std::to_string(stepping), pCompiler->getPackedDeviceBinaryOutput()); } auto fatbinaryData = fatbinary.encode(); std::string fatbinaryFileName = outputFileName; if (outputFileName.empty() && (false == inputFileName.empty())) { fatbinaryFileName = OfflineCompiler::getFileNameTrunk(inputFileName) + ".ar"; } if (false == outputDirectory.empty()) { fatbinaryFileName = outputDirectory + "/" + outputFileName; } argHelper->saveOutput(fatbinaryFileName, fatbinaryData.data(), fatbinaryData.size()); return 0; } } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_fatbinary.h000066400000000000000000000016771363734646600267610ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/const_stringref.h" #include "igfxfmid.h" #include class OclocArgHelper; namespace NEO { bool requestedFatBinary(int argc, const char *argv[]); int buildFatbinary(int argc, const char *argv[], OclocArgHelper *argHelper); std::vector getAllSupportedTargetPlatforms(); std::vector toProductNames(const std::vector &productIds); PRODUCT_FAMILY 
asProductId(ConstStringRef product, const std::vector &allSupportedPlatforms); GFXCORE_FAMILY asGfxCoreId(ConstStringRef core); void appendPlatformsForGfxCore(GFXCORE_FAMILY core, const std::vector &allSupportedPlatforms, std::vector &out); std::vector getTargetPlatformsForFatbinary(ConstStringRef deviceArg, OclocArgHelper *argHelper); } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_wrapper.cpp000066400000000000000000000066701363734646600270130ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "ocloc_wrapper.h" #include "shared/source/os_interface/os_library.h" #include "utilities/get_path.h" #include #include typedef int (*pOclocInvoke)( unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); typedef int (*pOclocFreeOutput)( uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); struct OclocLibrary { pOclocInvoke invoke = nullptr; pOclocFreeOutput freeOutput = nullptr; std::unique_ptr library; bool isLoaded() { return library != nullptr; } }; OclocWrapper::OclocWrapper() : pImpl(std::make_unique()){}; OclocWrapper::~OclocWrapper() = default; struct OclocWrapper::Impl { OclocLibrary oclocLib; void loadOcloc() { OclocLibrary ocloc; std::string oclocLibName = ""; ocloc.library.reset(NEO::OsLibrary::load(oclocLibName)); if (nullptr == (ocloc.invoke = reinterpret_cast(ocloc.library->getProcAddress("oclocInvoke")))) { std::cout << "Error! Couldn't find OclocInvoke function.\n"; return; } if (nullptr == (ocloc.freeOutput = reinterpret_cast(ocloc.library->getProcAddress("oclocFreeOutput")))) { std::cout << "Error! 
Couldn't find OclocFreeOutput function.\n"; return; } this->oclocLib = std::move(ocloc); } }; int OclocWrapper::invokeOcloc(unsigned int numArgs, const char *argv[]) { return invokeOcloc(numArgs, argv, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); } int OclocWrapper::invokeOcloc(unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) { if (false == tryLoadOcloc()) { std::cout << "Error! Ocloc Library couldn't be loaded.\n"; return -1; } return pImpl->oclocLib.invoke(numArgs, argv, numSources, dataSources, lenSources, nameSources, numInputHeaders, dataInputHeaders, lenInputHeaders, nameInputHeaders, numOutputs, dataOutputs, lenOutputs, nameOutputs); } int OclocWrapper::freeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs) { if (false == tryLoadOcloc()) { std::cout << "Error! 
Ocloc Library couldn't be loaded.\n"; return -1; } return pImpl->oclocLib.freeOutput(numOutputs, dataOutputs, lenOutputs, nameOutputs); } bool OclocWrapper::tryLoadOcloc() { if (false == pImpl->oclocLib.isLoaded()) { pImpl->loadOcloc(); } return pImpl->oclocLib.isLoaded(); } compute-runtime-20.13.16352/shared/offline_compiler/source/ocloc_wrapper.h000066400000000000000000000021661363734646600264540ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include struct OclocWrapper { public: OclocWrapper(); ~OclocWrapper(); OclocWrapper(OclocWrapper &) = delete; OclocWrapper(const OclocWrapper &&) = delete; OclocWrapper &operator=(const OclocWrapper &) = delete; OclocWrapper &operator=(OclocWrapper &&) = delete; int invokeOcloc(unsigned int numArgs, const char *argv[]); int invokeOcloc(unsigned int numArgs, const char *argv[], const uint32_t numSources, const uint8_t **dataSources, const uint64_t *lenSources, const char **nameSources, const uint32_t numInputHeaders, const uint8_t **dataInputHeaders, const uint64_t *lenInputHeaders, const char **nameInputHeaders, uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); int freeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs, uint64_t **lenOutputs, char ***nameOutputs); protected: bool tryLoadOcloc(); struct Impl; std::unique_ptr pImpl; }; compute-runtime-20.13.16352/shared/offline_compiler/source/offline_compiler.cpp000066400000000000000000001126071363734646600274660ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "offline_compiler.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device_binary_format/device_binary_formats.h" #include "shared/source/device_binary_format/elf/elf_encoder.h" #include "shared/source/device_binary_format/elf/ocl_elf.h" #include "shared/source/helpers/debug_helpers.h" 
#include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/os_library.h" #include "opencl/source/helpers/validators.h" #include "opencl/source/os_interface/os_inc_base.h" #include "opencl/source/platform/extensions.h" #include "cif/common/cif_main.h" #include "cif/helpers/error.h" #include "cif/import/library_api.h" #include "compiler_options.h" #include "igfxfmid.h" #include "ocl_igc_interface/code_type.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include "ocl_igc_interface/platform_helper.h" #include #include #include #include #ifdef _WIN32 #include #define MakeDirectory _mkdir #define GetCurrentWorkingDirectory _getcwd #else #include #define MakeDirectory(dir) mkdir(dir, 0777) #define GetCurrentWorkingDirectory getcwd #endif namespace NEO { CIF::CIFMain *createMainNoSanitize(CIF::CreateCIFMainFunc_t createFunc); std::string convertToPascalCase(const std::string &inString) { std::string outString; bool capitalize = true; for (unsigned int i = 0; i < inString.length(); i++) { if (isalpha(inString[i]) && capitalize == true) { outString += toupper(inString[i]); capitalize = false; } else if (inString[i] == '_') { capitalize = true; } else { outString += inString[i]; } } return outString; } OfflineCompiler::OfflineCompiler() = default; OfflineCompiler::~OfflineCompiler() { delete[] irBinary; delete[] genBinary; } OfflineCompiler *OfflineCompiler::create(size_t numArgs, const std::vector &allArgs, bool dumpFiles, int &retVal, OclocArgHelper *helper) { retVal = SUCCESS; auto pOffCompiler = new OfflineCompiler(); if (pOffCompiler) { pOffCompiler->argHelper = helper; retVal = pOffCompiler->initialize(numArgs, allArgs, dumpFiles); } if (retVal != SUCCESS) { delete pOffCompiler; pOffCompiler = nullptr; } return pOffCompiler; } int OfflineCompiler::buildSourceCode() { int retVal = SUCCESS; do { if 
(strcmp(sourceCode.c_str(), "") == 0) { retVal = INVALID_PROGRAM; break; } UNRECOVERABLE_IF(igcDeviceCtx == nullptr); CIF::RAII::UPtr_t igcOutput; bool inputIsIntermediateRepresentation = inputFileLlvm || inputFileSpirV; if (false == inputIsIntermediateRepresentation) { UNRECOVERABLE_IF(fclDeviceCtx == nullptr); IGC::CodeType::CodeType_t intermediateRepresentation = useLlvmText ? IGC::CodeType::llvmLl : (useLlvmBc ? IGC::CodeType::llvmBc : preferredIntermediateRepresentation); // sourceCode.size() returns the number of characters without null terminated char auto fclSrc = CIF::Builtins::CreateConstBuffer(fclMain.get(), sourceCode.c_str(), sourceCode.size() + 1); auto fclOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), options.c_str(), options.size()); auto fclInternalOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), internalOptions.c_str(), internalOptions.size()); auto fclTranslationCtx = fclDeviceCtx->CreateTranslationCtx(IGC::CodeType::oclC, intermediateRepresentation); auto igcTranslationCtx = igcDeviceCtx->CreateTranslationCtx(intermediateRepresentation, IGC::CodeType::oclGenBin); if (false == NEO::areNotNullptr(fclSrc.get(), fclOptions.get(), fclInternalOptions.get(), fclTranslationCtx.get(), igcTranslationCtx.get())) { retVal = OUT_OF_HOST_MEMORY; break; } auto fclOutput = fclTranslationCtx->Translate(fclSrc.get(), fclOptions.get(), fclInternalOptions.get(), nullptr, 0); if (fclOutput == nullptr) { retVal = OUT_OF_HOST_MEMORY; break; } UNRECOVERABLE_IF(fclOutput->GetBuildLog() == nullptr); UNRECOVERABLE_IF(fclOutput->GetOutput() == nullptr); if (fclOutput->Successful() == false) { updateBuildLog(fclOutput->GetBuildLog()->GetMemory(), fclOutput->GetBuildLog()->GetSizeRaw()); retVal = BUILD_PROGRAM_FAILURE; break; } storeBinary(irBinary, irBinarySize, fclOutput->GetOutput()->GetMemory(), fclOutput->GetOutput()->GetSizeRaw()); isSpirV = intermediateRepresentation == IGC::CodeType::spirV; updateBuildLog(fclOutput->GetBuildLog()->GetMemory(), 
fclOutput->GetBuildLog()->GetSizeRaw()); igcOutput = igcTranslationCtx->Translate(fclOutput->GetOutput(), fclOptions.get(), fclInternalOptions.get(), nullptr, 0); } else { auto igcSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), sourceCode.c_str(), sourceCode.size()); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), options.c_str(), options.size()); auto igcInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), internalOptions.c_str(), internalOptions.size()); auto igcTranslationCtx = igcDeviceCtx->CreateTranslationCtx(inputFileSpirV ? IGC::CodeType::spirV : IGC::CodeType::llvmBc, IGC::CodeType::oclGenBin); igcOutput = igcTranslationCtx->Translate(igcSrc.get(), igcOptions.get(), igcInternalOptions.get(), nullptr, 0); } if (igcOutput == nullptr) { retVal = OUT_OF_HOST_MEMORY; break; } UNRECOVERABLE_IF(igcOutput->GetBuildLog() == nullptr); UNRECOVERABLE_IF(igcOutput->GetOutput() == nullptr); updateBuildLog(igcOutput->GetBuildLog()->GetMemory(), igcOutput->GetBuildLog()->GetSizeRaw()); if (igcOutput->GetOutput()->GetSizeRaw() != 0) { storeBinary(genBinary, genBinarySize, igcOutput->GetOutput()->GetMemory(), igcOutput->GetOutput()->GetSizeRaw()); } if (igcOutput->GetDebugData()->GetSizeRaw() != 0) { storeBinary(debugDataBinary, debugDataBinarySize, igcOutput->GetDebugData()->GetMemory(), igcOutput->GetDebugData()->GetSizeRaw()); } retVal = igcOutput->Successful() ? SUCCESS : BUILD_PROGRAM_FAILURE; } while (0); return retVal; } int OfflineCompiler::build() { int retVal = SUCCESS; retVal = buildSourceCode(); if (retVal == SUCCESS) { generateElfBinary(); if (dumpFiles) { writeOutAllFiles(); } } return retVal; } void OfflineCompiler::updateBuildLog(const char *pErrorString, const size_t errorStringSize) { std::string errorString = (errorStringSize && pErrorString) ? 
std::string(pErrorString, pErrorString + errorStringSize) : ""; if (errorString[0] != '\0') { if (buildLog.empty()) { buildLog.assign(errorString); } else { buildLog.append("\n" + errorString); } } } std::string &OfflineCompiler::getBuildLog() { return buildLog; } int OfflineCompiler::getHardwareInfo(const char *pDeviceName) { int retVal = INVALID_DEVICE; for (unsigned int productId = 0; productId < IGFX_MAX_PRODUCT; ++productId) { if (hardwarePrefix[productId] && (0 == strcmp(pDeviceName, hardwarePrefix[productId]))) { if (hardwareInfoTable[productId]) { hwInfo = hardwareInfoTable[productId]; familyNameWithType.clear(); familyNameWithType.append(familyName[hwInfo->platform.eRenderCoreFamily]); familyNameWithType.append(hwInfo->capabilityTable.platformType); retVal = SUCCESS; break; } } } return retVal; } std::string OfflineCompiler::getStringWithinDelimiters(const std::string &src) { size_t start = src.find("R\"===("); size_t stop = src.find(")===\""); DEBUG_BREAK_IF(std::string::npos == start); DEBUG_BREAK_IF(std::string::npos == stop); start += strlen("R\"===("); size_t size = stop - start; std::string dst(src, start, size + 1); dst[size] = '\0'; // put null char at the end return dst; } int OfflineCompiler::initialize(size_t numArgs, const std::vector &allArgs, bool dumpFiles) { this->dumpFiles = dumpFiles; int retVal = SUCCESS; const char *source = nullptr; std::unique_ptr sourceFromFile; size_t sourceFromFileSize = 0; retVal = parseCommandLine(numArgs, allArgs); if (retVal != SUCCESS) { return retVal; } if (options.empty()) { // try to read options from file if not provided by commandline size_t ext_start = inputFile.find(".cl"); if (ext_start != std::string::npos) { std::string oclocOptionsFileName = inputFile.substr(0, ext_start); oclocOptionsFileName.append("_ocloc_options.txt"); std::string oclocOptionsFromFile; bool oclocOptionsRead = readOptionsFromFile(oclocOptionsFromFile, oclocOptionsFileName, argHelper); if (oclocOptionsRead && !isQuiet()) { 
argHelper->printf("Building with ocloc options:\n%s\n", oclocOptionsFromFile.c_str()); } if (oclocOptionsRead) { std::istringstream iss(allArgs[0] + " " + oclocOptionsFromFile); std::vector tokens{ std::istream_iterator{iss}, std::istream_iterator{}}; retVal = parseCommandLine(tokens.size(), tokens); if (retVal != SUCCESS) { return retVal; } } std::string optionsFileName = inputFile.substr(0, ext_start); optionsFileName.append("_options.txt"); bool optionsRead = readOptionsFromFile(options, optionsFileName, argHelper); if (optionsRead && !isQuiet()) { argHelper->printf("Building with options:\n%s\n", options.c_str()); } std::string internalOptionsFileName = inputFile.substr(0, ext_start); internalOptionsFileName.append("_internal_options.txt"); std::string internalOptionsFromFile; bool internalOptionsRead = readOptionsFromFile(internalOptionsFromFile, internalOptionsFileName, argHelper); if (internalOptionsRead && !isQuiet()) { argHelper->printf("Building with internal options:\n%s\n", internalOptionsFromFile.c_str()); } CompilerOptions::concatenateAppend(internalOptions, internalOptionsFromFile); } } parseDebugSettings(); // set up the device inside the program sourceFromFile = argHelper->loadDataFromFile(inputFile, sourceFromFileSize); if (sourceFromFileSize == 0) { retVal = INVALID_FILE; return retVal; } if (inputFileLlvm || inputFileSpirV) { // use the binary input "as is" sourceCode.assign(sourceFromFile.get(), sourceFromFileSize); } else { // for text input, we also accept files used as runtime builtins source = strstr((const char *)sourceFromFile.get(), "R\"===("); sourceCode = (source != nullptr) ? 
getStringWithinDelimiters(sourceFromFile.get()) : sourceFromFile.get(); } if ((inputFileSpirV == false) && (inputFileLlvm == false)) { auto fclLibFile = OsLibrary::load(Os::frontEndDllName); if (fclLibFile == nullptr) { argHelper->printf("Error: Failed to load %s\n", Os::frontEndDllName); return OUT_OF_HOST_MEMORY; } this->fclLib.reset(fclLibFile); if (this->fclLib == nullptr) { return OUT_OF_HOST_MEMORY; } auto fclCreateMain = reinterpret_cast(this->fclLib->getProcAddress(CIF::CreateCIFMainFuncName)); if (fclCreateMain == nullptr) { return OUT_OF_HOST_MEMORY; } this->fclMain = CIF::RAII::UPtr(createMainNoSanitize(fclCreateMain)); if (this->fclMain == nullptr) { return OUT_OF_HOST_MEMORY; } if (false == this->fclMain->IsCompatible()) { argHelper->printf("Incompatible interface in FCL : %s\n", CIF::InterfaceIdCoder::Dec(this->fclMain->FindIncompatible()).c_str()); DEBUG_BREAK_IF(true); return OUT_OF_HOST_MEMORY; } this->fclDeviceCtx = this->fclMain->CreateInterface(); if (this->fclDeviceCtx == nullptr) { return OUT_OF_HOST_MEMORY; } fclDeviceCtx->SetOclApiVersion(hwInfo->capabilityTable.clVersionSupport * 10); preferredIntermediateRepresentation = fclDeviceCtx->GetPreferredIntermediateRepresentation(); } else { if (!isQuiet()) { argHelper->printf("Compilation from IR - skipping loading of FCL\n"); } preferredIntermediateRepresentation = IGC::CodeType::spirV; } this->igcLib.reset(OsLibrary::load(Os::igcDllName)); if (this->igcLib == nullptr) { return OUT_OF_HOST_MEMORY; } auto igcCreateMain = reinterpret_cast(this->igcLib->getProcAddress(CIF::CreateCIFMainFuncName)); if (igcCreateMain == nullptr) { return OUT_OF_HOST_MEMORY; } this->igcMain = CIF::RAII::UPtr(createMainNoSanitize(igcCreateMain)); if (this->igcMain == nullptr) { return OUT_OF_HOST_MEMORY; } std::vector interfacesToIgnore = {IGC::OclGenBinaryBase::GetInterfaceId()}; if (false == this->igcMain->IsCompatible(&interfacesToIgnore)) { argHelper->printf("Incompatible interface in IGC : %s\n", 
CIF::InterfaceIdCoder::Dec(this->igcMain->FindIncompatible(&interfacesToIgnore)).c_str()); DEBUG_BREAK_IF(true); return OUT_OF_HOST_MEMORY; } CIF::Version_t verMin = 0, verMax = 0; if (false == this->igcMain->FindSupportedVersions(IGC::OclGenBinaryBase::GetInterfaceId(), verMin, verMax)) { argHelper->printf("Patchtoken interface is missing"); return OUT_OF_HOST_MEMORY; } this->igcDeviceCtx = this->igcMain->CreateInterface(); if (this->igcDeviceCtx == nullptr) { return OUT_OF_HOST_MEMORY; } this->igcDeviceCtx->SetProfilingTimerResolution(static_cast(hwInfo->capabilityTable.defaultProfilingTimerResolution)); auto igcPlatform = this->igcDeviceCtx->GetPlatformHandle(); auto igcGtSystemInfo = this->igcDeviceCtx->GetGTSystemInfoHandle(); auto igcFeWa = this->igcDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle(); if ((igcPlatform == nullptr) || (igcGtSystemInfo == nullptr) || (igcFeWa == nullptr)) { return OUT_OF_HOST_MEMORY; } IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform.get(), hwInfo->platform); IGC::GtSysInfoHelper::PopulateInterfaceWith(*igcGtSystemInfo.get(), hwInfo->gtSystemInfo); // populate with features igcFeWa.get()->SetFtrDesktop(hwInfo->featureTable.ftrDesktop); igcFeWa.get()->SetFtrChannelSwizzlingXOREnabled(hwInfo->featureTable.ftrChannelSwizzlingXOREnabled); igcFeWa.get()->SetFtrGtBigDie(hwInfo->featureTable.ftrGtBigDie); igcFeWa.get()->SetFtrGtMediumDie(hwInfo->featureTable.ftrGtMediumDie); igcFeWa.get()->SetFtrGtSmallDie(hwInfo->featureTable.ftrGtSmallDie); igcFeWa.get()->SetFtrGT1(hwInfo->featureTable.ftrGT1); igcFeWa.get()->SetFtrGT1_5(hwInfo->featureTable.ftrGT1_5); igcFeWa.get()->SetFtrGT2(hwInfo->featureTable.ftrGT2); igcFeWa.get()->SetFtrGT3(hwInfo->featureTable.ftrGT3); igcFeWa.get()->SetFtrGT4(hwInfo->featureTable.ftrGT4); igcFeWa.get()->SetFtrIVBM0M1Platform(hwInfo->featureTable.ftrIVBM0M1Platform); igcFeWa.get()->SetFtrGTL(hwInfo->featureTable.ftrGT1); igcFeWa.get()->SetFtrGTM(hwInfo->featureTable.ftrGT2); 
igcFeWa.get()->SetFtrGTH(hwInfo->featureTable.ftrGT3); igcFeWa.get()->SetFtrSGTPVSKUStrapPresent(hwInfo->featureTable.ftrSGTPVSKUStrapPresent); igcFeWa.get()->SetFtrGTA(hwInfo->featureTable.ftrGTA); igcFeWa.get()->SetFtrGTC(hwInfo->featureTable.ftrGTC); igcFeWa.get()->SetFtrGTX(hwInfo->featureTable.ftrGTX); igcFeWa.get()->SetFtr5Slice(hwInfo->featureTable.ftr5Slice); igcFeWa.get()->SetFtrGpGpuMidThreadLevelPreempt(hwInfo->featureTable.ftrGpGpuMidThreadLevelPreempt); igcFeWa.get()->SetFtrIoMmuPageFaulting(hwInfo->featureTable.ftrIoMmuPageFaulting); igcFeWa.get()->SetFtrWddm2Svm(hwInfo->featureTable.ftrWddm2Svm); igcFeWa.get()->SetFtrPooledEuEnabled(hwInfo->featureTable.ftrPooledEuEnabled); igcFeWa.get()->SetFtrResourceStreamer(hwInfo->featureTable.ftrResourceStreamer); return retVal; } int OfflineCompiler::parseCommandLine(size_t numArgs, const std::vector &argv) { int retVal = SUCCESS; bool compile32 = false; bool compile64 = false; if (numArgs < 2) { printUsage(); retVal = PRINT_USAGE; } for (uint32_t argIndex = 1; argIndex < numArgs; argIndex++) { const auto &currArg = argv[argIndex]; const bool hasMoreArgs = (argIndex + 1 < numArgs); if ("compile" == currArg) { //skip it } else if (("-file" == currArg) && hasMoreArgs) { inputFile = argv[argIndex + 1]; argIndex++; } else if (("-output" == currArg) && hasMoreArgs) { outputFile = argv[argIndex + 1]; argIndex++; } else if ((CompilerOptions::arch32bit == currArg) || ("-32" == currArg)) { compile32 = true; CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit); } else if ((CompilerOptions::arch64bit == currArg) || ("-64" == currArg)) { compile64 = true; CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch64bit); } else if (CompilerOptions::greaterThan4gbBuffersRequired == currArg) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired); } else if (("-device" == currArg) && hasMoreArgs) { deviceName = argv[argIndex + 1]; 
argIndex++; } else if ("-llvm_text" == currArg) { useLlvmText = true; } else if ("-llvm_bc" == currArg) { useLlvmBc = true; } else if ("-llvm_input" == currArg) { inputFileLlvm = true; } else if ("-spirv_input" == currArg) { inputFileSpirV = true; } else if ("-cpp_file" == currArg) { useCppFile = true; } else if (("-options" == currArg) && hasMoreArgs) { options = argv[argIndex + 1]; argIndex++; } else if (("-internal_options" == currArg) && hasMoreArgs) { CompilerOptions::concatenateAppend(internalOptions, argv[argIndex + 1]); argIndex++; } else if ("-options_name" == currArg) { useOptionsSuffix = true; } else if ("-force_stos_opt" == currArg) { forceStatelessToStatefulOptimization = true; } else if (("-out_dir" == currArg) && hasMoreArgs) { outputDirectory = argv[argIndex + 1]; argIndex++; } else if ("-q" == currArg) { argHelper->getPrinterRef() = MessagePrinter(true); quiet = true; } else if ("-output_no_suffix" == currArg) { outputNoSuffix = true; } else if ("--help" == currArg) { printUsage(); retVal = PRINT_USAGE; } else { argHelper->printf("Invalid option (arg %d): %s\n", argIndex, argv[argIndex].c_str()); retVal = INVALID_COMMAND_LINE; break; } } if (retVal == SUCCESS) { if (compile32 && compile64) { argHelper->printf("Error: Cannot compile for 32-bit and 64-bit, please choose one.\n"); retVal = INVALID_COMMAND_LINE; } else if (inputFile.empty()) { argHelper->printf("Error: Input file name missing.\n"); retVal = INVALID_COMMAND_LINE; } else if (deviceName.empty()) { argHelper->printf("Error: Device name missing.\n"); retVal = INVALID_COMMAND_LINE; } else if (!argHelper->fileExists(inputFile)) { argHelper->printf("Error: Input file %s missing.\n", inputFile.c_str()); retVal = INVALID_FILE; } else { retVal = getHardwareInfo(deviceName.c_str()); if (retVal != SUCCESS) { argHelper->printf("Error: Cannot get HW Info for device %s.\n", deviceName.c_str()); } else { std::string extensionsList = getExtensionsList(*hwInfo); 
CompilerOptions::concatenateAppend(internalOptions, convertEnabledExtensionsToCompilerInternalOptions(extensionsList.c_str())); } } } return retVal; } void OfflineCompiler::setStatelessToStatefullBufferOffsetFlag() { bool isStatelessToStatefulBufferOffsetSupported = true; if (deviceName == "bdw") { isStatelessToStatefulBufferOffsetSupported = false; } if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) { isStatelessToStatefulBufferOffsetSupported = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0; } if (isStatelessToStatefulBufferOffsetSupported) { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg); } } void OfflineCompiler::parseDebugSettings() { setStatelessToStatefullBufferOffsetFlag(); resolveExtraSettings(); } std::string OfflineCompiler::parseBinAsCharArray(uint8_t *binary, size_t size, std::string &fileName) { std::string builtinName = convertToPascalCase(fileName); std::ostringstream out; // Convert binary to cpp out << "#include \n"; out << "#include \n\n"; out << "size_t " << builtinName << "BinarySize_" << familyNameWithType << " = " << size << ";\n"; out << "uint32_t " << builtinName << "Binary_" << familyNameWithType << "[" << (size + 3) / 4 << "] = {" << std::endl << " "; uint32_t *binaryUint = (uint32_t *)binary; for (size_t i = 0; i < (size + 3) / 4; i++) { if (i != 0) { out << ", "; if (i % 8 == 0) { out << std::endl << " "; } } if (i < size / 4) { out << "0x" << std::hex << std::setw(8) << std::setfill('0') << binaryUint[i]; } else { uint32_t lastBytes = size & 0x3; uint32_t lastUint = 0; uint8_t *pLastUint = (uint8_t *)&lastUint; for (uint32_t j = 0; j < lastBytes; j++) { pLastUint[sizeof(uint32_t) - 1 - j] = binary[i * 4 + j]; } out << "0x" << std::hex << std::setw(8) << std::setfill('0') << lastUint; } } out << "};" << std::endl; out << std::endl << "#include \"shared/source/built_ins/registry/built_ins_registry.h\"\n" << std::endl; out << "namespace NEO {" << 
std::endl; out << "static RegisterEmbeddedResource register" << builtinName << "Bin(" << std::endl; out << " \"" << familyNameWithType << "_0_" << fileName.c_str() << ".builtin_kernel.bin\"," << std::endl; out << " (const char *)" << builtinName << "Binary_" << familyNameWithType << "," << std::endl; out << " " << builtinName << "BinarySize_" << familyNameWithType << ");" << std::endl; out << "}" << std::endl; return out.str(); } std::string OfflineCompiler::getFileNameTrunk(std::string &filePath) { size_t slashPos = filePath.find_last_of("\\/", filePath.size()) + 1; size_t extPos = filePath.find_last_of(".", filePath.size()); if (extPos == std::string::npos) { extPos = filePath.size(); } std::string fileTrunk = filePath.substr(slashPos, (extPos - slashPos)); return fileTrunk; } // std::string getDevicesTypes() { std::list prefixes; for (int j = 0; j < IGFX_MAX_PRODUCT; j++) { if (hardwarePrefix[j] == nullptr) continue; prefixes.push_back(hardwarePrefix[j]); } std::ostringstream os; for (auto it = prefixes.begin(); it != prefixes.end(); it++) { if (it != prefixes.begin()) os << ","; os << *it; } return os.str(); } void OfflineCompiler::printUsage() { argHelper->printf(R"===(Compiles input file to Intel Compute GPU device binary (*.bin). Additionally, outputs intermediate representation (e.g. spirV). Different input and intermediate file formats are available. Usage: ocloc [compile] -file -device [-output ] [-out_dir ] [-options ] [-32|-64] [-internal_options ] [-llvm_text|-llvm_input|-spirv_input] [-options_name] [-q] [-cpp_file] [-output_no_suffix] [--help] -file The input file to be compiled (by default input source format is OpenCL C kernel language). -device Target device. can be: %s If multiple target devices are provided, ocloc will compile for each of these targets and will create a fatbinary archive that contains all of device binaries produced this way. 
Supported -device patterns examples : -device skl ; will compile 1 target -device skl,icllp ; will compile 2 targets -device skl-icllp ; will compile all targets in range (inclusive) -device skl- ; will compile all targets newer/same as provided -device -skl ; will compile all targets older/same as provided -device gen9 ; will compile all targets matching the same gen -device gen9-gen11 ; will compile all targets in range (inclusive) -device gen9- ; will compile all targets newer/same as provided -device -gen9 ; will compile all targets older/same as provided -device * ; will compile all targets known to ocloc -output Optional output file base name. Default is input file's base name. This base name will be used for all output files. Proper sufixes (describing file formats) will be added automatically. -out_dir Optional output directory. Default is current working directory. -options Optional OpenCL C compilation options as defined by OpenCL specification. -32 Forces target architecture to 32-bit pointers. Default pointer size is inherited from ocloc's pointer size. This option is exclusive with -64. -64 Forces target architecture to 64-bit pointers. Default pointer size is inherited from ocloc's pointer size. This option is exclusive with -32. -internal_options Optional compiler internal options as defined by compilers used underneath. Check intel-graphics-compiler (IGC) project for details on available internal options. -llvm_text Forces intermediate representation (IR) format to human-readable LLVM IR (.ll). This option affects only output files and should not be used in combination with '-llvm_input' option. Default IR is spirV. This option is exclusive with -spirv_input. This option is exclusive with -llvm_input. -llvm_input Indicates that input file is an llvm binary. Default is OpenCL C kernel language. This option is exclusive with -spirv_input. This option is exclusive with -llvm_text. -spirv_input Indicates that input file is a spirV binary. 
Default is OpenCL C kernel language format. This option is exclusive with -llvm_input. This option is exclusive with -llvm_text. -options_name Will add suffix to output files. This suffix will be generated based on input options (useful when rebuilding with different set of options so that results won't get overwritten). This suffix is added always as the last part of the filename (even after file's extension). It does not affect '--output' parameter and can be used along with it ('--output' parameter defines the base name - i.e. prefix). -force_stos_opt Will forcibly enable stateless to stateful optimization, i.e. skip "-cl-intel-greater-than-4GB-buffer-required". -q Will silence most of output messages. -cpp_file Will generate c++ file with C-array containing Intel Compute device binary. -output_no_suffix Prevents ocloc from adding family name suffix. --help Print this usage message. Examples : Compile file to Intel Compute GPU device binary (out = source_file_Gen9core.bin) ocloc -file source_file.cl -device skl )===", NEO::getDevicesTypes().c_str()); } void OfflineCompiler::storeBinary( char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize) { dstSize = 0; DEBUG_BREAK_IF(!(pSrc && srcSize > 0)); delete[] pDst; pDst = new char[srcSize]; dstSize = (cl_uint)srcSize; memcpy_s(pDst, dstSize, pSrc, srcSize); } bool OfflineCompiler::generateElfBinary() { if (!genBinary || !genBinarySize) { return false; } SingleDeviceBinary binary = {}; binary.buildOptions = this->options; binary.intermediateRepresentation = ArrayRef(reinterpret_cast(this->irBinary), this->irBinarySize); binary.deviceBinary = ArrayRef(reinterpret_cast(this->genBinary), this->genBinarySize); binary.debugData = ArrayRef(reinterpret_cast(this->debugDataBinary), this->debugDataBinarySize); std::string packErrors; std::string packWarnings; using namespace NEO::Elf; ElfEncoder ElfEncoder; ElfEncoder.getElfFileHeader().type = ET_OPENCL_EXECUTABLE; if (binary.buildOptions.empty() == false) { 
ElfEncoder.appendSection(SHT_OPENCL_OPTIONS, SectionNamesOpenCl::buildOptions, ArrayRef(reinterpret_cast(binary.buildOptions.data()), binary.buildOptions.size())); } if (binary.intermediateRepresentation.empty() == false) { if (isSpirV) { ElfEncoder.appendSection(SHT_OPENCL_SPIRV, SectionNamesOpenCl::spirvObject, binary.intermediateRepresentation); } else { ElfEncoder.appendSection(SHT_OPENCL_LLVM_BINARY, SectionNamesOpenCl::llvmObject, binary.intermediateRepresentation); } } if (binary.debugData.empty() == false) { ElfEncoder.appendSection(SHT_OPENCL_DEV_DEBUG, SectionNamesOpenCl::deviceDebug, binary.debugData); } if (binary.deviceBinary.empty() == false) { ElfEncoder.appendSection(SHT_OPENCL_DEV_BINARY, SectionNamesOpenCl::deviceBinary, binary.deviceBinary); } this->elfBinary = ElfEncoder.encode(); return true; } void OfflineCompiler::writeOutAllFiles() { std::string fileBase; std::string fileTrunk = getFileNameTrunk(inputFile); if (outputNoSuffix) { if (outputFile.empty()) { fileBase = fileTrunk; } else { fileBase = outputFile; } } else { if (outputFile.empty()) { fileBase = fileTrunk + "_" + familyNameWithType; } else { fileBase = outputFile + "_" + familyNameWithType; } } if (outputDirectory != "") { std::list dirList; std::string tmp = outputDirectory; size_t pos = outputDirectory.size() + 1; do { dirList.push_back(tmp); pos = tmp.find_last_of("/\\", pos); tmp = tmp.substr(0, pos); } while (pos != std::string::npos); while (!dirList.empty()) { MakeDirectory(dirList.back().c_str()); dirList.pop_back(); } } if (irBinary) { std::string irOutputFileName = generateFilePathForIr(fileBase) + generateOptsSuffix(); argHelper->saveOutput(irOutputFileName, irBinary, irBinarySize); } if (genBinary) { std::string genOutputFile = generateFilePath(outputDirectory, fileBase, ".gen") + generateOptsSuffix(); argHelper->saveOutput(genOutputFile, genBinary, genBinarySize); if (useCppFile) { std::string cppOutputFile = generateFilePath(outputDirectory, fileBase, ".cpp"); 
std::string cpp = parseBinAsCharArray((uint8_t *)genBinary, genBinarySize, fileTrunk); argHelper->saveOutput(cppOutputFile, cpp.c_str(), cpp.size()); } } if (!elfBinary.empty()) { std::string elfOutputFile; if (outputNoSuffix) { elfOutputFile = generateFilePath(outputDirectory, fileBase, ""); } else { elfOutputFile = generateFilePath(outputDirectory, fileBase, ".bin") + generateOptsSuffix(); } argHelper->saveOutput( elfOutputFile, elfBinary.data(), elfBinary.size()); } if (debugDataBinary) { std::string debugOutputFile = generateFilePath(outputDirectory, fileBase, ".dbg") + generateOptsSuffix(); argHelper->saveOutput( debugOutputFile, debugDataBinary, debugDataBinarySize); } } bool OfflineCompiler::readOptionsFromFile(std::string &options, const std::string &file, OclocArgHelper *helper) { if (!helper->fileExists(file)) { return false; } size_t optionsSize = 0U; auto optionsFromFile = helper->loadDataFromFile(file, optionsSize); if (optionsSize > 0) { // Remove comment containing copyright header options = optionsFromFile.get(); size_t commentBegin = options.find_first_of("/*"); size_t commentEnd = options.find_last_of("*/"); if (commentBegin != std::string::npos && commentEnd != std::string::npos) { options = options.replace(commentBegin, commentEnd - commentBegin + 1, ""); size_t optionsBegin = options.find_first_not_of(" \t\n\r"); if (optionsBegin != std::string::npos) { options = options.substr(optionsBegin, options.length()); } } auto trimPos = options.find_last_not_of(" \n\r"); options = options.substr(0, trimPos + 1); } return true; } std::string generateFilePath(const std::string &directory, const std::string &fileNameBase, const char *extension) { UNRECOVERABLE_IF(extension == nullptr); if (directory.empty()) { return fileNameBase + extension; } bool hasTrailingSlash = (*directory.rbegin() == '/'); std::string ret; ret.reserve(directory.size() + (hasTrailingSlash ? 
// Status codes returned by the offline compiler (create/initialize/parseCommandLine/build).
// NOTE(review): the small negative values look like they mirror OpenCL CL_* error
// codes - confirm before relying on that.
enum ErrorCode {
    SUCCESS = 0,                  // operation completed
    OUT_OF_HOST_MEMORY = -6,      // compiler library/interface could not be loaded or created, or a translation returned no output
    BUILD_PROGRAM_FAILURE = -11,  // front-end or back-end compilation reported failure
    INVALID_DEVICE = -33,         // device name did not match any known hardware prefix
    INVALID_PROGRAM = -44,        // source code to build is empty
    INVALID_COMMAND_LINE = -5150, // unknown option or missing/conflicting arguments
    INVALID_FILE = -5151,         // input file does not exist or is empty
    PRINT_USAGE = -5152,          // usage text was printed instead of compiling
};
getFileNameTrunk(std::string &filePath); const HardwareInfo &getHardwareInfo() const { return *hwInfo; } protected: OfflineCompiler(); int getHardwareInfo(const char *pDeviceName); std::string getStringWithinDelimiters(const std::string &src); int initialize(size_t numArgs, const std::vector &allArgs, bool dumpFiles); int parseCommandLine(size_t numArgs, const std::vector &allArgs); void setStatelessToStatefullBufferOffsetFlag(); void resolveExtraSettings(); void parseDebugSettings(); void storeBinary(char *&pDst, size_t &dstSize, const void *pSrc, const size_t srcSize); int buildSourceCode(); void updateBuildLog(const char *pErrorString, const size_t errorStringSize); bool generateElfBinary(); std::string generateFilePathForIr(const std::string &fileNameBase) { const char *ext = (isSpirV) ? ".spv" : ".bc"; return generateFilePath(outputDirectory, fileNameBase, useLlvmText ? ".ll" : ext); } std::string generateOptsSuffix() { std::string suffix{useOptionsSuffix ? options : ""}; std::replace(suffix.begin(), suffix.end(), ' ', '_'); return suffix; } void writeOutAllFiles(); const HardwareInfo *hwInfo = nullptr; std::string deviceName; std::string familyNameWithType; std::string inputFile; std::string outputFile; std::string outputDirectory; std::string options; std::string internalOptions; std::string sourceCode; std::string buildLog; bool dumpFiles = true; bool useLlvmText = false; bool useLlvmBc = false; bool useCppFile = false; bool useOptionsSuffix = false; bool quiet = false; bool inputFileLlvm = false; bool inputFileSpirV = false; bool outputNoSuffix = false; bool forceStatelessToStatefulOptimization = false; std::vector elfBinary; char *genBinary = nullptr; size_t genBinarySize = 0; char *irBinary = nullptr; size_t irBinarySize = 0; bool isSpirV = false; char *debugDataBinary = nullptr; size_t debugDataBinarySize = 0; std::unique_ptr igcLib = nullptr; CIF::RAII::UPtr_t igcMain = nullptr; CIF::RAII::UPtr_t igcDeviceCtx = nullptr; std::unique_ptr fclLib = 
nullptr; CIF::RAII::UPtr_t fclMain = nullptr; CIF::RAII::UPtr_t fclDeviceCtx = nullptr; IGC::CodeType::CodeType_t preferredIntermediateRepresentation; OclocArgHelper *argHelper = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/offline_compiler_helper.cpp000066400000000000000000000012161363734646600310160ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/utilities/debug_settings_reader_creator.h" namespace NEO { template DebugSettingsManager::DebugSettingsManager(const char *registryPath) { } template DebugSettingsManager::~DebugSettingsManager() = default; // Global Debug Settings Manager DebugSettingsManager DebugManager(""); } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/offline_compiler_options.cpp000066400000000000000000000005041363734646600312310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include namespace NEO { // AUB file folder location const char *folderAUB = "aub_out"; // Initial value for HW tag uint32_t initialHardwareTag = std::numeric_limits::max(); } // namespace NEO compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/000077500000000000000000000000001363734646600254525ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/CMakeLists.txt000066400000000000000000000017631363734646600302210ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(CLOC_LIB_SRCS_UTILITIES ${CMAKE_CURRENT_SOURCE_DIR}/safety_caller.h ${CMAKE_CURRENT_SOURCE_DIR}/get_current_dir.h ${CMAKE_CURRENT_SOURCE_DIR}/get_path.h ) if(WIN32) list(APPEND CLOC_LIB_SRCS_UTILITIES 
${CMAKE_CURRENT_SOURCE_DIR}/windows/safety_caller_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/safety_guard_windows.h ${CMAKE_CURRENT_SOURCE_DIR}/windows/seh_exception.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/seh_exception.h ${CMAKE_CURRENT_SOURCE_DIR}/windows/get_current_dir_windows.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/get_path.cpp ) else() list(APPEND CLOC_LIB_SRCS_UTILITIES ${CMAKE_CURRENT_SOURCE_DIR}/linux/safety_caller_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/get_current_dir_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/get_path.cpp ) endif() target_sources(ocloc_lib PRIVATE ${CLOC_LIB_SRCS_UTILITIES}) target_sources(ocloc PRIVATE ${CLOC_LIB_SRCS_UTILITIES}) compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/get_current_dir.h000066400000000000000000000002601363734646600310000ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern std::string getCurrentDirectoryOwn(std::string outDirForBuilds);compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/get_path.h000066400000000000000000000002021363734646600274100ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once extern std::string getPath(); compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/linux/000077500000000000000000000000001363734646600266115ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/linux/get_current_dir_linux.cpp000066400000000000000000000006051363734646600337140ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include std::string getCurrentDirectoryOwn(std::string outDirForBuilds) { char buf[256]; if (getcwd(buf, sizeof(buf)) != NULL) return std::string(buf) + "/" + outDirForBuilds + "/"; else return std::string("./") + outDirForBuilds + "/"; } 
compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/linux/get_path.cpp000066400000000000000000000010201363734646600311010ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include #include #include #include std::string getPath() { char exepath[128] = {0}; std::stringstream ss; ss << "/proc/" << getpid() << "/exe"; if (readlink(ss.str().c_str(), exepath, 128) != -1) { std::string path = std::string(exepath); path = path.substr(0, path.find_last_of('/') + 1); return path; } else { return std::string(""); } } compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/linux/safety_caller_linux.cpp000066400000000000000000000010411363734646600333450ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/offline_compiler/source/utilities/linux/safety_guard_linux.h" #include "shared/source/os_interface/os_library.h" using namespace NEO; int buildWithSafetyGuard(OfflineCompiler *compiler) { SafetyGuardLinux safetyGuard; int retVal = 0; return safetyGuard.call(compiler, &OfflineCompiler::build, retVal); } compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/linux/safety_guard_linux.h000066400000000000000000000031301363734646600326530ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/abort.h" #include #include #include #include #include static jmp_buf jmpbuf; class SafetyGuardLinux { public: SafetyGuardLinux() { struct sigaction sigact; sigact.sa_sigaction = sigAction; sigact.sa_flags = SA_RESTART | SA_SIGINFO; sigaction(SIGSEGV, &sigact, (struct sigaction *)NULL); sigaction(SIGILL, &sigact, (struct sigaction *)NULL); } static void sigAction(int sigNum, siginfo_t *info, void *ucontext) { const int callstackDepth = 30; void 
*addresses[callstackDepth]; char **callstack; int backtraceSize = 0; backtraceSize = backtrace(addresses, callstackDepth); callstack = backtrace_symbols(addresses, backtraceSize); for (int i = 0; i < backtraceSize; ++i) { printf("[%d]: %s\n", i, callstack[i]); } free(callstack); longjmp(jmpbuf, 1); } template T call(Object *object, Method method, T retValueOnCrash) { int jump = 0; jump = setjmp(jmpbuf); if (jump == 0) { return (object->*method)(); } else { if (onSigSegv) { onSigSegv(); } else { NEO::abortExecution(); } } return retValueOnCrash; } typedef void (*callbackFunction)(); callbackFunction onSigSegv = nullptr; }; compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/safety_caller.h000066400000000000000000000003221363734646600304350ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { class OfflineCompiler; } extern int buildWithSafetyGuard(NEO::OfflineCompiler *compiler);compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/windows/000077500000000000000000000000001363734646600271445ustar00rootroot00000000000000get_current_dir_windows.cpp000066400000000000000000000004741363734646600345270ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/windows/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "Windows.h" #include std::string getCurrentDirectoryOwn(std::string outDirForBuilds) { char buf[256]; GetCurrentDirectoryA(256, buf); return std::string(buf) + "\\" + outDirForBuilds + "\\"; } compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/windows/get_path.cpp000066400000000000000000000002371363734646600314450ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include std::string getPath() { return std::string(""); } 
safety_caller_windows.cpp000066400000000000000000000007631363734646600341660ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/windows/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/offline_compiler/source/offline_compiler.h" #include "shared/offline_compiler/source/utilities/windows/safety_guard_windows.h" using namespace NEO; int buildWithSafetyGuard(OfflineCompiler *compiler) { SafetyGuardWindows safetyGuard; int retVal = 0; return safetyGuard.call(compiler, &OfflineCompiler::build, retVal); } compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/windows/safety_guard_windows.h000066400000000000000000000017461363734646600335540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/offline_compiler/source/utilities/windows/seh_exception.h" #include "shared/source/helpers/abort.h" #include static jmp_buf jmpbuf; class SafetyGuardWindows { public: template T call(Object *object, Method method, T retValueOnCrash) { int jump = 0; jump = setjmp(jmpbuf); if (jump == 0) { __try { return (object->*method)(); } __except (SehException::filter(GetExceptionCode(), GetExceptionInformation())) { if (onExcept) { onExcept(); } else { NEO::abortExecution(); } longjmp(jmpbuf, 1); } } return retValueOnCrash; } typedef void (*callbackFunction)(); callbackFunction onExcept = nullptr; }; compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/windows/seh_exception.cpp000066400000000000000000000114201363734646600325030ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "seh_exception.h" #include "shared/source/os_interface/os_library.h" #include #include #pragma warning(push) #pragma warning(disable : 4091) #include #pragma warning(pop) #include #include #include using namespace std; string 
SehException::getExceptionDescription(unsigned int code) { switch (code) { case EXCEPTION_ACCESS_VIOLATION: return "Access violation"; case EXCEPTION_DATATYPE_MISALIGNMENT: return "Datatype misalignement"; case EXCEPTION_FLT_DIVIDE_BY_ZERO: case EXCEPTION_INT_DIVIDE_BY_ZERO: return "Divide by zero"; case EXCEPTION_STACK_OVERFLOW: return "Stack overflow"; default: break; } return "Unknown"; } int SehException::filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) { printf("EXCEPTION: %s\n", SehException::getExceptionDescription(code).c_str()); if (code != EXCEPTION_STACK_OVERFLOW) { string callstack; SehException::getCallStack(code, ep, callstack); printf("Callstack:\n\n%s", callstack.c_str()); } return EXCEPTION_EXECUTE_HANDLER; } void SehException::getCallStack(unsigned int code, struct _EXCEPTION_POINTERS *ep, string &stack) { DWORD machine = 0; HANDLE hProcess = GetCurrentProcess(); HANDLE hThread = GetCurrentThread(); SYSTEM_INFO systemInfo; GetSystemInfo(&systemInfo); if (systemInfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_INTEL) { machine = IMAGE_FILE_MACHINE_I386; } else if (systemInfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) { machine = IMAGE_FILE_MACHINE_AMD64; } else { stack = "invalid processor arch"; return; } stack.clear(); BOOL result = SymInitialize(hProcess, NULL, TRUE); if (result == FALSE) { return; } STACKFRAME64 stackFrame; memset(&stackFrame, 0, sizeof(STACKFRAME64)); const int nameSize = 255; char buffer[sizeof(IMAGEHLP_SYMBOL64) + (nameSize + 1) * sizeof(char)]; IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast(buffer); symbol->MaxNameLength = nameSize; DWORD displacement = 0; DWORD64 displacement64 = 0; unique_ptr psApiLib(NEO::OsLibrary::load("psapi.dll")); auto getMappedFileName = reinterpret_cast(psApiLib->getProcAddress("GetMappedFileNameA")); size_t callstackCounter = 0; const size_t maxCallstackDepth = 1000; #ifdef _WIN64 stackFrame.AddrPC.Offset = ep->ContextRecord->Rip; stackFrame.AddrPC.Mode = AddrModeFlat; 
stackFrame.AddrStack.Offset = ep->ContextRecord->Rsp; stackFrame.AddrStack.Mode = AddrModeFlat; stackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp; stackFrame.AddrFrame.Mode = AddrModeFlat; #else stackFrame.AddrPC.Offset = ep->ContextRecord->Eip; stackFrame.AddrPC.Mode = AddrModeFlat; stackFrame.AddrStack.Offset = ep->ContextRecord->Esp; stackFrame.AddrStack.Mode = AddrModeFlat; stackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp; stackFrame.AddrFrame.Mode = AddrModeFlat; #endif while (callstackCounter < maxCallstackDepth) { symbol->Name[255] = '\0'; if (!StackWalk64(machine, hProcess, hThread, &stackFrame, ep->ContextRecord, nullptr, SymFunctionTableAccess64, SymGetModuleBase64, 0)) { break; } if (stackFrame.AddrFrame.Offset == 0) { break; } string lineInCode; string module; string symbolName; DWORD64 address = stackFrame.AddrPC.Offset; IMAGEHLP_LINE64 imageLine; imageLine.SizeOfStruct = sizeof(IMAGEHLP_LINE64); if (SymGetLineFromAddr64(hProcess, address, &displacement, &imageLine)) { lineInCode = imageLine.FileName; char filename[MAX_PATH + 1]; filename[MAX_PATH] = '\0'; if (getMappedFileName(hProcess, reinterpret_cast(imageLine.Address), filename, MAX_PATH)) { module = filename; } } if (SymGetSymFromAddr64(hProcess, address, &displacement64, symbol)) { symbolName = symbol->Name; } addLineToCallstack(stack, callstackCounter, module, lineInCode, symbolName); callstackCounter++; } } void SehException::addLineToCallstack(std::string &callstack, size_t counter, std::string &module, std::string &line, std::string &symbol) { callstack += "["; callstack += to_string(counter); callstack += "]: "; if (module.size()) { callstack += "Module:"; callstack += module + "\n\t"; } if (line.size()) { callstack += line + ":"; } callstack += symbol + "\n"; } compute-runtime-20.13.16352/shared/offline_compiler/source/utilities/windows/seh_exception.h000066400000000000000000000013021363734646600321460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ #pragma once #include "excpt.h" #include "windows.h" #include class SehException { public: static std::string getExceptionDescription(unsigned int code); static void getCallStack(unsigned int code, struct _EXCEPTION_POINTERS *ep, std::string &stack); static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep); protected: static void addLineToCallstack(std::string &callstack, size_t counter, std::string &module, std::string &line, std::string &symbol); typedef DWORD(WINAPI *getMappedFileNameFunction)(HANDLE hProcess, LPVOID lpv, LPSTR lpFilename, DWORD nSize); }; compute-runtime-20.13.16352/shared/source/000077500000000000000000000000001363734646600201235ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/CMakeLists.txt000066400000000000000000000072601363734646600226700ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # if(NOT DEFINED NEO_SHARED_RELEASE_LIB_NAME) MESSAGE(FATAL_ERROR "NEO Core library name undefined!") endif() include(enable_gens.cmake) function(generate_shared_lib LIB_NAME MOCKABLE) add_library(${LIB_NAME} STATIC ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/enable_gens.cmake ${CORE_SOURCES} ) if(${MOCKABLE}) target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=virtual) else() target_compile_definitions(${LIB_NAME} PUBLIC MOCKABLE_VIRTUAL=) endif() list(APPEND LIB_FLAGS_DEFINITIONS ${SUPPORTED_GEN_FLAGS_DEFINITONS}) set_property(TARGET ${LIB_NAME} APPEND_STRING PROPERTY COMPILE_FLAGS ${ASAN_FLAGS} ${TSAN_FLAGS}) target_compile_definitions(${LIB_NAME} PUBLIC PUBLIC GMM_LIB_DLL ${LIB_FLAGS_DEFINITIONS} DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM} PRIVATE OGL=1) target_compile_definitions(${LIB_NAME} PUBLIC ${NEO_CORE_COMPILE_DEFS}) target_include_directories(${LIB_NAME} PUBLIC ${KMDAF_HEADERS_DIR} ${ENGINE_NODE_DIR} ${NEO__GMM_INCLUDE_DIR} ${CIF_BASE_DIR} ${IGC_OCL_ADAPTOR_DIR} 
${NEO__IGC_INCLUDE_DIR} ${KHRONOS_HEADERS_DIR} ${SOURCE_LEVEL_DEBUGGER_HEADERS_DIR} ) if(WIN32) target_include_directories(${LIB_NAME} PUBLIC ${WDK_INCLUDE_PATHS} ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/windows ) else() target_include_directories(${LIB_NAME} PUBLIC ${I915_INCLUDES_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/os_interface/linux ) endif() create_project_source_tree(${LIB_NAME}) endfunction() set(NEO_CORE_COMPILE_DEFS "") set(CORE_SOURCES ${CORE_SRCS_GENX_ALL_BASE}) add_subdirectories() if(WIN32) list(APPEND CORE_SOURCES ${CORE_SRCS_GENX_ALL_WINDOWS}) else() list(APPEND CORE_SOURCES ${CORE_SRCS_GENX_ALL_LINUX}) endif() append_sources_from_properties(CORE_SOURCES NEO_CORE_BUILT_INS NEO_CORE_COMMAND_STREAM NEO_CORE_COMMANDS NEO_CORE_COMMAND_CONTAINER NEO_CORE_COMMAND_ENCODERS NEO_CORE_DEBUG_SETTINGS NEO_CORE_DEBUGGER NEO_CORE_DEVICE NEO_CORE_DIRECT_SUBMISSION NEO_CORE_DIRECT_SUBMISSION_DISPATCHERS NEO_CORE_EXECUTION_ENVIRONMENT NEO_CORE_GEN_COMMON NEO_CORE_GMM_HELPER NEO_CORE_HELPERS NEO_CORE_IMAGE NEO_CORE_INDIRECT_HEAP NEO_CORE_KERNEL NEO_CORE_MEMORY_MANAGER NEO_CORE_OS_INTERFACE NEO_CORE_PAGE_FAULT_MANAGER NEO_CORE_PROGRAM NEO_CORE_SKU_INFO_BASE NEO_CORE_SRCS_BUILT_INS NEO_CORE_SRCS_BUILT_IN_KERNELS NEO_CORE_SRCS_BUILT_INS_OPS NEO_CORE_SRCS_SOURCE_LEVEL_DEBUGGER NEO_CORE_UTILITIES NEO_UNIFIED_MEMORY ) if(WIN32) append_sources_from_properties(CORE_SOURCES NEO_CORE_GMM_HELPER_WINDOWS NEO_CORE_DIRECT_SUBMISSION_WINDOWS NEO_CORE_OS_INTERFACE_WINDOWS NEO_CORE_PAGE_FAULT_MANAGER_WINDOWS NEO_CORE_SKU_INFO_WINDOWS NEO_CORE_SRCS_HELPERS_WINDOWS NEO_CORE_UTILITIES_WINDOWS ) else() append_sources_from_properties(CORE_SOURCES NEO_CORE_DIRECT_SUBMISSION_LINUX NEO_CORE_OS_INTERFACE_LINUX NEO_CORE_PAGE_FAULT_MANAGER_LINUX NEO_CORE_UTILITIES_LINUX ) endif() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_TYPE}/core_sources.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_TYPE}/core_sources.cmake) endif() if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive 
-fPIC") endif() if(DONT_CARE_OF_VIRTUALS) generate_shared_lib(${NEO_SHARED_RELEASE_LIB_NAME} TRUE) else() generate_shared_lib(${NEO_SHARED_RELEASE_LIB_NAME} FALSE) if(NOT SKIP_UNIT_TESTS) generate_shared_lib(${NEO_SHARED_MOCKABLE_LIB_NAME} TRUE) endif() endif() set_property(GLOBAL PROPERTY NEO_CORE_SRCS_LINK ${CORE_SRCS_LINK}) compute-runtime-20.13.16352/shared/source/built_ins/000077500000000000000000000000001363734646600221135ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/CMakeLists.txt000066400000000000000000000040241363734646600246530ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_SRCS_BUILT_INS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/built_ins_storage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/built_ins.cpp ${CMAKE_CURRENT_SOURCE_DIR}/built_ins.h ${CMAKE_CURRENT_SOURCE_DIR}/built_in_ops_base.h ${CMAKE_CURRENT_SOURCE_DIR}/sip.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sip.h ${CMAKE_CURRENT_SOURCE_DIR}/sip_kernel_type.h ) add_subdirectory(builtinops) set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS ${NEO_CORE_SRCS_BUILT_INS}) set(NEO_CORE_SRCS_BUILT_IN_KERNELS ${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.builtin_kernel 
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.builtin_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.builtin_kernel ) set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_IN_KERNELS ${NEO_CORE_SRCS_BUILT_IN_KERNELS}) if(NOT (TARGET ${BUILTINS_BINARIES_LIB_NAME})) include(builtins_binary.cmake) endif() compute-runtime-20.13.16352/shared/source/built_ins/built_in_ops_base.h000066400000000000000000000016641363734646600257530ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { namespace EBuiltInOps { using Type = uint32_t; constexpr Type AuxTranslation{0}; constexpr Type CopyBufferToBuffer{1}; constexpr Type CopyBufferToBufferStateless{2}; constexpr Type CopyBufferRect{3}; constexpr Type CopyBufferRectStateless{4}; constexpr Type FillBuffer{5}; constexpr Type FillBufferStateless{6}; constexpr Type CopyBufferToImage3d{7}; constexpr Type CopyBufferToImage3dStateless{8}; constexpr Type CopyImage3dToBuffer{9}; constexpr Type CopyImage3dToBufferStateless{10}; constexpr Type CopyImageToImage1d{11}; constexpr Type CopyImageToImage2d{12}; constexpr Type CopyImageToImage3d{13}; constexpr Type FillImage1d{14}; constexpr Type FillImage2d{15}; constexpr Type FillImage3d{16}; constexpr Type MaxBaseValue{16}; constexpr Type COUNT{64}; } // namespace EBuiltInOps } // namespace NEO compute-runtime-20.13.16352/shared/source/built_ins/built_ins.cpp000066400000000000000000000041411363734646600246070ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include 
"shared/source/built_ins/built_ins.h" #include "shared/source/built_ins/sip.h" #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "opencl/source/helpers/built_ins_helper.h" #include "opencl/source/helpers/convert_color.h" #include "opencl/source/helpers/dispatch_info_builder.h" #include "compiler_options.h" #include #include namespace NEO { BuiltIns::BuiltIns() { builtinsLib.reset(new BuiltinsLib()); } BuiltIns::~BuiltIns() = default; const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) { uint32_t kernelId = static_cast(type); UNRECOVERABLE_IF(kernelId >= static_cast(SipKernelType::COUNT)); auto &sipBuiltIn = this->sipKernels[kernelId]; auto initializer = [&] { int retVal = 0; std::vector sipBinary; auto compilerInteface = device.getCompilerInterface(); UNRECOVERABLE_IF(compilerInteface == nullptr); auto ret = compilerInteface->getSipKernelBinary(device, type, sipBinary); UNRECOVERABLE_IF(ret != TranslationOutput::ErrorCode::Success); UNRECOVERABLE_IF(sipBinary.size() == 0); auto program = createProgramForSip(*device.getExecutionEnvironment(), nullptr, sipBinary, sipBinary.size(), &retVal, &device); DEBUG_BREAK_IF(retVal != 0); UNRECOVERABLE_IF(program == nullptr); program->setDevice(&device); retVal = program->processGenBinary(); DEBUG_BREAK_IF(retVal != 0); sipBuiltIn.first.reset(new SipKernel(type, program)); }; std::call_once(sipBuiltIn.second, initializer); UNRECOVERABLE_IF(sipBuiltIn.first == nullptr); return *sipBuiltIn.first; } } // namespace NEO compute-runtime-20.13.16352/shared/source/built_ins/built_ins.h000066400000000000000000000131321363734646600242540ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/sip_kernel_type.h" #include "shared/source/helpers/debug_helpers.h" #include 
"shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/helpers/vec.h" #include "built_in_ops.h" #include "compiler_options.h" #include #include #include #include #include #include #include #include #include namespace NEO { typedef std::vector BuiltinResourceT; class Context; class Device; class Kernel; struct KernelInfo; struct MultiDispatchInfo; class Program; class SchedulerKernel; class SipKernel; static constexpr ConstStringRef mediaKernelsBuildOptionsList[] = { "-D cl_intel_device_side_advanced_vme_enable", "-D cl_intel_device_side_avc_vme_enable", "-D cl_intel_device_side_vme_enable", "-D cl_intel_media_block_io", CompilerOptions::fastRelaxedMath}; static constexpr CompilerOptions::ConstConcatenation<> mediaKernelsBuildOptions{mediaKernelsBuildOptionsList}; BuiltinResourceT createBuiltinResource(const char *ptr, size_t size); BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r); std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension, const std::string &platformName = "", uint32_t deviceRevId = 0); std::string joinPath(const std::string &lhs, const std::string &rhs); const char *getBuiltinAsString(EBuiltInOps::Type builtin); const char *getUnknownBuiltinAsString(EBuiltInOps::Type builtin); const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin); class Storage { public: Storage(const std::string &rootPath) : rootPath(rootPath) { } virtual ~Storage() = default; BuiltinResourceT load(const std::string &resourceName); protected: virtual BuiltinResourceT loadImpl(const std::string &fullResourceName) = 0; std::string rootPath; }; class FileStorage : public Storage { public: FileStorage(const std::string &rootPath = "") : Storage(rootPath) { } protected: BuiltinResourceT loadImpl(const std::string &fullResourceName) override; }; struct EmbeddedStorageRegistry { static EmbeddedStorageRegistry &getInstance() { static EmbeddedStorageRegistry gsr; return gsr; } void store(const 
std::string &name, BuiltinResourceT &&resource) { resources.emplace(name, BuiltinResourceT(std::move(resource))); } const BuiltinResourceT *get(const std::string &name) const; private: using ResourcesContainer = std::unordered_map; ResourcesContainer resources; }; class EmbeddedStorage : public Storage { public: EmbeddedStorage(const std::string &rootPath) : Storage(rootPath) { } protected: BuiltinResourceT loadImpl(const std::string &fullResourceName) override; }; struct BuiltinCode { enum class ECodeType { Any = 0, // for requesting "any" code available - priorities as below Binary = 1, // ISA - highest priority Intermediate = 2, // SPIR/LLVM - medium prioroty Source = 3, // OCL C - lowest priority COUNT, INVALID }; static const char *getExtension(ECodeType ct) { switch (ct) { default: return ""; case ECodeType::Binary: return ".bin"; case ECodeType::Intermediate: return ".bc"; case ECodeType::Source: return ".cl"; } } ECodeType type; BuiltinResourceT resource; Device *targetDevice; }; class BuiltinsLib { public: BuiltinsLib(); BuiltinCode getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device); static std::unique_ptr createProgramFromCode(const BuiltinCode &bc, Device &device); protected: BuiltinResourceT getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device); using StoragesContainerT = std::vector>; StoragesContainerT allStorages; // sorted by priority allStorages[0] will be checked before allStorages[1], etc. 
std::mutex mutex; }; struct BuiltInKernel { const char *pSource = nullptr; Program *pProgram = nullptr; std::once_flag programIsInitialized; // guard for creating+building the program Kernel *pKernel = nullptr; BuiltInKernel() { } }; class BuiltinDispatchInfoBuilder; class BuiltIns { public: std::pair, std::once_flag> BuiltinOpsBuilders[static_cast(EBuiltInOps::COUNT)]; BuiltIns(); virtual ~BuiltIns(); MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device); BuiltinsLib &getBuiltinsLib() { DEBUG_BREAK_IF(!builtinsLib.get()); return *builtinsLib; } void setCacheingEnableState(bool enableCacheing) { this->enableCacheing = enableCacheing; } bool isCacheingEnabled() const { return this->enableCacheing; } protected: // sip builtins std::pair, std::once_flag> sipKernels[static_cast(SipKernelType::COUNT)]; std::unique_ptr builtinsLib; using ProgramsContainerT = std::array, std::once_flag>, static_cast(EBuiltInOps::COUNT)>; ProgramsContainerT builtinPrograms; bool enableCacheing = true; }; template class BuiltInOp; } // namespace NEO compute-runtime-20.13.16352/shared/source/built_ins/built_ins_storage.cpp000066400000000000000000000167521363734646600263460ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/built_ins.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "os_inc.h" #include namespace NEO { const char *getBuiltinAsString(EBuiltInOps::Type builtin) { const char *builtinString = getAdditionalBuiltinAsString(builtin); if (builtinString) { return builtinString; } switch (builtin) { default: return getUnknownBuiltinAsString(builtin); case EBuiltInOps::AuxTranslation: return "aux_translation.builtin_kernel"; case EBuiltInOps::CopyBufferToBuffer: return "copy_buffer_to_buffer.builtin_kernel"; case 
EBuiltInOps::CopyBufferToBufferStateless: return "copy_buffer_to_buffer_stateless.builtin_kernel"; case EBuiltInOps::CopyBufferRect: return "copy_buffer_rect.builtin_kernel"; case EBuiltInOps::CopyBufferRectStateless: return "copy_buffer_rect_stateless.builtin_kernel"; case EBuiltInOps::FillBuffer: return "fill_buffer.builtin_kernel"; case EBuiltInOps::FillBufferStateless: return "fill_buffer_stateless.builtin_kernel"; case EBuiltInOps::CopyBufferToImage3d: return "copy_buffer_to_image3d.builtin_kernel"; case EBuiltInOps::CopyBufferToImage3dStateless: return "copy_buffer_to_image3d_stateless.builtin_kernel"; case EBuiltInOps::CopyImage3dToBuffer: return "copy_image3d_to_buffer.builtin_kernel"; case EBuiltInOps::CopyImage3dToBufferStateless: return "copy_image3d_to_buffer_stateless.builtin_kernel"; case EBuiltInOps::CopyImageToImage1d: return "copy_image_to_image1d.builtin_kernel"; case EBuiltInOps::CopyImageToImage2d: return "copy_image_to_image2d.builtin_kernel"; case EBuiltInOps::CopyImageToImage3d: return "copy_image_to_image3d.builtin_kernel"; case EBuiltInOps::FillImage1d: return "fill_image1d.builtin_kernel"; case EBuiltInOps::FillImage2d: return "fill_image2d.builtin_kernel"; case EBuiltInOps::FillImage3d: return "fill_image3d.builtin_kernel"; }; } BuiltinResourceT createBuiltinResource(const char *ptr, size_t size) { return BuiltinResourceT(ptr, ptr + size); } BuiltinResourceT createBuiltinResource(const BuiltinResourceT &r) { return BuiltinResourceT(r); } std::string createBuiltinResourceName(EBuiltInOps::Type builtin, const std::string &extension, const std::string &platformName, uint32_t deviceRevId) { std::string ret; if (platformName.size() > 0) { ret = platformName; ret += "_" + std::to_string(deviceRevId); ret += "_"; } ret += getBuiltinAsString(builtin); if (extension.size() > 0) { ret += extension; } return ret; } std::string joinPath(const std::string &lhs, const std::string &rhs) { if (lhs.size() == 0) { return rhs; } if (rhs.size() == 0) { 
return lhs; } if (*lhs.rbegin() == PATH_SEPARATOR) { return lhs + rhs; } return lhs + PATH_SEPARATOR + rhs; } std::string getDriverInstallationPath() { return ""; } BuiltinResourceT Storage::load(const std::string &resourceName) { return loadImpl(joinPath(rootPath, resourceName)); } BuiltinResourceT FileStorage::loadImpl(const std::string &fullResourceName) { BuiltinResourceT ret; std::ifstream f{fullResourceName, std::ios::in | std::ios::binary | std::ios::ate}; auto end = f.tellg(); f.seekg(0, std::ios::beg); auto beg = f.tellg(); auto s = end - beg; ret.resize(static_cast(s)); f.read(ret.data(), s); return ret; } const BuiltinResourceT *EmbeddedStorageRegistry::get(const std::string &name) const { auto it = resources.find(name); if (resources.end() == it) { return nullptr; } return &it->second; } BuiltinResourceT EmbeddedStorage::loadImpl(const std::string &fullResourceName) { auto *constResource = EmbeddedStorageRegistry::getInstance().get(fullResourceName); if (constResource == nullptr) { BuiltinResourceT ret; return ret; } return createBuiltinResource(*constResource); } BuiltinsLib::BuiltinsLib() { allStorages.push_back(std::unique_ptr(new EmbeddedStorage(""))); allStorages.push_back(std::unique_ptr(new FileStorage(getDriverInstallationPath()))); } BuiltinCode BuiltinsLib::getBuiltinCode(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) { std::lock_guard lockRaii{mutex}; BuiltinResourceT bc; BuiltinCode::ECodeType usedCodetType = BuiltinCode::ECodeType::INVALID; if (requestedCodeType == BuiltinCode::ECodeType::Any) { uint32_t codeType = static_cast(BuiltinCode::ECodeType::Binary); if (DebugManager.flags.RebuildPrecompiledKernels.get()) { codeType = static_cast(BuiltinCode::ECodeType::Source); } for (uint32_t e = static_cast(BuiltinCode::ECodeType::COUNT); codeType != e; ++codeType) { bc = getBuiltinResource(builtin, static_cast(codeType), device); if (bc.size() > 0) { usedCodetType = static_cast(codeType); break; } } } else 
{ bc = getBuiltinResource(builtin, requestedCodeType, device); usedCodetType = requestedCodeType; } BuiltinCode ret; std::swap(ret.resource, bc); ret.type = usedCodetType; ret.targetDevice = &device; return ret; } std::unique_ptr BuiltinsLib::createProgramFromCode(const BuiltinCode &bc, Device &device) { std::unique_ptr ret; const char *data = bc.resource.data(); size_t dataLen = bc.resource.size(); cl_int err = 0; switch (bc.type) { default: break; case BuiltinCode::ECodeType::Source: case BuiltinCode::ECodeType::Intermediate: ret.reset(Program::create(data, nullptr, device, true, &err)); break; case BuiltinCode::ECodeType::Binary: ret.reset(Program::createFromGenBinary(*device.getExecutionEnvironment(), nullptr, data, dataLen, true, nullptr, &device)); break; } return ret; } BuiltinResourceT BuiltinsLib::getBuiltinResource(EBuiltInOps::Type builtin, BuiltinCode::ECodeType requestedCodeType, Device &device) { BuiltinResourceT bc; std::string resourceNameGeneric = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType)); std::string resourceNameForPlatformType = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo())); std::string resourceNameForPlatformTypeAndStepping = createBuiltinResourceName(builtin, BuiltinCode::getExtension(requestedCodeType), getFamilyNameWithType(device.getHardwareInfo()), device.getHardwareInfo().platform.usRevId); for (auto &rn : {resourceNameForPlatformTypeAndStepping, resourceNameForPlatformType, resourceNameGeneric}) { // first look for dedicated version, only fallback to generic one for (auto &s : allStorages) { bc = s.get()->load(rn); if (bc.size() != 0) { return bc; } } } return bc; } } // namespace NEO 
compute-runtime-20.13.16352/shared/source/built_ins/builtinops/000077500000000000000000000000001363734646600243035ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/builtinops/CMakeLists.txt000066400000000000000000000005141363734646600270430ustar00rootroot00000000000000# # Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_SRCS_BUILT_INS_OPS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/built_in_ops.h ) add_subdirectories() set_property(GLOBAL PROPERTY NEO_CORE_SRCS_BUILT_INS_OPS ${NEO_CORE_SRCS_BUILT_INS_OPS}) compute-runtime-20.13.16352/shared/source/built_ins/builtinops/built_in_ops.h000066400000000000000000000004071363734646600271430ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_in_ops_base.h" namespace NEO { namespace EBuiltInOps { constexpr Type MaxCoreValue{MaxBaseValue}; } } // namespace NEOcompute-runtime-20.13.16352/shared/source/built_ins/builtins_binary.cmake000066400000000000000000000042211363734646600263110ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_library(${BUILTINS_BINARIES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL builtins_binary.cmake) # Add builtins sources add_subdirectory(registry) set(GENERATED_BUILTINS "aux_translation" "copy_buffer_rect" "copy_buffer_to_buffer" "copy_buffer_to_image3d" "copy_image3d_to_buffer" "copy_image_to_image1d" "copy_image_to_image2d" "copy_image_to_image3d" "fill_buffer" "fill_image1d" "fill_image2d" "fill_image3d" ) set(GENERATED_BUILTINS_STATELESS "copy_buffer_to_buffer_stateless" "copy_buffer_rect_stateless" "copy_buffer_to_image3d_stateless" "copy_image3d_to_buffer_stateless" "fill_buffer_stateless" ) # Generate builtins cpps if(COMPILE_BUILT_INS) add_subdirectory(kernels) endif() macro(macro_for_each_gen) foreach(PLATFORM_TYPE 
${PLATFORM_TYPES}) get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE}) foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS}) list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type}}) endforeach() foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS}) list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type}}) endforeach() endforeach() source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${GENERATED_BUILTINS_CPPS}) endmacro() apply_macro_for_each_gen("SUPPORTED") if(COMPILE_BUILT_INS) target_sources(${BUILTINS_BINARIES_LIB_NAME} PUBLIC ${GENERATED_BUILTINS_CPPS}) set_source_files_properties(${GENERATED_BUILTINS_CPPS} PROPERTIES GENERATED TRUE) endif() set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES LINKER_LANGUAGE CXX) set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${BUILTINS_BINARIES_LIB_NAME} PROPERTIES FOLDER "built_ins") target_include_directories(${BUILTINS_BINARIES_LIB_NAME} PRIVATE ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) compute-runtime-20.13.16352/shared/source/built_ins/kernels/000077500000000000000000000000001363734646600235565ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/kernels/CMakeLists.txt000066400000000000000000000103311363734646600263140ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # add_custom_target(builtins) set_target_properties(builtins PROPERTIES FOLDER "built_ins") set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}") add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins) add_subdirectories() set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE) set(GENERATED_BUILTINS_STATELESS 
${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE) if("${NEO_ARCH}" STREQUAL "x32") set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required") else() set(BUILTIN_OPTIONS "") endif() set(BUILTIN_OPTIONS_STATELESS "-cl-intel-greater-than-4GB-buffer-required" ) if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" ) list(APPEND __cloc__options__ "-D DEBUG") endif() set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE) set(BUILTIN_CPP "") function(get_bits_for_stateless gen_type platform_type) # Force 32bits compiling on gen9lp for stateless builtins if( (${GEN_TYPE} STREQUAL "GEN9" ) AND ( ${PLATFORM_TYPE} STREQUAL "LP")) set(BITS "32" PARENT_SCOPE) else() set(BITS ${NEO_BITS} PARENT_SCOPE) endif() endfunction() # Define function for compiling built-ins (with ocloc) function(compile_builtin gen_type platform_type builtin bits builtin_options) string(TOLOWER ${gen_type} gen_type_lower) get_family_name_with_type(${gen_type} ${platform_type}) set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${gen_type_lower}") # get filename set(FILENAME ${builtin}) # get name of the file w/o extension get_filename_component(BASENAME ${builtin} NAME_WE) set(OUTPUTPATH_BASE "${OUTPUTDIR}/${BASENAME}_${family_name_with_type}") set(OUTPUT_FILES ${OUTPUTPATH_BASE}.spv ${OUTPUTPATH_BASE}.bin ${OUTPUTPATH_BASE}.cpp ${OUTPUTPATH_BASE}.gen ) # function returns builtin cpp filename unset(BUILTIN_CPP) # set variable outside function set(BUILTIN_CPP built_ins/${NEO_ARCH}/${gen_type_lower}/${BASENAME}_${family_name_with_type}.cpp PARENT_SCOPE) if(NOT DEFINED cloc_cmd_prefix) if(WIN32) set(cloc_cmd_prefix ocloc) else() if(DEFINED NEO__IGC_LIBRARY_PATH) set(cloc_cmd_prefix LD_LIBRARY_PATH=${NEO__IGC_LIBRARY_PATH}:$ $) else() set(cloc_cmd_prefix LD_LIBRARY_PATH=$ $) endif() endif() endif() list(APPEND __cloc__options__ "-cl-kernel-arg-info") add_custom_command( OUTPUT ${OUTPUT_FILES} COMMAND ${cloc_cmd_prefix} -q -file ${FILENAME} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} ${builtin_options} 
-${bits} -out_dir ${OUTPUTDIR} -cpp_file -options "$" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${builtin} ocloc copy_compiler_files ) endfunction() macro(macro_for_each_gen) foreach(PLATFORM_TYPE ${PLATFORM_TYPES}) if(${GEN_TYPE}_HAS_${PLATFORM_TYPE}) get_family_name_with_type(${GEN_TYPE} ${PLATFORM_TYPE}) string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER) unset(BUILTINS_COMMANDS) foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS}) compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.builtin_kernel ${NEO_BITS} "${BUILTIN_OPTIONS}") list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP}) set(RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE) endforeach() get_bits_for_stateless(${GEN_TYPE} ${PLATFORM_TYPE}) foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS}) compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_STATELESS}.builtin_kernel ${BITS} "${BUILTIN_OPTIONS_STATELESS}") list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP}) set(RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE) endforeach() set(target_name builtins_${family_name_with_type}) add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS}) add_dependencies(builtins ${target_name}) set_target_properties(${target_name} PROPERTIES FOLDER "opencl/source/built_ins/${family_name_with_type}") endif() endforeach() endmacro() apply_macro_for_each_gen("SUPPORTED") compute-runtime-20.13.16352/shared/source/built_ins/kernels/aux_translation.builtin_kernel000066400000000000000000000004361363734646600317240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void fullCopy(__global const uint* src, __global uint* dst) { unsigned int gid = get_global_id(0); uint4 loaded = vload4(gid, src); vstore4(loaded, gid, dst); } )===" 
compute-runtime-20.13.16352/shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel000066400000000000000000000024111363734646600320240ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( ////////////////////////////////////////////////////////////////////////////// __kernel void CopyBufferRectBytes2d( __global const char* src, __global char* dst, uint4 SrcOrigin, uint4 DstOrigin, uint2 SrcPitch, uint2 DstPitch ) { int x = get_global_id(0); int y = get_global_id(1); uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ); uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } ////////////////////////////////////////////////////////////////////////////// __kernel void CopyBufferRectBytes3d( __global const char* src, __global char* dst, uint4 SrcOrigin, uint4 DstOrigin, uint2 SrcPitch, uint2 DstPitch ) { int x = get_global_id(0); int y = get_global_id(1); int z = get_global_id(2); uint LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y ); uint LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } )===" copy_buffer_rect_stateless.builtin_kernel000066400000000000000000000024251363734646600340410ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/kernels/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( ////////////////////////////////////////////////////////////////////////////// __kernel void CopyBufferRectBytes2d( __global const char* src, __global char* dst, ulong4 SrcOrigin, ulong4 DstOrigin, ulong2 SrcPitch, ulong2 DstPitch ) { size_t x = get_global_id(0); size_t y = get_global_id(1); size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ); size_t LDstOffset = x + 
DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } ////////////////////////////////////////////////////////////////////////////// __kernel void CopyBufferRectBytes3d( __global const char* src, __global char* dst, ulong4 SrcOrigin, ulong4 DstOrigin, ulong2 SrcPitch, ulong2 DstPitch ) { size_t x = get_global_id(0); size_t y = get_global_id(1); size_t z = get_global_id(2); size_t LSrcOffset = x + SrcOrigin.x + ( ( y + SrcOrigin.y ) * SrcPitch.x ) + ( ( z + SrcOrigin.z ) * SrcPitch.y ); size_t LDstOffset = x + DstOrigin.x + ( ( y + DstOrigin.y ) * DstPitch.x ) + ( ( z + DstOrigin.z ) * DstPitch.y ); *( dst + LDstOffset ) = *( src + LSrcOffset ); } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel000066400000000000000000000053011363734646600330430ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyBufferToBufferBytes( const __global uchar* pSrc, __global uchar* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes, uint bytesToRead ) { pSrc += ( srcOffsetInBytes + get_global_id(0) ); pDst += ( dstOffsetInBytes + get_global_id(0) ); pDst[ 0 ] = pSrc[ 0 ]; } __kernel void CopyBufferToBufferLeftLeftover( const __global uchar* pSrc, __global uchar* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes) { unsigned int gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } __kernel void CopyBufferToBufferMiddle( const __global uint* pSrc, __global uint* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes) { unsigned int gid = get_global_id(0); pDst += dstOffsetInBytes >> 2; pSrc += srcOffsetInBytes >> 2; uint4 loaded = vload4(gid, pSrc); vstore4(loaded, gid, pDst); } __kernel void CopyBufferToBufferRightLeftover( const __global uchar* pSrc, __global uchar* pDst, uint srcOffsetInBytes, uint dstOffsetInBytes) { unsigned int gid = get_global_id(0); pDst[ gid + 
dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } __kernel void copyBufferToBufferBytesSingle(__global uchar *dst, const __global uchar *src) { unsigned int gid = get_global_id(0); dst[gid] = (uchar)(src[gid]); } __kernel void CopyBufferToBufferSideRegion( __global uchar* pDst, const __global uchar* pSrc, unsigned int len, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment ) { unsigned int gid = get_global_id(0); __global uchar* pDstWithOffset = (__global uchar*)((__global uchar*)pDst + dstSshOffset); __global uchar* pSrcWithOffset = (__global uchar*)((__global uchar*)pSrc + srcSshOffset); if (gid < len) { pDstWithOffset[ gid ] = pSrcWithOffset[ gid ]; } } __kernel void CopyBufferToBufferMiddleRegion( __global uint* pDst, const __global uint* pSrc, unsigned int elems, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment uint srcSshOffset // Offset needed in case ptr has been adjusted for SSH alignment ) { unsigned int gid = get_global_id(0); __global uint* pDstWithOffset = (__global uint*)((__global uchar*)pDst + dstSshOffset); __global uint* pSrcWithOffset = (__global uint*)((__global uchar*)pSrc + srcSshOffset); if (gid < elems) { uint4 loaded = vload4(gid, pSrcWithOffset); vstore4(loaded, gid, pDstWithOffset); } } )==="copy_buffer_to_buffer_stateless.builtin_kernel000066400000000000000000000024111363734646600350520ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/kernels/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyBufferToBufferBytes( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes, ulong bytesToRead ) { pSrc += ( srcOffsetInBytes + get_global_id(0) ); pDst += ( dstOffsetInBytes + get_global_id(0) ); pDst[ 0 ] = pSrc[ 0 ]; } __kernel void CopyBufferToBufferLeftLeftover( 
const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } __kernel void CopyBufferToBufferMiddle( const __global uint* pSrc, __global uint* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes) { size_t gid = get_global_id(0); pDst += dstOffsetInBytes >> 2; pSrc += srcOffsetInBytes >> 2; uint4 loaded = vload4(gid, pSrc); vstore4(loaded, gid, pDst); } __kernel void CopyBufferToBufferRightLeftover( const __global uchar* pSrc, __global uchar* pDst, ulong srcOffsetInBytes, ulong dstOffsetInBytes) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ]; } )==="compute-runtime-20.13.16352/shared/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel000066400000000000000000000147501363734646600331130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void CopyBufferToImage3dBytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1)); } __kernel void CopyBufferToImage3d2Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) & 0x00000001){ ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1)); ushort lower = *((__global 
uchar*)(src + LOffset + x * 2)); ushort combined = (upper << 8) | lower; c.x = (uint)combined; } else{ c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d4Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) & 0x00000003){ uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 4)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; } else{ c.x = (*(__global uint*)(src + LOffset + x * 4)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d8Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8)); if(( ulong )(src + srcOffset) & 0x00000007){ uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 8)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7)); upper = *((__global uchar*)(src + LOffset + x * 8 + 6)); lower2 = *((__global uchar*)(src + 
LOffset + x * 8 + 5)); lower = *((__global uchar*)(src + LOffset + x * 8 + 4)); combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower; c.y = combined; } else{ c = *((__global uint2*)(src + LOffset + x * 8)); } write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1)); } __kernel void CopyBufferToImage3d16Bytes(__global uchar *src, __write_only image3d_t output, int srcOffset, int4 dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; uint LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 0); if(( ulong )(src + srcOffset) & 0x0000000f){ uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 16)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7)); upper = *((__global uchar*)(src + LOffset + x * 16 + 6)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5)); lower = *((__global uchar*)(src + LOffset + x * 16 + 4)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.y = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11)); upper = *((__global uchar*)(src + LOffset + x * 16 + 10)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9)); lower = *((__global uchar*)(src + LOffset + x * 16 + 8)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.z = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15)); upper = *((__global uchar*)(src + LOffset + x * 16 + 14)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13)); lower = *((__global uchar*)(src + LOffset + x * 16 + 12)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; 
c.w = combined; } else{ c = *((__global uint4 *)(src + LOffset + x * 16)); } write_imageui(output, dstCoord, c); } )===" copy_buffer_to_image3d_stateless.builtin_kernel000066400000000000000000000147741363734646600351310ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void CopyBufferToImage3dBytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); write_imageui(output, dstCoord, (uint4)(*(src + LOffset + x), 0, 0, 1)); } __kernel void CopyBufferToImage3d2Bytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) & 0x00000001){ ushort upper = *((__global uchar*)(src + LOffset + x * 2 + 1)); ushort lower = *((__global uchar*)(src + LOffset + x * 2)); ushort combined = (upper << 8) | lower; c.x = (uint)combined; } else{ c.x = (uint)(*(__global ushort*)(src + LOffset + x * 2)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d4Bytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 1); if(( ulong )(src + srcOffset) 
& 0x00000003){ uint upper2 = *((__global uchar*)(src + LOffset + x * 4 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 4 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 4 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 4)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; } else{ c.x = (*(__global uint*)(src + LOffset + x * 4)); } write_imageui(output, dstCoord, c); } __kernel void CopyBufferToImage3d8Bytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = srcOffset + (y * Pitch.x) + (z * Pitch.y); uint2 c = (uint2)(0, 0);//*((__global uint2*)(src + LOffset + x * 8)); if(( ulong )(src + srcOffset) & 0x00000007){ uint upper2 = *((__global uchar*)(src + LOffset + x * 8 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 8 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 8 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 8)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; upper2 = *((__global uchar*)(src + LOffset + x * 8 + 7)); upper = *((__global uchar*)(src + LOffset + x * 8 + 6)); lower2 = *((__global uchar*)(src + LOffset + x * 8 + 5)); lower = *((__global uchar*)(src + LOffset + x * 8 + 4)); combined = ((uint)upper2 << 24) | ((uint)upper << 16) | ((uint)lower2 << 8) | lower; c.y = combined; } else{ c = *((__global uint2*)(src + LOffset + x * 8)); } write_imageui(output, dstCoord, (uint4)(c.x, c.y, 0, 1)); } __kernel void CopyBufferToImage3d16Bytes(__global uchar *src, __write_only image3d_t output, ulong srcOffset, int4 dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; ulong LOffset = 
srcOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = (uint4)(0, 0, 0, 0); if(( ulong )(src + srcOffset) & 0x0000000f){ uint upper2 = *((__global uchar*)(src + LOffset + x * 16 + 3)); uint upper = *((__global uchar*)(src + LOffset + x * 16 + 2)); uint lower2 = *((__global uchar*)(src + LOffset + x * 16 + 1)); uint lower = *((__global uchar*)(src + LOffset + x * 16)); uint combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.x = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 7)); upper = *((__global uchar*)(src + LOffset + x * 16 + 6)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 5)); lower = *((__global uchar*)(src + LOffset + x * 16 + 4)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.y = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 11)); upper = *((__global uchar*)(src + LOffset + x * 16 + 10)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 9)); lower = *((__global uchar*)(src + LOffset + x * 16 + 8)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.z = combined; upper2 = *((__global uchar*)(src + LOffset + x * 16 + 15)); upper = *((__global uchar*)(src + LOffset + x * 16 + 14)); lower2 = *((__global uchar*)(src + LOffset + x * 16 + 13)); lower = *((__global uchar*)(src + LOffset + x * 16 + 12)); combined = (upper2 << 24) | (upper << 16) | (lower2 << 8) | lower; c.w = combined; } else{ c = *((__global uint4 *)(src + LOffset + x * 16)); } write_imageui(output, dstCoord, c); } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel000066400000000000000000000146521363734646600331140ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint 
z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); *(dst + DstOffset + x) = convert_uchar_sat(c.x); } __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000001){ *((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x); } } __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000003){ *((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global uint*)(dst + DstOffset + x * 4)) = c.x; } } __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = 
get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000007){ *((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff); } else{ uint2 d = (uint2)(c.x,c.y); *((__global uint2*)(dst + DstOffset + x * 8)) = d; } } __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, int dstOffset, uint2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; uint DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); const uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x0000000f){ *((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 
0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 8)) = convert_uchar_sat(c.z & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff); } else{ *(__global uint4*)(dst + DstOffset + x * 16) = c; } } )===" copy_image3d_to_buffer_stateless.builtin_kernel000066400000000000000000000146721363734646600351260ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/kernels/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImage3dToBufferBytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); *(dst + DstOffset + x) = convert_uchar_sat(c.x); } __kernel void CopyImage3dToBuffer2Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; 
ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000001){ *((__global uchar*)(dst + DstOffset + x * 2 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 2)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global ushort*)(dst + DstOffset + x * 2)) = convert_ushort_sat(c.x); } } __kernel void CopyImage3dToBuffer4Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000003){ *((__global uchar*)(dst + DstOffset + x * 4 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 4)) = convert_uchar_sat(c.x & 0xff); } else{ *((__global uint*)(dst + DstOffset + x * 4)) = c.x; } } __kernel void CopyImage3dToBuffer8Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x00000007){ *((__global uchar*)(dst + DstOffset + x * 8 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + 
DstOffset + x * 8)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 8 + 4)) = convert_uchar_sat(c.y & 0xff); } else{ uint2 d = (uint2)(c.x,c.y); *((__global uint2*)(dst + DstOffset + x * 8)) = d; } } __kernel void CopyImage3dToBuffer16Bytes(__read_only image3d_t input, __global uchar *dst, int4 srcOffset, ulong dstOffset, ulong2 Pitch) { const uint x = get_global_id(0); const uint y = get_global_id(1); const uint z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; ulong DstOffset = dstOffset + (y * Pitch.x) + (z * Pitch.y); const uint4 c = read_imageui(input, srcCoord); if(( ulong )(dst + dstOffset) & 0x0000000f){ *((__global uchar*)(dst + DstOffset + x * 16 + 3)) = convert_uchar_sat((c.x >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 2)) = convert_uchar_sat((c.x >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 1)) = convert_uchar_sat((c.x >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16)) = convert_uchar_sat(c.x & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 7)) = convert_uchar_sat((c.y >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 6)) = convert_uchar_sat((c.y >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 5)) = convert_uchar_sat((c.y >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 4)) = convert_uchar_sat(c.y & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 11)) = convert_uchar_sat((c.z >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 10)) = convert_uchar_sat((c.z >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 9)) = convert_uchar_sat((c.z >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 8)) 
= convert_uchar_sat(c.z & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 15)) = convert_uchar_sat((c.w >> 24 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 14)) = convert_uchar_sat((c.w >> 16 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 13)) = convert_uchar_sat((c.w >> 8 ) & 0xff); *((__global uchar*)(dst + DstOffset + x * 16 + 12)) = convert_uchar_sat(c.w & 0xff); } else{ *(__global uint4*)(dst + DstOffset + x * 16) = c; } } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel000066400000000000000000000007161363734646600327170ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImageToImage1d( __read_only image1d_t input, __write_only image1d_t output, int4 srcOffset, int4 dstOffset) { const int x = get_global_id(0); const int srcCoord = x + srcOffset.x; const int dstCoord = x + dstOffset.x; const uint4 c = read_imageui(input, srcCoord); write_imageui(output, dstCoord, c); } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel000066400000000000000000000010641363734646600327150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void CopyImageToImage2d( __read_only image2d_t input, __write_only image2d_t output, int4 srcOffset, int4 dstOffset) { const int x = get_global_id(0); const int y = get_global_id(1); const int2 srcCoord = (int2)(x, y) + (int2)(srcOffset.x, srcOffset.y); const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y); const uint4 c = read_imageui(input, srcCoord); write_imageui(output, dstCoord, c); } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel000066400000000000000000000011601363734646600327130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * 
SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void CopyImageToImage3d( __read_only image3d_t input, __write_only image3d_t output, int4 srcOffset, int4 dstOffset) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); const int4 srcCoord = (int4)(x, y, z, 0) + srcOffset; const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; const uint4 c = read_imageui(input, srcCoord); write_imageui(output, dstCoord, c); } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/fill_buffer.builtin_kernel000066400000000000000000000040621363734646600307670ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( // assumption is local work size = pattern size __kernel void FillBufferBytes( __global uchar* pDst, uint dstOffsetInBytes, const __global uchar* pPattern ) { uint dstIndex = get_global_id(0) + dstOffsetInBytes; uint srcIndex = get_local_id(0); pDst[dstIndex] = pPattern[srcIndex]; } __kernel void FillBufferLeftLeftover( __global uchar* pDst, uint dstOffsetInBytes, const __global uchar* pPattern, const uint patternSizeInEls ) { uint gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferMiddle( __global uchar* pDst, uint dstOffsetInBytes, const __global uint* pPattern, const uint patternSizeInEls ) { uint gid = get_global_id(0); ((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferRightLeftover( __global uchar* pDst, uint dstOffsetInBytes, const __global uchar* pPattern, const uint patternSizeInEls ) { uint gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferImmediate( __global uchar* ptr, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint 
dstIndex = get_global_id(0); __global uchar* pDst = (__global uchar*)ptr + dstSshOffset; pDst[dstIndex] = value; } __kernel void FillBufferSSHOffset( __global uchar* ptr, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const __global uchar* pPattern, uint patternSshOffset // Offset needed in case pPattern has been adjusted for SSH alignment ) { uint dstIndex = get_global_id(0); uint srcIndex = get_local_id(0); __global uchar* pDst = (__global uchar*)ptr + dstSshOffset; __global uchar* pSrc = (__global uchar*)pPattern + patternSshOffset; pDst[dstIndex] = pSrc[srcIndex]; } )==="compute-runtime-20.13.16352/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel000066400000000000000000000023721363734646600330600ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( // assumption is local work size = pattern size __kernel void FillBufferBytes( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern ) { size_t dstIndex = get_global_id(0) + dstOffsetInBytes; size_t srcIndex = get_local_id(0); pDst[dstIndex] = pPattern[srcIndex]; } __kernel void FillBufferLeftLeftover( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, const ulong patternSizeInEls ) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferMiddle( __global uchar* pDst, ulong dstOffsetInBytes, const __global uint* pPattern, const ulong patternSizeInEls ) { size_t gid = get_global_id(0); ((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ]; } __kernel void FillBufferRightLeftover( __global uchar* pDst, ulong dstOffsetInBytes, const __global uchar* pPattern, const ulong patternSizeInEls ) { size_t gid = get_global_id(0); pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ]; } 
)==="compute-runtime-20.13.16352/shared/source/built_ins/kernels/fill_image1d.builtin_kernel000066400000000000000000000005121363734646600310210ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void FillImage1d( __write_only image1d_t output, uint4 color, int4 dstOffset) { const int x = get_global_id(0); const int dstCoord = x + dstOffset.x; write_imageui(output, dstCoord, color); } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/fill_image2d.builtin_kernel000066400000000000000000000006171363734646600310300ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( __kernel void FillImage2d( __write_only image2d_t output, uint4 color, int4 dstOffset) { const int x = get_global_id(0); const int y = get_global_id(1); const int2 dstCoord = (int2)(x, y) + (int2)(dstOffset.x, dstOffset.y); write_imageui(output, dstCoord, color); } )===" compute-runtime-20.13.16352/shared/source/built_ins/kernels/fill_image3d.builtin_kernel000066400000000000000000000007341363734646600310310ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ R"===( #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable __kernel void FillImage3d( __write_only image3d_t output, uint4 color, int4 dstOffset) { const int x = get_global_id(0); const int y = get_global_id(1); const int z = get_global_id(2); const int4 dstCoord = (int4)(x, y, z, 0) + dstOffset; write_imageui(output, dstCoord, color); } )===" compute-runtime-20.13.16352/shared/source/built_ins/registry/000077500000000000000000000000001363734646600237635ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/built_ins/registry/CMakeLists.txt000066400000000000000000000011731363734646600265250ustar00rootroot00000000000000# # Copyright (C) 2017-2020 Intel Corporation # # SPDX-License-Identifier: MIT # 
add_library(${BUILTINS_SOURCES_LIB_NAME} OBJECT EXCLUDE_FROM_ALL CMakeLists.txt built_ins_registry.h register_copy_kernels_source.cpp ) set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${BUILTINS_SOURCES_LIB_NAME} PROPERTIES FOLDER "built_ins") target_include_directories(${BUILTINS_SOURCES_LIB_NAME} PRIVATE ${ENGINE_NODE_DIR} ${KHRONOS_HEADERS_DIR} ${KHRONOS_GL_HEADERS_DIR} ${NEO__GMM_INCLUDE_DIR} ${NEO__IGC_INCLUDE_DIR} ${THIRD_PARTY_DIR} ) add_subdirectories() compute-runtime-20.13.16352/shared/source/built_ins/registry/built_ins_registry.h000066400000000000000000000012601363734646600300530ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/built_ins.h" #include #include namespace NEO { struct RegisterEmbeddedResource { RegisterEmbeddedResource(const char *name, const char *resource, size_t resourceLength) { auto &storageRegistry = EmbeddedStorageRegistry::getInstance(); storageRegistry.store(name, createBuiltinResource(resource, resourceLength)); } RegisterEmbeddedResource(const char *name, std::string &&resource) : RegisterEmbeddedResource(name, resource.data(), resource.size() + 1) { } }; } // namespace NEO compute-runtime-20.13.16352/shared/source/built_ins/registry/register_copy_kernels_source.cpp000066400000000000000000000132651363734646600324570ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/registry/built_ins_registry.h" #include namespace NEO { static RegisterEmbeddedResource registerCopyBufferToBufferSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_to_buffer.builtin_kernel" )); static RegisterEmbeddedResource 
registerCopyBufferToBufferStatelessSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToBufferStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_to_buffer_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferRectSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferRect, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_rect.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferRectStatelessSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferRectStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_rect_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerFillBufferSrc( createBuiltinResourceName( EBuiltInOps::FillBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_buffer.builtin_kernel" )); static RegisterEmbeddedResource registerFillBufferStatelessSrc( createBuiltinResourceName( EBuiltInOps::FillBufferStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferToImage3dSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_buffer_to_image3d.builtin_kernel" )); static RegisterEmbeddedResource registerCopyBufferToImage3dStatelessSrc( createBuiltinResourceName( EBuiltInOps::CopyBufferToImage3dStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include 
"shared/source/built_ins/kernels/copy_buffer_to_image3d_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImage3dToBufferSrc( createBuiltinResourceName( EBuiltInOps::CopyImage3dToBuffer, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image3d_to_buffer.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImage3dToBufferStatelessSrc( createBuiltinResourceName( EBuiltInOps::CopyImage3dToBufferStateless, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image3d_to_buffer_stateless.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImageToImage1dSrc( createBuiltinResourceName( EBuiltInOps::CopyImageToImage1d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image_to_image1d.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImageToImage2dSrc( createBuiltinResourceName( EBuiltInOps::CopyImageToImage2d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image_to_image2d.builtin_kernel" )); static RegisterEmbeddedResource registerCopyImageToImage3dSrc( createBuiltinResourceName( EBuiltInOps::CopyImageToImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/copy_image_to_image3d.builtin_kernel" )); static RegisterEmbeddedResource registerFillImage1dSrc( createBuiltinResourceName( EBuiltInOps::FillImage1d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_image1d.builtin_kernel" )); static RegisterEmbeddedResource registerFillImage2dSrc( createBuiltinResourceName( EBuiltInOps::FillImage2d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) 
.c_str(), std::string( #include "shared/source/built_ins/kernels/fill_image2d.builtin_kernel" )); static RegisterEmbeddedResource registerFillImage3dSrc( createBuiltinResourceName( EBuiltInOps::FillImage3d, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/fill_image3d.builtin_kernel" )); static RegisterEmbeddedResource registerAuxTranslationSrc( createBuiltinResourceName( EBuiltInOps::AuxTranslation, BuiltinCode::getExtension(BuiltinCode::ECodeType::Source)) .c_str(), std::string( #include "shared/source/built_ins/kernels/aux_translation.builtin_kernel" )); } // namespace NEO compute-runtime-20.13.16352/shared/source/built_ins/sip.cpp000066400000000000000000000072611363734646600234200ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/sip.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" namespace NEO { const size_t SipKernel::maxDbgSurfaceSize = 0x49c000; // proper value should be taken from compiler when it's ready const char *getSipKernelCompilerInternalOptions(SipKernelType kernel) { switch (kernel) { default: DEBUG_BREAK_IF(true); return ""; case SipKernelType::Csr: return "-cl-include-sip-csr"; case SipKernelType::DbgCsr: return "-cl-include-sip-kernel-debug -cl-include-sip-csr -cl-set-bti:0"; case SipKernelType::DbgCsrLocal: return "-cl-include-sip-kernel-local-debug -cl-include-sip-csr -cl-set-bti:0"; } } const char *getSipLlSrc(const Device &device) { 
#define M_DUMMY_LL_SRC \ "define void @f() { \n" \ " ret void \n" \ "} \n" \ "!opencl.compiler.options = !{!0} \n" \ "!opencl.kernels = !{!1} \n" \ "!0 = !{} \n" \ "!1 = !{void()* @f, !2, !3, !4, !5, !6, !7} \n" \ "!2 = !{!\"kernel_arg_addr_space\"} \n" \ "!3 = !{!\"kernel_arg_access_qual\"} \n" \ "!4 = !{!\"kernel_arg_type\"} \n" \ "!5 = !{!\"kernel_arg_type_qual\"} \n" \ "!6 = !{!\"kernel_arg_base_type\"} \n" \ "!7 = !{!\"kernel_arg_name\"} \n" constexpr const char *llDummySrc32 = "target datalayout = \"e-p:32:32:32\" \n" "target triple = \"spir\" \n" M_DUMMY_LL_SRC; constexpr const char *llDummySrc64 = "target datalayout = \"e-p:64:64:64\" \n" "target triple = \"spir64\" \n" M_DUMMY_LL_SRC; #undef M_DUMMY_LL_SRC const uint32_t ptrSize = device.getDeviceInfo().force32BitAddressess ? 4 : sizeof(void *); return (ptrSize == 8) ? llDummySrc64 : llDummySrc32; } SipKernel::SipKernel(SipKernelType type, Program *sipProgram) : type(type) { program = sipProgram; } SipKernel::~SipKernel() { program->release(); } GraphicsAllocation *SipKernel::getSipAllocation() const { return program->getKernelInfo(size_t{0})->getGraphicsAllocation(); } const char *SipKernel::getBinary() const { auto kernelInfo = program->getKernelInfo(size_t{0}); return reinterpret_cast(ptrOffset(kernelInfo->heapInfo.pKernelHeap, kernelInfo->systemKernelOffset)); } size_t SipKernel::getBinarySize() const { auto kernelInfo = program->getKernelInfo(size_t{0}); return kernelInfo->heapInfo.pKernelHeader->KernelHeapSize - kernelInfo->systemKernelOffset; } SipKernelType SipKernel::getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive) { auto &hwHelper = HwHelper::get(family); return hwHelper.getSipKernelType(debuggingActive); } GraphicsAllocation *SipKernel::getSipKernelAllocation(Device &device) { auto sipType = SipKernel::getSipKernelType(device.getHardwareInfo().platform.eRenderCoreFamily, device.isDebuggerActive()); return device.getBuiltIns()->getSipKernel(sipType, device).getSipAllocation(); } } // 
namespace NEO compute-runtime-20.13.16352/shared/source/built_ins/sip.h000066400000000000000000000023161363734646600230610ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/sip_kernel_type.h" #include "shared/source/helpers/hw_info.h" #include namespace NEO { class Device; class Program; class GraphicsAllocation; const char *getSipKernelCompilerInternalOptions(SipKernelType kernel); const char *getSipLlSrc(const Device &device); class SipKernel { public: SipKernel(SipKernelType type, Program *sipProgram); SipKernel(const SipKernel &) = delete; SipKernel &operator=(const SipKernel &) = delete; SipKernel(SipKernel &&) = default; SipKernel &operator=(SipKernel &&) = default; virtual ~SipKernel(); const char *getBinary() const; size_t getBinarySize() const; SipKernelType getType() const { return type; } static const size_t maxDbgSurfaceSize; MOCKABLE_VIRTUAL GraphicsAllocation *getSipAllocation() const; static SipKernelType getSipKernelType(GFXCORE_FAMILY family, bool debuggingActive); static GraphicsAllocation *getSipKernelAllocation(Device &device); protected: SipKernelType type = SipKernelType::COUNT; Program *program = nullptr; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/built_ins/sip_kernel_type.h000066400000000000000000000003771363734646600254670ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum class SipKernelType : std::uint32_t { Csr = 0, DbgCsr, DbgCsrLocal, COUNT }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_container/000077500000000000000000000000001363734646600236035ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/command_container/CMakeLists.txt000066400000000000000000000012011363734646600263350ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # set(NEO_CORE_COMMAND_CONTAINER ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/cmdcontainer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cmdcontainer.h ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.h ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_tgllp_plus.inl ) add_subdirectories() set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER}) compute-runtime-20.13.16352/shared/source/command_container/cmdcontainer.cpp000066400000000000000000000204321363734646600267560ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/heap_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/memory_manager.h" namespace NEO { CommandContainer::~CommandContainer() { if (!device) { DEBUG_BREAK_IF(device); return; } auto memoryManager = device->getMemoryManager(); for (auto *alloc : cmdBufferAllocations) { memoryManager->freeGraphicsMemory(alloc); } for (auto allocationIndirectHeap : allocationIndirectHeaps) { heapHelper->storeHeapAllocation(allocationIndirectHeap); } for (auto deallocation : deallocationContainer) { if (((deallocation->getAllocationType() == GraphicsAllocation::AllocationType::INTERNAL_HEAP) || (deallocation->getAllocationType() == GraphicsAllocation::AllocationType::LINEAR_STREAM))) { getHeapHelper()->storeHeapAllocation(deallocation); } } } bool 
CommandContainer::initialize(Device *device) { if (!device) { DEBUG_BREAK_IF(device); return false; } this->device = device; heapHelper = std::unique_ptr(new HeapHelper(device->getMemoryManager(), device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(), device->getNumAvailableDevices() > 1u)); size_t alignedSize = alignUp(totalCmdBufferSize, MemoryConstants::pageSize64k); AllocationProperties properties{0u, true /* allocateMemory*/, alignedSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, (device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */, false, {}}; auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(!cmdBufferAllocation); cmdBufferAllocations.push_back(cmdBufferAllocation); commandStream = std::unique_ptr(new LinearStream(cmdBufferAllocation->getUnderlyingBuffer(), defaultListCmdBufferSize)); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); addToResidencyContainer(cmdBufferAllocation); constexpr size_t heapSize = 65536u; for (uint32_t i = 0; i < IndirectHeap::Type::NUM_TYPES; i++) { allocationIndirectHeaps[i] = heapHelper->getHeapAllocation(i, heapSize, alignedSize, 0u); UNRECOVERABLE_IF(!allocationIndirectHeaps[i]); residencyContainer.push_back(allocationIndirectHeaps[i]); bool requireInternalHeap = (IndirectHeap::INDIRECT_OBJECT == i); indirectHeaps[i] = std::make_unique(allocationIndirectHeaps[i], requireInternalHeap); } instructionHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(0); iddBlock = nullptr; nextIddInBlock = this->getNumIddPerBlock(); return true; } void CommandContainer::addToResidencyContainer(GraphicsAllocation *alloc) { if (alloc == nullptr) { return; } this->residencyContainer.push_back(alloc); } void CommandContainer::removeDuplicatesFromResidencyContainer() { std::sort(this->residencyContainer.begin(), this->residencyContainer.end()); 
this->residencyContainer.erase(std::unique(this->residencyContainer.begin(), this->residencyContainer.end()), this->residencyContainer.end()); } void CommandContainer::reset() { setDirtyStateForAllHeaps(true); slmSize = std::numeric_limits::max(); getResidencyContainer().clear(); getDeallocationContainer().clear(); for (size_t i = 1; i < cmdBufferAllocations.size(); i++) { device->getMemoryManager()->freeGraphicsMemory(cmdBufferAllocations[i]); } cmdBufferAllocations.erase(cmdBufferAllocations.begin() + 1, cmdBufferAllocations.end()); commandStream->replaceBuffer(cmdBufferAllocations[0]->getUnderlyingBuffer(), defaultListCmdBufferSize); addToResidencyContainer(commandStream->getGraphicsAllocation()); for (auto &indirectHeap : indirectHeaps) { indirectHeap->replaceBuffer(indirectHeap->getCpuBase(), indirectHeap->getMaxAvailableSpace()); addToResidencyContainer(indirectHeap->getGraphicsAllocation()); } } void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType, size_t size) { auto indirectHeap = getIndirectHeap(heapType); if (indirectHeap->getAvailableSpace() < size) { size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); newSize *= 2; newSize = std::max(newSize, indirectHeap->getAvailableSpace() + size); newSize = alignUp(newSize, MemoryConstants::pageSize); auto oldAlloc = getIndirectHeapAllocation(heapType); auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); UNRECOVERABLE_IF(!oldAlloc); UNRECOVERABLE_IF(!newAlloc); indirectHeap->replaceGraphicsAllocation(newAlloc); indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), newAlloc->getUnderlyingBufferSize()); getResidencyContainer().push_back(newAlloc); getDeallocationContainer().push_back(oldAlloc); setIndirectHeapAllocation(heapType, newAlloc); setHeapDirty(heapType); } return indirectHeap->getSpace(size); } IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t 
sizeRequired, size_t alignment) { auto indirectHeap = getIndirectHeap(heapType); auto sizeRequested = sizeRequired; auto heapBuffer = indirectHeap->getSpace(0); if (alignment && (heapBuffer != alignUp(heapBuffer, alignment))) { sizeRequested += alignment; } if (indirectHeap->getAvailableSpace() < sizeRequested) { size_t newSize = indirectHeap->getUsed() + indirectHeap->getAvailableSpace(); newSize = alignUp(newSize, MemoryConstants::pageSize); auto oldAlloc = getIndirectHeapAllocation(heapType); auto newAlloc = getHeapHelper()->getHeapAllocation(heapType, newSize, MemoryConstants::pageSize, device->getRootDeviceIndex()); UNRECOVERABLE_IF(!oldAlloc); UNRECOVERABLE_IF(!newAlloc); indirectHeap->replaceGraphicsAllocation(newAlloc); indirectHeap->replaceBuffer(newAlloc->getUnderlyingBuffer(), newAlloc->getUnderlyingBufferSize()); getResidencyContainer().push_back(newAlloc); getDeallocationContainer().push_back(oldAlloc); setIndirectHeapAllocation(heapType, newAlloc); setHeapDirty(heapType); } if (alignment) { indirectHeap->align(alignment); } return indirectHeap; } void CommandContainer::allocateNextCommandBuffer() { size_t alignedSize = alignUp(totalCmdBufferSize, MemoryConstants::pageSize64k); AllocationProperties properties{0u, true /* allocateMemory*/, alignedSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, (device->getNumAvailableDevices() > 1u) /* multiOsContextCapable */, false, {}}; auto cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(!cmdBufferAllocation); cmdBufferAllocations.push_back(cmdBufferAllocation); commandStream->replaceBuffer(cmdBufferAllocation->getUnderlyingBuffer(), defaultListCmdBufferSize); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); addToResidencyContainer(cmdBufferAllocation); } } // namespace NEO 
compute-runtime-20.13.16352/shared/source/command_container/cmdcontainer.h000066400000000000000000000075541363734646600264350ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/helpers/heap_helper.h" #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/indirect_heap/indirect_heap.h" #include #include #include #include namespace NEO { class Device; class GraphicsAllocation; class LinearStream; using ResidencyContainer = std::vector; using CmdBufferContainer = std::vector; using HeapType = IndirectHeap::Type; class CommandContainer : public NonCopyableOrMovableClass { public: static constexpr size_t defaultListCmdBufferSize = MemoryConstants::kiloByte * 256; static constexpr size_t totalCmdBufferSize = defaultListCmdBufferSize + MemoryConstants::cacheLineSize + CSRequirements::csOverfetchSize; CommandContainer() { for (auto &indirectHeap : indirectHeaps) { indirectHeap = nullptr; } for (auto &allocationIndirectHeap : allocationIndirectHeaps) { allocationIndirectHeap = nullptr; } } CommandContainer(uint32_t maxNumAggregatedIdds) : CommandContainer() { numIddsPerBlock = maxNumAggregatedIdds; } CmdBufferContainer &getCmdBufferAllocations() { return cmdBufferAllocations; } ResidencyContainer &getResidencyContainer() { return residencyContainer; } std::vector &getDeallocationContainer() { return deallocationContainer; } void addToResidencyContainer(GraphicsAllocation *alloc); void removeDuplicatesFromResidencyContainer(); LinearStream *getCommandStream() { return commandStream.get(); } IndirectHeap *getIndirectHeap(HeapType heapType) { return indirectHeaps[heapType].get(); } HeapHelper *getHeapHelper() { return heapHelper.get(); } GraphicsAllocation *getIndirectHeapAllocation(HeapType heapType) { return allocationIndirectHeaps[heapType]; } void setIndirectHeapAllocation(HeapType 
heapType, GraphicsAllocation *allocation) { allocationIndirectHeaps[heapType] = allocation; } uint64_t getInstructionHeapBaseAddress() const { return instructionHeapBaseAddress; } void *getHeapSpaceAllowGrow(HeapType heapType, size_t size); bool initialize(Device *device); virtual ~CommandContainer(); uint32_t slmSize = std::numeric_limits::max(); uint32_t nextIddInBlock = 0; uint32_t lastSentNumGrfRequired = 0; Device *getDevice() const { return device; } IndirectHeap *getHeapWithRequiredSizeAndAlignment(HeapType heapType, size_t sizeRequired, size_t alignment); void allocateNextCommandBuffer(); void reset(); bool isHeapDirty(HeapType heapType) const { return (dirtyHeaps & (1u << heapType)); } bool isAnyHeapDirty() const { return dirtyHeaps != 0; } void setHeapDirty(HeapType heapType) { dirtyHeaps |= (1u << heapType); } void setDirtyStateForAllHeaps(bool dirty) { dirtyHeaps = dirty ? std::numeric_limits::max() : 0; } void setIddBlock(void *iddBlock) { this->iddBlock = iddBlock; } void *getIddBlock() { return iddBlock; } uint32_t getNumIddPerBlock() const { return numIddsPerBlock; } protected: void *iddBlock = nullptr; Device *device = nullptr; std::unique_ptr heapHelper; CmdBufferContainer cmdBufferAllocations; GraphicsAllocation *allocationIndirectHeaps[HeapType::NUM_TYPES] = {}; uint64_t instructionHeapBaseAddress = 0u; uint32_t dirtyHeaps = std::numeric_limits::max(); uint32_t numIddsPerBlock = 64; std::unique_ptr commandStream; std::unique_ptr indirectHeaps[HeapType::NUM_TYPES]; ResidencyContainer residencyContainer; std::vector deallocationContainer; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_container/command_encoder.h000066400000000000000000000226751363734646600271050ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/linear_stream.h" #include 
"shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include namespace NEO { template struct EncodeDispatchKernel { using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; static void encode(CommandContainer &container, const void *pThreadGroupDimensions, bool isIndirect, bool isPredicate, DispatchKernelEncoderI *dispatchInterface, uint64_t eventAddress, Device *device, PreemptionMode preemptionMode); static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset); static size_t estimateEncodeDispatchKernelCmdsSize(Device *device); }; template struct EncodeStates { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; static const uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize; static uint32_t copySamplerState(IndirectHeap *dsh, uint32_t samplerStateOffset, uint32_t samplerCount, uint32_t borderColorOffset, const void *fnDynamicStateHeap); static void adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency); static size_t getAdjustStateComputeModeSize(); }; template struct EncodeMath { using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; using MI_MATH = typename GfxFamily::MI_MATH; static uint32_t *commandReserve(CommandContainer &container); static void greaterThan(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); static void addition(CommandContainer 
&container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); }; template struct EncodeMathMMIO { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; using MI_MATH = typename GfxFamily::MI_MATH; static const size_t size = sizeof(MI_STORE_REGISTER_MEM); static void encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress); static void encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal); static void encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters dest, AluRegisters result); static void encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters regA, AluRegisters regB, AluRegisters finalResultRegister); static void encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister); }; template struct EncodeIndirectParams { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_MATH = typename GfxFamily::MI_MATH; using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; static void setGroupCountIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress); static void setGroupSizeIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]); static size_t getCmdsSizeForIndirectParams(); static size_t getCmdsSizeForSetGroupSizeIndirect(); static size_t getCmdsSizeForSetGroupCountIndirect(); }; template struct EncodeSetMMIO { using MI_LOAD_REGISTER_IMM = typename 
GfxFamily::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; static const size_t sizeIMM = sizeof(MI_LOAD_REGISTER_IMM); static const size_t sizeMEM = sizeof(MI_LOAD_REGISTER_MEM); static const size_t sizeREG = sizeof(MI_LOAD_REGISTER_REG); static void encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data); static void encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address); static void encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset); }; template struct EncodeL3State { static void encode(CommandContainer &container, bool enableSLM); }; template struct EncodeMediaInterfaceDescriptorLoad { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; static void encode(CommandContainer &container); }; template struct EncodeStateBaseAddress { static void encode(CommandContainer &container); }; template struct EncodeStoreMMIO { using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; static const size_t size = sizeof(MI_STORE_REGISTER_MEM); static void encode(CommandContainer &container, uint32_t offset, uint64_t address); }; template struct EncodeSurfaceState { using R_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename R_SURFACE_STATE::SURFACE_FORMAT; using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE; static void encodeBuffer(void *dst, void *address, size_t size, uint32_t mocs, bool cpuCoherent); static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() { return ~(getSurfaceBaseAddressAlignment() - 1); } static constexpr uintptr_t getSurfaceBaseAddressAlignment() { return 4; } static void getSshAlignedPointer(uintptr_t &ptr, size_t &offset); }; template struct EncodeComputeMode { using STATE_COMPUTE_MODE = typename GfxFamily::STATE_COMPUTE_MODE; static void 
adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable); static void adjustPipelineSelect(CommandContainer &container, uint32_t numGrfRequired); }; template struct EncodeSempahore { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; static constexpr uint32_t invalidHardwareTag = -2; static void programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode); static void addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode); static size_t getSizeMiSemaphoreWait(); }; template struct EncodeAtomic { using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES; using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE; static void programMiAtomic(MI_ATOMIC *atomic, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize); }; template struct EncodeBatchBufferStartOrEnd { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; static void programBatchBufferStart(LinearStream *commandStream, uint64_t address, bool secondLevel); static void programBatchBufferEnd(CommandContainer &container); }; template struct EncodeMiFlushDW { using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData); static void programMiFlushDwWA(LinearStream &commandStream); static void appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd); static size_t getMiFlushDwCmdSizeForDataWrite(); static size_t getMiFlushDwWaSize(); }; } // namespace NEO 
compute-runtime-20.13.16352/shared/source/command_container/command_encoder.inl000066400000000000000000000450331363734646600274310ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include namespace NEO { template uint32_t EncodeStates::copySamplerState(IndirectHeap *dsh, uint32_t samplerStateOffset, uint32_t samplerCount, uint32_t borderColorOffset, const void *fnDynamicStateHeap) { auto sizeSamplerState = sizeof(SAMPLER_STATE) * samplerCount; auto borderColorSize = samplerStateOffset - borderColorOffset; dsh->align(alignIndirectStatePointer); auto borderColorOffsetInDsh = static_cast(dsh->getUsed()); auto borderColor = dsh->getSpace(borderColorSize); memcpy_s(borderColor, borderColorSize, ptrOffset(fnDynamicStateHeap, borderColorOffset), borderColorSize); dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE); auto samplerStateOffsetInDsh = static_cast(dsh->getUsed()); auto samplerState = dsh->getSpace(sizeSamplerState); memcpy_s(samplerState, sizeSamplerState, ptrOffset(fnDynamicStateHeap, samplerStateOffset), sizeSamplerState); auto pSmplr = reinterpret_cast(samplerState); for (uint32_t i = 0; i < samplerCount; i++) { pSmplr[i].setIndirectStatePointer((uint32_t)borderColorOffsetInDsh); } return samplerStateOffsetInDsh; } template void EncodeMathMMIO::encodeMulRegVal(CommandContainer &container, 
uint32_t offset, uint32_t val, uint64_t dstAddress) { int logLws = 0; int i = val; while (val >> logLws) { logLws++; } EncodeSetMMIO::encodeREG(container, CS_GPR_R0, offset); EncodeSetMMIO::encodeIMM(container, CS_GPR_R1, 0); i = 0; while (i < logLws) { if (val & (1 << i)) { EncodeMath::addition(container, AluRegisters::R_1, AluRegisters::R_0, AluRegisters::R_2); EncodeSetMMIO::encodeREG(container, CS_GPR_R1, CS_GPR_R2); } EncodeMath::addition(container, AluRegisters::R_0, AluRegisters::R_0, AluRegisters::R_2); EncodeSetMMIO::encodeREG(container, CS_GPR_R0, CS_GPR_R2); i++; } EncodeStoreMMIO::encode(container, CS_GPR_R1, dstAddress); } /* * Compute *firstOperand > secondOperand and store the result in * MI_PREDICATE_RESULT where firstOperand is an device memory address. * * To calculate the "greater than" operation in the device, * (secondOperand - *firstOperand) is used, and if the carry flag register is * set, then (*firstOperand) is greater than secondOperand. */ template void EncodeMathMMIO::encodeGreaterThanPredicate(CommandContainer &container, uint64_t firstOperand, uint32_t secondOperand) { EncodeSetMMIO::encodeMEM(container, CS_GPR_R0, firstOperand); EncodeSetMMIO::encodeIMM(container, CS_GPR_R1, secondOperand); /* CS_GPR_R* registers map to AluRegisters::R_* registers */ EncodeMath::greaterThan(container, AluRegisters::R_0, AluRegisters::R_1, AluRegisters::R_2); EncodeSetMMIO::encodeREG(container, CS_PREDICATE_RESULT, CS_GPR_R2); } /* * encodeAlu() performs operations that leave a state including the result of * an operation such as the carry flag, and the accu flag with subtraction and * addition result. * * Parameter "postOperationStateRegister" is the ALU register with the result * from the operation that the function caller is interested in obtaining. * * Parameter "finalResultRegister" is the final destination register where * data from "postOperationStateRegister" will be copied. 
*/ template void EncodeMathMMIO::encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters finalResultRegister, AluRegisters postOperationStateRegister) { pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCA); pAluParam->DW0.BitField.Operand2 = static_cast(srcA); pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_LOAD); pAluParam->DW0.BitField.Operand1 = static_cast(AluRegisters::R_SRCB); pAluParam->DW0.BitField.Operand2 = static_cast(srcB); pAluParam++; /* Order of operation: Operand1 Operand2 */ pAluParam->DW0.BitField.ALUOpcode = static_cast(op); pAluParam->DW0.BitField.Operand1 = 0; pAluParam->DW0.BitField.Operand2 = 0; pAluParam++; pAluParam->DW0.BitField.ALUOpcode = static_cast(AluRegisters::OPCODE_STORE); pAluParam->DW0.BitField.Operand1 = static_cast(finalResultRegister); pAluParam->DW0.BitField.Operand2 = static_cast(postOperationStateRegister); pAluParam++; } template void EncodeMathMMIO::encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters regA, AluRegisters regB, AluRegisters finalResultRegister) { /* regB is subtracted from regA */ encodeAlu(pAluParam, regA, regB, AluRegisters::OPCODE_SUB, finalResultRegister, AluRegisters::R_CF); } template uint32_t *EncodeMath::commandReserve(CommandContainer &container) { size_t size = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE; auto cmd = reinterpret_cast(container.getCommandStream()->getSpace(size)); reinterpret_cast(cmd)->DW0.Value = 0x0; reinterpret_cast(cmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND; reinterpret_cast(cmd)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH; reinterpret_cast(cmd)->DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1; cmd++; return cmd; } /* * greaterThan() tests if firstOperandRegister is 
greater than * secondOperandRegister. */ template void EncodeMath::greaterThan(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) { uint32_t *cmd = EncodeMath::commandReserve(container); MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(cmd); /* firstOperandRegister will be subtracted from secondOperandRegister */ EncodeMathMMIO::encodeAluSubStoreCarry(pAluParam, secondOperandRegister, firstOperandRegister, finalResultRegister); } template void EncodeMath::addition(CommandContainer &container, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) { uint32_t *cmd = EncodeMath::commandReserve(container); EncodeMath::MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(cmd); EncodeMathMMIO::encodeAluAdd(pAluParam, firstOperandRegister, secondOperandRegister, finalResultRegister); } template void EncodeMathMMIO::encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters firstOperandRegister, AluRegisters secondOperandRegister, AluRegisters finalResultRegister) { encodeAlu(pAluParam, firstOperandRegister, secondOperandRegister, AluRegisters::OPCODE_ADD, finalResultRegister, AluRegisters::R_ACCU); } template void EncodeIndirectParams::setGroupCountIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress) { EncodeStoreMMIO::encode(container, GPUGPU_DISPATCHDIMX, ptrOffset(reinterpret_cast(crossThreadAddress), offsets[0])); EncodeStoreMMIO::encode(container, GPUGPU_DISPATCHDIMY, ptrOffset(reinterpret_cast(crossThreadAddress), offsets[1])); EncodeStoreMMIO::encode(container, GPUGPU_DISPATCHDIMZ, ptrOffset(reinterpret_cast(crossThreadAddress), offsets[2])); } template void EncodeIndirectParams::setGroupSizeIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]) { EncodeMathMMIO::encodeMulRegVal(container, GPUGPU_DISPATCHDIMX, lws[0], 
ptrOffset(reinterpret_cast(crossThreadAddress), offsets[0])); EncodeMathMMIO::encodeMulRegVal(container, GPUGPU_DISPATCHDIMY, lws[1], ptrOffset(reinterpret_cast(crossThreadAddress), offsets[1])); EncodeMathMMIO::encodeMulRegVal(container, GPUGPU_DISPATCHDIMZ, lws[2], ptrOffset(reinterpret_cast(crossThreadAddress), offsets[2])); } template void EncodeSetMMIO::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data) { MI_LOAD_REGISTER_IMM cmd = Family::cmdInitLoadRegisterImm; cmd.setRegisterOffset(offset); cmd.setDataDword(data); auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); *(MI_LOAD_REGISTER_IMM *)buffer = cmd; } template void EncodeSetMMIO::encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address) { MI_LOAD_REGISTER_MEM cmd = Family::cmdInitLoadRegisterMem; cmd.setRegisterAddress(offset); cmd.setMemoryAddress(address); auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); *(MI_LOAD_REGISTER_MEM *)buffer = cmd; } template void EncodeSetMMIO::encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset) { MI_LOAD_REGISTER_REG cmd = Family::cmdInitLoadRegisterReg; cmd.setSourceRegisterAddress(srcOffset); cmd.setDestinationRegisterAddress(dstOffset); auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); *(MI_LOAD_REGISTER_REG *)buffer = cmd; } template void EncodeStoreMMIO::encode(CommandContainer &container, uint32_t offset, uint64_t address) { MI_STORE_REGISTER_MEM cmd = Family::cmdInitStoreRegisterMem; cmd.setRegisterAddress(offset); cmd.setMemoryAddress(address); auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); *(MI_STORE_REGISTER_MEM *)buffer = cmd; } template void EncodeSurfaceState::encodeBuffer(void *dst, void *address, size_t size, uint32_t mocs, bool cpuCoherent) { auto ss = reinterpret_cast(dst); UNRECOVERABLE_IF(!isAligned(size)); SURFACE_STATE_BUFFER_LENGTH Length = {0}; Length.Length = static_cast(size - 1); 
ss->setWidth(Length.SurfaceState.Width + 1); ss->setHeight(Length.SurfaceState.Height + 1); ss->setDepth(Length.SurfaceState.Depth + 1); ss->setSurfaceType((address != nullptr) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER : R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL); ss->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW); ss->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4); ss->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4); ss->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR); ss->setVerticalLineStride(0); ss->setVerticalLineStrideOffset(0); ss->setMemoryObjectControlState(mocs); ss->setSurfaceBaseAddress(reinterpret_cast(address)); ss->setCoherencyType(cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); ss->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } template void *EncodeDispatchKernel::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) { if (container.nextIddInBlock == container.getNumIddPerBlock()) { container.getIndirectHeap(HeapType::DYNAMIC_STATE)->align(HardwareCommandsHelper::alignInterfaceDescriptorData); container.setIddBlock(container.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock())); container.nextIddInBlock = 0; EncodeMediaInterfaceDescriptorLoad::encode(container); } iddOffset = container.nextIddInBlock; auto interfaceDescriptorData = static_cast(container.getIddBlock()); return &interfaceDescriptorData[container.nextIddInBlock++]; } template size_t EncodeStates::getAdjustStateComputeModeSize() { return 0; } template size_t EncodeIndirectParams::getCmdsSizeForIndirectParams() { return 3 * sizeof(typename Family::MI_LOAD_REGISTER_MEM); } template size_t EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect() { return 3 * (sizeof(MI_STORE_REGISTER_MEM)); } template size_t 
EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect() { return 3 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) + sizeof(MI_STORE_REGISTER_MEM)); } template void EncodeSempahore::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode) { *cmd = Family::cmdInitMiSemaphoreWait; cmd->setCompareOperation(compareMode); cmd->setSemaphoreDataDword(compareData); cmd->setSemaphoreGraphicsAddress(compareAddress); cmd->setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); } template void EncodeSempahore::addMiSemaphoreWaitCommand(LinearStream &commandStream, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode) { auto semaphoreCommand = commandStream.getSpaceForCmd(); programMiSemaphoreWait(semaphoreCommand, compareAddress, compareData, compareMode); } template size_t EncodeSempahore::getSizeMiSemaphoreWait() { return sizeof(MI_SEMAPHORE_WAIT); } template void EncodeAtomic::programMiAtomic(MI_ATOMIC *atomic, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize) { *atomic = Family::cmdInitAtomic; atomic->setAtomicOpcode(opcode); atomic->setDataSize(dataSize); atomic->setMemoryAddress(static_cast(writeAddress & 0x0000FFFFFFFFULL)); atomic->setMemoryAddressHigh(static_cast(writeAddress >> 32)); } template void EncodeBatchBufferStartOrEnd::programBatchBufferStart(LinearStream *commandStream, uint64_t address, bool secondLevel) { MI_BATCH_BUFFER_START cmd = Family::cmdInitBatchBufferStart; if (secondLevel) { cmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); } cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); cmd.setBatchBufferStartAddressGraphicsaddress472(address); auto buffer = commandStream->getSpaceForCmd(); *reinterpret_cast(buffer) = cmd; } template void 
EncodeBatchBufferStartOrEnd::programBatchBufferEnd(CommandContainer &container) { MI_BATCH_BUFFER_END cmd = Family::cmdInitBatchBufferEnd; auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); *reinterpret_cast(buffer) = cmd; } template void EncodeSurfaceState::getSshAlignedPointer(uintptr_t &ptr, size_t &offset) { auto sshAlignmentMask = getSurfaceBaseAddressAlignmentMask(); uintptr_t alignedPtr = ptr & sshAlignmentMask; offset = 0; if (ptr != alignedPtr) { offset = ptrDiff(ptr, alignedPtr); ptr = alignedPtr; } } template void EncodeMiFlushDW::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData) { programMiFlushDwWA(commandStream); auto miFlushDwCmd = commandStream.getSpaceForCmd(); *miFlushDwCmd = GfxFamily::cmdInitMiFlushDw; miFlushDwCmd->setPostSyncOperation(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD); miFlushDwCmd->setDestinationAddress(immediateDataGpuAddress); miFlushDwCmd->setImmediateData(immediateData); appendMiFlushDw(miFlushDwCmd); } template size_t EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() { return sizeof(typename GfxFamily::MI_FLUSH_DW) + EncodeMiFlushDW::getMiFlushDwWaSize(); } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_container/command_encoder_base.inl000066400000000000000000000310051363734646600304150ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "opencl/source/helpers/hardware_commands_helper.h" 
#include namespace NEO { template void EncodeDispatchKernel::encode(CommandContainer &container, const void *pThreadGroupDimensions, bool isIndirect, bool isPredicate, DispatchKernelEncoderI *dispatchInterface, uint64_t eventAddress, Device *device, PreemptionMode preemptionMode) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END; auto sizeCrossThreadData = dispatchInterface->getSizeCrossThreadData(); auto sizePerThreadData = dispatchInterface->getSizePerThreadData(); auto sizePerThreadDataForWholeGroup = dispatchInterface->getSizePerThreadDataForWholeGroup(); LinearStream *listCmdBufferStream = container.getCommandStream(); size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(device); if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) { auto bbEnd = listCmdBufferStream->getSpaceForCmd(); *bbEnd = Family::cmdInitBatchBufferEnd; container.allocateNextCommandBuffer(); } WALKER_TYPE cmd = Family::cmdInitGpgpuWalker; auto idd = Family::cmdInitInterfaceDescriptorData; { auto alloc = dispatchInterface->getIsaAllocation(); UNRECOVERABLE_IF(nullptr == alloc); auto offset = alloc->getGpuAddressToPatch(); idd.setKernelStartPointer(offset); idd.setKernelStartPointerHigh(0u); } EncodeStates::adjustStateComputeMode(*container.getCommandStream(), container.lastSentNumGrfRequired, nullptr, false, false); auto threadsPerThreadGroup = dispatchInterface->getThreadsPerThreadGroupCount(); idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); idd.setBarrierEnable(dispatchInterface->hasBarriers()); idd.setSharedLocalMemorySize( dispatchInterface->getSlmTotalSize() > 0 ? 
static_cast(HardwareCommandsHelper::computeSlmValues(dispatchInterface->getSlmTotalSize())) : INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K); { auto bindingTableStateCount = dispatchInterface->getNumSurfaceStates(); uint32_t bindingTablePointer = 0u; if (bindingTableStateCount > 0u) { auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSizeSurfaceStateHeapData(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); bindingTablePointer = static_cast(HardwareCommandsHelper::pushBindingTableAndSurfaceStates( *ssh, bindingTableStateCount, dispatchInterface->getSurfaceStateHeap(), dispatchInterface->getSizeSurfaceStateHeapData(), bindingTableStateCount, dispatchInterface->getBindingTableOffset())); } idd.setBindingTablePointer(bindingTablePointer); auto bindingTableStatePrefetchCount = std::min(31u, bindingTableStateCount); idd.setBindingTableEntryCount(bindingTableStatePrefetchCount); } PreemptionHelper::programInterfaceDescriptorDataPreemption(&idd, preemptionMode); auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE); UNRECOVERABLE_IF(!heap); uint32_t samplerStateOffset = 0; uint32_t samplerCount = 0; if (dispatchInterface->getNumSamplers() > 0) { samplerCount = dispatchInterface->getNumSamplers(); samplerStateOffset = EncodeStates::copySamplerState(heap, dispatchInterface->getSamplerTableOffset(), dispatchInterface->getNumSamplers(), dispatchInterface->getBorderColor(), dispatchInterface->getDynamicStateHeap()); } idd.setSamplerStatePointer(samplerStateOffset); auto samplerCountState = static_cast((samplerCount + 3) / 4); idd.setSamplerCount(samplerCountState); auto numGrfCrossThreadData = static_cast(sizeCrossThreadData / sizeof(float[8])); DEBUG_BREAK_IF(numGrfCrossThreadData <= 0u); idd.setCrossThreadConstantDataReadLength(numGrfCrossThreadData); auto numGrfPerThreadData = static_cast(sizePerThreadData / sizeof(float[8])); DEBUG_BREAK_IF(numGrfPerThreadData <= 0u); 
idd.setConstantIndirectUrbEntryReadLength(numGrfPerThreadData); uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; uint64_t offsetThreadData = 0u; { auto heapIndirect = container.getIndirectHeap(HeapType::INDIRECT_OBJECT); UNRECOVERABLE_IF(!(heapIndirect)); heapIndirect->align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); auto ptr = container.getHeapSpaceAllowGrow(HeapType::INDIRECT_OBJECT, sizeThreadData); UNRECOVERABLE_IF(!(ptr)); offsetThreadData = heapIndirect->getHeapGpuStartOffset() + static_cast(heapIndirect->getUsed() - sizeThreadData); memcpy_s(ptr, sizeCrossThreadData, dispatchInterface->getCrossThread(), sizeCrossThreadData); if (isIndirect) { void *gpuPtr = reinterpret_cast(heapIndirect->getHeapGpuBase() + heapIndirect->getUsed() - sizeThreadData); if (dispatchInterface->hasGroupCounts()) { EncodeIndirectParams::setGroupCountIndirect(container, dispatchInterface->getCountOffsets(), gpuPtr); } if (dispatchInterface->hasGroupSize()) { EncodeIndirectParams::setGroupSizeIndirect(container, dispatchInterface->getSizeOffsets(), gpuPtr, dispatchInterface->getLocalWorkSize()); } } ptr = ptrOffset(ptr, sizeCrossThreadData); memcpy_s(ptr, sizePerThreadDataForWholeGroup, dispatchInterface->getPerThread(), sizePerThreadDataForWholeGroup); } auto slmSizeNew = dispatchInterface->getSlmTotalSize(); bool flush = container.slmSize != slmSizeNew || container.isAnyHeapDirty(); if (flush) { MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), true); if (container.slmSize != slmSizeNew) { EncodeL3State::encode(container, slmSizeNew != 0u); container.slmSize = slmSizeNew; if (container.nextIddInBlock != container.getNumIddPerBlock()) { EncodeMediaInterfaceDescriptorLoad::encode(container); } } if (container.isAnyHeapDirty()) { EncodeStateBaseAddress::encode(container); container.setDirtyStateForAllHeaps(false); } } uint32_t numIDD = 0u; void *ptr = getInterfaceDescriptor(container, numIDD); memcpy_s(ptr, sizeof(idd), 
&idd, sizeof(idd)); cmd.setIndirectDataStartAddress(static_cast(offsetThreadData)); cmd.setIndirectDataLength(sizeThreadData); cmd.setInterfaceDescriptorOffset(numIDD); if (isIndirect) { cmd.setIndirectParameterEnable(true); } else { UNRECOVERABLE_IF(!pThreadGroupDimensions); auto threadDims = static_cast(pThreadGroupDimensions); cmd.setThreadGroupIdXDimension(threadDims[0]); cmd.setThreadGroupIdYDimension(threadDims[1]); cmd.setThreadGroupIdZDimension(threadDims[2]); } auto simdSize = dispatchInterface->getSimdSize(); auto simdSizeOp = getSimdConfig(simdSize); cmd.setSimdSize(simdSizeOp); cmd.setRightExecutionMask(dispatchInterface->getPerThreadExecutionMask()); cmd.setBottomExecutionMask(0xffffffff); cmd.setThreadWidthCounterMaximum(threadsPerThreadGroup); cmd.setPredicateEnable(isPredicate); PreemptionHelper::applyPreemptionWaCmdsBegin(listCmdBufferStream, *device); auto buffer = listCmdBufferStream->getSpace(sizeof(cmd)); *(decltype(cmd) *)buffer = cmd; PreemptionHelper::applyPreemptionWaCmdsEnd(listCmdBufferStream, *device); { auto mediaStateFlush = listCmdBufferStream->getSpace(sizeof(MEDIA_STATE_FLUSH)); *reinterpret_cast(mediaStateFlush) = Family::cmdInitMediaStateFlush; } } template void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE); auto mediaStateFlush = container.getCommandStream()->getSpace(sizeof(MEDIA_STATE_FLUSH)); *reinterpret_cast(mediaStateFlush) = Family::cmdInitMediaStateFlush; MEDIA_INTERFACE_DESCRIPTOR_LOAD cmd = Family::cmdInitMediaInterfaceDescriptorLoad; cmd.setInterfaceDescriptorDataStartAddress(static_cast(ptrDiff(container.getIddBlock(), heap->getCpuBase()))); cmd.setInterfaceDescriptorTotalLength(sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock()); auto buffer = 
container.getCommandStream()->getSpace(sizeof(cmd)); *(decltype(cmd) *)buffer = cmd; } template void EncodeStateBaseAddress::encode(CommandContainer &container) { auto gmmHelper = container.getDevice()->getGmmHelper(); StateBaseAddressHelper::programStateBaseAddress( *container.getCommandStream(), container.isHeapDirty(HeapType::DYNAMIC_STATE) ? container.getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr, container.isHeapDirty(HeapType::INDIRECT_OBJECT) ? container.getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr, container.isHeapDirty(HeapType::SURFACE_STATE) ? container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr, 0, false, (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1), container.getInstructionHeapBaseAddress(), false, gmmHelper, false); } template void EncodeL3State::encode(CommandContainer &container, bool enableSLM) { auto offset = L3CNTLRegisterOffset::registerOffset; auto data = PreambleHelper::getL3Config(container.getDevice()->getHardwareInfo(), enableSLM); EncodeSetMMIO::encodeIMM(container, offset, data); } template size_t EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(Device *device) { using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH; using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD; using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END; size_t issueMediaInterfaceDescriptorLoad = sizeof(MEDIA_STATE_FLUSH) + sizeof(MEDIA_INTERFACE_DESCRIPTOR_LOAD); size_t totalSize = sizeof(WALKER_TYPE); totalSize += PreemptionHelper::getPreemptionWaCsSize(*device); totalSize += sizeof(MEDIA_STATE_FLUSH); totalSize += issueMediaInterfaceDescriptorLoad; totalSize += EncodeStates::getAdjustStateComputeModeSize(); totalSize += EncodeIndirectParams::getCmdsSizeForIndirectParams(); totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect(); totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect(); totalSize += sizeof(MI_BATCH_BUFFER_END); return 
totalSize; } template void EncodeMiFlushDW::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) {} template void EncodeMiFlushDW::programMiFlushDwWA(LinearStream &commandStream) {} template size_t EncodeMiFlushDW::getMiFlushDwWaSize() { return 0; } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_container/encode_compute_mode_bdw_plus.inl000066400000000000000000000007171363734646600322100ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" namespace NEO { template void EncodeStates::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) { } } // namespace NEOcompute-runtime-20.13.16352/shared/source/command_container/encode_compute_mode_tgllp_plus.inl000066400000000000000000000023411363734646600325510ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" namespace NEO { template void EncodeStates::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency) { using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE; using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT; STATE_COMPUTE_MODE stateComputeMode = (stateComputeModePtr != nullptr) ? *(static_cast(stateComputeModePtr)) : Family::cmdInitStateComputeMode; FORCE_NON_COHERENT coherencyValue = !requiresCoherency ? 
FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT : FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED; stateComputeMode.setForceNonCoherent(coherencyValue); stateComputeMode.setMaskBits(stateComputeMode.getMaskBits() | Family::stateComputeModeForceNonCoherentMask); EncodeComputeMode::adjustComputeMode(csr, numGrfRequired, &stateComputeMode, isMultiOsContextCapable); } } // namespace NEOcompute-runtime-20.13.16352/shared/source/command_stream/000077500000000000000000000000001363734646600231145ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/command_stream/CMakeLists.txt000066400000000000000000000034701363734646600256600ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_status.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tgllp_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/csr_definitions.h ${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_hw_ext.inl ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.inl ${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linear_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/preemption_mode.h ${CMAKE_CURRENT_SOURCE_DIR}/preemption.cpp ${CMAKE_CURRENT_SOURCE_DIR}/preemption.h ${CMAKE_CURRENT_SOURCE_DIR}/preemption.inl 
${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.h ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h ) set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM}) add_subdirectories()compute-runtime-20.13.16352/shared/source/command_stream/aub_subcapture_status.h000066400000000000000000000003401363734646600276710ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once namespace NEO { struct AubSubCaptureStatus { bool isActive; bool wasActiveInPreviousEnqueue; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/command_stream_receiver.cpp000066400000000000000000000525231363734646600305040ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/device/device.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/surface.h" 
#include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/cpuintrinsics.h" #include "shared/source/utilities/tag_allocator.h" namespace NEO { // Global table of CommandStreamReceiver factories for HW and tests CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE] = {}; CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex) { residencyAllocations.reserve(20); latestSentStatelessMocsConfig = CacheSettings::unknownMocs; submissionAggregator.reset(new SubmissionAggregator()); if (DebugManager.flags.CsrDispatchMode.get()) { this->dispatchMode = (DispatchMode)DebugManager.flags.CsrDispatchMode.get(); } flushStamp.reset(new FlushStampTracker(true)); for (int i = 0; i < IndirectHeap::NUM_TYPES; ++i) { indirectHeap[i] = nullptr; } internalAllocationStorage = std::make_unique(*this); } CommandStreamReceiver::~CommandStreamReceiver() { for (int i = 0; i < IndirectHeap::NUM_TYPES; ++i) { if (indirectHeap[i] != nullptr) { auto allocation = indirectHeap[i]->getGraphicsAllocation(); if (allocation != nullptr) { internalAllocationStorage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); } delete indirectHeap[i]; } } cleanupResources(); internalAllocationStorage->cleanAllocationList(-1, REUSABLE_ALLOCATION); internalAllocationStorage->cleanAllocationList(-1, TEMPORARY_ALLOCATION); getMemoryManager()->unregisterEngineForCsr(this); } bool CommandStreamReceiver::submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { this->latestFlushedTaskCount = taskCount + 1; this->latestSentTaskCount = taskCount + 1; auto ret = this->flush(batchBuffer, allocationsForResidency); taskCount++; return ret; } void CommandStreamReceiver::makeResident(GraphicsAllocation &gfxAllocation) { auto submissionTaskCount 
= this->taskCount + 1; if (gfxAllocation.isResidencyTaskCountBelow(submissionTaskCount, osContext->getContextId())) { this->getResidencyAllocations().push_back(&gfxAllocation); gfxAllocation.updateTaskCount(submissionTaskCount, osContext->getContextId()); if (!gfxAllocation.isResident(osContext->getContextId())) { this->totalMemoryUsed += gfxAllocation.getUnderlyingBufferSize(); } } gfxAllocation.updateResidencyTaskCount(submissionTaskCount, osContext->getContextId()); } void CommandStreamReceiver::processEviction() { this->getEvictionAllocations().clear(); } void CommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) { if (gfxAllocation.isResident(osContext->getContextId())) { if (gfxAllocation.peekEvictable()) { this->getEvictionAllocations().push_back(&gfxAllocation); } else { gfxAllocation.setEvictable(true); } } gfxAllocation.releaseResidencyInOsContext(this->osContext->getContextId()); } void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency) { for (auto &surface : allocationsForResidency) { this->makeNonResident(*surface); } allocationsForResidency.clear(); this->processEviction(); } void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation) { makeResident(*gfxAllocation); } void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) { auto address = getTagAddress(); if (address) { while (*address < requiredTaskCount) ; } internalAllocationStorage->cleanAllocationList(requiredTaskCount, allocationUsage); } void CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) { waitForTaskCountAndCleanAllocationList(requiredTaskCount, TEMPORARY_ALLOCATION); }; void CommandStreamReceiver::ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize) { if (commandStream.getAvailableSpace() >= minimumRequiredSize) { 
return; } const auto allocationSize = alignUp(minimumRequiredSize + additionalAllocationSize, MemoryConstants::pageSize64k); constexpr static auto allocationType = GraphicsAllocation::AllocationType::COMMAND_BUFFER; auto allocation = this->getInternalAllocationStorage()->obtainReusableAllocation(allocationSize, allocationType).release(); if (allocation == nullptr) { const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex, true, allocationSize, allocationType, isMultiOsContextCapable(), false, osContext->getDeviceBitfield()}; allocation = this->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties); } DEBUG_BREAK_IF(allocation == nullptr); if (commandStream.getGraphicsAllocation() != nullptr) { getInternalAllocationStorage()->storeAllocation(std::unique_ptr(commandStream.getGraphicsAllocation()), REUSABLE_ALLOCATION); } commandStream.replaceBuffer(allocation->getUnderlyingBuffer(), allocationSize - additionalAllocationSize); commandStream.replaceGraphicsAllocation(allocation); } MemoryManager *CommandStreamReceiver::getMemoryManager() const { DEBUG_BREAK_IF(!executionEnvironment.memoryManager); return executionEnvironment.memoryManager.get(); } LinearStream &CommandStreamReceiver::getCS(size_t minRequiredSize) { constexpr static auto additionalAllocationSize = MemoryConstants::cacheLineSize + CSRequirements::csOverfetchSize; ensureCommandBufferAllocation(this->commandStream, minRequiredSize, additionalAllocationSize); return commandStream; } OSInterface *CommandStreamReceiver::getOSInterface() const { return executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->osInterface.get(); } bool CommandStreamReceiver::isRcs() const { return this->osContext->getEngineType() == aub_stream::ENGINE_RCS; } void CommandStreamReceiver::cleanupResources() { waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, TEMPORARY_ALLOCATION); waitForTaskCountAndCleanAllocationList(this->latestFlushedTaskCount, 
REUSABLE_ALLOCATION); if (debugSurface) { getMemoryManager()->freeGraphicsMemory(debugSurface); debugSurface = nullptr; } if (commandStream.getCpuBase()) { getMemoryManager()->freeGraphicsMemory(commandStream.getGraphicsAllocation()); commandStream.replaceGraphicsAllocation(nullptr); commandStream.replaceBuffer(nullptr, 0); } if (tagAllocation) { getMemoryManager()->freeGraphicsMemory(tagAllocation); tagAllocation = nullptr; tagAddress = nullptr; } if (globalFenceAllocation) { getMemoryManager()->freeGraphicsMemory(globalFenceAllocation); globalFenceAllocation = nullptr; } if (preemptionAllocation) { getMemoryManager()->freeGraphicsMemory(preemptionAllocation); preemptionAllocation = nullptr; } if (perDssBackedBuffer) { getMemoryManager()->freeGraphicsMemory(perDssBackedBuffer); perDssBackedBuffer = nullptr; } } bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) { std::chrono::high_resolution_clock::time_point time1, time2; int64_t timeDiff = 0; uint32_t latestSentTaskCount = this->latestFlushedTaskCount; if (latestSentTaskCount < taskCountToWait) { if (!this->flushBatchedSubmissions()) { return false; } } time1 = std::chrono::high_resolution_clock::now(); while (*getTagAddress() < taskCountToWait && timeDiff <= timeoutMicroseconds) { std::this_thread::yield(); CpuIntrinsics::pause(); if (enableTimeout) { time2 = std::chrono::high_resolution_clock::now(); timeDiff = std::chrono::duration_cast(time2 - time1).count(); } } if (*getTagAddress() >= taskCountToWait) { return true; } return false; } void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) { this->tagAllocation = allocation; UNRECOVERABLE_IF(allocation == nullptr); this->tagAddress = reinterpret_cast(allocation->getUnderlyingBuffer()); } FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const { return flushStamp->peekStamp(); } void CommandStreamReceiver::setRequiredScratchSizes(uint32_t 
newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize) { if (newRequiredScratchSize > requiredScratchSize) { requiredScratchSize = newRequiredScratchSize; } if (newRequiredPrivateScratchSize > requiredPrivateScratchSize) { requiredPrivateScratchSize = newRequiredPrivateScratchSize; } } GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() { return scratchSpaceController->getScratchSpaceAllocation(); } void CommandStreamReceiver::initProgrammingFlags() { isPreambleSent = false; GSBAFor32BitProgrammed = false; bindingTableBaseAddressRequired = true; mediaVfeStateDirty = true; lastVmeSubslicesConfig = false; lastSentL3Config = 0; lastSentCoherencyRequest = -1; lastMediaSamplerConfig = -1; lastPreemptionMode = PreemptionMode::Initial; latestSentStatelessMocsConfig = 0; } void CommandStreamReceiver::programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive) { if (!wasActiveInPreviousEnqueue && isActive) { // force CSR reprogramming upon subcapture activation this->initProgrammingFlags(); } if (wasActiveInPreviousEnqueue && !isActive) { // flush BB upon subcapture deactivation this->flushBatchedSubmissions(); } } ResidencyContainer &CommandStreamReceiver::getResidencyAllocations() { return this->residencyAllocations; } ResidencyContainer &CommandStreamReceiver::getEvictionAllocations() { return this->evictionAllocations; } AubSubCaptureStatus CommandStreamReceiver::checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) { return {false, false}; } void CommandStreamReceiver::addAubComment(const char *comment) {} GraphicsAllocation *CommandStreamReceiver::allocateDebugSurface(size_t size) { UNRECOVERABLE_IF(debugSurface != nullptr); debugSurface = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, size, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY}); return debugSurface; } IndirectHeap &CommandStreamReceiver::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) { 
DEBUG_BREAK_IF(static_cast(heapType) >= arrayCount(indirectHeap)); auto &heap = indirectHeap[heapType]; GraphicsAllocation *heapMemory = nullptr; if (heap) heapMemory = heap->getGraphicsAllocation(); if (heap && heap->getAvailableSpace() < minRequiredSize && heapMemory) { internalAllocationStorage->storeAllocation(std::unique_ptr(heapMemory), REUSABLE_ALLOCATION); heapMemory = nullptr; } if (!heapMemory) { allocateHeapMemory(heapType, minRequiredSize, heap); } return *heap; } void CommandStreamReceiver::allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap) { size_t reservedSize = 0; auto finalHeapSize = defaultHeapSize; if (IndirectHeap::SURFACE_STATE == heapType) { finalHeapSize = defaultSshSize; } bool requireInternalHeap = IndirectHeap::INDIRECT_OBJECT == heapType ? true : false; if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { requireInternalHeap = false; } minRequiredSize += reservedSize; finalHeapSize = alignUp(std::max(finalHeapSize, minRequiredSize), MemoryConstants::pageSize); auto allocationType = GraphicsAllocation::AllocationType::LINEAR_STREAM; if (requireInternalHeap) { allocationType = GraphicsAllocation::AllocationType::INTERNAL_HEAP; } auto heapMemory = internalAllocationStorage->obtainReusableAllocation(finalHeapSize, allocationType).release(); if (!heapMemory) { heapMemory = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, true, finalHeapSize, allocationType, isMultiOsContextCapable(), false, osContext->getDeviceBitfield()}); } else { finalHeapSize = std::max(heapMemory->getUnderlyingBufferSize(), finalHeapSize); } if (IndirectHeap::SURFACE_STATE == heapType) { DEBUG_BREAK_IF(minRequiredSize > defaultSshSize - MemoryConstants::pageSize); finalHeapSize = defaultSshSize - MemoryConstants::pageSize; } if (indirectHeap) { indirectHeap->replaceBuffer(heapMemory->getUnderlyingBuffer(), finalHeapSize); indirectHeap->replaceGraphicsAllocation(heapMemory); } else { 
indirectHeap = new IndirectHeap(heapMemory, requireInternalHeap); indirectHeap->overrideMaxSize(finalHeapSize); } scratchSpaceController->reserveHeap(heapType, indirectHeap); } void CommandStreamReceiver::releaseIndirectHeap(IndirectHeap::Type heapType) { DEBUG_BREAK_IF(static_cast(heapType) >= arrayCount(indirectHeap)); auto &heap = indirectHeap[heapType]; if (heap) { auto heapMemory = heap->getGraphicsAllocation(); if (heapMemory != nullptr) internalAllocationStorage->storeAllocation(std::unique_ptr(heapMemory), REUSABLE_ALLOCATION); heap->replaceBuffer(nullptr, 0); heap->replaceGraphicsAllocation(nullptr); } } void CommandStreamReceiver::setExperimentalCmdBuffer(std::unique_ptr &&cmdBuffer) { experimentalCmdBuffer = std::move(cmdBuffer); } bool CommandStreamReceiver::initializeTagAllocation() { auto tagAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::TAG_BUFFER}); if (!tagAllocation) { return false; } this->setTagAllocation(tagAllocation); *this->tagAddress = DebugManager.flags.EnableNullHardware.get() ? 
-1 : initialHardwareTag; return true; } bool CommandStreamReceiver::createGlobalFenceAllocation() { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); if (!HwHelper::get(hwInfo->platform.eRenderCoreFamily).isFenceAllocationRequired(*hwInfo)) { return true; } DEBUG_BREAK_IF(this->globalFenceAllocation != nullptr); this->globalFenceAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::GLOBAL_FENCE}); return this->globalFenceAllocation != nullptr; } bool CommandStreamReceiver::createPreemptionAllocation() { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); AllocationProperties properties{rootDeviceIndex, true, hwInfo->capabilityTable.requiredPreemptionSurfaceSize, GraphicsAllocation::AllocationType::PREEMPTION, false}; properties.flags.uncacheable = hwInfo->workaroundTable.waCSRUncachable; properties.alignment = 256 * MemoryConstants::kiloByte; this->preemptionAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); return this->preemptionAllocation != nullptr; } std::unique_lock CommandStreamReceiver::obtainUniqueOwnership() { return std::unique_lock(this->ownershipMutex); } AllocationsList &CommandStreamReceiver::getTemporaryAllocations() { return internalAllocationStorage->getTemporaryAllocations(); } AllocationsList &CommandStreamReceiver::getAllocationsForReuse() { return internalAllocationStorage->getAllocationsForReuse(); } bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) { auto memoryManager = getMemoryManager(); AllocationProperties properties{rootDeviceIndex, false, surface.getSurfaceSize(), GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, false}; properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = requiresL3Flush; auto allocation = 
memoryManager->allocateGraphicsMemoryWithProperties(properties, surface.getMemoryPointer()); if (allocation == nullptr && surface.peekIsPtrCopyAllowed()) { // Try with no host pointer allocation and copy AllocationProperties copyProperties{rootDeviceIndex, surface.getSurfaceSize(), GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY}; copyProperties.alignment = MemoryConstants::pageSize; allocation = memoryManager->allocateGraphicsMemoryWithProperties(copyProperties); if (allocation) { memcpy_s(allocation->getUnderlyingBuffer(), allocation->getUnderlyingBufferSize(), surface.getMemoryPointer(), surface.getSurfaceSize()); } } if (allocation == nullptr) { return false; } allocation->updateTaskCount(CompletionStamp::levelNotReady, osContext->getContextId()); surface.setAllocation(allocation); internalAllocationStorage->storeAllocation(std::unique_ptr(allocation), TEMPORARY_ALLOCATION); return true; } TagAllocator *CommandStreamReceiver::getEventTsAllocator() { if (profilingTimeStampAllocator.get() == nullptr) { profilingTimeStampAllocator = std::make_unique>( rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(HwTimeStamps), false); } return profilingTimeStampAllocator.get(); } TagAllocator *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) { if (perfCounterAllocator.get() == nullptr) { perfCounterAllocator = std::make_unique>( rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false); } return perfCounterAllocator.get(); } TagAllocator *CommandStreamReceiver::getTimestampPacketAllocator() { if (timestampPacketAllocator.get() == nullptr) { // dont release nodes in aub/tbx mode, to avoid removing semaphores optimization or reusing returned tags bool doNotReleaseNodes = (getType() > CommandStreamReceiverType::CSR_HW) || DebugManager.flags.DisableTimestampPacketOptimizations.get(); timestampPacketAllocator = std::make_unique>( 
rootDeviceIndex, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, sizeof(TimestampPacketStorage), doNotReleaseNodes); } return timestampPacketAllocator.get(); } size_t CommandStreamReceiver::getPreferredTagPoolSize() const { if (DebugManager.flags.DisableTimestampPacketOptimizations.get()) { return 1; } return 512; } bool CommandStreamReceiver::expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation) { auto isMemoryEqual = (memcmp(gfxAddress, srcAddress, length) == 0); auto isEqualMemoryExpected = (compareOperation == AubMemDump::CmdServicesMemTraceMemoryCompare::CompareOperationValues::CompareEqual); return (isMemoryEqual == isEqualMemoryExpected); } bool CommandStreamReceiver::needsPageTableManager(aub_stream::EngineType engineType) const { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto defaultEngineType = getChosenEngineType(*hwInfo); if (engineType != defaultEngineType) { return false; } auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex].get(); if (rootDeviceEnvironment->pageTableManager.get() != nullptr) { return false; } return HwHelper::get(hwInfo->platform.eRenderCoreFamily).isPageTableManagerSupported(*hwInfo); } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/command_stream_receiver.h000066400000000000000000000303771363734646600301540ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/aub_subcapture_status.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/helpers/aligned_memory.h" #include 
"shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/flat_batch_buffer_helper.h" #include "shared/source/helpers/options.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/kernel/grf_config.h" #include #include namespace NEO { class AllocationsList; class Device; class ExecutionEnvironment; class ExperimentalCommandBuffer; class GmmPageTableMngr; class GraphicsAllocation; class HostPtrSurface; class IndirectHeap; class InternalAllocationStorage; class LinearStream; class MemoryManager; class OsContext; class OSInterface; class ScratchSpaceController; struct HwPerfCounter; struct HwTimeStamps; struct TimestampPacketStorage; template class TagAllocator; enum class DispatchMode { DeviceDefault = 0, //default for given device ImmediateDispatch, //everything is submitted to the HW immediately AdaptiveDispatch, //dispatching is handled to async thread, which combines batch buffers basing on load (not implemented) BatchedDispatchWithCounter, //dispatching is batched, after n commands there is implicit flush (not implemented) BatchedDispatch // dispatching is batched, explicit clFlush is required }; class CommandStreamReceiver { public: enum class SamplerCacheFlushState { samplerCacheFlushNotRequired, samplerCacheFlushBefore, //add sampler cache flush before Walker with redescribed image samplerCacheFlushAfter //add sampler cache flush after Walker with redescribed image }; using MutexType = std::recursive_mutex; CommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); virtual ~CommandStreamReceiver(); virtual bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) = 0; virtual CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; 
virtual bool flushBatchedSubmissions() = 0; bool submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); MOCKABLE_VIRTUAL void makeResident(GraphicsAllocation &gfxAllocation); virtual void makeNonResident(GraphicsAllocation &gfxAllocation); MOCKABLE_VIRTUAL void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency); virtual void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) {} virtual void processEviction(); void makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation); void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize); MemoryManager *getMemoryManager() const; ResidencyContainer &getResidencyAllocations(); ResidencyContainer &getEvictionAllocations(); virtual GmmPageTableMngr *createPageTableManager() { return nullptr; } bool needsPageTableManager(aub_stream::EngineType engineType) const; void waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage); MOCKABLE_VIRTUAL void waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount); LinearStream &getCS(size_t minRequiredSize = 1024u); OSInterface *getOSInterface() const; ExecutionEnvironment &peekExecutionEnvironment() const { return executionEnvironment; }; MOCKABLE_VIRTUAL void setTagAllocation(GraphicsAllocation *allocation); GraphicsAllocation *getTagAllocation() const { return tagAllocation; } volatile uint32_t *getTagAddress() const { return tagAddress; } virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; }; uint32_t peekTaskCount() const { return taskCount; } uint32_t peekTaskLevel() const { return taskLevel; } FlushStamp obtainCurrentFlushStamp() const; uint32_t peekLatestSentTaskCount() const { return latestSentTaskCount; } uint32_t peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; } void enableNTo1SubmissionModel() { 
this->nTo1SubmissionModelEnabled = true; } bool isNTo1SubmissionModelEnabled() const { return this->nTo1SubmissionModelEnabled; } void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; } void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; } void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize); GraphicsAllocation *getScratchAllocation(); GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; } GraphicsAllocation *allocateDebugSurface(size_t size); GraphicsAllocation *getPreemptionAllocation() const { return preemptionAllocation; } void requestStallingPipeControlOnNextFlush() { stallingPipeControlOnNextFlushRequired = true; } bool isStallingPipeControlOnNextFlushRequired() const { return stallingPipeControlOnNextFlushRequired; } virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0; MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait); virtual void downloadAllocation(GraphicsAllocation &gfxAllocation){}; void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; } FlatBatchBufferHelper &getFlatBatchBufferHelper() const { return *flatBatchBufferHelper; } void overwriteFlatBatchBufferHelper(FlatBatchBufferHelper *newHelper) { flatBatchBufferHelper.reset(newHelper); } MOCKABLE_VIRTUAL void initProgrammingFlags(); virtual AubSubCaptureStatus checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo); void programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive); virtual void addAubComment(const char *comment); IndirectHeap &getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize); void allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap); 
void releaseIndirectHeap(IndirectHeap::Type heapType); virtual enum CommandStreamReceiverType getType() = 0; void setExperimentalCmdBuffer(std::unique_ptr &&cmdBuffer); bool initializeTagAllocation(); MOCKABLE_VIRTUAL bool createGlobalFenceAllocation(); MOCKABLE_VIRTUAL bool createPreemptionAllocation(); MOCKABLE_VIRTUAL bool createPerDssBackedBuffer(Device &device); MOCKABLE_VIRTUAL std::unique_lock obtainUniqueOwnership(); bool peekTimestampPacketWriteEnabled() const { return timestampPacketWriteEnabled; } size_t defaultSshSize; AllocationsList &getTemporaryAllocations(); AllocationsList &getAllocationsForReuse(); InternalAllocationStorage *getInternalAllocationStorage() const { return internalAllocationStorage.get(); } MOCKABLE_VIRTUAL bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush); virtual size_t getPreferredTagPoolSize() const; virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; } OsContext &getOsContext() const { return *osContext; } TagAllocator *getEventTsAllocator(); TagAllocator *getEventPerfCountAllocator(const uint32_t tagSize); TagAllocator *getTimestampPacketAllocator(); virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation); virtual bool isMultiOsContextCapable() const = 0; void setLatestSentTaskCount(uint32_t latestSentTaskCount) { this->latestSentTaskCount = latestSentTaskCount; } virtual uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) = 0; ScratchSpaceController *getScratchSpaceController() const { return scratchSpaceController.get(); } void registerInstructionCacheFlush() { auto mutex = obtainUniqueOwnership(); requiresInstructionCacheFlush = true; } bool isLocalMemoryEnabled() const { return localMemoryEnabled; } uint32_t getRootDeviceIndex() { return rootDeviceIndex; } virtual bool initDirectSubmission(Device &device, OsContext &osContext) { return true; } virtual bool 
isDirectSubmissionEnabled() const { return false; } bool isRcs() const; protected: void cleanupResources(); std::unique_ptr flushStamp; std::unique_ptr submissionAggregator; std::unique_ptr flatBatchBufferHelper; std::unique_ptr experimentalCmdBuffer; std::unique_ptr internalAllocationStorage; std::unique_ptr kmdNotifyHelper; std::unique_ptr scratchSpaceController; std::unique_ptr> profilingTimeStampAllocator; std::unique_ptr> perfCounterAllocator; std::unique_ptr> timestampPacketAllocator; ResidencyContainer residencyAllocations; ResidencyContainer evictionAllocations; MutexType ownershipMutex; ExecutionEnvironment &executionEnvironment; LinearStream commandStream; volatile uint32_t *tagAddress = nullptr; GraphicsAllocation *tagAllocation = nullptr; GraphicsAllocation *globalFenceAllocation = nullptr; GraphicsAllocation *preemptionAllocation = nullptr; GraphicsAllocation *debugSurface = nullptr; GraphicsAllocation *perDssBackedBuffer = nullptr; IndirectHeap *indirectHeap[IndirectHeap::NUM_TYPES]; // current taskLevel. Used for determining if a PIPE_CONTROL is needed. 
std::atomic taskLevel{0}; std::atomic latestSentTaskCount{0}; std::atomic latestFlushedTaskCount{0}; OsContext *osContext = nullptr; DispatchMode dispatchMode = DispatchMode::ImmediateDispatch; SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; PreemptionMode lastPreemptionMode = PreemptionMode::Initial; uint64_t totalMemoryUsed = 0u; // taskCount - # of tasks submitted uint32_t taskCount = 0; uint32_t lastSentL3Config = 0; uint32_t latestSentStatelessMocsConfig = 0; uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber; uint32_t requiredThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; uint32_t lastSentThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; uint64_t lastSentSliceCount = QueueSliceCount::defaultSliceCount; uint32_t requiredScratchSize = 0; uint32_t requiredPrivateScratchSize = 0; const uint32_t rootDeviceIndex; int8_t lastSentCoherencyRequest = -1; int8_t lastMediaSamplerConfig = -1; bool isPreambleSent = false; bool isStateSipSent = false; bool isEnginePrologueSent = false; bool GSBAFor32BitProgrammed = false; bool bindingTableBaseAddressRequired = false; bool mediaVfeStateDirty = true; bool lastVmeSubslicesConfig = false; bool stallingPipeControlOnNextFlushRequired = false; bool timestampPacketWriteEnabled = false; bool nTo1SubmissionModelEnabled = false; bool lastSpecialPipelineSelectMode = false; bool requiresInstructionCacheFlush = false; bool localMemoryEnabled = false; bool pageTableManagerInitialized = false; }; typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/command_stream_receiver_hw.h000066400000000000000000000135231363734646600306440ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/source/command_stream/command_stream_receiver.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/dirty_state_helpers.h" #include "shared/source/helpers/hw_cmds.h" #include "shared/source/helpers/hw_info.h" namespace NEO { template class DeviceCommandStreamReceiver; template class DirectSubmissionHw; template class CommandStreamReceiverHw : public CommandStreamReceiver { typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; public: static CommandStreamReceiver *create(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) { return new CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex); } CommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); ~CommandStreamReceiverHw() override; bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override; bool flushBatchedSubmissions() override; static void addBatchBufferEnd(LinearStream &commandStream, void **patchLocation); void programEndingCmd(LinearStream &commandStream, void **patchLocation, bool directSubmissionEnabled); void addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary); static void alignToCacheLine(LinearStream &commandStream); size_t getRequiredStateBaseAddressSize() const; size_t getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device); size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device); size_t getRequiredCmdSizeForPreamble(Device &device) const; size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const; size_t 
getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForL3Config() const; size_t getCmdSizeForPipelineSelect() const; size_t getCmdSizeForComputeMode(); size_t getCmdSizeForMediaSampler(bool mediaSamplerRequired) const; size_t getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const; bool isComputeModeNeeded() const; bool isPipelineSelectAlreadyProgrammed() const; void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags); void adjustThreadArbitionPolicy(void *const stateComputeMode); void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override; const HardwareInfo &peekHwInfo() const; void collectStateBaseAddresPatchInfo( uint64_t commandBufferAddress, uint64_t commandOffset, const LinearStream &dsh, const LinearStream &ioh, const LinearStream &ssh, uint64_t generalStateBase); void resetKmdNotifyHelper(KmdNotifyHelper *newHelper); CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_HW; } uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) override; bool isMultiOsContextCapable() const override; bool isDirectSubmissionEnabled() const override { return directSubmission.get() != nullptr; } protected: void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); void programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config); void programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config); void programPipelineSelect(LinearStream &csr, PipelineSelectArgs &pipelineSelectArgs); void programAdditionalPipelineSelect(LinearStream &csr, PipelineSelectArgs &pipelineSelectArgs, bool is3DPipeline); void programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags); void 
programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags); void programMediaSampler(LinearStream &csr, DispatchFlags &dispatchFlags); void programStateSip(LinearStream &cmdStream, Device &device); void programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads); void programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags); void programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags); void programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags); void programEnginePrologue(LinearStream &csr); size_t getCmdSizeForPrologue(const DispatchFlags &dispatchFlags) const; void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd); PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream); PIPE_CONTROL *addPipeControlBeforeStateBaseAddress(LinearStream &commandStream); size_t getSshHeapSize(); uint64_t getScratchPatchAddress(); void createScratchSpaceController(); static void emitNoop(LinearStream &commandStream, size_t bytesToUpdate); bool detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const; HeapDirtyState dshState; HeapDirtyState iohState; HeapDirtyState sshState; CsrSizeRequestFlags csrSizeRequestFlags = {}; std::unique_ptr> directSubmission; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/command_stream_receiver_hw_base.inl000066400000000000000000001332621363734646600321740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/debug_settings/debug_settings_manager.h" 
#include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/flat_batch_buffer_helper_hw.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/tag_allocator.h" #include "command_stream_receiver_hw_ext.inl" namespace NEO { template CommandStreamReceiverHw::~CommandStreamReceiverHw() = default; template CommandStreamReceiverHw::CommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : CommandStreamReceiver(executionEnvironment, rootDeviceIndex) { auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); localMemoryEnabled = hwHelper.getEnableLocalMemory(peekHwInfo()); requiredThreadArbitrationPolicy = PreambleHelper::getDefaultThreadArbitrationPolicy(); resetKmdNotifyHelper(new KmdNotifyHelper(&peekHwInfo().capabilityTable.kmdNotifyProperties)); flatBatchBufferHelper.reset(new FlatBatchBufferHelperHw(executionEnvironment)); defaultSshSize = getSshHeapSize(); timestampPacketWriteEnabled = hwHelper.timestampPacketWriteSupported(); if (DebugManager.flags.EnableTimestampPacket.get() != -1) { timestampPacketWriteEnabled = !!DebugManager.flags.EnableTimestampPacket.get(); } createScratchSpaceController(); } template bool 
CommandStreamReceiverHw::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { return true; } template inline void CommandStreamReceiverHw::addBatchBufferEnd(LinearStream &commandStream, void **patchLocation) { typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; auto pCmd = (MI_BATCH_BUFFER_END *)commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END)); *pCmd = GfxFamily::cmdInitBatchBufferEnd; if (patchLocation) { *patchLocation = pCmd; } } template inline void CommandStreamReceiverHw::programEndingCmd(LinearStream &commandStream, void **patchLocation, bool directSubmissionEnabled) { if (directSubmissionEnabled) { *patchLocation = commandStream.getSpace(sizeof(MI_BATCH_BUFFER_START)); auto bbStart = reinterpret_cast(*patchLocation); addBatchBufferStart(bbStart, 0ull, false); } else { this->addBatchBufferEnd(commandStream, patchLocation); } } template inline void CommandStreamReceiverHw::addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary) { *commandBufferMemory = GfxFamily::cmdInitBatchBufferStart; commandBufferMemory->setBatchBufferStartAddressGraphicsaddress472(startAddress); commandBufferMemory->setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT); if (secondary) { commandBufferMemory->setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH); } if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerBatchBufferStartAddress(reinterpret_cast(commandBufferMemory), startAddress); } } template inline void CommandStreamReceiverHw::alignToCacheLine(LinearStream &commandStream) { auto used = commandStream.getUsed(); auto alignment = MemoryConstants::cacheLineSize; auto partialCacheline = used & (alignment - 1); if (partialCacheline) { auto amountToPad = alignment - partialCacheline; auto pCmd = commandStream.getSpace(amountToPad); memset(pCmd, 0, amountToPad); } } template inline 
size_t CommandStreamReceiverHw::getRequiredCmdSizeForPreamble(Device &device) const { size_t size = 0; if (mediaVfeStateDirty) { size += PreambleHelper::getVFECommandsSize(); } if (!this->isPreambleSent) { size += PreambleHelper::getAdditionalCommandsSize(device); } if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { size += PreambleHelper::getThreadArbitrationCommandsSize(); } if (DebugManager.flags.ForcePerDssBackedBufferProgramming.get()) { if (!this->isPreambleSent) { size += PreambleHelper::getPerDssBackedBufferCommandsSize(device.getHardwareInfo()); } } return size; } template inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw::addPipeControlCmd(LinearStream &commandStream) { typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd = reinterpret_cast(commandStream.getSpace(sizeof(PIPE_CONTROL))); *pCmd = GfxFamily::cmdInitPipeControl; pCmd->setCommandStreamerStallEnable(true); return pCmd; } template CompletionStamp CommandStreamReceiverHw::flushTask( LinearStream &commandStreamTask, size_t commandStreamStartTask, const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh, uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) { typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; DEBUG_BREAK_IF(&commandStreamTask == &commandStream); DEBUG_BREAK_IF(!(dispatchFlags.preemptionMode == PreemptionMode::Disabled ? 
device.getPreemptionMode() == PreemptionMode::Disabled : true)); DEBUG_BREAK_IF(taskLevel >= CompletionStamp::levelNotReady); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskLevel", taskLevel); auto levelClosed = false; void *currentPipeControlForNooping = nullptr; void *epiloguePipeControlLocation = nullptr; if (DebugManager.flags.ForceCsrFlushing.get()) { flushBatchedSubmissions(); } if (detectInitProgrammingFlagsRequired(dispatchFlags)) { initProgrammingFlags(); } if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { //for ImmediateDispatch we will send this right away, therefore this pipe control will close the level //for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted. levelClosed = true; //if we guard with ppc, flush dc as well to speed up completion latency if (dispatchFlags.guardCommandBufferWithPipeControl) { dispatchFlags.dcFlush = true; } } epiloguePipeControlLocation = ptrOffset(commandStreamTask.getCpuBase(), commandStreamTask.getUsed()); if ((dispatchFlags.outOfOrderExecutionAllowed || timestampPacketWriteEnabled) && !dispatchFlags.dcFlush) { currentPipeControlForNooping = epiloguePipeControlLocation; } auto address = getTagAllocation()->getGpuAddress(); MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( commandStreamTask, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, taskCount + 1, dispatchFlags.dcFlush, peekHwInfo()); this->latestSentTaskCount = taskCount + 1; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", taskCount); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper->setPatchInfoData(PatchInfoData(address, 0u, PatchInfoAllocationType::TagAddress, commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamTask.getUsed() - 2 * sizeof(uint64_t), 
PatchInfoAllocationType::Default)); flatBatchBufferHelper->setPatchInfoData(PatchInfoData(address, 0u, PatchInfoAllocationType::TagValue, commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamTask.getUsed() - sizeof(uint64_t), PatchInfoAllocationType::Default)); } } if (DebugManager.flags.ForceSLML3Config.get()) { dispatchFlags.useSLM = true; } if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { dispatchFlags.threadArbitrationPolicy = static_cast(DebugManager.flags.OverrideThreadArbitrationPolicy.get()); } auto newL3Config = PreambleHelper::getL3Config(peekHwInfo(), dispatchFlags.useSLM); csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.coherencyRequestChanged = this->lastSentCoherencyRequest != static_cast(dispatchFlags.requiresCoherency); csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); csrSizeRequestFlags.specialPipelineSelectModeChanged = this->lastSpecialPipelineSelectMode != dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode; csrSizeRequestFlags.numGrfRequiredChanged = this->lastSentNumGrfRequired != dispatchFlags.numGrfRequired; lastSentNumGrfRequired = dispatchFlags.numGrfRequired; if (dispatchFlags.threadArbitrationPolicy != ThreadArbitrationPolicy::NotPresent) { this->requiredThreadArbitrationPolicy = dispatchFlags.threadArbitrationPolicy; } auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations(); bool stateBaseAddressDirty = false; bool checkVfeStateDirty = false; if (requiredScratchSize || requiredPrivateScratchSize) { scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(), requiredScratchSize, requiredPrivateScratchSize, this->taskCount, *this->osContext, stateBaseAddressDirty, checkVfeStateDirty); if (checkVfeStateDirty) { 
setMediaVFEStateDirty(true); } if (scratchSpaceController->getScratchSpaceAllocation()) { makeResident(*scratchSpaceController->getScratchSpaceAllocation()); } if (scratchSpaceController->getPrivateScratchSpaceAllocation()) { makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation()); } } if (dispatchFlags.usePerDssBackedBuffer) { if (!perDssBackedBuffer) { createPerDssBackedBuffer(device); } makeResident(*perDssBackedBuffer); } auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device)); auto commandStreamStartCSR = commandStreamCSR.getUsed(); TimestampPacketHelper::programCsrDependencies(commandStreamCSR, dispatchFlags.csrDependencies); if (stallingPipeControlOnNextFlushRequired) { programStallingPipeControlForBarrier(commandStreamCSR, dispatchFlags); } programEngineModeCommands(commandStreamCSR, dispatchFlags); if (executionEnvironment.rootDeviceEnvironments[device.getRootDeviceIndex()]->pageTableManager.get() && !pageTableManagerInitialized) { pageTableManagerInitialized = executionEnvironment.rootDeviceEnvironments[device.getRootDeviceIndex()]->pageTableManager->initPageTableManagerRegisters(this); } programEnginePrologue(commandStreamCSR); programComputeMode(commandStreamCSR, dispatchFlags); programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs); programL3(commandStreamCSR, dispatchFlags, newL3Config); programPreamble(commandStreamCSR, device, dispatchFlags, newL3Config); programMediaSampler(commandStreamCSR, dispatchFlags); if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) { PreambleHelper::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy); this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy; } stateBaseAddressDirty |= ((GSBAFor32BitProgrammed ^ dispatchFlags.gsba32BitRequired) && force32BitAllocations); programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads); 
programPreemption(commandStreamCSR, dispatchFlags); bool dshDirty = dshState.updateAndCheck(&dsh); bool iohDirty = iohState.updateAndCheck(&ioh); bool sshDirty = sshState.updateAndCheck(&ssh); auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty; auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff; auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On; auto mocsIndex = hwHelper.getMocsIndex(*device.getGmmHelper(), l3On, l1On); if (mocsIndex != latestSentStatelessMocsConfig) { isStateBaseAddressDirty = true; latestSentStatelessMocsConfig = mocsIndex; } //Reprogram state base address if required if (isStateBaseAddressDirty || device.isDebuggerActive()) { addPipeControlBeforeStateBaseAddress(commandStreamCSR); programAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, true); uint64_t newGSHbase = 0; GSBAFor32BitProgrammed = false; if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) { newGSHbase = scratchSpaceController->calculateNewGSH(); } else if (is64bit && force32BitAllocations && dispatchFlags.gsba32BitRequired) { newGSHbase = getMemoryManager()->getExternalHeapBaseAddress(rootDeviceIndex); GSBAFor32BitProgrammed = true; } auto stateBaseAddressCmdOffset = commandStreamCSR.getUsed(); StateBaseAddressHelper::programStateBaseAddress( commandStreamCSR, &dsh, &ioh, &ssh, newGSHbase, true, mocsIndex, getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex), true, device.getGmmHelper(), isMultiOsContextCapable()); if (sshDirty) { bindingTableBaseAddressRequired = true; } if (bindingTableBaseAddressRequired) { StateBaseAddressHelper::programBindingTableBaseAddress(commandStreamCSR, ssh, device.getGmmHelper()); bindingTableBaseAddressRequired = false; } programAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, false); 
programStateSip(commandStreamCSR, device); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { collectStateBaseAddresPatchInfo(commandStream.getGraphicsAllocation()->getGpuAddress(), stateBaseAddressCmdOffset, dsh, ioh, ssh, newGSHbase); } } DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel); if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads) { if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { auto pCmd = addPipeControlCmd(commandStreamCSR); pCmd->setTextureCacheInvalidationEnable(true); if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) { this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter; } else { this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired; } } } if (experimentalCmdBuffer.get() != nullptr) { size_t startingOffset = experimentalCmdBuffer->programExperimentalCommandBuffer(); experimentalCmdBuffer->injectBufferStart(commandStreamCSR, startingOffset); } if (requiresInstructionCacheFlush) { auto pipeControl = MemorySynchronizationCommands::addPipeControl(commandStreamCSR, false); pipeControl->setInstructionCacheInvalidateEnable(true); requiresInstructionCacheFlush = false; } // Add a PC if we have a dependency on a previous walker to avoid concurrency issues. 
if (taskLevel > this->taskLevel) { if (!timestampPacketWriteEnabled) { MemorySynchronizationCommands::addPipeControl(commandStreamCSR, false); } this->taskLevel = taskLevel; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", this->taskCount); } auto dshAllocation = dsh.getGraphicsAllocation(); auto iohAllocation = ioh.getGraphicsAllocation(); auto sshAllocation = ssh.getGraphicsAllocation(); this->makeResident(*dshAllocation); dshAllocation->setEvictable(false); this->makeResident(*iohAllocation); this->makeResident(*sshAllocation); iohAllocation->setEvictable(false); this->makeResident(*tagAllocation); if (globalFenceAllocation) { makeResident(*globalFenceAllocation); } if (preemptionAllocation) { makeResident(*preemptionAllocation); } if (dispatchFlags.preemptionMode == PreemptionMode::MidThread || device.isDebuggerActive()) { makeResident(*SipKernel::getSipKernelAllocation(device)); if (debugSurface) { makeResident(*debugSurface); } } if (experimentalCmdBuffer.get() != nullptr) { experimentalCmdBuffer->makeResidentAllocations(); } // If the CSR has work in its CS, flush it before the task bool submitTask = commandStreamStartTask != commandStreamTask.getUsed(); bool submitCSR = (commandStreamStartCSR != commandStreamCSR.getUsed()) || this->isMultiOsContextCapable(); bool submitCommandStreamFromCsr = false; void *bbEndLocation = nullptr; auto bbEndPaddingSize = this->dispatchMode == DispatchMode::ImmediateDispatch ? 
0 : sizeof(MI_BATCH_BUFFER_START) - sizeof(MI_BATCH_BUFFER_END); size_t chainedBatchBufferStartOffset = 0; GraphicsAllocation *chainedBatchBuffer = nullptr; bool directSubmissionEnabled = isDirectSubmissionEnabled(); if (submitTask) { programEndingCmd(commandStreamTask, &bbEndLocation, directSubmissionEnabled); this->emitNoop(commandStreamTask, bbEndPaddingSize); this->alignToCacheLine(commandStreamTask); if (submitCSR) { chainedBatchBufferStartOffset = commandStreamCSR.getUsed(); chainedBatchBuffer = commandStreamTask.getGraphicsAllocation(); // Add MI_BATCH_BUFFER_START to chain from CSR -> Task auto pBBS = reinterpret_cast(commandStreamCSR.getSpace(sizeof(MI_BATCH_BUFFER_START))); addBatchBufferStart(pBBS, ptrOffset(commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamStartTask), false); if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerCommandChunk(commandStreamTask.getGraphicsAllocation()->getGpuAddress(), reinterpret_cast(commandStreamTask.getCpuBase()), commandStreamStartTask, static_cast(ptrDiff(bbEndLocation, commandStreamTask.getGraphicsAllocation()->getGpuAddress())) + sizeof(MI_BATCH_BUFFER_START)); } auto commandStreamAllocation = commandStreamTask.getGraphicsAllocation(); DEBUG_BREAK_IF(commandStreamAllocation == nullptr); this->makeResident(*commandStreamAllocation); this->alignToCacheLine(commandStreamCSR); submitCommandStreamFromCsr = true; } else if (dispatchFlags.epilogueRequired) { this->makeResident(*commandStreamCSR.getGraphicsAllocation()); } this->programEpilogue(commandStreamCSR, &bbEndLocation, dispatchFlags); } else if (submitCSR) { programEndingCmd(commandStreamCSR, &bbEndLocation, directSubmissionEnabled); this->emitNoop(commandStreamCSR, bbEndPaddingSize); this->alignToCacheLine(commandStreamCSR); DEBUG_BREAK_IF(commandStreamCSR.getUsed() > commandStreamCSR.getMaxAvailableSpace()); submitCommandStreamFromCsr = true; } size_t startOffset = submitCommandStreamFromCsr ? 
commandStreamStartCSR : commandStreamStartTask; auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask; BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, chainedBatchBuffer, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount, streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation}; if (submitCSR | submitTask) { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { this->flush(batchBuffer, this->getResidencyAllocations()); this->latestFlushedTaskCount = this->taskCount + 1; this->makeSurfacePackNonResident(this->getResidencyAllocations()); } else { auto commandBuffer = new CommandBuffer(device); commandBuffer->batchBuffer = batchBuffer; commandBuffer->surfaces.swap(this->getResidencyAllocations()); commandBuffer->batchBufferEndLocation = bbEndLocation; commandBuffer->taskCount = this->taskCount + 1; commandBuffer->flushStamp->replaceStampObject(dispatchFlags.flushStampReference); commandBuffer->pipeControlThatMayBeErasedLocation = currentPipeControlForNooping; commandBuffer->epiloguePipeControlLocation = epiloguePipeControlLocation; this->submissionAggregator->recordCommandBuffer(commandBuffer); } } else { this->makeSurfacePackNonResident(this->getResidencyAllocations()); } //check if we are not over the budget, if we are do implicit flush if (getMemoryManager()->isMemoryBudgetExhausted()) { if (this->totalMemoryUsed >= device.getDeviceInfo().globalMemSize / 4) { dispatchFlags.implicitFlush = true; } } if (this->dispatchMode == DispatchMode::BatchedDispatch && (dispatchFlags.blocking || dispatchFlags.implicitFlush)) { this->flushBatchedSubmissions(); } ++taskCount; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", taskCount); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", tagAddress ? 
*tagAddress : 0); CompletionStamp completionStamp = { taskCount, this->taskLevel, flushStamp->peekStamp()}; this->taskLevel += levelClosed ? 1 : 0; return completionStamp; } template inline void CommandStreamReceiverHw::programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) { stallingPipeControlOnNextFlushRequired = false; PIPE_CONTROL *stallingPipeControlCmd; auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes; if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() != 0) { auto barrierTimestampPacketGpuAddress = dispatchFlags.barrierTimestampPacketNodes->peekNodes()[0]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); stallingPipeControlCmd = MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, barrierTimestampPacketGpuAddress, 0, true, peekHwInfo()); dispatchFlags.barrierTimestampPacketNodes->makeResident(*this); } else { stallingPipeControlCmd = MemorySynchronizationCommands::addPipeControl(cmdStream, false); } stallingPipeControlCmd->setCommandStreamerStallEnable(true); } template inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { return true; } typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; std::unique_lock lockGuard(ownershipMutex); bool submitResult = true; auto &commandBufferList = this->submissionAggregator->peekCmdBufferList(); if (!commandBufferList.peekIsEmpty()) { const auto totalMemoryBudget = static_cast(commandBufferList.peekHead()->device.getDeviceInfo().globalMemSize / 2); ResidencyContainer surfacesForSubmit; ResourcePackage resourcePackage; auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo()); void 
*currentPipeControlForNooping = nullptr; void *epiloguePipeControlLocation = nullptr; while (!commandBufferList.peekIsEmpty()) { size_t totalUsedSize = 0u; this->submissionAggregator->aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, osContext->getContextId()); auto primaryCmdBuffer = commandBufferList.removeFrontOne(); auto nextCommandBuffer = commandBufferList.peekHead(); auto currentBBendLocation = primaryCmdBuffer->batchBufferEndLocation; auto lastTaskCount = primaryCmdBuffer->taskCount; FlushStampUpdateHelper flushStampUpdateHelper; flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference()); currentPipeControlForNooping = primaryCmdBuffer->pipeControlThatMayBeErasedLocation; epiloguePipeControlLocation = primaryCmdBuffer->epiloguePipeControlLocation; if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerCommandChunk(primaryCmdBuffer.get()->batchBuffer, sizeof(MI_BATCH_BUFFER_START)); } while (nextCommandBuffer && nextCommandBuffer->inspectionId == primaryCmdBuffer->inspectionId) { //noop pipe control if (currentPipeControlForNooping) { if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper->removePipeControlData(pipeControlLocationSize, currentPipeControlForNooping, peekHwInfo()); } memset(currentPipeControlForNooping, 0, pipeControlLocationSize); } //obtain next candidate for nooping currentPipeControlForNooping = nextCommandBuffer->pipeControlThatMayBeErasedLocation; //track epilogue pipe control epiloguePipeControlLocation = nextCommandBuffer->epiloguePipeControlLocation; flushStampUpdateHelper.insert(nextCommandBuffer->flushStamp->getStampReference()); auto nextCommandBufferAddress = nextCommandBuffer->batchBuffer.commandBufferAllocation->getGpuAddress(); auto offsetedCommandBuffer = (uint64_t)ptrOffset(nextCommandBufferAddress, nextCommandBuffer->batchBuffer.startOffset); addBatchBufferStart((MI_BATCH_BUFFER_START *)currentBBendLocation, 
offsetedCommandBuffer, false); if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) { flatBatchBufferHelper->registerCommandChunk(nextCommandBuffer->batchBuffer, sizeof(MI_BATCH_BUFFER_START)); } currentBBendLocation = nextCommandBuffer->batchBufferEndLocation; lastTaskCount = nextCommandBuffer->taskCount; nextCommandBuffer = nextCommandBuffer->next; commandBufferList.removeFrontOne(); } surfacesForSubmit.reserve(resourcePackage.size() + 1); for (auto &surface : resourcePackage) { surfacesForSubmit.push_back(surface); } //make sure we flush DC if needed if (epiloguePipeControlLocation) { bool flushDcInEpilogue = true; if (DebugManager.flags.DisableDcFlushInEpilogue.get()) { flushDcInEpilogue = false; } ((PIPE_CONTROL *)epiloguePipeControlLocation)->setDcFlushEnable(flushDcInEpilogue); } if (!this->flush(primaryCmdBuffer->batchBuffer, surfacesForSubmit)) { submitResult = false; break; } //after flush task level is closed this->taskLevel++; flushStampUpdateHelper.updateAll(flushStamp->peekStamp()); this->latestFlushedTaskCount = lastTaskCount; this->makeSurfacePackNonResident(surfacesForSubmit); resourcePackage.clear(); } this->totalMemoryUsed = 0; } return submitResult; } template size_t CommandStreamReceiverHw::getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device) { size_t size = getRequiredCmdStreamSize(dispatchFlags, device); return alignUp(size, MemoryConstants::cacheLineSize); } template size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device) { size_t size = getRequiredCmdSizeForPreamble(device); size += getRequiredStateBaseAddressSize(); if (!this->isStateSipSent || device.isDebuggerActive()) { size += PreemptionHelper::getRequiredStateSipCmdSize(device); } size += MemorySynchronizationCommands::getSizeForSinglePipeControl(); size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); size += getCmdSizeForL3Config(); size += getCmdSizeForComputeMode(); size += 
getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); size += getCmdSizeForPipelineSelect(); size += getCmdSizeForPreemption(dispatchFlags); size += getCmdSizeForEpilogue(dispatchFlags); size += getCmdSizeForPrologue(dispatchFlags); if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads) { if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { size += sizeof(typename GfxFamily::PIPE_CONTROL); } } if (experimentalCmdBuffer.get() != nullptr) { size += experimentalCmdBuffer->getRequiredInjectionSize(); } size += TimestampPacketHelper::getRequiredCmdStreamSize(dispatchFlags.csrDependencies); if (stallingPipeControlOnNextFlushRequired) { auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes; if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() > 0) { size += MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo()); } else { size += sizeof(typename GfxFamily::PIPE_CONTROL); } } if (requiresInstructionCacheFlush) { size += sizeof(typename GfxFamily::PIPE_CONTROL); } return size; } template inline size_t CommandStreamReceiverHw::getCmdSizeForPipelineSelect() const { size_t size = 0; if ((csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.specialPipelineSelectModeChanged || !isPreambleSent) && !isPipelineSelectAlreadyProgrammed()) { size += PreambleHelper::getCmdSizeForPipelineSelect(peekHwInfo()); } return size; } template inline void CommandStreamReceiverHw::emitNoop(LinearStream &commandStream, size_t bytesToUpdate) { if (bytesToUpdate) { auto ptr = commandStream.getSpace(bytesToUpdate); memset(ptr, 0, bytesToUpdate); } } template inline void CommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { 
int64_t waitTimeout = 0; bool enableTimeout = kmdNotifyHelper->obtainTimeoutParams(waitTimeout, useQuickKmdSleep, *getTagAddress(), taskCountToWait, flushStampToWait, forcePowerSavingMode); auto status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait); if (!status) { waitForFlushStamp(flushStampToWait); //now call blocking wait, this is to ensure that task count is reached waitForCompletionWithTimeout(false, 0, taskCountToWait); } UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait); if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) { kmdNotifyHelper->updateLastWaitForCompletionTimestamp(); } } template inline const HardwareInfo &CommandStreamReceiverHw::peekHwInfo() const { return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); } template inline void CommandStreamReceiverHw::programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags) { PreemptionHelper::programCmdStream(csr, dispatchFlags.preemptionMode, this->lastPreemptionMode, preemptionAllocation); this->lastPreemptionMode = dispatchFlags.preemptionMode; } template inline size_t CommandStreamReceiverHw::getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const { return PreemptionHelper::getRequiredCmdStreamSize(dispatchFlags.preemptionMode, this->lastPreemptionMode); } template inline void CommandStreamReceiverHw::programStateSip(LinearStream &cmdStream, Device &device) { if (!this->isStateSipSent || device.isDebuggerActive()) { PreemptionHelper::programStateSip(cmdStream, device); this->isStateSipSent = true; } } template inline void CommandStreamReceiverHw::programPreamble(LinearStream &csr, Device &device, DispatchFlags &dispatchFlags, uint32_t &newL3Config) { if (!this->isPreambleSent) { GraphicsAllocation *perDssBackedBufferToUse = dispatchFlags.usePerDssBackedBuffer ? 
this->perDssBackedBuffer : nullptr; PreambleHelper::programPreamble(&csr, device, newL3Config, this->requiredThreadArbitrationPolicy, this->preemptionAllocation, perDssBackedBufferToUse); this->isPreambleSent = true; this->lastSentL3Config = newL3Config; this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy; } } template inline void CommandStreamReceiverHw::programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads) { if (mediaVfeStateDirty) { auto commandOffset = PreambleHelper::programVFEState(&csr, peekHwInfo(), requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, getOsContext().getEngineType()); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper->collectScratchSpacePatchInfo(getScratchPatchAddress(), commandOffset, csr); } setMediaVFEStateDirty(false); } } template void CommandStreamReceiverHw::programMediaSampler(LinearStream &commandStream, DispatchFlags &dispatchFlags) { } template size_t CommandStreamReceiverHw::getCmdSizeForMediaSampler(bool mediaSamplerRequired) const { return 0; } template void CommandStreamReceiverHw::collectStateBaseAddresPatchInfo( uint64_t baseAddress, uint64_t commandOffset, const LinearStream &dsh, const LinearStream &ioh, const LinearStream &ssh, uint64_t generalStateBase) { typedef typename GfxFamily::STATE_BASE_ADDRESS STATE_BASE_ADDRESS; PatchInfoData dynamicStatePatchInfo = {dsh.getGraphicsAllocation()->getGpuAddress(), 0u, PatchInfoAllocationType::DynamicStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; PatchInfoData generalStatePatchInfo = {generalStateBase, 0u, PatchInfoAllocationType::GeneralStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; PatchInfoData surfaceStatePatchInfo = {ssh.getGraphicsAllocation()->getGpuAddress(), 
0u, PatchInfoAllocationType::SurfaceStateHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; PatchInfoData indirectObjectPatchInfo = {ioh.getGraphicsAllocation()->getGpuAddress(), 0u, PatchInfoAllocationType::IndirectObjectHeap, baseAddress, commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::INDIRECTOBJECTBASEADDRESS_BYTEOFFSET, PatchInfoAllocationType::Default}; flatBatchBufferHelper->setPatchInfoData(dynamicStatePatchInfo); flatBatchBufferHelper->setPatchInfoData(generalStatePatchInfo); flatBatchBufferHelper->setPatchInfoData(surfaceStatePatchInfo); flatBatchBufferHelper->setPatchInfoData(indirectObjectPatchInfo); } template void CommandStreamReceiverHw::resetKmdNotifyHelper(KmdNotifyHelper *newHelper) { kmdNotifyHelper.reset(newHelper); kmdNotifyHelper->updateAcLineStatus(); if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) { kmdNotifyHelper->updateLastWaitForCompletionTimestamp(); } } template void CommandStreamReceiverHw::addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd) { } template uint64_t CommandStreamReceiverHw::getScratchPatchAddress() { return scratchSpaceController->getScratchPatchAddress(); } template bool CommandStreamReceiverHw::detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const { return DebugManager.flags.ForceCsrReprogramming.get(); } template uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking) { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; auto lock = obtainUniqueOwnership(); auto &commandStream = getCS(BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, peekHwInfo())); auto commandStreamStart = commandStream.getUsed(); auto newTaskCount = taskCount + 1; latestSentTaskCount = newTaskCount; programEnginePrologue(commandStream); for (auto 
&blitProperties : blitPropertiesContainer) { TimestampPacketHelper::programCsrDependencies(commandStream, blitProperties.csrDependencies); BlitCommandsHelper::dispatchBlitCommandsForBuffer(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); if (blitProperties.outputTimestampPacket) { auto timestampPacketGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); EncodeMiFlushDW::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0); makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation()); } blitProperties.csrDependencies.makeResident(*this); makeResident(*blitProperties.srcAllocation); makeResident(*blitProperties.dstAllocation); } MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo()); EncodeMiFlushDW::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount); MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo()); auto batchBufferEnd = reinterpret_cast(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END))); *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd; alignToCacheLine(commandStream); makeResident(*tagAllocation); if (globalFenceAllocation) { makeResident(*globalFenceAllocation); } BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, commandStream.getUsed(), &commandStream, nullptr}; flush(batchBuffer, getResidencyAllocations()); makeSurfacePackNonResident(getResidencyAllocations()); latestFlushedTaskCount = newTaskCount; taskCount = newTaskCount; auto flushStampToWait = flushStamp->peekStamp(); lock.unlock(); if (blocking) { waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, false); 
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION); } return newTaskCount; } template inline void CommandStreamReceiverHw::programAdditionalPipelineSelect(LinearStream &csr, PipelineSelectArgs &pipelineSelectArgs, bool is3DPipeline) { auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); if (hwHelper.is3DPipelineSelectWARequired(peekHwInfo()) && isRcs()) { auto localPipelineSelectArgs = pipelineSelectArgs; localPipelineSelectArgs.is3DPipelineRequired = is3DPipeline; PreambleHelper::programPipelineSelect(&csr, localPipelineSelectArgs, peekHwInfo()); } } template inline bool CommandStreamReceiverHw::isComputeModeNeeded() const { return false; } template inline bool CommandStreamReceiverHw::isPipelineSelectAlreadyProgrammed() const { auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); return isComputeModeNeeded() && hwHelper.is3DPipelineSelectWARequired(peekHwInfo()) && isRcs(); } template inline void CommandStreamReceiverHw::programEpilogue(LinearStream &csr, void **batchBufferEndLocation, DispatchFlags &dispatchFlags) { if (dispatchFlags.epilogueRequired) { auto currentOffset = ptrDiff(csr.getSpace(0u), csr.getCpuBase()); auto gpuAddress = ptrOffset(csr.getGraphicsAllocation()->getGpuAddress(), currentOffset); addBatchBufferStart(reinterpret_cast(*batchBufferEndLocation), gpuAddress, false); this->programEpliogueCommands(csr, dispatchFlags); programEndingCmd(csr, batchBufferEndLocation, isDirectSubmissionEnabled()); this->alignToCacheLine(csr); } } template inline size_t CommandStreamReceiverHw::getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const { if (dispatchFlags.epilogueRequired) { size_t terminateCmd = sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); if (isDirectSubmissionEnabled()) { terminateCmd = sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); } auto size = getCmdSizeForEpilogueCommands(dispatchFlags) + terminateCmd; return alignUp(size, MemoryConstants::cacheLineSize); } 
return 0u; } template inline void CommandStreamReceiverHw::programEnginePrologue(LinearStream &csr) { } template inline size_t CommandStreamReceiverHw::getCmdSizeForPrologue(const DispatchFlags &dispatchFlags) const { return 0u; } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/command_stream_receiver_hw_bdw_plus.inl000066400000000000000000000072221363734646600330750ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw_base.inl" namespace NEO { template size_t CommandStreamReceiverHw::getSshHeapSize() { return defaultHeapSize; } template inline void CommandStreamReceiverHw::programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config) { typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; if (csrSizeRequestFlags.l3ConfigChanged && this->isPreambleSent) { // Add a PIPE_CONTROL w/ CS_stall auto pCmd = (PIPE_CONTROL *)csr.getSpace(sizeof(PIPE_CONTROL)); *pCmd = GfxFamily::cmdInitPipeControl; pCmd->setCommandStreamerStallEnable(true); pCmd->setDcFlushEnable(true); addClearSLMWorkAround(pCmd); PreambleHelper::programL3(&csr, newL3Config); this->lastSentL3Config = newL3Config; } } template size_t CommandStreamReceiverHw::getRequiredStateBaseAddressSize() const { using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT; size_t size = 0; auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); if (hwHelper.is3DPipelineSelectWARequired(peekHwInfo())) { size += (2 * PreambleHelper::getCmdSizeForPipelineSelect(peekHwInfo())); } size += sizeof(typename GfxFamily::STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL); return size; } template inline size_t CommandStreamReceiverHw::getCmdSizeForL3Config() const { if (!this->isPreambleSent) { return sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } else if (csrSizeRequestFlags.l3ConfigChanged) { return sizeof(typename 
GfxFamily::MI_LOAD_REGISTER_IMM) + sizeof(typename GfxFamily::PIPE_CONTROL); } return 0; } template void CommandStreamReceiverHw::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) { if (csrSizeRequestFlags.mediaSamplerConfigChanged || !isPreambleSent) { if (!isPipelineSelectAlreadyProgrammed()) { PreambleHelper::programPipelineSelect(&commandStream, pipelineSelectArgs, peekHwInfo()); } this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired; } } template void CommandStreamReceiverHw::adjustThreadArbitionPolicy(void *const stateComputeMode) {} template void CommandStreamReceiverHw::createScratchSpaceController() { scratchSpaceController = std::make_unique(rootDeviceIndex, executionEnvironment, *internalAllocationStorage.get()); } template void CommandStreamReceiverHw::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) { this->programEngineModeEpliogue(csr, dispatchFlags); } template size_t CommandStreamReceiverHw::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const { return this->getCmdSizeForEngineMode(dispatchFlags); } template bool CommandStreamReceiverHw::isMultiOsContextCapable() const { return false; } template inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { auto pCmd = addPipeControlCmd(commandStream); pCmd->setTextureCacheInvalidationEnable(true); pCmd->setDcFlushEnable(true); return pCmd; } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/command_stream_receiver_hw_tgllp_plus.inl000066400000000000000000000040061363734646600334400ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/state_compute_mode_helper.h" namespace NEO { 
template void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; if (isComputeModeNeeded()) { programAdditionalPipelineSelect(stream, dispatchFlags.pipelineSelectArgs, true); this->lastSentCoherencyRequest = static_cast(dispatchFlags.requiresCoherency); auto stateComputeMode = GfxFamily::cmdInitStateComputeMode; adjustThreadArbitionPolicy(&stateComputeMode); EncodeStates::adjustStateComputeMode(stream, dispatchFlags.numGrfRequired, &stateComputeMode, isMultiOsContextCapable(), dispatchFlags.requiresCoherency); if (csrSizeRequestFlags.hasSharedHandles) { auto pc = stream.getSpaceForCmd(); *pc = GfxFamily::cmdInitPipeControl; } programAdditionalPipelineSelect(stream, dispatchFlags.pipelineSelectArgs, false); } } template <> inline bool CommandStreamReceiverHw::isComputeModeNeeded() const { return csrSizeRequestFlags.coherencyRequestChanged || csrSizeRequestFlags.hasSharedHandles || csrSizeRequestFlags.numGrfRequiredChanged || StateComputeModeHelper::isStateComputeModeRequired(csrSizeRequestFlags, this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy); } template <> inline typename Family::PIPE_CONTROL *CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { auto pCmd = addPipeControlCmd(commandStream); pCmd->setTextureCacheInvalidationEnable(true); pCmd->setDcFlushEnable(true); pCmd->setHdcPipelineFlush(true); return pCmd; } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/csr_definitions.h000066400000000000000000000142121363734646600264470ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_deps.h" #include "shared/source/command_stream/queue_throttle.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/helpers/hw_info.h" 
#include "shared/source/helpers/pipeline_select_args.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/memory_manager/memory_constants.h" #include namespace NEO { struct FlushStampTrackingObj; namespace CSRequirements { //cleanup section usually contains 1-2 pipeControls BB end and place for BB start //that makes 16 * 2 + 4 + 8 = 40 bytes //then command buffer is aligned to cacheline that can take up to 63 bytes //to be sure everything fits minimal size is at 2 x cacheline. constexpr auto minCommandQueueCommandStreamSize = 2 * MemoryConstants::cacheLineSize; constexpr auto csOverfetchSize = MemoryConstants::pageSize; } // namespace CSRequirements namespace TimeoutControls { constexpr int64_t maxTimeout = std::numeric_limits::max(); } namespace QueueSliceCount { constexpr uint64_t defaultSliceCount = 0; } namespace L3CachingSettings { constexpr uint32_t l3CacheOn = 0u; constexpr uint32_t l3CacheOff = 1u; constexpr uint32_t l3AndL1On = 2u; } // namespace L3CachingSettings struct DispatchFlags { DispatchFlags() = delete; DispatchFlags(CsrDependencies csrDependencies, TimestampPacketContainer *barrierTimestampPacketNodes, PipelineSelectArgs pipelineSelectArgs, FlushStampTrackingObj *flushStampReference, QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired, uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint64_t sliceCount, bool blocking, bool dcFlush, bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired, bool requiresCoherency, bool lowPriority, bool implicitFlush, bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer) : csrDependencies(csrDependencies), barrierTimestampPacketNodes(barrierTimestampPacketNodes), pipelineSelectArgs(pipelineSelectArgs), flushStampReference(flushStampReference), throttle(throttle), preemptionMode(preemptionMode), numGrfRequired(numGrfRequired), l3CacheSettings(l3CacheSettings), threadArbitrationPolicy(threadArbitrationPolicy), 
sliceCount(sliceCount), blocking(blocking), dcFlush(dcFlush), useSLM(useSLM), guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl), gsba32BitRequired(gsba32BitRequired), requiresCoherency(requiresCoherency), lowPriority(lowPriority), implicitFlush(implicitFlush), outOfOrderExecutionAllowed(outOfOrderExecutionAllowed), epilogueRequired(epilogueRequired), usePerDssBackedBuffer(usePerDSSbackedBuffer){}; CsrDependencies csrDependencies; TimestampPacketContainer *barrierTimestampPacketNodes = nullptr; PipelineSelectArgs pipelineSelectArgs; FlushStampTrackingObj *flushStampReference = nullptr; QueueThrottle throttle = QueueThrottle::MEDIUM; PreemptionMode preemptionMode = PreemptionMode::Disabled; uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber; uint32_t l3CacheSettings = L3CachingSettings::l3CacheOn; uint32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; uint64_t sliceCount = QueueSliceCount::defaultSliceCount; uint64_t engineHints = 0; bool blocking = false; bool dcFlush = false; bool useSLM = false; bool guardCommandBufferWithPipeControl = false; bool gsba32BitRequired = false; bool requiresCoherency = false; bool lowPriority = false; bool implicitFlush = false; bool outOfOrderExecutionAllowed = false; bool epilogueRequired = false; bool usePerDssBackedBuffer = false; }; struct CsrSizeRequestFlags { bool l3ConfigChanged = false; bool coherencyRequestChanged = false; bool preemptionRequestChanged = false; bool mediaSamplerConfigChanged = false; bool hasSharedHandles = false; bool numGrfRequiredChanged = false; bool specialPipelineSelectModeChanged = false; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/csr_deps.cpp000066400000000000000000000007031363734646600254220ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/csr_deps.h" #include "shared/source/helpers/timestamp_packet.h" namespace NEO { void 
CsrDependencies::makeResident(CommandStreamReceiver &commandStreamReceiver) const { for (auto ×tampPacketContainer : *this) { timestampPacketContainer->makeResident(commandStreamReceiver); } } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/csr_deps.h000066400000000000000000000007621363734646600250740ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/stackvec.h" namespace NEO { class TimestampPacketContainer; class CommandStreamReceiver; class CsrDependencies : public StackVec { public: enum class DependenciesType { OnCsr, OutOfCsr, All }; void makeResident(CommandStreamReceiver &commandStreamReceiver) const; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/definitions/000077500000000000000000000000001363734646600254275ustar00rootroot00000000000000command_stream_receiver_hw_ext.inl000066400000000000000000000014351363734646600343120ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/command_stream/definitions/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" namespace NEO { template void CommandStreamReceiverHw::programEngineModeCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) { } template void CommandStreamReceiverHw::programEngineModeEpliogue(LinearStream &csr, const DispatchFlags &dispatchFlags) { } template size_t CommandStreamReceiverHw::getCmdSizeForEngineMode(const DispatchFlags &dispatchFlags) const { return 0u; } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/device_command_stream.h000066400000000000000000000013011363734646600275700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel 
Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" namespace NEO { template class DeviceCommandStreamReceiver : public CommandStreamReceiverHw { typedef CommandStreamReceiverHw BaseClass; protected: DeviceCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) : BaseClass(executionEnvironment, rootDeviceIndex) { } public: static CommandStreamReceiver *create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex); }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/experimental_command_buffer.cpp000066400000000000000000000072161363734646600313520ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/memory_manager/memory_manager.h" #include #include namespace NEO { ExperimentalCommandBuffer::ExperimentalCommandBuffer(CommandStreamReceiver *csr, double profilingTimerResolution) : commandStreamReceiver(csr), currentStream(nullptr), timestampsOffset(0), experimentalAllocationOffset(0), defaultPrint(true), timerResolution(profilingTimerResolution) { auto rootDeviceIndex = csr->getRootDeviceIndex(); timestamps = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY}); memset(timestamps->getUnderlyingBuffer(), 0, timestamps->getUnderlyingBufferSize()); experimentalAllocation = csr->getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, MemoryConstants::pageSize, 
GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY}); memset(experimentalAllocation->getUnderlyingBuffer(), 0, experimentalAllocation->getUnderlyingBufferSize()); } ExperimentalCommandBuffer::~ExperimentalCommandBuffer() { auto timestamp = static_cast(timestamps->getUnderlyingBuffer()); for (uint32_t i = 0; i < timestampsOffset / (2 * sizeof(uint64_t)); i++) { auto stop = static_cast(*(timestamp + 1) * timerResolution); auto start = static_cast(*timestamp * timerResolution); auto delta = stop - start; printDebugString(defaultPrint, stdout, "#%u: delta %llu start %llu stop %llu\n", i, delta, start, stop); timestamp += 2; } MemoryManager *memoryManager = commandStreamReceiver->getMemoryManager(); memoryManager->freeGraphicsMemory(timestamps); memoryManager->freeGraphicsMemory(experimentalAllocation); if (currentStream.get()) { memoryManager->freeGraphicsMemory(currentStream->getGraphicsAllocation()); currentStream->replaceGraphicsAllocation(nullptr); } } void ExperimentalCommandBuffer::getCS(size_t minRequiredSize) { if (!currentStream) { currentStream.reset(new LinearStream(nullptr)); } minRequiredSize += CSRequirements::minCommandQueueCommandStreamSize; constexpr static auto additionalAllocationSize = CSRequirements::minCommandQueueCommandStreamSize + CSRequirements::csOverfetchSize; commandStreamReceiver->ensureCommandBufferAllocation(*currentStream, minRequiredSize, additionalAllocationSize); } void ExperimentalCommandBuffer::makeResidentAllocations() { commandStreamReceiver->makeResident(*currentStream->getGraphicsAllocation()); commandStreamReceiver->makeResident(*timestamps); commandStreamReceiver->makeResident(*experimentalAllocation); } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/experimental_command_buffer.h000066400000000000000000000030021363734646600310040ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include namespace NEO { 
class CommandStreamReceiver; class GraphicsAllocation; class LinearStream; class MemoryManager; class ExperimentalCommandBuffer { public: virtual ~ExperimentalCommandBuffer(); ExperimentalCommandBuffer(CommandStreamReceiver *csr, double profilingTimerResolution); template void injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset); template size_t getRequiredInjectionSize() noexcept; template size_t programExperimentalCommandBuffer(); void makeResidentAllocations(); protected: template size_t getTotalExperimentalSize() noexcept; void getCS(size_t minRequiredSize); template void addTimeStampPipeControl(); template size_t getTimeStampPipeControlSize() noexcept; template void addExperimentalCommands(); template size_t getExperimentalCommandsSize() noexcept; CommandStreamReceiver *commandStreamReceiver; std::unique_ptr currentStream; GraphicsAllocation *timestamps; uint32_t timestampsOffset; GraphicsAllocation *experimentalAllocation; uint32_t experimentalAllocationOffset; bool defaultPrint; double timerResolution; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/experimental_command_buffer.inl000066400000000000000000000104071363734646600313460ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/experimental_command_buffer.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset) { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; auto pCmd = parentStream.getSpaceForCmd(); auto commandStreamReceiverHw = static_cast 
*>(commandStreamReceiver); commandStreamReceiverHw->addBatchBufferStart(pCmd, currentStream->getGraphicsAllocation()->getGpuAddress() + cmdBufferOffset, true); } template size_t ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; return sizeof(MI_BATCH_BUFFER_START); } template size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer() { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; getCS(getTotalExperimentalSize()); size_t returnOffset = currentStream->getUsed(); //begin timestamp addTimeStampPipeControl(); addExperimentalCommands(); //end timestamp addTimeStampPipeControl(); //end auto pCmd = currentStream->getSpaceForCmd(); *pCmd = GfxFamily::cmdInitBatchBufferEnd; return returnOffset; } template size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; size_t size = sizeof(MI_BATCH_BUFFER_END) + getTimeStampPipeControlSize() + getExperimentalCommandsSize(); return size; } template size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; // Two P_C for timestamps return 2 * MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo()); } template void ExperimentalCommandBuffer::addTimeStampPipeControl() { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; uint64_t timeStampAddress = timestamps->getGpuAddress() + timestampsOffset; MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( *currentStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, false, *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo()); 
//moving to next chunk timestampsOffset += sizeof(uint64_t); DEBUG_BREAK_IF(timestamps->getUnderlyingBufferSize() < timestampsOffset); } template void ExperimentalCommandBuffer::addExperimentalCommands() { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; uint32_t *semaphoreData = reinterpret_cast(ptrOffset(experimentalAllocation->getUnderlyingBuffer(), experimentalAllocationOffset)); *semaphoreData = 1; uint64_t gpuAddr = experimentalAllocation->getGpuAddress() + experimentalAllocationOffset; auto semaphoreCmd = currentStream->getSpaceForCmd(); *semaphoreCmd = GfxFamily::cmdInitMiSemaphoreWait; semaphoreCmd->setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD); semaphoreCmd->setSemaphoreDataDword(*semaphoreData); semaphoreCmd->setSemaphoreGraphicsAddress(gpuAddr); } template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept { using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; return sizeof(MI_SEMAPHORE_WAIT); } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/linear_stream.cpp000066400000000000000000000017071363734646600264520ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/graphics_allocation.h" namespace NEO { LinearStream::LinearStream(GraphicsAllocation *gfxAllocation, void *buffer, size_t bufferSize) : sizeUsed(0), maxAvailableSpace(bufferSize), buffer(buffer), graphicsAllocation(gfxAllocation) { } LinearStream::LinearStream(void *buffer, size_t bufferSize) : LinearStream(nullptr, buffer, bufferSize) { } LinearStream::LinearStream(GraphicsAllocation *gfxAllocation) : sizeUsed(0), graphicsAllocation(gfxAllocation) { if (gfxAllocation) { maxAvailableSpace = gfxAllocation->getUnderlyingBufferSize(); buffer = gfxAllocation->getUnderlyingBuffer(); } else { maxAvailableSpace = 0; buffer = nullptr; } } 
LinearStream::LinearStream() : LinearStream(nullptr) { } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/linear_stream.h000066400000000000000000000044741363734646600261230ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" #include #include #include namespace NEO { class GraphicsAllocation; class LinearStream { public: virtual ~LinearStream() = default; LinearStream(); LinearStream(void *buffer, size_t bufferSize); LinearStream(GraphicsAllocation *buffer); LinearStream(GraphicsAllocation *gfxAllocation, void *buffer, size_t bufferSize); void *getCpuBase() const; void *getSpace(size_t size); size_t getMaxAvailableSpace() const; size_t getAvailableSpace() const; size_t getUsed() const; void overrideMaxSize(size_t newMaxSize); void replaceBuffer(void *buffer, size_t bufferSize); GraphicsAllocation *getGraphicsAllocation() const; void replaceGraphicsAllocation(GraphicsAllocation *gfxAllocation); template Cmd *getSpaceForCmd() { auto ptr = getSpace(sizeof(Cmd)); return reinterpret_cast(ptr); } protected: std::atomic sizeUsed; size_t maxAvailableSpace; void *buffer; GraphicsAllocation *graphicsAllocation; }; inline void *LinearStream::getCpuBase() const { return buffer; } inline void *LinearStream::getSpace(size_t size) { UNRECOVERABLE_IF(sizeUsed + size > maxAvailableSpace); auto memory = ptrOffset(buffer, sizeUsed); sizeUsed += size; return memory; } inline size_t LinearStream::getMaxAvailableSpace() const { return maxAvailableSpace; } inline size_t LinearStream::getAvailableSpace() const { DEBUG_BREAK_IF(sizeUsed > maxAvailableSpace); return maxAvailableSpace - sizeUsed; } inline size_t LinearStream::getUsed() const { return sizeUsed; } inline void LinearStream::overrideMaxSize(size_t newMaxSize) { maxAvailableSpace = newMaxSize; } inline void LinearStream::replaceBuffer(void 
*buffer, size_t bufferSize) { this->buffer = buffer; maxAvailableSpace = bufferSize; sizeUsed = 0; } inline GraphicsAllocation *LinearStream::getGraphicsAllocation() const { return graphicsAllocation; } inline void LinearStream::replaceGraphicsAllocation(GraphicsAllocation *gfxAllocation) { graphicsAllocation = gfxAllocation; } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/preemption.cpp000066400000000000000000000107321363734646600260050ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/preemption.h" #include "shared/source/built_ins/built_ins.h" #include "shared/source/device/device.h" #include "shared/source/helpers/string.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" namespace NEO { bool PreemptionHelper::allowThreadGroupPreemption(const PreemptionFlags &flags) { if (flags.flags.disablePerCtxtPreemptionGranularityControl) { return false; } if (flags.flags.usesFencesForReadWriteImages && flags.flags.disableLSQCROPERFforOCL) { return false; } if (flags.flags.schedulerKernel || flags.flags.vmeKernel) { return false; } return true; } bool PreemptionHelper::allowMidThreadPreemption(const PreemptionFlags &flags) { return (flags.flags.disabledMidThreadPreemptionKernel == 0) && !(flags.flags.vmeKernel && !flags.flags.deviceSupportsVmePreemption); } PreemptionMode PreemptionHelper::taskPreemptionMode(PreemptionMode devicePreemptionMode, const PreemptionFlags &flags) { if (devicePreemptionMode == PreemptionMode::Disabled) { return PreemptionMode::Disabled; } if (devicePreemptionMode >= PreemptionMode::MidThread && allowMidThreadPreemption(flags)) { return PreemptionMode::MidThread; } if (devicePreemptionMode >= PreemptionMode::ThreadGroup && allowThreadGroupPreemption(flags)) { return PreemptionMode::ThreadGroup; } return PreemptionMode::MidBatch; }; void 
PreemptionHelper::setPreemptionLevelFlags(PreemptionFlags &flags, Device &device, Kernel *kernel) { if (kernel) { flags.flags.disabledMidThreadPreemptionKernel = kernel->getKernelInfo().patchInfo.executionEnvironment && kernel->getKernelInfo().patchInfo.executionEnvironment->DisableMidThreadPreemption; flags.flags.vmeKernel = kernel->isVmeKernel(); flags.flags.usesFencesForReadWriteImages = kernel->getKernelInfo().patchInfo.executionEnvironment && kernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages; flags.flags.schedulerKernel = kernel->isSchedulerKernel; } flags.flags.deviceSupportsVmePreemption = device.getDeviceInfo().vmeAvcSupportsPreemption; flags.flags.disablePerCtxtPreemptionGranularityControl = device.getHardwareInfo().workaroundTable.waDisablePerCtxtPreemptionGranularityControl; flags.flags.disableLSQCROPERFforOCL = device.getHardwareInfo().workaroundTable.waDisableLSQCROPERFforOCL; } PreemptionMode PreemptionHelper::taskPreemptionMode(Device &device, const MultiDispatchInfo &multiDispatchInfo) { PreemptionMode devMode = device.getPreemptionMode(); for (const auto &di : multiDispatchInfo) { auto kernel = di.getKernel(); PreemptionFlags flags = {}; setPreemptionLevelFlags(flags, device, kernel); PreemptionMode taskMode = taskPreemptionMode(devMode, flags); if (devMode > taskMode) { devMode = taskMode; } printDebugString(DebugManager.flags.PrintDebugMessages.get(), stdout, "devMode = %d, taskMode = %d.\n", static_cast(device.getPreemptionMode()), static_cast(taskMode)); } return devMode; } void PreemptionHelper::adjustDefaultPreemptionMode(RuntimeCapabilityTable &deviceCapabilities, bool allowMidThread, bool allowThreadGroup, bool allowMidBatch) { if (deviceCapabilities.defaultPreemptionMode >= PreemptionMode::MidThread && allowMidThread) { deviceCapabilities.defaultPreemptionMode = PreemptionMode::MidThread; } else if (deviceCapabilities.defaultPreemptionMode >= PreemptionMode::ThreadGroup && allowThreadGroup) { 
deviceCapabilities.defaultPreemptionMode = PreemptionMode::ThreadGroup; } else if (deviceCapabilities.defaultPreemptionMode >= PreemptionMode::MidBatch && allowMidBatch) { deviceCapabilities.defaultPreemptionMode = PreemptionMode::MidBatch; } else { deviceCapabilities.defaultPreemptionMode = PreemptionMode::Disabled; } } PreemptionMode PreemptionHelper::getDefaultPreemptionMode(const HardwareInfo &hwInfo) { return DebugManager.flags.ForcePreemptionMode.get() == -1 ? hwInfo.capabilityTable.defaultPreemptionMode : static_cast(DebugManager.flags.ForcePreemptionMode.get()); } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/preemption.h000066400000000000000000000065721363734646600254610ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/helpers/hw_info.h" #include "sku_info.h" namespace NEO { class Kernel; class Device; class GraphicsAllocation; struct MultiDispatchInfo; struct PreemptionFlags { PreemptionFlags() { data = 0; } union { struct { uint32_t disabledMidThreadPreemptionKernel : 1; uint32_t vmeKernel : 1; uint32_t deviceSupportsVmePreemption : 1; uint32_t disablePerCtxtPreemptionGranularityControl : 1; uint32_t usesFencesForReadWriteImages : 1; uint32_t disableLSQCROPERFforOCL : 1; uint32_t schedulerKernel : 1; uint32_t reserved : 25; } flags; uint32_t data; }; }; class PreemptionHelper { public: template using INTERFACE_DESCRIPTOR_DATA = typename CmdFamily::INTERFACE_DESCRIPTOR_DATA; static PreemptionMode taskPreemptionMode(PreemptionMode devicePreemptionMode, const PreemptionFlags &flags); static PreemptionMode taskPreemptionMode(Device &device, const MultiDispatchInfo &multiDispatchInfo); static bool allowThreadGroupPreemption(const PreemptionFlags &flags); static bool allowMidThreadPreemption(const PreemptionFlags 
&flags); static void adjustDefaultPreemptionMode(RuntimeCapabilityTable &deviceCapabilities, bool allowMidThread, bool allowThreadGroup, bool allowMidBatch); static void setPreemptionLevelFlags(PreemptionFlags &flags, Device &device, Kernel *kernel); template static size_t getRequiredPreambleSize(const Device &device); template static size_t getRequiredStateSipCmdSize(const Device &device); template static void programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr); template static void programStateSip(LinearStream &preambleCmdStream, Device &device); template static size_t getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); template static void programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr); template static size_t getPreemptionWaCsSize(const Device &device); template static void applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device); template static void applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device); static PreemptionMode getDefaultPreemptionMode(const HardwareInfo &hwInfo); template static void programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA *idd, PreemptionMode preemptionMode); }; template struct PreemptionConfig { static const uint32_t mmioAddress; static const uint32_t mask; static const uint32_t threadGroupVal; static const uint32_t cmdLevelVal; static const uint32_t midThreadVal; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/preemption.inl000066400000000000000000000116211363734646600260030ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/built_ins/sip.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device.h" #include 
"shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "opencl/source/command_queue/gpgpu_walker.h" namespace NEO { template void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr) { using GPGPU_CSR_BASE_ADDRESS = typename GfxFamily::GPGPU_CSR_BASE_ADDRESS; bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; if (isMidThreadPreemption) { UNRECOVERABLE_IF(nullptr == preemptionCsr); auto csr = reinterpret_cast(preambleCmdStream.getSpace(sizeof(GPGPU_CSR_BASE_ADDRESS))); *csr = GfxFamily::cmdInitGpgpuCsrBaseAddress; csr->setGpgpuCsrBaseAddress(preemptionCsr->getGpuAddressToPatch()); } } template void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device) { using STATE_SIP = typename GfxFamily::STATE_SIP; bool debuggerActive = device.isDebuggerActive(); bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; if (isMidThreadPreemption || debuggerActive) { auto sip = reinterpret_cast(preambleCmdStream.getSpace(sizeof(STATE_SIP))); *sip = GfxFamily::cmdInitStateSip; auto sipAllocation = SipKernel::getSipKernelAllocation(device); sip->setSystemInstructionPointer(sipAllocation->getGpuAddressToPatch()); } } template void PreemptionHelper::programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr) { if (oldPreemptionMode == newPreemptionMode) { return; } uint32_t regVal = 0; if (newPreemptionMode == PreemptionMode::MidThread) { regVal = PreemptionConfig::midThreadVal | PreemptionConfig::mask; } else if (newPreemptionMode == PreemptionMode::ThreadGroup) { regVal = PreemptionConfig::threadGroupVal | PreemptionConfig::mask; } else { regVal = PreemptionConfig::cmdLevelVal | PreemptionConfig::mask; } LriHelper::program(&cmdStream, PreemptionConfig::mmioAddress, regVal); } 
template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode) { if (newPreemptionMode == oldPreemptionMode) { return 0; } return sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } template size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) { if (device.getPreemptionMode() == PreemptionMode::MidThread) { return sizeof(typename GfxFamily::GPGPU_CSR_BASE_ADDRESS); } return 0; } template size_t PreemptionHelper::getRequiredStateSipCmdSize(const Device &device) { size_t size = 0; bool isMidThreadPreemption = device.getPreemptionMode() == PreemptionMode::MidThread; if (isMidThreadPreemption || device.isDebuggerActive()) { size += sizeof(typename GfxFamily::STATE_SIP); } return size; } template size_t PreemptionHelper::getPreemptionWaCsSize(const Device &device) { return 0u; } template void PreemptionHelper::applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device) { } template void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device) { } template void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA *idd, PreemptionMode preemptionMode) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; if (preemptionMode == PreemptionMode::MidThread) { idd->setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_DISABLE); } else { idd->setThreadPreemptionDisable(INTERFACE_DESCRIPTOR_DATA::THREAD_PREEMPTION_DISABLE_ENABLE); } } template constexpr uint32_t PreemptionConfig::mmioAddress = 0x2580; template constexpr uint32_t PreemptionConfig::mask = ((1 << 1) | (1 << 2)) << 16; template constexpr uint32_t PreemptionConfig::threadGroupVal = (1 << 1); template constexpr uint32_t PreemptionConfig::cmdLevelVal = (1 << 2); template constexpr uint32_t PreemptionConfig::midThreadVal = 0; } // namespace NEO 
compute-runtime-20.13.16352/shared/source/command_stream/preemption_mode.h000066400000000000000000000005201363734646600264500ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum PreemptionMode : uint32_t { // Keep in sync with ForcePreemptionMode debug variable Initial = 0, Disabled = 1, MidBatch, ThreadGroup, MidThread, }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/queue_throttle.h000066400000000000000000000003331363734646600263350ustar00rootroot00000000000000/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { enum QueueThrottle : uint32_t { LOW, MEDIUM, HIGH }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/scratch_space_controller.cpp000066400000000000000000000030401363734646600306620ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" namespace NEO { ScratchSpaceController::ScratchSpaceController(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : rootDeviceIndex(rootDeviceIndex), executionEnvironment(environment), csrAllocationStorage(allocationStorage) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); computeUnitsUsedForScratch = 
hwHelper.getComputeUnitsUsedForScratch(hwInfo); } ScratchSpaceController::~ScratchSpaceController() { if (scratchAllocation) { getMemoryManager()->freeGraphicsMemory(scratchAllocation); } if (privateScratchAllocation) { getMemoryManager()->freeGraphicsMemory(privateScratchAllocation); } } MemoryManager *ScratchSpaceController::getMemoryManager() const { UNRECOVERABLE_IF(executionEnvironment.memoryManager.get() == nullptr); return executionEnvironment.memoryManager.get(); } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/scratch_space_controller.h000066400000000000000000000040141363734646600303310ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/indirect_heap/indirect_heap.h" #include #include namespace NEO { class Device; class ExecutionEnvironment; class GraphicsAllocation; class InternalAllocationStorage; class MemoryManager; struct HardwareInfo; class OsContext; namespace ScratchSpaceConstants { constexpr size_t scratchSpaceOffsetFor64Bit = 4096u; } class ScratchSpaceController { public: ScratchSpaceController(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage); virtual ~ScratchSpaceController(); GraphicsAllocation *getScratchSpaceAllocation() { return scratchAllocation; } GraphicsAllocation *getPrivateScratchSpaceAllocation() { return privateScratchAllocation; } virtual void setRequiredScratchSpace(void *sshBaseAddress, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) = 0; virtual uint64_t calculateNewGSH() = 0; virtual uint64_t getScratchPatchAddress() = 0; virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0; protected: MemoryManager *getMemoryManager() const; const uint32_t rootDeviceIndex; ExecutionEnvironment 
&executionEnvironment; GraphicsAllocation *scratchAllocation = nullptr; GraphicsAllocation *privateScratchAllocation = nullptr; InternalAllocationStorage &csrAllocationStorage; size_t scratchSizeBytes = 0; size_t privateScratchSizeBytes = 0; bool force32BitAllocation = false; uint32_t computeUnitsUsedForScratch = 0; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/scratch_space_controller_base.cpp000066400000000000000000000073101363734646600316600ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" namespace NEO { ScratchSpaceControllerBase::ScratchSpaceControllerBase(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : ScratchSpaceController(rootDeviceIndex, environment, allocationStorage) { } void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch; if (requiredScratchSizeInBytes && (!scratchAllocation || scratchSizeBytes < requiredScratchSizeInBytes)) { if (scratchAllocation) { scratchAllocation->updateTaskCount(currentTaskCount, 
osContext.getContextId()); csrAllocationStorage.storeAllocation(std::unique_ptr(scratchAllocation), TEMPORARY_ALLOCATION); } scratchSizeBytes = requiredScratchSizeInBytes; createScratchSpaceAllocation(); vfeStateDirty = true; force32BitAllocation = getMemoryManager()->peekForce32BitAllocations(); if (is64bit && !force32BitAllocation) { stateBaseAddressDirty = true; } } } void ScratchSpaceControllerBase::createScratchSpaceAllocation() { scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSizeBytes, GraphicsAllocation::AllocationType::SCRATCH_SURFACE}); UNRECOVERABLE_IF(scratchAllocation == nullptr); } uint64_t ScratchSpaceControllerBase::calculateNewGSH() { uint64_t gsh = 0; if (scratchAllocation) { gsh = scratchAllocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; } return gsh; } uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() { //for 32 bit scratch space pointer is being programmed in Media VFE State and is relative to 0 as General State Base Address //for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit" // and "0 + scratchSpaceOffsetFor64bit" is being programmed in Media VFE state uint64_t scratchAddress = 0; if (scratchAllocation) { scratchAddress = scratchAllocation->getGpuAddressToPatch(); if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) { //this is to avoid scractch allocation offset "0" scratchAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; } } return scratchAddress; } void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) { } } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/scratch_space_controller_base.h000066400000000000000000000021251363734646600313240ustar00rootroot00000000000000/* * Copyright (C) 2018-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include 
"shared/source/command_stream/scratch_space_controller.h" namespace NEO { class ScratchSpaceControllerBase : public ScratchSpaceController { public: ScratchSpaceControllerBase(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage); void setRequiredScratchSpace(void *sshBaseAddress, uint32_t requiredPerThreadScratchSize, uint32_t requiredPerThreadPrivateScratchSize, uint32_t currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; uint64_t calculateNewGSH() override; uint64_t getScratchPatchAddress() override; void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override; protected: void createScratchSpaceAllocation(); }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/submissions_aggregator.cpp000066400000000000000000000114551363734646600304060ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "submissions_aggregator.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/memory_manager/graphics_allocation.h" void NEO::SubmissionAggregator::recordCommandBuffer(CommandBuffer *commandBuffer) { this->cmdBuffers.pushTailOne(*commandBuffer); } void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId) { auto primaryCommandBuffer = this->cmdBuffers.peekHead(); auto currentInspection = this->inspectionId; if (!primaryCommandBuffer) { return; } auto primaryBatchGraphicsAllocation = primaryCommandBuffer->batchBuffer.commandBufferAllocation; this->inspectionId++; primaryCommandBuffer->inspectionId = currentInspection; //primary command buffers must fix to budget for (auto &graphicsAllocation : primaryCommandBuffer->surfaces) { if (graphicsAllocation->getInspectionId(osContextId) < currentInspection) { 
graphicsAllocation->setInspectionId(currentInspection, osContextId); resourcePackage.push_back(graphicsAllocation); totalUsedSize += graphicsAllocation->getUnderlyingBufferSize(); } } //check if we have anything for merge if (!primaryCommandBuffer->next) { return; } //check if next cmd buffer is compatible if (primaryCommandBuffer->next->batchBuffer.requiresCoherency != primaryCommandBuffer->batchBuffer.requiresCoherency) { return; } if (primaryCommandBuffer->next->batchBuffer.low_priority != primaryCommandBuffer->batchBuffer.low_priority) { return; } if (primaryCommandBuffer->next->batchBuffer.throttle != primaryCommandBuffer->batchBuffer.throttle) { return; } if (primaryCommandBuffer->next->batchBuffer.sliceCount != primaryCommandBuffer->batchBuffer.sliceCount) { return; } auto nextCommandBuffer = primaryCommandBuffer->next; ResourcePackage newResources; while (nextCommandBuffer) { size_t nextCommandBufferNewResourcesSize = 0; //evaluate if buffer fits for (auto &graphicsAllocation : nextCommandBuffer->surfaces) { if (graphicsAllocation == primaryBatchGraphicsAllocation) { continue; } if (graphicsAllocation->getInspectionId(osContextId) < currentInspection) { graphicsAllocation->setInspectionId(currentInspection, osContextId); newResources.push_back(graphicsAllocation); nextCommandBufferNewResourcesSize += graphicsAllocation->getUnderlyingBufferSize(); } } if (nextCommandBuffer->batchBuffer.commandBufferAllocation && (nextCommandBuffer->batchBuffer.commandBufferAllocation != primaryBatchGraphicsAllocation)) { if (nextCommandBuffer->batchBuffer.commandBufferAllocation->getInspectionId(osContextId) < currentInspection) { nextCommandBuffer->batchBuffer.commandBufferAllocation->setInspectionId(currentInspection, osContextId); newResources.push_back(nextCommandBuffer->batchBuffer.commandBufferAllocation); nextCommandBufferNewResourcesSize += nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBufferSize(); } } if (nextCommandBufferNewResourcesSize + 
totalUsedSize <= totalMemoryBudget) { auto currentNode = nextCommandBuffer; nextCommandBuffer = nextCommandBuffer->next; totalUsedSize += nextCommandBufferNewResourcesSize; currentNode->inspectionId = currentInspection; for (auto &newResource : newResources) { resourcePackage.push_back(newResource); } newResources.clear(); } else { break; } } } NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset, size_t chainedBatchBufferStartOffset, GraphicsAllocation *chainedBatchBuffer, bool requiresCoherency, bool lowPriority, QueueThrottle throttle, uint64_t sliceCount, size_t usedSize, LinearStream *stream, void *endCmdPtr) : commandBufferAllocation(commandBufferAllocation), startOffset(startOffset), chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), chainedBatchBuffer(chainedBatchBuffer), requiresCoherency(requiresCoherency), low_priority(lowPriority), throttle(throttle), sliceCount(sliceCount), usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr) {} NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) { flushStamp.reset(new FlushStampTracker(false)); } compute-runtime-20.13.16352/shared/source/command_stream/submissions_aggregator.h000066400000000000000000000047031363734646600300510ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/memory_manager/residency_container.h" #include "shared/source/utilities/idlist.h" #include "shared/source/utilities/stackvec.h" #include namespace NEO { class Device; class Event; class FlushStampTracker; class GraphicsAllocation; struct BatchBuffer { BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset, size_t chainedBatchBufferStartOffset, GraphicsAllocation *chainedBatchBuffer, bool requiresCoherency, bool lowPriority, QueueThrottle 
throttle, uint64_t sliceCount, size_t usedSize, LinearStream *stream, void *endCmdPtr); BatchBuffer() {} GraphicsAllocation *commandBufferAllocation = nullptr; size_t startOffset = 0u; size_t chainedBatchBufferStartOffset = 0u; GraphicsAllocation *chainedBatchBuffer = nullptr; bool requiresCoherency = false; bool low_priority = false; QueueThrottle throttle = QueueThrottle::MEDIUM; uint64_t sliceCount = QueueSliceCount::defaultSliceCount; size_t usedSize = 0u; //only used in drm csr in gem close worker active mode LinearStream *stream = nullptr; void *endCmdPtr = nullptr; }; struct CommandBuffer : public IDNode { CommandBuffer(Device &device); ResidencyContainer surfaces; BatchBuffer batchBuffer; void *batchBufferEndLocation = nullptr; uint32_t inspectionId = 0; uint32_t taskCount = 0u; void *pipeControlThatMayBeErasedLocation = nullptr; void *epiloguePipeControlLocation = nullptr; std::unique_ptr flushStamp; Device &device; }; struct CommandBufferList : public IDList {}; using ResourcePackage = StackVec; class SubmissionAggregator { public: void recordCommandBuffer(CommandBuffer *commandBuffer); void aggregateCommandBuffers(ResourcePackage &resourcePackage, size_t &totalUsedSize, size_t totalMemoryBudget, uint32_t osContextId); CommandBufferList &peekCmdBufferList() { return cmdBuffers; } protected: CommandBufferList cmdBuffers; uint32_t inspectionId = 1; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/command_stream/thread_arbitration_policy.h000066400000000000000000000005451363734646600305150ustar00rootroot00000000000000/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include namespace NEO { namespace ThreadArbitrationPolicy { enum { AgeBased = 0x0u, RoundRobin = 0x1u, RoundRobinAfterDependency = 0x2u, NotPresent = 0xffffffffu }; } // namespace ThreadArbitrationPolicy } // namespace NEO 
compute-runtime-20.13.16352/shared/source/commands/000077500000000000000000000000001363734646600217245ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/commands/CMakeLists.txt000066400000000000000000000004211363734646600244610ustar00rootroot00000000000000# # Copyright (C) 2018-2020 Intel Corporation # # SPDX-License-Identifier: MIT # set(NEO_CORE_COMMANDS ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/bxml_generator_glue.h ) set_property(GLOBAL PROPERTY NEO_CORE_COMMANDS ${NEO_CORE_COMMANDS}) compute-runtime-20.13.16352/shared/source/commands/bxml_generator_glue.h000066400000000000000000000017301363734646600261220ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include #include // Macro helpers #ifndef STATIC_ASSERT #define STATIC_ASSERT(e) static_assert(e, #e) #endif #ifndef SIZE32 #define SIZE32(x) (sizeof(x) / sizeof(uint32_t)) #endif // SIZE32 /*****************************************************************************\ MACRO: BITFIELD_RANGE PURPOSE: Calculates the number of bits between the startbit and the endbit (0 based) \*****************************************************************************/ #ifndef BITFIELD_RANGE #define BITFIELD_RANGE(startbit, endbit) ((endbit) - (startbit) + 1) #endif /*****************************************************************************\ MACRO: BITFIELD_BIT PURPOSE: Definition declared for clarity when creating structs \*****************************************************************************/ #ifndef BITFIELD_BIT #define BITFIELD_BIT(bit) 1 #endif compute-runtime-20.13.16352/shared/source/compiler_interface/000077500000000000000000000000001363734646600237555ustar00rootroot00000000000000compute-runtime-20.13.16352/shared/source/compiler_interface/CMakeLists.txt000066400000000000000000000016771363734646600265300ustar00rootroot00000000000000# # Copyright (C) 2019-2020 Intel Corporation # # 
SPDX-License-Identifier: MIT # set(NEO_COMPILER_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/compiler_cache.h ${CMAKE_CURRENT_SOURCE_DIR}/compiler_cache.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/compiler_interface.inl ${CMAKE_CURRENT_SOURCE_DIR}/create_main.cpp ${CMAKE_CURRENT_SOURCE_DIR}/default_cache_config.h ${CMAKE_CURRENT_SOURCE_DIR}/intermediate_representations.h ${CMAKE_CURRENT_SOURCE_DIR}/linker.h ${CMAKE_CURRENT_SOURCE_DIR}/linker.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.h ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compiler_options${BRANCH_DIR_SUFFIX}/compiler_options.h ) set_property(GLOBAL PROPERTY NEO_COMPILER_INTERFACE ${NEO_COMPILER_INTERFACE}) compute-runtime-20.13.16352/shared/source/compiler_interface/compiler_cache.cpp000066400000000000000000000050111363734646600274130ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_cache.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/hash.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/utilities/debug_settings_reader.h" #include "config.h" #include "os_inc.h" #include #include #include #include #include namespace NEO { std::mutex CompilerCache::cacheAccessMtx; const std::string CompilerCache::getCachedFileName(const HardwareInfo &hwInfo, const ArrayRef input, const ArrayRef options, const ArrayRef internalOptions) { Hash hash; hash.update("----", 4); hash.update(&*input.begin(), input.size()); hash.update("----", 4); hash.update(&*options.begin(), options.size()); hash.update("----", 4); hash.update(&*internalOptions.begin(), internalOptions.size()); hash.update("----", 4); 
hash.update(reinterpret_cast(&hwInfo.platform), sizeof(hwInfo.platform)); hash.update("----", 4); hash.update(reinterpret_cast(&hwInfo.featureTable), sizeof(hwInfo.featureTable)); hash.update("----", 4); hash.update(reinterpret_cast(&hwInfo.workaroundTable), sizeof(hwInfo.workaroundTable)); auto res = hash.finish(); std::stringstream stream; stream << std::setfill('0') << std::setw(sizeof(res) * 2) << std::hex << res; return stream.str(); } CompilerCache::CompilerCache(const CompilerCacheConfig &cacheConfig) : config(cacheConfig){}; bool CompilerCache::cacheBinary(const std::string kernelFileHash, const char *pBinary, uint32_t binarySize) { if (pBinary == nullptr || binarySize == 0) { return false; } std::string filePath = config.cacheDir + PATH_SEPARATOR + kernelFileHash + config.cacheFileExtension; std::lock_guard lock(cacheAccessMtx); return 0 != writeDataToFile(filePath.c_str(), pBinary, binarySize); } std::unique_ptr CompilerCache::loadCachedBinary(const std::string kernelFileHash, size_t &cachedBinarySize) { std::string filePath = config.cacheDir + PATH_SEPARATOR + kernelFileHash + config.cacheFileExtension; std::lock_guard lock(cacheAccessMtx); return loadDataFromFile(filePath.c_str(), cachedBinarySize); } } // namespace NEO compute-runtime-20.13.16352/shared/source/compiler_interface/compiler_cache.h000066400000000000000000000024361363734646600270700ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/utilities/arrayref.h" #include #include #include #include #include namespace NEO { struct HardwareInfo; struct CompilerCacheConfig { bool enabled = true; std::string cacheFileExtension; std::string cacheDir; }; class CompilerCache { public: static const std::string getCachedFileName(const HardwareInfo &hwInfo, ArrayRef input, ArrayRef options, ArrayRef internalOptions); CompilerCache(const CompilerCacheConfig &config); virtual ~CompilerCache() = default; 
CompilerCache(const CompilerCache &) = delete; CompilerCache(CompilerCache &&) = delete; CompilerCache &operator=(const CompilerCache &) = delete; CompilerCache &operator=(CompilerCache &&) = delete; MOCKABLE_VIRTUAL bool cacheBinary(const std::string kernelFileHash, const char *pBinary, uint32_t binarySize); MOCKABLE_VIRTUAL std::unique_ptr loadCachedBinary(const std::string kernelFileHash, size_t &cachedBinarySize); protected: static std::mutex cacheAccessMtx; CompilerCacheConfig config; }; } // namespace NEO compute-runtime-20.13.16352/shared/source/compiler_interface/compiler_interface.cpp000066400000000000000000000473341363734646600303260ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/compiler_interface/compiler_interface.h" #include "shared/source/compiler_interface/compiler_cache.h" #include "shared/source/compiler_interface/compiler_interface.inl" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_info.h" #include "opencl/source/os_interface/os_inc_base.h" #include "cif/common/cif_main.h" #include "cif/helpers/error.h" #include "cif/import/library_api.h" #include "ocl_igc_interface/code_type.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" #include "ocl_igc_interface/igc_ocl_device_ctx.h" #include "ocl_igc_interface/platform_helper.h" #include namespace NEO { SpinLock CompilerInterface::spinlock; enum CachingMode { None, Direct, PreProcess }; CompilerInterface::CompilerInterface() : cache() { } CompilerInterface::~CompilerInterface() = default; TranslationOutput::ErrorCode CompilerInterface::build( const NEO::Device &device, const TranslationInput &input, TranslationOutput &output) { if (false == isCompilerAvailable(input.srcType, input.outType)) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } IGC::CodeType::CodeType_t srcCodeType = input.srcType; 
IGC::CodeType::CodeType_t intermediateCodeType = IGC::CodeType::undefined; if (input.preferredIntermediateType != IGC::CodeType::undefined) { intermediateCodeType = input.preferredIntermediateType; } CachingMode cachingMode = None; if (input.allowCaching) { if ((srcCodeType == IGC::CodeType::oclC) && (std::strstr(input.src.begin(), "#include") == nullptr)) { cachingMode = CachingMode::Direct; } else { cachingMode = CachingMode::PreProcess; } } std::string kernelFileHash; if (cachingMode == CachingMode::Direct) { kernelFileHash = CompilerCache::getCachedFileName(device.getHardwareInfo(), input.src, input.apiOptions, input.internalOptions); output.deviceBinary.mem = cache->loadCachedBinary(kernelFileHash, output.deviceBinary.size); if (output.deviceBinary.mem) { return TranslationOutput::ErrorCode::Success; } } auto inSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.src.begin(), input.src.size()); auto fclOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.apiOptions.begin(), input.apiOptions.size()); auto fclInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.internalOptions.begin(), input.internalOptions.size()); auto idsBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); auto valuesBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); for (const auto &specConst : input.specializedValues) { idsBuffer->PushBackRawCopy(specConst.first); valuesBuffer->PushBackRawCopy(specConst.second); } CIF::RAII::UPtr_t intermediateRepresentation; if (srcCodeType == IGC::CodeType::oclC) { if (intermediateCodeType == IGC::CodeType::undefined) { intermediateCodeType = getPreferredIntermediateRepresentation(device); } auto fclTranslationCtx = createFclTranslationCtx(device, srcCodeType, intermediateCodeType); auto fclOutput = translate(fclTranslationCtx.get(), inSrc.get(), fclOptions.get(), fclInternalOptions.get()); if (fclOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } 
TranslationOutput::makeCopy(output.frontendCompilerLog, fclOutput->GetBuildLog()); if (fclOutput->Successful() == false) { return TranslationOutput::ErrorCode::BuildFailure; } output.intermediateCodeType = intermediateCodeType; TranslationOutput::makeCopy(output.intermediateRepresentation, fclOutput->GetOutput()); fclOutput->GetOutput()->Retain(); // will be used as input to compiler intermediateRepresentation.reset(fclOutput->GetOutput()); } else { inSrc->Retain(); // will be used as input to compiler directly intermediateRepresentation.reset(inSrc.get()); intermediateCodeType = srcCodeType; } if (cachingMode == CachingMode::PreProcess) { kernelFileHash = CompilerCache::getCachedFileName(device.getHardwareInfo(), ArrayRef(intermediateRepresentation->GetMemory(), intermediateRepresentation->GetSize()), input.apiOptions, input.internalOptions); output.deviceBinary.mem = cache->loadCachedBinary(kernelFileHash, output.deviceBinary.size); if (output.deviceBinary.mem) { return TranslationOutput::ErrorCode::Success; } } auto igcTranslationCtx = createIgcTranslationCtx(device, intermediateCodeType, IGC::CodeType::oclGenBin); auto igcOutput = translate(igcTranslationCtx.get(), intermediateRepresentation.get(), idsBuffer.get(), valuesBuffer.get(), fclOptions.get(), fclInternalOptions.get(), input.GTPinInput); if (igcOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } TranslationOutput::makeCopy(output.backendCompilerLog, igcOutput->GetBuildLog()); if (igcOutput->Successful() == false) { return TranslationOutput::ErrorCode::BuildFailure; } if (input.allowCaching) { cache->cacheBinary(kernelFileHash, igcOutput->GetOutput()->GetMemory(), static_cast(igcOutput->GetOutput()->GetSize())); } TranslationOutput::makeCopy(output.deviceBinary, igcOutput->GetOutput()); TranslationOutput::makeCopy(output.debugData, igcOutput->GetDebugData()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::compile( const NEO::Device 
&device, const TranslationInput &input, TranslationOutput &output) { if ((IGC::CodeType::oclC != input.srcType) && (IGC::CodeType::elf != input.srcType)) { return TranslationOutput::ErrorCode::AlreadyCompiled; } if (false == isCompilerAvailable(input.srcType, input.outType)) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto outType = input.outType; if (outType == IGC::CodeType::undefined) { outType = getPreferredIntermediateRepresentation(device); } auto fclSrc = CIF::Builtins::CreateConstBuffer(fclMain.get(), input.src.begin(), input.src.size()); auto fclOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), input.apiOptions.begin(), input.apiOptions.size()); auto fclInternalOptions = CIF::Builtins::CreateConstBuffer(fclMain.get(), input.internalOptions.begin(), input.internalOptions.size()); auto fclTranslationCtx = createFclTranslationCtx(device, input.srcType, outType); auto fclOutput = translate(fclTranslationCtx.get(), fclSrc.get(), fclOptions.get(), fclInternalOptions.get()); if (fclOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } TranslationOutput::makeCopy(output.frontendCompilerLog, fclOutput->GetBuildLog()); if (fclOutput->Successful() == false) { return TranslationOutput::ErrorCode::CompilationFailure; } output.intermediateCodeType = outType; TranslationOutput::makeCopy(output.intermediateRepresentation, fclOutput->GetOutput()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::link( const NEO::Device &device, const TranslationInput &input, TranslationOutput &output) { if (false == isCompilerAvailable(input.srcType, input.outType)) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto inSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.src.begin(), input.src.size()); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.apiOptions.begin(), input.apiOptions.size()); auto igcInternalOptions = 
CIF::Builtins::CreateConstBuffer(igcMain.get(), input.internalOptions.begin(), input.internalOptions.size()); if (inSrc == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } CIF::RAII::UPtr_t currOut; inSrc->Retain(); // shared with currSrc CIF::RAII::UPtr_t currSrc(inSrc.get()); IGC::CodeType::CodeType_t translationChain[] = {IGC::CodeType::elf, IGC::CodeType::llvmBc, IGC::CodeType::oclGenBin}; constexpr size_t numTranslations = sizeof(translationChain) / sizeof(translationChain[0]); for (size_t ti = 1; ti < numTranslations; ti++) { IGC::CodeType::CodeType_t inType = translationChain[ti - 1]; IGC::CodeType::CodeType_t outType = translationChain[ti]; auto igcTranslationCtx = createIgcTranslationCtx(device, inType, outType); currOut = translate(igcTranslationCtx.get(), currSrc.get(), igcOptions.get(), igcInternalOptions.get(), input.GTPinInput); if (currOut == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } if (currOut->Successful() == false) { TranslationOutput::makeCopy(output.backendCompilerLog, currOut->GetBuildLog()); return TranslationOutput::ErrorCode::LinkFailure; } currOut->GetOutput()->Retain(); // shared with currSrc currSrc.reset(currOut->GetOutput()); } TranslationOutput::makeCopy(output.backendCompilerLog, currOut->GetBuildLog()); TranslationOutput::makeCopy(output.deviceBinary, currOut->GetOutput()); TranslationOutput::makeCopy(output.debugData, currOut->GetDebugData()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, SpecConstantInfo &output) { if (false == isIgcAvailable()) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto igcTranslationCtx = createIgcTranslationCtx(device, IGC::CodeType::spirV, IGC::CodeType::oclGenBin); auto inSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), srcSpirV.begin(), srcSpirV.size()); output.idsBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), 
nullptr, 0); output.sizesBuffer = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); auto retVal = getSpecConstantsInfoImpl(igcTranslationCtx.get(), inSrc.get(), output.idsBuffer.get(), output.sizesBuffer.get()); if (!retVal) { return TranslationOutput::ErrorCode::UnknownError; } return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::createLibrary( NEO::Device &device, const TranslationInput &input, TranslationOutput &output) { if (false == isIgcAvailable()) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } auto igcSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.src.begin(), input.src.size()); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.apiOptions.begin(), input.apiOptions.size()); auto igcInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), input.internalOptions.begin(), input.internalOptions.size()); auto intermediateRepresentation = IGC::CodeType::llvmBc; auto igcTranslationCtx = createIgcTranslationCtx(device, IGC::CodeType::elf, intermediateRepresentation); auto igcOutput = translate(igcTranslationCtx.get(), igcSrc.get(), igcOptions.get(), igcInternalOptions.get()); if (igcOutput == nullptr) { return TranslationOutput::ErrorCode::UnknownError; } TranslationOutput::makeCopy(output.backendCompilerLog, igcOutput->GetBuildLog()); if (igcOutput->Successful() == false) { return TranslationOutput::ErrorCode::LinkFailure; } output.intermediateCodeType = intermediateRepresentation; TranslationOutput::makeCopy(output.intermediateRepresentation, igcOutput->GetOutput()); return TranslationOutput::ErrorCode::Success; } TranslationOutput::ErrorCode CompilerInterface::getSipKernelBinary(NEO::Device &device, SipKernelType type, std::vector &retBinary) { if (false == isIgcAvailable()) { return TranslationOutput::ErrorCode::CompilerNotAvailable; } const char *sipSrc = getSipLlSrc(device); std::string sipInternalOptions = getSipKernelCompilerInternalOptions(type); 
auto igcSrc = CIF::Builtins::CreateConstBuffer(igcMain.get(), sipSrc, strlen(sipSrc) + 1); auto igcOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), nullptr, 0); auto igcInternalOptions = CIF::Builtins::CreateConstBuffer(igcMain.get(), sipInternalOptions.c_str(), sipInternalOptions.size() + 1); auto igcTranslationCtx = createIgcTranslationCtx(device, IGC::CodeType::llvmLl, IGC::CodeType::oclGenBin); auto igcOutput = translate(igcTranslationCtx.get(), igcSrc.get(), igcOptions.get(), igcInternalOptions.get()); if ((igcOutput == nullptr) || (igcOutput->Successful() == false)) { return TranslationOutput::ErrorCode::UnknownError; } retBinary.assign(igcOutput->GetOutput()->GetMemory(), igcOutput->GetOutput()->GetMemory() + igcOutput->GetOutput()->GetSizeRaw()); return TranslationOutput::ErrorCode::Success; } bool CompilerInterface::loadFcl() { return NEO::loadCompiler(Os::frontEndDllName, fclLib, fclMain); ; } bool CompilerInterface::loadIgc() { return NEO::loadCompiler(Os::igcDllName, igcLib, igcMain); } bool CompilerInterface::initialize(std::unique_ptr cache, bool requireFcl) { bool fclAvailable = requireFcl ? 
this->loadFcl() : false; bool igcAvailable = this->loadIgc(); this->cache.swap(cache); return this->cache && igcAvailable && (fclAvailable || (false == requireFcl)); } IGC::FclOclDeviceCtxTagOCL *CompilerInterface::getFclDeviceCtx(const Device &device) { auto ulock = this->lock(); auto it = fclDeviceContexts.find(&device); if (it != fclDeviceContexts.end()) { return it->second.get(); } if (fclMain == nullptr) { DEBUG_BREAK_IF(true); // compiler not available return nullptr; } auto newDeviceCtx = fclMain->CreateInterface(); if (newDeviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } newDeviceCtx->SetOclApiVersion(device.getHardwareInfo().capabilityTable.clVersionSupport * 10); fclDeviceContexts[&device] = std::move(newDeviceCtx); return fclDeviceContexts[&device].get(); } IGC::IgcOclDeviceCtxTagOCL *CompilerInterface::getIgcDeviceCtx(const Device &device) { auto ulock = this->lock(); auto it = igcDeviceContexts.find(&device); if (it != igcDeviceContexts.end()) { return it->second.get(); } if (igcMain == nullptr) { DEBUG_BREAK_IF(true); // compiler not available return nullptr; } auto newDeviceCtx = igcMain->CreateInterface(); if (newDeviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } newDeviceCtx->SetProfilingTimerResolution(static_cast(device.getDeviceInfo().outProfilingTimerResolution)); auto igcPlatform = newDeviceCtx->GetPlatformHandle(); auto igcGtSystemInfo = newDeviceCtx->GetGTSystemInfoHandle(); auto igcFeWa = newDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle(); if (false == NEO::areNotNullptr(igcPlatform.get(), igcGtSystemInfo.get(), igcFeWa.get())) { DEBUG_BREAK_IF(true); // could not acquire handles to device descriptors return nullptr; } const HardwareInfo *hwInfo = &device.getHardwareInfo(); auto productFamily = DebugManager.flags.ForceCompilerUsePlatform.get(); if (productFamily != "unk") { getHwInfoForPlatformString(productFamily, hwInfo); } 
IGC::PlatformHelper::PopulateInterfaceWith(*igcPlatform, hwInfo->platform); IGC::GtSysInfoHelper::PopulateInterfaceWith(*igcGtSystemInfo, hwInfo->gtSystemInfo); igcFeWa.get()->SetFtrDesktop(device.getHardwareInfo().featureTable.ftrDesktop); igcFeWa.get()->SetFtrChannelSwizzlingXOREnabled(device.getHardwareInfo().featureTable.ftrChannelSwizzlingXOREnabled); igcFeWa.get()->SetFtrGtBigDie(device.getHardwareInfo().featureTable.ftrGtBigDie); igcFeWa.get()->SetFtrGtMediumDie(device.getHardwareInfo().featureTable.ftrGtMediumDie); igcFeWa.get()->SetFtrGtSmallDie(device.getHardwareInfo().featureTable.ftrGtSmallDie); igcFeWa.get()->SetFtrGT1(device.getHardwareInfo().featureTable.ftrGT1); igcFeWa.get()->SetFtrGT1_5(device.getHardwareInfo().featureTable.ftrGT1_5); igcFeWa.get()->SetFtrGT2(device.getHardwareInfo().featureTable.ftrGT2); igcFeWa.get()->SetFtrGT3(device.getHardwareInfo().featureTable.ftrGT3); igcFeWa.get()->SetFtrGT4(device.getHardwareInfo().featureTable.ftrGT4); igcFeWa.get()->SetFtrIVBM0M1Platform(device.getHardwareInfo().featureTable.ftrIVBM0M1Platform); igcFeWa.get()->SetFtrGTL(device.getHardwareInfo().featureTable.ftrGT1); igcFeWa.get()->SetFtrGTM(device.getHardwareInfo().featureTable.ftrGT2); igcFeWa.get()->SetFtrGTH(device.getHardwareInfo().featureTable.ftrGT3); igcFeWa.get()->SetFtrSGTPVSKUStrapPresent(device.getHardwareInfo().featureTable.ftrSGTPVSKUStrapPresent); igcFeWa.get()->SetFtrGTA(device.getHardwareInfo().featureTable.ftrGTA); igcFeWa.get()->SetFtrGTC(device.getHardwareInfo().featureTable.ftrGTC); igcFeWa.get()->SetFtrGTX(device.getHardwareInfo().featureTable.ftrGTX); igcFeWa.get()->SetFtr5Slice(device.getHardwareInfo().featureTable.ftr5Slice); igcFeWa.get()->SetFtrGpGpuMidThreadLevelPreempt(device.getHardwareInfo().featureTable.ftrGpGpuMidThreadLevelPreempt); igcFeWa.get()->SetFtrIoMmuPageFaulting(device.getHardwareInfo().featureTable.ftrIoMmuPageFaulting); igcFeWa.get()->SetFtrWddm2Svm(device.getHardwareInfo().featureTable.ftrWddm2Svm); 
igcFeWa.get()->SetFtrPooledEuEnabled(device.getHardwareInfo().featureTable.ftrPooledEuEnabled); igcFeWa.get()->SetFtrResourceStreamer(device.getHardwareInfo().featureTable.ftrResourceStreamer); igcDeviceContexts[&device] = std::move(newDeviceCtx); return igcDeviceContexts[&device].get(); } IGC::CodeType::CodeType_t CompilerInterface::getPreferredIntermediateRepresentation(const Device &device) { return getFclDeviceCtx(device)->GetPreferredIntermediateRepresentation(); } CIF::RAII::UPtr_t CompilerInterface::createFclTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) { auto deviceCtx = getFclDeviceCtx(device); if (deviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } if (fclBaseTranslationCtx == nullptr) { fclBaseTranslationCtx = deviceCtx->CreateTranslationCtx(inType, outType); } return deviceCtx->CreateTranslationCtx(inType, outType); } CIF::RAII::UPtr_t CompilerInterface::createIgcTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType) { auto deviceCtx = getIgcDeviceCtx(device); if (deviceCtx == nullptr) { DEBUG_BREAK_IF(true); // could not create device context return nullptr; } return deviceCtx->CreateTranslationCtx(inType, outType); } } // namespace NEO compute-runtime-20.13.16352/shared/source/compiler_interface/compiler_interface.h000066400000000000000000000167531363734646600277740ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/built_ins/sip.h" #include "shared/source/compiler_interface/compiler_cache.h" #include "shared/source/helpers/string.h" #include "shared/source/os_interface/os_library.h" #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/spinlock.h" #include "cif/common/cif_main.h" #include "ocl_igc_interface/code_type.h" #include "ocl_igc_interface/fcl_ocl_device_ctx.h" 
#include "ocl_igc_interface/igc_ocl_device_ctx.h" #include #include namespace NEO { class Device; using specConstValuesMap = std::unordered_map; /* Describes one translation request: the source, preferred intermediate and output code types, plus the source buffer, API and internal options, tracing options, a GTPin input pointer and specialization constant values. The constructor sets the three code types; preferredIntermediateType defaults to IGC::CodeType::undefined when it is not supplied. */ struct TranslationInput { TranslationInput(IGC::CodeType::CodeType_t srcType, IGC::CodeType::CodeType_t outType, IGC::CodeType::CodeType_t preferredIntermediateType = IGC::CodeType::undefined) : srcType(srcType), preferredIntermediateType(preferredIntermediateType), outType(outType) { } bool allowCaching = false; ArrayRef src; ArrayRef apiOptions; ArrayRef internalOptions; const char *tracingOptions = nullptr; uint32_t tracingOptionsCount = 0; IGC::CodeType::CodeType_t srcType = IGC::CodeType::invalid; IGC::CodeType::CodeType_t preferredIntermediateType = IGC::CodeType::invalid; IGC::CodeType::CodeType_t outType = IGC::CodeType::invalid; void *GTPinInput = nullptr; specConstValuesMap specializedValues; }; /* Collects the results of a translation: the intermediate representation and its code type, the device binary, debug data and the frontend and backend compiler logs. ErrorCode is the status type returned by the CompilerInterface entry points. */ struct TranslationOutput { enum class ErrorCode { Success = 0, CompilerNotAvailable, CompilationFailure, BuildFailure, LinkFailure, AlreadyCompiled, UnknownError, }; struct MemAndSize { std::unique_ptr mem; size_t size = 0; }; IGC::CodeType::CodeType_t intermediateCodeType = IGC::CodeType::invalid; MemAndSize intermediateRepresentation; MemAndSize deviceBinary; MemAndSize debugData; std::string frontendCompilerLog; std::string backendCompilerLog; /* Container overload: clears dst when src is null or reports a raw size of 0, otherwise assigns the buffer memory and size to dst. */ template static void makeCopy(ContainerT &dst, CIF::Builtins::BufferSimple *src) { if ((nullptr == src) || (src->GetSizeRaw() == 0)) { dst.clear(); return; } dst.assign(src->GetMemory(), src->GetSize()); } /* MemAndSize overload: resets dst.mem and zeroes dst.size when src is null or reports a raw size of 0, otherwise stores the size and a copy of the buffer memory made by the global makeCopy helper. */ static void makeCopy(MemAndSize &dst, CIF::Builtins::BufferSimple *src) { if ((nullptr == src) || (src->GetSizeRaw() == 0)) { dst.mem.reset(); dst.size = 0U; return; } dst.size = src->GetSize(); dst.mem = ::makeCopy(src->GetMemory(), src->GetSize()); } }; /* Pair of CIF buffers (ids and sizes); used as the output parameter of CompilerInterface::getSpecConstantsInfo(). */ struct SpecConstantInfo { CIF::RAII::UPtr_t idsBuffer; CIF::RAII::UPtr_t sizesBuffer; }; /* Front end to the FCL and IGC compiler libraries: owns the loaded libraries, their CIF main objects and per-device contexts, and exposes build, compile, link and related entry points. Copy and move operations are deleted. */ class CompilerInterface { public: CompilerInterface(); CompilerInterface(const CompilerInterface &) = delete; CompilerInterface 
&operator=(const CompilerInterface &) = delete; CompilerInterface(CompilerInterface &&) = delete; CompilerInterface &operator=(CompilerInterface &&) = delete; virtual ~CompilerInterface(); /* Allocates a CompilerInterfaceT with new and calls initialize() on it; when initialize() fails the instance is deleted and nullptr is returned. The result is a raw pointer, so releasing it is left to the caller. */ template static CompilerInterfaceT *createInstance(std::unique_ptr cache, bool requireFcl) { auto instance = new CompilerInterfaceT(); if (!instance->initialize(std::move(cache), requireFcl)) { delete instance; instance = nullptr; } return instance; } MOCKABLE_VIRTUAL TranslationOutput::ErrorCode build(const NEO::Device &device, const TranslationInput &input, TranslationOutput &output); MOCKABLE_VIRTUAL TranslationOutput::ErrorCode compile(const NEO::Device &device, const TranslationInput &input, TranslationOutput &output); MOCKABLE_VIRTUAL TranslationOutput::ErrorCode link(const NEO::Device &device, const TranslationInput &input, TranslationOutput &output); MOCKABLE_VIRTUAL TranslationOutput::ErrorCode getSpecConstantsInfo(const NEO::Device &device, ArrayRef srcSpirV, SpecConstantInfo &output); TranslationOutput::ErrorCode createLibrary(NEO::Device &device, const TranslationInput &input, TranslationOutput &output); MOCKABLE_VIRTUAL TranslationOutput::ErrorCode getSipKernelBinary(NEO::Device &device, SipKernelType type, std::vector &retBinary); protected: MOCKABLE_VIRTUAL bool initialize(std::unique_ptr cache, bool requireFcl); MOCKABLE_VIRTUAL bool loadFcl(); MOCKABLE_VIRTUAL bool loadIgc(); /* The spinlock is a static member, so it is shared by all instances; lock() returns a std::unique_lock constructed over it. */ static SpinLock spinlock; MOCKABLE_VIRTUAL std::unique_lock lock() { return std::unique_lock{spinlock}; } std::unique_ptr cache = nullptr; using igcDevCtxUptr = CIF::RAII::UPtr_t; using fclDevCtxUptr = CIF::RAII::UPtr_t; std::unique_ptr igcLib; CIF::RAII::UPtr_t igcMain = nullptr; std::map igcDeviceContexts; std::unique_ptr fclLib; CIF::RAII::UPtr_t fclMain = nullptr; std::map fclDeviceContexts; CIF::RAII::UPtr_t fclBaseTranslationCtx = nullptr; MOCKABLE_VIRTUAL IGC::FclOclDeviceCtxTagOCL *getFclDeviceCtx(const Device &device); MOCKABLE_VIRTUAL IGC::IgcOclDeviceCtxTagOCL 
*getIgcDeviceCtx(const Device &device); MOCKABLE_VIRTUAL IGC::CodeType::CodeType_t getPreferredIntermediateRepresentation(const Device &device); MOCKABLE_VIRTUAL CIF::RAII::UPtr_t createFclTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType); MOCKABLE_VIRTUAL CIF::RAII::UPtr_t createIgcTranslationCtx(const Device &device, IGC::CodeType::CodeType_t inType, IGC::CodeType::CodeType_t outType); /* A library counts as available when its CIF main pointer is non-null. */ bool isFclAvailable() const { return (fclMain != nullptr); } bool isIgcAvailable() const { return (igcMain != nullptr); } /* FCL is required only when the source is oclC. IGC is required unless the source is oclC and the destination is spirV, llvmBc or llvmLl. Returns true when every required library is available. */ bool isCompilerAvailable(IGC::CodeType::CodeType_t translationSrc, IGC::CodeType::CodeType_t translationDst) const { bool requiresFcl = (IGC::CodeType::oclC == translationSrc); bool requiresIgc = (IGC::CodeType::oclC != translationSrc) || ((IGC::CodeType::spirV != translationDst) && (IGC::CodeType::llvmBc != translationDst) && (IGC::CodeType::llvmLl != translationDst)); return (isFclAvailable() || (false == requiresFcl)) && (isIgcAvailable() || (false == requiresIgc)); } }; } // namespace NEO compute-runtime-20.13.16352/shared/source/compiler_interface/compiler_interface.inl000066400000000000000000000103621363734646600303150ustar00rootroot00000000000000/* * Copyright (C) 2017-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/os_interface/os_library.h" #include "opencl/source/helpers/validators.h" #include "cif/builtins/memory/buffer/buffer.h" #include "cif/common/cif.h" #include "cif/import/library_api.h" #include "ocl_igc_interface/ocl_translation_output.h" namespace NEO { using CIFBuffer = CIF::Builtins::BufferSimple; /* Calls tCtx->Translate() with src, options and internalOptions (tracing arguments passed as nullptr and 0). Returns nullptr when NEO::areNotNullptr() rejects tCtx, src, options or internalOptions, when Translate() returns null, or when the result has a null output, build log or debug data buffer; otherwise returns the translation result. */ template inline CIF::RAII::UPtr_t translate(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *options, CIFBuffer *internalOptions) { if (false == NEO::areNotNullptr(tCtx, src, options, internalOptions)) { return nullptr; } auto ret = tCtx->Translate(src, options, internalOptions, nullptr, 0); if (ret == nullptr) { return nullptr; // assume OOM 
or internal error } if ((ret->GetOutput() == nullptr) || (ret->GetBuildLog() == nullptr) || (ret->GetDebugData() == nullptr)) { return nullptr; // assume OOM or internal error } return ret; } /* GTPin-aware overload: calls tCtx->Translate() with src, options, internalOptions and gtpinInit (tracing arguments passed as nullptr and 0). Returns nullptr when NEO::areNotNullptr() rejects tCtx, src, options or internalOptions, when Translate() returns null, or when the result has a null output, build log or debug data buffer. gtpinInit itself is not checked. */ template inline CIF::RAII::UPtr_t translate(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *options, CIFBuffer *internalOptions, void *gtpinInit) { if (false == NEO::areNotNullptr(tCtx, src, options, internalOptions)) { return nullptr; } auto ret = tCtx->Translate(src, options, internalOptions, nullptr, 0, gtpinInit); if (ret == nullptr) { return nullptr; // assume OOM or internal error } if ((ret->GetOutput() == nullptr) || (ret->GetBuildLog() == nullptr) || (ret->GetDebugData() == nullptr)) { return nullptr; // assume OOM or internal error } return ret; } /* Forwards to tCtx->GetSpecConstantsInfoImpl() and returns its result; returns false without calling it when NEO::areNotNullptr() rejects any of the four pointer arguments. */ template inline bool getSpecConstantsInfoImpl(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *outSpecConstantsIds, CIFBuffer *outSpecConstantsSizes) { if (!NEO::areNotNullptr(tCtx, src, outSpecConstantsIds, outSpecConstantsSizes)) { return false; } return tCtx->GetSpecConstantsInfoImpl(src, outSpecConstantsIds, outSpecConstantsSizes); } /* Specialization-constant overload: additionally forwards specConstantsIds and specConstantsValues to tCtx->Translate(). Returns nullptr under the same conditions as the other overloads. NOTE(review): specConstantsIds and specConstantsValues are not part of the up-front null check - confirm that null is acceptable for them. */ template inline CIF::RAII::UPtr_t translate(TranslationCtx *tCtx, CIFBuffer *src, CIFBuffer *specConstantsIds, CIFBuffer *specConstantsValues, CIFBuffer *options, CIFBuffer *internalOptions, void *gtpinInit) { if (false == NEO::areNotNullptr(tCtx, src, options, internalOptions)) { return nullptr; } auto ret = tCtx->Translate(src, specConstantsIds, specConstantsValues, options, internalOptions, nullptr, 0, gtpinInit); if (ret == nullptr) { return nullptr; // assume OOM or internal error } if (!NEO::areNotNullptr(ret->GetOutput(), ret->GetBuildLog(), ret->GetDebugData())) { return nullptr; // assume OOM or internal error } return ret; } /* Declaration only. NOTE(review): presumably invokes createFunc with sanitizer instrumentation disabled - confirm at the definition. */ CIF::CIFMain *createMainNoSanitize(CIF::CreateCIFMainFunc_t createFunc); template